// Copyright 2021 The Mumble Developers. All rights reserved.
// Use of this source code is governed by a BSD-style license
// that can be found in the LICENSE file at the root of the
// Mumble source tree or at <https://www.mumble.info/LICENSE>.

syntax = "proto3";

package MumbleUDP;

option optimize_for = SPEED;

// An audio packet carrying Opus-encoded voice data together with its routing and stream metadata.
message Audio {
	// Routing information of this packet. Which of the two fields is used depends on the direction of transmission.
	oneof Header {
		// Client -> server: the target the audio data is addressed to, given as a number in the range
		// [0, 2^{32} - 1]:
		//   0          "normal talking"
		//   2^{5} - 1  "server loopback" (i.e. 31)
		//   any other  a shout/whisper target that has previously been registered via a VoiceTarget message
		//              (via TCP)
		uint32 target = 1;

		// Server -> client: the context in which the audio has been sent.
		//   0: Normal speech
		//   1: Shout to channel
		//   2: Whisper to user
		//   3: Received via channel listener
		uint32 context = 2;
	}

	// Session of the client (sender) that this audio originally came from. Clients are not required to set this
	// field when sending audio to the server, but it will always be set on audio received from the server.
	uint32 sender_session = 3;

	// Number of the first audio frame contained in this packet, i.e. the position of that frame within the overall
	// audio stream.
	uint64 frame_number = 4;

	// The voice data payload itself, encoded in the Opus format.
	bytes opus_data = 5;

	// Optional position of the speaker in a virtual world (in meters). Although declared as a "list", this is
	// really expected to be an array of size 3 holding the X, Y and Z coordinates of the position (in that order).
	repeated float positional_data = 6;

	// Volume adjustment that the server has determined for this audio packet. Whether to apply it to the resulting
	// audio (or not) is up to the client. Note: A value of 0 means that this field is unset.
	float volume_adjustment = 7;

	// Note: the field indices 8 to 15 are deliberately skipped in order to keep them available for future extensions
	// of the protocol with fields that are encountered very often. All field indices <= 15 require only a single
	// byte of encoding overhead, whereas the ones > 15 require (at least) two bytes. The reason lies in the Protobuf
	// encoding scheme: in the first byte, 1 bit serves as the varint continuation flag and 3 bits encode the field's
	// type, which leaves 4 bits for the field index. As 0 is not a valid field index, these 4 bits cover the field
	// indices 1 to 15. For details see https://developers.google.com/protocol-buffers/docs/encoding

	// Flag marking this audio packet as the end of transmission for the current audio stream.
	bool is_terminator = 16;
}

// Ping message used for checking UDP connectivity (and the roundtrip ping). It can additionally be used to obtain
// further details about the server (e.g. its version).
message Ping {
	// Timestamp as encoded by the client. Clients may pick an arbitrary format for it (as long as the value fits
	// into a uint64 field), because a server is not supposed to attempt to decode or modify this field.
	uint64 timestamp = 1;

	// Flag set by the sending client if it wants to obtain additional information about the server.
	bool request_extended_information = 2;


	// The fields below carry the "additional information". They are filled out by the server on request.

	// Version of the server, expressed in the new version format.
	// The legacy version format is not supported by the new protobuf Ping packet introduced with 1.5,
	// since both server and client have to support this new format.
	// (See https://github.com/mumble-voip/mumble/issues/5827)
	uint64 server_version_v2 = 3;

	// Number of users currently connected to the server.
	uint32 user_count = 4;

	// Maximum number of users permitted on this server.
	uint32 max_user_count = 5;

	// Maximum bandwidth each user is allowed to use for sending audio to the server.
	// NOTE(review): the unit of this value is not stated in this file - confirm against the implementation.
	uint32 max_bandwidth_per_user = 6;
}