CHANGE: Use Protobuf for UDP messages

Previously, Mumble used a custom binary format for transmitting data via UDP (mainly audio). This has worked for a long time, but besides being inconvenient for third-party implementers (they had to manually re-implement encoding and decoding support for this format), the format was not very flexible and changes to the data format were very hard to make. In order to improve on this situation, this commit introduces changes that allow Protobuf to be used for the UDP messages as well (it is already used for TCP). With that, it should be relatively easy to extend or change the UDP packet formats in the future, and third-party implementers can now simply use Protobuf to handle decoding and encoding packets for them (much less work and much less error-prone). Since the new Protobuf format is incompatible with the old UDP format, this commit also includes support for dealing with older clients or servers that don't recognize the new protocol yet. That way, the new protocol format is only used if both the client and the server are recent enough to have it implemented (assumed to be the case for versions >= 1.5.0). Note also that the server will make sure that clients using the old and the new format can seamlessly communicate with one another. Therefore, it should not be noticeable to the user which protocol is currently in use. Note also that the new protocol format only supports Opus as an audio codec. If one of the legacy codecs is to be used, the legacy packet format has to be used as well. However, all codecs except for Opus will be removed from Mumble in the future anyway. Fixes #4350
author: Robert Adam <dev@robert-adam.de> 2021-11-22 13:17:58 +0300
committer: Robert Adam <dev@robert-adam.de> 2022-03-27 10:49:58 +0300
commit: 1d45d991aa4d53b6c1bd7d7cae0126a21f3991e1 (patch)
tree: bd810f21a6cc43fa718e0f2807ac373403244edf
parent: 06b56530997fb623c9690ad7bdb8d3f5915d48a0 (diff)
70 files changed, 3787 insertions, 866 deletions
diff --git a/docs/dev/build-instructions/cmake_options.md b/docs/dev/build-instructions/cmake_options.md
index 9c4c45c82..dd871d510 100644
--- a/docs/dev/build-instructions/cmake_options.md
+++ b/docs/dev/build-instructions/cmake_options.md
@@ -19,6 +19,11 @@ Build support for ALSA.
 Build support for ASIO audio input.
 (Default: OFF)
 
+### benchmarks
+
+Build benchmarks
+(Default: OFF)
+
 ### BUILD_OVERLAY_XCOMPILE
 
 Build an x86 overlay
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 64eae5f27..d570dd693 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -4,10 +4,13 @@
 # Mumble source tree or at <https://www.mumble.info/LICENSE>.
 
 set(PROTO_FILE "${CMAKE_CURRENT_SOURCE_DIR}/Mumble.proto")
+set(UDP_PROTO_FILE "${CMAKE_CURRENT_SOURCE_DIR}/MumbleUDP.proto")
 
 option(client "Build the client (Mumble)" ON)
 option(server "Build the server (Murmur)" ON)
 
+option(benchmarks "Build benchmarks" OFF)
+
 option(qssldiffiehellmanparameters "Build support for custom Diffie-Hellman parameters." ON)
 
 option(zeroconf "Build support for zeroconf (mDNS/DNS-SD)." ON)
@@ -38,9 +41,10 @@ add_library(shared STATIC)
 set_property(TARGET shared PROPERTY INTERPROCEDURAL_OPTIMIZATION ${lto})
 
 protobuf_generate(LANGUAGE cpp TARGET shared PROTOS ${PROTO_FILE} OUT_VAR BUILT_PROTO_FILES)
+protobuf_generate(LANGUAGE cpp TARGET shared PROTOS ${UDP_PROTO_FILE} OUT_VAR BUILT_UDP_PROTO_FILES)
 
 # Disable warnings for the generated source files
-foreach(CURRENT_FILE IN LISTS BUILT_PROTO_FILES)
+foreach(CURRENT_FILE IN LISTS BUILT_PROTO_FILES BUILT_UDP_PROTO_FILES)
 	set_source_files_properties("${CURRENT_FILE}" PROPERTIES COMPILE_FLAGS "-w")
 endforeach()
 
@@ -62,6 +66,7 @@ set(SHARED_SOURCES
 	"HTMLFilter.cpp"
 	"License.cpp"
 	"LogEmitter.cpp"
+	"MumbleProtocol.cpp"
 	"OSInfo.cpp"
 	"PasswordGenerator.cpp"
 	"PlatformCheck.cpp"
@@ -76,6 +81,7 @@ set(SHARED_SOURCES
 	"Timer.cpp"
 	"UnresolvedServerAddress.cpp"
 	"Version.cpp"
+	"VolumeAdjustment.cpp"
 
 	"crypto/CryptographicHash.cpp"
 	"crypto/CryptographicRandom.cpp"
@@ -95,6 +101,7 @@ set(SHARED_HEADERS
 	"License.h"
 	"licenses.h"
 	"LogEmitter.h"
+	"MumbleProtocol.h"
 	"Net.h"
 	"OSInfo.h"
 	"PasswordGenerator.h"
@@ -109,6 +116,7 @@ set(SHARED_HEADERS
 	"Timer.h"
 	"UnresolvedServerAddress.h"
 	"Version.h"
+	"VolumeAdjustment.h"
 
 	"crypto/CryptographicHash.h"
 	"crypto/CryptographicRandom.h"
@@ -173,6 +181,10 @@ message(STATUS "Tracy: ${TRACY_ENABLE}")
 
 target_link_libraries(shared PUBLIC Tracy::TracyClient)
 
+# Add the GSL
+add_subdirectory("${3RDPARTY_DIR}/gsl" "${CMAKE_CURRENT_BINARY_DIR}/gsl")
+target_link_libraries(shared PUBLIC GSL)
+
 if(client)
 	add_subdirectory(mumble)
 
@@ -193,3 +205,7 @@ endif()
 if(tests)
 	add_subdirectory(tests)
 endif()
+
+if(benchmarks)
+	add_subdirectory(benchmarks)
+endif()
diff --git a/src/Connection.cpp b/src/Connection.cpp
index db68506ff..3f7b62a6b 100644
--- a/src/Connection.cpp
+++ b/src/Connection.cpp
@@ -4,7 +4,6 @@
 // Mumble source tree or at <https://www.mumble.info/LICENSE>.
 
 #include "Connection.h"
-#include "Message.h"
 #include "Mumble.pb.h"
 #include "SSL.h"
 
@@ -121,7 +120,7 @@ void Connection::socketRead() {
 			unsigned char a_ucBuffer[6];
 
 			qtsSocket->read(reinterpret_cast< char * >(a_ucBuffer), 6);
-			uiType        = qFromBigEndian< quint16 >(&a_ucBuffer[0]);
+			m_type        = static_cast< Mumble::Protocol::TCPMessageType >(qFromBigEndian< quint16 >(&a_ucBuffer[0]));
 			iPacketLength = qFromBigEndian< quint32 >(&a_ucBuffer[2]);
 			iAvailable -= 6;
 		}
@@ -139,7 +138,7 @@ void Connection::socketRead() {
 		iPacketLength        = -1;
 		iAvailable -= iPacketLength;
 
-		emit message(uiType, qbaBuffer);
+		emit message(m_type, qbaBuffer);
 	}
 }
 
@@ -159,7 +158,8 @@ void Connection::socketDisconnected() {
 	emit connectionClosed(QAbstractSocket::UnknownSocketError, QString());
 }
 
-void Connection::messageToNetwork(const ::google::protobuf::Message &msg, unsigned int msgType, QByteArray &cache) {
+void Connection::messageToNetwork(const ::google::protobuf::Message &msg, Mumble::Protocol::TCPMessageType msgType,
+								  QByteArray &cache) {
 #if GOOGLE_PROTOBUF_VERSION >= 3004000
 	int len = msg.ByteSizeLong();
 #else
@@ -176,7 +176,8 @@ void Connection::messageToNetwork(const ::google::protobuf::Message &msg, unsign
 	msg.SerializeToArray(uc + 6, len);
 }
 
-void Connection::sendMessage(const ::google::protobuf::Message &msg, unsigned int msgType, QByteArray &cache) {
+void Connection::sendMessage(const ::google::protobuf::Message &msg, Mumble::Protocol::TCPMessageType msgType,
+							 QByteArray &cache) {
 	if (cache.isEmpty()) {
 		messageToNetwork(msg, msgType, cache);
 	}
diff --git a/src/Connection.h b/src/Connection.h
index 5a99c592c..e8743d4b2 100644
--- a/src/Connection.h
+++ b/src/Connection.h
@@ -6,6 +6,8 @@
 #ifndef MUMBLE_CONNECTION_H_
 #define MUMBLE_CONNECTION_H_
 
+#include "MumbleProtocol.h"
+
 #include <QtCore/QtGlobal>
 
 #ifdef Q_OS_WIN
@@ -39,7 +41,7 @@ private:
 protected:
 	QSslSocket *qtsSocket;
 	QElapsedTimer qtLastPacket;
-	unsigned int uiType;
+	Mumble::Protocol::TCPMessageType m_type;
 	int iPacketLength;
 #ifdef Q_OS_WIN
 	static HANDLE hQoS;
@@ -55,14 +57,16 @@ public slots:
 signals:
 	void encrypted();
 	void connectionClosed(QAbstractSocket::SocketError, const QString &reason);
-	void message(unsigned int type, const QByteArray &);
+	void message(Mumble::Protocol::TCPMessageType type, const QByteArray &);
 	void handleSslErrors(const QList< QSslError > &);
 
 public:
 	Connection(QObject *parent, QSslSocket *qtsSocket);
 	~Connection();
-	static void messageToNetwork(const ::google::protobuf::Message &msg, unsigned int msgType, QByteArray &cache);
-	void sendMessage(const ::google::protobuf::Message &msg, unsigned int msgType, QByteArray &cache);
+	static void messageToNetwork(const ::google::protobuf::Message &msg, Mumble::Protocol::TCPMessageType msgType,
+								 QByteArray &cache);
+	void sendMessage(const ::google::protobuf::Message &msg, Mumble::Protocol::TCPMessageType msgType,
+					 QByteArray &cache);
 	void sendMessage(const QByteArray &qbaMsg);
 	void disconnectSocket(bool force = false);
 	void forceFlush();
diff --git a/src/Message.h b/src/Message.h
deleted file mode 100644
index 34f404d26..000000000
--- a/src/Message.h
+++ /dev/null
@@ -1,100 +0,0 @@
-// Copyright 2007-2022 The Mumble Developers. All rights reserved.
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file at the root of the
-// Mumble source tree or at <https://www.mumble.info/LICENSE>.
-
-#ifndef MUMBLE_MESSAGE_H_
-#define MUMBLE_MESSAGE_H_
-
-#include <QtCore/QCryptographicHash>
-#include <QtCore/QString>
-#include <string>
-
-/**
-  Protobuf packet type enumeration for message handler generation.
-
-  Warning: Only append to the end.
- */
-#define MUMBLE_MH_ALL                  \
-	MUMBLE_MH_MSG(Version)             \
-	MUMBLE_MH_MSG(UDPTunnel)           \
-	MUMBLE_MH_MSG(Authenticate)        \
-	MUMBLE_MH_MSG(Ping)                \
-	MUMBLE_MH_MSG(Reject)              \
-	MUMBLE_MH_MSG(ServerSync)          \
-	MUMBLE_MH_MSG(ChannelRemove)       \
-	MUMBLE_MH_MSG(ChannelState)        \
-	MUMBLE_MH_MSG(UserRemove)          \
-	MUMBLE_MH_MSG(UserState)           \
-	MUMBLE_MH_MSG(BanList)             \
-	MUMBLE_MH_MSG(TextMessage)         \
-	MUMBLE_MH_MSG(PermissionDenied)    \
-	MUMBLE_MH_MSG(ACL)                 \
-	MUMBLE_MH_MSG(QueryUsers)          \
-	MUMBLE_MH_MSG(CryptSetup)          \
-	MUMBLE_MH_MSG(ContextActionModify) \
-	MUMBLE_MH_MSG(ContextAction)       \
-	MUMBLE_MH_MSG(UserList)            \
-	MUMBLE_MH_MSG(VoiceTarget)         \
-	MUMBLE_MH_MSG(PermissionQuery)     \
-	MUMBLE_MH_MSG(CodecVersion)        \
-	MUMBLE_MH_MSG(UserStats)           \
-	MUMBLE_MH_MSG(RequestBlob)         \
-	MUMBLE_MH_MSG(ServerConfig)        \
-	MUMBLE_MH_MSG(SuggestConfig)       \
-	MUMBLE_MH_MSG(PluginDataTransmission)
-
-class MessageHandler {
-public:
-	enum UDPMessageType { UDPVoiceCELTAlpha, UDPPing, UDPVoiceSpeex, UDPVoiceCELTBeta, UDPVoiceOpus };
-
-#define MUMBLE_MH_MSG(x) x,
-	enum MessageType { MUMBLE_MH_ALL };
-#undef MUMBLE_MH_MSG
-};
-
-/// UDPMessageTypeIsValidVoicePacket checks whether the given
-/// UDPMessageType is a valid voice packet.
-inline bool UDPMessageTypeIsValidVoicePacket(MessageHandler::UDPMessageType umt) {
-	switch (umt) {
-		case MessageHandler::UDPVoiceCELTAlpha:
-		case MessageHandler::UDPVoiceSpeex:
-		case MessageHandler::UDPVoiceCELTBeta:
-		case MessageHandler::UDPVoiceOpus:
-			return true;
-		case MessageHandler::UDPPing:
-			return false;
-	}
-	return false;
-}
-
-inline QString u8(const ::std::string &str) {
-	return QString::fromUtf8(str.data(), static_cast< int >(str.length()));
-}
-
-inline QString u8(const ::std::wstring &str) {
-	return QString::fromStdWString(str);
-}
-
-inline ::std::string u8(const QString &str) {
-	const QByteArray &qba = str.toUtf8();
-	return ::std::string(qba.constData(), qba.length());
-}
-
-inline QByteArray blob(const ::std::string &str) {
-	return QByteArray(str.data(), static_cast< int >(str.length()));
-}
-
-inline ::std::string blob(const QByteArray &str) {
-	return ::std::string(str.constData(), str.length());
-}
-
-inline QByteArray sha1(const QByteArray &blob) {
-	return QCryptographicHash::hash(blob, QCryptographicHash::Sha1);
-}
-
-inline QByteArray sha1(const QString &str) {
-	return QCryptographicHash::hash(str.toUtf8(), QCryptographicHash::Sha1);
-}
-
-#endif
diff --git a/src/MumbleProtocol.cpp b/src/MumbleProtocol.cpp
new file mode 100644
index 000000000..bc0a18ada
--- /dev/null
+++ b/src/MumbleProtocol.cpp
@@ -0,0 +1,887 @@
+// Copyright 2021 The Mumble Developers. All rights reserved.
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file at the root of the
+// Mumble source tree or at <https://www.mumble.info/LICENSE>.
+
+#include "MumbleProtocol.h"
+#include "PacketDataStream.h"
+
+#include <QtEndian>
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstring>
+
+namespace Mumble {
+namespace Protocol {
+
+	// Reports whether the two given protocol versions are compatible with each other. Currently that is the
+	// case if both versions lie on the same side of PROTOBUF_INTRODUCTION_VERSION.
+	bool protocolVersionsAreCompatible(Version::mumble_raw_version_t lhs, Version::mumble_raw_version_t rhs) {
+		const bool lhsIsLegacy = lhs < PROTOBUF_INTRODUCTION_VERSION;
+		const bool rhsIsLegacy = rhs < PROTOBUF_INTRODUCTION_VERSION;
+
+		return lhsIsLegacy == rhsIsLegacy;
+	}
+
+
+	// Returns the size of the given message as reported by Protobuf, using whichever size getter the
+	// Protobuf version we are compiled against provides.
+	std::size_t getProtobufSize(const ::google::protobuf::Message &message) {
+#if GOOGLE_PROTOBUF_VERSION < 3002000
+		// Protobuf versions before v3.2 only offer ByteSize()
+		return message.ByteSize();
+#else
+		// ByteSizeLong() is the replacement for ByteSize() that was introduced in Protobuf v3.2
+		return message.ByteSizeLong();
+#endif
+	}
+
+	// Serializes the given message into the given buffer, starting at the given offset. On success the buffer
+	// is resized to offset + serialized size and the number of serialized bytes (not counting the offset) is
+	// returned. If offset + serialized size would exceed maxAllowedSize, the buffer is left untouched and 0 is
+	// returned. If useCachedSize is set, the message's cached size is used instead of computing it anew.
+	std::size_t encodeProtobuf(const ::google::protobuf::Message &message, std::vector< byte > &buffer,
+							   std::size_t offset, std::size_t maxAllowedSize, bool useCachedSize) {
+		const std::size_t serializedSize = useCachedSize ? message.GetCachedSize() : getProtobufSize(message);
+		const std::size_t requiredSize   = serializedSize + offset;
+
+		assert(requiredSize <= maxAllowedSize);
+		if (requiredSize > maxAllowedSize) {
+			// The assertion above is compiled out of non-Debug builds, so the limit has to be enforced
+			// explicitly as well
+			qWarning("Protobuf package size (%zu) would exceed UDP packet size limit (%zu)", serializedSize,
+					 maxAllowedSize);
+
+			return 0;
+		}
+
+		buffer.resize(requiredSize);
+		message.SerializePartialToArray(buffer.data() + offset, serializedSize);
+
+		return serializedSize;
+	}
+
+	// Creates a handler that operates on the given protocol version.
+	template< Role role >
+	ProtocolHandler< role >::ProtocolHandler(Version::mumble_raw_version_t protocolVersion)
+		: m_protocolVersion(protocolVersion) {
+	}
+
+	// Returns the protocol version this handler currently operates on.
+	template< Role role >
+	Version::mumble_raw_version_t ProtocolHandler< role >::getProtocolVersion() const {
+		return m_protocolVersion;
+	}
+
+	// Replaces the protocol version this handler operates on.
+	template< Role role >
+	void ProtocolHandler< role >::setProtocolVersion(Version::mumble_raw_version_t protocolVersion) {
+		m_protocolVersion = protocolVersion;
+	}
+
+
+	// Creates an audio encoder for the given protocol version. The internal buffer is sized to hold a UDP
+	// packet of maximum size and the re-usable pre-encoded snippets are generated once up front.
+	template< Role role >
+	UDPAudioEncoder< role >::UDPAudioEncoder(Version::mumble_raw_version_t protocolVersion)
+		: ProtocolHandler< role >(protocolVersion) {
+		m_byteBuffer.resize(MAX_UDP_PACKET_SIZE);
+
+		preparePreEncodedSnippets();
+	}
+
+	// Encodes the given audio data in one go: prepares the packet, attaches the positional data and finally
+	// updates the packet for the target/context contained in the data. Returns a view on the encoded packet.
+	template< Role role > gsl::span< const byte > UDPAudioEncoder< role >::encodeAudioPacket(const AudioData &data) {
+		prepareAudioPacket(data);
+		addPositionalData(data);
+
+		const gsl::span< const byte > encodedPacket = updateAudioPacket(data);
+
+		return encodedPacket;
+	}
+
+	// Prepares the audio packet using the packet format that matches the current protocol version.
+	template< Role role > void UDPAudioEncoder< role >::prepareAudioPacket(const AudioData &data) {
+		const bool useLegacyFormat = this->getProtocolVersion() < PROTOBUF_INTRODUCTION_VERSION;
+
+		if (useLegacyFormat) {
+			prepareAudioPacket_legacy(data);
+			return;
+		}
+
+		prepareAudioPacket_protobuf(data);
+	}
+
+	// Updates the previously prepared audio packet using the packet format that matches the current protocol
+	// version and returns a view on the resulting packet.
+	template< Role role > gsl::span< const byte > UDPAudioEncoder< role >::updateAudioPacket(const AudioData &data) {
+		const bool useLegacyFormat = this->getProtocolVersion() < PROTOBUF_INTRODUCTION_VERSION;
+
+		return useLegacyFormat ? updateAudioPacket_legacy(data) : updateAudioPacket_protobuf(data);
+	}
+
+	// Attaches positional data to the prepared packet using the packet format that matches the current
+	// protocol version.
+	template< Role role > void UDPAudioEncoder< role >::addPositionalData(const AudioData &data) {
+		const bool useLegacyFormat = this->getProtocolVersion() < PROTOBUF_INTRODUCTION_VERSION;
+
+		if (useLegacyFormat) {
+			addPositionalData_legacy(data);
+			return;
+		}
+
+		addPositionalData_protobuf(data);
+	}
+
+	// Discards previously added positional data by resetting the size of the packet including positional data
+	// back to the size of the static part.
+	template< Role role > void UDPAudioEncoder< role >::dropPositionalData() {
+		m_positionalAudioSize = m_staticPartSize;
+	}
+
+	// Writes the part of a legacy audio packet that does not depend on the receiver into m_byteBuffer: the
+	// header byte (codec bits only), the sender session (server only), the frame number and the audio payload.
+	// The size of that part is stored in m_staticPartSize (0 if the data did not fit into the buffer).
+	template< Role role > void UDPAudioEncoder< role >::prepareAudioPacket_legacy(const AudioData &data) {
+		m_byteBuffer.resize(MAX_UDP_PACKET_SIZE);
+
+		// Numeric ID of the used codec in the legacy format (the ID 1 is reserved for ping packets)
+		byte codecID = 0;
+		switch (data.usedCodec) {
+			case AudioCodec::Opus:
+				codecID = 4;
+				break;
+			case AudioCodec::CELT_Beta:
+				codecID = 3;
+				break;
+			case AudioCodec::Speex:
+				codecID = 2;
+				break;
+			case AudioCodec::CELT_Alpha:
+				codecID = 0;
+				break;
+		}
+
+		// The ID has to end up in the 3 most significant bits of the header byte
+		assert(codecID < (1 << 3));
+		m_byteBuffer[0] = static_cast< byte >(codecID << 5);
+
+		// Everything after the header byte is written via the stream
+		PacketDataStream stream(m_byteBuffer.data() + 1, m_byteBuffer.size() - 1);
+
+		if (this->getRole() == Role::Server) {
+			stream << data.senderSession;
+		}
+
+		// Sequence number of the first audio frame contained in this packet
+		stream << static_cast< int >(data.frameNumber);
+
+		switch (data.usedCodec) {
+			case AudioCodec::CELT_Alpha:
+			case AudioCodec::CELT_Beta:
+			case AudioCodec::Speex: {
+				// For these codecs the payload is attached as-is
+				stream.append(reinterpret_cast< const char * >(data.payload.data()), data.payload.size());
+				break;
+			}
+			case AudioCodec::Opus: {
+				// Opus frames are preceded by a size field, in which the 14th bit marks the last frame
+				assert(data.payload.size() < (1 << 13));
+
+				auto sizeField = data.payload.size();
+				if (data.isLastFrame) {
+					sizeField |= (1 << 13);
+				}
+				stream << static_cast< int >(sizeField);
+
+				// The actual Opus frame follows the size field
+				stream.append(reinterpret_cast< const char * >(data.payload.data()), data.payload.size());
+				break;
+			}
+		}
+
+		if (stream.isValid()) {
+			// +1 as the header byte is not part of the stream
+			m_staticPartSize = stream.size() + 1;
+		} else {
+			qWarning("MumbleProtocol: Encoding legacy packet (fixed part) overflowed buffer size");
+			m_staticPartSize = 0;
+		}
+
+		// No positional data has been attached (yet)
+		m_positionalAudioSize = m_staticPartSize;
+	}
+
+	// Writes the target/context of the given data into the header byte of the prepared legacy packet and
+	// returns a view on the packet, with or without the positional data depending on the given data.
+	// An empty span is returned if the target does not fit into the header.
+	template< Role role >
+	gsl::span< const byte > UDPAudioEncoder< role >::updateAudioPacket_legacy(const AudioData &data) {
+		m_byteBuffer.resize(MAX_UDP_PACKET_SIZE);
+
+		// Only the 5 least significant bits of the header byte are available for the target
+		const bool targetFitsIntoHeader = data.targetOrContext < (1 << 5);
+		if (!targetFitsIntoHeader) {
+			// Invalid target - this can easily happen when activating PTT before being connected to a server
+			return {};
+		}
+
+		// Keep the 3 most significant bits (audio/packet type) and combine them with the target
+		const byte typeBits = m_byteBuffer[0] & 0xe0;
+		m_byteBuffer[0]     = static_cast< byte >(data.targetOrContext) | typeBits;
+
+		std::size_t packetSize = m_staticPartSize;
+		if (data.containsPositionalData) {
+			packetSize = m_positionalAudioSize;
+		}
+
+		return gsl::span< byte >(m_byteBuffer.data(), packetSize);
+	}
+
+
+	// Attaches the three position values of the given data to the prepared legacy packet, directly after the
+	// static part, and records the resulting size in m_positionalAudioSize. Does nothing if the data does not
+	// contain positional data.
+	template< Role role > void UDPAudioEncoder< role >::addPositionalData_legacy(const AudioData &data) {
+		if (!data.containsPositionalData) {
+			return;
+		}
+
+		PacketDataStream stream(m_byteBuffer.data() + m_staticPartSize, m_byteBuffer.size() - m_staticPartSize);
+
+		assert(data.position.size() == 3);
+		stream << data.position[0];
+		stream << data.position[1];
+		stream << data.position[2];
+
+		if (stream.isValid()) {
+			m_positionalAudioSize = stream.size() + m_staticPartSize;
+		} else {
+			qWarning("MumbleProtocol: Adding positional data to legacy packet overflowed buffer size");
+			m_positionalAudioSize = m_staticPartSize;
+		}
+	}
+
+	// Encodes the part of a Protobuf audio packet that does not depend on the receiver (sender session on the
+	// server, frame number, Opus payload, terminator flag) into m_byteBuffer, preceded by the header byte.
+	//
+	// The packet is split into a "fixed" and a "variable" part: the former holds everything that is the same
+	// regardless of where the audio is sent to, the latter everything that may differ per receiver. As Protobuf
+	// messages may be concatenated once they are in wire-format, the variable part can be appended later
+	// without having to re-encode the entire message. This is mainly important on the server-side.
+	template< Role role > void UDPAudioEncoder< role >::prepareAudioPacket_protobuf(const AudioData &data) {
+		// At the moment only Opus is supported in the newer Protobuf UDP protocol.
+		// NOTE(review): the legacy encoder is invoked for other codecs, but execution continues below, so the
+		// legacy result gets overwritten by the Protobuf encoding - confirm whether a fallback to the legacy
+		// format (which would also have to cover the other encoding steps) is what is intended here.
+		if (data.usedCodec != AudioCodec::Opus) {
+			prepareAudioPacket_legacy(data);
+		}
+
+		m_audioMessage.Clear();
+
+		if (this->getRole() == Role::Server) {
+			m_audioMessage.set_sender_session(data.senderSession);
+		}
+
+		m_audioMessage.set_frame_number(data.frameNumber);
+		m_audioMessage.set_opus_data(data.payload.data(), data.payload.size());
+		m_audioMessage.set_is_terminator(data.isLastFrame);
+
+		// The message is written behind the header byte (hence the offset of 1)
+		const std::size_t encodedSize = encodeProtobuf(m_audioMessage, m_byteBuffer, 1, MAX_UDP_PACKET_SIZE, false);
+
+		m_byteBuffer[0] = static_cast< byte >(UDPMessageType::Audio);
+
+		// +1 to account for the header byte
+		m_staticPartSize      = encodedSize + 1;
+		m_positionalAudioSize = m_staticPartSize;
+	}
+
+	// Copies the given pre-encoded snippet into the destination buffer, starting at the given offset. The
+	// destination is resized to exactly offset + snippet size. Returns the number of bytes written or 0 if the
+	// resulting packet would be larger than maxPacketSize (in which case the destination is left untouched).
+	std::size_t writeSnippet(gsl::span< const byte > source, std::vector< byte > &destination, std::size_t offset,
+							 std::size_t maxPacketSize) {
+		// A packet that exactly fills the maximum size is still valid (same limit as used by encodeProtobuf)
+		if (maxPacketSize < offset + source.size()) {
+			qWarning("MumbleProtocol: Buffer overflow while writing snippet. Max buffer size is %zu and required size "
+					 "is %zu",
+					 maxPacketSize, offset + source.size());
+			return 0;
+		}
+		destination.resize(offset + source.size());
+
+		std::memcpy(destination.data() + offset, source.data(), source.size());
+
+		return source.size();
+	}
+
+	// Appends the receiver-dependent fields to the prepared Protobuf packet and returns a view on the result.
+	// On the client that is the target, on the server the volume adjustment (if different from 1) and the
+	// context. Pre-encoded snippets are used where available. The fields are written behind the positional
+	// data if the given data contains positional data and directly behind the static part otherwise.
+	template< Role role >
+	gsl::span< const byte > UDPAudioEncoder< role >::updateAudioPacket_protobuf(const AudioData &data) {
+		std::size_t packetSize = m_staticPartSize;
+		if (data.containsPositionalData) {
+			packetSize = m_positionalAudioSize;
+		}
+
+		if (packetSize == 0) {
+			// There has to be a prepared packet that we can build upon
+			qWarning("MumbleProtocol: Can't update a packet that hasn't been prepared yet");
+			return {};
+		}
+
+		switch (this->getRole()) {
+			case Role::Client: {
+				m_audioMessage.Clear();
+				m_audioMessage.set_target(data.targetOrContext);
+
+				packetSize += encodeProtobuf(m_audioMessage, m_byteBuffer, packetSize, MAX_UDP_PACKET_SIZE, false);
+
+				return gsl::span< const byte >(m_byteBuffer.data(), packetSize);
+			}
+			case Role::Server: {
+				if (data.volumeAdjustment.factor != 1.0f) {
+					const gsl::span< const byte > volumeSnippet = getPreEncodedVolumeAdjustment(data.volumeAdjustment);
+
+					if (volumeSnippet.empty()) {
+						// There is no pre-encoded snippet for this adjustment -> encode it explicitly
+						m_audioMessage.Clear();
+						m_audioMessage.set_volume_adjustment(data.volumeAdjustment.factor);
+
+						packetSize +=
+							encodeProtobuf(m_audioMessage, m_byteBuffer, packetSize, MAX_UDP_PACKET_SIZE, false);
+					} else {
+						packetSize += writeSnippet(volumeSnippet, m_byteBuffer, packetSize, MAX_UDP_PACKET_SIZE);
+					}
+				}
+
+				const gsl::span< const byte > contextSnippet = getPreEncodedContext(data.targetOrContext);
+
+				if (contextSnippet.empty()) {
+					// There is no pre-encoded snippet for this context -> encode it explicitly
+					m_audioMessage.Clear();
+					m_audioMessage.set_context(data.targetOrContext);
+
+					packetSize += encodeProtobuf(m_audioMessage, m_byteBuffer, packetSize, MAX_UDP_PACKET_SIZE, false);
+				} else {
+					packetSize += writeSnippet(contextSnippet, m_byteBuffer, packetSize, MAX_UDP_PACKET_SIZE);
+				}
+
+				return gsl::span< const byte >(m_byteBuffer.data(), packetSize);
+			}
+		}
+
+		qWarning("MumbleProtocol: Reached theoretically unreachable code");
+		return {};
+	}
+
+
+	// Encodes the three position values of the given data as a Protobuf snippet directly behind the static
+	// part of the prepared packet and records the resulting size in m_positionalAudioSize. Does nothing if the
+	// data does not contain positional data.
+	template< Role role > void UDPAudioEncoder< role >::addPositionalData_protobuf(const AudioData &data) {
+		if (!data.containsPositionalData) {
+			return;
+		}
+
+		m_audioMessage.Clear();
+		m_audioMessage.add_positional_data(data.position[0]);
+		m_audioMessage.add_positional_data(data.position[1]);
+		m_audioMessage.add_positional_data(data.position[2]);
+
+		const std::size_t encodedSize =
+			encodeProtobuf(m_audioMessage, m_byteBuffer, m_staticPartSize, MAX_UDP_PACKET_SIZE, false);
+
+		m_positionalAudioSize = m_staticPartSize + encodedSize;
+	}
+
+	// Fills m_preEncodedContext and m_preEncodedVolumeAdjustment with the wire-format encoding of the audio
+	// contexts and volume adjustments that are expected to be used, so that they can be copied into packets
+	// later on instead of being encoded over and over again.
+	template< Role role > void UDPAudioEncoder< role >::preparePreEncodedSnippets() {
+		static_assert(AudioContext::begin == 0, "AudioContext::begin is not zero (breaks assumption)");
+		static_assert(AudioContext::end >= 0, "AudioContext::end is negative (breaks assumption)");
+
+		// Part 1: the expected voice audio contexts
+		m_audioMessage.Clear();
+		m_preEncodedContext.resize(AudioContext::end);
+
+		for (audio_context_t context = AudioContext::begin; context < AudioContext::end; ++context) {
+			m_audioMessage.set_context(context);
+
+			// Size limit: the size of the used field type (uint32) plus 1 byte overhead for the varint-encoding
+			// plus 1 byte of overhead for encoding the message type and field number.
+			const std::size_t contextSizeLimit = sizeof(std::uint32_t) + 1 + 1;
+
+			bool contextEncoded = encodeProtobuf(m_audioMessage, m_preEncodedContext[context], 0, contextSizeLimit, false);
+			(void) contextEncoded;
+			assert(contextEncoded);
+		}
+
+		// Part 2: the expected volume adjustments (the client UI allows to specify integer values between
+		// -60dB and +30dB)
+		m_audioMessage.Clear();
+		m_preEncodedVolumeAdjustment.resize(preEncodedDBAdjustmentEnd - preEncodedDBAdjustmentBegin);
+
+		for (int dbAdjustment = preEncodedDBAdjustmentBegin; dbAdjustment < preEncodedDBAdjustmentEnd; ++dbAdjustment) {
+			// An adjustment of +6dB corresponds to doubling the volume
+			float adjustmentFactor = std::pow(2.0, dbAdjustment / 6.0);
+			m_audioMessage.set_volume_adjustment(adjustmentFactor);
+
+			// Size limit: the size of the used field (float) plus 1 byte overhead for encoding the field type
+			// and number
+			const std::size_t adjustmentSizeLimit = sizeof(float) + 1;
+
+			bool adjustmentEncoded =
+				encodeProtobuf(m_audioMessage, m_preEncodedVolumeAdjustment[dbAdjustment - preEncodedDBAdjustmentBegin],
+							   0, adjustmentSizeLimit, false);
+			(void) adjustmentEncoded;
+			assert(adjustmentEncoded);
+		}
+	}
+
+	// Returns a view on the pre-encoded snippet for the given audio context or an empty span if there is no
+	// pre-encoded snippet for it.
+	template< Role role >
+	gsl::span< const byte > UDPAudioEncoder< role >::getPreEncodedContext(audio_context_t context) const {
+		const bool hasSnippet = context < m_preEncodedContext.size();
+
+		if (hasSnippet) {
+			const std::vector< byte > &snippet = m_preEncodedContext[context];
+
+			return gsl::span< const byte >(snippet.data(), snippet.size());
+		}
+
+		return {};
+	}
+
+	// Returns a view on the pre-encoded snippet for the given volume adjustment or an empty span if there is
+	// no pre-encoded snippet for it (invalid dB adjustment or adjustment outside of the pre-encoded range).
+	template< Role role >
+	gsl::span< const byte >
+		UDPAudioEncoder< role >::getPreEncodedVolumeAdjustment(const VolumeAdjustment &adjustment) const {
+		// The sentinel has to be rejected before doing any arithmetic with it
+		if (adjustment.dbAdjustment == VolumeAdjustment::InvalidDBAdjustment) {
+			return {};
+		}
+
+		// Compute the index in a wider type, so that the subtraction can't overflow for extreme adjustments
+		const long long index = static_cast< long long >(adjustment.dbAdjustment) - preEncodedDBAdjustmentBegin;
+
+		if (index < 0 || static_cast< unsigned long long >(index) >= m_preEncodedVolumeAdjustment.size()) {
+			// No pre-encoded snippet for the given adjustment
+			return {};
+		}
+
+		const std::vector< byte > &data = m_preEncodedVolumeAdjustment[static_cast< std::size_t >(index)];
+
+		return gsl::span< const byte >(data.data(), data.size());
+	}
+
+
+	// Creates a ping encoder for the given protocol version.
+	template< Role role >
+	UDPPingEncoder< role >::UDPPingEncoder(Version::mumble_raw_version_t protocolVersion)
+		: ProtocolHandler< role >(protocolVersion) {
+		// A general ping package is assumed to be < 32bytes long (the legacy ping packet is at most 12bytes
+		// long), so reserve that much memory up front
+		m_byteBuffer.reserve(32);
+	}
+
+	// Encodes the given ping data using the packet format that matches the current protocol version and
+	// returns a view on the encoded packet.
+	template< Role role > gsl::span< const byte > UDPPingEncoder< role >::encodePingPacket(const PingData &data) {
+		const bool useLegacyFormat = this->getProtocolVersion() < PROTOBUF_INTRODUCTION_VERSION;
+
+		return useLegacyFormat ? encodePingPacket_legacy(data) : encodePingPacket_protobuf(data);
+	}
+
+	// Encodes the given ping data in the legacy (pre-Protobuf) format and returns a view on the packet.
+	//
+	// Pings that request or contain additional information don't have a header byte. They consist of a uint32
+	// slot for the server version, followed by the uint64 timestamp and - only if the additional information
+	// is written (server only) - the user count, max. user count and max. bandwidth per user (uint32 each).
+	// All other pings consist of the header byte followed by the varint-encoded timestamp.
+	template< Role role >
+	gsl::span< const byte > UDPPingEncoder< role >::encodePingPacket_legacy(const PingData &data) {
+		// Clearing before resizing ensures that all bytes that are not explicitly written below are zero
+		m_byteBuffer.clear();
+
+		std::size_t actualSize = 0;
+
+		if (data.requestAdditionalInformation || data.containsAdditionalInformation) {
+			const bool writeAdditionalInformation =
+				data.containsAdditionalInformation && this->getRole() == Role::Server;
+
+			std::size_t packetSize;
+			if (writeAdditionalInformation) {
+				// uint32: server version
+				// uint64: timestamp
+				// uint32: user count
+				// uint32: max. user count
+				// uint32: max. bandwidth per user
+				packetSize = 4 * sizeof(std::uint32_t) + sizeof(std::uint64_t);
+			} else {
+				// uint32: zero (empty spot for the server to fill in its (protocol) version before bouncing
+				//         the ping back)
+				// uint64: timestamp
+				packetSize = sizeof(std::uint32_t) + sizeof(std::uint64_t);
+			}
+
+			m_byteBuffer.resize(packetSize);
+
+			// Copies the given value into the slot-th uint32 slot of the packet. The bytes are copied instead
+			// of being stored through a casted pointer, as the byte buffer must not be accessed through
+			// pointers to wider integer types (alignment and aliasing rules).
+			const auto writeUInt32 = [this](std::size_t slot, std::uint32_t value) {
+				std::memcpy(m_byteBuffer.data() + slot * sizeof(std::uint32_t), &value, sizeof(value));
+			};
+
+			if (writeAdditionalInformation) {
+				writeUInt32(0, qToBigEndian(data.serverVersion));
+				// Slots 1 and 2 together hold the timestamp (written below)
+				writeUInt32(3, qToBigEndian(data.userCount));
+				writeUInt32(4, qToBigEndian(data.maxUserCount));
+				writeUInt32(5, qToBigEndian(data.maxBandwidthPerUser));
+			}
+
+			// The timestamp follows the first uint32 slot. It is copied without any byte-order conversion.
+			const std::uint64_t timestamp = data.timestamp;
+			std::memcpy(m_byteBuffer.data() + sizeof(std::uint32_t), &timestamp, sizeof(timestamp));
+
+			actualSize = packetSize;
+		} else {
+			// 8 bytes for a uint64 timestamp + 1byte for the varint encoding of that stamp + 1byte header
+			constexpr std::size_t maxSize = 8 + 1 + 1;
+			m_byteBuffer.resize(maxSize);
+
+			// Write header byte (the target bits are zero, so they don't have to be set explicitly)
+			m_byteBuffer[0] = static_cast< byte >(LegacyUDPMessageType::Ping) << 5;
+
+			PacketDataStream stream(m_byteBuffer.data() + 1, maxSize - 1);
+
+			stream << static_cast< quint64 >(data.timestamp);
+
+			// +1 as the stream doesn't know about the header byte
+			actualSize = stream.size() + 1;
+		}
+
+		return gsl::span< byte >(m_byteBuffer.data(), actualSize);
+	}
+
+	// Encodes the given ping data as a Protobuf ping message that is preceded by the header byte and returns a
+	// view on the packet. A request for additional information takes precedence over contained additional
+	// information; the latter is only written if no such request is made.
+	template< Role role >
+	gsl::span< const byte > UDPPingEncoder< role >::encodePingPacket_protobuf(const PingData &data) {
+		m_pingMessage.Clear();
+		m_pingMessage.set_timestamp(data.timestamp);
+
+		if (data.requestAdditionalInformation) {
+			m_pingMessage.set_request_extended_information(true);
+		} else if (data.containsAdditionalInformation) {
+			m_pingMessage.set_server_version(data.serverVersion);
+			m_pingMessage.set_user_count(data.userCount);
+			m_pingMessage.set_max_user_count(data.maxUserCount);
+			m_pingMessage.set_max_bandwidth_per_user(data.maxBandwidthPerUser);
+		}
+
+		// The message is written behind the header byte (hence the offset of 1)
+		const std::size_t messageSize = encodeProtobuf(m_pingMessage, m_byteBuffer, 1, MAX_UDP_PACKET_SIZE, false);
+
+		m_byteBuffer[0] = static_cast< byte >(UDPMessageType::Ping);
+
+		// +1 in order to account for the header byte
+		return gsl::span< byte >(m_byteBuffer.data(), messageSize + 1);
+	}
+
+
+	// Creates a decoder for the given protocol version, whose internal buffer is large enough to hold a UDP
+	// packet of maximum size.
+	template< Role role >
+	UDPDecoder< role >::UDPDecoder(Version::mumble_raw_version_t protocolVersion)
+		: ProtocolHandler< role >(protocolVersion) {
+		m_byteBuffer.resize(MAX_UDP_PACKET_SIZE);
+	}
+
+	// Returns a mutable view on the decoder's entire internal buffer.
+	template< Role role > gsl::span< byte > UDPDecoder< role >::getBuffer() {
+		byte *const bufferStart      = m_byteBuffer.data();
+		const std::size_t bufferSize = m_byteBuffer.size();
+
+		return gsl::span< byte >(bufferStart, bufferSize);
+	}
+
+	// Decodes the given UDP packet according to the current protocol version and returns whether that was
+	// successful. If restrictToPing is set, only ping packets are decoded and all other packets are rejected.
+	// When a ping in the new (Protobuf) format is received while operating on a pre-Protobuf protocol version,
+	// the protocol version is raised to PROTOBUF_INTRODUCTION_VERSION.
+	template< Role role > bool UDPDecoder< role >::decode(const gsl::span< const byte > data, bool restrictToPing) {
+		if (data.size() <= 1) {
+			// Empty packages or packages consisting only of the header byte are invalid
+			return false;
+		}
+
+		byte header = data[0];
+
+		if (this->getProtocolVersion() < PROTOBUF_INTRODUCTION_VERSION) {
+			// Note: For ping messages we might still have to check the new format, since it could happen that we
+			// are receiving pings of a server/client whose version we don't know yet.
+			// This only works as long as the header byte of a new-format ping differs from the header byte of
+			// a legacy ping (legacy message type in the 3 most significant bits).
+			static_assert(static_cast< byte >(UDPMessageType::Ping)
+							  != (static_cast< unsigned int >(LegacyUDPMessageType::Ping) << 5),
+						  "Unexpected coincidence of ping header byte values");
+
+			if (header == static_cast< byte >(UDPMessageType::Ping)) {
+				// If the ping message is in the new format, we assume a protocol version of at least
+				// PROTOBUF_INTRODUCTION_VERSION
+				this->setProtocolVersion(std::max(this->getProtocolVersion(), PROTOBUF_INTRODUCTION_VERSION));
+
+				return decodePing_protobuf(data.subspan(1, data.size() - 1));
+			}
+
+			// This might be a legacy ping that requests additional information (they don't come with a header)
+			// When sent from the client this will have a length of 12 bytes and when sent from the server it will
+			// have a size of 24 bytes.
+			if ((data.size() == 12 || data.size() == 24) && decodePing_legacy(data)) {
+				return true;
+			}
+
+			if (restrictToPing) {
+				// This is not a ping, so we error out early
+				return false;
+			}
+
+			// In the legacy format, the message type is encoded as the 3 most significant bits in the header byte
+			LegacyUDPMessageType legacyMessageType = static_cast< LegacyUDPMessageType >((header >> 5) & 0x7);
+
+			switch (legacyMessageType) {
+				case LegacyUDPMessageType::Ping:
+					return decodePing_legacy(data.subspan(1, data.size() - 1));
+				case LegacyUDPMessageType::VoiceCELTAlpha:
+					return decodeAudio_legacy(data, AudioCodec::CELT_Alpha);
+				case LegacyUDPMessageType::VoiceCELTBeta:
+					return decodeAudio_legacy(data, AudioCodec::CELT_Beta);
+				case LegacyUDPMessageType::VoiceSpeex:
+					return decodeAudio_legacy(data, AudioCodec::Speex);
+				case LegacyUDPMessageType::VoiceOpus:
+					return decodeAudio_legacy(data, AudioCodec::Opus);
+			}
+
+			// Invalid message
+			return false;
+		} else {
+			// In the new format the header byte directly holds the message type
+			switch (static_cast< UDPMessageType >(header)) {
+				case UDPMessageType::Audio:
+					if (restrictToPing) {
+						// Not a ping
+						return false;
+					}
+
+					return decodeAudio_protobuf(data.subspan(1, data.size() - 1));
+				case UDPMessageType::Ping:
+					return decodePing_protobuf(data.subspan(1, data.size() - 1));
+			}
+
+			// Unknown package type
+			return false;
+		}
+	}
+
+	// Decodes the given packet but accepts ping messages only (everything else makes this function return false).
+	template< Role role > bool UDPDecoder< role >::decodePing(const gsl::span< const byte > data) {
+		constexpr bool pingsOnly = true;
+
+		return decode(data, pingsOnly);
+	}
+
+	// Returns the message type that has been stored by the decode functions.
+	template< Role role > UDPMessageType UDPDecoder< role >::getMessageType() const {
+		return m_messageType;
+	}
+
+	// Returns (a copy of) the stored audio data. May only be used while the stored message type is Audio.
+	template< Role role > AudioData UDPDecoder< role >::getAudioData() const {
+		assert(UDPMessageType::Audio == m_messageType);
+		return m_audioData;
+	}
+
+	// Returns (a copy of) the stored ping data. May only be used while the stored message type is Ping.
+	template< Role role > PingData UDPDecoder< role >::getPingData() const {
+		assert(UDPMessageType::Ping == m_messageType);
+		return m_pingData;
+	}
+
+	/**
+	 * Decodes a ping message in the legacy (pre-Protobuf) format and stores the result in m_pingData.
+	 *
+	 * Messages of up to 9 bytes are treated as regular connectivity pings that consist of a single
+	 * varint-encoded timestamp. Bigger messages are extended pings: a 12 byte request (when decoding as a
+	 * server) or a 24 byte response carrying server information (when decoding as a client).
+	 *
+	 * @param data The ping message (without a message header byte)
+	 * @return Whether the message could be decoded
+	 */
+	template< Role role > bool UDPDecoder< role >::decodePing_legacy(const gsl::span< const byte > data) {
+		m_messageType = UDPMessageType::Ping;
+		m_pingData    = {};
+
+		if (data.empty()) {
+			return false;
+		}
+
+		if (data.size() <= sizeof(std::uint64_t) + 1) {
+			// Regular connectivity ping (contains a single varint which may be up to a full 64bit number plus
+			// one header byte)
+			PacketDataStream stream(data.data(), data.size());
+
+			quint64 timestamp = 0;
+			stream >> timestamp;
+
+			if (!stream.isValid()) {
+				// The varint was truncated -> there is no meaningful timestamp to report
+				return false;
+			}
+
+			m_pingData.timestamp = timestamp;
+
+			return true;
+		}
+
+		switch (this->getRole()) {
+			case Role::Client: {
+				// Client-specific code
+				static_assert(6 * sizeof(std::uint32_t) == 24, "Unexpected size of uint32_t");
+				if (data.size() != 6 * sizeof(std::uint32_t)) {
+					// Invalid size for legacy ping packet
+					return false;
+				}
+
+				// Extended ping containing meta-information. The data is copied into properly typed storage
+				// instead of being read through a casted pointer, as the byte buffer is not guaranteed to be
+				// suitably aligned for 32/64 bit accesses.
+				std::uint32_t fields[6];
+				std::memcpy(fields, data.data(), sizeof(fields));
+
+				m_pingData.serverVersion = qFromBigEndian(fields[0]);
+				// Virtual array entries 1 and 2 are actually a single uint64. Note that the timestamp is
+				// whatever the client sent to the server and thus it does not require an endian-transformation
+				std::memcpy(&m_pingData.timestamp, &fields[1], sizeof(m_pingData.timestamp));
+				m_pingData.userCount           = qFromBigEndian(fields[3]);
+				m_pingData.maxUserCount        = qFromBigEndian(fields[4]);
+				m_pingData.maxBandwidthPerUser = qFromBigEndian(fields[5]);
+
+				// The fields above are exactly the "additional information" -> flag them as being present
+				m_pingData.containsAdditionalInformation = true;
+
+				return true;
+			}
+			case Role::Server: {
+				// Server-specific code
+				const bool hasBlankPrefix = data[0] == 0 && data[1] == 0 && data[2] == 0 && data[3] == 0;
+				if (data.size() != 4 + sizeof(std::uint64_t) || !hasBlankPrefix) {
+					// Invalid size or layout for legacy ping packet
+					return false;
+				}
+
+				// Extended information ping request message. When received by the server, the message contains 4
+				// leading, blank bytes followed by a 64bit client-specific timestamp. Thus, the only meaningful
+				// decoding to do right now, is reading out the timestamp. Note that the byte-order of this field
+				// (and its contents in general) is unspecified and thus the server code should never try to make
+				// sense of it. (Copied byte-wise as the buffer may not be aligned for a 64bit access.)
+				std::memcpy(&m_pingData.timestamp, data.data() + 4, sizeof(m_pingData.timestamp));
+				m_pingData.requestAdditionalInformation = true;
+
+				return true;
+			}
+		}
+
+		// This code should never be reached
+		return false;
+	}
+
+	// Decodes a ping message in the Protobuf format (data must not contain the message header byte anymore) and
+	// stores the result in m_pingData.
+	template< Role role > bool UDPDecoder< role >::decodePing_protobuf(const gsl::span< const byte > data) {
+		m_pingData    = {};
+		m_messageType = UDPMessageType::Ping;
+
+		// Empty messages are rejected right away, everything else has to be accepted by the Protobuf parser
+		const bool parsed = !data.empty() && m_pingMessage.ParseFromArray(data.data(), data.size());
+		if (!parsed) {
+			return false;
+		}
+
+		// Transfer the parsed fields from m_pingMessage
+		m_pingData.timestamp                    = m_pingMessage.timestamp();
+		m_pingData.requestAdditionalInformation = m_pingMessage.request_extended_information();
+		m_pingData.serverVersion                = m_pingMessage.server_version();
+
+		if (m_pingData.serverVersion == 0) {
+			// 0 is not a valid version specifier, so a value of zero means that Protobuf has used the default value
+			// because the field was not set. In that case none of the extra fields are assumed to be set either.
+			m_pingData.containsAdditionalInformation = false;
+
+			return true;
+		}
+
+		m_pingData.containsAdditionalInformation = true;
+		m_pingData.userCount                     = m_pingMessage.user_count();
+		m_pingData.maxUserCount                  = m_pingMessage.max_user_count();
+		m_pingData.maxBandwidthPerUser           = m_pingMessage.max_bandwidth_per_user();
+
+		return true;
+	}
+
+	/**
+	 * Decodes an audio message in the legacy (pre-Protobuf) format and stores the result in m_audioData.
+	 *
+	 * Note: The decoded payload is only a view into the given data. No audio data is copied.
+	 *
+	 * @param data The complete audio message, including its header byte
+	 * @param codec The codec that was indicated by the message's header byte
+	 * @return Whether the message could be decoded
+	 */
+	template< Role role >
+	bool UDPDecoder< role >::decodeAudio_legacy(const gsl::span< const byte > data, AudioCodec codec) {
+		m_messageType = UDPMessageType::Audio;
+		m_audioData   = {};
+
+		if (data.size() < 1 + 1 + 1) {
+			// Audio packets must at least contain one header byte, at least one byte varint-encoding the sequence
+			// number and at least one byte of audio payload.
+			return false;
+		}
+
+		// The target or context is encoded as the five least significant bits of the header byte
+		m_audioData.targetOrContext = data[0] & 0x1f;
+		m_audioData.usedCodec       = codec;
+
+		PacketDataStream stream(data.data() + 1, data.size() - 1);
+
+		if (this->getRole() == Role::Client) {
+			// When the client receives audio packets from the server, there will be an extra field containing the
+			// session ID of the client whose audio this is. This field is not present when a client sends audio to the
+			// server (as the server knows where the connection is coming from).
+			stream >> m_audioData.senderSession;
+		}
+
+		quint64 helper = 0;
+		stream >> helper;
+		m_audioData.frameNumber = helper;
+
+		// The payload is only ever read, so a pointer to const is all that is needed here
+		const byte *payloadBegin  = nullptr;
+		std::uint64_t payloadSize = 0;
+		switch (codec) {
+			case AudioCodec::CELT_Alpha:
+			case AudioCodec::CELT_Beta:
+			case AudioCodec::Speex: {
+				// For these codecs the payload starts at (and includes) the first TOC byte
+				payloadBegin = stream.dataPtr();
+				// For these old codecs, multiple frames may be sent as one payload. Each frame is started by a TOC byte
+				// which encodes the length of the following frame and whether there will be a frame after it. The
+				// length is encoded as the 7 least significant bits (0x7f) whereas the continuation flag is encoded in
+				// the most significant bit (0x80).
+				byte header = 0;
+				do {
+					header = static_cast< byte >(stream.next());
+
+					unsigned int currentFrameSize = header & 0x7f;
+
+					if (currentFrameSize == 0) {
+						// An empty frame means that this is the end of the audio transmission
+						m_audioData.isLastFrame = true;
+					}
+
+					// +1 for the TOC byte itself: it has been consumed from the stream and lies within the payload
+					// region that starts at payloadBegin. Without it, the payload would be cut short by one byte
+					// per contained frame.
+					payloadSize += currentFrameSize + 1;
+
+					stream.skip(currentFrameSize);
+				} while ((header & 0x80) && stream.isValid());
+
+				break;
+			}
+			case AudioCodec::Opus:
+				// An Opus payload starts with a varint-encoded size field. The max. size is 0x1FFF (13 least
+				// significant bits of the flag). If the 14 th bit (0x2000) is set, this means that this is the last
+				// packet in the audio transmission.
+				stream >> helper;
+				payloadSize             = helper & 0x1FFF;
+				m_audioData.isLastFrame = helper & 0x2000;
+
+				// We don't include the size/header-field in the actual payload
+				payloadBegin = stream.dataPtr();
+
+				stream.skip(payloadSize);
+				break;
+		}
+
+		if (!stream.isValid()) {
+			// We tried to read or skip more data than the message contains
+			return false;
+		}
+
+		m_audioData.payload = gsl::span< const byte >(payloadBegin, payloadSize);
+
+		if (stream.left() == 3 * sizeof(float)) {
+			// If there are further bytes after the audio payload, this means that there is positional data attached to
+			// the packet.
+			m_audioData.containsPositionalData = true;
+			for (int i = 0; i < 3; ++i) {
+				stream >> m_audioData.position[i];
+			}
+		} else if (stream.left() > 0) {
+			// The remaining data does not fit the size of positional data -> seems like an invalid package format
+			return false;
+		}
+
+		// Legacy audio packets don't contain volume adjustments
+
+		return true;
+	}
+
+	// Decodes an audio message in the Protobuf format (data must not contain the message header byte anymore) and
+	// stores the result in m_audioData. The decoded payload is a view into m_audioMessage (nothing is copied).
+	template< Role role > bool UDPDecoder< role >::decodeAudio_protobuf(const gsl::span< const byte > data) {
+		m_audioData   = {};
+		m_messageType = UDPMessageType::Audio;
+
+		const bool parsed = m_audioMessage.ParseFromArray(data.data(), data.size());
+		if (!parsed) {
+			// Invalid format
+			return false;
+		}
+
+		// Clients read the context of the audio, whereas servers read its target
+		if (this->getRole() == Role::Client) {
+			m_audioData.targetOrContext = m_audioMessage.context();
+		} else {
+			m_audioData.targetOrContext = m_audioMessage.target();
+		}
+
+		// Atm the only codec supported by the new package format is Opus
+		m_audioData.usedCodec     = AudioCodec::Opus;
+		m_audioData.senderSession = m_audioMessage.sender_session();
+		m_audioData.frameNumber   = m_audioMessage.frame_number();
+
+		if (m_audioMessage.opus_data().empty()) {
+			// Audio packets without audio data are invalid
+			return false;
+		}
+
+		std::string &opusBytes = *m_audioMessage.mutable_opus_data();
+		m_audioData.payload    = gsl::span< byte >(reinterpret_cast< byte * >(&opusBytes[0]), opusBytes.size());
+		m_audioData.isLastFrame = m_audioMessage.is_terminator();
+
+		const int coordinateCount = m_audioMessage.positional_data_size();
+		if (coordinateCount != 0) {
+			if (coordinateCount != 3) {
+				// We always expect a 3D position, if positional data is present
+				return false;
+			}
+
+			for (int axis = 0; axis < 3; ++axis) {
+				m_audioData.position[axis] = m_audioMessage.positional_data(axis);
+			}
+
+			m_audioData.containsPositionalData = true;
+		}
+
+		m_audioData.volumeAdjustment = VolumeAdjustment::fromFactor(m_audioMessage.volume_adjustment());
+		const bool adjustmentUnset   = m_audioData.volumeAdjustment.factor == 0.0f;
+		if (adjustmentUnset) {
+			// No volume adjustment was set, reset to default
+			m_audioData.volumeAdjustment = VolumeAdjustment::fromFactor(1.0f);
+		}
+
+		return true;
+	}
+
+
+	// Compares two AudioData objects field by field. The payloads are compared by content (not by address) and
+	// the position is only taken into account if positional data is present.
+	bool operator==(const AudioData &lhs, const AudioData &rhs) {
+		const bool metadataEqual =
+			lhs.isLastFrame == rhs.isLastFrame && lhs.containsPositionalData == rhs.containsPositionalData
+			&& lhs.targetOrContext == rhs.targetOrContext && lhs.usedCodec == rhs.usedCodec
+			&& lhs.senderSession == rhs.senderSession && lhs.frameNumber == rhs.frameNumber
+			&& lhs.payload.size() == rhs.payload.size() && (!lhs.containsPositionalData || lhs.position == rhs.position)
+			&& lhs.volumeAdjustment == rhs.volumeAdjustment;
+
+		if (!metadataEqual) {
+			return false;
+		}
+
+		if (lhs.payload.empty()) {
+			// Both payloads are empty (their sizes are equal). An empty span may hold a null pointer, which must
+			// not be handed to memcmp - not even with a size of zero.
+			return true;
+		}
+
+		// Compare payload
+		return std::memcmp(lhs.payload.data(), rhs.payload.data(), lhs.payload.size()) == 0;
+	}
+
+	// Exact negation of the equality operator for AudioData.
+	bool operator!=(const AudioData &lhs, const AudioData &rhs) {
+		const bool equal = lhs == rhs;
+
+		return !equal;
+	}
+
+	// Compares two PingData objects field by field.
+	bool operator==(const PingData &lhs, const PingData &rhs) {
+		const bool sameRequest = lhs.timestamp == rhs.timestamp
+								 && lhs.requestAdditionalInformation == rhs.requestAdditionalInformation;
+
+		const bool sameServerInformation = lhs.containsAdditionalInformation == rhs.containsAdditionalInformation
+										   && lhs.serverVersion == rhs.serverVersion
+										   && lhs.userCount == rhs.userCount && lhs.maxUserCount == rhs.maxUserCount
+										   && lhs.maxBandwidthPerUser == rhs.maxBandwidthPerUser;
+
+		return sameRequest && sameServerInformation;
+	}
+
+	// Exact negation of the equality operator for PingData.
+	bool operator!=(const PingData &lhs, const PingData &rhs) {
+		const bool equal = lhs == rhs;
+
+		return !equal;
+	}
+
+	// Explicit template instantiation of our classes. We require one instantiation for every available Role.
+#define ALL_CLASSES                \
+	PROCESS_CLASS(ProtocolHandler) \
+	PROCESS_CLASS(UDPAudioEncoder) \
+	PROCESS_CLASS(UDPPingEncoder)  \
+	PROCESS_CLASS(UDPDecoder)
+
+#define PROCESS_CLASS(className)              \
+	template class className< Role::Client >; \
+	template class className< Role::Server >;
+
+	ALL_CLASSES
+
+#undef ALL_CLASSES
+#undef PROCESS_CLASS
+
+} // namespace Protocol
+}; // namespace Mumble
diff --git a/src/MumbleProtocol.h b/src/MumbleProtocol.h
new file mode 100644
index 000000000..7c0bd7d6e
--- /dev/null
+++ b/src/MumbleProtocol.h
@@ -0,0 +1,310 @@
+// Copyright 2021 The Mumble Developers. All rights reserved.
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file at the root of the
+// Mumble source tree or at <https://www.mumble.info/LICENSE>.
+
+#ifndef MUMBLE_MUMBLEPROTOCOL_H_
+#define MUMBLE_MUMBLEPROTOCOL_H_
+
+#include "MumbleUDP.pb.h"
+#include "Version.h"
+#include "VolumeAdjustment.h"
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+
+#include <gsl/span>
+
+/**
+ * "X-macro" for all Mumble Protobuf TCP message types. Define PROCESS_MUMBLE_TCP_MESSAGE(name, value) before use.
+ *
+ * Warning: Only append to the end. Never insert in between or remove an existing entry.
+ */
+#define MUMBLE_ALL_TCP_MESSAGES                         \
+	PROCESS_MUMBLE_TCP_MESSAGE(Version, 0)              \
+	PROCESS_MUMBLE_TCP_MESSAGE(UDPTunnel, 1)            \
+	PROCESS_MUMBLE_TCP_MESSAGE(Authenticate, 2)         \
+	PROCESS_MUMBLE_TCP_MESSAGE(Ping, 3)                 \
+	PROCESS_MUMBLE_TCP_MESSAGE(Reject, 4)               \
+	PROCESS_MUMBLE_TCP_MESSAGE(ServerSync, 5)           \
+	PROCESS_MUMBLE_TCP_MESSAGE(ChannelRemove, 6)        \
+	PROCESS_MUMBLE_TCP_MESSAGE(ChannelState, 7)         \
+	PROCESS_MUMBLE_TCP_MESSAGE(UserRemove, 8)           \
+	PROCESS_MUMBLE_TCP_MESSAGE(UserState, 9)            \
+	PROCESS_MUMBLE_TCP_MESSAGE(BanList, 10)             \
+	PROCESS_MUMBLE_TCP_MESSAGE(TextMessage, 11)         \
+	PROCESS_MUMBLE_TCP_MESSAGE(PermissionDenied, 12)    \
+	PROCESS_MUMBLE_TCP_MESSAGE(ACL, 13)                 \
+	PROCESS_MUMBLE_TCP_MESSAGE(QueryUsers, 14)          \
+	PROCESS_MUMBLE_TCP_MESSAGE(CryptSetup, 15)          \
+	PROCESS_MUMBLE_TCP_MESSAGE(ContextActionModify, 16) \
+	PROCESS_MUMBLE_TCP_MESSAGE(ContextAction, 17)       \
+	PROCESS_MUMBLE_TCP_MESSAGE(UserList, 18)            \
+	PROCESS_MUMBLE_TCP_MESSAGE(VoiceTarget, 19)         \
+	PROCESS_MUMBLE_TCP_MESSAGE(PermissionQuery, 20)     \
+	PROCESS_MUMBLE_TCP_MESSAGE(CodecVersion, 21)        \
+	PROCESS_MUMBLE_TCP_MESSAGE(UserStats, 22)           \
+	PROCESS_MUMBLE_TCP_MESSAGE(RequestBlob, 23)         \
+	PROCESS_MUMBLE_TCP_MESSAGE(ServerConfig, 24)        \
+	PROCESS_MUMBLE_TCP_MESSAGE(SuggestConfig, 25)       \
+	PROCESS_MUMBLE_TCP_MESSAGE(PluginDataTransmission, 26)
+
+/**
+ * "X-macro" for all Mumble Protobuf UDP message types. Define PROCESS_MUMBLE_UDP_MESSAGE(name, value) before use.
+ *
+ * Warning: Only append to the end. Never insert in between or remove an existing entry.
+ */
+#define MUMBLE_ALL_UDP_MESSAGES          \
+	PROCESS_MUMBLE_UDP_MESSAGE(Audio, 0) \
+	PROCESS_MUMBLE_UDP_MESSAGE(Ping, 1)
+
+namespace Mumble {
+namespace Protocol {
+
+	using byte = std::uint8_t;
+
+	// The maximum allowed size in bytes of UDP packets (according to the Mumble protocol)
+	constexpr std::size_t MAX_UDP_PACKET_SIZE = 1024;
+
+#define PROCESS_MUMBLE_TCP_MESSAGE(name, value) name = value,
+	/**
+	 * Enum holding all possible TCP message types (generated from MUMBLE_ALL_TCP_MESSAGES)
+	 */
+	enum class TCPMessageType : byte { MUMBLE_ALL_TCP_MESSAGES };
+#undef PROCESS_MUMBLE_TCP_MESSAGE
+#define PROCESS_MUMBLE_UDP_MESSAGE(name, value) name = value,
+	/**
+	 * Enum holding all possible UDP message types (the value is the header byte of Protobuf-format UDP packets)
+	 */
+	enum class UDPMessageType : byte { MUMBLE_ALL_UDP_MESSAGES };
+#undef PROCESS_MUMBLE_UDP_MESSAGE
+	// Message types of the legacy UDP format. The implicit enumerator values are wire values - don't reorder them!
+	enum class LegacyUDPMessageType : byte { VoiceCELTAlpha, Ping, VoiceSpeex, VoiceCELTBeta, VoiceOpus };
+
+	enum class AudioCodec {
+		Opus,
+		CELT_Alpha, // 0.7.0
+		CELT_Beta,  // 0.11.0
+		Speex,
+	};
+
+	namespace ReservedTargetIDs {
+		constexpr const unsigned int RegularSpeech  = 0;
+		constexpr const unsigned int ServerLoopback = 31;
+	}; // namespace ReservedTargetIDs
+
+	using audio_context_t = byte;
+	namespace AudioContext {
+		constexpr audio_context_t Invalid = 0xFF; // This is the equivalent of -1 as a signed 8bit number
+		constexpr audio_context_t Normal  = 0;
+		constexpr audio_context_t Shout   = 1;
+		constexpr audio_context_t Whisper = 2;
+		constexpr audio_context_t Listen  = 3;
+
+		constexpr audio_context_t begin = Normal;     // First valid context
+		constexpr audio_context_t end   = Listen + 1; // One past the last valid context
+	}; // namespace AudioContext
+
+	enum class Role { Server, Client }; // The side of the connection a protocol handler is acting as
+
+	constexpr Version::mumble_raw_version_t PROTOBUF_INTRODUCTION_VERSION = Version::toRaw(1, 5, 0);
+
+
+	bool protocolVersionsAreCompatible(Version::mumble_raw_version_t lhs, Version::mumble_raw_version_t rhs);
+
+
+	template< Role role > class ProtocolHandler { // Common base class that stores the protocol version to use
+	public:
+		ProtocolHandler(Version::mumble_raw_version_t protocolVersion = Version::UNKNOWN);
+
+		Version::mumble_raw_version_t getProtocolVersion() const;
+		void setProtocolVersion(Version::mumble_raw_version_t protocolVersion);
+
+		constexpr Role getRole() const { return role; }; // The role is fixed at compile time (template parameter)
+
+	protected:
+		Version::mumble_raw_version_t m_protocolVersion;
+	};
+
+	struct AudioData { // Protocol-independent representation of the content of an audio packet
+		std::uint32_t targetOrContext = ReservedTargetIDs::RegularSpeech; // Target (to server) or context (to client)
+		AudioCodec usedCodec          = AudioCodec::Opus;
+		std::uint32_t senderSession   = 0; // Session of the client the audio originates from
+		std::uint64_t frameNumber     = 0;
+		gsl::span< const byte > payload; // Non-owning view on the encoded audio - the referenced storage lives elsewhere
+		bool isLastFrame                  = false; // Whether this is the end of the current audio transmission
+		bool containsPositionalData       = false; // Whether position holds meaningful data
+		std::array< float, 3 > position   = { 0, 0, 0 };
+		VolumeAdjustment volumeAdjustment = VolumeAdjustment::fromFactor(1.0f);
+
+		friend bool operator==(const AudioData &lhs, const AudioData &rhs);
+		friend bool operator!=(const AudioData &lhs, const AudioData &rhs);
+	};
+
+	struct PingData { // Protocol-independent representation of the content of a ping packet
+		std::uint64_t timestamp            = 0;     // Chosen by the client, opaque to the server
+		bool requestAdditionalInformation  = false; // Set by a client that wants to obtain the fields below
+		bool containsAdditionalInformation = false; // Whether the fields below are populated
+		std::uint32_t serverVersion        = Version::UNKNOWN;
+		std::uint32_t userCount            = 0;
+		std::uint32_t maxUserCount         = 0;
+		std::uint32_t maxBandwidthPerUser  = 0;
+
+		friend bool operator==(const PingData &lhs, const PingData &rhs);
+		friend bool operator!=(const PingData &lhs, const PingData &rhs);
+	};
+
+	template< Role role > class UDPAudioEncoder : public ProtocolHandler< role > { // Encodes AudioData into UDP packets
+	public:
+		UDPAudioEncoder(Version::mumble_raw_version_t protocolVersion = Version::UNKNOWN);
+
+		/**
+		 * Encodes an audio packet based on the provided data.
+		 * Note: Incremental encoding is also supported via the prepare and update functions.
+		 *
+		 * @param data The AudioData to encode
+		 * @return A span to the encoded data (ready to be sent out)
+		 */
+		gsl::span< const byte > encodeAudioPacket(const AudioData &data);
+		/**
+		 * Prepares an audio packet by encoding the "static" part of the audio data. The static part contains
+		 * things like the actual audio payload, its type and the sender's session.
+		 * In order to also encode positional data, call addPositionalData after calling this function and
+		 * to encode the rest of the audio data, call updateAudioPacket after that.
+		 *
+		 * Note: Calls to this function remove any previously encoded variable parts or positional audio
+		 * from the audio packet.
+		 *
+		 * @param data The AudioData to encode (partially!)
+		 */
+		void prepareAudioPacket(const AudioData &data);
+		/**
+		 * This function assumes that an audio packet has already been prepared. In that case it will encode
+		 * the "variable" part of the audio packet which contains e.g. audio context (or audio target) and
+		 * volume adjustments (if supported by the used protocol).
+		 *
+		 * @param data The AudioData to encode (partially!)
+		 * @return A span to the encoded audio packet (including the static part and potentially positional data)
+		 */
+		gsl::span< const byte > updateAudioPacket(const AudioData &data);
+		/**
+		 * This function assumes that an audio packet has already been prepared. In that case it will encode
+		 * the given positional data (if any) into the audio packet.
+		 *
+		 * Note: A call to this function invalidates any variable part that might have been added to the audio
+		 * packet before. Thus, another call to updateAudioPacket is required after calling this function.
+		 *
+		 * @param data The AudioData to take the positional data from
+		 */
+		void addPositionalData(const AudioData &data);
+		/**
+		 * This function assumes that an audio packet has already been prepared. In that case it will remove
+		 * positional data from the audio packet that was previously added using addPositionalData.
+		 *
+		 * Note: A call to this function invalidates any variable part that might have been added to the audio
+		 * packet before. Thus, another call to updateAudioPacket is required after calling this function.
+		 *
+		 */
+		void dropPositionalData();
+
+	protected:
+		static constexpr const int preEncodedDBAdjustmentBegin = -60;
+		static constexpr const int preEncodedDBAdjustmentEnd   = 30 + 1; // presumably exclusive (+30 dB is the last) - TODO confirm
+
+		std::vector< byte > m_byteBuffer;
+		std::size_t m_staticPartSize      = 0; // presumably the size of the encoded "static" part - TODO confirm
+		std::size_t m_positionalAudioSize = 0; // presumably the size of the encoded positional data - TODO confirm
+		MumbleUDP::Audio m_audioMessage;
+		std::vector< std::vector< byte > > m_preEncodedContext;
+		std::vector< std::vector< byte > > m_preEncodedVolumeAdjustment;
+
+		void prepareAudioPacket_legacy(const AudioData &data);
+		gsl::span< const byte > updateAudioPacket_legacy(const AudioData &data);
+		void addPositionalData_legacy(const AudioData &data);
+
+		void prepareAudioPacket_protobuf(const AudioData &data);
+		gsl::span< const byte > updateAudioPacket_protobuf(const AudioData &data);
+		void addPositionalData_protobuf(const AudioData &data);
+
+		void preparePreEncodedSnippets();
+
+		gsl::span< const byte > getPreEncodedContext(audio_context_t context) const;
+		gsl::span< const byte > getPreEncodedVolumeAdjustment(const VolumeAdjustment &adjustment) const;
+	};
+
+	template< Role role > class UDPPingEncoder : public ProtocolHandler< role > { // Encodes PingData into UDP packets
+	public:
+		UDPPingEncoder(Version::mumble_raw_version_t protocolVersion = Version::UNKNOWN);
+
+		gsl::span< const byte > encodePingPacket(const PingData &data); // presumably dispatches on the protocol version
+
+	protected:
+		std::vector< byte > m_byteBuffer; // Storage for the encoded packet (the Protobuf variant returns a span into it)
+		MumbleUDP::Ping m_pingMessage;
+
+		gsl::span< const byte > encodePingPacket_legacy(const PingData &data);
+		gsl::span< const byte > encodePingPacket_protobuf(const PingData &data);
+	};
+
+	/**
+	 * Decoder for UDP packets in either the legacy or the Protobuf format. Which format is expected depends
+	 * on the protocol version that is currently set for the decoder.
+	 */
+	template< Role role > class UDPDecoder : public ProtocolHandler< role > {
+	public:
+		UDPDecoder(Version::mumble_raw_version_t protocolVersion = Version::UNKNOWN);
+
+		/**
+		 * @return A view on the decoder's internal byte buffer (MAX_UDP_PACKET_SIZE bytes)
+		 */
+		gsl::span< byte > getBuffer();
+		/**
+		 * Decodes the given packet. On success the result is available via getMessageType and
+		 * getPingData or getAudioData respectively.
+		 *
+		 * @param data The packet to decode
+		 * @param restrictToPing Whether everything that is not a ping message shall be rejected
+		 * @return Whether decoding was successful
+		 */
+		bool decode(const gsl::span< const byte > data, bool restrictToPing = false);
+		/**
+		 * Same as decode, but only accepts ping messages.
+		 */
+		bool decodePing(const gsl::span< const byte > data);
+
+		UDPMessageType getMessageType() const;
+
+		/**
+		 * Note: The payload of the returned AudioData is a non-owning view on either the data that was passed
+		 * to decode or on internal state of this decoder. It must therefore not be used after that data has
+		 * been changed or after another message has been decoded.
+		 *
+		 * @return The decoded audio data. Must only be called if the message type is Audio.
+		 */
+		AudioData getAudioData() const;
+		/**
+		 * @return The decoded ping data. Must only be called if the message type is Ping.
+		 */
+		PingData getPingData() const;
+
+	protected:
+		std::vector< byte > m_byteBuffer;
+		// Explicitly initialized so that getMessageType never reads an indeterminate value, even if it is called
+		// before anything has been decoded. The concrete initial value is arbitrary.
+		UDPMessageType m_messageType = UDPMessageType::Ping;
+		AudioData m_audioData        = {};
+		PingData m_pingData          = {};
+		MumbleUDP::Ping m_pingMessage;
+		MumbleUDP::Audio m_audioMessage;
+
+		bool decodePing_legacy(const gsl::span< const byte > data);
+		bool decodePing_protobuf(const gsl::span< const byte > data);
+		bool decodeAudio_legacy(const gsl::span< const byte > data, AudioCodec codec);
+		bool decodeAudio_protobuf(const gsl::span< const byte > data);
+	};
+
+}; // namespace Protocol
+}; // namespace Mumble
+
+/**
+ * This is merely a dummy-function (never used) that is required as a scope for dummy-switch statements on our message
+ * type enums. These will cause a compiler error if any two entries have the same numeric value (which we never want
+ * to happen), because that would result in duplicate case labels. See https://stackoverflow.com/a/50385277
+ */
+inline void ThisFunctionIsNeverCalledAndShouldSimplyBeOptimizedOut() {
+#define ARRAY_NAME Mumble::Protocol::TCPMessageType
+#define PROCESS_MUMBLE_TCP_MESSAGE(name, value) \
+	case ARRAY_NAME::name:                      \
+		break;
+	switch (static_cast< ARRAY_NAME >(0)) { // One case label per TCP message type
+		MUMBLE_ALL_TCP_MESSAGES
+		default:
+			break;
+	}
+#undef ARRAY_NAME
+#undef PROCESS_MUMBLE_TCP_MESSAGE
+
+#define ARRAY_NAME Mumble::Protocol::UDPMessageType
+#define PROCESS_MUMBLE_UDP_MESSAGE(name, value) \
+	case ARRAY_NAME::name:                      \
+		break;
+	switch (static_cast< ARRAY_NAME >(0)) { // One case label per UDP message type
+		MUMBLE_ALL_UDP_MESSAGES
+		default:
+			break;
+	}
+#undef ARRAY_NAME
+#undef PROCESS_MUMBLE_UDP_MESSAGE
+}
+
+#endif // MUMBLE_MUMBLEPROTOCOL_H_
diff --git a/src/MumbleUDP.proto b/src/MumbleUDP.proto
new file mode 100644
index 000000000..aabc463ca
--- /dev/null
+++ b/src/MumbleUDP.proto
@@ -0,0 +1,84 @@
+// Copyright 2021 The Mumble Developers. All rights reserved.
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file at the root of the
+// Mumble source tree or at <https://www.mumble.info/LICENSE>.
+
+syntax = "proto3";
+
+package MumbleUDP;
+
+option optimize_for = SPEED;
+
+message Audio {
+	oneof Header {
+		// When this audio is sent by the client to the server, this is set to the target of the audio data. This target
+		// is a number in the range [0, 2^{32} - 1], where 0 means "normal talking", 2^{5} - 1 means "server loopback"
+		// and all other targets are understood as shout/whisper targets that have previously been registered via a
+		// VoiceTarget message (via TCP).
+		uint32 target  = 1;	
+		// When this audio is sent by the server to the client, this indicates the context in which the audio has been sent.
+		// 0: Normal speech
+		// 1: Shout to channel
+		// 2: Whisper to user
+		// 3: Received via channel listener
+		uint32 context = 2;
+	};
+
+	// The session of the client (sender) this audio was originally sent from. This field is not required when sending
+	// audio to the server, but will always be set when receiving audio from the server.
+	uint32 sender_session = 3;
+
+	// The number of the first contained audio frame (indicating the position of that frame in the overall audio stream)
+	uint64 frame_number = 4;
+
+	// The actual voice data payload in the Opus format.
+	bytes opus_data = 5;
+
+	// Optional positional data indicating the speaker's position in a virtual world (in meters). This "list" is really
+	// expected to be an array of size 3 containing the X, Y and Z coordinates of the position (in that order).
+	repeated float positional_data = 6;
+
+	// A volume adjustment determined by the server for this audio packet. It is up to the client to apply this adjustment to
+	// the resulting audio (or not). Note: A value of 0 means that this field is unset.
+	float volume_adjustment = 7;
+
+	// Note that we skip the field indices up to (including) 15 in order to have them available for future extensions of the
+	// protocol with fields that are encountered very often. The reason is that all field indices <= 15 require only a single
+	// byte of encoding overhead, whereas the ones > 15 require (at least) two bytes. The reason lies in the Protobuf encoding
+	// scheme that uses 1 bit for a varint continuation flag, 3 bits to encode a field's type and the remaining 4 bits of the
+	// first byte are thus available for the field index. Therefore the field indices 1 to 15 (0 is not a valid index) can
+	// be encoded using only a single byte. For details see https://developers.google.com/protocol-buffers/docs/encoding
+
+	// A flag indicating whether this audio packet represents the end of transmission for the current audio stream
+	bool is_terminator = 16;
+}
+
+/**
+ * Ping message for checking UDP connectivity (and roundtrip ping) and potentially obtaining further server
+ * details (e.g. version).
+ */
+message Ping {
+	// Timestamp as encoded by the client. A server is not supposed to attempt to decode or modify this field. Therefore,
+	// clients may choose an arbitrary format for this timestamp (as long as it fits into a uint64 field).
+	uint64 timestamp = 1;
+
+	// A flag set by the sending client, if it wants to obtain additional information about the server.
+	bool request_extended_information = 2;
+
+
+	// Below are the fields for the "additional information" that are filled out by the server on request.
+
+
+	// The version of the server encoded into a single integer. The 32 bits are partitioned in regions of (left to right)
+	// 16, 8 and 8 bits representing the major, minor and patch version number respectively.
+	uint32 server_version = 3;
+
+	// The number of users currently connected to the server
+	uint32 user_count = 4;
+
+	// The maximum number of users permitted on this server
+	uint32 max_user_count = 5;
+
+	// The maximum bandwidth each user is allowed to use for sending audio to the server
+	uint32 max_bandwidth_per_user = 6;
+}
diff --git a/src/PacketDataStream.h b/src/PacketDataStream.h
index 30ba8d01a..f3241bb92 100644
--- a/src/PacketDataStream.h
+++ b/src/PacketDataStream.h
@@ -93,6 +93,8 @@ public:
 
 	const unsigned char *dataPtr() const { return reinterpret_cast< const unsigned char * >(&data[offset]); }
 
+	unsigned char *dataPtr() { return reinterpret_cast< unsigned char * >(&data[offset]); } // Mutable variant of the above
+
 	const char *charPtr() const { return reinterpret_cast< const char * >(&data[offset]); }
 
 	QByteArray dataBlock(quint32 len) {
@@ -116,6 +118,8 @@ protected:
 	}
 
 public:
+	PacketDataStream(const unsigned char *d, int msize) { setup(const_cast< unsigned char * >(d), msize); }; // NOTE(review): casts away const
+
 	PacketDataStream(const char *d, int msize) {
 		setup(const_cast< unsigned char * >(reinterpret_cast< const unsigned char * >(d)), msize);
 	};
diff --git a/src/QtUtils.h b/src/QtUtils.h
index c5bf32d38..8a9c3aec1 100644
--- a/src/QtUtils.h
+++ b/src/QtUtils.h
@@ -6,6 +6,7 @@
 #ifndef MUMBLE_QTUTILS_H_
 #define MUMBLE_QTUTILS_H_
 
+#include <QCryptographicHash>
 #include <QString>
 
 class QObject;
@@ -34,4 +35,34 @@ namespace QtUtils {
 }; // namespace QtUtils
 }; // namespace Mumble
 
+// For backwards compatibility we have to keep these functions in the global namespace
+// Creates a QString from the given UTF-8 encoded string
+inline QString u8(const ::std::string &str) {
+	const int byteCount = static_cast< int >(str.length());
+
+	return QString::fromUtf8(str.data(), byteCount);
+}
+
+// Creates a QString from the given wide string
+inline QString u8(const ::std::wstring &str) {
+	return QString::fromStdWString(str);
+}
+
+// Creates a std::string holding the UTF-8 representation of the given QString
+inline ::std::string u8(const QString &str) {
+	const QByteArray utf8Bytes = str.toUtf8();
+
+	return ::std::string(utf8Bytes.constData(), utf8Bytes.length());
+}
+
+// Copies the raw bytes of the given string into a QByteArray
+inline QByteArray blob(const ::std::string &str) {
+	const int byteCount = static_cast< int >(str.length());
+
+	return QByteArray(str.data(), byteCount);
+}
+
+// Copies the raw bytes of the given QByteArray into a std::string
+inline ::std::string blob(const QByteArray &str) {
+	const char *const firstByte = str.constData();
+
+	return ::std::string(firstByte, str.length());
+}
+
+// Computes the SHA-1 hash of the given bytes
+inline QByteArray sha1(const QByteArray &blob) {
+	constexpr QCryptographicHash::Algorithm algorithm = QCryptographicHash::Sha1;
+
+	return QCryptographicHash::hash(blob, algorithm);
+}
+
+// Computes the SHA-1 hash of the UTF-8 representation of the given string
+inline QByteArray sha1(const QString &str) {
+	const QByteArray utf8Bytes = str.toUtf8();
+
+	return QCryptographicHash::hash(utf8Bytes, QCryptographicHash::Sha1);
+}
+
 #endif // MUMBLE_QTUTILS_H_
diff --git a/src/SpeechFlags.h b/src/SpeechFlags.h
deleted file mode 100644
index a4491d76e..000000000
--- a/src/SpeechFlags.h
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright 2020-2022 The Mumble Developers. All rights reserved.
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file at the root of the
-// Mumble source tree or at <https://www.mumble.info/LICENSE>.
-
-#ifndef MUMBLE_SPEECHFLAGS_H_
-#define MUMBLE_SPEECHFLAGS_H_
-
-namespace SpeechFlags {
-enum SpeechFlags {
-	Invalid = 0xFF, // This is the unsigned equivalent of -1
-	Normal  = 0,
-	Shout   = 1,
-	Whisper = 2,
-	Listen  = 3
-};
-}; // namespace SpeechFlags
-
-#endif // MUMBLE_SPEECHFLAGS_H_
diff --git a/src/VolumeAdjustment.cpp b/src/VolumeAdjustment.cpp
new file mode 100644
index 000000000..261b53bbe
--- /dev/null
+++ b/src/VolumeAdjustment.cpp
@@ -0,0 +1,53 @@
+// Copyright 2021 The Mumble Developers. All rights reserved.
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file at the root of the
+// Mumble source tree or at <https://www.mumble.info/LICENSE>.
+
+#include "VolumeAdjustment.h"
+
+#include <cassert>
+#include <cmath>
+
+// Maximum deviation (in dB) that is tolerated between the exact dB representation of a volume factor and the
+// integer dB value that is stored alongside it.
+constexpr float dbThreshold = 0.1f;
+
+// Constructs a VolumeAdjustment from a loudness factor and its (optional) integer dB representation.
+//
+// The assertion verifies that dbAdjustment is either InvalidDBAdjustment or a reasonable representation of the
+// given factor, i.e. that it deviates by at most dbThreshold from the exact (usually non-integer) dB value of the
+// factor. For all factors without such an integer representation, InvalidDBAdjustment is expected to be passed.
+//
+// If dB is the dB-representation of a loudness change factor f, we have
+// dB = log2(f) * 6    <=>    f = 2^{dB/6}
+// (+6dB equals a doubling in loudness)
+VolumeAdjustment::VolumeAdjustment(float factor, int dbAdjustment) : factor(factor), dbAdjustment(dbAdjustment) {
+	assert(dbAdjustment == InvalidDBAdjustment || std::abs(dbAdjustment - std::log2(factor) * 6) <= dbThreshold);
+}
+
+// Creates a VolumeAdjustment from the given loudness factor.
+//
+// The exact dB value of the factor is log2(factor) * 6. If that value lies within dbThreshold of its nearest
+// integer, that integer is stored as the dB adjustment. Otherwise (and for factors that are not strictly positive)
+// InvalidDBAdjustment is stored instead.
+VolumeAdjustment VolumeAdjustment::fromFactor(float factor) {
+	if (factor > 0) {
+		const float dB        = std::log2(factor) * 6;
+		const float nearestDB = std::round(dB);
+
+		// Compare against the nearest integer (and not the truncated value), as that is the integer that gets
+		// stored. This way values slightly below an integer (e.g. 5.95 dB) are recognized as close-enough as well.
+		// For an infinite factor the difference is NaN, so the comparison fails and no float-to-int conversion of
+		// an unrepresentable value is attempted.
+		if (std::abs(dB - nearestDB) < dbThreshold) {
+			return VolumeAdjustment(factor, static_cast< int >(nearestDB));
+		}
+	}
+
+	return VolumeAdjustment(factor, InvalidDBAdjustment);
+}
+
+// Creates a VolumeAdjustment from an integer dB value. The corresponding factor is computed as 2^{dB/6}.
+VolumeAdjustment VolumeAdjustment::fromDBAdjustment(int dbAdjustment) {
+	const float exponent = dbAdjustment / 6.0f;
+
+	return VolumeAdjustment(std::pow(2.0f, exponent), dbAdjustment);
+}
+
+// Two adjustments compare equal if their dB adjustments are identical and their factors differ by less than 0.1.
+bool operator==(const VolumeAdjustment &lhs, const VolumeAdjustment &rhs) {
+	if (lhs.dbAdjustment != rhs.dbAdjustment) {
+		return false;
+	}
+
+	const float factorDifference = std::abs(lhs.factor - rhs.factor);
+	return factorDifference < 0.1f;
+}
+
+// Negation of operator==
+bool operator!=(const VolumeAdjustment &lhs, const VolumeAdjustment &rhs) {
+	const bool areEqual = lhs == rhs;
+	return !areEqual;
+}
diff --git a/src/VolumeAdjustment.h b/src/VolumeAdjustment.h
new file mode 100644
index 000000000..6c13be9e1
--- /dev/null
+++ b/src/VolumeAdjustment.h
@@ -0,0 +1,28 @@
+// Copyright 2021 The Mumble Developers. All rights reserved.
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file at the root of the
+// Mumble source tree or at <https://www.mumble.info/LICENSE>.
+
+#ifndef MUMBLE_VOLUMEADJUSTMENT_H_
+#define MUMBLE_VOLUMEADJUSTMENT_H_
+
+#include <limits>
+
+// Represents a volume adjustment as a multiplicative factor along with an optional integer dB representation of
+// that factor.
+class VolumeAdjustment {
+public:
+	// Value of dbAdjustment signalling that no integer dB representation is available
+	static constexpr int InvalidDBAdjustment = std::numeric_limits< int >::max();
+
+	explicit VolumeAdjustment(float factor = 1.0f, int dbAdjustment = InvalidDBAdjustment);
+
+	// The multiplicative factor of this adjustment
+	float factor;
+	// The integer dB representation of factor or InvalidDBAdjustment
+	int dbAdjustment;
+
+	// Named constructors creating an adjustment from only one of the two representations
+	static VolumeAdjustment fromFactor(float factor);
+	static VolumeAdjustment fromDBAdjustment(int dbAdjustment);
+
+	friend bool operator==(const VolumeAdjustment &lhs, const VolumeAdjustment &rhs);
+	friend bool operator!=(const VolumeAdjustment &lhs, const VolumeAdjustment &rhs);
+};
+
+
+#endif
diff --git a/src/benchmarks/AudioReceiverBuffer/AudioReceiverBuffer_benchmark.cpp b/src/benchmarks/AudioReceiverBuffer/AudioReceiverBuffer_benchmark.cpp
new file mode 100644
index 000000000..1362b9208
--- /dev/null
+++ b/src/benchmarks/AudioReceiverBuffer/AudioReceiverBuffer_benchmark.cpp
@@ -0,0 +1,155 @@
+#include <benchmark/benchmark.h>
+
+#include "AudioReceiverBuffer.h"
+#include "MumbleProtocol.h"
+
+#include <algorithm>
+#include <random>
+#include <vector>
+
+// Random number generator shared by the entire benchmark (seeded once from the system's random device)
+std::random_device rd;
+std::mt19937 rng(rd());
+
+// Distributions for the randomized receiver properties. Note that both bounds of a
+// std::uniform_int_distribution are inclusive.
+std::uniform_int_distribution< unsigned int > random_context(Mumble::Protocol::AudioContext::begin,
+															 Mumble::Protocol::AudioContext::end);
+// NOTE(review): as the upper bound is inclusive, this yields values in [-60, 31] - confirm that 31 is intended
+std::uniform_int_distribution< int > random_volume_adjustment(-60, 30 + 1);
+std::uniform_int_distribution< unsigned int > random_version(Version::toRaw(1, 2, 0), Version::toRaw(1, 6, 0));
+
+// Pools of pre-generated data (filled by globalInit) from which the benchmark runs select their receivers
+std::vector< Mumble::Protocol::audio_context_t > contexts;
+std::vector< VolumeAdjustment > volumeAdjustments;
+std::vector< ServerUser > users;
+
+// Indices of the benchmark arguments as used with state.range()
+constexpr std::size_t ReceiverCountRange = 0;
+constexpr std::size_t DuplicateRange     = 1;
+
+// Parameters of the range of receiver counts that the benchmarks are registered with
+constexpr int Multiplier         = 2;
+constexpr int ReceiverCountBegin = 1;
+constexpr int ReceiverCountEnd   = 512;
+
+// Bundles the per-receiver arguments that the benchmarks pass to AudioReceiverBuffer::addReceiver
+struct ReceiverData {
+	ServerUser *receiver;
+	Mumble::Protocol::audio_context_t context;
+	bool containsPositionalData;
+	VolumeAdjustment volumeAdjustment;
+};
+
+// The receivers used by the current benchmark run (re-created in Fixture::SetUp)
+std::vector< ReceiverData > selectedData;
+
+// Fills the global pools with randomized contexts, volume adjustments and users. One entry more than
+// ReceiverCountEnd is created for the receivers, followed by one extra user that acts as the sender.
+void globalInit() {
+	const int receiverPoolSize = ReceiverCountEnd + 1;
+
+	for (int id = 0; id < receiverPoolSize; ++id) {
+		contexts.push_back(random_context(rng));
+		volumeAdjustments.push_back(VolumeAdjustment::fromDBAdjustment(random_volume_adjustment(rng)));
+		users.emplace_back(id, random_version(rng));
+	}
+
+	// The last user in the list is the one acting as the sender
+	users.emplace_back(receiverPoolSize, random_version(rng));
+}
+
+// Benchmark fixture that assembles the receivers (selectedData) for a single benchmark run.
+//
+// The first benchmark argument is the total amount of receivers and the second one the percentage of these that
+// are duplicates of already selected receivers.
+class Fixture : public ::benchmark::Fixture {
+public:
+	void SetUp(const ::benchmark::State &state) {
+		selectedData.clear();
+
+		std::size_t totalReceivers     = state.range(ReceiverCountRange);
+		std::size_t duplicateReceivers = totalReceivers * (state.range(DuplicateRange) / 100.0f);
+
+		for (std::size_t i = 0; i < totalReceivers - duplicateReceivers; ++i) {
+			selectedData.push_back({ &users[i], contexts[i], false, volumeAdjustments[i] });
+		}
+
+		// Duplicates can only be drawn if there is at least one entry to duplicate. Without this check,
+		// size() - 1 would wrap around for an empty list.
+		if (!selectedData.empty()) {
+			std::uniform_int_distribution< unsigned int > random_index(0, selectedData.size() - 1);
+
+			for (std::size_t i = 0; i < duplicateReceivers; ++i) {
+				selectedData.push_back(selectedData[random_index(rng)]);
+			}
+		}
+
+		// std::random_shuffle has been removed in C++17. std::shuffle additionally lets us use our own, seeded
+		// random number generator.
+		std::shuffle(selectedData.begin(), selectedData.end(), rng);
+	}
+};
+
+// Adds all given receivers to a fresh AudioReceiverBuffer and returns the size of the list obtained from
+// getReceivers(false) afterwards.
+std::size_t getUniqueReceivers(std::vector< ReceiverData > &receivers) {
+	AudioReceiverBuffer buffer;
+	ServerUser sender = users.back();
+
+	for (ReceiverData &current : receivers) {
+		buffer.addReceiver(sender, *current.receiver, current.context, current.containsPositionalData,
+						   current.volumeAdjustment);
+	}
+
+	return buffer.getReceivers(false).size();
+}
+
+// Measures adding all selected receivers to an AudioReceiverBuffer followed by clearing the buffer again.
+BENCHMARK_DEFINE_F(Fixture, BM_addReceiver)(::benchmark::State &state) {
+	ServerUser sender = users.back();
+	AudioReceiverBuffer buffer;
+
+	for (auto _ : state) {
+		for (ReceiverData &current : selectedData) {
+			buffer.addReceiver(sender, *current.receiver, current.context, current.containsPositionalData,
+							   current.volumeAdjustment);
+		}
+
+		buffer.clear();
+	}
+
+	state.counters["unique receivers"] = getUniqueReceivers(selectedData);
+}
+
+// Run with every combination of receiver count and duplicate percentage
+BENCHMARK_REGISTER_F(Fixture, BM_addReceiver)
+	->ArgsProduct({ benchmark::CreateRange(ReceiverCountBegin, ReceiverCountEnd, /*multi=*/Multiplier),
+					{ 0, 10, 40, 80 } });
+
+
+// Stand-in for the actual processing of a receiver: simply reads the session ID of the receiving user.
+unsigned int dummyProcessing(const AudioReceiver &receiver) {
+	const unsigned int session = receiver.getReceiver().uiSession;
+	return session;
+}
+
+// Measures the full cycle: adding all selected receivers, preprocessing the buffer, iterating over all receiver
+// ranges (with a dummy operation per receiver) and finally clearing the buffer again.
+BENCHMARK_DEFINE_F(Fixture, BM_full)(::benchmark::State &state) {
+	using ReceiverIterator = std::vector< AudioReceiver >::iterator;
+
+	ServerUser sender = users.back();
+	AudioReceiverBuffer buffer;
+
+	for (auto _ : state) {
+		for (ReceiverData &current : selectedData) {
+			buffer.addReceiver(sender, *current.receiver, current.context, current.containsPositionalData,
+							   current.volumeAdjustment);
+		}
+
+		buffer.preprocessBuffer();
+
+		std::vector< AudioReceiver > &receivers = buffer.getReceivers(false);
+
+		// Process the receivers range by range. An empty range signals that all receivers have been handled.
+		for (ReceiverRange< ReceiverIterator > range =
+				 AudioReceiverBuffer::getReceiverRange(receivers.begin(), receivers.end());
+			 range.begin != range.end;
+			 range = AudioReceiverBuffer::getReceiverRange(range.end, receivers.end())) {
+			for (ReceiverIterator it = range.begin; it != range.end; ++it) {
+				benchmark::DoNotOptimize(dummyProcessing(*it));
+			}
+		}
+
+		buffer.clear();
+	}
+
+	state.counters["unique receivers"] = getUniqueReceivers(selectedData);
+}
+
+// Run with every combination of receiver count and duplicate percentage
+BENCHMARK_REGISTER_F(Fixture, BM_full)
+	->ArgsProduct({ benchmark::CreateRange(ReceiverCountBegin, ReceiverCountEnd, /*multi=*/Multiplier),
+					{ 0, 10, 40, 80 } });
+
+
+// Custom entry point (instead of BENCHMARK_MAIN) as the global data pools have to be initialized before any
+// benchmark is run. Apart from that, this follows the steps that BENCHMARK_MAIN performs.
+int main(int argc, char **argv) {
+	globalInit();
+
+	::benchmark::Initialize(&argc, argv);
+	// Initialize removes all arguments it recognizes. Anything that is left over is reported as an error instead
+	// of being silently ignored.
+	if (::benchmark::ReportUnrecognizedArguments(argc, argv)) {
+		return 1;
+	}
+
+	::benchmark::RunSpecifiedBenchmarks();
+	::benchmark::Shutdown();
+
+	return 0;
+}
diff --git a/src/benchmarks/AudioReceiverBuffer/CMakeLists.txt b/src/benchmarks/AudioReceiverBuffer/CMakeLists.txt
new file mode 100644
index 000000000..b18e228eb
--- /dev/null
+++ b/src/benchmarks/AudioReceiverBuffer/CMakeLists.txt
@@ -0,0 +1,29 @@
+add_executable(AudioReceiverBuffer_benchmark "AudioReceiverBuffer_benchmark.cpp")
+
+target_link_libraries(AudioReceiverBuffer_benchmark PRIVATE shared)
+
+target_link_libraries(AudioReceiverBuffer_benchmark PRIVATE benchmark::benchmark)
+
+# This directory contains the mocked ServerUser.h
+target_include_directories(AudioReceiverBuffer_benchmark PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
+
+
+# In order to be able to mock the ServerUser class, we have to extract the server-specific source and header
+# files into an isolated environment, such that they don't include/link with the remaining server files.
+set(CUSTOM_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include")
+file(MAKE_DIRECTORY "${CUSTOM_INCLUDE_DIR}")
+set(HEADER_TO_COPY "${CMAKE_SOURCE_DIR}/src/murmur/AudioReceiverBuffer.h")
+set(SOURCE_TO_COPY "${CMAKE_SOURCE_DIR}/src/murmur/AudioReceiverBuffer.cpp")
+get_filename_component(HEADER_NAME "${HEADER_TO_COPY}" NAME)
+get_filename_component(SOURCE_NAME "${SOURCE_TO_COPY}" NAME)
+set(COPIED_HEADER "${CUSTOM_INCLUDE_DIR}/${HEADER_NAME}")
+set(COPIED_SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${SOURCE_NAME}")
+
+# The command creates both copies, so both of them are declared as its outputs. That way the build system knows
+# where the copied header comes from and re-runs the copy step if either of the two copies is missing.
+add_custom_command(OUTPUT "${COPIED_SOURCE}" "${COPIED_HEADER}"
+	COMMAND ${CMAKE_COMMAND} -E copy "${HEADER_TO_COPY}" "${COPIED_HEADER}"
+	COMMAND ${CMAKE_COMMAND} -E copy "${SOURCE_TO_COPY}" "${COPIED_SOURCE}"
+	DEPENDS "${HEADER_TO_COPY}" "${SOURCE_TO_COPY}"
+	VERBATIM
+)
+
+target_sources(AudioReceiverBuffer_benchmark PRIVATE "${COPIED_SOURCE}")
+
+target_include_directories(AudioReceiverBuffer_benchmark PRIVATE "${CUSTOM_INCLUDE_DIR}")
diff --git a/src/benchmarks/AudioReceiverBuffer/ServerUser.h b/src/benchmarks/AudioReceiverBuffer/ServerUser.h
new file mode 100644
index 000000000..d07985227
--- /dev/null
+++ b/src/benchmarks/AudioReceiverBuffer/ServerUser.h
@@ -0,0 +1,23 @@
+// Copyright 2021 The Mumble Developers. All rights reserved.
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file at the root of the
+// Mumble source tree or at <https://www.mumble.info/LICENSE>.
+
+
+// NOTE: This is merely a mock of the ServerUser class
+
+// The include guard allows this header to be included more than once in a translation unit (e.g. directly as
+// well as indirectly via another header).
+#ifndef MUMBLE_BENCHMARKS_AUDIORECEIVERBUFFER_SERVERUSER_H_
+#define MUMBLE_BENCHMARKS_AUDIORECEIVERBUFFER_SERVERUSER_H_
+
+#include "Version.h"
+
+#include <string>
+
+// Minimal stand-in providing the members that are set up by the benchmark
+struct ServerUser {
+	// The context is taken by reference in order to avoid an additional copy of the string
+	ServerUser(unsigned int uiSession, Version::mumble_raw_version_t version, bool deaf = false, bool selfDeaf = false,
+			   const std::string &context = "")
+		: uiSession(uiSession), uiVersion(version), bDeaf(deaf), bSelfDeaf(selfDeaf), ssContext(context) {}
+
+	unsigned int uiSession;
+	Version::mumble_raw_version_t uiVersion;
+	bool bDeaf;
+	bool bSelfDeaf;
+	std::string ssContext;
+};
+
+#endif // MUMBLE_BENCHMARKS_AUDIORECEIVERBUFFER_SERVERUSER_H_
diff --git a/src/benchmarks/CMakeLists.txt b/src/benchmarks/CMakeLists.txt
new file mode 100644
index 000000000..d72017c87
--- /dev/null
+++ b/src/benchmarks/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Google Benchmark is fetched via FetchContent (pinned to a fixed release tag)
+include(FetchContent)
+
+FetchContent_Declare(
+  googlebenchmark
+  GIT_REPOSITORY https://github.com/google/benchmark.git
+  GIT_TAG        v1.6.0
+)
+
+# Has to be set before the dependency is made available in order to take effect
+set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "")
+
+FetchContent_MakeAvailable(googlebenchmark)
+
+# The individual benchmarks
+add_subdirectory(protocol)
+add_subdirectory(AudioReceiverBuffer)
diff --git a/src/benchmarks/protocol/CMakeLists.txt b/src/benchmarks/protocol/CMakeLists.txt
new file mode 100644
index 000000000..558ec7d4f
--- /dev/null
+++ b/src/benchmarks/protocol/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_executable(protocol_benchmark "protocol_benchmark.cpp")
+
+# Link against the shared Mumble code as well as the Google Benchmark library
+target_link_libraries(protocol_benchmark
+	PRIVATE
+		shared
+		benchmark::benchmark
+)
diff --git a/src/benchmarks/protocol/protocol_benchmark.cpp b/src/benchmarks/protocol/protocol_benchmark.cpp
new file mode 100644
index 000000000..4ff078757
--- /dev/null
+++ b/src/benchmarks/protocol/protocol_benchmark.cpp
@@ -0,0 +1,126 @@
+#include <benchmark/benchmark.h>
+
+#include "MumbleProtocol.h"
+#include "PacketDataStream.h"
+
+#include <limits>
+#include <random>
+#include <vector>
+
+// Random number generator shared by the entire benchmark (seeded once from the system's random device)
+std::random_device rd;
+std::mt19937 rng(rd());
+// NOTE(review): random_integer is not referenced anywhere in this file - confirm whether it is still needed
+std::uniform_int_distribution< unsigned int > random_integer(1, 1024);
+std::uniform_int_distribution< unsigned int > random_byte(0, std::numeric_limits< Mumble::Protocol::byte >::max());
+
+// Index of the benchmark argument (as used with state.range()) that holds the payload size
+constexpr int PAYLOAD_SIZE_RANGE = 0;
+
+// Parameters of the range of payload sizes that the benchmarks are registered with
+constexpr int fromPayloadSize       = 0;
+constexpr int toPayloadSize         = 900;
+constexpr int payloadSizeMultiplier = 2;
+
+// The payload is stored separately as audioData only holds a view on it
+std::vector< Mumble::Protocol::byte > audioPayload;
+Mumble::Protocol::AudioData audioData;
+
+// The encoder that is shared by all benchmarks
+Mumble::Protocol::UDPAudioEncoder< Mumble::Protocol::Role::Server > encoder;
+
+// Benchmark fixture that fills the global audioData object with a random payload of the requested size and a
+// fixed set of meta data.
+class Fixture : public ::benchmark::Fixture {
+public:
+	void SetUp(const ::benchmark::State &state) {
+		// The payload size is taken from the benchmark's argument
+		audioPayload.resize(state.range(PAYLOAD_SIZE_RANGE));
+
+		for (Mumble::Protocol::byte &currentByte : audioPayload) {
+			currentByte = random_byte(rng);
+		}
+
+		audioData.payload                = { audioPayload.data(), audioPayload.size() };
+		audioData.frameNumber            = 42;
+		audioData.isLastFrame            = false;
+		audioData.senderSession          = 137;
+		audioData.targetOrContext        = Mumble::Protocol::AudioContext::Normal;
+		audioData.usedCodec              = Mumble::Protocol::AudioCodec::Opus;
+		audioData.position               = { 1.25f, 1260.539f, -3.0765f };
+		audioData.containsPositionalData = true;
+
+		// Encode the packet once with each of the two protocol versions used by the benchmarks
+		// NOTE(review): presumably meant as a warm-up of the encoder - confirm
+		encoder.setProtocolVersion(Version::toRaw(1, 3, 0));
+		encoder.encodeAudioPacket(audioData);
+		encoder.setProtocolVersion(Mumble::Protocol::PROTOBUF_INTRODUCTION_VERSION);
+		encoder.encodeAudioPacket(audioData);
+	}
+};
+
+// Measures writing an audio packet in the legacy format directly into a buffer by means of a PacketDataStream
+// (without going through the encoder object).
+BENCHMARK_DEFINE_F(Fixture, BM_encodeLegacyDirect)(::benchmark::State &state) {
+	std::vector< Mumble::Protocol::byte > buffer;
+	buffer.resize(Mumble::Protocol::MAX_UDP_PACKET_SIZE);
+
+	for (auto _ : state) {
+		PacketDataStream stream(buffer.data() + 1, buffer.size() - 1);
+
+		// The first byte holds the codec in its upper 3 bits and the target/context in the remaining 5 bits
+		buffer[0] = (static_cast< Mumble::Protocol::byte >(audioData.usedCodec) << 5)
+					| static_cast< Mumble::Protocol::byte >(audioData.targetOrContext);
+
+		stream << audioData.senderSession;
+		stream << static_cast< quint64 >(audioData.frameNumber);
+		stream.append(reinterpret_cast< const char * >(audioData.payload.data()), audioData.payload.size());
+		stream.append(reinterpret_cast< const char * >(&audioData.position[0]),
+					  sizeof(float) * audioData.position.size());
+
+		// The buffer's content is never read. Therefore, we have to make sure that the compiler does not get rid of
+		// the writes that we are trying to measure.
+		benchmark::DoNotOptimize(buffer.data());
+		benchmark::ClobberMemory();
+	}
+}
+
+BENCHMARK_REGISTER_F(Fixture, BM_encodeLegacyDirect)
+	->RangeMultiplier(payloadSizeMultiplier)
+	->Range(fromPayloadSize, toPayloadSize);
+
+// Measures the complete encoding of an audio packet with the encoder's protocol version set to 1.3.0
+BENCHMARK_DEFINE_F(Fixture, BM_encodeLegacy)(::benchmark::State &state) {
+	encoder.setProtocolVersion(Version::toRaw(1, 3, 0));
+
+	for (auto _ : state) {
+		encoder.encodeAudioPacket(audioData);
+
+		// The encoded packet is not used, so make sure the encoder's writes can't be optimized away
+		benchmark::ClobberMemory();
+	}
+}
+
+BENCHMARK_REGISTER_F(Fixture, BM_encodeLegacy)
+	->RangeMultiplier(payloadSizeMultiplier)
+	->Range(fromPayloadSize, toPayloadSize);
+
+// Measures only the update step of an already prepared audio packet with the encoder's protocol version set to
+// 1.3.0. The preparation is performed once outside of the measured loop.
+BENCHMARK_DEFINE_F(Fixture, BM_encodeLegacy_UpdateOnly)(::benchmark::State &state) {
+	encoder.setProtocolVersion(Version::toRaw(1, 3, 0));
+
+	encoder.prepareAudioPacket(audioData);
+
+	for (auto _ : state) {
+		encoder.updateAudioPacket(audioData);
+
+		// The updated packet is not used, so make sure the encoder's writes can't be optimized away
+		benchmark::ClobberMemory();
+	}
+}
+
+BENCHMARK_REGISTER_F(Fixture, BM_encodeLegacy_UpdateOnly)
+	->RangeMultiplier(payloadSizeMultiplier)
+	->Range(fromPayloadSize, toPayloadSize);
+
+// Measures the complete encoding of an audio packet with the encoder's protocol version set to
+// PROTOBUF_INTRODUCTION_VERSION
+BENCHMARK_DEFINE_F(Fixture, BM_encodeNew)(::benchmark::State &state) {
+	encoder.setProtocolVersion(Mumble::Protocol::PROTOBUF_INTRODUCTION_VERSION);
+
+	for (auto _ : state) {
+		encoder.encodeAudioPacket(audioData);
+
+		// The encoded packet is not used, so make sure the encoder's writes can't be optimized away
+		benchmark::ClobberMemory();
+	}
+}
+
+BENCHMARK_REGISTER_F(Fixture, BM_encodeNew)
+	->RangeMultiplier(payloadSizeMultiplier)
+	->Range(fromPayloadSize, toPayloadSize);
+
+// Measures only the update step of an already prepared audio packet with the encoder's protocol version set to
+// PROTOBUF_INTRODUCTION_VERSION. The preparation is performed once outside of the measured loop.
+BENCHMARK_DEFINE_F(Fixture, BM_encodeNew_UpdateOnly)(::benchmark::State &state) {
+	encoder.setProtocolVersion(Mumble::Protocol::PROTOBUF_INTRODUCTION_VERSION);
+
+	encoder.prepareAudioPacket(audioData);
+
+	for (auto _ : state) {
+		encoder.updateAudioPacket(audioData);
+
+		// The updated packet is not used, so make sure the encoder's writes can't be optimized away
+		benchmark::ClobberMemory();
+	}
+}
+
+BENCHMARK_REGISTER_F(Fixture, BM_encodeNew_UpdateOnly)
+	->RangeMultiplier(payloadSizeMultiplier)
+	->Range(fromPayloadSize, toPayloadSize);
+
+
+BENCHMARK_MAIN();
diff --git a/src/mumble/ACLEditor.cpp b/src/mumble/ACLEditor.cpp
index 47f30ba39..f56134052 100644
--- a/src/mumble/ACLEditor.cpp
+++ b/src/mumble/ACLEditor.cpp
@@ -10,6 +10,7 @@
 #include "ClientUser.h"
 #include "Database.h"
 #include "Log.h"
+#include "QtUtils.h"
 #include "ServerHandler.h"
 #include "User.h"
 
diff --git a/src/mumble/Audio.cpp b/src/mumble/Audio.cpp
index 599f3f334..0ec63e8a9 100644
--- a/src/mumble/Audio.cpp
+++ b/src/mumble/Audio.cpp
@@ -16,6 +16,8 @@
 
 #include <QtCore/QObject>
 
+#include <cstring>
+
 
 class CodecInit : public DeferInit {
 public:
@@ -83,7 +85,7 @@ LoopUser::LoopUser() {
 	qetLastFetch.start();
 }
 
-void LoopUser::addFrame(const QByteArray &packet) {
+void LoopUser::addFrame(const Mumble::Protocol::AudioData &audioData) {
 	if (DOUBLE_RAND < Global::get().s.dPacketLoss) {
 		qWarning("Drop");
 		return;
@@ -101,16 +103,30 @@ void LoopUser::addFrame(const QByteArray &packet) {
 		else
 			r = DOUBLE_RAND * Global::get().s.dMaxPacketDelay;
 
-		qmPackets.insert(static_cast< float >(time + r), packet);
+
+		float virtualArrivalTime = time + r;
+		// Insert default-constructed AudioPacket object and only then fill its data in-place. This is necessary to
+		// avoid any moving around of the payload vector which would mess up our pointers in the AudioData object.
+		m_packets[virtualArrivalTime] = AudioPacket{};
+		AudioPacket &packet           = m_packets[virtualArrivalTime];
+
+		// copy audio data to packet
+		packet.payload.resize(audioData.payload.size());
+		std::memcpy(packet.payload.data(), audioData.payload.data(), audioData.payload.size());
+
+		packet.audioData = audioData;
+		// The audio data is now stored in the payload vector and thus this is where we should point the used view (we
+		// don't own the original buffer and can thus not guarantee what happens with it once this function returns).
+		packet.audioData.payload = { packet.payload.data(), packet.payload.size() };
 	}
 
 	// Restart check
 	if (qetLastFetch.elapsed() > 100) {
 		AudioOutputPtr ao = Global::get().ao;
 		if (ao) {
-			MessageHandler::UDPMessageType msgType =
-				static_cast< MessageHandler::UDPMessageType >((packet.at(0) >> 5) & 0x7);
-			ao->addFrameToBuffer(this, QByteArray(), 0, msgType);
+			Mumble::Protocol::AudioData empty;
+			empty.usedCodec = audioData.usedCodec;
+			ao->addFrameToBuffer(this, empty);
 		}
 	}
 }
@@ -119,41 +135,27 @@ void LoopUser::fetchFrames() {
 	QMutexLocker l(&qmLock);
 
 	AudioOutputPtr ao(Global::get().ao);
-	if (!ao || qmPackets.isEmpty()) {
+	if (!ao || m_packets.empty()) {
 		return;
 	}
 
 	double cmp = qetTicker.elapsed();
 
-	QMultiMap< float, QByteArray >::iterator i = qmPackets.begin();
-
-	while (i != qmPackets.end()) {
-		if (i.key() > cmp)
+	auto it = m_packets.begin();
+	while (it != m_packets.end()) {
+		if (it->first > cmp) {
 			break;
+		}
 
-		int iSeq;
-		const QByteArray &data = i.value();
-		PacketDataStream pds(data.constData(), data.size());
-
-		unsigned int msgFlags = static_cast< unsigned int >(pds.next());
-
-		pds >> iSeq;
-
-		QByteArray qba;
-		qba.reserve(pds.left() + 1);
-		qba.append(static_cast< char >(msgFlags));
-		qba.append(pds.dataBlock(pds.left()));
-
-		MessageHandler::UDPMessageType msgType = static_cast< MessageHandler::UDPMessageType >((msgFlags >> 5) & 0x7);
+		ao->addFrameToBuffer(this, it->second.audioData);
 
-		ao->addFrameToBuffer(this, qba, iSeq, msgType);
-		i = qmPackets.erase(i);
+		it = m_packets.erase(it);
 	}
 
 	qetLastFetch.restart();
 }
 
-RecordUser::RecordUser() : LoopUser() {
+RecordUser::RecordUser() {
 	qsName = QLatin1String("Recorder");
 }
 
@@ -163,26 +165,12 @@ RecordUser::~RecordUser() {
 		ao->removeBuffer(this);
 }
 
-void RecordUser::addFrame(const QByteArray &packet) {
+void RecordUser::addFrame(const Mumble::Protocol::AudioData &audioData) {
 	AudioOutputPtr ao(Global::get().ao);
 	if (!ao)
 		return;
 
-	int iSeq;
-	PacketDataStream pds(packet.constData(), packet.size());
-
-	unsigned int msgFlags = static_cast< unsigned int >(pds.next());
-
-	pds >> iSeq;
-
-	QByteArray qba;
-	qba.reserve(pds.left() + 1);
-	qba.append(static_cast< char >(msgFlags));
-	qba.append(pds.dataBlock(pds.left()));
-
-	MessageHandler::UDPMessageType msgType = static_cast< MessageHandler::UDPMessageType >((msgFlags >> 5) & 0x7);
-
-	ao->addFrameToBuffer(this, qba, iSeq, msgType);
+	ao->addFrameToBuffer(this, audioData);
 }
 
 void Audio::startOutput(const QString &output) {
diff --git a/src/mumble/Audio.h b/src/mumble/Audio.h
index 9a3dcb4fc..df88a2e1c 100644
--- a/src/mumble/Audio.h
+++ b/src/mumble/Audio.h
@@ -14,6 +14,10 @@
 #include <QtCore/QVariant>
 
 #include "ClientUser.h"
+#include "MumbleProtocol.h"
+
+#include <map>
+#include <unordered_map>
+#include <vector>
 
 #define SAMPLE_RATE 48000
 
@@ -29,26 +33,30 @@ class LoopUser : public ClientUser {
 private:
 	Q_DISABLE_COPY(LoopUser)
 protected:
+	struct AudioPacket {
+		std::vector< Mumble::Protocol::byte > payload;
+		Mumble::Protocol::AudioData audioData;
+	};
 	QMutex qmLock;
 	QElapsedTimer qetTicker;
 	QElapsedTimer qetLastFetch;
-	QMultiMap< float, QByteArray > qmPackets;
+	// Packets keyed (and thereby sorted) by their virtual arrival time. An ordered container is required here, as
+	// fetchFrames() walks it from the beginning and stops at the first packet that is not yet due. std::map also
+	// never relocates its elements, which keeps the payload views inside the stored packets valid.
+	std::map< float, AudioPacket > m_packets;
 	LoopUser();
 
 public:
 	static LoopUser lpLoopy;
-	virtual void addFrame(const QByteArray &packet);
+	void addFrame(const Mumble::Protocol::AudioData &audioData);
 	void fetchFrames();
 };
 
-class RecordUser : public LoopUser {
+class RecordUser : public ClientUser {
 private:
 	Q_OBJECT
 	Q_DISABLE_COPY(RecordUser)
 public:
 	RecordUser();
 	~RecordUser() Q_DECL_OVERRIDE;
-	void addFrame(const QByteArray &packet) Q_DECL_OVERRIDE;
+	void addFrame(const Mumble::Protocol::AudioData &audioData);
 };
 
 namespace Audio {
diff --git a/src/mumble/AudioInput.cpp b/src/mumble/AudioInput.cpp
index 2f4d4b594..1249e52ab 100644
--- a/src/mumble/AudioInput.cpp
+++ b/src/mumble/AudioInput.cpp
@@ -9,7 +9,7 @@
 #include "AudioOutput.h"
 #include "CELTCodec.h"
 #include "MainWindow.h"
-#include "Message.h"
+#include "MumbleProtocol.h"
 #include "NetworkConfig.h"
 #include "OpusCodec.h"
 #include "PacketDataStream.h"
@@ -18,7 +18,6 @@
 #include "User.h"
 #include "Utils.h"
 #include "VoiceRecorder.h"
-
 #include "Global.h"
 
 #ifdef USE_RNNOISE
@@ -28,6 +27,8 @@ extern "C" {
 #endif
 
 #include <algorithm>
+#include <cassert>
+#include <exception>
 #include <limits>
 
 #ifdef USE_RNNOISE
@@ -225,7 +226,7 @@ AudioInput::AudioInput() : opusBuffer(Global::get().s.iFramesPerPacket * (SAMPLE
 
 	Global::get().iAudioBandwidth = getNetworkBandwidth(iAudioQuality, iAudioFrames);
 
-	umtType = MessageHandler::UDPVoiceCELTAlpha;
+	m_codec = Mumble::Protocol::AudioCodec::CELT_Alpha;
 
 	activityState = ActivityStateActive;
 	oCodec        = nullptr;
@@ -803,7 +804,7 @@ bool AudioInput::selectCodec() {
 
 	// Currently talking, use previous Opus status.
 	if (bPreviousVoice) {
-		useOpus = (umtType == MessageHandler::UDPVoiceOpus);
+		useOpus = (m_codec == Mumble::Protocol::AudioCodec::Opus);
 	} else {
 		if (Global::get().bOpus || (Global::get().s.lmLoopMode == Settings::Local)) {
 			useOpus = true;
@@ -847,25 +848,25 @@ bool AudioInput::selectCodec() {
 			return false;
 	}
 
-	MessageHandler::UDPMessageType previousType = umtType;
+	Mumble::Protocol::AudioCodec previousCodec = m_codec;
 	if (useOpus) {
-		umtType = MessageHandler::UDPVoiceOpus;
+		m_codec = Mumble::Protocol::AudioCodec::Opus;
 	} else {
 		if (!Global::get().uiSession) {
-			umtType = MessageHandler::UDPVoiceCELTAlpha;
+			m_codec = Mumble::Protocol::AudioCodec::CELT_Alpha;
 		} else {
 			int v = cCodec->bitstreamVersion();
-			if (v == Global::get().iCodecAlpha)
-				umtType = MessageHandler::UDPVoiceCELTAlpha;
-			else if (v == Global::get().iCodecBeta)
-				umtType = MessageHandler::UDPVoiceCELTBeta;
-			else {
+			if (v == Global::get().iCodecAlpha) {
+				m_codec = Mumble::Protocol::AudioCodec::CELT_Alpha;
+			} else if (v == Global::get().iCodecBeta) {
+				m_codec = Mumble::Protocol::AudioCodec::CELT_Beta;
+			} else {
 				qWarning() << "Couldn't find message type for codec version" << v;
 			}
 		}
 	}
 
-	if (umtType != previousType) {
+	if (m_codec != previousCodec) {
 		iBufferedFrames = 0;
 		qlFrames.clear();
 		opusBuffer.clear();
@@ -1175,7 +1176,7 @@ void AudioInput::encodeAudioFrame(AudioChunk chunk) {
 	if (!selectCodec())
 		return;
 
-	if (umtType == MessageHandler::UDPVoiceCELTAlpha || umtType == MessageHandler::UDPVoiceCELTBeta) {
+	if (m_codec == Mumble::Protocol::AudioCodec::CELT_Alpha || m_codec == Mumble::Protocol::AudioCodec::CELT_Beta) {
 		len = encodeCELTFrame(psSource, buffer);
 		if (len <= 0) {
 			iBitrate = 0;
@@ -1183,7 +1184,7 @@ void AudioInput::encodeAudioFrame(AudioChunk chunk) {
 			return;
 		}
 		++iBufferedFrames;
-	} else if (umtType == MessageHandler::UDPVoiceOpus) {
+	} else if (m_codec == Mumble::Protocol::AudioCodec::Opus) {
 		encoded = false;
 		opusBuffer.insert(opusBuffer.end(), psSource, psSource + iFrameSize);
 		++iBufferedFrames;
@@ -1224,18 +1225,11 @@ void AudioInput::encodeAudioFrame(AudioChunk chunk) {
 	bPreviousVoice = bIsSpeech;
 }
 
-static void sendAudioFrame(const char *data, PacketDataStream &pds) {
+static void sendAudioFrame(gsl::span< const Mumble::Protocol::byte > encodedPacket) {
 	ServerHandlerPtr sh = Global::get().sh;
 	if (sh) {
-		VoiceRecorderPtr recorder(sh->recorder);
-		if (recorder)
-			recorder->getRecordUser().addFrame(QByteArray(data, pds.size() + 1));
+		sh->sendMessage(encodedPacket.data(), encodedPacket.size());
 	}
-
-	if (Global::get().s.lmLoopMode == Settings::Local)
-		LoopUser::lpLoopy.addFrame(QByteArray(data, pds.size() + 1));
-	else if (sh)
-		sh->sendMessage(data, pds.size() + 1);
 }
 
 void AudioInput::flushCheck(const QByteArray &frame, bool terminator, int voiceTargetID) {
@@ -1244,10 +1238,10 @@ void AudioInput::flushCheck(const QByteArray &frame, bool terminator, int voiceT
 	if (!terminator && iBufferedFrames < iAudioFrames)
 		return;
 
-	int flags = 0;
-	if (voiceTargetID > 0) {
-		flags = voiceTargetID;
-	}
+	Mumble::Protocol::AudioData audioData;
+	// Only positive IDs are used as the target. Everything else falls back to 0, which matches the handling of
+	// the flags-based implementation that this code replaces.
+	audioData.targetOrContext = voiceTargetID > 0 ? voiceTargetID : 0;
+	audioData.isLastFrame     = terminator;
+
 	if (terminator && Global::get().iPrevTarget > 0) {
 		// If we have been whispering to some target but have just ended, terminator will be true. However
 		// in the case of whispering this means that we just released the whisper key so this here is the
@@ -1255,63 +1249,89 @@ void AudioInput::flushCheck(const QByteArray &frame, bool terminator, int voiceT
 		// is reset to 0 by now. In order to send the last whisper frame correctly, we have to use
 		// Global::get().iPrevTarget which is set to whatever Global::get().iTarget has been before its last change.
 
-		flags = Global::get().iPrevTarget;
+		audioData.targetOrContext = Global::get().iPrevTarget;
 
 		// We reset Global::get().iPrevTarget as it has fulfilled its purpose for this whisper-action. It'll be set
 		// accordingly once the client whispers for the next time.
 		Global::get().iPrevTarget = 0;
 	}
+	if (Global::get().s.lmLoopMode == Settings::Server) {
+		audioData.targetOrContext = Mumble::Protocol::ReservedTargetIDs::ServerLoopback;
+	}
 
-	if (Global::get().s.lmLoopMode == Settings::Server)
-		flags = 0x1f; // Server loopback
-
-	flags |= (umtType << 5);
-
-	char data[1024];
-	data[0] = static_cast< unsigned char >(flags);
+	audioData.usedCodec = m_codec;
 
 	int frames      = iBufferedFrames;
 	iBufferedFrames = 0;
 
-	PacketDataStream pds(data + 1, 1023);
-	// Sequence number
-	pds << iFrameCounter - frames;
-
-	if (umtType == MessageHandler::UDPVoiceOpus) {
-		const QByteArray &qba = qlFrames.takeFirst();
-		int size              = qba.size();
-		if (terminator)
-			size |= 1 << 13;
-		pds << size;
-		pds.append(qba.constData(), qba.size());
+	audioData.frameNumber = iFrameCounter - frames;
+
+	if (Global::get().s.bTransmitPosition && Global::get().pluginManager && !Global::get().bCenterPosition
+		&& Global::get().pluginManager->fetchPositionalData()) {
+		Position3D currentPos = Global::get().pluginManager->getPositionalData().getPlayerPos();
+
+		audioData.position[0] = currentPos.x;
+		audioData.position[1] = currentPos.y;
+		audioData.position[2] = currentPos.z;
+
+		audioData.containsPositionalData = true;
+	}
+
+	if (m_codec == Mumble::Protocol::AudioCodec::Opus) {
+		// In Opus mode we only expect a single frame per packet
+		assert(qlFrames.size() == 1);
+
+		audioData.payload = gsl::span< const Mumble::Protocol::byte >(
+			reinterpret_cast< const Mumble::Protocol::byte * >(qlFrames[0].constData()), qlFrames[0].size());
 	} else {
+		// Legacy codecs (Speex or CELT) may use multiple frames for a single packet
+		if (!m_legacyBuffer) {
+			m_legacyBuffer = std::make_unique< Mumble::Protocol::byte[] >(Mumble::Protocol::MAX_UDP_PACKET_SIZE);
+		}
+
 		if (terminator) {
 			qlFrames << QByteArray();
 			++frames;
 		}
 
+		std::size_t offset = 0;
 		for (int i = 0; i < frames; ++i) {
-			const QByteArray &qba = qlFrames.takeFirst();
+			// Every buffered frame has to be written exactly once and thus the frame list is indexed with i.
+			// Always reading the first entry would repeat that frame and never write the terminator frame that
+			// might have been appended above.
+			const QByteArray &qba = qlFrames[i];
 			unsigned char head    = static_cast< unsigned char >(qba.size());
 			if (i < frames - 1)
 				head |= 0x80;
-			pds.append(head);
-			pds.append(qba.constData(), qba.size());
+			std::memcpy(m_legacyBuffer.get() + offset, &head, sizeof(head));
+			offset += sizeof(head);
+			std::memcpy(m_legacyBuffer.get() + offset, qba.constData(), qba.size());
+			offset += qba.size();
 		}
+
+		audioData.payload = gsl::span< const Mumble::Protocol::byte >(m_legacyBuffer.get(), offset);
 	}
 
-	if (Global::get().s.bTransmitPosition && Global::get().pluginManager && !Global::get().bCenterPosition
-		&& Global::get().pluginManager->fetchPositionalData()) {
-		Position3D currentPos = Global::get().pluginManager->getPositionalData().getPlayerPos();
+	{
+		ServerHandlerPtr sh = Global::get().sh;
+		if (sh) {
+			VoiceRecorderPtr recorder(sh->recorder);
+			if (recorder) {
+				recorder->getRecordUser().addFrame(audioData);
+			}
 
-		pds << currentPos.x;
-		pds << currentPos.y;
-		pds << currentPos.z;
+			m_udpEncoder.setProtocolVersion(sh->uiVersion);
+		}
 	}
 
-	sendAudioFrame(data, pds);
+	if (Global::get().s.lmLoopMode == Settings::Local) {
+		// Only add audio data to local loop buffer
+		LoopUser::lpLoopy.addFrame(audioData);
+	} else {
+		// Encode audio frame and send out
+		gsl::span< const Mumble::Protocol::byte > encodedAudioPacket = m_udpEncoder.encodeAudioPacket(audioData);
+
+		sendAudioFrame(encodedAudioPacket);
+	}
 
-	Q_ASSERT(qlFrames.isEmpty());
+	qlFrames.clear();
 }
 
 bool AudioInput::isAlive() const {
diff --git a/src/mumble/AudioInput.h b/src/mumble/AudioInput.h
index 6145591e3..9fa755e53 100644
--- a/src/mumble/AudioInput.h
+++ b/src/mumble/AudioInput.h
@@ -13,6 +13,7 @@
 #include <boost/shared_ptr.hpp>
 #include <fstream>
 #include <list>
+#include <memory>
 #include <mutex>
 #include <speex/speex.h>
 #include <speex/speex_echo.h>
@@ -22,7 +23,7 @@
 
 #include "Audio.h"
 #include "EchoCancelOption.h"
-#include "Message.h"
+#include "MumbleProtocol.h"
 #include "Settings.h"
 #include "Timer.h"
 
@@ -179,6 +180,9 @@ private:
 
 	SpeexResamplerState *srsMic, *srsEcho;
 
+	std::unique_ptr< Mumble::Protocol::byte[] > m_legacyBuffer;
+	Mumble::Protocol::UDPAudioEncoder< Mumble::Protocol::Role::Client > m_udpEncoder;
+
 	unsigned int iMicFilled, iEchoFilled;
 	inMixerFunc imfMic, imfEcho;
 	inMixerFunc chooseMixer(const unsigned int nchan, SampleFormat sf, quint64 mask);
@@ -198,7 +202,7 @@ private:
 	QElapsedTimer qetLastMuteCue;
 
 protected:
-	MessageHandler::UDPMessageType umtType;
+	Mumble::Protocol::AudioCodec m_codec;
 	SampleFormat eMicFormat, eEchoFormat;
 
 	unsigned int iMicChannels, iEchoChannels;
diff --git a/src/mumble/AudioOutput.cpp b/src/mumble/AudioOutput.cpp
index 40399b0fa..a873a47c6 100644
--- a/src/mumble/AudioOutput.cpp
+++ b/src/mumble/AudioOutput.cpp
@@ -10,11 +10,8 @@
 #include "AudioOutputSpeech.h"
 #include "Channel.h"
 #include "ChannelListenerManager.h"
-#include "Message.h"
-#include "PacketDataStream.h"
 #include "PluginManager.h"
 #include "ServerHandler.h"
-#include "SpeechFlags.h"
 #include "Timer.h"
 #include "User.h"
 #include "Utils.h"
@@ -142,41 +139,39 @@ const float *AudioOutput::getSpeakerPos(unsigned int &speakers) {
 	return nullptr;
 }
 
-void AudioOutput::addFrameToBuffer(ClientUser *user, const QByteArray &qbaPacket, unsigned int iSeq,
-								   MessageHandler::UDPMessageType type) {
-	if (iChannels == 0)
+void AudioOutput::addFrameToBuffer(ClientUser *sender, const Mumble::Protocol::AudioData &audioData) {
+	if (iChannels == 0) {
 		return;
+	}
+
 	qrwlOutputs.lockForRead();
-	// qmOutputs is a map of users and their AudioOutputUser objects, which will be create when audio from that user
+	// qmOutputs is a map of users and their AudioOutputUser objects, which will be created when audio from that user
 	// is received. This map will be iterated in mix(). After one's audio is finished, his AudioOutputUser will be
 	// removed from this map.
-	AudioOutputSpeech *aop = qobject_cast< AudioOutputSpeech * >(qmOutputs.value(user));
-
-	if (!UDPMessageTypeIsValidVoicePacket(type)) {
-		qWarning("AudioOutput: ignored frame with invalid message type 0x%x in addFrameToBuffer().",
-				 static_cast< unsigned char >(type));
-		return;
-	}
+	AudioOutputSpeech *aop = qobject_cast< AudioOutputSpeech * >(qmOutputs.value(sender));
 
-	if (!aop || (aop->umtType != type)) {
+	if (!aop || (aop->m_codec != audioData.usedCodec)) {
 		qrwlOutputs.unlock();
 
-		if (aop)
+		if (aop) {
 			removeBuffer(aop);
+		}
 
 		while ((iMixerFreq == 0) && isAlive()) {
 			QThread::yieldCurrentThread();
 		}
 
-		if (!iMixerFreq)
+		if (!iMixerFreq) {
 			return;
+		}
 
 		qrwlOutputs.lockForWrite();
-		aop = new AudioOutputSpeech(user, iMixerFreq, type, iBufferSize);
-		qmOutputs.replace(user, aop);
+
+		aop = new AudioOutputSpeech(sender, iMixerFreq, audioData.usedCodec, iBufferSize);
+		qmOutputs.replace(sender, aop);
 	}
 
-	aop->addFrameToBuffer(qbaPacket, iSeq);
+	aop->addFrameToBuffer(audioData);
 
 	qrwlOutputs.unlock();
 }
@@ -502,11 +497,12 @@ bool AudioOutput::mix(void *outbuff, unsigned int frameCount) {
 			const ClientUser *user    = nullptr;
 			if (speech) {
 				user = speech->p;
+
 				volumeAdjustment *= user->getLocalVolumeAdjustments();
 
 				if (user->cChannel
 					&& Global::get().channelListenerManager->isListening(Global::get().uiSession, user->cChannel->iId)
-					&& (speech->ucFlags & SpeechFlags::Listen)) {
+					&& (speech->m_audioContext == Mumble::Protocol::AudioContext::Listen)) {
 					// We are receiving this audio packet only because we are listening to the channel
 					// the speaking user is in. Thus we receive the audio via our "listener proxy".
 					// Thus we'll apply the volume adjustment for our listener proxy as well
diff --git a/src/mumble/AudioOutput.h b/src/mumble/AudioOutput.h
index fd96fa359..60a6cb13c 100644
--- a/src/mumble/AudioOutput.h
+++ b/src/mumble/AudioOutput.h
@@ -10,6 +10,8 @@
 #include <QtCore/QThread>
 #include <boost/shared_ptr.hpp>
 
+#include "MumbleProtocol.h"
+
 #ifdef USE_MANUAL_PLUGIN
 #	include "ManualPlugin.h"
 #endif
@@ -42,7 +44,6 @@
 #endif
 
 #include "Audio.h"
-#include "Message.h"
 
 class AudioOutput;
 class ClientUser;
@@ -119,7 +120,7 @@ public:
 	/// and is guaranteed to be called on the application's main thread.
 	~AudioOutput() Q_DECL_OVERRIDE;
 
-	void addFrameToBuffer(ClientUser *, const QByteArray &, unsigned int iSeq, MessageHandler::UDPMessageType type);
+	void addFrameToBuffer(ClientUser *sender, const Mumble::Protocol::AudioData &audioData);
 	void removeBuffer(const ClientUser *);
 	AudioOutputSample *playSample(const QString &filename, bool loop = false);
 	void run() Q_DECL_OVERRIDE = 0;
diff --git a/src/mumble/AudioOutputCache.cpp b/src/mumble/AudioOutputCache.cpp
new file mode 100644
index 000000000..dcf34680e
--- /dev/null
+++ b/src/mumble/AudioOutputCache.cpp
@@ -0,0 +1,81 @@
+// Copyright 2021 The Mumble Developers. All rights reserved.
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file at the root of the
+// Mumble source tree or at <https://www.mumble.info/LICENSE>.
+
+#include "AudioOutputCache.h"
+
+#include <cassert>
+#include <cstring>
+
+AudioOutputCache::AudioOutputCache(std::size_t initialCapacity) {
+	m_audioData.reserve(initialCapacity);
+}
+
+gsl::span< const Mumble::Protocol::byte > AudioOutputCache::getAudioData() const {
+	return m_audioData;
+}
+
+bool AudioOutputCache::isLastFrame() const {
+	return m_isLastFrame;
+}
+
+float AudioOutputCache::getVolumeAdjustment() const {
+	return m_volumeAdjustment;
+}
+
+bool AudioOutputCache::containsPositionalInformation() const {
+	return m_containsPosition;
+}
+
+const std::array< float, 3 > &AudioOutputCache::getPositionalInformation() const {
+	assert(m_containsPosition);
+
+	return m_position;
+}
+
+Mumble::Protocol::audio_context_t AudioOutputCache::getContext() const {
+	return m_audioContext;
+}
+
+void AudioOutputCache::setCapacity(std::size_t capacity) {
+	m_audioData.reserve(capacity);
+}
+
+void AudioOutputCache::loadFrom(const Mumble::Protocol::AudioData &audioData) {
+	// First copy audio data. assign() replaces the previous content in a single step and, unlike a
+	// memcpy from payload.data(), it is also well-defined for an empty payload.
+	m_audioData.assign(audioData.payload.begin(), audioData.payload.end());
+
+	// Then copy remaining fields (that we care about)
+	m_isLastFrame      = audioData.isLastFrame;
+	m_volumeAdjustment = audioData.volumeAdjustment.factor;
+	m_audioContext     = audioData.targetOrContext;
+
+	// And finally copy positional data, if available
+	if (audioData.containsPositionalData) {
+		m_containsPosition = true;
+
+		assert(m_position.size() == 3);
+		assert(audioData.position.size() == 3);
+
+		for (int i = 0; i < 3; ++i) {
+			m_position[i] = audioData.position[i];
+		}
+	} else {
+		// m_position is left untouched here, so it may still hold an earlier frame's coordinates
+		m_containsPosition = false;
+	}
+}
+
+void AudioOutputCache::clear() {
+	m_audioData.clear();
+}
+
+bool AudioOutputCache::isValid() const {
+	return !m_audioData.empty();
+}
+
+AudioOutputCache::operator bool() const {
+	return isValid();
+}
diff --git a/src/mumble/AudioOutputCache.h b/src/mumble/AudioOutputCache.h
new file mode 100644
index 000000000..ae81b564b
--- /dev/null
+++ b/src/mumble/AudioOutputCache.h
@@ -0,0 +1,50 @@
+// Copyright 2021 The Mumble Developers. All rights reserved.
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file at the root of the
+// Mumble source tree or at <https://www.mumble.info/LICENSE>.
+
+#ifndef MUMBLE_MUMBLE_AUDIOCACHE_H_
+#define MUMBLE_MUMBLE_AUDIOCACHE_H_
+
+#include "MumbleProtocol.h"
+
+#include <array>
+#include <vector>
+
+#include <gsl/span>
+
+/// Owns a copy of a single audio packet's payload together with the metadata needed to play it back.
+class AudioOutputCache {
+public:
+	AudioOutputCache(std::size_t initialCapacity = 512);
+	AudioOutputCache(AudioOutputCache &&) = default;
+
+	gsl::span< const Mumble::Protocol::byte > getAudioData() const;
+	bool isLastFrame() const;
+	float getVolumeAdjustment() const;
+	Mumble::Protocol::audio_context_t getContext() const;
+
+	bool containsPositionalInformation() const;
+	/// Must only be called while containsPositionalInformation() returns true
+	const std::array< float, 3 > &getPositionalInformation() const;
+
+	void setCapacity(std::size_t capacity);
+	/// Copies the payload and the metadata of the given packet into this cache
+	void loadFrom(const Mumble::Protocol::AudioData &audioData);
+	/// Drops the stored payload, after which the cache no longer counts as valid
+	void clear();
+
+	/// A cache is valid while it holds a non-empty payload
+	bool isValid() const;
+	operator bool() const;
+
+private:
+	std::vector< Mumble::Protocol::byte > m_audioData;
+	bool m_isLastFrame                               = false;
+	Mumble::Protocol::audio_context_t m_audioContext = Mumble::Protocol::AudioContext::Invalid;
+	float m_volumeAdjustment                         = 1.0f;
+	bool m_containsPosition                          = false;
+	std::array< float, 3 > m_position                = { 0.0f, 0.0f, 0.0f };
+};
+
+#endif // MUMBLE_MUMBLE_AUDIOCACHE_H_
diff --git a/src/mumble/AudioOutputSpeech.cpp b/src/mumble/AudioOutputSpeech.cpp
index 97a332b11..4552b391d 100644
--- a/src/mumble/AudioOutputSpeech.cpp
+++ b/src/mumble/AudioOutputSpeech.cpp
@@ -10,19 +10,55 @@
 #include "ClientUser.h"
 #include "OpusCodec.h"
 #include "PacketDataStream.h"
-#include "SpeechFlags.h"
 #include "Utils.h"
 #include "Global.h"
 
+#include <algorithm>
+#include <cassert>
 #include <cmath>
 
-AudioOutputSpeech::AudioOutputSpeech(ClientUser *user, unsigned int freq, MessageHandler::UDPMessageType type,
+std::mutex AudioOutputSpeech::s_audioCachesMutex;
+std::vector< AudioOutputCache > AudioOutputSpeech::s_audioCaches(100);
+
+void AudioOutputSpeech::invalidateAudioOutputCache(void *maskedIndex) {
+	// No real memory hides behind this "pointer": its numeric value is an index into s_audioCaches
+	const auto slot = reinterpret_cast< std::size_t >(maskedIndex);
+	std::lock_guard< std::mutex > guard(s_audioCachesMutex);
+	if (slot >= s_audioCaches.size()) {
+		// Index does not refer to an existing cache -> nothing to clear
+		return;
+	}
+	s_audioCaches[slot].clear();
+}
+
+std::size_t AudioOutputSpeech::storeAudioOutputCache(const Mumble::Protocol::AudioData &audioData) {
+	// The cache list is only touched while s_audioCachesMutex is held
+	std::lock_guard< std::mutex > guard(s_audioCachesMutex);
+
+	// A slot counts as free when it currently holds no valid audio data
+	const auto isFree   = [](const AudioOutputCache &slot) { return !slot.isValid(); };
+	const auto freeSlot = std::find_if(s_audioCaches.begin(), s_audioCaches.end(), isFree);
+
+	if (freeSlot == s_audioCaches.end()) {
+		// Every existing slot is occupied -> append a new one that holds this packet
+		AudioOutputCache fresh;
+		fresh.loadFrom(audioData);
+		s_audioCaches.push_back(std::move(fresh));
+
+		return s_audioCaches.size() - 1;
+	}
+
+	// Recycle the free slot for this packet and report its position in the list
+	freeSlot->loadFrom(audioData);
+
+	return std::distance(s_audioCaches.begin(), freeSlot);
+}
+
+
+AudioOutputSpeech::AudioOutputSpeech(ClientUser *user, unsigned int freq, Mumble::Protocol::AudioCodec codec,
 									 unsigned int systemMaxBufferSize)
-	: AudioOutputUser(user->qsName) {
+	: AudioOutputUser(user->qsName), iMixerFreq(freq), m_codec(codec), p(user) {
 	int err;
-	p          = user;
-	umtType    = type;
-	iMixerFreq = freq;
 
 	cCodec    = nullptr;
 	cdDecoder = nullptr;
@@ -44,7 +80,7 @@ AudioOutputSpeech::AudioOutputSpeech(ClientUser *user, unsigned int freq, Messag
 	// sample rate / 100 means 10ms mono audio data per frame.
 	iFrameSizePerChannel = iFrameSize = iSampleRate / 100; // for mono stream
 
-	if (umtType == MessageHandler::UDPVoiceOpus) {
+	if (m_codec == Mumble::Protocol::AudioCodec::Opus) {
 		// Always pretend Stereo mode is true by default. since opus will convert mono stream to stereo stream.
 		// https://tools.ietf.org/html/rfc6716#section-2.1.2
 		bStereo = true;
@@ -54,7 +90,7 @@ AudioOutputSpeech::AudioOutputSpeech(ClientUser *user, unsigned int freq, Messag
 			oCodec->opus_decoder_ctl(
 				opusState, OPUS_SET_PHASE_INVERSION_DISABLED(1)); // Disable phase inversion for better mono downmix.
 		}
-	} else if (umtType == MessageHandler::UDPVoiceSpeex) {
+	} else if (m_codec == Mumble::Protocol::AudioCodec::Speex) {
 		speex_bits_init(&sbBits);
 
 		dsSpeex  = speex_decoder_init(speex_lib_get_mode(SPEEX_MODEID_UWB));
@@ -108,12 +144,22 @@ AudioOutputSpeech::AudioOutputSpeech(ClientUser *user, unsigned int freq, Messag
 	iMissCount    = 0;
 	iMissedFrames = 0;
 
-	ucFlags = SpeechFlags::Invalid;
+	m_audioContext = Mumble::Protocol::AudioContext::Invalid;
 
 	jbJitter   = jitter_buffer_init(iFrameSize);
 	int margin = Global::get().s.iJitterBufferSize * iFrameSize;
 	jitter_buffer_ctl(jbJitter, JITTER_BUFFER_SET_MARGIN, &margin);
 
+	// We are configuring our Jitter buffer to use a custom deleter function. This prevents the buffer from
+	// copying the stored data into the buffer itself and also from releasing the memory of it. Instead it
+	// will now call this "deleter" function instead.
+	// This allows us to manage our own (global) storage for our audio data. With that, we can reuse the same
+	// memory regions in order to avoid frequent memory allocations and deallocations.
+	// Also this is the basis for using our trick of actually only storing indices instead of proper data
+	// pointers in the buffer.
+	jitter_buffer_ctl(jbJitter, JITTER_BUFFER_SET_DESTROY_CALLBACK,
+					  reinterpret_cast< void * >(&AudioOutputSpeech::invalidateAudioOutputCache));
+
 	fFadeIn  = new float[iFrameSizePerChannel];
 	fFadeOut = new float[iFrameSizePerChannel];
 
@@ -146,63 +192,80 @@ AudioOutputSpeech::~AudioOutputSpeech() {
 	delete[] fResamplerBuffer;
 }
 
-void AudioOutputSpeech::addFrameToBuffer(const QByteArray &qbaPacket, unsigned int iSeq) {
+void AudioOutputSpeech::addFrameToBuffer(const Mumble::Protocol::AudioData &audioData) {
 	QMutexLocker lock(&qmJitter);
 
-	if (qbaPacket.size() < 2)
+	if (audioData.payload.empty()) {
 		return;
+	}
 
-	// Voice data is transmitted through UDP packets and is not formatted by protobuf.
-	// Structure is: flags + size + audio data + pos*3
-	PacketDataStream pds(qbaPacket);
-
-	// skip flags
-	pds.next();
+	assert(audioData.usedCodec == m_codec);
 
 	int samples = 0;
-	if (umtType == MessageHandler::UDPVoiceOpus) {
-		int size;
-		pds >> size;
-		size &= 0x1fff;
-		if (size == 0) {
-			return;
-		}
+	switch (audioData.usedCodec) {
+		case Mumble::Protocol::AudioCodec::Opus: {
+			if (oCodec) {
+				samples = oCodec->opus_decoder_get_nb_samples(
+					opusState, audioData.payload.data(),
+					audioData.payload.size()); // this function return samples per channel
+				samples *= 2;                  // since we assume all input stream is stereo.
+			}
 
-		const QByteArray &qba = pds.dataBlock(size);
-		if (size != qba.size() || !pds.isValid()) {
-			return;
+			// We can't handle frames which are not a multiple of our configured framesize.
+			if (samples % iFrameSize != 0) {
+				qWarning("AudioOutputSpeech: Dropping Opus audio packet, because its sample count (%d) is not a "
+						 "multiple of our frame size (%d)",
+						 samples, iFrameSize);
+				return;
+			}
+			break;
 		}
+		case Mumble::Protocol::AudioCodec::CELT_Alpha:
+		case Mumble::Protocol::AudioCodec::CELT_Beta:
+		case Mumble::Protocol::AudioCodec::Speex: {
+			// These legacy codecs may send multiple frames at once, so we want to add up all samples across
+			// all frames
+			std::size_t offset  = 0;
+			bool framesContinue = true;
+			while (framesContinue && offset < audioData.payload.size()) {
+				Mumble::Protocol::byte headerByte = audioData.payload[offset];
+				// The least significant 7 bits encode the frame's size and the most significant bit is the
+				// "continuation bit" indicating that there are more frames to come.
+				unsigned int currentFrameSize = headerByte & 0x7f;
+				framesContinue                = headerByte & 0x80;
+
+				offset += 1 + currentFrameSize; // skip the header byte itself plus the frame it announces
+				samples += iFrameSize;
+			}
 
-		const unsigned char *packet = reinterpret_cast< const unsigned char * >(qba.constData());
+			// A frame overrunning the payload, or a continuation bit without a following frame, is malformed
+			if (framesContinue || offset > audioData.payload.size()) {
+				qWarning(
+					"AudioOutputSpeech: Invalid legacy audio packet encountered (specification exceeds actual length)");
+				return;
+			} else if (offset < audioData.payload.size()) {
+				qWarning(
+					"AudioOutputSpeech: Invalid legacy audio packet encountered (audio payload contains unused data)");
+				return;
+			}
 
-		if (oCodec) {
-			samples = oCodec->opus_decoder_get_nb_samples(opusState, packet,
-														  size); // this function return samples per channel
-			samples *= 2;                                        // since we assume all input stream is stereo.
+			break;
 		}
-
-		// We can't handle frames which are not a multiple of 10ms.
-		Q_ASSERT(samples % iFrameSize == 0);
-	} else {
-		// If packet not in opus format
-		unsigned int header = 0;
-
-		do {
-			header = static_cast< unsigned char >(pds.next());
-			samples += iFrameSize;
-			pds.skip(header & 0x7f);
-		} while ((header & 0x80) && pds.isValid());
 	}
 
-	if (pds.isValid()) {
-		JitterBufferPacket jbp;
-		jbp.data      = const_cast< char * >(qbaPacket.constData());
-		jbp.len       = qbaPacket.size();
-		jbp.span      = samples;
-		jbp.timestamp = iFrameSize * iSeq;
+	// Copy the audio data to an AudioOutputCache instance and store that in our global chunk list
+	std::size_t storageIndex = storeAudioOutputCache(audioData);
 
-		jitter_buffer_put(jbJitter, &jbp);
-	}
+	// We cheat a bit and instead of storing the actual audio data in the jitter buffer, we store the index to
+	// the created audio chunk in the buffer. Passing a length of 0 should ensure that this "pointer" will never
+	// be dereferenced.
+	JitterBufferPacket jbp;
+	jbp.data      = reinterpret_cast< char * >(storageIndex);
+	jbp.len       = 0;
+	jbp.span      = samples;
+	jbp.timestamp = iFrameSize * audioData.frameNumber;
+
+	jitter_buffer_put(jbJitter, &jbp);
 }
 
 bool AudioOutputSpeech::prepareSampleBuffer(unsigned int frameCount) {
@@ -262,48 +325,68 @@ bool AudioOutputSpeech::prepareSampleBuffer(unsigned int frameCount) {
 			if (qlFrames.isEmpty()) {
 				QMutexLocker lock(&qmJitter);
 
-				char data[4096];
 				JitterBufferPacket jbp;
-				jbp.data = data;
-				jbp.len  = 4096;
 
 				spx_int32_t startofs = 0;
-
 				if (jitter_buffer_get(jbJitter, &jbp, iFrameSize, &startofs) == JITTER_BUFFER_OK) {
-					PacketDataStream pds(jbp.data, jbp.len);
-					// pds structure is: flags + size (14-16 terminator + 1-15 size) + audio data + pos*3
+					std::lock_guard< std::mutex > audioChunkLock(s_audioCachesMutex);
 
 					iMissCount = 0;
-					ucFlags    = static_cast< unsigned char >(pds.next());
 
-					bHasTerminator = false;
-					if (umtType == MessageHandler::UDPVoiceOpus) {
-						int size;
-						pds >> size;
+					// The "data pointer" that is stored in the buffer is actually just an index to s_audioCaches
+					std::size_t index = reinterpret_cast< std::size_t >(jbp.data);
+					assert(jbp.len == 0);
+					assert(index < s_audioCaches.size());
+
+					const AudioOutputCache &cache = s_audioCaches[index];
+					assert(cache.isValid());
+
+					bHasTerminator = cache.isLastFrame();
 
-						bHasTerminator = size & 0x2000;
-						qlFrames << pds.dataBlock(size & 0x1fff);
-						// if using opus, there will be at most only one element in qlFrames
-						// Q_ASSERT(qlFrames.size() == 1);
+					if (m_codec == Mumble::Protocol::AudioCodec::Opus) {
+						// Copy audio data into qlFrames
+						qlFrames << QByteArray(reinterpret_cast< const char * >(cache.getAudioData().data()),
+											   cache.getAudioData().size());
 					} else {
-						unsigned int header = 0;
-						do {
-							header = static_cast< unsigned int >(pds.next());
-							if (header)
-								qlFrames << pds.dataBlock(header & 0x7f);
-							else
-								bHasTerminator = true;
-						} while ((header & 0x80) && pds.isValid());
+						// Split data into the individual frames and copy those into qlFrames
+						const gsl::span< const Mumble::Protocol::byte > audioData = cache.getAudioData();
+						std::size_t offset                                        = 0;
+						bool hasNextFrame                                         = true;
+						while (hasNextFrame && offset < audioData.size()) {
+							Mumble::Protocol::byte headerByte = audioData[offset];
+
+							// Least significant 7bits encode the frame's size
+							int currentFrameSize = headerByte & 0x7f;
+							// The most significant bit is the "continuation bit"
+							hasNextFrame = headerByte & 0x80;
+
+							// Copy current frame into qlFrames (a frame may end exactly at the payload's end)
+							if (offset + 1 + currentFrameSize > audioData.size()) {
+								qWarning("AudioOutputSpeech: Malformed legacy audio data encountered (mismatched "
+										 "frame size)");
+							} else if (currentFrameSize > 0) {
+								const char *frame = reinterpret_cast< const char * >(audioData.data()) + offset + 1;
+								qlFrames << QByteArray(frame, currentFrameSize);
+							}
+							// Advance past the header byte and the frame, otherwise this loop would never terminate
+							offset += 1 + currentFrameSize;
+						}
 					}
 
-					if (pds.left()) {
-						pds >> fPos[0];
-						pds >> fPos[1];
-						pds >> fPos[2];
+					if (cache.containsPositionalInformation()) {
+						fPos = cache.getPositionalInformation();
 					} else {
 						fPos[0] = fPos[1] = fPos[2] = 0.0f;
 					}
 
+					m_suggestedVolumeAdjustment = cache.getVolumeAdjustment();
+					m_audioContext              = cache.getContext();
+
+					// With a destroy callback installed, jitter_buffer_get() hands the packet's data over to us
+					// instead of releasing it. Everything we need has been copied out by now, so free the slot
+					// for reuse - otherwise s_audioCaches would grow with every packet that gets played.
+					s_audioCaches[index].clear();
+
 					if (p) {
 						float a = static_cast< float >(avail);
 						if (avail >= p->fAverageAvailable)
@@ -325,70 +408,78 @@ bool AudioOutputSpeech::prepareSampleBuffer(unsigned int frameCount) {
 			if (!qlFrames.isEmpty()) {
 				QByteArray qba = qlFrames.takeFirst();
 
-				if (umtType == MessageHandler::UDPVoiceCELTAlpha || umtType == MessageHandler::UDPVoiceCELTBeta) {
-					int wantversion = (umtType == MessageHandler::UDPVoiceCELTAlpha) ? Global::get().iCodecAlpha
-																					 : Global::get().iCodecBeta;
-					if ((p == &LoopUser::lpLoopy) && (!Global::get().qmCodecs.isEmpty())) {
-						QMap< int, CELTCodec * >::const_iterator i = Global::get().qmCodecs.constEnd();
-						--i;
-						wantversion = i.key();
-					}
-					if (cCodec && (cCodec->bitstreamVersion() != wantversion)) {
-						cCodec->celt_decoder_destroy(cdDecoder);
-						cdDecoder = nullptr;
-					}
-					if (!cCodec) {
-						cCodec = Global::get().qmCodecs.value(wantversion);
-						if (cCodec) {
-							cdDecoder = cCodec->decoderCreate();
+				switch (m_codec) {
+					case Mumble::Protocol::AudioCodec::CELT_Alpha:
+					case Mumble::Protocol::AudioCodec::CELT_Beta: {
+						int wantversion = (m_codec == Mumble::Protocol::AudioCodec::CELT_Alpha)
+											  ? Global::get().iCodecAlpha
+											  : Global::get().iCodecBeta;
+						if ((p == &LoopUser::lpLoopy) && (!Global::get().qmCodecs.isEmpty())) {
+							QMap< int, CELTCodec * >::const_iterator i = Global::get().qmCodecs.constEnd();
+							--i;
+							wantversion = i.key();
 						}
-					}
-					if (cdDecoder)
-						cCodec->decode_float(cdDecoder,
-											 qba.isEmpty() ? nullptr
-														   : reinterpret_cast< const unsigned char * >(qba.constData()),
-											 qba.size(), pOut);
-					else
-						memset(pOut, 0, sizeof(float) * iFrameSize);
-				} else if (umtType == MessageHandler::UDPVoiceOpus) {
-					if (oCodec) {
-						if (qba.isEmpty() || !(p && p->bLocalMute)) {
-							// If qba is empty, we have to let Opus know about the packet loss
-							// Otherwise if the associated user is not locally muted, we want to decode the audio packet
-							// normally in order to be able to play it.
-							decodedSamples = oCodec->opus_decode_float(
-								opusState,
-								qba.isEmpty() ? nullptr : reinterpret_cast< const unsigned char * >(qba.constData()),
-								qba.size(), pOut, iAudioBufferSize, 0);
-						} else {
-							// If the packet is non-empty, but the associated user is locally muted,
-							// we don't have to decode the packet. Instead it is enough to know how many
-							// samples it contained so that we can then mute the appropriate output length
-							decodedSamples = oCodec->opus_packet_get_samples_per_frame(
-								reinterpret_cast< const unsigned char * >(qba.constData()), SAMPLE_RATE);
+						if (cCodec && (cCodec->bitstreamVersion() != wantversion)) {
+							cCodec->celt_decoder_destroy(cdDecoder);
+							cdDecoder = nullptr;
 						}
+						if (!cCodec) {
+							cCodec = Global::get().qmCodecs.value(wantversion);
+							if (cCodec) {
+								cdDecoder = cCodec->decoderCreate();
+							}
+						}
+						if (cdDecoder)
+							cCodec->decode_float(
+								cdDecoder,
+								qba.isEmpty() ? nullptr : reinterpret_cast< const unsigned char * >(qba.constData()),
+								qba.size(), pOut);
+						else
+							memset(pOut, 0, sizeof(float) * iFrameSize);
 
-						// The returned sample count we get from the Opus functions refer to samples per channel.
-						// Thus in order to get the total amount, we have to multiply by the channel count.
-						decodedSamples *= channels;
+						break;
 					}
+					case Mumble::Protocol::AudioCodec::Opus: {
+						if (oCodec) {
+							if (qba.isEmpty() || !(p && p->bLocalMute)) {
+								// If qba is empty, we have to let Opus know about the packet loss
+								// Otherwise if the associated user is not locally muted, we want to decode the audio
+								// packet normally in order to be able to play it.
+								decodedSamples = oCodec->opus_decode_float(
+									opusState,
+									qba.isEmpty() ? nullptr
+												  : reinterpret_cast< const unsigned char * >(qba.constData()),
+									qba.size(), pOut, iAudioBufferSize, 0);
+							} else {
+								// If the packet is non-empty, but the associated user is locally muted,
+								// we don't have to decode the packet. Instead it is enough to know how many
+								// samples it contained so that we can then mute the appropriate output length
+								decodedSamples = oCodec->opus_packet_get_samples_per_frame(
+									reinterpret_cast< const unsigned char * >(qba.constData()), SAMPLE_RATE);
+							}
+
+							// The returned sample count we get from the Opus functions refer to samples per channel.
+							// Thus in order to get the total amount, we have to multiply by the channel count.
+							decodedSamples *= channels;
+						}
 
-					if (decodedSamples < 0) {
-						decodedSamples = iFrameSize;
-						memset(pOut, 0, iFrameSize * sizeof(float));
+						if (decodedSamples < 0) {
+							decodedSamples = iFrameSize;
+							memset(pOut, 0, iFrameSize * sizeof(float));
+						}
+						break;
 					}
-				} else if (umtType == MessageHandler::UDPVoiceSpeex) {
-					if (qba.isEmpty()) {
-						speex_decode(dsSpeex, nullptr, pOut);
-					} else {
-						speex_bits_read_from(&sbBits, qba.data(), qba.size());
-						speex_decode(dsSpeex, &sbBits, pOut);
+					case Mumble::Protocol::AudioCodec::Speex: {
+						if (qba.isEmpty()) {
+							speex_decode(dsSpeex, nullptr, pOut);
+						} else {
+							speex_bits_read_from(&sbBits, qba.data(), qba.size());
+							speex_decode(dsSpeex, &sbBits, pOut);
+						}
+						for (unsigned int i = 0; i < iFrameSize; ++i)
+							pOut[i] *= (1.0f / 32767.f);
+						break;
 					}
-					for (unsigned int i = 0; i < iFrameSize; ++i)
-						pOut[i] *= (1.0f / 32767.f);
-				} else {
-					qWarning("AudioOutputSpeech: encountered unknown message type %li in prepareSampleBuffer().",
-							 static_cast< long >(umtType));
 				}
 
 				bool update = true;
@@ -397,8 +488,9 @@ bool AudioOutputSpeech::prepareSampleBuffer(unsigned int frameCount) {
 					float &fPowerMin = p->fPowerMin;
 
 					float pow = 0.0f;
-					for (int i = 0; i < decodedSamples; ++i)
+					for (int i = 0; i < decodedSamples; ++i) {
 						pow += pOut[i] * pOut[i];
+					}
 					pow = sqrtf(pow / static_cast< float >(decodedSamples)); // Average over both L and R channel.
 
 					if (pow >= fPowerMax) {
@@ -414,33 +506,45 @@ bool AudioOutputSpeech::prepareSampleBuffer(unsigned int frameCount) {
 
 					update = (pow < (fPowerMin + 0.01f * (fPowerMax - fPowerMin))); // Update jitter buffer when quiet.
 				}
-				// qlFrames.isEmpty() will always be true if using opus.
-				// Q_ASSERT(qlFrames.isEmpty());
-				if (qlFrames.isEmpty() && update)
+
+				if (qlFrames.isEmpty() && update) {
 					jitter_buffer_update_delay(jbJitter, nullptr, nullptr);
+				}
 
-				if (qlFrames.isEmpty() && bHasTerminator)
+				if (qlFrames.isEmpty() && bHasTerminator) {
 					nextalive = false;
+				}
 			} else {
-				if (umtType == MessageHandler::UDPVoiceCELTAlpha || umtType == MessageHandler::UDPVoiceCELTBeta) {
-					if (cdDecoder)
-						cCodec->decode_float(cdDecoder, nullptr, 0, pOut);
-					else
-						memset(pOut, 0, sizeof(float) * iFrameSize);
-				} else if (umtType == MessageHandler::UDPVoiceOpus) {
-					if (oCodec) {
-						decodedSamples = oCodec->opus_decode_float(opusState, nullptr, 0, pOut, iFrameSize, 0);
-						decodedSamples *= channels;
+				switch (m_codec) {
+					case Mumble::Protocol::AudioCodec::CELT_Alpha:
+					case Mumble::Protocol::AudioCodec::CELT_Beta: {
+						if (cdDecoder) {
+							cCodec->decode_float(cdDecoder, nullptr, 0, pOut);
+						} else {
+							memset(pOut, 0, sizeof(float) * iFrameSize);
+						}
+
+						break;
 					}
+					case Mumble::Protocol::AudioCodec::Opus: {
+						if (oCodec) {
+							decodedSamples = oCodec->opus_decode_float(opusState, nullptr, 0, pOut, iFrameSize, 0);
+							decodedSamples *= channels;
+						}
 
-					if (decodedSamples < 0) {
-						decodedSamples = iFrameSize;
-						memset(pOut, 0, iFrameSize * sizeof(float));
+						if (decodedSamples < 0) {
+							decodedSamples = iFrameSize;
+							memset(pOut, 0, iFrameSize * sizeof(float));
+						}
+						break;
+					}
+					case Mumble::Protocol::AudioCodec::Speex: {
+						speex_decode(dsSpeex, nullptr, pOut);
+						for (unsigned int i = 0; i < iFrameSize; ++i)
+							pOut[i] *= (1.0f / 32767.f);
+
+						break;
 					}
-				} else {
-					speex_decode(dsSpeex, nullptr, pOut);
-					for (unsigned int i = 0; i < iFrameSize; ++i)
-						pOut[i] *= (1.0f / 32767.f);
 				}
 			}
 
@@ -485,23 +589,29 @@ bool AudioOutputSpeech::prepareSampleBuffer(unsigned int frameCount) {
 
 	if (p) {
 		Settings::TalkState ts;
-		if (!nextalive)
-			ucFlags = SpeechFlags::Invalid;
-		switch (ucFlags) {
-			case SpeechFlags::Listen:
+		if (!nextalive) {
+			m_audioContext = Mumble::Protocol::AudioContext::Invalid;
+		}
+
+		switch (m_audioContext) {
+			case Mumble::Protocol::AudioContext::Listen:
 				// Fallthrough
-			case SpeechFlags::Normal:
+			case Mumble::Protocol::AudioContext::Normal:
 				ts = Settings::Talking;
 				break;
-			case SpeechFlags::Shout:
+			case Mumble::Protocol::AudioContext::Shout:
 				ts = Settings::Shouting;
 				break;
-			case SpeechFlags::Invalid:
+			case Mumble::Protocol::AudioContext::Invalid:
 				ts = Settings::Passive;
 				break;
-			default:
+			case Mumble::Protocol::AudioContext::Whisper:
 				ts = Settings::Whispering;
 				break;
+			default:
+				// Default to normal talking, if we don't know the used context
+				ts = Settings::Talking;
+				break;
 		}
 
 		if (ts != Settings::Passive && p->bLocalMute) {
diff --git a/src/mumble/AudioOutputSpeech.h b/src/mumble/AudioOutputSpeech.h
index 9b8049579..ecddee438 100644
--- a/src/mumble/AudioOutputSpeech.h
+++ b/src/mumble/AudioOutputSpeech.h
@@ -13,8 +13,12 @@
 
 #include <QtCore/QMutex>
 
+#include "AudioOutputCache.h"
 #include "AudioOutputUser.h"
-#include "Message.h"
+#include "MumbleProtocol.h"
+
+#include <mutex>
+#include <vector>
 
 class CELTCodec;
 class OpusCodec;
@@ -26,6 +30,12 @@ private:
 	Q_OBJECT
 	Q_DISABLE_COPY(AudioOutputSpeech)
 protected:
+	static std::mutex s_audioCachesMutex;
+	static std::vector< AudioOutputCache > s_audioCaches;
+
+	static void invalidateAudioOutputCache(void *maskedIndex);
+	static std::size_t storeAudioOutputCache(const Mumble::Protocol::AudioData &audioData);
+
 	unsigned int iAudioBufferSize;
 	unsigned int iBufferOffset;
 	unsigned int iBufferFilled;
@@ -60,8 +70,8 @@ protected:
 	QList< QByteArray > qlFrames;
 
 public:
-	unsigned char ucFlags;
-	MessageHandler::UDPMessageType umtType;
+	Mumble::Protocol::audio_context_t m_audioContext;
+	Mumble::Protocol::AudioCodec m_codec;
 	int iMissedFrames;
 	ClientUser *p;
 
@@ -70,10 +80,10 @@ public:
 	/// @param frameCount Number of frames to decode. frame means a bundle of one sample from each channel.
 	virtual bool prepareSampleBuffer(unsigned int frameCount) Q_DECL_OVERRIDE;
 
-	void addFrameToBuffer(const QByteArray &, unsigned int iBaseSeq);
+	void addFrameToBuffer(const Mumble::Protocol::AudioData &audioData);
 
 	/// @param systemMaxBufferSize maximum number of samples the system audio play back may request each time
-	AudioOutputSpeech(ClientUser *, unsigned int freq, MessageHandler::UDPMessageType type,
+	AudioOutputSpeech(ClientUser *, unsigned int freq, Mumble::Protocol::AudioCodec codec,
 					  unsigned int systemMaxBufferSize);
 	~AudioOutputSpeech() Q_DECL_OVERRIDE;
 };
diff --git a/src/mumble/AudioOutputUser.h b/src/mumble/AudioOutputUser.h
index 42ddb0ccd..03297481d 100644
--- a/src/mumble/AudioOutputUser.h
+++ b/src/mumble/AudioOutputUser.h
@@ -29,8 +29,9 @@ public:
 	AudioOutputUser(const QString &name);
 	~AudioOutputUser() Q_DECL_OVERRIDE;
 	const QString qsName;
-	float *pfBuffer = nullptr;
-	float *pfVolume = nullptr;
+	float *pfBuffer                   = nullptr;
+	float *pfVolume                   = nullptr;
+	float m_suggestedVolumeAdjustment = 1.0f;
 	std::unique_ptr< unsigned int[] > piOffset;
 	std::array< float, 3 > fPos = { 0.0, 0.0, 0.0 };
 	bool bStereo;
diff --git a/src/mumble/BanEditor.cpp b/src/mumble/BanEditor.cpp
index 53934a6be..709cb3d4a 100644
--- a/src/mumble/BanEditor.cpp
+++ b/src/mumble/BanEditor.cpp
@@ -7,6 +7,7 @@
 
 #include "Ban.h"
 #include "Channel.h"
+#include "QtUtils.h"
 #include "ServerHandler.h"
 #include "Global.h"
 
diff --git a/src/mumble/CMakeLists.txt b/src/mumble/CMakeLists.txt
index 5f1c14ba9..159caeb2e 100644
--- a/src/mumble/CMakeLists.txt
+++ b/src/mumble/CMakeLists.txt
@@ -99,6 +99,8 @@ set(MUMBLE_SOURCES
 	"AudioConfigDialog.h"
 	"Audio.cpp"
 	"Audio.h"
+	"AudioOutputCache.cpp"
+	"AudioOutputCache.h"
 	"AudioInput.cpp"
 	"AudioInput.h"
 	"AudioInput.ui"
diff --git a/src/mumble/ConnectDialog.cpp b/src/mumble/ConnectDialog.cpp
index d3e6517d3..d3655d826 100644
--- a/src/mumble/ConnectDialog.cpp
+++ b/src/mumble/ConnectDialog.cpp
@@ -35,7 +35,9 @@
 #include <boost/array.hpp>
 
 #ifdef Q_OS_WIN
-#	define NOMINMAX
+#	ifndef NOMINMAX
+#		define NOMINMAX
+#	endif
 #	include <shlobj.h>
 #endif
 
@@ -71,7 +73,7 @@ void PingStats::init() {
 	uiBandwidth = 0;
 	uiSent      = 0;
 	uiRecv      = 0;
-	uiVersion   = 0;
+	uiVersion   = Version::UNKNOWN;
 }
 
 void PingStats::reset() {
@@ -1561,7 +1563,9 @@ void ConnectDialog::timeTick() {
 	if (si == hover)
 		tHover.restart();
 
-	foreach (const ServerAddress &addr, si->qlAddresses) { sendPing(addr.host.toAddress(), addr.port); }
+	for (const ServerAddress &addr : si->qlAddresses) {
+		sendPing(addr.host.toAddress(), addr.port, si->uiVersion);
+	}
 }
 
 void ConnectDialog::filterPublicServerList() const {
@@ -1723,13 +1727,14 @@ void ConnectDialog::lookedUp() {
 	}
 
 	if (bAllowPing) {
-		foreach (const ServerAddress &addr, qs) { sendPing(addr.host.toAddress(), addr.port); }
+		for (const ServerAddress &addr : qs) {
+			sendPing(addr.host.toAddress(), addr.port, Version::UNKNOWN);
+		}
 	}
 }
 
-void ConnectDialog::sendPing(const QHostAddress &host, unsigned short port) {
-	char blob[16];
-
+void ConnectDialog::sendPing(const QHostAddress &host, unsigned short port,
+							 Version::mumble_raw_version_t protocolVersion) {
 	ServerAddress addr(HostAddress(host), port);
 
 	quint64 uiRand;
@@ -1745,15 +1750,19 @@ void ConnectDialog::sendPing(const QHostAddress &host, unsigned short port) {
 		qhPingRand.insert(addr, uiRand);
 	}
 
-	memset(blob, 0, sizeof(blob));
-	*reinterpret_cast< quint64 * >(blob + 8) = tPing.elapsed() ^ uiRand;
+	Mumble::Protocol::PingData pingData;
+	// "Encrypt" the timestamp so that servers can't spoof the returned timestamp (easily) to fake a better ping
+	pingData.timestamp                    = tPing.elapsed() ^ uiRand;
+	pingData.requestAdditionalInformation = true;
 
-	if (bIPv4 && host.protocol() == QAbstractSocket::IPv4Protocol)
-		qusSocket4->writeDatagram(blob + 4, 12, host, port);
-	else if (bIPv6 && host.protocol() == QAbstractSocket::IPv6Protocol)
-		qusSocket6->writeDatagram(blob + 4, 12, host, port);
-	else
+	if (!writePing(host, port, protocolVersion, pingData)) {
 		return;
+	}
+	if (protocolVersion == Version::UNKNOWN) {
+		// Also attempt to use the new ping format in case we are pinging a server that only knows the new format
+		writePing(host, port, Mumble::Protocol::PROTOBUF_INTRODUCTION_VERSION, pingData);
+	}
+
 
 	const QSet< ServerItem * > &qs = qhPings.value(addr);
 
@@ -1761,33 +1770,60 @@ void ConnectDialog::sendPing(const QHostAddress &host, unsigned short port) {
 		++si->uiSent;
 }
 
+bool ConnectDialog::writePing(const QHostAddress &host, unsigned short port,
+							  Version::mumble_raw_version_t protocolVersion,
+							  const Mumble::Protocol::PingData &pingData) {
+	m_udpPingEncoder.setProtocolVersion(protocolVersion);
+
+	gsl::span< const Mumble::Protocol::byte > encodedPacket = m_udpPingEncoder.encodePingPacket(pingData);
+
+	if (bIPv4 && host.protocol() == QAbstractSocket::IPv4Protocol) {
+		qusSocket4->writeDatagram(reinterpret_cast< const char * >(encodedPacket.data()), encodedPacket.size(), host,
+								  port);
+	} else if (bIPv6 && host.protocol() == QAbstractSocket::IPv6Protocol) {
+		qusSocket6->writeDatagram(reinterpret_cast< const char * >(encodedPacket.data()), encodedPacket.size(), host,
+								  port);
+	} else {
+		return false;
+	}
+
+	return true;
+}
+
 void ConnectDialog::udpReply() {
 	QUdpSocket *sock = qobject_cast< QUdpSocket * >(sender());
 
 	while (sock->hasPendingDatagrams()) {
-		char blob[64];
-
 		QHostAddress host;
 		unsigned short port;
 
-		qint64 len = sock->readDatagram(blob + 4, 24, &host, &port);
-		if (len == 24) {
+		gsl::span< Mumble::Protocol::byte > buffer = m_udpDecoder.getBuffer();
+
+		std::size_t len = sock->readDatagram(reinterpret_cast< char * >(buffer.data()), buffer.size(), &host, &port);
+
+		// Pings are special in that they can be decoded in either the new or the old format, as long as the protocol
+		// version is set to the old format (which UNKNOWN does). Thus, by setting the version to UNKNOWN, we
+		// effectively allow decoding of either format. We have to reset it to this value every time, since the call
+		// to decode may set the protocol version to a more recent version (if a ping in the new format is detected).
+		m_udpDecoder.setProtocolVersion(Version::UNKNOWN);
+
+		if (m_udpDecoder.decodePing(buffer.subspan(0, len))
+			&& m_udpDecoder.getMessageType() == Mumble::Protocol::UDPMessageType::Ping) {
 			if (host.scopeId() == QLatin1String("0"))
 				host.setScopeId(QLatin1String(""));
 
 			ServerAddress address(HostAddress(host), port);
 
 			if (qhPings.contains(address)) {
-				quint32 *ping = reinterpret_cast< quint32 * >(blob + 4);
-				quint64 *ts   = reinterpret_cast< quint64 * >(blob + 8);
+				Mumble::Protocol::PingData pingData = m_udpDecoder.getPingData();
 
-				quint64 elapsed = tPing.elapsed() - (*ts ^ qhPingRand.value(address));
+				quint64 elapsed = tPing.elapsed() - (pingData.timestamp ^ qhPingRand.value(address));
 
-				foreach (ServerItem *si, qhPings.value(address)) {
-					si->uiVersion    = qFromBigEndian(ping[0]);
-					quint32 users    = qFromBigEndian(ping[3]);
-					quint32 maxusers = qFromBigEndian(ping[4]);
-					si->uiBandwidth  = qFromBigEndian(ping[5]);
+				for (ServerItem *si : qhPings.value(address)) {
+					si->uiVersion    = pingData.serverVersion;
+					quint32 users    = pingData.userCount;
+					quint32 maxusers = pingData.maxUserCount;
+					si->uiBandwidth  = pingData.maxBandwidthPerUser;
 
 					if (!si->uiPingSort)
 						si->uiPingSort = qmPingCache.value(UnresolvedServerAddress(si->qsHostname, si->usPort));
diff --git a/src/mumble/ConnectDialog.h b/src/mumble/ConnectDialog.h
index 9a415101f..b0017c27c 100644
--- a/src/mumble/ConnectDialog.h
+++ b/src/mumble/ConnectDialog.h
@@ -26,10 +26,12 @@
 #endif
 
 #include "HostAddress.h"
+#include "MumbleProtocol.h"
 #include "Net.h"
 #include "ServerAddress.h"
 #include "Timer.h"
 #include "UnresolvedServerAddress.h"
+#include "Version.h"
 
 struct FavoriteServer;
 class QUdpSocket;
@@ -269,6 +271,8 @@ protected:
 	bool bIPv4;
 	bool bIPv6;
 	int iPingIndex;
+	Mumble::Protocol::UDPPingEncoder< Mumble::Protocol::Role::Client > m_udpPingEncoder;
+	Mumble::Protocol::UDPDecoder< Mumble::Protocol::Role::Client > m_udpDecoder;
 
 	bool bLastFound;
 
@@ -289,7 +293,9 @@ protected:
 	bool bAllowFilters;
 
 
-	void sendPing(const QHostAddress &, unsigned short port);
+	void sendPing(const QHostAddress &, unsigned short port, Version::mumble_raw_version_t protocolVersion);
+	bool writePing(const QHostAddress &host, unsigned short port, Version::mumble_raw_version_t protocolVersion,
+				   const Mumble::Protocol::PingData &pingData);
 
 	void initList();
 	void fillList();
diff --git a/src/mumble/Database.cpp b/src/mumble/Database.cpp
index ea4419032..63677dd1a 100644
--- a/src/mumble/Database.cpp
+++ b/src/mumble/Database.cpp
@@ -5,7 +5,6 @@
 
 #include "Database.h"
 
-#include "Message.h"
 #include "MumbleApplication.h"
 #include "Net.h"
 #include "Utils.h"
diff --git a/src/mumble/LCD.cpp b/src/mumble/LCD.cpp
index c97d6eb13..2d1c4af72 100644
--- a/src/mumble/LCD.cpp
+++ b/src/mumble/LCD.cpp
@@ -7,7 +7,6 @@
 
 #include "Channel.h"
 #include "ClientUser.h"
-#include "Message.h"
 #include "ServerHandler.h"
 #include "Utils.h"
 #include "Global.h"
diff --git a/src/mumble/MainWindow.cpp b/src/mumble/MainWindow.cpp
index 95884ae08..2019fb924 100644
--- a/src/mumble/MainWindow.cpp
+++ b/src/mumble/MainWindow.cpp
@@ -3577,29 +3577,29 @@ void MainWindow::customEvent(QEvent *evt) {
 	ServerHandlerMessageEvent *shme = static_cast< ServerHandlerMessageEvent * >(evt);
 
 #ifdef QT_NO_DEBUG
-#	define MUMBLE_MH_MSG(x)                                                       \
-		case MessageHandler::x: {                                                  \
-			MumbleProto::x msg;                                                    \
+#	define PROCESS_MUMBLE_TCP_MESSAGE(name, value)                                \
+		case Mumble::Protocol::TCPMessageType::name: {                             \
+			MumbleProto::name msg;                                                 \
 			if (msg.ParseFromArray(shme->qbaMsg.constData(), shme->qbaMsg.size())) \
-				msg##x(msg);                                                       \
+				msg##name(msg);                                                    \
 			break;                                                                 \
 		}
 #else
-#	define MUMBLE_MH_MSG(x)                                                         \
-		case MessageHandler::x: {                                                    \
-			MumbleProto::x msg;                                                      \
+#	define PROCESS_MUMBLE_TCP_MESSAGE(name, value)                                  \
+		case Mumble::Protocol::TCPMessageType::name: {                               \
+			MumbleProto::name msg;                                                   \
 			if (msg.ParseFromArray(shme->qbaMsg.constData(), shme->qbaMsg.size())) { \
-				printf("%s:\n", #x);                                                 \
+				printf("%s:\n", #name);                                              \
 				msg.PrintDebugString();                                              \
-				msg##x(msg);                                                         \
+				msg##name(msg);                                                      \
 			}                                                                        \
 			break;                                                                   \
 		}
 #endif
-	switch (shme->uiType) { MUMBLE_MH_ALL }
+	switch (shme->type) { MUMBLE_ALL_TCP_MESSAGES }
 
 
-#undef MUMBLE_MH_MSG
+#undef PROCESS_MUMBLE_TCP_MESSAGE
 }
 
 
diff --git a/src/mumble/MainWindow.h b/src/mumble/MainWindow.h
index 1c210c5a7..00eb98e19 100644
--- a/src/mumble/MainWindow.h
+++ b/src/mumble/MainWindow.h
@@ -14,8 +14,8 @@
 
 #include "CustomElements.h"
 #include "MUComboBox.h"
-#include "Message.h"
 #include "Mumble.pb.h"
+#include "MumbleProtocol.h"
 #include "Usage.h"
 #include "UserLocalNicknameDialog.h"
 #include "UserLocalVolumeDialog.h"
@@ -56,7 +56,7 @@ public:
 	OpenURLEvent(QUrl url);
 };
 
-class MainWindow : public QMainWindow, public MessageHandler, public Ui::MainWindow {
+class MainWindow : public QMainWindow, public Ui::MainWindow {
 	friend class UserModel;
 
 private:
@@ -355,10 +355,10 @@ public:
 	MainWindow(QWidget *parent);
 	~MainWindow() Q_DECL_OVERRIDE;
 
-	// From msgHandler. Implementation in Messages.cpp
-#define MUMBLE_MH_MSG(x) void msg##x(const MumbleProto::x &);
-	MUMBLE_MH_ALL
-#undef MUMBLE_MH_MSG
+	// Implementation in Messages.cpp
+#define PROCESS_MUMBLE_TCP_MESSAGE(name, value) void msg##name(const MumbleProto::name &);
+	MUMBLE_ALL_TCP_MESSAGES
+#undef PROCESS_MUMBLE_TCP_MESSAGE
 	void removeContextAction(const MumbleProto::ContextActionModify &msg);
 	/// Logs a message that an action could not be saved permanently because
 	/// the user has no certificate and can't be reliably identified.
diff --git a/src/mumble/Messages.cpp b/src/mumble/Messages.cpp
index c16f3ea14..3a622bf65 100644
--- a/src/mumble/Messages.cpp
+++ b/src/mumble/Messages.cpp
@@ -1156,8 +1156,9 @@ void MainWindow::removeContextAction(const MumbleProto::ContextActionModify &msg
 ///
 /// @param msg The message object with the respective information
 void MainWindow::msgVersion(const MumbleProto::Version &msg) {
-	if (msg.has_version())
-		Global::get().sh->uiVersion = msg.version();
+	if (msg.has_version()) {
+		Global::get().sh->setProtocolVersion(msg.version());
+	}
 	if (msg.has_release())
 		Global::get().sh->qsRelease = u8(msg.release());
 	if (msg.has_os()) {
diff --git a/src/mumble/Overlay.cpp b/src/mumble/Overlay.cpp
index 938d39ce7..107867525 100644
--- a/src/mumble/Overlay.cpp
+++ b/src/mumble/Overlay.cpp
@@ -9,7 +9,6 @@
 #include "ClientUser.h"
 #include "Database.h"
 #include "MainWindow.h"
-#include "Message.h"
 #include "OverlayClient.h"
 #include "OverlayText.h"
 #include "RichTextEditor.h"
diff --git a/src/mumble/OverlayClient.cpp b/src/mumble/OverlayClient.cpp
index 936b669c9..1a8ccae01 100644
--- a/src/mumble/OverlayClient.cpp
+++ b/src/mumble/OverlayClient.cpp
@@ -7,7 +7,6 @@
 #include "Channel.h"
 #include "Database.h"
 #include "MainWindow.h"
-#include "Message.h"
 #include "NetworkConfig.h"
 #include "OverlayEditor.h"
 #include "OverlayPositionableItem.h"
diff --git a/src/mumble/OverlayConfig.cpp b/src/mumble/OverlayConfig.cpp
index 1cb0c996f..f72a5eec2 100644
--- a/src/mumble/OverlayConfig.cpp
+++ b/src/mumble/OverlayConfig.cpp
@@ -8,7 +8,6 @@
 #include "Channel.h"
 #include "Database.h"
 #include "MainWindow.h"
-#include "Message.h"
 #include "NetworkConfig.h"
 #include "Overlay.h"
 #include "OverlayPositionableItem.h"
diff --git a/src/mumble/OverlayEditor.cpp b/src/mumble/OverlayEditor.cpp
index 61b0ee145..8c52c29db 100644
--- a/src/mumble/OverlayEditor.cpp
+++ b/src/mumble/OverlayEditor.cpp
@@ -8,7 +8,6 @@
 #include "Channel.h"
 #include "Database.h"
 #include "MainWindow.h"
-#include "Message.h"
 #include "NetworkConfig.h"
 #include "OverlayClient.h"
 #include "OverlayText.h"
diff --git a/src/mumble/OverlayEditorScene.cpp b/src/mumble/OverlayEditorScene.cpp
index cdacaad63..2471a9710 100644
--- a/src/mumble/OverlayEditorScene.cpp
+++ b/src/mumble/OverlayEditorScene.cpp
@@ -8,7 +8,6 @@
 #include "Channel.h"
 #include "Database.h"
 #include "MainWindow.h"
-#include "Message.h"
 #include "NetworkConfig.h"
 #include "OverlayClient.h"
 #include "OverlayText.h"
diff --git a/src/mumble/OverlayUser.cpp b/src/mumble/OverlayUser.cpp
index ae57f4230..a4785c6eb 100644
--- a/src/mumble/OverlayUser.cpp
+++ b/src/mumble/OverlayUser.cpp
@@ -9,7 +9,6 @@
 #include "ClientUser.h"
 #include "Database.h"
 #include "MainWindow.h"
-#include "Message.h"
 #include "NetworkConfig.h"
 #include "OverlayText.h"
 #include "ServerHandler.h"
diff --git a/src/mumble/OverlayUserGroup.cpp b/src/mumble/OverlayUserGroup.cpp
index e52c0e6e9..eecf557f6 100644
--- a/src/mumble/OverlayUserGroup.cpp
+++ b/src/mumble/OverlayUserGroup.cpp
@@ -9,7 +9,6 @@
 #include "ClientUser.h"
 #include "Database.h"
 #include "MainWindow.h"
-#include "Message.h"
 #include "NetworkConfig.h"
 #include "OverlayClient.h"
 #include "OverlayEditor.h"
diff --git a/src/mumble/PluginConfig.cpp b/src/mumble/PluginConfig.cpp
index e5664779e..689fbd8b5 100644
--- a/src/mumble/PluginConfig.cpp
+++ b/src/mumble/PluginConfig.cpp
@@ -7,7 +7,6 @@
 
 #include "Log.h"
 #include "MainWindow.h"
-#include "Message.h"
 #include "MumbleApplication.h"
 #include "PluginInstaller.h"
 #include "PluginManager.h"
diff --git a/src/mumble/ServerHandler.cpp b/src/mumble/ServerHandler.cpp
index 2b7fc4d40..5d1a21c02 100644
--- a/src/mumble/ServerHandler.cpp
+++ b/src/mumble/ServerHandler.cpp
@@ -19,7 +19,6 @@
 #include "Database.h"
 #include "HostAddress.h"
 #include "MainWindow.h"
-#include "Message.h"
 #include "Net.h"
 #include "NetworkConfig.h"
 #include "OSInfo.h"
@@ -40,6 +39,8 @@
 
 #include <openssl/crypto.h>
 
+#include <cassert>
+
 #ifdef Q_OS_WIN
 // <delayimp.h> is not protected with an include guard on MinGW, resulting in
 // redefinitions if the PCH header is used.
@@ -63,11 +64,12 @@
 int ServerHandler::nextConnectionID = -1;
 QMutex ServerHandler::nextConnectionIDMutex;
 
-ServerHandlerMessageEvent::ServerHandlerMessageEvent(const QByteArray &msg, unsigned int mtype, bool flush)
+ServerHandlerMessageEvent::ServerHandlerMessageEvent(const QByteArray &msg, Mumble::Protocol::TCPMessageType type,
+													 bool flush)
 	: QEvent(static_cast< QEvent::Type >(SERVERSEND_EVENT)) {
-	qbaMsg = msg;
-	uiType = mtype;
-	bFlush = flush;
+	qbaMsg     = msg;
+	this->type = type;
+	bFlush     = flush;
 }
 
 #ifdef Q_OS_WIN
@@ -112,7 +114,7 @@ ServerHandler::ServerHandler() : database(new Database(QLatin1String("ServerHand
 	usPort                  = 0;
 	bUdp                    = true;
 	tConnectionTimeoutTimer = nullptr;
-	uiVersion               = 0;
+	uiVersion               = Version::UNKNOWN;
 	iInFlightTCPPings       = 0;
 
 	// assign connection ID
@@ -194,14 +196,20 @@ int ServerHandler::getConnectionID() const {
 	return connectionID;
 }
 
+void ServerHandler::setProtocolVersion(Version::mumble_raw_version_t version) {
+	uiVersion = version;
+
+	m_udpPingEncoder.setProtocolVersion(version);
+	m_udpDecoder.setProtocolVersion(version);
+	m_tcpTunnelDecoder.setProtocolVersion(version);
+}
+
 void ServerHandler::udpReady() {
-	const unsigned int UDP_MAX_SIZE = 2048;
 	while (qusUdp->hasPendingDatagrams()) {
-		char encrypted[UDP_MAX_SIZE];
-		char buffer[UDP_MAX_SIZE];
+		char encrypted[Mumble::Protocol::MAX_UDP_PACKET_SIZE];
 		unsigned int buflen = static_cast< unsigned int >(qusUdp->pendingDatagramSize());
 
-		if (buflen > UDP_MAX_SIZE) {
+		if (buflen > Mumble::Protocol::MAX_UDP_PACKET_SIZE) {
 			// Discard datagrams that exceed our buffer's size as we'd have to trim them down anyways and it is not very
 			// likely that the data is valid in the trimmed down form.
 			// As we're using a maxSize of 0 it is okay to pass nullptr as the data buffer. Qt's docs (5.15) ensures
@@ -227,8 +235,13 @@ void ServerHandler::udpReady() {
 		if (buflen < 5)
 			continue;
 
-		if (!connection->csCrypt->decrypt(reinterpret_cast< const unsigned char * >(encrypted),
-										  reinterpret_cast< unsigned char * >(buffer), buflen)) {
+		gsl::span< Mumble::Protocol::byte > buffer = m_udpDecoder.getBuffer();
+
+		// 4 bytes is the overhead of the encryption
+		assert(buffer.size() >= buflen - 4);
+
+		if (!connection->csCrypt->decrypt(reinterpret_cast< const unsigned char * >(encrypted), buffer.data(),
+										  buflen)) {
 			if (connection->csCrypt->tLastGood.elapsed() > 5000000ULL) {
 				if (connection->csCrypt->tLastRequest.elapsed() > 5000000ULL) {
 					connection->csCrypt->tLastRequest.restart();
@@ -239,47 +252,38 @@ void ServerHandler::udpReady() {
 			continue;
 		}
 
-		PacketDataStream pds(buffer + 1, buflen - 5);
-
-		MessageHandler::UDPMessageType msgType = static_cast< MessageHandler::UDPMessageType >((buffer[0] >> 5) & 0x7);
-		unsigned int msgFlags                  = buffer[0] & 0x1f;
-
-		switch (msgType) {
-			case MessageHandler::UDPPing: {
-				quint64 t;
-				pds >> t;
-				accUDP(static_cast< double >(tTimestamp.elapsed() - t) / 1000.0);
-			} break;
-			case MessageHandler::UDPVoiceCELTAlpha:
-			case MessageHandler::UDPVoiceCELTBeta:
-			case MessageHandler::UDPVoiceSpeex:
-			case MessageHandler::UDPVoiceOpus:
-				handleVoicePacket(msgFlags, pds, msgType);
-				break;
-			default:
-				break;
+		if (m_udpDecoder.decode(buffer.subspan(0, buflen - 4))) {
+			switch (m_udpDecoder.getMessageType()) {
+				case Mumble::Protocol::UDPMessageType::Ping: {
+					const Mumble::Protocol::PingData pingData = m_udpDecoder.getPingData();
+
+					accUDP(static_cast< double >(tTimestamp.elapsed() - pingData.timestamp) / 1000.0);
+
+					break;
+				}
+				case Mumble::Protocol::UDPMessageType::Audio: {
+					const Mumble::Protocol::AudioData audioData = m_udpDecoder.getAudioData();
+
+					handleVoicePacket(audioData);
+					break;
+				};
+			}
 		}
 	}
 }
 
-void ServerHandler::handleVoicePacket(unsigned int msgFlags, PacketDataStream &pds,
-									  MessageHandler::UDPMessageType type) {
-	unsigned int uiSession;
-	pds >> uiSession;
-	ClientUser *p     = ClientUser::get(uiSession);
+void ServerHandler::handleVoicePacket(const Mumble::Protocol::AudioData &audioData) {
+	ClientUser *sender = ClientUser::get(audioData.senderSession);
+
 	AudioOutputPtr ao = Global::get().ao;
-	if (ao && p && !(((msgFlags & 0x1f) == 2) && Global::get().s.bWhisperFriends && p->qsFriendName.isEmpty())) {
-		unsigned int iSeq;
-		pds >> iSeq;
-		QByteArray qba;
-		qba.reserve(pds.left() + 1);
-		qba.append(static_cast< char >(msgFlags));
-		qba.append(pds.dataBlock(pds.left()));
-		ao->addFrameToBuffer(p, qba, iSeq, type);
+	if (ao && sender
+		&& !((audioData.targetOrContext == Mumble::Protocol::AudioContext::Whisper) && Global::get().s.bWhisperFriends
+			 && sender->qsFriendName.isEmpty())) {
+		ao->addFrameToBuffer(sender, audioData);
 	}
 }
 
-void ServerHandler::sendMessage(const char *data, int len, bool force) {
+void ServerHandler::sendMessage(const unsigned char *data, int len, bool force) {
 	STACKVAR(unsigned char, crypto, len + 4);
 
 	QMutexLocker qml(&qmUdp);
@@ -295,12 +299,14 @@ void ServerHandler::sendMessage(const char *data, int len, bool force) {
 		QByteArray qba;
 
 		qba.resize(len + 6);
-		unsigned char *uc                      = reinterpret_cast< unsigned char * >(qba.data());
-		*reinterpret_cast< quint16 * >(&uc[0]) = qToBigEndian(static_cast< quint16 >(MessageHandler::UDPTunnel));
+		unsigned char *uc = reinterpret_cast< unsigned char * >(qba.data());
+		*reinterpret_cast< quint16 * >(&uc[0]) =
+			qToBigEndian(static_cast< quint16 >(Mumble::Protocol::TCPMessageType::UDPTunnel));
 		*reinterpret_cast< quint32 * >(&uc[2]) = qToBigEndian(static_cast< quint32 >(len));
 		memcpy(uc + 6, data, len);
 
-		QApplication::postEvent(this, new ServerHandlerMessageEvent(qba, MessageHandler::UDPTunnel, true));
+		QApplication::postEvent(this,
+								new ServerHandlerMessageEvent(qba, Mumble::Protocol::TCPMessageType::UDPTunnel, true));
 	} else {
 		if (!connection->csCrypt->encrypt(reinterpret_cast< const unsigned char * >(data), crypto, len)) {
 			return;
@@ -309,19 +315,19 @@ void ServerHandler::sendMessage(const char *data, int len, bool force) {
 	}
 }
 
-void ServerHandler::sendProtoMessage(const ::google::protobuf::Message &msg, unsigned int msgType) {
+void ServerHandler::sendProtoMessage(const ::google::protobuf::Message &msg, Mumble::Protocol::TCPMessageType type) {
 	QByteArray qba;
 
 	if (QThread::currentThread() != thread()) {
-		Connection::messageToNetwork(msg, msgType, qba);
-		ServerHandlerMessageEvent *shme = new ServerHandlerMessageEvent(qba, 0, false);
+		Connection::messageToNetwork(msg, type, qba);
+		ServerHandlerMessageEvent *shme = new ServerHandlerMessageEvent(qba, type, false);
 		QApplication::postEvent(this, shme);
 	} else {
 		ConnectionPtr connection(cConnection);
 		if (!connection)
 			return;
 
-		connection->sendMessage(msg, msgType, qba);
+		connection->sendMessage(msg, type, qba);
 	}
 }
 
@@ -410,15 +416,11 @@ void ServerHandler::run() {
 			qlErrors.clear();
 			qscCert.clear();
 
-			connect(qtsSock, SIGNAL(encrypted()), this, SLOT(serverConnectionConnected()));
-			connect(qtsSock, SIGNAL(stateChanged(QAbstractSocket::SocketState)), this,
-					SLOT(serverConnectionStateChanged(QAbstractSocket::SocketState)));
-			connect(connection.get(), SIGNAL(connectionClosed(QAbstractSocket::SocketError, const QString &)), this,
-					SLOT(serverConnectionClosed(QAbstractSocket::SocketError, const QString &)));
-			connect(connection.get(), SIGNAL(message(unsigned int, const QByteArray &)), this,
-					SLOT(message(unsigned int, const QByteArray &)));
-			connect(connection.get(), SIGNAL(handleSslErrors(const QList< QSslError > &)), this,
-					SLOT(setSslErrors(const QList< QSslError > &)));
+			connect(qtsSock, &QSslSocket::encrypted, this, &ServerHandler::serverConnectionConnected);
+			connect(qtsSock, &QSslSocket::stateChanged, this, &ServerHandler::serverConnectionStateChanged);
+			connect(connection.get(), &Connection::connectionClosed, this, &ServerHandler::serverConnectionClosed);
+			connect(connection.get(), &Connection::message, this, &ServerHandler::message);
+			connect(connection.get(), &Connection::handleSslErrors, this, &ServerHandler::setSslErrors);
 		}
 		bUdp = false;
 
@@ -446,7 +448,7 @@ void ServerHandler::run() {
 
 		accUDP = accTCP = accClean;
 
-		uiVersion   = 0;
+		uiVersion   = Version::UNKNOWN;
 		qsRelease   = QString();
 		qsOS        = QString();
 		qsOSVersion = QString();
@@ -567,11 +569,14 @@ void ServerHandler::sendPingInternal() {
 	quint64 t = tTimestamp.elapsed();
 
 	if (qusUdp) {
-		unsigned char buffer[256];
-		PacketDataStream pds(buffer + 1, 255);
-		buffer[0] = MessageHandler::UDPPing << 5;
-		pds << t;
-		sendMessage(reinterpret_cast< const char * >(buffer), pds.size() + 1, true);
+		Mumble::Protocol::PingData pingData;
+		pingData.timestamp                    = t;
+		pingData.requestAdditionalInformation = false;
+
+		m_udpPingEncoder.setProtocolVersion(uiVersion);
+		gsl::span< const Mumble::Protocol::byte > encodedPacket = m_udpPingEncoder.encodePingPacket(pingData);
+
+		sendMessage(encodedPacket.data(), encodedPacket.size(), true);
 	}
 
 	MumbleProto::Ping mpp;
@@ -600,27 +605,20 @@ void ServerHandler::sendPingInternal() {
 	iInFlightTCPPings += 1;
 }
 
-void ServerHandler::message(unsigned int msgType, const QByteArray &qbaMsg) {
+void ServerHandler::message(Mumble::Protocol::TCPMessageType type, const QByteArray &qbaMsg) {
 	const char *ptr = qbaMsg.constData();
-	if (msgType == MessageHandler::UDPTunnel) {
-		if (qbaMsg.length() < 1)
-			return;
-
-		MessageHandler::UDPMessageType umsgType = static_cast< MessageHandler::UDPMessageType >((ptr[0] >> 5) & 0x7);
-		unsigned int msgFlags                   = ptr[0] & 0x1f;
-		PacketDataStream pds(qbaMsg.constData() + 1, qbaMsg.size());
-
-		switch (umsgType) {
-			case MessageHandler::UDPVoiceCELTAlpha:
-			case MessageHandler::UDPVoiceCELTBeta:
-			case MessageHandler::UDPVoiceSpeex:
-			case MessageHandler::UDPVoiceOpus:
-				handleVoicePacket(msgFlags, pds, umsgType);
-				break;
-			default:
-				break;
+	if (type == Mumble::Protocol::TCPMessageType::UDPTunnel) {
+		// Audio tunneled through TCP.
+		// Since it could happen that we are receiving UDP and TCP messages at the same time (e.g. the server used to
+		// send us packets via TCP but has now switched to UDP again and the first UDP packets arrive at the same time
+		// as the last TCP ones), we want to use a dedicated decoder for this (to make sure there is no concurrent
+		// access to the decoder's internal buffer).
+		if (m_tcpTunnelDecoder.decode(
+				{ reinterpret_cast< const Mumble::Protocol::byte * >(ptr), static_cast< std::size_t >(qbaMsg.size()) })
+			&& m_tcpTunnelDecoder.getMessageType() == Mumble::Protocol::UDPMessageType::Audio) {
+			handleVoicePacket(m_tcpTunnelDecoder.getAudioData());
 		}
-	} else if (msgType == MessageHandler::Ping) {
+	} else if (type == Mumble::Protocol::TCPMessageType::Ping) {
 		MumbleProto::Ping msg;
 		if (msg.ParseFromArray(qbaMsg.constData(), qbaMsg.size())) {
 			ConnectionPtr connection(cConnection);
@@ -665,15 +663,18 @@ void ServerHandler::message(unsigned int msgType, const QByteArray &qbaMsg) {
 			}
 		}
 	} else {
-		ServerHandlerMessageEvent *shme = new ServerHandlerMessageEvent(qbaMsg, msgType, false);
+		ServerHandlerMessageEvent *shme = new ServerHandlerMessageEvent(qbaMsg, type, false);
 		QApplication::postEvent(Global::get().mw, shme);
 	}
 }
 
 void ServerHandler::disconnect() {
 	// Actual TCP object is in a different thread, so signal it
+	// The actual type of this event doesn't matter as we are only abusing the event mechanism to signal the thread to
+	// exit.
 	QByteArray qbaBuffer;
-	ServerHandlerMessageEvent *shme = new ServerHandlerMessageEvent(qbaBuffer, 0, false);
+	ServerHandlerMessageEvent *shme =
+		new ServerHandlerMessageEvent(qbaBuffer, Mumble::Protocol::TCPMessageType::Ping, false);
 	QApplication::postEvent(this, shme);
 }
 
diff --git a/src/mumble/ServerHandler.h b/src/mumble/ServerHandler.h
index a4976f589..8307938aa 100644
--- a/src/mumble/ServerHandler.h
+++ b/src/mumble/ServerHandler.h
@@ -32,14 +32,13 @@
 
 #define SERVERSEND_EVENT 3501
 
-#include "Message.h"
 #include "Mumble.pb.h"
+#include "MumbleProtocol.h"
 #include "ServerAddress.h"
 #include "Timer.h"
 
 class Connection;
 class Database;
-class Message;
 class PacketDataStream;
 class QUdpSocket;
 class QSslSocket;
@@ -47,10 +46,10 @@ class VoiceRecorder;
 
 class ServerHandlerMessageEvent : public QEvent {
 public:
-	unsigned int uiType;
+	Mumble::Protocol::TCPMessageType type;
 	QByteArray qbaMsg;
 	bool bFlush;
-	ServerHandlerMessageEvent(const QByteArray &msg, unsigned int type, bool flush = false);
+	ServerHandlerMessageEvent(const QByteArray &msg, Mumble::Protocol::TCPMessageType type, bool flush = false);
 };
 
 typedef boost::shared_ptr< Connection > ConnectionPtr;
@@ -74,6 +73,9 @@ protected:
 	bool bUdp;
 	bool bStrong;
 	int connectionID;
+	Mumble::Protocol::UDPPingEncoder< Mumble::Protocol::Role::Client > m_udpPingEncoder;
+	Mumble::Protocol::UDPDecoder< Mumble::Protocol::Role::Client > m_udpDecoder;
+	Mumble::Protocol::UDPDecoder< Mumble::Protocol::Role::Client > m_tcpTunnelDecoder;
 
 	/// Flag indicating whether the server we are currently connected to has
 	/// finished synchronizing already.
@@ -89,7 +91,7 @@ protected:
 	QUdpSocket *qusUdp;
 	QMutex qmUdp;
 
-	void handleVoicePacket(unsigned int msgFlags, PacketDataStream &pds, MessageHandler::UDPMessageType type);
+	void handleVoicePacket(const Mumble::Protocol::AudioData &audioData);
 
 public:
 	Timer tTimestamp;
@@ -129,8 +131,10 @@ public:
 	void customEvent(QEvent *evt) Q_DECL_OVERRIDE;
 	int getConnectionID() const;
 
-	void sendProtoMessage(const ::google::protobuf::Message &msg, unsigned int msgType);
-	void sendMessage(const char *data, int len, bool force = false);
+	void setProtocolVersion(Version::mumble_raw_version_t version);
+
+	void sendProtoMessage(const ::google::protobuf::Message &msg, Mumble::Protocol::TCPMessageType type);
+	void sendMessage(const unsigned char *data, int len, bool force = false);
 
 	/// @returns Whether this handler is currently connected to a server.
 	bool isConnected() const;
@@ -142,10 +146,10 @@ public:
 	/// @param synchronized Whether the server has finished synchronization
 	void setServerSynchronized(bool synchronized);
 
-#define MUMBLE_MH_MSG(x) \
-	void sendMessage(const MumbleProto::x &msg) { sendProtoMessage(msg, MessageHandler::x); }
-	MUMBLE_MH_ALL
-#undef MUMBLE_MH_MSG
+#define PROCESS_MUMBLE_TCP_MESSAGE(name, value) \
+	void sendMessage(const MumbleProto::name &msg) { sendProtoMessage(msg, Mumble::Protocol::TCPMessageType::name); }
+	MUMBLE_ALL_TCP_MESSAGES
+#undef PROCESS_MUMBLE_TCP_MESSAGE
 
 	void requestUserStats(unsigned int uiSession, bool statsOnly);
 	void joinChannel(unsigned int uiSession, unsigned int channel);
@@ -189,7 +193,7 @@ signals:
 	void connected();
 	void pingRequested();
 protected slots:
-	void message(unsigned int, const QByteArray &);
+	void message(Mumble::Protocol::TCPMessageType type, const QByteArray &);
 	void serverConnectionConnected();
 	void serverConnectionTimeoutOnConnect();
 	void serverConnectionStateChanged(QAbstractSocket::SocketState);
diff --git a/src/mumble/UserEdit.h b/src/mumble/UserEdit.h
index 6d17dc03a..b1f728df1 100644
--- a/src/mumble/UserEdit.h
+++ b/src/mumble/UserEdit.h
@@ -6,7 +6,6 @@
 #ifndef MUMBLE_MUMBLE_USEREDIT_H_
 #define MUMBLE_MUMBLE_USEREDIT_H_
 
-#include "Message.h"
 #include "User.h"
 #include "ui_UserEdit.h"
 
diff --git a/src/mumble/UserInformation.h b/src/mumble/UserInformation.h
index e393c2816..dfeae8a3c 100644
--- a/src/mumble/UserInformation.h
+++ b/src/mumble/UserInformation.h
@@ -8,8 +8,6 @@
 
 #include <QtNetwork/QSslCertificate>
 
-#include "Message.h"
-
 #include "ui_UserInformation.h"
 
 namespace MumbleProto {
diff --git a/src/mumble/UserListModel.cpp b/src/mumble/UserListModel.cpp
index 6af3b3888..9964c786c 100644
--- a/src/mumble/UserListModel.cpp
+++ b/src/mumble/UserListModel.cpp
@@ -6,7 +6,7 @@
 #include "UserListModel.h"
 
 #include "Channel.h"
-#include "Message.h"
+#include "QtUtils.h"
 #include "Utils.h"
 
 #include <algorithm>
diff --git a/src/mumble/UserModel.cpp b/src/mumble/UserModel.cpp
index 8639f21cc..4442bd2df 100644
--- a/src/mumble/UserModel.cpp
+++ b/src/mumble/UserModel.cpp
@@ -11,7 +11,6 @@
 #include "LCD.h"
 #include "Log.h"
 #include "MainWindow.h"
-#include "Message.h"
 #ifdef USE_OVERLAY
 #	include "Overlay.h"
 #endif
diff --git a/src/murmur/AudioReceiverBuffer.cpp b/src/murmur/AudioReceiverBuffer.cpp
new file mode 100644
index 000000000..7ea752f19
--- /dev/null
+++ b/src/murmur/AudioReceiverBuffer.cpp
@@ -0,0 +1,153 @@
+// Copyright 2021 The Mumble Developers. All rights reserved.
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file at the root of the
+// Mumble source tree or at <https://www.mumble.info/LICENSE>.
+
+#include "AudioReceiverBuffer.h"
+
+#include <algorithm>
+#include <cassert>
+#include <utility>
+
+AudioReceiver::AudioReceiver(ServerUser &receiver, Mumble::Protocol::audio_context_t context,
+							 const VolumeAdjustment &volumeAdjustment)
+	: m_receiver(receiver), m_context(context), m_volumeAdjustment(volumeAdjustment) {
+}
+
+AudioReceiver::AudioReceiver(ServerUser &receiver, Mumble::Protocol::audio_context_t context,
+							 VolumeAdjustment &&volumeAdjustment)
+	: m_receiver(receiver), m_context(context), m_volumeAdjustment(std::move(volumeAdjustment)) {
+}
+
+ServerUser &AudioReceiver::getReceiver() {
+	return m_receiver;
+}
+
+const ServerUser &AudioReceiver::getReceiver() const {
+	return m_receiver;
+}
+
+Mumble::Protocol::audio_context_t AudioReceiver::getContext() const {
+	return m_context;
+}
+
+void AudioReceiver::setContext(Mumble::Protocol::audio_context_t context) {
+	m_context = context;
+}
+
+const VolumeAdjustment &AudioReceiver::getVolumeAdjustment() const {
+	return m_volumeAdjustment;
+}
+
+void AudioReceiver::setVolumeAdjustment(const VolumeAdjustment &adjustment) {
+	m_volumeAdjustment = adjustment;
+}
+
+void AudioReceiver::setVolumeAdjustment(VolumeAdjustment &&adjustment) {
+	m_volumeAdjustment = std::move(adjustment);
+}
+
+AudioReceiverBuffer::AudioReceiverBuffer() {
+	// These are just educated guesses at reasonable starting capacities for these vectors
+	m_regularReceivers.reserve(50);
+	m_positionalReceivers.reserve(10);
+}
+
+// Adds the receiver unless it is the sender itself or cannot hear anything (deafened / self-deafened).
+// Positional data is only passed on if it is available and sender and receiver share the same context.
+void AudioReceiverBuffer::addReceiver(const ServerUser &sender, ServerUser &receiver,
+									  Mumble::Protocol::audio_context_t context, bool positionalDataAvailable,
+									  const VolumeAdjustment &volumeAdjustment) {
+	if (sender.uiSession == receiver.uiSession || receiver.bDeaf || receiver.bSelfDeaf) {
+		return;
+	}
+
+	forceAddReceiver(receiver, context, positionalDataAvailable && sender.ssContext == receiver.ssContext,
+					 volumeAdjustment);
+}
+
+// Adds the receiver to the positional or regular list without any further checks. A user that is already
+// contained in the chosen list is not added a second time; its existing entry is updated instead.
+void AudioReceiverBuffer::forceAddReceiver(ServerUser &receiver, Mumble::Protocol::audio_context_t context,
+										   bool includePositionalData, const VolumeAdjustment &volumeAdjustment) {
+	std::vector< AudioReceiver > &receiverList = includePositionalData ? m_positionalReceivers : m_regularReceivers;
+	std::unordered_map< const ServerUser *, std::size_t > &userEntryIndices =
+		includePositionalData ? m_positionalReceiverIndices : m_regularReceiverIndices;
+
+	auto it = userEntryIndices.find(&receiver);
+	if (it == userEntryIndices.end()) {
+		// No entry for that user yet
+		receiverList.emplace_back(receiver, context, volumeAdjustment);
+		userEntryIndices[&receiver] = receiverList.size() - 1;
+	} else {
+		// We already have an entry for the given user -> update that instead of adding a new one
+		AudioReceiver &receiverEntry = receiverList[it->second];
+
+		assert(receiverEntry.getReceiver().uiSession == receiver.uiSession);
+
+		// Keep the smaller of the two contexts
+		receiverEntry.setContext(std::min(receiverEntry.getContext(), context));
+
+		// Keep the larger of the two volume factors
+		if (receiverEntry.getVolumeAdjustment().factor < volumeAdjustment.factor) {
+			receiverEntry.setVolumeAdjustment(volumeAdjustment);
+		}
+	}
+}
+
+// Sorts both receiver lists. Note that this reorders the lists but leaves the per-user indices untouched,
+// so clear() has to be called before receivers are added again.
+void AudioReceiverBuffer::preprocessBuffer() {
+	preprocessBuffer(m_regularReceivers);
+	preprocessBuffer(m_positionalReceivers);
+}
+
+void AudioReceiverBuffer::clear() {
+	m_regularReceivers.clear();
+	m_regularReceiverIndices.clear();
+	m_positionalReceivers.clear();
+	m_positionalReceiverIndices.clear();
+}
+
+std::vector< AudioReceiver > &AudioReceiverBuffer::getReceivers(bool receivePositionalData) {
+	if (receivePositionalData) {
+		return m_positionalReceivers;
+	} else {
+		return m_regularReceivers;
+	}
+}
+
+void AudioReceiverBuffer::preprocessBuffer(std::vector< AudioReceiver > &receiverList) {
+#ifndef NDEBUG
+	// Sort the list such that entries with same receiver are next to each other
+	std::sort(receiverList.begin(), receiverList.end(), [](const AudioReceiver &lhs, const AudioReceiver &rhs) {
+		return lhs.getReceiver().uiSession < rhs.getReceiver().uiSession;
+	});
+
+	// Assert that our list does not contain any duplicate receivers. std::adjacent_find only inspects the
+	// list (unlike std::unique, which is a modifying algorithm and thus doesn't belong into an assertion).
+	assert(std::adjacent_find(receiverList.begin(), receiverList.end(),
+							  [](const AudioReceiver &lhs, const AudioReceiver &rhs) {
+								  return lhs.getReceiver().uiSession == rhs.getReceiver().uiSession;
+							  })
+		   == receiverList.end());
+#endif
+
+	// Sort the receivers, such that we can efficiently partition them into different regions
+	// Note: The list doesn't contain any duplicate receivers
+	std::sort(receiverList.begin(), receiverList.end(), [](const AudioReceiver &lhs, const AudioReceiver &rhs) {
+		// 1. Sort into block of compatible protocol versions
+		if (!Mumble::Protocol::protocolVersionsAreCompatible(lhs.getReceiver().uiVersion,
+															 rhs.getReceiver().uiVersion)) {
+			return lhs.getReceiver().uiVersion < rhs.getReceiver().uiVersion;
+		}
+
+		// 2. Within each block, sort based on the audio context
+		if (lhs.getContext() != rhs.getContext()) {
+			return lhs.getContext() < rhs.getContext();
+		}
+
+		// 3. Within each context, sort based on volume adjustments (but in descending order!)
+		return lhs.getVolumeAdjustment().factor > rhs.getVolumeAdjustment().factor;
+	});
+}
diff --git a/src/murmur/AudioReceiverBuffer.h b/src/murmur/AudioReceiverBuffer.h
new file mode 100644
index 000000000..236ca45ee
--- /dev/null
+++ b/src/murmur/AudioReceiverBuffer.h
@@ -0,0 +1,118 @@
+// Copyright 2021 The Mumble Developers. All rights reserved.
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file at the root of the
+// Mumble source tree or at <https://www.mumble.info/LICENSE>.
+
+#ifndef MUMBLE_MURMUR_AUDIORECEIVERBUFFER_H_
+#define MUMBLE_MURMUR_AUDIORECEIVERBUFFER_H_
+
+#include "MumbleProtocol.h"
+#include "ServerUser.h"
+#include "VolumeAdjustment.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <functional>
+#include <unordered_map>
+#include <vector>
+
+/// A single recipient of an audio packet: the receiving user plus the audio context and the
+/// volume adjustment that apply to the audio that is sent to this user.
+class AudioReceiver {
+public:
+	AudioReceiver(ServerUser &receiver, Mumble::Protocol::audio_context_t context,
+				  const VolumeAdjustment &volumeAdjustment);
+	AudioReceiver(ServerUser &receiver, Mumble::Protocol::audio_context_t context, VolumeAdjustment &&volumeAdjustment);
+
+	ServerUser &getReceiver();
+	const ServerUser &getReceiver() const;
+
+	Mumble::Protocol::audio_context_t getContext() const;
+	void setContext(Mumble::Protocol::audio_context_t context);
+
+	const VolumeAdjustment &getVolumeAdjustment() const;
+	void setVolumeAdjustment(const VolumeAdjustment &adjustment);
+	void setVolumeAdjustment(VolumeAdjustment &&adjustment);
+
+protected:
+	// A reference_wrapper (instead of a plain reference) keeps this class assignable, which is needed
+	// for sorting a std::vector< AudioReceiver >.
+	std::reference_wrapper< ServerUser > m_receiver;
+	Mumble::Protocol::audio_context_t m_context = Mumble::Protocol::AudioContext::Invalid;
+	VolumeAdjustment m_volumeAdjustment         = VolumeAdjustment::fromFactor(1.0f);
+};
+
+
+/// A half-open range [begin, end) of receivers, as produced by AudioReceiverBuffer::getReceiverRange.
+template< typename Iterator > struct ReceiverRange {
+	Iterator begin;
+	Iterator end;
+};
+
+
+/// Collects the receivers of an audio packet, split into receivers that get the packet without
+/// and receivers that get the packet with positional data. Every user is stored at most once per list.
+class AudioReceiverBuffer {
+public:
+	AudioReceiverBuffer();
+
+	/// Adds the given receiver unless it is the sender itself or is (self-)deafened. Positional data is
+	/// only included if includePositionalData is set and sender and receiver have the same ssContext.
+	void addReceiver(const ServerUser &sender, ServerUser &receiver, Mumble::Protocol::audio_context_t context,
+					 bool includePositionalData,
+					 const VolumeAdjustment &volumeAdjustment = VolumeAdjustment::fromFactor(1.0f));
+	/// Adds the given receiver without performing any checks. If the user already has an entry in the
+	/// respective list, that entry is updated to the smaller context and the larger volume factor instead.
+	void forceAddReceiver(ServerUser &receiver, Mumble::Protocol::audio_context_t context, bool includePositionalData,
+						  const VolumeAdjustment &volumeAdjustment = VolumeAdjustment::fromFactor(1.0f));
+
+	/// Sorts both receiver lists such that getReceiverRange can be used on them. This reorders the
+	/// lists without updating the per-user indices, so call clear() before adding receivers again.
+	void preprocessBuffer();
+
+	/// Removes all receivers (and the associated per-user indices) from this buffer.
+	void clear();
+
+	/// @returns The list of receivers with (true) or without (false) positional data.
+	std::vector< AudioReceiver > &getReceivers(bool receivePositionalData);
+
+
+	/// @returns The longest range starting at begin whose receivers can all be sent the exact same audio packet
+	/// as *begin (empty if begin == end). Expects the receivers to have been sorted via preprocessBuffer().
+	template< typename Iterator > static ReceiverRange< Iterator > getReceiverRange(Iterator begin, Iterator end) {
+		ReceiverRange< Iterator > range;
+		range.begin = begin;
+
+		if (begin == end) {
+			// Empty input: begin must not be dereferenced (as is done below) -> return the empty range
+			range.end = end;
+			return range;
+		}
+
+		const AudioReceiver &first = *begin;
+
+		// Find a range, such that all receivers in [begin, end) are compatible in the sense that they will all receive
+		// the exact same audio packet (thus: no re-encoding required between sending the packet to them).
+		// The sorting performed by preprocessBuffer() places all of these receivers directly behind each other.
+		range.end = std::partition_point(begin, end, [&first](const AudioReceiver &current) {
+			return current.getContext() == first.getContext()
+				   && Mumble::Protocol::protocolVersionsAreCompatible(current.getReceiver().uiVersion,
+																	  first.getReceiver().uiVersion)
+				   // Allow a little variance between volume adjustments
+				   && std::abs(current.getVolumeAdjustment().factor - first.getVolumeAdjustment().factor) < 0.05f;
+		});
+
+		return range;
+	}
+
+protected:
+	std::vector< AudioReceiver > m_regularReceivers;
+	std::unordered_map< const ServerUser *, std::size_t > m_regularReceiverIndices;
+	std::vector< AudioReceiver > m_positionalReceivers;
+	std::unordered_map< const ServerUser *, std::size_t > m_positionalReceiverIndices;
+
+	void preprocessBuffer(std::vector< AudioReceiver > &receiverList);
+};
+
+#endif // MUMBLE_MURMUR_AUDIORECEIVERBUFFER_H_
diff --git a/src/murmur/CMakeLists.txt b/src/murmur/CMakeLists.txt
index 4a434e6f9..c31890908 100644
--- a/src/murmur/CMakeLists.txt
+++ b/src/murmur/CMakeLists.txt
@@ -20,6 +20,8 @@ find_pkg(Qt5 COMPONENTS Sql REQUIRED)
 
 set(MURMUR_SOURCES
 	"main.cpp"
+	"AudioReceiverBuffer.cpp"
+	"AudioReceiverBuffer.h"
 	"Cert.cpp"
 	"Messages.cpp"
 	"Meta.cpp"
diff --git a/src/murmur/DBus.cpp b/src/murmur/DBus.cpp
index e0972ae56..cd370d2a7 100644
--- a/src/murmur/DBus.cpp
+++ b/src/murmur/DBus.cpp
@@ -12,7 +12,7 @@
 #include "DBus.h"
 
 #include "Connection.h"
-#include "Message.h"
+#include "QtUtils.h"
 #include "Server.h"
 #include "ServerDB.h"
 #include "ServerUser.h"
diff --git a/src/murmur/Messages.cpp b/src/murmur/Messages.cpp
index 54d2e2e5a..352fbcc0e 100644
--- a/src/murmur/Messages.cpp
+++ b/src/murmur/Messages.cpp
@@ -7,9 +7,9 @@
 #include "Channel.h"
 #include "Connection.h"
 #include "Group.h"
-#include "Message.h"
 #include "Meta.h"
 #include "MumbleConstants.h"
+#include "QtUtils.h"
 #include "Server.h"
 #include "ServerDB.h"
 #include "ServerUser.h"
@@ -645,7 +645,13 @@ void Server::msgUDPTunnel(ServerUser *uSource, MumbleProto::UDPTunnel &msg) {
 	if (len < 1)
 		return;
 	QReadLocker rl(&qrwlVoiceThread);
-	processMsg(uSource, str.data(), len);
+	if (m_tcpTunnelDecoder.decode(gsl::span< const Mumble::Protocol::byte >(
+			reinterpret_cast< const Mumble::Protocol::byte * >(str.data()), str.size()))
+		&& m_tcpTunnelDecoder.getMessageType() == Mumble::Protocol::UDPMessageType::Audio) {
+		Mumble::Protocol::AudioData audioData = m_tcpTunnelDecoder.getAudioData();
+
+		processMsg(uSource, audioData, m_tcpAudioReceivers, m_tcpAudioEncoder);
+	}
 }
 
 void Server::msgUserState(ServerUser *uSource, MumbleProto::UserState &msg) {
diff --git a/src/murmur/MurmurIce.cpp b/src/murmur/MurmurIce.cpp
index 0cabf6e2f..7f806556b 100644
--- a/src/murmur/MurmurIce.cpp
+++ b/src/murmur/MurmurIce.cpp
@@ -10,6 +10,7 @@
 #include "Group.h"
 #include "Meta.h"
 #include "MurmurI.h"
+#include "QtUtils.h"
 #include "Server.h"
 #include "ServerDB.h"
 #include "ServerUser.h"
diff --git a/src/murmur/RPC.cpp b/src/murmur/RPC.cpp
index 377c425d5..1e4b7f7f5 100644
--- a/src/murmur/RPC.cpp
+++ b/src/murmur/RPC.cpp
@@ -12,6 +12,7 @@
 #include "Channel.h"
 #include "Group.h"
 #include "Meta.h"
+#include "QtUtils.h"
 #include "Server.h"
 #include "ServerDB.h"
 #include "ServerUser.h"
diff --git a/src/murmur/Server.cpp b/src/murmur/Server.cpp
index 5cdb0651b..cc031825b 100644
--- a/src/murmur/Server.cpp
+++ b/src/murmur/Server.cpp
@@ -12,12 +12,11 @@
 #include "Group.h"
 #include "HTMLFilter.h"
 #include "HostAddress.h"
-#include "Message.h"
 #include "Meta.h"
-#include "PacketDataStream.h"
+#include "MumbleProtocol.h"
+#include "QtUtils.h"
 #include "ServerDB.h"
 #include "ServerUser.h"
-#include "SpeechFlags.h"
 #include "User.h"
 #include "Version.h"
 
@@ -39,6 +38,9 @@
 #include "TracyConstants.h"
 #include <Tracy.hpp>
 
+#include <algorithm>
+#include <vector>
+
 #ifdef Q_OS_WIN
 #	include <qos2.h>
 #	include <ws2tcpip.h>
@@ -47,12 +49,6 @@
 #	include <poll.h>
 #endif
 
-#ifndef MAX
-#	define MAX(a, b) ((a) > (b) ? (a) : (b))
-#endif
-
-#define UDP_PACKET_SIZE 1024
-
 ExecEvent::ExecEvent(boost::function< void() > f) : QEvent(static_cast< QEvent::Type >(EXEC_QEVENT)) {
 	func = f;
 }
@@ -76,6 +72,7 @@ QSslSocket *SslServer::nextPendingSSLConnection() {
 	return qlSockets.takeFirst();
 }
 
+
 Server::Server(int snum, QObject *p) : QThread(p) {
 	tracy::SetThreadName("Main");
 
@@ -235,7 +232,7 @@ Server::Server(int snum, QObject *p) : QThread(p) {
 	QString release;
 	Meta::getVersion(major, minor, patch, release);
 
-	uiVersionBlob = qToBigEndian(static_cast< quint32 >((major << 16) | (minor << 8) | patch));
+	m_versionBlob = Version::toRaw(major, minor, patch);
 
 	if (bValid) {
 #ifdef USE_ZEROCONF
@@ -668,24 +665,54 @@ void Server::removeZeroconf() {
 }
 #endif
 
+gsl::span< const Mumble::Protocol::byte >
+	Server::handlePing(const Mumble::Protocol::UDPDecoder< Mumble::Protocol::Role::Server > &decoder,
+					   Mumble::Protocol::UDPPingEncoder< Mumble::Protocol::Role::Server > &encoder,
+					   bool expectExtended) {
+	Mumble::Protocol::PingData pingData = decoder.getPingData();
+
+	if (pingData.requestAdditionalInformation) {
+		pingData.requestAdditionalInformation = false;
+
+		pingData.serverVersion                 = m_versionBlob;
+		pingData.userCount                     = qhUsers.size();
+		pingData.maxUserCount                  = iMaxUsers;
+		pingData.maxBandwidthPerUser           = iMaxBandwidth;
+		pingData.containsAdditionalInformation = true;
+	} else if (expectExtended) {
+		// Return zero-length span
+		return {};
+	}
+
+	// Encode in the same protocol version that we decoded with
+	encoder.setProtocolVersion(decoder.getProtocolVersion());
+
+	return encoder.encodePingPacket(pingData);
+}
+
+
 void Server::customEvent(QEvent *evt) {
 	if (evt->type() == EXEC_QEVENT)
 		static_cast< ExecEvent * >(evt)->execute();
 }
 
 void Server::udpActivated(int socket) {
+	// At this point we are only expecting pings from clients we don't know yet -> thus we also don't know which
+	// protocol version they are using.
+	m_udpDecoder.setProtocolVersion(Version::UNKNOWN);
+
 	qint32 len;
-	char encrypt[UDP_PACKET_SIZE];
+
 	sockaddr_storage from;
 #ifdef Q_OS_UNIX
 #	ifdef Q_OS_LINUX
 	struct msghdr msg;
 	struct iovec iov[1];
 
-	iov[0].iov_base = encrypt;
-	iov[0].iov_len  = UDP_PACKET_SIZE;
+	iov[0].iov_base = m_udpDecoder.getBuffer().data();
+	iov[0].iov_len  = m_udpDecoder.getBuffer().size();
 
-	uint8_t controldata[CMSG_SPACE(MAX(sizeof(struct in6_pktinfo), sizeof(struct in_pktinfo)))];
+	uint8_t controldata[CMSG_SPACE(std::max(sizeof(struct in6_pktinfo), sizeof(struct in_pktinfo)))];
 
 	memset(&msg, 0, sizeof(msg));
 	msg.msg_name       = reinterpret_cast< struct sockaddr * >(&from);
@@ -700,31 +727,40 @@ void Server::udpActivated(int socket) {
 #	else
 	socklen_t fromlen = sizeof(from);
 	int &sock         = socket;
-	len               = static_cast< qint32 >(
-        ::recvfrom(sock, encrypt, UDP_PACKET_SIZE, MSG_TRUNC, reinterpret_cast< struct sockaddr * >(&from), &fromlen));
+	len = static_cast< qint32 >(::recvfrom(sock, m_udpDecoder.getBuffer().data(), m_udpDecoder.getBuffer().size(),
+										   MSG_TRUNC, reinterpret_cast< struct sockaddr * >(&from), &fromlen));
 #	endif
 #else
 	int fromlen = sizeof(from);
 	SOCKET sock = static_cast< SOCKET >(socket);
-	len         = ::recvfrom(sock, encrypt, UDP_PACKET_SIZE, 0, reinterpret_cast< struct sockaddr * >(&from), &fromlen);
+	len = ::recvfrom(sock, reinterpret_cast< char * >(m_udpDecoder.getBuffer().data()), m_udpDecoder.getBuffer().size(),
+					 0, reinterpret_cast< struct sockaddr * >(&from), &fromlen);
 #endif
 
-	// Cloned from ::run(), as it's the only UDP data we care about until the thread is started.
-	quint32 *ping = reinterpret_cast< quint32 * >(encrypt);
-	if ((len == 12) && (*ping == 0) && bAllowPing) {
-		ping[0] = uiVersionBlob;
-		ping[3] = qToBigEndian(static_cast< quint32 >(qhUsers.count()));
-		ping[4] = qToBigEndian(static_cast< quint32 >(iMaxUsers));
-		ping[5] = qToBigEndian(static_cast< quint32 >(iMaxBandwidth));
+	// The receive call above reports failure via a negative length and, when called with MSG_TRUNC, may report a
+	// length that exceeds the size of the provided buffer. Neither case may be turned into a span over the buffer.
+	if (len < 0 || static_cast< std::size_t >(len) > m_udpDecoder.getBuffer().size()) {
+		return;
+	}
+
+	gsl::span< Mumble::Protocol::byte > inputData(&m_udpDecoder.getBuffer()[0], len);
 
+	if (bAllowPing && m_udpDecoder.decodePing(inputData)
+		&& m_udpDecoder.getMessageType() == Mumble::Protocol::UDPMessageType::Ping) {
+		gsl::span< const Mumble::Protocol::byte > encodedPing = handlePing(m_udpDecoder, m_udpPingEncoder, true);
+
+		if (!encodedPing.empty()) {
 #ifdef Q_OS_LINUX
-		// There will be space for only one header, and the only data we have asked for is the incoming
-		// address. So we can reuse most of the same msg and control data.
-		iov[0].iov_len = 6 * sizeof(quint32);
-		::sendmsg(sock, &msg, 0);
+			// There will be space for only one header, and the only data we have asked for is the incoming
+			// address. So we can reuse most of the same msg and control data.
+			iov[0].iov_len  = encodedPing.size();
+			iov[0].iov_base = const_cast< Mumble::Protocol::byte * >(encodedPing.data());
+			::sendmsg(sock, &msg, 0);
 #else
-        ::sendto(sock, encrypt, 6 * sizeof(quint32), 0, reinterpret_cast< struct sockaddr * >(&from), fromlen);
+			::sendto(sock, reinterpret_cast< const char * >(encodedPing.data()), encodedPing.size(), 0,
+					 reinterpret_cast< struct sockaddr * >(&from), fromlen);
 #endif
+		}
 	}
 }
 
@@ -733,12 +763,12 @@ void Server::run() {
 
 	qint32 len;
 #if defined(__LP64__)
-	char encbuff[UDP_PACKET_SIZE + 8];
-	char *encrypt = encbuff + 4;
+	unsigned char encbuff[Mumble::Protocol::MAX_UDP_PACKET_SIZE + 8];
+	unsigned char *encrypt = encbuff + 4;
 #else
-	char encrypt[UDP_PACKET_SIZE];
+	unsigned char encrypt[Mumble::Protocol::MAX_UDP_PACKET_SIZE];
 #endif
-	char buffer[UDP_PACKET_SIZE];
+	unsigned char buffer[Mumble::Protocol::MAX_UDP_PACKET_SIZE];
 
 	sockaddr_storage from;
 	int nfds = qlUdpSocket.count();
@@ -817,17 +847,17 @@ void Server::run() {
 
 				fromlen = sizeof(from);
 #ifdef Q_OS_WIN
-				len = ::recvfrom(sock, encrypt, UDP_PACKET_SIZE, 0, reinterpret_cast< struct sockaddr * >(&from),
-								 &fromlen);
+				len = ::recvfrom(sock, reinterpret_cast< char * >(encrypt), Mumble::Protocol::MAX_UDP_PACKET_SIZE, 0,
+								 reinterpret_cast< struct sockaddr * >(&from), &fromlen);
 #else
 #	ifdef Q_OS_LINUX
 				struct msghdr msg;
 				struct iovec iov[1];
 
 				iov[0].iov_base = encrypt;
-				iov[0].iov_len  = UDP_PACKET_SIZE;
+				iov[0].iov_len  = Mumble::Protocol::MAX_UDP_PACKET_SIZE;
 
-				uint8_t controldata[CMSG_SPACE(MAX(sizeof(struct in6_pktinfo), sizeof(struct in_pktinfo)))];
+				uint8_t controldata[CMSG_SPACE(std::max(sizeof(struct in6_pktinfo), sizeof(struct in_pktinfo)))];
 
 				memset(&msg, 0, sizeof(msg));
 				msg.msg_name       = reinterpret_cast< struct sockaddr * >(&from);
@@ -840,7 +870,7 @@ void Server::run() {
 				len = static_cast< quint32 >(::recvmsg(sock, &msg, MSG_TRUNC));
 				Q_UNUSED(fromlen);
 #	else
-				len = static_cast< qint32 >(::recvfrom(sock, encrypt, UDP_PACKET_SIZE, MSG_TRUNC,
+				len = static_cast< qint32 >(::recvfrom(sock, encrypt, Mumble::Protocol::MAX_UDP_PACKET_SIZE, MSG_TRUNC,
 													   reinterpret_cast< struct sockaddr * >(&from), &fromlen));
 #	endif
 #endif
@@ -855,40 +885,52 @@ void Server::run() {
 				} else if (len < 5) {
 					// 4 bytes crypt header + type + session
 					continue;
-				} else if (len > UDP_PACKET_SIZE) {
+				} else if (static_cast< unsigned int >(len) > Mumble::Protocol::MAX_UDP_PACKET_SIZE) {
+					// This will also catch the len == -1 case (indicating error)
+					static_assert(static_cast< unsigned int >(-1) > Mumble::Protocol::MAX_UDP_PACKET_SIZE,
+								  "Invalid assumption");
 					continue;
 				}
 
 				QReadLocker rl(&qrwlVoiceThread);
 
-				quint32 *ping = reinterpret_cast< quint32 * >(encrypt);
+				quint16 port = (from.ss_family == AF_INET6) ? (reinterpret_cast< sockaddr_in6 * >(&from)->sin6_port)
+															: (reinterpret_cast< sockaddr_in * >(&from)->sin_port);
+				const HostAddress &ha = HostAddress(from);
+
+				const QPair< HostAddress, quint16 > &key = QPair< HostAddress, quint16 >(ha, port);
+
+				ServerUser *u = qhPeerUsers.value(key);
 
-				if ((len == 12) && (*ping == 0) && bAllowPing) {
+				if (u) {
+					m_udpDecoder.setProtocolVersion(u->uiVersion);
+				} else {
+					m_udpDecoder.setProtocolVersion(Version::UNKNOWN);
+				}
+				// This may be a general ping requesting server details, unencrypted.
+				if (bAllowPing && m_udpDecoder.decodePing(gsl::span< Mumble::Protocol::byte >(encrypt, len))
+					&& m_udpDecoder.getMessageType() == Mumble::Protocol::UDPMessageType::Ping) {
 					ZoneScopedN(TracyConstants::ping_processing_zone);
 
-					ping[0] = uiVersionBlob;
-					// 1 and 2 will be the timestamp, which we return unmodified.
-					ping[3] = qToBigEndian(static_cast< quint32 >(qhUsers.count()));
-					ping[4] = qToBigEndian(static_cast< quint32 >(iMaxUsers));
-					ping[5] = qToBigEndian(static_cast< quint32 >(iMaxBandwidth));
+					gsl::span< const Mumble::Protocol::byte > encodedPing =
+						handlePing(m_udpDecoder, m_udpPingEncoder, true);
 
+					if (!encodedPing.empty()) {
 #ifdef Q_OS_LINUX
-					iov[0].iov_len = 6 * sizeof(quint32);
-					::sendmsg(sock, &msg, 0);
+						// We are only reading from the buffer and thus the const_cast should be fine
+						iov[0].iov_base = const_cast< Mumble::Protocol::byte * >(encodedPing.data());
+						iov[0].iov_len  = encodedPing.size();
+						::sendmsg(sock, &msg, 0);
 #else
-					::sendto(sock, encrypt, 6 * sizeof(quint32), 0, reinterpret_cast< struct sockaddr * >(&from),
-							 fromlen);
+						::sendto(sock, reinterpret_cast< const char * >(encodedPing.data()), encodedPing.size(), 0,
+								 reinterpret_cast< struct sockaddr * >(&from), fromlen);
 #endif
+					}
+
 					continue;
 				}
 
-				quint16 port = (from.ss_family == AF_INET6) ? (reinterpret_cast< sockaddr_in6 * >(&from)->sin6_port)
-															: (reinterpret_cast< sockaddr_in * >(&from)->sin_port);
-				const HostAddress &ha = HostAddress(from);
 
-				const QPair< HostAddress, quint16 > &key = QPair< HostAddress, quint16 >(ha, port);
-
-				ServerUser *u = qhPeerUsers.value(key);
 				if (u) {
 					if (!checkDecrypt(u, encrypt, buffer, len)) {
 						continue;
@@ -924,27 +966,43 @@ void Server::run() {
 				}
 				len -= 4;
 
-				MessageHandler::UDPMessageType msgType =
-					static_cast< MessageHandler::UDPMessageType >((buffer[0] >> 5) & 0x7);
+				if (m_udpDecoder.decode(gsl::span< Mumble::Protocol::byte >(buffer, len))) {
+					switch (m_udpDecoder.getMessageType()) {
+						case Mumble::Protocol::UDPMessageType::Audio: {
+							Mumble::Protocol::AudioData audioData = m_udpDecoder.getAudioData();
 
-				if (msgType == MessageHandler::UDPVoiceSpeex || msgType == MessageHandler::UDPVoiceCELTAlpha
-					|| msgType == MessageHandler::UDPVoiceCELTBeta || msgType == MessageHandler::UDPVoiceOpus) {
-					// Allow all voice packets through by default.
-					bool ok = true;
-					// ...Unless we're in Opus mode. In Opus mode, only Opus packets are allowed.
-					if (bOpus && msgType != MessageHandler::UDPVoiceOpus) {
-						ok = false;
-					}
+							// Allow all voice packets through by default.
+							bool ok = true;
+							// ...Unless we're in Opus mode. In Opus mode, only Opus packets are allowed.
+							if (bOpus && audioData.usedCodec != Mumble::Protocol::AudioCodec::Opus) {
+								ok = false;
+							}
 
-					if (ok) {
-						u->aiUdpFlag = 1;
-						processMsg(u, buffer, len);
-					}
-				} else if (msgType == MessageHandler::UDPPing) {
-					ZoneScopedN(TracyConstants::udp_ping_processing_zone);
+							if (ok) {
+								u->aiUdpFlag = 1;
 
-					QByteArray qba;
-					sendMessage(u, buffer, len, qba, true);
+								// Add session id
+								audioData.senderSession = u->uiSession;
+
+								processMsg(u, audioData, m_udpAudioReceivers, m_udpAudioEncoder);
+							}
+							break;
+						}
+						case Mumble::Protocol::UDPMessageType::Ping: {
+							ZoneScopedN(TracyConstants::udp_ping_processing_zone);
+
+							Mumble::Protocol::PingData pingData = m_udpDecoder.getPingData();
+							if (!pingData.requestAdditionalInformation && !pingData.containsAdditionalInformation) {
+								// At this point here, we only want to handle connectivity pings
+								gsl::span< const Mumble::Protocol::byte > encodedPing =
+									handlePing(m_udpDecoder, m_udpPingEncoder, false);
+
+								QByteArray cache;
+								sendMessage(*u, encodedPing.data(), encodedPing.size(), cache, true);
+							}
+							break;
+						}
+					}
 				}
 #ifdef Q_OS_UNIX
 				fds[i].revents = 0;
@@ -960,15 +1018,14 @@ void Server::run() {
 #endif
 }
 
-bool Server::checkDecrypt(ServerUser *u, const char *encrypt, char *plain, unsigned int len) {
+bool Server::checkDecrypt(ServerUser *u, const unsigned char *encrypt, unsigned char *plain, unsigned int len) {
 	ZoneScoped;
 
 	QMutexLocker l(&u->qmCrypt);
 
-	if (u->csCrypt->isValid()
-		&& u->csCrypt->decrypt(reinterpret_cast< const unsigned char * >(encrypt),
-							   reinterpret_cast< unsigned char * >(plain), len))
+	if (u->csCrypt->isValid() && u->csCrypt->decrypt(encrypt, plain, len)) {
 		return true;
+	}
 
 	if (u->csCrypt->tLastGood.elapsed() > 5000000ULL) {
 		if (u->csCrypt->tLastRequest.elapsed() > 5000000ULL) {
@@ -979,14 +1036,14 @@ bool Server::checkDecrypt(ServerUser *u, const char *encrypt, char *plain, unsig
 	return false;
 }
 
-void Server::sendMessage(ServerUser *u, const char *data, int len, QByteArray &cache, bool force) {
+void Server::sendMessage(ServerUser &u, const unsigned char *data, int len, QByteArray &cache, bool force) {
 	ZoneScoped;
 
 #if QT_VERSION >= QT_VERSION_CHECK(5, 14, 0)
-	if ((u->aiUdpFlag.loadRelaxed() == 1 || force) && (u->sUdpSocket != INVALID_SOCKET)) {
+	if ((u.aiUdpFlag.loadRelaxed() == 1 || force) && (u.sUdpSocket != INVALID_SOCKET)) {
 #else
 	// Qt 5.14 introduced QAtomicInteger::loadRelaxed() which deprecates QAtomicInteger::load()
-	if ((u->aiUdpFlag.load() == 1 || force) && (u->sUdpSocket != INVALID_SOCKET)) {
+	if ((u.aiUdpFlag.load() == 1 || force) && (u.sUdpSocket != INVALID_SOCKET)) {
 #endif
 #if defined(__LP64__)
 		STACKVAR(char, ebuffer, len + 4 + 16);
@@ -995,21 +1052,21 @@ void Server::sendMessage(ServerUser *u, const char *data, int len, QByteArray &c
 		STACKVAR(char, buffer, len + 4);
 #endif
 		{
-			QMutexLocker wl(&u->qmCrypt);
+			QMutexLocker wl(&u.qmCrypt);
 
-			if (!u->csCrypt->isValid()) {
+			if (!u.csCrypt->isValid()) {
 				return;
 			}
 
-			if (!u->csCrypt->encrypt(reinterpret_cast< const unsigned char * >(data),
-									 reinterpret_cast< unsigned char * >(buffer), len)) {
+			if (!u.csCrypt->encrypt(reinterpret_cast< const unsigned char * >(data),
+									reinterpret_cast< unsigned char * >(buffer), len)) {
 				return;
 			}
 		}
 #ifdef Q_OS_WIN
 		DWORD dwFlow = 0;
 		if (Meta::hQoS)
-			QOSAddSocketToFlow(Meta::hQoS, u->sUdpSocket, reinterpret_cast< struct sockaddr * >(&u->saiUdpAddress),
+			QOSAddSocketToFlow(Meta::hQoS, u.sUdpSocket, reinterpret_cast< struct sockaddr * >(&u.saiUdpAddress),
 							   QOSTrafficTypeVoice, QOS_NON_ADAPTIVE_FLOW, reinterpret_cast< PQOS_FLOWID >(&dwFlow));
 #endif
 #ifdef Q_OS_LINUX
@@ -1019,22 +1076,22 @@ void Server::sendMessage(ServerUser *u, const char *data, int len, QByteArray &c
 		iov[0].iov_base = buffer;
 		iov[0].iov_len  = len + 4;
 
-		uint8_t controldata[CMSG_SPACE(MAX(sizeof(struct in6_pktinfo), sizeof(struct in_pktinfo)))];
+		uint8_t controldata[CMSG_SPACE(std::max(sizeof(struct in6_pktinfo), sizeof(struct in_pktinfo)))];
 		memset(controldata, 0, sizeof(controldata));
 
 		memset(&msg, 0, sizeof(msg));
-		msg.msg_name    = reinterpret_cast< struct sockaddr * >(&u->saiUdpAddress);
+		msg.msg_name    = reinterpret_cast< struct sockaddr * >(&u.saiUdpAddress);
 		msg.msg_namelen = static_cast< socklen_t >(
-			(u->saiUdpAddress.ss_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in));
+			(u.saiUdpAddress.ss_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in));
 		msg.msg_iov        = iov;
 		msg.msg_iovlen     = 1;
 		msg.msg_control    = controldata;
-		msg.msg_controllen = CMSG_SPACE((u->saiUdpAddress.ss_family == AF_INET6) ? sizeof(struct in6_pktinfo)
-																				 : sizeof(struct in_pktinfo));
+		msg.msg_controllen = CMSG_SPACE((u.saiUdpAddress.ss_family == AF_INET6) ? sizeof(struct in6_pktinfo)
+																				: sizeof(struct in_pktinfo));
 
 		struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
-		HostAddress tcpha(u->saiTcpLocalAddress);
-		if (u->saiUdpAddress.ss_family == AF_INET6) {
+		HostAddress tcpha(u.saiTcpLocalAddress);
+		if (u.saiUdpAddress.ss_family == AF_INET6) {
 			cmsg->cmsg_level            = IPPROTO_IPV6;
 			cmsg->cmsg_type             = IPV6_PKTINFO;
 			cmsg->cmsg_len              = CMSG_LEN(sizeof(struct in6_pktinfo));
@@ -1053,10 +1110,10 @@ void Server::sendMessage(ServerUser *u, const char *data, int len, QByteArray &c
 		}
 
 
-		::sendmsg(u->sUdpSocket, &msg, 0);
+		::sendmsg(u.sUdpSocket, &msg, 0);
 #else
-		::sendto(u->sUdpSocket, buffer, len + 4, 0, reinterpret_cast< struct sockaddr * >(&u->saiUdpAddress),
-				 (u->saiUdpAddress.ss_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in));
+		::sendto(u.sUdpSocket, buffer, len + 4, 0, reinterpret_cast< struct sockaddr * >(&u.saiUdpAddress),
+				 (u.saiUdpAddress.ss_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in));
 #endif
 #ifdef Q_OS_WIN
 		if (Meta::hQoS && dwFlow)
@@ -1065,23 +1122,17 @@ void Server::sendMessage(ServerUser *u, const char *data, int len, QByteArray &c
 #endif
 	} else {
 		if (cache.isEmpty())
-			cache = QByteArray(data, len);
-		emit tcpTransmit(cache, u->uiSession);
+			cache = QByteArray(reinterpret_cast< const char * >(data), len);
+		emit tcpTransmit(cache, u.uiSession);
 	}
 }
 
-#define SENDTO                                                 \
-	if ((!pDst->bDeaf) && (!pDst->bSelfDeaf) && (pDst != u)) { \
-		if ((poslen > 0) && (pDst->ssContext == u->ssContext)) \
-			sendMessage(pDst, buffer, len, qba);               \
-		else                                                   \
-			sendMessage(pDst, buffer, len - poslen, qba_npos); \
-	}
 
-void Server::processMsg(ServerUser *u, const char *data, int len) {
+void Server::processMsg(ServerUser *u, Mumble::Protocol::AudioData audioData, AudioReceiverBuffer &buffer,
+						Mumble::Protocol::UDPAudioEncoder< Mumble::Protocol::Role::Server > &encoder) {
 	ZoneScoped;
 
-	// Note that in this function we never have to aquire a read-lock on qrwlVoiceThread
+	// Note that in this function we never have to acquire a read-lock on qrwlVoiceThread
 	// as all places that call this function will hold that lock at the point of calling
 	// this function.
 	// This function is currently called from Server::msgUDPTunnel, Server::run and
@@ -1089,21 +1140,12 @@ void Server::processMsg(ServerUser *u, const char *data, int len) {
 	if (u->sState != ServerUser::Authenticated || u->bMute || u->bSuppress || u->bSelfMute)
 		return;
 
-	QByteArray qba, qba_npos;
-	unsigned int counter;
-	char buffer[UDP_PACKET_SIZE];
-	PacketDataStream pdi(data + 1, len - 1);
-	PacketDataStream pds(buffer + 1, UDP_PACKET_SIZE - 1);
-	unsigned int type   = data[0] & 0xe0;
-	unsigned int target = data[0] & 0x1f;
-	unsigned int poslen;
-
 	// Check the voice data rate limit.
 	{
 		BandwidthRecord *bw = &u->bwr;
 
 		// IP + UDP + Crypt + Data
-		const int packetsize = 20 + 8 + 4 + len;
+		const int packetsize = 20 + 8 + 4 + audioData.payload.size();
 
 		if (!bw->addFrame(packetsize, iMaxBandwidth / 8)) {
 			// Suppress packet.
@@ -1111,69 +1153,26 @@ void Server::processMsg(ServerUser *u, const char *data, int len) {
 		}
 	}
 
-	// Read the sequence number.
-	pdi >> counter;
-
-	// Skip to the end of the voice data.
-	if ((type >> 5) != MessageHandler::UDPVoiceOpus) {
-		do {
-			counter = pdi.next8();
-			pdi.skip(counter & 0x7f);
-		} while ((counter & 0x80) && pdi.isValid());
-	} else {
-		int size;
-		pdi >> size;
-		pdi.skip(size & 0x1fff);
-	}
-
-	// Save location of the positional audio data.
-	poslen = pdi.left();
+	buffer.clear();
 
-	// Append session id to the new output stream.
-	pds << u->uiSession;
-	// Copy all voice and positional audio data to the output stream.
-	pds.append(data + 1, len - 1);
-
-	len = pds.size() + 1;
-
-	if (poslen > static_cast< unsigned int >(len)) {
-		// poslen must never ever be bigger than len as this could lead to negative buffer sizes (len - poslen) being
-		// used when further processing the packet.
-		// Usually this shouldn't happen in the first place but can happen with malformed/malicious packets in certain
-		// cases.
-		poslen = 0;
-	}
-
-	/// A set of users that'll receive the audio buffer because they are listening
-	/// to a channel that received that audio.
-	QSet< ServerUser * > listeningUsers;
-
-	if (target == 0x1f) { // Server loopback
-		buffer[0] = static_cast< char >(type | SpeechFlags::Normal);
-		sendMessage(u, buffer, len, qba);
-		return;
-	} else if (target == 0) { // Normal speech
+	if (audioData.targetOrContext == Mumble::Protocol::ReservedTargetIDs::ServerLoopback) {
+		buffer.forceAddReceiver(*u, Mumble::Protocol::AudioContext::Normal, audioData.containsPositionalData);
+	} else if (audioData.targetOrContext == Mumble::Protocol::ReservedTargetIDs::RegularSpeech) {
 		Channel *c = u->cChannel;
 
-		buffer[0] = static_cast< char >(type | SpeechFlags::Normal);
-
 		// Send audio to all users that are listening to the channel
 		foreach (unsigned int currentSession, m_channelListenerManager.getListenersForChannel(c->iId)) {
 			ServerUser *pDst = static_cast< ServerUser * >(qhUsers.value(currentSession));
 			if (pDst) {
-				listeningUsers << pDst;
+				buffer.addReceiver(*u, *pDst, Mumble::Protocol::AudioContext::Listen, audioData.containsPositionalData);
 			}
 		}
 
 		// Send audio to all users in the same channel
-		foreach (User *p, c->qlUsers) {
+		for (User *p : c->qlUsers) {
 			ServerUser *pDst = static_cast< ServerUser * >(p);
 
-			// As we send the audio to this particular user here, we want to make sure to not send it again due to a
-			// listener proxy
-			listeningUsers -= pDst;
-
-			SENDTO;
+			buffer.addReceiver(*u, *pDst, Mumble::Protocol::AudioContext::Normal, audioData.containsPositionalData);
 		}
 
 		// Send audio to all linked channels the user has speak-permission
@@ -1183,46 +1182,39 @@ void Server::processMsg(ServerUser *u, const char *data, int len) {
 
 			QMutexLocker qml(&qmCache);
 
-			foreach (Channel *l, chans) {
+			for (Channel *l : chans) {
 				if (ChanACL::hasPermission(u, l, ChanACL::Speak, &acCache)) {
-					// Send the audio stream to all users that are listening to the linked channel but are not
-					// in the original channel the audio is coming from nor are they listening to the orignal
-					// channel (in these cases they have received the audio already).
-					foreach (unsigned int currentSession, m_channelListenerManager.getListenersForChannel(l->iId)) {
+					// Send the audio stream to all users that are listening to the linked channel
+					for (unsigned int currentSession : m_channelListenerManager.getListenersForChannel(l->iId)) {
 						ServerUser *pDst = static_cast< ServerUser * >(qhUsers.value(currentSession));
-						if (pDst && pDst->cChannel != c
-							&& !m_channelListenerManager.isListening(pDst->uiSession, c->iId)) {
-							listeningUsers << pDst;
+						if (pDst) {
+							buffer.addReceiver(*u, *pDst, Mumble::Protocol::AudioContext::Listen,
+											   audioData.containsPositionalData);
 						}
 					}
 
 					// Send audio to users in the linked channel
-					foreach (User *p, l->qlUsers) {
-						if (!m_channelListenerManager.isListening(p->uiSession, c->iId)) {
-							ServerUser *pDst = static_cast< ServerUser * >(p);
-
-							// As we send the audio to this particular user here, we want to make sure to not send it
-							// again due to a listener proxy
-							listeningUsers -= pDst;
+					for (User *p : l->qlUsers) {
+						ServerUser *pDst = static_cast< ServerUser * >(p);
 
-							SENDTO;
-						}
+						buffer.addReceiver(*u, *pDst, Mumble::Protocol::AudioContext::Normal,
+										   audioData.containsPositionalData);
 					}
 				}
 			}
 		}
-	} else if (u->qmTargets.contains(target)) { // Whisper/Shout
+	} else if (u->qmTargets.contains(audioData.targetOrContext)) { // Whisper/Shout
 		QSet< ServerUser * > channel;
 		QSet< ServerUser * > direct;
 		QSet< ServerUser * > listener;
 
-		if (u->qmTargetCache.contains(target)) {
-			const WhisperTargetCache &cache = u->qmTargetCache.value(target);
+		if (u->qmTargetCache.contains(audioData.targetOrContext)) {
+			const WhisperTargetCache &cache = u->qmTargetCache.value(audioData.targetOrContext);
 			channel                         = cache.channelTargets;
 			direct                          = cache.directTargets;
 			listener                        = cache.listeningTargets;
 		} else {
-			const WhisperTarget &wt = u->qmTargets.value(target);
+			const WhisperTarget &wt = u->qmTargets.value(audioData.targetOrContext);
 			if (!wt.qlChannels.isEmpty()) {
 				QMutexLocker qml(&qmCache);
 
@@ -1303,33 +1295,78 @@ void Server::processMsg(ServerUser *u, const char *data, int len) {
 			qrwlVoiceThread.lockForWrite();
 
 			if (qhUsers.contains(uiSession))
-				u->qmTargetCache.insert(target, { channel, direct, listener });
+				u->qmTargetCache.insert(audioData.targetOrContext, { channel, direct, listener });
 			qrwlVoiceThread.unlock();
 			qrwlVoiceThread.lockForRead();
 			if (!qhUsers.contains(uiSession))
 				return;
 		}
-		if (!channel.isEmpty()) {
-			// These users receive the audio because someone is shouting to their channel
-			buffer[0] = static_cast< char >(type | SpeechFlags::Shout);
-			foreach (ServerUser *pDst, channel) { SENDTO; }
-			if (!direct.isEmpty()) {
-				qba.clear();
-				qba_npos.clear();
-			}
+		// These users receive the audio because someone is shouting to their channel
+		for (ServerUser *pDst : channel) {
+			buffer.addReceiver(*u, *pDst, Mumble::Protocol::AudioContext::Shout, audioData.containsPositionalData);
 		}
-		if (!direct.isEmpty()) {
-			buffer[0] = static_cast< char >(type | SpeechFlags::Whisper);
-			foreach (ServerUser *pDst, direct) { SENDTO; }
+		// These users receive audio because someone is whispering to them
+		for (ServerUser *pDst : direct) {
+			buffer.addReceiver(*u, *pDst, Mumble::Protocol::AudioContext::Whisper, audioData.containsPositionalData);
+		}
+		// These users receive audio because someone is sending audio to one of their listeners
+		for (ServerUser *current : listener) {
+			buffer.addReceiver(*u, *current, Mumble::Protocol::AudioContext::Listen, audioData.containsPositionalData);
 		}
-
-		// Add the listening users to the set of current listeners
-		listeningUsers += listener;
 	}
 
-	// Send the audio to all listening users
-	buffer[0] = static_cast< char >(type | SpeechFlags::Listen);
-	foreach (ServerUser *pDst, listeningUsers) { SENDTO; }
+	buffer.preprocessBuffer();
+
+	bool isFirstIteration = true;
+	QByteArray tcpCache;
+	for (bool includePositionalData : { true, false }) {
+		std::vector< AudioReceiver > &receiverList = buffer.getReceivers(includePositionalData);
+
+		audioData.containsPositionalData = includePositionalData && audioData.containsPositionalData;
+
+		if (!audioData.containsPositionalData) {
+			encoder.dropPositionalData();
+		}
+
+		// Note: The receiver-ranges are determined in such a way, that they are all going to receive the exact
+		// same audio packet.
+		ReceiverRange< std::vector< AudioReceiver >::iterator > currentRange =
+			AudioReceiverBuffer::getReceiverRange(receiverList.begin(), receiverList.end());
+
+		while (currentRange.begin != currentRange.end) {
+			// Setup encoder for this range
+			if (isFirstIteration
+				|| !Mumble::Protocol::protocolVersionsAreCompatible(encoder.getProtocolVersion(),
+																	currentRange.begin->getReceiver().uiVersion)) {
+				encoder.setProtocolVersion(currentRange.begin->getReceiver().uiVersion);
+
+				// We have to re-encode the "fixed" part of the audio message
+				encoder.prepareAudioPacket(audioData);
+
+				if (audioData.containsPositionalData) {
+					encoder.addPositionalData(audioData);
+				}
+
+				isFirstIteration = false;
+			}
+
+			audioData.targetOrContext = currentRange.begin->getContext();
+
+			// Update data
+			gsl::span< const Mumble::Protocol::byte > encodedPacket = encoder.updateAudioPacket(audioData);
+
+			// Clear TCP cache
+			tcpCache.clear();
+
+			// Send encoded packet to all receivers of this range
+			for (auto it = currentRange.begin; it != currentRange.end; ++it) {
+				sendMessage(it->getReceiver(), encodedPacket.data(), encodedPacket.size(), tcpCache);
+			}
+
+			// Find next range
+			currentRange = AudioReceiverBuffer::getReceiverRange(currentRange.end, receiverList.end());
+		}
+	}
 }
 
 void Server::log(ServerUser *u, const QString &str) const {
@@ -1455,13 +1492,11 @@ void Server::newClient() {
 		u->haAddress  = ha;
 		HostAddress(sock->localAddress()).toSockaddr(&u->saiTcpLocalAddress);
 
-		connect(u, SIGNAL(connectionClosed(QAbstractSocket::SocketError, const QString &)), this,
-				SLOT(connectionClosed(QAbstractSocket::SocketError, const QString &)));
-		connect(u, SIGNAL(message(unsigned int, const QByteArray &)), this,
-				SLOT(message(unsigned int, const QByteArray &)));
-		connect(u, SIGNAL(handleSslErrors(const QList< QSslError > &)), this,
-				SLOT(sslError(const QList< QSslError > &)));
-		connect(u, SIGNAL(encrypted()), this, SLOT(encrypted()));
+		connect(u, &ServerUser::connectionClosed, this, &Server::connectionClosed);
+		connect(u, SIGNAL(message(Mumble::Protocol::TCPMessageType, const QByteArray &)), this,
+				SLOT(message(Mumble::Protocol::TCPMessageType, const QByteArray &)));
+		connect(u, &ServerUser::handleSslErrors, this, &Server::sslError);
+		connect(u, &ServerUser::encrypted, this, &Server::encrypted);
 
 		log(u, QString("New connection: %1").arg(addressToString(sock->peerAddress(), sock->peerPort())));
 
@@ -1688,7 +1723,7 @@ void Server::connectionClosed(QAbstractSocket::SocketError err, const QString &r
 		stopThread();
 }
 
-void Server::message(unsigned int uiType, const QByteArray &qbaMsg, ServerUser *u) {
+void Server::message(Mumble::Protocol::TCPMessageType type, const QByteArray &qbaMsg, ServerUser *u) {
 	ZoneScopedN(TracyConstants::tcp_packet_processing_zone);
 
 	if (!u) {
@@ -1699,9 +1734,9 @@ void Server::message(unsigned int uiType, const QByteArray &qbaMsg, ServerUser *
 		u->resetActivityTime();
 	}
 
-	if (uiType == MessageHandler::UDPTunnel) {
+	if (type == Mumble::Protocol::TCPMessageType::UDPTunnel) {
 		int len = qbaMsg.size();
-		if (len < 2 || len > UDP_PACKET_SIZE) {
+		if (len < 2 || static_cast< unsigned int >(len) > Mumble::Protocol::MAX_UDP_PACKET_SIZE) {
 			// Drop messages that are too small to be senseful or that are bigger than allowed
 			return;
 		}
@@ -1710,21 +1745,25 @@ void Server::message(unsigned int uiType, const QByteArray &qbaMsg, ServerUser *
 
 		u->aiUdpFlag = 0;
 
-		const char *buffer = qbaMsg.constData();
+		m_tcpTunnelDecoder.setProtocolVersion(u->uiVersion);
+
+		if (m_tcpTunnelDecoder.decode(gsl::span< const Mumble::Protocol::byte >(
+				reinterpret_cast< const Mumble::Protocol::byte * >(qbaMsg.constData()), qbaMsg.size()))) {
+			if (m_tcpTunnelDecoder.getMessageType() == Mumble::Protocol::UDPMessageType::Audio) {
+				Mumble::Protocol::AudioData audioData = m_tcpTunnelDecoder.getAudioData();
+				// Allow all voice packets through by default.
+				bool ok = true;
+				// ...Unless we're in Opus mode. In Opus mode, only Opus packets are allowed.
+				if (bOpus && audioData.usedCodec != Mumble::Protocol::AudioCodec::Opus) {
+					ok = false;
+				}
 
-		MessageHandler::UDPMessageType msgType = static_cast< MessageHandler::UDPMessageType >((buffer[0] >> 5) & 0x7);
+				if (ok) {
+					// Add session id
+					audioData.senderSession = u->uiSession;
 
-		if (msgType == MessageHandler::UDPVoiceSpeex || msgType == MessageHandler::UDPVoiceCELTAlpha
-			|| msgType == MessageHandler::UDPVoiceCELTBeta || msgType == MessageHandler::UDPVoiceOpus) {
-			// Allow all voice packets through by default.
-			bool ok = true;
-			// ...Unless we're in Opus mode. In Opus mode, only Opus packets are allowed.
-			if (bOpus && msgType != MessageHandler::UDPVoiceOpus) {
-				ok = false;
-			}
-
-			if (ok) {
-				processMsg(u, buffer, len);
+					processMsg(u, std::move(audioData), m_tcpAudioReceivers, m_tcpAudioEncoder);
+				}
 			}
 		}
 
@@ -1732,34 +1771,34 @@ void Server::message(unsigned int uiType, const QByteArray &qbaMsg, ServerUser *
 	}
 
 #ifdef QT_NO_DEBUG
-#	define MUMBLE_MH_MSG(x)                                             \
-		case MessageHandler::x: {                                        \
-			MumbleProto::x msg;                                          \
+#	define PROCESS_MUMBLE_TCP_MESSAGE(name, value)                      \
+		case Mumble::Protocol::TCPMessageType::name: {                   \
+			MumbleProto::name msg;                                       \
 			if (msg.ParseFromArray(qbaMsg.constData(), qbaMsg.size())) { \
 				msg.DiscardUnknownFields();                              \
-				msg##x(u, msg);                                          \
+				msg##name(u, msg);                                       \
 			}                                                            \
 			break;                                                       \
 		}
 #else
-#	define MUMBLE_MH_MSG(x)                                             \
-		case MessageHandler::x: {                                        \
-			MumbleProto::x msg;                                          \
+#	define PROCESS_MUMBLE_TCP_MESSAGE(name, value)                      \
+		case Mumble::Protocol::TCPMessageType::name: {                   \
+			MumbleProto::name msg;                                       \
 			if (msg.ParseFromArray(qbaMsg.constData(), qbaMsg.size())) { \
-				if (uiType != MessageHandler::Ping) {                    \
-					printf("== %s:\n", #x);                              \
+				if (type != Mumble::Protocol::TCPMessageType::Ping) {    \
+					printf("== %s:\n", #name);                           \
 					msg.PrintDebugString();                              \
 				}                                                        \
 				msg.DiscardUnknownFields();                              \
-				msg##x(u, msg);                                          \
+				msg##name(u, msg);                                       \
 			}                                                            \
 			break;                                                       \
 		}
 #endif
 
-	switch (uiType) { MUMBLE_MH_ALL }
+	switch (type) { MUMBLE_ALL_TCP_MESSAGES }
 
-#undef MUMBLE_MH_MSG
+#undef PROCESS_MUMBLE_TCP_MESSAGE
 }
 
 void Server::checkTimeout() {
@@ -1784,8 +1823,9 @@ void Server::tcpTransmitData(QByteArray a, unsigned int id) {
 		int len = a.size();
 
 		qba.resize(len + 6);
-		unsigned char *uc                      = reinterpret_cast< unsigned char * >(qba.data());
-		*reinterpret_cast< quint16 * >(&uc[0]) = qToBigEndian(static_cast< quint16 >(MessageHandler::UDPTunnel));
+		unsigned char *uc = reinterpret_cast< unsigned char * >(qba.data());
+		*reinterpret_cast< quint16 * >(&uc[0]) =
+			qToBigEndian(static_cast< quint16 >(Mumble::Protocol::TCPMessageType::UDPTunnel));
 		*reinterpret_cast< quint32 * >(&uc[2]) = qToBigEndian(static_cast< quint32 >(len));
 		memcpy(uc + 6, a.constData(), len);
 
@@ -1803,17 +1843,19 @@ void Server::doSync(unsigned int id) {
 	}
 }
 
-void Server::sendProtoMessage(ServerUser *u, const ::google::protobuf::Message &msg, unsigned int msgType) {
+void Server::sendProtoMessage(ServerUser *u, const ::google::protobuf::Message &msg,
+							  Mumble::Protocol::TCPMessageType msgType) {
 	QByteArray cache;
 	u->sendMessage(msg, msgType, cache);
 }
 
-void Server::sendProtoAll(const ::google::protobuf::Message &msg, unsigned int msgType, unsigned int version) {
+void Server::sendProtoAll(const ::google::protobuf::Message &msg, Mumble::Protocol::TCPMessageType msgType,
+						  unsigned int version) {
 	sendProtoExcept(nullptr, msg, msgType, version);
 }
 
-void Server::sendProtoExcept(ServerUser *u, const ::google::protobuf::Message &msg, unsigned int msgType,
-							 unsigned int version) {
+void Server::sendProtoExcept(ServerUser *u, const ::google::protobuf::Message &msg,
+							 Mumble::Protocol::TCPMessageType msgType, unsigned int version) {
 	QByteArray cache;
 	foreach (ServerUser *usr, qhUsers)
 		if ((usr != u) && (usr->sState == ServerUser::Authenticated))
@@ -2298,7 +2340,5 @@ bool Server::canNest(Channel *newParent, Channel *channel) const {
 	return (parentLevel + channelDepth) < iChannelNestingLimit;
 }
 
-#undef MAX
-#undef UDP_PACKET_SIZE
 #undef SIO_UDP_CONNRESET
 #undef SENDTO
diff --git a/src/murmur/Server.h b/src/murmur/Server.h
index e4d86e64f..eb90a35d3 100644
--- a/src/murmur/Server.h
+++ b/src/murmur/Server.h
@@ -13,13 +13,15 @@
 #endif
 
 #include "ACL.h"
+#include "AudioReceiverBuffer.h"
 #include "Ban.h"
 #include "ChannelListenerManager.h"
 #include "HostAddress.h"
-#include "Message.h"
 #include "Mumble.pb.h"
+#include "MumbleProtocol.h"
 #include "Timer.h"
 #include "User.h"
+#include "Version.h"
 
 #ifndef Q_MOC_RUN
 #	include <boost/function.hpp>
@@ -169,6 +171,17 @@ public:
 
 	ChannelListenerManager m_channelListenerManager;
 
+
+	Mumble::Protocol::UDPDecoder< Mumble::Protocol::Role::Server > m_udpDecoder;
+	Mumble::Protocol::UDPDecoder< Mumble::Protocol::Role::Server > m_tcpTunnelDecoder;
+	Mumble::Protocol::UDPPingEncoder< Mumble::Protocol::Role::Server > m_udpPingEncoder;
+	Mumble::Protocol::UDPAudioEncoder< Mumble::Protocol::Role::Server > m_udpAudioEncoder;
+	Mumble::Protocol::UDPAudioEncoder< Mumble::Protocol::Role::Server > m_tcpAudioEncoder;
+
+	gsl::span< const Mumble::Protocol::byte >
+		handlePing(const Mumble::Protocol::UDPDecoder< Mumble::Protocol::Role::Server > &decoder,
+				   Mumble::Protocol::UDPPingEncoder< Mumble::Protocol::Role::Server > &encoder, bool expectExtended);
+
 	void readParams();
 
 	int iCodecAlpha;
@@ -189,6 +202,9 @@ private:
 	int iChannelNestingLimit;
 	int iChannelCountLimit;
 
+	AudioReceiverBuffer m_udpAudioReceivers;
+	AudioReceiverBuffer m_tcpAudioReceivers;
+
 public slots:
 	void regSslError(const QList< QSslError > &);
 	void finished();
@@ -209,7 +225,7 @@ public slots:
 	void newClient();
 	void connectionClosed(QAbstractSocket::SocketError, const QString &);
 	void sslError(const QList< QSslError > &);
-	void message(unsigned int, const QByteArray &, ServerUser *cCon = nullptr);
+	void message(Mumble::Protocol::TCPMessageType, const QByteArray &, ServerUser *cCon = nullptr);
 	void checkTimeout();
 	void tcpTransmitData(QByteArray, unsigned int);
 	void doSync(unsigned int);
@@ -232,7 +248,7 @@ public:
 	HANDLE hNotify;
 	QList< SOCKET > qlUdpSocket;
 #endif
-	quint32 uiVersionBlob;
+	Version::mumble_raw_version_t m_versionBlob;
 	QList< QSocketNotifier * > qlUdpNotifier;
 
 	/// This lock provides synchronization between the
@@ -294,14 +310,15 @@ public:
 
 	QList< Ban > qlBans;
 
-	void processMsg(ServerUser *u, const char *data, int len);
-	void sendMessage(ServerUser *u, const char *data, int len, QByteArray &cache, bool force = false);
+	void processMsg(ServerUser *u, Mumble::Protocol::AudioData audioData, AudioReceiverBuffer &buffer,
+					Mumble::Protocol::UDPAudioEncoder< Mumble::Protocol::Role::Server > &encoder);
+	void sendMessage(ServerUser &u, const unsigned char *data, int len, QByteArray &cache, bool force = false);
 	void run();
 
 	bool validateChannelName(const QString &name);
 	bool validateUserName(const QString &name);
 
-	bool checkDecrypt(ServerUser *u, const char *encrypted, char *plain, unsigned int cryptlen);
+	bool checkDecrypt(ServerUser *u, const unsigned char *encrypted, unsigned char *plain, unsigned int cryptlen);
 
 	bool hasPermission(ServerUser *p, Channel *c, QFlags< ChanACL::Perm > perm);
 	QFlags< ChanACL::Perm > effectivePermissions(ServerUser *p, Channel *c);
@@ -310,22 +327,27 @@ public:
 	void clearACLCache(User *p = nullptr);
 	void clearWhisperTargetCache();
 
-	void sendProtoAll(const ::google::protobuf::Message &msg, unsigned int msgType, unsigned int minversion);
-	void sendProtoExcept(ServerUser *, const ::google::protobuf::Message &msg, unsigned int msgType,
+	void sendProtoAll(const ::google::protobuf::Message &msg, Mumble::Protocol::TCPMessageType type,
+					  unsigned int minversion);
+	void sendProtoExcept(ServerUser *, const ::google::protobuf::Message &msg, Mumble::Protocol::TCPMessageType type,
 						 unsigned int minversion);
-	void sendProtoMessage(ServerUser *, const ::google::protobuf::Message &msg, unsigned int msgType);
+	void sendProtoMessage(ServerUser *, const ::google::protobuf::Message &msg, Mumble::Protocol::TCPMessageType type);
 
 	// sendAll sends a protobuf message to all users on the server whose version is either bigger than v or
 	// lower than ~v. If v == 0 the message is sent to everyone.
-#define MUMBLE_MH_MSG(x)                                                                                     \
-	void sendAll(const MumbleProto::x &msg, unsigned int v = 0) { sendProtoAll(msg, MessageHandler::x, v); } \
-	void sendExcept(ServerUser *u, const MumbleProto::x &msg, unsigned int v = 0) {                          \
-		sendProtoExcept(u, msg, MessageHandler::x, v);                                                       \
-	}                                                                                                        \
-	void sendMessage(ServerUser *u, const MumbleProto::x &msg) { sendProtoMessage(u, msg, MessageHandler::x); }
-
-	MUMBLE_MH_ALL
-#undef MUMBLE_MH_MSG
+#define PROCESS_MUMBLE_TCP_MESSAGE(name, value)                                        \
+	void sendAll(const MumbleProto::name &msg, unsigned int v = 0) {                   \
+		sendProtoAll(msg, Mumble::Protocol::TCPMessageType::name, v);                  \
+	}                                                                                  \
+	void sendExcept(ServerUser *u, const MumbleProto::name &msg, unsigned int v = 0) { \
+		sendProtoExcept(u, msg, Mumble::Protocol::TCPMessageType::name, v);            \
+	}                                                                                  \
+	void sendMessage(ServerUser *u, const MumbleProto::name &msg) {                    \
+		sendProtoMessage(u, msg, Mumble::Protocol::TCPMessageType::name);              \
+	}
+
+	MUMBLE_ALL_TCP_MESSAGES
+#undef PROCESS_MUMBLE_TCP_MESSAGE
 
 	static void hashAssign(QString &destination, QByteArray &hash, const QString &str);
 	static void hashAssign(QByteArray &destination, QByteArray &hash, const QByteArray &source);
@@ -434,9 +456,9 @@ public:
 	void dblog(const QString &str) const;
 
 	// From msgHandler. Implementation in Messages.cpp
-#define MUMBLE_MH_MSG(x) void msg##x(ServerUser *, MumbleProto::x &);
-	MUMBLE_MH_ALL
-#undef MUMBLE_MH_MSG
+#define PROCESS_MUMBLE_TCP_MESSAGE(name, value) void msg##name(ServerUser *, MumbleProto::name &);
+	MUMBLE_ALL_TCP_MESSAGES
+#undef PROCESS_MUMBLE_TCP_MESSAGE
 };
 
 #endif
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 87392a1c3..129980708 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -24,6 +24,7 @@ endif()
 
 if(server)
 	use_test("TestCrypt")
+	use_test("TestAudioReceiverBuffer")
 endif()
 
 # Shared tests
@@ -32,6 +33,7 @@ use_test("TestCryptographicRandom")
 use_test("TestFFDHE")
 use_test("TestPacketDataStream")
 use_test("TestPasswordGenerator")
+use_test("TestMumbleProtocol")
 use_test("TestSelfSignedCertificate")
 use_test("TestServerAddress")
 use_test("TestSSLLocks")
diff --git a/src/tests/TestAudioReceiverBuffer/CMakeLists.txt b/src/tests/TestAudioReceiverBuffer/CMakeLists.txt
new file mode 100644
index 000000000..8509f9e12
--- /dev/null
+++ b/src/tests/TestAudioReceiverBuffer/CMakeLists.txt
@@ -0,0 +1,35 @@
+# Copyright 2020-2021 The Mumble Developers. All rights reserved.
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file at the root of the
+# Mumble source tree or at <https://www.mumble.info/LICENSE>.
+
+add_executable(TestAudioReceiverBuffer TestAudioReceiverBuffer.cpp)
+
+set_target_properties(TestAudioReceiverBuffer PROPERTIES AUTOMOC ON)
+
+target_link_libraries(TestAudioReceiverBuffer PRIVATE shared Qt5::Test)
+
+target_include_directories(TestAudioReceiverBuffer PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
+
+# In order to be able to mock the ServerUser class, we have to extract the server-specific source and header
+# files into an isolated environment, such that they don't include/link with the remaining server files.
+set(CUSTOM_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include")
+file(MAKE_DIRECTORY "${CUSTOM_INCLUDE_DIR}")
+set(HEADER_TO_COPY "${CMAKE_SOURCE_DIR}/src/murmur/AudioReceiverBuffer.h")
+set(SOURCE_TO_COPY "${CMAKE_SOURCE_DIR}/src/murmur/AudioReceiverBuffer.cpp")
+get_filename_component(HEADER_NAME "${HEADER_TO_COPY}" NAME)
+get_filename_component(SOURCE_NAME "${SOURCE_TO_COPY}" NAME)
+set(COPIED_HEADER "${CUSTOM_INCLUDE_DIR}/${HEADER_NAME}")
+set(COPIED_SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${SOURCE_NAME}")
+
+add_custom_command(OUTPUT "${COPIED_SOURCE}" "${COPIED_HEADER}" # the rule writes both files, so declare both
+	COMMAND ${CMAKE_COMMAND} -E copy "${HEADER_TO_COPY}" "${COPIED_HEADER}"
+	COMMAND ${CMAKE_COMMAND} -E copy "${SOURCE_TO_COPY}" "${COPIED_SOURCE}"
+	DEPENDS "${HEADER_TO_COPY}" "${SOURCE_TO_COPY}"
+)
+
+target_sources(TestAudioReceiverBuffer PRIVATE "${COPIED_SOURCE}")
+
+target_include_directories(TestAudioReceiverBuffer PRIVATE "${CUSTOM_INCLUDE_DIR}")
+
+add_test(NAME TestAudioReceiverBuffer COMMAND $<TARGET_FILE:TestAudioReceiverBuffer>)
diff --git a/src/tests/TestAudioReceiverBuffer/ServerUser.h b/src/tests/TestAudioReceiverBuffer/ServerUser.h
new file mode 100644
index 000000000..d07985227
--- /dev/null
+++ b/src/tests/TestAudioReceiverBuffer/ServerUser.h
@@ -0,0 +1,23 @@
+// Copyright 2021 The Mumble Developers. All rights reserved.
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file at the root of the
+// Mumble source tree or at <https://www.mumble.info/LICENSE>.
+
+
+// NOTE: This is merely a mock of the ServerUser class
+
+#include "Version.h"
+
+#include <string>
+
+struct ServerUser {
+	ServerUser(unsigned int uiSession, Version::mumble_raw_version_t version, bool deaf = false, bool selfDeaf = false,
+			   const std::string &context = "")
+		: uiSession(uiSession), uiVersion(version), bDeaf(deaf), bSelfDeaf(selfDeaf), ssContext(context) {}
+
+	unsigned int uiSession;                  // session ID of this (mocked) user
+	Version::mumble_raw_version_t uiVersion; // version in raw (Version::mumble_raw_version_t) encoding
+	bool bDeaf;                              // initialised from the constructor's "deaf" argument
+	bool bSelfDeaf;                          // initialised from the constructor's "selfDeaf" argument
+	std::string ssContext;                   // initialised from "context"; empty string by default
+};
diff --git a/src/tests/TestAudioReceiverBuffer/TestAudioReceiverBuffer.cpp b/src/tests/TestAudioReceiverBuffer/TestAudioReceiverBuffer.cpp
new file mode 100644
index 000000000..46cf8f5b5
--- /dev/null
+++ b/src/tests/TestAudioReceiverBuffer/TestAudioReceiverBuffer.cpp
@@ -0,0 +1,276 @@
+// Copyright 2021 The Mumble Developers. All rights reserved.
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file at the root of the
+// Mumble source tree or at <https://www.mumble.info/LICENSE>.
+
+#include "AudioReceiverBuffer.h"
+#include "MumbleProtocol.h"
+#include "Version.h"
+
+#include <QObject>
+#include <QtTest>
+
+#include <array>
+#include <unordered_set>
+
+#include <QDebug>
+
+QDebug &operator<<(QDebug &stream, const ServerUser &user) {
+	int major, minor, patch; // receive the components decoded from the raw version below
+	Version::fromRaw(user.uiVersion, &major, &minor, &patch);
+	stream.nospace() << "ServerUser{ session: " << user.uiSession << ", version: " << major << "." << minor;
+	stream << "." << patch << ", deaf: " << user.bDeaf << ", selfDeaf: " << user.bSelfDeaf;
+	return stream << ", ssContext: " << QString::fromStdString(user.ssContext) << " }";
+}
+
+QDebug &operator<<(QDebug &stream, const AudioReceiver &receiver) {
+	stream.nospace() << "AudioReceiver{ receiver: " << receiver.getReceiver(); // nested ServerUser printout
+	stream << ", context: " << receiver.getContext();
+	return stream << ", volAdj: " << receiver.getVolumeAdjustment().factor << " }";
+}
+
+bool operator<(const AudioReceiver &lhs, const AudioReceiver &rhs) {
+	// Session IDs are expected to be unique, so comparing them alone gives an unambiguous ordering
+	return rhs.getReceiver().uiSession > lhs.getReceiver().uiSession;
+}
+
+
+Version::mumble_raw_version_t vOld1 = Version::toRaw(1, 2, 5); // vOld1-3: versions older than vNew
+Version::mumble_raw_version_t vOld2 = Version::toRaw(1, 3, 1);
+Version::mumble_raw_version_t vOld3 = Version::toRaw(1, 4, 0);
+Version::mumble_raw_version_t vNew  = Mumble::Protocol::PROTOBUF_INTRODUCTION_VERSION; // incompatible with vOld*
+
+std::array< ServerUser, 5 > users = { ServerUser(0, vOld1), ServerUser(1, vOld2), ServerUser(2, vOld3),
+									  ServerUser(3, vNew), ServerUser(4, vNew) }; // plain users: 3 old, 2 new
+
+ServerUser deafUser(5, vOld1, true);                        // has bDeaf set
+ServerUser selfDeafUser(6, vNew, false, true);              // has bSelfDeaf set
+ServerUser contextUser1(7, vNew, false, false, "context1"); // shares its context with contextUser3
+ServerUser contextUser2(8, vNew, false, false, "context2"); // the only user with "context2"
+ServerUser contextUser3(9, vNew, false, false, "context1"); // shares its context with contextUser1
+
+
+struct Range { // NOTE(review): not referenced anywhere else in this test file - candidate for removal
+	Range(ServerUser *begin, ServerUser *end) : begin(begin), end(end){};
+
+	ServerUser *begin; // presumably the first element of the range - TODO confirm
+	ServerUser *end;   // presumably one past the last element (half-open range) - TODO confirm
+};
+
+class PseudoEncoder {
+public:
+	PseudoEncoder() = default;
+
+	// Tells whether the given parameters need a fresh encoding and, if they do, records that encoding
+	bool checkRequiresEncoding(Version::mumble_raw_version_t protocolVersion, Mumble::Protocol::audio_context_t context,
+							   float volumeAdjustment) {
+		const bool canReuse = m_encodings != 0
+							  && Mumble::Protocol::protocolVersionsAreCompatible(m_protocolVersion, protocolVersion)
+							  && m_context == context && m_volumeAdjustment == volumeAdjustment;
+		if (canReuse) {
+			return false;
+		}
+		// Remember the parameters that the most recent encoding was performed for
+		++m_encodings;
+		m_protocolVersion  = protocolVersion;
+		m_context          = context;
+		m_volumeAdjustment = volumeAdjustment;
+		return true;
+	}
+
+	std::size_t getAmountOfEncodings() const { return m_encodings; } // encodings recorded since the last reset()
+
+	void reset() { m_encodings = 0; } // a count of zero makes the next check report "requires encoding"
+
+protected:
+	std::size_t m_encodings                         = 0;
+	Version::mumble_raw_version_t m_protocolVersion = Version::UNKNOWN;
+	Mumble::Protocol::audio_context_t m_context     = Mumble::Protocol::AudioContext::Invalid;
+	float m_volumeAdjustment                        = 0.0f;
+};
+
+class TestAudioReceiverBuffer : public QObject {
+	Q_OBJECT;
+private slots:
+	void test_preconditions() {
+		// Preconditions for these tests to make any sense: old versions are mutually compatible, but not with vNew
+		QVERIFY(Mumble::Protocol::protocolVersionsAreCompatible(vOld1, vOld2));
+		QVERIFY(Mumble::Protocol::protocolVersionsAreCompatible(vOld2, vOld3));
+		QVERIFY(Mumble::Protocol::protocolVersionsAreCompatible(vOld1, vOld3));
+		QVERIFY(!Mumble::Protocol::protocolVersionsAreCompatible(vOld1, vNew));
+		QVERIFY(!Mumble::Protocol::protocolVersionsAreCompatible(vOld2, vNew));
+		QVERIFY(!Mumble::Protocol::protocolVersionsAreCompatible(vOld3, vNew));
+
+		// Make sure we are not accidentally using duplicate IDs for our dummy users
+		std::unordered_set< unsigned int > usedIDs;
+		for (const ServerUser &current : users) {
+			QVERIFY(usedIDs.find(current.uiSession) == usedIDs.end());
+			usedIDs.insert(current.uiSession);
+		}
+		for (const ServerUser &current : { deafUser, selfDeafUser, contextUser1, contextUser2, contextUser3 }) {
+			QVERIFY(usedIDs.find(current.uiSession) == usedIDs.end());
+			usedIDs.insert(current.uiSession);
+		}
+		// NOTE(review): presumably required because "Normal" has to win when duplicate receivers get merged
+		QVERIFY(Mumble::Protocol::AudioContext::Normal < Mumble::Protocol::AudioContext::Whisper);
+		QVERIFY(Mumble::Protocol::AudioContext::Normal < Mumble::Protocol::AudioContext::Shout);
+		QVERIFY(Mumble::Protocol::AudioContext::Normal < Mumble::Protocol::AudioContext::Listen);
+
+		QVERIFY(contextUser1.ssContext == contextUser3.ssContext);
+		QVERIFY(contextUser1.ssContext != contextUser2.ssContext);
+	}
+
+	void test_addReceiver() {
+		AudioReceiverBuffer buffer;
+
+		ServerUser &sender = users[0]; // is also passed as a receiver below
+
+		buffer.addReceiver(sender, sender, Mumble::Protocol::AudioContext::Listen, false);
+		buffer.addReceiver(sender, users[1], Mumble::Protocol::AudioContext::Whisper, false);
+		buffer.addReceiver(sender, users[2], Mumble::Protocol::AudioContext::Shout, false);
+		buffer.addReceiver(sender, contextUser1, Mumble::Protocol::AudioContext::Shout, false);
+		buffer.addReceiver(sender, selfDeafUser, Mumble::Protocol::AudioContext::Shout, false);
+		buffer.addReceiver(sender, deafUser, Mumble::Protocol::AudioContext::Shout, false);
+		// NOTE(review): only 3 of the 6 are expected - presumably sender, selfDeafUser and deafUser get rejected
+		QCOMPARE(buffer.getReceivers(false).size(), static_cast< std::size_t >(3));
+		QVERIFY(buffer.getReceivers(true).empty());
+	}
+
+	void test_addReceiverPositional() {
+		AudioReceiverBuffer buffer;
+
+		ServerUser &sender = contextUser1;
+
+		buffer.addReceiver(sender, users[0], Mumble::Protocol::AudioContext::Normal, true);
+		buffer.addReceiver(sender, users[1], Mumble::Protocol::AudioContext::Normal, true);
+		buffer.addReceiver(sender, contextUser2, Mumble::Protocol::AudioContext::Normal, true);
+		buffer.addReceiver(sender, contextUser3, Mumble::Protocol::AudioContext::Normal, true);
+
+		// Only contextUser3 shares the sender's context ("context1"), so it is the sole positional receiver
+		QCOMPARE(buffer.getReceivers(true).size(), static_cast< std::size_t >(1));
+		// All other receivers will get the audio without positional data
+		QCOMPARE(buffer.getReceivers(false).size(), static_cast< std::size_t >(3));
+	}
+
+	void test_forceAddReceiver() {
+		AudioReceiverBuffer buffer;
+
+		ServerUser &sender = users[0]; // the user handed to forceAddReceiver below
+
+		buffer.forceAddReceiver(sender, Mumble::Protocol::AudioContext::Normal, false);
+
+		QCOMPARE(buffer.getReceivers(false).size(), static_cast< std::size_t >(1)); // ends up as non-positional
+		QVERIFY(buffer.getReceivers(true).empty());
+	}
+
+	void test_preprocessBuffer() {
+		AudioReceiverBuffer buffer;
+
+		ServerUser &sender = users[0];
+
+		buffer.addReceiver(sender, users[3], Mumble::Protocol::AudioContext::Listen, false,
+						   VolumeAdjustment::fromFactor(1.2f));
+		buffer.addReceiver(sender, users[1], Mumble::Protocol::AudioContext::Whisper, false);
+		buffer.addReceiver(sender, users[1], Mumble::Protocol::AudioContext::Normal, false);
+		buffer.addReceiver(sender, users[2], Mumble::Protocol::AudioContext::Shout, false);
+		buffer.addReceiver(sender, users[1], Mumble::Protocol::AudioContext::Listen, false);
+		buffer.addReceiver(sender, users[1], Mumble::Protocol::AudioContext::Shout, false);
+		buffer.addReceiver(sender, users[3], Mumble::Protocol::AudioContext::Listen, false,
+						   VolumeAdjustment::fromFactor(1.4f));
+		buffer.addReceiver(sender, contextUser1, Mumble::Protocol::AudioContext::Shout, false);
+
+		buffer.preprocessBuffer();
+
+		// The preprocessing should have removed all duplicates of users[1] and users[3]: 4 distinct users remain
+		QCOMPARE(buffer.getReceivers(false).size(), static_cast< std::size_t >(4));
+		QVERIFY(buffer.getReceivers(true).empty());
+
+		const AudioReceiver *duplicateReceiver = nullptr; // the surviving entry for users[1]
+		const AudioReceiver *volumeReceiver    = nullptr; // the surviving entry for users[3]
+		for (const AudioReceiver &current : buffer.getReceivers(false)) {
+			if (current.getReceiver().uiSession == users[1].uiSession) {
+				duplicateReceiver = &current;
+			} else if (current.getReceiver().uiSession == users[3].uiSession) {
+				volumeReceiver = &current;
+			}
+		}
+
+		QVERIFY(duplicateReceiver != nullptr);
+		QVERIFY(volumeReceiver != nullptr);
+
+		// Verify that the "Normal" speech receiver has survived (instead of one of the other contexts)
+		QCOMPARE(duplicateReceiver->getContext(), Mumble::Protocol::AudioContext::Normal);
+		// Verify that the highest volume adjustment has survived
+		QCOMPARE(volumeReceiver->getVolumeAdjustment().factor, 1.4f);
+	}
+
+	void test_encoding() {
+		AudioReceiverBuffer buffer;
+
+		ServerUser &sender = contextUser2;
+
+		buffer.addReceiver(sender, users[3], Mumble::Protocol::AudioContext::Shout, false);
+		buffer.addReceiver(sender, users[1], Mumble::Protocol::AudioContext::Listen, false);
+		buffer.addReceiver(sender, users[4], Mumble::Protocol::AudioContext::Shout, false);
+		buffer.addReceiver(sender, users[2], Mumble::Protocol::AudioContext::Normal, false);
+		buffer.addReceiver(sender, users[0], Mumble::Protocol::AudioContext::Normal, false);
+		buffer.addReceiver(sender, contextUser1, Mumble::Protocol::AudioContext::Shout, false,
+						   VolumeAdjustment::fromFactor(1.4f));
+		buffer.addReceiver(sender, contextUser2, Mumble::Protocol::AudioContext::Shout, false,
+						   VolumeAdjustment::fromFactor(1.2f));
+		buffer.addReceiver(sender, contextUser3, Mumble::Protocol::AudioContext::Shout, false,
+						   VolumeAdjustment::fromFactor(1.2f));
+
+		buffer.preprocessBuffer();
+
+		std::vector< AudioReceiver > receivers = buffer.getReceivers(false);
+		auto receiverRange = AudioReceiverBuffer::getReceiverRange(receivers.begin(), receivers.end());
+
+		std::size_t processedReceiver = 0;
+		PseudoEncoder encoder;
+		// Walk the list range by range: a range start must need a new encoding, nothing inside a range may
+		while (receiverRange.begin != receiverRange.end) {
+			qWarning("Opening a new range");
+			qWarning() << "Start:" << *receiverRange.begin;
+			qWarning() << "End:" << *(receiverRange.end - 1);
+
+			QVERIFY2(encoder.checkRequiresEncoding(receiverRange.begin->getReceiver().uiVersion,
+												   receiverRange.begin->getContext(),
+												   receiverRange.begin->getVolumeAdjustment().factor),
+					 "Starting a new range, but no re-encoding is required");
+
+			for (auto it = receiverRange.begin; it != receiverRange.end; ++it) {
+				qWarning() << "Processing" << *it;
+				QVERIFY2(!encoder.checkRequiresEncoding(it->getReceiver().uiVersion, it->getContext(),
+														it->getVolumeAdjustment().factor),
+						 "Mid-range re-encoding required");
+				processedReceiver++;
+			}
+
+			receiverRange = AudioReceiverBuffer::getReceiverRange(receiverRange.end, receivers.end());
+		}
+
+		QCOMPARE(processedReceiver, receivers.size()); // the ranges must have covered every receiver exactly once
+
+		std::size_t requiredReencodings = encoder.getAmountOfEncodings();
+		// Check ALL permutations to verify that preprocessBuffer's order minimizes the re-encodings. next_permutation
+		// only visits every permutation when started from the sorted sequence, so sort (by session ID) first.
+		std::sort(receivers.begin(), receivers.end());
+		do {
+			encoder.reset();
+
+			for (const AudioReceiver &current : receivers) {
+				encoder.checkRequiresEncoding(current.getReceiver().uiVersion, current.getContext(),
+											  current.getVolumeAdjustment().factor);
+			}
+
+			QVERIFY2(encoder.getAmountOfEncodings() >= requiredReencodings,
+					 "There exists a permutation of the receivers, that requires less re-encoding steps");
+		} while (std::next_permutation(receivers.begin(), receivers.end()));
+
+		qDebug() << "Sample receiver list required" << requiredReencodings << "encoding steps";
+	}
+};
+
+QTEST_MAIN(TestAudioReceiverBuffer)
+#include "TestAudioReceiverBuffer.moc"
diff --git a/src/tests/TestMumbleProtocol/CMakeLists.txt b/src/tests/TestMumbleProtocol/CMakeLists.txt
new file mode 100644
index 000000000..b6e3d2cd7
--- /dev/null
+++ b/src/tests/TestMumbleProtocol/CMakeLists.txt
@@ -0,0 +1,12 @@
+# Copyright 2020-2021 The Mumble Developers. All rights reserved.
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file at the root of the
+# Mumble source tree or at <https://www.mumble.info/LICENSE>.
+
+add_executable(TestMumbleProtocol TestMumbleProtocol.cpp)
+
+set_target_properties(TestMumbleProtocol PROPERTIES AUTOMOC ON)
+
+target_link_libraries(TestMumbleProtocol PRIVATE shared Qt5::Test)
+
+add_test(NAME TestMumbleProtocol COMMAND $<TARGET_FILE:TestMumbleProtocol>)
diff --git a/src/tests/TestMumbleProtocol/TestMumbleProtocol.cpp b/src/tests/TestMumbleProtocol/TestMumbleProtocol.cpp
new file mode 100644
index 000000000..85e0b502a
--- /dev/null
+++ b/src/tests/TestMumbleProtocol/TestMumbleProtocol.cpp
@@ -0,0 +1,306 @@
+// Copyright 2021 The Mumble Developers. All rights reserved.
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file at the root of the
+// Mumble source tree or at <https://www.mumble.info/LICENSE>.
+
+#include "MumbleProtocol.h"
+#include "MumbleUDP.pb.h"
+#include "Version.h"
+
+#include <QObject>
+#include <QtTest>
+
+#include <cstring>
+#include <sstream>
+#include <string>
+
+namespace Mumble {
+namespace Protocol {
+	// Add toString functions for use by QCOMPARE
+
+	char *toString(const Mumble::Protocol::AudioData &data) {
+		// Note: of the payload only its address and size are printed, not its content
+		std::stringstream out;
+		out << "{ payload: {" << static_cast< const void * >(data.payload.data()) << ", " << data.payload.size();
+		out << "}, frameNumber: " << data.frameNumber << ", isLastFrame: " << data.isLastFrame;
+		out << ", senderSession: " << data.senderSession << ", targetOrContext: " << data.targetOrContext;
+		out << ", usedCodec: " << static_cast< int >(data.usedCodec);
+		out << ", containsPositionalData: " << data.containsPositionalData;
+
+		if (data.containsPositionalData) {
+			out << ", position: {";
+			for (unsigned int i = 0; i < data.position.size(); ++i) {
+				// Every coordinate but the first one is preceded by a separator
+				if (i > 0) {
+					out << ", ";
+				}
+				out << data.position[i];
+			}
+			out << "}";
+		}
+
+		out << ", volumeAdjustment: " << data.volumeAdjustment.factor << " }";
+
+		// Hand the text out as a new[]-allocated C string (presumably released by the QTest framework)
+		const std::string text = out.str();
+		char *result           = new char[text.size() + 1];
+		std::strcpy(result, text.c_str());
+
+		return result;
+	}
+
+	char *toString(const Mumble::Protocol::PingData &data) {
+		// Produces a "{ field: value, ... }" listing of all PingData fields
+		std::stringstream out;
+
+		out << "{ timestamp: " << data.timestamp;
+		out << ", requestAdditionalInformation: " << data.requestAdditionalInformation;
+		out << ", containsAdditionalInformation: " << data.containsAdditionalInformation;
+		out << ", userCount: " << data.userCount << ", maxUserCount: " << data.maxUserCount;
+		out << ", maxBandwidthPerUser: " << data.maxBandwidthPerUser << " }";
+
+		// Hand the text out as a new[]-allocated C string (presumably released by the QTest framework)
+		const std::string text = out.str();
+		char *result           = new char[text.size() + 1];
+		std::strcpy(result, text.c_str());
+
+		return result;
+	}
+
+
+	template< Role role > class TestAudioEncoder : public UDPAudioEncoder< role > {
+	public:
+		using UDPAudioEncoder< role >::UDPAudioEncoder; // inherit the base class' constructors
+
+		// Re-declare these base-class functions as public (presumably non-public there) so the tests can call them
+		using UDPAudioEncoder< role >::getPreEncodedContext;
+		using UDPAudioEncoder< role >::getPreEncodedVolumeAdjustment;
+	};
+
+}; // namespace Protocol
+}; // namespace Mumble
+
+template< Mumble::Protocol::Role encoderRole, Mumble::Protocol::Role decoderRole > void do_test_ping() {
+	Mumble::Protocol::UDPPingEncoder< encoderRole > encoder;
+	Mumble::Protocol::UDPDecoder< decoderRole > decoder;
+
+	for (Version::mumble_raw_version_t version :
+		 { Version::toRaw(1, 3, 0), Mumble::Protocol::PROTOBUF_INTRODUCTION_VERSION }) {
+		int major, minor, patch;
+		Version::fromRaw(version, &major, &minor, &patch);
+		qWarning("Using protocol version %d.%d.%d", major, minor, patch);
+
+		// Note: When the decoder is set to a version < PROTOBUF_INTRODUCTION_VERSION, it can decode pings in
+		// either format (which is why only the encoder's protocol version is changed here)
+		encoder.setProtocolVersion(version);
+
+		Mumble::Protocol::PingData data;
+		data.timestamp = 42;
+
+		// Regular connectivity ping
+		auto encodedData = encoder.encodePingPacket(data);
+		QVERIFY(decoder.decode(encodedData));
+
+		QCOMPARE(decoder.getMessageType(), Mumble::Protocol::UDPMessageType::Ping);
+		QCOMPARE(decoder.getPingData(), data);
+
+		// Extended ping (request)
+		if (decoderRole == Mumble::Protocol::Role::Server) {
+			QVERIFY(encoderRole == Mumble::Protocol::Role::Client);
+
+			data.requestAdditionalInformation = true;
+
+			encodedData = encoder.encodePingPacket(data);
+			QVERIFY(decoder.decode(encodedData));
+
+			QCOMPARE(decoder.getMessageType(), Mumble::Protocol::UDPMessageType::Ping);
+			QCOMPARE(decoder.getPingData(), data);
+		} else {
+			QVERIFY(encoderRole == Mumble::Protocol::Role::Server);
+
+			data.containsAdditionalInformation = true;
+			data.userCount                     = 12;
+			data.maxUserCount                  = 42;
+			data.maxBandwidthPerUser           = 512;
+
+			encodedData = encoder.encodePingPacket(data);
+			QVERIFY(decoder.decode(encodedData));
+			// NOTE(review): unlike in the cases above, the decoded PingData is not compared against data here
+			QCOMPARE(decoder.getMessageType(), Mumble::Protocol::UDPMessageType::Ping);
+		}
+	}
+}
+
+void printData(gsl::span< const Mumble::Protocol::byte > data) {
+	// Debug helper: renders every byte as its decimal value, each followed by a blank
+	QString text = "Data is: { ";
+	for (const Mumble::Protocol::byte current : data) {
+		text.append(QString::number(static_cast< int >(current))).append(" ");
+	}
+	text.append(" }");
+	qDebug() << text;
+}
+
+template< Mumble::Protocol::Role encoderRole, Mumble::Protocol::Role decoderRole > void do_test_audio() {
+	Mumble::Protocol::UDPAudioEncoder< encoderRole > encoder;
+	Mumble::Protocol::UDPDecoder< decoderRole > decoder;
+
+	std::string payloadData = "I am the payload";
+
+	for (Version::mumble_raw_version_t version :
+		 { Version::toRaw(1, 3, 0), Mumble::Protocol::PROTOBUF_INTRODUCTION_VERSION }) {
+		int major, minor, patch;
+		Version::fromRaw(version, &major, &minor, &patch);
+		qWarning("Using protocol version %d.%d.%d", major, minor, patch);
+
+		encoder.setProtocolVersion(version);
+		decoder.setProtocolVersion(version);
+
+		Mumble::Protocol::AudioData data;
+		data.payload = { reinterpret_cast< const Mumble::Protocol::byte * >(payloadData.c_str()), payloadData.size() };
+
+		data.frameNumber            = 12;
+		data.containsPositionalData = true;
+		data.position               = { 3, 2, 1 };
+		data.isLastFrame            = true;
+		data.usedCodec              = Mumble::Protocol::AudioCodec::Opus;
+		if (version >= Mumble::Protocol::PROTOBUF_INTRODUCTION_VERSION
+			&& decoderRole == Mumble::Protocol::Role::Client) {
+			// Transmitting volume adjustment is only supported in the new packet format
+			// and only in the server->client direction
+			data.volumeAdjustment = VolumeAdjustment::fromFactor(1.4f);
+		}
+
+		if (decoderRole == Mumble::Protocol::Role::Client) {
+			QVERIFY(encoder.getRole() == Mumble::Protocol::Role::Server);
+
+			data.targetOrContext = Mumble::Protocol::AudioContext::Shout;
+			data.senderSession   = 42;
+		} else {
+			QVERIFY(encoder.getRole() == Mumble::Protocol::Role::Client);
+
+			data.targetOrContext = Mumble::Protocol::ReservedTargetIDs::ServerLoopback;
+		}
+		// Round trip #1: encode the complete packet, decode it again and compare the result with the input
+		auto encodedData = encoder.encodeAudioPacket(data);
+		QVERIFY(!encodedData.empty());
+
+		QVERIFY(decoder.decode(encodedData));
+
+		QCOMPARE(decoder.getMessageType(), Mumble::Protocol::UDPMessageType::Audio);
+		QCOMPARE(decoder.getAudioData(), data);
+
+		qWarning() << "Partial re-encoding";
+
+		// Re-encode fields from the "variable" part
+		data.targetOrContext = Mumble::Protocol::AudioContext::Listen;
+		if (version >= Mumble::Protocol::PROTOBUF_INTRODUCTION_VERSION
+			&& decoderRole == Mumble::Protocol::Role::Client) {
+			data.volumeAdjustment = VolumeAdjustment::fromFactor(0.9f);
+		}
+		// Round trip #2: this time the packet is produced by updateAudioPacket() instead of encodeAudioPacket()
+		encodedData = encoder.updateAudioPacket(data);
+		QVERIFY(!encodedData.empty());
+
+		QVERIFY(decoder.decode(encodedData));
+
+		QCOMPARE(decoder.getMessageType(), Mumble::Protocol::UDPMessageType::Audio);
+		QCOMPARE(decoder.getAudioData(), data);
+
+		qWarning() << "Removing positional data";
+		// Update the audio packet to no longer contain positional data.
+		data.containsPositionalData = false;
+		data.targetOrContext        = Mumble::Protocol::AudioContext::Normal;
+
+		encoder.dropPositionalData();
+
+		encodedData = encoder.updateAudioPacket(data);
+		QVERIFY(!encodedData.empty());
+
+		QVERIFY(decoder.decode(encodedData));
+
+		QCOMPARE(decoder.getMessageType(), Mumble::Protocol::UDPMessageType::Audio);
+		QCOMPARE(decoder.getAudioData(), data);
+	}
+}
+
+class TestMumbleProtocol : public QObject {
+	Q_OBJECT;
+private slots:
+	void test_ping_client_to_server() {
+		do_test_ping< Mumble::Protocol::Role::Client, Mumble::Protocol::Role::Server >(); // < encoder, decoder >
+	}
+
+	void test_ping_server_to_client() {
+		do_test_ping< Mumble::Protocol::Role::Server, Mumble::Protocol::Role::Client >(); // < encoder, decoder >
+	}
+
+	void test_audio_client_to_server() {
+		do_test_audio< Mumble::Protocol::Role::Client, Mumble::Protocol::Role::Server >(); // < encoder, decoder >
+	}
+
+	void test_audio_server_to_client() {
+		do_test_audio< Mumble::Protocol::Role::Server, Mumble::Protocol::Role::Client >(); // < encoder, decoder >
+	}
+
+	void test_preEncode_audio_context() {
+		Mumble::Protocol::TestAudioEncoder< Mumble::Protocol::Role::Server > encoder;
+
+		MumbleUDP::Audio msg; // reference message; only its context field is ever set in this test
+		std::vector< Mumble::Protocol::byte > buffer;
+
+		for (Mumble::Protocol::audio_context_t currentContext = Mumble::Protocol::AudioContext::begin;
+			 currentContext < Mumble::Protocol::AudioContext::end; currentContext++) {
+			gsl::span< const Mumble::Protocol::byte > snippet = encoder.getPreEncodedContext(currentContext);
+
+			QVERIFY2(!snippet.empty(), "Unable to find pre-encoded snippet for audio context");
+			// Serialize the reference message for this context; the snippet has to match it byte for byte
+			msg.set_context(currentContext);
+
+#if GOOGLE_PROTOBUF_VERSION >= 3002000
+			// ByteSizeLong() was introduced in Protobuf v3.2 as a replacement for ByteSize()
+			buffer.resize(msg.ByteSizeLong());
+#else
+			buffer.resize(msg.ByteSize());
+#endif
+			msg.SerializeWithCachedSizesToArray(buffer.data());
+
+			QCOMPARE(snippet.size(), buffer.size());
+			QVERIFY2(std::equal(snippet.begin(), snippet.end(), buffer.begin()), "Pre-encoded snippet is incorrect");
+		}
+
+		// Ensure that an unknown context yields an empty span
+		QVERIFY(encoder.getPreEncodedContext(Mumble::Protocol::AudioContext::end).empty());
+	}
+
+	void test_preEncode_volume_adjustments() {
+		Mumble::Protocol::TestAudioEncoder< Mumble::Protocol::Role::Server > encoder;
+
+		MumbleUDP::Audio msg;
+		// Range of integer dB adjustments for which a pre-encoded snippet is expected to exist
+		constexpr int min = -60;
+		constexpr int max = 30;
+
+		for (int currentAdjustment = min; currentAdjustment <= max; ++currentAdjustment) {
+			msg.Clear();
+
+			gsl::span< const Mumble::Protocol::byte > snippet =
+				encoder.getPreEncodedVolumeAdjustment(VolumeAdjustment::fromDBAdjustment(currentAdjustment));
+
+			QVERIFY2(!snippet.empty(), "Unable to find pre-encoded snippet for volume adjustment");
+			// The snippet has to be parseable on its own - don't let a failed parse go unnoticed
+			QVERIFY(msg.ParseFromArray(snippet.data(), static_cast< int >(snippet.size())));
+
+			// This will perform a fuzzy-compare
+			QCOMPARE(msg.volume_adjustment(), std::pow(2.0f, currentAdjustment / 6.0f));
+		}
+
+		// Ensure that an unknown/unexpected volume adjustment yields an empty span
+		QVERIFY(encoder.getPreEncodedVolumeAdjustment(VolumeAdjustment::fromDBAdjustment(min - 1)).empty());
+		// We only expect pre-encoded values for integer dB adjustments
+		QVERIFY(encoder.getPreEncodedVolumeAdjustment(VolumeAdjustment(std::pow(2.0f, (min + 0.5) / 6.0f))).empty());
+	}
+};
+
+QTEST_MAIN(TestMumbleProtocol)
+#include "TestMumbleProtocol.moc"
diff --git a/src/tests/TestPacketDataStream/TestPacketDataStream.cpp b/src/tests/TestPacketDataStream/TestPacketDataStream.cpp
index 3b2dceffb..b705e1be8 100644
--- a/src/tests/TestPacketDataStream/TestPacketDataStream.cpp
+++ b/src/tests/TestPacketDataStream/TestPacketDataStream.cpp
@@ -3,8 +3,8 @@
 // that can be found in the LICENSE file at the root of the
 // Mumble source tree or at <https://www.mumble.info/LICENSE>.
 
-#include "Message.h"
 #include "PacketDataStream.h"
+
 #include <QObject>
 #include <QtCore>
 #include <QtNetwork>
author	Robert Adam <dev@robert-adam.de>	2021-11-22 13:17:58 +0300
committer	Robert Adam <dev@robert-adam.de>	2022-03-27 10:49:58 +0300
commit	1d45d991aa4d53b6c1bd7d7cae0126a21f3991e1 (patch)
tree	bd810f21a6cc43fa718e0f2807ac373403244edf
parent	06b56530997fb623c9690ad7bdb8d3f5915d48a0 (diff)