// Copyright 2007-2021 The Mumble Developers. All rights reserved. // Use of this source code is governed by a BSD-style license // that can be found in the LICENSE file at the root of the // Mumble source tree or at . #ifndef MUMBLE_MUMBLE_AUDIOINPUT_H_ #define MUMBLE_MUMBLE_AUDIOINPUT_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include "Audio.h" #include "EchoCancelOption.h" #include "Message.h" #include "Settings.h" #include "Timer.h" class AudioInput; class CELTCodec; class OpusCodec; struct CELTEncoder; struct OpusEncoder; struct DenoiseState; typedef boost::shared_ptr< AudioInput > AudioInputPtr; /** * A chunk of audio data to process * This struct wraps pointers to two dynamically allocated arrays, containing * PCM samples of microphone and speaker readback data (for echo cancellation). * Does not handle pointer ownership, so you'll have to deallocate them yourself. */ struct AudioChunk { AudioChunk() : mic(nullptr), speaker(nullptr) {} explicit AudioChunk(short *mic) : mic(mic), speaker(nullptr) {} AudioChunk(short *mic, short *speaker) : mic(mic), speaker(speaker) {} bool empty() const { return mic == nullptr; } short *mic; ///< Pointer to microphone samples short *speaker; ///< Pointer to speaker samples, nullptr if echo cancellation is disabled }; /* * According to https://www.speex.org/docs/manual/speex-manual/node7.html * "It is important that, at any time, any echo that is present in the input * has already been sent to the echo canceller as echo_frame." * Thus, we artificially introduce a small lag in the microphone by means of * a queue, so as to be sure the speaker data always precedes the microphone. * * There are conflicting requirements for the queue: * - it has to be small enough not to cause a noticeable lag in the voice * - it has to be large enough not to force us to drop packets frequently * when the addMic() and addEcho() callbacks are called in a jittery way * - its fill level must be controlled so it does not operate towards zero * elements size, as this would not provide the lag required for the * echo canceller to work properly. * * The current implementation uses a 5 elements queue, with a control * statemachine that introduces packet drops to control the fill level * to at least 2 (plus or minus one) and less than 4 elements. * With a 10ms chunk, this queue should introduce a ~20ms lag to the voice. */ class Resynchronizer { public: /** * Add a microphone sample to the resynchronizer queue * The resynchronizer may decide to drop the sample, and in that case * the pointer will be deallocated not lo leak memory * * \param mic pointer to a dynamically allocated array with PCM data */ void addMic(short *mic); /** * Add a speaker sample to the resynchronizer * The resynchronizer may decide to drop the sample, and in that case * the pointer will be deallocated not lo leak memory * * \param mic pointer to a dynamically allocated array with PCM data * \return If microphone data is available, the resynchronizer will return a * valid audio chunk to encode, otherwise an empty chunk will be returned */ AudioChunk addSpeaker(short *speaker); /** * Reinitialize the resynchronizer, emptying the queue in the process. */ void reset(); /** * \return the nominal lag that the resynchronizer tries to enforce on the * microphone data, in order to make sure the speaker data is always passed * first to the echo canceller */ int getNominalLag() const { return 2; } ~Resynchronizer(); bool bDebugPrintQueue = false; ///< Enables printing queue fill level stats private: /** * Print queue level stats for debugging purposes * \param mic used to distinguish between addMic() and addSpeaker() */ void printQueue(char who); // TODO: there was a mutex (qmEcho), but can the callbacks be called concurrently? mutable std::mutex m; std::list< short * > micQueue; ///< Queue of microphone samples enum { S0, S1a, S1b, S2, S3, S4a, S4b, S5 } state = S0; ///< Queue fill control statemachine }; class AudioInputRegistrar { private: Q_DISABLE_COPY(AudioInputRegistrar) public: static QMap< QString, AudioInputRegistrar * > *qmNew; static QString current; static AudioInputPtr newFromChoice(QString choice = QString()); const QString name; int priority; /// A list of echo cancellation options available for this backend. std::vector< EchoCancelOptionID > echoOptions; AudioInputRegistrar(const QString &n, int priority = 0); virtual ~AudioInputRegistrar(); virtual AudioInput *create() = 0; virtual const QList< audioDevice > getDeviceChoices() = 0; virtual void setDeviceChoice(const QVariant &, Settings &) = 0; /// Check that given combination of echoOption and outputSystem combination is suitable for echo cancellation virtual bool canEcho(EchoCancelOptionID echoOptionId, const QString &outputSystem) const = 0; virtual bool canExclusive() const; /** * Check if Mumble's microphone access has been denied by the OS. * Both Windows and macOS have builtin privacy safeguards that display a message asking for users' * consent when apps are trying to use the microphone, and/or provide ways to deny the microphone * access of some apps. * This function should check if Mumble has the permission to use the microphone. * Note: It is possible that this result could only be known after trying to initialize the audio backend. * Generally, call this function after attempts to initialize the AudioInput have been made. * @return true if microphone access is denied. */ virtual bool isMicrophoneAccessDeniedByOS() = 0; }; class AudioInput : public QThread { friend class AudioNoiseWidget; friend class AudioEchoWidget; friend class AudioStats; friend class AudioInputDialog; private: Q_OBJECT Q_DISABLE_COPY(AudioInput) protected: typedef enum { CodecCELT, CodecSpeex } CodecFormat; typedef enum { SampleShort, SampleFloat } SampleFormat; typedef void (*inMixerFunc)(float *RESTRICT, const void *RESTRICT, unsigned int, unsigned int, quint64); private: bool bDebugDumpInput; ///< When true, dump pcm data to debug the echo canceller std::ofstream outMic, outSpeaker, outProcessed; ///< Files to dump raw pcm data SpeexResamplerState *srsMic, *srsEcho; unsigned int iMicFilled, iEchoFilled; inMixerFunc imfMic, imfEcho; inMixerFunc chooseMixer(const unsigned int nchan, SampleFormat sf, quint64 mask); void resetAudioProcessor(); OpusCodec *oCodec; OpusEncoder *opusState; DenoiseState *denoiseState; bool selectCodec(); void selectNoiseCancel(); typedef boost::array< unsigned char, 960 > EncodingOutputBuffer; int encodeOpusFrame(short *source, int size, EncodingOutputBuffer &buffer); int encodeCELTFrame(short *pSource, EncodingOutputBuffer &buffer); protected: MessageHandler::UDPMessageType umtType; SampleFormat eMicFormat, eEchoFormat; unsigned int iMicChannels, iEchoChannels; unsigned int iMicFreq, iEchoFreq; unsigned int iMicLength, iEchoLength; unsigned int iMicSampleSize, iEchoSampleSize; int iEchoMCLength, iEchoFrameSize; quint64 uiMicChannelMask, uiEchoChannelMask; bool bEchoMulti; Settings::NoiseCancel noiseCancel; static const unsigned int iSampleRate = SAMPLE_RATE; static const int iFrameSize = SAMPLE_RATE / 100; QMutex qmSpeex; SpeexPreprocessState *sppPreprocess; SpeexEchoState *sesEcho; CELTCodec *cCodec; CELTEncoder *ceEncoder; /// bResetEncoder is a flag that notifies /// our encoder functions that the encoder /// needs to be reset. bool bResetEncoder; /// Encoded audio rate in bit/s int iAudioQuality; bool bAllowLowDelay; /// Number of 10ms audio "frames" per packet (!= frames in packet) int iAudioFrames; float *pfMicInput; float *pfEchoInput; Resynchronizer resync; std::vector< short > opusBuffer; void encodeAudioFrame(AudioChunk chunk); void addMic(const void *data, unsigned int nsamp); void addEcho(const void *data, unsigned int nsamp); volatile bool bRunning; volatile bool bPreviousVoice; int iFrameCounter; int iSilentFrames; int iHoldFrames; int iBufferedFrames; QList< QByteArray > qlFrames; void flushCheck(const QByteArray &, bool terminator, int voiceTargetID); void initializeMixer(); static void adjustBandwidth(int bitspersec, int &bitrate, int &frames, bool &allowLowDelay); signals: void doDeaf(); void doMute(); /// A signal emitted if audio input is being encountered /// /// @param inputPCM The encountered input PCM /// @param sampleCount The amount of samples in the input /// @param channelCount The amount of channels in the input /// @param sampleRate The used sample rate in Hz /// @param isSpeech Whether Mumble considers the inpu to be speech void audioInputEncountered(short *inputPCM, unsigned int sampleCount, unsigned int channelCount, unsigned int sampleRate, bool isSpeech); public: typedef enum { ActivityStateIdle, ActivityStateReturnedFromIdle, ActivityStateActive } ActivityState; ActivityState activityState; bool bResetProcessor; Timer tIdle; int iBitrate; float dPeakSpeaker, dPeakSignal, dMaxMic, dPeakMic, dPeakCleanMic; float fSpeechProb; static int getNetworkBandwidth(int bitrate, int frames); static void setMaxBandwidth(int bitspersec); /// Construct an AudioInput. /// /// This constructor is only ever called by Audio::startInput(), and is guaranteed /// to be called on the application's main thread. AudioInput(); /// Destroy an AudioInput. /// /// This destructor is only ever called by Audio::stopInput() and Audio::stop(), /// and is guaranteed to be called on the application's main thread. ~AudioInput() Q_DECL_OVERRIDE; void run() Q_DECL_OVERRIDE = 0; virtual bool isAlive() const; bool isTransmitting() const; }; #endif