diff options
author | Andrew Comminos <andrewcomminos@gmail.com> | 2013-11-26 06:19:30 +0400 |
---|---|---|
committer | Andrew Comminos <andrewcomminos@gmail.com> | 2013-11-26 06:19:30 +0400 |
commit | fff11a9b454f302147cc2e73096558397c416b14 (patch) | |
tree | 59ca26ee9a4fe7b6f66625a84edb38063a5b5a89 | |
parent | 93679870e0d3eaa00aa97cd4491992ee495922e6 (diff) |
Implemented energy-based logarithmic VAD.
-rw-r--r-- | src/main/java/com/morlunk/jumble/JumbleService.java | 5 | ||||
-rw-r--r-- | src/main/java/com/morlunk/jumble/audio/AudioInput.java | 30 |
2 files changed, 28 insertions, 7 deletions
diff --git a/src/main/java/com/morlunk/jumble/JumbleService.java b/src/main/java/com/morlunk/jumble/JumbleService.java index e01b291..3d12457 100644 --- a/src/main/java/com/morlunk/jumble/JumbleService.java +++ b/src/main/java/com/morlunk/jumble/JumbleService.java @@ -118,6 +118,11 @@ public class JumbleService extends Service implements JumbleConnection.JumbleCon } }); } + + @Override + public void onVADStateUpdate(float prob) { + + } }; // Logging diff --git a/src/main/java/com/morlunk/jumble/audio/AudioInput.java b/src/main/java/com/morlunk/jumble/audio/AudioInput.java index 596eb72..8991f6d 100644 --- a/src/main/java/com/morlunk/jumble/audio/AudioInput.java +++ b/src/main/java/com/morlunk/jumble/audio/AudioInput.java @@ -57,6 +57,14 @@ public class AudioInput extends ProtocolHandler implements Runnable { public void onFrameEncoded(byte[] data, int length, JumbleUDPMessageType messageType); public void onTalkStateChanged(boolean talking); + + // Debug methods + + /** + * Called after a frame is passed into the speex preprocessor. + * @param prob The probability of speech, from 0 to 1. + */ + public void onVADStateUpdate(float prob); } private static final int[] SAMPLE_RATES = { 48000, 44100, 22050, 160000, 11025, 8000 }; @@ -82,6 +90,10 @@ public class AudioInput extends ProtocolHandler implements Runnable { private int mMicFrameSize = Audio.FRAME_SIZE; private int mFramesPerPacket = 6; + // Energy-based VAD + private int mMinAmplitude = -1; + private int mMaxAmplitude = -1; + // Temporary encoder state private final short[] mOpusBuffer = new short[mFrameSize*mFramesPerPacket]; private final byte[] mEncodedBuffer = new byte[OPUS_MAX_BYTES]; @@ -271,11 +283,15 @@ public class AudioInput extends ProtocolHandler implements Runnable { boolean talking = true; if(mTransmitMode == Constants.TRANSMIT_VOICE_ACTIVITY) { - // Check if audio input registered as probable speech. - IntPointer prob = new IntPointer(1); - mPreprocessState.control(Speex.SpeexPreprocessState.SPEEX_PREPROCESS_GET_PROB, prob); - float speechProbablilty = (float)prob.get() / 100.0f; - talking = speechProbablilty >= mVADThreshold; + // Use a logarithmic energy-based scale for VAD. + float sum = 1.0f; + for (int i = 0; i < mFrameSize; i++) { + sum += audioData[i] * audioData[i]; + } + float micLevel = (float) Math.sqrt(sum / (float)mFrameSize); + float peakSignal = (float) (20.0f*Math.log10(micLevel / 32768.0f))/96.0f; + talking = (peakSignal+1) >= mVADThreshold; +// Log.v(Constants.TAG, String.format("Signal: %2f, Threshold: %2f", peakSignal+1, mVADThreshold)); if(talking ^ mVADLastDetected) // Update the service with the new talking state if we detected voice. mListener.onTalkStateChanged(talking); @@ -383,8 +399,8 @@ public class AudioInput extends ProtocolHandler implements Runnable { @Override public void messageCodecVersion(Mumble.CodecVersion msg) { -// if(msg.getOpus()) -// switchCodec(JumbleUDPMessageType.UDPVoiceOpus); + if(msg.getOpus()) + switchCodec(JumbleUDPMessageType.UDPVoiceOpus); // else if(msg.getPreferAlpha()) // switchCodec(JumbleUDPMessageType.UDPVoiceCELTAlpha); // else |