Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/Morlunk/Jumble.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Andrew Comminos <andrewcomminos@gmail.com> 2013-11-26 06:19:30 +0400
committer: Andrew Comminos <andrewcomminos@gmail.com> 2013-11-26 06:19:30 +0400
commit: fff11a9b454f302147cc2e73096558397c416b14 (patch)
tree: 59ca26ee9a4fe7b6f66625a84edb38063a5b5a89
parent: 93679870e0d3eaa00aa97cd4491992ee495922e6 (diff)
Implemented energy-based logarithmic VAD.
-rw-r--r-- src/main/java/com/morlunk/jumble/JumbleService.java 5
-rw-r--r-- src/main/java/com/morlunk/jumble/audio/AudioInput.java 30
2 files changed, 28 insertions, 7 deletions
diff --git a/src/main/java/com/morlunk/jumble/JumbleService.java b/src/main/java/com/morlunk/jumble/JumbleService.java
index e01b291..3d12457 100644
--- a/src/main/java/com/morlunk/jumble/JumbleService.java
+++ b/src/main/java/com/morlunk/jumble/JumbleService.java
@@ -118,6 +118,11 @@ public class JumbleService extends Service implements JumbleConnection.JumbleCon
}
});
}
+
+ @Override
+ public void onVADStateUpdate(float prob) {
+
+ }
};
// Logging
diff --git a/src/main/java/com/morlunk/jumble/audio/AudioInput.java b/src/main/java/com/morlunk/jumble/audio/AudioInput.java
index 596eb72..8991f6d 100644
--- a/src/main/java/com/morlunk/jumble/audio/AudioInput.java
+++ b/src/main/java/com/morlunk/jumble/audio/AudioInput.java
@@ -57,6 +57,14 @@ public class AudioInput extends ProtocolHandler implements Runnable {
public void onFrameEncoded(byte[] data, int length, JumbleUDPMessageType messageType);
public void onTalkStateChanged(boolean talking);
+
+ // Debug methods
+
+ /**
+ * Called after a frame is passed into the speex preprocessor.
+ * @param prob The probability of speech, from 0 to 1.
+ */
+ public void onVADStateUpdate(float prob);
}
private static final int[] SAMPLE_RATES = { 48000, 44100, 22050, 160000, 11025, 8000 };
@@ -82,6 +90,10 @@ public class AudioInput extends ProtocolHandler implements Runnable {
private int mMicFrameSize = Audio.FRAME_SIZE;
private int mFramesPerPacket = 6;
+ // Energy-based VAD
+ private int mMinAmplitude = -1;
+ private int mMaxAmplitude = -1;
+
// Temporary encoder state
private final short[] mOpusBuffer = new short[mFrameSize*mFramesPerPacket];
private final byte[] mEncodedBuffer = new byte[OPUS_MAX_BYTES];
@@ -271,11 +283,15 @@ public class AudioInput extends ProtocolHandler implements Runnable {
boolean talking = true;
if(mTransmitMode == Constants.TRANSMIT_VOICE_ACTIVITY) {
- // Check if audio input registered as probable speech.
- IntPointer prob = new IntPointer(1);
- mPreprocessState.control(Speex.SpeexPreprocessState.SPEEX_PREPROCESS_GET_PROB, prob);
- float speechProbablilty = (float)prob.get() / 100.0f;
- talking = speechProbablilty >= mVADThreshold;
+ // Use a logarithmic energy-based scale for VAD.
+ float sum = 1.0f;
+ for (int i = 0; i < mFrameSize; i++) {
+ sum += audioData[i] * audioData[i];
+ }
+ float micLevel = (float) Math.sqrt(sum / (float)mFrameSize);
+ float peakSignal = (float) (20.0f*Math.log10(micLevel / 32768.0f))/96.0f;
+ talking = (peakSignal+1) >= mVADThreshold;
+// Log.v(Constants.TAG, String.format("Signal: %2f, Threshold: %2f", peakSignal+1, mVADThreshold));
if(talking ^ mVADLastDetected) // Update the service with the new talking state if we detected voice.
mListener.onTalkStateChanged(talking);
@@ -383,8 +399,8 @@ public class AudioInput extends ProtocolHandler implements Runnable {
@Override
public void messageCodecVersion(Mumble.CodecVersion msg) {
-// if(msg.getOpus())
-// switchCodec(JumbleUDPMessageType.UDPVoiceOpus);
+ if(msg.getOpus())
+ switchCodec(JumbleUDPMessageType.UDPVoiceOpus);
// else if(msg.getPreferAlpha())
// switchCodec(JumbleUDPMessageType.UDPVoiceCELTAlpha);
// else