Diffstat (limited to 'src/main/java/se/lublin/humla/audio/AudioOutputSpeech.java')
-rw-r--r--  src/main/java/se/lublin/humla/audio/AudioOutputSpeech.java  402
1 file changed, 402 insertions(+), 0 deletions(-)
diff --git a/src/main/java/se/lublin/humla/audio/AudioOutputSpeech.java b/src/main/java/se/lublin/humla/audio/AudioOutputSpeech.java
new file mode 100644
index 0000000..0982f06
--- /dev/null
+++ b/src/main/java/se/lublin/humla/audio/AudioOutputSpeech.java
@@ -0,0 +1,402 @@
+/*
+ * Copyright (C) 2014 Andrew Comminos
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package se.lublin.humla.audio;
+
+import com.googlecode.javacpp.IntPointer;
+import se.lublin.humla.audio.javacpp.CELT11;
+import se.lublin.humla.audio.javacpp.CELT7;
+import se.lublin.humla.audio.javacpp.Opus;
+import se.lublin.humla.audio.javacpp.Speex;
+import se.lublin.humla.exception.NativeAudioException;
+import se.lublin.humla.model.TalkState;
+import se.lublin.humla.model.User;
+import se.lublin.humla.net.HumlaUDPMessageType;
+import se.lublin.humla.net.PacketBuffer;
+import se.lublin.humla.protocol.AudioHandler;
+
+import java.nio.BufferOverflowException;
+import java.nio.BufferUnderflowException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Queue;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentLinkedQueue;
+
+/**
+ * Decodes and buffers incoming voice data for a single user. The audio output
+ * pipeline invokes {@link #call()} repeatedly to obtain blocks of decoded,
+ * mixer-ready samples.
+ *
+ * Created by andrew on 16/07/13.
+ */
+public class AudioOutputSpeech implements Callable<AudioOutputSpeech.Result> {
+
+ interface TalkStateListener {
+        void onTalkStateUpdated(int session, TalkState state);
+ }
+
+ private IDecoder mDecoder;
+ private Speex.JitterBuffer mJitterBuffer;
+ private final Object mJitterLock = new Object();
+
+ private User mUser;
+ private HumlaUDPMessageType mCodec;
+ private int mAudioBufferSize = AudioHandler.FRAME_SIZE;
+ private int mRequestedSamples; // Number of samples requested
+
+ // State-specific
+ private float[] mBuffer;
+ private float[] mOut;
+ private float[] mFadeOut;
+ private float[] mFadeIn;
+ private Queue<ByteBuffer> mFrames = new ConcurrentLinkedQueue<ByteBuffer>();
+ private int mMissCount = 0;
+ private boolean mHasTerminator = false;
+ private boolean mLastAlive = true;
+ private int mBufferFilled, mLastConsume = 0;
+ private int ucFlags;
+ private IntPointer avail = new IntPointer(1);
+
+ private TalkStateListener mTalkStateListener;
+
+ public AudioOutputSpeech(User user, HumlaUDPMessageType codec, int requestedSamples, TalkStateListener listener) throws NativeAudioException {
+        // TODO: consider implementing resampling, in case some Android devices don't support 48 kHz.
+ mUser = user;
+ mCodec = codec;
+ mRequestedSamples = requestedSamples;
+ mTalkStateListener = listener;
+ switch (codec) {
+ case UDPVoiceOpus:
+ mAudioBufferSize *= 12;
+ mDecoder = new Opus.OpusDecoder(AudioHandler.SAMPLE_RATE, 1);
+ break;
+ case UDPVoiceCELTBeta:
+ mDecoder = new CELT11.CELT11Decoder(AudioHandler.SAMPLE_RATE, 1);
+ break;
+ case UDPVoiceCELTAlpha:
+ mDecoder = new CELT7.CELT7Decoder(AudioHandler.SAMPLE_RATE, AudioHandler.FRAME_SIZE, 1);
+ break;
+            case UDPVoiceSpeex:
+                mDecoder = new Speex.SpeexDecoder();
+                break;
+            default:
+                // No decoder exists for this codec; fail fast here rather than
+                // hitting a NullPointerException later in the decode loop.
+                throw new IllegalArgumentException("unsupported codec: " + codec);
+        }
+
+        mBuffer = new float[mAudioBufferSize*2]; // Allocate extra headroom up front to avoid resizing at runtime.
+ mOut = new float[mAudioBufferSize];
+ mFadeIn = new float[AudioHandler.FRAME_SIZE];
+ mFadeOut = new float[AudioHandler.FRAME_SIZE];
+
+        // Quarter-sine ramps over FRAME_SIZE samples: fade-in rises from 0 to 1, fade-out is its mirror image.
+ float mul = (float)(Math.PI / (2.0 * (float) AudioHandler.FRAME_SIZE));
+ for (int i = 0; i < AudioHandler.FRAME_SIZE; i++)
+ mFadeIn[i] = mFadeOut[AudioHandler.FRAME_SIZE-i-1] = (float) Math.sin((float) i * mul);
+
+ mJitterBuffer = new Speex.JitterBuffer(AudioHandler.FRAME_SIZE);
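+        // Ask the jitter buffer to keep roughly ten frames' worth of samples buffered as a safety margin.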
+ IntPointer margin = new IntPointer(1);
+ margin.put(10 * AudioHandler.FRAME_SIZE);
+ mJitterBuffer.control(Speex.JitterBuffer.JITTER_BUFFER_SET_MARGIN, margin);
+ }
+
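+    /**
+     * Parses an incoming audio packet, determines how many samples it spans,
+     * and inserts it into the jitter buffer for later decoding.
+     */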
+ public void addFrameToBuffer(PacketBuffer pb, byte flags, int seq) {
+ if(pb.capacity() < 2)
+ return;
+
+ synchronized (mJitterLock) {
+ try {
+ int samples = 0;
+ if (mCodec == HumlaUDPMessageType.UDPVoiceOpus) {
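+                    // Opus payloads begin with a varint header whose low 13 bits
+                    // give the payload size in bytes.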
+ long header = pb.readLong();
+ int size = (int) (header & ((1 << 13) - 1));
+
+ if (size > 0) {
+ byte[] data = pb.dataBlock(size);
+ if (data.length != size) return;
+
+ int frames = Opus.opus_packet_get_nb_frames(data, size);
+ samples = frames * Opus.opus_packet_get_samples_per_frame(data, AudioHandler.SAMPLE_RATE);
+ } else {
+ return;
+ }
+ } else {
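+                // CELT/Speex packets chain frames: each frame starts with a header
+                // byte whose low 7 bits give the frame length and whose high bit
+                // flags a continuation frame.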
+ try {
+ int header;
+ do {
+ header = pb.next();
+ samples += AudioHandler.FRAME_SIZE;
+ pb.skip(header & 0x7f);
+ } while ((header & 0x80) > 0);
+ } catch (BufferUnderflowException e) {
+ // reached end of buffer
+ }
+ }
+ pb.rewind();
+
+ int size = pb.left();
+ byte[] data = pb.dataBlock(size);
+ Speex.JitterBufferPacket packet = new Speex.JitterBufferPacket(data, size, AudioHandler.FRAME_SIZE * seq, samples, 0, flags);
+            // mJitterLock is already held by the enclosing block; no need to re-acquire it.
+            mJitterBuffer.put(packet);
+ } catch (BufferOverflowException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+
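+    /**
+     * Decodes audio from the jitter buffer until at least the requested number
+     * of samples is buffered, applying fades and packet loss concealment as
+     * needed, then reports the user's talk state. Unconsumed samples from the
+     * previous pass are carried over.
+     */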
+ @Override
+ public Result call() throws Exception {
+ if (mBufferFilled - mLastConsume > 0) {
+ // Shift over the remaining unconsumed data in the buffer.
+ System.arraycopy(mBuffer, mLastConsume, mBuffer, 0, mBufferFilled - mLastConsume);
+ }
+ mBufferFilled -= mLastConsume;
+
+ mLastConsume = mRequestedSamples;
+
+ if(mBufferFilled >= mRequestedSamples)
+ return new Result(this, mLastAlive, mBuffer, mBufferFilled);
+
+ boolean nextAlive = mLastAlive;
+
+ while(mBufferFilled < mRequestedSamples) {
+ int decodedSamples = AudioHandler.FRAME_SIZE;
+ resizeBuffer(mBufferFilled + mAudioBufferSize);
+
+ if(!mLastAlive)
+ Arrays.fill(mOut, 0);
+ else {
+ avail.put(0);
+
+ int ts;
+ synchronized (mJitterLock) {
+ ts = mJitterBuffer.getPointerTimestamp();
+ mJitterBuffer.control(Speex.JitterBuffer.JITTER_BUFFER_GET_AVAILABLE_COUNT, avail);
+ }
+ float availPackets = (float) avail.get();
+
+                // Make sure enough packets are in the jitter buffer before decoding
+                // begins, based on the average number available. This prevents the
+                // metallic 'twang' of a buffer underrun when the user starts
+                // talking. The official Mumble client uses the same technique.
+ if(ts == 0) {
+ int want = (int) Math.ceil(mUser.getAverageAvailable());
+ if (availPackets < want) {
+ mMissCount++;
+ if(mMissCount < 20) {
+ Arrays.fill(mOut, 0);
+ System.arraycopy(mOut, 0, mBuffer, mBufferFilled, decodedSamples);
+ mBufferFilled += decodedSamples;
+ continue;
+ }
+ }
+ }
+
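+            // No queued frames remain; fetch the next packet from the jitter
+            // buffer and split it into codec frames.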
+ if(mFrames.isEmpty()) {
+ ByteBuffer packet = ByteBuffer.allocateDirect(4096);
+ Speex.JitterBufferPacket jbp = new Speex.JitterBufferPacket(packet, 4096, 0, 0, 0, 0);
+ int result;
+
+ synchronized (mJitterLock) {
+ result = mJitterBuffer.get(jbp, null);
+ }
+
+ if(result == Speex.JitterBuffer.JITTER_BUFFER_OK) {
+ packet.limit(jbp.getLength());
+ PacketBuffer pb = new PacketBuffer(packet);
+
+ mMissCount = 0;
+ ucFlags = jbp.getUserData();
+
+ mHasTerminator = false;
+ try {
+ if (mCodec == HumlaUDPMessageType.UDPVoiceOpus) {
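+                                // Same varint header as in addFrameToBuffer;
+                                // bit 13 additionally marks the end of a transmission.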
+ long header = pb.readLong();
+ int size = (int) (header & ((1 << 13) - 1));
+ mHasTerminator = (header & (1 << 13)) > 0;
+
+ ByteBuffer audioData = pb.bufferBlock(size);
+ mFrames.add(audioData);
+ } else {
+ int header;
+ do {
+ header = pb.next();
+ int size = header & 0x7f;
+ if (header > 0) {
+ mFrames.add(pb.bufferBlock(size));
+ } else {
+ mHasTerminator = true;
+ }
+ } while ((header & 0x80) > 0);
+ }
+                        } catch (BufferOverflowException | BufferUnderflowException e) {
+                            // Malformed packet; log it and drop the frame rather
+                            // than crash the decode loop.
+                            e.printStackTrace();
+                        }
+
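+                    // Maintain a slowly decaying average of jitter buffer occupancy;
+                    // the prebuffering check above uses it as its target.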
+ if(availPackets >= mUser.getAverageAvailable())
+ mUser.setAverageAvailable(availPackets);
+ else
+ mUser.setAverageAvailable(mUser.getAverageAvailable() * 0.99f);
+
+ } else {
+ synchronized (mJitterLock) {
+ mJitterBuffer.updateDelay(jbp, null);
+ }
+
+ mMissCount++;
+ if(mMissCount > 10)
+ nextAlive = false;
+ }
+ }
+
+ try {
+ if(!mFrames.isEmpty()) {
+ ByteBuffer data = mFrames.poll();
+
+ decodedSamples = mDecoder.decodeFloat(data, data.limit(), mOut, mAudioBufferSize);
+
+ if(mFrames.isEmpty())
+ synchronized (mJitterLock) {
+ mJitterBuffer.updateDelay(null, new IntPointer(1));
+ }
+
+ if(mFrames.isEmpty() && mHasTerminator)
+ nextAlive = false;
+ } else {
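+                    // No frame available: let the codec's packet loss concealment
+                    // synthesize a plausible substitute frame.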
+ decodedSamples = mDecoder.decodeFloat(null, 0, mOut, AudioHandler.FRAME_SIZE);
+ }
+ } catch (NativeAudioException e) {
+ e.printStackTrace();
+ decodedSamples = AudioHandler.FRAME_SIZE;
+ }
+
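+            // Fade out the tail of an ending transmission; fade in the first
+            // frame of a new one (pointer timestamp 0) to avoid clicks.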
+ if (!nextAlive) {
+ for (int i = 0; i < AudioHandler.FRAME_SIZE; i++) {
+ mOut[i] *= mFadeOut[i];
+ }
+ } else if (ts == 0) {
+ for (int i = 0; i < AudioHandler.FRAME_SIZE; i++) {
+ mOut[i] *= mFadeIn[i];
+ }
+ }
+
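+            // Advance the jitter buffer's clock by the number of frames consumed.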
+ synchronized (mJitterLock) {
+ for(int i = decodedSamples / AudioHandler.FRAME_SIZE; i > 0; i--)
+ mJitterBuffer.tick();
+ }
+ }
+
+ System.arraycopy(mOut, 0, mBuffer, mBufferFilled, decodedSamples);
+ mBufferFilled += decodedSamples;
+ }
+
+ if(!nextAlive) ucFlags = 0xFF;
+
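+        // Map the packet's target byte to a talk state: 0 is normal talking,
+        // 1 shouting, 0xFF (the sentinel set above) a finished transmission,
+        // and anything else whispering.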
+ TalkState talkState;
+ switch (ucFlags) {
+ case 0:
+ talkState = TalkState.TALKING;
+ break;
+ case 1:
+ talkState = TalkState.SHOUTING;
+ break;
+ case 0xFF:
+ talkState = TalkState.PASSIVE;
+ break;
+ default:
+ talkState = TalkState.WHISPERING;
+ break;
+ }
+
+ mTalkStateListener.onTalkStateUpdated(mUser.getSession(), talkState);
+
+        boolean wasAlive = mLastAlive;
+        mLastAlive = nextAlive;
+
+        return new Result(this, wasAlive, mBuffer, mRequestedSamples);
+ }
+
+    private void resizeBuffer(int newSize) {
+        if(newSize > mBuffer.length)
+            mBuffer = Arrays.copyOf(mBuffer, newSize);
+    }
+
+ /**
+ * Sets the preferred number of samples to return when the callable is executed.
+ * @param samples The number of floating point samples to retrieve.
+ */
+ public void setRequestedSamples(int samples) {
+ mRequestedSamples = samples;
+ }
+
+ public HumlaUDPMessageType getCodec() {
+ return mCodec;
+ }
+
+ public User getUser() {
+ return mUser;
+ }
+
+ public int getSession() {
+ return mUser.getSession();
+ }
+
+ /**
+ * Cleans up all JNI refs linked to this instance.
+ * This MUST be called eventually, otherwise we get memory leaks!
+ */
+ public void destroy() {
+ if(mDecoder != null) mDecoder.destroy();
+ mJitterBuffer.destroy();
+ }
+
+ /**
+ * The outcome of a decoding pass.
+ */
+ protected static class Result implements IAudioMixerSource<float[]> {
+ private AudioOutputSpeech mSpeechOutput;
+ private boolean mAlive;
+ private float[] mSamples;
+ private int mNumSamples;
+
+ private Result(AudioOutputSpeech speechOutput,
+ boolean alive,
+ float[] samples,
+ int numSamples) {
+ mSpeechOutput = speechOutput;
+ mAlive = alive;
+ mSamples = samples;
+ mNumSamples = numSamples;
+ }
+
+ public AudioOutputSpeech getSpeechOutput() {
+ return mSpeechOutput;
+ }
+
+ public boolean isAlive() {
+ return mAlive;
+ }
+
+ public float[] getSamples() {
+ return mSamples;
+ }
+
+ public int getNumSamples() {
+ return mNumSamples;
+ }
+ }
+}