4 files changed, 270 insertions, 0 deletions
diff --git a/power_sequencer/operators/audiosync/mfcc/__init__.py b/power_sequencer/operators/audiosync/mfcc/__init__.py
new file mode 100644
index 00000000..2b20af43
--- /dev/null
+++ b/power_sequencer/operators/audiosync/mfcc/__init__.py
@@ -0,0 +1,17 @@
+#
+# Copyright (C) 2016-2019 by Nathan Lovato, Daniel Oakey, Razvan Radulescu, and contributors
+#
+# This file is part of Power Sequencer.
+#
+# Power Sequencer is free software: you can redistribute it and/or modify it under the terms of the
+# GNU General Public License as published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# Power Sequencer is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with Power Sequencer. If
+# not, see <https://www.gnu.org/licenses/>.
+#
+from .mfcc import mfcc
diff --git a/power_sequencer/operators/audiosync/mfcc/mfcc.py b/power_sequencer/operators/audiosync/mfcc/mfcc.py
new file mode 100644
index 00000000..93709e8c
--- /dev/null
+++ b/power_sequencer/operators/audiosync/mfcc/mfcc.py
@@ -0,0 +1,92 @@
+#
+# Copyright (C) 2016-2019 by Nathan Lovato, Daniel Oakey, Razvan Radulescu, and contributors
+#
+# This file is part of Power Sequencer.
+#
+# Power Sequencer is free software: you can redistribute it and/or modify it under the terms of the
+# GNU General Public License as published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# Power Sequencer is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with Power Sequencer. If
+# not, see <https://www.gnu.org/licenses/>.
+#
+import numpy as np
+
+from scipy.signal import hamming, lfilter
+from scipy.fftpack import fft
+from scipy.fftpack.realtransforms import dct
+
+from .trfbank import trfbank
+from .segment_axis import segment_axis
+
+
+def mfcc(input, nwin=256, nfft=512, fs=16000, nceps=13):
+    """Compute Mel Frequency Cepstral Coefficients.
+
+    Parameters
+    ----------
+    input: ndarray
+        input from which the coefficients are computed
+
+    Returns
+    -------
+    ceps: ndarray
+        Mel-cepstrum coefficients
+    mspec: ndarray
+        Log-spectrum in the mel-domain.
+
+    Notes
+    -----
+    MFCC are computed as follows:
+        * Pre-processing in time-domain (pre-emphasizing)
+        * Compute the spectrum amplitude by windowing with a Hamming window
+        * Filter the signal in the spectral domain with a triangular
+        filter-bank, whose filters are approximatively linearly spaced on the
+        mel scale, and have equal bandwidth in the mel scale
+        * Compute the DCT of the log-spectrum
+
+    References
+    ----------
+    .. [1] S.B. Davis and P. Mermelstein, "Comparison of parametric
+           representations for monosyllabic word recognition in continuously
+           spoken sentences", IEEE Trans. Acoustics. Speech, Signal Proc.
+           ASSP-28 (4): 357-366, August 1980."""
+
+    # MFCC parameters: taken from auditory toolbox
+    over = nwin - 160
+    # Pre-emphasis factor (to take into account the -6dB/octave rolloff of the
+    # radiation at the lips level)
+    prefac = 0.97
+
+    # lowfreq = 400 / 3.
+    lowfreq = 133.33
+    # highfreq = 6855.4976
+    linsc = 200 / 3.0
+    logsc = 1.0711703
+
+    nlinfil = 13
+    nlogfil = 27
+    nfil = nlinfil + nlogfil
+
+    w = hamming(nwin, sym=0)
+
+    fbank = trfbank(fs, nfft, lowfreq, linsc, logsc, nlinfil, nlogfil)[0]
+
+    # ------------------
+    # Compute the MFCC
+    # ------------------
+    extract = lfilter([1.0, -prefac], 1, input)
+    framed = segment_axis(extract, nwin, over) * w
+
+    # Compute the spectrum magnitude
+    spec = np.abs(fft(framed, nfft, axis=-1))
+    # Filter the spectrum through the triangle filterbank
+    mspec = np.log10(np.dot(spec, fbank.T))
+    # Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
+    ceps = dct(mspec, type=2, norm="ortho", axis=-1)[:, :nceps]
+
+    return ceps, mspec, spec
diff --git a/power_sequencer/operators/audiosync/mfcc/segment_axis.py b/power_sequencer/operators/audiosync/mfcc/segment_axis.py
new file mode 100644
index 00000000..a8345833
--- /dev/null
+++ b/power_sequencer/operators/audiosync/mfcc/segment_axis.py
@@ -0,0 +1,110 @@
+#
+# Copyright (C) 2016-2019 by Nathan Lovato, Daniel Oakey, Razvan Radulescu, and contributors
+#
+# This file is part of Power Sequencer.
+#
+# Power Sequencer is free software: you can redistribute it and/or modify it under the terms of the
+# GNU General Public License as published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# Power Sequencer is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with Power Sequencer. If
+# not, see <https://www.gnu.org/licenses/>.
+#
+import numpy as np
+import warnings
+
+
+def segment_axis(a, length, overlap=0, axis=None, end="cut", endvalue=0):
+    """Generate a new array that chops the given array along the given axis
+    into overlapping frames.
+
+    example:
+    >>> segment_axis(arange(10), 4, 2)
+    array([[0, 1, 2, 3],
+           [2, 3, 4, 5],
+           [4, 5, 6, 7],
+           [6, 7, 8, 9]])
+
+    arguments:
+    a       The array to segment
+    length  The length of each frame
+    overlap The number of array elements by which the frames should overlap
+    axis    The axis to operate on; if None, act on the flattened array
+    end     What to do with the last frame, if the array is not evenly
+            divisible into pieces. Options are:
+
+            'cut'   Simply discard the extra values
+            'wrap'  Copy values from the beginning of the array
+            'pad'   Pad with a constant value
+
+    endvalue    The value to use for end='pad'
+
+    The array is not copied unless necessary (either because it is unevenly
+    strided and being flattened or because end is set to 'pad' or 'wrap').
+    """
+
+    if axis is None:
+        a = np.ravel(a)  # may copy
+        axis = 0
+
+    l = a.shape[axis]
+
+    if overlap >= length:
+        raise ValueError("frames cannot overlap by more than 100%")
+    if overlap < 0 or length <= 0:
+        raise ValueError("overlap must be nonnegative and length must " "be positive")
+
+    if l < length or (l - length) % (length - overlap):
+        if l > length:
+            roundup = length + (1 + (l - length) // (length - overlap)) * (length - overlap)
+            rounddown = length + ((l - length) // (length - overlap)) * (length - overlap)
+        else:
+            roundup = length
+            rounddown = 0
+        assert rounddown < l < roundup
+        assert roundup == rounddown + (length - overlap) or (roundup == length and rounddown == 0)
+        a = a.swapaxes(-1, axis)
+
+        if end == "cut":
+            a = a[..., :rounddown]
+        elif end in ["pad", "wrap"]:  # copying will be necessary
+            s = list(a.shape)
+            s[-1] = roundup
+            b = np.empty(s, dtype=a.dtype)
+            b[..., :l] = a
+            if end == "pad":
+                b[..., l:] = endvalue
+            elif end == "wrap":
+                b[..., l:] = a[..., : roundup - l]
+            a = b
+
+        a = a.swapaxes(-1, axis)
+
+    l = a.shape[axis]
+    if l == 0:
+        raise ValueError(
+            "Not enough data points to segment array in 'cut' mode; " "try 'pad' or 'wrap'"
+        )
+    assert l >= length
+    assert (l - length) % (length - overlap) == 0
+    n = 1 + (l - length) // (length - overlap)
+    s = a.strides[axis]
+    newshape = a.shape[:axis] + (n, length) + a.shape[axis + 1 :]
+    newstrides = a.strides[:axis] + ((length - overlap) * s, s) + a.strides[axis + 1 :]
+
+    try:
+        return np.ndarray.__new__(
+            np.ndarray, strides=newstrides, shape=newshape, buffer=a, dtype=a.dtype
+        )
+    except TypeError:
+        warnings.warn("Problem with ndarray creation forces copy.")
+        a = a.copy()
+        # Shape doesn't change but strides does
+        newstrides = a.strides[:axis] + ((length - overlap) * s, s) + a.strides[axis + 1 :]
+        return np.ndarray.__new__(
+            np.ndarray, strides=newstrides, shape=newshape, buffer=a, dtype=a.dtype
+        )
diff --git a/power_sequencer/operators/audiosync/mfcc/trfbank.py b/power_sequencer/operators/audiosync/mfcc/trfbank.py
new file mode 100644
index 00000000..00558944
--- /dev/null
+++ b/power_sequencer/operators/audiosync/mfcc/trfbank.py
@@ -0,0 +1,51 @@
+#
+# Copyright (C) 2016-2019 by Nathan Lovato, Daniel Oakey, Razvan Radulescu, and contributors
+#
+# This file is part of Power Sequencer.
+#
+# Power Sequencer is free software: you can redistribute it and/or modify it under the terms of the
+# GNU General Public License as published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# Power Sequencer is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with Power Sequencer. If
+# not, see <https://www.gnu.org/licenses/>.
+#
+import numpy as np
+
+
+def trfbank(fs, nfft, lowfreq, linsc, logsc, nlinfilt, nlogfilt):
+    """Compute triangular filterbank for MFCC computation."""
+    # Total number of filters
+    nfilt = nlinfilt + nlogfilt
+
+    # ------------------------
+    # Compute the filter bank
+    # ------------------------
+    # Compute start/middle/end points of the triangular filters in spectral
+    # domain
+    freqs = np.zeros(nfilt + 2)
+    freqs[:nlinfilt] = lowfreq + np.arange(nlinfilt) * linsc
+    freqs[nlinfilt:] = freqs[nlinfilt - 1] * logsc ** np.arange(1, nlogfilt + 3)
+    heights = 2.0 / (freqs[2:] - freqs[0:-2])
+
+    # Compute filterbank coeff (in fft domain, in bins)
+    fbank = np.zeros((nfilt, nfft))
+    # FFT bins (in Hz)
+    nfreqs = np.arange(nfft) / (1.0 * nfft) * fs
+    for i in range(nfilt):
+        low = freqs[i]
+        cen = freqs[i + 1]
+        hi = freqs[i + 2]
+
+        lid = np.arange(np.floor(low * nfft / fs) + 1, np.floor(cen * nfft / fs) + 1, dtype=np.int)
+        lslope = heights[i] / (cen - low)
+        rid = np.arange(np.floor(cen * nfft / fs) + 1, np.floor(hi * nfft / fs) + 1, dtype=np.int)
+        rslope = heights[i] / (hi - cen)
+        fbank[i][lid] = lslope * (nfreqs[lid] - low)
+        fbank[i][rid] = rslope * (hi - nfreqs[rid])
+
+    return fbank, freqs