Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender-addons.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'power_sequencer/operators/audiosync/mfcc')
-rw-r--r--power_sequencer/operators/audiosync/mfcc/__init__.py17
-rw-r--r--power_sequencer/operators/audiosync/mfcc/mfcc.py92
-rw-r--r--power_sequencer/operators/audiosync/mfcc/segment_axis.py110
-rw-r--r--power_sequencer/operators/audiosync/mfcc/trfbank.py51
4 files changed, 270 insertions, 0 deletions
diff --git a/power_sequencer/operators/audiosync/mfcc/__init__.py b/power_sequencer/operators/audiosync/mfcc/__init__.py
new file mode 100644
index 00000000..2b20af43
--- /dev/null
+++ b/power_sequencer/operators/audiosync/mfcc/__init__.py
@@ -0,0 +1,17 @@
+#
+# Copyright (C) 2016-2019 by Nathan Lovato, Daniel Oakey, Razvan Radulescu, and contributors
+#
+# This file is part of Power Sequencer.
+#
+# Power Sequencer is free software: you can redistribute it and/or modify it under the terms of the
+# GNU General Public License as published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# Power Sequencer is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with Power Sequencer. If
+# not, see <https://www.gnu.org/licenses/>.
+#
+from .mfcc import mfcc
diff --git a/power_sequencer/operators/audiosync/mfcc/mfcc.py b/power_sequencer/operators/audiosync/mfcc/mfcc.py
new file mode 100644
index 00000000..93709e8c
--- /dev/null
+++ b/power_sequencer/operators/audiosync/mfcc/mfcc.py
@@ -0,0 +1,92 @@
+#
+# Copyright (C) 2016-2019 by Nathan Lovato, Daniel Oakey, Razvan Radulescu, and contributors
+#
+# This file is part of Power Sequencer.
+#
+# Power Sequencer is free software: you can redistribute it and/or modify it under the terms of the
+# GNU General Public License as published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# Power Sequencer is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with Power Sequencer. If
+# not, see <https://www.gnu.org/licenses/>.
+#
+import numpy as np
+
+from scipy.signal import hamming, lfilter
+from scipy.fftpack import fft
+from scipy.fftpack.realtransforms import dct
+
+from .trfbank import trfbank
+from .segment_axis import segment_axis
+
+
+def mfcc(input, nwin=256, nfft=512, fs=16000, nceps=13):
+ """Compute Mel Frequency Cepstral Coefficients.
+
+ Parameters
+ ----------
+ input: ndarray
+ input from which the coefficients are computed
+
+ Returns
+ -------
+ ceps: ndarray
+ Mel-cepstrum coefficients
+ mspec: ndarray
+ Log-spectrum in the mel-domain.
+
+ Notes
+ -----
+ MFCC are computed as follows:
+ * Pre-processing in time-domain (pre-emphasizing)
+ * Compute the spectrum amplitude by windowing with a Hamming window
+ * Filter the signal in the spectral domain with a triangular
+ filter-bank, whose filters are approximatively linearly spaced on the
+ mel scale, and have equal bandwidth in the mel scale
+ * Compute the DCT of the log-spectrum
+
+ References
+ ----------
+ .. [1] S.B. Davis and P. Mermelstein, "Comparison of parametric
+ representations for monosyllabic word recognition in continuously
+ spoken sentences", IEEE Trans. Acoustics. Speech, Signal Proc.
+ ASSP-28 (4): 357-366, August 1980."""
+
+ # MFCC parameters: taken from auditory toolbox
+ over = nwin - 160
+ # Pre-emphasis factor (to take into account the -6dB/octave rolloff of the
+ # radiation at the lips level)
+ prefac = 0.97
+
+ # lowfreq = 400 / 3.
+ lowfreq = 133.33
+ # highfreq = 6855.4976
+ linsc = 200 / 3.0
+ logsc = 1.0711703
+
+ nlinfil = 13
+ nlogfil = 27
+ nfil = nlinfil + nlogfil
+
+ w = hamming(nwin, sym=0)
+
+ fbank = trfbank(fs, nfft, lowfreq, linsc, logsc, nlinfil, nlogfil)[0]
+
+ # ------------------
+ # Compute the MFCC
+ # ------------------
+ extract = lfilter([1.0, -prefac], 1, input)
+ framed = segment_axis(extract, nwin, over) * w
+
+ # Compute the spectrum magnitude
+ spec = np.abs(fft(framed, nfft, axis=-1))
+ # Filter the spectrum through the triangle filterbank
+ mspec = np.log10(np.dot(spec, fbank.T))
+ # Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
+ ceps = dct(mspec, type=2, norm="ortho", axis=-1)[:, :nceps]
+
+ return ceps, mspec, spec
diff --git a/power_sequencer/operators/audiosync/mfcc/segment_axis.py b/power_sequencer/operators/audiosync/mfcc/segment_axis.py
new file mode 100644
index 00000000..a8345833
--- /dev/null
+++ b/power_sequencer/operators/audiosync/mfcc/segment_axis.py
@@ -0,0 +1,110 @@
+#
+# Copyright (C) 2016-2019 by Nathan Lovato, Daniel Oakey, Razvan Radulescu, and contributors
+#
+# This file is part of Power Sequencer.
+#
+# Power Sequencer is free software: you can redistribute it and/or modify it under the terms of the
+# GNU General Public License as published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# Power Sequencer is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with Power Sequencer. If
+# not, see <https://www.gnu.org/licenses/>.
+#
+import numpy as np
+import warnings
+
+
+def segment_axis(a, length, overlap=0, axis=None, end="cut", endvalue=0):
+ """Generate a new array that chops the given array along the given axis
+ into overlapping frames.
+
+ example:
+ >>> segment_axis(arange(10), 4, 2)
+ array([[0, 1, 2, 3],
+ [2, 3, 4, 5],
+ [4, 5, 6, 7],
+ [6, 7, 8, 9]])
+
+ arguments:
+ a The array to segment
+ length The length of each frame
+ overlap The number of array elements by which the frames should overlap
+ axis The axis to operate on; if None, act on the flattened array
+ end What to do with the last frame, if the array is not evenly
+ divisible into pieces. Options are:
+
+ 'cut' Simply discard the extra values
+ 'wrap' Copy values from the beginning of the array
+ 'pad' Pad with a constant value
+
+ endvalue The value to use for end='pad'
+
+ The array is not copied unless necessary (either because it is unevenly
+ strided and being flattened or because end is set to 'pad' or 'wrap').
+ """
+
+ if axis is None:
+ a = np.ravel(a) # may copy
+ axis = 0
+
+ l = a.shape[axis]
+
+ if overlap >= length:
+ raise ValueError("frames cannot overlap by more than 100%")
+ if overlap < 0 or length <= 0:
+ raise ValueError("overlap must be nonnegative and length must " "be positive")
+
+ if l < length or (l - length) % (length - overlap):
+ if l > length:
+ roundup = length + (1 + (l - length) // (length - overlap)) * (length - overlap)
+ rounddown = length + ((l - length) // (length - overlap)) * (length - overlap)
+ else:
+ roundup = length
+ rounddown = 0
+ assert rounddown < l < roundup
+ assert roundup == rounddown + (length - overlap) or (roundup == length and rounddown == 0)
+ a = a.swapaxes(-1, axis)
+
+ if end == "cut":
+ a = a[..., :rounddown]
+ elif end in ["pad", "wrap"]: # copying will be necessary
+ s = list(a.shape)
+ s[-1] = roundup
+ b = np.empty(s, dtype=a.dtype)
+ b[..., :l] = a
+ if end == "pad":
+ b[..., l:] = endvalue
+ elif end == "wrap":
+ b[..., l:] = a[..., : roundup - l]
+ a = b
+
+ a = a.swapaxes(-1, axis)
+
+ l = a.shape[axis]
+ if l == 0:
+ raise ValueError(
+ "Not enough data points to segment array in 'cut' mode; " "try 'pad' or 'wrap'"
+ )
+ assert l >= length
+ assert (l - length) % (length - overlap) == 0
+ n = 1 + (l - length) // (length - overlap)
+ s = a.strides[axis]
+ newshape = a.shape[:axis] + (n, length) + a.shape[axis + 1 :]
+ newstrides = a.strides[:axis] + ((length - overlap) * s, s) + a.strides[axis + 1 :]
+
+ try:
+ return np.ndarray.__new__(
+ np.ndarray, strides=newstrides, shape=newshape, buffer=a, dtype=a.dtype
+ )
+ except TypeError:
+ warnings.warn("Problem with ndarray creation forces copy.")
+ a = a.copy()
+ # Shape doesn't change but strides does
+ newstrides = a.strides[:axis] + ((length - overlap) * s, s) + a.strides[axis + 1 :]
+ return np.ndarray.__new__(
+ np.ndarray, strides=newstrides, shape=newshape, buffer=a, dtype=a.dtype
+ )
diff --git a/power_sequencer/operators/audiosync/mfcc/trfbank.py b/power_sequencer/operators/audiosync/mfcc/trfbank.py
new file mode 100644
index 00000000..00558944
--- /dev/null
+++ b/power_sequencer/operators/audiosync/mfcc/trfbank.py
@@ -0,0 +1,51 @@
+#
+# Copyright (C) 2016-2019 by Nathan Lovato, Daniel Oakey, Razvan Radulescu, and contributors
+#
+# This file is part of Power Sequencer.
+#
+# Power Sequencer is free software: you can redistribute it and/or modify it under the terms of the
+# GNU General Public License as published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# Power Sequencer is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with Power Sequencer. If
+# not, see <https://www.gnu.org/licenses/>.
+#
+import numpy as np
+
+
+def trfbank(fs, nfft, lowfreq, linsc, logsc, nlinfilt, nlogfilt):
+ """Compute triangular filterbank for MFCC computation."""
+ # Total number of filters
+ nfilt = nlinfilt + nlogfilt
+
+ # ------------------------
+ # Compute the filter bank
+ # ------------------------
+ # Compute start/middle/end points of the triangular filters in spectral
+ # domain
+ freqs = np.zeros(nfilt + 2)
+ freqs[:nlinfilt] = lowfreq + np.arange(nlinfilt) * linsc
+ freqs[nlinfilt:] = freqs[nlinfilt - 1] * logsc ** np.arange(1, nlogfilt + 3)
+ heights = 2.0 / (freqs[2:] - freqs[0:-2])
+
+ # Compute filterbank coeff (in fft domain, in bins)
+ fbank = np.zeros((nfilt, nfft))
+ # FFT bins (in Hz)
+ nfreqs = np.arange(nfft) / (1.0 * nfft) * fs
+ for i in range(nfilt):
+ low = freqs[i]
+ cen = freqs[i + 1]
+ hi = freqs[i + 2]
+
+ lid = np.arange(np.floor(low * nfft / fs) + 1, np.floor(cen * nfft / fs) + 1, dtype=np.int)
+ lslope = heights[i] / (cen - low)
+ rid = np.arange(np.floor(cen * nfft / fs) + 1, np.floor(hi * nfft / fs) + 1, dtype=np.int)
+ rslope = heights[i] / (hi - cen)
+ fbank[i][lid] = lslope * (nfreqs[lid] - low)
+ fbank[i][rid] = rslope * (hi - nfreqs[rid])
+
+ return fbank, freqs