diff options
author | Philippe Normand <philn@igalia.com> | 2023-06-03 12:44:10 +0300 |
---|---|---|
committer | Philippe Normand <philn@igalia.com> | 2023-09-28 18:58:38 +0300 |
commit | 4d9263f93299a359e1d57bfaad261fcd49ac4c84 (patch) | |
tree | 202c5d4b42027c7a39f8f89296a79750abf9c4c6 /audio | |
parent | 90e06dc37bfd53cc7393546913d701a7f77ea808 (diff) |
audiornnoise: Attach audio level meta to output buffers
This is useful for downstream processing of voice audio payloads, for
instance when feeding a speech recognition library such as Whisper.
Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs/-/merge_requests/1231>
Diffstat (limited to 'audio')
-rw-r--r-- | audio/audiofx/Cargo.toml | 6 | ||||
-rw-r--r-- | audio/audiofx/src/audiornnoise/imp.rs | 42 |
2 files changed, 36 insertions, 12 deletions
diff --git a/audio/audiofx/Cargo.toml b/audio/audiofx/Cargo.toml index 5c07628d5..0a86ac361 100644 --- a/audio/audiofx/Cargo.toml +++ b/audio/audiofx/Cargo.toml @@ -9,9 +9,9 @@ edition = "2021" rust-version = "1.70" [dependencies] -gst = { package = "gstreamer", git = "https://gitlab.freedesktop.org/gstreamer/gstreamer-rs", features = ["v1_16"] } -gst-base = { package = "gstreamer-base", git = "https://gitlab.freedesktop.org/gstreamer/gstreamer-rs", features = ["v1_16"] } -gst-audio = { package = "gstreamer-audio", git = "https://gitlab.freedesktop.org/gstreamer/gstreamer-rs", features = ["v1_16"] } +gst = { package = "gstreamer", git = "https://gitlab.freedesktop.org/gstreamer/gstreamer-rs", features = ["v1_20"] } +gst-base = { package = "gstreamer-base", git = "https://gitlab.freedesktop.org/gstreamer/gstreamer-rs", features = ["v1_20"] } +gst-audio = { package = "gstreamer-audio", git = "https://gitlab.freedesktop.org/gstreamer/gstreamer-rs", features = ["v1_20"] } anyhow = "1" byte-slice-cast = "1.0" num-traits = "0.2" diff --git a/audio/audiofx/src/audiornnoise/imp.rs b/audio/audiofx/src/audiornnoise/imp.rs index 25e1b1aaf..e20700f67 100644 --- a/audio/audiofx/src/audiornnoise/imp.rs +++ b/audio/audiofx/src/audiornnoise/imp.rs @@ -129,10 +129,13 @@ impl AudioRNNoise { buffer.set_duration(duration); buffer.set_pts(pts); - let mut out_map = buffer.map_writable().map_err(|_| gst::FlowError::Error)?; - let out_data = out_map.as_mut_slice_of::<f32>().unwrap(); + let (level, has_voice) = { + let mut out_map = buffer.map_writable().map_err(|_| gst::FlowError::Error)?; + let out_data = out_map.as_mut_slice_of::<f32>().unwrap(); + self.process(state, &settings, in_data, out_data) + }; - self.process(state, &settings, in_data, out_data); + gst_audio::AudioLevelMeta::add(buffer, level, has_voice); } self.obj().src_pad().push(buffer) @@ -160,10 +163,13 @@ impl AudioRNNoise { buffer.set_duration(duration); buffer.set_pts(pts); - let mut out_map = 
buffer.map_writable().map_err(|_| gst::FlowError::Error)?; - let out_data = out_map.as_mut_slice_of::<f32>().unwrap(); + let (level, has_voice) = { + let mut out_map = buffer.map_writable().map_err(|_| gst::FlowError::Error)?; + let out_data = out_map.as_mut_slice_of::<f32>().unwrap(); + self.process(state, &settings, in_data, out_data) + }; - self.process(state, &settings, in_data, out_data); + gst_audio::AudioLevelMeta::add(buffer, level, has_voice); } Ok(GenerateOutputSuccess::Buffer(buffer)) @@ -175,9 +181,10 @@ impl AudioRNNoise { settings: &Settings, input_plane: &[f32], output_plane: &mut [f32], - ) { + ) -> (u8, bool) { let channels = state.in_info.channels() as usize; let size = FRAME_SIZE * channels; + let mut has_voice = false; for (out_frame, in_frame) in output_plane.chunks_mut(size).zip(input_plane.chunks(size)) { for (index, item) in in_frame.iter().enumerate() { @@ -207,11 +214,15 @@ impl AudioRNNoise { ); } - gst::debug!(CAT, imp: self, "Voice activity: {}", vad); - + gst::trace!(CAT, imp: self, "Voice activity: {}", vad); if vad < settings.vad_threshold { out_frame.fill(0.0); } else { + // Upon voice activity nnoiseless never really reports a 1.0 + // VAD, so we use a hardcoded value close to 1.0 here. + if vad >= 0.98 { + has_voice = true; + } for (index, item) in out_frame.iter_mut().enumerate() { let channel_index = index % channels; let channel_denoiser = &state.denoisers[channel_index]; @@ -220,6 +231,19 @@ impl AudioRNNoise { } } } + + let rms = output_plane.iter().copied().map(|x| x * x).sum::<f32>(); + let level = (20.0 * f32::log10(rms + f32::EPSILON)) as u8; + + gst::trace!( + CAT, + imp: self, + "rms: {}, level: {}, has_voice : {} ", rms, + level, + has_voice + ); + + (level, has_voice) } } |