audiornnoise: Add debug output for voice activity to help you choose a threshold

Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs/-/merge_requests/1010>
author: Michiel Konstapel <github@konstapel.nl> 2022-12-12 19:29:55 +0300
committer: GStreamer Marge Bot <gitlab-merge-bot@gstreamer-foundation.org> 2022-12-14 13:00:28 +0300
commit: 3a8536b45efa41e5af6eec250e998f24bcc4018c (patch)
tree: 6bc0c7309d116a57ad0b9032a9e2b230e62fb11d /audio
parent: e5360ff43159ee4b70e875f1786a4ca32968385b (diff)
1 files changed, 55 insertions, 47 deletions
diff --git a/audio/audiofx/src/audiornnoise/imp.rs b/audio/audiofx/src/audiornnoise/imp.rs
index b804bd056..2eb66a6cc 100644
--- a/audio/audiofx/src/audiornnoise/imp.rs
+++ b/audio/audiofx/src/audiornnoise/imp.rs
@@ -99,51 +99,6 @@ impl State {
     fn needs_more_data(&self) -> bool {
         self.adapter.available() < (FRAME_SIZE * self.in_info.bpf() as usize)
     }
-
-    fn process(&mut self, input_plane: &[f32], output_plane: &mut [f32], settings: &Settings) {
-        let channels = self.in_info.channels() as usize;
-        let size = FRAME_SIZE * channels;
-
-        for (out_frame, in_frame) in output_plane.chunks_mut(size).zip(input_plane.chunks(size)) {
-            for (index, item) in in_frame.iter().enumerate() {
-                let channel_index = index % channels;
-                let channel_denoiser = &mut self.denoisers[channel_index];
-                let pos = index / channels;
-                channel_denoiser.frame_chunk[pos] = *item * 32767.0;
-            }
-
-            for i in (in_frame.len() / channels)..(size / channels) {
-                for c in 0..channels {
-                    let channel_denoiser = &mut self.denoisers[c];
-                    channel_denoiser.frame_chunk[i] = 0.0;
-                }
-            }
-
-            // FIXME: The first chunks coming out of the denoisers contains some
-            // fade-in artifacts. We might want to discard those.
-            let mut vad: f32 = 0.0;
-            for channel_denoiser in &mut self.denoisers {
-                vad = f32::max(
-                    vad,
-                    channel_denoiser.denoiser.process_frame(
-                        &mut channel_denoiser.out_chunk[..],
-                        &channel_denoiser.frame_chunk[..],
-                    ),
-                );
-            }
-
-            if vad < settings.vad_threshold {
-                out_frame.fill(0.0);
-            } else {
-                for (index, item) in out_frame.iter_mut().enumerate() {
-                    let channel_index = index % channels;
-                    let channel_denoiser = &self.denoisers[channel_index];
-                    let pos = index / channels;
-                    *item = channel_denoiser.out_chunk[pos] / 32767.0;
-                }
-            }
-        }
-    }
 }
 
 impl AudioRNNoise {
@@ -177,7 +132,7 @@ impl AudioRNNoise {
             let mut out_map = buffer.map_writable().map_err(|_| gst::FlowError::Error)?;
             let out_data = out_map.as_mut_slice_of::<f32>().unwrap();
 
-            state.process(in_data, out_data, &settings);
+            self.process(state, &settings, in_data, out_data);
         }
 
         self.obj().src_pad().push(buffer)
@@ -208,11 +163,64 @@ impl AudioRNNoise {
             let mut out_map = buffer.map_writable().map_err(|_| gst::FlowError::Error)?;
             let out_data = out_map.as_mut_slice_of::<f32>().unwrap();
 
-            state.process(in_data, out_data, &settings);
+            self.process(state, &settings, in_data, out_data);
         }
 
         Ok(GenerateOutputSuccess::Buffer(buffer))
     }
+
+    fn process( // Denoises `input_plane` into `output_plane`, one chunk of FRAME_SIZE positions per channel at a time.
+        &self,
+        state: &mut State,
+        settings: &Settings,
+        input_plane: &[f32],
+        output_plane: &mut [f32],
+    ) {
+        let channels = state.in_info.channels() as usize; // channel count taken from the input audio info
+        let size = FRAME_SIZE * channels; // samples per chunk: FRAME_SIZE positions for each channel
+
+        for (out_frame, in_frame) in output_plane.chunks_mut(size).zip(input_plane.chunks(size)) { // paired chunks; a trailing chunk may hold fewer than `size` samples
+            for (index, item) in in_frame.iter().enumerate() { // split the chunk into per-channel frames
+                let channel_index = index % channels; // sample `index` goes to channel `index % channels` (interleaved indexing)
+                let channel_denoiser = &mut state.denoisers[channel_index];
+                let pos = index / channels; // position within that channel's frame
+                channel_denoiser.frame_chunk[pos] = *item * 32767.0; // scaled by 32767.0 (presumably the 16-bit range the denoiser expects; TODO confirm)
+            }
+
+            for i in (in_frame.len() / channels)..(size / channels) { // zero the frame positions a short chunk did not fill; empty range for a full chunk
+                for c in 0..channels {
+                    let channel_denoiser = &mut state.denoisers[c];
+                    channel_denoiser.frame_chunk[i] = 0.0;
+                }
+            }
+
+            // FIXME: The first chunks coming out of the denoisers contains some
+            // fade-in artifacts. We might want to discard those.
+            let mut vad: f32 = 0.0; // running maximum of the per-channel results below
+            for channel_denoiser in &mut state.denoisers { // run every channel's denoiser on its own frame
+                vad = f32::max(
+                    vad,
+                    channel_denoiser.denoiser.process_frame( // the return value is used as this channel's voice-activity score
+                        &mut channel_denoiser.out_chunk[..],
+                        &channel_denoiser.frame_chunk[..],
+                    ),
+                );
+            }
+
+            gst::debug!(CAT, imp: self, "Voice activity: {}", vad); // emitted once per chunk, with the maximum over all channels
+
+            if vad < settings.vad_threshold { // below the configured threshold: the whole output chunk is zeroed
+                out_frame.fill(0.0);
+            } else { // otherwise write the denoised samples back using the same channel/position indexing
+                for (index, item) in out_frame.iter_mut().enumerate() {
+                    let channel_index = index % channels;
+                    let channel_denoiser = &state.denoisers[channel_index];
+                    let pos = index / channels;
+                    *item = channel_denoiser.out_chunk[pos] / 32767.0; // undo the 32767.0 scaling applied on input
+                }
+            }
+        }
+    }
 }
 
 #[glib::object_subclass]
author	Michiel Konstapel <github@konstapel.nl>	2022-12-12 19:29:55 +0300
committer	GStreamer Marge Bot <gitlab-merge-bot@gstreamer-foundation.org>	2022-12-14 13:00:28 +0300
commit	3a8536b45efa41e5af6eec250e998f24bcc4018c (patch)
tree	6bc0c7309d116a57ad0b9032a9e2b230e62fb11d /audio
parent	e5360ff43159ee4b70e875f1786a4ca32968385b (diff)