Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.freedesktop.org/gstreamer/gst-plugins-rs.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMathieu Duponchelle <mathieu@centricular.com>2021-02-19 02:45:34 +0300
committerMathieu Duponchelle <mathieu@centricular.com>2021-02-19 17:58:14 +0300
commit77bf0c945eae4f9a8a4d50c120dfe469175c037b (patch)
tree6e03b10f63809d69b7acb0169084374689aa9108
parenta7df50e68fab31a52ab31361e75f2d00c11955af (diff)
text: new element for text processing: regex
The element expects an array of "commands", as GstStructures, in the form: `operation, pattern=<pattern>, ...`. The only operation implemented for now is replace-all, e.g. `replace-all, pattern=foo, replacement=bar`. Other operations can be implemented if useful in the future; e.g. "match" could post a message to the bus when the pattern is encountered. The main use case for this is automatic speech recognition, as implemented by e.g. awstranscribe, as users may want to replace swear words with tamer language. Commands are applied in order. The interface is usable through the CLI with the usual escaping strategies, though trying to pass in actual regular expressions through it is a bit tricky, as this introduces yet another level of escaping.
-rw-r--r--Cargo.toml1
-rw-r--r--ci/utils.py2
-rw-r--r--meson.build1
-rw-r--r--text/regex/Cargo.toml47
-rw-r--r--text/regex/build.rs3
-rw-r--r--text/regex/src/gstregex/imp.rs321
-rw-r--r--text/regex/src/gstregex/mod.rs31
-rw-r--r--text/regex/src/lib.rs36
-rw-r--r--text/regex/tests/regex.rs76
9 files changed, 517 insertions, 1 deletions
diff --git a/Cargo.toml b/Cargo.toml
index a8c14922b..a20ad55ff 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -24,6 +24,7 @@ members = [
"video/hsv",
"text/wrap",
"text/json",
+ "text/regex",
]
[profile.release]
diff --git a/ci/utils.py b/ci/utils.py
index e0c7626d0..8951e4b05 100644
--- a/ci/utils.py
+++ b/ci/utils.py
@@ -2,7 +2,7 @@ import os
DIRS = ['audio', 'generic', 'net', 'text', 'utils', 'video']
# Plugins whose name is prefixed by 'rs'
-RS_PREFIXED = ['audiofx', 'closedcaption', 'dav1d', 'file', 'json']
+RS_PREFIXED = ['audiofx', 'closedcaption', 'dav1d', 'file', 'json', 'regex']
OVERRIDE = {'wrap': 'rstextwrap', 'flavors': 'rsflv'}
diff --git a/meson.build b/meson.build
index 15ffc254a..027b63ea0 100644
--- a/meson.build
+++ b/meson.build
@@ -49,6 +49,7 @@ plugins_rep = {
'utils/togglerecord': 'libgsttogglerecord',
'video/hsv': 'libgsthsv',
'text/json': 'libgstrsjson',
+ 'text/regex': 'libgstrsregex',
}
exclude = []
diff --git a/text/regex/Cargo.toml b/text/regex/Cargo.toml
new file mode 100644
index 000000000..e7a7750b6
--- /dev/null
+++ b/text/regex/Cargo.toml
@@ -0,0 +1,47 @@
+[package]
+name = "gst-plugin-regex"
+version = "0.6.0"
+authors = ["Mathieu Duponchelle <mathieu@centricular.com>"]
+license = "LGPL-2.1-or-later"
+edition = "2018"
+description = "Rust Regular Expression Plugin"
+repository = "https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs"
+
+[dependencies]
+glib = { git = "https://github.com/gtk-rs/gtk-rs" }
+once_cell = "1.0"
+regex = "1"
+
+[dependencies.gst]
+git = "https://gitlab.freedesktop.org/gstreamer/gstreamer-rs"
+features = ["v1_14"]
+package="gstreamer"
+
+[lib]
+name = "gstrsregex"
+crate-type = ["cdylib", "rlib"]
+path = "src/lib.rs"
+
+[build-dependencies]
+gst-plugin-version-helper = { path="../../version-helper" }
+
+[dev-dependencies.gst-check]
+git = "https://gitlab.freedesktop.org/gstreamer/gstreamer-rs"
+package="gstreamer-check"
+
+[features]
+# GStreamer 1.14 is required for static linking
+static = ["gst/v1_14"]
+
+[package.metadata.capi]
+min_version = "0.7.0"
+
+[package.metadata.capi.header]
+enabled = false
+
+[package.metadata.capi.library]
+install_subdir = "gstreamer-1.0"
+versioning = false
+
+[package.metadata.capi.pkg_config]
+requires_private = "gstreamer-1.0, gobject-2.0, glib-2.0, gmodule-2.0"
diff --git a/text/regex/build.rs b/text/regex/build.rs
new file mode 100644
index 000000000..17be1215e
--- /dev/null
+++ b/text/regex/build.rs
@@ -0,0 +1,3 @@
+// Build script entry point: all work is delegated to the shared version
+// helper crate.
+// NOTE(review): presumably this is what provides the `COMMIT_ID` and
+// `BUILD_REL_DATE` variables read through `env!` in src/lib.rs -- confirm
+// against the helper crate.
+fn main() {
+    gst_plugin_version_helper::get_info();
+}
diff --git a/text/regex/src/gstregex/imp.rs b/text/regex/src/gstregex/imp.rs
new file mode 100644
index 000000000..025749b4b
--- /dev/null
+++ b/text/regex/src/gstregex/imp.rs
@@ -0,0 +1,321 @@
+// Copyright (C) 2021 Mathieu Duponchelle <mathieu@centricular.com>
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Library General Public
+// License as published by the Free Software Foundation; either
+// version 2 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Library General Public License for more details.
+//
+// You should have received a copy of the GNU Library General Public
+// License along with this library; if not, write to the
+// Free Software Foundation, Inc., 51 Franklin Street, Suite 500,
+// Boston, MA 02110-1335, USA.
+
+use glib::prelude::*;
+use glib::subclass;
+use glib::subclass::prelude::*;
+use gst::gst_error;
+use gst::prelude::*;
+use gst::subclass::prelude::*;
+
+use regex::Regex;
+use std::default::Default;
+use std::sync::Mutex;
+
+use once_cell::sync::Lazy;
+
+/// Debug category named "regex", created lazily on first use and passed to
+/// the logging macros in this file.
+static CAT: Lazy<gst::DebugCategory> = Lazy::new(|| {
+    let description = Some("Regular Expression element");
+
+    gst::DebugCategory::new("regex", gst::DebugColorFlags::empty(), description)
+});
+
+/// What a command does with the text matched by its pattern.
+enum Operation {
+    /// Replace every match with the contained replacement string.
+    ReplaceAll(String),
+}
+
+/// A single configured command: a pattern and the operation to run on its
+/// matches.
+struct Command {
+    /// Pattern source text, kept so the "commands" property getter can
+    /// report it back.
+    pattern: String,
+    /// Compiled form of `pattern`.
+    regex: Regex,
+    /// Operation applied wherever `regex` matches.
+    operation: Operation,
+}
+
+/// Mutable element state; `RegEx` stores it behind a `Mutex`.
+struct State {
+    /// Commands applied to every incoming buffer, in order.
+    commands: Vec<Command>,
+}
+
+impl Default for State {
+    /// Initial state: no commands configured.
+    fn default() -> Self {
+        State {
+            commands: Vec::new(),
+        }
+    }
+}
+
+/// Implementation struct of the `regex` element.
+pub struct RegEx {
+    /// Pad on which processed buffers are pushed.
+    srcpad: gst::Pad,
+    /// Pad on which text buffers arrive; its chain function is `sink_chain`.
+    sinkpad: gst::Pad,
+    /// Configured commands, shared between the chain function and the
+    /// property accessors.
+    state: Mutex<State>,
+}
+
+impl RegEx {
+    /// Chain function of the sink pad.
+    ///
+    /// Decodes `buffer` as UTF-8, runs every configured command over the text
+    /// in order, and pushes the result on the source pad as a new buffer.
+    /// Flags, timestamps and metas are copied over from the input buffer.
+    ///
+    /// # Errors
+    ///
+    /// Posts an element error and returns `gst::FlowError::Error` when the
+    /// buffer cannot be mapped readable or does not hold valid UTF-8.
+    /// Otherwise the result of the downstream push is returned.
+    fn sink_chain(
+        &self,
+        _pad: &gst::Pad,
+        element: &super::RegEx,
+        buffer: gst::Buffer,
+    ) -> Result<gst::FlowSuccess, gst::FlowError> {
+        let map = buffer.map_readable().map_err(|_| {
+            gst_error!(CAT, obj: element, "Can't map buffer readable");
+            gst::element_error!(element, gst::CoreError::Failed, ["Failed to map buffer"]);
+            gst::FlowError::Error
+        })?;
+
+        let mut text = std::str::from_utf8(&map)
+            .map_err(|err| {
+                gst_error!(CAT, obj: element, "Can't decode utf8: {}", err);
+                gst::element_error!(
+                    element,
+                    gst::StreamError::Decode,
+                    ["Failed to decode utf8: {}", err]
+                );
+
+                gst::FlowError::Error
+            })?
+            .to_string();
+
+        // Keep the state lock only while the commands run. It must be
+        // released before pushing downstream: the push can block for an
+        // arbitrary time, and would otherwise stall any thread that gets or
+        // sets the "commands" property in the meantime.
+        {
+            let state = self.state.lock().unwrap();
+
+            for command in &state.commands {
+                match &command.operation {
+                    Operation::ReplaceAll(replacement) => {
+                        // A borrowed result is just the input text handed
+                        // back, so only an owned result needs to replace
+                        // `text`; this skips a copy per non-matching command.
+                        let replaced =
+                            match command.regex.replace_all(&text, replacement.as_str()) {
+                                std::borrow::Cow::Owned(replaced) => Some(replaced),
+                                std::borrow::Cow::Borrowed(_) => None,
+                            };
+
+                        if let Some(replaced) = replaced {
+                            text = replaced;
+                        }
+                    }
+                }
+            }
+        }
+
+        let mut outbuf = gst::Buffer::from_mut_slice(text.into_bytes());
+
+        {
+            let outbuf_mut = outbuf.get_mut().unwrap();
+            // The result of the copy is deliberately ignored, as before.
+            let _ = buffer.copy_into(
+                outbuf_mut,
+                gst::BufferCopyFlags::FLAGS
+                    | gst::BufferCopyFlags::TIMESTAMPS
+                    | gst::BufferCopyFlags::META,
+                0,
+                None,
+            );
+        }
+
+        self.srcpad.push(outbuf)
+    }
+}
+
+impl ObjectSubclass for RegEx {
+    const NAME: &'static str = "RsRegEx";
+    type Type = super::RegEx;
+    type ParentType = gst::Element;
+    type Interfaces = ();
+    type Instance = gst::subclass::ElementInstanceStruct<Self>;
+    type Class = subclass::simple::ClassStruct<Self>;
+
+    glib::object_subclass!();
+
+    /// Builds the implementation struct: both pads are created from the
+    /// class' "sink" and "src" templates, and the state starts out empty.
+    fn with_class(klass: &Self::Class) -> Self {
+        // Both pads carry the same flags.
+        let pad_flags = gst::PadFlags::PROXY_CAPS | gst::PadFlags::FIXED_CAPS;
+
+        let sink_templ = klass.get_pad_template("sink").unwrap();
+        let sinkpad = gst::Pad::builder_with_template(&sink_templ, Some("sink"))
+            .chain_function(|pad, parent, buffer| {
+                // Flow error is the fallback value handed to the panic
+                // wrapper; otherwise the buffer goes to `sink_chain`.
+                RegEx::catch_panic_pad_function(
+                    parent,
+                    || Err(gst::FlowError::Error),
+                    |imp, element| imp.sink_chain(pad, element, buffer),
+                )
+            })
+            .flags(pad_flags)
+            .build();
+
+        let src_templ = klass.get_pad_template("src").unwrap();
+        let srcpad = gst::Pad::builder_with_template(&src_templ, Some("src"))
+            .flags(pad_flags)
+            .build();
+
+        Self {
+            srcpad,
+            sinkpad,
+            state: Mutex::new(State::default()),
+        }
+    }
+}
+
+impl ObjectImpl for RegEx {
+    /// Declares the element's only property, "commands": an array of boxed
+    /// `gst::Structure` values, readable, writable and flagged as mutable in
+    /// the PLAYING state.
+    fn properties() -> &'static [glib::ParamSpec] {
+        static PROPERTIES: Lazy<Vec<glib::ParamSpec>> = Lazy::new(|| {
+            vec![glib::ParamSpec::array(
+                "commands",
+                "Commands",
+                "A set of commands to apply on input text",
+                &glib::ParamSpec::boxed(
+                    "command",
+                    "Command",
+                    "A command to apply on input text",
+                    gst::Structure::static_type(),
+                    glib::ParamFlags::READWRITE,
+                ),
+                glib::ParamFlags::READWRITE | gst::PARAM_FLAG_MUTABLE_PLAYING,
+            )]
+        });
+
+        PROPERTIES.as_ref()
+    }
+
+    /// Adds the sink and source pads to the element after parent
+    /// construction.
+    fn constructed(&self, obj: &Self::Type) {
+        self.parent_constructed(obj);
+
+        obj.add_pad(&self.sinkpad).unwrap();
+        obj.add_pad(&self.srcpad).unwrap();
+    }
+
+    /// Replaces the command list from the "commands" property value.
+    ///
+    /// Each array entry is a structure whose name is the operation
+    /// ("replace-all", or "replace_all" as an alias) with a string "pattern"
+    /// field and, for replace operations, a string "replacement" field.
+    /// Invalid entries are logged against the element and skipped; the
+    /// remaining valid ones are still installed.
+    ///
+    /// # Panics
+    ///
+    /// Panics via `unimplemented!()` for any other property name.
+    fn set_property(
+        &self,
+        obj: &Self::Type,
+        _id: usize,
+        value: &glib::Value,
+        pspec: &glib::ParamSpec,
+    ) {
+        match pspec.get_name() {
+            "commands" => {
+                let array: gst::Array = value.get_some().expect("type checked upstream");
+
+                // Build the new list without holding the state lock, so that
+                // compiling the patterns never blocks the streaming thread.
+                let mut commands = Vec::new();
+
+                for command in array.as_slice() {
+                    let s = match command
+                        .get::<gst::Structure>()
+                        .expect("type checked upstream")
+                    {
+                        Some(s) => s,
+                        None => {
+                            continue;
+                        }
+                    };
+                    let operation = s.get_name();
+
+                    let pattern = match s.get::<String>("pattern") {
+                        Ok(Some(pattern)) => pattern,
+                        Ok(None) | Err(_) => {
+                            gst_error!(
+                                CAT,
+                                obj: obj,
+                                "All commands require a pattern field as a string"
+                            );
+                            continue;
+                        }
+                    };
+
+                    let regex = match Regex::new(&pattern) {
+                        Ok(regex) => regex,
+                        Err(err) => {
+                            gst_error!(CAT, obj: obj, "Failed to compile regex: {:?}", err);
+                            continue;
+                        }
+                    };
+
+                    match operation {
+                        "replace-all" | "replace_all" => {
+                            let replacement = match s.get::<String>("replacement") {
+                                Ok(Some(replacement)) => replacement,
+                                Ok(None) | Err(_) => {
+                                    gst_error!(
+                                        CAT,
+                                        obj: obj,
+                                        "Replace operations require a replacement field as a string"
+                                    );
+                                    continue;
+                                }
+                            };
+                            commands.push(Command {
+                                pattern,
+                                regex,
+                                operation: Operation::ReplaceAll(replacement),
+                            });
+                        }
+                        val => {
+                            gst_error!(CAT, obj: obj, "Unknown operation {}", val);
+                        }
+                    }
+                }
+
+                // Swap the finished list in under a short lock.
+                self.state.lock().unwrap().commands = commands;
+            }
+            _ => unimplemented!(),
+        }
+    }
+
+    /// Returns the current command list as an array of structures, one
+    /// "replace-all" structure with "pattern" and "replacement" fields per
+    /// stored command.
+    ///
+    /// # Panics
+    ///
+    /// Panics via `unimplemented!()` for any other property name.
+    fn get_property(&self, _obj: &Self::Type, _id: usize, pspec: &glib::ParamSpec) -> glib::Value {
+        match pspec.get_name() {
+            "commands" => {
+                let state = self.state.lock().unwrap();
+                let mut commands = vec![];
+                for command in &state.commands {
+                    match command.operation {
+                        Operation::ReplaceAll(ref replacement) => {
+                            commands.push(
+                                gst::Structure::new(
+                                    &"replace-all",
+                                    &[("pattern", &command.pattern), ("replacement", &replacement)],
+                                )
+                                .to_send_value(),
+                            );
+                        }
+                    }
+                }
+                gst::Array::from_owned(commands).to_value()
+            }
+            _ => unimplemented!(),
+        }
+    }
+}
+
+impl ElementImpl for RegEx {
+    /// Static element metadata: long name, classification, description and
+    /// author.
+    fn metadata() -> Option<&'static gst::subclass::ElementMetadata> {
+        static ELEMENT_METADATA: Lazy<gst::subclass::ElementMetadata> = Lazy::new(|| {
+            gst::subclass::ElementMetadata::new(
+                "Regular Expression processor",
+                "Text/Filter",
+                "Applies operations according to regular expressions",
+                "Mathieu Duponchelle <mathieu@centricular.com>",
+            )
+        });
+
+        Some(&*ELEMENT_METADATA)
+    }
+
+    /// Static pad templates: one always-present "src" and one always-present
+    /// "sink" pad, both with `text/x-raw, format=utf8` caps.
+    fn pad_templates() -> &'static [gst::PadTemplate] {
+        static PAD_TEMPLATES: Lazy<Vec<gst::PadTemplate>> = Lazy::new(|| {
+            let caps = gst::Caps::builder("text/x-raw")
+                .field("format", &"utf8")
+                .build();
+
+            // The two templates differ only in name and direction.
+            let always_template = |name: &str, direction: gst::PadDirection| {
+                gst::PadTemplate::new(name, direction, gst::PadPresence::Always, &caps).unwrap()
+            };
+
+            vec![
+                always_template("src", gst::PadDirection::Src),
+                always_template("sink", gst::PadDirection::Sink),
+            ]
+        });
+
+        PAD_TEMPLATES.as_ref()
+    }
+}
diff --git a/text/regex/src/gstregex/mod.rs b/text/regex/src/gstregex/mod.rs
new file mode 100644
index 000000000..45355aa1a
--- /dev/null
+++ b/text/regex/src/gstregex/mod.rs
@@ -0,0 +1,31 @@
+// Copyright (C) 2021 Mathieu Duponchelle <mathieu@centricular.com>
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Library General Public
+// License as published by the Free Software Foundation; either
+// version 2 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Library General Public License for more details.
+//
+// You should have received a copy of the GNU Library General Public
+// License along with this library; if not, write to the
+// Free Software Foundation, Inc., 51 Franklin Street, Suite 500,
+// Boston, MA 02110-1335, USA.
+
+use glib::prelude::*;
+
+mod imp;
+
+// Public wrapper type for the element implemented in `imp::RegEx`.
+glib::wrapper! {
+    pub struct RegEx(ObjectSubclass<imp::RegEx>) @extends gst::Element, gst::Object;
+}
+
+// NOTE(review): these impls assert that the element may be shared and sent
+// across threads. `imp::RegEx` keeps its mutable state in a `Mutex`, which is
+// presumably what they rely on -- confirm that the remaining fields are
+// thread-safe as well.
+unsafe impl Send for RegEx {}
+unsafe impl Sync for RegEx {}
+
+/// Registers the element with `plugin` under the name "regex", with
+/// `gst::Rank::None`.
+pub fn register(plugin: &gst::Plugin) -> Result<(), glib::BoolError> {
+    let element_type = RegEx::static_type();
+
+    gst::Element::register(Some(plugin), "regex", gst::Rank::None, element_type)
+}
diff --git a/text/regex/src/lib.rs b/text/regex/src/lib.rs
new file mode 100644
index 000000000..7389d8a3c
--- /dev/null
+++ b/text/regex/src/lib.rs
@@ -0,0 +1,36 @@
+// Copyright (C) 2021 Mathieu Duponchelle <mathieu@centricular.com>
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Library General Public
+// License as published by the Free Software Foundation; either
+// version 2 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Library General Public License for more details.
+//
+// You should have received a copy of the GNU Library General Public
+// License along with this library; if not, write to the
+// Free Software Foundation, Inc., 51 Franklin Street, Suite 500,
+// Boston, MA 02110-1335, USA.
+
+#![recursion_limit = "128"]
+
+mod gstregex;
+
+/// Plugin entry point handed to `gst::plugin_define!`: registers the regex
+/// element and forwards any registration error.
+fn plugin_init(plugin: &gst::Plugin) -> Result<(), glib::BoolError> {
+    gstregex::register(plugin)?;
+    Ok(())
+}
+
+// Plugin definition for `rsregex`. Most values come from Cargo package
+// metadata; `COMMIT_ID` and `BUILD_REL_DATE` must be set in the build
+// environment, since `env!` reads them at compile time.
+gst::plugin_define!(
+    rsregex,
+    env!("CARGO_PKG_DESCRIPTION"),
+    plugin_init,
+    concat!(env!("CARGO_PKG_VERSION"), "-", env!("COMMIT_ID")),
+    "LGPL",
+    env!("CARGO_PKG_NAME"),
+    env!("CARGO_PKG_NAME"),
+    env!("CARGO_PKG_REPOSITORY"),
+    env!("BUILD_REL_DATE")
+);
diff --git a/text/regex/tests/regex.rs b/text/regex/tests/regex.rs
new file mode 100644
index 000000000..292672834
--- /dev/null
+++ b/text/regex/tests/regex.rs
@@ -0,0 +1,76 @@
+// Copyright (C) 2020 Mathieu Duponchelle <mathieu@centricular.com>
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Library General Public
+// License as published by the Free Software Foundation; either
+// version 2 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Library General Public License for more details.
+//
+// You should have received a copy of the GNU Library General Public
+// License along with this library; if not, write to the
+// Free Software Foundation, Inc., 51 Franklin Street, Suite 500,
+// Boston, MA 02110-1335, USA.
+
+use glib::prelude::*;
+
+/// One-time test setup: initializes GStreamer and statically registers the
+/// regex plugin. The `Once` guard makes repeated calls no-ops.
+fn init() {
+    use std::sync::Once;
+
+    static INIT: Once = Once::new();
+
+    INIT.call_once(|| {
+        gst::init().unwrap();
+        gstrsregex::plugin_register_static().expect("regex test");
+    });
+}
+
+/// Pushes one text buffer through a `regex` element configured with a single
+/// replace-all command, then checks the output text and that PTS and
+/// duration were carried over.
+#[test]
+fn test_replace_all() {
+    init();
+
+    let input = b"crap that mothertrapper";
+    let expected_output = "trap that mothertrapper";
+
+    let mut h = gst_check::Harness::new("regex");
+
+    // Configure the element under test: replace every "crap" with "trap".
+    {
+        let regex = h.get_element().expect("Could not create regex");
+
+        let command = gst::Structure::new(
+            "replace-all",
+            &[("pattern", &"crap"), ("replacement", &"trap")],
+        );
+        let commands = gst::Array::from_owned(vec![command.to_send_value()]);
+
+        regex.set_property("commands", &commands).unwrap();
+    }
+
+    h.set_src_caps_str("text/x-raw, format=utf8");
+
+    // Input buffer: PTS 0, duration 2 seconds.
+    let mut inbuf = gst::Buffer::from_mut_slice(input.to_vec());
+    {
+        let inbuf_ref = inbuf.get_mut().unwrap();
+        inbuf_ref.set_pts(gst::ClockTime::from_seconds(0));
+        inbuf_ref.set_duration(gst::ClockTime::from_seconds(2));
+    }
+
+    assert_eq!(h.push(inbuf), Ok(gst::FlowSuccess::Ok));
+
+    let outbuf = h.pull().expect("Couldn't pull buffer");
+
+    assert_eq!(outbuf.get_pts(), 0.into());
+    assert_eq!(outbuf.get_duration(), 2 * gst::SECOND);
+
+    let map = outbuf.map_readable().expect("Couldn't map buffer readable");
+
+    assert_eq!(std::str::from_utf8(map.as_ref()), Ok(expected_output));
+}