Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Marcin Junczys-Dowmunt <junczys@amu.edu.pl> 2017-03-24 20:37:35 +0300
committer Marcin Junczys-Dowmunt <junczys@amu.edu.pl> 2017-03-24 20:37:35 +0300
commit 5dbeadb47dbb70b85061aa08c8c55193d9fbe74a (patch)
tree 2e0e7d1698b0c756896c6af59cde4eb4f5eabd02 /examples/training/scripts/normalise-romanian.py
parent e7157515d3f30b44649f2f48b4a70b3999530a02 (diff)
added training example
Diffstat (limited to 'examples/training/scripts/normalise-romanian.py')
-rwxr-xr-x examples/training/scripts/normalise-romanian.py 17
1 files changed, 17 insertions, 0 deletions
diff --git a/examples/training/scripts/normalise-romanian.py b/examples/training/scripts/normalise-romanian.py
new file mode 100755
index 00000000..7d5e86ca
--- /dev/null
+++ b/examples/training/scripts/normalise-romanian.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Author: Barry Haddow
+# Distributed under MIT license
+
+#
+# Normalise Romanian s-cedilla and t-cedilla to s-comma and t-comma
+
+import io
+import sys
+istream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
+ostream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
+
+for line in istream:
+ line = line.replace("\u015e", "\u0218").replace("\u015f", "\u0219")
+ line = line.replace("\u0162", "\u021a").replace("\u0163", "\u021b")
+ ostream.write(line)