diff options
author | Marcin Junczys-Dowmunt <junczys@amu.edu.pl> | 2017-03-24 20:37:35 +0300 |
---|---|---|
committer | Marcin Junczys-Dowmunt <junczys@amu.edu.pl> | 2017-03-24 20:37:35 +0300 |
commit | 5dbeadb47dbb70b85061aa08c8c55193d9fbe74a (patch) | |
tree | 2e0e7d1698b0c756896c6af59cde4eb4f5eabd02 /examples/training/scripts/normalise-romanian.py | |
parent | e7157515d3f30b44649f2f48b4a70b3999530a02 (diff) |
added training example
Diffstat (limited to 'examples/training/scripts/normalise-romanian.py')
-rwxr-xr-x | examples/training/scripts/normalise-romanian.py | 17 |
1 files changed, 17 insertions, 0 deletions
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Author: Barry Haddow
# Distributed under MIT license

#
# Normalise Romanian s-comma and t-comma
#
# Reads UTF-8 text on stdin and writes it to stdout with the cedilla forms of
# S/s/T/t replaced by the corresponding comma-below forms. All other
# characters are passed through unchanged.

import io
import sys

# Single translation table so each line is rewritten in one pass instead of
# four chained str.replace() calls. No target character is also a source
# character, so the result is identical to applying the replacements in turn.
_CEDILLA_TO_COMMA = str.maketrans({
    "\u015e": "\u0218",  # S with cedilla -> S with comma below
    "\u015f": "\u0219",  # s with cedilla -> s with comma below
    "\u0162": "\u021a",  # T with cedilla -> T with comma below
    "\u0163": "\u021b",  # t with cedilla -> t with comma below
})


def normalise(line):
    """Return *line* with s/t-cedilla replaced by s/t-comma-below.

    The string is otherwise returned unchanged (including any trailing
    newline); an empty string yields an empty string.
    """
    return line.translate(_CEDILLA_TO_COMMA)


def normalise_stream(istream, ostream):
    """Copy every line of text stream *istream* to *ostream*, normalised.

    *istream* must be iterable line by line and *ostream* must provide
    ``write(str)``. Neither stream is flushed or closed here; that is left
    to the caller.
    """
    for line in istream:
        ostream.write(normalise(line))


def main():
    """Normalise UTF-8 text from stdin to stdout."""
    # Wrap the raw byte buffers so that input and output are always UTF-8,
    # regardless of the locale's preferred encoding.
    # NOTE: the default newline handling of TextIOWrapper is kept as before:
    # '\r\n' and '\r' are read as '\n'.
    istream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
    ostream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
    try:
        normalise_stream(istream, ostream)
    finally:
        # Flush explicitly rather than relying on finalisation at interpreter
        # shutdown, then detach so that garbage-collecting the wrappers does
        # not close the interpreter's own stdin/stdout buffers.
        ostream.flush()
        ostream.detach()
        istream.detach()


if __name__ == "__main__":
    main()