Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Marcin Junczys-Dowmunt <Marcin.JunczysDowmunt@microsoft.com> 2019-02-05 02:32:51 +0300
committer Marcin Junczys-Dowmunt <Marcin.JunczysDowmunt@microsoft.com> 2019-02-05 02:32:51 +0300
commit 0e51199e0a7a85c4a8f2ab5d6787a01584576ada (patch)
tree ddaa845eb539119b56ae33ba170941893aa2121e
parent d9f528d869b37f7a0190ecb7537eeae73f7d7229 (diff)
split off parentheses
-rwxr-xr-x tools/diff-nums.py 3
1 files changed, 2 insertions, 1 deletions
diff --git a/tools/diff-nums.py b/tools/diff-nums.py
index 18b4415..0e527ad 100755
--- a/tools/diff-nums.py
+++ b/tools/diff-nums.py
@@ -16,7 +16,7 @@ NORMALIZE_NUMPY = [
("...) ", "... "),
("..., ", "... "),
("]", " ]"),
- ("[", "[ ")
+ ("[", "[ "),
]
@@ -99,6 +99,7 @@ def read_line(iofile, separator=""):
def process_line(line):
line = REGEX_STRIP_EP.sub("[valid] ", line) # normalize "[valid] Ep. 1 : Up. 30" -> "[valid] 30"
+ line = line.replace("(", "( ").replace(")", " )") # insert space before and after parentheses
line_toks = line.rstrip().replace("[[-", "[[ -").split() # tokenize
nums = [float(s.replace(',', '')) # handle comma as thousands separator
for s in line_toks if is_numeric(s)] # find all numbers