Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> 2018-11-25 13:28:28 +0300
committer: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> 2018-11-25 13:28:28 +0300
commit: 7d94b25724ab44f753f15e2280c500c641cb7d58 (patch)
tree: 50cf0a2dbe4b83de0aa007dd77372908dca0169b
parent: b9f05040a82089df19e146d62905b5501f8cc35f (diff)
Handle comma as thousands separator
-rw-r--r-- tests/training/basics/test_mini_batch_fit.sh 2
-rwxr-xr-x tools/diff-nums.py 9
2 files changed, 7 insertions, 4 deletions
diff --git a/tests/training/basics/test_mini_batch_fit.sh b/tests/training/basics/test_mini_batch_fit.sh
index 2f1ba77..bef96b9 100644
--- a/tests/training/basics/test_mini_batch_fit.sh
+++ b/tests/training/basics/test_mini_batch_fit.sh
@@ -23,7 +23,7 @@ test -e batch_fit/model.npz.amun.yml
test -e batch_fit.log
-cat batch_fit.log | grep 'Ep\. 1 :' | sed -r 's/.*Up\. ([0-9]+) .*Sen. ([0-9]+).*/\2\/\1/' | bc > batch_fit.out
+cat batch_fit.log | grep 'Ep\. 1 :' | sed -r 's/.*Up\. ([0-9]+) .*Sen. ([,0-9]+).*/\2\/\1/' | sed 's/,//g' | bc > batch_fit.out
$MRT_TOOLS/diff.sh batch_fit.out batch_fit.expected > batch_fit.diff
# Exit with success code
diff --git a/tools/diff-nums.py b/tools/diff-nums.py
index 5b895bb..18b4415 100755
--- a/tools/diff-nums.py
+++ b/tools/diff-nums.py
@@ -6,7 +6,7 @@ import sys
import argparse
import re
-REGEX_NUMERIC = re.compile(r"^[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?$")
+REGEX_NUMERIC = re.compile(r"^[+-]?\d+(?:(?:,\d\d\d)+|(?:\d+))*(?:\.\d+)?(?:[eE][+-]?\d+)?$")
REGEX_STRIP_EP = re.compile(r"^\[valid\] Ep\. \d+ : Up\. ")
NORMALIZE_NUMPY = [
@@ -100,7 +100,8 @@ def read_line(iofile, separator=""):
def process_line(line):
line = REGEX_STRIP_EP.sub("[valid] ", line) # normalize "[valid] Ep. 1 : Up. 30" -> "[valid] 30"
line_toks = line.rstrip().replace("[[-", "[[ -").split() # tokenize
- nums = [float(s) for s in line_toks if is_numeric(s)] # find all numbers
+ nums = [float(s.replace(',', '')) # handle comma as thousands separator
+ for s in line_toks if is_numeric(s)] # find all numbers
text = ' '.join(["<NUM>" if is_numeric(s) else s # text format with numbers normalized
for s in line_toks])
return line_toks, nums, text
@@ -115,7 +116,9 @@ def message(text, args):
text += "\n"
args.output.write(text)
args.message_count += 1
- if args.output is not sys.stdout and args.output is not sys.stderr and not args.quiet:
+ if not args.quiet \
+ and args.output is not sys.stdout \
+ and args.output is not sys.stderr:
sys.stderr.write(text)