Merge branch 'master' into small-updates

author: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> 2018-11-15 13:23:40 +0300
committer: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> 2018-11-15 13:23:40 +0300
commit: 9b65326839049d2d63fe15f71b8bd60a5e7f236f (patch)
tree: 9528e52c60e15c5336271e84fce2b1365b469e08
parent: 14760f37ecb99f84a3b85cfc22c55ce27bf8a813 (diff)
parent: 32fe3c1a12bd17c9b7d0c1d11afd903dee954535 (diff)
130 files changed, 415 insertions, 321 deletions
diff --git a/.gitignore b/.gitignore
index 3746da8..d593105 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,3 +32,4 @@ models/transformer/*.bpe
 data/*/corpus.*
 data/*/*.bpe
 data/*/truecase*
+data/*/*.gz
diff --git a/README.md b/README.md
index 314cfb7..9f08abe 100644
--- a/README.md
+++ b/README.md
@@ -49,13 +49,33 @@ More invocation examples:
     ./run_mrt.sh tests/training/basics/test_valid_script.sh
     ./run_mrt.sh previous.log
 
-where _previous.log_ contains a list of test files in separate lines.
+where `previous.log` contains a list of test files, one per line.  This file
+is automatically generated each time `./run_mrt.sh` finishes running.
 
-Clean test artifacts:
+Cleaning test artifacts:
 
     make clean
 
 
+## Debugging failed tests
+
+Failed tests are displayed at the end of testing or in `previous.log`, e.g.:
+
+    Failed:
+    - tests/training/restoring/multi-gpu/test_async.sh
+    - tests/training/embeddings/test_custom_embeddings.sh
+    ---------------------
+    Ran 145 tests in 00:48:48.210s, 143 passed, 0 skipped, 2 failed
+
+Log messages are written to files ending with the `.sh.log` suffix:
+
+    less tests/training/restoring/multi-gpu/test_async.sh.log
+
+The last command in most tests is an execution of a custom `diff` tool, which
+prints the exact invocation command with absolute paths. It can be used to
+display the differences that caused the test to fail.
+
+
 ## Adding new tests
 
 Use templates provided in `tests/_template`.
@@ -63,7 +83,7 @@ Use templates provided in `tests/_template`.
 Please follow these recommendations:
 
 * For comparing outputs with numbers, please use float-friendly
-  `tools/diff-floats.py` instead of GNU `diff`
+  `tools/diff-nums.py` instead of GNU `diff`
 * Make your tests deterministic using `--no-shuffle --seed 1111` or similar
 * Make training execution as short as possible, for instance, by reducing the
   size of the network and the number of iterations
diff --git a/run_mrt.sh b/run_mrt.sh
index 403ce11..3b731f7 100755
--- a/run_mrt.sh
+++ b/run_mrt.sh
@@ -9,7 +9,7 @@
 
 # Environment variables:
 #  - MARIAN - path to Marian root directory
-#  - CUDA_VISIBLE_DEVICES - CUDA's variable specifying GPU devices
+#  - CUDA_VISIBLE_DEVICES - CUDA's variable specifying GPU device IDs
 #  - NUM_DEVICES - maximum number of GPU devices to be used
 
 SHELL=/bin/bash
@@ -146,9 +146,8 @@ do
         fi
 
         # Run test
-        test_stdout=$test_name.stdout
-        test_stderr=$test_name.stderr
-        $SHELL -x $test_file > $test_stdout 2> $test_stderr
+        # Note: stdout is merged into stderr and both go to the log (very few tests write to stdout)
+        $SHELL -x $test_file 2> $test_file.log 1>&2
         exit_code=$?
 
         # Check exit code
@@ -211,7 +210,7 @@ for test_name in "${tests_failed[@]}"; do
 done
 [[ -z "$tests_failed" ]] || echo "Logs:"
 for test_name in "${tests_failed[@]}"; do
-    echo "  - $(realpath $test_name | sed 's/.sh/.stderr/')"
+    echo "  - $(realpath $test_name | sed 's/\.sh/.sh.log/')"
 done
 
 # Print summary
diff --git a/tests/_self-adaptive/test_context_empty.sh b/tests/_self-adaptive/test_context_empty.sh
index 463a313..01a2301 100644
--- a/tests/_self-adaptive/test_context_empty.sh
+++ b/tests/_self-adaptive/test_context_empty.sh
@@ -14,7 +14,7 @@ $MRT_MARIAN/build/marian-adaptive \
   -t ubuntu.nocontext.src ubuntu.nocontext.ref --log nocontext.log < ubuntu.src > nocontext.out
 
 # Check outputs
-diff nocontext.out nocontext.expected > nocontext.diff
+$MRT_TOOLS/diff.sh nocontext.out nocontext.expected > nocontext.diff
 
 # Check if the log file does not contain training logs
 grep -q "Ep\." nocontext.log && exit 1
diff --git a/tests/_self-adaptive/test_context_partial.sh b/tests/_self-adaptive/test_context_partial.sh
index 2bd51f2..d956526 100644
--- a/tests/_self-adaptive/test_context_partial.sh
+++ b/tests/_self-adaptive/test_context_partial.sh
@@ -14,11 +14,11 @@ $MRT_MARIAN/build/marian-adaptive \
   -t ubuntu.contextpart.src ubuntu.contextpart.ref --log contextpart.log < ubuntu.src > contextpart.out
 
 # Check outputs
-diff contextpart.out contextpart.expected > contextpart.diff
+$MRT_TOOLS/diff.sh contextpart.out contextpart.expected > contextpart.diff
 
 # Check costs
 cat contextpart.log | $MRT_TOOLS/extract-costs.sh > contextpart.costs.out
-$MRT_TOOLS/diff-floats.py -p 0.01 contextpart.costs.out contextpart.costs.expected > contextpart.costs.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 contextpart.costs.out contextpart.costs.expected -o contextpart.costs.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/_self-adaptive/test_oracle_1_sent_2_epochs.sh b/tests/_self-adaptive/test_oracle_1_sent_2_epochs.sh
index f274886..568104e 100644
--- a/tests/_self-adaptive/test_oracle_1_sent_2_epochs.sh
+++ b/tests/_self-adaptive/test_oracle_1_sent_2_epochs.sh
@@ -20,15 +20,15 @@ $MRT_MARIAN/build/marian-adaptive \
   -t ubuntu.oracle_1s2e.src ubuntu.oracle_1s2e.ref --log oracle_1s2e.log < ubuntu.src > oracle_1s2e.out
 
 # Check outputs
-diff oracle_1s2e.out oracle.expected > oracle_1s2e.diff
+$MRT_TOOLS/diff.sh oracle_1s2e.out oracle.expected > oracle_1s2e.diff
 
 # Check BLEU
 $MRT_TOOLS/moses-scripts/scripts/generic/multi-bleu.perl -lc ubuntu.ref < oracle_1s2e.out > oracle_1s2e.bleu
-diff oracle_1s2e.bleu oracle.bleu.expected > oracle_1s2e.bleu.diff
+$MRT_TOOLS/diff.sh oracle_1s2e.bleu oracle.bleu.expected > oracle_1s2e.bleu.diff
 
 # Check costs
 cat oracle_1s2e.log | grep 'Ep\. ' | $MRT_TOOLS/extract-costs.sh > costs_1s2e.out
-$MRT_TOOLS/diff-floats.py -p 0.01 costs_1s2e.out costs.expected > costs_1s2e.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 costs_1s2e.out costs.expected -o costs_1s2e.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/_self-adaptive/test_oracle_2_sent_1_epoch.sh b/tests/_self-adaptive/test_oracle_2_sent_1_epoch.sh
index 9b61f5f..6547a61 100644
--- a/tests/_self-adaptive/test_oracle_2_sent_1_epoch.sh
+++ b/tests/_self-adaptive/test_oracle_2_sent_1_epoch.sh
@@ -20,15 +20,15 @@ $MRT_MARIAN/build/marian-adaptive \
   -t ubuntu.oracle_2s1e.src ubuntu.oracle_2s1e.ref --log oracle_2s1e.log < ubuntu.src > oracle_2s1e.out
 
 # Check outputs
-diff oracle_2s1e.out oracle.expected > oracle_2s1e.diff
+$MRT_TOOLS/diff.sh oracle_2s1e.out oracle.expected > oracle_2s1e.diff
 
 # Check BLEU
 $MRT_TOOLS/moses-scripts/scripts/generic/multi-bleu.perl -lc ubuntu.ref < oracle_2s1e.out > oracle_2s1e.bleu
-diff oracle_2s1e.bleu oracle.bleu.expected > oracle_2s1e.bleu.diff
+$MRT_TOOLS/diff.sh oracle_2s1e.bleu oracle.bleu.expected > oracle_2s1e.bleu.diff
 
 # Check costs
 cat oracle_2s1e.log | grep 'Ep\. ' | $MRT_TOOLS/extract-costs.sh > costs_2s1e.out
-$MRT_TOOLS/diff-floats.py -p 0.01 costs_2s1e.out costs.expected > costs_2s1e.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 costs_2s1e.out costs.expected -o costs_2s1e.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/_template/test_decoder.sh b/tests/_template/test_decoder.sh
index 1645cc9..bc0e200 100644
--- a/tests/_template/test_decoder.sh
+++ b/tests/_template/test_decoder.sh
@@ -15,7 +15,7 @@ rm -f decoder.{out,diff}
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml < text.in > decoder.out
 
 # Compare the output with the expected output
-diff decoder.out text.expected > decoder.diff
+$MRT_TOOLS/diff.sh decoder.out text.expected > decoder.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/_template/test_server.sh b/tests/_template/test_server.sh
index 6d02815..1714d85 100644
--- a/tests/_template/test_server.sh
+++ b/tests/_template/test_server.sh
@@ -30,7 +30,7 @@ python3 $MRT_MARIAN/scripts/server/client_example.py -p 8765 < text.in > server.
 kill $SERVER_PID
 
 # Compare the current output with the expected output
-diff server.out text.expected > server.diff
+$MRT_TOOLS/diff.sh server.out text.expected > server.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/_template/test_training.sh b/tests/_template/test_training.sh
index ab9af38..551db67 100644
--- a/tests/_template/test_training.sh
+++ b/tests/_template/test_training.sh
@@ -25,7 +25,7 @@ test -e train.log
 
 # Compare the current output with the expected output
 cat train.log | $MRT_TOOLS/extract-costs.sh > train.out
-$MRT_TOOLS/diff-floats.py train.out train.expected > train.diff
+$MRT_TOOLS/diff-nums.py train.out train.expected -o train.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/align-ensemble/test_align_ensemble.sh b/tests/decoder/align-ensemble/test_align_ensemble.sh
index 4967a7f..066702c 100644
--- a/tests/decoder/align-ensemble/test_align_ensemble.sh
+++ b/tests/decoder/align-ensemble/test_align_ensemble.sh
@@ -5,7 +5,7 @@ set -e
 
 # Test code goes here
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.ensemble.yml --mini-batch 32 -b 5 --alignment < text.in > align.out
-$MRT_TOOLS/diff-floats.py -p 0.0001 align.out align.expected > align.diff
+$MRT_TOOLS/diff-nums.py -p 0.0001 align.out align.expected -o align.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/align-ensemble/test_align_ensemble_beam_1.sh b/tests/decoder/align-ensemble/test_align_ensemble_beam_1.sh
index 65c01c1..b4809d6 100644
--- a/tests/decoder/align-ensemble/test_align_ensemble_beam_1.sh
+++ b/tests/decoder/align-ensemble/test_align_ensemble_beam_1.sh
@@ -5,7 +5,7 @@ set -e
 
 # Test code goes here
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.ensemble.yml --mini-batch 1 -b 1 --alignment < text.in > align.b1.out
-$MRT_TOOLS/diff-floats.py -p 0.0001 align.b1.out align.b1.expected > align.b1.diff
+$MRT_TOOLS/diff-nums.py -p 0.0001 align.b1.out align.b1.expected -o align.b1.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/align/test_align.sh b/tests/decoder/align/test_align.sh
index df46e30..7c0028d 100644
--- a/tests/decoder/align/test_align.sh
+++ b/tests/decoder/align/test_align.sh
@@ -5,7 +5,7 @@ set -e
 
 # Test code goes here
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml --mini-batch 16 -b 5 --alignment < text.in > align.out
-$MRT_TOOLS/diff-floats.py -p 0.0001 align.out align.expected > align.diff
+$MRT_TOOLS/diff-nums.py -p 0.0001 align.out align.expected -o align.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/align/test_align_beam_1.sh b/tests/decoder/align/test_align_beam_1.sh
index ddb38a6..f4869ac 100644
--- a/tests/decoder/align/test_align_beam_1.sh
+++ b/tests/decoder/align/test_align_beam_1.sh
@@ -5,7 +5,7 @@ set -e
 
 # Test code goes here
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml --mini-batch 1 -b 1 --alignment < text.in > align.b1.out
-$MRT_TOOLS/diff-floats.py -p 0.0001 align.b1.out align.b1.expected > align.b1.diff
+$MRT_TOOLS/diff-nums.py -p 0.0001 align.b1.out align.b1.expected -o align.b1.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/align/test_align_beam_1_batched.sh b/tests/decoder/align/test_align_beam_1_batched.sh
index e5aba89..68ca362 100644
--- a/tests/decoder/align/test_align_beam_1_batched.sh
+++ b/tests/decoder/align/test_align_beam_1_batched.sh
@@ -5,7 +5,7 @@ set -e
 
 # Test code goes here
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml --mini-batch 32 -b 1 --alignment < text.in > align.batched.out
-$MRT_TOOLS/diff-floats.py -p 0.0001 align.batched.out align.batched.expected > align.batched.diff
+$MRT_TOOLS/diff-nums.py -p 0.0001 align.batched.out align.batched.expected -o align.batched.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/align/test_align_nbest.sh b/tests/decoder/align/test_align_nbest.sh
index bcb0705..8add32f 100644
--- a/tests/decoder/align/test_align_nbest.sh
+++ b/tests/decoder/align/test_align_nbest.sh
@@ -5,7 +5,7 @@ set -e
 
 # Test code goes here
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml --mini-batch 16 -b 3 --n-best --alignment < text.in > align_nbest.out
-$MRT_TOOLS/diff-floats.py -p 0.0001 align_nbest.out align_nbest.expected > align_nbest.diff
+$MRT_TOOLS/diff-nums.py -p 0.0001 align_nbest.out align_nbest.expected -o align_nbest.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/align/test_align_threshold.sh b/tests/decoder/align/test_align_threshold.sh
index 4fc11a9..6c672b1 100644
--- a/tests/decoder/align/test_align_threshold.sh
+++ b/tests/decoder/align/test_align_threshold.sh
@@ -5,7 +5,7 @@ set -e
 
 # Test code goes here
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml --mini-batch 16 -b 5 --alignment 0.35 < text.in > align_threshold.out
-$MRT_TOOLS/diff-floats.py -p 0.0001 align_threshold.out align_threshold.expected > align_threshold.diff
+$MRT_TOOLS/diff-nums.py -p 0.0001 align_threshold.out align_threshold.expected -o align_threshold.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/align/test_soft_align.sh b/tests/decoder/align/test_soft_align.sh
index f41f8a1..c08a2e2 100644
--- a/tests/decoder/align/test_soft_align.sh
+++ b/tests/decoder/align/test_soft_align.sh
@@ -6,7 +6,7 @@ set -e
 # Test code goes here
 rm -f soft.out soft.raw.out
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml --mini-batch 5 -b 5 --alignment soft < text.in > soft.out
-$MRT_TOOLS/diff-floats.py -s , -p 0.0001 soft.out soft.expected > soft.diff
+$MRT_TOOLS/diff-nums.py -s , -p 0.0001 soft.out soft.expected -o soft.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/align/test_soft_align_nbest.sh b/tests/decoder/align/test_soft_align_nbest.sh
index 700df81..57418c3 100644
--- a/tests/decoder/align/test_soft_align_nbest.sh
+++ b/tests/decoder/align/test_soft_align_nbest.sh
@@ -6,7 +6,7 @@ set -e
 # Test code goes here
 rm -f soft.nbest.out soft.nbest.raw.out
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml --mini-batch 5 -b 3 --n-best --alignment soft < text.in > soft.nbest.out
-$MRT_TOOLS/diff-floats.py -s , -p 0.0001 soft.nbest.out soft.nbest.expected > soft.nbest.diff
+$MRT_TOOLS/diff-nums.py -s , -p 0.0001 soft.nbest.out soft.nbest.expected -o soft.nbest.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/wmt16/test_ende.sh b/tests/decoder/wmt16/test_ende.sh
index 22707bc..04b5f5b 100644
--- a/tests/decoder/wmt16/test_ende.sh
+++ b/tests/decoder/wmt16/test_ende.sh
@@ -5,7 +5,7 @@ set -e
 
 # Test code goes here
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml < text.in > text.out
-diff text.out text.expected > text.diff
+$MRT_TOOLS/diff.sh text.out text.expected > text.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/wmt16/test_ende_cpu.sh b/tests/decoder/wmt16/test_ende_cpu.sh
index c04ceee..f183315 100644
--- a/tests/decoder/wmt16/test_ende_cpu.sh
+++ b/tests/decoder/wmt16/test_ende_cpu.sh
@@ -11,7 +11,7 @@ fi
 
 # Test code goes here
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml --cpu-threads 4 < text.in > text_cpu.out
-diff text_cpu.out text.expected > text_cpu.diff
+$MRT_TOOLS/diff.sh text_cpu.out text.expected > text_cpu.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/wmt16/test_ende_logs.sh b/tests/decoder/wmt16/test_ende_logs.sh
index 13e5689..55b6542 100644
--- a/tests/decoder/wmt16/test_ende_logs.sh
+++ b/tests/decoder/wmt16/test_ende_logs.sh
@@ -6,7 +6,7 @@ set -e
 # Test code goes here
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml < text.in 2> logs.raw
 cat logs.raw | grep "] Best translation" | sed -r "s/.*Best translation [0-9]+ : (.*)/\1/" > logs.out
-diff logs.out text.expected > logs.diff
+$MRT_TOOLS/diff.sh logs.out text.expected > logs.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/wmt16/test_nbest.sh b/tests/decoder/wmt16/test_nbest.sh
index 7d75ac3..a002bdb 100644
--- a/tests/decoder/wmt16/test_nbest.sh
+++ b/tests/decoder/wmt16/test_nbest.sh
@@ -5,7 +5,7 @@ set -e
 
 # Test code goes here
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml -b 5 --n-best < text.in > nbest.out
-$MRT_TOOLS/diff-floats.py nbest.out nbest.expected > nbest.diff
+$MRT_TOOLS/diff-nums.py nbest.out nbest.expected -o nbest.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/wmt17/test_ende.sh b/tests/decoder/wmt17/test_ende.sh
index d9d61c2..902b16e 100644
--- a/tests/decoder/wmt17/test_ende.sh
+++ b/tests/decoder/wmt17/test_ende.sh
@@ -5,7 +5,7 @@ set -e
 
 # Test code goes here
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt17_systems/marian.en-de.yml < text.in | tail -n 99 > text.out
-diff text.out text.expected > text.diff
+$MRT_TOOLS/diff.sh text.out text.expected > text.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/wmt17/test_nbest.sh b/tests/decoder/wmt17/test_nbest.sh
index 43f111a..05a4053 100644
--- a/tests/decoder/wmt17/test_nbest.sh
+++ b/tests/decoder/wmt17/test_nbest.sh
@@ -8,11 +8,11 @@ $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt17_systems/marian.en-de.yml \
   -b 5 --n-best --normalize < text.in | tail -n +6 > nbest.out
 
 # Compare n-best lists
-$MRT_TOOLS/diff-floats.py -p 0.0002 nbest.out nbest.expected > nbest.diff
+$MRT_TOOLS/diff-nums.py -p 0.0002 nbest.out nbest.expected -o nbest.diff
 
 # Compare with nematus scores
 cat nbest.out | sed -r 's/ \|\|\| /\t/g' | cut -f4 | cut -c2- > nbest.scores.out
-$MRT_TOOLS/diff-floats.py -p 0.0002 nbest.scores.out nbest.scores.nematus > nbest.scores.diff
+$MRT_TOOLS/diff-nums.py -p 0.0002 nbest.scores.out nbest.scores.nematus -o nbest.scores.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/examples/iris/test_iris.sh b/tests/examples/iris/test_iris.sh
index 41b16a1..ec6bde3 100644
--- a/tests/examples/iris/test_iris.sh
+++ b/tests/examples/iris/test_iris.sh
@@ -5,7 +5,7 @@ set -e
 
 # Test code goes here
 $MRT_MARIAN/build/iris_example > iris.out
-$MRT_TOOLS/diff-floats.py iris.out iris.expected > iris.diff
+$MRT_TOOLS/diff-nums.py iris.out iris.expected -o iris.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/examples/mnist/test_mnist_ffnn.sh b/tests/examples/mnist/test_mnist_ffnn.sh
index 509a411..eeb3f30 100644
--- a/tests/examples/mnist/test_mnist_ffnn.sh
+++ b/tests/examples/mnist/test_mnist_ffnn.sh
@@ -18,7 +18,7 @@ $MRT_MARIAN/build/mnist_example \
     --log train.log
 
 cat train.log | grep '\[valid\]' | sed -re 's/.*\[valid\] //' -e 's/ : (new|stalled).*//' > ffnn.out
-$MRT_TOOLS/diff-floats.py ffnn.out ffnn.expected -p 0.005 > ffnn.diff
+$MRT_TOOLS/diff-nums.py ffnn.out ffnn.expected -p 0.005 -o ffnn.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/interface/config/test_load_config.sh b/tests/interface/config/test_load_config.sh
index ff97b07..8947347 100644
--- a/tests/interface/config/test_load_config.sh
+++ b/tests/interface/config/test_load_config.sh
@@ -33,7 +33,7 @@ grep -q "dim-emb: 16" load_config.log
 cat no_config.log   | grep -vP "\[(memory|marian)\]" | $MRT_TOOLS/strip-timestamps.sh > no_config.out
 cat load_config.log | grep -vP "\[(memory|marian)\]" | $MRT_TOOLS/strip-timestamps.sh > load_config.out
 
-diff load_config.out no_config.out > load_config.diff
+$MRT_TOOLS/diff.sh load_config.out no_config.out > load_config.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/interface/envvars/test_interpolate_envvars.sh b/tests/interface/envvars/test_interpolate_envvars.sh
index 472c26d..3dfd5dd 100644
--- a/tests/interface/envvars/test_interpolate_envvars.sh
+++ b/tests/interface/envvars/test_interpolate_envvars.sh
@@ -8,7 +8,7 @@ rm -f envvars.out
 
 export MRTMODELDIR=wmt16_systems
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/'${MRTMODELDIR}'/marian.en-de.yml --interpolate-env-vars -i text.in > envvars.out
-diff envvars.out text.expected > envvars.diff
+$MRT_TOOLS/diff.sh envvars.out text.expected > envvars.diff
 
 # Without --interpolate-env-vars this should fail
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/'${MRTMODELDIR}'/marian.en-de.yml -i text.in > envvars.log 2>&1 || true
diff --git a/tests/models/_char-s2s/test_char_s2s.sh b/tests/models/_char-s2s/test_char_s2s.sh
index 11dbf76..4d9cd62 100644
--- a/tests/models/_char-s2s/test_char_s2s.sh
+++ b/tests/models/_char-s2s/test_char_s2s.sh
@@ -10,7 +10,7 @@ fi
 
 # Test code goes here
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/char-s2s/translate.yml < text.in > text.out
-diff text.out text.expected > text.diff
+$MRT_TOOLS/diff.sh text.out text.expected > text.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/models/_char-s2s/test_compare_scores.sh b/tests/models/_char-s2s/test_compare_scores.sh
index db84c25..51df88c 100755
--- a/tests/models/_char-s2s/test_compare_scores.sh
+++ b/tests/models/_char-s2s/test_compare_scores.sh
@@ -18,8 +18,8 @@ $MRT_MARIAN/build/marian-decoder  \
 cat nbest.out sed 's/ ||| /\t/g' | cut -f2 > text.out
 
 # Prepare source and target files for rescoring
-cat text.in | perl -ne 'for$i(1..12){print}' > compare.src
-cat nbest.out | sed 's/ ||| /\t/g' | cut -f2  > compare.trg
+cat text.in | perl -ne 'for$i(1..12){print}' > compare.char.src
+cat nbest.out | sed 's/ ||| /\t/g' | cut -f2  > compare.char.trg
 
 # Run rescorer
 $MRT_MARIAN/build/marian-scorer  -c $MRT_MODELS/char-s2s/translate.yml \
@@ -27,13 +27,12 @@ $MRT_MARIAN/build/marian-scorer  -c $MRT_MODELS/char-s2s/translate.yml \
   --max-length 7000 \
   --workspace 256 \
   --mini-batch 32 \
-  -t $(pwd)/compare.src $(pwd)/compare.trg > compare.scorer.out
+  -t $(pwd)/compare.char.src $(pwd)/compare.char.trg > compare.char.scorer.out
 
 
 # Compare scores
-cat nbest.out | sed 's/ ||| /\t/g' | cut -f3 | cut -d ' ' -f 2 > compare.decoder.out
-$MRT_TOOLS/diff-floats.py compare.scorer.out compare.decoder.out -p 0.0003
+cat nbest.out | sed 's/ ||| /\t/g' | cut -f3 | cut -d ' ' -f 2 > compare.char.decoder.out
+$MRT_TOOLS/diff-nums.py compare.char.scorer.out compare.char.decoder.out -p 0.0003 -d compare.char.diff
 
 # Exit with success code
 exit 0
-
diff --git a/tests/models/transformer/test_hard_aligns.sh b/tests/models/transformer/test_hard_aligns.sh
index 6f5d82b..471d1e7 100644
--- a/tests/models/transformer/test_hard_aligns.sh
+++ b/tests/models/transformer/test_hard_aligns.sh
@@ -8,7 +8,7 @@ rm -f hardalign.out
 # Run Marian
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/transformer/decode.yml -b 6 --mini-batch 32 --alignment < text.in > hardalign.out
 
-diff hardalign.out text.b6.hardalign.expected > hardalign.diff
+$MRT_TOOLS/diff.sh hardalign.out text.b6.hardalign.expected > hardalign.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/models/transformer/test_nbest.sh b/tests/models/transformer/test_nbest.sh
index 5ebb3ad..eb56134 100644
--- a/tests/models/transformer/test_nbest.sh
+++ b/tests/models/transformer/test_nbest.sh
@@ -8,7 +8,7 @@ rm -f nbest.out
 # Run Marian
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/transformer/decode.yml -b 6 --mini-batch 32 --n-best < text.in > nbest.out
 
-$MRT_TOOLS/diff-floats.py -p 0.0001 nbest.out text.b6.nbest.expected > nbest.diff
+$MRT_TOOLS/diff-nums.py -p 0.0001 nbest.out text.b6.nbest.expected -o nbest.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/models/transformer/test_soft_aligns.sh b/tests/models/transformer/test_soft_aligns.sh
index 9c4abf0..651392c 100644
--- a/tests/models/transformer/test_soft_aligns.sh
+++ b/tests/models/transformer/test_soft_aligns.sh
@@ -8,7 +8,7 @@ rm -f softalign.out
 # Run Marian
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/transformer/decode.yml -b 6 --mini-batch 32 --alignment soft < text.in > softalign.out
 
-$MRT_TOOLS/diff-floats.py -s , -p 0.0001 softalign.out text.b6.softalign.expected > softalign.diff
+$MRT_TOOLS/diff-nums.py -s , -p 0.0001 softalign.out text.b6.softalign.expected -o softalign.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/models/transformer/test_translation.sh b/tests/models/transformer/test_translation.sh
index 227361e..2ad73e3 100644
--- a/tests/models/transformer/test_translation.sh
+++ b/tests/models/transformer/test_translation.sh
@@ -8,7 +8,7 @@ rm -f transformer.out
 # Run Marian
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/transformer/decode.yml -b 6 < text.in > transformer.out
 
-diff transformer.out text.b6.expected > transformer.diff
+$MRT_TOOLS/diff.sh transformer.out text.b6.expected > transformer.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/models/transformer/test_translation_batched.sh b/tests/models/transformer/test_translation_batched.sh
index 0a428f8..1da9fda 100644
--- a/tests/models/transformer/test_translation_batched.sh
+++ b/tests/models/transformer/test_translation_batched.sh
@@ -8,7 +8,7 @@ rm -f batched.out
 # Run Marian
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/transformer/decode.yml -b 6 --mini-batch 32 < text.in > batched.out
 
-diff batched.out text.b6.expected > batched.diff
+$MRT_TOOLS/diff.sh batched.out text.b6.expected > batched.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/models/wmt16-ende/test_translation_b6n.sh b/tests/models/wmt16-ende/test_translation_b6n.sh
index c013c34..e712a29 100644
--- a/tests/models/wmt16-ende/test_translation_b6n.sh
+++ b/tests/models/wmt16-ende/test_translation_b6n.sh
@@ -7,8 +7,8 @@ set -e
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml -b 6 -n 1.0 < text.b6n.in > marian.b6n.out
 
 # Compare with Marian and Nematus
-diff marian.b6n.out marian.b6n.expected > marian.b6n.diff
-diff marian.b6n.out nematus.b6n.out > nematus.b6n.diff
+$MRT_TOOLS/diff.sh marian.b6n.out marian.b6n.expected > marian.b6n.diff
+$MRT_TOOLS/diff.sh marian.b6n.out nematus.b6n.out > nematus.b6n.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/models/wmt16-ende/test_translation_b6n_batch32.sh b/tests/models/wmt16-ende/test_translation_b6n_batch32.sh
index 98ad724..831b845 100644
--- a/tests/models/wmt16-ende/test_translation_b6n_batch32.sh
+++ b/tests/models/wmt16-ende/test_translation_b6n_batch32.sh
@@ -9,8 +9,8 @@ $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml -
     < text.b6n.in > marian.batch32.out
 
 # Compare with Marian and Nematus
-diff marian.batch32.out marian.b6n.expected > marian.batch32.diff
-diff marian.batch32.out nematus.b6n.out > nematus.batch32.diff
+$MRT_TOOLS/diff.sh marian.batch32.out marian.b6n.expected > marian.batch32.diff
+$MRT_TOOLS/diff.sh marian.batch32.out nematus.b6n.out > nematus.batch32.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/models/wmt16-ende/test_translation_b6n_batch64.sh b/tests/models/wmt16-ende/test_translation_b6n_batch64.sh
index 9f33abf..76ebea0 100644
--- a/tests/models/wmt16-ende/test_translation_b6n_batch64.sh
+++ b/tests/models/wmt16-ende/test_translation_b6n_batch64.sh
@@ -9,8 +9,8 @@ $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml -
     < text.b6n.in > marian.batch64.out
 
 # Compare with Marian and Nematus
-diff marian.batch64.out marian.b6n.expected > marian.batch64.diff
-diff marian.batch64.out nematus.b6n.out > nematus.batch64.diff
+$MRT_TOOLS/diff.sh marian.batch64.out marian.b6n.expected > marian.batch64.diff
+$MRT_TOOLS/diff.sh marian.batch64.out nematus.b6n.out > nematus.batch64.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/models/wmt17-ende/test_translation_b6n.sh b/tests/models/wmt17-ende/test_translation_b6n.sh
index 91620ca..07ec8e6 100644
--- a/tests/models/wmt17-ende/test_translation_b6n.sh
+++ b/tests/models/wmt17-ende/test_translation_b6n.sh
@@ -9,8 +9,8 @@ rm -f marian.b6n.out
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt17_systems/marian.en-de.yml -b 6 -n 1.0 < text.b6n.in > marian.b6n.out
 
 # Compare with Marian and Nematus
-diff marian.b6n.out marian.b6n.expected > marian.b6n.diff
-diff marian.b6n.out nematus.b6n.out > nematus.b6n.diff
+$MRT_TOOLS/diff.sh marian.b6n.out marian.b6n.expected > marian.b6n.diff
+$MRT_TOOLS/diff.sh marian.b6n.out nematus.b6n.out > nematus.b6n.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/models/wmt17-ende/test_translation_b6n_batch32.sh b/tests/models/wmt17-ende/test_translation_b6n_batch32.sh
index 934e42c..49ae6af 100644
--- a/tests/models/wmt17-ende/test_translation_b6n_batch32.sh
+++ b/tests/models/wmt17-ende/test_translation_b6n_batch32.sh
@@ -12,8 +12,8 @@ $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt17_systems/marian.en-de.yml -
     < text.b6n.in > marian.batch32.out
 
 # Compare with Marian and Nematus
-diff marian.batch32.out marian.b6n.expected > marian.batch32.diff
-diff marian.batch32.out nematus.b6n.out > nematus.batch32.diff
+$MRT_TOOLS/diff.sh marian.batch32.out marian.b6n.expected > marian.batch32.diff
+$MRT_TOOLS/diff.sh marian.batch32.out nematus.b6n.out > nematus.batch32.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/models/wnmt18/test_student_small.sh b/tests/models/wnmt18/test_student_small.sh
index a09986a..74724f9 100644
--- a/tests/models/wnmt18/test_student_small.sh
+++ b/tests/models/wnmt18/test_student_small.sh
@@ -18,7 +18,7 @@ cat newstest2014.in | $MRT_MARIAN/build/marian-decoder \
     --shortlist $MRT_MODELS/wnmt18/lex.s2t 100 75 --cpu-threads=1 --skip-cost --max-length-factor 1.2 \
     > student_small.out
 
-diff student_small.out student_small.expected > student_small.diff
+$MRT_TOOLS/diff.sh student_small.out student_small.expected > student_small.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/models/wnmt18/test_student_small_aan.sh b/tests/models/wnmt18/test_student_small_aan.sh
index cfa53f1..73f3fd7 100644
--- a/tests/models/wnmt18/test_student_small_aan.sh
+++ b/tests/models/wnmt18/test_student_small_aan.sh
@@ -18,7 +18,7 @@ cat newstest2014.in | $MRT_MARIAN/build/marian-decoder \
     --shortlist $MRT_MODELS/wnmt18/lex.s2t 100 75 --cpu-threads=1 --skip-cost --max-length-factor 1.2 \
     > student_small_aan.out
 
-diff student_small_aan.out student_small_aan.expected > student_small_aan.diff
+$MRT_TOOLS/diff.sh student_small_aan.out student_small_aan.expected > student_small_aan.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/models/wnmt18/test_student_small_aan_optimize.sh b/tests/models/wnmt18/test_student_small_aan_optimize.sh
index 79a44d2..fcfbe6d 100644
--- a/tests/models/wnmt18/test_student_small_aan_optimize.sh
+++ b/tests/models/wnmt18/test_student_small_aan_optimize.sh
@@ -23,7 +23,7 @@ cat optimize_aan.out | perl -pe 's/@@ //g' \
     | $MRT_TOOLS/moses-scripts/scripts/generic/multi-bleu.perl newstest2014.ref \
     | $MRT_TOOLS/extract-bleu.sh > optimize_aan.bleu
 
-$MRT_TOOLS/diff-floats.py optimize_aan.bleu optimize_aan.bleu.expected -p 0.4 > optimize_aan.bleu.diff
+$MRT_TOOLS/diff-nums.py optimize_aan.bleu optimize_aan.bleu.expected -p 0.4 -o optimize_aan.bleu.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/scorer/align/test_scorer_align.sh b/tests/scorer/align/test_scorer_align.sh
index 6a3a788..68fe68e 100644
--- a/tests/scorer/align/test_scorer_align.sh
+++ b/tests/scorer/align/test_scorer_align.sh
@@ -9,7 +9,7 @@ $MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer
   | sed 's/^.* ||| //' > align.out
 
 # Compare scores
-diff align.out align.expected > align.diff
+$MRT_TOOLS/diff.sh align.out align.expected > align.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/scorer/align/test_scorer_align_batch_1.sh b/tests/scorer/align/test_scorer_align_batch_1.sh
index 4d0069f..d81374b 100644
--- a/tests/scorer/align/test_scorer_align_batch_1.sh
+++ b/tests/scorer/align/test_scorer_align_batch_1.sh
@@ -9,7 +9,7 @@ $MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer
   | sed 's/^.* ||| //' > align.b1.out
 
 # Compare scores
-diff align.b1.out align.expected > align.b1.diff
+$MRT_TOOLS/diff.sh align.b1.out align.expected > align.b1.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/scorer/align/test_scorer_align_nbest.sh b/tests/scorer/align/test_scorer_align_nbest.sh
index 91a293d..3f58630 100644
--- a/tests/scorer/align/test_scorer_align_nbest.sh
+++ b/tests/scorer/align/test_scorer_align_nbest.sh
@@ -8,7 +8,7 @@ $MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer
   -t $(pwd)/text.src.in $(pwd)/nbest.trg.in --alignment --mini-batch 16 --n-best > nbest.out
 
 # Compare n-best lists
-$MRT_TOOLS/diff-floats.py -p 0.0001 nbest.out nbest.expected > nbest.diff
+$MRT_TOOLS/diff-nums.py -p 0.0001 nbest.out nbest.expected -o nbest.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/scorer/align/test_scorer_soft_align.sh b/tests/scorer/align/test_scorer_soft_align.sh
index f0671ec..54601c9 100644
--- a/tests/scorer/align/test_scorer_soft_align.sh
+++ b/tests/scorer/align/test_scorer_soft_align.sh
@@ -9,7 +9,7 @@ $MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer
   | sed 's/^.* ||| //' > soft.out
 
 # Compare scores
-$MRT_TOOLS/diff-floats.py -s , soft.out soft.expected > soft.diff
+$MRT_TOOLS/diff-nums.py -s , soft.out soft.expected -o soft.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/scorer/nbest/test_compare_parallel_and_nbest.sh b/tests/scorer/nbest/test_compare_parallel_and_nbest.sh
index 63deee1..8304c04 100644
--- a/tests/scorer/nbest/test_compare_parallel_and_nbest.sh
+++ b/tests/scorer/nbest/test_compare_parallel_and_nbest.sh
@@ -17,7 +17,7 @@ $MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer
 
 cat parallel.nbest.out | sed 's/ ||| /\t/g' | cut -f3 | tr ' ' '\t' | cut -f4 > parallel.nbest.scores.out
 
-$MRT_TOOLS/diff-floats.py parallel.scores.out parallel.nbest.scores.out -p 0.0003 > parallel.scores.diff
+$MRT_TOOLS/diff-nums.py parallel.scores.out parallel.nbest.scores.out -p 0.0003 -o parallel.scores.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/scorer/nbest/test_custom_feature_name.sh b/tests/scorer/nbest/test_custom_feature_name.sh
index 51e890e..957d701 100644
--- a/tests/scorer/nbest/test_custom_feature_name.sh
+++ b/tests/scorer/nbest/test_custom_feature_name.sh
@@ -13,7 +13,7 @@ grep -c 'FeatureName= ' custom.out
 cat custom.out | sed 's/ ||| /\t/g' | cut -f3 | tr ' ' '\t' | cut -f4 > custom.scores.out
 cat nbest.expected | sed 's/ ||| /\t/g' | cut -f3 | tr ' ' '\t' | cut -f4 > nbest.scores.out
 
-$MRT_TOOLS/diff-floats.py custom.scores.out nbest.scores.out -p 0.0003 > custom.scores.diff
+$MRT_TOOLS/diff-nums.py custom.scores.out nbest.scores.out -p 0.0003 -o custom.scores.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/scorer/nbest/test_score_nbest_list.sh b/tests/scorer/nbest/test_score_nbest_list.sh
index 3bf7a41..4cf3cb3 100644
--- a/tests/scorer/nbest/test_score_nbest_list.sh
+++ b/tests/scorer/nbest/test_score_nbest_list.sh
@@ -8,7 +8,7 @@ $MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer
     --n-best -t text.src.in text.nbest.in \
     > nbest.out
 
-$MRT_TOOLS/diff-floats.py nbest.out nbest.expected -p 0.0003 > nbest.diff
+$MRT_TOOLS/diff-nums.py nbest.out nbest.expected -p 0.0003 -o nbest.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/scorer/scores/test_compare_with_decoder_scores.sh b/tests/scorer/scores/test_compare_with_decoder_scores.sh
index 799ae81..e4a7440 100644
--- a/tests/scorer/scores/test_compare_with_decoder_scores.sh
+++ b/tests/scorer/scores/test_compare_with_decoder_scores.sh
@@ -9,7 +9,7 @@ $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml \
 
 # Compare translations
 cat nbest.out | sed 's/ ||| /\t/g' | cut -f2 > text.out
-diff text.out text.expected > text.diff
+$MRT_TOOLS/diff.sh text.out text.expected > text.diff
 
 # Prepare source and target files for rescoring
 cat text.in | perl -ne 'for$i(1..12){print}' > compare.src
@@ -21,7 +21,7 @@ $MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer
 
 # Compare scores
 cat nbest.out | sed 's/ ||| /\t/g' | cut -f4 > compare.decoder.out
-$MRT_TOOLS/diff-floats.py compare.scorer.out compare.decoder.out -p 0.0003
+$MRT_TOOLS/diff-nums.py compare.scorer.out compare.decoder.out -p 0.0003 -o compare.scorer.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/scorer/scores/test_scores.sh b/tests/scorer/scores/test_scores.sh
index b1a87bc..540e102 100644
--- a/tests/scorer/scores/test_scores.sh
+++ b/tests/scorer/scores/test_scores.sh
@@ -8,7 +8,7 @@ $MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer
   -t $(pwd)/scores.src.in $(pwd)/scores.trg.in > scores.out
 
 # Compare scores
-$MRT_TOOLS/diff-floats.py scores.out scores.expected -p 0.0003
+$MRT_TOOLS/diff-nums.py scores.out scores.expected -p 0.0003 -o scores.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/scorer/scores/test_summary.sh b/tests/scorer/scores/test_summary.sh
index e158fdd..d2fc27f 100644
--- a/tests/scorer/scores/test_summary.sh
+++ b/tests/scorer/scores/test_summary.sh
@@ -8,7 +8,7 @@ $MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer
   -t $(pwd)/scores.src.in $(pwd)/scores.trg.in --summary > summary.out
 
 # Compare scores
-$MRT_TOOLS/diff-floats.py summary.out summary.expected -p 0.0003
+$MRT_TOOLS/diff-nums.py summary.out summary.expected -p 0.0003 -o summary.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/scorer/scores/test_summary_perplexity.sh b/tests/scorer/scores/test_summary_perplexity.sh
index 2493379..73e93d8 100644
--- a/tests/scorer/scores/test_summary_perplexity.sh
+++ b/tests/scorer/scores/test_summary_perplexity.sh
@@ -8,7 +8,7 @@ $MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer
   -t $(pwd)/scores.src.in $(pwd)/scores.trg.in --summary perplexity > summary_perplexity.out
 
 # Compare scores
-$MRT_TOOLS/diff-floats.py summary_perplexity.out summary_perplexity.expected -p 0.0003
+$MRT_TOOLS/diff-nums.py summary_perplexity.out summary_perplexity.expected -p 0.0003 -o summary_perplexity.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/server/test_ende.sh b/tests/server/test_ende.sh
index df703cf..a309d31 100644
--- a/tests/server/test_ende.sh
+++ b/tests/server/test_ende.sh
@@ -17,7 +17,7 @@ sleep 20
 python3 $MRT_MARIAN/scripts/server/client_example.py -p 8765 < text.in > text.out
 kill $SERVER_PID
 
-diff text.out text.expected > text.diff
+$MRT_TOOLS/diff.sh text.out text.expected > text.diff
 test -e server.log
 grep -q "listening on port 8765" server.log
 
diff --git a/tests/server/test_ende_align.sh b/tests/server/test_ende_align.sh
index ac696cc..5242d7b 100644
--- a/tests/server/test_ende_align.sh
+++ b/tests/server/test_ende_align.sh
@@ -18,7 +18,7 @@ sleep 20
 python3 $MRT_MARIAN/scripts/server/client_example.py -p 8765 < text.in > text.align.out
 kill $SERVER_PID
 
-diff text.align.out text.align.expected > text.align.diff
+$MRT_TOOLS/diff.sh text.align.out text.align.expected > text.align.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/server/test_ende_batch32.sh b/tests/server/test_ende_batch32.sh
index a36a55e..c9955e6 100644
--- a/tests/server/test_ende_batch32.sh
+++ b/tests/server/test_ende_batch32.sh
@@ -17,7 +17,7 @@ sleep 20
 
 python3 $MRT_MARIAN/scripts/server/client_example.py -p 8766 -b 32 < text.in > text.b32.out
 kill $SERVER_PID
-diff text.b32.out text.expected > text.diff
+$MRT_TOOLS/diff.sh text.b32.out text.expected > text.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/server/test_ende_cpu.sh b/tests/server/test_ende_cpu.sh
index c5305a4..8925561 100644
--- a/tests/server/test_ende_cpu.sh
+++ b/tests/server/test_ende_cpu.sh
@@ -25,7 +25,7 @@ sleep 20
 python3 $MRT_MARIAN/scripts/server/client_example.py -p 8768 < text4.in > text4.cpu.out
 kill $SERVER_PID
 
-diff text4.cpu.out text4.expected > text4.cpu.diff
+$MRT_TOOLS/diff.sh text4.cpu.out text4.expected > text4.cpu.diff
 test -e server_cpu.log
 grep -q "listening on port 8768" server_cpu.log
 
diff --git a/tests/training/basics/.gitignore b/tests/training/basics/.gitignore
index 61de908..af307e4 100644
--- a/tests/training/basics/.gitignore
+++ b/tests/training/basics/.gitignore
@@ -6,3 +6,4 @@ sqlite_seed
 batch_fit
 *.temp
 vocab.*.yml
+gzip
diff --git a/tests/training/basics/gzip.expected b/tests/training/basics/gzip.expected
new file mode 100644
index 0000000..cc069b6
--- /dev/null
+++ b/tests/training/basics/gzip.expected
@@ -0,0 +1,5 @@
+447.89
+374.88
+324.98
+284.49
+248.72
diff --git a/tests/training/basics/setup.sh b/tests/training/basics/setup.sh
index 6088de5..1bcb2f9 100644
--- a/tests/training/basics/setup.sh
+++ b/tests/training/basics/setup.sh
@@ -2,3 +2,8 @@ test -f $MRT_DATA/europarl.de-en/corpus.bpe.en || exit 1
 test -f $MRT_DATA/europarl.de-en/corpus.bpe.de || exit 1
 test -f $MRT_DATA/europarl.de-en/toy.bpe.en || exit 1
 test -f $MRT_DATA/europarl.de-en/toy.bpe.de || exit 1
+
+test -s vocab.de.yml || $MRT_MARIAN/build/marian-vocab < $MRT_DATA/europarl.de-en/corpus.bpe.de > vocab.de.yml
+test -s vocab.en.yml || $MRT_MARIAN/build/marian-vocab < $MRT_DATA/europarl.de-en/corpus.bpe.en > vocab.en.yml
+test -s vocab.de.yml
+test -s vocab.en.yml
diff --git a/tests/training/basics/test_gzipped_train_sets.sh b/tests/training/basics/test_gzipped_train_sets.sh
new file mode 100644
index 0000000..b28c41c
--- /dev/null
+++ b/tests/training/basics/test_gzipped_train_sets.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+# Exit on error
+set -e
+
+# Train on gzipped corpora and compare the logged costs with gzip.expected
+rm -rf gzip gzip.log
+mkdir -p gzip
+
+test -e $MRT_DATA/europarl.de-en/corpus.bpe.de.gz || cat $MRT_DATA/europarl.de-en/corpus.bpe.de | gzip > $MRT_DATA/europarl.de-en/corpus.bpe.de.gz
+test -e $MRT_DATA/europarl.de-en/corpus.bpe.en.gz || cat $MRT_DATA/europarl.de-en/corpus.bpe.en | gzip > $MRT_DATA/europarl.de-en/corpus.bpe.en.gz
+
+$MRT_MARIAN/build/marian \
+    --no-shuffle --seed 1111 --dim-emb 64 --dim-rnn 64 \
+    -m gzip/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de}.gz -v vocab.en.yml vocab.de.yml \
+    --log gzip.log --disp-freq 10 --after-batches 50
+
+test -e gzip/model.npz
+test -e gzip.log
+
+cat gzip.log | $MRT_TOOLS/extract-costs.sh > gzip.out
+$MRT_TOOLS/diff-nums.py gzip.out gzip.expected -p 0.1 -o gzip.diff
+
+# Exit with success code
+exit 0
diff --git a/tests/training/basics/test_mini_batch_fit.sh b/tests/training/basics/test_mini_batch_fit.sh
index 7d54d94..2f1ba77 100644
--- a/tests/training/basics/test_mini_batch_fit.sh
+++ b/tests/training/basics/test_mini_batch_fit.sh
@@ -24,7 +24,7 @@ test -e batch_fit/model.npz.amun.yml
 test -e batch_fit.log
 
 cat batch_fit.log | grep 'Ep\. 1 :' | sed -r 's/.*Up\. ([0-9]+) .*Sen. ([0-9]+).*/\2\/\1/' | bc > batch_fit.out
-diff batch_fit.out batch_fit.expected > batch_fit.diff
+$MRT_TOOLS/diff.sh batch_fit.out batch_fit.expected > batch_fit.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/basics/test_sqlite.sh b/tests/training/basics/test_sqlite.sh
index c2e0302..0dec245 100644
--- a/tests/training/basics/test_sqlite.sh
+++ b/tests/training/basics/test_sqlite.sh
@@ -10,8 +10,7 @@ mkdir -p sqlite
 $MRT_MARIAN/build/marian \
     --seed 1111 --no-shuffle --dim-emb 64 --dim-rnn 128 --optimizer sgd \
     -m sqlite/model.nosqlite.npz \
-    -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} \
-    -v sqlite/vocab.en.yml sqlite/vocab.de.yml \
+    -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \
     --disp-freq 10 --after-batches 100 \
     --log nosqlite.log
 
@@ -33,7 +32,7 @@ test -e sqlite.log
 
 $MRT_TOOLS/extract-costs.sh < sqlite.log > sqlite.out
 
-$MRT_TOOLS/diff-floats.py nosqlite.out sqlite.out -p 0.2 > sqlite.diff
+$MRT_TOOLS/diff-nums.py nosqlite.out sqlite.out -p 0.2 -o sqlite.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/basics/test_sqlite_random_seed.sh b/tests/training/basics/test_sqlite_random_seed.sh
index 6c80ec9..a443f59 100644
--- a/tests/training/basics/test_sqlite_random_seed.sh
+++ b/tests/training/basics/test_sqlite_random_seed.sh
@@ -10,8 +10,7 @@ mkdir -p sqlite_seed
 $MRT_MARIAN/build/marian \
     --seed 3333 --dim-emb 64 --dim-rnn 128 --optimizer sgd \
     -m sqlite_seed/model1.npz \
-    -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} --sqlite \
-    -v sqlite_seed/vocab.en.yml sqlite_seed/vocab.de.yml \
+    -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} --sqlite -v vocab.en.yml vocab.de.yml \
     --disp-freq 2 --after-batches 50 \
     --log sqlite_seed_1.log
 
@@ -32,7 +31,7 @@ test -e sqlite_seed_2.log
 $MRT_TOOLS/extract-costs.sh < sqlite_seed_1.log > sqlite_seed_1.out
 $MRT_TOOLS/extract-costs.sh < sqlite_seed_2.log > sqlite_seed_2.out
 
-$MRT_TOOLS/diff-floats.py sqlite_seed_1.out sqlite_seed_2.out -p 0.1 > sqlite_seed.diff
+$MRT_TOOLS/diff-nums.py sqlite_seed_1.out sqlite_seed_2.out -p 0.1 -o sqlite_seed.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/basics/test_toy_vocab.sh b/tests/training/basics/test_toy_vocab.sh
index c8401a6..2c7c253 100644
--- a/tests/training/basics/test_toy_vocab.sh
+++ b/tests/training/basics/test_toy_vocab.sh
@@ -9,7 +9,7 @@ rm -f toy/* toy.log
 
 $MRT_MARIAN/build/marian \
     --seed 1111 --dim-emb 256 --dim-rnn 512 \
-    -m toy/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v toy/vocab.en.yml toy/vocab.de.yml \
+    -m toy/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v toy/vocab.de.yml toy/vocab.en.yml \
     --log toy.log --disp-freq 5 -e 5
 
 test -e toy/vocab.en.yml
@@ -19,7 +19,7 @@ test -e toy/model.npz.yml
 test -e toy/model.npz.amun.yml
 
 cat toy.log | $MRT_TOOLS/extract-costs.sh > toy.out
-$MRT_TOOLS/diff-floats.py toy.out toy.expected -p 0.99 -n 5 > toy.diff
+$MRT_TOOLS/diff-nums.py toy.out toy.expected -p 0.9 -o toy.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/basics/test_translation_script.sh b/tests/training/basics/test_translation_script.sh
index d415f53..83b102c 100644
--- a/tests/training/basics/test_translation_script.sh
+++ b/tests/training/basics/test_translation_script.sh
@@ -10,8 +10,7 @@ mkdir -p trans
 $MRT_MARIAN/build/marian \
     --seed 2222 --no-shuffle --dim-emb 128 --dim-rnn 256 --maxi-batch 1 --mini-batch 16 \
     -m trans/model.npz \
-    -t $MRT_DATA/europarl.de-en/corpus.bpe.en $MRT_DATA/europarl.de-en/corpus.bpe.de \
-    -v vocab.en.yml vocab.de.yml \
+    -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \
     --dim-vocabs 50000 50000 \
     --disp-freq 30 --valid-freq 60 --after-batches 150 \
     --valid-metrics cross-entropy translation --valid-script-path ./trans_script.sh \
@@ -29,7 +28,7 @@ test -e trans.log
 grep -q "/tmp/marian.*" trans_script.temp
 
 $MRT_TOOLS/strip-timestamps.sh < trans.log | grep -v "Total translation time" | head -n 4 > trans.out
-$MRT_TOOLS/diff-floats.py trans.out trans.expected -p 0.2 > trans.diff
+$MRT_TOOLS/diff-nums.py trans.out trans.expected -p 0.2 -o trans.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/basics/test_valid_script.sh b/tests/training/basics/test_valid_script.sh
index 7bf3cc8..e638854 100644
--- a/tests/training/basics/test_valid_script.sh
+++ b/tests/training/basics/test_valid_script.sh
@@ -28,7 +28,7 @@ test -e valid/model.npz.dev.npz.amun.yml
 test -e valid.log
 
 $MRT_TOOLS/strip-timestamps.sh < valid.log > valid.out
-$MRT_TOOLS/diff-floats.py valid.out valid.expected -p 0.2 > valid.diff
+$MRT_TOOLS/diff-nums.py valid.out valid.expected -p 0.2 -o valid.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/basics/toy.expected b/tests/training/basics/toy.expected
index 74b9335..1c2ef85 100644
--- a/tests/training/basics/toy.expected
+++ b/tests/training/basics/toy.expected
@@ -1,44 +1,44 @@
 208.03
 179.47
-212.83
-237.49
-186.96
-234.88
-209.33
-215.29
-180.76
-198.65
-146.06
-263.98
-213.56
+212.82
+237.45
+186.91
+234.73
+209.09
+214.79
+180.34
+198.19
+145.59
+263.72
+213.26
+127.88
+118.83
+131.60
+174.33
+141.15
+178.26
+99.79
+124.64
+139.91
+211.91
+243.59
+168.14
+101.86
+115.36
+175.50
 128.12
-119.03
-131.72
-174.51
-141.27
-178.35
-99.92
-124.80
-139.98
-211.95
-243.47
-168.08
-101.91
-115.40
-175.46
-128.13
-138.20
+138.18
 123.40
 213.24
 148.44
-198.29
-135.94
+198.26
+135.91
 173.55
 181.04
-187.22
+187.23
 129.47
-126.52
+126.50
 162.25
-95.56
-133.94
-206.48
+95.58
+133.95
+206.49
diff --git a/tests/training/cost-functions/test_ce-mean-words.sh b/tests/training/cost-functions/test_ce-mean-words.sh
index 760a2bf..faaf113 100644
--- a/tests/training/cost-functions/test_ce-mean-words.sh
+++ b/tests/training/cost-functions/test_ce-mean-words.sh
@@ -18,7 +18,7 @@ test -e ce-mean-words/model.npz
 test -e ce-mean-words.log
 
 cat ce-mean-words.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > ce-mean-words.out
-$MRT_TOOLS/diff-floats.py ce-mean-words.out ce-mean-words.expected -p 0.02 > ce-mean-words.diff
+$MRT_TOOLS/diff-nums.py ce-mean-words.out ce-mean-words.expected -p 0.02 -o ce-mean-words.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/cost-functions/test_ce-mean.sh b/tests/training/cost-functions/test_ce-mean.sh
index d2fe96c..c2ef4d1 100644
--- a/tests/training/cost-functions/test_ce-mean.sh
+++ b/tests/training/cost-functions/test_ce-mean.sh
@@ -17,7 +17,7 @@ test -e ce-mean/model.npz
 test -e ce-mean.log
 
 cat ce-mean.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > ce-mean.out
-$MRT_TOOLS/diff-floats.py ce-mean.out ce-mean.expected -p 0.02 > ce-mean.diff
+$MRT_TOOLS/diff-nums.py ce-mean.out ce-mean.expected -p 0.02 -o ce-mean.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/cost-functions/test_ce-sum.sh b/tests/training/cost-functions/test_ce-sum.sh
index f6cc992..540e82d 100644
--- a/tests/training/cost-functions/test_ce-sum.sh
+++ b/tests/training/cost-functions/test_ce-sum.sh
@@ -18,7 +18,7 @@ test -e ce-sum/model.npz
 test -e ce-sum.log
 
 cat ce-sum.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > ce-sum.out
-$MRT_TOOLS/diff-floats.py ce-sum.out ce-sum.expected -p 0.2 > ce-sum.diff
+$MRT_TOOLS/diff-nums.py ce-sum.out ce-sum.expected -p 0.2 -o ce-sum.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/cost-functions/test_perplexity.sh b/tests/training/cost-functions/test_perplexity.sh
index 21d69b4..ae828a4 100644
--- a/tests/training/cost-functions/test_perplexity.sh
+++ b/tests/training/cost-functions/test_perplexity.sh
@@ -18,7 +18,7 @@ test -e perplexity/model.npz
 test -e perplexity.log
 
 cat perplexity.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > perplexity.out
-$MRT_TOOLS/diff-floats.py perplexity.out perplexity.expected -p 0.5 > perplexity.diff
+$MRT_TOOLS/diff-nums.py perplexity.out perplexity.expected -p 0.5 -o perplexity.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_compare_word_and_sentence_weighting.sh b/tests/training/data-weighting/test_compare_word_and_sentence_weighting.sh
index 0dacf39..6529857 100644
--- a/tests/training/data-weighting/test_compare_word_and_sentence_weighting.sh
+++ b/tests/training/data-weighting/test_compare_word_and_sentence_weighting.sh
@@ -33,7 +33,7 @@ test -e compare/model.words.npz
 test -e compare.words.log
 
 cat compare.words.log | $MRT_TOOLS/extract-disp.sh > compare.words.out
-$MRT_TOOLS/diff-floats.py compare.words.out compare.sents.out -p 0.1 > compare.words.diff
+$MRT_TOOLS/diff-nums.py compare.words.out compare.sents.out -p 0.1 -o compare.words.diff
 
 
 # Exit with success code
diff --git a/tests/training/data-weighting/test_maxi_batches_with_sentence_weights.sh b/tests/training/data-weighting/test_maxi_batches_with_sentence_weights.sh
index 0c39b5e..071fd94 100644
--- a/tests/training/data-weighting/test_maxi_batches_with_sentence_weights.sh
+++ b/tests/training/data-weighting/test_maxi_batches_with_sentence_weights.sh
@@ -20,7 +20,7 @@ test -e maxibatch/model.npz
 test -e maxibatch.log
 
 $MRT_TOOLS/extract-costs.sh < maxibatch.log > maxibatch.out
-$MRT_TOOLS/diff-floats.py maxibatch.out maxibatch.expected -p 0.1 > maxibatch.diff
+$MRT_TOOLS/diff-nums.py maxibatch.out maxibatch.expected -p 0.1 -o maxibatch.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_maxi_batches_with_word_weights.sh b/tests/training/data-weighting/test_maxi_batches_with_word_weights.sh
index 2228796..7b87e3d 100644
--- a/tests/training/data-weighting/test_maxi_batches_with_word_weights.sh
+++ b/tests/training/data-weighting/test_maxi_batches_with_word_weights.sh
@@ -20,7 +20,7 @@ test -e word_maxibatch/model.npz
 test -e word_maxibatch.log
 
 $MRT_TOOLS/extract-costs.sh < word_maxibatch.log > word_maxibatch.out
-$MRT_TOOLS/diff-floats.py word_maxibatch.out word_maxibatch.expected -p 0.1 > word_maxibatch.diff
+$MRT_TOOLS/diff-nums.py word_maxibatch.out word_maxibatch.expected -p 0.1 -o word_maxibatch.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_sentence_weighting_sqlite.sh b/tests/training/data-weighting/test_sentence_weighting_sqlite.sh
index 7323c2d..3f393fc 100644
--- a/tests/training/data-weighting/test_sentence_weighting_sqlite.sh
+++ b/tests/training/data-weighting/test_sentence_weighting_sqlite.sh
@@ -19,7 +19,7 @@ test -e sqlite.log
 
 cat sqlite.log | $MRT_TOOLS/extract-costs.sh > sqlite.out
 
-$MRT_TOOLS/diff-floats.py sqlite.out sqlite.expected -p 0.1 > sqlite.diff
+$MRT_TOOLS/diff-nums.py sqlite.out sqlite.expected -p 0.1 -o sqlite.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_sentence_weighting_with_ones.sh b/tests/training/data-weighting/test_sentence_weighting_with_ones.sh
index caeba16..6ff43ef 100644
--- a/tests/training/data-weighting/test_sentence_weighting_with_ones.sh
+++ b/tests/training/data-weighting/test_sentence_weighting_with_ones.sh
@@ -31,7 +31,7 @@ test -e ones/model.npz
 test -e ones.log
 
 cat ones.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed -r 's/ Time.*//' > ones.out
-$MRT_TOOLS/diff-floats.py noweights.out ones.out -p 0.1 > ones.diff
+$MRT_TOOLS/diff-nums.py noweights.out ones.out -p 0.1 -o ones.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_sentence_weights_x3.sh b/tests/training/data-weighting/test_sentence_weights_x3.sh
index 11cec28..45b92d3 100644
--- a/tests/training/data-weighting/test_sentence_weights_x3.sh
+++ b/tests/training/data-weighting/test_sentence_weights_x3.sh
@@ -30,7 +30,7 @@ test -e x3weights.log
 
 cat x3weights.log | grep 'Cost ' | sed -r 's/.*Cost (.*) : Time.*/\1/' > x3weights.out
 
-$MRT_TOOLS/diff-floats.py x3copied.out x3weights.out -p 0.1 > x3weights.diff
+$MRT_TOOLS/diff-nums.py x3copied.out x3weights.out -p 0.1 -o x3weights.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_validation.sh b/tests/training/data-weighting/test_validation.sh
index 0c9dcf8..3bb3c63 100644
--- a/tests/training/data-weighting/test_validation.sh
+++ b/tests/training/data-weighting/test_validation.sh
@@ -23,8 +23,8 @@ test -e valid/train.log
 $MRT_TOOLS/strip-timestamps.sh < valid/valid.log > valid.out
 $MRT_TOOLS/extract-costs.sh < valid/train.log > train.out
 
-$MRT_TOOLS/diff-floats.py valid.out valid.expected -p 1.99 > valid.diff
-$MRT_TOOLS/diff-floats.py train.out train.expected -p 1.99 > train.diff
+$MRT_TOOLS/diff-nums.py valid.out valid.expected -p 1.99 -o valid.diff
+$MRT_TOOLS/diff-nums.py train.out train.expected -p 1.99 -o train.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_word_weighting_sqlite.sh b/tests/training/data-weighting/test_word_weighting_sqlite.sh
index 33c276e..bb2452b 100644
--- a/tests/training/data-weighting/test_word_weighting_sqlite.sh
+++ b/tests/training/data-weighting/test_word_weighting_sqlite.sh
@@ -20,7 +20,7 @@ test -e sqlite_word/corpus.sqlite3
 test -e sqlite_word.log
 
 cat sqlite_word.log | $MRT_TOOLS/extract-costs.sh > sqlite_word.out
-$MRT_TOOLS/diff-floats.py sqlite_word.out sqlite_word.expected -p 0.1 > sqlite_word.diff
+$MRT_TOOLS/diff-nums.py sqlite_word.out sqlite_word.expected -p 0.1 -o sqlite_word.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_word_weighting_with_eos.sh b/tests/training/data-weighting/test_word_weighting_with_eos.sh
index 2ebcfd8..4044b77 100644
--- a/tests/training/data-weighting/test_word_weighting_with_eos.sh
+++ b/tests/training/data-weighting/test_word_weighting_with_eos.sh
@@ -21,7 +21,7 @@ test -e word_eos/model.npz
 test -e word_eos.log
 
 cat word_eos.log | $MRT_TOOLS/extract-disp.sh > word_eos.out
-$MRT_TOOLS/diff-floats.py word_eos.out word_eos.expected -p 0.1 > word_eos.diff
+$MRT_TOOLS/diff-nums.py word_eos.out word_eos.expected -p 0.1 -o word_eos.diff
 
 
 # Exit with success code
diff --git a/tests/training/data-weighting/test_word_weighting_with_ones.sh b/tests/training/data-weighting/test_word_weighting_with_ones.sh
index 8ba31a8..9e47cdc 100644
--- a/tests/training/data-weighting/test_word_weighting_with_ones.sh
+++ b/tests/training/data-weighting/test_word_weighting_with_ones.sh
@@ -28,7 +28,7 @@ test -e word_ones/model.npz
 test -e word_ones.log
 
 cat word_ones.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed -r 's/ Time.*//' > word_ones.out
-$MRT_TOOLS/diff-floats.py word_noweights.out word_ones.out -p 0.1 > word_ones.diff
+$MRT_TOOLS/diff-nums.py word_noweights.out word_ones.out -p 0.1 -o word_ones.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_word_weighting_with_twos.sh b/tests/training/data-weighting/test_word_weighting_with_twos.sh
index 4706cae..a39ec07 100644
--- a/tests/training/data-weighting/test_word_weighting_with_twos.sh
+++ b/tests/training/data-weighting/test_word_weighting_with_twos.sh
@@ -19,7 +19,7 @@ test -e word_twos/model.npz
 test -e word_twos.log
 
 cat word_twos.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed -r 's/ Time.*//' > word_twos.out
-$MRT_TOOLS/diff-floats.py word_twos.out word_twos.expected -p 0.1 > word_twos.diff
+$MRT_TOOLS/diff-nums.py word_twos.out word_twos.expected -p 0.1 -o word_twos.diff
 
 rm -rf word_twos_cfg word_twos_cfg.{log,out,diff}
 mkdir -p word_twos_cfg
@@ -34,7 +34,7 @@ $MRT_MARIAN/build/marian \
     -c word_twos.config.yml
 
 cat word_twos_cfg.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed -r 's/ Time.*//' > word_twos_cfg.out
-$MRT_TOOLS/diff-floats.py word_twos_cfg.out word_twos.expected -p 0.1 > word_twos_cfg.diff
+$MRT_TOOLS/diff-nums.py word_twos_cfg.out word_twos.expected -p 0.1 -o word_twos_cfg.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/embeddings/test_custom_embeddings.sh b/tests/training/embeddings/test_custom_embeddings.sh
index 4fcc1f2..3edd00a 100644
--- a/tests/training/embeddings/test_custom_embeddings.sh
+++ b/tests/training/embeddings/test_custom_embeddings.sh
@@ -26,8 +26,8 @@ $MRT_MARIAN/scripts/embeddings/export_embeddings.py -m custom_emb/model.npz -o c
 cat custom_emb.all.src | head -n 101 > custom_emb.src
 cat custom_emb.all.trg | head -n 101 > custom_emb.trg
 
-$MRT_TOOLS/diff-floats.py -n 1 -p 0.0005 word2vec.en custom_emb.src > custom_emb.src.diff
-$MRT_TOOLS/diff-floats.py -n 1 -p 0.0005 word2vec.de custom_emb.trg > custom_emb.trg.diff
+$MRT_TOOLS/diff-nums.py -n 1 -p 0.0005 word2vec.en custom_emb.src -o custom_emb.src.diff
+$MRT_TOOLS/diff-nums.py -n 1 -p 0.0005 word2vec.de custom_emb.trg -o custom_emb.trg.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/exp-smoothing/test_expsmooth.sh b/tests/training/exp-smoothing/test_expsmooth.sh
index d36d05d..2428dfc 100644
--- a/tests/training/exp-smoothing/test_expsmooth.sh
+++ b/tests/training/exp-smoothing/test_expsmooth.sh
@@ -36,11 +36,11 @@ cat expsmooth.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | grep -v 'val
 cat expsmooth.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | grep 'valid' | sed 's/ : Time.*//' > expsmooth.valid.out
 
 
-$MRT_TOOLS/diff-floats.py -p 0.01 expsmooth.out expsmooth.expected > expsmooth.diff
-$MRT_TOOLS/diff-floats.py -p 0.01 expsmooth.valid.out expsmooth.valid.expected > expsmooth.valid.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth.out expsmooth.expected -o expsmooth.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth.valid.out expsmooth.valid.expected -o expsmooth.valid.diff
 
 # There should be no difference in costs between training w/ and w/o exponential smoothing
-$MRT_TOOLS/diff-floats.py -p 0.001 expsmooth.out noexpsmooth.out > noexpsmooth.diff
+$MRT_TOOLS/diff-nums.py -p 0.001 expsmooth.out noexpsmooth.out -o noexpsmooth.diff
 
 
 # Exit with success code
diff --git a/tests/training/exp-smoothing/test_expsmooth_sync.sh b/tests/training/exp-smoothing/test_expsmooth_sync.sh
index 0ed3bfb..14d5442 100644
--- a/tests/training/exp-smoothing/test_expsmooth_sync.sh
+++ b/tests/training/exp-smoothing/test_expsmooth_sync.sh
@@ -41,11 +41,11 @@ cat expsmooth_sync.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | grep -v
 cat expsmooth_sync.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | grep 'valid' | sed 's/ : Time.*//' > expsmooth_sync.valid.out
 
 
-$MRT_TOOLS/diff-floats.py -p 0.1 expsmooth_sync.out expsmooth_sync.expected > expsmooth_sync.diff
-$MRT_TOOLS/diff-floats.py -p 0.1 expsmooth_sync.valid.out expsmooth_sync.valid.expected > expsmooth_sync.valid.diff
+$MRT_TOOLS/diff-nums.py -p 0.1 expsmooth_sync.out expsmooth_sync.expected -o expsmooth_sync.diff
+$MRT_TOOLS/diff-nums.py -p 0.1 expsmooth_sync.valid.out expsmooth_sync.valid.expected -o expsmooth_sync.valid.diff
 
 # There should be no difference in costs between training w/ and w/o exponential smoothing
-$MRT_TOOLS/diff-floats.py -p 0.1 expsmooth_sync.out noexpsmooth_sync.out > noexpsmooth_sync.diff
+$MRT_TOOLS/diff-nums.py -p 0.1 expsmooth_sync.out noexpsmooth_sync.out -o noexpsmooth_sync.diff
 
 
 # Exit with success code
diff --git a/tests/training/lm/test_lm-transformer.sh b/tests/training/lm/test_lm-transformer.sh
index 14767e6..47737e5 100644
--- a/tests/training/lm/test_lm-transformer.sh
+++ b/tests/training/lm/test_lm-transformer.sh
@@ -19,13 +19,13 @@ test -e lm-transformer/model.npz.yml
 test -e lm-transformer.log
 
 cat lm-transformer.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > lm-transformer.out
-$MRT_TOOLS/diff-floats.py lm-transformer.out lm-transformer.expected -p 0.02 > lm-transformer.diff
+$MRT_TOOLS/diff-nums.py lm-transformer.out lm-transformer.expected -p 0.02 -o lm-transformer.diff
 
 # Scoring with LM
 test -s temp.bpe.en || tail $MRT_DATA/europarl.de-en/corpus.bpe.en > test.bpe.en
 
 $MRT_MARIAN/build/marian-scorer -m lm-transformer/model.npz -t test.bpe.en -v vocab.en.yml > lm-transformer.scores.out
-$MRT_TOOLS/diff-floats.py lm-transformer.scores.out lm-transformer.scores.expected -p 0.002 > lm-transformer.scores.diff
+$MRT_TOOLS/diff-nums.py lm-transformer.scores.out lm-transformer.scores.expected -p 0.002 -o lm-transformer.scores.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/lm/test_lm.sh b/tests/training/lm/test_lm.sh
index 628fdc4..e3c32a0 100644
--- a/tests/training/lm/test_lm.sh
+++ b/tests/training/lm/test_lm.sh
@@ -19,13 +19,13 @@ test -e lm/model.npz.yml
 test -e lm.log
 
 cat lm.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > lm.out
-$MRT_TOOLS/diff-floats.py lm.out lm.expected -p 0.02 > lm.diff
+$MRT_TOOLS/diff-nums.py lm.out lm.expected -p 0.02 -o lm.diff
 
 # Scoring with LM
 test -s temp.bpe.en || tail $MRT_DATA/europarl.de-en/corpus.bpe.en > test.bpe.en
 
 $MRT_MARIAN/build/marian-scorer -m lm/model.npz -t test.bpe.en -v vocab.en.yml > lm.scores.out
-$MRT_TOOLS/diff-floats.py lm.scores.out lm.scores.expected -p 0.002 > lm.scores.diff
+$MRT_TOOLS/diff-nums.py lm.scores.out lm.scores.expected -p 0.002 -o lm.scores.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/mixed-models/test_ensemble_of_different_s2s.sh b/tests/training/mixed-models/test_ensemble_of_different_s2s.sh
index d26319d..4fb5a4d 100644
--- a/tests/training/mixed-models/test_ensemble_of_different_s2s.sh
+++ b/tests/training/mixed-models/test_ensemble_of_different_s2s.sh
@@ -31,7 +31,7 @@ test -e two_s2s_B.log
 $MRT_MARIAN/build/marian-decoder -m two_s2s/modelA.npz two_s2s/modelB.npz -v vocab.en.yml vocab.de.yml \
     -i text.in -o two_s2s.out --log two_s2s.log
 
-diff two_s2s.out two_s2s.expected > two_s2s.diff
+$MRT_TOOLS/diff.sh two_s2s.out two_s2s.expected > two_s2s.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/mixed-models/test_ensemble_of_s2s_and_transformer.sh b/tests/training/mixed-models/test_ensemble_of_s2s_and_transformer.sh
index 2580c26..2b0142e 100644
--- a/tests/training/mixed-models/test_ensemble_of_s2s_and_transformer.sh
+++ b/tests/training/mixed-models/test_ensemble_of_s2s_and_transformer.sh
@@ -31,7 +31,7 @@ test -e s2s_transf_B.log
 $MRT_MARIAN/build/marian-decoder -m s2s_transf/modelA.npz s2s_transf/modelB.npz -v vocab.en.yml vocab.de.yml \
     -i text.in -o s2s_transf.out --log s2s_transf.log
 
-diff s2s_transf.out s2s_transf.expected > s2s_transf.diff
+$MRT_TOOLS/diff.sh s2s_transf.out s2s_transf.expected > s2s_transf.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/model-types/test_transformer.sh b/tests/training/model-types/test_transformer.sh
index ce1962e..35f8f09 100644
--- a/tests/training/model-types/test_transformer.sh
+++ b/tests/training/model-types/test_transformer.sh
@@ -18,7 +18,7 @@ test -e transformer/model.npz
 test -e transformer.log
 
 cat transformer.log | $MRT_TOOLS/extract-costs.sh > transformer.out
-$MRT_TOOLS/diff-floats.py transformer.out transformer.expected -p 0.01 > transformer.diff
+$MRT_TOOLS/diff-nums.py transformer.out transformer.expected -p 0.01 -o transformer.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/multi-gpu/test_async_sgd_runs.sh b/tests/training/multi-gpu/test_async_sgd_runs.sh
index 5a0da50..15b0348 100644
--- a/tests/training/multi-gpu/test_async_sgd_runs.sh
+++ b/tests/training/multi-gpu/test_async_sgd_runs.sh
@@ -24,7 +24,7 @@ test -e vocab.de.yml
 test -e async_sgd.log
 
 cat async_sgd.log | $MRT_TOOLS/strip-timestamps.sh | grep -oP "Ep\. 1 .* Cost [0-9.]*" > async_sgd.out
-$MRT_TOOLS/diff-floats.py async_sgd.out async_sgd.expected -p 5.00 --max-diff-nums 2 > async_sgd.diff
+$MRT_TOOLS/diff-nums.py async_sgd.out async_sgd.expected -p 5.00 --allow-n-diffs 2 -o async_sgd.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/multi-gpu/test_sync_sgd.sh b/tests/training/multi-gpu/test_sync_sgd.sh
index f869887..0d511b3 100644
--- a/tests/training/multi-gpu/test_sync_sgd.sh
+++ b/tests/training/multi-gpu/test_sync_sgd.sh
@@ -24,7 +24,7 @@ test -e sync_sgd/model.full.npz
 test -e sync_sgd.log
 
 cat sync_sgd.log | $MRT_TOOLS/extract-costs.sh > sync_sgd.out
-$MRT_TOOLS/diff-floats.py sync_sgd.out sync_sgd.expected -p 0.1 > sync_sgd.diff
+$MRT_TOOLS/diff-nums.py sync_sgd.out sync_sgd.expected -p 0.1 -o sync_sgd.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/multi-source/test_multi-s2s.sh b/tests/training/multi-source/test_multi-s2s.sh
index 4cbbbc4..e55a835 100644
--- a/tests/training/multi-source/test_multi-s2s.sh
+++ b/tests/training/multi-source/test_multi-s2s.sh
@@ -19,7 +19,7 @@ test -e multi-s2s/model.npz.yml
 test -e multi-s2s.log
 
 cat multi-s2s.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > multi-s2s.out
-$MRT_TOOLS/diff-floats.py multi-s2s.out multi-s2s.expected -p 0.2 > multi-s2s.diff
+$MRT_TOOLS/diff-nums.py multi-s2s.out multi-s2s.expected -p 0.2 -o multi-s2s.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/multi-source/test_multi-transformer.sh b/tests/training/multi-source/test_multi-transformer.sh
index 833a324..cafd2ad 100644
--- a/tests/training/multi-source/test_multi-transformer.sh
+++ b/tests/training/multi-source/test_multi-transformer.sh
@@ -19,7 +19,7 @@ test -e multi-transformer/model.npz.yml
 test -e multi-transformer.log
 
 cat multi-transformer.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > multi-transformer.out
-$MRT_TOOLS/diff-floats.py multi-transformer.out multi-transformer.expected -p 0.2 > multi-transformer.diff
+$MRT_TOOLS/diff-nums.py multi-transformer.out multi-transformer.expected -p 0.2 -o multi-transformer.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/nematus/test_encdec_depth.sh b/tests/training/nematus/test_encdec_depth.sh
index da26042..99c4c7c 100644
--- a/tests/training/nematus/test_encdec_depth.sh
+++ b/tests/training/nematus/test_encdec_depth.sh
@@ -19,7 +19,7 @@ test -e encdec_depth/model.npz
 test -e encdec_depth/model.npz.yml
 
 cat encdec_depth.log | $MRT_TOOLS/extract-costs.sh > encdec_depth.out
-$MRT_TOOLS/diff-floats.py encdec_depth.out encdec_depth.expected -p 0.3 > encdec_depth.diff
+$MRT_TOOLS/diff-nums.py encdec_depth.out encdec_depth.expected -p 0.3 -o encdec_depth.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/nematus/test_wmt17_model.sh b/tests/training/nematus/test_wmt17_model.sh
index 9b56aea..43cdebe 100644
--- a/tests/training/nematus/test_wmt17_model.sh
+++ b/tests/training/nematus/test_wmt17_model.sh
@@ -19,7 +19,7 @@ test -e wmt17/model.npz
 test -e wmt17/model.npz.yml
 
 cat wmt17.log | $MRT_TOOLS/extract-costs.sh > wmt17.out
-$MRT_TOOLS/diff-floats.py wmt17.out wmt17.expected -p 0.3 > wmt17.diff
+$MRT_TOOLS/diff-nums.py wmt17.out wmt17.expected -p 0.3 -o wmt17.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/pretraining/test_weights_from_pretrained_model.sh b/tests/training/pretraining/test_weights_from_pretrained_model.sh
index 88a0e46..d7087c2 100644
--- a/tests/training/pretraining/test_weights_from_pretrained_model.sh
+++ b/tests/training/pretraining/test_weights_from_pretrained_model.sh
@@ -43,14 +43,14 @@ test -e model/model.npz
 for key in encoder_Wemb encoder_bi_U encoder_bi_r_Wx; do
     python3 $MRT_MARIAN/scripts/contrib/model_info.py -m model/orig.npz -k $key > key-orig-$key.txt
     python3 $MRT_MARIAN/scripts/contrib/model_info.py -m model/model.npz -k $key > key-model-$key.txt
-    $MRT_TOOLS/diff-floats.py --numpy -p 0.000001 key-orig-$key.txt key-model-$key.txt > key-diff-$key.txt
+    $MRT_TOOLS/diff-nums.py --numpy -p 0.000001 key-orig-$key.txt key-model-$key.txt -o key-diff-$key.txt
 done
 
 # Test if selected weights are identical with LM
 for key in decoder_Wemb decoder_cell1_U decoder_cell2_bx decoder_ff_logit_l1_W0; do
     python3 $MRT_MARIAN/scripts/contrib/model_info.py -m model/lm.npz -k $key > key-lm-$key.txt
     python3 $MRT_MARIAN/scripts/contrib/model_info.py -m model/model.npz -k $key > key-model-$key.txt
-    $MRT_TOOLS/diff-floats.py --numpy -p 0.000001 key-lm-$key.txt key-model-$key.txt > key-diff-$key.txt
+    $MRT_TOOLS/diff-nums.py --numpy -p 0.000001 key-lm-$key.txt key-model-$key.txt -o key-diff-$key.txt
 done
 
 # Exit with success code
diff --git a/tests/training/restarting/test_sgd_for_two_epochs.sh b/tests/training/restarting/test_sgd_for_two_epochs.sh
index 4720864..3cb09d2 100644
--- a/tests/training/restarting/test_sgd_for_two_epochs.sh
+++ b/tests/training/restarting/test_sgd_for_two_epochs.sh
@@ -44,7 +44,7 @@ test -e sgd_2nd_epoch.log
 cat sgd_2nd_epoch.log | $MRT_TOOLS/extract-disp.sh > sgd_2nd_epoch.out
 cat sgd_1st_epoch.out sgd_2nd_epoch.out > sgd_2e.out
 
-$MRT_TOOLS/diff-floats.py sgd_2e.out sgd_2e.expected -p 0.3 > sgd_2e.diff
+$MRT_TOOLS/diff-nums.py sgd_2e.out sgd_2e.expected -p 0.3 -o sgd_2e.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_corpus_restoration.sh b/tests/training/restoring/corpus/test_corpus_restoration.sh
index a948246..8b3f625 100644
--- a/tests/training/restoring/corpus/test_corpus_restoration.sh
+++ b/tests/training/restoring/corpus/test_corpus_restoration.sh
@@ -44,7 +44,7 @@ test -e corpus_2.log
 cat corpus_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > corpus_2.out
 cat corpus_1.out corpus_2.out > corpus.out
 
-$MRT_TOOLS/diff-floats.py corpus.out corpus.expected -p 0.1 > corpus.diff
+$MRT_TOOLS/diff-nums.py corpus.out corpus.expected -p 0.1 -o corpus.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_corpus_restoration_maxi_batch.sh b/tests/training/restoring/corpus/test_corpus_restoration_maxi_batch.sh
index 25543b4..fe5ff6e 100644
--- a/tests/training/restoring/corpus/test_corpus_restoration_maxi_batch.sh
+++ b/tests/training/restoring/corpus/test_corpus_restoration_maxi_batch.sh
@@ -46,7 +46,7 @@ test -e corpus_maxi_2.log
 cat corpus_maxi_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > corpus_maxi_2.out
 cat corpus_maxi_1.out corpus_maxi_2.out > corpus_maxi.out
 
-$MRT_TOOLS/diff-floats.py corpus_maxi.out corpus_maxi.expected -p 0.1 > corpus_maxi.diff
+$MRT_TOOLS/diff-nums.py corpus_maxi.out corpus_maxi.expected -p 0.1 -o corpus_maxi.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_corpus_restoration_mini_batch_fit.sh b/tests/training/restoring/corpus/test_corpus_restoration_mini_batch_fit.sh
index 222d384..2b2ade7 100644
--- a/tests/training/restoring/corpus/test_corpus_restoration_mini_batch_fit.sh
+++ b/tests/training/restoring/corpus/test_corpus_restoration_mini_batch_fit.sh
@@ -45,7 +45,7 @@ test -e corpus_fit_2.log
 cat corpus_fit_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > corpus_fit_2.out
 cat corpus_fit_1.out corpus_fit_2.out > corpus_fit.out
 
-$MRT_TOOLS/diff-floats.py corpus_fit.out corpus_fit.expected -p 0.1 > corpus_fit.diff
+$MRT_TOOLS/diff-nums.py corpus_fit.out corpus_fit.expected -p 0.1 -o corpus_fit.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_corpus_restoration_no_shuffle.sh b/tests/training/restoring/corpus/test_corpus_restoration_no_shuffle.sh
index c031177..3a25ca1 100644
--- a/tests/training/restoring/corpus/test_corpus_restoration_no_shuffle.sh
+++ b/tests/training/restoring/corpus/test_corpus_restoration_no_shuffle.sh
@@ -45,7 +45,7 @@ test -e corpus_noshuf_2.log
 cat corpus_noshuf_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > corpus_noshuf_2.out
 cat corpus_noshuf_1.out corpus_noshuf_2.out > corpus_noshuf.out
 
-$MRT_TOOLS/diff-floats.py corpus_noshuf.out corpus_noshuf.expected -p 0.1 > corpus_noshuf.diff
+$MRT_TOOLS/diff-nums.py corpus_noshuf.out corpus_noshuf.expected -p 0.1 -o corpus_noshuf.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_corpus_restoration_one_epoch.sh b/tests/training/restoring/corpus/test_corpus_restoration_one_epoch.sh
index 345c7af..affc5d7 100644
--- a/tests/training/restoring/corpus/test_corpus_restoration_one_epoch.sh
+++ b/tests/training/restoring/corpus/test_corpus_restoration_one_epoch.sh
@@ -44,7 +44,7 @@ test -e corpus_one_2.log
 cat corpus_one_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > corpus_one_2.out
 cat corpus_one_1.out corpus_one_2.out > corpus_one.out
 
-$MRT_TOOLS/diff-floats.py corpus_one.out corpus_one.expected -p 0.1 > corpus_one.diff
+$MRT_TOOLS/diff-nums.py corpus_one.out corpus_one.expected -p 0.1 -o corpus_one.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_corpus_restoration_s2s.sh b/tests/training/restoring/corpus/test_corpus_restoration_s2s.sh
index 20fc510..606acbb 100644
--- a/tests/training/restoring/corpus/test_corpus_restoration_s2s.sh
+++ b/tests/training/restoring/corpus/test_corpus_restoration_s2s.sh
@@ -44,7 +44,7 @@ test -e corpus_s2s_2.log
 cat corpus_s2s_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > corpus_s2s_2.out
 cat corpus_s2s_1.out corpus_s2s_2.out > corpus_s2s.out
 
-$MRT_TOOLS/diff-floats.py corpus_s2s.out corpus_s2s.expected -p 0.1 > corpus_s2s.diff
+$MRT_TOOLS/diff-nums.py corpus_s2s.out corpus_s2s.expected -p 0.1 -o corpus_s2s.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_sqlite_restoration.sh b/tests/training/restoring/corpus/test_sqlite_restoration.sh
index 1a22193..0dcbdf8 100644
--- a/tests/training/restoring/corpus/test_sqlite_restoration.sh
+++ b/tests/training/restoring/corpus/test_sqlite_restoration.sh
@@ -44,7 +44,7 @@ test -e sqlite_2.log
 cat sqlite_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > sqlite_2.out
 cat sqlite_1.out sqlite_2.out > sqlite.out
 
-$MRT_TOOLS/diff-floats.py sqlite.out sqlite.expected -p 0.1 > sqlite.diff
+$MRT_TOOLS/diff-nums.py sqlite.out sqlite.expected -p 0.1 -o sqlite.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_sqlite_restoration_maxi_batch.sh b/tests/training/restoring/corpus/test_sqlite_restoration_maxi_batch.sh
index def06d7..09770d1 100644
--- a/tests/training/restoring/corpus/test_sqlite_restoration_maxi_batch.sh
+++ b/tests/training/restoring/corpus/test_sqlite_restoration_maxi_batch.sh
@@ -44,7 +44,7 @@ test -e sqlite_maxi_2.log
 cat sqlite_maxi_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > sqlite_maxi_2.out
 cat sqlite_maxi_1.out sqlite_maxi_2.out > sqlite_maxi.out
 
-$MRT_TOOLS/diff-floats.py sqlite_maxi.out sqlite_maxi.expected -p 0.1 > sqlite_maxi.diff
+$MRT_TOOLS/diff-nums.py sqlite_maxi.out sqlite_maxi.expected -p 0.1 -o sqlite_maxi.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/exp-smoothing/test_expsmooth.sh b/tests/training/restoring/exp-smoothing/test_expsmooth.sh
index a54a685..624329f 100644
--- a/tests/training/restoring/exp-smoothing/test_expsmooth.sh
+++ b/tests/training/restoring/exp-smoothing/test_expsmooth.sh
@@ -77,8 +77,8 @@ cat expsmooth_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | grep 'vali
 
 
 # Results
-$MRT_TOOLS/diff-floats.py -p 0.01 expsmooth.out expsmooth.expected > expsmooth.diff
-$MRT_TOOLS/diff-floats.py -p 0.01 expsmooth.valid.out expsmooth.valid.expected > expsmooth.valid.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth.out expsmooth.expected -o expsmooth.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth.valid.out expsmooth.valid.expected -o expsmooth.valid.diff
 
 
 # Exit with success code
diff --git a/tests/training/restoring/exp-smoothing/test_expsmooth_s2s.sh b/tests/training/restoring/exp-smoothing/test_expsmooth_s2s.sh
index e48c86a..3a74557 100644
--- a/tests/training/restoring/exp-smoothing/test_expsmooth_s2s.sh
+++ b/tests/training/restoring/exp-smoothing/test_expsmooth_s2s.sh
@@ -77,8 +77,8 @@ cat expsmooth_s2s_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | grep '
 
 
 # Results
-$MRT_TOOLS/diff-floats.py -p 0.01 expsmooth_s2s.out expsmooth_s2s.expected > expsmooth_s2s.diff
-$MRT_TOOLS/diff-floats.py -p 0.01 expsmooth_s2s.valid.out expsmooth_s2s.valid.expected > expsmooth_s2s.valid.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth_s2s.out expsmooth_s2s.expected -o expsmooth_s2s.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth_s2s.valid.out expsmooth_s2s.valid.expected -o expsmooth_s2s.valid.diff
 
 
 # Exit with success code
diff --git a/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh b/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh
index 82e270a..20aadcf 100644
--- a/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh
+++ b/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh
@@ -77,8 +77,8 @@ cat expsmooth_sync_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | grep
 
 
 # Results
-$MRT_TOOLS/diff-floats.py -p 0.01 expsmooth_sync.out expsmooth_sync.expected > expsmooth_sync.diff
-$MRT_TOOLS/diff-floats.py -p 0.01 expsmooth_sync.valid.out expsmooth_sync.valid.expected > expsmooth_sync.valid.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth_sync.out expsmooth_sync.expected -o expsmooth_sync.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth_sync.valid.out expsmooth_sync.valid.expected -o expsmooth_sync.valid.diff
 
 
 # Exit with success code
diff --git a/tests/training/restoring/multi-gpu/test_async.sh b/tests/training/restoring/multi-gpu/test_async.sh
index 8d6fcef..0c3fcca 100644
--- a/tests/training/restoring/multi-gpu/test_async.sh
+++ b/tests/training/restoring/multi-gpu/test_async.sh
@@ -54,7 +54,7 @@ cat async.unsorted.expected | head -n -4 | sort -n > async.expected
 cat async.unsorted.out | head -n -4 | sort -n > async.out
 
 # async is undeterministic, so the conditions are weak
-$MRT_TOOLS/diff-floats.py -p 1.0 -n 2 async.out async.expected > async.diff
+$MRT_TOOLS/diff-nums.py -p 1.0 -n 2 async.out async.expected -o async.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/multi-gpu/test_sync.sh b/tests/training/restoring/multi-gpu/test_sync.sh
index ac62f07..78190f6 100644
--- a/tests/training/restoring/multi-gpu/test_sync.sh
+++ b/tests/training/restoring/multi-gpu/test_sync.sh
@@ -47,7 +47,7 @@ test -e sync_2.log
 
 cat sync_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' >> sync.out
 
-$MRT_TOOLS/diff-floats.py -p 0.08 sync.out sync.expected > sync.diff
+$MRT_TOOLS/diff-nums.py -p 0.08 sync.out sync.expected -o sync.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/optimizer/test_adagrad_params.sh b/tests/training/restoring/optimizer/test_adagrad_params.sh
index 96327e4..dabe9bd 100644
--- a/tests/training/restoring/optimizer/test_adagrad_params.sh
+++ b/tests/training/restoring/optimizer/test_adagrad_params.sh
@@ -18,13 +18,13 @@ test -e adagrad/model.npz.optimizer.npz
 test -e adagrad.log
 
 $MRT_TOOLS/extract-costs.sh < adagrad.log > adagrad.costs.out
-$MRT_TOOLS/diff-floats.py adagrad.costs.out adagrad.costs.expected -p 0.2 > adagrad.costs.diff
+$MRT_TOOLS/diff-nums.py adagrad.costs.out adagrad.costs.expected -p 0.2 -o adagrad.costs.diff
 
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adagrad/model.npz.optimizer.npz > adagrad.keys.out
-diff adagrad.keys.out adagrad.keys.expected > adagrad.keys.diff
+$MRT_TOOLS/diff.sh adagrad.keys.out adagrad.keys.expected > adagrad.keys.diff
 
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adagrad/model.npz.optimizer.npz -k "adagrad_gt" > adagrad.gt.out
-$MRT_TOOLS/diff-floats.py --numpy -p 0.0001 adagrad.gt.out adagrad.gt.expected > adagrad.gt.diff
+$MRT_TOOLS/diff-nums.py --numpy -p 0.0001 adagrad.gt.out adagrad.gt.expected -o adagrad.gt.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/optimizer/test_adam_params.sh b/tests/training/restoring/optimizer/test_adam_params.sh
index 7ef39a2..7c08df1 100644
--- a/tests/training/restoring/optimizer/test_adam_params.sh
+++ b/tests/training/restoring/optimizer/test_adam_params.sh
@@ -18,15 +18,15 @@ test -e adam/model.npz.optimizer.npz
 test -e adam.log
 
 $MRT_TOOLS/extract-costs.sh < adam.log > adam.costs.out
-$MRT_TOOLS/diff-floats.py adam.costs.out adam.costs.expected -p 0.2 > adam.costs.diff
+$MRT_TOOLS/diff-nums.py adam.costs.out adam.costs.expected -p 0.2 -o adam.costs.diff
 
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam/model.npz.optimizer.npz > adam.keys.out
-diff adam.keys.out adam.keys.expected > adam.keys.diff
+$MRT_TOOLS/diff.sh adam.keys.out adam.keys.expected > adam.keys.diff
 
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam/model.npz.optimizer.npz -k "adam_mt" > adam.mt.out
-$MRT_TOOLS/diff-floats.py --numpy -p 0.0001  adam.mt.out adam.mt.expected > adam.mt.diff
+$MRT_TOOLS/diff-nums.py --numpy -p 0.0001  adam.mt.out adam.mt.expected -o adam.mt.diff
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam/model.npz.optimizer.npz -k "adam_vt" > adam.vt.out
-$MRT_TOOLS/diff-floats.py --numpy -p 0.000005 adam.vt.out adam.vt.expected > adam.vt.diff
+$MRT_TOOLS/diff-nums.py --numpy -p 0.000005 adam.vt.out adam.vt.expected -o adam.vt.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/optimizer/test_adam_params_async.sh b/tests/training/restoring/optimizer/test_adam_params_async.sh
index 0863d32..211c135 100644
--- a/tests/training/restoring/optimizer/test_adam_params_async.sh
+++ b/tests/training/restoring/optimizer/test_adam_params_async.sh
@@ -23,16 +23,16 @@ test -e adam_async/model.npz.optimizer.npz
 test -e adam_async.log
 
 $MRT_TOOLS/extract-costs.sh < adam_async.log > adam_async.costs.out
-$MRT_TOOLS/diff-floats.py adam_async.costs.out adam_async.costs.expected -p 10.00 -n 2 > adam_async.costs.diff
+$MRT_TOOLS/diff-nums.py adam_async.costs.out adam_async.costs.expected -p 10.00 -n 2 -o adam_async.costs.diff
 
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam_async/model.npz.optimizer.npz > adam_async.keys.out
-diff adam_async.keys.out adam.keys.expected > adam_async.keys.diff
+$MRT_TOOLS/diff.sh adam_async.keys.out adam.keys.expected > adam_async.keys.diff
 
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam_async/model.npz.optimizer.npz -k "adam_mt" > adam_async.mt.out
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam_async/model.npz.optimizer.npz -k "adam_vt" > adam_async.vt.out
 
-$MRT_TOOLS/diff-floats.py --numpy -a -p 0.02  adam_async.mt.out adam_async.mt.expected > adam_async.mt.diff
-$MRT_TOOLS/diff-floats.py --numpy    -p 0.001 adam_async.vt.out adam_async.vt.expected > adam_async.vt.diff
+$MRT_TOOLS/diff-nums.py --numpy -a -p 0.02  adam_async.mt.out adam_async.mt.expected -o adam_async.mt.diff
+$MRT_TOOLS/diff-nums.py --numpy    -p 0.001 adam_async.vt.out adam_async.vt.expected -o adam_async.vt.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/optimizer/test_adam_params_sync.sh b/tests/training/restoring/optimizer/test_adam_params_sync.sh
index 176c377..e287bff 100644
--- a/tests/training/restoring/optimizer/test_adam_params_sync.sh
+++ b/tests/training/restoring/optimizer/test_adam_params_sync.sh
@@ -23,16 +23,16 @@ test -e adam_sync/model.npz.optimizer.npz
 test -e adam_sync.log
 
 $MRT_TOOLS/extract-costs.sh < adam_sync.log > adam_sync.costs.out
-$MRT_TOOLS/diff-floats.py adam_sync.costs.out adam_sync.costs.expected -p 3.00 -n 2 > adam_sync.costs.diff
+$MRT_TOOLS/diff-nums.py adam_sync.costs.out adam_sync.costs.expected -p 3.00 -n 2 -o adam_sync.costs.diff
 
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam_sync/model.npz.optimizer.npz > adam_sync.keys.out
-diff adam_sync.keys.out adam.keys.expected > adam_sync.keys.diff
+$MRT_TOOLS/diff.sh adam_sync.keys.out adam.keys.expected > adam_sync.keys.diff
 
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam_sync/model.npz.optimizer.npz -k "adam_mt" > adam_sync.mt.out
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam_sync/model.npz.optimizer.npz -k "adam_vt" > adam_sync.vt.out
 
-$MRT_TOOLS/diff-floats.py --numpy -p 0.002  adam_sync.mt.out adam_sync.mt.expected > adam_sync.mt.diff
-$MRT_TOOLS/diff-floats.py --numpy -p 0.0002 adam_sync.vt.out adam_sync.vt.expected > adam_sync.vt.diff
+$MRT_TOOLS/diff-nums.py --numpy -p 0.002  adam_sync.mt.out adam_sync.mt.expected -o adam_sync.mt.diff
+$MRT_TOOLS/diff-nums.py --numpy -p 0.0002 adam_sync.vt.out adam_sync.vt.expected -o adam_sync.vt.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/optimizer/test_loading_adam_params.sh b/tests/training/restoring/optimizer/test_loading_adam_params.sh
index 0a4b2b5..88ddf64 100644
--- a/tests/training/restoring/optimizer/test_loading_adam_params.sh
+++ b/tests/training/restoring/optimizer/test_loading_adam_params.sh
@@ -33,7 +33,7 @@ cat adam_load_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ :
 
 # The allowed tolerance needs to be radiculously high as restarting the
 # training is very instable on different GPU devices
-$MRT_TOOLS/diff-floats.py -p 15.0 -n 1 adam_load.out adam_load.expected > adam_load.diff
+$MRT_TOOLS/diff-nums.py -p 15.0 -n 1 adam_load.out adam_load.expected -o adam_load.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/validation/test_adding_validator_after_restart.sh b/tests/training/restoring/validation/test_adding_validator_after_restart.sh
index fd8a000..9a93a6b 100644
--- a/tests/training/restoring/validation/test_adding_validator_after_restart.sh
+++ b/tests/training/restoring/validation/test_adding_validator_after_restart.sh
@@ -50,7 +50,7 @@ test -e valid_add/model.npz
 test -e valid_add_2.log
 
 cat valid_add_2.log | $MRT_TOOLS/strip-timestamps.sh >> valid_add.out
-$MRT_TOOLS/diff-floats.py -p 0.003 valid_add.out valid_add.expected > valid_add.diff
+$MRT_TOOLS/diff-nums.py -p 0.003 valid_add.out valid_add.expected -o valid_add.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/validation/test_restoring_newbest_validators.sh b/tests/training/restoring/validation/test_restoring_newbest_validators.sh
index 296cea8..8674944 100644
--- a/tests/training/restoring/validation/test_restoring_newbest_validators.sh
+++ b/tests/training/restoring/validation/test_restoring_newbest_validators.sh
@@ -53,7 +53,7 @@ test -e valid_newbest/model.npz
 test -e valid_newbest_2.log
 
 cat valid_newbest_2.log | $MRT_TOOLS/strip-timestamps.sh | grep -P "\[valid\]" >> valid_newbest.out
-diff valid_newbest.out valid_newbest.expected > valid_newbest.diff
+$MRT_TOOLS/diff.sh valid_newbest.out valid_newbest.expected > valid_newbest.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/validation/test_restoring_stalled_validators.sh b/tests/training/restoring/validation/test_restoring_stalled_validators.sh
index 8283e26..a53e323 100644
--- a/tests/training/restoring/validation/test_restoring_stalled_validators.sh
+++ b/tests/training/restoring/validation/test_restoring_stalled_validators.sh
@@ -68,7 +68,7 @@ test -e valid_stalled_2.log
 
 cat valid_stalled_2.log | $MRT_TOOLS/strip-timestamps.sh \
     | grep -P "\[valid\]|Saving model" | grep -v "cross-entropy" >> valid_stalled.out
-diff valid_stalled.out valid_stalled.expected > valid_stalled.diff
+$MRT_TOOLS/diff.sh valid_stalled.out valid_stalled.expected > valid_stalled.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/validation/test_restoring_validation.sh b/tests/training/restoring/validation/test_restoring_validation.sh
index 1a1785a..3a1786b 100644
--- a/tests/training/restoring/validation/test_restoring_validation.sh
+++ b/tests/training/restoring/validation/test_restoring_validation.sh
@@ -36,7 +36,7 @@ test -e valid/model.npz
 test -e valid_2.log
 
 cat valid_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "valid-script" >> valid.out
-diff valid.out valid.expected > valid.diff
+$MRT_TOOLS/diff.sh valid.out valid.expected > valid.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh b/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh
index 4f92b20..4a6c4a9 100644
--- a/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh
+++ b/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh
@@ -36,7 +36,7 @@ test -e valid_lowisbet/model.npz
 test -e valid_lowisbet_2.log
 
 cat valid_lowisbet_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "cross-entropy" >> valid_lowisbet.out
-$MRT_TOOLS/diff-floats.py -p 0.1 valid_lowisbet.out valid_lowisbet.expected > valid_lowisbet.diff
+$MRT_TOOLS/diff-nums.py -p 0.1 valid_lowisbet.out valid_lowisbet.expected -o valid_lowisbet.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/validation/test_final_validation_after_batches.sh b/tests/training/validation/test_final_validation_after_batches.sh
index 26860a9..d6de7f1 100644
--- a/tests/training/validation/test_final_validation_after_batches.sh
+++ b/tests/training/validation/test_final_validation_after_batches.sh
@@ -19,7 +19,7 @@ test -e final_batch/model.npz
 test -e final_batch.log
 
 $MRT_TOOLS/strip-timestamps.sh < final_batch.log > final_batch.out
-$MRT_TOOLS/diff-floats.py final_batch.out final_batch.expected -p 0.9 > final_batch.diff
+$MRT_TOOLS/diff-nums.py final_batch.out final_batch.expected -p 0.9 -o final_batch.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/validation/test_final_validation_after_batches_match.sh b/tests/training/validation/test_final_validation_after_batches_match.sh
index 955fc95..673c087 100644
--- a/tests/training/validation/test_final_validation_after_batches_match.sh
+++ b/tests/training/validation/test_final_validation_after_batches_match.sh
@@ -19,7 +19,7 @@ test -e final_match/model.npz
 test -e final_match.log
 
 $MRT_TOOLS/strip-timestamps.sh < final_match.log > final_match.out
-$MRT_TOOLS/diff-floats.py final_match.out final_match.expected -p 0.9 > final_match.diff
+$MRT_TOOLS/diff-nums.py final_match.out final_match.expected -p 0.9 -o final_match.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/validation/test_final_validation_after_epochs.sh b/tests/training/validation/test_final_validation_after_epochs.sh
index de2fc4c..08d391f 100644
--- a/tests/training/validation/test_final_validation_after_epochs.sh
+++ b/tests/training/validation/test_final_validation_after_epochs.sh
@@ -22,7 +22,7 @@ test -e final_epoch/model.npz
 test -e final_epoch.log
 
 $MRT_TOOLS/strip-timestamps.sh < final_epoch.log > final_epoch.out
-$MRT_TOOLS/diff-floats.py final_epoch.out final_epoch.expected -p 0.9 > final_epoch.diff
+$MRT_TOOLS/diff-nums.py final_epoch.out final_epoch.expected -p 0.9 -o final_epoch.diff
 
 # Exit with success code
 exit 0
diff --git a/tools/diff-floats.py b/tools/diff-floats.py
deleted file mode 100755
index 63eaa25..0000000
--- a/tools/diff-floats.py
+++ /dev/null
@@ -1,112 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import os
-import sys
-import argparse
-import re
-
-REGEX_NUMERIC = re.compile(r"^[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?$")
-REPLACE_NUMPY = [
-    ("[[", "[[ "),
-    ("]]", " ]]"),
-    ("0. ", "0.0 "),
-    ("...) ", "... "),
-    ("..., ", "... "),
-    ("]", " ]"),
-    ("[", "[ ")
-]
-
-
-def is_numeric(s):
-    return REGEX_NUMERIC.match(s)
-
-
-def main():
-    args = parse_user_args()
-    exit_code = 0
-    max_diff_nums = args.max_diff_nums
-
-    i = 0
-    while True:
-        if args.numpy:
-            line1 = ' '.join(args.file1.readlines()).replace('\n', '')
-            line2 = ' '.join(args.file2.readlines()).replace('\n', '')
-
-            for k, v in REPLACE_NUMPY:
-                line1 = line1.replace(k, v)
-                line2 = line2.replace(k, v)
-        else:
-            line1 = next(args.file1, None)
-            if line1 is None:
-                break
-            line2 = next(args.file2, None)
-
-            if args.separate_nums:
-                line1 = line1.replace(args.separate_nums,
-                                      ' ' + args.separate_nums + ' ')
-                line2 = line2.replace(args.separate_nums,
-                                      ' ' + args.separate_nums + ' ')
-
-
-        line1_toks = line1.rstrip().split()
-        line2_toks = line2.rstrip().split()
-
-
-        nums1 = [float(s) for s in line1_toks if is_numeric(s)]
-        nums2 = [float(s) for s in line2_toks if is_numeric(s)]
-
-        text1 = ' '.join(["<NUM>" if is_numeric(s) else s for s in line1_toks])
-        text2 = ' '.join(["<NUM>" if is_numeric(s) else s for s in line2_toks])
-
-        if text1 != text2:
-            print "Line {}: different texts:\n< {}\n> {}".format( i, text1, text2)
-            exit_code = 1
-            continue
-
-        if len(nums1) != len(nums2):
-            print "Line {}: different number of numerics: {} / {}" \
-                .format(i, nums1, nums2)
-            exit_code = 1
-            continue
-
-        for j, (n1, n2) in enumerate(zip(nums1, nums2)):
-            if args.abs:
-                n1 = abs(n1)
-                n2 = abs(n2)
-            if abs(n1 - n2) > args.precision:
-                if max_diff_nums < 1:
-                    print "Line {}: {} != {}".format(i, n1, n2)
-                    exit_code = 1
-                else:
-                    print "Line {}: {} != {}, allowed diff. numbers: {}" \
-                        .format(i, n1, n2, max_diff_nums)
-                    max_diff_nums -= 1
-
-        if args.numpy:
-            break
-        i += 1
-
-    for _ in args.file2:
-        print "Extra line in the second file!"
-        exit_code = 1
-
-    return exit_code
-
-
-def parse_user_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("file1", type=argparse.FileType('r'))
-    parser.add_argument("file2", type=argparse.FileType('r'))
-    parser.add_argument("-p", "--precision", type=float, default=0.001)
-    parser.add_argument("-n", "--max-diff-nums", type=int, default=0)
-    parser.add_argument("-a", "--abs", action="store_true")
-    parser.add_argument("-s", "--separate-nums", type=str)
-    parser.add_argument("--numpy", action="store_true")
-
-    return parser.parse_args()
-
-
-if __name__ == '__main__':
-    code = main()
-    exit(code)
diff --git a/tools/diff-nums.py b/tools/diff-nums.py
new file mode 100755
index 0000000..5b895bb
--- /dev/null
+++ b/tools/diff-nums.py
@@ -0,0 +1,151 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import os
+import sys
+import argparse
+import re
+
+REGEX_NUMERIC  = re.compile(r"^[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?$")
+REGEX_STRIP_EP = re.compile(r"^\[valid\] Ep\. \d+ : Up\. ")
+
+NORMALIZE_NUMPY = [
+    ("[[", "[[ "),
+    ("]]", " ]]"),
+    ("0. ", "0.0 "),
+    ("...) ", "... "),
+    ("..., ", "... "),
+    ("]", " ]"),
+    ("[", "[ ")
+]
+
+
+def main():
+    """Compare file1 and file2 line by line; return 0 if they match within the tolerances, else 1."""
+    args = parse_user_args()
+    display_command(args)
+
+    exit_code = 0
+    allowed_diffs = args.allow_n_diffs
+    args.message_count = 0
+
+    i = 0
+    while True:
+        if args.numpy:
+            line1 = read_numpy(args.file1)
+            line2 = read_numpy(args.file2)
+        else:
+            line1 = read_line(args.file1, args.separate)
+            line2 = read_line(args.file2, args.separate)
+            if line1 is None or line2 is None:
+                # a line already read from the longer file is invisible to the loops after `while`, so report it here
+                if line1 is not None:
+                    message("Extra line in the first file", args)
+                    exit_code = 1
+                if line2 is not None:
+                    message("Extra line in the second file", args)
+                    exit_code = 1
+                break
+
+        _, nums1, text1 = process_line(line1)
+        _, nums2, text2 = process_line(line2)
+
+        if text1 != text2:
+            message("Line {}: different texts:\n< {}\n> {}".format(i, text1, text2), args)
+            exit_code = 1
+        elif len(nums1) != len(nums2):
+            message("Line {}: different number of numerics: {} / {}".format(i, nums1, nums2), args)
+            exit_code = 1
+        else:
+            for n1, n2 in zip(nums1, nums2):
+                if args.abs:
+                    n1, n2 = abs(n1), abs(n2)
+                if abs(n1 - n2) > args.precision:
+                    if allowed_diffs < 1:
+                        message("Line {}: {} != {}".format(i, n1, n2), args)
+                        exit_code = 1
+                    else:
+                        message("Line {}: {} != {}, allowed number of differences: {}"
+                                .format(i, n1, n2, allowed_diffs), args)
+                        allowed_diffs -= 1
+
+        if args.numpy:
+            break
+        i += 1
+
+    for _ in args.file1:
+        message("Extra line in the first file", args)
+        exit_code = 1
+    for _ in args.file2:
+        message("Extra line in the second file", args)
+        exit_code = 1
+
+    return exit_code
+
+
+def read_numpy(iofile):
+    merged = ' '.join(row.replace('\n', '') for row in iofile.readlines())  # whole file as one space-joined line
+    for old, new in NORMALIZE_NUMPY:    # rules run in list order; each one sees the output of the previous
+        merged = merged.replace(old, new)
+    return merged
+
+
+def read_line(iofile, separator=""):
+    raw = next(iofile, None)        # None once the file is exhausted
+    if not (separator and raw):     # nothing to pad: no separator given, or no (non-empty) line was read
+        return raw
+    return raw.replace(separator, " {} ".format(separator))    # put spaces around every separator occurrence
+
+
+def process_line(line):
+    stripped = REGEX_STRIP_EP.sub("[valid] ", line)                 # "[valid] Ep. 1 : Up. 30" -> "[valid] 30"
+    tokens = stripped.rstrip().replace("[[-", "[[ -").split()       # detach "-" from a leading "[[" and tokenize
+    flags = [bool(is_numeric(tok)) for tok in tokens]               # True where the token is a number
+    numbers = [float(tok) for tok, num in zip(tokens, flags) if num]
+    masked = ["<NUM>" if num else tok for tok, num in zip(tokens, flags)]
+    return tokens, numbers, ' '.join(masked)                        # tokens, their numeric values, number-masked text
+
+
+def is_numeric(s):
+    return re.match(REGEX_NUMERIC, s)   # match object (truthy) when the whole token is a number, otherwise None
+
+
+def message(text, args):
+    out = args.output
+    line = text if text.endswith("\n") else text + "\n"     # every message is written newline-terminated
+    out.write(line)
+    args.message_count += 1
+    if not (args.quiet or out is sys.stdout or out is sys.stderr):  # echo only when output is neither stdout nor stderr
+        sys.stderr.write(line)
+
+
+def display_command(args):
+    # Write the invocation to stderr with the file arguments expanded to
+    # absolute paths; prints nothing in quiet mode.
+    if args.quiet:
+        return
+    # arguments equal to one of these names are treated as paths
+    paths = (args.file1.name, args.file2.name, args.output.name)
+    shown = [sys.argv[0]]
+    shown.extend(os.path.abspath(opt) if opt in paths else opt
+                 for opt in sys.argv[1:])
+    sys.stderr.write("Command: {}\n".format(" ".join(shown)))
+
+
+def parse_user_args():
+    parser = argparse.ArgumentParser(description="Compare two text files, tolerating small differences between numbers.")
+    parser.add_argument("file1", type=argparse.FileType('r'), help="first file to compare")
+    parser.add_argument("file2", type=argparse.FileType('r'), help="second file to compare")
+    parser.add_argument("-o", "--output", type=argparse.FileType('w'), metavar="FILE", default=sys.stdout, help="where to write difference messages, default: stdout")
+    parser.add_argument("-p", "--precision", type=float, metavar="FLOAT", default=0.001, help="largest absolute difference between two numbers that is not reported, default: %(default)s")
+    parser.add_argument("-n", "--allow-n-diffs", type=int, metavar="INT", default=0, help="number of numeric differences that are reported but do not cause a failure, default: %(default)s")
+    parser.add_argument("-s", "--separate", type=str, metavar="STRING", help="surround every occurrence of STRING with spaces before tokenizing (ignored with --numpy)")
+    parser.add_argument("-a", "--abs", action="store_true", help="compare absolute values of the numbers")
+    parser.add_argument("--numpy", action="store_true", help="merge all lines of each file and normalize numpy print formatting before comparing")
+    parser.add_argument("-q", "--quiet", action="store_true", help="do not print the command line and do not echo messages to stderr")
+    return parser.parse_args()
+
+
+if __name__ == '__main__':
+    code = main()
+    sys.exit(code)      # the builtin exit() is injected by the site module and is missing under `python -S`
diff --git a/tools/diff.sh b/tools/diff.sh
new file mode 100755
index 0000000..ef03374
--- /dev/null
+++ b/tools/diff.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+[[ "$#" -eq 2 ]] && >&2 echo "Command: $(realpath "$0") $(realpath -m "$1") $(realpath -m "$2")"
+diff "$1" "$2"
author	Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk>	2018-11-15 13:23:40 +0300
committer	Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk>	2018-11-15 13:23:40 +0300
commit	9b65326839049d2d63fe15f71b8bd60a5e7f236f (patch)
tree	9528e52c60e15c5336271e84fce2b1365b469e08
parent	14760f37ecb99f84a3b85cfc22c55ce27bf8a813 (diff)
parent	32fe3c1a12bd17c9b7d0c1d11afd903dee954535 (diff)