Use -o options

author: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> 2018-11-12 20:11:00 +0300
committer: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> 2018-11-12 20:11:00 +0300
commit: 374a7001cc73cff37fdfec6b59203b0fc1ee175d (patch)
tree: 86d071972668ed91ea1b1c7733b11d30ade0844a
parent: cf2d46ea1ff3481c7a5007a88eda9576719b8c70 (diff)
83 files changed, 104 insertions, 104 deletions
diff --git a/tests/_self-adaptive/test_context_partial.sh b/tests/_self-adaptive/test_context_partial.sh
index e3e4dee..2d2e49a 100644
--- a/tests/_self-adaptive/test_context_partial.sh
+++ b/tests/_self-adaptive/test_context_partial.sh
@@ -18,7 +18,7 @@ diff contextpart.out contextpart.expected > contextpart.diff
 
 # Check costs
 cat contextpart.log | $MRT_TOOLS/extract-costs.sh > contextpart.costs.out
-$MRT_TOOLS/diff-nums.py -p 0.01 contextpart.costs.out contextpart.costs.expected > contextpart.costs.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 contextpart.costs.out contextpart.costs.expected -o contextpart.costs.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/_self-adaptive/test_oracle_1_sent_2_epochs.sh b/tests/_self-adaptive/test_oracle_1_sent_2_epochs.sh
index d26e543..f307ba7 100644
--- a/tests/_self-adaptive/test_oracle_1_sent_2_epochs.sh
+++ b/tests/_self-adaptive/test_oracle_1_sent_2_epochs.sh
@@ -28,7 +28,7 @@ diff oracle_1s2e.bleu oracle.bleu.expected > oracle_1s2e.bleu.diff
 
 # Check costs
 cat oracle_1s2e.log | grep 'Ep\. ' | $MRT_TOOLS/extract-costs.sh > costs_1s2e.out
-$MRT_TOOLS/diff-nums.py -p 0.01 costs_1s2e.out costs.expected > costs_1s2e.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 costs_1s2e.out costs.expected -o costs_1s2e.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/_self-adaptive/test_oracle_2_sent_1_epoch.sh b/tests/_self-adaptive/test_oracle_2_sent_1_epoch.sh
index c73e3f3..27d816b 100644
--- a/tests/_self-adaptive/test_oracle_2_sent_1_epoch.sh
+++ b/tests/_self-adaptive/test_oracle_2_sent_1_epoch.sh
@@ -28,7 +28,7 @@ diff oracle_2s1e.bleu oracle.bleu.expected > oracle_2s1e.bleu.diff
 
 # Check costs
 cat oracle_2s1e.log | grep 'Ep\. ' | $MRT_TOOLS/extract-costs.sh > costs_2s1e.out
-$MRT_TOOLS/diff-nums.py -p 0.01 costs_2s1e.out costs.expected > costs_2s1e.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 costs_2s1e.out costs.expected -o costs_2s1e.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/_template/test_training.sh b/tests/_template/test_training.sh
index eacd788..551db67 100644
--- a/tests/_template/test_training.sh
+++ b/tests/_template/test_training.sh
@@ -25,7 +25,7 @@ test -e train.log
 
 # Compare the current output with the expected output
 cat train.log | $MRT_TOOLS/extract-costs.sh > train.out
-$MRT_TOOLS/diff-nums.py train.out train.expected > train.diff
+$MRT_TOOLS/diff-nums.py train.out train.expected -o train.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/align-ensemble/test_align_ensemble.sh b/tests/decoder/align-ensemble/test_align_ensemble.sh
index ddefd2f..066702c 100644
--- a/tests/decoder/align-ensemble/test_align_ensemble.sh
+++ b/tests/decoder/align-ensemble/test_align_ensemble.sh
@@ -5,7 +5,7 @@ set -e
 
 # Test code goes here
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.ensemble.yml --mini-batch 32 -b 5 --alignment < text.in > align.out
-$MRT_TOOLS/diff-nums.py -p 0.0001 align.out align.expected > align.diff
+$MRT_TOOLS/diff-nums.py -p 0.0001 align.out align.expected -o align.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/align-ensemble/test_align_ensemble_beam_1.sh b/tests/decoder/align-ensemble/test_align_ensemble_beam_1.sh
index 8f72a03..b4809d6 100644
--- a/tests/decoder/align-ensemble/test_align_ensemble_beam_1.sh
+++ b/tests/decoder/align-ensemble/test_align_ensemble_beam_1.sh
@@ -5,7 +5,7 @@ set -e
 
 # Test code goes here
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.ensemble.yml --mini-batch 1 -b 1 --alignment < text.in > align.b1.out
-$MRT_TOOLS/diff-nums.py -p 0.0001 align.b1.out align.b1.expected > align.b1.diff
+$MRT_TOOLS/diff-nums.py -p 0.0001 align.b1.out align.b1.expected -o align.b1.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/align/test_align.sh b/tests/decoder/align/test_align.sh
index eea0a85..7c0028d 100644
--- a/tests/decoder/align/test_align.sh
+++ b/tests/decoder/align/test_align.sh
@@ -5,7 +5,7 @@ set -e
 
 # Test code goes here
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml --mini-batch 16 -b 5 --alignment < text.in > align.out
-$MRT_TOOLS/diff-nums.py -p 0.0001 align.out align.expected > align.diff
+$MRT_TOOLS/diff-nums.py -p 0.0001 align.out align.expected -o align.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/align/test_align_beam_1.sh b/tests/decoder/align/test_align_beam_1.sh
index 6fad30c..f4869ac 100644
--- a/tests/decoder/align/test_align_beam_1.sh
+++ b/tests/decoder/align/test_align_beam_1.sh
@@ -5,7 +5,7 @@ set -e
 
 # Test code goes here
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml --mini-batch 1 -b 1 --alignment < text.in > align.b1.out
-$MRT_TOOLS/diff-nums.py -p 0.0001 align.b1.out align.b1.expected > align.b1.diff
+$MRT_TOOLS/diff-nums.py -p 0.0001 align.b1.out align.b1.expected -o align.b1.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/align/test_align_beam_1_batched.sh b/tests/decoder/align/test_align_beam_1_batched.sh
index 3a5a9b1..68ca362 100644
--- a/tests/decoder/align/test_align_beam_1_batched.sh
+++ b/tests/decoder/align/test_align_beam_1_batched.sh
@@ -5,7 +5,7 @@ set -e
 
 # Test code goes here
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml --mini-batch 32 -b 1 --alignment < text.in > align.batched.out
-$MRT_TOOLS/diff-nums.py -p 0.0001 align.batched.out align.batched.expected > align.batched.diff
+$MRT_TOOLS/diff-nums.py -p 0.0001 align.batched.out align.batched.expected -o align.batched.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/align/test_align_nbest.sh b/tests/decoder/align/test_align_nbest.sh
index bcd5124..8add32f 100644
--- a/tests/decoder/align/test_align_nbest.sh
+++ b/tests/decoder/align/test_align_nbest.sh
@@ -5,7 +5,7 @@ set -e
 
 # Test code goes here
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml --mini-batch 16 -b 3 --n-best --alignment < text.in > align_nbest.out
-$MRT_TOOLS/diff-nums.py -p 0.0001 align_nbest.out align_nbest.expected > align_nbest.diff
+$MRT_TOOLS/diff-nums.py -p 0.0001 align_nbest.out align_nbest.expected -o align_nbest.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/align/test_align_threshold.sh b/tests/decoder/align/test_align_threshold.sh
index 7a4d2c6..6c672b1 100644
--- a/tests/decoder/align/test_align_threshold.sh
+++ b/tests/decoder/align/test_align_threshold.sh
@@ -5,7 +5,7 @@ set -e
 
 # Test code goes here
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml --mini-batch 16 -b 5 --alignment 0.35 < text.in > align_threshold.out
-$MRT_TOOLS/diff-nums.py -p 0.0001 align_threshold.out align_threshold.expected > align_threshold.diff
+$MRT_TOOLS/diff-nums.py -p 0.0001 align_threshold.out align_threshold.expected -o align_threshold.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/align/test_soft_align.sh b/tests/decoder/align/test_soft_align.sh
index e1aa3a4..c08a2e2 100644
--- a/tests/decoder/align/test_soft_align.sh
+++ b/tests/decoder/align/test_soft_align.sh
@@ -6,7 +6,7 @@ set -e
 # Test code goes here
 rm -f soft.out soft.raw.out
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml --mini-batch 5 -b 5 --alignment soft < text.in > soft.out
-$MRT_TOOLS/diff-nums.py -s , -p 0.0001 soft.out soft.expected > soft.diff
+$MRT_TOOLS/diff-nums.py -s , -p 0.0001 soft.out soft.expected -o soft.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/align/test_soft_align_nbest.sh b/tests/decoder/align/test_soft_align_nbest.sh
index 2f811e7..57418c3 100644
--- a/tests/decoder/align/test_soft_align_nbest.sh
+++ b/tests/decoder/align/test_soft_align_nbest.sh
@@ -6,7 +6,7 @@ set -e
 # Test code goes here
 rm -f soft.nbest.out soft.nbest.raw.out
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml --mini-batch 5 -b 3 --n-best --alignment soft < text.in > soft.nbest.out
-$MRT_TOOLS/diff-nums.py -s , -p 0.0001 soft.nbest.out soft.nbest.expected > soft.nbest.diff
+$MRT_TOOLS/diff-nums.py -s , -p 0.0001 soft.nbest.out soft.nbest.expected -o soft.nbest.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/wmt16/test_nbest.sh b/tests/decoder/wmt16/test_nbest.sh
index 7787624..a002bdb 100644
--- a/tests/decoder/wmt16/test_nbest.sh
+++ b/tests/decoder/wmt16/test_nbest.sh
@@ -5,7 +5,7 @@ set -e
 
 # Test code goes here
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt16_systems/marian.en-de.yml -b 5 --n-best < text.in > nbest.out
-$MRT_TOOLS/diff-nums.py nbest.out nbest.expected > nbest.diff
+$MRT_TOOLS/diff-nums.py nbest.out nbest.expected -o nbest.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/decoder/wmt17/test_nbest.sh b/tests/decoder/wmt17/test_nbest.sh
index c62b842..05a4053 100644
--- a/tests/decoder/wmt17/test_nbest.sh
+++ b/tests/decoder/wmt17/test_nbest.sh
@@ -8,11 +8,11 @@ $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/wmt17_systems/marian.en-de.yml \
   -b 5 --n-best --normalize < text.in | tail -n +6 > nbest.out
 
 # Compare n-best lists
-$MRT_TOOLS/diff-nums.py -p 0.0002 nbest.out nbest.expected > nbest.diff
+$MRT_TOOLS/diff-nums.py -p 0.0002 nbest.out nbest.expected -o nbest.diff
 
 # Compare with nematus scores
 cat nbest.out | sed -r 's/ \|\|\| /\t/g' | cut -f4 | cut -c2- > nbest.scores.out
-$MRT_TOOLS/diff-nums.py -p 0.0002 nbest.scores.out nbest.scores.nematus > nbest.scores.diff
+$MRT_TOOLS/diff-nums.py -p 0.0002 nbest.scores.out nbest.scores.nematus -o nbest.scores.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/examples/iris/test_iris.sh b/tests/examples/iris/test_iris.sh
index a6f2c92..ec6bde3 100644
--- a/tests/examples/iris/test_iris.sh
+++ b/tests/examples/iris/test_iris.sh
@@ -5,7 +5,7 @@ set -e
 
 # Test code goes here
 $MRT_MARIAN/build/iris_example > iris.out
-$MRT_TOOLS/diff-nums.py iris.out iris.expected > iris.diff
+$MRT_TOOLS/diff-nums.py iris.out iris.expected -o iris.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/examples/mnist/test_mnist_ffnn.sh b/tests/examples/mnist/test_mnist_ffnn.sh
index 799e8a9..eeb3f30 100644
--- a/tests/examples/mnist/test_mnist_ffnn.sh
+++ b/tests/examples/mnist/test_mnist_ffnn.sh
@@ -18,7 +18,7 @@ $MRT_MARIAN/build/mnist_example \
     --log train.log
 
 cat train.log | grep '\[valid\]' | sed -re 's/.*\[valid\] //' -e 's/ : (new|stalled).*//' > ffnn.out
-$MRT_TOOLS/diff-nums.py ffnn.out ffnn.expected -p 0.005 > ffnn.diff
+$MRT_TOOLS/diff-nums.py ffnn.out ffnn.expected -p 0.005 -o ffnn.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/models/transformer/test_nbest.sh b/tests/models/transformer/test_nbest.sh
index e685878..eb56134 100644
--- a/tests/models/transformer/test_nbest.sh
+++ b/tests/models/transformer/test_nbest.sh
@@ -8,7 +8,7 @@ rm -f nbest.out
 # Run Marian
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/transformer/decode.yml -b 6 --mini-batch 32 --n-best < text.in > nbest.out
 
-$MRT_TOOLS/diff-nums.py -p 0.0001 nbest.out text.b6.nbest.expected > nbest.diff
+$MRT_TOOLS/diff-nums.py -p 0.0001 nbest.out text.b6.nbest.expected -o nbest.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/models/transformer/test_soft_aligns.sh b/tests/models/transformer/test_soft_aligns.sh
index 320299f..651392c 100644
--- a/tests/models/transformer/test_soft_aligns.sh
+++ b/tests/models/transformer/test_soft_aligns.sh
@@ -8,7 +8,7 @@ rm -f softalign.out
 # Run Marian
 $MRT_MARIAN/build/marian-decoder -c $MRT_MODELS/transformer/decode.yml -b 6 --mini-batch 32 --alignment soft < text.in > softalign.out
 
-$MRT_TOOLS/diff-nums.py -s , -p 0.0001 softalign.out text.b6.softalign.expected > softalign.diff
+$MRT_TOOLS/diff-nums.py -s , -p 0.0001 softalign.out text.b6.softalign.expected -o softalign.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/models/wnmt18/test_student_small_aan_optimize.sh b/tests/models/wnmt18/test_student_small_aan_optimize.sh
index 6d039c9..fcfbe6d 100644
--- a/tests/models/wnmt18/test_student_small_aan_optimize.sh
+++ b/tests/models/wnmt18/test_student_small_aan_optimize.sh
@@ -23,7 +23,7 @@ cat optimize_aan.out | perl -pe 's/@@ //g' \
     | $MRT_TOOLS/moses-scripts/scripts/generic/multi-bleu.perl newstest2014.ref \
     | $MRT_TOOLS/extract-bleu.sh > optimize_aan.bleu
 
-$MRT_TOOLS/diff-nums.py optimize_aan.bleu optimize_aan.bleu.expected -p 0.4 > optimize_aan.bleu.diff
+$MRT_TOOLS/diff-nums.py optimize_aan.bleu optimize_aan.bleu.expected -p 0.4 -o optimize_aan.bleu.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/scorer/align/test_scorer_align_nbest.sh b/tests/scorer/align/test_scorer_align_nbest.sh
index 905da09..3f58630 100644
--- a/tests/scorer/align/test_scorer_align_nbest.sh
+++ b/tests/scorer/align/test_scorer_align_nbest.sh
@@ -8,7 +8,7 @@ $MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer
   -t $(pwd)/text.src.in $(pwd)/nbest.trg.in --alignment --mini-batch 16 --n-best > nbest.out
 
 # Compare n-best lists
-$MRT_TOOLS/diff-nums.py -p 0.0001 nbest.out nbest.expected > nbest.diff
+$MRT_TOOLS/diff-nums.py -p 0.0001 nbest.out nbest.expected -o nbest.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/scorer/align/test_scorer_soft_align.sh b/tests/scorer/align/test_scorer_soft_align.sh
index 22057cb..54601c9 100644
--- a/tests/scorer/align/test_scorer_soft_align.sh
+++ b/tests/scorer/align/test_scorer_soft_align.sh
@@ -9,7 +9,7 @@ $MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer
   | sed 's/^.* ||| //' > soft.out
 
 # Compare scores
-$MRT_TOOLS/diff-nums.py -s , soft.out soft.expected > soft.diff
+$MRT_TOOLS/diff-nums.py -s , soft.out soft.expected -o soft.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/scorer/nbest/test_compare_parallel_and_nbest.sh b/tests/scorer/nbest/test_compare_parallel_and_nbest.sh
index 93614ff..8304c04 100644
--- a/tests/scorer/nbest/test_compare_parallel_and_nbest.sh
+++ b/tests/scorer/nbest/test_compare_parallel_and_nbest.sh
@@ -17,7 +17,7 @@ $MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer
 
 cat parallel.nbest.out | sed 's/ ||| /\t/g' | cut -f3 | tr ' ' '\t' | cut -f4 > parallel.nbest.scores.out
 
-$MRT_TOOLS/diff-nums.py parallel.scores.out parallel.nbest.scores.out -p 0.0003 > parallel.scores.diff
+$MRT_TOOLS/diff-nums.py parallel.scores.out parallel.nbest.scores.out -p 0.0003 -o parallel.scores.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/scorer/nbest/test_custom_feature_name.sh b/tests/scorer/nbest/test_custom_feature_name.sh
index 5f1809b..957d701 100644
--- a/tests/scorer/nbest/test_custom_feature_name.sh
+++ b/tests/scorer/nbest/test_custom_feature_name.sh
@@ -13,7 +13,7 @@ grep -c 'FeatureName= ' custom.out
 cat custom.out | sed 's/ ||| /\t/g' | cut -f3 | tr ' ' '\t' | cut -f4 > custom.scores.out
 cat nbest.expected | sed 's/ ||| /\t/g' | cut -f3 | tr ' ' '\t' | cut -f4 > nbest.scores.out
 
-$MRT_TOOLS/diff-nums.py custom.scores.out nbest.scores.out -p 0.0003 > custom.scores.diff
+$MRT_TOOLS/diff-nums.py custom.scores.out nbest.scores.out -p 0.0003 -o custom.scores.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/scorer/nbest/test_score_nbest_list.sh b/tests/scorer/nbest/test_score_nbest_list.sh
index 7d6fa64..4cf3cb3 100644
--- a/tests/scorer/nbest/test_score_nbest_list.sh
+++ b/tests/scorer/nbest/test_score_nbest_list.sh
@@ -8,7 +8,7 @@ $MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer
     --n-best -t text.src.in text.nbest.in \
     > nbest.out
 
-$MRT_TOOLS/diff-nums.py nbest.out nbest.expected -p 0.0003 > nbest.diff
+$MRT_TOOLS/diff-nums.py nbest.out nbest.expected -p 0.0003 -o nbest.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/basics/test_gzipped_train_sets.sh b/tests/training/basics/test_gzipped_train_sets.sh
index a3bef63..b28c41c 100644
--- a/tests/training/basics/test_gzipped_train_sets.sh
+++ b/tests/training/basics/test_gzipped_train_sets.sh
@@ -19,7 +19,7 @@ test -e gzip/model.npz
 test -e gzip.log
 
 cat gzip.log | $MRT_TOOLS/extract-costs.sh > gzip.out
-$MRT_TOOLS/diff-nums.py gzip.out gzip.expected -p 0.1 > gzip.diff
+$MRT_TOOLS/diff-nums.py gzip.out gzip.expected -p 0.1 -o gzip.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/basics/test_sqlite.sh b/tests/training/basics/test_sqlite.sh
index 1dceb08..0dec245 100644
--- a/tests/training/basics/test_sqlite.sh
+++ b/tests/training/basics/test_sqlite.sh
@@ -32,7 +32,7 @@ test -e sqlite.log
 
 $MRT_TOOLS/extract-costs.sh < sqlite.log > sqlite.out
 
-$MRT_TOOLS/diff-nums.py nosqlite.out sqlite.out -p 0.2 > sqlite.diff
+$MRT_TOOLS/diff-nums.py nosqlite.out sqlite.out -p 0.2 -o sqlite.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/basics/test_sqlite_random_seed.sh b/tests/training/basics/test_sqlite_random_seed.sh
index fe12772..a443f59 100644
--- a/tests/training/basics/test_sqlite_random_seed.sh
+++ b/tests/training/basics/test_sqlite_random_seed.sh
@@ -31,7 +31,7 @@ test -e sqlite_seed_2.log
 $MRT_TOOLS/extract-costs.sh < sqlite_seed_1.log > sqlite_seed_1.out
 $MRT_TOOLS/extract-costs.sh < sqlite_seed_2.log > sqlite_seed_2.out
 
-$MRT_TOOLS/diff-nums.py sqlite_seed_1.out sqlite_seed_2.out -p 0.1 > sqlite_seed.diff
+$MRT_TOOLS/diff-nums.py sqlite_seed_1.out sqlite_seed_2.out -p 0.1 -o sqlite_seed.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/basics/test_toy_vocab.sh b/tests/training/basics/test_toy_vocab.sh
index 3fef63b..66c0fb3 100644
--- a/tests/training/basics/test_toy_vocab.sh
+++ b/tests/training/basics/test_toy_vocab.sh
@@ -19,7 +19,7 @@ test -e toy/model.npz.yml
 test -e toy/model.npz.amun.yml
 
 cat toy.log | $MRT_TOOLS/extract-costs.sh > toy.out
-$MRT_TOOLS/diff-nums.py toy.out toy.expected -p 0.99 -n 5 > toy.diff
+$MRT_TOOLS/diff-nums.py toy.out toy.expected -p 0.99 -n 5 -o toy.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/basics/test_translation_script.sh b/tests/training/basics/test_translation_script.sh
index 0fa915c..83b102c 100644
--- a/tests/training/basics/test_translation_script.sh
+++ b/tests/training/basics/test_translation_script.sh
@@ -28,7 +28,7 @@ test -e trans.log
 grep -q "/tmp/marian.*" trans_script.temp
 
 $MRT_TOOLS/strip-timestamps.sh < trans.log | grep -v "Total translation time" | head -n 4 > trans.out
-$MRT_TOOLS/diff-nums.py trans.out trans.expected -p 0.2 > trans.diff
+$MRT_TOOLS/diff-nums.py trans.out trans.expected -p 0.2 -o trans.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/basics/test_valid_script.sh b/tests/training/basics/test_valid_script.sh
index 5c79755..e638854 100644
--- a/tests/training/basics/test_valid_script.sh
+++ b/tests/training/basics/test_valid_script.sh
@@ -28,7 +28,7 @@ test -e valid/model.npz.dev.npz.amun.yml
 test -e valid.log
 
 $MRT_TOOLS/strip-timestamps.sh < valid.log > valid.out
-$MRT_TOOLS/diff-nums.py valid.out valid.expected -p 0.2 > valid.diff
+$MRT_TOOLS/diff-nums.py valid.out valid.expected -p 0.2 -o valid.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/cost-functions/test_ce-mean-words.sh b/tests/training/cost-functions/test_ce-mean-words.sh
index a68dcf7..faaf113 100644
--- a/tests/training/cost-functions/test_ce-mean-words.sh
+++ b/tests/training/cost-functions/test_ce-mean-words.sh
@@ -18,7 +18,7 @@ test -e ce-mean-words/model.npz
 test -e ce-mean-words.log
 
 cat ce-mean-words.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > ce-mean-words.out
-$MRT_TOOLS/diff-nums.py ce-mean-words.out ce-mean-words.expected -p 0.02 > ce-mean-words.diff
+$MRT_TOOLS/diff-nums.py ce-mean-words.out ce-mean-words.expected -p 0.02 -o ce-mean-words.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/cost-functions/test_ce-mean.sh b/tests/training/cost-functions/test_ce-mean.sh
index 7142a15..c2ef4d1 100644
--- a/tests/training/cost-functions/test_ce-mean.sh
+++ b/tests/training/cost-functions/test_ce-mean.sh
@@ -17,7 +17,7 @@ test -e ce-mean/model.npz
 test -e ce-mean.log
 
 cat ce-mean.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > ce-mean.out
-$MRT_TOOLS/diff-nums.py ce-mean.out ce-mean.expected -p 0.02 > ce-mean.diff
+$MRT_TOOLS/diff-nums.py ce-mean.out ce-mean.expected -p 0.02 -o ce-mean.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/cost-functions/test_ce-sum.sh b/tests/training/cost-functions/test_ce-sum.sh
index 44b0662..540e82d 100644
--- a/tests/training/cost-functions/test_ce-sum.sh
+++ b/tests/training/cost-functions/test_ce-sum.sh
@@ -18,7 +18,7 @@ test -e ce-sum/model.npz
 test -e ce-sum.log
 
 cat ce-sum.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > ce-sum.out
-$MRT_TOOLS/diff-nums.py ce-sum.out ce-sum.expected -p 0.2 > ce-sum.diff
+$MRT_TOOLS/diff-nums.py ce-sum.out ce-sum.expected -p 0.2 -o ce-sum.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/cost-functions/test_perplexity.sh b/tests/training/cost-functions/test_perplexity.sh
index 16436a4..ae828a4 100644
--- a/tests/training/cost-functions/test_perplexity.sh
+++ b/tests/training/cost-functions/test_perplexity.sh
@@ -18,7 +18,7 @@ test -e perplexity/model.npz
 test -e perplexity.log
 
 cat perplexity.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > perplexity.out
-$MRT_TOOLS/diff-nums.py perplexity.out perplexity.expected -p 0.5 > perplexity.diff
+$MRT_TOOLS/diff-nums.py perplexity.out perplexity.expected -p 0.5 -o perplexity.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_compare_word_and_sentence_weighting.sh b/tests/training/data-weighting/test_compare_word_and_sentence_weighting.sh
index 7e3fc79..6529857 100644
--- a/tests/training/data-weighting/test_compare_word_and_sentence_weighting.sh
+++ b/tests/training/data-weighting/test_compare_word_and_sentence_weighting.sh
@@ -33,7 +33,7 @@ test -e compare/model.words.npz
 test -e compare.words.log
 
 cat compare.words.log | $MRT_TOOLS/extract-disp.sh > compare.words.out
-$MRT_TOOLS/diff-nums.py compare.words.out compare.sents.out -p 0.1 > compare.words.diff
+$MRT_TOOLS/diff-nums.py compare.words.out compare.sents.out -p 0.1 -o compare.words.diff
 
 
 # Exit with success code
diff --git a/tests/training/data-weighting/test_maxi_batches_with_sentence_weights.sh b/tests/training/data-weighting/test_maxi_batches_with_sentence_weights.sh
index 2bc5e1e..071fd94 100644
--- a/tests/training/data-weighting/test_maxi_batches_with_sentence_weights.sh
+++ b/tests/training/data-weighting/test_maxi_batches_with_sentence_weights.sh
@@ -20,7 +20,7 @@ test -e maxibatch/model.npz
 test -e maxibatch.log
 
 $MRT_TOOLS/extract-costs.sh < maxibatch.log > maxibatch.out
-$MRT_TOOLS/diff-nums.py maxibatch.out maxibatch.expected -p 0.1 > maxibatch.diff
+$MRT_TOOLS/diff-nums.py maxibatch.out maxibatch.expected -p 0.1 -o maxibatch.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_maxi_batches_with_word_weights.sh b/tests/training/data-weighting/test_maxi_batches_with_word_weights.sh
index 82e1b16..7b87e3d 100644
--- a/tests/training/data-weighting/test_maxi_batches_with_word_weights.sh
+++ b/tests/training/data-weighting/test_maxi_batches_with_word_weights.sh
@@ -20,7 +20,7 @@ test -e word_maxibatch/model.npz
 test -e word_maxibatch.log
 
 $MRT_TOOLS/extract-costs.sh < word_maxibatch.log > word_maxibatch.out
-$MRT_TOOLS/diff-nums.py word_maxibatch.out word_maxibatch.expected -p 0.1 > word_maxibatch.diff
+$MRT_TOOLS/diff-nums.py word_maxibatch.out word_maxibatch.expected -p 0.1 -o word_maxibatch.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_sentence_weighting_sqlite.sh b/tests/training/data-weighting/test_sentence_weighting_sqlite.sh
index 7eec0ac..3f393fc 100644
--- a/tests/training/data-weighting/test_sentence_weighting_sqlite.sh
+++ b/tests/training/data-weighting/test_sentence_weighting_sqlite.sh
@@ -19,7 +19,7 @@ test -e sqlite.log
 
 cat sqlite.log | $MRT_TOOLS/extract-costs.sh > sqlite.out
 
-$MRT_TOOLS/diff-nums.py sqlite.out sqlite.expected -p 0.1 > sqlite.diff
+$MRT_TOOLS/diff-nums.py sqlite.out sqlite.expected -p 0.1 -o sqlite.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_sentence_weighting_with_ones.sh b/tests/training/data-weighting/test_sentence_weighting_with_ones.sh
index 4f165bf..6ff43ef 100644
--- a/tests/training/data-weighting/test_sentence_weighting_with_ones.sh
+++ b/tests/training/data-weighting/test_sentence_weighting_with_ones.sh
@@ -31,7 +31,7 @@ test -e ones/model.npz
 test -e ones.log
 
 cat ones.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed -r 's/ Time.*//' > ones.out
-$MRT_TOOLS/diff-nums.py noweights.out ones.out -p 0.1 > ones.diff
+$MRT_TOOLS/diff-nums.py noweights.out ones.out -p 0.1 -o ones.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_sentence_weights_x3.sh b/tests/training/data-weighting/test_sentence_weights_x3.sh
index 0775904..45b92d3 100644
--- a/tests/training/data-weighting/test_sentence_weights_x3.sh
+++ b/tests/training/data-weighting/test_sentence_weights_x3.sh
@@ -30,7 +30,7 @@ test -e x3weights.log
 
 cat x3weights.log | grep 'Cost ' | sed -r 's/.*Cost (.*) : Time.*/\1/' > x3weights.out
 
-$MRT_TOOLS/diff-nums.py x3copied.out x3weights.out -p 0.1 > x3weights.diff
+$MRT_TOOLS/diff-nums.py x3copied.out x3weights.out -p 0.1 -o x3weights.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_validation.sh b/tests/training/data-weighting/test_validation.sh
index 79c7420..3bb3c63 100644
--- a/tests/training/data-weighting/test_validation.sh
+++ b/tests/training/data-weighting/test_validation.sh
@@ -23,8 +23,8 @@ test -e valid/train.log
 $MRT_TOOLS/strip-timestamps.sh < valid/valid.log > valid.out
 $MRT_TOOLS/extract-costs.sh < valid/train.log > train.out
 
-$MRT_TOOLS/diff-nums.py valid.out valid.expected -p 1.99 > valid.diff
-$MRT_TOOLS/diff-nums.py train.out train.expected -p 1.99 > train.diff
+$MRT_TOOLS/diff-nums.py valid.out valid.expected -p 1.99 -o valid.diff
+$MRT_TOOLS/diff-nums.py train.out train.expected -p 1.99 -o train.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_word_weighting_sqlite.sh b/tests/training/data-weighting/test_word_weighting_sqlite.sh
index d95e7b3..bb2452b 100644
--- a/tests/training/data-weighting/test_word_weighting_sqlite.sh
+++ b/tests/training/data-weighting/test_word_weighting_sqlite.sh
@@ -20,7 +20,7 @@ test -e sqlite_word/corpus.sqlite3
 test -e sqlite_word.log
 
 cat sqlite_word.log | $MRT_TOOLS/extract-costs.sh > sqlite_word.out
-$MRT_TOOLS/diff-nums.py sqlite_word.out sqlite_word.expected -p 0.1 > sqlite_word.diff
+$MRT_TOOLS/diff-nums.py sqlite_word.out sqlite_word.expected -p 0.1 -o sqlite_word.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_word_weighting_with_eos.sh b/tests/training/data-weighting/test_word_weighting_with_eos.sh
index 5401902..4044b77 100644
--- a/tests/training/data-weighting/test_word_weighting_with_eos.sh
+++ b/tests/training/data-weighting/test_word_weighting_with_eos.sh
@@ -21,7 +21,7 @@ test -e word_eos/model.npz
 test -e word_eos.log
 
 cat word_eos.log | $MRT_TOOLS/extract-disp.sh > word_eos.out
-$MRT_TOOLS/diff-nums.py word_eos.out word_eos.expected -p 0.1 > word_eos.diff
+$MRT_TOOLS/diff-nums.py word_eos.out word_eos.expected -p 0.1 -o word_eos.diff
 
 
 # Exit with success code
diff --git a/tests/training/data-weighting/test_word_weighting_with_ones.sh b/tests/training/data-weighting/test_word_weighting_with_ones.sh
index 92eeed5..9e47cdc 100644
--- a/tests/training/data-weighting/test_word_weighting_with_ones.sh
+++ b/tests/training/data-weighting/test_word_weighting_with_ones.sh
@@ -28,7 +28,7 @@ test -e word_ones/model.npz
 test -e word_ones.log
 
 cat word_ones.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed -r 's/ Time.*//' > word_ones.out
-$MRT_TOOLS/diff-nums.py word_noweights.out word_ones.out -p 0.1 > word_ones.diff
+$MRT_TOOLS/diff-nums.py word_noweights.out word_ones.out -p 0.1 -o word_ones.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_word_weighting_with_twos.sh b/tests/training/data-weighting/test_word_weighting_with_twos.sh
index e9f2d94..a39ec07 100644
--- a/tests/training/data-weighting/test_word_weighting_with_twos.sh
+++ b/tests/training/data-weighting/test_word_weighting_with_twos.sh
@@ -19,7 +19,7 @@ test -e word_twos/model.npz
 test -e word_twos.log
 
 cat word_twos.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed -r 's/ Time.*//' > word_twos.out
-$MRT_TOOLS/diff-nums.py word_twos.out word_twos.expected -p 0.1 > word_twos.diff
+$MRT_TOOLS/diff-nums.py word_twos.out word_twos.expected -p 0.1 -o word_twos.diff
 
 rm -rf word_twos_cfg word_twos_cfg.{log,out,diff}
 mkdir -p word_twos_cfg
@@ -34,7 +34,7 @@ $MRT_MARIAN/build/marian \
     -c word_twos.config.yml
 
 cat word_twos_cfg.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed -r 's/ Time.*//' > word_twos_cfg.out
-$MRT_TOOLS/diff-nums.py word_twos_cfg.out word_twos.expected -p 0.1 > word_twos_cfg.diff
+$MRT_TOOLS/diff-nums.py word_twos_cfg.out word_twos.expected -p 0.1 -o word_twos_cfg.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/embeddings/test_custom_embeddings.sh b/tests/training/embeddings/test_custom_embeddings.sh
index 6523777..3edd00a 100644
--- a/tests/training/embeddings/test_custom_embeddings.sh
+++ b/tests/training/embeddings/test_custom_embeddings.sh
@@ -26,8 +26,8 @@ $MRT_MARIAN/scripts/embeddings/export_embeddings.py -m custom_emb/model.npz -o c
 cat custom_emb.all.src | head -n 101 > custom_emb.src
 cat custom_emb.all.trg | head -n 101 > custom_emb.trg
 
-$MRT_TOOLS/diff-nums.py -n 1 -p 0.0005 word2vec.en custom_emb.src > custom_emb.src.diff
-$MRT_TOOLS/diff-nums.py -n 1 -p 0.0005 word2vec.de custom_emb.trg > custom_emb.trg.diff
+$MRT_TOOLS/diff-nums.py -n 1 -p 0.0005 word2vec.en custom_emb.src -o custom_emb.src.diff
+$MRT_TOOLS/diff-nums.py -n 1 -p 0.0005 word2vec.de custom_emb.trg -o custom_emb.trg.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/exp-smoothing/test_expsmooth.sh b/tests/training/exp-smoothing/test_expsmooth.sh
index caf5762..2428dfc 100644
--- a/tests/training/exp-smoothing/test_expsmooth.sh
+++ b/tests/training/exp-smoothing/test_expsmooth.sh
@@ -36,11 +36,11 @@ cat expsmooth.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | grep -v 'val
 cat expsmooth.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | grep 'valid' | sed 's/ : Time.*//' > expsmooth.valid.out
 
 
-$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth.out expsmooth.expected > expsmooth.diff
-$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth.valid.out expsmooth.valid.expected > expsmooth.valid.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth.out expsmooth.expected -o expsmooth.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth.valid.out expsmooth.valid.expected -o expsmooth.valid.diff
 
 # There should be no difference in costs between training w/ and w/o exponential smoothing
-$MRT_TOOLS/diff-nums.py -p 0.001 expsmooth.out noexpsmooth.out > noexpsmooth.diff
+$MRT_TOOLS/diff-nums.py -p 0.001 expsmooth.out noexpsmooth.out -o noexpsmooth.diff
 
 
 # Exit with success code
diff --git a/tests/training/exp-smoothing/test_expsmooth_sync.sh b/tests/training/exp-smoothing/test_expsmooth_sync.sh
index 2bf3451..14d5442 100644
--- a/tests/training/exp-smoothing/test_expsmooth_sync.sh
+++ b/tests/training/exp-smoothing/test_expsmooth_sync.sh
@@ -41,11 +41,11 @@ cat expsmooth_sync.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | grep -v
 cat expsmooth_sync.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | grep 'valid' | sed 's/ : Time.*//' > expsmooth_sync.valid.out
 
 
-$MRT_TOOLS/diff-nums.py -p 0.1 expsmooth_sync.out expsmooth_sync.expected > expsmooth_sync.diff
-$MRT_TOOLS/diff-nums.py -p 0.1 expsmooth_sync.valid.out expsmooth_sync.valid.expected > expsmooth_sync.valid.diff
+$MRT_TOOLS/diff-nums.py -p 0.1 expsmooth_sync.out expsmooth_sync.expected -o expsmooth_sync.diff
+$MRT_TOOLS/diff-nums.py -p 0.1 expsmooth_sync.valid.out expsmooth_sync.valid.expected -o expsmooth_sync.valid.diff
 
 # There should be no difference in costs between training w/ and w/o exponential smoothing
-$MRT_TOOLS/diff-nums.py -p 0.1 expsmooth_sync.out noexpsmooth_sync.out > noexpsmooth_sync.diff
+$MRT_TOOLS/diff-nums.py -p 0.1 expsmooth_sync.out noexpsmooth_sync.out -o noexpsmooth_sync.diff
 
 
 # Exit with success code
diff --git a/tests/training/lm/test_lm-transformer.sh b/tests/training/lm/test_lm-transformer.sh
index 70fac20..47737e5 100644
--- a/tests/training/lm/test_lm-transformer.sh
+++ b/tests/training/lm/test_lm-transformer.sh
@@ -19,13 +19,13 @@ test -e lm-transformer/model.npz.yml
 test -e lm-transformer.log
 
 cat lm-transformer.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > lm-transformer.out
-$MRT_TOOLS/diff-nums.py lm-transformer.out lm-transformer.expected -p 0.02 > lm-transformer.diff
+$MRT_TOOLS/diff-nums.py lm-transformer.out lm-transformer.expected -p 0.02 -o lm-transformer.diff
 
 # Scoring with LM
 test -s temp.bpe.en || tail $MRT_DATA/europarl.de-en/corpus.bpe.en > test.bpe.en
 
 $MRT_MARIAN/build/marian-scorer -m lm-transformer/model.npz -t test.bpe.en -v vocab.en.yml > lm-transformer.scores.out
-$MRT_TOOLS/diff-nums.py lm-transformer.scores.out lm-transformer.scores.expected -p 0.002 > lm-transformer.scores.diff
+$MRT_TOOLS/diff-nums.py lm-transformer.scores.out lm-transformer.scores.expected -p 0.002 -o lm-transformer.scores.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/lm/test_lm.sh b/tests/training/lm/test_lm.sh
index d602134..e3c32a0 100644
--- a/tests/training/lm/test_lm.sh
+++ b/tests/training/lm/test_lm.sh
@@ -19,13 +19,13 @@ test -e lm/model.npz.yml
 test -e lm.log
 
 cat lm.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > lm.out
-$MRT_TOOLS/diff-nums.py lm.out lm.expected -p 0.02 > lm.diff
+$MRT_TOOLS/diff-nums.py lm.out lm.expected -p 0.02 -o lm.diff
 
 # Scoring with LM
 test -s temp.bpe.en || tail $MRT_DATA/europarl.de-en/corpus.bpe.en > test.bpe.en
 
 $MRT_MARIAN/build/marian-scorer -m lm/model.npz -t test.bpe.en -v vocab.en.yml > lm.scores.out
-$MRT_TOOLS/diff-nums.py lm.scores.out lm.scores.expected -p 0.002 > lm.scores.diff
+$MRT_TOOLS/diff-nums.py lm.scores.out lm.scores.expected -p 0.002 -o lm.scores.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/model-types/test_transformer.sh b/tests/training/model-types/test_transformer.sh
index 28bad90..35f8f09 100644
--- a/tests/training/model-types/test_transformer.sh
+++ b/tests/training/model-types/test_transformer.sh
@@ -18,7 +18,7 @@ test -e transformer/model.npz
 test -e transformer.log
 
 cat transformer.log | $MRT_TOOLS/extract-costs.sh > transformer.out
-$MRT_TOOLS/diff-nums.py transformer.out transformer.expected -p 0.01 > transformer.diff
+$MRT_TOOLS/diff-nums.py transformer.out transformer.expected -p 0.01 -o transformer.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/multi-gpu/test_async_sgd_runs.sh b/tests/training/multi-gpu/test_async_sgd_runs.sh
index 6fb4573..15b0348 100644
--- a/tests/training/multi-gpu/test_async_sgd_runs.sh
+++ b/tests/training/multi-gpu/test_async_sgd_runs.sh
@@ -24,7 +24,7 @@ test -e vocab.de.yml
 test -e async_sgd.log
 
 cat async_sgd.log | $MRT_TOOLS/strip-timestamps.sh | grep -oP "Ep\. 1 .* Cost [0-9.]*" > async_sgd.out
-$MRT_TOOLS/diff-nums.py async_sgd.out async_sgd.expected -p 5.00 --allow-n-diffs 2 > async_sgd.diff
+$MRT_TOOLS/diff-nums.py async_sgd.out async_sgd.expected -p 5.00 --allow-n-diffs 2 -o async_sgd.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/multi-gpu/test_sync_sgd.sh b/tests/training/multi-gpu/test_sync_sgd.sh
index 5af1d6d..0d511b3 100644
--- a/tests/training/multi-gpu/test_sync_sgd.sh
+++ b/tests/training/multi-gpu/test_sync_sgd.sh
@@ -24,7 +24,7 @@ test -e sync_sgd/model.full.npz
 test -e sync_sgd.log
 
 cat sync_sgd.log | $MRT_TOOLS/extract-costs.sh > sync_sgd.out
-$MRT_TOOLS/diff-nums.py sync_sgd.out sync_sgd.expected -p 0.1 > sync_sgd.diff
+$MRT_TOOLS/diff-nums.py sync_sgd.out sync_sgd.expected -p 0.1 -o sync_sgd.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/multi-source/test_multi-s2s.sh b/tests/training/multi-source/test_multi-s2s.sh
index 4d37e17..e55a835 100644
--- a/tests/training/multi-source/test_multi-s2s.sh
+++ b/tests/training/multi-source/test_multi-s2s.sh
@@ -19,7 +19,7 @@ test -e multi-s2s/model.npz.yml
 test -e multi-s2s.log
 
 cat multi-s2s.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > multi-s2s.out
-$MRT_TOOLS/diff-nums.py multi-s2s.out multi-s2s.expected -p 0.2 > multi-s2s.diff
+$MRT_TOOLS/diff-nums.py multi-s2s.out multi-s2s.expected -p 0.2 -o multi-s2s.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/multi-source/test_multi-transformer.sh b/tests/training/multi-source/test_multi-transformer.sh
index 321342a..cafd2ad 100644
--- a/tests/training/multi-source/test_multi-transformer.sh
+++ b/tests/training/multi-source/test_multi-transformer.sh
@@ -19,7 +19,7 @@ test -e multi-transformer/model.npz.yml
 test -e multi-transformer.log
 
 cat multi-transformer.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > multi-transformer.out
-$MRT_TOOLS/diff-nums.py multi-transformer.out multi-transformer.expected -p 0.2 > multi-transformer.diff
+$MRT_TOOLS/diff-nums.py multi-transformer.out multi-transformer.expected -p 0.2 -o multi-transformer.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/nematus/test_encdec_depth.sh b/tests/training/nematus/test_encdec_depth.sh
index 64fe466..99c4c7c 100644
--- a/tests/training/nematus/test_encdec_depth.sh
+++ b/tests/training/nematus/test_encdec_depth.sh
@@ -19,7 +19,7 @@ test -e encdec_depth/model.npz
 test -e encdec_depth/model.npz.yml
 
 cat encdec_depth.log | $MRT_TOOLS/extract-costs.sh > encdec_depth.out
-$MRT_TOOLS/diff-nums.py encdec_depth.out encdec_depth.expected -p 0.3 > encdec_depth.diff
+$MRT_TOOLS/diff-nums.py encdec_depth.out encdec_depth.expected -p 0.3 -o encdec_depth.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/nematus/test_wmt17_model.sh b/tests/training/nematus/test_wmt17_model.sh
index c45c287..43cdebe 100644
--- a/tests/training/nematus/test_wmt17_model.sh
+++ b/tests/training/nematus/test_wmt17_model.sh
@@ -19,7 +19,7 @@ test -e wmt17/model.npz
 test -e wmt17/model.npz.yml
 
 cat wmt17.log | $MRT_TOOLS/extract-costs.sh > wmt17.out
-$MRT_TOOLS/diff-nums.py wmt17.out wmt17.expected -p 0.3 > wmt17.diff
+$MRT_TOOLS/diff-nums.py wmt17.out wmt17.expected -p 0.3 -o wmt17.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/pretraining/test_weights_from_pretrained_model.sh b/tests/training/pretraining/test_weights_from_pretrained_model.sh
index 359cc8c..d7087c2 100644
--- a/tests/training/pretraining/test_weights_from_pretrained_model.sh
+++ b/tests/training/pretraining/test_weights_from_pretrained_model.sh
@@ -43,14 +43,14 @@ test -e model/model.npz
 for key in encoder_Wemb encoder_bi_U encoder_bi_r_Wx; do
     python3 $MRT_MARIAN/scripts/contrib/model_info.py -m model/orig.npz -k $key > key-orig-$key.txt
     python3 $MRT_MARIAN/scripts/contrib/model_info.py -m model/model.npz -k $key > key-model-$key.txt
-    $MRT_TOOLS/diff-nums.py --numpy -p 0.000001 key-orig-$key.txt key-model-$key.txt > key-diff-$key.txt
+    $MRT_TOOLS/diff-nums.py --numpy -p 0.000001 key-orig-$key.txt key-model-$key.txt -o key-diff-$key.txt
 done
 
 # Test if selected weights are identical with LM
 for key in decoder_Wemb decoder_cell1_U decoder_cell2_bx decoder_ff_logit_l1_W0; do
     python3 $MRT_MARIAN/scripts/contrib/model_info.py -m model/lm.npz -k $key > key-lm-$key.txt
     python3 $MRT_MARIAN/scripts/contrib/model_info.py -m model/model.npz -k $key > key-model-$key.txt
-    $MRT_TOOLS/diff-nums.py --numpy -p 0.000001 key-lm-$key.txt key-model-$key.txt > key-diff-$key.txt
+    $MRT_TOOLS/diff-nums.py --numpy -p 0.000001 key-lm-$key.txt key-model-$key.txt -o key-diff-$key.txt
 done
 
 # Exit with success code
diff --git a/tests/training/restarting/test_sgd_for_two_epochs.sh b/tests/training/restarting/test_sgd_for_two_epochs.sh
index 3df8206..3cb09d2 100644
--- a/tests/training/restarting/test_sgd_for_two_epochs.sh
+++ b/tests/training/restarting/test_sgd_for_two_epochs.sh
@@ -44,7 +44,7 @@ test -e sgd_2nd_epoch.log
 cat sgd_2nd_epoch.log | $MRT_TOOLS/extract-disp.sh > sgd_2nd_epoch.out
 cat sgd_1st_epoch.out sgd_2nd_epoch.out > sgd_2e.out
 
-$MRT_TOOLS/diff-nums.py sgd_2e.out sgd_2e.expected -p 0.3 > sgd_2e.diff
+$MRT_TOOLS/diff-nums.py sgd_2e.out sgd_2e.expected -p 0.3 -o sgd_2e.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_corpus_restoration.sh b/tests/training/restoring/corpus/test_corpus_restoration.sh
index 9b39e32..8b3f625 100644
--- a/tests/training/restoring/corpus/test_corpus_restoration.sh
+++ b/tests/training/restoring/corpus/test_corpus_restoration.sh
@@ -44,7 +44,7 @@ test -e corpus_2.log
 cat corpus_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > corpus_2.out
 cat corpus_1.out corpus_2.out > corpus.out
 
-$MRT_TOOLS/diff-nums.py corpus.out corpus.expected -p 0.1 > corpus.diff
+$MRT_TOOLS/diff-nums.py corpus.out corpus.expected -p 0.1 -o corpus.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_corpus_restoration_maxi_batch.sh b/tests/training/restoring/corpus/test_corpus_restoration_maxi_batch.sh
index b26f437..fe5ff6e 100644
--- a/tests/training/restoring/corpus/test_corpus_restoration_maxi_batch.sh
+++ b/tests/training/restoring/corpus/test_corpus_restoration_maxi_batch.sh
@@ -46,7 +46,7 @@ test -e corpus_maxi_2.log
 cat corpus_maxi_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > corpus_maxi_2.out
 cat corpus_maxi_1.out corpus_maxi_2.out > corpus_maxi.out
 
-$MRT_TOOLS/diff-nums.py corpus_maxi.out corpus_maxi.expected -p 0.1 > corpus_maxi.diff
+$MRT_TOOLS/diff-nums.py corpus_maxi.out corpus_maxi.expected -p 0.1 -o corpus_maxi.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_corpus_restoration_mini_batch_fit.sh b/tests/training/restoring/corpus/test_corpus_restoration_mini_batch_fit.sh
index 85f0746..2b2ade7 100644
--- a/tests/training/restoring/corpus/test_corpus_restoration_mini_batch_fit.sh
+++ b/tests/training/restoring/corpus/test_corpus_restoration_mini_batch_fit.sh
@@ -45,7 +45,7 @@ test -e corpus_fit_2.log
 cat corpus_fit_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > corpus_fit_2.out
 cat corpus_fit_1.out corpus_fit_2.out > corpus_fit.out
 
-$MRT_TOOLS/diff-nums.py corpus_fit.out corpus_fit.expected -p 0.1 > corpus_fit.diff
+$MRT_TOOLS/diff-nums.py corpus_fit.out corpus_fit.expected -p 0.1 -o corpus_fit.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_corpus_restoration_no_shuffle.sh b/tests/training/restoring/corpus/test_corpus_restoration_no_shuffle.sh
index 04f0e66..3a25ca1 100644
--- a/tests/training/restoring/corpus/test_corpus_restoration_no_shuffle.sh
+++ b/tests/training/restoring/corpus/test_corpus_restoration_no_shuffle.sh
@@ -45,7 +45,7 @@ test -e corpus_noshuf_2.log
 cat corpus_noshuf_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > corpus_noshuf_2.out
 cat corpus_noshuf_1.out corpus_noshuf_2.out > corpus_noshuf.out
 
-$MRT_TOOLS/diff-nums.py corpus_noshuf.out corpus_noshuf.expected -p 0.1 > corpus_noshuf.diff
+$MRT_TOOLS/diff-nums.py corpus_noshuf.out corpus_noshuf.expected -p 0.1 -o corpus_noshuf.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_corpus_restoration_one_epoch.sh b/tests/training/restoring/corpus/test_corpus_restoration_one_epoch.sh
index d17fa96..affc5d7 100644
--- a/tests/training/restoring/corpus/test_corpus_restoration_one_epoch.sh
+++ b/tests/training/restoring/corpus/test_corpus_restoration_one_epoch.sh
@@ -44,7 +44,7 @@ test -e corpus_one_2.log
 cat corpus_one_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > corpus_one_2.out
 cat corpus_one_1.out corpus_one_2.out > corpus_one.out
 
-$MRT_TOOLS/diff-nums.py corpus_one.out corpus_one.expected -p 0.1 > corpus_one.diff
+$MRT_TOOLS/diff-nums.py corpus_one.out corpus_one.expected -p 0.1 -o corpus_one.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_corpus_restoration_s2s.sh b/tests/training/restoring/corpus/test_corpus_restoration_s2s.sh
index 2b811b8..606acbb 100644
--- a/tests/training/restoring/corpus/test_corpus_restoration_s2s.sh
+++ b/tests/training/restoring/corpus/test_corpus_restoration_s2s.sh
@@ -44,7 +44,7 @@ test -e corpus_s2s_2.log
 cat corpus_s2s_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > corpus_s2s_2.out
 cat corpus_s2s_1.out corpus_s2s_2.out > corpus_s2s.out
 
-$MRT_TOOLS/diff-nums.py corpus_s2s.out corpus_s2s.expected -p 0.1 > corpus_s2s.diff
+$MRT_TOOLS/diff-nums.py corpus_s2s.out corpus_s2s.expected -p 0.1 -o corpus_s2s.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_sqlite_restoration.sh b/tests/training/restoring/corpus/test_sqlite_restoration.sh
index 735f780..0dcbdf8 100644
--- a/tests/training/restoring/corpus/test_sqlite_restoration.sh
+++ b/tests/training/restoring/corpus/test_sqlite_restoration.sh
@@ -44,7 +44,7 @@ test -e sqlite_2.log
 cat sqlite_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > sqlite_2.out
 cat sqlite_1.out sqlite_2.out > sqlite.out
 
-$MRT_TOOLS/diff-nums.py sqlite.out sqlite.expected -p 0.1 > sqlite.diff
+$MRT_TOOLS/diff-nums.py sqlite.out sqlite.expected -p 0.1 -o sqlite.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_sqlite_restoration_maxi_batch.sh b/tests/training/restoring/corpus/test_sqlite_restoration_maxi_batch.sh
index f31b64d..09770d1 100644
--- a/tests/training/restoring/corpus/test_sqlite_restoration_maxi_batch.sh
+++ b/tests/training/restoring/corpus/test_sqlite_restoration_maxi_batch.sh
@@ -44,7 +44,7 @@ test -e sqlite_maxi_2.log
 cat sqlite_maxi_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > sqlite_maxi_2.out
 cat sqlite_maxi_1.out sqlite_maxi_2.out > sqlite_maxi.out
 
-$MRT_TOOLS/diff-nums.py sqlite_maxi.out sqlite_maxi.expected -p 0.1 > sqlite_maxi.diff
+$MRT_TOOLS/diff-nums.py sqlite_maxi.out sqlite_maxi.expected -p 0.1 -o sqlite_maxi.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/exp-smoothing/test_expsmooth.sh b/tests/training/restoring/exp-smoothing/test_expsmooth.sh
index 31e0f7b..624329f 100644
--- a/tests/training/restoring/exp-smoothing/test_expsmooth.sh
+++ b/tests/training/restoring/exp-smoothing/test_expsmooth.sh
@@ -77,8 +77,8 @@ cat expsmooth_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | grep 'vali
 
 
 # Results
-$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth.out expsmooth.expected > expsmooth.diff
-$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth.valid.out expsmooth.valid.expected > expsmooth.valid.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth.out expsmooth.expected -o expsmooth.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth.valid.out expsmooth.valid.expected -o expsmooth.valid.diff
 
 
 # Exit with success code
diff --git a/tests/training/restoring/exp-smoothing/test_expsmooth_s2s.sh b/tests/training/restoring/exp-smoothing/test_expsmooth_s2s.sh
index e56bb9a..3a74557 100644
--- a/tests/training/restoring/exp-smoothing/test_expsmooth_s2s.sh
+++ b/tests/training/restoring/exp-smoothing/test_expsmooth_s2s.sh
@@ -77,8 +77,8 @@ cat expsmooth_s2s_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | grep '
 
 
 # Results
-$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth_s2s.out expsmooth_s2s.expected > expsmooth_s2s.diff
-$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth_s2s.valid.out expsmooth_s2s.valid.expected > expsmooth_s2s.valid.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth_s2s.out expsmooth_s2s.expected -o expsmooth_s2s.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth_s2s.valid.out expsmooth_s2s.valid.expected -o expsmooth_s2s.valid.diff
 
 
 # Exit with success code
diff --git a/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh b/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh
index da4425e..20aadcf 100644
--- a/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh
+++ b/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh
@@ -77,8 +77,8 @@ cat expsmooth_sync_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | grep
 
 
 # Results
-$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth_sync.out expsmooth_sync.expected > expsmooth_sync.diff
-$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth_sync.valid.out expsmooth_sync.valid.expected > expsmooth_sync.valid.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth_sync.out expsmooth_sync.expected -o expsmooth_sync.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth_sync.valid.out expsmooth_sync.valid.expected -o expsmooth_sync.valid.diff
 
 
 # Exit with success code
diff --git a/tests/training/restoring/multi-gpu/test_async.sh b/tests/training/restoring/multi-gpu/test_async.sh
index 157b9e7..0c3fcca 100644
--- a/tests/training/restoring/multi-gpu/test_async.sh
+++ b/tests/training/restoring/multi-gpu/test_async.sh
@@ -54,7 +54,7 @@ cat async.unsorted.expected | head -n -4 | sort -n > async.expected
 cat async.unsorted.out | head -n -4 | sort -n > async.out
 
 # async is undeterministic, so the conditions are weak
-$MRT_TOOLS/diff-nums.py -p 1.0 -n 2 async.out async.expected > async.diff
+$MRT_TOOLS/diff-nums.py -p 1.0 -n 2 async.out async.expected -o async.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/multi-gpu/test_sync.sh b/tests/training/restoring/multi-gpu/test_sync.sh
index 8687990..78190f6 100644
--- a/tests/training/restoring/multi-gpu/test_sync.sh
+++ b/tests/training/restoring/multi-gpu/test_sync.sh
@@ -47,7 +47,7 @@ test -e sync_2.log
 
 cat sync_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' >> sync.out
 
-$MRT_TOOLS/diff-nums.py -p 0.08 sync.out sync.expected > sync.diff
+$MRT_TOOLS/diff-nums.py -p 0.08 sync.out sync.expected -o sync.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/optimizer/test_adagrad_params.sh b/tests/training/restoring/optimizer/test_adagrad_params.sh
index e0e1b50..49fa78a 100644
--- a/tests/training/restoring/optimizer/test_adagrad_params.sh
+++ b/tests/training/restoring/optimizer/test_adagrad_params.sh
@@ -18,13 +18,13 @@ test -e adagrad/model.npz.optimizer.npz
 test -e adagrad.log
 
 $MRT_TOOLS/extract-costs.sh < adagrad.log > adagrad.costs.out
-$MRT_TOOLS/diff-nums.py adagrad.costs.out adagrad.costs.expected -p 0.2 > adagrad.costs.diff
+$MRT_TOOLS/diff-nums.py adagrad.costs.out adagrad.costs.expected -p 0.2 -o adagrad.costs.diff
 
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adagrad/model.npz.optimizer.npz > adagrad.keys.out
 diff adagrad.keys.out adagrad.keys.expected > adagrad.keys.diff
 
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adagrad/model.npz.optimizer.npz -k "adagrad_gt" > adagrad.gt.out
-$MRT_TOOLS/diff-nums.py --numpy -p 0.0001 adagrad.gt.out adagrad.gt.expected > adagrad.gt.diff
+$MRT_TOOLS/diff-nums.py --numpy -p 0.0001 adagrad.gt.out adagrad.gt.expected -o adagrad.gt.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/optimizer/test_adam_params.sh b/tests/training/restoring/optimizer/test_adam_params.sh
index ad77374..c1522d3 100644
--- a/tests/training/restoring/optimizer/test_adam_params.sh
+++ b/tests/training/restoring/optimizer/test_adam_params.sh
@@ -18,15 +18,15 @@ test -e adam/model.npz.optimizer.npz
 test -e adam.log
 
 $MRT_TOOLS/extract-costs.sh < adam.log > adam.costs.out
-$MRT_TOOLS/diff-nums.py adam.costs.out adam.costs.expected -p 0.2 > adam.costs.diff
+$MRT_TOOLS/diff-nums.py adam.costs.out adam.costs.expected -p 0.2 -o adam.costs.diff
 
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam/model.npz.optimizer.npz > adam.keys.out
 diff adam.keys.out adam.keys.expected > adam.keys.diff
 
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam/model.npz.optimizer.npz -k "adam_mt" > adam.mt.out
-$MRT_TOOLS/diff-nums.py --numpy -p 0.0001  adam.mt.out adam.mt.expected > adam.mt.diff
+$MRT_TOOLS/diff-nums.py --numpy -p 0.0001  adam.mt.out adam.mt.expected -o adam.mt.diff
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam/model.npz.optimizer.npz -k "adam_vt" > adam.vt.out
-$MRT_TOOLS/diff-nums.py --numpy -p 0.000005 adam.vt.out adam.vt.expected > adam.vt.diff
+$MRT_TOOLS/diff-nums.py --numpy -p 0.000005 adam.vt.out adam.vt.expected -o adam.vt.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/optimizer/test_adam_params_async.sh b/tests/training/restoring/optimizer/test_adam_params_async.sh
index a842050..c9b1271 100644
--- a/tests/training/restoring/optimizer/test_adam_params_async.sh
+++ b/tests/training/restoring/optimizer/test_adam_params_async.sh
@@ -23,7 +23,7 @@ test -e adam_async/model.npz.optimizer.npz
 test -e adam_async.log
 
 $MRT_TOOLS/extract-costs.sh < adam_async.log > adam_async.costs.out
-$MRT_TOOLS/diff-nums.py adam_async.costs.out adam_async.costs.expected -p 10.00 -n 2 > adam_async.costs.diff
+$MRT_TOOLS/diff-nums.py adam_async.costs.out adam_async.costs.expected -p 10.00 -n 2 -o adam_async.costs.diff
 
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam_async/model.npz.optimizer.npz > adam_async.keys.out
 diff adam_async.keys.out adam.keys.expected > adam_async.keys.diff
@@ -31,8 +31,8 @@ diff adam_async.keys.out adam.keys.expected > adam_async.keys.diff
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam_async/model.npz.optimizer.npz -k "adam_mt" > adam_async.mt.out
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam_async/model.npz.optimizer.npz -k "adam_vt" > adam_async.vt.out
 
-$MRT_TOOLS/diff-nums.py --numpy -a -p 0.02  adam_async.mt.out adam_async.mt.expected > adam_async.mt.diff
-$MRT_TOOLS/diff-nums.py --numpy    -p 0.001 adam_async.vt.out adam_async.vt.expected > adam_async.vt.diff
+$MRT_TOOLS/diff-nums.py --numpy -a -p 0.02  adam_async.mt.out adam_async.mt.expected -o adam_async.mt.diff
+$MRT_TOOLS/diff-nums.py --numpy    -p 0.001 adam_async.vt.out adam_async.vt.expected -o adam_async.vt.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/optimizer/test_adam_params_sync.sh b/tests/training/restoring/optimizer/test_adam_params_sync.sh
index 5fe2581..56e231c 100644
--- a/tests/training/restoring/optimizer/test_adam_params_sync.sh
+++ b/tests/training/restoring/optimizer/test_adam_params_sync.sh
@@ -23,7 +23,7 @@ test -e adam_sync/model.npz.optimizer.npz
 test -e adam_sync.log
 
 $MRT_TOOLS/extract-costs.sh < adam_sync.log > adam_sync.costs.out
-$MRT_TOOLS/diff-nums.py adam_sync.costs.out adam_sync.costs.expected -p 3.00 -n 2 > adam_sync.costs.diff
+$MRT_TOOLS/diff-nums.py adam_sync.costs.out adam_sync.costs.expected -p 3.00 -n 2 -o adam_sync.costs.diff
 
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam_sync/model.npz.optimizer.npz > adam_sync.keys.out
 diff adam_sync.keys.out adam.keys.expected > adam_sync.keys.diff
@@ -31,8 +31,8 @@ diff adam_sync.keys.out adam.keys.expected > adam_sync.keys.diff
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam_sync/model.npz.optimizer.npz -k "adam_mt" > adam_sync.mt.out
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam_sync/model.npz.optimizer.npz -k "adam_vt" > adam_sync.vt.out
 
-$MRT_TOOLS/diff-nums.py --numpy -p 0.002  adam_sync.mt.out adam_sync.mt.expected > adam_sync.mt.diff
-$MRT_TOOLS/diff-nums.py --numpy -p 0.0002 adam_sync.vt.out adam_sync.vt.expected > adam_sync.vt.diff
+$MRT_TOOLS/diff-nums.py --numpy -p 0.002  adam_sync.mt.out adam_sync.mt.expected -o adam_sync.mt.diff
+$MRT_TOOLS/diff-nums.py --numpy -p 0.0002 adam_sync.vt.out adam_sync.vt.expected -o adam_sync.vt.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/optimizer/test_loading_adam_params.sh b/tests/training/restoring/optimizer/test_loading_adam_params.sh
index 4d11c77..88ddf64 100644
--- a/tests/training/restoring/optimizer/test_loading_adam_params.sh
+++ b/tests/training/restoring/optimizer/test_loading_adam_params.sh
@@ -33,7 +33,7 @@ cat adam_load_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ :
 
 # The allowed tolerance needs to be radiculously high as restarting the
 # training is very instable on different GPU devices
-$MRT_TOOLS/diff-nums.py -p 15.0 -n 1 adam_load.out adam_load.expected > adam_load.diff
+$MRT_TOOLS/diff-nums.py -p 15.0 -n 1 adam_load.out adam_load.expected -o adam_load.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/validation/test_adding_validator_after_restart.sh b/tests/training/restoring/validation/test_adding_validator_after_restart.sh
index 1a70eba..9a93a6b 100644
--- a/tests/training/restoring/validation/test_adding_validator_after_restart.sh
+++ b/tests/training/restoring/validation/test_adding_validator_after_restart.sh
@@ -50,7 +50,7 @@ test -e valid_add/model.npz
 test -e valid_add_2.log
 
 cat valid_add_2.log | $MRT_TOOLS/strip-timestamps.sh >> valid_add.out
-$MRT_TOOLS/diff-nums.py -p 0.003 valid_add.out valid_add.expected > valid_add.diff
+$MRT_TOOLS/diff-nums.py -p 0.003 valid_add.out valid_add.expected -o valid_add.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh b/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh
index ede179d..4a6c4a9 100644
--- a/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh
+++ b/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh
@@ -36,7 +36,7 @@ test -e valid_lowisbet/model.npz
 test -e valid_lowisbet_2.log
 
 cat valid_lowisbet_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "cross-entropy" >> valid_lowisbet.out
-$MRT_TOOLS/diff-nums.py -p 0.1 valid_lowisbet.out valid_lowisbet.expected > valid_lowisbet.diff
+$MRT_TOOLS/diff-nums.py -p 0.1 valid_lowisbet.out valid_lowisbet.expected -o valid_lowisbet.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/validation/test_final_validation_after_batches.sh b/tests/training/validation/test_final_validation_after_batches.sh
index 74793b6..d6de7f1 100644
--- a/tests/training/validation/test_final_validation_after_batches.sh
+++ b/tests/training/validation/test_final_validation_after_batches.sh
@@ -19,7 +19,7 @@ test -e final_batch/model.npz
 test -e final_batch.log
 
 $MRT_TOOLS/strip-timestamps.sh < final_batch.log > final_batch.out
-$MRT_TOOLS/diff-nums.py final_batch.out final_batch.expected -p 0.9 > final_batch.diff
+$MRT_TOOLS/diff-nums.py final_batch.out final_batch.expected -p 0.9 -o final_batch.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/validation/test_final_validation_after_batches_match.sh b/tests/training/validation/test_final_validation_after_batches_match.sh
index 23fdb13..673c087 100644
--- a/tests/training/validation/test_final_validation_after_batches_match.sh
+++ b/tests/training/validation/test_final_validation_after_batches_match.sh
@@ -19,7 +19,7 @@ test -e final_match/model.npz
 test -e final_match.log
 
 $MRT_TOOLS/strip-timestamps.sh < final_match.log > final_match.out
-$MRT_TOOLS/diff-nums.py final_match.out final_match.expected -p 0.9 > final_match.diff
+$MRT_TOOLS/diff-nums.py final_match.out final_match.expected -p 0.9 -o final_match.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/validation/test_final_validation_after_epochs.sh b/tests/training/validation/test_final_validation_after_epochs.sh
index c6562d3..08d391f 100644
--- a/tests/training/validation/test_final_validation_after_epochs.sh
+++ b/tests/training/validation/test_final_validation_after_epochs.sh
@@ -22,7 +22,7 @@ test -e final_epoch/model.npz
 test -e final_epoch.log
 
 $MRT_TOOLS/strip-timestamps.sh < final_epoch.log > final_epoch.out
-$MRT_TOOLS/diff-nums.py final_epoch.out final_epoch.expected -p 0.9 > final_epoch.diff
+$MRT_TOOLS/diff-nums.py final_epoch.out final_epoch.expected -p 0.9 -o final_epoch.diff
 
 # Exit with success code
 exit 0
author	Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk>	2018-11-12 20:11:00 +0300
committer	Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk>	2018-11-12 20:11:00 +0300
commit	374a7001cc73cff37fdfec6b59203b0fc1ee175d (patch)
tree	86d071972668ed91ea1b1c7733b11d30ade0844a
parent	cf2d46ea1ff3481c7a5007a88eda9576719b8c70 (diff)