58 files changed, 78 insertions, 78 deletions
diff --git a/tests/training/basics/test_gzipped_train_sets.sh b/tests/training/basics/test_gzipped_train_sets.sh
index a3bef63..b28c41c 100644
--- a/tests/training/basics/test_gzipped_train_sets.sh
+++ b/tests/training/basics/test_gzipped_train_sets.sh
@@ -19,7 +19,7 @@ test -e gzip/model.npz
 test -e gzip.log
 
 cat gzip.log | $MRT_TOOLS/extract-costs.sh > gzip.out
-$MRT_TOOLS/diff-nums.py gzip.out gzip.expected -p 0.1 > gzip.diff
+$MRT_TOOLS/diff-nums.py gzip.out gzip.expected -p 0.1 -o gzip.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/basics/test_sqlite.sh b/tests/training/basics/test_sqlite.sh
index 1dceb08..0dec245 100644
--- a/tests/training/basics/test_sqlite.sh
+++ b/tests/training/basics/test_sqlite.sh
@@ -32,7 +32,7 @@ test -e sqlite.log
 
 $MRT_TOOLS/extract-costs.sh < sqlite.log > sqlite.out
 
-$MRT_TOOLS/diff-nums.py nosqlite.out sqlite.out -p 0.2 > sqlite.diff
+$MRT_TOOLS/diff-nums.py nosqlite.out sqlite.out -p 0.2 -o sqlite.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/basics/test_sqlite_random_seed.sh b/tests/training/basics/test_sqlite_random_seed.sh
index fe12772..a443f59 100644
--- a/tests/training/basics/test_sqlite_random_seed.sh
+++ b/tests/training/basics/test_sqlite_random_seed.sh
@@ -31,7 +31,7 @@ test -e sqlite_seed_2.log
 $MRT_TOOLS/extract-costs.sh < sqlite_seed_1.log > sqlite_seed_1.out
 $MRT_TOOLS/extract-costs.sh < sqlite_seed_2.log > sqlite_seed_2.out
 
-$MRT_TOOLS/diff-nums.py sqlite_seed_1.out sqlite_seed_2.out -p 0.1 > sqlite_seed.diff
+$MRT_TOOLS/diff-nums.py sqlite_seed_1.out sqlite_seed_2.out -p 0.1 -o sqlite_seed.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/basics/test_toy_vocab.sh b/tests/training/basics/test_toy_vocab.sh
index 3fef63b..66c0fb3 100644
--- a/tests/training/basics/test_toy_vocab.sh
+++ b/tests/training/basics/test_toy_vocab.sh
@@ -19,7 +19,7 @@ test -e toy/model.npz.yml
 test -e toy/model.npz.amun.yml
 
 cat toy.log | $MRT_TOOLS/extract-costs.sh > toy.out
-$MRT_TOOLS/diff-nums.py toy.out toy.expected -p 0.99 -n 5 > toy.diff
+$MRT_TOOLS/diff-nums.py toy.out toy.expected -p 0.99 -n 5 -o toy.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/basics/test_translation_script.sh b/tests/training/basics/test_translation_script.sh
index 0fa915c..83b102c 100644
--- a/tests/training/basics/test_translation_script.sh
+++ b/tests/training/basics/test_translation_script.sh
@@ -28,7 +28,7 @@ test -e trans.log
 grep -q "/tmp/marian.*" trans_script.temp
 
 $MRT_TOOLS/strip-timestamps.sh < trans.log | grep -v "Total translation time" | head -n 4 > trans.out
-$MRT_TOOLS/diff-nums.py trans.out trans.expected -p 0.2 > trans.diff
+$MRT_TOOLS/diff-nums.py trans.out trans.expected -p 0.2 -o trans.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/basics/test_valid_script.sh b/tests/training/basics/test_valid_script.sh
index 5c79755..e638854 100644
--- a/tests/training/basics/test_valid_script.sh
+++ b/tests/training/basics/test_valid_script.sh
@@ -28,7 +28,7 @@ test -e valid/model.npz.dev.npz.amun.yml
 test -e valid.log
 
 $MRT_TOOLS/strip-timestamps.sh < valid.log > valid.out
-$MRT_TOOLS/diff-nums.py valid.out valid.expected -p 0.2 > valid.diff
+$MRT_TOOLS/diff-nums.py valid.out valid.expected -p 0.2 -o valid.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/cost-functions/test_ce-mean-words.sh b/tests/training/cost-functions/test_ce-mean-words.sh
index a68dcf7..faaf113 100644
--- a/tests/training/cost-functions/test_ce-mean-words.sh
+++ b/tests/training/cost-functions/test_ce-mean-words.sh
@@ -18,7 +18,7 @@ test -e ce-mean-words/model.npz
 test -e ce-mean-words.log
 
 cat ce-mean-words.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > ce-mean-words.out
-$MRT_TOOLS/diff-nums.py ce-mean-words.out ce-mean-words.expected -p 0.02 > ce-mean-words.diff
+$MRT_TOOLS/diff-nums.py ce-mean-words.out ce-mean-words.expected -p 0.02 -o ce-mean-words.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/cost-functions/test_ce-mean.sh b/tests/training/cost-functions/test_ce-mean.sh
index 7142a15..c2ef4d1 100644
--- a/tests/training/cost-functions/test_ce-mean.sh
+++ b/tests/training/cost-functions/test_ce-mean.sh
@@ -17,7 +17,7 @@ test -e ce-mean/model.npz
 test -e ce-mean.log
 
 cat ce-mean.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > ce-mean.out
-$MRT_TOOLS/diff-nums.py ce-mean.out ce-mean.expected -p 0.02 > ce-mean.diff
+$MRT_TOOLS/diff-nums.py ce-mean.out ce-mean.expected -p 0.02 -o ce-mean.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/cost-functions/test_ce-sum.sh b/tests/training/cost-functions/test_ce-sum.sh
index 44b0662..540e82d 100644
--- a/tests/training/cost-functions/test_ce-sum.sh
+++ b/tests/training/cost-functions/test_ce-sum.sh
@@ -18,7 +18,7 @@ test -e ce-sum/model.npz
 test -e ce-sum.log
 
 cat ce-sum.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > ce-sum.out
-$MRT_TOOLS/diff-nums.py ce-sum.out ce-sum.expected -p 0.2 > ce-sum.diff
+$MRT_TOOLS/diff-nums.py ce-sum.out ce-sum.expected -p 0.2 -o ce-sum.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/cost-functions/test_perplexity.sh b/tests/training/cost-functions/test_perplexity.sh
index 16436a4..ae828a4 100644
--- a/tests/training/cost-functions/test_perplexity.sh
+++ b/tests/training/cost-functions/test_perplexity.sh
@@ -18,7 +18,7 @@ test -e perplexity/model.npz
 test -e perplexity.log
 
 cat perplexity.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > perplexity.out
-$MRT_TOOLS/diff-nums.py perplexity.out perplexity.expected -p 0.5 > perplexity.diff
+$MRT_TOOLS/diff-nums.py perplexity.out perplexity.expected -p 0.5 -o perplexity.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_compare_word_and_sentence_weighting.sh b/tests/training/data-weighting/test_compare_word_and_sentence_weighting.sh
index 7e3fc79..6529857 100644
--- a/tests/training/data-weighting/test_compare_word_and_sentence_weighting.sh
+++ b/tests/training/data-weighting/test_compare_word_and_sentence_weighting.sh
@@ -33,7 +33,7 @@ test -e compare/model.words.npz
 test -e compare.words.log
 
 cat compare.words.log | $MRT_TOOLS/extract-disp.sh > compare.words.out
-$MRT_TOOLS/diff-nums.py compare.words.out compare.sents.out -p 0.1 > compare.words.diff
+$MRT_TOOLS/diff-nums.py compare.words.out compare.sents.out -p 0.1 -o compare.words.diff
 
 
 # Exit with success code
diff --git a/tests/training/data-weighting/test_maxi_batches_with_sentence_weights.sh b/tests/training/data-weighting/test_maxi_batches_with_sentence_weights.sh
index 2bc5e1e..071fd94 100644
--- a/tests/training/data-weighting/test_maxi_batches_with_sentence_weights.sh
+++ b/tests/training/data-weighting/test_maxi_batches_with_sentence_weights.sh
@@ -20,7 +20,7 @@ test -e maxibatch/model.npz
 test -e maxibatch.log
 
 $MRT_TOOLS/extract-costs.sh < maxibatch.log > maxibatch.out
-$MRT_TOOLS/diff-nums.py maxibatch.out maxibatch.expected -p 0.1 > maxibatch.diff
+$MRT_TOOLS/diff-nums.py maxibatch.out maxibatch.expected -p 0.1 -o maxibatch.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_maxi_batches_with_word_weights.sh b/tests/training/data-weighting/test_maxi_batches_with_word_weights.sh
index 82e1b16..7b87e3d 100644
--- a/tests/training/data-weighting/test_maxi_batches_with_word_weights.sh
+++ b/tests/training/data-weighting/test_maxi_batches_with_word_weights.sh
@@ -20,7 +20,7 @@ test -e word_maxibatch/model.npz
 test -e word_maxibatch.log
 
 $MRT_TOOLS/extract-costs.sh < word_maxibatch.log > word_maxibatch.out
-$MRT_TOOLS/diff-nums.py word_maxibatch.out word_maxibatch.expected -p 0.1 > word_maxibatch.diff
+$MRT_TOOLS/diff-nums.py word_maxibatch.out word_maxibatch.expected -p 0.1 -o word_maxibatch.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_sentence_weighting_sqlite.sh b/tests/training/data-weighting/test_sentence_weighting_sqlite.sh
index 7eec0ac..3f393fc 100644
--- a/tests/training/data-weighting/test_sentence_weighting_sqlite.sh
+++ b/tests/training/data-weighting/test_sentence_weighting_sqlite.sh
@@ -19,7 +19,7 @@ test -e sqlite.log
 
 cat sqlite.log | $MRT_TOOLS/extract-costs.sh > sqlite.out
 
-$MRT_TOOLS/diff-nums.py sqlite.out sqlite.expected -p 0.1 > sqlite.diff
+$MRT_TOOLS/diff-nums.py sqlite.out sqlite.expected -p 0.1 -o sqlite.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_sentence_weighting_with_ones.sh b/tests/training/data-weighting/test_sentence_weighting_with_ones.sh
index 4f165bf..6ff43ef 100644
--- a/tests/training/data-weighting/test_sentence_weighting_with_ones.sh
+++ b/tests/training/data-weighting/test_sentence_weighting_with_ones.sh
@@ -31,7 +31,7 @@ test -e ones/model.npz
 test -e ones.log
 
 cat ones.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed -r 's/ Time.*//' > ones.out
-$MRT_TOOLS/diff-nums.py noweights.out ones.out -p 0.1 > ones.diff
+$MRT_TOOLS/diff-nums.py noweights.out ones.out -p 0.1 -o ones.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_sentence_weights_x3.sh b/tests/training/data-weighting/test_sentence_weights_x3.sh
index 0775904..45b92d3 100644
--- a/tests/training/data-weighting/test_sentence_weights_x3.sh
+++ b/tests/training/data-weighting/test_sentence_weights_x3.sh
@@ -30,7 +30,7 @@ test -e x3weights.log
 
 cat x3weights.log | grep 'Cost ' | sed -r 's/.*Cost (.*) : Time.*/\1/' > x3weights.out
 
-$MRT_TOOLS/diff-nums.py x3copied.out x3weights.out -p 0.1 > x3weights.diff
+$MRT_TOOLS/diff-nums.py x3copied.out x3weights.out -p 0.1 -o x3weights.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_validation.sh b/tests/training/data-weighting/test_validation.sh
index 79c7420..3bb3c63 100644
--- a/tests/training/data-weighting/test_validation.sh
+++ b/tests/training/data-weighting/test_validation.sh
@@ -23,8 +23,8 @@ test -e valid/train.log
 $MRT_TOOLS/strip-timestamps.sh < valid/valid.log > valid.out
 $MRT_TOOLS/extract-costs.sh < valid/train.log > train.out
 
-$MRT_TOOLS/diff-nums.py valid.out valid.expected -p 1.99 > valid.diff
-$MRT_TOOLS/diff-nums.py train.out train.expected -p 1.99 > train.diff
+$MRT_TOOLS/diff-nums.py valid.out valid.expected -p 1.99 -o valid.diff
+$MRT_TOOLS/diff-nums.py train.out train.expected -p 1.99 -o train.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_word_weighting_sqlite.sh b/tests/training/data-weighting/test_word_weighting_sqlite.sh
index d95e7b3..bb2452b 100644
--- a/tests/training/data-weighting/test_word_weighting_sqlite.sh
+++ b/tests/training/data-weighting/test_word_weighting_sqlite.sh
@@ -20,7 +20,7 @@ test -e sqlite_word/corpus.sqlite3
 test -e sqlite_word.log
 
 cat sqlite_word.log | $MRT_TOOLS/extract-costs.sh > sqlite_word.out
-$MRT_TOOLS/diff-nums.py sqlite_word.out sqlite_word.expected -p 0.1 > sqlite_word.diff
+$MRT_TOOLS/diff-nums.py sqlite_word.out sqlite_word.expected -p 0.1 -o sqlite_word.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_word_weighting_with_eos.sh b/tests/training/data-weighting/test_word_weighting_with_eos.sh
index 5401902..4044b77 100644
--- a/tests/training/data-weighting/test_word_weighting_with_eos.sh
+++ b/tests/training/data-weighting/test_word_weighting_with_eos.sh
@@ -21,7 +21,7 @@ test -e word_eos/model.npz
 test -e word_eos.log
 
 cat word_eos.log | $MRT_TOOLS/extract-disp.sh > word_eos.out
-$MRT_TOOLS/diff-nums.py word_eos.out word_eos.expected -p 0.1 > word_eos.diff
+$MRT_TOOLS/diff-nums.py word_eos.out word_eos.expected -p 0.1 -o word_eos.diff
 
 
 # Exit with success code
diff --git a/tests/training/data-weighting/test_word_weighting_with_ones.sh b/tests/training/data-weighting/test_word_weighting_with_ones.sh
index 92eeed5..9e47cdc 100644
--- a/tests/training/data-weighting/test_word_weighting_with_ones.sh
+++ b/tests/training/data-weighting/test_word_weighting_with_ones.sh
@@ -28,7 +28,7 @@ test -e word_ones/model.npz
 test -e word_ones.log
 
 cat word_ones.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed -r 's/ Time.*//' > word_ones.out
-$MRT_TOOLS/diff-nums.py word_noweights.out word_ones.out -p 0.1 > word_ones.diff
+$MRT_TOOLS/diff-nums.py word_noweights.out word_ones.out -p 0.1 -o word_ones.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/data-weighting/test_word_weighting_with_twos.sh b/tests/training/data-weighting/test_word_weighting_with_twos.sh
index e9f2d94..a39ec07 100644
--- a/tests/training/data-weighting/test_word_weighting_with_twos.sh
+++ b/tests/training/data-weighting/test_word_weighting_with_twos.sh
@@ -19,7 +19,7 @@ test -e word_twos/model.npz
 test -e word_twos.log
 
 cat word_twos.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed -r 's/ Time.*//' > word_twos.out
-$MRT_TOOLS/diff-nums.py word_twos.out word_twos.expected -p 0.1 > word_twos.diff
+$MRT_TOOLS/diff-nums.py word_twos.out word_twos.expected -p 0.1 -o word_twos.diff
 
 rm -rf word_twos_cfg word_twos_cfg.{log,out,diff}
 mkdir -p word_twos_cfg
@@ -34,7 +34,7 @@ $MRT_MARIAN/build/marian \
     -c word_twos.config.yml
 
 cat word_twos_cfg.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed -r 's/ Time.*//' > word_twos_cfg.out
-$MRT_TOOLS/diff-nums.py word_twos_cfg.out word_twos.expected -p 0.1 > word_twos_cfg.diff
+$MRT_TOOLS/diff-nums.py word_twos_cfg.out word_twos.expected -p 0.1 -o word_twos_cfg.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/embeddings/test_custom_embeddings.sh b/tests/training/embeddings/test_custom_embeddings.sh
index 6523777..3edd00a 100644
--- a/tests/training/embeddings/test_custom_embeddings.sh
+++ b/tests/training/embeddings/test_custom_embeddings.sh
@@ -26,8 +26,8 @@ $MRT_MARIAN/scripts/embeddings/export_embeddings.py -m custom_emb/model.npz -o c
 cat custom_emb.all.src | head -n 101 > custom_emb.src
 cat custom_emb.all.trg | head -n 101 > custom_emb.trg
 
-$MRT_TOOLS/diff-nums.py -n 1 -p 0.0005 word2vec.en custom_emb.src > custom_emb.src.diff
-$MRT_TOOLS/diff-nums.py -n 1 -p 0.0005 word2vec.de custom_emb.trg > custom_emb.trg.diff
+$MRT_TOOLS/diff-nums.py -n 1 -p 0.0005 word2vec.en custom_emb.src -o custom_emb.src.diff
+$MRT_TOOLS/diff-nums.py -n 1 -p 0.0005 word2vec.de custom_emb.trg -o custom_emb.trg.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/exp-smoothing/test_expsmooth.sh b/tests/training/exp-smoothing/test_expsmooth.sh
index caf5762..2428dfc 100644
--- a/tests/training/exp-smoothing/test_expsmooth.sh
+++ b/tests/training/exp-smoothing/test_expsmooth.sh
@@ -36,11 +36,11 @@ cat expsmooth.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | grep -v 'val
 cat expsmooth.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | grep 'valid' | sed 's/ : Time.*//' > expsmooth.valid.out
 
 
-$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth.out expsmooth.expected > expsmooth.diff
-$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth.valid.out expsmooth.valid.expected > expsmooth.valid.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth.out expsmooth.expected -o expsmooth.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth.valid.out expsmooth.valid.expected -o expsmooth.valid.diff
 
 # There should be no difference in costs between training w/ and w/o exponential smoothing
-$MRT_TOOLS/diff-nums.py -p 0.001 expsmooth.out noexpsmooth.out > noexpsmooth.diff
+$MRT_TOOLS/diff-nums.py -p 0.001 expsmooth.out noexpsmooth.out -o noexpsmooth.diff
 
 
 # Exit with success code
diff --git a/tests/training/exp-smoothing/test_expsmooth_sync.sh b/tests/training/exp-smoothing/test_expsmooth_sync.sh
index 2bf3451..14d5442 100644
--- a/tests/training/exp-smoothing/test_expsmooth_sync.sh
+++ b/tests/training/exp-smoothing/test_expsmooth_sync.sh
@@ -41,11 +41,11 @@ cat expsmooth_sync.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | grep -v
 cat expsmooth_sync.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | grep 'valid' | sed 's/ : Time.*//' > expsmooth_sync.valid.out
 
 
-$MRT_TOOLS/diff-nums.py -p 0.1 expsmooth_sync.out expsmooth_sync.expected > expsmooth_sync.diff
-$MRT_TOOLS/diff-nums.py -p 0.1 expsmooth_sync.valid.out expsmooth_sync.valid.expected > expsmooth_sync.valid.diff
+$MRT_TOOLS/diff-nums.py -p 0.1 expsmooth_sync.out expsmooth_sync.expected -o expsmooth_sync.diff
+$MRT_TOOLS/diff-nums.py -p 0.1 expsmooth_sync.valid.out expsmooth_sync.valid.expected -o expsmooth_sync.valid.diff
 
 # There should be no difference in costs between training w/ and w/o exponential smoothing
-$MRT_TOOLS/diff-nums.py -p 0.1 expsmooth_sync.out noexpsmooth_sync.out > noexpsmooth_sync.diff
+$MRT_TOOLS/diff-nums.py -p 0.1 expsmooth_sync.out noexpsmooth_sync.out -o noexpsmooth_sync.diff
 
 
 # Exit with success code
diff --git a/tests/training/lm/test_lm-transformer.sh b/tests/training/lm/test_lm-transformer.sh
index 70fac20..47737e5 100644
--- a/tests/training/lm/test_lm-transformer.sh
+++ b/tests/training/lm/test_lm-transformer.sh
@@ -19,13 +19,13 @@ test -e lm-transformer/model.npz.yml
 test -e lm-transformer.log
 
 cat lm-transformer.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > lm-transformer.out
-$MRT_TOOLS/diff-nums.py lm-transformer.out lm-transformer.expected -p 0.02 > lm-transformer.diff
+$MRT_TOOLS/diff-nums.py lm-transformer.out lm-transformer.expected -p 0.02 -o lm-transformer.diff
 
 # Scoring with LM
 test -s temp.bpe.en || tail $MRT_DATA/europarl.de-en/corpus.bpe.en > test.bpe.en
 
 $MRT_MARIAN/build/marian-scorer -m lm-transformer/model.npz -t test.bpe.en -v vocab.en.yml > lm-transformer.scores.out
-$MRT_TOOLS/diff-nums.py lm-transformer.scores.out lm-transformer.scores.expected -p 0.002 > lm-transformer.scores.diff
+$MRT_TOOLS/diff-nums.py lm-transformer.scores.out lm-transformer.scores.expected -p 0.002 -o lm-transformer.scores.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/lm/test_lm.sh b/tests/training/lm/test_lm.sh
index d602134..e3c32a0 100644
--- a/tests/training/lm/test_lm.sh
+++ b/tests/training/lm/test_lm.sh
@@ -19,13 +19,13 @@ test -e lm/model.npz.yml
 test -e lm.log
 
 cat lm.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > lm.out
-$MRT_TOOLS/diff-nums.py lm.out lm.expected -p 0.02 > lm.diff
+$MRT_TOOLS/diff-nums.py lm.out lm.expected -p 0.02 -o lm.diff
 
 # Scoring with LM
 test -s temp.bpe.en || tail $MRT_DATA/europarl.de-en/corpus.bpe.en > test.bpe.en
 
 $MRT_MARIAN/build/marian-scorer -m lm/model.npz -t test.bpe.en -v vocab.en.yml > lm.scores.out
-$MRT_TOOLS/diff-nums.py lm.scores.out lm.scores.expected -p 0.002 > lm.scores.diff
+$MRT_TOOLS/diff-nums.py lm.scores.out lm.scores.expected -p 0.002 -o lm.scores.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/model-types/test_transformer.sh b/tests/training/model-types/test_transformer.sh
index 28bad90..35f8f09 100644
--- a/tests/training/model-types/test_transformer.sh
+++ b/tests/training/model-types/test_transformer.sh
@@ -18,7 +18,7 @@ test -e transformer/model.npz
 test -e transformer.log
 
 cat transformer.log | $MRT_TOOLS/extract-costs.sh > transformer.out
-$MRT_TOOLS/diff-nums.py transformer.out transformer.expected -p 0.01 > transformer.diff
+$MRT_TOOLS/diff-nums.py transformer.out transformer.expected -p 0.01 -o transformer.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/multi-gpu/test_async_sgd_runs.sh b/tests/training/multi-gpu/test_async_sgd_runs.sh
index 6fb4573..15b0348 100644
--- a/tests/training/multi-gpu/test_async_sgd_runs.sh
+++ b/tests/training/multi-gpu/test_async_sgd_runs.sh
@@ -24,7 +24,7 @@ test -e vocab.de.yml
 test -e async_sgd.log
 
 cat async_sgd.log | $MRT_TOOLS/strip-timestamps.sh | grep -oP "Ep\. 1 .* Cost [0-9.]*" > async_sgd.out
-$MRT_TOOLS/diff-nums.py async_sgd.out async_sgd.expected -p 5.00 --allow-n-diffs 2 > async_sgd.diff
+$MRT_TOOLS/diff-nums.py async_sgd.out async_sgd.expected -p 5.00 --allow-n-diffs 2 -o async_sgd.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/multi-gpu/test_sync_sgd.sh b/tests/training/multi-gpu/test_sync_sgd.sh
index 5af1d6d..0d511b3 100644
--- a/tests/training/multi-gpu/test_sync_sgd.sh
+++ b/tests/training/multi-gpu/test_sync_sgd.sh
@@ -24,7 +24,7 @@ test -e sync_sgd/model.full.npz
 test -e sync_sgd.log
 
 cat sync_sgd.log | $MRT_TOOLS/extract-costs.sh > sync_sgd.out
-$MRT_TOOLS/diff-nums.py sync_sgd.out sync_sgd.expected -p 0.1 > sync_sgd.diff
+$MRT_TOOLS/diff-nums.py sync_sgd.out sync_sgd.expected -p 0.1 -o sync_sgd.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/multi-source/test_multi-s2s.sh b/tests/training/multi-source/test_multi-s2s.sh
index 4d37e17..e55a835 100644
--- a/tests/training/multi-source/test_multi-s2s.sh
+++ b/tests/training/multi-source/test_multi-s2s.sh
@@ -19,7 +19,7 @@ test -e multi-s2s/model.npz.yml
 test -e multi-s2s.log
 
 cat multi-s2s.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > multi-s2s.out
-$MRT_TOOLS/diff-nums.py multi-s2s.out multi-s2s.expected -p 0.2 > multi-s2s.diff
+$MRT_TOOLS/diff-nums.py multi-s2s.out multi-s2s.expected -p 0.2 -o multi-s2s.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/multi-source/test_multi-transformer.sh b/tests/training/multi-source/test_multi-transformer.sh
index 321342a..cafd2ad 100644
--- a/tests/training/multi-source/test_multi-transformer.sh
+++ b/tests/training/multi-source/test_multi-transformer.sh
@@ -19,7 +19,7 @@ test -e multi-transformer/model.npz.yml
 test -e multi-transformer.log
 
 cat multi-transformer.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > multi-transformer.out
-$MRT_TOOLS/diff-nums.py multi-transformer.out multi-transformer.expected -p 0.2 > multi-transformer.diff
+$MRT_TOOLS/diff-nums.py multi-transformer.out multi-transformer.expected -p 0.2 -o multi-transformer.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/nematus/test_encdec_depth.sh b/tests/training/nematus/test_encdec_depth.sh
index 64fe466..99c4c7c 100644
--- a/tests/training/nematus/test_encdec_depth.sh
+++ b/tests/training/nematus/test_encdec_depth.sh
@@ -19,7 +19,7 @@ test -e encdec_depth/model.npz
 test -e encdec_depth/model.npz.yml
 
 cat encdec_depth.log | $MRT_TOOLS/extract-costs.sh > encdec_depth.out
-$MRT_TOOLS/diff-nums.py encdec_depth.out encdec_depth.expected -p 0.3 > encdec_depth.diff
+$MRT_TOOLS/diff-nums.py encdec_depth.out encdec_depth.expected -p 0.3 -o encdec_depth.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/nematus/test_wmt17_model.sh b/tests/training/nematus/test_wmt17_model.sh
index c45c287..43cdebe 100644
--- a/tests/training/nematus/test_wmt17_model.sh
+++ b/tests/training/nematus/test_wmt17_model.sh
@@ -19,7 +19,7 @@ test -e wmt17/model.npz
 test -e wmt17/model.npz.yml
 
 cat wmt17.log | $MRT_TOOLS/extract-costs.sh > wmt17.out
-$MRT_TOOLS/diff-nums.py wmt17.out wmt17.expected -p 0.3 > wmt17.diff
+$MRT_TOOLS/diff-nums.py wmt17.out wmt17.expected -p 0.3 -o wmt17.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/pretraining/test_weights_from_pretrained_model.sh b/tests/training/pretraining/test_weights_from_pretrained_model.sh
index 359cc8c..d7087c2 100644
--- a/tests/training/pretraining/test_weights_from_pretrained_model.sh
+++ b/tests/training/pretraining/test_weights_from_pretrained_model.sh
@@ -43,14 +43,14 @@ test -e model/model.npz
 for key in encoder_Wemb encoder_bi_U encoder_bi_r_Wx; do
     python3 $MRT_MARIAN/scripts/contrib/model_info.py -m model/orig.npz -k $key > key-orig-$key.txt
     python3 $MRT_MARIAN/scripts/contrib/model_info.py -m model/model.npz -k $key > key-model-$key.txt
-    $MRT_TOOLS/diff-nums.py --numpy -p 0.000001 key-orig-$key.txt key-model-$key.txt > key-diff-$key.txt
+    $MRT_TOOLS/diff-nums.py --numpy -p 0.000001 key-orig-$key.txt key-model-$key.txt -o key-diff-$key.txt
 done
 
 # Test if selected weights are identical with LM
 for key in decoder_Wemb decoder_cell1_U decoder_cell2_bx decoder_ff_logit_l1_W0; do
     python3 $MRT_MARIAN/scripts/contrib/model_info.py -m model/lm.npz -k $key > key-lm-$key.txt
     python3 $MRT_MARIAN/scripts/contrib/model_info.py -m model/model.npz -k $key > key-model-$key.txt
-    $MRT_TOOLS/diff-nums.py --numpy -p 0.000001 key-lm-$key.txt key-model-$key.txt > key-diff-$key.txt
+    $MRT_TOOLS/diff-nums.py --numpy -p 0.000001 key-lm-$key.txt key-model-$key.txt -o key-diff-$key.txt
 done
 
 # Exit with success code
diff --git a/tests/training/restarting/test_sgd_for_two_epochs.sh b/tests/training/restarting/test_sgd_for_two_epochs.sh
index 3df8206..3cb09d2 100644
--- a/tests/training/restarting/test_sgd_for_two_epochs.sh
+++ b/tests/training/restarting/test_sgd_for_two_epochs.sh
@@ -44,7 +44,7 @@ test -e sgd_2nd_epoch.log
 cat sgd_2nd_epoch.log | $MRT_TOOLS/extract-disp.sh > sgd_2nd_epoch.out
 cat sgd_1st_epoch.out sgd_2nd_epoch.out > sgd_2e.out
 
-$MRT_TOOLS/diff-nums.py sgd_2e.out sgd_2e.expected -p 0.3 > sgd_2e.diff
+$MRT_TOOLS/diff-nums.py sgd_2e.out sgd_2e.expected -p 0.3 -o sgd_2e.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_corpus_restoration.sh b/tests/training/restoring/corpus/test_corpus_restoration.sh
index 9b39e32..8b3f625 100644
--- a/tests/training/restoring/corpus/test_corpus_restoration.sh
+++ b/tests/training/restoring/corpus/test_corpus_restoration.sh
@@ -44,7 +44,7 @@ test -e corpus_2.log
 cat corpus_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > corpus_2.out
 cat corpus_1.out corpus_2.out > corpus.out
 
-$MRT_TOOLS/diff-nums.py corpus.out corpus.expected -p 0.1 > corpus.diff
+$MRT_TOOLS/diff-nums.py corpus.out corpus.expected -p 0.1 -o corpus.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_corpus_restoration_maxi_batch.sh b/tests/training/restoring/corpus/test_corpus_restoration_maxi_batch.sh
index b26f437..fe5ff6e 100644
--- a/tests/training/restoring/corpus/test_corpus_restoration_maxi_batch.sh
+++ b/tests/training/restoring/corpus/test_corpus_restoration_maxi_batch.sh
@@ -46,7 +46,7 @@ test -e corpus_maxi_2.log
 cat corpus_maxi_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > corpus_maxi_2.out
 cat corpus_maxi_1.out corpus_maxi_2.out > corpus_maxi.out
 
-$MRT_TOOLS/diff-nums.py corpus_maxi.out corpus_maxi.expected -p 0.1 > corpus_maxi.diff
+$MRT_TOOLS/diff-nums.py corpus_maxi.out corpus_maxi.expected -p 0.1 -o corpus_maxi.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_corpus_restoration_mini_batch_fit.sh b/tests/training/restoring/corpus/test_corpus_restoration_mini_batch_fit.sh
index 85f0746..2b2ade7 100644
--- a/tests/training/restoring/corpus/test_corpus_restoration_mini_batch_fit.sh
+++ b/tests/training/restoring/corpus/test_corpus_restoration_mini_batch_fit.sh
@@ -45,7 +45,7 @@ test -e corpus_fit_2.log
 cat corpus_fit_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > corpus_fit_2.out
 cat corpus_fit_1.out corpus_fit_2.out > corpus_fit.out
 
-$MRT_TOOLS/diff-nums.py corpus_fit.out corpus_fit.expected -p 0.1 > corpus_fit.diff
+$MRT_TOOLS/diff-nums.py corpus_fit.out corpus_fit.expected -p 0.1 -o corpus_fit.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_corpus_restoration_no_shuffle.sh b/tests/training/restoring/corpus/test_corpus_restoration_no_shuffle.sh
index 04f0e66..3a25ca1 100644
--- a/tests/training/restoring/corpus/test_corpus_restoration_no_shuffle.sh
+++ b/tests/training/restoring/corpus/test_corpus_restoration_no_shuffle.sh
@@ -45,7 +45,7 @@ test -e corpus_noshuf_2.log
 cat corpus_noshuf_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > corpus_noshuf_2.out
 cat corpus_noshuf_1.out corpus_noshuf_2.out > corpus_noshuf.out
 
-$MRT_TOOLS/diff-nums.py corpus_noshuf.out corpus_noshuf.expected -p 0.1 > corpus_noshuf.diff
+$MRT_TOOLS/diff-nums.py corpus_noshuf.out corpus_noshuf.expected -p 0.1 -o corpus_noshuf.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_corpus_restoration_one_epoch.sh b/tests/training/restoring/corpus/test_corpus_restoration_one_epoch.sh
index d17fa96..affc5d7 100644
--- a/tests/training/restoring/corpus/test_corpus_restoration_one_epoch.sh
+++ b/tests/training/restoring/corpus/test_corpus_restoration_one_epoch.sh
@@ -44,7 +44,7 @@ test -e corpus_one_2.log
 cat corpus_one_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > corpus_one_2.out
 cat corpus_one_1.out corpus_one_2.out > corpus_one.out
 
-$MRT_TOOLS/diff-nums.py corpus_one.out corpus_one.expected -p 0.1 > corpus_one.diff
+$MRT_TOOLS/diff-nums.py corpus_one.out corpus_one.expected -p 0.1 -o corpus_one.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_corpus_restoration_s2s.sh b/tests/training/restoring/corpus/test_corpus_restoration_s2s.sh
index 2b811b8..606acbb 100644
--- a/tests/training/restoring/corpus/test_corpus_restoration_s2s.sh
+++ b/tests/training/restoring/corpus/test_corpus_restoration_s2s.sh
@@ -44,7 +44,7 @@ test -e corpus_s2s_2.log
 cat corpus_s2s_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > corpus_s2s_2.out
 cat corpus_s2s_1.out corpus_s2s_2.out > corpus_s2s.out
 
-$MRT_TOOLS/diff-nums.py corpus_s2s.out corpus_s2s.expected -p 0.1 > corpus_s2s.diff
+$MRT_TOOLS/diff-nums.py corpus_s2s.out corpus_s2s.expected -p 0.1 -o corpus_s2s.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_sqlite_restoration.sh b/tests/training/restoring/corpus/test_sqlite_restoration.sh
index 735f780..0dcbdf8 100644
--- a/tests/training/restoring/corpus/test_sqlite_restoration.sh
+++ b/tests/training/restoring/corpus/test_sqlite_restoration.sh
@@ -44,7 +44,7 @@ test -e sqlite_2.log
 cat sqlite_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > sqlite_2.out
 cat sqlite_1.out sqlite_2.out > sqlite.out
 
-$MRT_TOOLS/diff-nums.py sqlite.out sqlite.expected -p 0.1 > sqlite.diff
+$MRT_TOOLS/diff-nums.py sqlite.out sqlite.expected -p 0.1 -o sqlite.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/corpus/test_sqlite_restoration_maxi_batch.sh b/tests/training/restoring/corpus/test_sqlite_restoration_maxi_batch.sh
index f31b64d..09770d1 100644
--- a/tests/training/restoring/corpus/test_sqlite_restoration_maxi_batch.sh
+++ b/tests/training/restoring/corpus/test_sqlite_restoration_maxi_batch.sh
@@ -44,7 +44,7 @@ test -e sqlite_maxi_2.log
 cat sqlite_maxi_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' > sqlite_maxi_2.out
 cat sqlite_maxi_1.out sqlite_maxi_2.out > sqlite_maxi.out
 
-$MRT_TOOLS/diff-nums.py sqlite_maxi.out sqlite_maxi.expected -p 0.1 > sqlite_maxi.diff
+$MRT_TOOLS/diff-nums.py sqlite_maxi.out sqlite_maxi.expected -p 0.1 -o sqlite_maxi.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/exp-smoothing/test_expsmooth.sh b/tests/training/restoring/exp-smoothing/test_expsmooth.sh
index 31e0f7b..624329f 100644
--- a/tests/training/restoring/exp-smoothing/test_expsmooth.sh
+++ b/tests/training/restoring/exp-smoothing/test_expsmooth.sh
@@ -77,8 +77,8 @@ cat expsmooth_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | grep 'vali
 
 
 # Results
-$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth.out expsmooth.expected > expsmooth.diff
-$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth.valid.out expsmooth.valid.expected > expsmooth.valid.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth.out expsmooth.expected -o expsmooth.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth.valid.out expsmooth.valid.expected -o expsmooth.valid.diff
 
 
 # Exit with success code
diff --git a/tests/training/restoring/exp-smoothing/test_expsmooth_s2s.sh b/tests/training/restoring/exp-smoothing/test_expsmooth_s2s.sh
index e56bb9a..3a74557 100644
--- a/tests/training/restoring/exp-smoothing/test_expsmooth_s2s.sh
+++ b/tests/training/restoring/exp-smoothing/test_expsmooth_s2s.sh
@@ -77,8 +77,8 @@ cat expsmooth_s2s_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | grep '
 
 
 # Results
-$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth_s2s.out expsmooth_s2s.expected > expsmooth_s2s.diff
-$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth_s2s.valid.out expsmooth_s2s.valid.expected > expsmooth_s2s.valid.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth_s2s.out expsmooth_s2s.expected -o expsmooth_s2s.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth_s2s.valid.out expsmooth_s2s.valid.expected -o expsmooth_s2s.valid.diff
 
 
 # Exit with success code
diff --git a/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh b/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh
index da4425e..20aadcf 100644
--- a/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh
+++ b/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh
@@ -77,8 +77,8 @@ cat expsmooth_sync_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | grep
 
 
 # Results
-$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth_sync.out expsmooth_sync.expected > expsmooth_sync.diff
-$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth_sync.valid.out expsmooth_sync.valid.expected > expsmooth_sync.valid.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth_sync.out expsmooth_sync.expected -o expsmooth_sync.diff
+$MRT_TOOLS/diff-nums.py -p 0.01 expsmooth_sync.valid.out expsmooth_sync.valid.expected -o expsmooth_sync.valid.diff
 
 
 # Exit with success code
diff --git a/tests/training/restoring/multi-gpu/test_async.sh b/tests/training/restoring/multi-gpu/test_async.sh
index 157b9e7..0c3fcca 100644
--- a/tests/training/restoring/multi-gpu/test_async.sh
+++ b/tests/training/restoring/multi-gpu/test_async.sh
@@ -54,7 +54,7 @@ cat async.unsorted.expected | head -n -4 | sort -n > async.expected
 cat async.unsorted.out | head -n -4 | sort -n > async.out
 
 # async is undeterministic, so the conditions are weak
-$MRT_TOOLS/diff-nums.py -p 1.0 -n 2 async.out async.expected > async.diff
+$MRT_TOOLS/diff-nums.py -p 1.0 -n 2 async.out async.expected -o async.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/multi-gpu/test_sync.sh b/tests/training/restoring/multi-gpu/test_sync.sh
index 8687990..78190f6 100644
--- a/tests/training/restoring/multi-gpu/test_sync.sh
+++ b/tests/training/restoring/multi-gpu/test_sync.sh
@@ -47,7 +47,7 @@ test -e sync_2.log
 
 cat sync_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' >> sync.out
 
-$MRT_TOOLS/diff-nums.py -p 0.08 sync.out sync.expected > sync.diff
+$MRT_TOOLS/diff-nums.py -p 0.08 sync.out sync.expected -o sync.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/optimizer/test_adagrad_params.sh b/tests/training/restoring/optimizer/test_adagrad_params.sh
index e0e1b50..49fa78a 100644
--- a/tests/training/restoring/optimizer/test_adagrad_params.sh
+++ b/tests/training/restoring/optimizer/test_adagrad_params.sh
@@ -18,13 +18,13 @@ test -e adagrad/model.npz.optimizer.npz
 test -e adagrad.log
 
 $MRT_TOOLS/extract-costs.sh < adagrad.log > adagrad.costs.out
-$MRT_TOOLS/diff-nums.py adagrad.costs.out adagrad.costs.expected -p 0.2 > adagrad.costs.diff
+$MRT_TOOLS/diff-nums.py adagrad.costs.out adagrad.costs.expected -p 0.2 -o adagrad.costs.diff
 
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adagrad/model.npz.optimizer.npz > adagrad.keys.out
 diff adagrad.keys.out adagrad.keys.expected > adagrad.keys.diff
 
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adagrad/model.npz.optimizer.npz -k "adagrad_gt" > adagrad.gt.out
-$MRT_TOOLS/diff-nums.py --numpy -p 0.0001 adagrad.gt.out adagrad.gt.expected > adagrad.gt.diff
+$MRT_TOOLS/diff-nums.py --numpy -p 0.0001 adagrad.gt.out adagrad.gt.expected -o adagrad.gt.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/optimizer/test_adam_params.sh b/tests/training/restoring/optimizer/test_adam_params.sh
index ad77374..c1522d3 100644
--- a/tests/training/restoring/optimizer/test_adam_params.sh
+++ b/tests/training/restoring/optimizer/test_adam_params.sh
@@ -18,15 +18,15 @@ test -e adam/model.npz.optimizer.npz
 test -e adam.log
 
 $MRT_TOOLS/extract-costs.sh < adam.log > adam.costs.out
-$MRT_TOOLS/diff-nums.py adam.costs.out adam.costs.expected -p 0.2 > adam.costs.diff
+$MRT_TOOLS/diff-nums.py adam.costs.out adam.costs.expected -p 0.2 -o adam.costs.diff
 
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam/model.npz.optimizer.npz > adam.keys.out
 diff adam.keys.out adam.keys.expected > adam.keys.diff
 
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam/model.npz.optimizer.npz -k "adam_mt" > adam.mt.out
-$MRT_TOOLS/diff-nums.py --numpy -p 0.0001  adam.mt.out adam.mt.expected > adam.mt.diff
+$MRT_TOOLS/diff-nums.py --numpy -p 0.0001  adam.mt.out adam.mt.expected -o adam.mt.diff
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam/model.npz.optimizer.npz -k "adam_vt" > adam.vt.out
-$MRT_TOOLS/diff-nums.py --numpy -p 0.000005 adam.vt.out adam.vt.expected > adam.vt.diff
+$MRT_TOOLS/diff-nums.py --numpy -p 0.000005 adam.vt.out adam.vt.expected -o adam.vt.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/optimizer/test_adam_params_async.sh b/tests/training/restoring/optimizer/test_adam_params_async.sh
index a842050..c9b1271 100644
--- a/tests/training/restoring/optimizer/test_adam_params_async.sh
+++ b/tests/training/restoring/optimizer/test_adam_params_async.sh
@@ -23,7 +23,7 @@ test -e adam_async/model.npz.optimizer.npz
 test -e adam_async.log
 
 $MRT_TOOLS/extract-costs.sh < adam_async.log > adam_async.costs.out
-$MRT_TOOLS/diff-nums.py adam_async.costs.out adam_async.costs.expected -p 10.00 -n 2 > adam_async.costs.diff
+$MRT_TOOLS/diff-nums.py adam_async.costs.out adam_async.costs.expected -p 10.00 -n 2 -o adam_async.costs.diff
 
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam_async/model.npz.optimizer.npz > adam_async.keys.out
 diff adam_async.keys.out adam.keys.expected > adam_async.keys.diff
@@ -31,8 +31,8 @@ diff adam_async.keys.out adam.keys.expected > adam_async.keys.diff
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam_async/model.npz.optimizer.npz -k "adam_mt" > adam_async.mt.out
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam_async/model.npz.optimizer.npz -k "adam_vt" > adam_async.vt.out
 
-$MRT_TOOLS/diff-nums.py --numpy -a -p 0.02  adam_async.mt.out adam_async.mt.expected > adam_async.mt.diff
-$MRT_TOOLS/diff-nums.py --numpy    -p 0.001 adam_async.vt.out adam_async.vt.expected > adam_async.vt.diff
+$MRT_TOOLS/diff-nums.py --numpy -a -p 0.02  adam_async.mt.out adam_async.mt.expected -o adam_async.mt.diff
+$MRT_TOOLS/diff-nums.py --numpy    -p 0.001 adam_async.vt.out adam_async.vt.expected -o adam_async.vt.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/optimizer/test_adam_params_sync.sh b/tests/training/restoring/optimizer/test_adam_params_sync.sh
index 5fe2581..56e231c 100644
--- a/tests/training/restoring/optimizer/test_adam_params_sync.sh
+++ b/tests/training/restoring/optimizer/test_adam_params_sync.sh
@@ -23,7 +23,7 @@ test -e adam_sync/model.npz.optimizer.npz
 test -e adam_sync.log
 
 $MRT_TOOLS/extract-costs.sh < adam_sync.log > adam_sync.costs.out
-$MRT_TOOLS/diff-nums.py adam_sync.costs.out adam_sync.costs.expected -p 3.00 -n 2 > adam_sync.costs.diff
+$MRT_TOOLS/diff-nums.py adam_sync.costs.out adam_sync.costs.expected -p 3.00 -n 2 -o adam_sync.costs.diff
 
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam_sync/model.npz.optimizer.npz > adam_sync.keys.out
 diff adam_sync.keys.out adam.keys.expected > adam_sync.keys.diff
@@ -31,8 +31,8 @@ diff adam_sync.keys.out adam.keys.expected > adam_sync.keys.diff
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam_sync/model.npz.optimizer.npz -k "adam_mt" > adam_sync.mt.out
 python $MRT_MARIAN/scripts/contrib/model_info.py -m adam_sync/model.npz.optimizer.npz -k "adam_vt" > adam_sync.vt.out
 
-$MRT_TOOLS/diff-nums.py --numpy -p 0.002  adam_sync.mt.out adam_sync.mt.expected > adam_sync.mt.diff
-$MRT_TOOLS/diff-nums.py --numpy -p 0.0002 adam_sync.vt.out adam_sync.vt.expected > adam_sync.vt.diff
+$MRT_TOOLS/diff-nums.py --numpy -p 0.002  adam_sync.mt.out adam_sync.mt.expected -o adam_sync.mt.diff
+$MRT_TOOLS/diff-nums.py --numpy -p 0.0002 adam_sync.vt.out adam_sync.vt.expected -o adam_sync.vt.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/optimizer/test_loading_adam_params.sh b/tests/training/restoring/optimizer/test_loading_adam_params.sh
index 4d11c77..88ddf64 100644
--- a/tests/training/restoring/optimizer/test_loading_adam_params.sh
+++ b/tests/training/restoring/optimizer/test_loading_adam_params.sh
@@ -33,7 +33,7 @@ cat adam_load_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ :
 
 # The allowed tolerance needs to be radiculously high as restarting the
 # training is very instable on different GPU devices
-$MRT_TOOLS/diff-nums.py -p 15.0 -n 1 adam_load.out adam_load.expected > adam_load.diff
+$MRT_TOOLS/diff-nums.py -p 15.0 -n 1 adam_load.out adam_load.expected -o adam_load.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/validation/test_adding_validator_after_restart.sh b/tests/training/restoring/validation/test_adding_validator_after_restart.sh
index 1a70eba..9a93a6b 100644
--- a/tests/training/restoring/validation/test_adding_validator_after_restart.sh
+++ b/tests/training/restoring/validation/test_adding_validator_after_restart.sh
@@ -50,7 +50,7 @@ test -e valid_add/model.npz
 test -e valid_add_2.log
 
 cat valid_add_2.log | $MRT_TOOLS/strip-timestamps.sh >> valid_add.out
-$MRT_TOOLS/diff-nums.py -p 0.003 valid_add.out valid_add.expected > valid_add.diff
+$MRT_TOOLS/diff-nums.py -p 0.003 valid_add.out valid_add.expected -o valid_add.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh b/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh
index ede179d..4a6c4a9 100644
--- a/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh
+++ b/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh
@@ -36,7 +36,7 @@ test -e valid_lowisbet/model.npz
 test -e valid_lowisbet_2.log
 
 cat valid_lowisbet_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "cross-entropy" >> valid_lowisbet.out
-$MRT_TOOLS/diff-nums.py -p 0.1 valid_lowisbet.out valid_lowisbet.expected > valid_lowisbet.diff
+$MRT_TOOLS/diff-nums.py -p 0.1 valid_lowisbet.out valid_lowisbet.expected -o valid_lowisbet.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/validation/test_final_validation_after_batches.sh b/tests/training/validation/test_final_validation_after_batches.sh
index 74793b6..d6de7f1 100644
--- a/tests/training/validation/test_final_validation_after_batches.sh
+++ b/tests/training/validation/test_final_validation_after_batches.sh
@@ -19,7 +19,7 @@ test -e final_batch/model.npz
 test -e final_batch.log
 
 $MRT_TOOLS/strip-timestamps.sh < final_batch.log > final_batch.out
-$MRT_TOOLS/diff-nums.py final_batch.out final_batch.expected -p 0.9 > final_batch.diff
+$MRT_TOOLS/diff-nums.py final_batch.out final_batch.expected -p 0.9 -o final_batch.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/validation/test_final_validation_after_batches_match.sh b/tests/training/validation/test_final_validation_after_batches_match.sh
index 23fdb13..673c087 100644
--- a/tests/training/validation/test_final_validation_after_batches_match.sh
+++ b/tests/training/validation/test_final_validation_after_batches_match.sh
@@ -19,7 +19,7 @@ test -e final_match/model.npz
 test -e final_match.log
 
 $MRT_TOOLS/strip-timestamps.sh < final_match.log > final_match.out
-$MRT_TOOLS/diff-nums.py final_match.out final_match.expected -p 0.9 > final_match.diff
+$MRT_TOOLS/diff-nums.py final_match.out final_match.expected -p 0.9 -o final_match.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/validation/test_final_validation_after_epochs.sh b/tests/training/validation/test_final_validation_after_epochs.sh
index c6562d3..08d391f 100644
--- a/tests/training/validation/test_final_validation_after_epochs.sh
+++ b/tests/training/validation/test_final_validation_after_epochs.sh
@@ -22,7 +22,7 @@ test -e final_epoch/model.npz
 test -e final_epoch.log
 
 $MRT_TOOLS/strip-timestamps.sh < final_epoch.log > final_epoch.out
-$MRT_TOOLS/diff-nums.py final_epoch.out final_epoch.expected -p 0.9 > final_epoch.diff
+$MRT_TOOLS/diff-nums.py final_epoch.out final_epoch.expected -p 0.9 -o final_epoch.diff
 
 # Exit with success code
 exit 0