#!/bin/bash -x

# Exit on error
set -e

# Test code goes here
rm -rf custom_emb custom_emb.log
mkdir -p custom_emb

# Train with custom embeddings for only one update with the smallest possible mini-batch, so that they should barely change
$MRT_MARIAN/build/marian \
    -m custom_emb/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \
    --embedding-vectors word2vec.en word2vec.de --dim-emb 64 --dim-rnn 64 \
    --no-shuffle --mini-batch 1 --after-batches 1 --log custom_emb.log

test -e custom_emb/model.npz
test -e custom_emb.log

# Check if loading of custom embeddings has been reported
grep -q "Loading embedding vectors from" custom_emb.log

# Check if embeddings in the saved model are very similar to the original vectors
$MRT_MARIAN/scripts/embeddings/export_embeddings.py -m custom_emb/model.npz -o custom_emb.all

# The custom embeddings have been trained only for the first 100 words from each vocabulary
head -n 101 custom_emb.all.src > custom_emb.src
head -n 101 custom_emb.all.trg > custom_emb.trg

$MRT_TOOLS/diff-floats.py -n 1 -p 0.0005 word2vec.en custom_emb.src > custom_emb.src.diff
$MRT_TOOLS/diff-floats.py -n 1 -p 0.0005 word2vec.de custom_emb.trg > custom_emb.trg.diff

# Exit with success code
exit 0
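
# Notes (assumptions, not verified against the tools above):
# - The word2vec text format is assumed to begin with a "<vocab_size> <dim>"
#   header line, which would explain why 101 lines are kept to cover the first
#   100 word vectors, and why diff-floats.py is invoked with `-n 1`
#   (presumably skipping that header).
# - diff-floats.py is assumed to exit non-zero when any pair of values differs
#   by more than the precision given with `-p`, so under `set -e` the two
#   comparisons above already act as assertions. If the script instead only
#   reported differences, a hypothetical explicit check would be:
#
#     test ! -s custom_emb.src.diff
#     test ! -s custom_emb.trg.diff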