Welcome to mirror list, hosted at ThFree Co, Russian Federation.

setup.sh « input-tsv « interface « tests - github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 9888ee61ef6a5bf8b6421d023cc78861e4cbfa89 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Skip if compiled without SentencePiece.
# An unset or empty MRT_MARIAN_USE_SENTENCEPIECE takes the skip path and ends
# the setup with status 100; any non-empty value lets the setup continue.
# The expansion is quoted and tested with -z so the check is an explicit
# emptiness test rather than relying on `[ ! ]` collapsing to a one-argument
# string test, and so a value containing whitespace cannot break `[`.
if [ -z "${MRT_MARIAN_USE_SENTENCEPIECE:-}" ]; then
    exit 100
fi

# Required inputs: abort the setup with status 1 as soon as one of these
# regular files is missing. Paths are quoted so that MRT_DATA / MRT_MODELS
# values containing whitespace or glob characters are checked as single paths.
for required_file in \
    "$MRT_DATA/europarl.de-en/corpus.small.en.gz" \
    "$MRT_DATA/europarl.de-en/corpus.small.de.gz" \
    "$MRT_MODELS/ape/model.npz" \
    "$MRT_MODELS/rnn-spm/model.npz" \
    "$MRT_MODELS/lmgec/lm.npz"
do
    if [ ! -f "$required_file" ]; then
        # Report which input is missing; diagnostics go to stderr.
        printf 'Missing required file: %s\n' "$required_file" >&2
        exit 1
    fi
done

# Create training data.
# Every output is generated only when it is missing or empty (test -s), so
# repeated runs reuse files produced earlier. A failing generation step stops
# the setup with status 1 instead of silently leaving an empty file behind.

# Plain-text variants: remove every "@@ " sequence from the source corpora
# (presumably the BPE subword joiner, given the train.bpe.* copies below).
test -s train.de  || sed 's/@@ //g' "$MRT_DATA/train.max50.de" > train.de || exit 1
test -s train.en  || sed 's/@@ //g' "$MRT_DATA/train.max50.en" > train.en || exit 1
# Tab-separated file with the de line first and the en line second.
test -s train.tsv || paste train.de train.en > train.tsv || exit 1

# Unmodified copies of the same corpora, kept under the train.bpe.* names.
test -s train.bpe.de  || cat "$MRT_DATA/train.max50.de" > train.bpe.de || exit 1
test -s train.bpe.en  || cat "$MRT_DATA/train.max50.en" > train.bpe.en || exit 1
test -s train.bpe.tsv || paste train.bpe.de train.bpe.en > train.bpe.tsv || exit 1