Welcome to mirror list, hosted at ThFree Co, Russian Federation.

suffix-array-create.sh « adam-suffix-array « wrappers « training « scripts - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 5db5e9aa98aee14fc168fd4a9787a842d0f1e69a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/bin/bash
#
# This file is part of moses.  Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.

# execute: ~/workspace/bin/moses-smt/scripts/training/wrappers/suffix-array-create.sh $SA_EXEC_DIR $SOURCE_CORPUS $TARGET_CORPUS $ALIGNMENT $SA_OUTPUT

# eg.
#SA_EXEC_DIR=/Users/hieuhoang/workspace/github/cdec/sa-extract
#SOURCE_CORPUS=/Users/hieuhoang/workspace/data/europarl/exp/fr-en/training/corpus.2.fr
#TARGET_CORPUS=/Users/hieuhoang/workspace/data/europarl/exp/fr-en/training/corpus.2.en
#ALIGNMENT=/Users/hieuhoang/workspace/data/europarl/exp/fr-en/model/aligned.3.grow-diag-final-and
#SA_OUTPUT=/Users/hieuhoang/workspace/data/europarl/exp/fr-en/model/suffix-array.3


SA_EXEC_DIR=$1
SOURCE_CORPUS=$2
TARGET_CORPUS=$3
ALIGNMENT=$4
SA_OUTPUT=$5
GLUE_GRAMMAR=$6

mkdir $SA_OUTPUT

rm -rf $SA_OUTPUT/bitext

pushd .
cd $SA_EXEC_DIR

python $SA_EXEC_DIR/cdec/sa/compile.py -o $SA_OUTPUT -f $SOURCE_CORPUS -e $TARGET_CORPUS -a $ALIGNMENT -c $SA_OUTPUT/extract.ini

popd

echo "<s> [X] ||| <s> [S] ||| 1 ||| ||| 0" > $GLUE_GRAMMAR
echo "[X][S] </s> [X] ||| [X][S] </s> [S] ||| 1 ||| 0-0 ||| 0" >> $GLUE_GRAMMAR
echo "[X][S] [X][X] [X] ||| [X][S] [X][X] [S] ||| 2.718 ||| 0-0 1-1 ||| 0" >> $GLUE_GRAMMAR