diff options
author | Hieu Hoang <hieuhoang@gmail.com> | 2015-05-12 10:44:05 +0300 |
---|---|---|
committer | Hieu Hoang <hieuhoang@gmail.com> | 2015-05-12 10:44:05 +0300 |
commit | a922245864b5de774c22e3d6bcc3ab9a6c75a743 (patch) | |
tree | 57da8d9c9ef754a1e0de147eb67818c3b59abd3f /scripts | |
parent | 99a4813353617cb89a68e647fe5a2567c43f862a (diff) |
Default to using lmplz for convenience and because SRILM uses large amounts of memory; SRILM is now used only when $SRILM_DIR is defined.
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/OSM/OSM-Train.perl | 6 |
1 file changed, 3 insertions, 3 deletions
diff --git a/scripts/OSM/OSM-Train.perl b/scripts/OSM/OSM-Train.perl index 15d5a1ed1..0196f1557 100755 --- a/scripts/OSM/OSM-Train.perl +++ b/scripts/OSM/OSM-Train.perl @@ -91,11 +91,11 @@ print "Converting Bilingual Sentence Pair into Operation Corpus\n"; `$MOSES_SRC_DIR/bin/generateSequences $OUT_DIR/$factor_val/e $OUT_DIR/$factor_val/f $OUT_DIR/align $OUT_DIR/$factor_val/Singletons > $OUT_DIR/$factor_val/opCorpus`; print "Learning Operation Sequence Translation Model\n"; -if (defined($LMPLZ)) { - `$LMPLZ --order $ORDER --text $OUT_DIR/$factor_val/opCorpus --arpa $OUT_DIR/$factor_val/operationLM --prune 0 0 1`; +if (defined($SRILM_DIR)) { + `$SRILM_DIR/ngram-count -kndiscount -order $ORDER -unk -text $OUT_DIR/$factor_val/opCorpus -lm $OUT_DIR/$factor_val/operationLM`; } else { - `$SRILM_DIR/ngram-count -kndiscount -order $ORDER -unk -text $OUT_DIR/$factor_val/opCorpus -lm $OUT_DIR/$factor_val/operationLM`; + `$LMPLZ --order $ORDER --text $OUT_DIR/$factor_val/opCorpus --arpa $OUT_DIR/$factor_val/operationLM --prune 0 0 1`; } print "Binarizing\n"; |