Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/ems/experiment.meta')
-rw-r--r-- scripts/ems/experiment.meta | 44
1 file changed, 32 insertions, 12 deletions
diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta
index 9ce378a1a..57ef4f9d6 100644
--- a/scripts/ems/experiment.meta
+++ b/scripts/ems/experiment.meta
@@ -7,8 +7,15 @@ get-corpus
default-name: corpus/txt
rerun-on-change: input-extension output-extension
template: IN OUT $input-extension $output-extension
+pre-tok-clean
+ in: raw-stem
+ out: pre-tok-cleaned
+ default-name: corpus/pre-tok-cleaned
+ pass-unless: pre-tok-clean
+ template: $pre-tok-clean IN $input-extension $output-extension OUT OUT.lines-retained
+ parallelizable: yes
tokenize
- in: raw-stem
+ in: pre-tok-cleaned
out: tokenized-stem
default-name: corpus/tok
pass-unless: input-tokenizer output-tokenizer
@@ -158,11 +165,18 @@ get-corpus
pass-unless: get-corpus-script
default-name: lm/txt
template: $get-corpus-script > OUT
+use-parallel-corpus
+ in: parallel-corpus-stem
+ out: tokenized-corpus
+ default-name: lm/tok
+ ignore-unless: parallel-corpus-stem
+ template: ln -s IN.$output-extension OUT
tokenize
in: raw-corpus
out: tokenized-corpus
default-name: lm/tok
pass-unless: output-tokenizer
+ ignore-if: parallel-corpus-stem
template: $output-tokenizer < IN > OUT
parallelizable: yes
mock-parse
@@ -185,7 +199,7 @@ lowercase
default-name: lm/lowercased
pass-unless: output-lowercaser
ignore-if: output-truecaser
- only-factor-0: yes
+ #only-factor-0: yes
template: $output-lowercaser < IN > OUT
parallelizable: yes
truecase
@@ -204,8 +218,14 @@ split
default-name: lm/split
pass-unless: output-splitter
template: $output-splitter -model IN1.$output-extension < IN > OUT
+strip
+ in: split-corpus
+ out: stripped-corpus
+ default-name: lm/stripped
+ pass-unless: mock-output-parser-lm
+ template: $moses-script-dir/training/strip-xml.perl < IN > OUT
train
- in: split-corpus
+ in: stripped-corpus
out: lm
default-name: lm/lm
ignore-if: rlm-training
@@ -220,7 +240,7 @@ randomize
pass-unless: lm-randomizer
ignore-if: rlm-training
train-randomized
- in: split-corpus
+ in: stripped-corpus
out: rlm
default-name: lm/rlm
ignore-unless: rlm-training
@@ -953,21 +973,21 @@ split-reference-devtest
ignore-unless: use-mira
multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
template: $output-splitter -model IN1.$output-extension < IN > OUT
-reduce-reference
+strip-reference
in: split-ref
out: reference
- default-name: tuning/reference.reduced
+ default-name: tuning/reference.stripped
pass-unless: mock-output-parser-references
multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
- template: $moses-script-dir/training/reduce-factors.perl --factor 0 --xml 1 --corpus IN --reduced-corpus OUT && $moses-script-dir/training/wrappers/mosesxml2brackets.py < IN > OUT.trees
-reduce-reference-devtest
+ template: $moses-script-dir/training/strip-xml.perl < IN > OUT && $moses-script-dir/training/wrappers/mosesxml2brackets.py < IN > OUT.trees
+strip-reference-devtest
in: split-ref-devtest
out: reference
- default-name: tuning/reference.devtest.reduced
+ default-name: tuning/reference.devtest.stripped
pass-unless: mock-output-parser-references
ignore-unless: use-mira
multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
- template: $moses-script-dir/training/reduce-factors.perl --factor 0 --xml 1 --corpus IN --reduced-corpus OUT && $moses-script-dir/training/wrappers/mosesxml2brackets.py < IN > OUT.trees
+ template: $moses-script-dir/training/strip-xml.perl < IN > OUT && $moses-script-dir/training/wrappers/mosesxml2brackets.py < IN > OUT.trees
filter
in: input TRAINING:sigtest-filter-phrase-translation-table TRAINING:sigtest-filter-reordering-table TRAINING:corpus-mml-prefilter=OR=TRAINING:corpus-mml-postfilter=OR=TRAINING:domains TRAINING:transliteration-table
out: filtered-dir
@@ -1224,13 +1244,13 @@ lowercase-reference
pass-if: recaser
multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
template: $output-lowercaser < IN > OUT
-reduce-reference
+strip-reference
in: lowercased-reference
out: reference
default-name: evaluation/reference
pass-unless: mock-output-parser-references
multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
- template: $moses-script-dir/training/reduce-factors.perl --factor 0 --xml 1 --corpus IN --reduced-corpus OUT && $moses-script-dir/training/wrappers/mosesxml2brackets.py < IN > OUT.trees
+ template: $moses-script-dir/training/strip-xml.perl < IN > OUT && $moses-script-dir/training/wrappers/mosesxml2brackets.py < IN > OUT.trees
wade
in: filtered-dir truecased-input tokenized-reference alignment system-output
out: wade-analysis