Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/ems/example/config.basic')
-rw-r--r--scripts/ems/example/config.basic35
1 files changed, 35 insertions, 0 deletions
diff --git a/scripts/ems/example/config.basic b/scripts/ems/example/config.basic
index 1fce185df..4af8664aa 100644
--- a/scripts/ems/example/config.basic
+++ b/scripts/ems/example/config.basic
@@ -252,6 +252,35 @@ type = 8
#lm-randomizer = "$randlm-dir/buildlm -falsepos 8 -values 8"
#################################################################
+# MODIFIED MOORE LEWIS FILTERING
+
+[MML] IGNORE
+
+### specifications for language models to be trained
+#
+#lm-training = $srilm-dir/ngram-count
+#lm-settings = "-interpolate -kndiscount -unk"
+#lm-binarizer = $moses-src-dir/bin/build_binary
+#lm-query = $moses-src-dir/bin/query
+#order = 5
+
+### in-/out-of-domain source/target corpora to train the 4 language model
+#
+# in-domain: point either to a parallel corpus
+#outdomain-stem = [CORPUS:toy:clean-split-stem]
+
+# ... or to two separate monolingual corpora
+#indomain-target = [LM:toy:lowercased-corpus]
+#raw-indomain-source = $toy-data/nc-5k.$input-extension
+
+# point to out-of-domain parallel corpus
+#outdomain-stem = [CORPUS:giga:clean-split-stem]
+
+# settings: number of lines sampled from the corpora to train each language model on
+# (typically a million or so)
+#settings = "--line-count 1000000"
+
+#################################################################
# TRANSLATION MODEL TRAINING
[TRAINING]
@@ -316,6 +345,12 @@ alignment-symmetrization-method = grow-diag-final-and
#
#word-alignment = $working-dir/model/aligned.1
+### filtering some corpora with modified Moore-Lewis
+# specify corpora to be filtered and ratio to be kept, either before or after word alignment
+#mml-filter-corpora = toy
+#mml-before-wa = "-proportion 0.9"
+#mml-after-wa = "-proportion 0.9"
+
### create a bilingual concordancer for the model
#
#biconcor = $moses-script-dir/ems/biconcor/biconcor