Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Phil Williams <philip.williams@mac.com> 2012-05-25 20:29:47 +0400
committer: Phil Williams <philip.williams@mac.com> 2012-05-25 20:29:47 +0400
commit: 90c0bc9f5ceec4e7d33386ec597fd753e7d23d4a (patch)
tree: 2e4aa63e87c6150a5317e3e8bae3cc00d9187db3 /scripts/ems
parent: 2fab137aaeeda8077734e4c6e5627bfb44d27691 (diff)
Add an optional PCFG scoring feature for target syntax models (similar to
the p_cfg feature used in Marcu, Wang, Echihabi, and Knight (2006)).
Diffstat (limited to 'scripts/ems')
-rw-r--r-- scripts/ems/experiment.meta | 15
-rwxr-xr-x scripts/ems/experiment.perl | 2
2 files changed, 16 insertions, 1 deletion
diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta
index 51ac0f67a..b33c589d2 100644
--- a/scripts/ems/experiment.meta
+++ b/scripts/ems/experiment.meta
@@ -344,8 +344,21 @@ parse-relax
pass-unless: input-parse-relaxer output-parse-relaxer
template-if: input-parse-relaxer IN.$input-extension OUT.$input-extension
template-if: output-parse-relaxer IN.$output-extension OUT.$output-extension
+pcfg-extract
+ in: parse-relaxed-corpus
+ out: pcfg
+ default-name: model/pcfg
+ ignore-unless: use-pcfg-feature
+ rerun-on-change: use-pcfg-feature
+ template: $moses-script-dir/training/phrase-extract/pcfg-extract/pcfg-extract < IN.$output-extension > OUT.$output-extension
+pcfg-score
+ in: parse-relaxed-corpus pcfg
+ out: scored-corpus
+ default-name: model/scored-corpus
+ pass-unless: use-pcfg-feature
+ template: ln -s IN.$input-extension OUT.$input-extension ; $moses-script-dir/training/phrase-extract/pcfg-score/pcfg-score IN1.$output-extension < IN.$output-extension > OUT.$output-extension
extract-phrases
- in: word-alignment parse-relaxed-corpus
+ in: word-alignment scored-corpus
out: extracted-phrases
rerun-on-change: max-phrase-length translation-factors reordering-factors hierarchical-rule-set extract-settings training-options script use-ghkm
default-name: model/extract
diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl
index 59bd2788f..0c61a2a05 100755
--- a/scripts/ems/experiment.perl
+++ b/scripts/ems/experiment.perl
@@ -2007,6 +2007,7 @@ sub get_training_setting {
my $target_syntax = &get("GENERAL:output-parser");
my $score_settings = &get("TRAINING:score-settings");
my $parallel = &get("TRAINING:parallel");
+ my $pcfg = &get("TRAINING:use-pcfg-feature");
my $xml = $source_syntax || $target_syntax;
@@ -2029,6 +2030,7 @@ sub get_training_setting {
$cmd .= "-glue-grammar " if $hierarchical;
$cmd .= "-score-options '".$score_settings."' " if $score_settings;
$cmd .= "-parallel " if $parallel;
+ $cmd .= "-pcfg " if $pcfg;
# factored training
if (&backoff_and_get("TRAINING:input-factors")) {