diff options
author | Nadir Durrani <nadir@hel.inf.ed.ac.uk> | 2013-07-04 22:58:19 +0400 |
---|---|---|
committer | Nadir Durrani <nadir@hel.inf.ed.ac.uk> | 2013-07-04 22:58:19 +0400 |
commit | d2bc6a2584f18c7037bea65df8566e21ee53c190 (patch) | |
tree | 23b8adf588c559d07706bf1f58bf11374ecf6794 | |
parent | 389b7762e807b22f0bf5b2a53c499fbb534324fb (diff) |
In EMS
-rwxr-xr-x | scripts/OSM/OSM-Train | bin | 26391 -> 0 bytes | |||
-rwxr-xr-x | scripts/OSM/OSM-Train.sh | 21 | ||||
-rw-r--r-- | scripts/ems/experiment.meta | 6 | ||||
-rwxr-xr-x | scripts/ems/experiment.perl | 4 | ||||
-rwxr-xr-x | scripts/training/train-model.perl | 12 |
5 files changed, 27 insertions, 16 deletions
diff --git a/scripts/OSM/OSM-Train b/scripts/OSM/OSM-Train Binary files differ deleted file mode 100755 index 5e9b607aa..000000000 --- a/scripts/OSM/OSM-Train +++ /dev/null diff --git a/scripts/OSM/OSM-Train.sh b/scripts/OSM/OSM-Train.sh index cb20f5f1e..019976ab5 100755 --- a/scripts/OSM/OSM-Train.sh +++ b/scripts/OSM/OSM-Train.sh @@ -1,34 +1,33 @@ #!/bin/sh -PATH=$PATH:/fs/hel1/nadir/SRILM/bin/i686-m64/ - echo 'Training OSM - Start' date -\rm $5/e -\rm $5/f -\rm $5/align - +mkdir $5 ln -s $1 $5/e ln -s $2 $5/f -./flipAlignment $3 > $5/align +$6/scripts/OSM/flipAlignment $3 > $5/align echo 'Extracting Singletons' -./extract-singletons.perl $5/e $5/f $5/align > $5/Singletons +$6/scripts/OSM/extract-singletons.perl $5/e $5/f $5/align > $5/Singletons echo 'Converting Bilingual Sentence Pair into Operation Corpus' -./generateSequences $5/e $5/f $5/align $5/Singletons > $5/opCorpus # Generates Operation Corpus +$6/scripts/OSM/generateSequences $5/e $5/f $5/align $5/Singletons > $5/opCorpus # Generates Operation Corpus echo 'Learning Operation Sequence Translation Model' -ngram-count -kndiscount -order $4 -unk -text $5/opCorpus -lm $5/operationLM$4 +ngram-count -kndiscount -order $4 -unk -text $5/opCorpus -lm $5/operationLM echo 'Binarizing' -../../bin/build_binary $5/operationLM$4 $5/operationLM$4.bin +$6/bin/build_binary $5/operationLM$4 $5/operationLM.bin + +\rm $5/e +\rm $5/f +\rm $5/align echo 'Training OSM - End' date diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta index cf79d580b..0ead260bb 100644 --- a/scripts/ems/experiment.meta +++ b/scripts/ems/experiment.meta @@ -516,8 +516,8 @@ build-osm out: osm-model ignore-unless: operation-sequence-model rerun-on-change: operation-sequence-model training-options script giza-settings - template: $moses-script-dir/OSM/OSM-Train IN0.$output-extension IN0.$input-extension IN1.$alignment-symmetrization-method $operation-sequence-model-order OUT $moses-src-dir - default-name: model/OSM/ + template: 
$moses-script-dir/OSM/OSM-Train.sh IN0.$output-extension IN0.$input-extension IN1.$alignment-symmetrization-method $operation-sequence-model-order OUT $moses-src-dir + default-name: model/OSM extract-phrases in: corpus-mml-postfilter=OR=word-alignment scored-corpus out: extracted-phrases @@ -586,7 +586,7 @@ build-sparse default-name: model/sparse-features template: $moses-script-dir/ems/support/build-sparse-features.perl IN $input-extension $output-extension OUT "$sparse-features" create-config - in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains INTERPOLATED-LM:binlm LM:binlm osm-model + in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains osm-model INTERPOLATED-LM:binlm LM:binlm out: config ignore-if: use-hiero rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl index e22638d79..d3a4f9788 100755 --- a/scripts/ems/experiment.perl +++ b/scripts/ems/experiment.perl @@ -2164,11 +2164,13 @@ sub get_config_tables { sub define_training_create_config { my ($step_id) = @_; - my ($config,$reordering_table,$phrase_translation_table,$generation_table,$sparse_lexical_features,$domains,@LM) + my ($config,$reordering_table,$phrase_translation_table,$generation_table,$sparse_lexical_features,$domains,$osm, @LM) = &get_output_and_input($step_id); my $cmd = &get_config_tables($config,$reordering_table,$phrase_translation_table,$generation_table,$domains); + $cmd .= "-osm-model $osm/operationLM.bin " if $osm; + # sparse lexical features provide additional content for config file $cmd .= "-additional-ini-file $sparse_lexical_features.ini " if 
$sparse_lexical_features; diff --git a/scripts/training/train-model.perl b/scripts/training/train-model.perl index 8f5580541..332eb78bb 100755 --- a/scripts/training/train-model.perl +++ b/scripts/training/train-model.perl @@ -31,7 +31,7 @@ my($_EXTERNAL_BINDIR, $_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_ $_DECODING_GRAPH_BACKOFF, $_DECODING_STEPS, $_PARALLEL, $_FACTOR_DELIMITER, @_PHRASE_TABLE, @_REORDERING_TABLE, @_GENERATION_TABLE, @_GENERATION_TYPE, $_GENERATION_CORPUS, - $_DONT_ZIP, $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG, + $_DONT_ZIP, $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG, $_OSM, $_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_GHKM,$_PCFG,@_EXTRACT_OPTIONS,@_SCORE_OPTIONS, $_ALT_DIRECT_RULE_SCORE_1, $_ALT_DIRECT_RULE_SCORE_2, $_OMIT_WORD_ALIGNMENT,$_FORCE_FACTORED_FILENAMES, @@ -119,6 +119,7 @@ $_HELP = 1 'xml' => \$_XML, 'no-word-alignment' => \$_OMIT_WORD_ALIGNMENT, 'config=s' => \$_CONFIG, + 'osm-model=s' => \$_OSM, 'max-lexical-reordering' => \$_MAX_LEXICAL_REORDERING, 'do-steps=s' => \$_DO_STEPS, 'memscore:s' => \$_MEMSCORE, @@ -1992,6 +1993,15 @@ sub create_ini { } } + # operation sequence model + + if($_OSM) + { + + $feature_spec .= "OpSequenceModel num-features=5 path=". $_OSM . " \n"; + $weight_spec .= "OpSequenceModel0= 0.08 -0.02 0.02 -0.001 0.03\n"; + } + # distance-based reordering if (!$_HIERARCHICAL) { $feature_spec .= "Distortion\n"; |