Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nadir Durrani <nadir@hel.inf.ed.ac.uk> 2013-07-04 22:58:19 +0400
committer: Nadir Durrani <nadir@hel.inf.ed.ac.uk> 2013-07-04 22:58:19 +0400
commit: d2bc6a2584f18c7037bea65df8566e21ee53c190 (patch)
tree: 23b8adf588c559d07706bf1f58bf11374ecf6794
parent: 389b7762e807b22f0bf5b2a53c499fbb534324fb (diff)
In EMS
-rwxr-xr-x scripts/OSM/OSM-Train bin 26391 -> 0 bytes
-rwxr-xr-x scripts/OSM/OSM-Train.sh 21
-rw-r--r-- scripts/ems/experiment.meta 6
-rwxr-xr-x scripts/ems/experiment.perl 4
-rwxr-xr-x scripts/training/train-model.perl 12
5 files changed, 27 insertions, 16 deletions
diff --git a/scripts/OSM/OSM-Train b/scripts/OSM/OSM-Train
deleted file mode 100755
index 5e9b607aa..000000000
--- a/scripts/OSM/OSM-Train
+++ /dev/null
Binary files differ
diff --git a/scripts/OSM/OSM-Train.sh b/scripts/OSM/OSM-Train.sh
index cb20f5f1e..019976ab5 100755
--- a/scripts/OSM/OSM-Train.sh
+++ b/scripts/OSM/OSM-Train.sh
@@ -1,34 +1,33 @@
#!/bin/sh
-PATH=$PATH:/fs/hel1/nadir/SRILM/bin/i686-m64/
-
echo 'Training OSM - Start'
date
-\rm $5/e
-\rm $5/f
-\rm $5/align
-
+mkdir $5
ln -s $1 $5/e
ln -s $2 $5/f
-./flipAlignment $3 > $5/align
+$6/scripts/OSM/flipAlignment $3 > $5/align
echo 'Extracting Singletons'
-./extract-singletons.perl $5/e $5/f $5/align > $5/Singletons
+$6/scripts/OSM/extract-singletons.perl $5/e $5/f $5/align > $5/Singletons
echo 'Converting Bilingual Sentence Pair into Operation Corpus'
-./generateSequences $5/e $5/f $5/align $5/Singletons > $5/opCorpus # Generates Operation Corpus
+$6/scripts/OSM/generateSequences $5/e $5/f $5/align $5/Singletons > $5/opCorpus # Generates Operation Corpus
echo 'Learning Operation Sequence Translation Model'
-ngram-count -kndiscount -order $4 -unk -text $5/opCorpus -lm $5/operationLM$4
+ngram-count -kndiscount -order $4 -unk -text $5/opCorpus -lm $5/operationLM
echo 'Binarizing'
-../../bin/build_binary $5/operationLM$4 $5/operationLM$4.bin
+$6/bin/build_binary $5/operationLM$4 $5/operationLM.bin
+
+\rm $5/e
+\rm $5/f
+\rm $5/align
echo 'Training OSM - End'
date
diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta
index cf79d580b..0ead260bb 100644
--- a/scripts/ems/experiment.meta
+++ b/scripts/ems/experiment.meta
@@ -516,8 +516,8 @@ build-osm
out: osm-model
ignore-unless: operation-sequence-model
rerun-on-change: operation-sequence-model training-options script giza-settings
- template: $moses-script-dir/OSM/OSM-Train IN0.$output-extension IN0.$input-extension IN1.$alignment-symmetrization-method $operation-sequence-model-order OUT $moses-src-dir
- default-name: model/OSM/
+ template: $moses-script-dir/OSM/OSM-Train.sh IN0.$output-extension IN0.$input-extension IN1.$alignment-symmetrization-method $operation-sequence-model-order OUT $moses-src-dir
+ default-name: model/OSM
extract-phrases
in: corpus-mml-postfilter=OR=word-alignment scored-corpus
out: extracted-phrases
@@ -586,7 +586,7 @@ build-sparse
default-name: model/sparse-features
template: $moses-script-dir/ems/support/build-sparse-features.perl IN $input-extension $output-extension OUT "$sparse-features"
create-config
- in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains INTERPOLATED-LM:binlm LM:binlm osm-model
+ in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains osm-model INTERPOLATED-LM:binlm LM:binlm
out: config
ignore-if: use-hiero
rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini
diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl
index e22638d79..d3a4f9788 100755
--- a/scripts/ems/experiment.perl
+++ b/scripts/ems/experiment.perl
@@ -2164,11 +2164,13 @@ sub get_config_tables {
sub define_training_create_config {
my ($step_id) = @_;
- my ($config,$reordering_table,$phrase_translation_table,$generation_table,$sparse_lexical_features,$domains,@LM)
+ my ($config,$reordering_table,$phrase_translation_table,$generation_table,$sparse_lexical_features,$domains,$osm, @LM)
= &get_output_and_input($step_id);
my $cmd = &get_config_tables($config,$reordering_table,$phrase_translation_table,$generation_table,$domains);
+ $cmd .= "-osm-model $osm/operationLM.bin " if $osm;
+
# sparse lexical features provide additional content for config file
$cmd .= "-additional-ini-file $sparse_lexical_features.ini " if $sparse_lexical_features;
diff --git a/scripts/training/train-model.perl b/scripts/training/train-model.perl
index 8f5580541..332eb78bb 100755
--- a/scripts/training/train-model.perl
+++ b/scripts/training/train-model.perl
@@ -31,7 +31,7 @@ my($_EXTERNAL_BINDIR, $_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_
$_DECODING_GRAPH_BACKOFF,
$_DECODING_STEPS, $_PARALLEL, $_FACTOR_DELIMITER, @_PHRASE_TABLE,
@_REORDERING_TABLE, @_GENERATION_TABLE, @_GENERATION_TYPE, $_GENERATION_CORPUS,
- $_DONT_ZIP, $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG,
+ $_DONT_ZIP, $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG, $_OSM,
$_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_GHKM,$_PCFG,@_EXTRACT_OPTIONS,@_SCORE_OPTIONS,
$_ALT_DIRECT_RULE_SCORE_1, $_ALT_DIRECT_RULE_SCORE_2,
$_OMIT_WORD_ALIGNMENT,$_FORCE_FACTORED_FILENAMES,
@@ -119,6 +119,7 @@ $_HELP = 1
'xml' => \$_XML,
'no-word-alignment' => \$_OMIT_WORD_ALIGNMENT,
'config=s' => \$_CONFIG,
+ 'osm-model=s' => \$_OSM,
'max-lexical-reordering' => \$_MAX_LEXICAL_REORDERING,
'do-steps=s' => \$_DO_STEPS,
'memscore:s' => \$_MEMSCORE,
@@ -1992,6 +1993,15 @@ sub create_ini {
}
}
+ # operation sequence model
+
+ if($_OSM)
+ {
+
+ $feature_spec .= "OpSequenceModel num-features=5 path=". $_OSM . " \n";
+ $weight_spec .= "OpSequenceModel0= 0.08 -0.02 0.02 -0.001 0.03\n";
+ }
+
# distance-based reordering
if (!$_HIERARCHICAL) {
$feature_spec .= "Distortion\n";