Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hieu Hoang <hieuhoang@gmail.com> 2013-07-09 15:41:08 +0400
committer: Hieu Hoang <hieuhoang@gmail.com> 2013-07-09 15:41:08 +0400
commit: 49ede97304574996765d4cf0b8435de96ac26262 (patch)
tree: 6d4c277e8c8cbaeb84c6d9aae258cf8328d47b51
parent: fad88a14515f15a41c53609a065f8985739d1ab1 (diff)
parent: 418abf42fa319ad13a48b6433e279da89ae0e078 (diff)
merge
-rw-r--r-- misc/queryPhraseTableMin.cpp | 2
-rw-r--r-- moses/FF/OSM-Feature/OpSequenceModel.cpp | 53
-rw-r--r-- moses/FF/OSM-Feature/OpSequenceModel.h | 3
-rw-r--r-- moses/LVoc.h | 2
-rw-r--r-- moses/StaticData.cpp | 2
-rw-r--r-- moses/StaticData.h | 4
-rw-r--r-- moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp | 2
-rw-r--r-- moses/TranslationModel/CompactPT/PhraseDecoder.cpp | 6
-rw-r--r-- moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp | 2
-rw-r--r-- moses/TranslationModel/CompactPT/PhraseTableCreator.cpp | 8
-rw-r--r-- moses/TranslationModel/CompactPT/PhraseTableCreator.h | 2
-rw-r--r-- moses/TypeDef.h | 54
-rw-r--r-- phrase-extract/relax-parse-main.cpp | 4
-rwxr-xr-x scripts/OSM/OSM-Train.sh | 34
-rwxr-xr-x scripts/OSM/extract-singletons.perl | 46
-rwxr-xr-x scripts/OSM/flipAlignment | bin 0 -> 31388 bytes
-rwxr-xr-x scripts/OSM/generateSequences | bin 0 -> 176800 bytes
-rw-r--r-- scripts/ems/experiment.meta | 9
-rwxr-xr-x scripts/ems/experiment.perl | 4
-rwxr-xr-x scripts/training/train-model.perl | 12
-rw-r--r-- symal/cmd.h | 10
21 files changed, 154 insertions, 105 deletions
diff --git a/misc/queryPhraseTableMin.cpp b/misc/queryPhraseTableMin.cpp
index 2f0caf910..0b4324020 100644
--- a/misc/queryPhraseTableMin.cpp
+++ b/misc/queryPhraseTableMin.cpp
@@ -65,7 +65,7 @@ int main(int argc, char **argv)
sourcePhrase.CreateFromString(Input, input, line, "||dummy_string||", NULL);
TargetPhraseVectorPtr decodedPhraseColl
- = pdc.GetTargetPhraseCollectionRaw(sourcePhrase);
+ = pdc.GetTargetPhraseCollectionRaw(sourcePhrase);
if(decodedPhraseColl != NULL) {
if(reportCounts)
diff --git a/moses/FF/OSM-Feature/OpSequenceModel.cpp b/moses/FF/OSM-Feature/OpSequenceModel.cpp
index fa8007156..8d9227aa5 100644
--- a/moses/FF/OSM-Feature/OpSequenceModel.cpp
+++ b/moses/FF/OSM-Feature/OpSequenceModel.cpp
@@ -20,24 +20,6 @@ void OpSequenceModel :: readLanguageModel(const char *lmFile)
{
string unkOp = "_TRANS_SLF_";
-
-
- /*
-
- // Code for SRILM
-
- vector <int> numbers;
- int nonWordFlag = 0;
-
- ptrOp = new Api;
- ptrOp -> read_lm(lmFile,lmOrder);
- numbers.push_back(ptrOp->getLMID(const_cast <char *> (unkOp.c_str())));
- unkOpProb = ptrOp->contextProbN(numbers,nonWordFlag);
-
- */
-
- // Code to load KenLM
-
OSM = new Model(m_lmPath.c_str());
State startState = OSM->NullContextState();
State endState;
@@ -48,36 +30,6 @@ void OpSequenceModel :: readLanguageModel(const char *lmFile)
void OpSequenceModel::Load()
{
- /*
- // load future cost
-
- //vector <string> input;
- ifstream sr (m_featurePath.c_str());
- char* tmp;
-
- CHECK(sr.is_open());
-
- vector<FactorType> factorOrder;
- factorOrder.push_back(0);
-
- string line;
- while (std::getline(sr, line))
- {
- std::vector<std::string> tokens;
- tokens = TokenizeMultiCharSeparator(line, "|||");
- CHECK(tokens.size() == 3);
-
- Phrase source, target;
- source.CreateFromString(Input, factorOrder, tokens[0], "|", NULL);
- target.CreateFromString(Output, factorOrder, tokens[1], "|", NULL);
-
- ParallelPhrase pp(source, target);
- Scores scores = Tokenize<float>(tokens[2], " ");
- m_futureCost[pp] = scores;
- // m_coll[pp] = scores;
- }
-
- */
readLanguageModel(m_lmPath.c_str());
}
@@ -284,9 +236,8 @@ std::vector<float> OpSequenceModel::GetFutureScores(const Phrase &source, const
void OpSequenceModel::SetParameter(const std::string& key, const std::string& value)
{
- if (key == "feature-path") {
- m_featurePath = value;
- } else if (key == "path") {
+
+ if (key == "path") {
m_lmPath = value;
} else if (key == "order") {
lmOrder = Scan<int>(value);
diff --git a/moses/FF/OSM-Feature/OpSequenceModel.h b/moses/FF/OSM-Feature/OpSequenceModel.h
index fe9cef0bd..504a3dea8 100644
--- a/moses/FF/OSM-Feature/OpSequenceModel.h
+++ b/moses/FF/OSM-Feature/OpSequenceModel.h
@@ -60,8 +60,7 @@ protected:
std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase;
std::set <int> targetNullWords;
- std::string m_featurePath, m_lmPath;
-
+ std::string m_lmPath;
};
diff --git a/moses/LVoc.h b/moses/LVoc.h
index 5f5423832..485e3f481 100644
--- a/moses/LVoc.h
+++ b/moses/LVoc.h
@@ -38,7 +38,7 @@ public:
}
LabelId add(const Key& k) {
std::pair<typename M::iterator,bool> p
- =m.insert(std::make_pair(k,data.size()));
+ =m.insert(std::make_pair(k,data.size()));
if(p.second) data.push_back(k);
CHECK(static_cast<size_t>(p.first->second)<data.size());
return p.first->second;
diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp
index 96108c9e3..af52b5cbf 100644
--- a/moses/StaticData.cpp
+++ b/moses/StaticData.cpp
@@ -948,7 +948,7 @@ const TranslationOptionList* StaticData::FindTransOptListInCache(const DecodeGra
boost::mutex::scoped_lock lock(m_transOptCacheMutex);
#endif
std::map<std::pair<std::pair<size_t, std::string>, Phrase>, std::pair<TranslationOptionList*,clock_t> >::iterator iter
- = m_transOptCache.find(key);
+ = m_transOptCache.find(key);
if (iter == m_transOptCache.end())
return NULL;
iter->second.second = clock(); // update last used time
diff --git a/moses/StaticData.h b/moses/StaticData.h
index 8eba8c774..c64b22940 100644
--- a/moses/StaticData.h
+++ b/moses/StaticData.h
@@ -673,7 +673,7 @@ public:
return false;
}
std::map< std::string, std::set< std::string > >::const_iterator lookupIgnoreFF
- = m_weightSettingIgnoreFF.find( m_currentWeightSetting );
+ = m_weightSettingIgnoreFF.find( m_currentWeightSetting );
if (lookupIgnoreFF == m_weightSettingIgnoreFF.end()) {
return false;
}
@@ -691,7 +691,7 @@ public:
return false;
}
std::map< std::string, std::set< size_t > >::const_iterator lookupIgnoreDP
- = m_weightSettingIgnoreDP.find( m_currentWeightSetting );
+ = m_weightSettingIgnoreDP.find( m_currentWeightSetting );
if (lookupIgnoreDP == m_weightSettingIgnoreDP.end()) {
return false;
}
diff --git a/moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp b/moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp
index 7e60dec7d..655ed01ca 100644
--- a/moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp
+++ b/moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp
@@ -428,7 +428,7 @@ void CompressionTaskReordering::operator()()
while(scoresNum < m_encodedScores.size()) {
std::string scores = m_encodedScores[scoresNum];
std::string compressedScores
- = m_creator.CompressEncodedScores(scores);
+ = m_creator.CompressEncodedScores(scores);
std::string dummy;
PackedItem packedItem(scoresNum, dummy, compressedScores, 0);
diff --git a/moses/TranslationModel/CompactPT/PhraseDecoder.cpp b/moses/TranslationModel/CompactPT/PhraseDecoder.cpp
index 057e49268..c0767dad9 100644
--- a/moses/TranslationModel/CompactPT/PhraseDecoder.cpp
+++ b/moses/TranslationModel/CompactPT/PhraseDecoder.cpp
@@ -61,7 +61,7 @@ PhraseDecoder::~PhraseDecoder()
inline unsigned PhraseDecoder::GetSourceSymbolId(std::string& symbol)
{
boost::unordered_map<std::string, unsigned>::iterator it
- = m_sourceSymbolsMap.find(symbol);
+ = m_sourceSymbolsMap.find(symbol);
if(it != m_sourceSymbolsMap.end())
return it->second;
@@ -200,7 +200,7 @@ TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &
if(m_coding == PREnc) {
std::pair<TargetPhraseVectorPtr, size_t> cachedPhraseColl
- = m_decodingCache.Retrieve(sourcePhrase);
+ = m_decodingCache.Retrieve(sourcePhrase);
// Has been cached and is complete or does not need to be completed
if(cachedPhraseColl.first != NULL && (!topLevel || cachedPhraseColl.second == 0))
@@ -255,7 +255,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
if(m_coding == REnc) {
for(size_t i = 0; i < sourcePhrase.GetSize(); i++) {
std::string sourceWord
- = sourcePhrase.GetWord(i).GetString(*m_input, false);
+ = sourcePhrase.GetWord(i).GetString(*m_input, false);
unsigned idx = GetSourceSymbolId(sourceWord);
sourceWords.push_back(idx);
}
diff --git a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
index 60c1876ba..8d0f9ff2f 100644
--- a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
+++ b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
@@ -117,7 +117,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollection(const Phrase &sourcePhrase) c
// Retrieve target phrase collection from phrase table
TargetPhraseVectorPtr decodedPhraseColl
- = m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true, true);
+ = m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true, true);
if(decodedPhraseColl != NULL && decodedPhraseColl->size()) {
TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl));
diff --git a/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp b/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp
index 25e232eec..331f80642 100644
--- a/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp
+++ b/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp
@@ -426,7 +426,7 @@ void PhraseTableCreator::AddTargetSymbolId(std::string& symbol)
unsigned PhraseTableCreator::GetSourceSymbolId(std::string& symbol)
{
boost::unordered_map<std::string, unsigned>::iterator it
- = m_sourceSymbolsMap.find(symbol);
+ = m_sourceSymbolsMap.find(symbol);
if(it != m_sourceSymbolsMap.end())
return it->second;
@@ -437,7 +437,7 @@ unsigned PhraseTableCreator::GetSourceSymbolId(std::string& symbol)
unsigned PhraseTableCreator::GetTargetSymbolId(std::string& symbol)
{
boost::unordered_map<std::string, unsigned>::iterator it
- = m_targetSymbolsMap.find(symbol);
+ = m_targetSymbolsMap.find(symbol);
if(it != m_targetSymbolsMap.end())
return it->second;
@@ -451,7 +451,7 @@ unsigned PhraseTableCreator::GetOrAddTargetSymbolId(std::string& symbol)
boost::mutex::scoped_lock lock(m_mutex);
#endif
boost::unordered_map<std::string, unsigned>::iterator it
- = m_targetSymbolsMap.find(symbol);
+ = m_targetSymbolsMap.find(symbol);
if(it != m_targetSymbolsMap.end())
return it->second;
@@ -1212,7 +1212,7 @@ void CompressionTask::operator()()
while(collectionNum < m_encodedCollections.size()) {
std::string collection = m_encodedCollections[collectionNum];
std::string compressedCollection
- = m_creator.CompressEncodedCollection(collection);
+ = m_creator.CompressEncodedCollection(collection);
std::string dummy;
PackedItem packedItem(collectionNum, dummy, compressedCollection, 0);
diff --git a/moses/TranslationModel/CompactPT/PhraseTableCreator.h b/moses/TranslationModel/CompactPT/PhraseTableCreator.h
index b92df1697..fd5fc1581 100644
--- a/moses/TranslationModel/CompactPT/PhraseTableCreator.h
+++ b/moses/TranslationModel/CompactPT/PhraseTableCreator.h
@@ -143,7 +143,7 @@ public:
return data;
else {
typename std::vector<DataType>::iterator it
- = std::lower_bound(m_bestVec.begin(), m_bestVec.end(), data);
+ = std::lower_bound(m_bestVec.begin(), m_bestVec.end(), data);
if(it != m_bestVec.end())
return *it;
else
diff --git a/moses/TypeDef.h b/moses/TypeDef.h
index a49a3feb9..af3a47b23 100644
--- a/moses/TypeDef.h
+++ b/moses/TypeDef.h
@@ -108,28 +108,28 @@ enum DistortionOrientationOptions {
enum PhraseTableImplementation {
Memory = 0
- ,Binary = 1
- ,OnDisk = 2
- //,GlueRule = 3
- //,Joshua = 4
- //,MemorySourceLabel = 5
- ,SCFG = 6
- //,BerkeleyDb = 7
- ,SuffixArray = 8
- ,Hiero = 9
- ,ALSuffixArray = 10
- ,FuzzyMatch = 11
- ,Compact = 12
- ,Interpolated = 13
- ,DSuffixArray = 14
+ ,Binary = 1
+ ,OnDisk = 2
+ //,GlueRule = 3
+ //,Joshua = 4
+ //,MemorySourceLabel = 5
+ ,SCFG = 6
+ //,BerkeleyDb = 7
+ ,SuffixArray = 8
+ ,Hiero = 9
+ ,ALSuffixArray = 10
+ ,FuzzyMatch = 11
+ ,Compact = 12
+ ,Interpolated = 13
+ ,DSuffixArray = 14
};
enum InputTypeEnum {
SentenceInput = 0
- ,ConfusionNetworkInput = 1
- ,WordLatticeInput = 2
- ,TreeInputType = 3
- ,WordLatticeInput2 = 4
+ ,ConfusionNetworkInput = 1
+ ,WordLatticeInput = 2
+ ,TreeInputType = 3
+ ,WordLatticeInput2 = 4
};
@@ -142,7 +142,7 @@ enum XmlInputType {
enum DictionaryFind {
Best = 0
- ,All = 1
+ ,All = 1
};
enum ParsingAlgorithm {
@@ -152,22 +152,22 @@ enum ParsingAlgorithm {
enum SearchAlgorithm {
Normal = 0
- ,CubePruning = 1
- ,CubeGrowing = 2
- ,ChartDecoding= 3
- ,NormalBatch = 4
- ,ChartIncremental = 5
+ ,CubePruning = 1
+ ,CubeGrowing = 2
+ ,ChartDecoding= 3
+ ,NormalBatch = 4
+ ,ChartIncremental = 5
};
enum SourceLabelOverlap {
SourceLabelOverlapAdd = 0
- ,SourceLabelOverlapReplace = 1
- ,SourceLabelOverlapDiscard = 2
+ ,SourceLabelOverlapReplace = 1
+ ,SourceLabelOverlapDiscard = 2
};
enum WordAlignmentSort {
NoSort = 0
- ,TargetOrder = 1
+ ,TargetOrder = 1
};
enum FormatType {
diff --git a/phrase-extract/relax-parse-main.cpp b/phrase-extract/relax-parse-main.cpp
index d3cb4c527..a58d4d97f 100644
--- a/phrase-extract/relax-parse-main.cpp
+++ b/phrase-extract/relax-parse-main.cpp
@@ -137,7 +137,7 @@ void LeftBinarize( SyntaxTree &tree, ParentNodes &parents )
const SplitPoints &point = *p;
if (point.size() > 3) {
const vector< SyntaxNode* >& topNodes
- = tree.GetNodes( point[0], point[point.size()-1]-1);
+ = tree.GetNodes( point[0], point[point.size()-1]-1);
string topLabel = topNodes[0]->GetLabel();
for(size_t i=2; i<point.size()-1; i++) {
@@ -155,7 +155,7 @@ void RightBinarize( SyntaxTree &tree, ParentNodes &parents )
if (point.size() > 3) {
int endPoint = point[point.size()-1]-1;
const vector< SyntaxNode* >& topNodes
- = tree.GetNodes( point[0], endPoint);
+ = tree.GetNodes( point[0], endPoint);
string topLabel = topNodes[0]->GetLabel();
for(size_t i=1; i<point.size()-2; i++) {
diff --git a/scripts/OSM/OSM-Train.sh b/scripts/OSM/OSM-Train.sh
new file mode 100755
index 000000000..a5e002704
--- /dev/null
+++ b/scripts/OSM/OSM-Train.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+
+echo 'Training OSM - Start'
+date
+
+mkdir $5
+ln -s $1 $5/e
+ln -s $2 $5/f
+
+$6/scripts/OSM/flipAlignment $3 > $5/align
+
+echo 'Extracting Singletons'
+
+$6/scripts/OSM/extract-singletons.perl $5/e $5/f $5/align > $5/Singletons
+
+echo 'Converting Bilingual Sentence Pair into Operation Corpus'
+
+$6/scripts/OSM/generateSequences $5/e $5/f $5/align $5/Singletons > $5/opCorpus # Generates Operation Corpus
+
+echo 'Learning Operation Sequence Translation Model'
+
+$7/ngram-count -kndiscount -order $4 -unk -text $5/opCorpus -lm $5/operationLM
+
+echo 'Binarizing'
+
+$6/bin/build_binary $5/operationLM $5/operationLM.bin
+
+\rm $5/e
+\rm $5/f
+\rm $5/align
+
+echo 'Training OSM - End'
+date
+
diff --git a/scripts/OSM/extract-singletons.perl b/scripts/OSM/extract-singletons.perl
new file mode 100755
index 000000000..33f857929
--- /dev/null
+++ b/scripts/OSM/extract-singletons.perl
@@ -0,0 +1,46 @@
+#!/usr/bin/perl
+
+use Getopt::Std;
+getopts('q');
+
+$target = shift;
+$source = shift;
+$align = shift or die "
+Usage: extract-singletons.perl target source align
+
+";
+open(TARGET,$target) or die "Error: unable to open target file \"$target\"!\n";
+open(SOURCE,$source) or die "Error: unable to open source file \"$source\"!\n";
+open(ALIGN,$align) or die "Error: unable to open alignment file \"$align\"!\n";
+
+while (<TARGET>) {
+ unless (defined $opt_q) {
+ print STDERR "\r$M" if ++$M%1000 == 0;
+ }
+ @T = split;
+ $_ = <SOURCE>;
+ @S = split;
+ $_ = <ALIGN>;
+ @A = split;
+
+ my(@source_links,@target_links);
+ for( $i=0; $i<=$#A; $i+=2 ) {
+ $target_links[$A[$i]]++;
+ $source_links[$A[$i+1]]++;
+ }
+
+ for( $i=0; $i<=$#A; $i+=2 ) {
+ if ($target_links[$A[$i]] == 1 && $source_links[$A[$i+1]] == 1 &&
+ $T[$A[$i]] eq $S[$A[$i+1]])
+ {
+ $count{$S[$A[$i+1]]}++; # Print this if it only occurs here
+ }
+ else {
+ $count{$S[$A[$i+1]]}+=2; # Don't print this
+ }
+ }
+}
+
+foreach $w (sort keys %count) {
+ print "$w\n" if $count{$w}==1;
+}
diff --git a/scripts/OSM/flipAlignment b/scripts/OSM/flipAlignment
new file mode 100755
index 000000000..edb2a8098
--- /dev/null
+++ b/scripts/OSM/flipAlignment
Binary files differ
diff --git a/scripts/OSM/generateSequences b/scripts/OSM/generateSequences
new file mode 100755
index 000000000..8ac22682e
--- /dev/null
+++ b/scripts/OSM/generateSequences
Binary files differ
diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta
index 875296d41..70590155b 100644
--- a/scripts/ems/experiment.meta
+++ b/scripts/ems/experiment.meta
@@ -511,6 +511,13 @@ pcfg-score
default-name: model/scored-corpus
pass-unless: use-pcfg-feature
template: ln -s IN.$input-extension OUT.$input-extension ; $moses-bin-dir/pcfg-score IN1.$output-extension < IN.$output-extension > OUT.$output-extension
+build-osm
+ in: corpus word-alignment
+ out: osm-model
+ ignore-unless: operation-sequence-model
+ rerun-on-change: operation-sequence-model training-options script giza-settings
+ template: $moses-script-dir/OSM/OSM-Train.sh IN0.$output-extension IN0.$input-extension IN1.$alignment-symmetrization-method $operation-sequence-model-order OUT $moses-src-dir $srilm-dir
+ default-name: model/OSM
extract-phrases
in: corpus-mml-postfilter=OR=word-alignment scored-corpus
out: extracted-phrases
@@ -579,7 +586,7 @@ build-sparse
default-name: model/sparse-features
template: $moses-script-dir/ems/support/build-sparse-features.perl IN $input-extension $output-extension OUT "$sparse-features"
create-config
- in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains INTERPOLATED-LM:binlm LM:binlm
+ in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains osm-model INTERPOLATED-LM:binlm LM:binlm
out: config
ignore-if: use-hiero
rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini
diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl
index e22638d79..d3a4f9788 100755
--- a/scripts/ems/experiment.perl
+++ b/scripts/ems/experiment.perl
@@ -2164,11 +2164,13 @@ sub get_config_tables {
sub define_training_create_config {
my ($step_id) = @_;
- my ($config,$reordering_table,$phrase_translation_table,$generation_table,$sparse_lexical_features,$domains,@LM)
+ my ($config,$reordering_table,$phrase_translation_table,$generation_table,$sparse_lexical_features,$domains,$osm, @LM)
= &get_output_and_input($step_id);
my $cmd = &get_config_tables($config,$reordering_table,$phrase_translation_table,$generation_table,$domains);
+ $cmd .= "-osm-model $osm/operationLM.bin " if $osm;
+
# sparse lexical features provide additional content for config file
$cmd .= "-additional-ini-file $sparse_lexical_features.ini " if $sparse_lexical_features;
diff --git a/scripts/training/train-model.perl b/scripts/training/train-model.perl
index 8f5580541..332eb78bb 100755
--- a/scripts/training/train-model.perl
+++ b/scripts/training/train-model.perl
@@ -31,7 +31,7 @@ my($_EXTERNAL_BINDIR, $_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_
$_DECODING_GRAPH_BACKOFF,
$_DECODING_STEPS, $_PARALLEL, $_FACTOR_DELIMITER, @_PHRASE_TABLE,
@_REORDERING_TABLE, @_GENERATION_TABLE, @_GENERATION_TYPE, $_GENERATION_CORPUS,
- $_DONT_ZIP, $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG,
+ $_DONT_ZIP, $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG, $_OSM,
$_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_GHKM,$_PCFG,@_EXTRACT_OPTIONS,@_SCORE_OPTIONS,
$_ALT_DIRECT_RULE_SCORE_1, $_ALT_DIRECT_RULE_SCORE_2,
$_OMIT_WORD_ALIGNMENT,$_FORCE_FACTORED_FILENAMES,
@@ -119,6 +119,7 @@ $_HELP = 1
'xml' => \$_XML,
'no-word-alignment' => \$_OMIT_WORD_ALIGNMENT,
'config=s' => \$_CONFIG,
+ 'osm-model=s' => \$_OSM,
'max-lexical-reordering' => \$_MAX_LEXICAL_REORDERING,
'do-steps=s' => \$_DO_STEPS,
'memscore:s' => \$_MEMSCORE,
@@ -1992,6 +1993,15 @@ sub create_ini {
}
}
+ # operation sequence model
+
+ if($_OSM)
+ {
+
+ $feature_spec .= "OpSequenceModel num-features=5 path=". $_OSM . " \n";
+ $weight_spec .= "OpSequenceModel0= 0.08 -0.02 0.02 -0.001 0.03\n";
+ }
+
# distance-based reordering
if (!$_HIERARCHICAL) {
$feature_spec .= "Distortion\n";
diff --git a/symal/cmd.h b/symal/cmd.h
index a728dda78..01a00abc0 100644
--- a/symal/cmd.h
+++ b/symal/cmd.h
@@ -33,14 +33,14 @@ extern "C" {
#endif
#if defined(__STDC__)
-int DeclareParams(char *, ...);
+ int DeclareParams(char *, ...);
#else
-int DeclareParams();
+ int DeclareParams();
#endif
-int GetParams(int *n, char ***a,char *CmdFileName),
- SPrintParams(),
- PrintParams();
+ int GetParams(int *n, char ***a,char *CmdFileName),
+ SPrintParams(),
+ PrintParams();
#ifdef __cplusplus
}