diff options
author | Hieu Hoang <hieuhoang@gmail.com> | 2013-07-09 15:41:08 +0400 |
---|---|---|
committer | Hieu Hoang <hieuhoang@gmail.com> | 2013-07-09 15:41:08 +0400 |
commit | 49ede97304574996765d4cf0b8435de96ac26262 (patch) | |
tree | 6d4c277e8c8cbaeb84c6d9aae258cf8328d47b51 | |
parent | fad88a14515f15a41c53609a065f8985739d1ab1 (diff) | |
parent | 418abf42fa319ad13a48b6433e279da89ae0e078 (diff) |
merge
-rw-r--r-- | misc/queryPhraseTableMin.cpp | 2 | ||||
-rw-r--r-- | moses/FF/OSM-Feature/OpSequenceModel.cpp | 53 | ||||
-rw-r--r-- | moses/FF/OSM-Feature/OpSequenceModel.h | 3 | ||||
-rw-r--r-- | moses/LVoc.h | 2 | ||||
-rw-r--r-- | moses/StaticData.cpp | 2 | ||||
-rw-r--r-- | moses/StaticData.h | 4 | ||||
-rw-r--r-- | moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp | 2 | ||||
-rw-r--r-- | moses/TranslationModel/CompactPT/PhraseDecoder.cpp | 6 | ||||
-rw-r--r-- | moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp | 2 | ||||
-rw-r--r-- | moses/TranslationModel/CompactPT/PhraseTableCreator.cpp | 8 | ||||
-rw-r--r-- | moses/TranslationModel/CompactPT/PhraseTableCreator.h | 2 | ||||
-rw-r--r-- | moses/TypeDef.h | 54 | ||||
-rw-r--r-- | phrase-extract/relax-parse-main.cpp | 4 | ||||
-rwxr-xr-x | scripts/OSM/OSM-Train.sh | 34 | ||||
-rwxr-xr-x | scripts/OSM/extract-singletons.perl | 46 | ||||
-rwxr-xr-x | scripts/OSM/flipAlignment | bin | 0 -> 31388 bytes | |||
-rwxr-xr-x | scripts/OSM/generateSequences | bin | 0 -> 176800 bytes | |||
-rw-r--r-- | scripts/ems/experiment.meta | 9 | ||||
-rwxr-xr-x | scripts/ems/experiment.perl | 4 | ||||
-rwxr-xr-x | scripts/training/train-model.perl | 12 | ||||
-rw-r--r-- | symal/cmd.h | 10 |
21 files changed, 154 insertions, 105 deletions
diff --git a/misc/queryPhraseTableMin.cpp b/misc/queryPhraseTableMin.cpp index 2f0caf910..0b4324020 100644 --- a/misc/queryPhraseTableMin.cpp +++ b/misc/queryPhraseTableMin.cpp @@ -65,7 +65,7 @@ int main(int argc, char **argv) sourcePhrase.CreateFromString(Input, input, line, "||dummy_string||", NULL); TargetPhraseVectorPtr decodedPhraseColl - = pdc.GetTargetPhraseCollectionRaw(sourcePhrase); + = pdc.GetTargetPhraseCollectionRaw(sourcePhrase); if(decodedPhraseColl != NULL) { if(reportCounts) diff --git a/moses/FF/OSM-Feature/OpSequenceModel.cpp b/moses/FF/OSM-Feature/OpSequenceModel.cpp index fa8007156..8d9227aa5 100644 --- a/moses/FF/OSM-Feature/OpSequenceModel.cpp +++ b/moses/FF/OSM-Feature/OpSequenceModel.cpp @@ -20,24 +20,6 @@ void OpSequenceModel :: readLanguageModel(const char *lmFile) { string unkOp = "_TRANS_SLF_"; - - - /* - - // Code for SRILM - - vector <int> numbers; - int nonWordFlag = 0; - - ptrOp = new Api; - ptrOp -> read_lm(lmFile,lmOrder); - numbers.push_back(ptrOp->getLMID(const_cast <char *> (unkOp.c_str()))); - unkOpProb = ptrOp->contextProbN(numbers,nonWordFlag); - - */ - - // Code to load KenLM - OSM = new Model(m_lmPath.c_str()); State startState = OSM->NullContextState(); State endState; @@ -48,36 +30,6 @@ void OpSequenceModel :: readLanguageModel(const char *lmFile) void OpSequenceModel::Load() { - /* - // load future cost - - //vector <string> input; - ifstream sr (m_featurePath.c_str()); - char* tmp; - - CHECK(sr.is_open()); - - vector<FactorType> factorOrder; - factorOrder.push_back(0); - - string line; - while (std::getline(sr, line)) - { - std::vector<std::string> tokens; - tokens = TokenizeMultiCharSeparator(line, "|||"); - CHECK(tokens.size() == 3); - - Phrase source, target; - source.CreateFromString(Input, factorOrder, tokens[0], "|", NULL); - target.CreateFromString(Output, factorOrder, tokens[1], "|", NULL); - - ParallelPhrase pp(source, target); - Scores scores = Tokenize<float>(tokens[2], " "); - m_futureCost[pp] = scores; - // m_coll[pp] = scores; - } - - */ readLanguageModel(m_lmPath.c_str()); } @@ -284,9 +236,8 @@ std::vector<float> OpSequenceModel::GetFutureScores(const Phrase &source, const void OpSequenceModel::SetParameter(const std::string& key, const std::string& value) { - if (key == "feature-path") { - m_featurePath = value; - } else if (key == "path") { + + if (key == "path") { m_lmPath = value; } else if (key == "order") { lmOrder = Scan<int>(value); diff --git a/moses/FF/OSM-Feature/OpSequenceModel.h b/moses/FF/OSM-Feature/OpSequenceModel.h index fe9cef0bd..504a3dea8 100644 --- a/moses/FF/OSM-Feature/OpSequenceModel.h +++ b/moses/FF/OSM-Feature/OpSequenceModel.h @@ -60,8 +60,7 @@ protected: std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase; std::set <int> targetNullWords; - std::string m_featurePath, m_lmPath; - + std::string m_lmPath; }; diff --git a/moses/LVoc.h b/moses/LVoc.h index 5f5423832..485e3f481 100644 --- a/moses/LVoc.h +++ b/moses/LVoc.h @@ -38,7 +38,7 @@ public: } LabelId add(const Key& k) { std::pair<typename M::iterator,bool> p - =m.insert(std::make_pair(k,data.size())); + =m.insert(std::make_pair(k,data.size())); if(p.second) data.push_back(k); CHECK(static_cast<size_t>(p.first->second)<data.size()); return p.first->second; diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp index 96108c9e3..af52b5cbf 100644 --- a/moses/StaticData.cpp +++ b/moses/StaticData.cpp @@ -948,7 +948,7 @@ const TranslationOptionList* StaticData::FindTransOptListInCache(const DecodeGra boost::mutex::scoped_lock lock(m_transOptCacheMutex); #endif std::map<std::pair<std::pair<size_t, std::string>, Phrase>, std::pair<TranslationOptionList*,clock_t> >::iterator iter - = m_transOptCache.find(key); + = m_transOptCache.find(key); if (iter == m_transOptCache.end()) return NULL; iter->second.second = clock(); // update last used time diff --git a/moses/StaticData.h b/moses/StaticData.h index 8eba8c774..c64b22940 100644 --- a/moses/StaticData.h +++ b/moses/StaticData.h @@ -673,7 +673,7 @@ public: return false; } std::map< std::string, std::set< std::string > >::const_iterator lookupIgnoreFF - = m_weightSettingIgnoreFF.find( m_currentWeightSetting ); + = m_weightSettingIgnoreFF.find( m_currentWeightSetting ); if (lookupIgnoreFF == m_weightSettingIgnoreFF.end()) { return false; } @@ -691,7 +691,7 @@ public: return false; } std::map< std::string, std::set< size_t > >::const_iterator lookupIgnoreDP - = m_weightSettingIgnoreDP.find( m_currentWeightSetting ); + = m_weightSettingIgnoreDP.find( m_currentWeightSetting ); if (lookupIgnoreDP == m_weightSettingIgnoreDP.end()) { return false; } diff --git a/moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp b/moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp index 7e60dec7d..655ed01ca 100644 --- a/moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp +++ b/moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp @@ -428,7 +428,7 @@ void CompressionTaskReordering::operator()() while(scoresNum < m_encodedScores.size()) { std::string scores = m_encodedScores[scoresNum]; std::string compressedScores - = m_creator.CompressEncodedScores(scores); + = m_creator.CompressEncodedScores(scores); std::string dummy; PackedItem packedItem(scoresNum, dummy, compressedScores, 0); diff --git a/moses/TranslationModel/CompactPT/PhraseDecoder.cpp b/moses/TranslationModel/CompactPT/PhraseDecoder.cpp index 057e49268..c0767dad9 100644 --- a/moses/TranslationModel/CompactPT/PhraseDecoder.cpp +++ b/moses/TranslationModel/CompactPT/PhraseDecoder.cpp @@ -61,7 +61,7 @@ PhraseDecoder::~PhraseDecoder() inline unsigned PhraseDecoder::GetSourceSymbolId(std::string& symbol) { boost::unordered_map<std::string, unsigned>::iterator it - = m_sourceSymbolsMap.find(symbol); + = m_sourceSymbolsMap.find(symbol); if(it != m_sourceSymbolsMap.end()) return it->second; @@ -200,7 +200,7 @@ TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase & if(m_coding == PREnc) { std::pair<TargetPhraseVectorPtr, size_t> cachedPhraseColl - = m_decodingCache.Retrieve(sourcePhrase); + = m_decodingCache.Retrieve(sourcePhrase); // Has been cached and is complete or does not need to be completed if(cachedPhraseColl.first != NULL && (!topLevel || cachedPhraseColl.second == 0)) @@ -255,7 +255,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection( if(m_coding == REnc) { for(size_t i = 0; i < sourcePhrase.GetSize(); i++) { std::string sourceWord - = sourcePhrase.GetWord(i).GetString(*m_input, false); + = sourcePhrase.GetWord(i).GetString(*m_input, false); unsigned idx = GetSourceSymbolId(sourceWord); sourceWords.push_back(idx); } diff --git a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp index 60c1876ba..8d0f9ff2f 100644 --- a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp +++ b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp @@ -117,7 +117,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollection(const Phrase &sourcePhrase) c // Retrieve target phrase collection from phrase table TargetPhraseVectorPtr decodedPhraseColl - = m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true, true); + = m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true, true); if(decodedPhraseColl != NULL && decodedPhraseColl->size()) { TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl)); diff --git a/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp b/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp index 25e232eec..331f80642 100644 --- a/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp +++ b/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp @@ -426,7 +426,7 @@ void PhraseTableCreator::AddTargetSymbolId(std::string& symbol) unsigned PhraseTableCreator::GetSourceSymbolId(std::string& symbol) { boost::unordered_map<std::string, unsigned>::iterator it - = m_sourceSymbolsMap.find(symbol); + = m_sourceSymbolsMap.find(symbol); if(it != m_sourceSymbolsMap.end()) return it->second; @@ -437,7 +437,7 @@ unsigned PhraseTableCreator::GetSourceSymbolId(std::string& symbol) unsigned PhraseTableCreator::GetTargetSymbolId(std::string& symbol) { boost::unordered_map<std::string, unsigned>::iterator it - = m_targetSymbolsMap.find(symbol); + = m_targetSymbolsMap.find(symbol); if(it != m_targetSymbolsMap.end()) return it->second; @@ -451,7 +451,7 @@ unsigned PhraseTableCreator::GetOrAddTargetSymbolId(std::string& symbol) boost::mutex::scoped_lock lock(m_mutex); #endif boost::unordered_map<std::string, unsigned>::iterator it - = m_targetSymbolsMap.find(symbol); + = m_targetSymbolsMap.find(symbol); if(it != m_targetSymbolsMap.end()) return it->second; @@ -1212,7 +1212,7 @@ void CompressionTask::operator()() while(collectionNum < m_encodedCollections.size()) { std::string collection = m_encodedCollections[collectionNum]; std::string compressedCollection - = m_creator.CompressEncodedCollection(collection); + = m_creator.CompressEncodedCollection(collection); std::string dummy; PackedItem packedItem(collectionNum, dummy, compressedCollection, 0); diff --git a/moses/TranslationModel/CompactPT/PhraseTableCreator.h b/moses/TranslationModel/CompactPT/PhraseTableCreator.h index b92df1697..fd5fc1581 100644 --- a/moses/TranslationModel/CompactPT/PhraseTableCreator.h +++ b/moses/TranslationModel/CompactPT/PhraseTableCreator.h @@ -143,7 +143,7 @@ public: return data; else { typename std::vector<DataType>::iterator it - = std::lower_bound(m_bestVec.begin(), m_bestVec.end(), data); + = std::lower_bound(m_bestVec.begin(), m_bestVec.end(), data); if(it != m_bestVec.end()) return *it; else diff --git a/moses/TypeDef.h b/moses/TypeDef.h index a49a3feb9..af3a47b23 100644 --- a/moses/TypeDef.h +++ b/moses/TypeDef.h @@ -108,28 +108,28 @@ enum DistortionOrientationOptions { enum PhraseTableImplementation { Memory = 0 - ,Binary = 1 - ,OnDisk = 2 - //,GlueRule = 3 - //,Joshua = 4 - //,MemorySourceLabel = 5 - ,SCFG = 6 - //,BerkeleyDb = 7 - ,SuffixArray = 8 - ,Hiero = 9 - ,ALSuffixArray = 10 - ,FuzzyMatch = 11 - ,Compact = 12 - ,Interpolated = 13 - ,DSuffixArray = 14 + ,Binary = 1 + ,OnDisk = 2 + //,GlueRule = 3 + //,Joshua = 4 + //,MemorySourceLabel = 5 + ,SCFG = 6 + //,BerkeleyDb = 7 + ,SuffixArray = 8 + ,Hiero = 9 + ,ALSuffixArray = 10 + ,FuzzyMatch = 11 + ,Compact = 12 + ,Interpolated = 13 + ,DSuffixArray = 14 }; enum InputTypeEnum { SentenceInput = 0 - ,ConfusionNetworkInput = 1 - ,WordLatticeInput = 2 - ,TreeInputType = 3 - ,WordLatticeInput2 = 4 + ,ConfusionNetworkInput = 1 + ,WordLatticeInput = 2 + ,TreeInputType = 3 + ,WordLatticeInput2 = 4 }; @@ -142,7 +142,7 @@ enum XmlInputType { enum DictionaryFind { Best = 0 - ,All = 1 + ,All = 1 }; enum ParsingAlgorithm { @@ -152,22 +152,22 @@ enum ParsingAlgorithm { enum SearchAlgorithm { Normal = 0 - ,CubePruning = 1 - ,CubeGrowing = 2 - ,ChartDecoding= 3 - ,NormalBatch = 4 - ,ChartIncremental = 5 + ,CubePruning = 1 + ,CubeGrowing = 2 + ,ChartDecoding= 3 + ,NormalBatch = 4 + ,ChartIncremental = 5 }; enum SourceLabelOverlap { SourceLabelOverlapAdd = 0 - ,SourceLabelOverlapReplace = 1 - ,SourceLabelOverlapDiscard = 2 + ,SourceLabelOverlapReplace = 1 + ,SourceLabelOverlapDiscard = 2 }; enum WordAlignmentSort { NoSort = 0 - ,TargetOrder = 1 + ,TargetOrder = 1 }; enum FormatType { diff --git a/phrase-extract/relax-parse-main.cpp b/phrase-extract/relax-parse-main.cpp index d3cb4c527..a58d4d97f 100644 --- a/phrase-extract/relax-parse-main.cpp +++ b/phrase-extract/relax-parse-main.cpp @@ -137,7 +137,7 @@ void LeftBinarize( SyntaxTree &tree, ParentNodes &parents ) const SplitPoints &point = *p; if (point.size() > 3) { const vector< SyntaxNode* >& topNodes - = tree.GetNodes( point[0], point[point.size()-1]-1); + = tree.GetNodes( point[0], point[point.size()-1]-1); string topLabel = topNodes[0]->GetLabel(); for(size_t i=2; i<point.size()-1; i++) { @@ -155,7 +155,7 @@ void RightBinarize( SyntaxTree &tree, ParentNodes &parents ) if (point.size() > 3) { int endPoint = point[point.size()-1]-1; const vector< SyntaxNode* >& topNodes - = tree.GetNodes( point[0], endPoint); + = tree.GetNodes( point[0], endPoint); string topLabel = topNodes[0]->GetLabel(); for(size_t i=1; i<point.size()-2; i++) { diff --git a/scripts/OSM/OSM-Train.sh b/scripts/OSM/OSM-Train.sh new file mode 100755 index 000000000..a5e002704 --- /dev/null +++ b/scripts/OSM/OSM-Train.sh @@ -0,0 +1,34 @@ +#!/bin/sh + +echo 'Training OSM - Start' +date + +mkdir $5 +ln -s $1 $5/e +ln -s $2 $5/f + +$6/scripts/OSM/flipAlignment $3 > $5/align + +echo 'Extracting Singletons' + +$6/scripts/OSM/extract-singletons.perl $5/e $5/f $5/align > $5/Singletons + +echo 'Converting Bilingual Sentence Pair into Operation Corpus' + +$6/scripts/OSM/generateSequences $5/e $5/f $5/align $5/Singletons > $5/opCorpus # Generates Operation Corpus + +echo 'Learning Operation Sequence Translation Model' + +$7/ngram-count -kndiscount -order $4 -unk -text $5/opCorpus -lm $5/operationLM + +echo 'Binarizing' + +$6/bin/build_binary $5/operationLM $5/operationLM.bin + +\rm $5/e +\rm $5/f +\rm $5/align + +echo 'Training OSM - End' +date + diff --git a/scripts/OSM/extract-singletons.perl b/scripts/OSM/extract-singletons.perl new file mode 100755 index 000000000..33f857929 --- /dev/null +++ b/scripts/OSM/extract-singletons.perl @@ -0,0 +1,46 @@ +#!/usr/bin/perl + +use Getopt::Std; +getopts('q'); + +$target = shift; +$source = shift; +$align = shift or die " +Usage: extract-singletons.perl target source align + +"; +open(TARGET,$target) or die "Error: unable to open target file \"$target\"!\n"; +open(SOURCE,$source) or die "Error: unable to open source file \"$source\"!\n"; +open(ALIGN,$align) or die "Error: unable to open alignment file \"$align\"!\n"; + +while (<TARGET>) { + unless (defined $opt_q) { + print STDERR "\r$M" if ++$M%1000 == 0; + } + @T = split; + $_ = <SOURCE>; + @S = split; + $_ = <ALIGN>; + @A = split; + + my(@source_links,@target_links); + for( $i=0; $i<=$#A; $i+=2 ) { + $target_links[$A[$i]]++; + $source_links[$A[$i+1]]++; + } + + for( $i=0; $i<=$#A; $i+=2 ) { + if ($target_links[$A[$i]] == 1 && $source_links[$A[$i+1]] == 1 && + $T[$A[$i]] eq $S[$A[$i+1]]) + { + $count{$S[$A[$i+1]]}++; # Print this if it only occurs here + } + else { + $count{$S[$A[$i+1]]}+=2; # Don't print this + } + } +} + +foreach $w (sort keys %count) { + print "$w\n" if $count{$w}==1; +} diff --git a/scripts/OSM/flipAlignment b/scripts/OSM/flipAlignment Binary files differnew file mode 100755 index 000000000..edb2a8098 --- /dev/null +++ b/scripts/OSM/flipAlignment diff --git a/scripts/OSM/generateSequences b/scripts/OSM/generateSequences Binary files differnew file mode 100755 index 000000000..8ac22682e --- /dev/null +++ b/scripts/OSM/generateSequences diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta index 875296d41..70590155b 100644 --- a/scripts/ems/experiment.meta +++ b/scripts/ems/experiment.meta @@ -511,6 +511,13 @@ pcfg-score default-name: model/scored-corpus pass-unless: use-pcfg-feature template: ln -s IN.$input-extension OUT.$input-extension ; $moses-bin-dir/pcfg-score IN1.$output-extension < IN.$output-extension > OUT.$output-extension +build-osm + in: corpus word-alignment + out: osm-model + ignore-unless: operation-sequence-model + rerun-on-change: operation-sequence-model training-options script giza-settings + template: $moses-script-dir/OSM/OSM-Train.sh IN0.$output-extension IN0.$input-extension IN1.$alignment-symmetrization-method $operation-sequence-model-order OUT $moses-src-dir $srilm-dir + default-name: model/OSM extract-phrases in: corpus-mml-postfilter=OR=word-alignment scored-corpus out: extracted-phrases @@ -579,7 +586,7 @@ build-sparse default-name: model/sparse-features template: $moses-script-dir/ems/support/build-sparse-features.perl IN $input-extension $output-extension OUT "$sparse-features" create-config - in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains INTERPOLATED-LM:binlm LM:binlm + in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains osm-model INTERPOLATED-LM:binlm LM:binlm out: config ignore-if: use-hiero rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl index e22638d79..d3a4f9788 100755 --- a/scripts/ems/experiment.perl +++ b/scripts/ems/experiment.perl @@ -2164,11 +2164,13 @@ sub get_config_tables { sub define_training_create_config { my ($step_id) = @_; - my ($config,$reordering_table,$phrase_translation_table,$generation_table,$sparse_lexical_features,$domains,@LM) + my ($config,$reordering_table,$phrase_translation_table,$generation_table,$sparse_lexical_features,$domains,$osm, @LM) = &get_output_and_input($step_id); my $cmd = &get_config_tables($config,$reordering_table,$phrase_translation_table,$generation_table,$domains); + $cmd .= "-osm-model $osm/operationLM.bin " if $osm; + # sparse lexical features provide additional content for config file $cmd .= "-additional-ini-file $sparse_lexical_features.ini " if $sparse_lexical_features; diff --git a/scripts/training/train-model.perl b/scripts/training/train-model.perl index 8f5580541..332eb78bb 100755 --- a/scripts/training/train-model.perl +++ b/scripts/training/train-model.perl @@ -31,7 +31,7 @@ my($_EXTERNAL_BINDIR, $_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_ $_DECODING_GRAPH_BACKOFF, $_DECODING_STEPS, $_PARALLEL, $_FACTOR_DELIMITER, @_PHRASE_TABLE, @_REORDERING_TABLE, @_GENERATION_TABLE, @_GENERATION_TYPE, $_GENERATION_CORPUS, - $_DONT_ZIP, $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG, + $_DONT_ZIP, $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG, $_OSM, $_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_GHKM,$_PCFG,@_EXTRACT_OPTIONS,@_SCORE_OPTIONS, $_ALT_DIRECT_RULE_SCORE_1, $_ALT_DIRECT_RULE_SCORE_2, $_OMIT_WORD_ALIGNMENT,$_FORCE_FACTORED_FILENAMES, @@ -119,6 +119,7 @@ $_HELP = 1 'xml' => \$_XML, 'no-word-alignment' => \$_OMIT_WORD_ALIGNMENT, 'config=s' => \$_CONFIG, + 'osm-model=s' => \$_OSM, 'max-lexical-reordering' => \$_MAX_LEXICAL_REORDERING, 'do-steps=s' => \$_DO_STEPS, 'memscore:s' => \$_MEMSCORE, @@ -1992,6 +1993,15 @@ sub create_ini { } } + # operation sequence model + + if($_OSM) + { + + $feature_spec .= "OpSequenceModel num-features=5 path=". $_OSM . " \n"; + $weight_spec .= "OpSequenceModel0= 0.08 -0.02 0.02 -0.001 0.03\n"; + } + # distance-based reordering if (!$_HIERARCHICAL) { $feature_spec .= "Distortion\n"; diff --git a/symal/cmd.h b/symal/cmd.h index a728dda78..01a00abc0 100644 --- a/symal/cmd.h +++ b/symal/cmd.h @@ -33,14 +33,14 @@ extern "C" { #endif #if defined(__STDC__) -int DeclareParams(char *, ...); + int DeclareParams(char *, ...); #else -int DeclareParams(); + int DeclareParams(); #endif -int GetParams(int *n, char ***a,char *CmdFileName), - SPrintParams(), - PrintParams(); + int GetParams(int *n, char ***a,char *CmdFileName), + SPrintParams(), + PrintParams(); #ifdef __cplusplus } |