github.com/moses-smt/mosesdecoder.git
-rw-r--r--  .gitignore | 1
-rw-r--r--  Jamroot | 18
-rw-r--r--  NOTICE | 5
-rw-r--r--  OnDiskPt/TargetPhrase.cpp | 4
-rw-r--r--  OnDiskPt/Word.cpp | 20
-rw-r--r--  OnDiskPt/queryOnDiskPt.cpp | 2
-rw-r--r--  contrib/other-builds/CreateOnDiskPt/.cproject | 18
-rw-r--r--  contrib/other-builds/consolidate/.cproject | 136
-rw-r--r--  contrib/other-builds/consolidate/.project | 12
-rw-r--r--  contrib/other-builds/extract-ghkm/.cproject | 134
-rw-r--r--  contrib/other-builds/extract-ghkm/.project | 122
-rw-r--r--  contrib/other-builds/extract-mixed-syntax/.cproject | 140
-rw-r--r--  contrib/other-builds/extract-mixed-syntax/.project | 192
-rw-r--r--  contrib/other-builds/extract-mixed-syntax/Makefile | 17
-rw-r--r--  contrib/other-builds/extract-mixed-syntax/OutputFileStream.cpp | 79
-rw-r--r--  contrib/other-builds/extract-mixed-syntax/OutputFileStream.h | 50
-rw-r--r--  contrib/other-builds/extract-mixed-syntax/Parameter.cpp | 41
-rwxr-xr-x  contrib/other-builds/extract-mixed-syntax/filter-by-source-word-count.perl | 27
-rwxr-xr-x  contrib/other-builds/extract-mixed-syntax/learnable/equal.perl | 33
-rwxr-xr-x  contrib/other-builds/extract-mixed-syntax/learnable/get-by-line-number.perl | 29
-rwxr-xr-x  contrib/other-builds/extract-mixed-syntax/learnable/learnable.perl | 108
-rwxr-xr-x  contrib/other-builds/extract-mixed-syntax/learnable/num-deriv.perl | 151
-rwxr-xr-x  contrib/other-builds/extract-mixed-syntax/learnable/reachable.perl | 147
-rwxr-xr-x  contrib/other-builds/extract-mixed-syntax/learnable/run-parallel.perl | 17
-rw-r--r--  contrib/other-builds/extract-ordering/.cproject | 134
-rw-r--r--  contrib/other-builds/extract-ordering/.project | 74
-rw-r--r--  contrib/other-builds/extract-rules/.cproject | 131
-rw-r--r--  contrib/other-builds/extract-rules/.gitignore | 1
-rw-r--r--  contrib/other-builds/extract-rules/.project | 15
-rw-r--r--  contrib/other-builds/extract/.cproject | 115
-rw-r--r--  contrib/other-builds/extractor/.cproject | 2
-rw-r--r--  contrib/other-builds/lm/.cproject | 20
-rw-r--r--  contrib/other-builds/lm/.project | 1030
-rw-r--r--  contrib/other-builds/manual-label/.cproject | 123
-rw-r--r--  contrib/other-builds/mira/.cproject | 177
-rw-r--r--  contrib/other-builds/mira/.project | 81
-rw-r--r--  contrib/other-builds/moses-chart-cmd/.cproject | 20
-rw-r--r--  contrib/other-builds/moses-cmd/.cproject | 20
-rw-r--r--  contrib/other-builds/moses/.cproject | 163
-rw-r--r--  contrib/other-builds/moses/.project | 990
-rw-r--r--  contrib/other-builds/score/.cproject | 154
-rw-r--r--  contrib/other-builds/score/.project | 10
-rw-r--r--  contrib/rt/Empty.c | 0
-rw-r--r--  contrib/rt/README | 9
-rwxr-xr-x  contrib/rt/compile.sh | 2
-rw-r--r--  contrib/server/Jamfile | 2
-rw-r--r--  contrib/server/mosesserver.cpp | 43
-rw-r--r--  doc/PhraseDictionaryBitextSampling.howto | 4
-rw-r--r--  lm/Jamfile | 2
-rw-r--r--  lm/builder/Jamfile | 4
-rw-r--r--  lm/builder/dump_counts_main.cc | 36
-rw-r--r--  lm/builder/interpolate.cc | 10
-rw-r--r--  lm/builder/print.cc | 5
-rw-r--r--  lm/model_test.cc | 2
-rw-r--r--  lm/ngram_query.hh | 2
-rw-r--r--  lm/read_arpa.hh | 30
-rw-r--r--  lm/test.arpa | 2
-rw-r--r--  lm/test_nounk.arpa | 2
-rw-r--r--  lm/trie_sort.cc | 14
-rw-r--r--  lm/wrappers/README | 3
-rw-r--r--  lm/wrappers/nplm.cc | 90
-rw-r--r--  lm/wrappers/nplm.hh | 83
-rw-r--r--  mert/BleuScorer.cpp | 4
-rw-r--r--  mert/BleuScorer.h | 2
-rw-r--r--  mert/FeatureStats.cpp | 30
-rw-r--r--  mert/FeatureStats.h | 5
-rw-r--r--  mert/ForestRescore.cpp | 432
-rw-r--r--  mert/ForestRescore.h | 120
-rw-r--r--  mert/ForestRescoreTest.cpp | 246
-rw-r--r--  mert/HopeFearDecoder.cpp | 339
-rw-r--r--  mert/HopeFearDecoder.h | 152
-rw-r--r--  mert/Hypergraph.cpp | 313
-rw-r--r--  mert/Hypergraph.h | 251
-rw-r--r--  mert/HypergraphTest.cpp | 151
-rw-r--r--  mert/Jamfile | 7
-rw-r--r--  mert/MiraFeatureVector.cpp | 40
-rw-r--r--  mert/MiraFeatureVector.h | 8
-rw-r--r--  mert/MiraWeightVector.cpp | 17
-rw-r--r--  mert/MiraWeightVector.h | 8
-rw-r--r--  mert/TER/alignmentStruct.cpp | 34
-rw-r--r--  mert/TER/alignmentStruct.h | 53
-rw-r--r--  mert/TER/bestShiftStruct.h | 48
-rw-r--r--  mert/TER/hashMap.cpp | 250
-rw-r--r--  mert/TER/hashMap.h | 69
-rw-r--r--  mert/TER/hashMapInfos.cpp | 249
-rw-r--r--  mert/TER/hashMapInfos.h | 69
-rw-r--r--  mert/TER/hashMapStringInfos.cpp | 322
-rw-r--r--  mert/TER/hashMapStringInfos.h | 69
-rw-r--r--  mert/TER/infosHasher.cpp | 71
-rw-r--r--  mert/TER/infosHasher.h | 57
-rw-r--r--  mert/TER/stringHasher.cpp | 64
-rw-r--r--  mert/TER/stringHasher.h | 58
-rw-r--r--  mert/TER/stringInfosHasher.cpp | 71
-rw-r--r--  mert/TER/stringInfosHasher.h | 60
-rw-r--r--  mert/TER/terAlignment.cpp | 285
-rw-r--r--  mert/TER/terAlignment.h | 83
-rw-r--r--  mert/TER/terShift.cpp | 134
-rw-r--r--  mert/TER/terShift.h | 75
-rw-r--r--  mert/TER/tercalc.cpp | 1823
-rw-r--r--  mert/TER/tercalc.h | 121
-rw-r--r--  mert/TER/tools.cpp | 1085
-rw-r--r--  mert/TER/tools.h | 117
-rw-r--r--  mert/kbmira.cpp | 187
-rw-r--r--  mira/Decoder.cpp | 2
-rw-r--r--  mira/Jamfile | 2
-rw-r--r--  mira/Main.cpp | 2
-rw-r--r--  misc/Jamfile | 10
-rw-r--r--  misc/merge-sorted.cc (renamed from contrib/m4m/util/merge-sorted.cc) | 0
-rw-r--r--  moses-chart-cmd/Jamfile | 2
-rw-r--r--  moses-chart-cmd/Main.cpp | 28
-rw-r--r--  moses-cmd/Jamfile | 12
-rw-r--r--  moses-cmd/Main.cpp | 195
-rw-r--r--  moses-cmd/simulate-pe.cc | 856
-rw-r--r--  moses/BitmapContainer.cpp | 52
-rw-r--r--  moses/ChartCell.cpp | 9
-rw-r--r--  moses/ChartCell.h | 3
-rw-r--r--  moses/ChartCellLabelSet.h | 9
-rw-r--r--  moses/ChartHypothesis.cpp | 42
-rw-r--r--  moses/ChartHypothesis.h | 6
-rw-r--r--  moses/ChartHypothesisCollection.cpp | 25
-rw-r--r--  moses/ChartHypothesisCollection.h | 4
-rw-r--r--  moses/ChartManager.cpp | 41
-rw-r--r--  moses/ChartManager.h | 21
-rw-r--r--  moses/ChartParser.cpp | 14
-rw-r--r--  moses/ChartParserCallback.h | 2
-rw-r--r--  moses/ChartTranslationOption.cpp | 4
-rw-r--r--  moses/ChartTranslationOption.h | 2
-rw-r--r--  moses/ChartTranslationOptionList.cpp | 4
-rw-r--r--  moses/ChartTranslationOptionList.h | 2
-rw-r--r--  moses/ChartTranslationOptions.cpp | 4
-rw-r--r--  moses/ChartTranslationOptions.h | 2
-rw-r--r--  moses/DecodeGraph.h | 14
-rw-r--r--  moses/DecodeStepGeneration.cpp | 2
-rw-r--r--  moses/DecodeStepTranslation.cpp | 4
-rw-r--r--  moses/FF/BleuScoreFeature.cpp | 4
-rw-r--r--  moses/FF/BleuScoreFeature.h | 8
-rw-r--r--  moses/FF/ConstrainedDecoding.cpp | 4
-rw-r--r--  moses/FF/ConstrainedDecoding.h | 8
-rw-r--r--  moses/FF/ControlRecombination.cpp | 4
-rw-r--r--  moses/FF/ControlRecombination.h | 8
-rw-r--r--  moses/FF/CountNonTerms.cpp | 2
-rw-r--r--  moses/FF/CountNonTerms.h | 8
-rw-r--r--  moses/FF/CoveredReferenceFeature.cpp | 8
-rw-r--r--  moses/FF/CoveredReferenceFeature.h | 8
-rw-r--r--  moses/FF/DecodeFeature.h | 8
-rw-r--r--  moses/FF/DistortionScoreProducer.cpp | 2
-rw-r--r--  moses/FF/DistortionScoreProducer.h | 8
-rw-r--r--  moses/FF/ExternalFeature.cpp | 4
-rw-r--r--  moses/FF/ExternalFeature.h | 8
-rw-r--r--  moses/FF/Factory.cpp | 47
-rw-r--r--  moses/FF/FeatureFunction.cpp | 8
-rw-r--r--  moses/FF/FeatureFunction.h | 16
-rw-r--r--  moses/FF/GlobalLexicalModel.cpp | 2
-rw-r--r--  moses/FF/GlobalLexicalModel.h | 8
-rw-r--r--  moses/FF/GlobalLexicalModelUnlimited.cpp | 2
-rw-r--r--  moses/FF/GlobalLexicalModelUnlimited.h | 8
-rw-r--r--  moses/FF/HyperParameterAsWeight.h | 8
-rw-r--r--  moses/FF/InputFeature.cpp | 2
-rw-r--r--  moses/FF/InputFeature.h | 9
-rw-r--r--  moses/FF/InternalStructStatelessFF.cpp | 43
-rw-r--r--  moses/FF/InternalStructStatelessFF.h | 40
-rw-r--r--  moses/FF/LexicalReordering/LexicalReordering.cpp | 25
-rw-r--r--  moses/FF/LexicalReordering/LexicalReordering.h | 19
-rw-r--r--  moses/FF/LexicalReordering/LexicalReorderingState.cpp | 114
-rw-r--r--  moses/FF/LexicalReordering/LexicalReorderingState.h | 42
-rw-r--r--  moses/FF/LexicalReordering/ReorderingStack.cpp (renamed from moses/ReorderingStack.cpp) | 0
-rw-r--r--  moses/FF/LexicalReordering/ReorderingStack.h (renamed from moses/ReorderingStack.h) | 2
-rw-r--r--  moses/FF/LexicalReordering/SparseReordering.cpp | 254
-rw-r--r--  moses/FF/LexicalReordering/SparseReordering.h | 133
-rw-r--r--  moses/FF/MaxSpanFreeNonTermSource.cpp | 4
-rw-r--r--  moses/FF/MaxSpanFreeNonTermSource.h | 8
-rw-r--r--  moses/FF/NieceTerminal.cpp | 8
-rw-r--r--  moses/FF/NieceTerminal.h | 8
-rw-r--r--  moses/FF/OSM-Feature/KenOSM.cpp | 32
-rw-r--r--  moses/FF/OSM-Feature/KenOSM.h | 50
-rw-r--r--  moses/FF/OSM-Feature/OpSequenceModel.cpp | 17
-rw-r--r--  moses/FF/OSM-Feature/OpSequenceModel.h | 14
-rw-r--r--  moses/FF/OSM-Feature/osmHyp.cpp | 4
-rw-r--r--  moses/FF/OSM-Feature/osmHyp.h | 5
-rw-r--r--  moses/FF/PhraseBoundaryFeature.cpp | 2
-rw-r--r--  moses/FF/PhraseBoundaryFeature.h | 8
-rw-r--r--  moses/FF/PhraseLengthFeature.cpp | 2
-rw-r--r--  moses/FF/PhraseLengthFeature.h | 8
-rw-r--r--  moses/FF/PhrasePairFeature.cpp | 2
-rw-r--r--  moses/FF/PhrasePairFeature.h | 8
-rw-r--r--  moses/FF/PhrasePenalty.cpp | 39
-rw-r--r--  moses/FF/PhrasePenalty.h | 12
-rw-r--r--  moses/FF/ReferenceComparison.h | 8
-rw-r--r--  moses/FF/RuleScope.cpp | 2
-rw-r--r--  moses/FF/RuleScope.h | 8
-rw-r--r--  moses/FF/SetSourcePhrase.cpp | 2
-rw-r--r--  moses/FF/SetSourcePhrase.h | 8
-rw-r--r--  moses/FF/SkeletonChangeInput.cpp | 92
-rw-r--r--  moses/FF/SkeletonChangeInput.h | 41
-rw-r--r--  moses/FF/SkeletonStatefulFF.cpp | 8
-rw-r--r--  moses/FF/SkeletonStatefulFF.h | 8
-rw-r--r--  moses/FF/SkeletonStatelessFF.cpp | 8
-rw-r--r--  moses/FF/SkeletonStatelessFF.h | 8
-rw-r--r--  moses/FF/SoftMatchingFeature.cpp | 2
-rw-r--r--  moses/FF/SoftMatchingFeature.h | 8
-rw-r--r--  moses/FF/SoftSourceSyntacticConstraintsFeature.cpp | 536
-rw-r--r--  moses/FF/SoftSourceSyntacticConstraintsFeature.h | 87
-rw-r--r--  moses/FF/SourceGHKMTreeInputMatchFeature.cpp | 2
-rw-r--r--  moses/FF/SourceGHKMTreeInputMatchFeature.h | 8
-rw-r--r--  moses/FF/SourceWordDeletionFeature.cpp | 2
-rw-r--r--  moses/FF/SourceWordDeletionFeature.h | 8
-rw-r--r--  moses/FF/SpanLength.cpp | 4
-rw-r--r--  moses/FF/SpanLength.h | 8
-rw-r--r--  moses/FF/SparseHieroReorderingFeature.cpp | 222
-rw-r--r--  moses/FF/SparseHieroReorderingFeature.h | 80
-rw-r--r--  moses/FF/SparseHieroReorderingFeatureTest.cpp | 36
-rw-r--r--  moses/FF/StatefulFeatureFunction.h | 4
-rw-r--r--  moses/FF/StatelessFeatureFunction.h | 4
-rw-r--r--  moses/FF/SyntaxRHS.cpp | 8
-rw-r--r--  moses/FF/SyntaxRHS.h | 8
-rw-r--r--  moses/FF/TargetBigramFeature.cpp | 2
-rw-r--r--  moses/FF/TargetBigramFeature.h | 8
-rw-r--r--  moses/FF/TargetNgramFeature.cpp | 4
-rw-r--r--  moses/FF/TargetNgramFeature.h | 8
-rw-r--r--  moses/FF/TargetWordInsertionFeature.cpp | 2
-rw-r--r--  moses/FF/TargetWordInsertionFeature.h | 8
-rw-r--r--  moses/FF/TreeStructureFeature.cpp | 2
-rw-r--r--  moses/FF/TreeStructureFeature.h | 8
-rw-r--r--  moses/FF/UnknownWordPenaltyProducer.h | 8
-rw-r--r--  moses/FF/WordPenaltyProducer.cpp | 2
-rw-r--r--  moses/FF/WordPenaltyProducer.h | 8
-rw-r--r--  moses/FF/WordTranslationFeature.cpp | 4
-rw-r--r--  moses/FF/WordTranslationFeature.h | 8
-rw-r--r--  moses/FactorCollection.cpp | 17
-rw-r--r--  moses/FactorCollection.h | 2
-rw-r--r--  moses/FeatureVector.cpp | 4
-rw-r--r--  moses/FeatureVector.h | 2
-rw-r--r--  moses/HypergraphOutput.cpp | 248
-rw-r--r--  moses/HypergraphOutput.h | 95
-rw-r--r--  moses/Hypothesis.cpp | 28
-rw-r--r--  moses/Hypothesis.h | 22
-rw-r--r--  moses/Incremental.cpp | 4
-rw-r--r--  moses/Jamfile | 13
-rw-r--r--  moses/LM/Base.cpp | 2
-rw-r--r--  moses/LM/Base.h | 4
-rw-r--r--  moses/LM/DALMWrapper.cpp | 4
-rw-r--r--  moses/LM/DALMWrapper.h | 4
-rw-r--r--  moses/LM/Implementation.cpp | 4
-rw-r--r--  moses/LM/Implementation.h | 4
-rw-r--r--  moses/LM/Jamfile | 15
-rw-r--r--  moses/LM/Ken.cpp | 6
-rw-r--r--  moses/LM/Ken.h | 4
-rw-r--r--  moses/LM/LDHT.cpp | 4
-rw-r--r--  moses/LM/NeuralLMWrapper.cpp | 56
-rw-r--r--  moses/LM/NeuralLMWrapper.h | 10
-rw-r--r--  moses/LM/SingleFactor.cpp | 11
-rw-r--r--  moses/LM/SingleFactor.h | 2
-rw-r--r--  moses/LM/oxlm/LBLLM.cpp | 172
-rw-r--r--  moses/LM/oxlm/LBLLM.h | 53
-rw-r--r--  moses/LM/oxlm/Mapper.cpp | 67
-rw-r--r--  moses/LM/oxlm/Mapper.h | 46
-rw-r--r--  moses/Manager.cpp | 60
-rw-r--r--  moses/Manager.h | 4
-rw-r--r--  moses/MockHypothesis.cpp | 2
-rw-r--r--  moses/PDTAimp.cpp | 463
-rw-r--r--  moses/PDTAimp.h | 443
-rw-r--r--  moses/PP/Factory.cpp | 4
-rw-r--r--  moses/PP/NonTermContextProperty.cpp | 137
-rw-r--r--  moses/PP/NonTermContextProperty.h | 73
-rw-r--r--  moses/PP/OrientationPhraseProperty.cpp | 26
-rw-r--r--  moses/PP/OrientationPhraseProperty.h | 65
-rw-r--r--  moses/PP/SourceLabelsPhraseProperty.cpp | 22
-rw-r--r--  moses/Parameter.cpp | 21
-rw-r--r--  moses/RuleCubeItem.cpp | 2
-rw-r--r--  moses/ScoreComponentCollection.cpp | 14
-rw-r--r--  moses/ScoreComponentCollection.h | 18
-rw-r--r--  moses/ScoreComponentCollectionTest.cpp | 8
-rw-r--r--  moses/SearchNormal.cpp | 2
-rw-r--r--  moses/SearchNormalBatch.cpp | 6
-rw-r--r--  moses/StaticData.cpp | 3
-rw-r--r--  moses/StaticData.h | 4
-rw-r--r--  moses/SyntacticLanguageModel.h | 2
-rw-r--r--  moses/TargetPhrase.cpp | 36
-rw-r--r--  moses/TargetPhrase.h | 18
-rw-r--r--  moses/TranslationModel/BilingualDynSuffixArray.cpp | 4
-rw-r--r--  moses/TranslationModel/BilingualDynSuffixArray.h | 2
-rw-r--r--  moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.h | 13
-rw-r--r--  moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp | 169
-rw-r--r--  moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.h | 21
-rw-r--r--  moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp | 170
-rw-r--r--  moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h | 19
-rw-r--r--  moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp | 2
-rw-r--r--  moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.cpp | 43
-rw-r--r--  moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.h | 6
-rw-r--r--  moses/TranslationModel/CompactPT/PhraseDecoder.cpp | 2
-rw-r--r--  moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp | 2
-rw-r--r--  moses/TranslationModel/CompactPT/StringVector.h | 21
-rw-r--r--  moses/TranslationModel/DynSAInclude/FileHandler.cpp | 8
-rw-r--r--  moses/TranslationModel/PhraseDictionary.cpp | 2
-rw-r--r--  moses/TranslationModel/PhraseDictionary.h | 5
-rw-r--r--  moses/TranslationModel/PhraseDictionaryDynSuffixArray.cpp | 4
-rw-r--r--  moses/TranslationModel/PhraseDictionaryMultiModel.cpp | 132
-rw-r--r--  moses/TranslationModel/PhraseDictionaryMultiModel.h | 9
-rw-r--r--  moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp | 9
-rw-r--r--  moses/TranslationModel/PhraseDictionaryTransliteration.cpp | 4
-rw-r--r--  moses/TranslationModel/PhraseDictionaryTree.cpp | 13
-rw-r--r--  moses/TranslationModel/ProbingPT/ProbingPT.cpp | 4
-rw-r--r--  moses/TranslationModel/RuleTable/LoaderCompact.cpp | 4
-rw-r--r--  moses/TranslationModel/RuleTable/LoaderStandard.cpp | 4
-rw-r--r--  moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp | 4
-rw-r--r--  moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h | 4
-rw-r--r--  moses/TranslationModel/SkeletonPT.cpp | 4
-rw-r--r--  moses/TranslationModel/UG/Jamfile | 39
-rw-r--r--  moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc | 50
-rw-r--r--  moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h | 18
-rw-r--r--  moses/TranslationModel/UG/lookup_mmsapt.cc | 104
-rw-r--r--  moses/TranslationModel/UG/mm/Jamfile | 21
-rw-r--r--  moses/TranslationModel/UG/mm/custom-pt.cc | 14
-rw-r--r--  moses/TranslationModel/UG/mm/mtt-build.cc | 2
-rw-r--r--  moses/TranslationModel/UG/mm/num_read_write.cc | 74
-rw-r--r--  moses/TranslationModel/UG/mm/num_read_write.h | 124
-rw-r--r--  moses/TranslationModel/UG/mm/tpt_pickler.cc | 23
-rw-r--r--  moses/TranslationModel/UG/mm/tpt_pickler.h | 17
-rw-r--r--  moses/TranslationModel/UG/mm/ug_bitext.cc | 192
-rw-r--r--  moses/TranslationModel/UG/mm/ug_bitext.h | 72
-rw-r--r--  moses/TranslationModel/UG/mm/ug_im_tsa.h | 8
-rw-r--r--  moses/TranslationModel/UG/mm/ug_im_ttrack.h | 44
-rw-r--r--  moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h | 21
-rw-r--r--  moses/TranslationModel/UG/mm/ug_mm_ttrack.h | 2
-rw-r--r--  moses/TranslationModel/UG/mm/ug_phrasepair.cc | 97
-rw-r--r--  moses/TranslationModel/UG/mm/ug_phrasepair.h | 243
-rw-r--r--  moses/TranslationModel/UG/mm/ug_tsa_base.h | 8
-rw-r--r--  moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h | 46
-rw-r--r--  moses/TranslationModel/UG/mmsapt.cpp | 1086
-rw-r--r--  moses/TranslationModel/UG/mmsapt.h | 94
-rw-r--r--  moses/TranslationModel/UG/mmsapt_align.cc | 607
-rw-r--r--  moses/TranslationModel/UG/mmsapt_phrase_scorers.h | 311
-rw-r--r--  moses/TranslationModel/UG/ptable-lookup.cc | 123
-rw-r--r--  moses/TranslationModel/UG/sapt_phrase_key.h | 13
-rw-r--r--  moses/TranslationModel/UG/sapt_phrase_scorers.h | 14
-rw-r--r--  moses/TranslationModel/UG/sapt_pscore_base.h | 103
-rw-r--r--  moses/TranslationModel/UG/sapt_pscore_coherence.h | 33
-rw-r--r--  moses/TranslationModel/UG/sapt_pscore_lex1.h | 70
-rw-r--r--  moses/TranslationModel/UG/sapt_pscore_logcnt.h | 65
-rw-r--r--  moses/TranslationModel/UG/sapt_pscore_pbwd.h | 58
-rw-r--r--  moses/TranslationModel/UG/sapt_pscore_pfwd.h | 70
-rw-r--r--  moses/TranslationModel/UG/sapt_pscore_phrasecount.h | 34
-rw-r--r--  moses/TranslationModel/UG/sapt_pscore_provenance.h | 47
-rw-r--r--  moses/TranslationModel/UG/sapt_pscore_rareness.h | 41
-rw-r--r--  moses/TranslationModel/UG/sapt_pscore_unaligned.h | 67
-rw-r--r--  moses/TranslationModel/UG/sapt_pscore_wordcount.h | 34
-rw-r--r--  moses/TranslationModel/UG/sim-pe.cc | 83
-rw-r--r--  moses/TranslationModel/UG/spe-check-coverage.cc | 214
-rw-r--r--  moses/TranslationModel/UG/spe-check-coverage2.cc | 76
-rw-r--r--  moses/TranslationModel/UG/try-align.cc | 47
-rw-r--r--  moses/TranslationModel/UG/util/tokenindex.dump.cc | 2
-rw-r--r--  moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp | 4
-rw-r--r--  moses/TranslationModel/fuzzy-match/create_xml.cpp | 36
-rw-r--r--  moses/TranslationOption.cpp | 6
-rw-r--r--  moses/TranslationOption.h | 2
-rw-r--r--  moses/TranslationOptionCollection.cpp | 16
-rw-r--r--  moses/TranslationOptionCollection.h | 2
-rw-r--r--  moses/TranslationOptionCollectionLattice.cpp | 2
-rw-r--r--  moses/TreeInput.cpp | 11
-rw-r--r--  moses/TypeDef.h | 26
-rw-r--r--  moses/Util.h | 4
-rw-r--r--  moses/Word.cpp | 3
-rw-r--r--  moses/XmlOption.cpp | 46
-rw-r--r--  phrase-extract/ExtractionPhrasePair.cpp | 90
-rw-r--r--  phrase-extract/ExtractionPhrasePair.h | 6
-rw-r--r--  phrase-extract/Jamfile | 2
-rw-r--r--  phrase-extract/PhraseExtractionOptions.h | 1
-rw-r--r--  phrase-extract/PropertiesConsolidator.cpp | 159
-rw-r--r--  phrase-extract/PropertiesConsolidator.h | 48
-rw-r--r--  phrase-extract/consolidate-main.cpp | 36
-rw-r--r--  phrase-extract/extract-ghkm/ExtractGHKM.cpp | 79
-rw-r--r--  phrase-extract/extract-ghkm/Options.h | 2
-rw-r--r--  phrase-extract/extract-ghkm/PhraseOrientation.cpp | 419
-rw-r--r--  phrase-extract/extract-ghkm/PhraseOrientation.h | 102
-rw-r--r--  phrase-extract/extract-ghkm/ScfgRuleWriter.cpp | 13
-rw-r--r--  phrase-extract/extract-ghkm/ScfgRuleWriter.h | 2
-rw-r--r--  phrase-extract/extract-ghkm/XmlTreeParser.h | 2
-rw-r--r--  phrase-extract/extract-main.cpp | 5
-rw-r--r--  phrase-extract/extract-mixed-syntax/AlignedSentence.cpp (renamed from contrib/other-builds/extract-mixed-syntax/AlignedSentence.cpp) | 7
-rw-r--r--  phrase-extract/extract-mixed-syntax/AlignedSentence.h (renamed from contrib/other-builds/extract-mixed-syntax/AlignedSentence.h) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/AlignedSentenceSyntax.cpp (renamed from contrib/other-builds/extract-mixed-syntax/AlignedSentenceSyntax.cpp) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/AlignedSentenceSyntax.h (renamed from contrib/other-builds/extract-mixed-syntax/AlignedSentenceSyntax.h) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/ConsistentPhrase.cpp (renamed from contrib/other-builds/extract-mixed-syntax/ConsistentPhrase.cpp) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/ConsistentPhrase.h (renamed from contrib/other-builds/extract-mixed-syntax/ConsistentPhrase.h) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/ConsistentPhrases.cpp (renamed from contrib/other-builds/extract-mixed-syntax/ConsistentPhrases.cpp) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/ConsistentPhrases.h (renamed from contrib/other-builds/extract-mixed-syntax/ConsistentPhrases.h) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/InputFileStream.cpp (renamed from contrib/other-builds/extract-mixed-syntax/InputFileStream.cpp) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/InputFileStream.h (renamed from contrib/other-builds/extract-mixed-syntax/InputFileStream.h) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/Jamfile | 2
-rw-r--r--  phrase-extract/extract-mixed-syntax/Main.cpp (renamed from contrib/other-builds/extract-mixed-syntax/Main.cpp) | 36
-rw-r--r--  phrase-extract/extract-mixed-syntax/Main.h (renamed from contrib/other-builds/extract-mixed-syntax/Main.h) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/NonTerm.cpp (renamed from contrib/other-builds/extract-mixed-syntax/NonTerm.cpp) | 1
-rw-r--r--  phrase-extract/extract-mixed-syntax/NonTerm.h (renamed from contrib/other-builds/extract-mixed-syntax/NonTerm.h) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/Parameter.cpp | 72
-rw-r--r--  phrase-extract/extract-mixed-syntax/Parameter.h (renamed from contrib/other-builds/extract-mixed-syntax/Parameter.h) | 18
-rw-r--r--  phrase-extract/extract-mixed-syntax/Phrase.cpp (renamed from contrib/other-builds/extract-mixed-syntax/Phrase.cpp) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/Phrase.h (renamed from contrib/other-builds/extract-mixed-syntax/Phrase.h) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/Rule.cpp (renamed from contrib/other-builds/extract-mixed-syntax/Rule.cpp) | 166
-rw-r--r--  phrase-extract/extract-mixed-syntax/Rule.h (renamed from contrib/other-builds/extract-mixed-syntax/Rule.h) | 7
-rw-r--r--  phrase-extract/extract-mixed-syntax/RulePhrase.cpp (renamed from contrib/other-builds/extract-mixed-syntax/RulePhrase.cpp) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/RulePhrase.h (renamed from contrib/other-builds/extract-mixed-syntax/RulePhrase.h) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/RuleSymbol.cpp (renamed from contrib/other-builds/extract-mixed-syntax/RuleSymbol.cpp) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/RuleSymbol.h (renamed from contrib/other-builds/extract-mixed-syntax/RuleSymbol.h) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/Rules.cpp (renamed from contrib/other-builds/extract-mixed-syntax/Rules.cpp) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/Rules.h (renamed from contrib/other-builds/extract-mixed-syntax/Rules.h) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/SyntaxTree.cpp (renamed from contrib/other-builds/extract-mixed-syntax/SyntaxTree.cpp) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/SyntaxTree.h (renamed from contrib/other-builds/extract-mixed-syntax/SyntaxTree.h) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/Word.cpp (renamed from contrib/other-builds/extract-mixed-syntax/Word.cpp) | 12
-rw-r--r--  phrase-extract/extract-mixed-syntax/Word.h (renamed from contrib/other-builds/extract-mixed-syntax/Word.h) | 2
-rw-r--r--  phrase-extract/extract-mixed-syntax/gzfilebuf.h (renamed from contrib/other-builds/extract-mixed-syntax/gzfilebuf.h) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/pugiconfig.hpp (renamed from contrib/other-builds/extract-mixed-syntax/pugiconfig.hpp) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/pugixml.cpp (renamed from contrib/other-builds/extract-mixed-syntax/pugixml.cpp) | 0
-rw-r--r--  phrase-extract/extract-mixed-syntax/pugixml.hpp (renamed from contrib/other-builds/extract-mixed-syntax/pugixml.hpp) | 0
-rw-r--r--  phrase-extract/extract-ordering-main.cpp | 684
-rw-r--r--  phrase-extract/extract-rules-main.cpp | 3
-rw-r--r--  phrase-extract/relax-parse-main.cpp | 6
-rw-r--r--  phrase-extract/score-main.cpp | 124
-rwxr-xr-x  scripts/OSM/OSM-Train.perl | 19
-rwxr-xr-x  scripts/Transliteration/post-decoding-transliteration.pl | 9
-rwxr-xr-x  scripts/Transliteration/train-transliteration-module.pl | 12
-rw-r--r--  scripts/ems/example/config.basic | 20
-rw-r--r--  scripts/ems/example/config.factored | 20
-rw-r--r--  scripts/ems/example/config.hierarchical | 20
-rw-r--r--  scripts/ems/example/config.syntax | 20
-rw-r--r--  scripts/ems/example/config.toy | 20
-rw-r--r--  scripts/ems/experiment.meta | 21
-rwxr-xr-x  scripts/ems/experiment.perl | 113
-rwxr-xr-x  scripts/ems/support/interpolate-lm.perl | 66
-rwxr-xr-x  scripts/generic/extract-parallel.perl | 35
-rwxr-xr-x  scripts/generic/moses_sim_pe.py | 346
-rwxr-xr-x  scripts/generic/score-parallel.perl | 20
-rw-r--r--  scripts/server/moses.py | 10
-rwxr-xr-x  scripts/server/sim-pe.py | 57
-rw-r--r--  scripts/share/nonbreaking_prefixes/README.txt | 3
-rw-r--r--  scripts/tokenizer/basic-protected-patterns | 5
-rwxr-xr-x  scripts/tokenizer/normalize-punctuation.perl | 8
-rwxr-xr-x  scripts/tokenizer/tokenizer.perl | 11
-rwxr-xr-x  scripts/training/build-mmsapt.perl | 22
-rwxr-xr-x  scripts/training/mert-moses.pl | 45
-rwxr-xr-x  scripts/training/train-model.perl | 51
-rwxr-xr-x  scripts/training/wrappers/make-factor-brown-cluster-mkcls.perl | 7
-rw-r--r--  util/exception.hh | 8
-rw-r--r--  util/read_compressed.cc | 12
-rw-r--r--  util/read_compressed.hh | 4
444 files changed, 18997 insertions, 8241 deletions
diff --git a/.gitignore b/.gitignore
index f870bed03..e7c37d86c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -79,3 +79,4 @@ nbproject/
mingw/MosesGUI/MosesGUI.e4p
mingw/MosesGUI/_eric4project/
+contrib/m4m/merge-sorted
diff --git a/Jamroot b/Jamroot
index 687d1de7a..7635d7a87 100644
--- a/Jamroot
+++ b/Jamroot
@@ -77,7 +77,7 @@ import path ;
path-constant TOP : . ;
include $(TOP)/jam-files/sanity.jam ;
-boost 103600 ;
+boost 104400 ;
external-lib z ;
lib dl : : <runtime-link>static:<link>static <runtime-link>shared:<link>shared ;
@@ -114,7 +114,14 @@ requirements += [ option.get "with-mm" : : <define>PT_UG ] ;
requirements += [ option.get "with-mm" : : <define>MAX_NUM_FACTORS=4 ] ;
requirements += [ option.get "unlabelled-source" : : <define>UNLABELLED_SOURCE ] ;
-if [ option.get "with-cmph" ] {
+if [ option.get "with-lbllm" ] {
+ external-lib boost_serialization ;
+ external-lib gomp ;
+ requirements += <library>boost_serialization ;
+ requirements += <library>gomp ;
+}
+
+if [ option.get "with-cmph" : : "yes" ] {
requirements += <define>HAVE_CMPH ;
}
@@ -152,13 +159,15 @@ build-projects lm util phrase-extract search moses moses/LM mert moses-cmd moses
if [ option.get "with-mm" : : "yes" ]
{
alias mm :
- moses/TranslationModel/UG//lookup_mmsapt
+ moses/TranslationModel/UG//spe-check-coverage2
+ moses/TranslationModel/UG//ptable-lookup
+ moses/TranslationModel/UG//sim-pe
+ moses/TranslationModel/UG//spe-check-coverage
moses/TranslationModel/UG/mm//mtt-build
moses/TranslationModel/UG/mm//mtt-dump
moses/TranslationModel/UG/mm//symal2mam
moses/TranslationModel/UG/mm//mam2symal
moses/TranslationModel/UG/mm//mam_verify
- moses/TranslationModel/UG/mm//custom-pt
moses/TranslationModel/UG/mm//mmlex-build
moses/TranslationModel/UG/mm//mmlex-lookup
moses/TranslationModel/UG/mm//mtt-count-words
@@ -185,6 +194,7 @@ phrase-extract//lexical-reordering
phrase-extract//extract-ghkm
phrase-extract//pcfg-extract
phrase-extract//pcfg-score
+phrase-extract//extract-mixed-syntax
biconcor
mira//mira
contrib/server//mosesserver
diff --git a/NOTICE b/NOTICE
deleted file mode 100644
index 23d8b2ad1..000000000
--- a/NOTICE
+++ /dev/null
@@ -1,5 +0,0 @@
-This code includes data from Daniel Naber's Language Tools (czech abbreviations).
-
-This code includes data from czech wiktionary (also czech abbreviations).
-
-
diff --git a/OnDiskPt/TargetPhrase.cpp b/OnDiskPt/TargetPhrase.cpp
index 39f425b95..cb6135d45 100644
--- a/OnDiskPt/TargetPhrase.cpp
+++ b/OnDiskPt/TargetPhrase.cpp
@@ -257,7 +257,7 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
, const std::vector<float> &weightT
, bool isSyntax) const
{
- Moses::TargetPhrase *ret = new Moses::TargetPhrase();
+ Moses::TargetPhrase *ret = new Moses::TargetPhrase(&phraseDict);
// words
size_t phraseSize = GetSize();
@@ -312,7 +312,7 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
// property
ret->SetProperties(m_property);
- ret->Evaluate(mosesSP, phraseDict.GetFeaturesToApply());
+ ret->EvaluateInIsolation(mosesSP, phraseDict.GetFeaturesToApply());
return ret;
}
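Note on the hunk above: OnDiskPt::TargetPhrase::ConvertToMoses now constructs the Moses::TargetPhrase with a pointer to its owning phrase dictionary and scores it with EvaluateInIsolation instead of the old Evaluate call. A minimal sketch of the resulting call pattern; only the names visible in the diff (TargetPhrase, EvaluateInIsolation, phraseDict, mosesSP, GetFeaturesToApply) are taken from it, the rest is assumed scaffolding:

    // Illustrative only -- not a drop-in excerpt from the repository.
    Moses::TargetPhrase *ret = new Moses::TargetPhrase(&phraseDict);   // owner passed at construction
    // ... populate words, scores, alignment and properties as in the hunk ...
    ret->EvaluateInIsolation(mosesSP, phraseDict.GetFeaturesToApply()); // was: ret->Evaluate(...)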
diff --git a/OnDiskPt/Word.cpp b/OnDiskPt/Word.cpp
index 23d29cc7a..33bdb6cc5 100644
--- a/OnDiskPt/Word.cpp
+++ b/OnDiskPt/Word.cpp
@@ -104,14 +104,20 @@ void Word::ConvertToMoses(
Moses::FactorCollection &factorColl = Moses::FactorCollection::Instance();
overwrite = Moses::Word(m_isNonTerminal);
- // TODO: this conversion should have been done at load time.
- util::TokenIter<util::SingleCharacter> tok(vocab.GetString(m_vocabId), '|');
-
- for (std::vector<Moses::FactorType>::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) {
- UTIL_THROW_IF2(!tok, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
- overwrite.SetFactor(*t, factorColl.AddFactor(*tok, m_isNonTerminal));
+ if (m_isNonTerminal) {
+ const std::string &tok = vocab.GetString(m_vocabId);
+ overwrite.SetFactor(0, factorColl.AddFactor(tok, m_isNonTerminal));
+ }
+ else {
+ // TODO: this conversion should have been done at load time.
+ util::TokenIter<util::SingleCharacter> tok(vocab.GetString(m_vocabId), '|');
+
+ for (std::vector<Moses::FactorType>::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) {
+ UTIL_THROW_IF2(!tok, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
+ overwrite.SetFactor(*t, factorColl.AddFactor(*tok, m_isNonTerminal));
+ }
+ UTIL_THROW_IF2(tok, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
}
- UTIL_THROW_IF2(tok, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
}
int Word::Compare(const Word &compare) const
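Note on the Word.cpp hunk above: a non-terminal symbol is now stored whole as factor 0, while a terminal's vocabulary string is still split on '|' into one factor per requested output factor, with too-few/too-many checks. The following is a small self-contained sketch of that splitting convention using plain std::string code; it does not use the Moses classes or UTIL_THROW_IF2 from the diff, and the helper name is hypothetical:

    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    // Split a factored token such as "house|NN|haus" on '|' -- the same
    // convention the hunk above applies to terminal words.
    std::vector<std::string> SplitFactors(const std::string &token) {
      std::vector<std::string> factors;
      std::stringstream ss(token);
      std::string part;
      while (std::getline(ss, part, '|')) factors.push_back(part);
      return factors;
    }

    int main() {
      const std::vector<std::string> factors = SplitFactors("house|NN|haus");
      const size_t expected = 3;  // analogous to outputFactorsVec.size() in the diff
      if (factors.size() != expected) {
        std::cerr << "unexpected number of factors\n";  // mirrors the too-few/too-many checks
        return 1;
      }
      for (size_t i = 0; i < factors.size(); ++i)
        std::cout << "factor " << i << ": " << factors[i] << "\n";
      return 0;
    }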
diff --git a/OnDiskPt/queryOnDiskPt.cpp b/OnDiskPt/queryOnDiskPt.cpp
index a38fc5435..77576d956 100644
--- a/OnDiskPt/queryOnDiskPt.cpp
+++ b/OnDiskPt/queryOnDiskPt.cpp
@@ -22,7 +22,7 @@ int main(int argc, char **argv)
{
int tableLimit = 20;
std::string ttable = "";
- bool useAlignments = false;
+ // bool useAlignments = false;
for(int i = 1; i < argc; i++) {
if(!strcmp(argv[i], "-tlimit")) {
diff --git a/contrib/other-builds/CreateOnDiskPt/.cproject b/contrib/other-builds/CreateOnDiskPt/.cproject
index 4c46d70f8..18e0befb5 100644
--- a/contrib/other-builds/CreateOnDiskPt/.cproject
+++ b/contrib/other-builds/CreateOnDiskPt/.cproject
@@ -1,16 +1,18 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
+<?fileVersion 4.0.0?>
+
+<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.602770742">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.602770742" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@@ -43,14 +45,7 @@
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="irstlm"/>
- <listOptionValue builtIn="false" value="dstruct"/>
- <listOptionValue builtIn="false" value="flm"/>
- <listOptionValue builtIn="false" value="oolm"/>
- <listOptionValue builtIn="false" value="lattice"/>
- <listOptionValue builtIn="false" value="misc"/>
- <listOptionValue builtIn="false" value="dalm"/>
<listOptionValue builtIn="false" value="search"/>
- <listOptionValue builtIn="false" value="RandLM"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
@@ -60,13 +55,16 @@
<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
<listOptionValue builtIn="false" value="pthread"/>
<listOptionValue builtIn="false" value="z"/>
+ <listOptionValue builtIn="false" value="bz2"/>
<listOptionValue builtIn="false" value="dl"/>
</option>
<option id="gnu.cpp.link.option.paths.815001500" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
+ <listOptionValue builtIn="false" value="bz2"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/search/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../DALM/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/i686-m64&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/macosx&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../irstlm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../randlm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/OnDiskPt/Debug&quot;"/>
@@ -92,12 +90,12 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.168814843" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
diff --git a/contrib/other-builds/consolidate/.cproject b/contrib/other-builds/consolidate/.cproject
index 3c70ed365..9caa531d6 100644
--- a/contrib/other-builds/consolidate/.cproject
+++ b/contrib/other-builds/consolidate/.cproject
@@ -1,8 +1,8 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
- <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1847651686">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1847651686" moduleId="org.eclipse.cdt.core.settings" name="Debug">
+ <cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.2091728208">
+ <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.2091728208" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -14,50 +14,63 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1847651686" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
- <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1847651686." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.1312813804" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
- <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1457158442" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
- <builder buildPath="${workspace_loc:/consolidate}/Debug" id="cdt.managedbuild.builder.gnu.cross.401817170" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.584773180" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.548826159" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.option.debugging.level.69309976" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1869389417" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1684035985" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
- <option id="gnu.cpp.compiler.option.optimization.level.1978964587" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.debugging.level.1174628687" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.1899244069" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.2091728208" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
+ <folderInfo id="cdt.managedbuild.config.gnu.exe.debug.2091728208." name="/" resourcePath="">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.69362991" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
+ <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.641760346" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
+ <builder buildPath="${workspace_loc:/consolidate}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.1286696537" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
+ <tool id="cdt.managedbuild.tool.gnu.archiver.base.1571215005" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1626949654" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
+ <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.1186248186" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1416850495" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.534201039" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/include&quot;"/>
</option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1369007077" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1468157552" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.82249493" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.83105790" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.debug.option.debugging.level.937329669" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.461173729" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.988122551" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.580092188" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
- <option id="gnu.cpp.link.option.libs.1224797947" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
+ <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1950007837" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.110628197" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
+ <option id="gnu.cpp.link.option.libs.1393924562" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
+ <listOptionValue builtIn="false" value="moses"/>
+ <listOptionValue builtIn="false" value="util"/>
+ <listOptionValue builtIn="false" value="boost_iostreams"/>
<listOptionValue builtIn="false" value="z"/>
- <listOptionValue builtIn="false" value="boost_iostreams-mt"/>
</option>
- <option id="gnu.cpp.link.option.paths.845281969" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
+ <option id="gnu.cpp.link.option.paths.1967422094" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>
</option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1562981657" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1093223502" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.archiver.1813579853" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.assembler.660034723" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.2016181080" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+ <tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.1334927727" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.197989377" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
+ <fileInfo id="cdt.managedbuild.config.gnu.exe.debug.2091728208.911524129" name="PropertiesConsolidator.cpp" rcbsApplicability="disable" resourcePath="PropertiesConsolidator.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1626949654.741737356">
+ <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1626949654.741737356" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1626949654">
+ <option id="gnu.cpp.compiler.option.include.paths.858416673" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/include&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
+ </option>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2042647079" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ </tool>
+ </fileInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
- <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.1197533473">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.1197533473" moduleId="org.eclipse.cdt.core.settings" name="Release">
+ <cconfiguration id="cdt.managedbuild.config.gnu.exe.release.185559773">
+ <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.185559773" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -69,31 +82,31 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.1197533473" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
- <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.1197533473." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.1193312581" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
- <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1614674218" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
- <builder buildPath="${workspace_loc:/consolidate}/Release" id="cdt.managedbuild.builder.gnu.cross.1921548268" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1402792534" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.172258714" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.option.debugging.level.949623548" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1960225725" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.185559773" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
+ <folderInfo id="cdt.managedbuild.config.gnu.exe.release.185559773." name="/" resourcePath="">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.33298530" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
+ <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1524270442" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
+ <builder buildPath="${workspace_loc:/consolidate}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.1812036307" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
+ <tool id="cdt.managedbuild.tool.gnu.archiver.base.1942293389" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.520681695" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
+ <option id="gnu.cpp.compiler.exe.release.option.optimization.level.649091161" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.debugging.level.1279967053" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.624630717" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1697856596" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
- <option id="gnu.cpp.compiler.option.optimization.level.1575999400" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.debugging.level.732263649" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1685852561" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ <tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.233526141" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1882834640" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.release.option.debugging.level.1438334736" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1338220126" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1332869586" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.484647585" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
- <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.2140954002" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+ <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.2105674082" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.1531731895" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.286541559" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.archiver.620666274" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.assembler.1478840357" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.412043972" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+ <tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.1075374533" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.231041028" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
@@ -103,30 +116,23 @@
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <project id="consolidate.cdt.managedbuild.target.gnu.cross.exe.1166003694" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
+ <project id="consolidate.cdt.managedbuild.target.gnu.exe.1024637209" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1847651686;cdt.managedbuild.config.gnu.cross.exe.debug.1847651686.;cdt.managedbuild.tool.gnu.cross.c.compiler.584773180;cdt.managedbuild.tool.gnu.c.compiler.input.1869389417">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.2091728208;cdt.managedbuild.config.gnu.exe.debug.2091728208.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1626949654;cdt.managedbuild.tool.gnu.cpp.compiler.input.1468157552">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1197533473;cdt.managedbuild.config.gnu.cross.exe.release.1197533473.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1697856596;cdt.managedbuild.tool.gnu.cpp.compiler.input.1685852561">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.2091728208;cdt.managedbuild.config.gnu.exe.debug.2091728208.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.82249493;cdt.managedbuild.tool.gnu.c.compiler.input.461173729">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1847651686;cdt.managedbuild.config.gnu.cross.exe.debug.1847651686.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1684035985;cdt.managedbuild.tool.gnu.cpp.compiler.input.1369007077">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.185559773;cdt.managedbuild.config.gnu.exe.release.185559773.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.233526141;cdt.managedbuild.tool.gnu.c.compiler.input.1338220126">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1197533473;cdt.managedbuild.config.gnu.cross.exe.release.1197533473.;cdt.managedbuild.tool.gnu.cross.c.compiler.1402792534;cdt.managedbuild.tool.gnu.c.compiler.input.1960225725">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.185559773;cdt.managedbuild.config.gnu.exe.release.185559773.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.520681695;cdt.managedbuild.tool.gnu.cpp.compiler.input.624630717">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
- <storageModule moduleId="refreshScope" versionNumber="2">
- <configuration configurationName="Release">
- <resource resourceType="PROJECT" workspacePath="/consolidate"/>
- </configuration>
- <configuration configurationName="Debug">
- <resource resourceType="PROJECT" workspacePath="/consolidate"/>
- </configuration>
- </storageModule>
+ <storageModule moduleId="refreshScope"/>
</cproject>
diff --git a/contrib/other-builds/consolidate/.project b/contrib/other-builds/consolidate/.project
index 4095862b4..7e5995030 100644
--- a/contrib/other-builds/consolidate/.project
+++ b/contrib/other-builds/consolidate/.project
@@ -3,6 +3,8 @@
<name>consolidate</name>
<comment></comment>
<projects>
+ <project>moses</project>
+ <project>util</project>
</projects>
<buildSpec>
<buildCommand>
@@ -46,6 +48,16 @@
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI>
</link>
<link>
+ <name>PropertiesConsolidator.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/PropertiesConsolidator.cpp</locationURI>
+ </link>
+ <link>
+ <name>PropertiesConsolidator.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/PropertiesConsolidator.h</locationURI>
+ </link>
+ <link>
<name>consolidate-main.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/consolidate-main.cpp</locationURI>
diff --git a/contrib/other-builds/extract-ghkm/.cproject b/contrib/other-builds/extract-ghkm/.cproject
index 61ea19161..0a829b697 100644
--- a/contrib/other-builds/extract-ghkm/.cproject
+++ b/contrib/other-builds/extract-ghkm/.cproject
@@ -1,59 +1,54 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
- <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002" moduleId="org.eclipse.cdt.core.settings" name="Debug">
+ <cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.1975272196">
+ <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.1975272196" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
- <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.1035891586" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
- <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.242178856" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
- <builder buildPath="${workspace_loc:/extract-ghkm/Debug}" id="cdt.managedbuild.builder.gnu.cross.430400318" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.251687262" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.962699619" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.option.debugging.level.230503798" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.433137197" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.367822268" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
- <option id="gnu.cpp.compiler.option.optimization.level.971749711" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.debugging.level.984190691" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.1374841264" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../phrase-extract&quot;"/>
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.1975272196" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
+ <folderInfo id="cdt.managedbuild.config.gnu.exe.debug.1975272196." name="/" resourcePath="">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1513645956" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
+ <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.621141597" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
+ <builder buildPath="${workspace_loc:/extract-ghkm}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.1641243676" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
+ <tool id="cdt.managedbuild.tool.gnu.archiver.base.150240237" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.494510261" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
+ <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.520735766" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.730994342" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.1461708548" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/include&quot;"/>
</option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2075381818" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1669405610" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1026620601" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1419857560" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
- <option id="gnu.cpp.link.option.paths.668926503" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/lib64&quot;"/>
- </option>
- <option id="gnu.cpp.link.option.libs.2091468346" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
- <listOptionValue builtIn="false" value="boost_program_options-mt"/>
- <listOptionValue builtIn="false" value="boost_thread-mt"/>
- <listOptionValue builtIn="false" value="boost_filesystem-mt"/>
+ <tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.849972124" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.154971011" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.debug.option.debugging.level.600284918" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2129236570" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1041890522" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.674199351" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
+ <option id="gnu.cpp.link.option.libs.1221354875" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="z"/>
- <listOptionValue builtIn="false" value="bz2"/>
</option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1684298294" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+ <option id="gnu.cpp.link.option.paths.1494157787" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
+ </option>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1468265945" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.archiver.320160974" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.assembler.2021657841" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.1689419664" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+ <tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.882941613" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.387904024" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
@@ -61,44 +56,44 @@
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
- <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494" moduleId="org.eclipse.cdt.core.settings" name="Release">
+ <cconfiguration id="cdt.managedbuild.config.gnu.exe.release.1834059581">
+ <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1834059581" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
- <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.2000920404" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
- <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1106451881" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
- <builder buildPath="${workspace_loc:/extract-ghkm/Release}" id="cdt.managedbuild.builder.gnu.cross.727887705" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.819016498" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1057468997" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.option.debugging.level.1130475273" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.164617278" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1834059581" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
+ <folderInfo id="cdt.managedbuild.config.gnu.exe.release.1834059581." name="/" resourcePath="">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.154645030" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
+ <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.483189041" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
+ <builder buildPath="${workspace_loc:/extract-ghkm}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.882065438" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
+ <tool id="cdt.managedbuild.tool.gnu.archiver.base.1816735709" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.788831102" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
+ <option id="gnu.cpp.compiler.exe.release.option.optimization.level.1367749352" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.debugging.level.1361465069" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.162097682" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1312144641" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
- <option id="gnu.cpp.compiler.option.optimization.level.406333630" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.debugging.level.1059243022" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1204977083" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ <tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.394449415" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.573463904" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.release.option.debugging.level.361552728" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.769108402" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1068655225" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1213865062" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
- <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.764325642" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+ <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.1636823200" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.1458872383" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.961080011" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.archiver.1299258961" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.assembler.896866692" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.276294580" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+ <tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.1285290074" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.1686210477" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
@@ -108,30 +103,23 @@
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <project id="extract-ghkm.cdt.managedbuild.target.gnu.cross.exe.1830080171" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
+ <project id="extract-ghkm.cdt.managedbuild.target.gnu.exe.283582370" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1825927494;cdt.managedbuild.config.gnu.cross.exe.release.1825927494.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1312144641;cdt.managedbuild.tool.gnu.cpp.compiler.input.1204977083">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.1975272196;cdt.managedbuild.config.gnu.exe.debug.1975272196.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.494510261;cdt.managedbuild.tool.gnu.cpp.compiler.input.1669405610">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002;cdt.managedbuild.config.gnu.cross.exe.debug.1410559002.;cdt.managedbuild.tool.gnu.cross.c.compiler.251687262;cdt.managedbuild.tool.gnu.c.compiler.input.433137197">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.1834059581;cdt.managedbuild.config.gnu.exe.release.1834059581.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.394449415;cdt.managedbuild.tool.gnu.c.compiler.input.769108402">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1825927494;cdt.managedbuild.config.gnu.cross.exe.release.1825927494.;cdt.managedbuild.tool.gnu.cross.c.compiler.819016498;cdt.managedbuild.tool.gnu.c.compiler.input.164617278">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.1975272196;cdt.managedbuild.config.gnu.exe.debug.1975272196.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.849972124;cdt.managedbuild.tool.gnu.c.compiler.input.2129236570">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002;cdt.managedbuild.config.gnu.cross.exe.debug.1410559002.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.367822268;cdt.managedbuild.tool.gnu.cpp.compiler.input.2075381818">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.1834059581;cdt.managedbuild.config.gnu.exe.release.1834059581.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.788831102;cdt.managedbuild.tool.gnu.cpp.compiler.input.162097682">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
- <storageModule moduleId="refreshScope" versionNumber="2">
- <configuration configurationName="Release">
- <resource resourceType="PROJECT" workspacePath="/extract-ghkm"/>
- </configuration>
- <configuration configurationName="Debug">
- <resource resourceType="PROJECT" workspacePath="/extract-ghkm"/>
- </configuration>
- </storageModule>
+ <storageModule moduleId="refreshScope"/>
</cproject>
diff --git a/contrib/other-builds/extract-ghkm/.project b/contrib/other-builds/extract-ghkm/.project
index b7c40f069..f9570120b 100644
--- a/contrib/other-builds/extract-ghkm/.project
+++ b/contrib/other-builds/extract-ghkm/.project
@@ -26,49 +26,19 @@
</natures>
<linkedResources>
<link>
- <name>Alignment.cpp</name>
+ <name>Hole.h</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Alignment.cpp</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/Hole.h</locationURI>
</link>
<link>
- <name>Alignment.h</name>
+ <name>HoleCollection.cpp</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Alignment.h</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/HoleCollection.cpp</locationURI>
</link>
<link>
- <name>AlignmentGraph.cpp</name>
+ <name>HoleCollection.h</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/AlignmentGraph.cpp</locationURI>
- </link>
- <link>
- <name>AlignmentGraph.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/AlignmentGraph.h</locationURI>
- </link>
- <link>
- <name>ComposedRule.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ComposedRule.cpp</locationURI>
- </link>
- <link>
- <name>ComposedRule.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ComposedRule.h</locationURI>
- </link>
- <link>
- <name>Exception.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Exception.h</locationURI>
- </link>
- <link>
- <name>ExtractGHKM.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ExtractGHKM.cpp</locationURI>
- </link>
- <link>
- <name>ExtractGHKM.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ExtractGHKM.h</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/HoleCollection.h</locationURI>
</link>
<link>
<name>InputFileStream.cpp</name>
@@ -81,31 +51,6 @@
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.h</locationURI>
</link>
<link>
- <name>Jamfile</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Jamfile</locationURI>
- </link>
- <link>
- <name>Main.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Main.cpp</locationURI>
- </link>
- <link>
- <name>Node.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Node.cpp</locationURI>
- </link>
- <link>
- <name>Node.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Node.h</locationURI>
- </link>
- <link>
- <name>Options.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Options.h</locationURI>
- </link>
- <link>
<name>OutputFileStream.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.cpp</locationURI>
@@ -116,54 +61,24 @@
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI>
</link>
<link>
- <name>ParseTree.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ParseTree.cpp</locationURI>
- </link>
- <link>
- <name>ParseTree.h</name>
+ <name>SentenceAlignment.cpp</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ParseTree.h</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.cpp</locationURI>
</link>
<link>
- <name>ScfgRule.cpp</name>
+ <name>SentenceAlignment.h</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRule.cpp</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.h</locationURI>
</link>
<link>
- <name>ScfgRule.h</name>
+ <name>SentenceAlignmentWithSyntax.cpp</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRule.h</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.cpp</locationURI>
</link>
<link>
- <name>ScfgRuleWriter.cpp</name>
+ <name>SentenceAlignmentWithSyntax.h</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp</locationURI>
- </link>
- <link>
- <name>ScfgRuleWriter.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRuleWriter.h</locationURI>
- </link>
- <link>
- <name>Span.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Span.cpp</locationURI>
- </link>
- <link>
- <name>Span.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Span.h</locationURI>
- </link>
- <link>
- <name>Subgraph.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Subgraph.cpp</locationURI>
- </link>
- <link>
- <name>Subgraph.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Subgraph.h</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.h</locationURI>
</link>
<link>
<name>SyntaxTree.cpp</name>
@@ -186,14 +101,9 @@
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.h</locationURI>
</link>
<link>
- <name>XmlTreeParser.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/XmlTreeParser.cpp</locationURI>
- </link>
- <link>
- <name>XmlTreeParser.h</name>
+ <name>extract-rules-main.cpp</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/XmlTreeParser.h</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-rules-main.cpp</locationURI>
</link>
<link>
<name>tables-core.cpp</name>
diff --git a/contrib/other-builds/extract-mixed-syntax/.cproject b/contrib/other-builds/extract-mixed-syntax/.cproject
index 46118b322..8745cf88d 100644
--- a/contrib/other-builds/extract-mixed-syntax/.cproject
+++ b/contrib/other-builds/extract-mixed-syntax/.cproject
@@ -1,8 +1,8 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
- <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982" moduleId="org.eclipse.cdt.core.settings" name="Debug">
+ <cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.1409305044">
+ <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.1409305044" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -14,56 +14,55 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
- <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.456080129" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
- <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.582801917" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
- <builder buildPath="${workspace_loc:/extract-mixed-syntax/Debug}" id="cdt.managedbuild.builder.gnu.cross.1220166455" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1245611568" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.2055012191" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.option.debugging.level.1768196213" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2007889843" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1194558915" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
- <option id="gnu.cpp.compiler.option.optimization.level.855436310" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.debugging.level.506549229" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.1497326561" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}}&quot;"/>
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.1409305044" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
+ <folderInfo id="cdt.managedbuild.config.gnu.exe.debug.1409305044." name="/" resourcePath="">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1388217813" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
+ <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.933039924" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
+ <builder buildPath="${workspace_loc:/extract-mixed-syntax}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.48110463" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
+ <tool id="cdt.managedbuild.tool.gnu.archiver.base.98916974" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1188224255" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
+ <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.391351501" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1590628643" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.968781133" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/include&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../..&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../phrase-extract&quot;"/>
</option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2118510064" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1981472807" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.606353571" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.740521305" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
- <option id="gnu.cpp.link.option.libs.1946120010" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
- <listOptionValue builtIn="false" value="boost_program_options-mt"/>
+ <tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.902271411" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.736647824" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.debug.option.debugging.level.2105683691" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1947641767" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.966210211" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1701471219" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
+ <option id="gnu.cpp.link.option.libs.1906832553" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
+ <listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
+ <listOptionValue builtIn="false" value="boost_program_options-mt"/>
<listOptionValue builtIn="false" value="z"/>
</option>
- <option id="gnu.cpp.link.option.paths.1563475751" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
+ <option id="gnu.cpp.link.option.paths.1107413288" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>
</option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.106010037" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1613608534" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.archiver.136661991" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.assembler.2112208574" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.172930211" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+ <tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.1191140458" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.257834788" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
- <sourceEntries>
- <entry excluding="util/sorted_uniform_test.cc|util/sized_iterator_test.cc|util/read_compressed_test.cc|util/probing_hash_table_test.cc|util/joint_sort_test.cc|util/multi_intersection_test.cc|util/file_piece_test.cc|util/bit_packing_test.cc" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
- </sourceEntries>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
- <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.715007893">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.715007893" moduleId="org.eclipse.cdt.core.settings" name="Release">
+ <cconfiguration id="cdt.managedbuild.config.gnu.exe.release.1529383679">
+ <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1529383679" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -75,31 +74,31 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.715007893" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
- <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.715007893." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.99436307" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
- <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.801178939" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
- <builder buildPath="${workspace_loc:/extract-mixed-syntax/Release}" id="cdt.managedbuild.builder.gnu.cross.1999547547" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.2138817906" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1481537766" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.option.debugging.level.1967527847" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.442342681" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1529383679" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
+ <folderInfo id="cdt.managedbuild.config.gnu.exe.release.1529383679." name="/" resourcePath="">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.1048718406" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
+ <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.456212753" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
+ <builder buildPath="${workspace_loc:/extract-mixed-syntax}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.1570266419" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
+ <tool id="cdt.managedbuild.tool.gnu.archiver.base.577209301" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1943090599" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
+ <option id="gnu.cpp.compiler.exe.release.option.optimization.level.1506916262" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.debugging.level.2132167444" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.619145487" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1604862038" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
- <option id="gnu.cpp.compiler.option.optimization.level.1847950300" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.debugging.level.1130138972" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.870650754" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ <tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.2063838952" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.391536740" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.release.option.debugging.level.147725572" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1423330814" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.158429528" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.2020667840" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
- <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1372779734" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+ <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.1089231126" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.1386796864" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1793802493" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.archiver.371006952" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.assembler.1770045040" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.707592414" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+ <tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.1864177991" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.2122644096" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
@@ -109,32 +108,35 @@
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <project id="extract-mixed-syntax.cdt.managedbuild.target.gnu.cross.exe.1868010260" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
+ <project id="extract-mixed-syntax.cdt.managedbuild.target.gnu.exe.1077520702" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.715007893;cdt.managedbuild.config.gnu.cross.exe.release.715007893.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1604862038;cdt.managedbuild.tool.gnu.cpp.compiler.input.870650754">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.1409305044;cdt.managedbuild.config.gnu.exe.debug.1409305044.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1188224255;cdt.managedbuild.tool.gnu.cpp.compiler.input.1981472807">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.715007893;cdt.managedbuild.config.gnu.cross.exe.release.715007893.;cdt.managedbuild.tool.gnu.cross.c.compiler.2138817906;cdt.managedbuild.tool.gnu.c.compiler.input.442342681">
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.668933542;cdt.managedbuild.config.gnu.cross.exe.release.668933542.;cdt.managedbuild.tool.gnu.cross.c.compiler.1457475056;cdt.managedbuild.tool.gnu.c.compiler.input.90570918">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982;cdt.managedbuild.config.gnu.cross.exe.debug.1919499982.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1194558915;cdt.managedbuild.tool.gnu.cpp.compiler.input.2118510064">
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.1529383679;cdt.managedbuild.config.gnu.exe.release.1529383679.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1943090599;cdt.managedbuild.tool.gnu.cpp.compiler.input.619145487">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+ </scannerConfigBuildInfo>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.717781750;cdt.managedbuild.config.gnu.cross.exe.debug.717781750.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.370220943;cdt.managedbuild.tool.gnu.cpp.compiler.input.1392992841">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982;cdt.managedbuild.config.gnu.cross.exe.debug.1919499982.;cdt.managedbuild.tool.gnu.cross.c.compiler.1245611568;cdt.managedbuild.tool.gnu.c.compiler.input.2007889843">
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.1409305044;cdt.managedbuild.config.gnu.exe.debug.1409305044.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.902271411;cdt.managedbuild.tool.gnu.c.compiler.input.1947641767">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+ </scannerConfigBuildInfo>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.717781750;cdt.managedbuild.config.gnu.cross.exe.debug.717781750.;cdt.managedbuild.tool.gnu.cross.c.compiler.843537319;cdt.managedbuild.tool.gnu.c.compiler.input.1750960939">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.668933542;cdt.managedbuild.config.gnu.cross.exe.release.668933542.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.648756325;cdt.managedbuild.tool.gnu.cpp.compiler.input.1840233144">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+ </scannerConfigBuildInfo>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.1529383679;cdt.managedbuild.config.gnu.exe.release.1529383679.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.2063838952;cdt.managedbuild.tool.gnu.c.compiler.input.1423330814">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+ </scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
- <storageModule moduleId="refreshScope" versionNumber="2">
- <configuration configurationName="Release">
- <resource resourceType="PROJECT" workspacePath="/extract-mixed-syntax"/>
- </configuration>
- <configuration configurationName="Debug">
- <resource resourceType="PROJECT" workspacePath="/extract-mixed-syntax"/>
- </configuration>
- </storageModule>
- <storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
- <storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
+ <storageModule moduleId="refreshScope"/>
</cproject>
diff --git a/contrib/other-builds/extract-mixed-syntax/.project b/contrib/other-builds/extract-mixed-syntax/.project
index 8f0f81f07..0f05a7b41 100644
--- a/contrib/other-builds/extract-mixed-syntax/.project
+++ b/contrib/other-builds/extract-mixed-syntax/.project
@@ -24,4 +24,196 @@
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
+ <linkedResources>
+ <link>
+ <name>AlignedSentence.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/AlignedSentence.cpp</locationURI>
+ </link>
+ <link>
+ <name>AlignedSentence.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/AlignedSentence.h</locationURI>
+ </link>
+ <link>
+ <name>AlignedSentenceSyntax.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/AlignedSentenceSyntax.cpp</locationURI>
+ </link>
+ <link>
+ <name>AlignedSentenceSyntax.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/AlignedSentenceSyntax.h</locationURI>
+ </link>
+ <link>
+ <name>ConsistentPhrase.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/ConsistentPhrase.cpp</locationURI>
+ </link>
+ <link>
+ <name>ConsistentPhrase.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/ConsistentPhrase.h</locationURI>
+ </link>
+ <link>
+ <name>ConsistentPhrases.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/ConsistentPhrases.cpp</locationURI>
+ </link>
+ <link>
+ <name>ConsistentPhrases.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/ConsistentPhrases.h</locationURI>
+ </link>
+ <link>
+ <name>InputFileStream.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/InputFileStream.cpp</locationURI>
+ </link>
+ <link>
+ <name>InputFileStream.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/InputFileStream.h</locationURI>
+ </link>
+ <link>
+ <name>Jamfile</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/Jamfile</locationURI>
+ </link>
+ <link>
+ <name>Main.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/Main.cpp</locationURI>
+ </link>
+ <link>
+ <name>Main.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/Main.h</locationURI>
+ </link>
+ <link>
+ <name>Makefile</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/Makefile</locationURI>
+ </link>
+ <link>
+ <name>NonTerm.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/NonTerm.cpp</locationURI>
+ </link>
+ <link>
+ <name>NonTerm.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/NonTerm.h</locationURI>
+ </link>
+ <link>
+ <name>OutputFileStream.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.cpp</locationURI>
+ </link>
+ <link>
+ <name>OutputFileStream.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI>
+ </link>
+ <link>
+ <name>Parameter.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/Parameter.cpp</locationURI>
+ </link>
+ <link>
+ <name>Parameter.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/Parameter.h</locationURI>
+ </link>
+ <link>
+ <name>Phrase.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/Phrase.cpp</locationURI>
+ </link>
+ <link>
+ <name>Phrase.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/Phrase.h</locationURI>
+ </link>
+ <link>
+ <name>Rule.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/Rule.cpp</locationURI>
+ </link>
+ <link>
+ <name>Rule.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/Rule.h</locationURI>
+ </link>
+ <link>
+ <name>RulePhrase.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/RulePhrase.cpp</locationURI>
+ </link>
+ <link>
+ <name>RulePhrase.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/RulePhrase.h</locationURI>
+ </link>
+ <link>
+ <name>RuleSymbol.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/RuleSymbol.cpp</locationURI>
+ </link>
+ <link>
+ <name>RuleSymbol.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/RuleSymbol.h</locationURI>
+ </link>
+ <link>
+ <name>Rules.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/Rules.cpp</locationURI>
+ </link>
+ <link>
+ <name>Rules.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/Rules.h</locationURI>
+ </link>
+ <link>
+ <name>SyntaxTree.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/SyntaxTree.cpp</locationURI>
+ </link>
+ <link>
+ <name>SyntaxTree.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/SyntaxTree.h</locationURI>
+ </link>
+ <link>
+ <name>Word.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/Word.cpp</locationURI>
+ </link>
+ <link>
+ <name>Word.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/Word.h</locationURI>
+ </link>
+ <link>
+ <name>gzfilebuf.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/gzfilebuf.h</locationURI>
+ </link>
+ <link>
+ <name>pugiconfig.hpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/pugiconfig.hpp</locationURI>
+ </link>
+ <link>
+ <name>pugixml.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/pugixml.cpp</locationURI>
+ </link>
+ <link>
+ <name>pugixml.hpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/pugixml.hpp</locationURI>
+ </link>
+ </linkedResources>
</projectDescription>
diff --git a/contrib/other-builds/extract-mixed-syntax/Makefile b/contrib/other-builds/extract-mixed-syntax/Makefile
deleted file mode 100644
index f612b8667..000000000
--- a/contrib/other-builds/extract-mixed-syntax/Makefile
+++ /dev/null
@@ -1,17 +0,0 @@
-all: extract-mixed-syntax
-
-clean:
- rm -f *.o extract-mixed-syntax
-
-.cpp.o:
- g++ -O4 -g -c -I../../../boost/include -I../../../ $<
-
-OBJECTS = AlignedSentence.o ConsistentPhrase.o ConsistentPhrases.o InputFileStream.o \
- Main.o OutputFileStream.o Parameter.o Phrase.o Rule.o Rules.o RuleSymbol.o \
- SyntaxTree.o Word.o NonTerm.o RulePhrase.o AlignedSentenceSyntax.o pugixml.o
-
-extract-mixed-syntax: $(OBJECTS)
-
- g++ $(OBJECTS) -L../../../boost/lib64 -lz -lboost_iostreams-mt -lboost_program_options-mt -o extract-mixed-syntax
-
-
diff --git a/contrib/other-builds/extract-mixed-syntax/OutputFileStream.cpp b/contrib/other-builds/extract-mixed-syntax/OutputFileStream.cpp
deleted file mode 100644
index a61ce1ab1..000000000
--- a/contrib/other-builds/extract-mixed-syntax/OutputFileStream.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-// $Id: OutputFileStream.cpp 2780 2010-01-29 17:11:17Z bojar $
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#include <boost/iostreams/filter/gzip.hpp>
-#include "OutputFileStream.h"
-#include "gzfilebuf.h"
-
-using namespace std;
-
-namespace Moses
-{
-OutputFileStream::OutputFileStream()
- :boost::iostreams::filtering_ostream()
- ,m_outFile(NULL)
-{
-}
-
-OutputFileStream::OutputFileStream(const std::string &filePath)
- : m_outFile(NULL)
-{
- Open(filePath);
-}
-
-OutputFileStream::~OutputFileStream()
-{
- Close();
-}
-
-bool OutputFileStream::Open(const std::string &filePath)
-{
- m_outFile = new ofstream(filePath.c_str(), ios_base::out | ios_base::binary);
- if (m_outFile->fail()) {
- return false;
- }
-
- if (filePath.size() > 3 && filePath.substr(filePath.size() - 3, 3) == ".gz") {
- this->push(boost::iostreams::gzip_compressor());
- }
- this->push(*m_outFile);
-
- return true;
-}
-
-void OutputFileStream::Close()
-{
- if (m_outFile == NULL) {
- return;
- }
-
- this->flush();
- this->pop(); // file
-
- m_outFile->close();
- delete m_outFile;
- m_outFile = NULL;
- return;
-}
-
-
-}
-
diff --git a/contrib/other-builds/extract-mixed-syntax/OutputFileStream.h b/contrib/other-builds/extract-mixed-syntax/OutputFileStream.h
deleted file mode 100644
index f52e36d76..000000000
--- a/contrib/other-builds/extract-mixed-syntax/OutputFileStream.h
+++ /dev/null
@@ -1,50 +0,0 @@
-// $Id: InputFileStream.h 2939 2010-02-24 11:15:44Z jfouet $
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#pragma once
-
-#include <cstdlib>
-#include <fstream>
-#include <string>
-#include <iostream>
-#include <boost/iostreams/filtering_stream.hpp>
-
-namespace Moses
-{
-
-/** Used in place of std::ostream, writes gzip-compressed files if the file name ends in .gz
- */
-class OutputFileStream : public boost::iostreams::filtering_ostream
-{
-protected:
- std::ofstream *m_outFile;
-public:
- OutputFileStream();
-
- OutputFileStream(const std::string &filePath);
- virtual ~OutputFileStream();
-
- bool Open(const std::string &filePath);
- void Close();
-};
-
-}
-
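Note: the two files removed above are a thin wrapper around Boost.Iostreams; OutputFileStream pushes a gzip_compressor in front of a plain std::ofstream whenever the target path ends in ".gz". The following is a minimal, self-contained sketch of that same pattern, assuming only Boost.Iostreams is available; the function name WriteMaybeGzipped is illustrative and not part of the Moses API.

    #include <fstream>
    #include <string>
    #include <boost/iostreams/filter/gzip.hpp>
    #include <boost/iostreams/filtering_stream.hpp>

    // Sketch: write one line, gzip-compressed iff the path ends in ".gz".
    void WriteMaybeGzipped(const std::string &path, const std::string &line)
    {
      std::ofstream file(path.c_str(), std::ios_base::out | std::ios_base::binary);
      {
        boost::iostreams::filtering_ostream out;
        if (path.size() > 3 && path.compare(path.size() - 3, 3, ".gz") == 0) {
          out.push(boost::iostreams::gzip_compressor()); // compress before the sink
        }
        out.push(file);        // final element of the chain: the raw file stream
        out << line << '\n';
      }                        // filtering_ostream destructor flushes and closes the gzip stream
      // 'file' is closed afterwards by its own destructor
    }

As in the Makefile deleted earlier in this patch, a program using this sketch would link against boost_iostreams and z.
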
diff --git a/contrib/other-builds/extract-mixed-syntax/Parameter.cpp b/contrib/other-builds/extract-mixed-syntax/Parameter.cpp
deleted file mode 100644
index f22116638..000000000
--- a/contrib/other-builds/extract-mixed-syntax/Parameter.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Parameter.cpp
- *
- * Created on: 17 Feb 2014
- * Author: hieu
- */
-#include "Parameter.h"
-
-Parameter::Parameter()
-:maxSpan(10)
-,maxNonTerm(2)
-,maxHieroNonTerm(999)
-,maxSymbolsTarget(999)
-,maxSymbolsSource(5)
-,minHoleSource(2)
-,sentenceOffset(0)
-,nonTermConsecSource(false)
-,requireAlignedWord(true)
-,fractionalCounting(true)
-,gzOutput(false)
-
-,hieroNonTerm("[X]")
-,sourceSyntax(false)
-,targetSyntax(false)
-
-,mixedSyntaxType(0)
-,multiLabel(0)
-,nonTermConsecSourceMixed(true)
-,hieroSourceLHS(false)
-,maxSpanFreeNonTermSource(0)
-,nieceTerminal(true)
-,maxScope(UNDEFINED)
-
-,spanLength(false)
-,nonTermContext(false)
-{}
-
-Parameter::~Parameter() {
- // TODO Auto-generated destructor stub
-}
-
diff --git a/contrib/other-builds/extract-mixed-syntax/filter-by-source-word-count.perl b/contrib/other-builds/extract-mixed-syntax/filter-by-source-word-count.perl
deleted file mode 100755
index d0e482a02..000000000
--- a/contrib/other-builds/extract-mixed-syntax/filter-by-source-word-count.perl
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/perl
-
-use strict;
-
-binmode(STDIN, ":utf8");
-binmode(STDOUT, ":utf8");
-binmode(STDERR, ":utf8");
-
-my $maxNumWords = $ARGV[0];
-
-while (my $line = <STDIN>) {
- chomp($line);
- my @toks = split(/ /,$line);
-
- my $numSourceWords = 0;
- my $tok = $toks[$numSourceWords];
- while ($tok ne "|||") {
- ++$numSourceWords;
- $tok = $toks[$numSourceWords];
- }
-
- if ($numSourceWords <= $maxNumWords) {
- print "$line\n";
- }
-}
-
-
diff --git a/contrib/other-builds/extract-mixed-syntax/learnable/equal.perl b/contrib/other-builds/extract-mixed-syntax/learnable/equal.perl
deleted file mode 100755
index e43b48a84..000000000
--- a/contrib/other-builds/extract-mixed-syntax/learnable/equal.perl
+++ /dev/null
@@ -1,33 +0,0 @@
-#! /usr/bin/perl -w
-
-use strict;
-
-sub trim($);
-
-my $file1 = $ARGV[0];
-my $file2 = $ARGV[1];
-
-open (FILE1, $file1);
-open (FILE2, $file2);
-
-my $countEqual = 0;
-while (my $line1 = <FILE1>) {
- my $line2 = <FILE2>;
- if (trim($line1) eq trim($line2)) {
- ++$countEqual;
- }
-}
-
-print $countEqual ."\n";
-
-
-######################
-# Perl trim function to remove whitespace from the start and end of the string
-sub trim($) {
- my $string = shift;
- $string =~ s/^\s+//;
- $string =~ s/\s+$//;
- return $string;
-}
-
-
diff --git a/contrib/other-builds/extract-mixed-syntax/learnable/get-by-line-number.perl b/contrib/other-builds/extract-mixed-syntax/learnable/get-by-line-number.perl
deleted file mode 100755
index f9ec9e39b..000000000
--- a/contrib/other-builds/extract-mixed-syntax/learnable/get-by-line-number.perl
+++ /dev/null
@@ -1,29 +0,0 @@
-#! /usr/bin/perl -w
-
-use strict;
-
-binmode(STDIN, ":utf8");
-binmode(STDOUT, ":utf8");
-binmode(STDERR, ":utf8");
-
-my $fileLineNum = $ARGV[0];
-open (FILE_LINE_NUM, $fileLineNum);
-
-my $nextLineNum = <FILE_LINE_NUM>;
-
-my $lineNum = 1;
-while (my $line = <STDIN>) {
- if (defined($nextLineNum) && $lineNum == $nextLineNum) {
- # matches. output line
- chomp($line);
- print "$line\n";
-
- # next line number
- $nextLineNum = <FILE_LINE_NUM>;
- }
-
- ++$lineNum;
-}
-
-
-
diff --git a/contrib/other-builds/extract-mixed-syntax/learnable/learnable.perl b/contrib/other-builds/extract-mixed-syntax/learnable/learnable.perl
deleted file mode 100755
index 6edcff3f9..000000000
--- a/contrib/other-builds/extract-mixed-syntax/learnable/learnable.perl
+++ /dev/null
@@ -1,108 +0,0 @@
-#! /usr/bin/perl -w
-
-use strict;
-
-my $iniPath = $ARGV[0];
-my $isHiero = $ARGV[1];
-my $decoderExec = $ARGV[2];
-my $extractExec = $ARGV[3];
-my $tmpName = $ARGV[4];
-
-my $WORK_DIR = `pwd`;
-chomp($WORK_DIR);
-
-my $MOSES_DIR = "~/workspace/github/mosesdecoder.hieu";
-
-$decoderExec = "$MOSES_DIR/bin/$decoderExec";
-$extractExec = "$MOSES_DIR/bin/$extractExec";
-
-my $SPLIT_EXEC = `gsplit --help 2>/dev/null`;
-if($SPLIT_EXEC) {
- $SPLIT_EXEC = 'gsplit';
-}
-else {
- $SPLIT_EXEC = 'split';
-}
-
-my $SORT_EXEC = `gsort --help 2>/dev/null`;
-if($SORT_EXEC) {
- $SORT_EXEC = 'gsort';
-}
-else {
- $SORT_EXEC = 'sort';
-}
-
-
-my $hieroFlag = "";
-if ($isHiero == 1) {
- $hieroFlag = "--Hierarchical";
-}
-
-print STDERR "WORK_DIR=$WORK_DIR \n";
-
-my $cmd;
-
-open (SOURCE, "source");
-open (TARGET, "target");
-open (ALIGNMENT, "alignment");
-
-my $lineNum = 0;
-my ($source, $target, $alignment);
-while ($source = <SOURCE>) {
- chomp($source);
- $target = <TARGET>; chomp($target);
- $alignment = <ALIGNMENT>; chomp($alignment);
-
- #print STDERR "$source ||| $target ||| $alignment \n";
-
- # write out 1 line
- my $tmpDir = "$WORK_DIR/$tmpName/work$lineNum";
- `mkdir -p $tmpDir`;
-
- open (SOURCE1, ">$tmpDir/source");
- open (TARGET1, ">$tmpDir/target");
- open (ALIGNMENT1, ">$tmpDir/alignment");
-
- print SOURCE1 "$source\n";
- print TARGET1 "$target\n";
- print ALIGNMENT1 "$alignment\n";
-
- close (SOURCE1);
- close (TARGET1);
- close (ALIGNMENT1);
-
- # train
- if ($isHiero == 1) {
- $cmd = "$extractExec $tmpDir/target $tmpDir/source $tmpDir/alignment $tmpDir/extract --GZOutput";
- }
- else {
- # pb
- $cmd = "$extractExec $tmpDir/target $tmpDir/source $tmpDir/alignment $tmpDir/extract 7 --GZOutput";
- }
- $cmd = "$MOSES_DIR/scripts/generic/extract-parallel.perl 1 $SPLIT_EXEC $SORT_EXEC $cmd";
- print STDERR "Executing: $cmd\n";
- `$cmd`;
-
- $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.sorted.gz /dev/null $tmpDir/pt.half.gz $hieroFlag --NoLex 1";
- `$cmd`;
-
- $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.inv.sorted.gz /dev/null $tmpDir/pt.half.inv.gz --Inverse $hieroFlag --NoLex 1";
- `$cmd`;
-
- $cmd = "$MOSES_DIR/bin/consolidate $tmpDir/pt.half.gz $tmpDir/pt.half.inv.gz $tmpDir/pt $hieroFlag --OnlyDirect";
- `$cmd`;
-
- # decode
- $cmd = "$decoderExec -f $iniPath -feature-overwrite \"TranslationModel0 path=$tmpDir/pt\" -i $tmpDir/source -feature-add \"ConstrainedDecoding path=$tmpDir/target\"";
- print STDERR "Executing: $cmd\n";
- `$cmd`;
-
-# `rm -rf $tmpDir`;
-
- ++$lineNum;
-}
-
-close(SOURCE);
-close(TARGET);
-close(ALIGNMENT);
-
diff --git a/contrib/other-builds/extract-mixed-syntax/learnable/num-deriv.perl b/contrib/other-builds/extract-mixed-syntax/learnable/num-deriv.perl
deleted file mode 100755
index 5d66d5505..000000000
--- a/contrib/other-builds/extract-mixed-syntax/learnable/num-deriv.perl
+++ /dev/null
@@ -1,151 +0,0 @@
-#! /usr/bin/perl -w
-
-use strict;
-
-sub Write1Line;
-sub WriteCorpus1Holdout;
-
-my $iniPath = $ARGV[0];
-my $isHiero = $ARGV[1];
-my $decoderExec = $ARGV[2];
-my $extractExec = $ARGV[3];
-my $tmpName = $ARGV[4];
-my $startLine = $ARGV[5];
-my $endLine = $ARGV[6];
-
-print STDERR "iniPath=$iniPath \n isHiero=$isHiero \n decoderExec=$decoderExec \n extractExec=$extractExec \n";
-
-my $WORK_DIR = `pwd`;
-chomp($WORK_DIR);
-
-my $MOSES_DIR = "~/workspace/github/mosesdecoder.hieu.gna";
-
-$decoderExec = "$MOSES_DIR/bin/$decoderExec";
-$extractExec = "$MOSES_DIR/bin/$extractExec";
-
-my $SPLIT_EXEC = `gsplit --help 2>/dev/null`;
-if($SPLIT_EXEC) {
- $SPLIT_EXEC = 'gsplit';
-}
-else {
- $SPLIT_EXEC = 'split';
-}
-
-my $SORT_EXEC = `gsort --help 2>/dev/null`;
-if($SORT_EXEC) {
- $SORT_EXEC = 'gsort';
-}
-else {
- $SORT_EXEC = 'sort';
-}
-
-
-my $hieroFlag = "";
-if ($isHiero == 1) {
- $hieroFlag = "--Hierarchical";
-}
-
-print STDERR "WORK_DIR=$WORK_DIR \n";
-
-my $cmd;
-
-open (SOURCE, "source");
-open (TARGET, "target");
-open (ALIGNMENT, "alignment");
-
-my $numLines = `cat source | wc -l`;
-
-for (my $lineNum = 0; $lineNum < $numLines; ++$lineNum) {
- my $source = <SOURCE>; chomp($source);
- my $target = <TARGET>; chomp($target);
- my $alignment = <ALIGNMENT>; chomp($alignment);
-
- if ($lineNum < $startLine || $lineNum >= $endLine) {
- next;
- }
-
- #print STDERR "$source ||| $target ||| $alignment \n";
- # write out 1 line
- my $tmpDir = "$WORK_DIR/$tmpName/work$lineNum";
- `mkdir -p $tmpDir`;
-
- Write1Line($source, $tmpDir, "source.1");
- Write1Line($target, $tmpDir, "target.1");
- Write1Line($alignment, $tmpDir, "alignment.1");
-
- WriteCorpus1Holdout($lineNum, "source", $tmpDir, "source.corpus");
- WriteCorpus1Holdout($lineNum, "target", $tmpDir, "target.corpus");
- WriteCorpus1Holdout($lineNum, "alignment", $tmpDir, "alignment.corpus");
-
- # train
- if ($isHiero == 1) {
- $cmd = "$extractExec $tmpDir/target.corpus $tmpDir/source.corpus $tmpDir/alignment.corpus $tmpDir/extract --GZOutput";
- }
- else {
- # pb
- $cmd = "$extractExec $tmpDir/target.corpus $tmpDir/source.corpus $tmpDir/alignment.corpus $tmpDir/extract 7 --GZOutput";
- }
- $cmd = "$MOSES_DIR/scripts/generic/extract-parallel.perl 1 $SPLIT_EXEC $SORT_EXEC $cmd";
- print STDERR "Executing: $cmd\n";
- `$cmd`;
-
- $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.sorted.gz /dev/null $tmpDir/pt.half.gz $hieroFlag --NoLex 1";
- `$cmd`;
-
- $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.inv.sorted.gz /dev/null $tmpDir/pt.half.inv.gz --Inverse $hieroFlag --NoLex 1";
- `$cmd`;
-
- $cmd = "$MOSES_DIR/bin/consolidate $tmpDir/pt.half.gz $tmpDir/pt.half.inv.gz $tmpDir/pt $hieroFlag --OnlyDirect";
- `$cmd`;
-
- # decode
- $cmd = "$decoderExec -f $iniPath -feature-overwrite \"TranslationModel0 path=$tmpDir/pt\" -i $tmpDir/source.1 -n-best-list $tmpDir/nbest 10000 distinct -v 2";
- print STDERR "Executing: $cmd\n";
- `$cmd`;
-
- # count the number of translation in nbest list
- $cmd = "wc -l $tmpDir/nbest >> out";
- `$cmd`;
-
- `rm -rf $tmpDir`;
-}
-
-close(SOURCE);
-close(TARGET);
-close(ALIGNMENT);
-
-
-######################
-sub Write1Line
-{
- my ($line, $tmpDir, $fileName) = @_;
-
- open (HANDLE, ">$tmpDir/$fileName");
- print HANDLE "$line\n";
- close (HANDLE);
-}
-
-sub WriteCorpus1Holdout
-{
- my ($holdoutLineNum, $inFilePath, $tmpDir, $outFileName) = @_;
-
- open (INFILE, "$inFilePath");
- open (OUTFILE, ">$tmpDir/$outFileName");
-
- my $lineNum = 0;
- while (my $line = <INFILE>) {
- chomp($line);
-
- if ($lineNum != $holdoutLineNum) {
- print OUTFILE "$line\n";
- }
-
- ++$lineNum;
- }
-
- close (OUTFILE);
- close(INFILE);
-
-}
-
-
diff --git a/contrib/other-builds/extract-mixed-syntax/learnable/reachable.perl b/contrib/other-builds/extract-mixed-syntax/learnable/reachable.perl
deleted file mode 100755
index 14432f5a7..000000000
--- a/contrib/other-builds/extract-mixed-syntax/learnable/reachable.perl
+++ /dev/null
@@ -1,147 +0,0 @@
-#! /usr/bin/perl -w
-
-use strict;
-
-sub Write1Line;
-sub WriteCorpus1Holdout;
-
-my $iniPath = $ARGV[0];
-my $isHiero = $ARGV[1];
-my $decoderExec = $ARGV[2];
-my $extractExec = $ARGV[3];
-my $tmpName = $ARGV[4];
-my $startLine = $ARGV[5];
-my $endLine = $ARGV[6];
-
-print STDERR "iniPath=$iniPath \n isHiero=$isHiero \n decoderExec=$decoderExec \n extractExec=$extractExec \n";
-
-my $WORK_DIR = `pwd`;
-chomp($WORK_DIR);
-
-my $MOSES_DIR = "~/workspace/github/mosesdecoder.hieu.gna";
-
-$decoderExec = "$MOSES_DIR/bin/$decoderExec";
-$extractExec = "$MOSES_DIR/bin/$extractExec";
-
-my $SPLIT_EXEC = `gsplit --help 2>/dev/null`;
-if($SPLIT_EXEC) {
- $SPLIT_EXEC = 'gsplit';
-}
-else {
- $SPLIT_EXEC = 'split';
-}
-
-my $SORT_EXEC = `gsort --help 2>/dev/null`;
-if($SORT_EXEC) {
- $SORT_EXEC = 'gsort';
-}
-else {
- $SORT_EXEC = 'sort';
-}
-
-
-my $hieroFlag = "";
-if ($isHiero == 1) {
- $hieroFlag = "--Hierarchical";
-}
-
-print STDERR "WORK_DIR=$WORK_DIR \n";
-
-my $cmd;
-
-open (SOURCE, "source");
-open (TARGET, "target");
-open (ALIGNMENT, "alignment");
-
-my $numLines = `cat source | wc -l`;
-
-for (my $lineNum = 0; $lineNum < $numLines; ++$lineNum) {
- my $source = <SOURCE>; chomp($source);
- my $target = <TARGET>; chomp($target);
- my $alignment = <ALIGNMENT>; chomp($alignment);
-
- if ($lineNum < $startLine || $lineNum >= $endLine) {
- next;
- }
-
- #print STDERR "$source ||| $target ||| $alignment \n";
- # write out 1 line
- my $tmpDir = "$WORK_DIR/$tmpName/work$lineNum";
- `mkdir -p $tmpDir`;
-
- Write1Line($source, $tmpDir, "source.1");
- Write1Line($target, $tmpDir, "target.1");
- Write1Line($alignment, $tmpDir, "alignment.1");
-
- WriteCorpus1Holdout($lineNum, "source", $tmpDir, "source.corpus");
- WriteCorpus1Holdout($lineNum, "target", $tmpDir, "target.corpus");
- WriteCorpus1Holdout($lineNum, "alignment", $tmpDir, "alignment.corpus");
-
- # train
- if ($isHiero == 1) {
- $cmd = "$extractExec $tmpDir/target.corpus $tmpDir/source.corpus $tmpDir/alignment.corpus $tmpDir/extract --GZOutput";
- }
- else {
- # pb
- $cmd = "$extractExec $tmpDir/target.corpus $tmpDir/source.corpus $tmpDir/alignment.corpus $tmpDir/extract 7 --GZOutput";
- }
- $cmd = "$MOSES_DIR/scripts/generic/extract-parallel.perl 1 $SPLIT_EXEC $SORT_EXEC $cmd";
- print STDERR "Executing: $cmd\n";
- `$cmd`;
-
- $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.sorted.gz /dev/null $tmpDir/pt.half.gz $hieroFlag --NoLex 1";
- `$cmd`;
-
- $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.inv.sorted.gz /dev/null $tmpDir/pt.half.inv.gz --Inverse $hieroFlag --NoLex 1";
- `$cmd`;
-
- $cmd = "$MOSES_DIR/bin/consolidate $tmpDir/pt.half.gz $tmpDir/pt.half.inv.gz $tmpDir/pt $hieroFlag --OnlyDirect";
- `$cmd`;
-
- # decode
- $cmd = "$decoderExec -f $iniPath -feature-overwrite \"TranslationModel0 path=$tmpDir/pt\" -i $tmpDir/source.1 -feature-add \"ConstrainedDecoding path=$tmpDir/target.1\" -v 2";
- print STDERR "Executing: $cmd\n";
- `$cmd`;
-
- `rm -rf $tmpDir`;
-}
-
-close(SOURCE);
-close(TARGET);
-close(ALIGNMENT);
-
-
-######################
-sub Write1Line
-{
- my ($line, $tmpDir, $fileName) = @_;
-
- open (HANDLE, ">$tmpDir/$fileName");
- print HANDLE "$line\n";
- close (HANDLE);
-}
-
-sub WriteCorpus1Holdout
-{
- my ($holdoutLineNum, $inFilePath, $tmpDir, $outFileName) = @_;
-
- open (INFILE, "$inFilePath");
- open (OUTFILE, ">$tmpDir/$outFileName");
-
- my $lineNum = 0;
- while (my $line = <INFILE>) {
- chomp($line);
-
- if ($lineNum != $holdoutLineNum) {
- print OUTFILE "$line\n";
- }
-
- ++$lineNum;
- }
-
- close (OUTFILE);
- close(INFILE);
-
-}
-
-
diff --git a/contrib/other-builds/extract-mixed-syntax/learnable/run-parallel.perl b/contrib/other-builds/extract-mixed-syntax/learnable/run-parallel.perl
deleted file mode 100755
index fa271f9ad..000000000
--- a/contrib/other-builds/extract-mixed-syntax/learnable/run-parallel.perl
+++ /dev/null
@@ -1,17 +0,0 @@
-#! /usr/bin/perl -w
-
-my $iniPath = $ARGV[0];
-
-my $SPLIT_LINES = 200;
-my $lineCount = `cat source | wc -l`;
-print STDERR "lineCount=$lineCount \n";
-
-for (my $startLine = 0; $startLine < $lineCount; $startLine += $SPLIT_LINES) {
- my $endLine = $startLine + $SPLIT_LINES;
-
- my $cmd = "../../scripts/reachable.perl $iniPath 1 moses_chart extract-rules tmp-reachable $startLine $endLine &>out.reachable.$startLine &";
- print STDERR "Executing: $cmd \n";
- system($cmd);
-
-}
-
diff --git a/contrib/other-builds/extract-ordering/.cproject b/contrib/other-builds/extract-ordering/.cproject
deleted file mode 100644
index 1d4522e27..000000000
--- a/contrib/other-builds/extract-ordering/.cproject
+++ /dev/null
@@ -1,134 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
- <storageModule moduleId="org.eclipse.cdt.core.settings">
- <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127" moduleId="org.eclipse.cdt.core.settings" name="Debug">
- <externalSettings/>
- <extensions>
- <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
- </extensions>
- </storageModule>
- <storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
- <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.499747849" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
- <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.798364121" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
- <builder buildPath="${workspace_loc:/extract-ordering}/Debug" id="cdt.managedbuild.builder.gnu.cross.1976289814" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1699460827" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.1324749613" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.option.debugging.level.1750299246" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.719498215" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1317297964" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
- <option id="gnu.cpp.compiler.option.optimization.level.251118848" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.debugging.level.99297656" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.106920816" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
- </option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1327002489" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1844372739" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1178164658" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
- <option id="gnu.cpp.link.option.libs.1434184833" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
- <listOptionValue builtIn="false" value="z"/>
- <listOptionValue builtIn="false" value="boost_iostreams-mt"/>
- <listOptionValue builtIn="false" value="boost_system-mt"/>
- <listOptionValue builtIn="false" value="boost_filesystem-mt"/>
- </option>
- <option id="gnu.cpp.link.option.paths.974811544" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
- </option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.904916320" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
- <additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
- <additionalInput kind="additionalinput" paths="$(LIBS)"/>
- </inputType>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.archiver.1005231499" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.assembler.1318928675" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.604255673" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
- </tool>
- </toolChain>
- </folderInfo>
- </configuration>
- </storageModule>
- <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
- </cconfiguration>
- <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.818331963">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.818331963" moduleId="org.eclipse.cdt.core.settings" name="Release">
- <externalSettings/>
- <extensions>
- <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
- </extensions>
- </storageModule>
- <storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.818331963" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
- <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.818331963." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.1489025499" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
- <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1052477856" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
- <builder buildPath="${workspace_loc:/extract-ordering}/Release" id="cdt.managedbuild.builder.gnu.cross.33925527" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1505710417" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1884790737" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
- <option id="gnu.c.compiler.option.debugging.level.197048136" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.106898878" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.157115446" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
- <option id="gnu.cpp.compiler.option.optimization.level.1920378037" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.debugging.level.37950410" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.683027595" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1197641703" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1356351201" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
- <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.2053623412" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
- <additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
- <additionalInput kind="additionalinput" paths="$(LIBS)"/>
- </inputType>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.archiver.1988048517" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.assembler.1494470963" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.1553727957" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
- </tool>
- </toolChain>
- </folderInfo>
- </configuration>
- </storageModule>
- <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
- </cconfiguration>
- </storageModule>
- <storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <project id="extract-ordering.cdt.managedbuild.target.gnu.cross.exe.1840421491" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
- </storageModule>
- <storageModule moduleId="scannerConfiguration">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.818331963;cdt.managedbuild.config.gnu.cross.exe.release.818331963.;cdt.managedbuild.tool.gnu.cross.c.compiler.1505710417;cdt.managedbuild.tool.gnu.c.compiler.input.106898878">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
- </scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.818331963;cdt.managedbuild.config.gnu.cross.exe.release.818331963.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.157115446;cdt.managedbuild.tool.gnu.cpp.compiler.input.683027595">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
- </scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127;cdt.managedbuild.config.gnu.cross.exe.debug.1624346127.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1317297964;cdt.managedbuild.tool.gnu.cpp.compiler.input.1327002489">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
- </scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127;cdt.managedbuild.config.gnu.cross.exe.debug.1624346127.;cdt.managedbuild.tool.gnu.cross.c.compiler.1699460827;cdt.managedbuild.tool.gnu.c.compiler.input.719498215">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
- </scannerConfigBuildInfo>
- </storageModule>
- <storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
- <storageModule moduleId="refreshScope" versionNumber="2">
- <configuration configurationName="Release">
- <resource resourceType="PROJECT" workspacePath="/extract-ordering"/>
- </configuration>
- <configuration configurationName="Debug">
- <resource resourceType="PROJECT" workspacePath="/extract-ordering"/>
- </configuration>
- </storageModule>
-</cproject>
diff --git a/contrib/other-builds/extract-ordering/.project b/contrib/other-builds/extract-ordering/.project
deleted file mode 100644
index f95b064b7..000000000
--- a/contrib/other-builds/extract-ordering/.project
+++ /dev/null
@@ -1,74 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<projectDescription>
- <name>extract-ordering</name>
- <comment></comment>
- <projects>
- </projects>
- <buildSpec>
- <buildCommand>
- <name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
- <triggers>clean,full,incremental,</triggers>
- <arguments>
- </arguments>
- </buildCommand>
- <buildCommand>
- <name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
- <triggers>full,incremental,</triggers>
- <arguments>
- </arguments>
- </buildCommand>
- </buildSpec>
- <natures>
- <nature>org.eclipse.cdt.core.cnature</nature>
- <nature>org.eclipse.cdt.core.ccnature</nature>
- <nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
- <nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
- </natures>
- <linkedResources>
- <link>
- <name>InputFileStream.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.cpp</locationURI>
- </link>
- <link>
- <name>InputFileStream.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.h</locationURI>
- </link>
- <link>
- <name>OutputFileStream.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.cpp</locationURI>
- </link>
- <link>
- <name>OutputFileStream.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI>
- </link>
- <link>
- <name>SentenceAlignment.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.cpp</locationURI>
- </link>
- <link>
- <name>SentenceAlignment.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.h</locationURI>
- </link>
- <link>
- <name>extract-ordering-main.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ordering-main.cpp</locationURI>
- </link>
- <link>
- <name>tables-core.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/tables-core.cpp</locationURI>
- </link>
- <link>
- <name>tables-core.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/tables-core.h</locationURI>
- </link>
- </linkedResources>
-</projectDescription>
diff --git a/contrib/other-builds/extract-rules/.cproject b/contrib/other-builds/extract-rules/.cproject
index c1fa1a0cb..5591c45d4 100644
--- a/contrib/other-builds/extract-rules/.cproject
+++ b/contrib/other-builds/extract-rules/.cproject
@@ -1,8 +1,8 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
- <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292" moduleId="org.eclipse.cdt.core.settings" name="Debug">
+ <cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.1909818145">
+ <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.1909818145" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -14,45 +14,41 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
- <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.124769989" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
- <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.266544803" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
- <builder buildPath="${workspace_loc:/extract-rules}/Debug" id="cdt.managedbuild.builder.gnu.cross.335858926" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1376077469" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.947547329" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.option.debugging.level.426953885" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.c.compiler.option.include.paths.1671695899" name="Include paths (-I)" superClass="gnu.c.compiler.option.include.paths"/>
- <option id="gnu.c.compiler.option.include.files.1838960067" name="Include files (-include)" superClass="gnu.c.compiler.option.include.files"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.985831394" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.53480540" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
- <option id="gnu.cpp.compiler.option.optimization.level.1726371873" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.debugging.level.899893408" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.1099087456" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.1909818145" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
+ <folderInfo id="cdt.managedbuild.config.gnu.exe.debug.1909818145." name="/" resourcePath="">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.702289239" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
+ <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.769221744" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
+ <builder buildPath="${workspace_loc:/extract-rules}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.1538811811" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
+ <tool id="cdt.managedbuild.tool.gnu.archiver.base.417385938" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.274036343" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
+ <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.1227466042" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.640603457" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.231971122" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/include&quot;"/>
</option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.88958138" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.61884195" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1616232021" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1411857637" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
- <option id="gnu.cpp.link.option.libs.109133121" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
- <listOptionValue builtIn="false" value="z"/>
- <listOptionValue builtIn="false" value="boost_iostreams-mt"/>
- <listOptionValue builtIn="false" value="boost_system-mt"/>
- <listOptionValue builtIn="false" value="boost_filesystem-mt"/>
- </option>
- <option id="gnu.cpp.link.option.paths.1030374421" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
+ <tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.212337827" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.831633145" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.debug.option.debugging.level.1948518292" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1036034505" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.982611610" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.165444158" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
+ <option id="gnu.cpp.link.option.paths.1351410350" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
</option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.272393234" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+ <option id="gnu.cpp.link.option.libs.1356683866" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
+ <listOptionValue builtIn="false" value="boost_iostreams-mt"/>
+ <listOptionValue builtIn="false" value="z"/>
+ </option>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1569179988" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.archiver.1391783790" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.assembler.2066621509" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.1945638157" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+ <tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.1433595017" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.879628838" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
@@ -60,8 +56,8 @@
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
- <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.1200693544">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.1200693544" moduleId="org.eclipse.cdt.core.settings" name="Release">
+ <cconfiguration id="cdt.managedbuild.config.gnu.exe.release.130284564">
+ <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.130284564" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -73,31 +69,31 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.1200693544" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
- <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.1200693544." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.1113964425" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
- <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1722595316" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
- <builder buildPath="${workspace_loc:/extract-rules}/Release" id="cdt.managedbuild.builder.gnu.cross.691589832" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.593530229" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1320426973" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.option.debugging.level.947026588" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1217031668" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.130284564" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
+ <folderInfo id="cdt.managedbuild.config.gnu.exe.release.130284564." name="/" resourcePath="">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.933956450" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
+ <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1114636926" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
+ <builder buildPath="${workspace_loc:/extract-rules}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.1972638661" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
+ <tool id="cdt.managedbuild.tool.gnu.archiver.base.1382194499" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.605692631" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
+ <option id="gnu.cpp.compiler.exe.release.option.optimization.level.1543139461" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.debugging.level.307019882" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.771498068" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1401773863" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
- <option id="gnu.cpp.compiler.option.optimization.level.1504181086" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.debugging.level.1645775798" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1484987112" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ <tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.1332689416" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1372281360" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.release.option.debugging.level.2028047264" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1645644335" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1807515346" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.44234391" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
- <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1468234013" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+ <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.586184465" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.1438048814" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.733316869" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.archiver.467923425" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.assembler.1673313707" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.518252425" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+ <tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.225998350" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.1649512548" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
@@ -107,30 +103,23 @@
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <project id="extract-rules.cdt.managedbuild.target.gnu.cross.exe.1916763759" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
+ <project id="extract-rules.cdt.managedbuild.target.gnu.exe.1608401758" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292;cdt.managedbuild.config.gnu.cross.exe.debug.1438215292.;cdt.managedbuild.tool.gnu.cross.c.compiler.1376077469;cdt.managedbuild.tool.gnu.c.compiler.input.985831394">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.130284564;cdt.managedbuild.config.gnu.exe.release.130284564.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.1332689416;cdt.managedbuild.tool.gnu.c.compiler.input.1645644335">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292;cdt.managedbuild.config.gnu.cross.exe.debug.1438215292.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.53480540;cdt.managedbuild.tool.gnu.cpp.compiler.input.88958138">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.130284564;cdt.managedbuild.config.gnu.exe.release.130284564.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.605692631;cdt.managedbuild.tool.gnu.cpp.compiler.input.771498068">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1200693544;cdt.managedbuild.config.gnu.cross.exe.release.1200693544.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1401773863;cdt.managedbuild.tool.gnu.cpp.compiler.input.1484987112">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.1909818145;cdt.managedbuild.config.gnu.exe.debug.1909818145.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.212337827;cdt.managedbuild.tool.gnu.c.compiler.input.1036034505">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1200693544;cdt.managedbuild.config.gnu.cross.exe.release.1200693544.;cdt.managedbuild.tool.gnu.cross.c.compiler.593530229;cdt.managedbuild.tool.gnu.c.compiler.input.1217031668">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.1909818145;cdt.managedbuild.config.gnu.exe.debug.1909818145.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.274036343;cdt.managedbuild.tool.gnu.cpp.compiler.input.61884195">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
- <storageModule moduleId="refreshScope" versionNumber="2">
- <configuration configurationName="Release">
- <resource resourceType="PROJECT" workspacePath="/extract-rules"/>
- </configuration>
- <configuration configurationName="Debug">
- <resource resourceType="PROJECT" workspacePath="/extract-rules"/>
- </configuration>
- </storageModule>
+ <storageModule moduleId="refreshScope"/>
</cproject>
diff --git a/contrib/other-builds/extract-rules/.gitignore b/contrib/other-builds/extract-rules/.gitignore
deleted file mode 100644
index 98bbc3165..000000000
--- a/contrib/other-builds/extract-rules/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-/Debug
diff --git a/contrib/other-builds/extract-rules/.project b/contrib/other-builds/extract-rules/.project
index 29ffed2a9..79b72a58a 100644
--- a/contrib/other-builds/extract-rules/.project
+++ b/contrib/other-builds/extract-rules/.project
@@ -26,11 +26,6 @@
</natures>
<linkedResources>
<link>
- <name>ExtractedRule.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/ExtractedRule.h</locationURI>
- </link>
- <link>
<name>Hole.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/Hole.h</locationURI>
@@ -66,11 +61,6 @@
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI>
</link>
<link>
- <name>RuleExtractionOptions.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/RuleExtractionOptions.h</locationURI>
- </link>
- <link>
<name>SentenceAlignment.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.cpp</locationURI>
@@ -116,11 +106,6 @@
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-rules-main.cpp</locationURI>
</link>
<link>
- <name>gzfilebuf.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/gzfilebuf.h</locationURI>
- </link>
- <link>
<name>tables-core.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/tables-core.cpp</locationURI>
diff --git a/contrib/other-builds/extract/.cproject b/contrib/other-builds/extract/.cproject
index 83bc724c3..54c91657b 100644
--- a/contrib/other-builds/extract/.cproject
+++ b/contrib/other-builds/extract/.cproject
@@ -1,8 +1,8 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
- <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.386290689">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.386290689" moduleId="org.eclipse.cdt.core.settings" name="Debug">
+ <cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.2119725657">
+ <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.2119725657" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -14,43 +14,42 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.386290689" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
- <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.386290689." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.671913278" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
- <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1231657738" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
- <builder buildPath="${workspace_loc:/extract}/Debug" id="cdt.managedbuild.builder.gnu.cross.571044108" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.332036857" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.1292572253" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.option.debugging.level.1873227592" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1165888615" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1342023600" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
- <option id="gnu.cpp.compiler.option.optimization.level.698819695" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.debugging.level.1451916947" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.1702398011" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.2119725657" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
+ <folderInfo id="cdt.managedbuild.config.gnu.exe.debug.2119725657." name="/" resourcePath="">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1708444053" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
+ <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.645190133" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
+ <builder buildPath="${workspace_loc:/extract}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.1816006533" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
+ <tool id="cdt.managedbuild.tool.gnu.archiver.base.876593881" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1859867372" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
+ <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.1585316374" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.535775760" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.874182289" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
</option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.579278848" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1355287045" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1856691234" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1699542791" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
- <option id="gnu.cpp.link.option.libs.1880730637" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
- <listOptionValue builtIn="false" value="z"/>
+ <tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.1202195555" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.1840757183" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.debug.option.debugging.level.876682032" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.676382830" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.83617569" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.943560690" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
+ <option id="gnu.cpp.link.option.libs.599256050" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
- <listOptionValue builtIn="false" value="boost_system-mt"/>
- <listOptionValue builtIn="false" value="boost_filesystem-mt"/>
+ <listOptionValue builtIn="false" value="z"/>
</option>
- <option id="gnu.cpp.link.option.paths.298225069" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
+ <option id="gnu.cpp.link.option.paths.1223834298" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
+ <listOptionValue builtIn="false" value=""/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
</option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1339210059" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1129315792" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.archiver.976825054" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.assembler.1971927463" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.704926167" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+ <tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.942430539" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.1676263707" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
@@ -58,8 +57,8 @@
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
- <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.140124152">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.140124152" moduleId="org.eclipse.cdt.core.settings" name="Release">
+ <cconfiguration id="cdt.managedbuild.config.gnu.exe.release.1230189043">
+ <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1230189043" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -71,31 +70,31 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.140124152" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
- <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.140124152." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.1250240843" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
- <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.597335968" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
- <builder buildPath="${workspace_loc:/extract}/Release" id="cdt.managedbuild.builder.gnu.cross.95066247" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.2096762162" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.88795016" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.option.debugging.level.383328020" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.681105644" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1230189043" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
+ <folderInfo id="cdt.managedbuild.config.gnu.exe.release.1230189043." name="/" resourcePath="">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.280378247" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
+ <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1881910636" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
+ <builder buildPath="${workspace_loc:/extract}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.872962284" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
+ <tool id="cdt.managedbuild.tool.gnu.archiver.base.1342549060" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1229278587" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
+ <option id="gnu.cpp.compiler.exe.release.option.optimization.level.509799885" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.debugging.level.682561415" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1043901368" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1806684544" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
- <option id="gnu.cpp.compiler.option.optimization.level.553394848" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.debugging.level.1420596769" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1726759263" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ <tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.1628542348" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1033362550" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.release.option.debugging.level.429156793" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.389761516" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.234409052" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.320346578" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
- <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.2045242811" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+ <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.1299282565" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.461289078" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1586085606" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.archiver.417132714" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.assembler.1944597759" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.203400619" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+ <tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.1190745343" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.221147938" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
@@ -105,21 +104,21 @@
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <project id="extract.cdt.managedbuild.target.gnu.cross.exe.1220534104" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
+ <project id="extract.cdt.managedbuild.target.gnu.exe.1053550598" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.140124152;cdt.managedbuild.config.gnu.cross.exe.release.140124152.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1806684544;cdt.managedbuild.tool.gnu.cpp.compiler.input.1726759263">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.1230189043;cdt.managedbuild.config.gnu.exe.release.1230189043.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.1628542348;cdt.managedbuild.tool.gnu.c.compiler.input.389761516">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.140124152;cdt.managedbuild.config.gnu.cross.exe.release.140124152.;cdt.managedbuild.tool.gnu.cross.c.compiler.2096762162;cdt.managedbuild.tool.gnu.c.compiler.input.681105644">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.2119725657;cdt.managedbuild.config.gnu.exe.debug.2119725657.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.1202195555;cdt.managedbuild.tool.gnu.c.compiler.input.676382830">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.386290689;cdt.managedbuild.config.gnu.cross.exe.debug.386290689.;cdt.managedbuild.tool.gnu.cross.c.compiler.332036857;cdt.managedbuild.tool.gnu.c.compiler.input.1165888615">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.1230189043;cdt.managedbuild.config.gnu.exe.release.1230189043.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1229278587;cdt.managedbuild.tool.gnu.cpp.compiler.input.1043901368">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.386290689;cdt.managedbuild.config.gnu.cross.exe.debug.386290689.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1342023600;cdt.managedbuild.tool.gnu.cpp.compiler.input.579278848">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.2119725657;cdt.managedbuild.config.gnu.exe.debug.2119725657.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1859867372;cdt.managedbuild.tool.gnu.cpp.compiler.input.1355287045">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
diff --git a/contrib/other-builds/extractor/.cproject b/contrib/other-builds/extractor/.cproject
index a5fada0f1..c29c34a27 100644
--- a/contrib/other-builds/extractor/.cproject
+++ b/contrib/other-builds/extractor/.cproject
@@ -25,6 +25,7 @@
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.285958391" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.966722418" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1839105433" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
@@ -46,7 +47,6 @@
<listOptionValue builtIn="false" value="boost_system-mt"/>
<listOptionValue builtIn="false" value="boost_thread-mt"/>
<listOptionValue builtIn="false" value="z"/>
- <listOptionValue builtIn="false" value="rt"/>
<listOptionValue builtIn="false" value="pthread"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.656319745" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
diff --git a/contrib/other-builds/lm/.cproject b/contrib/other-builds/lm/.cproject
index e3e47fd7e..c2dad0f8d 100644
--- a/contrib/other-builds/lm/.cproject
+++ b/contrib/other-builds/lm/.cproject
@@ -11,12 +11,12 @@
</externalSetting>
</externalSettings>
<extensions>
- <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@@ -59,8 +59,18 @@
</tool>
</toolChain>
</folderInfo>
+ <folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750.38452119" name="/" resourcePath="wrappers">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.1621748368" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug" unusedChildren="">
+ <tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.2002161718" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.1252826468"/>
+ <tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.2138497585" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.1024598065"/>
+ <tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.86927135" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.62265891"/>
+ <tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.315991018" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base.775866405"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1319557326" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1024092140"/>
+ <tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1042051280" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.34201722"/>
+ </toolChain>
+ </folderInfo>
<sourceEntries>
- <entry excluding="left_test.cc|model_test.cc" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
+ <entry excluding="wrappers|left_test.cc|model_test.cc" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>
@@ -70,13 +80,13 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.203229648" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
- <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
diff --git a/contrib/other-builds/lm/.project b/contrib/other-builds/lm/.project
index 9498bb19e..7cc135fc6 100644
--- a/contrib/other-builds/lm/.project
+++ b/contrib/other-builds/lm/.project
@@ -87,6 +87,11 @@
<locationURI>PARENT-3-PROJECT_LOC/lm/.DS_Store</locationURI>
</link>
<link>
+ <name>CMakeLists.txt</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/CMakeLists.txt</locationURI>
+ </link>
+ <link>
<name>COPYING</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/COPYING</locationURI>
@@ -122,6 +127,11 @@
<locationURI>PARENT-3-PROJECT_LOC/lm/bhiksha.hh</locationURI>
</link>
<link>
+ <name>bin</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
<name>binary_format.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/binary_format.cc</locationURI>
@@ -142,6 +152,16 @@
<locationURI>PARENT-3-PROJECT_LOC/lm/build_binary</locationURI>
</link>
<link>
+ <name>build_binary_main.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/build_binary_main.cc</locationURI>
+ </link>
+ <link>
+ <name>builder</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
<name>clean.sh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/clean.sh</locationURI>
@@ -172,6 +192,16 @@
<locationURI>PARENT-3-PROJECT_LOC/lm/facade.hh</locationURI>
</link>
<link>
+ <name>filter</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>fragment_main.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/fragment_main.cc</locationURI>
+ </link>
+ <link>
<name>left.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/left.hh</locationURI>
@@ -257,6 +287,11 @@
<locationURI>PARENT-3-PROJECT_LOC/lm/query</locationURI>
</link>
<link>
+ <name>query_main.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/query_main.cc</locationURI>
+ </link>
+ <link>
<name>read_arpa.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/read_arpa.cc</locationURI>
@@ -292,6 +327,16 @@
<locationURI>PARENT-3-PROJECT_LOC/lm/search_trie.hh</locationURI>
</link>
<link>
+ <name>sizes.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/sizes.cc</locationURI>
+ </link>
+ <link>
+ <name>sizes.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/sizes.hh</locationURI>
+ </link>
+ <link>
<name>state.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/state.hh</locationURI>
@@ -376,5 +421,990 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/word_index.hh</locationURI>
</link>
+ <link>
+ <name>wrappers</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/left_test.test</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/model_test.test</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/order.log</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/order.log</locationURI>
+ </link>
+ <link>
+ <name>bin/partial_test.test</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>builder/Jamfile</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/Jamfile</locationURI>
+ </link>
+ <link>
+ <name>builder/README.md</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/README.md</locationURI>
+ </link>
+ <link>
+ <name>builder/TODO</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/TODO</locationURI>
+ </link>
+ <link>
+ <name>builder/adjust_counts.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/adjust_counts.cc</locationURI>
+ </link>
+ <link>
+ <name>builder/adjust_counts.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/adjust_counts.hh</locationURI>
+ </link>
+ <link>
+ <name>builder/adjust_counts_test.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/adjust_counts_test.cc</locationURI>
+ </link>
+ <link>
+ <name>builder/bin</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>builder/corpus_count.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/corpus_count.cc</locationURI>
+ </link>
+ <link>
+ <name>builder/corpus_count.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/corpus_count.hh</locationURI>
+ </link>
+ <link>
+ <name>builder/corpus_count_test.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/corpus_count_test.cc</locationURI>
+ </link>
+ <link>
+ <name>builder/discount.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/discount.hh</locationURI>
+ </link>
+ <link>
+ <name>builder/dump_counts_main.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/dump_counts_main.cc</locationURI>
+ </link>
+ <link>
+ <name>builder/hash_gamma.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/hash_gamma.hh</locationURI>
+ </link>
+ <link>
+ <name>builder/header_info.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/header_info.hh</locationURI>
+ </link>
+ <link>
+ <name>builder/initial_probabilities.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/initial_probabilities.cc</locationURI>
+ </link>
+ <link>
+ <name>builder/initial_probabilities.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/initial_probabilities.hh</locationURI>
+ </link>
+ <link>
+ <name>builder/interpolate.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/interpolate.cc</locationURI>
+ </link>
+ <link>
+ <name>builder/interpolate.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/interpolate.hh</locationURI>
+ </link>
+ <link>
+ <name>builder/joint_order.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/joint_order.hh</locationURI>
+ </link>
+ <link>
+ <name>builder/lmplz_main.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/lmplz_main.cc</locationURI>
+ </link>
+ <link>
+ <name>builder/ngram.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/ngram.hh</locationURI>
+ </link>
+ <link>
+ <name>builder/ngram_stream.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/ngram_stream.hh</locationURI>
+ </link>
+ <link>
+ <name>builder/pipeline.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/pipeline.cc</locationURI>
+ </link>
+ <link>
+ <name>builder/pipeline.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/pipeline.hh</locationURI>
+ </link>
+ <link>
+ <name>builder/print.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/print.cc</locationURI>
+ </link>
+ <link>
+ <name>builder/print.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/print.hh</locationURI>
+ </link>
+ <link>
+ <name>builder/sort.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/sort.hh</locationURI>
+ </link>
+ <link>
+ <name>filter/Jamfile</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/filter/Jamfile</locationURI>
+ </link>
+ <link>
+ <name>filter/arpa_io.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/filter/arpa_io.cc</locationURI>
+ </link>
+ <link>
+ <name>filter/arpa_io.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/filter/arpa_io.hh</locationURI>
+ </link>
+ <link>
+ <name>filter/bin</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>filter/count_io.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/filter/count_io.hh</locationURI>
+ </link>
+ <link>
+ <name>filter/filter_main.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/filter/filter_main.cc</locationURI>
+ </link>
+ <link>
+ <name>filter/format.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/filter/format.hh</locationURI>
+ </link>
+ <link>
+ <name>filter/phrase.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/filter/phrase.cc</locationURI>
+ </link>
+ <link>
+ <name>filter/phrase.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/filter/phrase.hh</locationURI>
+ </link>
+ <link>
+ <name>filter/phrase_table_vocab_main.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/filter/phrase_table_vocab_main.cc</locationURI>
+ </link>
+ <link>
+ <name>filter/thread.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/filter/thread.hh</locationURI>
+ </link>
+ <link>
+ <name>filter/vocab.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/filter/vocab.cc</locationURI>
+ </link>
+ <link>
+ <name>filter/vocab.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/filter/vocab.hh</locationURI>
+ </link>
+ <link>
+ <name>filter/wrapper.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/filter/wrapper.hh</locationURI>
+ </link>
+ <link>
+ <name>wrappers/README</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/wrappers/README</locationURI>
+ </link>
+ <link>
+ <name>wrappers/nplm.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/wrappers/nplm.cc</locationURI>
+ </link>
+ <link>
+ <name>wrappers/nplm.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/wrappers/nplm.hh</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/left_test.test/clang-darwin-4.2.1</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/left_test.test/darwin-4.2.1</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/model_test.test/clang-darwin-4.2.1</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/model_test.test/darwin-4.2.1</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/partial_test.test/clang-darwin-4.2.1</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/partial_test.test/darwin-4.2.1</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/clang-darwin-4.2.1</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/darwin-4.2.1</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>filter/bin/clang-darwin-4.2.1</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>filter/bin/darwin-4.2.1</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/left_test.test/clang-darwin-4.2.1/release</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/left_test.test/darwin-4.2.1/release</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/model_test.test/clang-darwin-4.2.1/release</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/model_test.test/darwin-4.2.1/release</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/partial_test.test/clang-darwin-4.2.1/release</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/partial_test.test/darwin-4.2.1/release</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/clang-darwin-4.2.1/release</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/darwin-4.2.1/release</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>filter/bin/clang-darwin-4.2.1/release</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>filter/bin/darwin-4.2.1/release</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/left_test.test/clang-darwin-4.2.1/release/debug-symbols-on</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/left_test.test/darwin-4.2.1/release/debug-symbols-on</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/model_test.test/clang-darwin-4.2.1/release/debug-symbols-on</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/model_test.test/darwin-4.2.1/release/debug-symbols-on</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/partial_test.test/clang-darwin-4.2.1/release/debug-symbols-on</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/partial_test.test/darwin-4.2.1/release/debug-symbols-on</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/clang-darwin-4.2.1/release/debug-symbols-on</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/darwin-4.2.1/release/debug-symbols-on</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>filter/bin/clang-darwin-4.2.1/release/debug-symbols-on</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>filter/bin/darwin-4.2.1/release/debug-symbols-on</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/left_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/left_test.test/darwin-4.2.1/release/debug-symbols-on/link-static</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/model_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/model_test.test/darwin-4.2.1/release/debug-symbols-on/link-static</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/partial_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/partial_test.test/darwin-4.2.1/release/debug-symbols-on/link-static</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/darwin-4.2.1/release/debug-symbols-on/link-static</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>filter/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>filter/bin/darwin-4.2.1/release/debug-symbols-on/link-static</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/bhiksha.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/bhiksha.o</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/binary_format.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/binary_format.o</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/build_binary</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/build_binary</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/build_binary_main.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/build_binary_main.o</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/config.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/config.o</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/fragment</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/fragment</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/fragment_main.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/fragment_main.o</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/lm_exception.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/lm_exception.o</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/model.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/model.o</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/quantize.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/quantize.o</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/query</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/query</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/query_main.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/query_main.o</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/read_arpa.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/read_arpa.o</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/search_hashed.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/search_hashed.o</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/search_trie.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/search_trie.o</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/sizes.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/sizes.o</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/trie.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/trie.o</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/trie_sort.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/trie_sort.o</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/value_build.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/value_build.o</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/virtual_interface.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/virtual_interface.o</locationURI>
+ </link>
+ <link>
+ <name>bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/vocab.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/vocab.o</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/bhiksha.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/bhiksha.o</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/binary_format.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/binary_format.o</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/build_binary</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/build_binary</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/build_binary_main.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/build_binary_main.o</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/config.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/config.o</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/fragment</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/fragment</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/fragment_main.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/fragment_main.o</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/lm_exception.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/lm_exception.o</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/model.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/model.o</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/quantize.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/quantize.o</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/query</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/query</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/query_main.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/query_main.o</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/read_arpa.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/read_arpa.o</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/search_hashed.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/search_hashed.o</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/search_trie.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/search_trie.o</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/sizes.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/sizes.o</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/trie.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/trie.o</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/trie_sort.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/trie_sort.o</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/value_build.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/value_build.o</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/virtual_interface.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/virtual_interface.o</locationURI>
+ </link>
+ <link>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/vocab.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/vocab.o</locationURI>
+ </link>
+ <link>
+ <name>bin/left_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/left_test.test/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/model_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/model_test.test/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/partial_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/partial_test.test/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>filter/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>filter/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>bin/left_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/left_test</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/left_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/left_test</locationURI>
+ </link>
+ <link>
+ <name>bin/left_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/left_test.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/left_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/left_test.o</locationURI>
+ </link>
+ <link>
+ <name>bin/left_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/left_test.output</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/left_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/left_test.output</locationURI>
+ </link>
+ <link>
+ <name>bin/left_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/left_test.run</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/left_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/left_test.run</locationURI>
+ </link>
+ <link>
+ <name>bin/left_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/left_test.test</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/left_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/left_test.test</locationURI>
+ </link>
+ <link>
+ <name>bin/left_test.test/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/left_test.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/left_test.test/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/left_test.o</locationURI>
+ </link>
+ <link>
+ <name>bin/model_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/model_test</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/model_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/model_test</locationURI>
+ </link>
+ <link>
+ <name>bin/model_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/model_test.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/model_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/model_test.o</locationURI>
+ </link>
+ <link>
+ <name>bin/model_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/model_test.output</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/model_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/model_test.output</locationURI>
+ </link>
+ <link>
+ <name>bin/model_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/model_test.run</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/model_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/model_test.run</locationURI>
+ </link>
+ <link>
+ <name>bin/model_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/model_test.test</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/model_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/model_test.test</locationURI>
+ </link>
+ <link>
+ <name>bin/model_test.test/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/model_test.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/model_test.test/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/model_test.o</locationURI>
+ </link>
+ <link>
+ <name>bin/partial_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/partial_test</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/partial_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/partial_test</locationURI>
+ </link>
+ <link>
+ <name>bin/partial_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/partial_test.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/partial_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/partial_test.o</locationURI>
+ </link>
+ <link>
+ <name>bin/partial_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/partial_test.output</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/partial_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/partial_test.output</locationURI>
+ </link>
+ <link>
+ <name>bin/partial_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/partial_test.run</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/partial_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/partial_test.run</locationURI>
+ </link>
+ <link>
+ <name>bin/partial_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/partial_test.test</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/partial_test.test/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/partial_test.test</locationURI>
+ </link>
+ <link>
+ <name>bin/partial_test.test/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/partial_test.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/bin/partial_test.test/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/partial_test.o</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/adjust_counts.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/adjust_counts.o</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/corpus_count.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/corpus_count.o</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/dump_counts</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/dump_counts</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/dump_counts_main.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/dump_counts_main.o</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/initial_probabilities.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/initial_probabilities.o</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/interpolate.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/interpolate.o</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/lmplz</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/lmplz</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/lmplz_main.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/lmplz_main.o</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/pipeline.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/pipeline.o</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/print.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/print.o</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/adjust_counts.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/adjust_counts.o</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/corpus_count.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/corpus_count.o</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/initial_probabilities.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/initial_probabilities.o</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/interpolate.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/interpolate.o</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/lmplz</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/lmplz</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/lmplz_main.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/lmplz_main.o</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/pipeline.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/pipeline.o</locationURI>
+ </link>
+ <link>
+ <name>builder/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/print.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/builder/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/print.o</locationURI>
+ </link>
+ <link>
+ <name>filter/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/arpa_io.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/filter/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/arpa_io.o</locationURI>
+ </link>
+ <link>
+ <name>filter/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/filter</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/filter/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/filter</locationURI>
+ </link>
+ <link>
+ <name>filter/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/main.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/filter/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/main.o</locationURI>
+ </link>
+ <link>
+ <name>filter/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/phrase.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/filter/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/phrase.o</locationURI>
+ </link>
+ <link>
+ <name>filter/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/vocab.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/filter/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/vocab.o</locationURI>
+ </link>
+ <link>
+ <name>filter/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/arpa_io.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/filter/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/arpa_io.o</locationURI>
+ </link>
+ <link>
+ <name>filter/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/filter</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/filter/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/filter</locationURI>
+ </link>
+ <link>
+ <name>filter/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/main.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/filter/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/main.o</locationURI>
+ </link>
+ <link>
+ <name>filter/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/phrase.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/filter/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/phrase.o</locationURI>
+ </link>
+ <link>
+ <name>filter/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/vocab.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/filter/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/vocab.o</locationURI>
+ </link>
</linkedResources>
</projectDescription>
diff --git a/contrib/other-builds/manual-label/.cproject b/contrib/other-builds/manual-label/.cproject
index 2efd96e70..8e0dcc8e2 100644
--- a/contrib/other-builds/manual-label/.cproject
+++ b/contrib/other-builds/manual-label/.cproject
@@ -1,54 +1,54 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
- <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1096604639">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1096604639" moduleId="org.eclipse.cdt.core.settings" name="Debug">
+ <cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.2107801703">
+ <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.2107801703" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1096604639" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
- <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1096604639." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.1899954923" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
- <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1645930772" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
- <builder buildPath="${workspace_loc:/manual-label/Debug}" id="cdt.managedbuild.builder.gnu.cross.1703642277" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1938374607" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.1888648788" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.option.debugging.level.1838052643" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.798368516" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.950686503" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
- <option id="gnu.cpp.compiler.option.optimization.level.153015988" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.debugging.level.418888584" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.406065865" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.2107801703" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
+ <folderInfo id="cdt.managedbuild.config.gnu.exe.debug.2107801703." name="/" resourcePath="">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.502948364" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
+ <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.1431969079" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
+ <builder buildPath="${workspace_loc:/manual-label}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.2101075234" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
+ <tool id="cdt.managedbuild.tool.gnu.archiver.base.1118840081" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.2037265673" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
+ <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.400985496" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1160903812" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.404589863" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <listOptionValue builtIn="false" value="${workspace_loc:}/../.."/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost&quot;"/>
</option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.596589558" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.967940596" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1741441821" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1626431978" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
- <option id="gnu.cpp.link.option.libs.1886912770" superClass="gnu.cpp.link.option.libs" valueType="libs">
- <listOptionValue builtIn="false" value="boost_program_options-mt"/>
+ <tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.789243964" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.2033266575" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.debug.option.debugging.level.1568929819" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.676866714" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.254144861" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.319879082" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
+ <option id="gnu.cpp.link.option.paths.132164474" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
</option>
- <option id="gnu.cpp.link.option.paths.1541583695" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/lib64&quot;"/>
+ <option id="gnu.cpp.link.option.libs.1017214824" superClass="gnu.cpp.link.option.libs" valueType="libs">
+ <listOptionValue builtIn="false" value="boost_program_options-mt"/>
</option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1367999206" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1672776758" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.archiver.31522559" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.assembler.826957235" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.350181339" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+ <tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.1104732611" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.372096550" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
@@ -56,44 +56,44 @@
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
- <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.1335379815">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.1335379815" moduleId="org.eclipse.cdt.core.settings" name="Release">
+ <cconfiguration id="cdt.managedbuild.config.gnu.exe.release.649050588">
+ <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.649050588" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.1335379815" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
- <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.1335379815." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.97427761" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
- <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.564169339" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
- <builder buildPath="${workspace_loc:/manual-label/Release}" id="cdt.managedbuild.builder.gnu.cross.663164336" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.2104943437" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.2135645103" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.option.debugging.level.764935013" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1841809129" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.649050588" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
+ <folderInfo id="cdt.managedbuild.config.gnu.exe.release.649050588." name="/" resourcePath="">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.1107402972" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
+ <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1038954684" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
+ <builder buildPath="${workspace_loc:/manual-label}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.100518450" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
+ <tool id="cdt.managedbuild.tool.gnu.archiver.base.2005888378" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1743303968" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
+ <option id="gnu.cpp.compiler.exe.release.option.optimization.level.968169340" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.debugging.level.977676916" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1889240027" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1180544943" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
- <option id="gnu.cpp.compiler.option.optimization.level.1877584345" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.debugging.level.935490779" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1084298301" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ <tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.924128295" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1914416581" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.release.option.debugging.level.826081780" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2048171432" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.355530813" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.940299092" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
- <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.17718999" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+ <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.940327646" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.369758737" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1186766936" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.archiver.1527322008" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.assembler.480337803" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.1788533940" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+ <tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.266174128" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.558116084" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
@@ -103,22 +103,23 @@
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <project id="manual-label.cdt.managedbuild.target.gnu.cross.exe.2117548180" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
+ <project id="manual-label.cdt.managedbuild.target.gnu.exe.1701243340" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1335379815;cdt.managedbuild.config.gnu.cross.exe.release.1335379815.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1180544943;cdt.managedbuild.tool.gnu.cpp.compiler.input.1084298301">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.649050588;cdt.managedbuild.config.gnu.exe.release.649050588.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1743303968;cdt.managedbuild.tool.gnu.cpp.compiler.input.1889240027">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1096604639;cdt.managedbuild.config.gnu.cross.exe.debug.1096604639.;cdt.managedbuild.tool.gnu.cross.c.compiler.1938374607;cdt.managedbuild.tool.gnu.c.compiler.input.798368516">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.649050588;cdt.managedbuild.config.gnu.exe.release.649050588.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.924128295;cdt.managedbuild.tool.gnu.c.compiler.input.2048171432">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1335379815;cdt.managedbuild.config.gnu.cross.exe.release.1335379815.;cdt.managedbuild.tool.gnu.cross.c.compiler.2104943437;cdt.managedbuild.tool.gnu.c.compiler.input.1841809129">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.2107801703;cdt.managedbuild.config.gnu.exe.debug.2107801703.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.2037265673;cdt.managedbuild.tool.gnu.cpp.compiler.input.967940596">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1096604639;cdt.managedbuild.config.gnu.cross.exe.debug.1096604639.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.950686503;cdt.managedbuild.tool.gnu.cpp.compiler.input.596589558">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.2107801703;cdt.managedbuild.config.gnu.exe.debug.2107801703.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.789243964;cdt.managedbuild.tool.gnu.c.compiler.input.676866714">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
+ <storageModule moduleId="refreshScope"/>
</cproject>
diff --git a/contrib/other-builds/mira/.cproject b/contrib/other-builds/mira/.cproject
deleted file mode 100644
index b80748286..000000000
--- a/contrib/other-builds/mira/.cproject
+++ /dev/null
@@ -1,177 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
- <storageModule moduleId="org.eclipse.cdt.core.settings">
- <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1385309092">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1385309092" moduleId="org.eclipse.cdt.core.settings" name="Debug">
- <externalSettings/>
- <extensions>
- <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
- </extensions>
- </storageModule>
- <storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1385309092" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
- <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1385309092." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.377583226" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
- <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.2071063316" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
- <builder buildPath="${workspace_loc:/mira/Debug}" id="cdt.managedbuild.builder.gnu.cross.881204887" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1218877049" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.1094111510" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.option.debugging.level.2142370493" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1560615310" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
- </tool>
- <tool command="g++" id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.115638939" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
- <option id="gnu.cpp.compiler.option.optimization.level.1315998281" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.debugging.level.778416356" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.preprocessor.def.317569168" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
- <listOptionValue builtIn="false" value="HAVE_BOOST"/>
- <listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
- <listOptionValue builtIn="false" value="TRACE_ENABLE"/>
- <listOptionValue builtIn="false" value="WITH_THREADS"/>
- </option>
- <option id="gnu.cpp.compiler.option.include.paths.1743631842" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
- </option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1454738757" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1480777831" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.485611005" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
- <option id="gnu.cpp.link.option.libs.1007486529" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
- <listOptionValue builtIn="false" value="moses"/>
- <listOptionValue builtIn="false" value="irstlm"/>
- <listOptionValue builtIn="false" value="dstruct"/>
- <listOptionValue builtIn="false" value="flm"/>
- <listOptionValue builtIn="false" value="oolm"/>
- <listOptionValue builtIn="false" value="lattice"/>
- <listOptionValue builtIn="false" value="misc"/>
- <listOptionValue builtIn="false" value="dalm"/>
- <listOptionValue builtIn="false" value="search"/>
- <listOptionValue builtIn="false" value="RandLM"/>
- <listOptionValue builtIn="false" value="OnDiskPt"/>
- <listOptionValue builtIn="false" value="lm"/>
- <listOptionValue builtIn="false" value="util"/>
- <listOptionValue builtIn="false" value="boost_iostreams-mt"/>
- <listOptionValue builtIn="false" value="boost_serialization"/>
- <listOptionValue builtIn="false" value="boost_system-mt"/>
- <listOptionValue builtIn="false" value="boost_thread-mt"/>
- <listOptionValue builtIn="false" value="boost_filesystem-mt"/>
- <listOptionValue builtIn="false" value="boost_program_options-mt"/>
- <listOptionValue builtIn="false" value="pthread"/>
- <listOptionValue builtIn="false" value="z"/>
- <listOptionValue builtIn="false" value="bz2"/>
- <listOptionValue builtIn="false" value="dl"/>
- <listOptionValue builtIn="false" value="rt"/>
- </option>
- <option id="gnu.cpp.link.option.paths.132082917" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../irstlm/lib&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../DALM/lib&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../nplm/lib&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../randlm/lib&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../cmph/lib&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/macosx&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/i686-m64&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/i686&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/lm/Debug&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/OnDiskPt/Debug&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/search/Debug&quot;"/>
- <listOptionValue builtIn="false" value="/opt/local/lib"/>
- </option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1827477602" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
- <additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
- <additionalInput kind="additionalinput" paths="$(LIBS)"/>
- </inputType>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.archiver.1554055737" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.assembler.1335019965" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.1106765201" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
- </tool>
- </toolChain>
- </folderInfo>
- </configuration>
- </storageModule>
- <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
- </cconfiguration>
- <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.2038764866">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.2038764866" moduleId="org.eclipse.cdt.core.settings" name="Release">
- <externalSettings/>
- <extensions>
- <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
- </extensions>
- </storageModule>
- <storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.2038764866" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
- <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.2038764866." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.1722081106" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
- <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.36030994" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
- <builder buildPath="${workspace_loc:/mira/Release}" id="cdt.managedbuild.builder.gnu.cross.329863268" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.299271422" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1049770857" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.option.debugging.level.1354488968" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.674520633" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.568828285" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
- <option id="gnu.cpp.compiler.option.optimization.level.1042930447" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.debugging.level.305563840" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1424960921" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.460791828" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.945282347" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
- <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.561813601" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
- <additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
- <additionalInput kind="additionalinput" paths="$(LIBS)"/>
- </inputType>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.archiver.1813861310" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.assembler.991451934" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.1702585996" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
- </tool>
- </toolChain>
- </folderInfo>
- </configuration>
- </storageModule>
- <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
- </cconfiguration>
- </storageModule>
- <storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <project id="mira.cdt.managedbuild.target.gnu.cross.exe.1862989567" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
- </storageModule>
- <storageModule moduleId="scannerConfiguration">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1385309092;cdt.managedbuild.config.gnu.cross.exe.debug.1385309092.;cdt.managedbuild.tool.gnu.cross.c.compiler.1218877049;cdt.managedbuild.tool.gnu.c.compiler.input.1560615310">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
- </scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.2038764866;cdt.managedbuild.config.gnu.cross.exe.release.2038764866.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.568828285;cdt.managedbuild.tool.gnu.cpp.compiler.input.1424960921">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
- </scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1385309092;cdt.managedbuild.config.gnu.cross.exe.debug.1385309092.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.115638939;cdt.managedbuild.tool.gnu.cpp.compiler.input.1454738757">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
- </scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.2038764866;cdt.managedbuild.config.gnu.cross.exe.release.2038764866.;cdt.managedbuild.tool.gnu.cross.c.compiler.299271422;cdt.managedbuild.tool.gnu.c.compiler.input.674520633">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
- </scannerConfigBuildInfo>
- </storageModule>
- <storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
- <storageModule moduleId="refreshScope" versionNumber="2">
- <configuration configurationName="Release">
- <resource resourceType="PROJECT" workspacePath="/mira"/>
- </configuration>
- <configuration configurationName="Debug">
- <resource resourceType="PROJECT" workspacePath="/mira"/>
- </configuration>
- </storageModule>
- <storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
- <storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
-</cproject>
diff --git a/contrib/other-builds/mira/.project b/contrib/other-builds/mira/.project
deleted file mode 100644
index 03838731f..000000000
--- a/contrib/other-builds/mira/.project
+++ /dev/null
@@ -1,81 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<projectDescription>
- <name>mira</name>
- <comment></comment>
- <projects>
- <project>mert_lib</project>
- <project>moses</project>
- </projects>
- <buildSpec>
- <buildCommand>
- <name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
- <triggers>clean,full,incremental,</triggers>
- <arguments>
- </arguments>
- </buildCommand>
- <buildCommand>
- <name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
- <triggers>full,incremental,</triggers>
- <arguments>
- </arguments>
- </buildCommand>
- </buildSpec>
- <natures>
- <nature>org.eclipse.cdt.core.cnature</nature>
- <nature>org.eclipse.cdt.core.ccnature</nature>
- <nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
- <nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
- </natures>
- <linkedResources>
- <link>
- <name>Decoder.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/mira/Decoder.cpp</locationURI>
- </link>
- <link>
- <name>Decoder.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/mira/Decoder.h</locationURI>
- </link>
- <link>
- <name>Hildreth.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/mira/Hildreth.cpp</locationURI>
- </link>
- <link>
- <name>Hildreth.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/mira/Hildreth.h</locationURI>
- </link>
- <link>
- <name>HypothesisQueue.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/mira/HypothesisQueue.cpp</locationURI>
- </link>
- <link>
- <name>HypothesisQueue.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/mira/HypothesisQueue.h</locationURI>
- </link>
- <link>
- <name>Main.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/mira/Main.cpp</locationURI>
- </link>
- <link>
- <name>Main.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/mira/Main.h</locationURI>
- </link>
- <link>
- <name>MiraOptimiser.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/mira/MiraOptimiser.cpp</locationURI>
- </link>
- <link>
- <name>Perceptron.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/mira/Perceptron.cpp</locationURI>
- </link>
- </linkedResources>
-</projectDescription>
diff --git a/contrib/other-builds/moses-chart-cmd/.cproject b/contrib/other-builds/moses-chart-cmd/.cproject
index 0d720dbc2..2234711ae 100644
--- a/contrib/other-builds/moses-chart-cmd/.cproject
+++ b/contrib/other-builds/moses-chart-cmd/.cproject
@@ -1,17 +1,19 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
+<?fileVersion 4.0.0?>
+
+<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.162355801">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.162355801" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
- <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@@ -69,14 +71,7 @@
<option id="gnu.cpp.link.option.libs.1177721357" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="irstlm"/>
- <listOptionValue builtIn="false" value="dstruct"/>
- <listOptionValue builtIn="false" value="dalm"/>
- <listOptionValue builtIn="false" value="flm"/>
- <listOptionValue builtIn="false" value="oolm"/>
- <listOptionValue builtIn="false" value="lattice"/>
- <listOptionValue builtIn="false" value="misc"/>
<listOptionValue builtIn="false" value="search"/>
- <listOptionValue builtIn="false" value="RandLM"/>
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
@@ -89,7 +84,6 @@
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
<listOptionValue builtIn="false" value="dl"/>
- <listOptionValue builtIn="false" value="rt"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.128214028" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
@@ -109,13 +103,13 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.516628324" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
- <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
diff --git a/contrib/other-builds/moses-cmd/.cproject b/contrib/other-builds/moses-cmd/.cproject
index 52c457447..9771f4d4a 100644
--- a/contrib/other-builds/moses-cmd/.cproject
+++ b/contrib/other-builds/moses-cmd/.cproject
@@ -1,17 +1,19 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
+<?fileVersion 4.0.0?>
+
+<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.461114338">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.461114338" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
- <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@@ -66,14 +68,7 @@
<option id="gnu.cpp.link.option.libs.998577284" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="irstlm"/>
- <listOptionValue builtIn="false" value="dstruct"/>
- <listOptionValue builtIn="false" value="flm"/>
- <listOptionValue builtIn="false" value="oolm"/>
- <listOptionValue builtIn="false" value="lattice"/>
- <listOptionValue builtIn="false" value="misc"/>
- <listOptionValue builtIn="false" value="dalm"/>
<listOptionValue builtIn="false" value="search"/>
- <listOptionValue builtIn="false" value="RandLM"/>
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
@@ -86,7 +81,6 @@
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
<listOptionValue builtIn="false" value="dl"/>
- <listOptionValue builtIn="false" value="rt"/>
</option>
<option id="gnu.cpp.link.option.userobjs.1542590830" name="Other objects" superClass="gnu.cpp.link.option.userobjs"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.983725033" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
@@ -110,13 +104,13 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.2121690436" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
- <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
diff --git a/contrib/other-builds/moses/.cproject b/contrib/other-builds/moses/.cproject
index 0d6abbb4f..669c64704 100644
--- a/contrib/other-builds/moses/.cproject
+++ b/contrib/other-builds/moses/.cproject
@@ -36,7 +36,6 @@
<listOptionValue builtIn="false" value="/opt/local/include/"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../irstlm/include"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../nplm/src&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../eigen&quot;"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../srilm/include"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../DALM/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../DALM/darts-clone&quot;"/>
@@ -52,12 +51,10 @@
<listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
<listOptionValue builtIn="false" value="LM_IRST"/>
- <listOptionValue builtIn="false" value="LM_DALM"/>
- <listOptionValue builtIn="false" value="LM_RAND"/>
- <listOptionValue builtIn="false" value="LM_NPLM"/>
<listOptionValue builtIn="false" value="_FILE_OFFSET_BIT=64"/>
<listOptionValue builtIn="false" value="_LARGE_FILES"/>
</option>
+ <option id="gnu.cpp.compiler.option.dialect.std.1734198568" name="Language standard" superClass="gnu.cpp.compiler.option.dialect.std" value="gnu.cpp.compiler.dialect.c++98" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1905116220" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.2126314903" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
@@ -80,8 +77,72 @@
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.511477442" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1211280539" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.790052015" name="IRST.h" rcbsApplicability="disable" resourcePath="LM/IRST.h" toolsToInvoke=""/>
+ <fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1845526535" name="SRI.h" rcbsApplicability="disable" resourcePath="LM/SRI.h" toolsToInvoke=""/>
+ <folderInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1917714409" name="/" resourcePath="TranslationModel">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1874031326" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug" unusedChildren="">
+ <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
+ <tool id="cdt.managedbuild.tool.gnu.archiver.base.1671760867" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base.1976472988"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.2072639167" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327">
+ <option id="gnu.cpp.compiler.option.preprocessor.def.1387618215" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+ <listOptionValue builtIn="false" value="IS_ECLIPSE"/>
+ <listOptionValue builtIn="false" value="HAVE_PROBINGPT"/>
+ <listOptionValue builtIn="false" value="HAVE_BOOST"/>
+ <listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
+ <listOptionValue builtIn="false" value="WITH_THREADS"/>
+ <listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
+ <listOptionValue builtIn="false" value="TRACE_ENABLE"/>
+ <listOptionValue builtIn="false" value="LM_IRST"/>
+ <listOptionValue builtIn="false" value="_FILE_OFFSET_BIT=64"/>
+ <listOptionValue builtIn="false" value="_LARGE_FILES"/>
+ </option>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1138059468" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.241920461" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.2126314903">
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1408639346" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.505647623" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1168585173"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1809234420" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.2074660557"/>
+ <tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.2136353299" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug.933467113">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.190676079" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+ </tool>
+ </toolChain>
+ </folderInfo>
+ <folderInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1401518461" name="/" resourcePath="TranslationModel/fuzzy-match">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.472269246" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug" unusedChildren="">
+ <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
+ <tool id="cdt.managedbuild.tool.gnu.archiver.base.84234118" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base.1671760867"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.299872725" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.2072639167">
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.664273995" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.2044654215" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.241920461">
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1537423216" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1174866714" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug.505647623"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.239716723" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1809234420"/>
+ <tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.2078651360" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug.2136353299">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.214869589" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+ </tool>
+ </toolChain>
+ </folderInfo>
+ <folderInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.103170143" name="/" resourcePath="TranslationModel/ProbingPT">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.2026082807" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug" unusedChildren="">
+ <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
+ <tool id="cdt.managedbuild.tool.gnu.archiver.base.1540835364" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base.1671760867"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1897459756" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.2072639167">
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1615949072" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.1178947383" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.241920461">
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2013283881" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1997457966" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug.505647623"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.886709003" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1809234420"/>
+ <tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.325064995" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug.2136353299">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.1281335737" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+ </tool>
+ </toolChain>
+ </folderInfo>
<sourceEntries>
- <entry excluding="TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp|TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
+ <entry excluding="LM/SRI.h|LM/SRI.cpp|TranslationModel/UG|LM/DALMWrapper.h|LM/DALMWrapper.cpp|TranslationModel/UG/mm/test-dynamic-im-tsa.cc|TranslationModel/UG/mm/symal2mam.cc|TranslationModel/UG/mm/mtt-dump.cc|TranslationModel/UG/mm/mtt-count-words.cc|TranslationModel/UG/mm/mtt-build.cc|TranslationModel/UG/mm/mmlex-lookup.cc|TranslationModel/UG/mm/mmlex-build.cc|TranslationModel/UG/mm/mam_verify.cc|TranslationModel/UG/mm/mam2symal.cc|TranslationModel/UG/mm/custom-pt.cc|TranslationModel/UG/mm/calc-coverage.cc|TranslationModel/UG/mm/mtt.count.cc|TranslationModel/UG/util|LM/oxlm|LM/Rand.h|LM/Rand.cpp|TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>
@@ -133,6 +194,95 @@
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
+ <cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.656913512.916939380">
+ <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.656913512.916939380" moduleId="org.eclipse.cdt.core.settings" name="Debug with oxlm">
+ <externalSettings>
+ <externalSetting>
+ <entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/moses"/>
+ <entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/moses/Debug with oxlm"/>
+ <entry flags="RESOLVED" kind="libraryFile" name="moses" srcPrefixMapping="" srcRootPath=""/>
+ </externalSetting>
+ </externalSettings>
+ <extensions>
+ <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
+ </extensions>
+ </storageModule>
+ <storageModule moduleId="cdtBuildSystem" version="4.0.0">
+ <configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.656913512.916939380" name="Debug with oxlm" parent="cdt.managedbuild.config.gnu.exe.debug">
+ <folderInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.916939380." name="/" resourcePath="">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.58016517" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
+ <targetPlatform binaryParser="org.eclipse.cdt.core.ELF;org.eclipse.cdt.core.MachO64" id="cdt.managedbuild.target.gnu.platform.exe.debug.1519676809" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
+ <builder buildPath="${workspace_loc:/moses/Debug}" id="cdt.managedbuild.target.gnu.builder.exe.debug.210713286" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
+ <tool id="cdt.managedbuild.tool.gnu.archiver.base.352461864" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1930334119" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
+ <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.287782778" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1878892542" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.1954109101" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../probingPT/helpers&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/include&quot;"/>
+ <listOptionValue builtIn="false" value="/opt/local/include/"/>
+ <listOptionValue builtIn="false" value="${workspace_loc}/../../irstlm/include"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../nplm/src&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../eigen&quot;"/>
+ <listOptionValue builtIn="false" value="${workspace_loc}/../../srilm/include"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../DALM/include&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../DALM/darts-clone&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../randlm/include/RandLM&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../eigen-3&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../oxlm/src&quot;"/>
+ <listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
+ </option>
+ <option id="gnu.cpp.compiler.option.preprocessor.def.274233516" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+ <listOptionValue builtIn="false" value="IS_ECLIPSE"/>
+ <listOptionValue builtIn="false" value="HAVE_PROBINGPT"/>
+ <listOptionValue builtIn="false" value="HAVE_BOOST"/>
+ <listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
+ <listOptionValue builtIn="false" value="WITH_THREADS"/>
+ <listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
+ <listOptionValue builtIn="false" value="TRACE_ENABLE"/>
+ <listOptionValue builtIn="false" value="LM_IRST"/>
+ <listOptionValue builtIn="false" value="LM_DALM"/>
+ <listOptionValue builtIn="false" value="LM_NPLM"/>
+ <listOptionValue builtIn="false" value="LM_LBL"/>
+ <listOptionValue builtIn="false" value="_FILE_OFFSET_BIT=64"/>
+ <listOptionValue builtIn="false" value="_LARGE_FILES"/>
+ </option>
+ <option id="gnu.cpp.compiler.option.dialect.std.1353163586" name="Language standard" superClass="gnu.cpp.compiler.option.dialect.std" value="gnu.cpp.compiler.dialect.c++11" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1413141770" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.1040012873" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.260276259" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.debug.option.debugging.level.744208673" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.699852884" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1114065632" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.373508964" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1873470979" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+ <additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+ <additionalInput kind="additionalinput" paths="$(LIBS)"/>
+ </inputType>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.1046426871" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.1482215763" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+ </tool>
+ </toolChain>
+ </folderInfo>
+ <fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.916939380.LM/Rand.h" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
+ <fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.916939380.LM/IRST.h" name="IRST.h" rcbsApplicability="disable" resourcePath="LM/IRST.h" toolsToInvoke=""/>
+ <fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.916939380.LM/DALMWrapper.h" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
+ <sourceEntries>
+ <entry excluding="LM/Rand.h|LM/Rand.cpp|TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
+ </sourceEntries>
+ </configuration>
+ </storageModule>
+ <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+ </cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="moses.cdt.managedbuild.target.gnu.exe.1375079569" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
@@ -171,6 +321,9 @@
<configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/moses"/>
</configuration>
+ <configuration configurationName="Debug with oxlm">
+ <resource resourceType="PROJECT" workspacePath="/moses"/>
+ </configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project
index ff35ca5ba..a98fc8b3a 100644
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@@ -377,6 +377,16 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/GenerationDictionary.h</locationURI>
</link>
<link>
+ <name>HypergraphOutput.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/HypergraphOutput.cpp</locationURI>
+ </link>
+ <link>
+ <name>HypergraphOutput.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/HypergraphOutput.h</locationURI>
+ </link>
+ <link>
<name>HypoList.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/HypoList.h</locationURI>
@@ -537,6 +547,11 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/PCNTools.h</locationURI>
</link>
<link>
+ <name>PDTAimp.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/PDTAimp.cpp</locationURI>
+ </link>
+ <link>
<name>PDTAimp.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/PDTAimp.h</locationURI>
@@ -602,16 +617,6 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/ReorderingConstraint.h</locationURI>
</link>
<link>
- <name>ReorderingStack.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/ReorderingStack.cpp</locationURI>
- </link>
- <link>
- <name>ReorderingStack.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/ReorderingStack.h</locationURI>
- </link>
- <link>
<name>RuleCube.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/RuleCube.cpp</locationURI>
@@ -1157,16 +1162,6 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/InputFeature.h</locationURI>
</link>
<link>
- <name>FF/InternalStructStatelessFF.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/FF/InternalStructStatelessFF.cpp</locationURI>
- </link>
- <link>
- <name>FF/InternalStructStatelessFF.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/FF/InternalStructStatelessFF.h</locationURI>
- </link>
- <link>
<name>FF/LexicalReordering</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
@@ -1272,6 +1267,16 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SetSourcePhrase.h</locationURI>
</link>
<link>
+ <name>FF/SkeletonChangeInput.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/FF/SkeletonChangeInput.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/SkeletonChangeInput.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/FF/SkeletonChangeInput.h</locationURI>
+ </link>
+ <link>
<name>FF/SkeletonStatefulFF.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SkeletonStatefulFF.cpp</locationURI>
@@ -1302,6 +1307,16 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SoftMatchingFeature.h</locationURI>
</link>
<link>
+ <name>FF/SoftSourceSyntacticConstraintsFeature.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/SoftSourceSyntacticConstraintsFeature.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/FF/SoftSourceSyntacticConstraintsFeature.h</locationURI>
+ </link>
+ <link>
<name>FF/SourceGHKMTreeInputMatchFeature.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SourceGHKMTreeInputMatchFeature.cpp</locationURI>
@@ -1332,6 +1347,16 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SpanLength.h</locationURI>
</link>
<link>
+ <name>FF/SparseHieroReorderingFeature.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/FF/SparseHieroReorderingFeature.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/SparseHieroReorderingFeature.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/FF/SparseHieroReorderingFeature.h</locationURI>
+ </link>
+ <link>
<name>FF/StatefulFeatureFunction.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/StatefulFeatureFunction.cpp</locationURI>
@@ -1637,6 +1662,11 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/backward.arpa</locationURI>
</link>
<link>
+ <name>LM/oxlm</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
<name>PP/CountsPhraseProperty.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/PP/CountsPhraseProperty.cpp</locationURI>
@@ -1657,6 +1687,26 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/PP/Factory.h</locationURI>
</link>
<link>
+ <name>PP/NonTermContextProperty.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/PP/NonTermContextProperty.cpp</locationURI>
+ </link>
+ <link>
+ <name>PP/NonTermContextProperty.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/PP/NonTermContextProperty.h</locationURI>
+ </link>
+ <link>
+ <name>PP/OrientationPhraseProperty.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/PP/OrientationPhraseProperty.cpp</locationURI>
+ </link>
+ <link>
+ <name>PP/OrientationPhraseProperty.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/PP/OrientationPhraseProperty.h</locationURI>
+ </link>
+ <link>
<name>PP/PhraseProperty.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/PP/PhraseProperty.cpp</locationURI>
@@ -1852,6 +1902,11 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/SkeletonPT.h</locationURI>
</link>
<link>
+ <name>TranslationModel/UG</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
<name>TranslationModel/WordCoocTable.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/WordCoocTable.cpp</locationURI>
@@ -1917,6 +1972,31 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/LexicalReorderingTable.h</locationURI>
</link>
<link>
+ <name>FF/LexicalReordering/ReorderingStack.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/ReorderingStack.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/LexicalReordering/ReorderingStack.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/ReorderingStack.h</locationURI>
+ </link>
+ <link>
+ <name>FF/LexicalReordering/SparseReordering.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/SparseReordering.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/LexicalReordering/SparseReordering.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/SparseReordering.h</locationURI>
+ </link>
+ <link>
+ <name>FF/OSM-Feature/OSM-Feature</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
<name>FF/OSM-Feature/OpSequenceModel.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/OSM-Feature/OpSequenceModel.cpp</locationURI>
@@ -1937,6 +2017,26 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/OSM-Feature/osmHyp.h</locationURI>
</link>
<link>
+ <name>LM/oxlm/LBLLM.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/LBLLM.cpp</locationURI>
+ </link>
+ <link>
+ <name>LM/oxlm/LBLLM.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/LBLLM.h</locationURI>
+ </link>
+ <link>
+ <name>LM/oxlm/Mapper.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/Mapper.cpp</locationURI>
+ </link>
+ <link>
+ <name>LM/oxlm/Mapper.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/Mapper.h</locationURI>
+ </link>
+ <link>
<name>TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.cpp</locationURI>
@@ -2497,6 +2597,136 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/Scope3Parser/VarSpanTrieBuilder.h</locationURI>
</link>
<link>
+ <name>TranslationModel/UG/Jamfile</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/Jamfile</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/Makefile</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/Makefile</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/bin</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mmsapt.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mmsapt.cpp</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mmsapt.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mmsapt.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mmsapt_align.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mmsapt_align.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mmsapt_phrase_scorers.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mmsapt_phrase_scorers.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/ptable-lookup.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/ptable-lookup.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/sapt_phrase_key.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_phrase_key.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/sapt_phrase_scorers.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_phrase_scorers.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/sapt_pscore_base.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_base.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/sapt_pscore_coherence.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_coherence.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/sapt_pscore_lex1.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_lex1.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/sapt_pscore_logcnt.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_logcnt.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/sapt_pscore_pbwd.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_pbwd.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/sapt_pscore_pfwd.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_pfwd.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/sapt_pscore_provenance.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_provenance.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/sapt_pscore_rareness.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_rareness.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/sapt_pscore_unaligned.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_unaligned.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/sim-pe.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sim-pe.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/spe-check-coverage.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/spe-check-coverage.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/spe-check-coverage2.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/spe-check-coverage2.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/try-align.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/try-align.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/util</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
<name>TranslationModel/fuzzy-match/Alignments.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/fuzzy-match/Alignments.cpp</locationURI>
@@ -2572,6 +2802,16 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
+ <name>FF/OSM-Feature/OSM-Feature/KenOSM.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/FF/OSM-Feature/KenOSM.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/OSM-Feature/OSM-Feature/KenOSM.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/FF/OSM-Feature/KenOSM.h</locationURI>
+ </link>
+ <link>
<name>TranslationModel/CompactPT/bin/gcc-4.7</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
@@ -2582,6 +2822,356 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/pt.log</locationURI>
</link>
<link>
+ <name>TranslationModel/UG/bin/gcc-4.8</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/Jamfile</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/Jamfile</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/bin</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/file_io</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/program_options</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/sampling</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/sorting</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/threading</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/Jamfile</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/Jamfile</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/Makefile</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/Makefile</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/calc-coverage.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/calc-coverage.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/custom-pt.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/custom-pt.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/mam2symal.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/mam2symal.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/mam_verify.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/mam_verify.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/mmlex-build.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/mmlex-build.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/mmlex-lookup.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/mmlex-lookup.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/mtt-build.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/mtt-build.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/mtt-count-words.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/mtt-count-words.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/mtt-dump.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/mtt-dump.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/mtt.count.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/mtt.count.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/num_read_write.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/num_read_write.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/num_read_write.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/num_read_write.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/obsolete</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/symal2mam.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/symal2mam.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/test-dynamic-im-tsa.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/test-dynamic-im-tsa.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/tpt_pickler.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/tpt_pickler.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/tpt_pickler.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/tpt_pickler.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/tpt_tightindex.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/tpt_tightindex.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/tpt_tightindex.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/tpt_tightindex.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/tpt_tokenindex.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/tpt_tokenindex.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/tpt_tokenindex.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/tpt_tokenindex.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/tpt_typedefs.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/tpt_typedefs.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_bitext.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_bitext.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_conll_bottom_up_token.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_conll_bottom_up_token.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_conll_record.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_conll_record.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_conll_record.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_conll_record.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_corpus_token.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_corpus_token.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_corpus_token.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_corpus_token.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_deptree.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_deptree.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_deptree.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_deptree.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_im_tsa.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_im_tsa.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_im_ttrack.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_im_ttrack.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_lexical_phrase_scorer1.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer1.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_load_primer.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_load_primer.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_load_primer.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_load_primer.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_mm_2d_table.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_mm_2d_table.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_mm_tsa.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_mm_tsa.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_mm_tsa_tree_iterator.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_mm_tsa_tree_iterator.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_mm_ttrack.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_mm_ttrack.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_mmbitext.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_mmbitext.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_mmbitext.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_mmbitext.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_phrasepair.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_phrasepair.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_phrasepair.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_phrasepair.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_tsa_array_entry.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_tsa_array_entry.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_tsa_array_entry.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_tsa_array_entry.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_tsa_base.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_tsa_base.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_tsa_bitset_cache.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_tsa_bitset_cache.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_tsa_tree_iterator.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_ttrack_base.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_ttrack_base.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_ttrack_base.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_ttrack_base.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_ttrack_position.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_ttrack_position.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_ttrack_position.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_ttrack_position.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/ug_typedefs.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_typedefs.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/util/Makefile</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/util/Makefile</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/util/ibm1-align</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/util/ibm1-align</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/util/ibm1-align.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/util/ibm1-align.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/util/tokenindex.dump.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/util/tokenindex.dump.cc</locationURI>
+ </link>
+ <link>
<name>bin/BackwardTest.test/gcc-4.7/release</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
@@ -2597,6 +3187,81 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
+ <name>TranslationModel/UG/bin/gcc-4.8/release</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/bin/gcc-4.8</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/file_io/ug_stream.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/file_io/ug_stream.cpp</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/file_io/ug_stream.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/file_io/ug_stream.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/program_options/ug_get_options.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/program_options/ug_get_options.cpp</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/program_options/ug_get_options.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/program_options/ug_get_options.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/program_options/ug_splice_arglist.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/program_options/ug_splice_arglist.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/sampling/Sampling.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/sampling/Sampling.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/sorting/NBestList.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/sorting/NBestList.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/sorting/VectorIndexSorter.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/threading/ug_thread_safe_counter.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/threading/ug_thread_safe_counter.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/threading/ug_thread_safe_counter.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/threading/ug_thread_safe_counter.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/obsolete/ug_bitext_base.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/obsolete/ug_bitext_base.h</locationURI>
+ </link>
+ <link>
<name>bin/BackwardTest.test/gcc-4.7/release/debug-symbols-on</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
@@ -2612,6 +3277,21 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
+ <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/bin/gcc-4.8/release</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
<name>bin/BackwardTest.test/gcc-4.7/release/debug-symbols-on/link-static</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
@@ -2627,6 +3307,21 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
+ <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
<name>bin/BackwardTest.test/gcc-4.7/release/debug-symbols-on/link-static/threading-multi</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
@@ -3252,6 +3947,21 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
+ <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
<name>bin/BackwardTest.test/gcc-4.7/release/debug-symbols-on/link-static/threading-multi/Backward.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/bin/BackwardTest.test/gcc-4.7/release/debug-symbols-on/link-static/threading-multi/Backward.o</locationURI>
@@ -3442,6 +4152,71 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/gcc-4.7/release/debug-symbols-on/link-static/threading-multi/ThrowingFwrite.o</locationURI>
</link>
<link>
+ <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmsapt_align.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmsapt_align.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/sim-pe</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/sim-pe</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/sim-pe.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/sim-pe.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage2</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage2</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage2.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage2.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/try-align</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/try-align</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/try-align.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/try-align.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
<name>bin/gcc-4.7/release/debug-symbols-on/link-static/threading-multi/TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/bin/gcc-4.7/release/debug-symbols-on/link-static/threading-multi/TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.o</locationURI>
@@ -3596,5 +4371,180 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/bin/gcc-4.7/release/debug-symbols-on/link-static/threading-multi/TranslationModel/fuzzy-match/create_xml.o</locationURI>
</link>
+ <link>
+ <name>TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_get_options.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_get_options.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_splice_arglist.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_splice_arglist.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_stream.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_stream.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_thread_safe_counter.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_thread_safe_counter.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/calc-coverage</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/calc-coverage</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/calc-coverage.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/calc-coverage.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam2symal</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam2symal</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam2symal.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam2symal.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam_verify</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam_verify</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam_verify.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam_verify.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-build</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-build</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-build.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-build.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-build</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-build</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-build.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-build.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-count-words</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-count-words</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-count-words.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-count-words.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-dump</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-dump</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-dump.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-dump.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/symal2mam</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/symal2mam</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/symal2mam.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/symal2mam.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_pickler.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_pickler.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_tightindex.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_tightindex.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_tokenindex.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_tokenindex.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_bitext.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_bitext.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_conll_record.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_conll_record.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_corpus_token.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_corpus_token.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_deptree.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_deptree.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_load_primer.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_load_primer.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_mmbitext.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_mmbitext.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_phrasepair.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_phrasepair.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_tsa_array_entry.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_tsa_array_entry.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_base.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_base.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_position.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_position.o</locationURI>
+ </link>
</linkedResources>
</projectDescription>
diff --git a/contrib/other-builds/score/.cproject b/contrib/other-builds/score/.cproject
index f51f35ef5..0e0ae75ff 100644
--- a/contrib/other-builds/score/.cproject
+++ b/contrib/other-builds/score/.cproject
@@ -1,62 +1,85 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
+<?fileVersion 4.0.0?>
+
+<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
- <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.634831890">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.634831890" moduleId="org.eclipse.cdt.core.settings" name="Debug">
+ <cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.852684782">
+ <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.852684782" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.634831890" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
- <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.634831890." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.1361730953" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
- <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.2040884960" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
- <builder buildPath="${workspace_loc:/score/Debug}" id="cdt.managedbuild.builder.gnu.cross.1709170788" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.786339685" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.1516054114" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.option.debugging.level.1061705384" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2108019237" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1013232238" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
- <option id="gnu.cpp.compiler.option.optimization.level.1874109813" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.debugging.level.2032778777" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.1713606194" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.852684782" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
+ <folderInfo id="cdt.managedbuild.config.gnu.exe.debug.852684782." name="/" resourcePath="">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.628760407" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
+ <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.40031730" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
+ <builder buildPath="${workspace_loc:/score}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.1494414913" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
+ <tool id="cdt.managedbuild.tool.gnu.archiver.base.1369030665" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1299858559" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
+ <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.1103483066" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.11930558" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.1147799314" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../&quot;"/>
</option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.509920006" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1638578889" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1279743060" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1563503789" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
- <option id="gnu.cpp.link.option.paths.1704292838" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
+ <tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.2096513387" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.1877980632" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.debug.option.debugging.level.1972289345" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1767499123" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.9477188" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1008235812" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
+ <option id="gnu.cpp.link.option.paths.2139594100" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../irstlm/lib&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../probingPT/helpers&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../DALM/lib&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../nplm/lib&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../randlm/lib&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../cmph/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/macosx&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/i686-m64&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/i686&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/lm/Debug&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/OnDiskPt/Debug&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/search/Debug&quot;"/>
+ <listOptionValue builtIn="false" value="/opt/local/lib"/>
</option>
- <option id="gnu.cpp.link.option.libs.936233947" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
- <listOptionValue builtIn="false" value="z"/>
+ <option id="gnu.cpp.link.option.libs.615408765" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="moses"/>
+ <listOptionValue builtIn="false" value="irstlm"/>
+ <listOptionValue builtIn="false" value="search"/>
+ <listOptionValue builtIn="false" value="OnDiskPt"/>
+ <listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
+ <listOptionValue builtIn="false" value="boost_serialization"/>
<listOptionValue builtIn="false" value="boost_system-mt"/>
+ <listOptionValue builtIn="false" value="boost_thread-mt"/>
<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
- <listOptionValue builtIn="false" value="rt"/>
+ <listOptionValue builtIn="false" value="pthread"/>
+ <listOptionValue builtIn="false" value="z"/>
+ <listOptionValue builtIn="false" value="bz2"/>
+ <listOptionValue builtIn="false" value="dl"/>
</option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.589709979" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.202044854" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.archiver.1829423265" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.assembler.52947560" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.1165474354" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+ <tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.1832317688" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.1877599289" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
@@ -64,44 +87,44 @@
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
- <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.1994357180">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.1994357180" moduleId="org.eclipse.cdt.core.settings" name="Release">
+ <cconfiguration id="cdt.managedbuild.config.gnu.exe.release.1878418244">
+ <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1878418244" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.1994357180" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
- <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.1994357180." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.743463783" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
- <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1353054437" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
- <builder buildPath="${workspace_loc:/score/Release}" id="cdt.managedbuild.builder.gnu.cross.1851758128" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.323743241" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.534423111" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.option.debugging.level.518786530" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.392640311" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1878418244" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
+ <folderInfo id="cdt.managedbuild.config.gnu.exe.release.1878418244." name="/" resourcePath="">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.1661678477" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
+ <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.848161857" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
+ <builder buildPath="${workspace_loc:/score}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.1694318208" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
+ <tool id="cdt.managedbuild.tool.gnu.archiver.base.1857970512" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.464441024" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
+ <option id="gnu.cpp.compiler.exe.release.option.optimization.level.1302447353" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.debugging.level.143379331" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.859419943" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.307472312" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
- <option id="gnu.cpp.compiler.option.optimization.level.407718562" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.debugging.level.1687450255" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.593478428" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ <tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.1103707928" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.2144910639" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.release.option.debugging.level.158963791" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.558236570" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.165176764" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.178129273" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
- <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.25375344" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+ <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.1915067544" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.2131232485" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.530558382" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.archiver.986435372" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.assembler.1833814398" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.1026471548" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+ <tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.1037806386" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.2129474260" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
@@ -111,23 +134,30 @@
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <project id="score.cdt.managedbuild.target.gnu.cross.exe.1539177197" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
+ <project id="score.cdt.managedbuild.target.gnu.exe.812812835" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.634831890;cdt.managedbuild.config.gnu.cross.exe.debug.634831890.;cdt.managedbuild.tool.gnu.cross.c.compiler.786339685;cdt.managedbuild.tool.gnu.c.compiler.input.2108019237">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.1878418244;cdt.managedbuild.config.gnu.exe.release.1878418244.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.1103707928;cdt.managedbuild.tool.gnu.c.compiler.input.558236570">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1994357180;cdt.managedbuild.config.gnu.cross.exe.release.1994357180.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.307472312;cdt.managedbuild.tool.gnu.cpp.compiler.input.593478428">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.852684782;cdt.managedbuild.config.gnu.exe.debug.852684782.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.2096513387;cdt.managedbuild.tool.gnu.c.compiler.input.1767499123">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.634831890;cdt.managedbuild.config.gnu.cross.exe.debug.634831890.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1013232238;cdt.managedbuild.tool.gnu.cpp.compiler.input.509920006">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.1878418244;cdt.managedbuild.config.gnu.exe.release.1878418244.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.464441024;cdt.managedbuild.tool.gnu.cpp.compiler.input.859419943">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1994357180;cdt.managedbuild.config.gnu.cross.exe.release.1994357180.;cdt.managedbuild.tool.gnu.cross.c.compiler.323743241;cdt.managedbuild.tool.gnu.c.compiler.input.392640311">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.852684782;cdt.managedbuild.config.gnu.exe.debug.852684782.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1299858559;cdt.managedbuild.tool.gnu.cpp.compiler.input.1638578889">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
- <storageModule moduleId="refreshScope"/>
+ <storageModule moduleId="refreshScope" versionNumber="2">
+ <configuration configurationName="Release">
+ <resource resourceType="PROJECT" workspacePath="/score"/>
+ </configuration>
+ <configuration configurationName="Debug">
+ <resource resourceType="PROJECT" workspacePath="/score"/>
+ </configuration>
+ </storageModule>
</cproject>
diff --git a/contrib/other-builds/score/.project b/contrib/other-builds/score/.project
index 05564d0f9..10e713124 100644
--- a/contrib/other-builds/score/.project
+++ b/contrib/other-builds/score/.project
@@ -88,16 +88,6 @@
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/ScoreFeature.h</locationURI>
</link>
<link>
- <name>exception.cc</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/util/exception.cc</locationURI>
- </link>
- <link>
- <name>exception.hh</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/util/exception.hh</locationURI>
- </link>
- <link>
<name>score-main.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/score-main.cpp</locationURI>
diff --git a/contrib/rt/Empty.c b/contrib/rt/Empty.c
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/contrib/rt/Empty.c
diff --git a/contrib/rt/README b/contrib/rt/README
new file mode 100644
index 000000000..d7a4cfebc
--- /dev/null
+++ b/contrib/rt/README
@@ -0,0 +1,9 @@
+FOR OSX ONLY
+------------
+
+This creates an empty library file
+ librt.a
+It should be used when you compile with Eclipse on OSX.
+
+The Eclipse projects are set up to link to librt, but OSX doesn't have it, so this just creates a dummy library.
+
diff --git a/contrib/rt/compile.sh b/contrib/rt/compile.sh
new file mode 100755
index 000000000..6266d58d6
--- /dev/null
+++ b/contrib/rt/compile.sh
@@ -0,0 +1,2 @@
+gcc -c Empty.c -o Empty.o
+ar rcs librt.a Empty.o
\ No newline at end of file
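As an aside, a quick way to see what the dummy archive buys you (a hypothetical check, not part of the repository): once compile.sh has produced librt.a, a trivial program linked with -lrt against that directory should link on OSX even though the archive exports no symbols.

  // sanity.cpp -- hypothetical check that the dummy librt.a satisfies -lrt on OSX
  // build: g++ sanity.cpp -L/path/to/contrib/rt -lrt -o sanity
  int main() { return 0; }
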
diff --git a/contrib/server/Jamfile b/contrib/server/Jamfile
index 49770d548..f7348c250 100644
--- a/contrib/server/Jamfile
+++ b/contrib/server/Jamfile
@@ -35,7 +35,7 @@ if $(build-moses-server) = true
xmlrpc-linkflags = [ shell_or_die "$(xmlrpc-command) c++2 abyss-server --libs" ] ;
xmlrpc-cxxflags = [ shell_or_die "$(xmlrpc-command) c++2 abyss-server --cflags" ] ;
- exe mosesserver : mosesserver.cpp ../../moses//moses ../../OnDiskPt//OnDiskPt ../../moses-cmd/IOWrapper.cpp : <linkflags>$(xmlrpc-linkflags) <cxxflags>$(xmlrpc-cxxflags) ;
+ exe mosesserver : mosesserver.cpp ../../moses//moses ../../OnDiskPt//OnDiskPt ../../moses-cmd/IOWrapper.cpp ../..//boost_filesystem : <linkflags>$(xmlrpc-linkflags) <cxxflags>$(xmlrpc-cxxflags) ;
} else {
alias mosesserver ;
}
diff --git a/contrib/server/mosesserver.cpp b/contrib/server/mosesserver.cpp
index 1ff11f0ae..0ec412157 100644
--- a/contrib/server/mosesserver.cpp
+++ b/contrib/server/mosesserver.cpp
@@ -4,6 +4,7 @@
#include <algorithm>
+#include "moses/Util.h"
#include "moses/ChartManager.h"
#include "moses/Hypothesis.h"
#include "moses/Manager.h"
@@ -59,7 +60,7 @@ public:
if(add2ORLM_) {
//updateORLM();
}
- cerr << "Done inserting\n";
+ XVERBOSE(1,"Done inserting\n");
//PhraseDictionary* pdsa = (PhraseDictionary*) pdf->GetDictionary(*dummy);
map<string, xmlrpc_c::value> retData;
//*retvalP = xmlrpc_c::value_struct(retData);
@@ -120,17 +121,17 @@ public:
if(si == params.end())
throw xmlrpc_c::fault("Missing source sentence", xmlrpc_c::fault::CODE_PARSE);
source_ = xmlrpc_c::value_string(si->second);
- cerr << "source = " << source_ << endl;
+ XVERBOSE(1,"source = " << source_ << endl);
si = params.find("target");
if(si == params.end())
throw xmlrpc_c::fault("Missing target sentence", xmlrpc_c::fault::CODE_PARSE);
target_ = xmlrpc_c::value_string(si->second);
- cerr << "target = " << target_ << endl;
+ XVERBOSE(1,"target = " << target_ << endl);
si = params.find("alignment");
if(si == params.end())
throw xmlrpc_c::fault("Missing alignment", xmlrpc_c::fault::CODE_PARSE);
alignment_ = xmlrpc_c::value_string(si->second);
- cerr << "alignment = " << alignment_ << endl;
+ XVERBOSE(1,"alignment = " << alignment_ << endl);
si = params.find("bounded");
bounded_ = (si != params.end());
si = params.find("updateORLM");
@@ -224,7 +225,7 @@ public:
}
const string source((xmlrpc_c::value_string(si->second)));
- cerr << "Input: " << source << endl;
+ XVERBOSE(1,"Input: " << source << endl);
si = params.find("align");
bool addAlignInfo = (si != params.end());
si = params.find("word-align");
@@ -275,25 +276,25 @@ public:
inputFactorOrder = staticData.GetInputFactorOrder();
stringstream in(source + "\n");
tinput.Read(in,inputFactorOrder);
- ChartManager manager(tinput);
+ ChartManager manager(0,tinput);
manager.ProcessSentence();
const ChartHypothesis *hypo = manager.GetBestHypothesis();
outputChartHypo(out,hypo);
if (addGraphInfo) {
const size_t translationId = tinput.GetTranslationId();
std::ostringstream sgstream;
- manager.GetSearchGraph(translationId,sgstream);
+ manager.OutputSearchGraphMoses(sgstream);
retData.insert(pair<string, xmlrpc_c::value>("sg", xmlrpc_c::value_string(sgstream.str())));
}
} else {
Sentence sentence;
- const vector<FactorType> &inputFactorOrder =
- staticData.GetInputFactorOrder();
+ const vector<FactorType> &
+ inputFactorOrder = staticData.GetInputFactorOrder();
stringstream in(source + "\n");
sentence.Read(in,inputFactorOrder);
size_t lineNumber = 0; // TODO: Include sentence request number here?
Manager manager(lineNumber, sentence, staticData.GetSearchAlgorithm());
- manager.ProcessSentence();
+ manager.ProcessSentence();
const Hypothesis* hypo = manager.GetBestHypothesis();
vector<xmlrpc_c::value> alignInfo;
@@ -331,7 +332,7 @@ public:
pair<string, xmlrpc_c::value>
text("text", xmlrpc_c::value_string(out.str()));
retData.insert(text);
- cerr << "Output: " << out.str() << endl;
+ XVERBOSE(1,"Output: " << out.str() << endl);
*retvalP = xmlrpc_c::value_struct(retData);
}
@@ -574,7 +575,7 @@ int main(int argc, char** argv)
{
//Extract port and log, send other args to moses
- char** mosesargv = new char*[argc+2];
+ char** mosesargv = new char*[argc+2]; // why "+2" [UG]
int mosesargc = 0;
int port = 8080;
const char* logfile = "/dev/null";
@@ -634,11 +635,11 @@ int main(int argc, char** argv)
myRegistry.addMethod("updater", updater);
myRegistry.addMethod("optimize", optimizer);
- xmlrpc_c::serverAbyss myAbyssServer(
- myRegistry,
- port, // TCP port on which to listen
- logfile
- );
+ xmlrpc_c::serverAbyss myAbyssServer(
+ myRegistry,
+ port, // TCP port on which to listen
+ logfile
+ );
/* doesn't work with xmlrpc-c v. 1.16.33 - ie very old lib on Ubuntu 12.04
xmlrpc_c::serverAbyss myAbyssServer(
xmlrpc_c::serverAbyss::constrOpt()
@@ -648,12 +649,10 @@ int main(int argc, char** argv)
.allowOrigin("*")
);
*/
-
- cerr << "Listening on port " << port << endl;
+
+ XVERBOSE(1,"Listening on port " << port << endl);
if (isSerial) {
- while(1) {
- myAbyssServer.runOnce();
- }
+ while(1) myAbyssServer.runOnce();
} else {
myAbyssServer.run();
}
diff --git a/doc/PhraseDictionaryBitextSampling.howto b/doc/PhraseDictionaryBitextSampling.howto
new file mode 100644
index 000000000..69ab11b5b
--- /dev/null
+++ b/doc/PhraseDictionaryBitextSampling.howto
@@ -0,0 +1,4 @@
+The documentation for memory-mapped, dynamic suffix arrays has moved to
+ http://www.statmt.org/moses/?n=Moses.AdvancedFeatures#ntoc40
+
+Search for PhraseDictionaryBitextSampling.
diff --git a/lm/Jamfile b/lm/Jamfile
index 4693f9e01..227b22014 100644
--- a/lm/Jamfile
+++ b/lm/Jamfile
@@ -37,4 +37,4 @@ for local p in [ glob *_main.cc ] {
exes += $(name) ;
}
-alias programs : $(exes) filter//filter : <threading>multi:<source>builder//lmplz ;
+alias programs : $(exes) filter//filter builder//dump_counts : <threading>multi:<source>builder//lmplz ;
diff --git a/lm/builder/Jamfile b/lm/builder/Jamfile
index b596e086a..1e0e18b5f 100644
--- a/lm/builder/Jamfile
+++ b/lm/builder/Jamfile
@@ -4,6 +4,10 @@ fakelib builder : [ glob *.cc : *test.cc *main.cc ]
exe lmplz : lmplz_main.cc builder /top//boost_program_options ;
+exe dump_counts : dump_counts_main.cc builder ;
+
+alias programs : lmplz dump_counts ;
+
import testing ;
unit-test corpus_count_test : corpus_count_test.cc builder /top//boost_unit_test_framework ;
unit-test adjust_counts_test : adjust_counts_test.cc builder /top//boost_unit_test_framework ;
diff --git a/lm/builder/dump_counts_main.cc b/lm/builder/dump_counts_main.cc
new file mode 100644
index 000000000..fa0016792
--- /dev/null
+++ b/lm/builder/dump_counts_main.cc
@@ -0,0 +1,36 @@
+#include "lm/builder/print.hh"
+#include "lm/word_index.hh"
+#include "util/file.hh"
+#include "util/read_compressed.hh"
+
+#include <boost/lexical_cast.hpp>
+
+#include <iostream>
+#include <vector>
+
+int main(int argc, char *argv[]) {
+ if (argc != 4) {
+ std::cerr << "Usage: " << argv[0] << " counts vocabulary order\n"
+ "The counts file contains records with 4-byte vocabulary ids followed by 8-byte\n"
+ "counts. Each record has order many vocabulary ids.\n"
+ "The vocabulary file contains the words delimited by NULL in order of id.\n"
+ "The vocabulary file may not be compressed because it is mmapped but the counts\n"
+ "file can be compressed.\n";
+ return 1;
+ }
+ util::ReadCompressed counts(util::OpenReadOrThrow(argv[1]));
+ util::scoped_fd vocab_file(util::OpenReadOrThrow(argv[2]));
+ lm::builder::VocabReconstitute vocab(vocab_file.get());
+ unsigned int order = boost::lexical_cast<unsigned int>(argv[3]);
+ std::vector<char> record(sizeof(uint32_t) * order + sizeof(uint64_t));
+ while (std::size_t got = counts.ReadOrEOF(&*record.begin(), record.size())) {
+ UTIL_THROW_IF(got != record.size(), util::Exception, "Read " << got << " bytes at the end of file, which is not a complete record of length " << record.size());
+ const lm::WordIndex *words = reinterpret_cast<const lm::WordIndex*>(&*record.begin());
+ for (const lm::WordIndex *i = words; i != words + order; ++i) {
+ UTIL_THROW_IF(*i >= vocab.Size(), util::Exception, "Vocab ID " << *i << " is larger than the vocab file's maximum of " << vocab.Size() << ". Are you sure you have the right order and vocab file for these counts?");
+ std::cout << vocab.Lookup(*i) << ' ';
+ }
+ // TODO don't use std::cout because it is slow. Add fast uint64_t printing support to FakeOFStream.
+ std::cout << *reinterpret_cast<const uint64_t*>(words + order) << '\n';
+ }
+}
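Note: the usage text above fully determines the counts record layout; a minimal sketch follows, with a sample invocation. File names and the order value are placeholders, not part of the patch.

  // One packed record of the counts file (no padding between fields):
  //   uint32_t word_ids[order];  // vocabulary ids, looked up in the NULL-delimited vocab file
  //   uint64_t count;            // raw count for this n-gram
  // e.g. an order-3 record is 3*4 + 8 = 20 bytes
  //
  // Sample run (placeholder names):
  //   bin/dump_counts counts.bin vocab.txt 3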
diff --git a/lm/builder/interpolate.cc b/lm/builder/interpolate.cc
index db8537448..3e1225d9e 100644
--- a/lm/builder/interpolate.cc
+++ b/lm/builder/interpolate.cc
@@ -25,6 +25,10 @@ class Callback {
~Callback() {
for (std::size_t i = 0; i < backoffs_.size(); ++i) {
+ if(prune_thresholds_[i + 1] > 0)
+ while(backoffs_[i])
+ ++backoffs_[i];
+
if (backoffs_[i]) {
std::cerr << "Backoffs do not match for order " << (i + 1) << std::endl;
abort();
@@ -50,11 +54,9 @@ class Callback {
uint64_t current_hash = util::MurmurHashNative(gram.begin(), gram.Order() * sizeof(WordIndex));
const HashGamma *hashed_backoff = static_cast<const HashGamma*>(backoffs_[order_minus_1].Get());
- while(backoffs_[order_minus_1] && current_hash != hashed_backoff->hash_value) {
+ while(current_hash != hashed_backoff->hash_value && ++backoffs_[order_minus_1])
hashed_backoff = static_cast<const HashGamma*>(backoffs_[order_minus_1].Get());
- ++backoffs_[order_minus_1];
- }
-
+
if(current_hash == hashed_backoff->hash_value) {
pay.complete.backoff = log10(hashed_backoff->gamma);
++backoffs_[order_minus_1];
diff --git a/lm/builder/print.cc b/lm/builder/print.cc
index c70e62ed6..75f15f0a6 100644
--- a/lm/builder/print.cc
+++ b/lm/builder/print.cc
@@ -54,9 +54,8 @@ void PrintARPA::Run(const util::stream::ChainPositions &positions) {
for (const WordIndex *i = stream->begin() + 1; i != stream->end(); ++i) {
out << ' ' << vocab_.Lookup(*i);
}
- float backoff = stream->Value().complete.backoff;
- if (backoff != 0.0)
- out << '\t' << backoff;
+ if (order != positions.size())
+ out << '\t' << stream->Value().complete.backoff;
out << '\n';
}
diff --git a/lm/model_test.cc b/lm/model_test.cc
index 7005b05ea..0f54724bb 100644
--- a/lm/model_test.cc
+++ b/lm/model_test.cc
@@ -176,7 +176,7 @@ template <class M> void MinimalState(const M &model) {
AppendTest("to", 1, -1.687872, false);
AppendTest("look", 2, -0.2922095, true);
BOOST_CHECK_EQUAL(2, state.length);
- AppendTest("good", 3, -7, true);
+ AppendTest("a", 3, -7, true);
}
template <class M> void ExtendLeftTest(const M &model) {
diff --git a/lm/ngram_query.hh b/lm/ngram_query.hh
index 9e32d113a..5f330c5cc 100644
--- a/lm/ngram_query.hh
+++ b/lm/ngram_query.hh
@@ -36,7 +36,7 @@ struct FullPrint : public BasicPrint {
"Perplexity including OOVs:\t" << ppl_including_oov << "\n"
"Perplexity excluding OOVs:\t" << ppl_excluding_oov << "\n"
"OOVs:\t" << corpus_oov << "\n"
- "Tokenss:\t" << corpus_tokens << '\n'
+ "Tokens:\t" << corpus_tokens << '\n'
;
}
};
diff --git a/lm/read_arpa.hh b/lm/read_arpa.hh
index 213fe1caa..64eeef306 100644
--- a/lm/read_arpa.hh
+++ b/lm/read_arpa.hh
@@ -41,29 +41,24 @@ class PositiveProbWarn {
WarningAction action_;
};
-template <class Weights> StringPiece Read1Gram(util::FilePiece &f, Weights &weights, PositiveProbWarn &warn) {
+template <class Voc, class Weights> void Read1Gram(util::FilePiece &f, Voc &vocab, Weights *unigrams, PositiveProbWarn &warn) {
try {
- weights.prob = f.ReadFloat();
- if (weights.prob > 0.0) {
- warn.Warn(weights.prob);
- weights.prob = 0.0;
+ float prob = f.ReadFloat();
+ if (prob > 0.0) {
+ warn.Warn(prob);
+ prob = 0.0;
}
UTIL_THROW_IF(f.get() != '\t', FormatLoadException, "Expected tab after probability");
- StringPiece ret(f.ReadDelimited(kARPASpaces));
- ReadBackoff(f, weights);
- return ret;
+ WordIndex word = vocab.Insert(f.ReadDelimited(kARPASpaces));
+ Weights &w = unigrams[word];
+ w.prob = prob;
+ ReadBackoff(f, w);
} catch(util::Exception &e) {
e << " in the 1-gram at byte " << f.Offset();
throw;
}
}
-template <class Voc, class Weights> void Read1Gram(util::FilePiece &f, Voc &vocab, Weights *unigrams, PositiveProbWarn &warn) {
- Weights temp;
- WordIndex word = vocab.Insert(Read1Gram(f, temp, warn));
- unigrams[word] = temp;
-}
-
template <class Voc, class Weights> void Read1Grams(util::FilePiece &f, std::size_t count, Voc &vocab, Weights *unigrams, PositiveProbWarn &warn) {
ReadNGramHeader(f, 1);
for (std::size_t i = 0; i < count; ++i) {
@@ -81,7 +76,12 @@ template <class Voc, class Weights, class Iterator> void ReadNGram(util::FilePie
weights.prob = 0.0;
}
for (unsigned char i = 0; i < n; ++i, ++indices_out) {
- *indices_out = vocab.Index(f.ReadDelimited(kARPASpaces));
+ StringPiece word(f.ReadDelimited(kARPASpaces));
+ WordIndex index = vocab.Index(word);
+ *indices_out = index;
+ // Check for words mapped to <unk> that are not the string <unk>.
+ UTIL_THROW_IF(index == 0 /* mapped to <unk> */ && (word != StringPiece("<unk>", 5)) && (word != StringPiece("<UNK>", 5)),
+ FormatLoadException, "Word " << word << " was not seen in the unigrams (which are supposed to list the entire vocabulary) but appears");
}
ReadBackoff(f, weights);
} catch(util::Exception &e) {
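Note: Read1Gram and ReadNGram consume standard ARPA-format entries of the shape sketched below (probabilities and words are placeholders; <TAB> marks a tab). The backoff field is optional and is absent on the highest order. The new check rejects any n-gram word whose vocabulary lookup yields id 0 (<unk>) unless the word literally is <unk> or <UNK>.

  \1-grams:
  -1.5<TAB>word<TAB>-0.3      log10 prob, word, optional log10 backoff
  \3-grams:
  -0.25<TAB>a b c             highest order: no backoff field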
diff --git a/lm/test.arpa b/lm/test.arpa
index ef214eae3..c4d2e6df5 100644
--- a/lm/test.arpa
+++ b/lm/test.arpa
@@ -105,7 +105,7 @@ ngram 5=4
-0.04835128 looking on a -0.4771212
-3 also would consider -7
-6 <unk> however <unk> -12
--7 to look good
+-7 to look a
\4-grams:
-0.009249173 looking on a little -0.4771212
diff --git a/lm/test_nounk.arpa b/lm/test_nounk.arpa
index 060733d98..e38fc8547 100644
--- a/lm/test_nounk.arpa
+++ b/lm/test_nounk.arpa
@@ -101,7 +101,7 @@ ngram 5=4
-0.1892331 little more loin
-0.04835128 looking on a -0.4771212
-3 also would consider -7
--7 to look good
+-7 to look a
\4-grams:
-0.009249173 looking on a little -0.4771212
diff --git a/lm/trie_sort.cc b/lm/trie_sort.cc
index dc24e5b75..c3f468746 100644
--- a/lm/trie_sort.cc
+++ b/lm/trie_sort.cc
@@ -107,14 +107,20 @@ FILE *WriteContextFile(uint8_t *begin, uint8_t *end, const std::string &temp_pre
}
struct ThrowCombine {
- void operator()(std::size_t /*entry_size*/, const void * /*first*/, const void * /*second*/, FILE * /*out*/) const {
- UTIL_THROW(FormatLoadException, "Duplicate n-gram detected.");
+ void operator()(std::size_t entry_size, unsigned char order, const void *first, const void *second, FILE * /*out*/) const {
+ const WordIndex *base = reinterpret_cast<const WordIndex*>(first);
+ FormatLoadException e;
+ e << "Duplicate n-gram detected with vocab ids";
+ for (const WordIndex *i = base; i != base + order; ++i) {
+ e << ' ' << *i;
+ }
+ throw e;
}
};
// Useful for context files that just contain records with no value.
struct FirstCombine {
- void operator()(std::size_t entry_size, const void *first, const void * /*second*/, FILE *out) const {
+ void operator()(std::size_t entry_size, unsigned char /*order*/, const void *first, const void * /*second*/, FILE *out) const {
util::WriteOrThrow(out, first, entry_size);
}
};
@@ -134,7 +140,7 @@ template <class Combine> FILE *MergeSortedFiles(FILE *first_file, FILE *second_f
util::WriteOrThrow(out_file.get(), second.Data(), entry_size);
++second;
} else {
- combine(entry_size, first.Data(), second.Data(), out_file.get());
+ combine(entry_size, order, first.Data(), second.Data(), out_file.get());
++first; ++second;
}
}
diff --git a/lm/wrappers/README b/lm/wrappers/README
new file mode 100644
index 000000000..56c34c23e
--- /dev/null
+++ b/lm/wrappers/README
@@ -0,0 +1,3 @@
+This directory is for wrappers around other people's LMs, presenting an interface similar to KenLM's. You will need to have their LM installed.
+
+NPLM is a work in progress.
diff --git a/lm/wrappers/nplm.cc b/lm/wrappers/nplm.cc
new file mode 100644
index 000000000..70622bd2b
--- /dev/null
+++ b/lm/wrappers/nplm.cc
@@ -0,0 +1,90 @@
+#include "lm/wrappers/nplm.hh"
+#include "util/exception.hh"
+#include "util/file.hh"
+
+#include <algorithm>
+
+#include <string.h>
+
+#include "neuralLM.h"
+
+namespace lm {
+namespace np {
+
+Vocabulary::Vocabulary(const nplm::vocabulary &vocab)
+ : base::Vocabulary(vocab.lookup_word("<s>"), vocab.lookup_word("</s>"), vocab.lookup_word("<unk>")),
+ vocab_(vocab), null_word_(vocab.lookup_word("<null>")) {}
+
+Vocabulary::~Vocabulary() {}
+
+WordIndex Vocabulary::Index(const std::string &str) const {
+ return vocab_.lookup_word(str);
+}
+
+bool Model::Recognize(const std::string &name) {
+ try {
+ util::scoped_fd file(util::OpenReadOrThrow(name.c_str()));
+ char magic_check[16];
+ util::ReadOrThrow(file.get(), magic_check, sizeof(magic_check));
+ const char nnlm_magic[] = "\\config\nversion ";
+ return !memcmp(magic_check, nnlm_magic, 16);
+ } catch (const util::Exception &) {
+ return false;
+ }
+}
+
+Model::Model(const std::string &file, std::size_t cache)
+ : base_instance_(new nplm::neuralLM(file)), vocab_(base_instance_->get_vocabulary()), cache_size_(cache) {
+ UTIL_THROW_IF(base_instance_->get_order() > NPLM_MAX_ORDER, util::Exception, "This NPLM has order " << (unsigned int)base_instance_->get_order() << " but the KenLM wrapper was compiled with " << NPLM_MAX_ORDER << ". Change the definition of NPLM_MAX_ORDER and recompile.");
+ // log10 compatible with backoff models.
+ base_instance_->set_log_base(10.0);
+ State begin_sentence, null_context;
+ std::fill(begin_sentence.words, begin_sentence.words + NPLM_MAX_ORDER - 1, base_instance_->lookup_word("<s>"));
+ null_word_ = base_instance_->lookup_word("<null>");
+ std::fill(null_context.words, null_context.words + NPLM_MAX_ORDER - 1, null_word_);
+
+ Init(begin_sentence, null_context, vocab_, base_instance_->get_order());
+}
+
+Model::~Model() {}
+
+FullScoreReturn Model::FullScore(const State &from, const WordIndex new_word, State &out_state) const {
+ nplm::neuralLM *lm = backend_.get();
+ if (!lm) {
+ lm = new nplm::neuralLM(*base_instance_);
+ backend_.reset(lm);
+ lm->set_cache(cache_size_);
+ }
+ // State is in natural word order.
+ FullScoreReturn ret;
+ for (int i = 0; i < lm->get_order() - 1; ++i) {
+ lm->staging_ngram()(i) = from.words[i];
+ }
+ lm->staging_ngram()(lm->get_order() - 1) = new_word;
+ ret.prob = lm->lookup_from_staging();
+ // Always say full order.
+ ret.ngram_length = lm->get_order();
+ // Shift everything down by one.
+ memcpy(out_state.words, from.words + 1, sizeof(WordIndex) * (lm->get_order() - 2));
+ out_state.words[lm->get_order() - 2] = new_word;
+ // Fill in trailing words with zeros so state comparison works.
+ memset(out_state.words + lm->get_order() - 1, 0, sizeof(WordIndex) * (NPLM_MAX_ORDER - lm->get_order()));
+ return ret;
+}
+
+// TODO: optimize with direct call?
+FullScoreReturn Model::FullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, State &out_state) const {
+ // State is in natural word order. The API here specifies reverse order.
+ std::size_t state_length = std::min<std::size_t>(Order() - 1, context_rend - context_rbegin);
+ State state;
+ // Pad with null words.
+ for (lm::WordIndex *i = state.words; i < state.words + Order() - 1 - state_length; ++i) {
+ *i = null_word_;
+ }
+ // Put new words at the end.
+ std::reverse_copy(context_rbegin, context_rbegin + state_length, state.words + Order() - 1 - state_length);
+ return FullScore(state, new_word, out_state);
+}
+
+} // namespace np
+} // namespace lm
diff --git a/lm/wrappers/nplm.hh b/lm/wrappers/nplm.hh
new file mode 100644
index 000000000..b7dd4a21e
--- /dev/null
+++ b/lm/wrappers/nplm.hh
@@ -0,0 +1,83 @@
+#ifndef LM_WRAPPERS_NPLM_H
+#define LM_WRAPPERS_NPLM_H
+
+#include "lm/facade.hh"
+#include "lm/max_order.hh"
+#include "util/string_piece.hh"
+
+#include <boost/thread/tss.hpp>
+#include <boost/scoped_ptr.hpp>
+
+/* Wrapper to NPLM "by Ashish Vaswani, with contributions from David Chiang
+ * and Victoria Fossum."
+ * http://nlg.isi.edu/software/nplm/
+ */
+
+namespace nplm {
+class vocabulary;
+class neuralLM;
+} // namespace nplm
+
+namespace lm {
+namespace np {
+
+class Vocabulary : public base::Vocabulary {
+ public:
+ Vocabulary(const nplm::vocabulary &vocab);
+
+ ~Vocabulary();
+
+ WordIndex Index(const std::string &str) const;
+
+ // TODO: lobby them to support StringPiece
+ WordIndex Index(const StringPiece &str) const {
+ return Index(std::string(str.data(), str.size()));
+ }
+
+ lm::WordIndex NullWord() const { return null_word_; }
+
+ private:
+ const nplm::vocabulary &vocab_;
+
+ const lm::WordIndex null_word_;
+};
+
+// Sorry for imposing my limitations on your code.
+#define NPLM_MAX_ORDER 7
+
+struct State {
+ WordIndex words[NPLM_MAX_ORDER - 1];
+};
+
+class Model : public lm::base::ModelFacade<Model, State, Vocabulary> {
+ private:
+ typedef lm::base::ModelFacade<Model, State, Vocabulary> P;
+
+ public:
+ // Does this look like an NPLM?
+ static bool Recognize(const std::string &file);
+
+ explicit Model(const std::string &file, std::size_t cache_size = 1 << 20);
+
+ ~Model();
+
+ FullScoreReturn FullScore(const State &from, const WordIndex new_word, State &out_state) const;
+
+ FullScoreReturn FullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, State &out_state) const;
+
+ private:
+ boost::scoped_ptr<nplm::neuralLM> base_instance_;
+
+ mutable boost::thread_specific_ptr<nplm::neuralLM> backend_;
+
+ Vocabulary vocab_;
+
+ lm::WordIndex null_word_;
+
+ const std::size_t cache_size_;
+};
+
+} // namespace np
+} // namespace lm
+
+#endif // LM_WRAPPERS_NPLM_H
diff --git a/mert/BleuScorer.cpp b/mert/BleuScorer.cpp
index 467855d9b..f6ada2aa8 100644
--- a/mert/BleuScorer.cpp
+++ b/mert/BleuScorer.cpp
@@ -266,12 +266,12 @@ float smoothedSentenceBleu
float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vector<float>& bg)
{
// Sum sent and background
- std::vector<float> stats;
UTIL_THROW_IF(sent.size()!=bg.size(), util::Exception, "Error");
UTIL_THROW_IF(sent.size() != kBleuNgramOrder * 2 + 1, util::Exception, "Error");
+ std::vector<float> stats(sent.size());
for(size_t i=0; i<sent.size(); i++)
- stats.push_back(sent[i]+bg[i]);
+ stats[i] = sent[i]+bg[i];
// Calculate BLEU
float logbleu = 0.0;
diff --git a/mert/BleuScorer.h b/mert/BleuScorer.h
index 8be567574..affa37fbf 100644
--- a/mert/BleuScorer.h
+++ b/mert/BleuScorer.h
@@ -13,7 +13,7 @@
namespace MosesTuning
{
-const int kBleuNgramOrder = 4;
+const size_t kBleuNgramOrder = 4;
class NgramCounts;
class Reference;
diff --git a/mert/FeatureStats.cpp b/mert/FeatureStats.cpp
index 5a12be70a..a0c6a6ebc 100644
--- a/mert/FeatureStats.cpp
+++ b/mert/FeatureStats.cpp
@@ -14,6 +14,8 @@
#include <boost/functional/hash.hpp>
+#include "util/murmur_hash.hh"
+
#include "Util.h"
using namespace std;
@@ -59,6 +61,11 @@ void SparseVector::set(const string& name, FeatureStatsType value)
m_fvector[id] = value;
}
+void SparseVector::set(size_t id, FeatureStatsType value) {
+ assert(m_id_to_name.size() > id);
+ m_fvector[id] = value;
+}
+
void SparseVector::write(ostream& out, const string& sep) const
{
for (fvector_t::const_iterator i = m_fvector.begin(); i != m_fvector.end(); ++i) {
@@ -91,6 +98,16 @@ void SparseVector::load(const string& file)
}
}
+SparseVector& SparseVector::operator+=(const SparseVector& rhs)
+{
+
+ for (fvector_t::const_iterator i = rhs.m_fvector.begin();
+ i != rhs.m_fvector.end(); ++i) {
+ m_fvector[i->first] = get(i->first) + (i->second);
+ }
+ return *this;
+}
+
SparseVector& SparseVector::operator-=(const SparseVector& rhs)
{
@@ -162,12 +179,18 @@ bool operator==(SparseVector const& item1, SparseVector const& item2)
return item1.m_fvector==item2.m_fvector;
}
+
std::size_t hash_value(SparseVector const& item)
{
- boost::hash<SparseVector::fvector_t> hasher;
- return hasher(item.m_fvector);
+ size_t seed = 0;
+ for (SparseVector::fvector_t::const_iterator i = item.m_fvector.begin(); i != item.m_fvector.end(); ++i) {
+ seed = util::MurmurHashNative(&(i->first), sizeof(i->first), seed);
+ seed = util::MurmurHashNative(&(i->second), sizeof(i->second), seed);
+ }
+ return seed;
}
+
FeatureStats::FeatureStats()
: m_available_size(kAvailableSize), m_entries(0),
m_array(new FeatureStatsType[m_available_size]) {}
@@ -181,8 +204,7 @@ FeatureStats::FeatureStats(const size_t size)
FeatureStats::~FeatureStats()
{
- delete [] m_array;
- m_array = NULL;
+ delete [] m_array;
}
void FeatureStats::Copy(const FeatureStats &stats)
diff --git a/mert/FeatureStats.h b/mert/FeatureStats.h
index a882e7358..f989d9418 100644
--- a/mert/FeatureStats.h
+++ b/mert/FeatureStats.h
@@ -14,6 +14,9 @@
#include <map>
#include <string>
#include <vector>
+
+#include <boost/unordered_map.hpp>
+#include "util/string_piece.hh"
#include "Types.h"
namespace MosesTuning
@@ -31,6 +34,7 @@ public:
FeatureStatsType get(const std::string& name) const;
FeatureStatsType get(std::size_t id) const;
void set(const std::string& name, FeatureStatsType value);
+ void set(size_t id, FeatureStatsType value);
void clear();
void load(const std::string& file);
std::size_t size() const {
@@ -40,6 +44,7 @@ public:
void write(std::ostream& out, const std::string& sep = " ") const;
SparseVector& operator-=(const SparseVector& rhs);
+ SparseVector& operator+=(const SparseVector& rhs);
FeatureStatsType inner_product(const SparseVector& rhs) const;
// Added by cherryc
diff --git a/mert/ForestRescore.cpp b/mert/ForestRescore.cpp
new file mode 100644
index 000000000..0172c6d92
--- /dev/null
+++ b/mert/ForestRescore.cpp
@@ -0,0 +1,432 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <cmath>
+#include <limits>
+#include <list>
+
+#include <boost/unordered_set.hpp>
+
+#include "util/file_piece.hh"
+#include "util/tokenize_piece.hh"
+
+#include "BleuScorer.h"
+#include "ForestRescore.h"
+
+using namespace std;
+
+namespace MosesTuning {
+
+std::ostream& operator<<(std::ostream& out, const WordVec& wordVec) {
+ out << "[";
+ for (size_t i = 0; i < wordVec.size(); ++i) {
+ out << wordVec[i]->first;
+ if (i+1< wordVec.size()) out << " ";
+ }
+ out << "]";
+ return out;
+}
+
+
+void ReferenceSet::Load(const vector<string>& files, Vocab& vocab) {
+ for (size_t i = 0; i < files.size(); ++i) {
+ util::FilePiece fh(files[i].c_str());
+ size_t sentenceId = 0;
+ while(true) {
+ StringPiece line;
+ try {
+ line = fh.ReadLine();
+ } catch (util::EndOfFileException &e) {
+ break;
+ }
+ AddLine(sentenceId, line, vocab);
+ ++sentenceId;
+ }
+ }
+
+}
+
+void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab) {
+ //cerr << line << endl;
+ NgramCounter ngramCounts;
+ list<WordVec> openNgrams;
+ size_t length = 0;
+ //tokenize & count
+ for (util::TokenIter<util::SingleCharacter, true> j(line, util::SingleCharacter(' ')); j; ++j) {
+ const Vocab::Entry* nextTok = &(vocab.FindOrAdd(*j));
+ ++length;
+ openNgrams.push_front(WordVec());
+ for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end(); ++k) {
+ k->push_back(nextTok);
+ ++ngramCounts[*k];
+ }
+ if (openNgrams.size() >= kBleuNgramOrder) openNgrams.pop_back();
+ }
+
+ //merge into overall ngram map
+ for (NgramCounter::const_iterator ni = ngramCounts.begin();
+ ni != ngramCounts.end(); ++ni) {
+ size_t count = ni->second;
+ //cerr << *ni << " " << count << endl;
+ if (ngramCounts_.size() <= sentenceId) ngramCounts_.resize(sentenceId+1);
+ NgramMap::iterator totalsIter = ngramCounts_[sentenceId].find(ni->first);
+ if (totalsIter == ngramCounts_[sentenceId].end()) {
+ ngramCounts_[sentenceId][ni->first] = pair<size_t,size_t>(count,count);
+ } else {
+ ngramCounts_[sentenceId][ni->first].first = max(count, ngramCounts_[sentenceId][ni->first].first); //clip
+ ngramCounts_[sentenceId][ni->first].second += count; //no clip
+ }
+ }
+ //length
+ if (lengths_.size() <= sentenceId) lengths_.resize(sentenceId+1);
+ //TODO - length strategy - this is MIN
+ if (!lengths_[sentenceId]) {
+ lengths_[sentenceId] = length;
+ } else {
+ lengths_[sentenceId] = min(length,lengths_[sentenceId]);
+ }
+ //cerr << endl;
+
+}
+
+size_t ReferenceSet::NgramMatches(size_t sentenceId, const WordVec& ngram, bool clip) const {
+ const NgramMap& ngramCounts = ngramCounts_.at(sentenceId);
+ NgramMap::const_iterator ngi = ngramCounts.find(ngram);
+ if (ngi == ngramCounts.end()) return 0;
+ return clip ? ngi->second.first : ngi->second.second;
+}
+
+VertexState::VertexState(): bleuStats(kBleuNgramOrder), targetLength(0) {}
+
+void HgBleuScorer::UpdateMatches(const NgramCounter& counts, vector<FeatureStatsType>& bleuStats ) const {
+ for (NgramCounter::const_iterator ngi = counts.begin(); ngi != counts.end(); ++ngi) {
+ //cerr << "Checking: " << *ngi << " matches " << references_.NgramMatches(sentenceId_,*ngi,false) << endl;
+ size_t order = ngi->first.size();
+ size_t count = ngi->second;
+ bleuStats[(order-1)*2 + 1] += count;
+ bleuStats[(order-1) * 2] += min(count, references_.NgramMatches(sentenceId_,ngi->first,false));
+ }
+}
+
+size_t HgBleuScorer::GetTargetLength(const Edge& edge) const {
+ size_t targetLength = 0;
+ for (size_t i = 0; i < edge.Words().size(); ++i) {
+ const Vocab::Entry* word = edge.Words()[i];
+ if (word) ++targetLength;
+ }
+ for (size_t i = 0; i < edge.Children().size(); ++i) {
+ const VertexState& state = vertexStates_[edge.Children()[i]];
+ targetLength += state.targetLength;
+ }
+ return targetLength;
+}
+
+FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vector<FeatureStatsType>& bleuStats) {
+ NgramCounter ngramCounts;
+ size_t childId = 0;
+ size_t wordId = 0;
+ size_t contextId = 0; //position within left or right context
+ const VertexState* vertexState = NULL;
+ bool inLeftContext = false;
+ bool inRightContext = false;
+ list<WordVec> openNgrams;
+ const Vocab::Entry* currentWord = NULL;
+ while (wordId < edge.Words().size()) {
+ currentWord = edge.Words()[wordId];
+ if (currentWord != NULL) {
+ ++wordId;
+ } else {
+ if (!inLeftContext && !inRightContext) {
+ //entering a vertex
+ assert(!vertexState);
+ vertexState = &(vertexStates_[edge.Children()[childId]]);
+ ++childId;
+ if (vertexState->leftContext.size()) {
+ inLeftContext = true;
+ contextId = 0;
+ currentWord = vertexState->leftContext[contextId];
+ } else {
+ //empty context
+ vertexState = NULL;
+ ++wordId;
+ continue;
+ }
+ } else {
+ //already in a vertex
+ ++contextId;
+ if (inLeftContext && contextId < vertexState->leftContext.size()) {
+ //still in left context
+ currentWord = vertexState->leftContext[contextId];
+ } else if (inLeftContext) {
+ //at end of left context
+ if (vertexState->leftContext.size() == kBleuNgramOrder-1) {
+ //full size context, jump to right state
+ openNgrams.clear();
+ inLeftContext = false;
+ inRightContext = true;
+ contextId = 0;
+ currentWord = vertexState->rightContext[contextId];
+ } else {
+ //short context, just ignore right context
+ inLeftContext = false;
+ vertexState = NULL;
+ ++wordId;
+ continue;
+ }
+ } else {
+ //in right context
+ if (contextId < vertexState->rightContext.size()) {
+ currentWord = vertexState->rightContext[contextId];
+ } else {
+ //leaving vertex
+ inRightContext = false;
+ vertexState = NULL;
+ ++wordId;
+ continue;
+ }
+ }
+ }
+ }
+ assert(currentWord);
+ if (graph_.IsBoundary(currentWord)) continue;
+ openNgrams.push_front(WordVec());
+ openNgrams.front().reserve(kBleuNgramOrder);
+ for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end(); ++k) {
+ k->push_back(currentWord);
+ //Only insert ngrams that cross boundaries
+ if (!vertexState || (inLeftContext && k->size() > contextId+1)) ++ngramCounts[*k];
+ }
+ if (openNgrams.size() >= kBleuNgramOrder) openNgrams.pop_back();
+ }
+
+ //Collect matches
+ //This edge
+ //cerr << "edge ngrams" << endl;
+ UpdateMatches(ngramCounts, bleuStats);
+
+ //Child vertexes
+ for (size_t i = 0; i < edge.Children().size(); ++i) {
+ //cerr << "vertex ngrams " << edge.Children()[i] << endl;
+ for (size_t j = 0; j < bleuStats.size(); ++j) {
+ bleuStats[j] += vertexStates_[edge.Children()[i]].bleuStats[j];
+ }
+ }
+
+
+ FeatureStatsType sourceLength = head.SourceCovered();
+ size_t referenceLength = references_.Length(sentenceId_);
+ FeatureStatsType effectiveReferenceLength =
+ sourceLength / totalSourceLength_ * referenceLength;
+
+ bleuStats[bleuStats.size()-1] = effectiveReferenceLength;
+ //backgroundBleu_[backgroundBleu_.size()-1] =
+ // backgroundRefLength_ * sourceLength / totalSourceLength_;
+ FeatureStatsType bleu = sentenceLevelBackgroundBleu(bleuStats, backgroundBleu_);
+
+ return bleu;
+}
+
+void HgBleuScorer::UpdateState(const Edge& winnerEdge, size_t vertexId, const vector<FeatureStatsType>& bleuStats) {
+ //TODO: Maybe more efficient to absorb into the Score() method
+ VertexState& vertexState = vertexStates_[vertexId];
+ //cerr << "Updating state for " << vertexId << endl;
+
+ //leftContext
+ int wi = 0;
+ const VertexState* childState = NULL;
+ int contexti = 0; //index within child context
+ int childi = 0;
+ while (vertexState.leftContext.size() < (kBleuNgramOrder-1)) {
+ if ((size_t)wi >= winnerEdge.Words().size()) break;
+ const Vocab::Entry* word = winnerEdge.Words()[wi];
+ if (word != NULL) {
+ vertexState.leftContext.push_back(word);
+ ++wi;
+ } else {
+ if (childState == NULL) {
+ //start of child state
+ childState = &(vertexStates_[winnerEdge.Children()[childi++]]);
+ contexti = 0;
+ }
+ if ((size_t)contexti < childState->leftContext.size()) {
+ vertexState.leftContext.push_back(childState->leftContext[contexti++]);
+ } else {
+ //end of child context
+ childState = NULL;
+ ++wi;
+ }
+ }
+ }
+
+ //rightContext
+ wi = winnerEdge.Words().size() - 1;
+ childState = NULL;
+ childi = winnerEdge.Children().size() - 1;
+ while (vertexState.rightContext.size() < (kBleuNgramOrder-1)) {
+ if (wi < 0) break;
+ const Vocab::Entry* word = winnerEdge.Words()[wi];
+ if (word != NULL) {
+ vertexState.rightContext.push_back(word);
+ --wi;
+ } else {
+ if (childState == NULL) {
+ //start (ie rhs) of child state
+ childState = &(vertexStates_[winnerEdge.Children()[childi--]]);
+ contexti = childState->rightContext.size()-1;
+ }
+ if (contexti >= 0) {
+ vertexState.rightContext.push_back(childState->rightContext[contexti--]);
+ } else {
+ //end (ie lhs) of child context
+ childState = NULL;
+ --wi;
+ }
+ }
+ }
+ reverse(vertexState.rightContext.begin(), vertexState.rightContext.end());
+
+ //length + counts
+ vertexState.targetLength = GetTargetLength(winnerEdge);
+ vertexState.bleuStats = bleuStats;
+}
+
+
+typedef pair<const Edge*,FeatureStatsType> BackPointer;
+
+
+/**
+ * Recurse through back pointers
+ **/
+static void GetBestHypothesis(size_t vertexId, const Graph& graph, const vector<BackPointer>& bps,
+ HgHypothesis* bestHypo) {
+ //cerr << "Expanding " << vertexId << " Score: " << bps[vertexId].second << endl;
+ //UTIL_THROW_IF(bps[vertexId].second == kMinScore+1, HypergraphException, "Landed at vertex " << vertexId << " which is a dead end");
+ if (!bps[vertexId].first) return;
+ const Edge* prevEdge = bps[vertexId].first;
+ bestHypo->featureVector += *(prevEdge->Features().get());
+ size_t childId = 0;
+ for (size_t i = 0; i < prevEdge->Words().size(); ++i) {
+ if (prevEdge->Words()[i] != NULL) {
+ bestHypo->text.push_back(prevEdge->Words()[i]);
+ } else {
+ size_t childVertexId = prevEdge->Children()[childId++];
+ HgHypothesis childHypo;
+ GetBestHypothesis(childVertexId,graph,bps,&childHypo);
+ bestHypo->text.insert(bestHypo->text.end(), childHypo.text.begin(), childHypo.text.end());
+ bestHypo->featureVector += childHypo.featureVector;
+ }
+ }
+}
+
+void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references , size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu, HgHypothesis* bestHypo)
+{
+ BackPointer init(NULL,kMinScore);
+ vector<BackPointer> backPointers(graph.VertexSize(),init);
+ HgBleuScorer bleuScorer(references, graph, sentenceId, backgroundBleu);
+ vector<FeatureStatsType> winnerStats(kBleuNgramOrder*2+1);
+ for (size_t vi = 0; vi < graph.VertexSize(); ++vi) {
+ //cerr << "vertex id " << vi << endl;
+ FeatureStatsType winnerScore = kMinScore;
+ const Vertex& vertex = graph.GetVertex(vi);
+ const vector<const Edge*>& incoming = vertex.GetIncoming();
+ if (!incoming.size()) {
+ //UTIL_THROW(HypergraphException, "Vertex " << vi << " has no incoming edges");
+ //If no incoming edges, vertex is a dead end
+ backPointers[vi].first = NULL;
+ backPointers[vi].second = kMinScore/2;
+ } else {
+ //cerr << "\nVertex: " << vi << endl;
+ for (size_t ei = 0; ei < incoming.size(); ++ei) {
+ //cerr << "edge id " << ei << endl;
+ FeatureStatsType incomingScore = incoming[ei]->GetScore(weights);
+ for (size_t i = 0; i < incoming[ei]->Children().size(); ++i) {
+ size_t childId = incoming[ei]->Children()[i];
+ UTIL_THROW_IF(backPointers[childId].second == kMinScore,
+ HypergraphException, "Graph was not topologically sorted. curr=" << vi << " prev=" << childId);
+ incomingScore += backPointers[childId].second;
+ }
+ vector<FeatureStatsType> bleuStats(kBleuNgramOrder*2+1);
+ // cerr << "Score: " << incomingScore << " Bleu: ";
+ // if (incomingScore > nonbleuscore) {nonbleuscore = incomingScore; nonbleuid = ei;}
+ FeatureStatsType totalScore = incomingScore;
+ if (bleuWeight) {
+ FeatureStatsType bleuScore = bleuScorer.Score(*(incoming[ei]), vertex, bleuStats);
+ if (isnan(bleuScore)) {
+ cerr << "WARN: bleu score undefined" << endl;
+ cerr << "\tVertex id : " << vi << endl;
+ cerr << "\tBleu stats : ";
+ for (size_t i = 0; i < bleuStats.size(); ++i) {
+ cerr << bleuStats[i] << ",";
+ }
+ cerr << endl;
+ bleuScore = 0;
+ }
+ //UTIL_THROW_IF(isnan(bleuScore), util::Exception, "Bleu score undefined, smoothing problem?");
+ totalScore += bleuWeight * bleuScore;
+ // cerr << bleuScore << " Total: " << incomingScore << endl << endl;
+ //cerr << "is " << incomingScore << " bs " << bleuScore << endl;
+ }
+ if (totalScore >= winnerScore) {
+ //We only store the feature score (not the bleu score) with the vertex,
+ //since the bleu score is always cumulative, ie from counts for the whole span.
+ winnerScore = totalScore;
+ backPointers[vi].first = incoming[ei];
+ backPointers[vi].second = incomingScore;
+ winnerStats = bleuStats;
+ }
+ }
+ //update with winner
+ //if (bleuWeight) {
+ //TODO: Not sure if we need this when computing max-model solution
+ bleuScorer.UpdateState(*(backPointers[vi].first), vi, winnerStats);
+
+ }
+ }
+
+ //expand back pointers
+ GetBestHypothesis(graph.VertexSize()-1, graph, backPointers, bestHypo);
+
+ //bleu stats and fv
+
+ //Need the actual (clipped) stats
+ //TODO: This repeats code in bleu scorer - factor out
+ bestHypo->bleuStats.resize(kBleuNgramOrder*2+1);
+ NgramCounter counts;
+ list<WordVec> openNgrams;
+ for (size_t i = 0; i < bestHypo->text.size(); ++i) {
+ const Vocab::Entry* entry = bestHypo->text[i];
+ if (graph.IsBoundary(entry)) continue;
+ openNgrams.push_front(WordVec());
+ for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end(); ++k) {
+ k->push_back(entry);
+ ++counts[*k];
+ }
+ if (openNgrams.size() >= kBleuNgramOrder) openNgrams.pop_back();
+ }
+ for (NgramCounter::const_iterator ngi = counts.begin(); ngi != counts.end(); ++ngi) {
+ size_t order = ngi->first.size();
+ size_t count = ngi->second;
+ bestHypo->bleuStats[(order-1)*2 + 1] += count;
+ bestHypo->bleuStats[(order-1) * 2] += min(count, references.NgramMatches(sentenceId,ngi->first,true));
+ }
+ bestHypo->bleuStats[kBleuNgramOrder*2] = references.Length(sentenceId);
+}
+
+
+};
diff --git a/mert/ForestRescore.h b/mert/ForestRescore.h
new file mode 100644
index 000000000..900275b74
--- /dev/null
+++ b/mert/ForestRescore.h
@@ -0,0 +1,120 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+#ifndef MERT_FOREST_RESCORE_H
+#define MERT_FOREST_RESCORE_H
+
+#include <valarray>
+#include <vector>
+
+#include <boost/unordered_set.hpp>
+
+#include "BleuScorer.h"
+#include "Hypergraph.h"
+
+namespace MosesTuning {
+
+std::ostream& operator<<(std::ostream& out, const WordVec& wordVec);
+
+struct NgramHash : public std::unary_function<const WordVec&, std::size_t> {
+ std::size_t operator()(const WordVec& ngram) const {
+ return util::MurmurHashNative(&(ngram[0]), ngram.size() * sizeof(WordVec::value_type));
+ }
+};
+
+struct NgramEquals : public std::binary_function<const WordVec&, const WordVec&, bool> {
+ bool operator()(const WordVec& first, const WordVec& second) const {
+ if (first.size() != second.size()) return false;
+ return memcmp(&(first[0]), &(second[0]), first.size() * sizeof(WordVec::value_type)) == 0;
+ }
+};
+
+typedef boost::unordered_map<WordVec, size_t, NgramHash, NgramEquals> NgramCounter;
+
+
+class ReferenceSet {
+
+
+public:
+
+ void AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab);
+
+ void Load(const std::vector<std::string>& files, Vocab& vocab);
+
+ size_t NgramMatches(size_t sentenceId, const WordVec&, bool clip) const;
+
+ size_t Length(size_t sentenceId) const {return lengths_[sentenceId];}
+
+private:
+ //ngrams to (clipped,unclipped) counts
+ typedef boost::unordered_map<WordVec, std::pair<std::size_t,std::size_t>, NgramHash,NgramEquals> NgramMap;
+ std::vector<NgramMap> ngramCounts_;
+ std::vector<size_t> lengths_;
+
+};
+
+struct VertexState {
+ VertexState();
+
+ std::vector<FeatureStatsType> bleuStats;
+ WordVec leftContext;
+ WordVec rightContext;
+ size_t targetLength;
+};
+
+/**
+ * Used to score a rule (i.e. an edge) when we are applying it.
+**/
+class HgBleuScorer {
+ public:
+ HgBleuScorer(const ReferenceSet& references, const Graph& graph, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu):
+ references_(references), sentenceId_(sentenceId), graph_(graph), backgroundBleu_(backgroundBleu),
+ backgroundRefLength_(backgroundBleu[kBleuNgramOrder*2]) {
+ vertexStates_.resize(graph.VertexSize());
+ totalSourceLength_ = graph.GetVertex(graph.VertexSize()-1).SourceCovered();
+ }
+
+ FeatureStatsType Score(const Edge& edge, const Vertex& head, std::vector<FeatureStatsType>& bleuStats) ;
+
+ void UpdateState(const Edge& winnerEdge, size_t vertexId, const std::vector<FeatureStatsType>& bleuStats);
+
+
+ private:
+ const ReferenceSet& references_;
+ std::vector<VertexState> vertexStates_;
+ size_t sentenceId_;
+ size_t totalSourceLength_;
+ const Graph& graph_;
+ std::vector<FeatureStatsType> backgroundBleu_;
+ FeatureStatsType backgroundRefLength_;
+
+ void UpdateMatches(const NgramCounter& counter, std::vector<FeatureStatsType>& bleuStats) const;
+ size_t GetTargetLength(const Edge& edge) const;
+};
+
+struct HgHypothesis {
+ SparseVector featureVector;
+ WordVec text;
+ std::vector<FeatureStatsType> bleuStats;
+};
+
+void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu, HgHypothesis* bestHypo);
+
+};
+
+#endif
diff --git a/mert/ForestRescoreTest.cpp b/mert/ForestRescoreTest.cpp
new file mode 100644
index 000000000..86975d3a5
--- /dev/null
+++ b/mert/ForestRescoreTest.cpp
@@ -0,0 +1,246 @@
+#include <iostream>
+
+#include "ForestRescore.h"
+
+#define BOOST_TEST_MODULE MertForestRescore
+#include <boost/test/unit_test.hpp>
+
+
+
+using namespace std;
+using namespace MosesTuning;
+
+BOOST_AUTO_TEST_CASE(viterbi_simple_lattice)
+{
+ Vocab vocab;
+ WordVec words;
+ string wordStrings[] =
+ {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g"};
+ for (size_t i = 0; i < 9; ++i) {
+ words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
+ }
+
+ const string f1 = "foo";
+ const string f2 = "bar";
+ Graph graph(vocab);
+ graph.SetCounts(5,5);
+
+ Edge* e0 = graph.NewEdge();
+ e0->AddWord(words[0]);
+ e0->AddFeature(f1, 2.0);
+
+ Vertex* v0 = graph.NewVertex();
+ v0->AddEdge(e0);
+
+ Edge* e1 = graph.NewEdge();
+ e1->AddWord(NULL);
+ e1->AddChild(0);
+ e1->AddWord(words[2]);
+ e1->AddWord(words[3]);
+ e1->AddFeature(f1, 1.0);
+ e1->AddFeature(f2, 3.0);
+
+ Vertex* v1 = graph.NewVertex();
+ v1->AddEdge(e1);
+
+ Edge* e2 = graph.NewEdge();
+ e2->AddWord(NULL);
+ e2->AddChild(1);
+ e2->AddWord(words[4]);
+ e2->AddWord(words[5]);
+ e2->AddFeature(f2, 2.5);
+
+ Vertex* v2 = graph.NewVertex();
+ v2->AddEdge(e2);
+
+ Edge* e3 = graph.NewEdge();
+ e3->AddWord(NULL);
+ e3->AddChild(2);
+ e3->AddWord(words[6]);
+ e3->AddWord(words[7]);
+ e3->AddWord(words[8]);
+ e3->AddFeature(f1, -1);
+
+ Vertex* v3 = graph.NewVertex();
+ v3->AddEdge(e3);
+
+ Edge* e4 = graph.NewEdge();
+ e4->AddWord(NULL);
+ e4->AddChild(3);
+ e4->AddWord(words[1]);
+ e3->AddFeature(f2, 0.5);
+
+ Vertex* v4 = graph.NewVertex();
+ v4->AddEdge(e4);
+
+ ReferenceSet references;
+ references.AddLine(0, "a b c k e f o", vocab);
+ HgHypothesis modelHypo;
+ vector<FeatureStatsType> bg(kBleuNgramOrder*2+1);
+ SparseVector weights;
+ weights.set(f1,2);
+ weights.set(f2,1);
+ Viterbi(graph, weights, 0, references, 0, bg, &modelHypo);
+ BOOST_CHECK_CLOSE(2.0,modelHypo.featureVector.get(f1), 0.0001);
+ BOOST_CHECK_CLOSE(6.0,modelHypo.featureVector.get(f2), 0.0001);
+
+ BOOST_CHECK_EQUAL(words[0]->first, modelHypo.text[0]->first);
+ BOOST_CHECK_EQUAL(words[2]->first, modelHypo.text[1]->first);
+ BOOST_CHECK_EQUAL(words[3]->first, modelHypo.text[2]->first);
+ BOOST_CHECK_EQUAL(words[4]->first, modelHypo.text[3]->first);
+ BOOST_CHECK_EQUAL(words[5]->first, modelHypo.text[4]->first);
+ BOOST_CHECK_EQUAL(words[6]->first, modelHypo.text[5]->first);
+ BOOST_CHECK_EQUAL(words[7]->first, modelHypo.text[6]->first);
+ BOOST_CHECK_EQUAL(words[8]->first, modelHypo.text[7]->first);
+ BOOST_CHECK_EQUAL(words[1]->first, modelHypo.text[8]->first);
+}
+
+
+
+BOOST_AUTO_TEST_CASE(viterbi_3branch_lattice)
+{
+ Vocab vocab;
+ WordVec words;
+ string wordStrings[] =
+ {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
+ for (size_t i = 0; i < 13; ++i) {
+ words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
+ }
+
+ const string f1 = "foo";
+ const string f2 = "bar";
+ Graph graph(vocab);
+ graph.SetCounts(5,8);
+
+ Edge* e0 = graph.NewEdge();
+ e0->AddWord(words[0]);
+
+ Vertex* v0 = graph.NewVertex();
+ v0->AddEdge(e0);
+
+ Edge* e1 = graph.NewEdge();
+ e1->AddWord(NULL);
+ e1->AddChild(0);
+ e1->AddWord(words[2]);
+ e1->AddWord(words[3]);
+ e1->AddFeature(f1,1);
+ e1->AddFeature(f2,1);
+ Edge* e5 = graph.NewEdge();
+ e5->AddWord(NULL);
+ e5->AddChild(0);
+ e5->AddWord(words[9]);
+ e5->AddWord(words[10]);
+ e5->AddFeature(f1,2);
+ e5->AddFeature(f2,-2);
+
+ Vertex* v1 = graph.NewVertex();
+ v1->AddEdge(e1);
+ v1->AddEdge(e5);
+ v1->SetSourceCovered(1);
+
+ Edge* e2 = graph.NewEdge();
+ e2->AddWord(NULL);
+ e2->AddChild(1);
+ e2->AddWord(words[4]);
+ e2->AddWord(words[5]);
+ e2->AddFeature(f2,3);
+
+ Vertex* v2 = graph.NewVertex();
+ v2->AddEdge(e2);
+ v2->SetSourceCovered(3);
+
+ Edge* e3 = graph.NewEdge();
+ e3->AddWord(NULL);
+ e3->AddChild(2);
+ e3->AddWord(words[6]);
+ e3->AddWord(words[7]);
+ e3->AddWord(words[8]);
+ e3->AddFeature(f1,1);
+ Edge* e6 = graph.NewEdge();
+ e6->AddWord(NULL);
+ e6->AddChild(2);
+ e6->AddWord(words[9]);
+ e6->AddWord(words[12]);
+ e6->AddFeature(f2,1);
+ Edge* e7 = graph.NewEdge();
+ e7->AddWord(NULL);
+ e7->AddChild(1);
+ e7->AddWord(words[11]);
+ e7->AddWord(words[12]);
+ e7->AddFeature(f1,2);
+ e7->AddFeature(f2,3);
+
+ Vertex* v3 = graph.NewVertex();
+ v3->AddEdge(e3);
+ v3->AddEdge(e6);
+ v3->AddEdge(e7);
+ v3->SetSourceCovered(5);
+
+ Edge* e4 = graph.NewEdge();
+ e4->AddWord(NULL);
+ e4->AddChild(3);
+ e4->AddWord(words[1]);
+
+ Vertex* v4 = graph.NewVertex();
+ v4->AddEdge(e4);
+ v4->SetSourceCovered(6);
+
+ /*Paths || foo || bar || s(2,1)
+ ab cd hk || 1 || 5 || 7
+ hi cd hk || 2 || 2 || 6
+ ab jk || 3 || 4 || 10
+ hi jk || 4 || 1 || 9
+ ab cd efg || 2 || 4 || 8
+ hi cd efg || 3 || 1 || 7
+ */
+
+ ReferenceSet references;
+ references.AddLine(0, "a b c d h k", vocab);
+ HgHypothesis modelHypo;
+ vector<FeatureStatsType> bg(kBleuNgramOrder*2+1, 0.1);
+ SparseVector weights;
+ weights.set(f1,2);
+ weights.set(f2,1);
+ Viterbi(graph, weights, 0, references, 0, bg, &modelHypo);
+ BOOST_CHECK_CLOSE(3.0,modelHypo.featureVector.get(f1), 0.0001);
+ BOOST_CHECK_CLOSE(4.0,modelHypo.featureVector.get(f2), 0.0001);
+
+ BOOST_CHECK_EQUAL(6, modelHypo.text.size());
+
+ //expect ab jk
+ BOOST_CHECK_EQUAL(words[0]->first, modelHypo.text[0]->first);
+ BOOST_CHECK_EQUAL(words[2]->first, modelHypo.text[1]->first);
+ BOOST_CHECK_EQUAL(words[3]->first, modelHypo.text[2]->first);
+ BOOST_CHECK_EQUAL(words[11]->first, modelHypo.text[3]->first);
+ BOOST_CHECK_EQUAL(words[12]->first, modelHypo.text[4]->first);
+ BOOST_CHECK_EQUAL(words[1]->first, modelHypo.text[5]->first);
+
+
+ HgHypothesis hopeHypo;
+ Viterbi(graph, weights, 1, references, 0, bg, &hopeHypo);
+ //expect abcdhk
+ BOOST_CHECK_EQUAL(8, hopeHypo.text.size());
+
+ BOOST_CHECK_EQUAL(words[0]->first, hopeHypo.text[0]->first);
+ BOOST_CHECK_EQUAL(words[2]->first, hopeHypo.text[1]->first);
+ BOOST_CHECK_EQUAL(words[3]->first, hopeHypo.text[2]->first);
+ BOOST_CHECK_EQUAL(words[4]->first, hopeHypo.text[3]->first);
+ BOOST_CHECK_EQUAL(words[5]->first, hopeHypo.text[4]->first);
+ BOOST_CHECK_EQUAL(words[9]->first, hopeHypo.text[5]->first);
+ BOOST_CHECK_EQUAL(words[12]->first, hopeHypo.text[6]->first);
+ BOOST_CHECK_EQUAL(words[1]->first, hopeHypo.text[7]->first);
+
+ BOOST_CHECK_EQUAL(kBleuNgramOrder*2+1, hopeHypo.bleuStats.size());
+ BOOST_CHECK_EQUAL(6, hopeHypo.bleuStats[0]);
+ BOOST_CHECK_EQUAL(6, hopeHypo.bleuStats[1]);
+ BOOST_CHECK_EQUAL(5, hopeHypo.bleuStats[2]);
+ BOOST_CHECK_EQUAL(5, hopeHypo.bleuStats[3]);
+ BOOST_CHECK_EQUAL(4, hopeHypo.bleuStats[4]);
+ BOOST_CHECK_EQUAL(4, hopeHypo.bleuStats[5]);
+ BOOST_CHECK_EQUAL(3, hopeHypo.bleuStats[6]);
+ BOOST_CHECK_EQUAL(3, hopeHypo.bleuStats[7]);
+ BOOST_CHECK_EQUAL(6, hopeHypo.bleuStats[8]);
+}
+
+
+
diff --git a/mert/HopeFearDecoder.cpp b/mert/HopeFearDecoder.cpp
new file mode 100644
index 000000000..23ba1e099
--- /dev/null
+++ b/mert/HopeFearDecoder.cpp
@@ -0,0 +1,339 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <algorithm>
+#include <cmath>
+#include <iterator>
+
+#define BOOST_FILESYSTEM_VERSION 3
+#include <boost/filesystem.hpp>
+#include <boost/lexical_cast.hpp>
+
+#include "util/exception.hh"
+#include "util/file_piece.hh"
+
+#include "BleuScorer.h"
+#include "HopeFearDecoder.h"
+
+using namespace std;
+namespace fs = boost::filesystem;
+
+namespace MosesTuning {
+
+static const ValType BLEU_RATIO = 5;
+
+ValType HopeFearDecoder::Evaluate(const AvgWeightVector& wv) {
+ vector<ValType> stats(kBleuNgramOrder*2+1,0);
+ for(reset(); !finished(); next()) {
+ vector<ValType> sent;
+ MaxModel(wv,&sent);
+ for(size_t i=0; i<sent.size(); i++) {
+ stats[i]+=sent[i];
+ }
+ }
+ return unsmoothedBleu(stats);
+}
+
+NbestHopeFearDecoder::NbestHopeFearDecoder(
+ const vector<string>& featureFiles,
+ const vector<string>& scoreFiles,
+ bool streaming,
+ bool no_shuffle,
+ bool safe_hope
+ ) : safe_hope_(safe_hope) {
+ if (streaming) {
+ train_.reset(new StreamingHypPackEnumerator(featureFiles, scoreFiles));
+ } else {
+ train_.reset(new RandomAccessHypPackEnumerator(featureFiles, scoreFiles, no_shuffle));
+ }
+}
+
+
+void NbestHopeFearDecoder::next() {
+ train_->next();
+}
+
+bool NbestHopeFearDecoder::finished() {
+ return train_->finished();
+}
+
+void NbestHopeFearDecoder::reset() {
+ train_->reset();
+}
+
+void NbestHopeFearDecoder::HopeFear(
+ const std::vector<ValType>& backgroundBleu,
+ const MiraWeightVector& wv,
+ HopeFearData* hopeFear
+ ) {
+
+
+ // Hope / fear decode
+ ValType hope_scale = 1.0;
+ size_t hope_index=0, fear_index=0, model_index=0;
+ ValType hope_score=0, fear_score=0, model_score=0;
+ for(size_t safe_loop=0; safe_loop<2; safe_loop++) {
+ ValType hope_bleu, hope_model;
+ for(size_t i=0; i< train_->cur_size(); i++) {
+ const MiraFeatureVector& vec=train_->featuresAt(i);
+ ValType score = wv.score(vec);
+ ValType bleu = sentenceLevelBackgroundBleu(train_->scoresAt(i),backgroundBleu);
+ // Hope
+ if(i==0 || (hope_scale*score + bleu) > hope_score) {
+ hope_score = hope_scale*score + bleu;
+ hope_index = i;
+ hope_bleu = bleu;
+ hope_model = score;
+ }
+ // Fear
+ if(i==0 || (score - bleu) > fear_score) {
+ fear_score = score - bleu;
+ fear_index = i;
+ }
+ // Model
+ if(i==0 || score > model_score) {
+ model_score = score;
+ model_index = i;
+ }
+ }
+ // Outer loop rescales the contribution of model score to 'hope' in antagonistic cases
+ // where model score is having far more influence than BLEU
+ hope_bleu *= BLEU_RATIO; // We only care about cases where model has MUCH more influence than BLEU
+ if(safe_hope_ && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
+ hope_scale = abs(hope_bleu) / abs(hope_model);
+ else break;
+ }
+ hopeFear->modelFeatures = train_->featuresAt(model_index);
+ hopeFear->hopeFeatures = train_->featuresAt(hope_index);
+ hopeFear->fearFeatures = train_->featuresAt(fear_index);
+
+ hopeFear->hopeStats = train_->scoresAt(hope_index);
+ hopeFear->hopeBleu = sentenceLevelBackgroundBleu(hopeFear->hopeStats, backgroundBleu);
+ const vector<float>& fear_stats = train_->scoresAt(fear_index);
+ hopeFear->fearBleu = sentenceLevelBackgroundBleu(fear_stats, backgroundBleu);
+
+ hopeFear->modelStats = train_->scoresAt(model_index);
+ hopeFear->hopeFearEqual = (hope_index == fear_index);
+}
+
+void NbestHopeFearDecoder::MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats) {
+ // Find max model
+ size_t max_index=0;
+ ValType max_score=0;
+ for(size_t i=0; i<train_->cur_size(); i++) {
+ MiraFeatureVector vec(train_->featuresAt(i));
+ ValType score = wv.score(vec);
+ if(i==0 || score > max_score) {
+ max_index = i;
+ max_score = score;
+ }
+ }
+ *stats = train_->scoresAt(max_index);
+}
+
+
+
+HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
+ (
+ const string& hypergraphDir,
+ const vector<string>& referenceFiles,
+ size_t num_dense,
+ bool streaming,
+ bool no_shuffle,
+ bool safe_hope,
+ size_t hg_pruning,
+ const MiraWeightVector& wv
+ ) :
+ num_dense_(num_dense) {
+
+ UTIL_THROW_IF(streaming, util::Exception, "Streaming not currently supported for hypergraphs");
+ UTIL_THROW_IF(!fs::exists(hypergraphDir), HypergraphException, "Directory '" << hypergraphDir << "' does not exist");
+ UTIL_THROW_IF(!referenceFiles.size(), util::Exception, "No reference files supplied");
+ references_.Load(referenceFiles, vocab_);
+
+ SparseVector weights;
+ wv.ToSparse(&weights);
+
+ static const string kWeights = "weights";
+ fs::directory_iterator dend;
+ size_t fileCount = 0;
+
+ cerr << "Reading hypergraphs" << endl;
+ for (fs::directory_iterator di(hypergraphDir); di != dend; ++di) {
+ const fs::path& hgpath = di->path();
+ if (hgpath.filename() == kWeights) continue;
+ Graph graph(vocab_);
+ size_t id = boost::lexical_cast<size_t>(hgpath.stem().string());
+ util::scoped_fd fd(util::OpenReadOrThrow(hgpath.string().c_str()));
+ //util::FilePiece file(di->path().string().c_str());
+ util::FilePiece file(fd.release());
+ ReadGraph(file,graph);
+
+ //cerr << "ref length " << references_.Length(id) << endl;
+ size_t edgeCount = hg_pruning * references_.Length(id);
+ boost::shared_ptr<Graph> prunedGraph;
+ prunedGraph.reset(new Graph(vocab_));
+ graph.Prune(prunedGraph.get(), weights, edgeCount);
+ graphs_[id] = prunedGraph;
+ //cerr << "Pruning to v=" << graphs_[id]->VertexSize() << " e=" << graphs_[id]->EdgeSize() << endl;
+ ++fileCount;
+ if (fileCount % 10 == 0) cerr << ".";
+ if (fileCount % 400 == 0) cerr << " [count=" << fileCount << "]\n";
+ }
+ cerr << endl << "Done" << endl;
+
+ sentenceIds_.resize(graphs_.size());
+ for (size_t i = 0; i < graphs_.size(); ++i) sentenceIds_[i] = i;
+ if (!no_shuffle) {
+ random_shuffle(sentenceIds_.begin(), sentenceIds_.end());
+ }
+
+}
+
+void HypergraphHopeFearDecoder::reset() {
+ sentenceIdIter_ = sentenceIds_.begin();
+}
+
+void HypergraphHopeFearDecoder::next() {
+ sentenceIdIter_++;
+}
+
+bool HypergraphHopeFearDecoder::finished() {
+ return sentenceIdIter_ == sentenceIds_.end();
+}
+
+void HypergraphHopeFearDecoder::HopeFear(
+ const vector<ValType>& backgroundBleu,
+ const MiraWeightVector& wv,
+ HopeFearData* hopeFear
+ ) {
+ size_t sentenceId = *sentenceIdIter_;
+ SparseVector weights;
+ wv.ToSparse(&weights);
+ const Graph& graph = *(graphs_[sentenceId]);
+
+ ValType hope_scale = 1.0;
+ HgHypothesis hopeHypo, fearHypo, modelHypo;
+ for(size_t safe_loop=0; safe_loop<2; safe_loop++) {
+
+ //hope decode
+ Viterbi(graph, weights, 1, references_, sentenceId, backgroundBleu, &hopeHypo);
+
+ //fear decode
+ Viterbi(graph, weights, -1, references_, sentenceId, backgroundBleu, &fearHypo);
+
+ //Model decode
+ Viterbi(graph, weights, 0, references_, sentenceId, backgroundBleu, &modelHypo);
+
+
+ // Outer loop rescales the contribution of model score to 'hope' in antagonistic cases
+ // where model score is having far more influence than BLEU
+ // hope_bleu *= BLEU_RATIO; // We only care about cases where model has MUCH more influence than BLEU
+ // if(safe_hope_ && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
+ // hope_scale = abs(hope_bleu) / abs(hope_model);
+ // else break;
+ //TODO: Don't currently get model and bleu so commented this out for now.
+ break;
+ }
+ //modelFeatures, hopeFeatures and fearFeatures
+ hopeFear->modelFeatures = MiraFeatureVector(modelHypo.featureVector, num_dense_);
+ hopeFear->hopeFeatures = MiraFeatureVector(hopeHypo.featureVector, num_dense_);
+ hopeFear->fearFeatures = MiraFeatureVector(fearHypo.featureVector, num_dense_);
+
+ //Need to know which are to be mapped to dense features!
+
+ //Only C++11
+ //hopeFear->modelStats.assign(std::begin(modelHypo.bleuStats), std::end(modelHypo.bleuStats));
+ vector<ValType> fearStats(kBleuNgramOrder*2+1);
+ hopeFear->hopeStats.reserve(kBleuNgramOrder*2+1);
+ hopeFear->modelStats.reserve(kBleuNgramOrder*2+1);
+ for (size_t i = 0; i < fearStats.size(); ++i) {
+ hopeFear->modelStats.push_back(modelHypo.bleuStats[i]);
+ hopeFear->hopeStats.push_back(hopeHypo.bleuStats[i]);
+
+ fearStats[i] = fearHypo.bleuStats[i];
+ }
+ /*
+ cerr << "hope" << endl;;
+ for (size_t i = 0; i < hopeHypo.text.size(); ++i) {
+ cerr << hopeHypo.text[i]->first << " ";
+ }
+ cerr << endl;
+ for (size_t i = 0; i < fearStats.size(); ++i) {
+ cerr << hopeHypo.bleuStats[i] << " ";
+ }
+ cerr << endl;
+ cerr << "fear";
+ for (size_t i = 0; i < fearHypo.text.size(); ++i) {
+ cerr << fearHypo.text[i]->first << " ";
+ }
+ cerr << endl;
+ for (size_t i = 0; i < fearStats.size(); ++i) {
+ cerr << fearHypo.bleuStats[i] << " ";
+ }
+ cerr << endl;
+ cerr << "model";
+ for (size_t i = 0; i < modelHypo.text.size(); ++i) {
+ cerr << modelHypo.text[i]->first << " ";
+ }
+ cerr << endl;
+ for (size_t i = 0; i < fearStats.size(); ++i) {
+ cerr << modelHypo.bleuStats[i] << " ";
+ }
+ cerr << endl;
+ */
+ hopeFear->hopeBleu = sentenceLevelBackgroundBleu(hopeFear->hopeStats, backgroundBleu);
+ hopeFear->fearBleu = sentenceLevelBackgroundBleu(fearStats, backgroundBleu);
+
+ //If the feature vectors and BLEU stats are equal, then assume hope and fear are equal
+ hopeFear->hopeFearEqual = true; //(hopeFear->hopeBleu - hopeFear->fearBleu) >= 1e-8;
+ if (hopeFear->hopeFearEqual) {
+ for (size_t i = 0; i < fearStats.size(); ++i) {
+ if (fearStats[i] != hopeFear->hopeStats[i]) {
+ hopeFear->hopeFearEqual = false;
+ break;
+ }
+ }
+ }
+ hopeFear->hopeFearEqual = hopeFear->hopeFearEqual && (hopeFear->fearFeatures == hopeFear->hopeFeatures);
+}
+
+void HypergraphHopeFearDecoder::MaxModel(const AvgWeightVector& wv, vector<ValType>* stats) {
+ assert(!finished());
+ HgHypothesis bestHypo;
+ size_t sentenceId = *sentenceIdIter_;
+ SparseVector weights;
+ wv.ToSparse(&weights);
+ vector<ValType> bg(kBleuNgramOrder*2+1);
+ Viterbi(*(graphs_[sentenceId]), weights, 0, references_, sentenceId, bg, &bestHypo);
+ stats->resize(bestHypo.bleuStats.size());
+ /*
+ for (size_t i = 0; i < bestHypo.text.size(); ++i) {
+ cerr << bestHypo.text[i]->first << " ";
+ }
+ cerr << endl;
+ */
+ for (size_t i = 0; i < bestHypo.bleuStats.size(); ++i) {
+ (*stats)[i] = bestHypo.bleuStats[i];
+ }
+}
+
+
+
+};
diff --git a/mert/HopeFearDecoder.h b/mert/HopeFearDecoder.h
new file mode 100644
index 000000000..694a3217e
--- /dev/null
+++ b/mert/HopeFearDecoder.h
@@ -0,0 +1,152 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+#ifndef MERT_HOPEFEARDECODER_H
+#define MERT_HOPEFEARDECODER_H
+
+#include <vector>
+
+#include <boost/scoped_ptr.hpp>
+#include <boost/shared_ptr.hpp>
+
+#include "ForestRescore.h"
+#include "Hypergraph.h"
+#include "HypPackEnumerator.h"
+#include "MiraFeatureVector.h"
+#include "MiraWeightVector.h"
+
+//
+// Used by batch mira to get the hope, fear and model hypotheses. This wraps
+// the n-best list and lattice/hypergraph implementations.
+//
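+//
+// A rough usage sketch (illustrative only; 'decoder', 'backgroundBleu' and
+// 'weights' are placeholders, not names from this file):
+//   decoder.reset();
+//   while (!decoder.finished()) {
+//     HopeFearData hfd;
+//     decoder.HopeFear(backgroundBleu, weights, &hfd);
+//     // ... use hfd to update the weights ...
+//     decoder.next();
+//   }
+//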
+
+namespace MosesTuning {
+
+/** To be filled in by the decoder */
+struct HopeFearData {
+ MiraFeatureVector modelFeatures;
+ MiraFeatureVector hopeFeatures;
+ MiraFeatureVector fearFeatures;
+
+ std::vector<float> modelStats;
+ std::vector<float> hopeStats;
+
+ ValType hopeBleu;
+ ValType fearBleu;
+
+ bool hopeFearEqual;
+};
+
+//Abstract base class
+class HopeFearDecoder {
+public:
+ //iterator methods
+ virtual void reset() = 0;
+ virtual void next() = 0;
+ virtual bool finished() = 0;
+
+ /**
+ * Calculate hope, fear and model hypotheses
+ **/
+ virtual void HopeFear(
+ const std::vector<ValType>& backgroundBleu,
+ const MiraWeightVector& wv,
+ HopeFearData* hopeFear
+ ) = 0;
+
+ /** Max score decoding */
+ virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats)
+ = 0;
+
+ /** Calculate bleu on training set */
+ ValType Evaluate(const AvgWeightVector& wv);
+
+};
+
+
+/** Gets hope-fear from nbest lists */
+class NbestHopeFearDecoder : public virtual HopeFearDecoder {
+public:
+ NbestHopeFearDecoder(const std::vector<std::string>& featureFiles,
+ const std::vector<std::string>& scoreFiles,
+ bool streaming,
+ bool no_shuffle,
+ bool safe_hope
+ );
+
+ virtual void reset();
+ virtual void next();
+ virtual bool finished();
+
+ virtual void HopeFear(
+ const std::vector<ValType>& backgroundBleu,
+ const MiraWeightVector& wv,
+ HopeFearData* hopeFear
+ );
+
+ virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats);
+
+private:
+ boost::scoped_ptr<HypPackEnumerator> train_;
+ bool safe_hope_;
+
+};
+
+
+
+/** Gets hope-fear from hypergraphs */
+class HypergraphHopeFearDecoder : public virtual HopeFearDecoder {
+public:
+ HypergraphHopeFearDecoder(
+ const std::string& hypergraphDir,
+ const std::vector<std::string>& referenceFiles,
+ size_t num_dense,
+ bool streaming,
+ bool no_shuffle,
+ bool safe_hope,
+ size_t hg_pruning,
+ const MiraWeightVector& wv
+ );
+
+ virtual void reset();
+ virtual void next();
+ virtual bool finished();
+
+ virtual void HopeFear(
+ const std::vector<ValType>& backgroundBleu,
+ const MiraWeightVector& wv,
+ HopeFearData* hopeFear
+ );
+
+ virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats);
+
+private:
+ size_t num_dense_;
+ //maps sentence Id to graph ptr
+ typedef std::map<size_t, boost::shared_ptr<Graph> > GraphColl;
+ GraphColl graphs_;
+ std::vector<size_t> sentenceIds_;
+ std::vector<size_t>::const_iterator sentenceIdIter_;
+ ReferenceSet references_;
+ Vocab vocab_;
+};
+
+};
+
+#endif
+
diff --git a/mert/Hypergraph.cpp b/mert/Hypergraph.cpp
new file mode 100644
index 000000000..f373ebbb5
--- /dev/null
+++ b/mert/Hypergraph.cpp
@@ -0,0 +1,313 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+#include <iostream>
+#include <set>
+
+#include <boost/lexical_cast.hpp>
+
+#include "util/double-conversion/double-conversion.h"
+#include "util/string_piece.hh"
+#include "util/tokenize_piece.hh"
+
+#include "Hypergraph.h"
+
+using namespace std;
+static const string kBOS = "<s>";
+static const string kEOS = "</s>";
+
+namespace MosesTuning {
+
+StringPiece NextLine(util::FilePiece& from) {
+ StringPiece line;
+ while ((line = from.ReadLine()).starts_with("#"));
+ return line;
+}
+
+Vocab::Vocab() : eos_( FindOrAdd(kEOS)), bos_(FindOrAdd(kBOS)){
+}
+
+const Vocab::Entry &Vocab::FindOrAdd(const StringPiece &str) {
+#if BOOST_VERSION >= 104200
+ Map::const_iterator i= map_.find(str, Hash(), Equals());
+#else
+ std::string copied_str(str.data(), str.size());
+ Map::const_iterator i = map_.find(copied_str.c_str());
+#endif
+ if (i != map_.end()) return *i;
+ char *copied = static_cast<char*>(piece_backing_.Allocate(str.size() + 1));
+ memcpy(copied, str.data(), str.size());
+ copied[str.size()] = 0;
+ return *map_.insert(Entry(copied, map_.size())).first;
+}
+
+double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan");
+
+
+/**
+ * Reads an incoming edge. Returns the edge and the number of source words covered.
+**/
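+// An edge line looks roughly like this (illustrative, mirroring the parsing below
+// and the unit test data):
+//   [0] a b ||| foo=1 bar=1 ||| 1
+// i.e. target tokens ([n] marks child vertex n) ||| name=value features ||| source words covered.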
+static pair<Edge*,size_t> ReadEdge(util::FilePiece &from, Graph &graph) {
+ Edge* edge = graph.NewEdge();
+ StringPiece line = NextLine(from);
+ util::TokenIter<util::MultiCharacter> pipes(line, util::MultiCharacter(" ||| "));
+ //Target
+ for (util::TokenIter<util::SingleCharacter, true> i(*pipes, util::SingleCharacter(' ')); i; ++i) {
+ StringPiece got = *i;
+ if ('[' == *got.data() && ']' == got.data()[got.size() - 1]) {
+ // non-terminal
+ char *end_ptr;
+ unsigned long int child = std::strtoul(got.data() + 1, &end_ptr, 10);
+ UTIL_THROW_IF(end_ptr != got.data() + got.size() - 1, HypergraphException, "Bad non-terminal" << got);
+ UTIL_THROW_IF(child >= graph.VertexSize(), HypergraphException, "Reference to vertex " << child << " but we only have " << graph.VertexSize() << " vertices. Is the file in bottom-up format?");
+ edge->AddWord(NULL);
+ edge->AddChild(child);
+ } else {
+ const Vocab::Entry &found = graph.MutableVocab().FindOrAdd(got);
+ edge->AddWord(&found);
+ }
+ }
+
+ //Features
+ ++pipes;
+ for (util::TokenIter<util::SingleCharacter, true> i(*pipes, util::SingleCharacter(' ')); i; ++i) {
+ StringPiece fv = *i;
+ if (!fv.size()) break;
+ size_t equals = fv.find_last_of("=");
+ UTIL_THROW_IF(equals == fv.npos, HypergraphException, "Failed to parse feature '" << fv << "'");
+ StringPiece name = fv.substr(0,equals);
+ StringPiece value = fv.substr(equals+1);
+ int processed;
+ float score = converter.StringToFloat(value.data(), value.length(), &processed);
+ UTIL_THROW_IF(isnan(score), HypergraphException, "Failed to parse weight '" << value << "'");
+ edge->AddFeature(name,score);
+ }
+ //Covered words
+ ++pipes;
+ size_t sourceCovered = boost::lexical_cast<size_t>(*pipes);
+ return pair<Edge*,size_t>(edge,sourceCovered);
+}
+
+void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeCount) const {
+
+ Graph& newGraph = *pNewGraph;
+ //TODO: Optimise case where no pruning required
+
+ //For debug
+
+
+ /*
+ map<const Edge*, string> edgeIds;
+ for (size_t i = 0; i < edges_.Size(); ++i) {
+ stringstream str;
+ size_t childId = 0;
+ for (size_t j = 0; j < edges_[i].Words().size(); ++j) {
+ if (edges_[i].Words()[j]) {
+ str << edges_[i].Words()[j]->first << " ";
+ } else {
+ str << "[" << edges_[i].Children()[childId++] << "] ";
+ }
+ }
+ edgeIds[&(edges_[i])] = str.str();
+ }
+ */
+
+ //end For debug
+
+ map<const Edge*, FeatureStatsType> edgeBackwardScores;
+ map<const Edge*, size_t> edgeHeads;
+ vector<FeatureStatsType> vertexBackwardScores(vertices_.Size(), kMinScore);
+ vector<vector<const Edge*> > outgoing(vertices_.Size());
+
+ //Compute backward scores
+ for (size_t vi = 0; vi < vertices_.Size(); ++vi) {
+ // cerr << "Vertex " << vi << endl;
+ const Vertex& vertex = vertices_[vi];
+ const vector<const Edge*>& incoming = vertex.GetIncoming();
+ if (!incoming.size()) {
+ vertexBackwardScores[vi] = 0;
+ } else {
+ for (size_t ei = 0; ei < incoming.size(); ++ei) {
+ //cerr << "Edge " << edgeIds[incoming[ei]] << endl;
+ edgeHeads[incoming[ei]]= vi;
+ FeatureStatsType incomingScore = incoming[ei]->GetScore(weights);
+ for (size_t i = 0; i < incoming[ei]->Children().size(); ++i) {
+ //cerr << "\tChild " << incoming[ei]->Children()[i] << endl;
+ size_t childId = incoming[ei]->Children()[i];
+ UTIL_THROW_IF(vertexBackwardScores[childId] == kMinScore,
+ HypergraphException, "Graph was not topologically sorted. curr=" << vi << " prev=" << childId);
+ outgoing[childId].push_back(incoming[ei]);
+ incomingScore += vertexBackwardScores[childId];
+ }
+ edgeBackwardScores[incoming[ei]]= incomingScore;
+ //cerr << "Backward score: " << incomingScore << endl;
+ if (incomingScore > vertexBackwardScores[vi]) vertexBackwardScores[vi] = incomingScore;
+ }
+ }
+ }
+
+ //Compute forward scores
+ vector<FeatureStatsType> vertexForwardScores(vertices_.Size(), kMinScore);
+ map<const Edge*, FeatureStatsType> edgeForwardScores;
+ for (size_t i = 1; i <= vertices_.Size(); ++i) {
+ size_t vi = vertices_.Size() - i;
+ //cerr << "Vertex " << vi << endl;
+ if (!outgoing[vi].size()) {
+ vertexForwardScores[vi] = 0;
+ } else {
+ for (size_t ei = 0; ei < outgoing[vi].size(); ++ei) {
+ //cerr << "Edge " << edgeIds[outgoing[vi][ei]] << endl;
+ FeatureStatsType outgoingScore = 0;
+ //add score of head
+ outgoingScore += vertexForwardScores[edgeHeads[outgoing[vi][ei]]];
+ //cerr << "Forward score " << outgoingScore << endl;
+ edgeForwardScores[outgoing[vi][ei]] = outgoingScore;
+ //sum scores of siblings
+ for (size_t i = 0; i < outgoing[vi][ei]->Children().size(); ++i) {
+ size_t siblingId = outgoing[vi][ei]->Children()[i];
+ if (siblingId != vi) {
+ //cerr << "\tSibling " << siblingId << endl;
+ outgoingScore += vertexBackwardScores[siblingId];
+ }
+ }
+ outgoingScore += outgoing[vi][ei]->GetScore(weights);
+ if (outgoingScore > vertexForwardScores[vi]) vertexForwardScores[vi] = outgoingScore;
+ //cerr << "Vertex " << vi << " forward score " << outgoingScore << endl;
+ }
+ }
+ }
+
+
+
+ multimap<FeatureStatsType, const Edge*> edgeScores;
+ for (size_t i = 0; i < edges_.Size(); ++i) {
+ const Edge* edge = &(edges_[i]);
+ if (edgeForwardScores.find(edge) == edgeForwardScores.end()) {
+ //This edge has no children, so didn't get a forward score. Its forward score
+ //is that of its head
+ edgeForwardScores[edge] = vertexForwardScores[edgeHeads[edge]];
+ }
+ FeatureStatsType score = edgeForwardScores[edge] + edgeBackwardScores[edge];
+ edgeScores.insert(pair<FeatureStatsType, const Edge*>(score,edge));
+ // cerr << edgeIds[edge] << " " << score << endl;
+ }
+
+
+
+ multimap<FeatureStatsType, const Edge*>::const_reverse_iterator ei = edgeScores.rbegin();
+ size_t edgeCount = 1;
+ while(edgeCount < minEdgeCount && ei != edgeScores.rend()) {
+ ++ei;
+ ++edgeCount;
+ }
+ multimap<FeatureStatsType, const Edge*>::const_iterator lowest = edgeScores.begin();
+ if (ei != edgeScores.rend()) lowest = edgeScores.lower_bound(ei->first);
+
+ //cerr << "Retained edges" << endl;
+ set<size_t> retainedVertices;
+ set<const Edge*> retainedEdges;
+ for (; lowest != edgeScores.end(); ++lowest) {
+ //cerr << lowest->first << " " << edgeIds[lowest->second] << endl;
+ retainedEdges.insert(lowest->second);
+ retainedVertices.insert(edgeHeads[lowest->second]);
+ for (size_t i = 0; i < lowest->second->Children().size(); ++i) {
+ retainedVertices.insert(lowest->second->Children()[i]);
+ }
+ }
+ newGraph.SetCounts(retainedVertices.size(), retainedEdges.size());
+
+ //cerr << "Retained vertices" << endl;
+ map<size_t,size_t> oldIdToNew;
+ size_t vi = 0;
+ for (set<size_t>::const_iterator i = retainedVertices.begin(); i != retainedVertices.end(); ++i, ++vi) {
+ //cerr << *i << " New: " << vi << endl;
+ oldIdToNew[*i] = vi;
+ Vertex* vertex = newGraph.NewVertex();
+ vertex->SetSourceCovered(vertices_[*i].SourceCovered());
+ }
+
+ for (set<const Edge*>::const_iterator i = retainedEdges.begin(); i != retainedEdges.end(); ++i) {
+ Edge* newEdge = newGraph.NewEdge();
+ const Edge* oldEdge = *i;
+ for (size_t j = 0; j < oldEdge->Words().size(); ++j) {
+ newEdge->AddWord(oldEdge->Words()[j]);
+ }
+ for (size_t j = 0; j < oldEdge->Children().size(); ++j) {
+ newEdge->AddChild(oldIdToNew[oldEdge->Children()[j]]);
+ }
+ newEdge->SetFeatures(oldEdge->Features());
+ Vertex& newHead = newGraph.vertices_[oldIdToNew[edgeHeads[oldEdge]]];
+ newHead.AddEdge(newEdge);
+ }
+
+ /*
+ cerr << "New graph" << endl;
+ for (size_t vi = 0; vi < newGraph.VertexSize(); ++vi) {
+ cerr << "Vertex " << vi << endl;
+ const vector<const Edge*> incoming = newGraph.GetVertex(vi).GetIncoming();
+ for (size_t ei = 0; ei < incoming.size(); ++ei) {
+ size_t childId = 0;
+ for (size_t wi = 0; wi < incoming[ei]->Words().size(); ++wi) {
+ const Vocab::Entry* word = incoming[ei]->Words()[wi];
+ if (word) {
+ cerr << word->first << " ";
+ } else {
+ cerr << "[" << incoming[ei]->Children()[childId++] << "] ";
+ }
+ }
+ cerr << " Score: " << incoming[ei]->GetScore(weights) << endl;
+ }
+ cerr << endl;
+ }
+ */
+
+
+}
+
+/**
+ * Read from "Kenneth's hypergraph" aka cdec target_graph format (with comments)
+**/
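+// A minimal sketch of the expected input (illustrative only):
+//   # target ||| features ||| source-covered
+//   2 2
+//   1
+//   <s> ||| lm=0 ||| 0
+//   1
+//   [0] a b </s> ||| tm=1.5 lm=-2 ||| 3
+// i.e. a fixed header line, then vertex and edge counts, then for each vertex
+// (in bottom-up order) its incoming-edge count followed by that many edge lines.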
+void ReadGraph(util::FilePiece &from, Graph &graph) {
+
+ //First line should contain field names
+ StringPiece line = from.ReadLine();
+ UTIL_THROW_IF(line.compare("# target ||| features ||| source-covered") != 0, HypergraphException, "Incorrect format spec on first line: '" << line << "'");
+ line = NextLine(from);
+
+ //Then expect numbers of vertices
+ util::TokenIter<util::SingleCharacter, false> i(line, util::SingleCharacter(' '));
+ unsigned long int vertices = boost::lexical_cast<unsigned long int>(*i);
+ ++i;
+ unsigned long int edges = boost::lexical_cast<unsigned long int>(*i);
+ graph.SetCounts(vertices, edges);
+ //cerr << "vertices: " << vertices << "; edges: " << edges << endl;
+ for (size_t i = 0; i < vertices; ++i) {
+ line = NextLine(from);
+ unsigned long int edge_count = boost::lexical_cast<unsigned long int>(line);
+ Vertex* vertex = graph.NewVertex();
+ for (unsigned long int e = 0; e < edge_count; ++e) {
+ pair<Edge*,size_t> edge = ReadEdge(from, graph);
+ vertex->AddEdge(edge.first);
+ //Note: the file format attaches this to the edge, but it's really a property
+ //of the vertex.
+ if (!e) {vertex->SetSourceCovered(edge.second);}
+ }
+ }
+}
+
+};
diff --git a/mert/Hypergraph.h b/mert/Hypergraph.h
new file mode 100644
index 000000000..b6ee6c3f8
--- /dev/null
+++ b/mert/Hypergraph.h
@@ -0,0 +1,251 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef MERT_HYPERGRAPH_H
+#define MERT_HYPERGRAPH_H
+
+#include <string>
+
+#include <boost/noncopyable.hpp>
+#include <boost/scoped_array.hpp>
+#include <boost/shared_ptr.hpp>
+#include <boost/functional/hash/hash.hpp>
+#include <boost/unordered_map.hpp>
+
+
+#include "util/exception.hh"
+#include "util/file_piece.hh"
+#include "util/murmur_hash.hh"
+#include "util/pool.hh"
+#include "util/string_piece.hh"
+
+#include "FeatureStats.h"
+
+namespace MosesTuning {
+
+typedef unsigned int WordIndex;
+const WordIndex kMaxWordIndex = UINT_MAX;
+const FeatureStatsType kMinScore = -std::numeric_limits<FeatureStatsType>::max();
+
+template <class T> class FixedAllocator : boost::noncopyable {
+ public:
+ FixedAllocator() : current_(NULL), end_(NULL) {}
+
+ void Init(std::size_t count) {
+ assert(!current_);
+ array_.reset(new T[count]);
+ current_ = array_.get();
+ end_ = current_ + count;
+ }
+
+ T &operator[](std::size_t idx) {
+ return array_.get()[idx];
+ }
+ const T &operator[](std::size_t idx) const {
+ return array_.get()[idx];
+ }
+
+ T *New() {
+ T *ret = current_++;
+ UTIL_THROW_IF(ret >= end_, util::Exception, "Allocating past end");
+ return ret;
+ }
+
+ std::size_t Capacity() const {
+ return end_ - array_.get();
+ }
+
+ std::size_t Size() const {
+ return current_ - array_.get();
+ }
+
+ private:
+ boost::scoped_array<T> array_;
+ T *current_, *end_;
+};
+
+
+class Vocab {
+ public:
+ Vocab();
+
+ typedef std::pair<const char *const, WordIndex> Entry;
+
+ const Entry &FindOrAdd(const StringPiece &str);
+
+ const Entry& Bos() const {return bos_;}
+
+ const Entry& Eos() const {return eos_;}
+
+ private:
+ util::Pool piece_backing_;
+
+ struct Hash : public std::unary_function<const char *, std::size_t> {
+ std::size_t operator()(StringPiece str) const {
+ return util::MurmurHashNative(str.data(), str.size());
+ }
+ };
+
+ struct Equals : public std::binary_function<const char *, const char *, bool> {
+ bool operator()(StringPiece first, StringPiece second) const {
+ return first == second;
+ }
+ };
+
+ typedef boost::unordered_map<const char *, WordIndex, Hash, Equals> Map;
+ Map map_;
+ Entry eos_;
+ Entry bos_;
+
+};
+
+typedef std::vector<const Vocab::Entry*> WordVec;
+
+class Vertex;
+
+//Use shared pointer to save copying when we prune
+typedef boost::shared_ptr<SparseVector> FeaturePtr;
+
+/**
+ * An edge has 1 head vertex, 0..n child (tail) vertices, a list of words and a feature vector.
+**/
+class Edge {
+ public:
+ Edge() {features_.reset(new SparseVector());}
+
+ void AddWord(const Vocab::Entry *word) {
+ words_.push_back(word);
+ }
+
+ void AddChild(size_t child) {
+ children_.push_back(child);
+ }
+
+ void AddFeature(const StringPiece& name, FeatureStatsType value) {
+ //TODO StringPiece interface
+ features_->set(name.as_string(),value);
+ }
+
+
+ const WordVec &Words() const {
+ return words_;
+ }
+
+ const FeaturePtr& Features() const {
+ return features_;
+ }
+
+ void SetFeatures(const FeaturePtr& features) {
+ features_ = features;
+ }
+
+ const std::vector<size_t>& Children() const {
+ return children_;
+ }
+
+ FeatureStatsType GetScore(const SparseVector& weights) const {
+ return inner_product(*(features_.get()), weights);
+ }
+
+ private:
+ // NULL for non-terminals.
+ std::vector<const Vocab::Entry*> words_;
+ std::vector<size_t> children_;
+ boost::shared_ptr<SparseVector> features_;
+};
+
+/*
+ * A vertex has 0..n incoming edges
+ **/
+class Vertex {
+ public:
+ Vertex() : sourceCovered_(0) {}
+
+ void AddEdge(const Edge* edge) {incoming_.push_back(edge);}
+
+ void SetSourceCovered(size_t sourceCovered) {sourceCovered_ = sourceCovered;}
+
+ const std::vector<const Edge*>& GetIncoming() const {return incoming_;}
+
+ size_t SourceCovered() const {return sourceCovered_;}
+
+ private:
+ std::vector<const Edge*> incoming_;
+ size_t sourceCovered_;
+};
+
+
+class Graph : boost::noncopyable {
+ public:
+ Graph(Vocab& vocab) : vocab_(vocab) {}
+
+ void SetCounts(std::size_t vertices, std::size_t edges) {
+ vertices_.Init(vertices);
+ edges_.Init(edges);
+ }
+
+ Vocab &MutableVocab() { return vocab_; }
+
+ Edge *NewEdge() {
+ return edges_.New();
+ }
+
+ Vertex *NewVertex() {
+ return vertices_.New();
+ }
+
+ const Vertex &GetVertex(std::size_t index) const {
+ return vertices_[index];
+ }
+
+ Edge &GetEdge(std::size_t index) {
+ return edges_[index];
+ }
+
+ /* Creates a pruned copy of this graph with minEdgeCount edges. Uses
+ the scores in the max-product semiring to rank edges, as suggested by
+ Colin Cherry */
+ void Prune(Graph* newGraph, const SparseVector& weights, size_t minEdgeCount) const;
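+ // Typical use (a sketch; mirrors HypergraphHopeFearDecoder and HypergraphTest):
+ //   Graph pruned(vocab);
+ //   graph.Prune(&pruned, weights, minEdgeCount);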
+
+ std::size_t VertexSize() const { return vertices_.Size(); }
+ std::size_t EdgeSize() const { return edges_.Size(); }
+
+ bool IsBoundary(const Vocab::Entry* word) const {
+ return word->second == vocab_.Bos().second || word->second == vocab_.Eos().second;
+ }
+
+ private:
+ FixedAllocator<Edge> edges_;
+ FixedAllocator<Vertex> vertices_;
+ Vocab& vocab_;
+};
+
+class HypergraphException : public util::Exception {
+ public:
+ HypergraphException() {}
+ ~HypergraphException() throw() {}
+};
+
+
+void ReadGraph(util::FilePiece &from, Graph &graph);
+
+
+};
+
+#endif
diff --git a/mert/HypergraphTest.cpp b/mert/HypergraphTest.cpp
new file mode 100644
index 000000000..345a445f0
--- /dev/null
+++ b/mert/HypergraphTest.cpp
@@ -0,0 +1,151 @@
+#include <iostream>
+
+#define BOOST_TEST_MODULE MertForestRescore
+#include <boost/test/unit_test.hpp>
+
+#include "Hypergraph.h"
+
+using namespace std;
+using namespace MosesTuning;
+
+BOOST_AUTO_TEST_CASE(prune)
+{
+ Vocab vocab;
+ WordVec words;
+ string wordStrings[] =
+ {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
+ for (size_t i = 0; i < 13; ++i) {
+ words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
+ }
+
+ const string f1 = "foo";
+ const string f2 = "bar";
+ Graph graph(vocab);
+ graph.SetCounts(5,8);
+
+ Edge* e0 = graph.NewEdge();
+ e0->AddWord(words[0]);
+
+ Vertex* v0 = graph.NewVertex();
+ v0->AddEdge(e0);
+
+ Edge* e1 = graph.NewEdge();
+ e1->AddWord(NULL);
+ e1->AddChild(0);
+ e1->AddWord(words[2]);
+ e1->AddWord(words[3]);
+ e1->AddFeature(f1,1);
+ e1->AddFeature(f2,1);
+ Edge* e5 = graph.NewEdge();
+ e5->AddWord(NULL);
+ e5->AddChild(0);
+ e5->AddWord(words[9]);
+ e5->AddWord(words[10]);
+ e5->AddFeature(f1,2);
+ e5->AddFeature(f2,-2);
+
+ Vertex* v1 = graph.NewVertex();
+ v1->AddEdge(e1);
+ v1->AddEdge(e5);
+ v1->SetSourceCovered(1);
+
+ Edge* e2 = graph.NewEdge();
+ e2->AddWord(NULL);
+ e2->AddChild(1);
+ e2->AddWord(words[4]);
+ e2->AddWord(words[5]);
+ e2->AddFeature(f2,3);
+
+ Vertex* v2 = graph.NewVertex();
+ v2->AddEdge(e2);
+ v2->SetSourceCovered(3);
+
+ Edge* e3 = graph.NewEdge();
+ e3->AddWord(NULL);
+ e3->AddChild(2);
+ e3->AddWord(words[6]);
+ e3->AddWord(words[7]);
+ e3->AddWord(words[8]);
+ e3->AddFeature(f1,1);
+ Edge* e6 = graph.NewEdge();
+ e6->AddWord(NULL);
+ e6->AddChild(2);
+ e6->AddWord(words[9]);
+ e6->AddWord(words[12]);
+ e6->AddFeature(f2,1);
+ Edge* e7 = graph.NewEdge();
+ e7->AddWord(NULL);
+ e7->AddChild(1);
+ e7->AddWord(words[11]);
+ e7->AddWord(words[12]);
+ e7->AddFeature(f1,2);
+ e7->AddFeature(f2,3);
+
+ Vertex* v3 = graph.NewVertex();
+ v3->AddEdge(e3);
+ v3->AddEdge(e6);
+ v3->AddEdge(e7);
+ v3->SetSourceCovered(5);
+
+ Edge* e4 = graph.NewEdge();
+ e4->AddWord(NULL);
+ e4->AddChild(3);
+ e4->AddWord(words[1]);
+
+ Vertex* v4 = graph.NewVertex();
+ v4->AddEdge(e4);
+ v4->SetSourceCovered(6);
+
+ SparseVector weights;
+ weights.set(f1,2);
+ weights.set(f2,1);
+
+ Graph pruned(vocab);
+ graph.Prune(&pruned, weights, 5);
+
+ BOOST_CHECK_EQUAL(5, pruned.EdgeSize());
+ BOOST_CHECK_EQUAL(4, pruned.VertexSize());
+
+ //edges retained should be best path (<s> ab jk </s>) and hi
+ BOOST_CHECK_EQUAL(1, pruned.GetVertex(0).GetIncoming().size());
+ BOOST_CHECK_EQUAL(2, pruned.GetVertex(1).GetIncoming().size());
+ BOOST_CHECK_EQUAL(1, pruned.GetVertex(2).GetIncoming().size());
+ BOOST_CHECK_EQUAL(1, pruned.GetVertex(3).GetIncoming().size());
+
+ const Edge* edge;
+
+ edge = pruned.GetVertex(0).GetIncoming()[0];
+ BOOST_CHECK_EQUAL(1, edge->Words().size());
+ BOOST_CHECK_EQUAL(words[0], edge->Words()[0]);
+
+ edge = pruned.GetVertex(1).GetIncoming()[0];
+ BOOST_CHECK_EQUAL(3, edge->Words().size());
+ BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
+ BOOST_CHECK_EQUAL(words[2]->first, edge->Words()[1]->first);
+ BOOST_CHECK_EQUAL(words[3]->first, edge->Words()[2]->first);
+
+ edge = pruned.GetVertex(1).GetIncoming()[1];
+ BOOST_CHECK_EQUAL(3, edge->Words().size());
+ BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
+ BOOST_CHECK_EQUAL(words[9]->first, edge->Words()[1]->first);
+ BOOST_CHECK_EQUAL(words[10]->first, edge->Words()[2]->first);
+
+ edge = pruned.GetVertex(2).GetIncoming()[0];
+ BOOST_CHECK_EQUAL(3, edge->Words().size());
+ BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
+ BOOST_CHECK_EQUAL(words[11]->first, edge->Words()[1]->first);
+ BOOST_CHECK_EQUAL(words[12]->first, edge->Words()[2]->first);
+
+ edge = pruned.GetVertex(3).GetIncoming()[0];
+ BOOST_CHECK_EQUAL(2, edge->Words().size());
+ BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
+ BOOST_CHECK_EQUAL(words[1]->first, edge->Words()[1]->first);
+
+
+
+
+
+// BOOST_CHECK_EQUAL(words[0], pruned.GetVertex(0).GetIncoming()[0].Words()[0]);
+
+
+}
diff --git a/mert/Jamfile b/mert/Jamfile
index 34c640b06..d848c258f 100644
--- a/mert/Jamfile
+++ b/mert/Jamfile
@@ -15,6 +15,9 @@ FeatureStats.cpp
FeatureArray.cpp
FeatureData.cpp
FeatureDataIterator.cpp
+ForestRescore.cpp
+HopeFearDecoder.cpp
+Hypergraph.cpp
MiraFeatureVector.cpp
MiraWeightVector.cpp
HypPackEnumerator.cpp
@@ -62,13 +65,15 @@ exe sentence-bleu : sentence-bleu.cpp mert_lib ;
exe pro : pro.cpp mert_lib ..//boost_program_options ;
-exe kbmira : kbmira.cpp mert_lib ..//boost_program_options ;
+exe kbmira : kbmira.cpp mert_lib ..//boost_program_options ..//boost_filesystem ;
alias programs : mert extractor evaluator pro kbmira sentence-bleu ;
unit-test bleu_scorer_test : BleuScorerTest.cpp mert_lib ..//boost_unit_test_framework ;
unit-test feature_data_test : FeatureDataTest.cpp mert_lib ..//boost_unit_test_framework ;
unit-test data_test : DataTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test forest_rescore_test : ForestRescoreTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test hypergraph_test : HypergraphTest.cpp mert_lib ..//boost_unit_test_framework ;
unit-test ngram_test : NgramTest.cpp mert_lib ..//boost_unit_test_framework ;
unit-test optimizer_factory_test : OptimizerFactoryTest.cpp mert_lib ..//boost_unit_test_framework ;
unit-test point_test : PointTest.cpp mert_lib ..//boost_unit_test_framework ;
diff --git a/mert/MiraFeatureVector.cpp b/mert/MiraFeatureVector.cpp
index dea9b9b83..347ad488e 100644
--- a/mert/MiraFeatureVector.cpp
+++ b/mert/MiraFeatureVector.cpp
@@ -9,18 +9,17 @@ namespace MosesTuning
{
-MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
- : m_dense(vec.dense)
-{
- vector<size_t> sparseFeats = vec.sparse.feats();
+void MiraFeatureVector::InitSparse(const SparseVector& sparse, size_t ignoreLimit) {
+ vector<size_t> sparseFeats = sparse.feats();
bool bFirst = true;
size_t lastFeat = 0;
m_sparseFeats.reserve(sparseFeats.size());
m_sparseVals.reserve(sparseFeats.size());
for(size_t i=0; i<sparseFeats.size(); i++) {
+ if (sparseFeats[i] < ignoreLimit) continue;
size_t feat = m_dense.size() + sparseFeats[i];
m_sparseFeats.push_back(feat);
- m_sparseVals.push_back(vec.sparse.get(sparseFeats[i]));
+ m_sparseVals.push_back(sparse.get(sparseFeats[i]));
// Check ordered property
if(bFirst) {
@@ -35,6 +34,21 @@ MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
}
}
+MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
+ : m_dense(vec.dense)
+{
+ InitSparse(vec.sparse);
+}
+
+MiraFeatureVector::MiraFeatureVector(const SparseVector& sparse, size_t num_dense) {
+ m_dense.resize(num_dense);
+ //Assume that features with id [0,num_dense) are the dense features
+ for (size_t id = 0; id < num_dense; ++id) {
+ m_dense[id] = sparse.get(id);
+ }
+ InitSparse(sparse,num_dense);
+}
+
MiraFeatureVector::MiraFeatureVector(const MiraFeatureVector& other)
: m_dense(other.m_dense),
m_sparseFeats(other.m_sparseFeats),
@@ -148,6 +162,22 @@ MiraFeatureVector operator-(const MiraFeatureVector& a, const MiraFeatureVector&
return MiraFeatureVector(dense,sparseFeats,sparseVals);
}
+bool operator==(const MiraFeatureVector& a,const MiraFeatureVector& b) {
+ ValType eps = 1e-8;
+ //dense features
+ if (a.m_dense.size() != b.m_dense.size()) return false;
+ for (size_t i = 0; i < a.m_dense.size(); ++i) {
+ if (fabs(a.m_dense[i]-b.m_dense[i]) > eps) return false;
+ }
+ if (a.m_sparseFeats.size() != b.m_sparseFeats.size()) return false;
+ for (size_t i = 0; i < a.m_sparseFeats.size(); ++i) {
+ if (a.m_sparseFeats[i] != b.m_sparseFeats[i]) return false;
+ if (fabs(a.m_sparseVals[i] - b.m_sparseVals[i]) > eps) return false;
+ }
+ return true;
+
+}
+
ostream& operator<<(ostream& o, const MiraFeatureVector& e)
{
for(size_t i=0; i<e.size(); i++) {
diff --git a/mert/MiraFeatureVector.h b/mert/MiraFeatureVector.h
index cb2b1c87d..48aa496b5 100644
--- a/mert/MiraFeatureVector.h
+++ b/mert/MiraFeatureVector.h
@@ -26,7 +26,10 @@ typedef FeatureStatsType ValType;
class MiraFeatureVector
{
public:
+ MiraFeatureVector() {}
MiraFeatureVector(const FeatureDataItem& vec);
+ //Assumes that features in sparse with id < num_dense are dense features
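+ // e.g. (sketch) MiraFeatureVector(sv, 4) treats feature ids 0..3 as dense and the rest as sparse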
+ MiraFeatureVector(const SparseVector& sparse, size_t num_dense);
MiraFeatureVector(const MiraFeatureVector& other);
MiraFeatureVector(const std::vector<ValType>& dense,
const std::vector<std::size_t>& sparseFeats,
@@ -42,7 +45,12 @@ public:
friend std::ostream& operator<<(std::ostream& o, const MiraFeatureVector& e);
+ friend bool operator==(const MiraFeatureVector& a,const MiraFeatureVector& b);
+
private:
+ //Ignore any sparse features with id < ignoreLimit
+ void InitSparse(const SparseVector& sparse, size_t ignoreLimit = 0);
+
std::vector<ValType> m_dense;
std::vector<std::size_t> m_sparseFeats;
std::vector<ValType> m_sparseVals;
diff --git a/mert/MiraWeightVector.cpp b/mert/MiraWeightVector.cpp
index e23804cbf..3b7b1780c 100644
--- a/mert/MiraWeightVector.cpp
+++ b/mert/MiraWeightVector.cpp
@@ -93,6 +93,14 @@ void MiraWeightVector::update(size_t index, ValType delta)
m_lastUpdated[index] = m_numUpdates;
}
+void MiraWeightVector::ToSparse(SparseVector* sparse) const {
+ for (size_t i = 0; i < m_weights.size(); ++i) {
+ if(abs(m_weights[i])>1e-8) {
+ sparse->set(i,m_weights[i]);
+ }
+ }
+}
+
/**
* Make sure everyone's total is up-to-date
*/
@@ -163,6 +171,15 @@ size_t AvgWeightVector::size() const
return m_wv.m_weights.size();
}
+void AvgWeightVector::ToSparse(SparseVector* sparse) const {
+ for (size_t i = 0; i < size(); ++i) {
+ ValType w = weight(i);
+ if(abs(w)>1e-8) {
+ sparse->set(i,w);
+ }
+ }
+}
+
// --Emacs trickery--
// Local Variables:
// mode:c++
diff --git a/mert/MiraWeightVector.h b/mert/MiraWeightVector.h
index eb27e8a6d..bbc28704b 100644
--- a/mert/MiraWeightVector.h
+++ b/mert/MiraWeightVector.h
@@ -63,6 +63,11 @@ public:
*/
AvgWeightVector avg();
+ /**
+ * Convert to sparse vector, interpreting all features as sparse.
+ **/
+ void ToSparse(SparseVector* sparse) const;
+
friend class AvgWeightVector;
friend std::ostream& operator<<(std::ostream& o, const MiraWeightVector& e);
@@ -99,12 +104,12 @@ public:
ValType score(const MiraFeatureVector& fv) const;
ValType weight(std::size_t index) const;
std::size_t size() const;
+ void ToSparse(SparseVector* sparse) const;
private:
const MiraWeightVector& m_wv;
};
-#endif // MERT_WEIGHT_VECTOR_H
// --Emacs trickery--
// Local Variables:
@@ -113,3 +118,4 @@ private:
// End:
}
+#endif // MERT_WEIGHT_VECTOR_H
diff --git a/mert/TER/alignmentStruct.cpp b/mert/TER/alignmentStruct.cpp
index 15b4a8032..544ee61ac 100644
--- a/mert/TER/alignmentStruct.cpp
+++ b/mert/TER/alignmentStruct.cpp
@@ -1,17 +1,37 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
#include "alignmentStruct.h"
using namespace std;
namespace TERCpp
{
-string alignmentStruct::toString()
-{
- stringstream s;
+ string alignmentStruct::toString()
+ {
+ stringstream s;
// s << "nword : " << vectorToString(nwords)<<endl;
// s << "alignment" << vectorToString(alignment)<<endl;
// s << "afterShift" << vectorToString(alignment)<<endl;
- s << "Nothing to be printed" <<endl;
- return s.str();
-}
+ s << "Nothing to be printed" <<endl;
+ return s.str();
+ }
// alignmentStruct::alignmentStruct()
// {
@@ -79,7 +99,7 @@ string alignmentStruct::toString()
// return s.str();
// }
-/* The distance of the shift. */
+ /* The distance of the shift. */
// int alignmentStruct::distance()
// {
// if (moveto < start)
diff --git a/mert/TER/alignmentStruct.h b/mert/TER/alignmentStruct.h
index 9e9a75468..adda2c345 100644
--- a/mert/TER/alignmentStruct.h
+++ b/mert/TER/alignmentStruct.h
@@ -1,5 +1,26 @@
-#ifndef MERT_TER_ALIGNMENT_STRUCT_H_
-#define MERT_TER_ALIGNMENT_STRUCT_H_
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
+#ifndef MERT_TER_ALIGNMENTSTRUCT_H_
+#define MERT_TER_ALIGNMENTSTRUCT_H_
+
#include <vector>
#include <stdio.h>
@@ -7,15 +28,16 @@
#include <sstream>
#include "tools.h"
+
using namespace std;
using namespace Tools;
namespace TERCpp
{
-class alignmentStruct
-{
-private:
-public:
+ class alignmentStruct
+ {
+ private:
+ public:
// alignmentStruct();
// alignmentStruct (int _start, int _end, int _moveto, int _newloc);
@@ -31,15 +53,14 @@ public:
// int end;
// int moveto;
// int newloc;
- vector<string> nwords; // The words we shifted
- vector<char> alignment ; // for pra_more output
- vector<vecInt> aftershift; // for pra_more output
- // This is used to store the cost of a shift, so we don't have to
- // calculate it multiple times.
- double cost;
- string toString();
-};
+ vector<string> nwords; // The words we shifted
+ vector<char> alignment ; // for pra_more output
+ vector<vecInt> aftershift; // for pra_more output
+ // This is used to store the cost of a shift, so we don't have to
+ // calculate it multiple times.
+ double cost;
+ string toString();
+ };
}
-
-#endif // MERT_TER_ALIGNMENT_STRUCT_H_
+#endif
\ No newline at end of file
diff --git a/mert/TER/bestShiftStruct.h b/mert/TER/bestShiftStruct.h
index bfebe3b1e..9457fd1d8 100644
--- a/mert/TER/bestShiftStruct.h
+++ b/mert/TER/bestShiftStruct.h
@@ -1,5 +1,26 @@
-#ifndef MERT_TER_BEST_SHIFT_STRUCT_H_
-#define MERT_TER_BEST_SHIFT_STRUCT_H_
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
+#ifndef __BESTSHIFTSTRUCT_H_
+#define __BESTSHIFTSTRUCT_H_
+
#include <vector>
#include <stdio.h>
@@ -15,10 +36,10 @@ using namespace Tools;
namespace TERCpp
{
-class bestShiftStruct
-{
-private:
-public:
+ class bestShiftStruct
+ {
+ private:
+ public:
// alignmentStruct();
// alignmentStruct (int _start, int _end, int _moveto, int _newloc);
@@ -34,17 +55,16 @@ public:
// int end;
// int moveto;
// int newloc;
- terShift m_best_shift;
- terAlignment m_best_align;
- bool m_empty;
+ terShift m_best_shift;
+ terAlignment m_best_align;
+ bool m_empty;
// vector<string> nwords; // The words we shifted
// char* alignment ; // for pra_more output
// vector<vecInt> aftershift; // for pra_more output
- // This is used to store the cost of a shift, so we don't have to
- // calculate it multiple times.
+ // This is used to store the cost of a shift, so we don't have to
+ // calculate it multiple times.
// double cost;
-};
+ };
}
-
-#endif // MERT_TER_BEST_SHIFT_STRUCT_H_
+#endif
\ No newline at end of file
diff --git a/mert/TER/hashMap.cpp b/mert/TER/hashMap.cpp
index 469167aaa..de84ff796 100644
--- a/mert/TER/hashMap.cpp
+++ b/mert/TER/hashMap.cpp
@@ -1,3 +1,23 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
#include "hashMap.h"
// The following class defines a hash function for strings
@@ -8,142 +28,156 @@ using namespace std;
namespace HashMapSpace
{
// hashMap::hashMap();
-/* hashMap::~hashMap()
+ /* hashMap::~hashMap()
+ {
+ // vector<stringHasher>::const_iterator del = m_hasher.begin();
+ for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
+ {
+ delete(*del);
+ }
+ }*/
+ /**
+ * int hashMap::trouve ( long searchKey )
+ * @param searchKey
+ * @return
+ */
+ int hashMap::trouve ( long searchKey )
{
-// vector<stringHasher>::const_iterator del = m_hasher.begin();
- for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
- {
- delete(*del);
- }
- }*/
-/**
- * int hashMap::trouve ( long searchKey )
- * @param searchKey
- * @return
- */
-int hashMap::trouve ( long searchKey )
-{
- long foundKey;
+ long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return 1;
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return 1;
+ }
+ }
+ return 0;
}
- }
- return 0;
-}
-int hashMap::trouve ( string key )
-{
- long searchKey=hashValue ( key );
- long foundKey;;
+ int hashMap::trouve ( string key )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return 1;
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return 1;
+ }
+ }
+ return 0;
}
- }
- return 0;
-}
-/**
- * long hashMap::hashValue ( string key )
- * @param key
- * @return
- */
-long hashMap::hashValue ( string key )
-{
- locale loc; // the "C" locale
- const collate<char>& coll = use_facet<collate<char> >(loc);
- return coll.hash(key.data(),key.data()+key.length());
+ /**
+ * long hashMap::hashValue ( string key )
+ * @param key
+ * @return
+ */
+ long hashMap::hashValue ( string key )
+ {
+ locale loc; // the "C" locale
+ const collate<char>& coll = use_facet<collate<char> >(loc);
+ return coll.hash(key.data(),key.data()+key.length());
// boost::hash<string> hasher;
// return hasher ( key );
-}
-/**
- * void hashMap::addHasher ( string key, string value )
- * @param key
- * @param value
- */
-void hashMap::addHasher ( string key, string value )
-{
- if ( trouve ( hashValue ( key ) ) ==0 ) {
+ }
+ /**
+ * void hashMap::addHasher ( string key, string value )
+ * @param key
+ * @param value
+ */
+ void hashMap::addHasher ( string key, string value )
+ {
+ if ( trouve ( hashValue ( key ) ) ==0 )
+ {
// cerr << "ICI1" <<endl;
- stringHasher H ( hashValue ( key ),key,value );
+ stringHasher H ( hashValue ( key ),key,value );
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
// cerr << "ICI2" <<endl;
- m_hasher.push_back ( H );
- }
-}
-stringHasher hashMap::getHasher ( string key )
-{
- long searchKey=hashValue ( key );
- long foundKey;
- stringHasher defaut(0,"","");
+ m_hasher.push_back ( H );
+ }
+ }
+ stringHasher hashMap::getHasher ( string key )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;
+ stringHasher defaut(0,"","");
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return ( *l_hasher );
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return ( *l_hasher );
+ }
+ }
+ return defaut;
}
- }
- return defaut;
-}
-string hashMap::getValue ( string key )
-{
- long searchKey=hashValue ( key );
- long foundKey;
+ string hashMap::getValue ( string key )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
- return ( *l_hasher ).getValue();
+ return ( *l_hasher ).getValue();
+ }
+ }
+ return "";
}
- }
- return "";
-}
-string hashMap::searchValue ( string value )
-{
+ string hashMap::searchValue ( string value )
+ {
// long searchKey=hashValue ( key );
// long foundKey;
- string foundValue;
+ string foundValue;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundValue= ( *l_hasher ).getValue();
- if ( foundValue.compare ( value ) == 0 ) {
- return ( *l_hasher ).getKey();
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundValue= ( *l_hasher ).getValue();
+ if ( foundValue.compare ( value ) == 0 )
+ {
+ return ( *l_hasher ).getKey();
+ }
+ }
+ return "";
}
- }
- return "";
-}
-void hashMap::setValue ( string key , string value )
-{
- long searchKey=hashValue ( key );
- long foundKey;
+ void hashMap::setValue ( string key , string value )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- ( *l_hasher ).setValue ( value );
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ ( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue();
+ }
+ }
}
- }
-}
-/**
- *
- */
-void hashMap::printHash()
-{
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
-}
+ /**
+ *
+ */
+ void hashMap::printHash()
+ {
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+ }
diff --git a/mert/TER/hashMap.h b/mert/TER/hashMap.h
index 85020d041..6cb721573 100644
--- a/mert/TER/hashMap.h
+++ b/mert/TER/hashMap.h
@@ -1,10 +1,29 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
/*
* Generic hashmap manipulation functions
*/
-
-#ifndef MERT_TER_HASHMAP_H_
-#define MERT_TER_HASHMAP_H_
-
+#ifndef __HASHMAP_H_
+#define __HASHMAP_H_
+#include <boost/functional/hash.hpp>
#include "stringHasher.h"
#include <vector>
#include <string>
@@ -16,28 +35,30 @@ using namespace std;
namespace HashMapSpace
{
-class hashMap
-{
-private:
- vector<stringHasher> m_hasher;
+ class hashMap
+ {
+ private:
+ vector<stringHasher> m_hasher;
-public:
+ public:
// ~hashMap();
- long hashValue ( string key );
- int trouve ( long searchKey );
- int trouve ( string key );
- void addHasher ( string key, string value );
- stringHasher getHasher ( string key );
- string getValue ( string key );
- string searchValue ( string key );
- void setValue ( string key , string value );
- void printHash();
- vector<stringHasher> getHashMap();
- string printStringHash();
- string printStringHash2();
- string printStringHashForLexicon();
-};
+ long hashValue ( string key );
+ int trouve ( long searchKey );
+ int trouve ( string key );
+ void addHasher ( string key, string value );
+ stringHasher getHasher ( string key );
+ string getValue ( string key );
+ string searchValue ( string key );
+ void setValue ( string key , string value );
+ void printHash();
+ vector<stringHasher> getHashMap();
+ string printStringHash();
+ string printStringHash2();
+ string printStringHashForLexicon();
+ };
+
}
-#endif // MERT_TER_HASHMAP_H_
+
+#endif
diff --git a/mert/TER/hashMapInfos.cpp b/mert/TER/hashMapInfos.cpp
index 9cd431196..23f57d808 100644
--- a/mert/TER/hashMapInfos.cpp
+++ b/mert/TER/hashMapInfos.cpp
@@ -1,3 +1,23 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
#include "hashMapInfos.h"
// The following class defines a hash function for strings
@@ -8,108 +28,117 @@ using namespace std;
namespace HashMapSpace
{
// hashMapInfos::hashMap();
-/* hashMapInfos::~hashMap()
+ /* hashMapInfos::~hashMap()
+ {
+ // vector<infosHasher>::const_iterator del = m_hasher.begin();
+ for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
+ {
+ delete(*del);
+ }
+ }*/
+ /**
+ * int hashMapInfos::trouve ( long searchKey )
+ * @param searchKey
+ * @return
+ */
+ int hashMapInfos::trouve ( long searchKey )
{
-// vector<infosHasher>::const_iterator del = m_hasher.begin();
- for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
- {
- delete(*del);
- }
- }*/
-/**
- * int hashMapInfos::trouve ( long searchKey )
- * @param searchKey
- * @return
- */
-int hashMapInfos::trouve ( long searchKey )
-{
- long foundKey;
+ long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return 1;
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return 1;
+ }
+ }
+ return 0;
}
- }
- return 0;
-}
-int hashMapInfos::trouve ( string key )
-{
- long searchKey=hashValue ( key );
- long foundKey;;
+ int hashMapInfos::trouve ( string key )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return 1;
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return 1;
+ }
+ }
+ return 0;
}
- }
- return 0;
-}
-/**
- * long hashMapInfos::hashValue ( string key )
- * @param key
- * @return
- */
-long hashMapInfos::hashValue ( string key )
-{
- locale loc; // the "C" locale
- const collate<char>& coll = use_facet<collate<char> >(loc);
- return coll.hash(key.data(),key.data()+key.length());
+ /**
+ * long hashMapInfos::hashValue ( string key )
+ * @param key
+ * @return
+ */
+ long hashMapInfos::hashValue ( string key )
+ {
+ locale loc; // the "C" locale
+ const collate<char>& coll = use_facet<collate<char> >(loc);
+ return coll.hash(key.data(),key.data()+key.length());
// boost::hash<string> hasher;
// return hasher ( key );
-}
-/**
- * void hashMapInfos::addHasher ( string key, string value )
- * @param key
- * @param value
- */
-void hashMapInfos::addHasher ( string key, vector<int> value )
-{
- if ( trouve ( hashValue ( key ) ) ==0 ) {
+ }
+ /**
+ * void hashMapInfos::addHasher ( string key, string value )
+ * @param key
+ * @param value
+ */
+ void hashMapInfos::addHasher ( string key, vector<int> value )
+ {
+ if ( trouve ( hashValue ( key ) ) ==0 )
+ {
// cerr << "ICI1" <<endl;
- infosHasher H ( hashValue ( key ),key,value );
+ infosHasher H ( hashValue ( key ),key,value );
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
// cerr << "ICI2" <<endl;
- m_hasher.push_back ( H );
- }
-}
-void hashMapInfos::addValue ( string key, vector<int> value )
-{
- addHasher ( key, value );
-}
-infosHasher hashMapInfos::getHasher ( string key )
-{
- long searchKey=hashValue ( key );
- long foundKey;
+ m_hasher.push_back ( H );
+ }
+ }
+ void hashMapInfos::addValue ( string key, vector<int> value )
+ {
+ addHasher ( key, value );
+ }
+ infosHasher hashMapInfos::getHasher ( string key )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return ( *l_hasher );
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return ( *l_hasher );
+ }
+ }
+ vector<int> temp;
+ infosHasher defaut(0,"",temp);
+ return defaut;
}
- }
- vector<int> temp;
- infosHasher defaut(0,"",temp);
- return defaut;
-}
-vector<int> hashMapInfos::getValue ( string key )
-{
- long searchKey=hashValue ( key );
- long foundKey;
- vector<int> retour;
+ vector<int> hashMapInfos::getValue ( string key )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;
+ vector<int> retour;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
- return ( *l_hasher ).getValue();
+ return ( *l_hasher ).getValue();
+ }
+ }
+ return retour;
}
- }
- return retour;
-}
// string hashMapInfos::searchValue ( string value )
// {
// // long searchKey=hashValue ( key );
@@ -129,30 +158,42 @@ vector<int> hashMapInfos::getValue ( string key )
// }
//
-void hashMapInfos::setValue ( string key , vector<int> value )
-{
- long searchKey=hashValue ( key );
- long foundKey;
+ void hashMapInfos::setValue ( string key , vector<int> value )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- ( *l_hasher ).setValue ( value );
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ ( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue();
+ }
+ }
+ }
+ string hashMapInfos::toString ()
+ {
+ stringstream to_return;
+ for ( vector<infosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ to_return << (*l_hasher).toString();
+ // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+ return to_return.str();
}
- }
-}
-
-/**
- *
- */
-void hashMapInfos::printHash()
-{
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ /**
+ *
+ */
+ void hashMapInfos::printHash()
+ {
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
-}
+ }
+ }
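
The reindented hashMapInfos.cpp above also gains a toString() method that concatenates one line per stored entry via infosHasher::toString(). A minimal sketch of how it could be called, assuming the tercpp headers are on the include path; the tab-separated layout noted in the comment is inferred from the code above, not a documented format.

    #include <iostream>
    #include <vector>
    #include "hashMapInfos.h"
    using namespace HashMapSpace;

    int main()
    {
      hashMapInfos dump;
      std::vector<int> positions;
      positions.push_back(3);
      positions.push_back(7);
      dump.addValue("maison", positions);
      // toString() emits one infosHasher::toString() line per entry,
      // roughly "<hashKey>\tmaison\t3\t7\n" (layout assumed from the code above).
      std::cout << dump.toString();
      return 0;
    }
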
diff --git a/mert/TER/hashMapInfos.h b/mert/TER/hashMapInfos.h
index 8b56e9d02..5e7dbb6e7 100644
--- a/mert/TER/hashMapInfos.h
+++ b/mert/TER/hashMapInfos.h
@@ -1,9 +1,29 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
/*
* Generic hashmap manipulation functions
*/
-#ifndef MERT_TER_HASHMAP_INFOS_H_
-#define MERT_TER_HASHMAP_INFOS_H_
-
+#ifndef __HASHMAPINFOS_H_
+#define __HASHMAPINFOS_H_
+#include <boost/functional/hash.hpp>
#include "infosHasher.h"
#include <vector>
#include <string>
@@ -14,29 +34,32 @@ using namespace std;
namespace HashMapSpace
{
-class hashMapInfos
-{
-private:
- vector<infosHasher> m_hasher;
+ class hashMapInfos
+ {
+ private:
+ vector<infosHasher> m_hasher;
-public:
+ public:
// ~hashMap();
- long hashValue ( string key );
- int trouve ( long searchKey );
- int trouve ( string key );
- void addHasher ( string key, vector<int> value );
- void addValue ( string key, vector<int> value );
- infosHasher getHasher ( string key );
- vector<int> getValue ( string key );
+ long hashValue ( string key );
+ int trouve ( long searchKey );
+ int trouve ( string key );
+ void addHasher ( string key, vector<int> value );
+ void addValue ( string key, vector<int> value );
+ infosHasher getHasher ( string key );
+ vector<int> getValue ( string key );
// string searchValue ( string key );
- void setValue ( string key , vector<int> value );
- void printHash();
- vector<infosHasher> getHashMap();
- string printStringHash();
- string printStringHash2();
- string printStringHashForLexicon();
-};
+ void setValue ( string key , vector<int> value );
+ void printHash();
+ string toString();
+ vector<infosHasher> getHashMap();
+ string printStringHash();
+ string printStringHash2();
+ string printStringHashForLexicon();
+ };
+
}
-#endif // MERT_TER_HASHMAP_INFOS_H_
+
+#endif
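
For orientation, a short usage sketch of the hashMapInfos interface declared above (addValue/trouve/getValue), assuming the tercpp headers are on the include path:

    #include <iostream>
    #include <vector>
    #include "hashMapInfos.h"
    using namespace HashMapSpace;

    int main()
    {
      hashMapInfos positions;
      std::vector<int> where;
      where.push_back(1);
      positions.addValue("the", where);            // addHasher() skips the insert if the key already exists
      if (positions.trouve("the") == 1) {          // trouve() returns 1 when the hashed key is present, 0 otherwise
        std::cout << positions.getValue("the").size() << std::endl;   // 1
      }
      std::cout << positions.getValue("unknown").size() << std::endl; // 0: a miss yields an empty vector
      return 0;
    }
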
diff --git a/mert/TER/hashMapStringInfos.cpp b/mert/TER/hashMapStringInfos.cpp
index 0fbb0a98a..773c148d4 100644
--- a/mert/TER/hashMapStringInfos.cpp
+++ b/mert/TER/hashMapStringInfos.cpp
@@ -1,3 +1,23 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
#include "hashMapStringInfos.h"
// The following class defines a hash function for strings
@@ -7,157 +27,179 @@ using namespace std;
namespace HashMapSpace
{
-// hashMapStringInfos::hashMap();
-/* hashMapStringInfos::~hashMap()
-{
-// vector<stringInfosHasher>::const_iterator del = m_hasher.begin();
- for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
- {
- delete(*del);
- }
-}*/
-/**
-* int hashMapStringInfos::trouve ( long searchKey )
-* @param searchKey
-* @return
-*/
-int hashMapStringInfos::trouve ( long searchKey )
-{
- long foundKey;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return 1;
+ // hashMapStringInfos::hashMap();
+ /* hashMapStringInfos::~hashMap()
+ {
+ // vector<stringInfosHasher>::const_iterator del = m_hasher.begin();
+ for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
+ {
+ delete(*del);
+ }
+ }*/
+ /**
+ * int hashMapStringInfos::trouve ( long searchKey )
+ * @param searchKey
+ * @return
+ */
+ int hashMapStringInfos::trouve ( long searchKey )
+ {
+ long foundKey;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return 1;
+ }
+ }
+ return 0;
}
- }
- return 0;
-}
-int hashMapStringInfos::trouve ( string key )
-{
- long searchKey = hashValue ( key );
- long foundKey;;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return 1;
+ int hashMapStringInfos::trouve ( string key )
+ {
+ long searchKey = hashValue ( key );
+ long foundKey;;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return 1;
+ }
+ }
+ return 0;
}
- }
- return 0;
-}
-/**
-* long hashMapStringInfos::hashValue ( string key )
-* @param key
-* @return
-*/
-long hashMapStringInfos::hashValue ( string key )
-{
- locale loc; // the "C" locale
- const collate<char>& coll = use_facet<collate<char> > ( loc );
- return coll.hash ( key.data(), key.data() + key.length() );
+ /**
+ * long hashMapStringInfos::hashValue ( string key )
+ * @param key
+ * @return
+ */
+ long hashMapStringInfos::hashValue ( string key )
+ {
+ locale loc; // the "C" locale
+ const collate<char>& coll = use_facet<collate<char> > ( loc );
+ return coll.hash ( key.data(), key.data() + key.length() );
// boost::hash<string> hasher;
// return hasher ( key );
-}
-/**
-* void hashMapStringInfos::addHasher ( string key, string value )
-* @param key
-* @param value
-*/
-void hashMapStringInfos::addHasher ( string key, vector<string> value )
-{
- if ( trouve ( hashValue ( key ) ) == 0 ) {
- // cerr << "ICI1" <<endl;
- stringInfosHasher H ( hashValue ( key ), key, value );
- // cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
- // cerr << "ICI2" <<endl;
-
- m_hasher.push_back ( H );
- }
-}
-void hashMapStringInfos::addValue ( string key, vector<string> value )
-{
- addHasher ( key, value );
-}
-stringInfosHasher hashMapStringInfos::getHasher ( string key )
-{
- long searchKey = hashValue ( key );
- long foundKey;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return ( *l_hasher );
}
- }
- vector<string> tmp;
- stringInfosHasher defaut ( 0, "", tmp );
- return defaut;
-}
-vector<string> hashMapStringInfos::getValue ( string key )
-{
- long searchKey = hashValue ( key );
- long foundKey;
- vector<string> retour;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- // cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
- return ( *l_hasher ).getValue();
+ /**
+ * void hashMapStringInfos::addHasher ( string key, string value )
+ * @param key
+ * @param value
+ */
+ void hashMapStringInfos::addHasher ( string key, vector<string> value )
+ {
+ if ( trouve ( hashValue ( key ) ) == 0 )
+ {
+ // cerr << "ICI1" <<endl;
+ stringInfosHasher H ( hashValue ( key ), key, value );
+ // cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
+ // cerr << "ICI2" <<endl;
+
+ m_hasher.push_back ( H );
+ }
}
- }
- return retour;
-}
-// string hashMapStringInfos::searchValue ( string value )
-// {
-// // long searchKey=hashValue ( key );
-// // long foundKey;
-// vector<int> foundValue;
-//
-// // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
-// for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
-// {
-// foundValue= ( *l_hasher ).getValue();
-// /* if ( foundValue.compare ( value ) == 0 )
-// {
-// return ( *l_hasher ).getKey();
-// }*/
-// }
-// return "";
-// }
-//
-
-void hashMapStringInfos::setValue ( string key , vector<string> value )
-{
- long searchKey = hashValue ( key );
- long foundKey;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- ( *l_hasher ).setValue ( value );
- // return ( *l_hasher ).getValue();
+ void hashMapStringInfos::addValue ( string key, vector<string> value )
+ {
+ addHasher ( key, value );
+ }
+ stringInfosHasher hashMapStringInfos::getHasher ( string key )
+ {
+ long searchKey = hashValue ( key );
+ long foundKey;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return ( *l_hasher );
+ }
+ }
+ vector<string> tmp;
+ stringInfosHasher defaut ( 0, "", tmp );
+ return defaut;
+ }
+ vector<string> hashMapStringInfos::getValue ( string key )
+ {
+ long searchKey = hashValue ( key );
+ long foundKey;
+ vector<string> retour;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ // cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
+ return ( *l_hasher ).getValue();
+ }
+ }
+ return retour;
+ }
+ // string hashMapStringInfos::searchValue ( string value )
+ // {
+ // // long searchKey=hashValue ( key );
+ // // long foundKey;
+ // vector<int> foundValue;
+ //
+ // // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ // for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ // {
+ // foundValue= ( *l_hasher ).getValue();
+ // /* if ( foundValue.compare ( value ) == 0 )
+ // {
+ // return ( *l_hasher ).getKey();
+ // }*/
+ // }
+ // return "";
+ // }
+ //
+
+ void hashMapStringInfos::setValue ( string key , vector<string> value )
+ {
+ long searchKey = hashValue ( key );
+ long foundKey;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ ( *l_hasher ).setValue ( value );
+ // return ( *l_hasher ).getValue();
+ }
+ }
}
- }
-}
+ string hashMapStringInfos::toString ()
+ {
+ stringstream to_return;
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ to_return << (*l_hasher).toString();
+ // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+ return to_return.str();
+ }
-/**
-*
-*/
-void hashMapStringInfos::printHash()
-{
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
- // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
-}
-vector< stringInfosHasher > hashMapStringInfos::getHashMap()
-{
- return m_hasher;
-}
+ /**
+ *
+ */
+ void hashMapStringInfos::printHash()
+ {
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+ }
+ vector< stringInfosHasher > hashMapStringInfos::getHashMap()
+ {
+ return m_hasher;
+ }
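
hashMapStringInfos mirrors hashMapInfos but maps each key to a vector<string>. A small sketch of the miss behaviour visible in getValue()/getHasher() above, assuming the tercpp headers are on the include path:

    #include <iostream>
    #include <string>
    #include <vector>
    #include "hashMapStringInfos.h"
    using namespace HashMapSpace;

    int main()
    {
      hashMapStringInfos lexicon;
      std::vector<std::string> translations;
      translations.push_back("house");
      lexicon.addValue("maison", translations);
      // A lookup miss does not throw: getValue() returns an empty vector and
      // getHasher() returns a placeholder entry with hash key 0 and an empty key.
      std::cout << lexicon.getValue("chien").size() << std::endl;        // 0
      std::cout << lexicon.getHasher("chien").getHashKey() << std::endl; // 0
      return 0;
    }
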
diff --git a/mert/TER/hashMapStringInfos.h b/mert/TER/hashMapStringInfos.h
index 870274f3d..5337d50f2 100644
--- a/mert/TER/hashMapStringInfos.h
+++ b/mert/TER/hashMapStringInfos.h
@@ -1,9 +1,29 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
/*
* Generic hashmap manipulation functions
*/
-#ifndef MERT_TER_HASHMAP_STRING_INFOS_H_
-#define MERT_TER_HASHMAP_STRING_INFOS_H_
-
+#ifndef __HASHMAPSTRINGINFOS_H_
+#define __HASHMAPSTRINGINFOS_H_
+#include <boost/functional/hash.hpp>
#include "stringInfosHasher.h"
#include <vector>
#include <string>
@@ -14,29 +34,32 @@ using namespace std;
namespace HashMapSpace
{
-class hashMapStringInfos
-{
-private:
- vector<stringInfosHasher> m_hasher;
+ class hashMapStringInfos
+ {
+ private:
+ vector<stringInfosHasher> m_hasher;
-public:
+ public:
// ~hashMap();
- long hashValue ( string key );
- int trouve ( long searchKey );
- int trouve ( string key );
- void addHasher ( string key, vector<string> value );
- void addValue ( string key, vector<string> value );
- stringInfosHasher getHasher ( string key );
- vector<string> getValue ( string key );
+ long hashValue ( string key );
+ int trouve ( long searchKey );
+ int trouve ( string key );
+ void addHasher ( string key, vector<string> value );
+ void addValue ( string key, vector<string> value );
+ stringInfosHasher getHasher ( string key );
+ vector<string> getValue ( string key );
// string searchValue ( string key );
- void setValue ( string key , vector<string> value );
- void printHash();
- vector<stringInfosHasher> getHashMap();
- string printStringHash();
- string printStringHash2();
- string printStringHashForLexicon();
-};
+ void setValue ( string key , vector<string> value );
+ void printHash();
+ string toString();
+ vector<stringInfosHasher> getHashMap();
+ string printStringHash();
+ string printStringHash2();
+ string printStringHashForLexicon();
+ };
+
}
-#endif // MERT_TER_HASHMAP_STRING_INFOS_H_
+
+#endif
diff --git a/mert/TER/infosHasher.cpp b/mert/TER/infosHasher.cpp
index 654b0b26f..8ce23ae44 100644
--- a/mert/TER/infosHasher.cpp
+++ b/mert/TER/infosHasher.cpp
@@ -1,34 +1,61 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
#include "infosHasher.h"
// The following class defines a hash function for strings
using namespace std;
+using namespace Tools;
namespace HashMapSpace
{
-infosHasher::infosHasher (long cle,string cleTxt, vector<int> valueVecInt )
-{
- m_hashKey=cle;
- m_key=cleTxt;
- m_value=valueVecInt;
-}
+ infosHasher::infosHasher (long cle,string cleTxt, vector<int> valueVecInt )
+ {
+ m_hashKey=cle;
+ m_key=cleTxt;
+ m_value=valueVecInt;
+ }
// infosHasher::~infosHasher(){};*/
-long infosHasher::getHashKey()
-{
- return m_hashKey;
-}
-string infosHasher::getKey()
-{
- return m_key;
-}
-vector<int> infosHasher::getValue()
-{
- return m_value;
-}
-void infosHasher::setValue ( vector<int> value )
-{
- m_value=value;
-}
+ long infosHasher::getHashKey()
+ {
+ return m_hashKey;
+ }
+ string infosHasher::getKey()
+ {
+ return m_key;
+ }
+ vector<int> infosHasher::getValue()
+ {
+ return m_value;
+ }
+ void infosHasher::setValue ( vector<int> value )
+ {
+ m_value=value;
+ }
+ string infosHasher::toString()
+ {
+ stringstream to_return;
+ to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
+ return to_return.str();
+ }
// typedef stdext::hash_map<std::string,string, stringhasher> HASH_S_S;
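
The toString() added to infosHasher above emits a single tab-separated record per entry through Tools::vectorToString. A minimal sketch, assuming the tercpp headers are on the include path; the exact layout in the comment is an assumption read off the code:

    #include <iostream>
    #include <vector>
    #include "infosHasher.h"
    using namespace HashMapSpace;

    int main()
    {
      std::vector<int> value;
      value.push_back(4);
      value.push_back(9);
      infosHasher entry(12345L, "word", value);
      // Expected layout, assumed from the code above: "12345\tword\t4\t9\n"
      std::cout << entry.toString();
      return 0;
    }
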
diff --git a/mert/TER/infosHasher.h b/mert/TER/infosHasher.h
index 02a32280b..d3d56317a 100644
--- a/mert/TER/infosHasher.h
+++ b/mert/TER/infosHasher.h
@@ -1,31 +1,54 @@
-#ifndef MERT_TER_INFO_SHASHER_H_
-#define MERT_TER_INFO_SHASHER_H_
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
+#ifndef __INFOSHASHER_H_
+#define __INFOSHASHER_H_
#include <string>
+// #include <ext/hash_map>
#include <stdio.h>
#include <iostream>
+#include <sstream>
#include <vector>
+#include "tools.h"
using namespace std;
namespace HashMapSpace
{
-class infosHasher
-{
-private:
- long m_hashKey;
- string m_key;
- vector<int> m_value;
+ class infosHasher
+ {
+ private:
+ long m_hashKey;
+ string m_key;
+ vector<int> m_value;
-public:
- infosHasher ( long cle, string cleTxt, vector<int> valueVecInt );
- long getHashKey();
- string getKey();
- vector<int> getValue();
- void setValue ( vector<int> value );
+ public:
+ infosHasher ( long cle, string cleTxt, vector<int> valueVecInt );
+ long getHashKey();
+ string getKey();
+ vector<int> getValue();
+ void setValue ( vector<int> value );
+ string toString();
-};
+ };
-}
-#endif // MERT_TER_INFO_SHASHER_H_
+}
+#endif
\ No newline at end of file
diff --git a/mert/TER/stringHasher.cpp b/mert/TER/stringHasher.cpp
index 24fde0e32..f4d1526e8 100644
--- a/mert/TER/stringHasher.cpp
+++ b/mert/TER/stringHasher.cpp
@@ -1,3 +1,23 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
#include "stringHasher.h"
// The following class defines a hash function for strings
@@ -6,29 +26,29 @@ using namespace std;
namespace HashMapSpace
{
-stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt )
-{
- m_hashKey=cle;
- m_key=cleTxt;
- m_value=valueTxt;
-}
+ stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt )
+ {
+ m_hashKey=cle;
+ m_key=cleTxt;
+ m_value=valueTxt;
+ }
// stringHasher::~stringHasher(){};*/
-long stringHasher::getHashKey()
-{
- return m_hashKey;
-}
-string stringHasher::getKey()
-{
- return m_key;
-}
-string stringHasher::getValue()
-{
- return m_value;
-}
-void stringHasher::setValue ( string value )
-{
- m_value=value;
-}
+ long stringHasher::getHashKey()
+ {
+ return m_hashKey;
+ }
+ string stringHasher::getKey()
+ {
+ return m_key;
+ }
+ string stringHasher::getValue()
+ {
+ return m_value;
+ }
+ void stringHasher::setValue ( string value )
+ {
+ m_value=value;
+ }
// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;
diff --git a/mert/TER/stringHasher.h b/mert/TER/stringHasher.h
index 897bd9ff5..d831f642c 100644
--- a/mert/TER/stringHasher.h
+++ b/mert/TER/stringHasher.h
@@ -1,28 +1,50 @@
-#ifndef MERT_TER_STRING_HASHER_H_
-#define MERT_TER_STRING_HASHER_H_
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
+#ifndef __STRINGHASHER_H_
+#define __STRINGHASHER_H_
#include <string>
+//#include <ext/hash_map>
#include <iostream>
using namespace std;
namespace HashMapSpace
{
-class stringHasher
-{
-private:
- long m_hashKey;
- string m_key;
- string m_value;
-
-public:
- stringHasher ( long cle, string cleTxt, string valueTxt );
- long getHashKey();
- string getKey();
- string getValue();
- void setValue ( string value );
-};
+ class stringHasher
+ {
+ private:
+ long m_hashKey;
+ string m_key;
+ string m_value;
-}
+ public:
+ stringHasher ( long cle, string cleTxt, string valueTxt );
+ long getHashKey();
+ string getKey();
+ string getValue();
+ void setValue ( string value );
-#endif // MERT_TER_STRING_HASHER_H_
+
+ };
+
+
+}
+#endif
diff --git a/mert/TER/stringInfosHasher.cpp b/mert/TER/stringInfosHasher.cpp
index 3e02e7a20..007fd720f 100644
--- a/mert/TER/stringInfosHasher.cpp
+++ b/mert/TER/stringInfosHasher.cpp
@@ -1,34 +1,61 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
#include "stringInfosHasher.h"
// The following class defines a hash function for strings
using namespace std;
+using namespace Tools;
namespace HashMapSpace
{
-stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt )
-{
- m_hashKey=cle;
- m_key=cleTxt;
- m_value=valueVecInt;
-}
+ stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt )
+ {
+ m_hashKey=cle;
+ m_key=cleTxt;
+ m_value=valueVecInt;
+ }
// stringInfosHasher::~stringInfosHasher(){};*/
-long stringInfosHasher::getHashKey()
-{
- return m_hashKey;
-}
-string stringInfosHasher::getKey()
-{
- return m_key;
-}
-vector<string> stringInfosHasher::getValue()
-{
- return m_value;
-}
-void stringInfosHasher::setValue ( vector<string> value )
-{
- m_value=value;
-}
+ long stringInfosHasher::getHashKey()
+ {
+ return m_hashKey;
+ }
+ string stringInfosHasher::getKey()
+ {
+ return m_key;
+ }
+ vector<string> stringInfosHasher::getValue()
+ {
+ return m_value;
+ }
+ void stringInfosHasher::setValue ( vector<string> value )
+ {
+ m_value=value;
+ }
+ string stringInfosHasher::toString()
+ {
+ stringstream to_return;
+ to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
+ return to_return.str();
+ }
// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;
diff --git a/mert/TER/stringInfosHasher.h b/mert/TER/stringInfosHasher.h
index c1b891662..307b48da7 100644
--- a/mert/TER/stringInfosHasher.h
+++ b/mert/TER/stringInfosHasher.h
@@ -1,28 +1,52 @@
-#ifndef MERT_TER_STRING_INFOS_HASHER_H_
-#define MERT_TER_STRING_INFOS_HASHER_H_
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
+#ifndef __STRINGINFOSHASHER_H_
+#define __STRINGINFOSHASHER_H_
#include <string>
+// #include <ext/hash_map>
#include <iostream>
#include <vector>
+#include "tools.h"
using namespace std;
namespace HashMapSpace
{
-class stringInfosHasher
-{
-private:
- long m_hashKey;
- string m_key;
- vector<string> m_value;
-
-public:
- stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt );
- long getHashKey();
- string getKey();
- vector<string> getValue();
- void setValue ( vector<string> value );
-};
+ class stringInfosHasher
+ {
+ private:
+ long m_hashKey;
+ string m_key;
+ vector<string> m_value;
-}
+ public:
+ stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt );
+ long getHashKey();
+ string getKey();
+ vector<string> getValue();
+ void setValue ( vector<string> value );
+ string toString();
-#endif // MERT_TER_STRING_INFOS_HASHER_H_
+
+ };
+
+
+}
+#endif
\ No newline at end of file
diff --git a/mert/TER/terAlignment.cpp b/mert/TER/terAlignment.cpp
index 87be53b11..6c5d35cc5 100644
--- a/mert/TER/terAlignment.cpp
+++ b/mert/TER/terAlignment.cpp
@@ -1,131 +1,214 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
#include "terAlignment.h"
using namespace std;
namespace TERCpp
{
-terAlignment::terAlignment()
-{
+ terAlignment::terAlignment()
+ {
// vector<string> ref;
// vector<string> hyp;
// vector<string> aftershift;
- // TERshift[] allshifts = null;
+ // TERshift[] allshifts = null;
- numEdits=0;
- numWords=0;
- bestRef="";
+ numEdits=0;
+ numWords=0;
+ bestRef="";
- numIns=0;
- numDel=0;
- numSub=0;
- numSft=0;
- numWsf=0;
-}
-string terAlignment::toString()
-{
- stringstream s;
- s.str ( "" );
- s << "Original Ref: " << join ( " ", ref ) << endl;
- s << "Original Hyp: " << join ( " ", hyp ) <<endl;
- s << "Hyp After Shift: " << join ( " ", aftershift );
- s << endl;
+ numIns=0;
+ numDel=0;
+ numSub=0;
+ numSft=0;
+ numWsf=0;
+ }
+ string terAlignment::toString()
+ {
+ stringstream s;
+ s.str ( "" );
+ s << "Original Ref: \t" << join ( " ", ref ) << endl;
+ s << "Original Hyp: \t" << join ( " ", hyp ) <<endl;
+ s << "Hyp After Shift:\t" << join ( " ", aftershift );
+// s << "Hyp After Shift: " << join ( " ", aftershift );
+ s << endl;
// string s = "Original Ref: " + join(" ", ref) + "\nOriginal Hyp: " + join(" ", hyp) + "\nHyp After Shift: " + join(" ", aftershift);
- if ( ( int ) sizeof ( alignment ) >0 ) {
- s << "Alignment: (";
+ if ( ( int ) sizeof ( alignment ) >0 )
+ {
+ s << "Alignment: (";
// s += "\nAlignment: (";
- for ( int i = 0; i < ( int ) ( alignment.size() ); i++ ) {
- s << alignment[i];
+ for ( int i = 0; i < ( int ) ( alignment.size() ); i++ )
+ {
+ s << alignment[i];
// s+=alignment[i];
- }
+ }
// s += ")";
- s << ")";
- }
- s << endl;
- if ( ( int ) allshifts.size() == 0 ) {
+ s << ")";
+ }
+ s << endl;
+ if ( ( int ) allshifts.size() == 0 )
+ {
// s += "\nNumShifts: 0";
- s << "NumShifts: 0";
- } else {
+ s << "NumShifts: 0";
+ }
+ else
+ {
// s += "\nNumShifts: " + (int)allshifts.size();
- s << "NumShifts: "<< ( int ) allshifts.size();
- for ( int i = 0; i < ( int ) allshifts.size(); i++ ) {
- s << endl << " " ;
- s << ( ( terShift ) allshifts[i] ).toString();
+ s << "NumShifts: "<< ( int ) allshifts.size();
+ for ( int i = 0; i < ( int ) allshifts.size(); i++ )
+ {
+ s << endl << " " ;
+ s << ( ( terShift ) allshifts[i] ).toString();
// s += "\n " + allshifts[i];
- }
- }
- s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")";
+ }
+ }
+ s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")";
// s += "\nScore: " + score() + " (" + numEdits + "/" + numWords + ")";
- return s.str();
+ return s.str();
-}
-string terAlignment::join ( string delim, vector<string> arr )
-{
- if ( ( int ) arr.size() == 0 ) return "";
+ }
+ string terAlignment::join ( string delim, vector<string> arr )
+ {
+ if ( ( int ) arr.size() == 0 ) return "";
// if ((int)delim.compare("") == 0) delim = new String("");
// String s = new String("");
- stringstream s;
- s.str ( "" );
- for ( int i = 0; i < ( int ) arr.size(); i++ ) {
- if ( i == 0 ) {
- s << arr.at ( i );
- } else {
- s << delim << arr.at ( i );
- }
- }
- return s.str();
+ stringstream s;
+ s.str ( "" );
+ for ( int i = 0; i < ( int ) arr.size(); i++ )
+ {
+ if ( i == 0 )
+ {
+ s << arr.at ( i );
+ }
+ else
+ {
+ s << delim << arr.at ( i );
+ }
+ }
+ return s.str();
// return "";
-}
-double terAlignment::score()
-{
- if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) ) {
- return 1.0;
- }
- if ( numWords <= 0.0 ) {
- return 0.0;
- }
- return ( double ) numEdits / numWords;
-}
-double terAlignment::scoreAv()
-{
- if ( ( averageWords <= 0.0 ) && ( numEdits > 0.0 ) ) {
- return 1.0;
- }
- if ( averageWords <= 0.0 ) {
- return 0.0;
- }
- return ( double ) numEdits / averageWords;
-}
-
-void terAlignment::scoreDetails()
-{
- numIns = numDel = numSub = numWsf = numSft = 0;
- if((int)allshifts.size()>0) {
- for(int i = 0; i < (int)allshifts.size(); ++i) {
- numWsf += allshifts[i].size();
}
- numSft = allshifts.size();
- }
-
- if((int)alignment.size()>0 ) {
- for(int i = 0; i < (int)alignment.size(); ++i) {
- switch (alignment[i]) {
- case 'S':
- case 'T':
- numSub++;
- break;
- case 'D':
- numDel++;
- break;
- case 'I':
- numIns++;
- break;
- }
+ double terAlignment::score()
+ {
+ if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) )
+ {
+ return 1.0;
+ }
+ if ( numWords <= 0.0 )
+ {
+ return 0.0;
+ }
+ return ( double ) numEdits / numWords;
+ }
+ double terAlignment::scoreAv()
+ {
+ if ( ( averageWords <= 0.0 ) && ( numEdits > 0.0 ) )
+ {
+ return 1.0;
+ }
+ if ( averageWords <= 0.0 )
+ {
+ return 0.0;
+ }
+ return ( double ) numEdits / averageWords;
}
+
+ void terAlignment::scoreDetails()
+ {
+ numIns = numDel = numSub = numWsf = numSft = 0;
+ if((int)allshifts.size()>0)
+ {
+ for(int i = 0; i < (int)allshifts.size(); ++i)
+ {
+ numWsf += allshifts[i].size();
+ }
+ numSft = allshifts.size();
+ }
+
+ if((int)alignment.size()>0 )
+ {
+ for(int i = 0; i < (int)alignment.size(); ++i)
+ {
+ switch (alignment[i])
+ {
+ case 'S':
+ case 'T':
+ numSub++;
+ break;
+ case 'D':
+ numDel++;
+ break;
+ case 'I':
+ numIns++;
+ break;
+ }
+ }
+ }
+ // if(numEdits != numSft + numDel + numIns + numSub)
+ // System.out.println("** Error, unmatch edit erros " + numEdits +
+ // " vs " + (numSft + numDel + numIns + numSub));
+ }
+ string terAlignment::printAlignments()
+ {
+ stringstream to_return;
+ for(int i = 0; i < (int)alignment.size(); ++i)
+ {
+ char alignInfo=alignment.at(i);
+ if (alignInfo == 'A' )
+ {
+ alignInfo='A';
+ }
+
+ if (i==0)
+ {
+ to_return << alignInfo;
+ }
+ else
+ {
+ to_return << " " << alignInfo;
+ }
+ }
+ return to_return.str();
}
- // if(numEdits != numSft + numDel + numIns + numSub)
- // System.out.println("** Error, unmatch edit erros " + numEdits +
- // " vs " + (numSft + numDel + numIns + numSub));
+string terAlignment::printAllShifts()
+{
+ stringstream to_return;
+ if ( ( int ) allshifts.size() == 0 )
+ {
+// s += "\nNumShifts: 0";
+ to_return << "NbrShifts: 0";
+ }
+ else
+ {
+// s += "\nNumShifts: " + (int)allshifts.size();
+ to_return << "NbrShifts: "<< ( int ) allshifts.size();
+ for ( int i = 0; i < ( int ) allshifts.size(); i++ )
+ {
+ to_return << "\t" ;
+ to_return << ( ( terShift ) allshifts[i] ).toString();
+// s += "\n " + allshifts[i];
+ }
+ }
+ return to_return.str();
}
}
\ No newline at end of file
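
For reference, score() keeps TER's usual edit rate, numEdits / numWords, while scoreAv() divides by averageWords instead (the average reference length when scoring against several references); both return 1.0 when there are edits but no reference words and 0.0 when there are neither. A minimal sketch, assuming the tercpp headers are on the include path:

    #include <iostream>
    #include "terAlignment.h"
    using namespace TERCpp;

    int main()
    {
      terAlignment a;
      a.numEdits     = 3.0;   // e.g. one substitution, one insertion, one shift
      a.numWords     = 10.0;  // length of the chosen reference
      a.averageWords = 12.0;  // average length over all references
      std::cout << a.score()   << std::endl;  // 3 / 10 = 0.3
      std::cout << a.scoreAv() << std::endl;  // 3 / 12 = 0.25
      return 0;
    }
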
diff --git a/mert/TER/terAlignment.h b/mert/TER/terAlignment.h
index c8c82eac8..0af86f663 100644
--- a/mert/TER/terAlignment.h
+++ b/mert/TER/terAlignment.h
@@ -1,5 +1,26 @@
-#ifndef MERT_TER_TER_ALIGNMENT_H_
-#define MERT_TER_TER_ALIGNMENT_H_
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
+#ifndef MERT_TER_TERALIGNMENT_H_
+#define MERT_TER_TERALIGNMENT_H_
+
#include <vector>
#include <stdio.h>
@@ -13,39 +34,41 @@ using namespace std;
namespace TERCpp
{
-class terAlignment
-{
-private:
-public:
-
- terAlignment();
- string toString();
- void scoreDetails();
+ class terAlignment
+ {
+ private:
+ public:
- vector<string> ref;
- vector<string> hyp;
- vector<string> aftershift;
+ terAlignment();
+ string toString();
+ void scoreDetails();
- vector<terShift> allshifts;
+ vector<string> ref;
+ vector<string> hyp;
+ vector<string> aftershift;
+ vector<terShift> allshifts;
+ vector<int> hyp_int;
+ vector<int> aftershift_int;
- double numEdits;
- double numWords;
- double averageWords;
- vector<char> alignment;
- string bestRef;
+ double numEdits;
+ double numWords;
+ double averageWords;
+ vector<char> alignment;
+ string bestRef;
- int numIns;
- int numDel;
- int numSub;
- int numSft;
- int numWsf;
+ int numIns;
+ int numDel;
+ int numSub;
+ int numSft;
+ int numWsf;
- string join ( string delim, vector<string> arr );
- double score();
- double scoreAv();
-};
+ string join ( string delim, vector<string> arr );
+ double score();
+ double scoreAv();
+ string printAlignments();
+ string printAllShifts();
+ };
}
-
-#endif // MERT_TER_TER_ALIGNMENT_H__
+#endif
\ No newline at end of file
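
The printAlignments() and printAllShifts() helpers declared above are pure formatting: the first writes the per-token edit codes ('A' match, 'S'/'T' substitution, 'D' deletion, 'I' insertion) separated by spaces, the second writes the shift count followed by one terShift::toString() per shift. A small sketch, assuming the tercpp headers are on the include path:

    #include <iostream>
    #include "terAlignment.h"
    using namespace TERCpp;

    int main()
    {
      terAlignment a;
      a.alignment.push_back('A');  // match
      a.alignment.push_back('S');  // substitution
      a.alignment.push_back('I');  // insertion
      std::cout << a.printAlignments() << std::endl;  // "A S I"
      std::cout << a.printAllShifts()  << std::endl;  // "NbrShifts: 0" when no shift was applied
      return 0;
    }
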
diff --git a/mert/TER/terShift.cpp b/mert/TER/terShift.cpp
index 428803849..c1106db76 100644
--- a/mert/TER/terShift.cpp
+++ b/mert/TER/terShift.cpp
@@ -1,3 +1,23 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
#include "terShift.h"
using namespace std;
@@ -22,32 +42,32 @@ namespace TERCpp
// numSft=0;
// numWsf=0;
// }
-terShift::terShift ()
-{
- start = 0;
- end = 0;
- moveto = 0;
- newloc = 0;
- cost=1.0;
-}
-terShift::terShift ( int _start, int _end, int _moveto, int _newloc )
-{
- start = _start;
- end = _end;
- moveto = _moveto;
- newloc = _newloc;
- cost=1.0;
-}
+ terShift::terShift ()
+ {
+ start = 0;
+ end = 0;
+ moveto = 0;
+ newloc = 0;
+ cost=1.0;
+ }
+ terShift::terShift ( int _start, int _end, int _moveto, int _newloc )
+ {
+ start = _start;
+ end = _end;
+ moveto = _moveto;
+ newloc = _newloc;
+ cost=1.0;
+ }
-terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted )
-{
- start = _start;
- end = _end;
- moveto = _moveto;
- newloc = _newloc;
- shifted = _shifted;
- cost=1.0;
-}
+ terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted )
+ {
+ start = _start;
+ end = _end;
+ moveto = _moveto;
+ newloc = _newloc;
+ shifted = _shifted;
+ cost=1.0;
+ }
// string terShift::vectorToString(vector<string> vec)
// {
// string retour("");
@@ -58,38 +78,44 @@ terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<stri
// return retour;
// }
-string terShift::toString()
-{
- stringstream s;
- s.str ( "" );
- s << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]";
- if ( ( int ) shifted.size() > 0 ) {
- s << " (" << vectorToString ( shifted ) << ")";
- }
- return s.str();
-}
+ string terShift::toString()
+ {
+ stringstream s;
+ s.str ( "" );
+ s << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]";
+ if ( ( int ) shifted.size() > 0 )
+ {
+ s << " (" << vectorToString ( shifted ) << ")";
+ }
+ return s.str();
+ }
-/* The distance of the shift. */
-int terShift::distance()
-{
- if ( moveto < start ) {
- return start - moveto;
- } else if ( moveto > end ) {
- return moveto - end;
- } else {
- return moveto - start;
- }
-}
+ /* The distance of the shift. */
+ int terShift::distance()
+ {
+ if ( moveto < start )
+ {
+ return start - moveto;
+ }
+ else if ( moveto > end )
+ {
+ return moveto - end;
+ }
+ else
+ {
+ return moveto - start;
+ }
+ }
-bool terShift::leftShift()
-{
- return ( moveto < start );
-}
+ bool terShift::leftShift()
+ {
+ return ( moveto < start );
+ }
-int terShift::size()
-{
- return ( end - start ) + 1;
-}
+ int terShift::size()
+ {
+ return ( end - start ) + 1;
+ }
// terShift terShift::operator=(terShift t)
// {
//
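
In the reindented terShift above, distance() reports how far the span moved (start - moveto for a left shift, moveto - end for a right shift, moveto - start otherwise) and size() is the number of shifted words. A minimal sketch, assuming the tercpp headers are on the include path:

    #include <iostream>
    #include "terShift.h"
    using namespace TERCpp;

    int main()
    {
      // Move the hypothesis words covering positions [4,5] to position 1.
      terShift s(4, 5, 1, 1);
      std::cout << s.size()      << std::endl;  // (5 - 4) + 1 = 2 words shifted
      std::cout << s.leftShift() << std::endl;  // 1: moveto < start, so the span moves left
      std::cout << s.distance()  << std::endl;  // start - moveto = 3
      return 0;
    }
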
diff --git a/mert/TER/terShift.h b/mert/TER/terShift.h
index 679a7c8bb..ba84a5947 100644
--- a/mert/TER/terShift.h
+++ b/mert/TER/terShift.h
@@ -1,5 +1,26 @@
-#ifndef MERT_TER_TER_SHIFT_H_
-#define MERT_TER_TER_SHIFT_H_
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
+#ifndef MERT_TER_TERSHIFT_H_
+#define MERT_TER_TERSHIFT_H_
+
#include <vector>
#include <stdio.h>
@@ -7,38 +28,38 @@
#include <sstream>
#include "tools.h"
+
using namespace std;
using namespace Tools;
namespace TERCpp
{
-class terShift
-{
-private:
-public:
-
- terShift();
- terShift ( int _start, int _end, int _moveto, int _newloc );
- terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted );
- string toString();
- int distance() ;
- bool leftShift();
- int size();
+ class terShift
+ {
+ private:
+ public:
+
+ terShift();
+ terShift ( int _start, int _end, int _moveto, int _newloc );
+ terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted );
+ string toString();
+ int distance() ;
+ bool leftShift();
+ int size();
// terShift operator=(terShift t);
// string vectorToString(vector<string> vec);
- int start;
- int end;
- int moveto;
- int newloc;
- vector<string> shifted; // The words we shifted
- vector<char> alignment ; // for pra_more output
- vector<string> aftershift; // for pra_more output
- // This is used to store the cost of a shift, so we don't have to
- // calculate it multiple times.
- double cost;
-};
+ int start;
+ int end;
+ int moveto;
+ int newloc;
+ vector<string> shifted; // The words we shifted
+ vector<char> alignment ; // for pra_more output
+ vector<string> aftershift; // for pra_more output
+ // This is used to store the cost of a shift, so we don't have to
+ // calculate it multiple times.
+ double cost;
+ };
}
-
-#endif // MERT_TER_TER_SHIFT_H_
+#endif
\ No newline at end of file
diff --git a/mert/TER/tercalc.cpp b/mert/TER/tercalc.cpp
index e16f692e8..b7f63772c 100644
--- a/mert/TER/tercalc.cpp
+++ b/mert/TER/tercalc.cpp
@@ -1,3 +1,23 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
//
// C++ Implementation: tercalc
//
@@ -15,1021 +35,902 @@ using namespace Tools;
namespace TERCpp
{
-terCalc::terCalc()
-{
- MAX_SHIFT_SIZE = 50;
- INF = 999999.0;
- shift_cost = 1.0;
- insert_cost = 1.0;
- delete_cost = 1.0;
- substitute_cost = 1.0;
- match_cost = 0.0;
- NUM_SEGMENTS_SCORED = 0;
- NUM_SHIFTS_CONSIDERED = 0;
- NUM_BEAM_SEARCH_CALLS = 0;
- BEAM_WIDTH = 20;
- MAX_SHIFT_DIST = 50;
- PRINT_DEBUG = false;
-}
-
-
-// terCalc::~terCalc()
-// {
-// }
-// size_t* terCalc::hashVec ( vector<string> s )
-// {
-// size_t retour[ ( int ) s.size() ];
-// int i=0;
-// for ( i=0; i< ( int ) s.size(); i++ )
-// {
-// boost::hash<std::string> hasher;
-// retour[i]=hasher ( s.at ( i ) );
-// }
-// return retour;
-// }
-
-
-int terCalc::WERCalculation ( size_t * ref, size_t * hyp )
-{
- int retour;
- int REFSize = sizeof ( ref ) + 1;
- int HYPSize = sizeof ( hyp ) + 1;
- int WER[REFSize][HYPSize];
- int i = 0;
- int j = 0;
- for ( i = 0; i < REFSize; i++ ) {
- WER[i][0] = ( int ) i;
- }
- for ( j = 0; j < HYPSize; j++ ) {
- WER[0][j] = ( int ) j;
- }
- for ( j = 1; j < HYPSize; j++ ) {
- for ( i = 1; i < REFSize; i++ ) {
- if ( i == 1 ) {
- cerr << endl;
- }
- if ( ref[i-1] == hyp[j-1] ) {
- WER[i][j] = WER[i-1][j-1];
- cerr << "- ";
- cerr << WER[i][j] << "-\t";
- } else {
- if ( ( ( WER[i-1][ j] + 1 ) < ( WER[i][j-1] + 1 ) ) && ( ( WER[i-1][j] + 1 ) < ( WER[i-1][j-1] + 1 ) ) ) {
- WER[i][j] = ( WER[i-1][j] + 1 );
-// cerr << "D ";
- cerr << WER[i][j] << "D\t";
- } else {
- if ( ( ( WER[i][j-1] + 1 ) < ( WER[i-1][j] + 1 ) ) && ( ( WER[i][j-1] + 1 ) < ( WER[i-1][j-1] + 1 ) ) ) {
- WER[i][j] = ( WER[i][j-1] + 1 );
-// cerr << "I ";
- cerr << WER[i][j] << "I\t";
- } else {
- WER[i][j] = ( WER[i-1][j-1] + 1 );
-// cerr << "S ";
- cerr << WER[i][j] << "S\t";
- }
- }
- }
- }
- }
- cerr << endl;
- retour = WER[i-1][j-1];
- cerr << "i : " << i - 1 << "\tj : " << j - 1 << endl;
- return retour;
-}
-int terCalc::WERCalculation ( std::vector< int > ref, std::vector< int > hyp )
-{
- stringstream s;
- s.str ( "" );
- string stringRef ( "" );
- string stringHyp ( "" );
- for ( vector<int>::iterator l_it = ref.begin(); l_it != ref.end(); l_it++ ) {
- if ( l_it == ref.begin() ) {
- s << ( *l_it );
- } else {
- s << " " << ( *l_it );
- }
- }
- stringRef = s.str();
- s.str ( "" );
- for ( vector<int>::iterator l_itHyp = hyp.begin(); l_itHyp != hyp.end(); l_itHyp++ ) {
- if ( l_itHyp == hyp.begin() ) {
- s << ( *l_itHyp );
- } else {
- s << " " << ( *l_itHyp );
- }
- }
- stringHyp = s.str();
- s.str ( "" );
- return WERCalculation ( stringToVector ( stringRef, " " ), stringToVector ( stringHyp , " " ) );
-}
-
-terAlignment terCalc::TER ( std::vector< int > hyp, std::vector< int > ref )
-{
- stringstream s;
- s.str ( "" );
- string stringRef ( "" );
- string stringHyp ( "" );
- for ( vector<int>::iterator l_it = ref.begin(); l_it != ref.end(); l_it++ ) {
- if ( l_it == ref.begin() ) {
- s << ( *l_it );
- } else {
- s << " " << ( *l_it );
- }
- }
- stringRef = s.str();
- s.str ( "" );
- for ( vector<int>::iterator l_itHyp = hyp.begin(); l_itHyp != hyp.end(); l_itHyp++ ) {
- if ( l_itHyp == hyp.begin() ) {
- s << ( *l_itHyp );
- } else {
- s << " " << ( *l_itHyp );
+ terCalc::terCalc()
+ {
+ TAILLE_PERMUT_MAX = 50;
+ infinite = 999999.0;
+ shift_cost = 1.0;
+ insert_cost = 1.0;
+ delete_cost = 1.0;
+ substitute_cost = 1.0;
+ match_cost = 0.0;
+ NBR_SEGS_EVALUATED = 0;
+ NBR_PERMUTS_CONSID = 0;
+ NBR_BS_APPELS = 0;
+ TAILLE_BEAM = 20;
+ DIST_MAX_PERMUT = 50;
+ PRINT_DEBUG = false;
+ hypSpans.clear();
+ refSpans.clear();
+ }
+
+
+ terAlignment terCalc::WERCalculation ( vector< string > hyp , vector< string > ref )
+ {
+
+ return minimizeDistanceEdition ( hyp, ref, hypSpans );
+
}
- }
- stringHyp = s.str();
- s.str ( "" );
- return TER ( stringToVector ( stringRef , " " ), stringToVector ( stringHyp , " " ) );
-}
-int terCalc::WERCalculation ( vector<string> ref, vector<string> hyp )
-{
- int retour;
- int REFSize = ( int ) ref.size() + 1;
- int HYPSize = ( int ) hyp.size() + 1;
- int WER[REFSize][HYPSize];
- char WERchar[REFSize][HYPSize];
- int i = 0;
- int j = 0;
- for ( i = 0; i < REFSize; i++ ) {
- WER[i][0] = ( int ) i;
- }
- for ( j = 0; j < HYPSize; j++ ) {
- WER[0][j] = ( int ) j;
- }
- for ( j = 1; j < HYPSize; j++ ) {
- for ( i = 1; i < REFSize; i++ ) {
-// if (i==1)
-// {
-// cerr << endl;
-// }
- if ( ref[i-1] == hyp[j-1] ) {
- WER[i][j] = WER[i-1][j-1];
-// cerr << "- ";
-// cerr << WER[i][j]<< "-\t";
- WERchar[i][j] = '-';
- } else {
- if ( ( ( WER[i-1][ j] + 1 ) < ( WER[i][j-1] + 1 ) ) && ( ( WER[i-1][j] + 1 ) < ( WER[i-1][j-1] + 1 ) ) ) {
- WER[i][j] = ( WER[i-1][j] + 1 );
-// cerr << "D ";
-// cerr << WER[i][j]<< "D\t";
- WERchar[i][j] = 'D';
- } else {
- if ( ( ( WER[i][j-1] + 1 ) < ( WER[i-1][j] + 1 ) ) && ( ( WER[i][j-1] + 1 ) < ( WER[i-1][j-1] + 1 ) ) ) {
- WER[i][j] = ( WER[i][j-1] + 1 );
-// cerr << "I ";
-// cerr << WER[i][j]<< "I\t";
- WERchar[i][j] = 'I';
- } else {
- WER[i][j] = ( WER[i-1][j-1] + 1 );
-// cerr << "S ";
-// cerr << WER[i][j]<< "S\t";
- WERchar[i][j] = 'S';
- }
- }
- }
- }
- }
- cerr << endl;
- retour = WER[REFSize-1][HYPSize-1];
- cerr << "i : " << i - 1 << "\tj : " << j - 1 << endl;
- j = HYPSize - 1;
- i = REFSize - 1;
- int k;
- stringstream s;
-// WERalignment local[HYPSize];
- if ( HYPSize > REFSize ) {
- k = HYPSize;
- } else {
- k = REFSize;
- }
- WERalignment local;
- while ( j > 0 && i > 0 ) {
- cerr << "indice i : " << i << "\t";
- cerr << "indice j : " << j << endl;
- if ( ( j == HYPSize - 1 ) && ( i == REFSize - 1 ) ) {
- alignmentElement localInfos;
- s << WER[i][j];
- localInfos.push_back ( s.str() );
- s.str ( "" );
- s << WERchar[i][j];
- localInfos.push_back ( s.str() );
- s.str ( "" );
- local.push_back ( localInfos );
-// // i--;
-// j--;
- }
-// else
+ terAlignment terCalc::TER ( std::vector< int > hyp, std::vector< int > ref )
{
- if ( ( ( WER[i-1][j-1] ) <= ( WER[i-1][j] ) ) && ( ( WER[i-1][j-1] ) <= ( WER[i][j-1] ) ) ) {
- alignmentElement localInfos;
- s << WER[i-1][j-1];
- localInfos.push_back ( s.str() );
+ stringstream s;
s.str ( "" );
- s << WERchar[i-1][j-1];
- localInfos.push_back ( s.str() );
+ string stringRef ( "" );
+ string stringHyp ( "" );
+ for ( vector<int>::iterator l_it = ref.begin(); l_it != ref.end(); l_it++ )
+ {
+ if ( l_it == ref.begin() )
+ {
+ s << ( *l_it );
+ }
+ else
+ {
+ s << " " << ( *l_it );
+ }
+ }
+ stringRef = s.str();
s.str ( "" );
- local.push_back ( localInfos );
- i--;
- j--;
- } else {
- if ( ( ( WER[i][j-1] ) <= ( WER[i-1][j] ) ) && ( ( WER[i][j-1] ) <= ( WER[i-1][j-1] ) ) ) {
- alignmentElement localInfos;
- s << WER[i][j-1];
- localInfos.push_back ( s.str() );
- s.str ( "" );
- s << WERchar[i][j-1];
- localInfos.push_back ( s.str() );
- s.str ( "" );
- local.push_back ( localInfos );
- j--;
- } else {
- alignmentElement localInfos;
- s << WER[i-1][j];
- localInfos.push_back ( s.str() );
- s.str ( "" );
- s << WERchar[i-1][j];
- localInfos.push_back ( s.str() );
- s.str ( "" );
- local.push_back ( localInfos );
- i--;
+ for ( vector<int>::iterator l_itHyp = hyp.begin(); l_itHyp != hyp.end(); l_itHyp++ )
+ {
+ if ( l_itHyp == hyp.begin() )
+ {
+ s << ( *l_itHyp );
+ }
+ else
+ {
+ s << " " << ( *l_itHyp );
+ }
}
- }
- }
- }
-
- for ( j = 1; j < HYPSize; j++ ) {
- for ( i = 1; i < REFSize; i++ ) {
- cerr << WERchar[i][j] << " ";
- }
- cerr << endl;
- }
- cerr << endl;
- for ( j = 1; j < HYPSize; j++ ) {
- for ( i = 1; i < REFSize; i++ ) {
- cerr << WER[i][j] << " ";
+ stringHyp = s.str();
+ s.str ( "" );
+ return TER ( stringToVector ( stringRef , " " ), stringToVector ( stringHyp , " " ) );
}
- cerr << endl;
- }
- cerr << "=================" << endl;
-// k=local.size()-1;
-// while (k>0)
-// {
-// alignmentElement localInfos;
-// localInfos=local.at(k-1);
-// l_WERalignment.push_back(localInfos);
-// cerr << (string)localInfos.at(1)+"\t";
- k--;
-// }
-// cerr<<endl;
- k = local.size() - 1;
- int l = 0;
- int m = 0;
- while ( k > 0 ) {
- alignmentElement localInfos;
- localInfos = local.at ( k - 1 );
- if ( ( int ) ( localInfos.at ( 1 ).compare ( "D" ) ) == 0 || l > HYPSize - 1 ) {
- localInfos.push_back ( "***" );
- } else {
- localInfos.push_back ( hyp.at ( l ) );
- l++;
- }
- if ( ( int ) ( localInfos.at ( 1 ).compare ( "I" ) ) == 0 || m > REFSize - 1 ) {
- localInfos.push_back ( "***" );
- } else {
- localInfos.push_back ( ref.at ( m ) );
- m++;
+ hashMapInfos terCalc::createConcordMots ( vector<string> hyp, vector<string> ref )
+ {
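+    // Index every reference n-gram (length bounded by TAILLE_PERMUT_MAX) whose words all
+    // occur somewhere in the hypothesis, mapping its surface string to the list of its
+    // start positions in the reference; used later to restrict candidate shifts.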
+ hashMap tempHash;
+ hashMapInfos retour;
+ for ( int i = 0; i < ( int ) hyp.size(); i++ )
+ {
+ tempHash.addHasher ( hyp.at ( i ), "" );
+ }
+ bool cor[ref.size() ];
+ for ( int i = 0; i < ( int ) ref.size(); i++ )
+ {
+ if ( tempHash.trouve ( ( string ) ref.at ( i ) ) )
+ {
+ cor[i] = true;
+ }
+ else
+ {
+ cor[i] = false;
+ }
+ }
+ for ( int start = 0; start < ( int ) ref.size(); start++ )
+ {
+ if ( cor[start] )
+ {
+ for ( int end = start; ( ( end < ( int ) ref.size() ) && ( end - start <= TAILLE_PERMUT_MAX ) && ( cor[end] ) );end++ )
+ {
+ vector<string> ajouter = subVector ( ref, start, end + 1 );
+ string ajouterString = vectorToString ( ajouter );
+ vector<int> values = retour.getValue ( ajouterString );
+ values.push_back ( start );
+ if ( values.size() > 1 )
+ {
+ retour.setValue ( ajouterString, values );
+ }
+ else
+ {
+ retour.addValue ( ajouterString, values );
+ }
+ }
+ }
+ }
+ return retour;
}
-// cerr << vectorToString(localInfos)<<endl;
-// cerr <<localInfos.at(0)<<"\t"<<localInfos.at(1)<<"\t"<<localInfos.at(2)<<"\t"<<localInfos.at(3)<<endl;
- l_WERalignment.push_back ( localInfos );
-// cerr << (string)localInfos.at(1)+"\t";
- k--;
- }
- cerr << endl;
- /* k=local.size()-1;
- while (k>0)
- {
- alignmentElement localInfos;
- localInfos=local.at(k-1);
- // l_WERalignment.push_back(localInfos);
- cerr << (string)localInfos.at(0)+"\t";
- k--;
- }
- cerr<<endl;*/
- k = 0;
-// k=l_WERalignment.size()-1;
- m = 0;
- while ( k < ( int ) l_WERalignment.size() ) {
- alignmentElement localInfos;
- localInfos = l_WERalignment.at ( k );
- cerr << localInfos.at ( 0 ) << "\t" << localInfos.at ( 1 ) << "\t" << localInfos.at ( 2 ) << "\t" << localInfos.at ( 3 ) << endl;
- /* if ((int)(localInfos.at(1).compare("I"))==0)
- {
- cerr << "***\t";
- }
- else
- {
- // if (m<ref.size())
- {
- cerr << ref.at(m) << "\t";
- }
- m++;
- }
- */
- k++;
- }
- cerr << endl;
- /* k=local.size()-1;
- l=0;
- while (k>0)
- {
- alignmentElement localInfos;
- localInfos=local.at(k-1);
- // l_WERalignment.push_back(localInfos);
- if ((int)(localInfos.at(1).compare("D"))==0)
- {
- cerr << "***\t";
- }
- else
- {
- cerr << hyp.at(l) << "\t";
- l++;
- }
- k--;
- }
- cerr<<endl;*/
- cerr << "=================" << endl;
- return retour;
-}
-
-// string terCalc::vectorToString(vector<string> vec)
-// {
-// string retour("");
-// for (vector<string>::iterator vecIter=vec.begin();vecIter!=vec.end(); vecIter++)
-// {
-// retour+=(*vecIter)+"\t";
-// }
-// return retour;
-// }
-// vector<string> terCalc::subVector(vector<string> vec, int start, int end)
-// {
-// if (start>end)
-// {
-// cerr << "ERREUR : terCalc::subVector : end > start"<<endl;
-// exit(0);
-// }
-// vector<string> retour;
-// for (int i=start; ((i<end) && (i< vec.size())); i++)
-// {
-// retour.push_back(vec.at(i));
-// }
-// return retour;
-// }
-
-hashMapInfos terCalc::BuildWordMatches ( vector<string> hyp, vector<string> ref )
-{
- hashMap tempHash;
- hashMapInfos retour;
- for ( int i = 0; i < ( int ) hyp.size(); i++ ) {
- tempHash.addHasher ( hyp.at ( i ), "" );
- }
- bool cor[ref.size() ];
- for ( int i = 0; i < ( int ) ref.size(); i++ ) {
- if ( tempHash.trouve ( ( string ) ref.at ( i ) ) ) {
- cor[i] = true;
- } else {
- cor[i] = false;
- }
- }
- for ( int start = 0; start < ( int ) ref.size(); start++ ) {
- if ( cor[start] ) {
- for ( int end = start; ( ( end < ( int ) ref.size() ) && ( end - start <= MAX_SHIFT_SIZE ) && ( cor[end] ) ); end++ ) {
- vector<string> ajouter = subVector ( ref, start, end + 1 );
- string ajouterString = vectorToString ( ajouter );
- vector<int> values = retour.getValue ( ajouterString );
- values.push_back ( start );
- if ( values.size() > 1 ) {
- retour.setValue ( ajouterString, values );
- } else {
- retour.addValue ( ajouterString, values );
+ bool terCalc::trouverIntersection ( vecInt refSpan, vecInt hypSpan )
+ {
+ if ( ( refSpan.at ( 1 ) >= hypSpan.at ( 0 ) ) && ( refSpan.at ( 0 ) <= hypSpan.at ( 1 ) ) )
+ {
+ return true;
}
- }
+ return false;
}
- }
- return retour;
-}
-
-bool terCalc::spanIntersection ( vecInt refSpan, vecInt hypSpan )
-{
- if ( ( refSpan.at ( 1 ) >= hypSpan.at ( 0 ) ) && ( refSpan.at ( 0 ) <= hypSpan.at ( 1 ) ) ) {
- return true;
- }
- return false;
-}
-terAlignment terCalc::MinEditDist ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans )
-{
- double current_best = INF;
- double last_best = INF;
- int first_good = 0;
- int current_first_good = 0;
- int last_good = -1;
- int cur_last_good = 0;
- int last_peak = 0;
- int cur_last_peak = 0;
- int i, j;
- double cost, icost, dcost;
- double score;
-
-// int hwsize = hyp.size()-1;
-// int rwsize = ref.size()-1;
- NUM_BEAM_SEARCH_CALLS++;
-// if ((ref.size()+1 > sizeof(S)) || (hyp.size()+1 > sizeof(S)))
-// {
-// int max = ref.size();
-// if (hyp.size() > ref.size()) max = hyp.size();
-// max += 26; // we only need a +1 here, but let's pad for future use
-// S = new double[max][max];
-// P = new char[max][max];
-// }
- for ( i = 0; i <= ( int ) ref.size(); i++ ) {
- for ( j = 0; j <= ( int ) hyp.size(); j++ ) {
- S[i][j] = -1.0;
- P[i][j] = '0';
- }
- }
- S[0][0] = 0.0;
- for ( j = 0; j <= ( int ) hyp.size(); j++ ) {
- last_best = current_best;
- current_best = INF;
- first_good = current_first_good;
- current_first_good = -1;
- last_good = cur_last_good;
- cur_last_good = -1;
- last_peak = cur_last_peak;
- cur_last_peak = 0;
- for ( i = first_good; i <= ( int ) ref.size(); i++ ) {
- if ( i > last_good ) {
- break;
- }
- if ( S[i][j] < 0 ) {
- continue;
- }
- score = S[i][j];
- if ( ( j < ( int ) hyp.size() ) && ( score > last_best + BEAM_WIDTH ) ) {
- continue;
- }
- if ( current_first_good == -1 ) {
- current_first_good = i ;
- }
- if ( ( i < ( int ) ref.size() ) && ( j < ( int ) hyp.size() ) ) {
- if ( ( int ) refSpans.size() == 0 || ( int ) hypSpans.size() == 0 || spanIntersection ( refSpans.at ( i ), curHypSpans.at ( j ) ) ) {
- if ( ( int ) ( ref.at ( i ).compare ( hyp.at ( j ) ) ) == 0 ) {
- cost = match_cost + score;
- if ( ( S[i+1][j+1] == -1 ) || ( cost < S[i+1][j+1] ) ) {
- S[i+1][j+1] = cost;
- P[i+1][j+1] = ' ';
- }
- if ( cost < current_best ) {
- current_best = cost;
+ terAlignment terCalc::minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans )
+ {
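+    // Beam-limited edit-distance dynamic programming: S holds the best cost for each
+    // (reference, hypothesis) prefix pair, P the backpointer ('A' match, 'S' substitution,
+    // 'I' insertion, 'D' deletion); cells whose score exceeds the previous column's best
+    // by more than TAILLE_BEAM are skipped.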
+ double current_best = infinite;
+ double last_best = infinite;
+ int first_good = 0;
+ int current_first_good = 0;
+ int last_good = -1;
+ int cur_last_good = 0;
+ int last_peak = 0;
+ int cur_last_peak = 0;
+ int i, j;
+ double cost, icost, dcost;
+ double score;
+
+
+
+ NBR_BS_APPELS++;
+
+
+ for ( i = 0; i <= ( int ) ref.size(); i++ )
+ {
+ for ( j = 0; j <= ( int ) hyp.size(); j++ )
+ {
+ S[i][j] = -1.0;
+ P[i][j] = '0';
}
- if ( current_best == cost ) {
- cur_last_peak = i + 1;
+ }
+ S[0][0] = 0.0;
+ for ( j = 0; j <= ( int ) hyp.size(); j++ )
+ {
+ last_best = current_best;
+ current_best = infinite;
+ first_good = current_first_good;
+ current_first_good = -1;
+ last_good = cur_last_good;
+ cur_last_good = -1;
+ last_peak = cur_last_peak;
+ cur_last_peak = 0;
+ for ( i = first_good; i <= ( int ) ref.size(); i++ )
+ {
+ if ( i > last_good )
+ {
+ break;
+ }
+ if ( S[i][j] < 0 )
+ {
+ continue;
+ }
+ score = S[i][j];
+ if ( ( j < ( int ) hyp.size() ) && ( score > last_best + TAILLE_BEAM ) )
+ {
+ continue;
+ }
+ if ( current_first_good == -1 )
+ {
+ current_first_good = i ;
+ }
+ if ( ( i < ( int ) ref.size() ) && ( j < ( int ) hyp.size() ) )
+ {
+ if ( ( int ) refSpans.size() == 0 || ( int ) hypSpans.size() == 0 || trouverIntersection ( refSpans.at ( i ), curHypSpans.at ( j ) ) )
+ {
+ if ( ( int ) ( ref.at ( i ).compare ( hyp.at ( j ) ) ) == 0 )
+ {
+ cost = match_cost + score;
+ if ( ( S[i+1][j+1] == -1 ) || ( cost < S[i+1][j+1] ) )
+ {
+ S[i+1][j+1] = cost;
+ P[i+1][j+1] = 'A';
+ }
+ if ( cost < current_best )
+ {
+ current_best = cost;
+ }
+ if ( current_best == cost )
+ {
+ cur_last_peak = i + 1;
+ }
+ }
+ else
+ {
+ cost = substitute_cost + score;
+ if ( ( S[i+1][j+1] < 0 ) || ( cost < S[i+1][j+1] ) )
+ {
+ S[i+1][j+1] = cost;
+ P[i+1][j+1] = 'S';
+ if ( cost < current_best )
+ {
+ current_best = cost;
+ }
+ if ( current_best == cost )
+ {
+ cur_last_peak = i + 1 ;
+ }
+ }
+ }
+ }
+ }
+ cur_last_good = i + 1;
+ if ( j < ( int ) hyp.size() )
+ {
+ icost = score + insert_cost;
+ if ( ( S[i][j+1] < 0 ) || ( S[i][j+1] > icost ) )
+ {
+ S[i][j+1] = icost;
+ P[i][j+1] = 'I';
+ if ( ( cur_last_peak < i ) && ( current_best == icost ) )
+ {
+ cur_last_peak = i;
+ }
+ }
+ }
+ if ( i < ( int ) ref.size() )
+ {
+ dcost = score + delete_cost;
+ if ( ( S[ i+1][ j] < 0.0 ) || ( S[i+1][j] > dcost ) )
+ {
+ S[i+1][j] = dcost;
+ P[i+1][j] = 'D';
+ if ( i >= last_good )
+ {
+ last_good = i + 1 ;
+ }
+ }
+ }
}
- } else {
- cost = substitute_cost + score;
- if ( ( S[i+1][j+1] < 0 ) || ( cost < S[i+1][j+1] ) ) {
- S[i+1][j+1] = cost;
- P[i+1][j+1] = 'S';
- if ( cost < current_best ) {
- current_best = cost;
- }
- if ( current_best == cost ) {
- cur_last_peak = i + 1 ;
- }
+ }
+
+
+ int tracelength = 0;
+ i = ref.size();
+ j = hyp.size();
+ while ( ( i > 0 ) || ( j > 0 ) )
+ {
+ tracelength++;
+ if ( P[i][j] == 'A' )
+ {
+ i--;
+ j--;
}
- }
+ else
+ if ( P[i][j] == 'S' )
+ {
+ i--;
+ j--;
+ }
+ else
+ if ( P[i][j] == 'D' )
+ {
+ i--;
+ }
+ else
+ if ( P[i][j] == 'I' )
+ {
+ j--;
+ }
+ else
+ {
+ cerr << "ERROR : terCalc::minimizeDistanceEdition : Invalid path : " << P[i][j] << endl;
+ exit ( -1 );
+ }
}
- }
- cur_last_good = i + 1;
- if ( j < ( int ) hyp.size() ) {
- icost = score + insert_cost;
- if ( ( S[i][j+1] < 0 ) || ( S[i][j+1] > icost ) ) {
- S[i][j+1] = icost;
- P[i][j+1] = 'I';
- if ( ( cur_last_peak < i ) && ( current_best == icost ) ) {
- cur_last_peak = i;
- }
+ vector<char> path ( tracelength );
+ i = ref.size();
+ j = hyp.size();
+ while ( ( i > 0 ) || ( j > 0 ) )
+ {
+ path[--tracelength] = P[i][j];
+ if ( P[i][j] == 'A' )
+ {
+ i--;
+ j--;
+ }
+ else
+ if ( P[i][j] == 'S' )
+ {
+ i--;
+ j--;
+ }
+ else
+ if ( P[i][j] == 'D' )
+ {
+ i--;
+ }
+ else
+ if ( P[i][j] == 'I' )
+ {
+ j--;
+ }
}
- }
- if ( i < ( int ) ref.size() ) {
- dcost = score + delete_cost;
- if ( ( S[ i+1][ j] < 0.0 ) || ( S[i+1][j] > dcost ) ) {
- S[i+1][j] = dcost;
- P[i+1][j] = 'D';
- if ( i >= last_good ) {
- last_good = i + 1 ;
- }
+ terAlignment to_return;
+ to_return.numWords = ref.size();
+ to_return.alignment = path;
+ to_return.numEdits = S[ref.size() ][hyp.size() ];
+ to_return.hyp = hyp;
+ to_return.ref = ref;
+ to_return.averageWords = (int)ref.size();
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::minimizeDistanceEdition : to_return :" << endl << to_return.toString() << endl << "END DEBUG" << endl;
}
- }
- }
- }
-
+ return to_return;
- int tracelength = 0;
- i = ref.size();
- j = hyp.size();
- while ( ( i > 0 ) || ( j > 0 ) ) {
- tracelength++;
- if ( P[i][j] == ' ' ) {
- i--;
- j--;
- } else if ( P[i][j] == 'S' ) {
- i--;
- j--;
- } else if ( P[i][j] == 'D' ) {
- i--;
- } else if ( P[i][j] == 'I' ) {
- j--;
- } else {
- cerr << "ERROR : terCalc::MinEditDist : Invalid path : " << P[i][j] << endl;
- exit ( -1 );
}
- }
- vector<char> path ( tracelength );
- i = ref.size();
- j = hyp.size();
- while ( ( i > 0 ) || ( j > 0 ) ) {
- path[--tracelength] = P[i][j];
- if ( P[i][j] == ' ' ) {
- i--;
- j--;
- } else if ( P[i][j] == 'S' ) {
- i--;
- j--;
- } else if ( P[i][j] == 'D' ) {
- i--;
- } else if ( P[i][j] == 'I' ) {
- j--;
- }
- }
- terAlignment to_return;
- to_return.numWords = ref.size();
- to_return.alignment = path;
- to_return.numEdits = S[ref.size() ][hyp.size() ];
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::MinEditDist : to_return :" << endl << to_return.toString() << endl << "END DEBUG" << endl;
- }
- return to_return;
-
-}
-terAlignment terCalc::TER ( vector<string> hyp, vector<string> ref )
-{
- hashMapInfos rloc = BuildWordMatches ( hyp, ref );
- terAlignment cur_align = MinEditDist ( hyp, ref, hypSpans );
- vector<string> cur = hyp;
- cur_align.hyp = hyp;
- cur_align.ref = ref;
- cur_align.aftershift = hyp;
- double edits = 0;
+ terAlignment terCalc::TER ( vector<string> hyp, vector<string> ref )
+ {
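+    // Greedy TER: start from the plain edit-distance alignment, then repeatedly apply the
+    // best word-sequence shift while one still improves it, adding shift costs to numEdits.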
+ hashMapInfos rloc = createConcordMots ( hyp, ref );
+ terAlignment cur_align = minimizeDistanceEdition ( hyp, ref, hypSpans );
+ vector<string> cur = hyp;
+ cur_align.hyp = hyp;
+ cur_align.ref = ref;
+ cur_align.aftershift = hyp;
+ double edits = 0;
// int numshifts = 0;
- vector<terShift> allshifts;
+ vector<terShift> allshifts;
// cerr << "Initial Alignment:" << endl << cur_align.toString() <<endl;
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::TER : cur_align :" << endl << cur_align.toString() << endl << "END DEBUG" << endl;
- }
- while ( true ) {
- bestShiftStruct returns;
- returns = CalcBestShift ( cur, hyp, ref, rloc, cur_align );
- if ( returns.m_empty ) {
- break;
- }
- terShift bestShift = returns.m_best_shift;
- cur_align = returns.m_best_align;
- edits += bestShift.cost;
- bestShift.alignment = cur_align.alignment;
- bestShift.aftershift = cur_align.aftershift;
- allshifts.push_back ( bestShift );
- cur = cur_align.aftershift;
- }
- terAlignment to_return;
- to_return = cur_align;
- to_return.allshifts = allshifts;
- to_return.numEdits += edits;
- NUM_SEGMENTS_SCORED++;
- return to_return;
-}
-bestShiftStruct terCalc::CalcBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment med_align )
-{
- bestShiftStruct to_return;
- bool anygain = false;
- bool herr[ ( int ) hyp.size() ];
- bool rerr[ ( int ) ref.size() ];
- int ralign[ ( int ) ref.size() ];
- FindAlignErr ( med_align, herr, rerr, ralign );
- vector<vecTerShift> poss_shifts;
- poss_shifts = GatherAllPossShifts ( cur, ref, rloc, med_align, herr, rerr, ralign );
- double curerr = med_align.numEdits;
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::CalcBestShift :" << endl;
- cerr << "Possible Shifts:" << endl;
- for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- ) {
- for ( int j = 0; j < ( int ) ( poss_shifts.at ( i ) ).size(); j++ ) {
- cerr << " [" << i << "] " << ( ( poss_shifts.at ( i ) ).at ( j ) ).toString() << endl;
- }
- }
- cerr << endl;
- cerr << "END DEBUG " << endl;
- }
- double cur_best_shift_cost = 0.0;
- terAlignment cur_best_align = med_align;
- terShift cur_best_shift;
-
-
-
- for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- ) {
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::CalcBestShift :" << endl;
- cerr << "Considering shift of length " << i << " (" << ( poss_shifts.at ( i ) ).size() << ")" << endl;
- cerr << "END DEBUG " << endl;
- }
- /* Consider shifts of length i+1 */
- double curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
- double maxfix = ( 2 * ( 1 + i ) );
- if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) {
- break;
- }
-
- for ( int s = 0; s < ( int ) ( poss_shifts.at ( i ) ).size(); s++ ) {
- curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
- if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) {
- break;
- }
- terShift curshift = ( poss_shifts.at ( i ) ).at ( s );
-
- alignmentStruct shiftReturns = PerformShift ( cur, curshift );
- vector<string> shiftarr = shiftReturns.nwords;
- vector<vecInt> curHypSpans = shiftReturns.aftershift;
-
- terAlignment curalign = MinEditDist ( shiftarr, ref, curHypSpans );
-
- curalign.hyp = hyp;
- curalign.ref = ref;
- curalign.aftershift = shiftarr;
-
- double gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost );
-
- // if (DEBUG) {
- // string testeuh=terAlignment join(" ", shiftarr);
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::CalcBestShift :" << endl;
- cerr << "Gain for " << curshift.toString() << " is " << gain << ". (result: [" << curalign.join ( " ", shiftarr ) << "]" << endl;
- cerr << "" << curalign.toString() << "\n" << endl;
- cerr << "END DEBUG " << endl;
- }
- // }
- //
- if ( ( gain > 0 ) || ( ( cur_best_shift_cost == 0 ) && ( gain == 0 ) ) ) {
- anygain = true;
- cur_best_shift = curshift;
- cur_best_shift_cost = curshift.cost;
- cur_best_align = curalign;
- // if (DEBUG)
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::CalcBestShift :" << endl;
- cerr << "Tmp Choosing shift: " << cur_best_shift.toString() << " gives:\n" << cur_best_align.toString() << "\n" << endl;
- cerr << "END DEBUG " << endl;
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::TER : cur_align :" << endl << cur_align.toString() << endl << "END DEBUG" << endl;
}
- }
- }
- }
- if ( anygain ) {
- to_return.m_best_shift = cur_best_shift;
- to_return.m_best_align = cur_best_align;
- to_return.m_empty = false;
- } else {
- to_return.m_empty = true;
- }
- return to_return;
-}
-
-void terCalc::FindAlignErr ( terAlignment align, bool* herr, bool* rerr, int* ralign )
-{
- int hpos = -1;
- int rpos = -1;
- if ( PRINT_DEBUG ) {
-
- cerr << "BEGIN DEBUG : terCalc::FindAlignErr : " << endl << align.toString() << endl;
- cerr << "END DEBUG " << endl;
- }
- for ( int i = 0; i < ( int ) align.alignment.size(); i++ ) {
- char sym = align.alignment[i];
- if ( sym == ' ' ) {
- hpos++;
- rpos++;
- herr[hpos] = false;
- rerr[rpos] = false;
- ralign[rpos] = hpos;
- } else if ( sym == 'S' ) {
- hpos++;
- rpos++;
- herr[hpos] = true;
- rerr[rpos] = true;
- ralign[rpos] = hpos;
- } else if ( sym == 'I' ) {
- hpos++;
- herr[hpos] = true;
- } else if ( sym == 'D' ) {
- rpos++;
- rerr[rpos] = true;
- ralign[rpos] = hpos;
- } else {
- cerr << "ERROR : terCalc::FindAlignErr : Invalid mini align sequence " << sym << " at pos " << i << endl;
- exit ( -1 );
- }
- }
-}
-
-vector<vecTerShift> terCalc::GatherAllPossShifts ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign )
-{
- vector<vecTerShift> to_return;
- // Don't even bother to look if shifts can't be done
- if ( ( MAX_SHIFT_SIZE <= 0 ) || ( MAX_SHIFT_DIST <= 0 ) ) {
-// terShift[][] to_return = new terShift[0][];
- return to_return;
- }
+ while ( true )
+ {
+ bestShiftStruct returns;
+ returns = findBestShift ( cur, hyp, ref, rloc, cur_align );
+ if ( returns.m_empty )
+ {
+ break;
+ }
+ terShift bestShift = returns.m_best_shift;
+ cur_align = returns.m_best_align;
+ edits += bestShift.cost;
+ bestShift.alignment = cur_align.alignment;
+ bestShift.aftershift = cur_align.aftershift;
+ allshifts.push_back ( bestShift );
+ cur = cur_align.aftershift;
+ }
+ terAlignment to_return;
+ to_return = cur_align;
+ to_return.allshifts = allshifts;
+ to_return.numEdits += edits;
+ NBR_SEGS_EVALUATED++;
+ return to_return;
+ }
+ bestShiftStruct terCalc::findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment med_align )
+ {
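+    // Enumerate the candidate shifts (longest first), redo the edit distance after each
+    // one, and keep the shift giving the largest gain over the current alignment.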
+ bestShiftStruct to_return;
+ bool anygain = false;
+ bool herr[ ( int ) hyp.size() ];
+ bool rerr[ ( int ) ref.size() ];
+ int ralign[ ( int ) ref.size() ];
+ calculateTerAlignment ( med_align, herr, rerr, ralign );
+ vector<vecTerShift> poss_shifts;
+
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift (after the calculateTerAlignment call) :" << endl;
+ cerr << "indices: ";
+ for (int l_i=0; l_i < ( int ) ref.size() ; l_i++)
+ {
+ cerr << l_i << "\t";
+ }
+ cerr << endl;
+ cerr << "hyp : \t"<<vectorToString(hyp ,"\t") << endl;
+ cerr << "cur : \t"<<vectorToString(cur ,"\t") << endl;
+ cerr << "ref : \t"<<vectorToString(ref ,"\t") << endl;
+ cerr << "herr : "<<vectorToString(herr,"\t",( int ) hyp.size()) << " | " << ( int ) hyp.size() <<endl;
+ cerr << "rerr : "<<vectorToString(rerr,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() <<endl;
+ cerr << "ralign : "<< vectorToString(ralign,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ poss_shifts = calculerPermutations ( cur, ref, rloc, med_align, herr, rerr, ralign );
+ double curerr = med_align.numEdits;
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Possible Shifts:" << endl;
+ for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- )
+ {
+ for ( int j = 0; j < ( int ) ( poss_shifts.at ( i ) ).size(); j++ )
+ {
+ cerr << " [" << i << "] " << ( ( poss_shifts.at ( i ) ).at ( j ) ).toString() << endl;
+ }
+ }
+ cerr << endl;
+ cerr << "END DEBUG " << endl;
+ }
+// exit(0);
+ double cur_best_shift_cost = 0.0;
+ terAlignment cur_best_align = med_align;
+ terShift cur_best_shift;
- vector<vecTerShift> allshifts ( MAX_SHIFT_SIZE + 1 );
-// ArrayList[] allshifts = new ArrayList[MAX_SHIFT_SIZE+1];
-// for (int i = 0; i < allshifts.length; i++)
-// {
-// allshifts[i] = new ArrayList();
-// }
-// List hyplist = Arrays.asList(hyp);
- for ( int start = 0; start < ( int ) hyp.size(); start++ ) {
- string subVectorHypString = vectorToString ( subVector ( hyp, start, start + 1 ) );
- if ( ! rloc.trouve ( subVectorHypString ) ) {
- continue;
- }
+ for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- )
+ {
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Considering shift of length " << i << " (" << ( poss_shifts.at ( i ) ).size() << ")" << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ /* Consider shifts of length i+1 */
+ double curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
+ double maxfix = ( 2 * ( 1 + i ) );
+ if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) )
+ {
+ break;
+ }
- bool ok = false;
- vector<int> mtiVec = rloc.getValue ( subVectorHypString );
- vector<int>::iterator mti = mtiVec.begin();
- while ( mti != mtiVec.end() && ( ! ok ) ) {
- int moveto = ( *mti );
- mti++;
- if ( ( start != ralign[moveto] ) && ( ( ralign[moveto] - start ) <= MAX_SHIFT_DIST ) && ( ( start - ralign[moveto] - 1 ) <= MAX_SHIFT_DIST ) ) {
- ok = true;
- }
- }
- if ( ! ok ) {
- continue;
+ for ( int s = 0; s < ( int ) ( poss_shifts.at ( i ) ).size(); s++ )
+ {
+ curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
+ if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) )
+ {
+ break;
+ }
+ terShift curshift = ( poss_shifts.at ( i ) ).at ( s );
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "cur : "<< join(" ",cur) << endl;
+ cerr << "curshift : "<< curshift.toString() << endl;
+
+ }
+ alignmentStruct shiftReturns = permuter ( cur, curshift );
+ vector<string> shiftarr = shiftReturns.nwords;
+ vector<vecInt> curHypSpans = shiftReturns.aftershift;
+
+ if ( PRINT_DEBUG )
+ {
+ cerr << "shiftarr : "<< join(" ",shiftarr) << endl;
+// cerr << "curHypSpans : "<< curHypSpans.toString() << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ terAlignment curalign = minimizeDistanceEdition ( shiftarr, ref, curHypSpans );
+
+ curalign.hyp = hyp;
+ curalign.ref = ref;
+ curalign.aftershift = shiftarr;
+
+
+ double gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost );
+
+ // if (DEBUG) {
+ // string testeuh=terAlignment join(" ", shiftarr);
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Gain for " << curshift.toString() << " is " << gain << ". (result: [" << curalign.join ( " ", shiftarr ) << "]" << endl;
+ cerr << "Details of gains : gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost )"<<endl;
+ cerr << "Details of gains : gain = ("<<cur_best_align.numEdits << "+" << cur_best_shift_cost << ") - (" << curalign.numEdits << "+" << curshift.cost << ")"<<endl;
+ cerr << "" << curalign.toString() << "\n" << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ // }
+ //
+ if ( ( gain > 0 ) || ( ( cur_best_shift_cost == 0 ) && ( gain == 0 ) ) )
+ {
+ anygain = true;
+ cur_best_shift = curshift;
+ cur_best_shift_cost = curshift.cost;
+ cur_best_align = curalign;
+ // if (DEBUG)
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Tmp Choosing shift: " << cur_best_shift.toString() << " gives:\n" << cur_best_align.toString() << "\n" << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ }
+ }
+ }
+ if ( anygain )
+ {
+ to_return.m_best_shift = cur_best_shift;
+ to_return.m_best_align = cur_best_align;
+ to_return.m_empty = false;
+ }
+ else
+ {
+ to_return.m_empty = true;
+ }
+ return to_return;
}
- ok = true;
- for ( int end = start; ( ok && ( end < ( int ) hyp.size() ) && ( end < start + MAX_SHIFT_SIZE ) ); end++ ) {
- /* check if cand is good if so, add it */
- vector<string> cand = subVector ( hyp, start, end + 1 );
- ok = false;
- if ( ! ( rloc.trouve ( vectorToString ( cand ) ) ) ) {
- continue;
- }
- bool any_herr = false;
+ void terCalc::calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign )
+ {
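+    // Expand the alignment path into per-word error flags (herr for the hypothesis,
+    // rerr for the reference) and, for each reference word, the hypothesis position
+    // it is aligned to (ralign).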
+ int hpos = -1;
+ int rpos = -1;
+ if ( PRINT_DEBUG )
+ {
- for ( int i = 0; ( ( i <= ( end - start ) ) && ( ! any_herr ) ); i++ ) {
- if ( herr[start+i] ) {
- any_herr = true;
+ cerr << "BEGIN DEBUG : terCalc::calculateTerAlignment : " << endl << align.toString() << endl;
+ cerr << "END DEBUG " << endl;
}
- }
- if ( any_herr == false ) {
- ok = true;
- continue;
- }
-
- vector<int> movetoitVec;
- movetoitVec = rloc.getValue ( ( string ) vectorToString ( cand ) );
- vector<int>::iterator movetoit = movetoitVec.begin();
- while ( movetoit != movetoitVec.end() ) {
- int moveto = ( *movetoit );
- movetoit++;
- if ( ! ( ( ralign[moveto] != start ) && ( ( ralign[moveto] < start ) || ( ralign[moveto] > end ) ) && ( ( ralign[moveto] - start ) <= MAX_SHIFT_DIST ) && ( ( start - ralign[moveto] ) <= MAX_SHIFT_DIST ) ) ) {
- continue;
+ for ( int i = 0; i < ( int ) align.alignment.size(); i++ )
+ {
+ herr[i] = false;
+ rerr[i] = false;
+ ralign[i] = -1;
+ }
+ for ( int i = 0; i < ( int ) align.alignment.size(); i++ )
+ {
+ char sym = align.alignment[i];
+ if ( sym == 'A' )
+ {
+ hpos++;
+ rpos++;
+ herr[hpos] = false;
+ rerr[rpos] = false;
+ ralign[rpos] = hpos;
+ }
+ else
+ if ( sym == 'S' )
+ {
+ hpos++;
+ rpos++;
+ herr[hpos] = true;
+ rerr[rpos] = true;
+ ralign[rpos] = hpos;
+ }
+ else
+ if ( sym == 'I' )
+ {
+ hpos++;
+ herr[hpos] = true;
+ }
+ else
+ if ( sym == 'D' )
+ {
+ rpos++;
+ rerr[rpos] = true;
+ ralign[rpos] = hpos+1;
+ }
+ else
+ {
+ cerr << "ERROR : terCalc::calculateTerAlignment : Invalid mini align sequence " << sym << " at pos " << i << endl;
+ exit ( -1 );
+ }
}
- ok = true;
-
- /* check to see if there are any errors in either string
- (only move if this is the case!)
- */
+ }
- bool any_rerr = false;
- for ( int i = 0; ( i <= end - start ) && ( ! any_rerr ); i++ ) {
- if ( rerr[moveto+i] ) {
- any_rerr = true;
- }
- }
- if ( ! any_rerr ) {
- continue;
+ vector<vecTerShift> terCalc::calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign )
+ {
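+    // Collect candidate shifts grouped by length: a hypothesis phrase may only move next
+    // to a matching reference phrase within DIST_MAX_PERMUT of its current alignment, and
+    // only if both the phrase and the destination region currently contain errors.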
+ vector<vecTerShift> to_return;
+ if ( ( TAILLE_PERMUT_MAX <= 0 ) || ( DIST_MAX_PERMUT <= 0 ) )
+ {
+ return to_return;
}
- for ( int roff = -1; roff <= ( end - start ); roff++ ) {
- terShift topush;
- bool topushNull = true;
- if ( ( roff == -1 ) && ( moveto == 0 ) ) {
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::GatherAllPossShifts 01 : " << endl << "Consider making " << start << "..." << end << " moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: -1" << endl << "END DEBUG" << endl;
+ vector<vecTerShift> allshifts ( TAILLE_PERMUT_MAX + 1 );
+ for ( int start = 0; start < ( int ) hyp.size(); start++ )
+ {
+ string subVectorHypString = vectorToString ( subVector ( hyp, start, start + 1 ) );
+ if ( ! rloc.trouve ( subVectorHypString ) )
+ {
+ continue;
}
- terShift t01 ( start, end, -1, -1 );
- topush = t01;
- topushNull = false;
- } else if ( ( start != ralign[moveto+roff] ) && ( ( roff == 0 ) || ( ralign[moveto+roff] != ralign[moveto] ) ) ) {
- int newloc = ralign[moveto+roff];
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::GatherAllPossShifts 02 : " << endl << "Consider making " << start << "..." << end << " moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: " << newloc << endl << "END DEBUG" << endl;
+ bool ok = false;
+ vector<int> mtiVec = rloc.getValue ( subVectorHypString );
+ vector<int>::iterator mti = mtiVec.begin();
+ while ( mti != mtiVec.end() && ( ! ok ) )
+ {
+ int moveto = ( *mti );
+ mti++;
+ if ( ( start != ralign[moveto] ) && ( ( ralign[moveto] - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign[moveto] - 1 ) <= DIST_MAX_PERMUT ) )
+ {
+ ok = true;
+ }
}
- terShift t02 ( start, end, moveto + roff, newloc );
- topush = t02;
- topushNull = false;
- }
- if ( !topushNull ) {
- topush.shifted = cand;
- topush.cost = shift_cost;
- if ( PRINT_DEBUG ) {
-
- cerr << "BEGIN DEBUG : terCalc::GatherAllPossShifts 02 : " << endl;
- cerr << "start : " << start << endl;
- cerr << "end : " << end << endl;
- cerr << "end - start : " << end - start << endl;
- cerr << "END DEBUG " << endl;
+ if ( ! ok )
+ {
+ continue;
+ }
+ ok = true;
+ for ( int end = start; ( ok && ( end < ( int ) hyp.size() ) && ( end < start + TAILLE_PERMUT_MAX ) ); end++ )
+ {
+ /* check if cand is good if so, add it */
+ vector<string> cand = subVector ( hyp, start, end + 1 );
+ ok = false;
+ if ( ! ( rloc.trouve ( vectorToString ( cand ) ) ) )
+ {
+ continue;
+ }
+
+ bool any_herr = false;
+
+ for ( int i = 0; ( ( i <= ( end - start ) ) && ( ! any_herr ) ); i++ )
+ {
+ if ( herr[start+i] )
+ {
+ any_herr = true;
+ }
+ }
+ if ( any_herr == false )
+ {
+ ok = true;
+ continue;
+ }
+
+ vector<int> movetoitVec;
+ movetoitVec = rloc.getValue ( ( string ) vectorToString ( cand ) );
+// cerr << "CANDIDATE " << ( string ) vectorToString ( cand ) <<" PLACED : " << ( string ) vectorToString ( movetoitVec," ") << endl;
+ vector<int>::iterator movetoit = movetoitVec.begin();
+ while ( movetoit != movetoitVec.end() )
+ {
+ int moveto = ( *movetoit );
+ movetoit++;
+ if ( ! ( ( ralign[moveto] != start ) && ( ( ralign[moveto] < start ) || ( ralign[moveto] > end ) ) && ( ( ralign[moveto] - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign[moveto] ) <= DIST_MAX_PERMUT ) ) )
+ {
+ continue;
+ }
+ ok = true;
+
+ /* check to see if there are any errors in either string
+ (only move if this is the case!)
+ */
+
+ bool any_rerr = false;
+ for ( int i = 0; ( i <= end - start ) && ( ! any_rerr ); i++ )
+ {
+ if ( rerr[moveto+i] )
+ {
+ any_rerr = true;
+ }
+ }
+ if ( ! any_rerr )
+ {
+ continue;
+ }
+ for ( int roff = -1; roff <= ( end - start ); roff++ )
+ {
+ terShift topush;
+ bool topushNull = true;
+ if ( ( roff == -1 ) && ( moveto == 0 ) )
+ {
+ if ( PRINT_DEBUG )
+ {
+
+ cerr << "BEGIN DEBUG : terCalc::calculerPermutations 01 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: -1" << endl << "END DEBUG" << endl;
+ }
+ terShift t01 ( start, end, -1, -1 );
+ topush = t01;
+ topushNull = false;
+ }
+ else
+ if ( ( start != ralign[moveto+roff] ) && ( ( roff == 0 ) || ( ralign[moveto+roff] != ralign[moveto] ) ) )
+ {
+ int newloc = ralign[moveto+roff];
+ if ( PRINT_DEBUG )
+ {
+
+ cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: " << newloc << endl << "END DEBUG" << endl;
+ }
+ terShift t02 ( start, end, moveto + roff, newloc );
+ topush = t02;
+ topushNull = false;
+ }
+ if ( !topushNull )
+ {
+ topush.shifted = cand;
+ topush.cost = shift_cost;
+ if ( PRINT_DEBUG )
+ {
+
+ cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl;
+ cerr << "start : " << start << endl;
+ cerr << "end : " << end << endl;
+ cerr << "end - start : " << end - start << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ ( allshifts.at ( end - start ) ).push_back ( topush );
+ }
+ }
+ }
}
- ( allshifts.at ( end - start ) ).push_back ( topush );
- }
}
- }
+ to_return.clear();
+ for ( int i = 0; i < TAILLE_PERMUT_MAX + 1; i++ )
+ {
+ to_return.push_back ( ( vecTerShift ) allshifts.at ( i ) );
+ }
+ return to_return;
}
- }
-// vector<vecTerShift> to_return;
- to_return.clear();
-// terShift[][] to_return = new terShift[MAX_SHIFT_SIZE+1][];
- for ( int i = 0; i < MAX_SHIFT_SIZE + 1; i++ ) {
-// to_return[i] = (terShift[]) allshifts[i].toArray(new terShift[0]);
- to_return.push_back ( ( vecTerShift ) allshifts.at ( i ) );
- }
- return to_return;
-}
-alignmentStruct terCalc::PerformShift ( vector<string> words, terShift s )
-{
- return PerformShift ( words, s.start, s.end, s.newloc );
-}
-
+ alignmentStruct terCalc::permuter ( vector<string> words, terShift s )
+ {
+ return permuter ( words, s.start, s.end, s.newloc );
+ }
-alignmentStruct terCalc::PerformShift ( vector<string> words, int start, int end, int newloc )
-{
- int c = 0;
- vector<string> nwords ( words );
- vector<vecInt> spans ( ( int ) hypSpans.size() );
- alignmentStruct toreturn;
-// ON EST ICI
-// if((int)hypSpans.size()>0) spans = new TERintpair[(int)hypSpans.size()];
-// if(DEBUG) {
- if ( PRINT_DEBUG ) {
- if ( ( int ) hypSpans.size() > 0 ) {
- cerr << "BEGIN DEBUG : terCalc::PerformShift :" << endl << "word length: " << ( int ) words.size() << " span length: " << ( int ) hypSpans.size() << endl << "END DEBUG " << endl;
- } else {
- cerr << "BEGIN DEBUG : terCalc::PerformShift :" << endl << "word length: " << ( int ) words.size() << " span length: null" << endl << "END DEBUG " << endl;
- }
- }
+ alignmentStruct terCalc::permuter ( vector<string> words, int start, int end, int newloc )
+ {
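+    // Rebuild the hypothesis with the words in [start, end] relocated around position
+    // newloc (newloc == -1 moves them to the front), carrying the matching hypSpans along.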
+ int c = 0;
+ vector<string> nwords ( words );
+ vector<vecInt> spans ( ( int ) hypSpans.size() );
+ alignmentStruct to_return;
+ if ( PRINT_DEBUG )
+ {
+
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: " << ( int ) hypSpans.size() << endl ;
+ }
+ else
+ {
+ cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: null" << endl ;
+ }
+ cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << join(" ",words) << " start: " << start << " end: " << end << " newloc "<< newloc << endl << "END DEBUG " << endl;
+ }
+ if (newloc >= ( int ) words.size())
+ {
+ if ( PRINT_DEBUG )
+ {
+ cerr << "WARNING: Relocation over the size of the hypothesis, replacing at the end of it."<<endl;
+ }
+ newloc = ( int ) words.size()-1;
+ }
+
// }
- if ( newloc == -1 ) {
- for ( int i = start; i <= end; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = 0; i <= start - 1; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = end + 1; i < ( int ) words.size(); i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- } else {
- if ( newloc < start ) {
- for ( int i = 0; i <= newloc; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = start; i <= end; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = newloc + 1; i <= start - 1; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = end + 1; i < ( int ) words.size(); i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- } else {
- if ( newloc > end ) {
- for ( int i = 0; i <= start - 1; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = end + 1; i <= newloc; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = start; i <= end; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = newloc + 1; i < ( int ) words.size(); i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- } else {
- // we are moving inside of ourselves
- for ( int i = 0; i <= start - 1; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = end + 1; ( i < ( int ) words.size() ) && ( i <= ( end + ( newloc - start ) ) ); i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = start; i <= end; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
+ if ( newloc == -1 )
+ {
+ for ( int i = start; i <= end;i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = 0; i <= start - 1;i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; i < ( int ) words.size();i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
}
- for ( int i = ( end + ( newloc - start ) + 1 ); i < ( int ) words.size(); i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
+ else
+ {
+ if ( newloc < start )
+ {
+
+ for ( int i = 0; i < newloc; i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = start; i <= end;i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = newloc ; i < start ;i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; i < ( int ) words.size();i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ }
+ else
+ {
+ if ( newloc > end )
+ {
+ for ( int i = 0; i <= start - 1; i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; i <= newloc;i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = start; i <= end;i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = newloc + 1; i < ( int ) words.size();i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ }
+ else
+ {
+ // we are moving inside of ourselves
+ for ( int i = 0; i <= start - 1; i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; ( i < ( int ) words.size() ) && ( i <= ( end + ( newloc - start ) ) ); i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = start; i <= end;i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = ( end + ( newloc - start ) + 1 ); i < ( int ) words.size();i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ }
+ }
}
- }
+ NBR_PERMUTS_CONSID++;
+
+ if ( PRINT_DEBUG )
+ {
+ cerr << "nwords" << join(" ",nwords) << endl;
+// cerr << "spans" << spans. << endl;
+ }
+
+ to_return.nwords = nwords;
+ to_return.aftershift = spans;
+ return to_return;
+ }
+ void terCalc::setDebugMode ( bool b )
+ {
+ PRINT_DEBUG = b;
}
- }
- NUM_SHIFTS_CONSIDERED++;
-
- toreturn.nwords = nwords;
- toreturn.aftershift = spans;
- return toreturn;
-}
-void terCalc::setDebugMode ( bool b )
-{
- PRINT_DEBUG = b;
-}
}
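The rewritten terCalc::minimizeDistanceEdition above is, at its core, a word-level edit-distance dynamic programme whose backpointers ('A' match, 'S' substitution, 'I' insertion, 'D' deletion) later drive calculateTerAlignment; the patch adds beam pruning (TAILLE_BEAM), span filtering and configurable costs on top. A minimal, self-contained sketch of just that core recurrence and backtrace (illustrative only; the function name, example sentences and unit costs are not taken from the patch):

#include <algorithm>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Same edit symbols as the patch: 'A' match, 'S' substitution, 'I' insertion, 'D' deletion.
static std::pair<int, std::string> editDistance(const std::vector<std::string>& ref,
                                                const std::vector<std::string>& hyp)
{
  const size_t R = ref.size(), H = hyp.size();
  std::vector<std::vector<int> >  S(R + 1, std::vector<int>(H + 1, 0));
  std::vector<std::vector<char> > P(R + 1, std::vector<char>(H + 1, '0'));
  for (size_t i = 1; i <= R; ++i) { S[i][0] = (int) i; P[i][0] = 'D'; }
  for (size_t j = 1; j <= H; ++j) { S[0][j] = (int) j; P[0][j] = 'I'; }
  for (size_t i = 1; i <= R; ++i) {
    for (size_t j = 1; j <= H; ++j) {
      const bool match = (ref[i-1] == hyp[j-1]);
      const int sub = S[i-1][j-1] + (match ? 0 : 1);   // match or substitution
      const int del = S[i-1][j] + 1;                   // reference word missing from hyp
      const int ins = S[i][j-1] + 1;                   // extra hypothesis word
      S[i][j] = std::min(sub, std::min(del, ins));
      P[i][j] = (S[i][j] == sub) ? (match ? 'A' : 'S') : (S[i][j] == del ? 'D' : 'I');
    }
  }
  // Backtrace from the bottom-right corner, as the patch does in its two-pass loop.
  std::string path;
  size_t i = R, j = H;
  while (i > 0 || j > 0) {
    const char p = P[i][j];
    path.insert(path.begin(), p);
    if (p == 'A' || p == 'S') { --i; --j; }
    else if (p == 'D')        { --i; }
    else                      { --j; }
  }
  return std::make_pair(S[R][H], path);
}

int main()
{
  std::vector<std::string> ref, hyp;
  ref.push_back("the"); ref.push_back("cat"); ref.push_back("sat");
  hyp.push_back("a");   hyp.push_back("cat"); hyp.push_back("sat"); hyp.push_back("down");
  std::pair<int, std::string> r = editDistance(ref, hyp);
  std::cout << r.first << " edits, path " << r.second << std::endl;  // prints: 2 edits, path SAAI
  return 0;
}

For this example the sketch prints "2 edits, path SAAI", i.e. one substitution and one insertion, which is the same path the patched code would encode before any shifts are considered.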
diff --git a/mert/TER/tercalc.h b/mert/TER/tercalc.h
index 9e1a01f65..92d9caf2b 100644
--- a/mert/TER/tercalc.h
+++ b/mert/TER/tercalc.h
@@ -1,5 +1,25 @@
-#ifndef MERT_TER_TER_CALC_H_
-#define MERT_TER_TER_CALC_H_
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
+#ifndef _TERCPPTERCALC_H__
+#define _TERCPPTERCALC_H__
#include <vector>
#include <stdio.h>
@@ -21,62 +41,63 @@ namespace TERCpp
{
// typedef size_t WERelement[2];
// Alignment vector containing the word's hash and its evaluation (0=ok, 1=sub, 2=ins, 3=del)
-typedef vector<terShift> vecTerShift;
-/**
- @author
-*/
-class terCalc
-{
-private :
+ typedef vector<terShift> vecTerShift;
+ /**
+ @author
+ */
+ class terCalc
+ {
+ private :
// Alignment vector containing the word's hash and its evaluation (0=ok, 1=sub, 2=ins, 3=del)
- WERalignment l_WERalignment;
-// HashMap contenant les caleurs de hash de chaque mot
- hashMap bagOfWords;
- int MAX_SHIFT_SIZE;
- /* Variables for some internal counting. */
- int NUM_SEGMENTS_SCORED;
- int NUM_SHIFTS_CONSIDERED;
- int NUM_BEAM_SEARCH_CALLS;
- int MAX_SHIFT_DIST;
- bool PRINT_DEBUG;
+ WERalignment l_WERalignment;
+// HashMap containing the hash values of each word
+ hashMap bagOfWords;
+ int TAILLE_PERMUT_MAX;
+    // Internal counters
+ int NBR_SEGS_EVALUATED;
+ int NBR_PERMUTS_CONSID;
+ int NBR_BS_APPELS;
+ int DIST_MAX_PERMUT;
+ bool PRINT_DEBUG;
- /* These are resized by the MIN_EDIT_DIST code if they aren't big enough */
- double S[1000][1000];
- char P[1000][1000];
- vector<vecInt> refSpans;
- vector<vecInt> hypSpans;
- int BEAM_WIDTH;
+    // Used in minimizeDistanceEdition; these arrays are not resized
+ double S[1000][1000];
+ char P[1000][1000];
+ vector<vecInt> refSpans;
+ vector<vecInt> hypSpans;
+ int TAILLE_BEAM;
-public:
- int shift_cost;
- int insert_cost;
- int delete_cost;
- int substitute_cost;
- int match_cost;
- double INF;
- terCalc();
+ public:
+ int shift_cost;
+ int insert_cost;
+ int delete_cost;
+ int substitute_cost;
+ int match_cost;
+ double infinite;
+ terCalc();
// ~terCalc();
// size_t* hashVec ( vector<string> s );
- void setDebugMode ( bool b );
- int WERCalculation ( size_t * ref, size_t * hyp );
- int WERCalculation ( vector<string> ref, vector<string> hyp );
- int WERCalculation ( vector<int> ref, vector<int> hyp );
+ void setDebugMode ( bool b );
+// int WERCalculation ( size_t * ref, size_t * hyp );
+// int WERCalculation ( vector<string> ref, vector<string> hyp );
+// int WERCalculation ( vector<int> ref, vector<int> hyp );
+ terAlignment WERCalculation ( vector<string> hyp, vector<string> ref );
// string vectorToString(vector<string> vec);
// vector<string> subVector(vector<string> vec, int start, int end);
- hashMapInfos BuildWordMatches ( vector<string> hyp, vector<string> ref );
- terAlignment MinEditDist ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans );
- bool spanIntersection ( vecInt refSpan, vecInt hypSpan );
- terAlignment TER ( vector<string> hyp, vector<string> ref , float avRefLength );
- terAlignment TER ( vector<string> hyp, vector<string> ref );
- terAlignment TER ( vector<int> hyp, vector<int> ref );
- bestShiftStruct CalcBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment cur_align );
- void FindAlignErr ( terAlignment align, bool* herr, bool* rerr, int* ralign );
- vector<vecTerShift> GatherAllPossShifts ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign );
- alignmentStruct PerformShift ( vector<string> words, terShift s );
- alignmentStruct PerformShift ( vector<string> words, int start, int end, int newloc );
-};
+ hashMapInfos createConcordMots ( vector<string> hyp, vector<string> ref );
+ terAlignment minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans );
+ bool trouverIntersection ( vecInt refSpan, vecInt hypSpan );
+ terAlignment TER ( vector<string> hyp, vector<string> ref , float avRefLength );
+ terAlignment TER ( vector<string> hyp, vector<string> ref );
+ terAlignment TER ( vector<int> hyp, vector<int> ref );
+ bestShiftStruct findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment cur_align );
+ void calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign );
+ vector<vecTerShift> calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign );
+ alignmentStruct permuter ( vector<string> words, terShift s );
+ alignmentStruct permuter ( vector<string> words, int start, int end, int newloc );
+ };
}
-#endif // MERT_TER_TER_CALC_H_
+#endif
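The renamed public entry points above all feed terCalc::TER, which keeps the number of edits (plus the accumulated shift costs) in terAlignment::numEdits and the reference length in numWords / averageWords. The score itself is the usual TER ratio, edits divided by the (average) reference length; a minimal sketch of that final step (the helper name and the example figures are illustrative, not from the patch):

#include <iostream>

// TER = (word edits + shifts) / average number of reference words.
static double terScore(double numEdits, double averageRefWords)
{
  return numEdits / averageRefWords;
}

int main()
{
  // e.g. 2 substitutions + 1 insertion + 1 shift against a 10-word reference
  std::cout << terScore(4.0, 10.0) << std::endl;  // prints 0.4
  return 0;
}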
diff --git a/mert/TER/tools.cpp b/mert/TER/tools.cpp
index 2d910ec05..64e1483b6 100644
--- a/mert/TER/tools.cpp
+++ b/mert/TER/tools.cpp
@@ -1,545 +1,772 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
#include "tools.h"
using namespace std;
+using namespace boost::xpressive;
namespace Tools
{
-string vectorToString ( vector<string> vec )
-{
- string retour ( "" );
- for ( vector<string>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
- if ( vecIter == vec.begin() ) {
- retour += ( *vecIter );
- } else {
- retour += "\t" + ( *vecIter );
+ string vectorToString ( vector<string> vec )
+ {
+ string retour ( "" );
+ for ( vector<string>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
+ {
+ if ( vecIter == vec.begin() )
+ {
+ retour += ( *vecIter );
+ }
+ else
+ {
+ retour += "\t" + ( *vecIter );
+ }
+ }
+ return retour;
+ }
+ string vectorToString ( vector<char> vec )
+ {
+ stringstream retour;
+ retour.str("");
+ for ( vector<char>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
+ {
+ if ( vecIter == vec.begin() )
+ {
+ retour << ( *vecIter );
+ }
+ else
+ {
+ retour << "\t" << ( *vecIter );
+ }
+ }
+ return retour.str();
+ }
+ string vectorToString ( vector<int> vec )
+ {
+ stringstream retour;
+ retour.str("");
+ for ( vector<int>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
+ {
+ if ( vecIter == vec.begin() )
+ {
+ retour << ( *vecIter );
+ }
+ else
+ {
+ retour << "\t" << ( *vecIter );
+ }
+ }
+ return retour.str();
}
- }
- return retour;
-}
-string vectorToString ( vector< string > vec, string s )
-{
- string retour ( "" );
- for ( vector<string>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
- if ( vecIter == vec.begin() ) {
- retour += ( *vecIter );
- } else {
- retour += s + ( *vecIter );
+ string vectorToString ( vector< string > vec, string s )
+ {
+ string retour ( "" );
+ for ( vector<string>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
+ {
+ if ( vecIter == vec.begin() )
+ {
+ retour += ( *vecIter );
+ }
+ else
+ {
+ retour += s + ( *vecIter );
+ }
+ }
+ return retour;
+
}
- }
- return retour;
-}
+ string vectorToString ( vector< char > vec, string s )
+ {
+ stringstream retour;
+ retour.str("");
+ for ( vector<char>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
+ {
+ if ( vecIter == vec.begin() )
+ {
+ retour << ( *vecIter );
+ }
+ else
+ {
+ retour << s << ( *vecIter );
+ }
+ }
+ return retour.str();
-vector<string> subVector ( vector<string> vec, int start, int end )
-{
- vector<string> retour;
- if ( start > end ) {
- cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
- exit ( 0 );
- }
- for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
-}
+ }
-vector<int> subVector ( vector<int> vec, int start, int end )
-{
- vector<int> retour;
- if ( start > end ) {
- cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
- exit ( 0 );
- }
- for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
-}
+ string vectorToString ( vector< int > vec, string s )
+ {
+ stringstream retour;
+ retour.str("");
+ for ( vector<int>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
+ {
+ if ( vecIter == vec.begin() )
+ {
+ retour << ( *vecIter );
+ }
+ else
+ {
+ retour << s << ( *vecIter );
+ }
+ }
+ return retour.str();
-vector<float> subVector ( vector<float> vec, int start, int end )
-{
- vector<float> retour;
- if ( start > end ) {
- cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
- exit ( 0 );
- }
- for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
-}
+ }
+
+ string vectorToString ( vector< bool > vec, string s )
+ {
+ stringstream retour;
+ retour.str("");
+ for ( vector<bool>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
+ {
+ if ( vecIter == vec.begin() )
+ {
+ retour << ( *vecIter );
+ }
+ else
+ {
+ retour << s << ( *vecIter );
+ }
+ }
+ return retour.str();
-vector<string> copyVector ( vector<string> vec )
-{
- vector<string> retour;
- for ( int i = 0; i < ( int ) vec.size(); i++ ) {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
-}
-vector<int> copyVector ( vector<int> vec )
-{
- vector<int> retour;
- for ( int i = 0; i < ( int ) vec.size(); i++ ) {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
-}
-vector<float> copyVector ( vector<float> vec )
-{
- vector<float> retour;
- for ( int i = 0; i < ( int ) vec.size(); i++ ) {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
-}
-vector<string> stringToVector ( string s, string tok )
-{
- vector<string> to_return;
- string to_push ( "" );
- bool pushed = false;
- string::iterator sIt;
- for ( sIt = s.begin(); sIt < s.end(); sIt++ ) {
- pushed = false;
- for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) {
- if ( ( *sIt ) == ( *sTok ) ) {
- to_return.push_back ( to_push );
- to_push = "";
- pushed = true;
- }
}
- if ( !pushed ) {
- to_push.push_back ( ( *sIt ) );
+ string vectorToString ( char* vec, string s , int taille)
+ {
+ stringstream retour;
+ retour.str("");
+ int l_i;
+ for ( l_i=0; l_i < taille ; l_i++)
+ {
+ if ( l_i == 0 )
+ {
+ retour << vec[l_i];
+ }
+ else
+ {
+ retour << s << vec[l_i];
+ }
+ }
+ return retour.str();
+
}
- }
- to_return.push_back ( to_push );
- return to_return;
-}
-vector<int> stringToVectorInt ( string s, string tok )
-{
- vector<int> to_return;
- string to_push ( "" );
- bool pushed = false;
- string::iterator sIt;
- for ( sIt = s.begin(); sIt < s.end(); sIt++ ) {
- pushed = false;
- for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) {
- if ( ( *sIt ) == ( *sTok ) ) {
- if ( ( int ) to_push.length() > 0 ) {
- to_return.push_back ( atoi ( to_push.c_str() ) );
+
+ string vectorToString ( int* vec, string s , int taille)
+ {
+ stringstream retour;
+ retour.str("");
+ int l_i;
+ for ( l_i=0; l_i < taille ; l_i++)
+ {
+ if ( l_i == 0 )
+ {
+ retour << vec[l_i];
+ }
+ else
+ {
+ retour << s << vec[l_i];
+ }
}
- to_push = "";
- pushed = true;
- }
+ return retour.str();
+
}
- if ( !pushed ) {
- to_push.push_back ( ( *sIt ) );
+
+ string vectorToString ( bool* vec, string s , int taille)
+ {
+ stringstream retour;
+ retour.str("");
+ int l_i;
+ for ( l_i=0; l_i < taille ; l_i++)
+ {
+ if ( l_i == 0 )
+ {
+ retour << vec[l_i];
+ }
+ else
+ {
+ retour << s << vec[l_i];
+ }
+ }
+ return retour.str();
+
}
- }
- if ( ( int ) to_push.length() > 0 ) {
- to_return.push_back ( atoi ( to_push.c_str() ) );
- }
- return to_return;
-}
-vector<float> stringToVectorFloat ( string s, string tok )
-{
- vector<float> to_return;
- string to_push ( "" );
- bool pushed = false;
- string::iterator sIt;
- for ( sIt = s.begin(); sIt < s.end(); sIt++ ) {
- pushed = false;
- for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) {
- if ( ( *sIt ) == ( *sTok ) ) {
- if ( ( int ) to_push.length() > 0 ) {
- to_return.push_back ( atof ( to_push.c_str() ) );
+
+ vector<string> subVector ( vector<string> vec, int start, int end )
+ {
+ vector<string> retour;
+ if ( start > end )
+ {
+ cerr << "ERROR : TERcalc::subVector : start > end" << endl;
+ exit ( 0 );
+ }
+ for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ )
+ {
+ retour.push_back ( vec.at ( i ) );
}
- to_push = "";
- pushed = true;
- }
+ return retour;
}
- if ( !pushed ) {
- to_push.push_back ( ( *sIt ) );
+
+ vector<int> subVector ( vector<int> vec, int start, int end )
+ {
+ vector<int> retour;
+ if ( start > end )
+ {
+ cerr << "ERROR : TERcalc::subVector : start > end" << endl;
+ exit ( 0 );
+ }
+ for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ )
+ {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
}
- }
- if ( ( int ) to_push.length() > 0 ) {
- to_return.push_back ( atoi ( to_push.c_str() ) );
- }
- return to_return;
-}
-
-string lowerCase ( string str )
-{
- for ( int i = 0; i < ( int ) str.size(); i++ ) {
- if ( ( str[i] >= 0x41 ) && ( str[i] <= 0x5A ) ) {
- str[i] = str[i] + 0x20;
+
+ vector<float> subVector ( vector<float> vec, int start, int end )
+ {
+ vector<float> retour;
+ if ( start > end )
+ {
+ cerr << "ERROR : TERcalc::subVector : start > end" << endl;
+ exit ( 0 );
+ }
+ for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ )
+ {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+ }
+
+ vector<string> copyVector ( vector<string> vec )
+ {
+ vector<string> retour;
+ for ( int i = 0; i < ( int ) vec.size(); i++ )
+ {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+ }
+ vector<int> copyVector ( vector<int> vec )
+ {
+ vector<int> retour;
+ for ( int i = 0; i < ( int ) vec.size(); i++ )
+ {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+ }
+ vector<float> copyVector ( vector<float> vec )
+ {
+ vector<float> retour;
+ for ( int i = 0; i < ( int ) vec.size(); i++ )
+ {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+ }
+ vector<string> stringToVector ( string s, string tok )
+ {
+ vector<string> to_return;
+ string to_push ( "" );
+ bool pushed = false;
+ string::iterator sIt;
+ for ( sIt = s.begin(); sIt < s.end(); sIt++ )
+ {
+ pushed = false;
+ for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ )
+ {
+ if ( ( *sIt ) == ( *sTok ) )
+ {
+ to_return.push_back ( to_push );
+ to_push = "";
+ pushed = true;
+ }
+ }
+ if ( !pushed )
+ {
+ to_push.push_back ( ( *sIt ) );
+ }
+ }
+ to_return.push_back ( to_push );
+ return to_return;
+ }
+ vector<int> stringToVectorInt ( string s, string tok )
+ {
+ vector<int> to_return;
+ string to_push ( "" );
+ bool pushed = false;
+ string::iterator sIt;
+ for ( sIt = s.begin(); sIt < s.end(); sIt++ )
+ {
+ pushed = false;
+ for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ )
+ {
+ if ( ( *sIt ) == ( *sTok ) )
+ {
+ if ( ( int ) to_push.length() > 0 )
+ {
+ to_return.push_back ( atoi ( to_push.c_str() ) );
+ }
+ to_push = "";
+ pushed = true;
+ }
+ }
+ if ( !pushed )
+ {
+ to_push.push_back ( ( *sIt ) );
+ }
+ }
+ if ( ( int ) to_push.length() > 0 )
+ {
+ to_return.push_back ( atoi ( to_push.c_str() ) );
+ }
+ return to_return;
+ }
+ vector<float> stringToVectorFloat ( string s, string tok )
+ {
+ vector<float> to_return;
+ string to_push ( "" );
+ bool pushed = false;
+ string::iterator sIt;
+ for ( sIt = s.begin(); sIt < s.end(); sIt++ )
+ {
+ pushed = false;
+ for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ )
+ {
+ if ( ( *sIt ) == ( *sTok ) )
+ {
+ if ( ( int ) to_push.length() > 0 )
+ {
+ to_return.push_back ( atof ( to_push.c_str() ) );
+ }
+ to_push = "";
+ pushed = true;
+ }
+ }
+ if ( !pushed )
+ {
+ to_push.push_back ( ( *sIt ) );
+ }
+ }
+ if ( ( int ) to_push.length() > 0 )
+ {
+ to_return.push_back ( atof ( to_push.c_str() ) );
+ }
+ return to_return;
}
- }
- return str;
-}
-/*
-string removePunctTercom ( string str )
-{
- string str_mod = str;
- sregex rex;
- string replace;
+ string lowerCase ( string str )
+ {
+ for ( int i = 0;i < ( int ) str.size();i++ )
+ {
+ if ( ( str[i] >= 0x41 ) && ( str[i] <= 0x5A ) )
+ {
+ str[i] = str[i] + 0x20;
+ }
+ }
+ return str;
+ }
+ string removePunctTercom ( string str )
+ {
+ string str_mod = str;
+ sregex rex;
+ string replace;
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\"]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\"]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[,]" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[,]" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([\\.]$)" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([\\.]$)" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\?]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\?]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\;]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\;]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\:]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\:]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\!]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\!]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\(]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\(]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\)]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\)]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+$" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+$" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
-}
-string removePunct ( string str )
-{
- string str_mod = str;
- sregex rex;
- string replace;
+ return str_mod;
+ }
+ string removePunct ( string str )
+ {
+ string str_mod = str;
+ sregex rex;
+ string replace;
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\"]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\"]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[,]" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[,]" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([\\.]$)" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([\\.]$)" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\?]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\?]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\;]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\;]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\:]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\:]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\!]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\!]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\(]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\(]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\)]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\)]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+$" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+$" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
-}
-string tokenizePunct ( string str )
-{
- string str_mod = str;
- sregex rex = sregex::compile ( "(([^0-9])([\\,])([^0-9]))" );
- string replace ( "$2 $3 $4" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ return str_mod;
+ }
+ string tokenizePunct ( string str )
+ {
+ string str_mod = str;
+ sregex rex = sregex::compile ( "(([^0-9])([\\,])([^0-9]))" );
+ string replace ( "$2 $3 $4" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(([^0-9])([\\.])([^0-9]))" );
- replace = ( "$2 $3 $4" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(([^0-9])([\\.])([^0-9]))" );
+ replace = ( "$2 $3 $4" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.]) )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.]) )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.])$)" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.])$)" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([A-Z]|[a-z]) ([\\.]) )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([A-Z]|[a-z]) ([\\.]) )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(([A-Z]|[a-z])([\\.]) ([A-Z]|[a-z])([\\.]) )" );
- replace = ( "$2.$4. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(([A-Z]|[a-z])([\\.]) ([A-Z]|[a-z])([\\.]) )" );
+ replace = ( "$2.$4. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\?]" );
- replace = ( " ? " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\?]" );
+ replace = ( " ? " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\;]" );
- replace = ( " ; " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\;]" );
+ replace = ( " ; " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(([^0-9])([\\:])([^0-9]))" );
- replace = ( "$2 $3 $4" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(([^0-9])([\\:])([^0-9]))" );
+ replace = ( "$2 $3 $4" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\!]" );
- replace = ( " ! " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\!]" );
+ replace = ( " ! " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\(]" );
- replace = ( " ( " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\(]" );
+ replace = ( " ( " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\)]" );
- replace = ( " ) " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\)]" );
+ replace = ( " ) " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\"]" );
- replace = ( " \" " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\"]" );
+ replace = ( " \" " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(num_ \\( ([^\\)]+) \\))" );
- replace = ( "num_($2)" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(num_ \\( ([^\\)]+) \\))" );
+ replace = ( "num_($2)" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(ordinal_ \\( ([^\\)]*) \\))" );
- replace = ( "ordinal_($2)" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(ordinal_ \\( ([^\\)]*) \\))" );
+ replace = ( "ordinal_($2)" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Mm]) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Mm]) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Mm]) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Mm]) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Dd]r) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Dd]r) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Dd]r) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Dd]r) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Mm]r) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Mm]r) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Mm]r) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Mm]r) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Mm]rs) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Mm]rs) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Mm]rs) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Mm]rs) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Nn]o) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Nn]o) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Nn]o) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Nn]o) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
// rex = sregex::compile ( "(^(([Jj]an)|([Ff]ev)|([Mm]ar)|([Aa]pr)|([Jj]un)|([Jj]ul)|([Aa]ug)|([Ss]ept)|([Oo]ct)|([Nn]ov)|([Dd]ec)) \\.)" );
// replace = ( "$2." );
// str_mod = regex_replace ( str_mod, rex, replace );
-//
+//
// rex = sregex::compile ( "( (([Jj]an)|([Ff]ev)|([Mm]ar)|([Aa]pr)|([Jj]un)|([Jj]ul)|([Aa]ug)|([Ss]ept)|([Oo]ct)|([Nn]ov)|([Dd]ec)) \\.)" );
// replace = ( " $2." );
// str_mod = regex_replace ( str_mod, rex, replace );
-//
+//
// rex = sregex::compile ( "(^(([Gg]en)|([Cc]ol)) \\.)" );
// replace = ( "$2." );
// str_mod = regex_replace ( str_mod, rex, replace );
-//
+//
// rex = sregex::compile ( "( (([Gg]en)|([Cc]ol)) \\.)" );
// replace = ( " $2." );
// str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^(([A-Z][a-z])) \\. )" );
- replace = ( "$2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^(([A-Z][a-z])) \\. )" );
+ replace = ( "$2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( (([A-Z][a-z])) \\. )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( (([A-Z][a-z])) \\. )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^(([A-Z][a-z][a-z])) \\. )" );
- replace = ( "$2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^(([A-Z][a-z][a-z])) \\. )" );
+ replace = ( "$2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( (([A-Z][a-z][a-z])) \\. )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( (([A-Z][a-z][a-z])) \\. )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+$" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+$" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
-}
-
-string normalizeStd ( string str )
-{
- string str_mod = str;
- sregex rex = sregex::compile ( "(<skipped>)" );
- string replace ( "" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ return str_mod;
+ }
- rex = sregex::compile ( "-\n" );
- replace = ( "" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ string normalizeStd ( string str )
+ {
+ string str_mod = str;
+ sregex rex = sregex::compile ( "(<skipped>)" );
+ string replace ( "" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "\n" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "-\n" );
+ replace = ( "" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&quot;" );
- replace = ( "\"" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "\n" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&amp;" );
- replace = ( "& " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "&quot;" );
+ replace = ( "\"" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&lt;" );
- replace = ( "<" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "&amp;" );
+ replace = ( "& " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&gt;" );
- replace = ( ">" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "&lt;" );
+ replace = ( "<" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
-}
-*/
+ rex = sregex::compile ( "&gt;" );
+ replace = ( ">" );
+ str_mod = regex_replace ( str_mod, rex, replace );
-param copyParam ( param p )
-{
- param to_return;
- to_return.caseOn = p.caseOn;
- to_return.noPunct = p.noPunct;
- to_return.debugMode = p.debugMode;
- to_return.hypothesisFile = p.hypothesisFile;
- to_return.referenceFile = p.referenceFile;
- to_return.normalize = p.normalize;
- to_return.noTxtIds = p.noTxtIds;
- to_return.outputFileExtension = p.outputFileExtension;
- to_return.outputFileName = p.outputFileName;
- to_return.sgmlInputs = p.sgmlInputs;
- to_return.tercomLike = p.tercomLike;
- return to_return;
-}
-string printParams ( param p )
-{
- stringstream s;
- s << "caseOn = " << p.caseOn << endl;
- s << "noPunct = " << p.noPunct << endl;
- s << "debugMode = " << p.debugMode << endl;
- s << "hypothesisFile = " << p.hypothesisFile << endl;
- s << "referenceFile = " << p.referenceFile << endl;
- s << "normalize = " << p.normalize << endl;
- s << "noTxtIds = " << p.noTxtIds << endl;
- s << "outputFileExtension = " << p.outputFileExtension << endl;
- s << "outputFileName = " << p.outputFileName << endl;
- s << "sgmlInputs = " << p.sgmlInputs << endl;
- s << "tercomLike = " << p.tercomLike << endl;
- return s.str();
+ return str_mod;
+ }
-}
+ param copyParam ( param p )
+ {
+ param to_return;
+ to_return.caseOn = p.caseOn;
+ to_return.noPunct = p.noPunct;
+ to_return.debugMode = p.debugMode;
+ to_return.debugLevel = p.debugLevel;
+ to_return.hypothesisFile = p.hypothesisFile;
+ to_return.referenceFile = p.referenceFile;
+ to_return.normalize = p.normalize;
+ to_return.noTxtIds = p.noTxtIds;
+ to_return.outputFileExtension = p.outputFileExtension;
+ to_return.outputFileName = p.outputFileName;
+ to_return.sgmlInputs = p.sgmlInputs;
+ to_return.tercomLike = p.tercomLike;
+ to_return.printAlignments = p.printAlignments;
+ to_return.WER=p.WER;
+ return to_return;
+ }
+ string printParams ( param p )
+ {
+ stringstream s;
+ s << "caseOn = " << p.caseOn << endl;
+ s << "noPunct = " << p.noPunct << endl;
+ s << "debugMode = " << p.debugMode << endl;
+ s << "debugLevel = " << p.debugLevel << endl;
+ s << "hypothesisFile = " << p.hypothesisFile << endl;
+ s << "referenceFile = " << p.referenceFile << endl;
+ s << "normalize = " << p.normalize << endl;
+ s << "noTxtIds = " << p.noTxtIds << endl;
+ s << "outputFileExtension = " << p.outputFileExtension << endl;
+ s << "outputFileName = " << p.outputFileName << endl;
+ s << "sgmlInputs = " << p.sgmlInputs << endl;
+ s << "tercomLike = " << p.tercomLike << endl;
+ return s.str();
+ }
+ string join ( string delim, vector<string> arr )
+ {
+ if ( ( int ) arr.size() == 0 ) return "";
+// if ((int)delim.compare("") == 0) delim = new String("");
+// String s = new String("");
+ stringstream s;
+ s.str ( "" );
+ for ( int i = 0; i < ( int ) arr.size(); i++ )
+ {
+ if ( i == 0 )
+ {
+ s << arr.at ( i );
+ }
+ else
+ {
+ s << delim << arr.at ( i );
+ }
+ }
+ return s.str();
+// return "";
+ }
}
diff --git a/mert/TER/tools.h b/mert/TER/tools.h
index 6f78b9a6a..0a85e7b4b 100644
--- a/mert/TER/tools.h
+++ b/mert/TER/tools.h
@@ -1,38 +1,66 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
#ifndef MERT_TER_TOOLS_H_
#define MERT_TER_TOOLS_H_
+
#include <vector>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <sstream>
+#include <boost/xpressive/xpressive.hpp>
+
using namespace std;
namespace Tools
{
-typedef vector<double> vecDouble;
-typedef vector<char> vecChar;
-typedef vector<int> vecInt;
-typedef vector<float> vecFloat;
-typedef vector<string> vecString;
-typedef vector<string> alignmentElement;
-typedef vector<alignmentElement> WERalignment;
+ typedef vector<double> vecDouble;
+ typedef vector<char> vecChar;
+ typedef vector<int> vecInt;
+ typedef vector<float> vecFloat;
+ typedef vector<size_t> vecSize_t;
+ typedef vector<string> vecString;
+ typedef vector<string> alignmentElement;
+ typedef vector<alignmentElement> WERalignment;
-struct param {
- bool debugMode;
- string referenceFile; // path to the resources
- string hypothesisFile; // path to the configuration files
- string outputFileExtension;
- string outputFileName;
- bool noPunct;
- bool caseOn;
- bool normalize;
- bool tercomLike;
- bool sgmlInputs;
- bool noTxtIds;
+struct param
+{
+ bool debugMode;
+ string referenceFile; // path to the resources
+ string hypothesisFile; // path to the configuration files
+ string outputFileExtension;
+ string outputFileName;
+ bool noPunct;
+ bool caseOn;
+ bool normalize;
+ bool tercomLike;
+ bool sgmlInputs;
+ bool noTxtIds;
+ bool printAlignments;
+ bool WER;
+ int debugLevel;
};
// param = { false, "","","","" };
@@ -40,26 +68,35 @@ struct param {
// private:
// public:
-string vectorToString ( vector<string> vec );
-string vectorToString ( vector<string> vec, string s );
-vector<string> subVector ( vector<string> vec, int start, int end );
-vector<int> subVector ( vector<int> vec, int start, int end );
-vector<float> subVector ( vector<float> vec, int start, int end );
-vector<string> copyVector ( vector<string> vec );
-vector<int> copyVector ( vector<int> vec );
-vector<float> copyVector ( vector<float> vec );
-vector<string> stringToVector ( string s, string tok );
-vector<int> stringToVectorInt ( string s, string tok );
-vector<float> stringToVectorFloat ( string s, string tok );
-string lowerCase(string str);
-string removePunct(string str);
-string tokenizePunct(string str);
-string removePunctTercom(string str);
-string normalizeStd(string str);
-string printParams(param p);
+ string vectorToString ( vector<string> vec );
+ string vectorToString ( vector<char> vec );
+ string vectorToString ( vector<int> vec );
+ string vectorToString ( vector<string> vec, string s );
+ string vectorToString ( vector<char> vec, string s );
+ string vectorToString ( vector<int> vec, string s );
+ string vectorToString ( vector<bool> vec, string s );
+ string vectorToString ( char* vec, string s, int taille );
+ string vectorToString ( int* vec, string s , int taille );
+ string vectorToString ( bool* vec, string s , int taille );
+ vector<string> subVector ( vector<string> vec, int start, int end );
+ vector<int> subVector ( vector<int> vec, int start, int end );
+ vector<float> subVector ( vector<float> vec, int start, int end );
+ vector<string> copyVector ( vector<string> vec );
+ vector<int> copyVector ( vector<int> vec );
+ vector<float> copyVector ( vector<float> vec );
+ vector<string> stringToVector ( string s, string tok );
+ vector<string> stringToVector ( char s, string tok );
+ vector<string> stringToVector ( int s, string tok );
+ vector<int> stringToVectorInt ( string s, string tok );
+ vector<float> stringToVectorFloat ( string s, string tok );
+ string lowerCase(string str);
+ string removePunct(string str);
+ string tokenizePunct(string str);
+ string removePunctTercom(string str);
+ string normalizeStd(string str);
+ string printParams(param p);
+ string join ( string delim, vector<string> arr );
// };
-param copyParam(param p);
-
+ param copyParam(param p);
}
-
-#endif // MERT_TER_TOOLS_H_
+#endif
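
As a quick illustration of how the helpers declared above compose, here is a minimal sketch; the sample sentence and the in-tree include path are assumptions, while the signatures used (stringToVector, subVector, vectorToString, join) are exactly the ones declared in tools.h.

#include <iostream>
#include <string>
#include <vector>
#include "tools.h"  // mert/TER/tools.h, assuming an in-tree include path

using namespace std;
using namespace Tools;

int main()
{
  string sent = "the quick brown fox";
  // Split on spaces, slice out tokens [1,3), then glue the pieces back together.
  vector<string> toks = stringToVector(sent, " ");  // {"the","quick","brown","fox"}
  vector<string> mid  = subVector(toks, 1, 3);      // {"quick","brown"}
  cout << vectorToString(mid, "_") << endl;         // prints: quick_brown
  cout << join(" ", toks) << endl;                  // prints: the quick brown fox
  return 0;
}

Note that all of these helpers take their vectors by value, so every call copies its input; that matches the existing tercpp style rather than being a recommendation.
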
diff --git a/mert/kbmira.cpp b/mert/kbmira.cpp
index a2665ac13..90f201a00 100644
--- a/mert/kbmira.cpp
+++ b/mert/kbmira.cpp
@@ -39,8 +39,10 @@ de recherches du Canada
#include <boost/program_options.hpp>
#include <boost/scoped_ptr.hpp>
+#include "util/exception.hh"
+
#include "BleuScorer.h"
-#include "HypPackEnumerator.h"
+#include "HopeFearDecoder.h"
#include "MiraFeatureVector.h"
#include "MiraWeightVector.h"
@@ -49,38 +51,16 @@ using namespace MosesTuning;
namespace po = boost::program_options;
-ValType evaluate(HypPackEnumerator* train, const AvgWeightVector& wv)
-{
- vector<ValType> stats(kBleuNgramOrder*2+1,0);
- for(train->reset(); !train->finished(); train->next()) {
- // Find max model
- size_t max_index=0;
- ValType max_score=0;
- for(size_t i=0; i<train->cur_size(); i++) {
- MiraFeatureVector vec(train->featuresAt(i));
- ValType score = wv.score(vec);
- if(i==0 || score > max_score) {
- max_index = i;
- max_score = score;
- }
- }
- // Update stats
- const vector<float>& sent = train->scoresAt(max_index);
- for(size_t i=0; i<sent.size(); i++) {
- stats[i]+=sent[i];
- }
- }
- return unsmoothedBleu(stats);
-}
-
int main(int argc, char** argv)
{
- const ValType BLEU_RATIO = 5;
bool help;
string denseInitFile;
string sparseInitFile;
+ string type = "nbest";
vector<string> scoreFiles;
vector<string> featureFiles;
+ vector<string> referenceFiles; //for hg mira
+ string hgDir;
int seed;
string outputFile;
float c = 0.01; // Step-size cap C
@@ -91,25 +71,30 @@ int main(int argc, char** argv)
bool model_bg = false; // Use model for background corpus
bool verbose = false; // Verbose updates
bool safe_hope = false; // Model score cannot have more than BLEU_RATIO times more influence than BLEU
+ size_t hgPruning = 50; //prune hypergraphs to have this many edges per reference word
// Command-line processing follows pro.cpp
po::options_description desc("Allowed options");
desc.add_options()
("help,h", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
+ ("type,t", po::value<string>(&type), "Either nbest or hypergraph")
("scfile,S", po::value<vector<string> >(&scoreFiles), "Scorer data files")
("ffile,F", po::value<vector<string> > (&featureFiles), "Feature data files")
+ ("hgdir,H", po::value<string> (&hgDir), "Directory containing hypergraphs")
+ ("reference,R", po::value<vector<string> > (&referenceFiles), "Reference files, only required for hypergraph mira")
("random-seed,r", po::value<int>(&seed), "Seed for random number generation")
("output-file,o", po::value<string>(&outputFile), "Output file")
("cparam,C", po::value<float>(&c), "MIRA C-parameter, lower for more regularization (default 0.01)")
("decay,D", po::value<float>(&decay), "BLEU background corpus decay rate (default 0.999)")
("iters,J", po::value<int>(&n_iters), "Number of MIRA iterations to run (default 60)")
- ("dense-init,d", po::value<string>(&denseInitFile), "Weight file for dense features")
+ ("dense-init,d", po::value<string>(&denseInitFile), "Weight file for dense features. This should have 'name= value' on each line, or (legacy) should be the Moses mert 'init.opt' format.")
("sparse-init,s", po::value<string>(&sparseInitFile), "Weight file for sparse features")
("streaming", po::value(&streaming)->zero_tokens()->default_value(false), "Stream n-best lists to save memory, implies --no-shuffle")
("no-shuffle", po::value(&no_shuffle)->zero_tokens()->default_value(false), "Don't shuffle hypotheses before each epoch")
("model-bg", po::value(&model_bg)->zero_tokens()->default_value(false), "Use model instead of hope for BLEU background")
("verbose", po::value(&verbose)->zero_tokens()->default_value(false), "Verbose updates")
("safe-hope", po::value(&safe_hope)->zero_tokens()->default_value(false), "Mode score's influence on hope decoding is limited")
+ ("hg-prune", po::value<size_t>(&hgPruning), "Prune hypergraphs to have this many edges per reference word")
;
po::options_description cmdline_options;
@@ -145,12 +130,56 @@ int main(int argc, char** argv)
cerr << "could not open dense initfile: " << denseInitFile << endl;
exit(3);
}
+ if (verbose) cerr << "Reading dense features:" << endl;
parameter_t val;
getline(opt,buffer);
- istringstream strstrm(buffer);
- while(strstrm >> val) {
- initParams.push_back(val);
+ if (buffer.find_first_of("=") == buffer.npos) {
+ UTIL_THROW_IF(type == "hypergraph", util::Exception, "For hypergraph version, require dense features in 'name= value' format");
+ cerr << "WARN: dense features in deprecated Moses mert format. Prefer 'name= value' format." << endl;
+ istringstream strstrm(buffer);
+ while(strstrm >> val) {
+ initParams.push_back(val);
+ if(verbose) cerr << val << endl;
+ }
+ } else {
+ vector<string> names;
+ string last_name = "";
+ size_t feature_ctr = 1;
+ do {
+ size_t equals = buffer.find_last_of("=");
+ UTIL_THROW_IF(equals == buffer.npos, util::Exception, "Incorrect format in dense feature file: '"
+ << buffer << "'");
+ string name = buffer.substr(0,equals);
+ names.push_back(name);
+ initParams.push_back(boost::lexical_cast<ValType>(buffer.substr(equals+2)));
+
+ //Names for features with several values need to have their id added
+ if (name != last_name) feature_ctr = 1;
+ last_name = name;
+ if (feature_ctr>1) {
+ stringstream namestr;
+ namestr << names.back() << "_" << feature_ctr;
+ names[names.size()-1] = namestr.str();
+ if (feature_ctr == 2) {
+ stringstream namestr;
+ namestr << names[names.size()-2] << "_" << (feature_ctr-1);
+ names[names.size()-2] = namestr.str();
+ }
+ }
+ ++feature_ctr;
+
+ } while(getline(opt,buffer));
+
+
+ //Make sure that SparseVector encodes dense feature names as 0..n-1.
+ for (size_t i = 0; i < names.size(); ++i) {
+ size_t id = SparseVector::encode(names[i]);
+ assert(id == i);
+ if (verbose) cerr << names[i] << " " << initParams[i] << endl;
+ }
+
}
+
opt.close();
}
size_t initDenseSize = initParams.size();
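
For reference, a dense initialisation file in the 'name= value' format parsed above could look like the following; the feature names and weights are purely illustrative, and the single space after '=' matters because the value is taken from buffer.substr(equals+2).

LM0= 0.500
WordPenalty0= -1.000
Distortion0= 0.300
TranslationModel0= 0.200
TranslationModel0= 0.200
TranslationModel0= 0.100
TranslationModel0= 0.300

Repeated names such as TranslationModel0 are disambiguated by the feature_ctr loop into TranslationModel0_1 ... TranslationModel0_4, and SparseVector::encode() is expected to hand them ids 0..n-1 in file order. The legacy all-numbers init.opt format is still accepted for the n-best path but is rejected when --type hypergraph is requested.
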
@@ -188,82 +217,45 @@ int main(int argc, char** argv)
}
bg.push_back(kBleuNgramOrder);
+ boost::scoped_ptr<HopeFearDecoder> decoder;
+ if (type == "nbest") {
+ decoder.reset(new NbestHopeFearDecoder(featureFiles, scoreFiles, streaming, no_shuffle, safe_hope));
+ } else if (type == "hypergraph") {
+ decoder.reset(new HypergraphHopeFearDecoder(hgDir, referenceFiles, initDenseSize, streaming, no_shuffle, safe_hope, hgPruning, wv));
+ } else {
+ UTIL_THROW(util::Exception, "Unknown batch mira type: '" << type << "'");
+ }
+
// Training loop
- boost::scoped_ptr<HypPackEnumerator> train;
- if(streaming)
- train.reset(new StreamingHypPackEnumerator(featureFiles, scoreFiles));
- else
- train.reset(new RandomAccessHypPackEnumerator(featureFiles, scoreFiles, no_shuffle));
- cerr << "Initial BLEU = " << evaluate(train.get(), wv.avg()) << endl;
+ cerr << "Initial BLEU = " << decoder->Evaluate(wv.avg()) << endl;
ValType bestBleu = 0;
for(int j=0; j<n_iters; j++) {
// MIRA train for one epoch
- int iNumHyps = 0;
int iNumExamples = 0;
int iNumUpdates = 0;
ValType totalLoss = 0.0;
- for(train->reset(); !train->finished(); train->next()) {
- // Hope / fear decode
- ValType hope_scale = 1.0;
- size_t hope_index=0, fear_index=0, model_index=0;
- ValType hope_score=0, fear_score=0, model_score=0;
- int iNumHypsBackup = iNumHyps;
- for(size_t safe_loop=0; safe_loop<2; safe_loop++) {
- iNumHyps = iNumHypsBackup;
- ValType hope_bleu, hope_model;
- for(size_t i=0; i< train->cur_size(); i++) {
- const MiraFeatureVector& vec=train->featuresAt(i);
- ValType score = wv.score(vec);
- ValType bleu = sentenceLevelBackgroundBleu(train->scoresAt(i),bg);
- // Hope
- if(i==0 || (hope_scale*score + bleu) > hope_score) {
- hope_score = hope_scale*score + bleu;
- hope_index = i;
- hope_bleu = bleu;
- hope_model = score;
- }
- // Fear
- if(i==0 || (score - bleu) > fear_score) {
- fear_score = score - bleu;
- fear_index = i;
- }
- // Model
- if(i==0 || score > model_score) {
- model_score = score;
- model_index = i;
- }
- iNumHyps++;
- }
- // Outer loop rescales the contribution of model score to 'hope' in antagonistic cases
- // where model score is having far more influence than BLEU
- hope_bleu *= BLEU_RATIO; // We only care about cases where model has MUCH more influence than BLEU
- if(safe_hope && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
- hope_scale = abs(hope_bleu) / abs(hope_model);
- else break;
- }
+ size_t sentenceIndex = 0;
+ for(decoder->reset();!decoder->finished(); decoder->next()) {
+ HopeFearData hfd;
+ decoder->HopeFear(bg,wv,&hfd);
+
// Update weights
- if(hope_index!=fear_index) {
+ if (!hfd.hopeFearEqual && hfd.hopeBleu > hfd.fearBleu) {
// Vector difference
- const MiraFeatureVector& hope=train->featuresAt(hope_index);
- const MiraFeatureVector& fear=train->featuresAt(fear_index);
- MiraFeatureVector diff = hope - fear;
+ MiraFeatureVector diff = hfd.hopeFeatures - hfd.fearFeatures;
// Bleu difference
- const vector<float>& hope_stats = train->scoresAt(hope_index);
- ValType hopeBleu = sentenceLevelBackgroundBleu(hope_stats, bg);
- const vector<float>& fear_stats = train->scoresAt(fear_index);
- ValType fearBleu = sentenceLevelBackgroundBleu(fear_stats, bg);
- assert(hopeBleu + 1e-8 >= fearBleu);
- ValType delta = hopeBleu - fearBleu;
+ //assert(hfd.hopeBleu + 1e-8 >= hfd.fearBleu);
+ ValType delta = hfd.hopeBleu - hfd.fearBleu;
// Loss and update
ValType diff_score = wv.score(diff);
ValType loss = delta - diff_score;
if(verbose) {
- cerr << "Updating sent " << train->cur_id() << endl;
+ cerr << "Updating sent " << sentenceIndex << endl;
cerr << "Wght: " << wv << endl;
- cerr << "Hope: " << hope << " BLEU:" << hopeBleu << " Score:" << wv.score(hope) << endl;
- cerr << "Fear: " << fear << " BLEU:" << fearBleu << " Score:" << wv.score(fear) << endl;
+ cerr << "Hope: " << hfd.hopeFeatures << " BLEU:" << hfd.hopeBleu << " Score:" << wv.score(hfd.hopeFeatures) << endl;
+ cerr << "Fear: " << hfd.fearFeatures << " BLEU:" << hfd.fearBleu << " Score:" << wv.score(hfd.fearFeatures) << endl;
cerr << "Diff: " << diff << " BLEU:" << delta << " Score:" << diff_score << endl;
- cerr << "Loss: " << loss << " Scale: " << hope_scale << endl;
+ cerr << "Loss: " << loss << " Scale: " << 1 << endl;
cerr << endl;
}
if(loss > 0) {
@@ -273,16 +265,16 @@ int main(int argc, char** argv)
iNumUpdates++;
}
// Update BLEU statistics
- const vector<float>& model_stats = train->scoresAt(model_index);
for(size_t k=0; k<bg.size(); k++) {
bg[k]*=decay;
if(model_bg)
- bg[k]+=model_stats[k];
+ bg[k]+=hfd.modelStats[k];
else
- bg[k]+=hope_stats[k];
+ bg[k]+=hfd.hopeStats[k];
}
}
iNumExamples++;
+ ++sentenceIndex;
}
// Training Epoch summary
cerr << iNumUpdates << "/" << iNumExamples << " updates"
@@ -291,15 +283,16 @@ int main(int argc, char** argv)
// Evaluate current average weights
AvgWeightVector avg = wv.avg();
- ValType bleu = evaluate(train.get(), avg);
+ ValType bleu = decoder->Evaluate(avg);
cerr << ", BLEU = " << bleu << endl;
if(bleu > bestBleu) {
+ /*
size_t num_dense = train->num_dense();
if(initDenseSize>0 && initDenseSize!=num_dense) {
cerr << "Error: Initial dense feature count and dense feature count from n-best do not match: "
<< initDenseSize << "!=" << num_dense << endl;
exit(1);
- }
+ }*/
// Write to a file
ostream* out;
ofstream outFile;
@@ -314,11 +307,11 @@ int main(int argc, char** argv)
out = &cout;
}
for(size_t i=0; i<avg.size(); i++) {
- if(i<num_dense)
+ if(i<initDenseSize)
*out << "F" << i << " " << avg.weight(i) << endl;
else {
if(abs(avg.weight(i))>1e-8)
- *out << SparseVector::decode(i-num_dense) << " " << avg.weight(i) << endl;
+ *out << SparseVector::decode(i-initDenseSize) << " " << avg.weight(i) << endl;
}
}
outFile.close();
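
To make the new decoder selection concrete, a hypothetical invocation of each mode might look as follows; the file and directory names are placeholders, while the flags are the ones registered in the options block above.

# n-best mira, as before
kbmira --type nbest -F run1.features.dat -S run1.scores.dat \
       --dense-init dense.init -o mert.weights

# hypergraph mira, reading hypergraphs previously written by the decoder
kbmira --type hypergraph --hgdir hypergraph --reference ref.0 --reference ref.1 \
       --dense-init dense.init --hg-prune 50 -o mert.weights

In hypergraph mode the reference files are required and the dense weights must use the 'name= value' format; both constraints are enforced in the parsing code above.
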
diff --git a/mira/Decoder.cpp b/mira/Decoder.cpp
index a4d6fdb64..122106b96 100644
--- a/mira/Decoder.cpp
+++ b/mira/Decoder.cpp
@@ -220,7 +220,7 @@ vector< vector<const Word*> > MosesDecoder::runChartDecoder(const std::string& s
size_t epoch)
{
// run the decoder
- m_chartManager = new ChartManager(*m_sentence);
+ m_chartManager = new ChartManager(0,*m_sentence);
m_chartManager->ProcessSentence();
ChartKBestExtractor::KBestVec nBestList;
m_chartManager->CalcNBest(nBestSize, nBestList, distinct);
diff --git a/mira/Jamfile b/mira/Jamfile
index 3862cb172..e43a993b5 100644
--- a/mira/Jamfile
+++ b/mira/Jamfile
@@ -2,7 +2,7 @@ lib mira_lib :
[ glob *.cpp : *Test.cpp Main.cpp ]
../mert//mert_lib ../moses//moses ../OnDiskPt//OnDiskPt ..//boost_program_options ;
-exe mira : Main.cpp mira_lib ../mert//mert_lib ../moses//moses ../OnDiskPt//OnDiskPt ..//boost_program_options ;
+exe mira : Main.cpp mira_lib ../mert//mert_lib ../moses//moses ../OnDiskPt//OnDiskPt ..//boost_program_options ..//boost_filesystem ;
alias programs : mira ;
diff --git a/mira/Main.cpp b/mira/Main.cpp
index c22a80ece..70b5971c9 100644
--- a/mira/Main.cpp
+++ b/mira/Main.cpp
@@ -665,7 +665,7 @@ int main(int argc, char** argv)
}
// number of weight dumps this epoch
- // size_t weightMixingThisEpoch = 0;
+ size_t weightMixingThisEpoch = 0;
size_t weightEpochDump = 0;
size_t shardPosition = 0;
diff --git a/misc/Jamfile b/misc/Jamfile
index d466e306c..e90ec8d0d 100644
--- a/misc/Jamfile
+++ b/misc/Jamfile
@@ -36,4 +36,12 @@ else {
alias programsProbing ;
}
-alias programs : 1-1-Extraction TMining generateSequences processPhraseTable processLexicalTable queryPhraseTable queryLexicalTable programsMin programsProbing ;
+exe merge-sorted :
+merge-sorted.cc
+../moses//moses
+../moses/TranslationModel/UG/generic//generic
+$(TOP)//boost_iostreams
+$(TOP)//boost_program_options
+;
+
+alias programs : 1-1-Extraction TMining generateSequences processPhraseTable processLexicalTable queryPhraseTable queryLexicalTable programsMin programsProbing merge-sorted ;
diff --git a/contrib/m4m/util/merge-sorted.cc b/misc/merge-sorted.cc
index ae693215b..ae693215b 100644
--- a/contrib/m4m/util/merge-sorted.cc
+++ b/misc/merge-sorted.cc
diff --git a/moses-chart-cmd/Jamfile b/moses-chart-cmd/Jamfile
index ba107fa67..cab15ede1 100644
--- a/moses-chart-cmd/Jamfile
+++ b/moses-chart-cmd/Jamfile
@@ -1,2 +1,2 @@
-exe moses_chart : Main.cpp mbr.cpp IOWrapper.cpp TranslationAnalysis.cpp ../moses//moses $(TOP)//boost_iostreams ;
+exe moses_chart : Main.cpp mbr.cpp IOWrapper.cpp TranslationAnalysis.cpp ../moses//moses $(TOP)//boost_iostreams ..//boost_filesystem ..//z ;
diff --git a/moses-chart-cmd/Main.cpp b/moses-chart-cmd/Main.cpp
index 7540453cd..a90f69012 100644
--- a/moses-chart-cmd/Main.cpp
+++ b/moses-chart-cmd/Main.cpp
@@ -45,6 +45,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "IOWrapper.h"
#include "moses/FactorCollection.h"
+#include "moses/HypergraphOutput.h"
#include "moses/Manager.h"
#include "moses/Phrase.h"
#include "moses/Util.h"
@@ -80,9 +81,11 @@ void fix(std::ostream& stream, size_t size)
class TranslationTask : public Task
{
public:
- TranslationTask(InputType *source, IOWrapper &ioWrapper)
+ TranslationTask(InputType *source, IOWrapper &ioWrapper,
+ boost::shared_ptr<HypergraphOutput<ChartManager> > hypergraphOutput)
: m_source(source)
- , m_ioWrapper(ioWrapper) {
+ , m_ioWrapper(ioWrapper)
+ , m_hypergraphOutput(hypergraphOutput) {
}
~TranslationTask() {
@@ -116,11 +119,17 @@ public:
return;
}
- ChartManager manager(*m_source);
+ ChartManager manager(translationId,*m_source);
manager.ProcessSentence();
UTIL_THROW_IF2(staticData.UseMBR(), "Cannot use MBR");
+ // Output search graph in hypergraph format for Kenneth Heafield's lazy hypergraph decoder
+ if (m_hypergraphOutput.get()) {
+ m_hypergraphOutput->Write(manager);
+ }
+
+
// 1-best
const ChartHypothesis *bestHypo = manager.GetBestHypothesis();
m_ioWrapper.OutputBestHypo(bestHypo, translationId);
@@ -168,7 +177,7 @@ public:
if (staticData.GetOutputSearchGraph()) {
std::ostringstream out;
- manager.GetSearchGraph(translationId, out);
+ manager.OutputSearchGraphMoses( out);
OutputCollector *oc = m_ioWrapper.GetSearchGraphOutputCollector();
UTIL_THROW_IF2(oc == NULL, "File for search graph output not specified");
oc->Write(translationId, out.str());
@@ -187,6 +196,7 @@ private:
InputType *m_source;
IOWrapper &m_ioWrapper;
+ boost::shared_ptr<HypergraphOutput<ChartManager> > m_hypergraphOutput;
};
bool ReadInput(IOWrapper &ioWrapper, InputTypeEnum inputType, InputType*& source)
@@ -289,6 +299,11 @@ int main(int argc, char* argv[])
TRACE_ERR("\n");
}
+ boost::shared_ptr<HypergraphOutput<ChartManager> > hypergraphOutput;
+ if (staticData.GetOutputSearchGraphHypergraph()) {
+ hypergraphOutput.reset(new HypergraphOutput<ChartManager>(3));
+ }
+
if (ioWrapper == NULL)
return EXIT_FAILURE;
@@ -301,7 +316,10 @@ int main(int argc, char* argv[])
while(ReadInput(*ioWrapper,staticData.GetInputType(),source)) {
IFVERBOSE(1)
ResetUserTime();
- TranslationTask *task = new TranslationTask(source, *ioWrapper);
+
+ FeatureFunction::CallChangeSource(source);
+
+ TranslationTask *task = new TranslationTask(source, *ioWrapper, hypergraphOutput);
source = NULL; // task will delete source
#ifdef WITH_THREADS
pool.Submit(task); // pool will delete task
diff --git a/moses-cmd/Jamfile b/moses-cmd/Jamfile
index bddc10911..79af196e9 100644
--- a/moses-cmd/Jamfile
+++ b/moses-cmd/Jamfile
@@ -3,4 +3,16 @@ alias deps : IOWrapper.cpp mbr.cpp LatticeMBR.cpp TranslationAnalysis.cpp ..//z
exe moses : Main.cpp deps ;
exe lmbrgrid : LatticeMBRGrid.cpp deps ;
+if [ option.get "with-mm" : : "yes" ] {
+
+exe simulate-pe :
+simulate-pe.cc
+$(TOP)/moses/TranslationModel/UG/generic//generic
+$(TOP)//boost_program_options
+deps
+;
+
+alias programs : moses lmbrgrid simulate-pe ;
+} else {
alias programs : moses lmbrgrid ;
+}
diff --git a/moses-cmd/Main.cpp b/moses-cmd/Main.cpp
index db7aa69e8..a8b50db17 100644
--- a/moses-cmd/Main.cpp
+++ b/moses-cmd/Main.cpp
@@ -22,14 +22,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
/**
* Moses main, for single-threaded and multi-threaded.
**/
-
-#include <boost/algorithm/string/predicate.hpp>
-#include <boost/filesystem.hpp>
-#include <boost/iostreams/device/file.hpp>
-#include <boost/iostreams/filter/bzip2.hpp>
-#include <boost/iostreams/filter/gzip.hpp>
-#include <boost/iostreams/filtering_stream.hpp>
-
#include <exception>
#include <fstream>
#include <sstream>
@@ -47,6 +39,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "mbr.h"
#include "moses/Hypothesis.h"
+#include "moses/HypergraphOutput.h"
#include "moses/Manager.h"
#include "moses/StaticData.h"
#include "moses/Util.h"
@@ -95,7 +88,7 @@ public:
OutputCollector* alignmentInfoCollector,
OutputCollector* unknownsCollector,
bool outputSearchGraphSLF,
- bool outputSearchGraphHypergraph) :
+ boost::shared_ptr<HypergraphOutput<Manager> > hypergraphOutput) :
m_source(source), m_lineNumber(lineNumber),
m_outputCollector(outputCollector), m_nbestCollector(nbestCollector),
m_latticeSamplesCollector(latticeSamplesCollector),
@@ -104,7 +97,7 @@ public:
m_alignmentInfoCollector(alignmentInfoCollector),
m_unknownsCollector(unknownsCollector),
m_outputSearchGraphSLF(outputSearchGraphSLF),
- m_outputSearchGraphHypergraph(outputSearchGraphHypergraph) {}
+ m_hypergraphOutput(hypergraphOutput) {}
/** Translate one sentence
* gets called by main function implemented at end of this source file */
@@ -184,105 +177,10 @@ public:
}
// Output search graph in hypergraph format for Kenneth Heafield's lazy hypergraph decoder
- if (m_outputSearchGraphHypergraph) {
-
- vector<string> hypergraphParameters = staticData.GetParam("output-search-graph-hypergraph");
-
- bool appendSuffix;
- if (hypergraphParameters.size() > 0 && hypergraphParameters[0] == "true") {
- appendSuffix = true;
- } else {
- appendSuffix = false;
- }
-
- string compression;
- if (hypergraphParameters.size() > 1) {
- compression = hypergraphParameters[1];
- } else {
- compression = "txt";
- }
-
- string hypergraphDir;
- if ( hypergraphParameters.size() > 2 ) {
- hypergraphDir = hypergraphParameters[2];
- } else {
- string nbestFile = staticData.GetNBestFilePath();
- if ( ! nbestFile.empty() && nbestFile!="-" && !boost::starts_with(nbestFile,"/dev/stdout") ) {
- boost::filesystem::path nbestPath(nbestFile);
-
- // In the Boost filesystem API version 2,
- // which was the default prior to Boost 1.46,
- // the filename() method returned a string.
- //
- // In the Boost filesystem API version 3,
- // which is the default starting with Boost 1.46,
- // the filename() method returns a path object.
- //
- // To get a string from the path object,
- // the native() method must be called.
- // hypergraphDir = nbestPath.parent_path().filename()
- //#if BOOST_VERSION >= 104600
- // .native()
- //#endif
- //;
-
- // Hopefully the following compiles under all versions of Boost.
- //
- // If this line gives you compile errors,
- // contact Lane Schwartz on the Moses mailing list
- hypergraphDir = nbestPath.parent_path().string();
-
- } else {
- stringstream hypergraphDirName;
- hypergraphDirName << boost::filesystem::current_path().string() << "/hypergraph";
- hypergraphDir = hypergraphDirName.str();
- }
- }
-
- if ( ! boost::filesystem::exists(hypergraphDir) ) {
- boost::filesystem::create_directory(hypergraphDir);
- }
-
- if ( ! boost::filesystem::exists(hypergraphDir) ) {
- TRACE_ERR("Cannot output hypergraphs to " << hypergraphDir << " because the directory does not exist" << std::endl);
- } else if ( ! boost::filesystem::is_directory(hypergraphDir) ) {
- TRACE_ERR("Cannot output hypergraphs to " << hypergraphDir << " because that path exists, but is not a directory" << std::endl);
- } else {
- stringstream fileName;
- fileName << hypergraphDir << "/" << m_lineNumber;
- if ( appendSuffix ) {
- fileName << "." << compression;
- }
- boost::iostreams::filtering_ostream *file
- = new boost::iostreams::filtering_ostream;
-
- if ( compression == "gz" ) {
- file->push( boost::iostreams::gzip_compressor() );
- } else if ( compression == "bz2" ) {
- file->push( boost::iostreams::bzip2_compressor() );
- } else if ( compression != "txt" ) {
- TRACE_ERR("Unrecognized hypergraph compression format ("
- << compression
- << ") - using uncompressed plain txt" << std::endl);
- compression = "txt";
- }
-
- file->push( boost::iostreams::file_sink(fileName.str(), ios_base::out) );
-
- if (file->is_complete() && file->good()) {
- fix(*file,PRECISION);
- manager.OutputSearchGraphAsHypergraph(m_lineNumber, *file);
- file -> flush();
- } else {
- TRACE_ERR("Cannot output hypergraph for line " << m_lineNumber
- << " because the output file " << fileName.str()
- << " is not open or not ready for writing"
- << std::endl);
- }
- file -> pop();
- delete file;
- }
+ if (m_hypergraphOutput.get()) {
+ m_hypergraphOutput->Write(manager);
}
+
additionalReportingTime.stop();
// apply decision rule and output best translation(s)
@@ -476,7 +374,7 @@ private:
OutputCollector* m_alignmentInfoCollector;
OutputCollector* m_unknownsCollector;
bool m_outputSearchGraphSLF;
- bool m_outputSearchGraphHypergraph;
+ boost::shared_ptr<HypergraphOutput<Manager> > m_hypergraphOutput;
std::ofstream *m_alignmentStream;
@@ -520,58 +418,11 @@ static void ShowWeights()
}
}
-size_t OutputFeatureWeightsForHypergraph(size_t index, const FeatureFunction* ff, std::ostream &outputSearchGraphStream)
-{
- size_t numScoreComps = ff->GetNumScoreComponents();
- if (numScoreComps != 0) {
- vector<float> values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff);
- if (numScoreComps > 1) {
- for (size_t i = 0; i < numScoreComps; ++i) {
- outputSearchGraphStream << ff->GetScoreProducerDescription()
- << i
- << "=" << values[i] << endl;
- }
- } else {
- outputSearchGraphStream << ff->GetScoreProducerDescription()
- << "=" << values[0] << endl;
- }
- return index+numScoreComps;
- } else {
- UTIL_THROW2("Sparse features are not yet supported when outputting hypergraph format");
- }
-}
-
void OutputFeatureWeightsForHypergraph(std::ostream &outputSearchGraphStream)
{
outputSearchGraphStream.setf(std::ios::fixed);
outputSearchGraphStream.precision(6);
-
- const vector<const StatelessFeatureFunction*>& slf =StatelessFeatureFunction::GetStatelessFeatureFunctions();
- const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
- size_t featureIndex = 1;
- for (size_t i = 0; i < sff.size(); ++i) {
- featureIndex = OutputFeatureWeightsForHypergraph(featureIndex, sff[i], outputSearchGraphStream);
- }
- for (size_t i = 0; i < slf.size(); ++i) {
- /*
- if (slf[i]->GetScoreProducerWeightShortName() != "u" &&
- slf[i]->GetScoreProducerWeightShortName() != "tm" &&
- slf[i]->GetScoreProducerWeightShortName() != "I" &&
- slf[i]->GetScoreProducerWeightShortName() != "g")
- */
- {
- featureIndex = OutputFeatureWeightsForHypergraph(featureIndex, slf[i], outputSearchGraphStream);
- }
- }
- const vector<PhraseDictionary*>& pds = PhraseDictionary::GetColl();
- for( size_t i=0; i<pds.size(); i++ ) {
- featureIndex = OutputFeatureWeightsForHypergraph(featureIndex, pds[i], outputSearchGraphStream);
- }
- const vector<GenerationDictionary*>& gds = GenerationDictionary::GetColl();
- for( size_t i=0; i<gds.size(); i++ ) {
- featureIndex = OutputFeatureWeightsForHypergraph(featureIndex, gds[i], outputSearchGraphStream);
- }
-
+ StaticData::Instance().GetAllWeights().Save(outputSearchGraphStream);
}
@@ -638,30 +489,9 @@ int main(int argc, char** argv)
TRACE_ERR(weights);
TRACE_ERR("\n");
}
+ boost::shared_ptr<HypergraphOutput<Manager> > hypergraphOutput;
if (staticData.GetOutputSearchGraphHypergraph()) {
- ofstream* weightsOut = new std::ofstream;
- stringstream weightsFilename;
- if (staticData.GetParam("output-search-graph-hypergraph").size() > 3) {
- weightsFilename << staticData.GetParam("output-search-graph-hypergraph")[3];
- } else {
- string nbestFile = staticData.GetNBestFilePath();
- if ( ! nbestFile.empty() && nbestFile!="-" && !boost::starts_with(nbestFile,"/dev/stdout") ) {
- boost::filesystem::path nbestPath(nbestFile);
- weightsFilename << nbestPath.parent_path().filename() << "/weights";
- } else {
- weightsFilename << boost::filesystem::current_path().string() << "/hypergraph/weights";
- }
- }
- boost::filesystem::path weightsFilePath(weightsFilename.str());
- if ( ! boost::filesystem::exists(weightsFilePath.parent_path()) ) {
- boost::filesystem::create_directory(weightsFilePath.parent_path());
- }
- TRACE_ERR("The weights file is " << weightsFilename.str() << "\n");
- weightsOut->open(weightsFilename.str().c_str());
- OutputFeatureWeightsForHypergraph(*weightsOut);
- weightsOut->flush();
- weightsOut->close();
- delete weightsOut;
+ hypergraphOutput.reset(new HypergraphOutput<Manager>(PRECISION));
}
@@ -758,6 +588,9 @@ int main(int argc, char** argv)
IFVERBOSE(1) {
ResetUserTime();
}
+
+ FeatureFunction::CallChangeSource(source);
+
// set up task of translating one sentence
TranslationTask* task =
new TranslationTask(lineCount,source, outputCollector.get(),
@@ -769,7 +602,7 @@ int main(int argc, char** argv)
alignmentInfoCollector.get(),
unknownsCollector.get(),
staticData.GetOutputSearchGraphSLF(),
- staticData.GetOutputSearchGraphHypergraph());
+ hypergraphOutput);
// execute task
#ifdef WITH_THREADS
pool.Submit(task);
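
The hunks above replace the inline hypergraph-writing code and the one-off weights file with a single HypergraphOutput<Manager> helper: main() constructs it once when hypergraph output is requested (staticData.GetOutputSearchGraphHypergraph()), and every TranslationTask holds it through a boost::shared_ptr and simply calls Write(manager) after search. A minimal sketch of the new wiring, using only the calls visible in this patch (the class itself is presumably declared in the new moses/HypergraphOutput.h, which is not part of this excerpt):

    // Sketch only; assumes the HypergraphOutput<Manager> API used in the hunks above.
    boost::shared_ptr<HypergraphOutput<Manager> > hypergraphOutput;
    if (staticData.GetOutputSearchGraphHypergraph()) {
      // directory setup and the weights file, formerly inline here, presumably
      // move into this constructor
      hypergraphOutput.reset(new HypergraphOutput<Manager>(PRECISION));
    }

    // ... and inside TranslationTask::Run(), once the search has finished:
    if (m_hypergraphOutput.get()) {
      m_hypergraphOutput->Write(manager);   // one hypergraph file per input line
    }
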
diff --git a/moses-cmd/simulate-pe.cc b/moses-cmd/simulate-pe.cc
new file mode 100644
index 000000000..5384d9886
--- /dev/null
+++ b/moses-cmd/simulate-pe.cc
@@ -0,0 +1,856 @@
+// Fork of Main.cpp, to simulate post-editing sessions.
+// Written by Ulrich Germann.
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2009 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/filesystem.hpp>
+#include <boost/iostreams/device/file.hpp>
+#include <boost/iostreams/filter/bzip2.hpp>
+#include <boost/iostreams/filter/gzip.hpp>
+#include <boost/iostreams/filtering_stream.hpp>
+#include <boost/foreach.hpp>
+
+#include <exception>
+#include <fstream>
+#include <sstream>
+#include <vector>
+
+#include "util/usage.hh"
+#include "util/exception.hh"
+#include "moses/Util.h"
+#include "moses/TranslationModel/UG/mmsapt.h"
+#include "moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h"
+
+#ifdef WIN32
+// Include Visual Leak Detector
+//#include <vld.h>
+#endif
+
+#include "TranslationAnalysis.h"
+#include "IOWrapper.h"
+#include "mbr.h"
+
+#include "moses/Hypothesis.h"
+#include "moses/Manager.h"
+#include "moses/StaticData.h"
+#include "moses/Util.h"
+#include "moses/Timer.h"
+#include "moses/ThreadPool.h"
+#include "moses/OutputCollector.h"
+#include "moses/TranslationModel/PhraseDictionary.h"
+#include "moses/FF/StatefulFeatureFunction.h"
+#include "moses/FF/StatelessFeatureFunction.h"
+
+#ifdef HAVE_PROTOBUF
+#include "hypergraph.pb.h"
+#endif
+
+using namespace std;
+using namespace Moses;
+using namespace MosesCmd;
+
+namespace MosesCmd
+{
+// output floats with three digits of precision
+static const size_t PRECISION = 3;
+
+/** Enforce rounding */
+void fix(std::ostream& stream, size_t size)
+{
+ stream.setf(std::ios::fixed);
+ stream.precision(size);
+}
+
+/** Translates a sentence.
+ * - calls the search (Manager)
+ * - applies the decision rule
+ * - outputs best translation and additional reporting
+ **/
+class TranslationTask : public Task
+{
+
+public:
+
+ TranslationTask(size_t lineNumber,
+ InputType* source,
+ OutputCollector* outputCollector,
+ OutputCollector* nbestCollector,
+ OutputCollector* latticeSamplesCollector,
+ OutputCollector* wordGraphCollector,
+ OutputCollector* searchGraphCollector,
+ OutputCollector* detailedTranslationCollector,
+ OutputCollector* alignmentInfoCollector,
+ OutputCollector* unknownsCollector,
+ bool outputSearchGraphSLF,
+ bool outputSearchGraphHypergraph)
+ : m_source(source)
+ , m_lineNumber(lineNumber)
+ , m_outputCollector(outputCollector)
+ , m_nbestCollector(nbestCollector)
+ , m_latticeSamplesCollector(latticeSamplesCollector)
+ , m_wordGraphCollector(wordGraphCollector)
+ , m_searchGraphCollector(searchGraphCollector)
+ , m_detailedTranslationCollector(detailedTranslationCollector)
+ , m_alignmentInfoCollector(alignmentInfoCollector)
+ , m_unknownsCollector(unknownsCollector)
+ , m_outputSearchGraphSLF(outputSearchGraphSLF)
+ , m_outputSearchGraphHypergraph(outputSearchGraphHypergraph)
+ { }
+
+ /** Translate one sentence
+ * gets called by main function implemented at end of this source file */
+ void Run() {
+ // shorthand for "global data"
+ const StaticData &staticData = StaticData::Instance();
+
+ // input sentence
+ Sentence sentence;
+
+ // report wall time spent on translation
+ Timer translationTime;
+ translationTime.start();
+
+ // report thread number
+#if defined(WITH_THREADS) && defined(BOOST_HAS_PTHREADS)
+ TRACE_ERR("Translating line " << m_lineNumber << " in thread id " << pthread_self() << std::endl);
+#endif
+
+
+ // execute the translation
+ // note: this executes the search, resulting in a search graph
+ // we still need to apply the decision rule (MAP, MBR, ...)
+ Timer initTime;
+ initTime.start();
+ Manager manager(m_lineNumber, *m_source,staticData.GetSearchAlgorithm());
+ VERBOSE(1, "Line " << m_lineNumber << ": Initialize search took " << initTime << " seconds total" << endl);
+ manager.ProcessSentence();
+
+ // we are done with search, let's look what we got
+ Timer additionalReportingTime;
+ additionalReportingTime.start();
+
+ // output word graph
+ if (m_wordGraphCollector) {
+ ostringstream out;
+ fix(out,PRECISION);
+ manager.GetWordGraph(m_lineNumber, out);
+ m_wordGraphCollector->Write(m_lineNumber, out.str());
+ }
+
+ // output search graph
+ if (m_searchGraphCollector) {
+ ostringstream out;
+ fix(out,PRECISION);
+ manager.OutputSearchGraph(m_lineNumber, out);
+ m_searchGraphCollector->Write(m_lineNumber, out.str());
+
+#ifdef HAVE_PROTOBUF
+ if (staticData.GetOutputSearchGraphPB()) {
+ ostringstream sfn;
+ sfn << staticData.GetParam("output-search-graph-pb")[0] << '/' << m_lineNumber << ".pb" << ends;
+ string fn = sfn.str();
+ VERBOSE(2, "Writing search graph to " << fn << endl);
+ fstream output(fn.c_str(), ios::trunc | ios::binary | ios::out);
+ manager.SerializeSearchGraphPB(m_lineNumber, output);
+ }
+#endif
+ }
+
+ // Output search graph in HTK standard lattice format (SLF)
+ if (m_outputSearchGraphSLF) {
+ stringstream fileName;
+ fileName << staticData.GetParam("output-search-graph-slf")[0] << "/" << m_lineNumber << ".slf";
+ std::ofstream *file = new std::ofstream;
+ file->open(fileName.str().c_str());
+ if (file->is_open() && file->good()) {
+ ostringstream out;
+ fix(out,PRECISION);
+ manager.OutputSearchGraphAsSLF(m_lineNumber, out);
+ *file << out.str();
+ file -> flush();
+ } else {
+ TRACE_ERR("Cannot output HTK standard lattice for line " << m_lineNumber << " because the output file is not open or not ready for writing" << std::endl);
+ }
+ delete file;
+ }
+
+ // Output search graph in hypergraph format for Kenneth Heafield's lazy hypergraph decoder
+ if (m_outputSearchGraphHypergraph) {
+
+ vector<string> hypergraphParameters = staticData.GetParam("output-search-graph-hypergraph");
+
+ bool appendSuffix;
+ if (hypergraphParameters.size() > 0 && hypergraphParameters[0] == "true") {
+ appendSuffix = true;
+ } else {
+ appendSuffix = false;
+ }
+
+ string compression;
+ if (hypergraphParameters.size() > 1) {
+ compression = hypergraphParameters[1];
+ } else {
+ compression = "txt";
+ }
+
+ string hypergraphDir;
+ if ( hypergraphParameters.size() > 2 ) {
+ hypergraphDir = hypergraphParameters[2];
+ } else {
+ string nbestFile = staticData.GetNBestFilePath();
+ if ( ! nbestFile.empty() && nbestFile!="-" && !boost::starts_with(nbestFile,"/dev/stdout") ) {
+ boost::filesystem::path nbestPath(nbestFile);
+
+ // In the Boost filesystem API version 2,
+ // which was the default prior to Boost 1.46,
+ // the filename() method returned a string.
+ //
+ // In the Boost filesystem API version 3,
+ // which is the default starting with Boost 1.46,
+ // the filename() method returns a path object.
+ //
+ // To get a string from the path object,
+ // the native() method must be called.
+ // hypergraphDir = nbestPath.parent_path().filename()
+ //#if BOOST_VERSION >= 104600
+ // .native()
+ //#endif
+ //;
+
+ // Hopefully the following compiles under all versions of Boost.
+ //
+ // If this line gives you compile errors,
+ // contact Lane Schwartz on the Moses mailing list
+ hypergraphDir = nbestPath.parent_path().string();
+
+ } else {
+ stringstream hypergraphDirName;
+ hypergraphDirName << boost::filesystem::current_path().string() << "/hypergraph";
+ hypergraphDir = hypergraphDirName.str();
+ }
+ }
+
+ if ( ! boost::filesystem::exists(hypergraphDir) ) {
+ boost::filesystem::create_directory(hypergraphDir);
+ }
+
+ if ( ! boost::filesystem::exists(hypergraphDir) ) {
+ TRACE_ERR("Cannot output hypergraphs to " << hypergraphDir << " because the directory does not exist" << std::endl);
+ } else if ( ! boost::filesystem::is_directory(hypergraphDir) ) {
+ TRACE_ERR("Cannot output hypergraphs to " << hypergraphDir << " because that path exists, but is not a directory" << std::endl);
+ } else {
+ stringstream fileName;
+ fileName << hypergraphDir << "/" << m_lineNumber;
+ if ( appendSuffix ) {
+ fileName << "." << compression;
+ }
+ boost::iostreams::filtering_ostream *file
+ = new boost::iostreams::filtering_ostream;
+
+ if ( compression == "gz" ) {
+ file->push( boost::iostreams::gzip_compressor() );
+ } else if ( compression == "bz2" ) {
+ file->push( boost::iostreams::bzip2_compressor() );
+ } else if ( compression != "txt" ) {
+ TRACE_ERR("Unrecognized hypergraph compression format ("
+ << compression
+ << ") - using uncompressed plain txt" << std::endl);
+ compression = "txt";
+ }
+
+ file->push( boost::iostreams::file_sink(fileName.str(), ios_base::out) );
+
+ if (file->is_complete() && file->good()) {
+ fix(*file,PRECISION);
+ manager.OutputSearchGraphAsHypergraph(*file);
+ file -> flush();
+ } else {
+ TRACE_ERR("Cannot output hypergraph for line " << m_lineNumber
+ << " because the output file " << fileName.str()
+ << " is not open or not ready for writing"
+ << std::endl);
+ }
+ file -> pop();
+ delete file;
+ }
+ }
+ additionalReportingTime.stop();
+
+ // apply decision rule and output best translation(s)
+ if (m_outputCollector) {
+ ostringstream out;
+ ostringstream debug;
+ fix(debug,PRECISION);
+
+ // all derivations - send them to debug stream
+ if (staticData.PrintAllDerivations()) {
+ additionalReportingTime.start();
+ manager.PrintAllDerivations(m_lineNumber, debug);
+ additionalReportingTime.stop();
+ }
+
+ Timer decisionRuleTime;
+ decisionRuleTime.start();
+
+ // MAP decoding: best hypothesis
+ const Hypothesis* bestHypo = NULL;
+ if (!staticData.UseMBR()) {
+ bestHypo = manager.GetBestHypothesis();
+ if (bestHypo) {
+ if (StaticData::Instance().GetOutputHypoScore()) {
+ out << bestHypo->GetTotalScore() << ' ';
+ }
+ if (staticData.IsPathRecoveryEnabled()) {
+ OutputInput(out, bestHypo);
+ out << "||| ";
+ }
+ if (staticData.GetParam("print-id").size() && Scan<bool>(staticData.GetParam("print-id")[0]) ) {
+ out << m_source->GetTranslationId() << " ";
+ }
+
+ if (staticData.GetReportSegmentation() == 2) {
+ manager.GetOutputLanguageModelOrder(out, bestHypo);
+ }
+ OutputBestSurface(
+ out,
+ bestHypo,
+ staticData.GetOutputFactorOrder(),
+ staticData.GetReportSegmentation(),
+ staticData.GetReportAllFactors());
+ if (staticData.PrintAlignmentInfo()) {
+ out << "||| ";
+ OutputAlignment(out, bestHypo);
+ }
+
+ OutputAlignment(m_alignmentInfoCollector, m_lineNumber, bestHypo);
+ IFVERBOSE(1) {
+ debug << "BEST TRANSLATION: " << *bestHypo << endl;
+ }
+ } else {
+ VERBOSE(1, "NO BEST TRANSLATION" << endl);
+ }
+
+ out << endl;
+ }
+
+ // MBR decoding (n-best MBR, lattice MBR, consensus)
+ else {
+ // we first need the n-best translations
+ size_t nBestSize = staticData.GetMBRSize();
+ if (nBestSize <= 0) {
+ cerr << "ERROR: negative size for number of MBR candidate translations not allowed (option mbr-size)" << endl;
+ exit(1);
+ }
+ TrellisPathList nBestList;
+ manager.CalcNBest(nBestSize, nBestList,true);
+ VERBOSE(2,"size of n-best: " << nBestList.GetSize() << " (" << nBestSize << ")" << endl);
+ IFVERBOSE(2) {
+ PrintUserTime("calculated n-best list for (L)MBR decoding");
+ }
+
+ // lattice MBR
+ if (staticData.UseLatticeMBR()) {
+ if (m_nbestCollector) {
+ //lattice mbr nbest
+ vector<LatticeMBRSolution> solutions;
+ size_t n = min(nBestSize, staticData.GetNBestSize());
+ getLatticeMBRNBest(manager,nBestList,solutions,n);
+ ostringstream out;
+ OutputLatticeMBRNBest(out, solutions,m_lineNumber);
+ m_nbestCollector->Write(m_lineNumber, out.str());
+ } else {
+ //Lattice MBR decoding
+ vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
+ OutputBestHypo(mbrBestHypo, m_lineNumber, staticData.GetReportSegmentation(),
+ staticData.GetReportAllFactors(),out);
+ IFVERBOSE(2) {
+ PrintUserTime("finished Lattice MBR decoding");
+ }
+ }
+ }
+
+ // consensus decoding
+ else if (staticData.UseConsensusDecoding()) {
+ const TrellisPath &conBestHypo = doConsensusDecoding(manager,nBestList);
+ OutputBestHypo(conBestHypo, m_lineNumber,
+ staticData.GetReportSegmentation(),
+ staticData.GetReportAllFactors(),out);
+ OutputAlignment(m_alignmentInfoCollector, m_lineNumber, conBestHypo);
+ IFVERBOSE(2) {
+ PrintUserTime("finished Consensus decoding");
+ }
+ }
+
+ // n-best MBR decoding
+ else {
+ const Moses::TrellisPath &mbrBestHypo = doMBR(nBestList);
+ OutputBestHypo(mbrBestHypo, m_lineNumber,
+ staticData.GetReportSegmentation(),
+ staticData.GetReportAllFactors(),out);
+ OutputAlignment(m_alignmentInfoCollector, m_lineNumber, mbrBestHypo);
+ IFVERBOSE(2) {
+ PrintUserTime("finished MBR decoding");
+ }
+ }
+ }
+
+ // report best translation to output collector
+ m_outputCollector->Write(m_lineNumber,out.str(),debug.str());
+
+ decisionRuleTime.stop();
+ VERBOSE(1, "Line " << m_lineNumber << ": Decision rule took " << decisionRuleTime << " seconds total" << endl);
+ }
+
+ additionalReportingTime.start();
+
+ // output n-best list
+ if (m_nbestCollector && !staticData.UseLatticeMBR()) {
+ TrellisPathList nBestList;
+ ostringstream out;
+ manager.CalcNBest(staticData.GetNBestSize(), nBestList,staticData.GetDistinctNBest());
+ OutputNBest(out, nBestList, staticData.GetOutputFactorOrder(), m_lineNumber,
+ staticData.GetReportSegmentation());
+ m_nbestCollector->Write(m_lineNumber, out.str());
+ }
+
+ //lattice samples
+ if (m_latticeSamplesCollector) {
+ TrellisPathList latticeSamples;
+ ostringstream out;
+ manager.CalcLatticeSamples(staticData.GetLatticeSamplesSize(), latticeSamples);
+ OutputNBest(out,latticeSamples, staticData.GetOutputFactorOrder(), m_lineNumber,
+ staticData.GetReportSegmentation());
+ m_latticeSamplesCollector->Write(m_lineNumber, out.str());
+ }
+
+ // detailed translation reporting
+ if (m_detailedTranslationCollector) {
+ ostringstream out;
+ fix(out,PRECISION);
+ TranslationAnalysis::PrintTranslationAnalysis(out, manager.GetBestHypothesis());
+ m_detailedTranslationCollector->Write(m_lineNumber,out.str());
+ }
+
+ //list of unknown words
+ if (m_unknownsCollector) {
+ const vector<const Phrase*>& unknowns = manager.getSntTranslationOptions()->GetUnknownSources();
+ ostringstream out;
+ for (size_t i = 0; i < unknowns.size(); ++i) {
+ out << *(unknowns[i]);
+ }
+ out << endl;
+ m_unknownsCollector->Write(m_lineNumber, out.str());
+ }
+
+ // report additional statistics
+ manager.CalcDecoderStatistics();
+ VERBOSE(1, "Line " << m_lineNumber << ": Additional reporting took " << additionalReportingTime << " seconds total" << endl);
+ VERBOSE(1, "Line " << m_lineNumber << ": Translation took " << translationTime << " seconds total" << endl);
+ IFVERBOSE(2) {
+ PrintUserTime("Sentence Decoding Time:");
+ }
+ }
+
+ ~TranslationTask() {
+ delete m_source;
+ }
+
+private:
+ InputType* m_source;
+ size_t m_lineNumber;
+ OutputCollector* m_outputCollector;
+ OutputCollector* m_nbestCollector;
+ OutputCollector* m_latticeSamplesCollector;
+ OutputCollector* m_wordGraphCollector;
+ OutputCollector* m_searchGraphCollector;
+ OutputCollector* m_detailedTranslationCollector;
+ OutputCollector* m_alignmentInfoCollector;
+ OutputCollector* m_unknownsCollector;
+ bool m_outputSearchGraphSLF;
+ bool m_outputSearchGraphHypergraph;
+ std::ofstream *m_alignmentStream;
+
+
+};
+
+static void PrintFeatureWeight(const FeatureFunction* ff)
+{
+ cout << ff->GetScoreProducerDescription() << "=";
+ size_t numScoreComps = ff->GetNumScoreComponents();
+ vector<float> values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff);
+ for (size_t i = 0; i < numScoreComps; ++i) {
+ cout << " " << values[i];
+ }
+ cout << endl;
+}
+
+static void ShowWeights()
+{
+ //TODO: Find a way of ensuring this order is synced with the nbest
+ fix(cout,6);
+ const vector<const StatelessFeatureFunction*>& slf = StatelessFeatureFunction::GetStatelessFeatureFunctions();
+ const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
+
+ for (size_t i = 0; i < sff.size(); ++i) {
+ const StatefulFeatureFunction *ff = sff[i];
+ if (ff->IsTuneable()) {
+ PrintFeatureWeight(ff);
+ }
+ else {
+ cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
+ }
+ }
+ for (size_t i = 0; i < slf.size(); ++i) {
+ const StatelessFeatureFunction *ff = slf[i];
+ if (ff->IsTuneable()) {
+ PrintFeatureWeight(ff);
+ }
+ else {
+ cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
+ }
+ }
+}
+
+size_t OutputFeatureWeightsForHypergraph(size_t index, const FeatureFunction* ff, std::ostream &outputSearchGraphStream)
+{
+ size_t numScoreComps = ff->GetNumScoreComponents();
+ if (numScoreComps != 0) {
+ vector<float> values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff);
+ if (numScoreComps > 1) {
+ for (size_t i = 0; i < numScoreComps; ++i) {
+ outputSearchGraphStream << ff->GetScoreProducerDescription()
+ << i
+ << "=" << values[i] << endl;
+ }
+ } else {
+ outputSearchGraphStream << ff->GetScoreProducerDescription()
+ << "=" << values[0] << endl;
+ }
+ return index+numScoreComps;
+ } else {
+ UTIL_THROW2("Sparse features are not yet supported when outputting hypergraph format");
+ }
+}
+
+void OutputFeatureWeightsForHypergraph(std::ostream &outputSearchGraphStream)
+{
+ outputSearchGraphStream.setf(std::ios::fixed);
+ outputSearchGraphStream.precision(6);
+
+ const vector<const StatelessFeatureFunction*>& slf =StatelessFeatureFunction::GetStatelessFeatureFunctions();
+ const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
+ size_t featureIndex = 1;
+ for (size_t i = 0; i < sff.size(); ++i) {
+ featureIndex = OutputFeatureWeightsForHypergraph(featureIndex, sff[i], outputSearchGraphStream);
+ }
+ for (size_t i = 0; i < slf.size(); ++i) {
+ /*
+ if (slf[i]->GetScoreProducerWeightShortName() != "u" &&
+ slf[i]->GetScoreProducerWeightShortName() != "tm" &&
+ slf[i]->GetScoreProducerWeightShortName() != "I" &&
+ slf[i]->GetScoreProducerWeightShortName() != "g")
+ */
+ {
+ featureIndex = OutputFeatureWeightsForHypergraph(featureIndex, slf[i], outputSearchGraphStream);
+ }
+ }
+ const vector<PhraseDictionary*>& pds = PhraseDictionary::GetColl();
+ for( size_t i=0; i<pds.size(); i++ ) {
+ featureIndex = OutputFeatureWeightsForHypergraph(featureIndex, pds[i], outputSearchGraphStream);
+ }
+ const vector<GenerationDictionary*>& gds = GenerationDictionary::GetColl();
+ for( size_t i=0; i<gds.size(); i++ ) {
+ featureIndex = OutputFeatureWeightsForHypergraph(featureIndex, gds[i], outputSearchGraphStream);
+ }
+
+}
+
+
+} //namespace
+
+/** main function of the command line version of the decoder **/
+int main(int argc, char** argv)
+{
+ try {
+
+#ifdef HAVE_PROTOBUF
+ GOOGLE_PROTOBUF_VERIFY_VERSION;
+#endif
+
+ // echo command line, if verbose
+ IFVERBOSE(1) {
+ TRACE_ERR("command: ");
+ for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" ");
+ TRACE_ERR(endl);
+ }
+
+ // set number of significant decimals in output
+ fix(cout,PRECISION);
+ fix(cerr,PRECISION);
+
+ vector<pair<string,int> > argfilter(3);
+ argfilter[0] = std::make_pair(string("--spe-src"),1);
+ argfilter[1] = std::make_pair(string("--spe-trg"),1);
+ argfilter[2] = std::make_pair(string("--spe-aln"),1);
+
+ char** my_args; int my_acnt;
+ char** mo_args; int mo_acnt;
+ filter_arguments(argc, argv, mo_acnt, &mo_args, my_acnt, &my_args, argfilter);
+
+ ifstream spe_src,spe_trg,spe_aln;
+ // open the simulated post-editing data streams (source, target, alignment)
+ for (int i = 0; i < my_acnt; i += 2)
+ {
+ if (!strcmp(my_args[i],"--spe-src"))
+ spe_src.open(my_args[i+1]);
+ else if (!strcmp(my_args[i],"--spe-trg"))
+ spe_trg.open(my_args[i+1]);
+ else if (!strcmp(my_args[i],"--spe-aln"))
+ spe_aln.open(my_args[i+1]);
+ }
+
+ // load all the settings into the Parameter class
+ // (stores them as strings, or array of strings)
+ Parameter params;
+ if (!params.LoadParam(mo_acnt,mo_args)) {
+ exit(1);
+ }
+
+
+ // initialize all "global" variables, which are stored in StaticData
+ // note: this also loads models such as the language model, etc.
+ if (!StaticData::LoadDataStatic(&params, argv[0])) {
+ exit(1);
+ }
+
+ // setting "-show-weights" -> just dump out weights and exit
+ if (params.isParamSpecified("show-weights")) {
+ ShowWeights();
+ exit(0);
+ }
+
+ // shorthand for accessing information in StaticData
+ const StaticData& staticData = StaticData::Instance();
+
+
+ //initialise random numbers
+ srand(time(NULL));
+
+ // set up read/writing class
+ IOWrapper* ioWrapper = GetIOWrapper(staticData);
+ if (!ioWrapper) {
+ cerr << "Error: Failed to create IO object" << endl;
+ exit(1);
+ }
+
+ // check on weights
+ const ScoreComponentCollection& weights = staticData.GetAllWeights();
+ IFVERBOSE(2) {
+ TRACE_ERR("The global weight vector looks like this: ");
+ TRACE_ERR(weights);
+ TRACE_ERR("\n");
+ }
+ if (staticData.GetOutputSearchGraphHypergraph()) {
+ ofstream* weightsOut = new std::ofstream;
+ stringstream weightsFilename;
+ if (staticData.GetParam("output-search-graph-hypergraph").size() > 3) {
+ weightsFilename << staticData.GetParam("output-search-graph-hypergraph")[3];
+ } else {
+ string nbestFile = staticData.GetNBestFilePath();
+ if ( ! nbestFile.empty() && nbestFile!="-" && !boost::starts_with(nbestFile,"/dev/stdout") ) {
+ boost::filesystem::path nbestPath(nbestFile);
+ weightsFilename << nbestPath.parent_path().filename() << "/weights";
+ } else {
+ weightsFilename << boost::filesystem::current_path().string() << "/hypergraph/weights";
+ }
+ }
+ boost::filesystem::path weightsFilePath(weightsFilename.str());
+ if ( ! boost::filesystem::exists(weightsFilePath.parent_path()) ) {
+ boost::filesystem::create_directory(weightsFilePath.parent_path());
+ }
+ TRACE_ERR("The weights file is " << weightsFilename.str() << "\n");
+ weightsOut->open(weightsFilename.str().c_str());
+ OutputFeatureWeightsForHypergraph(*weightsOut);
+ weightsOut->flush();
+ weightsOut->close();
+ delete weightsOut;
+ }
+
+
+ // initialize output streams
+ // note: we can't just write to STDOUT or files
+ // because multithreading may return sentences in shuffled order
+ auto_ptr<OutputCollector> outputCollector; // for translations
+ auto_ptr<OutputCollector> nbestCollector; // for n-best lists
+ auto_ptr<OutputCollector> latticeSamplesCollector; //for lattice samples
+ auto_ptr<ofstream> nbestOut;
+ auto_ptr<ofstream> latticeSamplesOut;
+ size_t nbestSize = staticData.GetNBestSize();
+ string nbestFile = staticData.GetNBestFilePath();
+ bool output1best = true;
+ if (nbestSize) {
+ if (nbestFile == "-" || nbestFile == "/dev/stdout") {
+ // nbest to stdout, no 1-best
+ nbestCollector.reset(new OutputCollector());
+ output1best = false;
+ } else {
+ // nbest to file, 1-best to stdout
+ nbestOut.reset(new ofstream(nbestFile.c_str()));
+ if (!nbestOut->good()) {
+ TRACE_ERR("ERROR: Failed to open " << nbestFile << " for nbest lists" << endl);
+ exit(1);
+ }
+ nbestCollector.reset(new OutputCollector(nbestOut.get()));
+ }
+ }
+ size_t latticeSamplesSize = staticData.GetLatticeSamplesSize();
+ string latticeSamplesFile = staticData.GetLatticeSamplesFilePath();
+ if (latticeSamplesSize) {
+ if (latticeSamplesFile == "-" || latticeSamplesFile == "/dev/stdout") {
+ latticeSamplesCollector.reset(new OutputCollector());
+ output1best = false;
+ } else {
+ latticeSamplesOut.reset(new ofstream(latticeSamplesFile.c_str()));
+ if (!latticeSamplesOut->good()) {
+ TRACE_ERR("ERROR: Failed to open " << latticeSamplesFile << " for lattice samples" << endl);
+ exit(1);
+ }
+ latticeSamplesCollector.reset(new OutputCollector(latticeSamplesOut.get()));
+ }
+ }
+ if (output1best) {
+ outputCollector.reset(new OutputCollector());
+ }
+
+ // initialize stream for word graph (aka: output lattice)
+ auto_ptr<OutputCollector> wordGraphCollector;
+ if (staticData.GetOutputWordGraph()) {
+ wordGraphCollector.reset(new OutputCollector(&(ioWrapper->GetOutputWordGraphStream())));
+ }
+
+ // initialize stream for search graph
+ // note: this is essentially the same as above, but in a different format
+ auto_ptr<OutputCollector> searchGraphCollector;
+ if (staticData.GetOutputSearchGraph()) {
+ searchGraphCollector.reset(new OutputCollector(&(ioWrapper->GetOutputSearchGraphStream())));
+ }
+
+ // initialize stream for details about the decoder run
+ auto_ptr<OutputCollector> detailedTranslationCollector;
+ if (staticData.IsDetailedTranslationReportingEnabled()) {
+ detailedTranslationCollector.reset(new OutputCollector(&(ioWrapper->GetDetailedTranslationReportingStream())));
+ }
+
+ // initialize stream for word alignment between input and output
+ auto_ptr<OutputCollector> alignmentInfoCollector;
+ if (!staticData.GetAlignmentOutputFile().empty()) {
+ alignmentInfoCollector.reset(new OutputCollector(ioWrapper->GetAlignmentOutputStream()));
+ }
+
+ //initialise stream for unknown (oov) words
+ auto_ptr<OutputCollector> unknownsCollector;
+ auto_ptr<ofstream> unknownsStream;
+ if (!staticData.GetOutputUnknownsFile().empty()) {
+ unknownsStream.reset(new ofstream(staticData.GetOutputUnknownsFile().c_str()));
+ if (!unknownsStream->good()) {
+ TRACE_ERR("Unable to open " << staticData.GetOutputUnknownsFile() << " for unknowns");
+ exit(1);
+ }
+ unknownsCollector.reset(new OutputCollector(unknownsStream.get()));
+ }
+
+#ifdef WITH_THREADS
+ ThreadPool pool(staticData.ThreadCount());
+#endif
+
+ // main loop over set of input sentences
+ InputType* source = NULL;
+ size_t lineCount = staticData.GetStartTranslationId();
+ while(ReadInput(*ioWrapper,staticData.GetInputType(),source)) {
+ IFVERBOSE(1) {
+ ResetUserTime();
+ }
+ // set up task of translating one sentence
+ TranslationTask* task =
+ new TranslationTask(lineCount,source, outputCollector.get(),
+ nbestCollector.get(),
+ latticeSamplesCollector.get(),
+ wordGraphCollector.get(),
+ searchGraphCollector.get(),
+ detailedTranslationCollector.get(),
+ alignmentInfoCollector.get(),
+ unknownsCollector.get(),
+ staticData.GetOutputSearchGraphSLF(),
+ staticData.GetOutputSearchGraphHypergraph());
+ // execute task
+#ifdef WITH_THREADS
+ if (my_acnt)
+ {
+ task->Run();
+ delete task;
+ string src,trg,aln;
+ UTIL_THROW_IF2(!getline(spe_src,src), "[" << HERE << "] "
+ << "missing update data for simulated post-editing.");
+ UTIL_THROW_IF2(!getline(spe_trg,trg), "[" << HERE << "] "
+ << "missing update data for simulated post-editing.");
+ UTIL_THROW_IF2(!getline(spe_aln,aln), "[" << HERE << "] "
+ << "missing update data for simulated post-editing.");
+ BOOST_FOREACH (PhraseDictionary* pd, PhraseDictionary::GetColl())
+ {
+ Mmsapt* sapt = dynamic_cast<Mmsapt*>(pd);
+ if (sapt) sapt->add(src,trg,aln);
+ VERBOSE(1,"[" << HERE << " added src] " << src << endl);
+ VERBOSE(1,"[" << HERE << " added trg] " << trg << endl);
+ VERBOSE(1,"[" << HERE << " added aln] " << aln << endl);
+ }
+ }
+ else pool.Submit(task);
+#else
+ task->Run();
+ delete task;
+#endif
+
+ source = NULL; //make sure it doesn't get deleted
+ ++lineCount;
+ }
+
+ // we are done, finishing up
+#ifdef WITH_THREADS
+ pool.Stop(true); //flush remaining jobs
+#endif
+
+ delete ioWrapper;
+ FeatureFunction::Destroy();
+
+ } catch (const std::exception &e) {
+ std::cerr << "Exception: " << e.what() << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ IFVERBOSE(1) util::PrintUsage(std::cerr);
+
+#ifndef EXIT_RETURN
+ // This avoids calling the destructors (which can take a long time)
+ exit(EXIT_SUCCESS);
+#else
+ return EXIT_SUCCESS;
+#endif
+}
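
The new simulate-pe.cc is a fork of Main.cpp with three extra options, --spe-src, --spe-trg and --spe-aln, naming parallel files of post-edited source, target and word-alignment data. When they are given, sentences are decoded one at a time in the main thread and, after each one, the matching triple is pushed into every Mmsapt phrase table so that later sentences can already draw on it. Condensed from the main loop above (a sketch, not a drop-in replacement):

    // per-sentence incremental update, as in the loop above
    string src, trg, aln;
    UTIL_THROW_IF2(!getline(spe_src, src), "missing update data for simulated post-editing.");
    UTIL_THROW_IF2(!getline(spe_trg, trg), "missing update data for simulated post-editing.");
    UTIL_THROW_IF2(!getline(spe_aln, aln), "missing update data for simulated post-editing.");
    BOOST_FOREACH(PhraseDictionary* pd, PhraseDictionary::GetColl()) {
      // only the memory-mapped suffix-array tables (Mmsapt) accept additions at run time
      Mmsapt* sapt = dynamic_cast<Mmsapt*>(pd);
      if (sapt) sapt->add(src, trg, aln);
    }
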
diff --git a/moses/BitmapContainer.cpp b/moses/BitmapContainer.cpp
index 981b04895..061a5953f 100644
--- a/moses/BitmapContainer.cpp
+++ b/moses/BitmapContainer.cpp
@@ -53,14 +53,30 @@ class HypothesisScoreOrdererWithDistortion
{
public:
HypothesisScoreOrdererWithDistortion(const WordsRange* transOptRange) :
- m_transOptRange(transOptRange) {}
+ m_transOptRange(transOptRange) {
+ m_totalWeightDistortion = 0;
+ const StaticData &staticData = StaticData::Instance();
+ const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
+ std::vector<FeatureFunction*>::const_iterator iter;
+ for (iter = ffs.begin(); iter != ffs.end(); ++iter) {
+ const FeatureFunction *ff = *iter;
+
+ const DistortionScoreProducer *model = dynamic_cast<const DistortionScoreProducer*>(ff);
+ if (model) {
+ float weight =staticData.GetAllWeights().GetScoreForProducer(model);
+ m_totalWeightDistortion += weight;
+ }
+ }
+
+
+ }
const WordsRange* m_transOptRange;
+ float m_totalWeightDistortion;
bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const {
UTIL_THROW_IF2(m_transOptRange == NULL, "Words range not set");
- const StaticData &staticData = StaticData::Instance();
const float distortionScoreA = DistortionScoreProducer::CalculateDistortionScore(
*hypoA,
@@ -76,20 +92,8 @@ public:
);
- float totalWeightDistortion = 0;
- const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
- std::vector<FeatureFunction*>::const_iterator iter;
- for (iter = ffs.begin(); iter != ffs.end(); ++iter) {
- const FeatureFunction *ff = *iter;
-
- const DistortionScoreProducer *model = dynamic_cast<const DistortionScoreProducer*>(ff);
- if (model) {
- float weight =staticData.GetAllWeights().GetScoreForProducer(model);
- totalWeightDistortion += weight;
- }
- }
- const float scoreA = hypoA->GetScore() + distortionScoreA * totalWeightDistortion;
- const float scoreB = hypoB->GetScore() + distortionScoreB * totalWeightDistortion;
+ const float scoreA = hypoA->GetScore() + distortionScoreA * m_totalWeightDistortion;
+ const float scoreB = hypoB->GetScore() + distortionScoreB * m_totalWeightDistortion;
if (scoreA > scoreB) {
@@ -161,13 +165,17 @@ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
}
if (m_translations.size() > 1) {
- UTIL_THROW_IF2(m_translations.Get(0)->GetFutureScore() < m_translations.Get(1)->GetFutureScore(),
- "Non-monotonic future score");
+ UTIL_THROW_IF2(m_translations.Get(0)->GetFutureScore() < m_translations.Get(1)->GetFutureScore(),
+ "Non-monotonic future score: "
+ << m_translations.Get(0)->GetFutureScore() << " vs. "
+ << m_translations.Get(1)->GetFutureScore());
}
if (m_hypotheses.size() > 1) {
UTIL_THROW_IF2(m_hypotheses[0]->GetTotalScore() < m_hypotheses[1]->GetTotalScore(),
- "Non-monotonic total score");
+ "Non-monotonic total score: "
+ << m_hypotheses[0]->GetTotalScore() << " vs. "
+ << m_hypotheses[1]->GetTotalScore());
}
HypothesisScoreOrdererWithDistortion orderer (&transOptRange);
@@ -207,7 +215,7 @@ Hypothesis *BackwardsEdge::CreateHypothesis(const Hypothesis &hypothesis, const
IFVERBOSE(2) {
hypothesis.GetManager().GetSentenceStats().StopTimeBuildHyp();
}
- newHypo->Evaluate(m_futurescore);
+ newHypo->EvaluateWhenApplied(m_futurescore);
return newHypo;
}
@@ -442,7 +450,9 @@ BitmapContainer::ProcessBestHypothesis()
if (!Empty()) {
HypothesisQueueItem *check = Dequeue(true);
UTIL_THROW_IF2(item->GetHypothesis()->GetTotalScore() < check->GetHypothesis()->GetTotalScore(),
- "Non-monotonic total score");
+ "Non-monotonic total score: "
+ << item->GetHypothesis()->GetTotalScore() << " vs. "
+ << check->GetHypothesis()->GetTotalScore());
}
// Logging for the criminally insane
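
The BitmapContainer change is a pure performance refactor: the summed weight of all DistortionScoreProducer features, which HypothesisScoreOrdererWithDistortion needs to combine hypothesis score and distortion score, was previously recomputed inside operator(), i.e. for every pairwise comparison made while ordering hypotheses, and is now computed once in the comparator's constructor and cached in m_totalWeightDistortion. The essence of the moved code (names as in the hunks above):

    // Do the O(number of feature functions) scan once per BackwardsEdge,
    // in the constructor, instead of once per comparison in operator().
    float totalWeightDistortion = 0;
    const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
    std::vector<FeatureFunction*>::const_iterator it;
    for (it = ffs.begin(); it != ffs.end(); ++it) {
      const DistortionScoreProducer *model = dynamic_cast<const DistortionScoreProducer*>(*it);
      if (model) {
        totalWeightDistortion += StaticData::Instance().GetAllWeights().GetScoreForProducer(model);
      }
    }
    // operator() then only reads the cached m_totalWeightDistortion:
    //   scoreA = hypoA->GetScore() + distortionScoreA * m_totalWeightDistortion;
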
diff --git a/moses/ChartCell.cpp b/moses/ChartCell.cpp
index 125efd204..1d84ba71b 100644
--- a/moses/ChartCell.cpp
+++ b/moses/ChartCell.cpp
@@ -22,6 +22,7 @@
#include <algorithm>
#include "ChartCell.h"
#include "ChartCellCollection.h"
+#include "HypergraphOutput.h"
#include "RuleCubeQueue.h"
#include "RuleCube.h"
#include "WordsRange.h"
@@ -59,7 +60,7 @@ ChartCell::~ChartCell() {}
/** Add the given hypothesis to the cell.
* Returns true if added, false if not. Maybe it already exists in the collection or score falls below threshold etc.
- * This function just calls the correspondind AddHypothesis() in ChartHypothesisCollection
+ * This function just calls the corresponding AddHypothesis() in ChartHypothesisCollection
* \param hypo Hypothesis to be added
*/
bool ChartCell::AddHypothesis(ChartHypothesis *hypo)
@@ -195,13 +196,13 @@ const HypoList *ChartCell::GetAllSortedHypotheses() const
return ret;
}
-//! call GetSearchGraph() for each hypo collection
-void ChartCell::GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream, const std::map<unsigned, bool> &reachable) const
+//! call WriteSearchGraph() for each hypo collection
+void ChartCell::WriteSearchGraph(const ChartSearchGraphWriter& writer, const std::map<unsigned, bool> &reachable) const
{
MapType::const_iterator iterOutside;
for (iterOutside = m_hypoColl.begin(); iterOutside != m_hypoColl.end(); ++iterOutside) {
const ChartHypothesisCollection &coll = iterOutside->second;
- coll.GetSearchGraph(translationId, outputSearchGraphStream, reachable);
+ coll.WriteSearchGraph(writer, reachable);
}
}
diff --git a/moses/ChartCell.h b/moses/ChartCell.h
index 1fed695ac..99bc90866 100644
--- a/moses/ChartCell.h
+++ b/moses/ChartCell.h
@@ -40,6 +40,7 @@
namespace Moses
{
+class ChartSearchGraphWriter;
class ChartTranslationOptionList;
class ChartCellCollection;
class ChartManager;
@@ -124,7 +125,7 @@ public:
return m_coverage < compare.m_coverage;
}
- void GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream, const std::map<unsigned,bool> &reachable) const;
+ void WriteSearchGraph(const ChartSearchGraphWriter& writer, const std::map<unsigned,bool> &reachable) const;
};
diff --git a/moses/ChartCellLabelSet.h b/moses/ChartCellLabelSet.h
index 45d281d35..d946058ba 100644
--- a/moses/ChartCellLabelSet.h
+++ b/moses/ChartCellLabelSet.h
@@ -124,6 +124,15 @@ public:
}
}
+ const ChartCellLabel *Find(size_t idx) const {
+ try {
+ return m_map.at(idx);
+ }
+ catch (const std::out_of_range& oor) {
+ return NULL;
+ }
+ }
+
ChartCellLabel::Stack &FindOrInsert(const Word &w) {
size_t idx = w[0]->GetId();
if (! ChartCellExists(idx)) {
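
The new ChartCellLabelSet::Find(size_t) complements ChartCellExists(): it looks the index up with map::at() and turns the std::out_of_range thrown for a missing entry into a NULL return, giving callers pointer-or-NULL semantics without an exception escaping. A hypothetical caller (names illustrative, not taken from the patch):

    // idx is a factor id, as in ChartCellExists() / FindOrInsert() nearby
    const ChartCellLabel *label = labelSet.Find(idx);
    if (label != NULL) {
      // an entry exists for this label; use it
    }
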
diff --git a/moses/ChartHypothesis.cpp b/moses/ChartHypothesis.cpp
index 212a28d23..8339ee5b2 100644
--- a/moses/ChartHypothesis.cpp
+++ b/moses/ChartHypothesis.cpp
@@ -149,6 +149,40 @@ Phrase ChartHypothesis::GetOutputPhrase() const
return outPhrase;
}
+void ChartHypothesis::GetOutputPhrase(int leftRightMost, int numWords, Phrase &outPhrase) const
+{
+ const TargetPhrase &tp = GetCurrTargetPhrase();
+
+ int targetSize = tp.GetSize();
+ for (int i = 0; i < targetSize; ++i) {
+ int pos;
+ if (leftRightMost == 1) {
+ pos = i;
+ }
+ else if (leftRightMost == 2) {
+ pos = targetSize - i - 1;
+ }
+ else {
+ abort();
+ }
+
+ const Word &word = tp.GetWord(pos);
+
+ if (word.IsNonTerminal()) {
+ // non-term. fill out with prev hypo
+ size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[pos];
+ const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd];
+ prevHypo->GetOutputPhrase(outPhrase);
+ } else {
+ outPhrase.AddWord(word);
+ }
+
+ if (outPhrase.GetSize() >= numWords) {
+ return;
+ }
+ }
+}
+
/** check, if two hypothesis can be recombined.
this is actually a sorting function that allows us to
keep an ordered list of hypotheses. This makes recombination
@@ -178,7 +212,7 @@ int ChartHypothesis::RecombineCompare(const ChartHypothesis &compare) const
/** calculate total score
* @todo this should be in ScoreBreakdown
*/
-void ChartHypothesis::Evaluate()
+void ChartHypothesis::EvaluateWhenApplied()
{
const StaticData &staticData = StaticData::Instance();
// total scores from prev hypos
@@ -200,7 +234,7 @@ void ChartHypothesis::Evaluate()
StatelessFeatureFunction::GetStatelessFeatureFunctions();
for (unsigned i = 0; i < sfs.size(); ++i) {
if (! staticData.IsFeatureFunctionIgnored( *sfs[i] )) {
- sfs[i]->EvaluateChart(*this,&m_scoreBreakdown);
+ sfs[i]->EvaluateWhenApplied(*this,&m_scoreBreakdown);
}
}
@@ -208,7 +242,7 @@ void ChartHypothesis::Evaluate()
StatefulFeatureFunction::GetStatefulFeatureFunctions();
for (unsigned i = 0; i < ffs.size(); ++i) {
if (! staticData.IsFeatureFunctionIgnored( *ffs[i] )) {
- m_ffStates[i] = ffs[i]->EvaluateChart(*this,i,&m_scoreBreakdown);
+ m_ffStates[i] = ffs[i]->EvaluateWhenApplied(*this,i,&m_scoreBreakdown);
}
}
@@ -259,7 +293,7 @@ void ChartHypothesis::CleanupArcList()
*/
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize();
- bool distinctNBest = staticData.GetDistinctNBest() || staticData.UseMBR() || staticData.GetOutputSearchGraph();
+ bool distinctNBest = staticData.GetDistinctNBest() || staticData.UseMBR() || staticData.GetOutputSearchGraph() || staticData.GetOutputSearchGraphHypergraph();
if (!distinctNBest && m_arcList->size() > nBestSize) {
// prune arc list only if there too many arcs
diff --git a/moses/ChartHypothesis.h b/moses/ChartHypothesis.h
index 12050e764..8dc26e721 100644
--- a/moses/ChartHypothesis.h
+++ b/moses/ChartHypothesis.h
@@ -138,9 +138,13 @@ public:
void GetOutputPhrase(Phrase &outPhrase) const;
Phrase GetOutputPhrase() const;
+ // get leftmost/rightmost words only
+ // leftRightMost: 1=left, 2=right
+ void GetOutputPhrase(int leftRightMost, int numWords, Phrase &outPhrase) const;
+
int RecombineCompare(const ChartHypothesis &compare) const;
- void Evaluate();
+ void EvaluateWhenApplied();
void AddArc(ChartHypothesis *loserHypo);
void CleanupArcList();
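
The new three-argument GetOutputPhrase() collects only the outermost words of a hypothesis's target yield: leftRightMost selects the end to start from (1 = leftmost, 2 = rightmost) and numWords caps how many words are gathered, recursing into the matching predecessor hypothesis whenever a non-terminal is met. A hypothetical call site (variable names are illustrative, not part of the patch):

    // collect up to two boundary words from each end of the yield
    Phrase leftEdge(2), rightEdge(2);   // reserve space for two words each
    hypo.GetOutputPhrase(1 /* leftmost  */, 2, leftEdge);
    hypo.GetOutputPhrase(2 /* rightmost */, 2, rightEdge);
    // note: the rightmost variant appends words back to front, and hitting a
    // non-terminal pulls in that sub-hypothesis's complete yield before the cap is checked
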
diff --git a/moses/ChartHypothesisCollection.cpp b/moses/ChartHypothesisCollection.cpp
index ec5a86e59..d5a3b3d53 100644
--- a/moses/ChartHypothesisCollection.cpp
+++ b/moses/ChartHypothesisCollection.cpp
@@ -24,6 +24,7 @@
#include "ChartHypothesisCollection.h"
#include "ChartHypothesis.h"
#include "ChartManager.h"
+#include "HypergraphOutput.h"
#include "util/exception.hh"
using namespace std;
@@ -55,7 +56,7 @@ ChartHypothesisCollection::~ChartHypothesisCollection()
/** public function to add hypothesis to this collection.
* Returns false if equiv hypo exists in collection, otherwise returns true.
* Takes care of update arc list for n-best list creation.
- * Will delete hypo is it exist - once this function is call don't delete hypothesis.
+ * Will delete hypo if it exists - once this function is called, don't delete the hypothesis.
* \param hypo hypothesis to add
* \param manager pointer back to manager
*/
@@ -293,27 +294,9 @@ void ChartHypothesisCollection::CleanupArcList()
* \param outputSearchGraphStream stream to output the info to
* \param reachable @todo don't know
*/
-void ChartHypothesisCollection::GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream, const std::map<unsigned, bool> &reachable) const
+void ChartHypothesisCollection::WriteSearchGraph(const ChartSearchGraphWriter& writer, const std::map<unsigned, bool> &reachable) const
{
- HCType::const_iterator iter;
- for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter) {
- ChartHypothesis &mainHypo = **iter;
- if (StaticData::Instance().GetUnprunedSearchGraph() ||
- reachable.find(mainHypo.GetId()) != reachable.end()) {
- outputSearchGraphStream << translationId << " " << mainHypo << endl;
- }
-
- const ChartArcList *arcList = mainHypo.GetArcList();
- if (arcList) {
- ChartArcList::const_iterator iterArc;
- for (iterArc = arcList->begin(); iterArc != arcList->end(); ++iterArc) {
- const ChartHypothesis &arc = **iterArc;
- if (reachable.find(arc.GetId()) != reachable.end()) {
- outputSearchGraphStream << translationId << " " << arc << endl;
- }
- }
- }
- }
+ writer.WriteHypos(*this,reachable);
}
std::ostream& operator<<(std::ostream &out, const ChartHypothesisCollection &coll)
diff --git a/moses/ChartHypothesisCollection.h b/moses/ChartHypothesisCollection.h
index 438c2dd33..37cd907d9 100644
--- a/moses/ChartHypothesisCollection.h
+++ b/moses/ChartHypothesisCollection.h
@@ -28,6 +28,8 @@
namespace Moses
{
+class ChartSearchGraphWriter;
+
//! functor to compare (chart) hypotheses by (descending) score
class ChartHypothesisScoreOrderer
{
@@ -117,7 +119,7 @@ public:
return m_bestScore;
}
- void GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream, const std::map<unsigned,bool> &reachable) const;
+ void WriteSearchGraph(const ChartSearchGraphWriter& writer, const std::map<unsigned,bool> &reachable) const;
};
diff --git a/moses/ChartManager.cpp b/moses/ChartManager.cpp
index e137da915..c7abc697d 100644
--- a/moses/ChartManager.cpp
+++ b/moses/ChartManager.cpp
@@ -25,6 +25,7 @@
#include "ChartHypothesis.h"
#include "ChartKBestExtractor.h"
#include "ChartTranslationOptions.h"
+#include "HypergraphOutput.h"
#include "StaticData.h"
#include "DecodeStep.h"
#include "TreeInput.h"
@@ -41,11 +42,12 @@ extern bool g_mosesDebug;
* \param source the sentence to be decoded
* \param system which particular set of models to use.
*/
-ChartManager::ChartManager(InputType const& source)
+ChartManager::ChartManager(size_t lineNumber,InputType const& source)
:m_source(source)
,m_hypoStackColl(source, *this)
,m_start(clock())
,m_hypothesisId(0)
+ ,m_lineNumber(lineNumber)
,m_parser(source, m_hypoStackColl)
,m_translationOptionList(StaticData::Instance().GetRuleLimit(), source)
{
@@ -85,7 +87,7 @@ void ChartManager::ProcessSentence()
m_translationOptionList.ApplyThreshold();
const InputPath &inputPath = m_parser.GetInputPath(range);
- m_translationOptionList.Evaluate(m_source, inputPath);
+ m_translationOptionList.EvaluateWithSourceContext(m_source, inputPath);
// decode
ChartCell &cell = m_hypoStackColl.Get(range);
@@ -141,7 +143,7 @@ void ChartManager::AddXmlChartOptions()
RuleCubeItem* item = new RuleCubeItem( *opt, m_hypoStackColl );
ChartHypothesis* hypo = new ChartHypothesis(*opt, *item, *this);
- hypo->Evaluate();
+ hypo->EvaluateWhenApplied();
ChartCell &cell = m_hypoStackColl.Get(range);
@@ -222,8 +224,9 @@ void ChartManager::CalcNBest(
}
}
-void ChartManager::GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const
+void ChartManager::WriteSearchGraph(const ChartSearchGraphWriter& writer) const
{
+
size_t size = m_source.GetSize();
// which hypotheses are reachable?
@@ -236,7 +239,11 @@ void ChartManager::GetSearchGraph(long translationId, std::ostream &outputSearch
// no hypothesis
return;
}
- FindReachableHypotheses( hypo, reachable);
+ size_t winners = 0;
+ size_t losers = 0;
+
+ FindReachableHypotheses( hypo, reachable, &winners, &losers);
+ writer.WriteHeader(winners, losers);
for (size_t width = 1; width <= size; ++width) {
for (size_t startPos = 0; startPos <= size-width; ++startPos) {
@@ -245,12 +252,13 @@ void ChartManager::GetSearchGraph(long translationId, std::ostream &outputSearch
TRACE_ERR(" " << range << "=");
const ChartCell &cell = m_hypoStackColl.Get(range);
- cell.GetSearchGraph(translationId, outputSearchGraphStream, reachable);
+ cell.WriteSearchGraph(writer, reachable);
}
}
}
-void ChartManager::FindReachableHypotheses( const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable ) const
+void ChartManager::FindReachableHypotheses(
+ const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable, size_t* winners, size_t* losers) const
{
// do not recurse, if already visited
if (reachable.find(hypo->GetId()) != reachable.end()) {
@@ -259,9 +267,14 @@ void ChartManager::FindReachableHypotheses( const ChartHypothesis *hypo, std::ma
// recurse
reachable[ hypo->GetId() ] = true;
+ if (hypo->GetWinningHypothesis() == hypo) {
+ (*winners)++;
+ } else {
+ (*losers)++;
+ }
const std::vector<const ChartHypothesis*> &previous = hypo->GetPrevHypos();
for(std::vector<const ChartHypothesis*>::const_iterator i = previous.begin(); i != previous.end(); ++i) {
- FindReachableHypotheses( *i, reachable );
+ FindReachableHypotheses( *i, reachable, winners, losers );
}
// also loop over recombined hypotheses (arcs)
@@ -270,9 +283,19 @@ void ChartManager::FindReachableHypotheses( const ChartHypothesis *hypo, std::ma
ChartArcList::const_iterator iterArc;
for (iterArc = arcList->begin(); iterArc != arcList->end(); ++iterArc) {
const ChartHypothesis &arc = **iterArc;
- FindReachableHypotheses( &arc, reachable );
+ FindReachableHypotheses( &arc, reachable, winners, losers );
}
}
}
+void ChartManager::OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const {
+ ChartSearchGraphWriterHypergraph writer(&outputSearchGraphStream);
+ WriteSearchGraph(writer);
+}
+
+void ChartManager::OutputSearchGraphMoses(std::ostream &outputSearchGraphStream) const {
+ ChartSearchGraphWriterMoses writer(&outputSearchGraphStream, m_lineNumber);
+ WriteSearchGraph(writer);
+}
+
} // namespace Moses
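
ChartManager no longer hard-codes the "Moses" search-graph text format: WriteSearchGraph() walks the reachable hypotheses (now also counting winning vs. recombined ones for the header) and hands every hypothesis collection to a ChartSearchGraphWriter, with ChartSearchGraphWriterMoses and ChartSearchGraphWriterHypergraph as the two concrete writers. Judging only from the call sites in this hunk, the writer interface must look roughly like the following sketch:

    // Sketch inferred from the call sites above; the real declarations live in
    // the new moses/HypergraphOutput.h, which is not shown here.
    class ChartSearchGraphWriter {
    public:
      virtual ~ChartSearchGraphWriter() {}
      // header carrying the counts of winning and recombined ("loser") hypotheses
      virtual void WriteHeader(size_t winners, size_t losers) const = 0;
      // called once per ChartHypothesisCollection, filtered by reachability
      virtual void WriteHypos(const ChartHypothesisCollection &hypos,
                              const std::map<unsigned, bool> &reachable) const = 0;
    };
    // ChartSearchGraphWriterMoses presumably reproduces the old "translationId hypo"
    // lines; ChartSearchGraphWriterHypergraph emits the lazy-decoder hypergraph format.
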
diff --git a/moses/ChartManager.h b/moses/ChartManager.h
index a660f94d9..b39b078bd 100644
--- a/moses/ChartManager.h
+++ b/moses/ChartManager.h
@@ -38,6 +38,7 @@ namespace Moses
{
class ChartHypothesis;
+class ChartSearchGraphWriter;
/** Holds everything you need to decode 1 sentence with the hierarchical/syntax decoder
*/
@@ -49,21 +50,31 @@ private:
std::auto_ptr<SentenceStats> m_sentenceStats;
clock_t m_start; /**< starting time, used for logging */
unsigned m_hypothesisId; /* For handing out hypothesis ids to ChartHypothesis */
+ size_t m_lineNumber;
ChartParser m_parser;
ChartTranslationOptionList m_translationOptionList; /**< pre-computed list of translation options for the phrases in this sentence */
+ /* auxiliary functions for SearchGraphs */
+ void FindReachableHypotheses(
+ const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable , size_t* winners, size_t* losers) const;
+ void WriteSearchGraph(const ChartSearchGraphWriter& writer) const;
+
public:
- ChartManager(InputType const& source);
+ ChartManager(size_t lineNumber, InputType const& source);
~ChartManager();
void ProcessSentence();
void AddXmlChartOptions();
const ChartHypothesis *GetBestHypothesis() const;
void CalcNBest(size_t n, std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > &nBestList, bool onlyDistinct=false) const;
- void GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const;
- void FindReachableHypotheses( const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable ) const; /* auxilliary function for GetSearchGraph */
+ /** "Moses" (osg) type format */
+ void OutputSearchGraphMoses(std::ostream &outputSearchGraphStream) const;
+
+ /** Output in (modified) Kenneth hypergraph format */
+ void OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const;
+
//! the input sentence being decoded
const InputType& GetSource() const {
@@ -97,6 +108,10 @@ public:
}
const ChartParser &GetParser() const { return m_parser; }
+
+ size_t GetLineNumber() const {
+ return m_lineNumber;
+ }
};
}
diff --git a/moses/ChartParser.cpp b/moses/ChartParser.cpp
index b77a5c20c..5c1e82184 100644
--- a/moses/ChartParser.cpp
+++ b/moses/ChartParser.cpp
@@ -68,6 +68,12 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
m_unksrcs.push_back(unksrc);
+ // hack. Once the OOV FF is a phrase table, get rid of this
+ PhraseDictionary *firstPt = NULL;
+ if (PhraseDictionary::GetColl().size() != 0) {
+ firstPt = PhraseDictionary::GetColl()[0];
+ }
+
//TranslationOption *transOpt;
if (! staticData.GetDropUnknown() || isDigit) {
// loop
@@ -85,7 +91,7 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");
// add to dictionary
- TargetPhrase *targetPhrase = new TargetPhrase();
+ TargetPhrase *targetPhrase = new TargetPhrase(firstPt);
Word &targetWord = targetPhrase->AddWord();
targetWord.CreateUnknownWord(sourceWord);
@@ -93,7 +99,7 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
float unknownScore = FloorScore(TransformScore(prob));
targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, unknownScore);
- targetPhrase->Evaluate(*unksrc);
+ targetPhrase->EvaluateInIsolation(*unksrc);
targetPhrase->SetTargetLHS(targetLHS);
targetPhrase->SetAlignmentInfo("0-0");
@@ -108,7 +114,7 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
// drop source word. create blank trans opt
float unknownScore = FloorScore(-numeric_limits<float>::infinity());
- TargetPhrase *targetPhrase = new TargetPhrase();
+ TargetPhrase *targetPhrase = new TargetPhrase(firstPt);
// loop
const UnknownLHSList &lhsList = staticData.GetUnknownLHS();
UnknownLHSList::const_iterator iterLHS;
@@ -121,7 +127,7 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");
targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, unknownScore);
- targetPhrase->Evaluate(*unksrc);
+ targetPhrase->EvaluateInIsolation(*unksrc);
targetPhrase->SetTargetLHS(targetLHS);
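
The hunks from here on (ChartParserCallback, ChartTranslationOption, ChartTranslationOptionList, ChartTranslationOptions, the DecodeStep classes and the feature functions) are largely a mechanical rename that splits the heavily overloaded Evaluate() into hooks named after the stage at which a feature function can score a phrase: EvaluateInIsolation() when a target phrase is created, EvaluateWithSourceContext() once the input sentence and its InputPath are available, and EvaluateWhenApplied() when a (chart) hypothesis is actually built during search. The signatures below are copied from the BleuScoreFeature.h hunk later in this commit and are shown only as a sketch of the three stages:

    // Stage-specific hooks as they appear (renamed) in the BleuScoreFeature.h hunk
    // further down; argument lists copied from there, bodies omitted. Sketch only.

    // 1. scored when the target phrase is created, with no sentence context:
    void EvaluateInIsolation(const Phrase &source
                             , const TargetPhrase &targetPhrase
                             , ScoreComponentCollection &scoreBreakdown
                             , ScoreComponentCollection &estimatedFutureScore) const;

    // 2. scored once the input sentence and the covered path through it are known:
    void EvaluateWithSourceContext(const InputType &input
                                   , const InputPath &inputPath
                                   , const TargetPhrase &targetPhrase
                                   , const StackVec *stackVec
                                   , ScoreComponentCollection &scoreBreakdown
                                   , ScoreComponentCollection *estimatedFutureScore = NULL) const;

    // 3. scored when the phrase is applied to extend a hypothesis during search:
    FFState* EvaluateWhenApplied(const Hypothesis &cur_hypo
                                 , const FFState *prev_state
                                 , ScoreComponentCollection *accumulator) const;
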
diff --git a/moses/ChartParserCallback.h b/moses/ChartParserCallback.h
index ce4af3ab4..9b03e1f5b 100644
--- a/moses/ChartParserCallback.h
+++ b/moses/ChartParserCallback.h
@@ -25,7 +25,7 @@ public:
virtual void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &waste_memory, const WordsRange &range) = 0;
- virtual void Evaluate(const InputType &input, const InputPath &inputPath) = 0;
+ virtual void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath) = 0;
virtual float GetBestScore(const ChartCellLabel *chartCell) const = 0;
diff --git a/moses/ChartTranslationOption.cpp b/moses/ChartTranslationOption.cpp
index 0fece0a09..332b26a15 100644
--- a/moses/ChartTranslationOption.cpp
+++ b/moses/ChartTranslationOption.cpp
@@ -10,7 +10,7 @@ ChartTranslationOption::ChartTranslationOption(const TargetPhrase &targetPhrase)
{
}
-void ChartTranslationOption::Evaluate(const InputType &input,
+void ChartTranslationOption::EvaluateWithSourceContext(const InputType &input,
const InputPath &inputPath,
const StackVec &stackVec)
{
@@ -18,7 +18,7 @@ void ChartTranslationOption::Evaluate(const InputType &input,
for (size_t i = 0; i < ffs.size(); ++i) {
const FeatureFunction &ff = *ffs[i];
- ff.Evaluate(input, inputPath, m_targetPhrase, &stackVec, m_scoreBreakdown);
+ ff.EvaluateWithSourceContext(input, inputPath, m_targetPhrase, &stackVec, m_scoreBreakdown);
}
}
diff --git a/moses/ChartTranslationOption.h b/moses/ChartTranslationOption.h
index e8cba1347..06a6f797e 100644
--- a/moses/ChartTranslationOption.h
+++ b/moses/ChartTranslationOption.h
@@ -44,7 +44,7 @@ public:
return m_scoreBreakdown;
}
- void Evaluate(const InputType &input,
+ void EvaluateWithSourceContext(const InputType &input,
const InputPath &inputPath,
const StackVec &stackVec);
};
diff --git a/moses/ChartTranslationOptionList.cpp b/moses/ChartTranslationOptionList.cpp
index 8c52eca32..71a8b4b0f 100644
--- a/moses/ChartTranslationOptionList.cpp
+++ b/moses/ChartTranslationOptionList.cpp
@@ -168,13 +168,13 @@ float ChartTranslationOptionList::GetBestScore(const ChartCellLabel *chartCell)
return bestHypo.GetTotalScore();
}
-void ChartTranslationOptionList::Evaluate(const InputType &input, const InputPath &inputPath)
+void ChartTranslationOptionList::EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath)
{
// NEVER iterate over ALL of the collection. Just over the first m_size
CollType::iterator iter;
for (iter = m_collection.begin(); iter != m_collection.begin() + m_size; ++iter) {
ChartTranslationOptions &transOpts = **iter;
- transOpts.Evaluate(input, inputPath);
+ transOpts.EvaluateWithSourceContext(input, inputPath);
}
// get rid of empty trans opts
diff --git a/moses/ChartTranslationOptionList.h b/moses/ChartTranslationOptionList.h
index dcf4f1c4e..4723bdd1d 100644
--- a/moses/ChartTranslationOptionList.h
+++ b/moses/ChartTranslationOptionList.h
@@ -65,7 +65,7 @@ public:
void Clear();
void ApplyThreshold();
- void Evaluate(const InputType &input, const InputPath &inputPath);
+ void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath);
private:
typedef std::vector<ChartTranslationOptions*> CollType;
diff --git a/moses/ChartTranslationOptions.cpp b/moses/ChartTranslationOptions.cpp
index 114eae868..44aa67619 100644
--- a/moses/ChartTranslationOptions.cpp
+++ b/moses/ChartTranslationOptions.cpp
@@ -51,7 +51,7 @@ ChartTranslationOptions::~ChartTranslationOptions()
}
-void ChartTranslationOptions::Evaluate(const InputType &input, const InputPath &inputPath)
+void ChartTranslationOptions::EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath)
{
SetInputPath(&inputPath);
if (StaticData::Instance().GetPlaceholderFactor() != NOT_FOUND) {
@@ -62,7 +62,7 @@ void ChartTranslationOptions::Evaluate(const InputType &input, const InputPath &
for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
ChartTranslationOption &transOpt = **iter;
transOpt.SetInputPath(&inputPath);
- transOpt.Evaluate(input, inputPath, m_stackVec);
+ transOpt.EvaluateWithSourceContext(input, inputPath, m_stackVec);
}
// get rid of -inf trans opts
diff --git a/moses/ChartTranslationOptions.h b/moses/ChartTranslationOptions.h
index 7dd930118..cdead7889 100644
--- a/moses/ChartTranslationOptions.h
+++ b/moses/ChartTranslationOptions.h
@@ -85,7 +85,7 @@ public:
return m_estimateOfBestScore;
}
- void Evaluate(const InputType &input, const InputPath &inputPath);
+ void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath);
void SetInputPath(const InputPath *inputPath);
diff --git a/moses/DecodeGraph.h b/moses/DecodeGraph.h
index ecac3ee7e..aa5c7ace9 100644
--- a/moses/DecodeGraph.h
+++ b/moses/DecodeGraph.h
@@ -38,7 +38,7 @@ class DecodeGraph
{
protected:
std::list<const DecodeStep*> m_steps;
- size_t m_position;
+ size_t m_id; // contiguous unique id, starting from 0
size_t m_maxChartSpan;
size_t m_backoff;
@@ -46,15 +46,15 @@ public:
/**
* position: The position of this graph within the decode sequence.
**/
- DecodeGraph(size_t position)
- : m_position(position)
+ DecodeGraph(size_t id)
+ : m_id(id)
, m_maxChartSpan(NOT_FOUND)
, m_backoff(0)
{}
// for chart decoding
- DecodeGraph(size_t position, size_t maxChartSpan)
- : m_position(position)
+ DecodeGraph(size_t id, size_t maxChartSpan)
+ : m_id(id)
, m_maxChartSpan(maxChartSpan) {
}
@@ -90,8 +90,8 @@ public:
m_backoff = backoff;
}
- size_t GetPosition() const {
- return m_position;
+ size_t GetId() const {
+ return m_id;
}
};
diff --git a/moses/DecodeStepGeneration.cpp b/moses/DecodeStepGeneration.cpp
index bcc151bb4..26b95cdb5 100644
--- a/moses/DecodeStepGeneration.cpp
+++ b/moses/DecodeStepGeneration.cpp
@@ -148,7 +148,7 @@ void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOp
outPhrase.GetScoreBreakdown().PlusEquals(generationScore);
outPhrase.MergeFactors(genPhrase, m_newOutputFactors);
- outPhrase.Evaluate(inputPath.GetPhrase(), m_featuresToApply);
+ outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply);
const WordsRange &sourceWordsRange = inputPartialTranslOpt.GetSourceWordsRange();
diff --git a/moses/DecodeStepTranslation.cpp b/moses/DecodeStepTranslation.cpp
index e61ab7840..e7dbba4f3 100644
--- a/moses/DecodeStepTranslation.cpp
+++ b/moses/DecodeStepTranslation.cpp
@@ -84,7 +84,7 @@ void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslO
}
outPhrase.Merge(targetPhrase, m_newOutputFactors);
- outPhrase.Evaluate(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up
+ outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up
TranslationOption *newTransOpt = new TranslationOption(sourceWordsRange, outPhrase);
assert(newTransOpt != NULL);
@@ -258,7 +258,7 @@ void DecodeStepTranslation::ProcessLEGACY(const TranslationOption &inputPartialT
}
outPhrase.Merge(targetPhrase, m_newOutputFactors);
- outPhrase.Evaluate(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up
+ outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up
TranslationOption *newTransOpt = new TranslationOption(sourceWordsRange, outPhrase);
diff --git a/moses/FF/BleuScoreFeature.cpp b/moses/FF/BleuScoreFeature.cpp
index 348eaa0ea..0d0a20797 100644
--- a/moses/FF/BleuScoreFeature.cpp
+++ b/moses/FF/BleuScoreFeature.cpp
@@ -502,7 +502,7 @@ void BleuScoreFeature::GetClippedNgramMatchesAndCounts(Phrase& phrase,
* Given a previous state, compute Bleu score for the updated state with an additional target
* phrase translated.
*/
-FFState* BleuScoreFeature::Evaluate(const Hypothesis& cur_hypo,
+FFState* BleuScoreFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const
{
@@ -563,7 +563,7 @@ FFState* BleuScoreFeature::Evaluate(const Hypothesis& cur_hypo,
return new_state;
}
-FFState* BleuScoreFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int featureID,
+FFState* BleuScoreFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID,
ScoreComponentCollection* accumulator ) const
{
if (!m_enabled) return new BleuScoreState();
diff --git a/moses/FF/BleuScoreFeature.h b/moses/FF/BleuScoreFeature.h
index 99f04f5ff..cdba578ac 100644
--- a/moses/FF/BleuScoreFeature.h
+++ b/moses/FF/BleuScoreFeature.h
@@ -115,20 +115,20 @@ public:
std::vector< size_t >&,
size_t skip = 0) const;
- FFState* Evaluate( const Hypothesis& cur_hypo,
+ FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
- FFState* EvaluateChart(const ChartHypothesis& cur_hypo,
+ FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo,
int featureID,
ScoreComponentCollection* accumulator) const;
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/ConstrainedDecoding.cpp b/moses/FF/ConstrainedDecoding.cpp
index 9a8ecd1c3..bfe412913 100644
--- a/moses/FF/ConstrainedDecoding.cpp
+++ b/moses/FF/ConstrainedDecoding.cpp
@@ -100,7 +100,7 @@ const std::vector<Phrase> *GetConstraint(const std::map<long,std::vector<Phrase>
}
}
-FFState* ConstrainedDecoding::Evaluate(
+FFState* ConstrainedDecoding::EvaluateWhenApplied(
const Hypothesis& hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const
@@ -143,7 +143,7 @@ FFState* ConstrainedDecoding::Evaluate(
return ret;
}
-FFState* ConstrainedDecoding::EvaluateChart(
+FFState* ConstrainedDecoding::EvaluateWhenApplied(
const ChartHypothesis &hypo,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const
diff --git a/moses/FF/ConstrainedDecoding.h b/moses/FF/ConstrainedDecoding.h
index 2db192ce8..ca007f21d 100644
--- a/moses/FF/ConstrainedDecoding.h
+++ b/moses/FF/ConstrainedDecoding.h
@@ -41,13 +41,13 @@ public:
return true;
}
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{}
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
@@ -55,12 +55,12 @@ public:
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- FFState* Evaluate(
+ FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
- FFState* EvaluateChart(
+ FFState* EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const;
diff --git a/moses/FF/ControlRecombination.cpp b/moses/FF/ControlRecombination.cpp
index d3e7c82ab..85e88ac94 100644
--- a/moses/FF/ControlRecombination.cpp
+++ b/moses/FF/ControlRecombination.cpp
@@ -56,7 +56,7 @@ std::vector<float> ControlRecombination::DefaultWeights() const
return ret;
}
-FFState* ControlRecombination::Evaluate(
+FFState* ControlRecombination::EvaluateWhenApplied(
const Hypothesis& hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const
@@ -64,7 +64,7 @@ FFState* ControlRecombination::Evaluate(
return new ControlRecombinationState(hypo, *this);
}
-FFState* ControlRecombination::EvaluateChart(
+FFState* ControlRecombination::EvaluateWhenApplied(
const ChartHypothesis &hypo,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const
diff --git a/moses/FF/ControlRecombination.h b/moses/FF/ControlRecombination.h
index 0100d500d..095cc6b29 100644
--- a/moses/FF/ControlRecombination.h
+++ b/moses/FF/ControlRecombination.h
@@ -57,24 +57,24 @@ public:
return true;
}
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{}
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- FFState* Evaluate(
+ FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
- FFState* EvaluateChart(
+ FFState* EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const;
diff --git a/moses/FF/CountNonTerms.cpp b/moses/FF/CountNonTerms.cpp
index 92b79cd5d..03c7b7315 100644
--- a/moses/FF/CountNonTerms.cpp
+++ b/moses/FF/CountNonTerms.cpp
@@ -16,7 +16,7 @@ CountNonTerms::CountNonTerms(const std::string &line)
ReadParameters();
}
-void CountNonTerms::Evaluate(const Phrase &sourcePhrase
+void CountNonTerms::EvaluateInIsolation(const Phrase &sourcePhrase
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/CountNonTerms.h b/moses/FF/CountNonTerms.h
index 1fe71745d..c4e1467e9 100644
--- a/moses/FF/CountNonTerms.h
+++ b/moses/FF/CountNonTerms.h
@@ -12,12 +12,12 @@ public:
bool IsUseable(const FactorMask &mask) const
{ return true; }
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
@@ -25,11 +25,11 @@ public:
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- void Evaluate(const Hypothesis& hypo,
+ void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
- void EvaluateChart(
+ void EvaluateWhenApplied(
const ChartHypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
diff --git a/moses/FF/CoveredReferenceFeature.cpp b/moses/FF/CoveredReferenceFeature.cpp
index 25ab829f8..3a2482d0d 100644
--- a/moses/FF/CoveredReferenceFeature.cpp
+++ b/moses/FF/CoveredReferenceFeature.cpp
@@ -40,13 +40,13 @@ int CoveredReferenceState::Compare(const FFState& other) const
// return (m_coveredRef.size() < otherState.m_coveredRef.size()) ? -1 : +1;
}
-void CoveredReferenceFeature::Evaluate(const Phrase &source
+void CoveredReferenceFeature::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{}
-void CoveredReferenceFeature::Evaluate(const InputType &input
+void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
@@ -90,7 +90,7 @@ void CoveredReferenceFeature::SetParameter(const std::string& key, const std::st
}
}
-FFState* CoveredReferenceFeature::Evaluate(
+FFState* CoveredReferenceFeature::EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const
@@ -131,7 +131,7 @@ FFState* CoveredReferenceFeature::Evaluate(
return ret;
}
-FFState* CoveredReferenceFeature::EvaluateChart(
+FFState* CoveredReferenceFeature::EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const
diff --git a/moses/FF/CoveredReferenceFeature.h b/moses/FF/CoveredReferenceFeature.h
index cd2b2f966..a6cdd6f99 100644
--- a/moses/FF/CoveredReferenceFeature.h
+++ b/moses/FF/CoveredReferenceFeature.h
@@ -52,21 +52,21 @@ public:
return new CoveredReferenceState();
}
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const;
- FFState* Evaluate(
+ FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
- FFState* EvaluateChart(
+ FFState* EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const;
diff --git a/moses/FF/DecodeFeature.h b/moses/FF/DecodeFeature.h
index d79598328..ac4e9392b 100644
--- a/moses/FF/DecodeFeature.h
+++ b/moses/FF/DecodeFeature.h
@@ -62,20 +62,20 @@ public:
bool IsUseable(const FactorMask &mask) const;
void SetParameter(const std::string& key, const std::string& value);
- void Evaluate(const Hypothesis& hypo,
+ void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
- void EvaluateChart(const ChartHypothesis &hypo,
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/DistortionScoreProducer.cpp b/moses/FF/DistortionScoreProducer.cpp
index 303f35236..5995fe213 100644
--- a/moses/FF/DistortionScoreProducer.cpp
+++ b/moses/FF/DistortionScoreProducer.cpp
@@ -87,7 +87,7 @@ float DistortionScoreProducer::CalculateDistortionScore(const Hypothesis& hypo,
}
-FFState* DistortionScoreProducer::Evaluate(
+FFState* DistortionScoreProducer::EvaluateWhenApplied(
const Hypothesis& hypo,
const FFState* prev_state,
ScoreComponentCollection* out) const
diff --git a/moses/FF/DistortionScoreProducer.h b/moses/FF/DistortionScoreProducer.h
index 1bc6493e2..aa2c18b95 100644
--- a/moses/FF/DistortionScoreProducer.h
+++ b/moses/FF/DistortionScoreProducer.h
@@ -28,26 +28,26 @@ public:
virtual const FFState* EmptyHypothesisState(const InputType &input) const;
- virtual FFState* Evaluate(
+ virtual FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
- virtual FFState* EvaluateChart(
+ virtual FFState* EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection*) const {
throw std::logic_error("DistortionScoreProducer not supported in chart decoder, yet");
}
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/ExternalFeature.cpp b/moses/FF/ExternalFeature.cpp
index 141541170..10800d24d 100644
--- a/moses/FF/ExternalFeature.cpp
+++ b/moses/FF/ExternalFeature.cpp
@@ -51,7 +51,7 @@ void ExternalFeature::SetParameter(const std::string& key, const std::string& va
}
}
-FFState* ExternalFeature::Evaluate(
+FFState* ExternalFeature::EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const
@@ -59,7 +59,7 @@ FFState* ExternalFeature::Evaluate(
return new ExternalFeatureState(m_stateSize);
}
-FFState* ExternalFeature::EvaluateChart(
+FFState* ExternalFeature::EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const
diff --git a/moses/FF/ExternalFeature.h b/moses/FF/ExternalFeature.h
index 19eb45f2a..a8916a853 100644
--- a/moses/FF/ExternalFeature.h
+++ b/moses/FF/ExternalFeature.h
@@ -51,24 +51,24 @@ public:
void SetParameter(const std::string& key, const std::string& value);
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{}
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- FFState* Evaluate(
+ FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
- FFState* EvaluateChart(
+ FFState* EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const;
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index 56295805d..5dc4ccb86 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -10,6 +10,7 @@
#include "moses/TranslationModel/PhraseDictionaryDynSuffixArray.h"
#include "moses/TranslationModel/PhraseDictionaryScope3.h"
#include "moses/TranslationModel/PhraseDictionaryTransliteration.h"
+#include "moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h"
#include "moses/FF/LexicalReordering/LexicalReordering.h"
@@ -26,6 +27,7 @@
#include "moses/FF/PhrasePairFeature.h"
#include "moses/FF/PhraseLengthFeature.h"
#include "moses/FF/DistortionScoreProducer.h"
+#include "moses/FF/SparseHieroReorderingFeature.h"
#include "moses/FF/WordPenaltyProducer.h"
#include "moses/FF/InputFeature.h"
#include "moses/FF/PhrasePenalty.h"
@@ -33,6 +35,7 @@
#include "moses/FF/ControlRecombination.h"
#include "moses/FF/ExternalFeature.h"
#include "moses/FF/ConstrainedDecoding.h"
+#include "moses/FF/SoftSourceSyntacticConstraintsFeature.h"
#include "moses/FF/CoveredReferenceFeature.h"
#include "moses/FF/TreeStructureFeature.h"
#include "moses/FF/SoftMatchingFeature.h"
@@ -50,6 +53,7 @@
#include "moses/FF/SkeletonStatelessFF.h"
#include "moses/FF/SkeletonStatefulFF.h"
#include "moses/LM/SkeletonLM.h"
+#include "SkeletonChangeInput.h"
#include "moses/TranslationModel/SkeletonPT.h"
#ifdef HAVE_CMPH
@@ -91,6 +95,10 @@
#include "moses/LM/DALMWrapper.h"
#endif
+#ifdef LM_LBL
+#include "moses/LM/oxlm/LBLLM.h"
+#endif
+
#include "util/exception.hh"
#include <vector>
@@ -154,6 +162,18 @@ FeatureRegistry::FeatureRegistry()
#define MOSES_FNAME(name) Add(#name, new DefaultFeatureFactory< name >());
// Feature with different name than class.
#define MOSES_FNAME2(name, type) Add(name, new DefaultFeatureFactory< type >());
+
+ MOSES_FNAME2("PhraseDictionaryBinary", PhraseDictionaryTreeAdaptor);
+ MOSES_FNAME(PhraseDictionaryOnDisk);
+ MOSES_FNAME(PhraseDictionaryMemory);
+ MOSES_FNAME(PhraseDictionaryScope3);
+ MOSES_FNAME(PhraseDictionaryMultiModel);
+ MOSES_FNAME(PhraseDictionaryMultiModelCounts);
+ MOSES_FNAME(PhraseDictionaryALSuffixArray);
+ MOSES_FNAME(PhraseDictionaryDynSuffixArray);
+ MOSES_FNAME(PhraseDictionaryTransliteration);
+ MOSES_FNAME(PhraseDictionaryFuzzyMatch);
+
MOSES_FNAME(GlobalLexicalModel);
//MOSES_FNAME(GlobalLexicalModelUnlimited); This was commented out in the original
MOSES_FNAME(SourceWordDeletionFeature);
@@ -170,15 +190,6 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME2("Distortion", DistortionScoreProducer);
MOSES_FNAME2("WordPenalty", WordPenaltyProducer);
MOSES_FNAME(InputFeature);
- MOSES_FNAME2("PhraseDictionaryBinary", PhraseDictionaryTreeAdaptor);
- MOSES_FNAME(PhraseDictionaryOnDisk);
- MOSES_FNAME(PhraseDictionaryMemory);
- MOSES_FNAME(PhraseDictionaryScope3);
- MOSES_FNAME(PhraseDictionaryMultiModel);
- MOSES_FNAME(PhraseDictionaryMultiModelCounts);
- MOSES_FNAME(PhraseDictionaryALSuffixArray);
- MOSES_FNAME(PhraseDictionaryDynSuffixArray);
- MOSES_FNAME(PhraseDictionaryTransliteration);
MOSES_FNAME(OpSequenceModel);
MOSES_FNAME(PhrasePenalty);
MOSES_FNAME2("UnknownWordPenalty", UnknownWordPenaltyProducer);
@@ -187,6 +198,7 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(CoveredReferenceFeature);
MOSES_FNAME(ExternalFeature);
MOSES_FNAME(SourceGHKMTreeInputMatchFeature);
+ MOSES_FNAME(SoftSourceSyntacticConstraintsFeature);
MOSES_FNAME(TreeStructureFeature);
MOSES_FNAME(SoftMatchingFeature);
MOSES_FNAME(HyperParameterAsWeight);
@@ -196,12 +208,14 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(RuleScope);
MOSES_FNAME(MaxSpanFreeNonTermSource);
MOSES_FNAME(NieceTerminal);
+ MOSES_FNAME(SparseHieroReorderingFeature);
MOSES_FNAME(SpanLength);
MOSES_FNAME(SyntaxRHS);
MOSES_FNAME(SkeletonStatelessFF);
MOSES_FNAME(SkeletonStatefulFF);
MOSES_FNAME(SkeletonLM);
+ MOSES_FNAME(SkeletonChangeInput);
MOSES_FNAME(SkeletonPT);
#ifdef HAVE_CMPH
@@ -209,6 +223,7 @@ FeatureRegistry::FeatureRegistry()
#endif
#ifdef PT_UG
MOSES_FNAME(Mmsapt);
+ MOSES_FNAME2("PhraseDictionaryBitextSampling",Mmsapt); // that's an alias for Mmsapt!
#endif
#ifdef HAVE_PROBINGPT
MOSES_FNAME(ProbingPT);
@@ -235,6 +250,11 @@ FeatureRegistry::FeatureRegistry()
#ifdef LM_DALM
MOSES_FNAME2("DALM", LanguageModelDALM);
#endif
+#ifdef LM_LBL
+ MOSES_FNAME2("LBLLM-LM", LBLLM<oxlm::LM>);
+ MOSES_FNAME2("LBLLM-FactoredLM", LBLLM<oxlm::FactoredLM>);
+ MOSES_FNAME2("LBLLM-FactoredMaxentLM", LBLLM<oxlm::FactoredMaxentLM>);
+#endif
Add("KENLM", new KenFactory());
}
@@ -263,12 +283,21 @@ void FeatureRegistry::Construct(const std::string &name, const std::string &line
void FeatureRegistry::PrintFF() const
{
+ vector<string> ffs;
std::cerr << "Available feature functions:" << std::endl;
Map::const_iterator iter;
for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
const string &ffName = iter->first;
+ ffs.push_back(ffName);
+ }
+
+ vector<string>::const_iterator iterVec;
+ std::sort(ffs.begin(), ffs.end());
+ for (iterVec = ffs.begin(); iterVec != ffs.end(); ++iterVec) {
+ const string &ffName = *iterVec;
std::cerr << ffName << " ";
}
+
std::cerr << std::endl;
}
diff --git a/moses/FF/FeatureFunction.cpp b/moses/FF/FeatureFunction.cpp
index 4d5d59f8a..5d4e0f91e 100644
--- a/moses/FF/FeatureFunction.cpp
+++ b/moses/FF/FeatureFunction.cpp
@@ -34,6 +34,14 @@ void FeatureFunction::Destroy()
RemoveAllInColl(s_staticColl);
}
+void FeatureFunction::CallChangeSource(InputType *&input)
+{
+ for (size_t i = 0; i < s_staticColl.size(); ++i) {
+ const FeatureFunction &ff = *s_staticColl[i];
+ ff.ChangeSource(input);
+ }
+}
+
FeatureFunction::
FeatureFunction(const std::string& line)
: m_tuneable(true)
diff --git a/moses/FF/FeatureFunction.h b/moses/FF/FeatureFunction.h
index 18b016c8f..b30815e05 100644
--- a/moses/FF/FeatureFunction.h
+++ b/moses/FF/FeatureFunction.h
@@ -4,6 +4,7 @@
#include <vector>
#include <set>
#include <string>
+#include "moses/FeatureVector.h"
#include "moses/TypeDef.h"
namespace Moses
@@ -47,6 +48,8 @@ public:
static FeatureFunction &FindFeatureFunction(const std::string& name);
static void Destroy();
+ static void CallChangeSource(InputType *&input);
+
FeatureFunction(const std::string &line);
FeatureFunction(size_t numScoreComponents, const std::string &line);
virtual bool IsStateless() const = 0;
@@ -71,6 +74,11 @@ public:
return m_description;
}
+ FName GetFeatureName(const std::string& name) const {
+ return FName(GetScoreProducerDescription(), name);
+ }
+
+
//! if false, then this feature is not displayed in the n-best list.
// use with care
virtual bool IsTuneable() const {
@@ -98,11 +106,15 @@ public:
// source phrase is the substring that the phrase table uses to look up the target phrase,
// may have more factors than actually need, but not guaranteed.
// For SCFG decoding, the source contains non-terminals, NOT the raw source from the input sentence
- virtual void Evaluate(const Phrase &source
+ virtual void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const = 0;
+ // override this method if you want to change the input before decoding
+ virtual void ChangeSource(InputType *&input) const
+ {}
+
// This method is called once all the translation options are retrieved from the phrase table, and
// just before search.
// 'inputPath' is guaranteed to be the raw substring from the input. No factors were added or taken away
@@ -110,7 +122,7 @@ public:
// It is guaranteed to be in the same order as the non-terms in the source phrase.
// For pb models, stackvec is NULL.
// No FF should set estimatedFutureScore in both overloads!
- virtual void Evaluate(const InputType &input
+ virtual void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
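For orientation, the renamed hooks in FeatureFunction.h line up with the decoding pipeline as described in the comments above: EvaluateInIsolation when the phrase pair is scored on its own, EvaluateWithSourceContext once the translation options for the input are known, EvaluateWhenApplied when a hypothesis is extended, plus the new ChangeSource hook. A minimal stateless feature function written against the renamed interface might look like the following sketch (illustrative only; the class name, the constant score and the empty bodies are invented for the example, and the usual StatelessFeatureFunction(line) constructor is assumed):

#pragma once
#include <string>
#include "StatelessFeatureFunction.h"

namespace Moses
{

class ExampleStatelessFF : public StatelessFeatureFunction
{
public:
  ExampleStatelessFF(const std::string &line)
    : StatelessFeatureFunction(line)
  {}

  bool IsUseable(const FactorMask &mask) const {
    return true;
  }

  // scored once per phrase pair, independent of the input sentence
  void EvaluateInIsolation(const Phrase &source
                           , const TargetPhrase &targetPhrase
                           , ScoreComponentCollection &scoreBreakdown
                           , ScoreComponentCollection &estimatedFutureScore) const {
    scoreBreakdown.PlusEquals(this, 1.0);
  }

  // scored once all translation options for the input have been collected
  void EvaluateWithSourceContext(const InputType &input
                                 , const InputPath &inputPath
                                 , const TargetPhrase &targetPhrase
                                 , const StackVec *stackVec
                                 , ScoreComponentCollection &scoreBreakdown
                                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
  {}

  // scored when a hypothesis is extended, in the phrase-based and chart decoders
  void EvaluateWhenApplied(const Hypothesis &hypo,
                           ScoreComponentCollection* accumulator) const
  {}
  void EvaluateWhenApplied(const ChartHypothesis &hypo,
                           ScoreComponentCollection* accumulator) const
  {}

  // new hook introduced by this patch: rewrite the input before decoding starts
  void ChangeSource(InputType *&input) const
  {}
};

}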
diff --git a/moses/FF/GlobalLexicalModel.cpp b/moses/FF/GlobalLexicalModel.cpp
index ff9e87bb0..f6eb165a8 100644
--- a/moses/FF/GlobalLexicalModel.cpp
+++ b/moses/FF/GlobalLexicalModel.cpp
@@ -165,7 +165,7 @@ float GlobalLexicalModel::GetFromCacheOrScorePhrase( const TargetPhrase& targetP
return score;
}
-void GlobalLexicalModel::Evaluate
+void GlobalLexicalModel::EvaluateWhenApplied
(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{
diff --git a/moses/FF/GlobalLexicalModel.h b/moses/FF/GlobalLexicalModel.h
index 664835df5..151dbf472 100644
--- a/moses/FF/GlobalLexicalModel.h
+++ b/moses/FF/GlobalLexicalModel.h
@@ -70,24 +70,24 @@ public:
bool IsUseable(const FactorMask &mask) const;
- void Evaluate(const Hypothesis& hypo,
+ void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const;
- void EvaluateChart(
+ void EvaluateWhenApplied(
const ChartHypothesis& hypo,
ScoreComponentCollection* accumulator) const {
throw std::logic_error("GlobalLexicalModel not supported in chart decoder, yet");
}
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/GlobalLexicalModelUnlimited.cpp b/moses/FF/GlobalLexicalModelUnlimited.cpp
index a6883a7e8..c8dbd5883 100644
--- a/moses/FF/GlobalLexicalModelUnlimited.cpp
+++ b/moses/FF/GlobalLexicalModelUnlimited.cpp
@@ -108,7 +108,7 @@ void GlobalLexicalModelUnlimited::InitializeForInput( Sentence const& in )
m_local->input = &in;
}
-void GlobalLexicalModelUnlimited::Evaluate(const Hypothesis& cur_hypo, ScoreComponentCollection* accumulator) const
+void GlobalLexicalModelUnlimited::EvaluateWhenApplied(const Hypothesis& cur_hypo, ScoreComponentCollection* accumulator) const
{
const Sentence& input = *(m_local->input);
const TargetPhrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();
diff --git a/moses/FF/GlobalLexicalModelUnlimited.h b/moses/FF/GlobalLexicalModelUnlimited.h
index f12df7d61..096254613 100644
--- a/moses/FF/GlobalLexicalModelUnlimited.h
+++ b/moses/FF/GlobalLexicalModelUnlimited.h
@@ -81,23 +81,23 @@ public:
//TODO: This implements the old interface, but cannot be updated because
//it appears to be stateful
- void Evaluate(const Hypothesis& cur_hypo,
+ void EvaluateWhenApplied(const Hypothesis& cur_hypo,
ScoreComponentCollection* accumulator) const;
- void EvaluateChart(const ChartHypothesis& /* cur_hypo */,
+ void EvaluateWhenApplied(const ChartHypothesis& /* cur_hypo */,
int /* featureID */,
ScoreComponentCollection* ) const {
throw std::logic_error("GlobalLexicalModelUnlimited not supported in chart decoder, yet");
}
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/HyperParameterAsWeight.h b/moses/FF/HyperParameterAsWeight.h
index 9db375c0f..aaad21c14 100644
--- a/moses/FF/HyperParameterAsWeight.h
+++ b/moses/FF/HyperParameterAsWeight.h
@@ -17,13 +17,13 @@ public:
virtual bool IsUseable(const FactorMask &mask) const
{ return true; }
- virtual void Evaluate(const Phrase &source
+ virtual void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{}
- virtual void Evaluate(const InputType &input
+ virtual void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
@@ -31,14 +31,14 @@ public:
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- virtual void Evaluate(const Hypothesis& hypo,
+ virtual void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
/**
* Same for chart-based features.
**/
- virtual void EvaluateChart(const ChartHypothesis &hypo,
+ virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
diff --git a/moses/FF/InputFeature.cpp b/moses/FF/InputFeature.cpp
index 0fa2005d1..61753c595 100644
--- a/moses/FF/InputFeature.cpp
+++ b/moses/FF/InputFeature.cpp
@@ -44,7 +44,7 @@ void InputFeature::SetParameter(const std::string& key, const std::string& value
}
-void InputFeature::Evaluate(const InputType &input
+void InputFeature::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
diff --git a/moses/FF/InputFeature.h b/moses/FF/InputFeature.h
index e4b1a8d99..ad4fe398a 100644
--- a/moses/FF/InputFeature.h
+++ b/moses/FF/InputFeature.h
@@ -41,22 +41,23 @@ public:
return m_numRealWordCount;
}
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{}
- void Evaluate(const InputType &input
+
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const;
- void Evaluate(const Hypothesis& hypo,
+ void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
- void EvaluateChart(const ChartHypothesis &hypo,
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
diff --git a/moses/FF/InternalStructStatelessFF.cpp b/moses/FF/InternalStructStatelessFF.cpp
deleted file mode 100644
index 06014a1cf..000000000
--- a/moses/FF/InternalStructStatelessFF.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-#include "InternalStructStatelessFF.h"
-#include "moses/InputPath.h"
-#include "moses/ScoreComponentCollection.h"
-using namespace std;
-
-namespace Moses
-{
-void InternalStructStatelessFF::Evaluate(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
-{
-// cerr << "MARIA!!!" << endl;
- scoreBreakdown.PlusEquals(this, 0);
-
-}
-
-void InternalStructStatelessFF::Evaluate(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
- {
-
-cerr << "HHHHH" << scoreBreakdown << endl;
-scoreBreakdown.PlusEquals(this, 66);
-/* FactorList f_mask;
- f_mask.push_back(0);
- //if(inputPath.GetPhrase().GetStringRep(f_mask).)
- int score =50;
- for(size_t i=0;i<inputPath.GetPhrase().GetSize();i++){
- if(inputPath.GetPhrase(). GetFactor(i,0)->GetString().as_string()=="ist"){
- //cout<<inputPath.GetPhrase().GetStringRep(f_mask);
- score+=1;
- }
- }
- scoreBreakdown.PlusEquals(this, score);
-*/
-}
-
-}
-
diff --git a/moses/FF/InternalStructStatelessFF.h b/moses/FF/InternalStructStatelessFF.h
deleted file mode 100644
index a0ea3f712..000000000
--- a/moses/FF/InternalStructStatelessFF.h
+++ /dev/null
@@ -1,40 +0,0 @@
-#pragma once
-
-#include <string>
-#include "StatelessFeatureFunction.h"
-
-namespace Moses
-{
-
-class InternalStructStatelessFF : public StatelessFeatureFunction
-{
-public:
- InternalStructStatelessFF(const std::string &line)
- :StatelessFeatureFunction(line)
- {}
-
- bool IsUseable(const FactorMask &mask) const
- { return true; }
-
- void Evaluate(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
-
- void Evaluate(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
- virtual void Evaluate(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
- void EvaluateChart(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
-
-};
-
-}
-
diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp
index 5a9758398..426a7d91c 100644
--- a/moses/FF/LexicalReordering/LexicalReordering.cpp
+++ b/moses/FF/LexicalReordering/LexicalReordering.cpp
@@ -14,11 +14,12 @@ LexicalReordering::LexicalReordering(const std::string &line)
{
std::cerr << "Initializing LexicalReordering.." << std::endl;
+ map<string,string> sparseArgs;
for (size_t i = 0; i < m_args.size(); ++i) {
const vector<string> &args = m_args[i];
if (args[0] == "type") {
- m_configuration = new LexicalReorderingConfiguration(args[1]);
+ m_configuration.reset(new LexicalReorderingConfiguration(args[1]));
m_configuration->SetScoreProducer(this);
m_modelTypeString = m_configuration->GetModelString();
} else if (args[0] == "input-factor") {
@@ -27,8 +28,10 @@ LexicalReordering::LexicalReordering(const std::string &line)
m_factorsE =Tokenize<FactorType>(args[1]);
} else if (args[0] == "path") {
m_filePath = args[1];
+ } else if (args[0].substr(0,7) == "sparse-") {
+ sparseArgs[args[0].substr(7)] = args[1];
} else {
- throw "Unknown argument " + args[0];
+ UTIL_THROW(util::Exception,"Unknown argument " + args[0]);
}
}
@@ -36,29 +39,29 @@ LexicalReordering::LexicalReordering(const std::string &line)
case LexicalReorderingConfiguration::FE:
case LexicalReorderingConfiguration::E:
if(m_factorsE.empty()) {
- throw "TL factor mask for lexical reordering is unexpectedly empty";
+ UTIL_THROW(util::Exception,"TL factor mask for lexical reordering is unexpectedly empty");
}
if(m_configuration->GetCondition() == LexicalReorderingConfiguration::E)
break; // else fall through
case LexicalReorderingConfiguration::F:
if(m_factorsF.empty()) {
- throw "SL factor mask for lexical reordering is unexpectedly empty";
+ UTIL_THROW(util::Exception,"SL factor mask for lexical reordering is unexpectedly empty");
}
break;
default:
- throw "Unknown conditioning option!";
+ UTIL_THROW(util::Exception,"Unknown conditioning option!");
}
+
+ m_configuration->ConfigureSparse(sparseArgs, this);
}
LexicalReordering::~LexicalReordering()
{
- delete m_table;
- delete m_configuration;
}
void LexicalReordering::Load()
{
- m_table = LexicalReorderingTable::LoadAvailable(m_filePath, m_factorsF, m_factorsE, std::vector<FactorType>());
+ m_table.reset(LexicalReorderingTable::LoadAvailable(m_filePath, m_factorsF, m_factorsE, std::vector<FactorType>()));
}
Scores LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const
@@ -66,13 +69,13 @@ Scores LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const
return m_table->GetScore(f, e, Phrase(ARRAY_SIZE_INCR));
}
-FFState* LexicalReordering::Evaluate(const Hypothesis& hypo,
+FFState* LexicalReordering::EvaluateWhenApplied(const Hypothesis& hypo,
const FFState* prev_state,
ScoreComponentCollection* out) const
{
Scores score(GetNumScoreComponents(), 0);
- const LexicalReorderingState *prev = static_cast<const LexicalReorderingState *>(prev_state);
- LexicalReorderingState *next_state = prev->Expand(hypo.GetTranslationOption(), score);
+ const LexicalReorderingState *prev = dynamic_cast<const LexicalReorderingState *>(prev_state);
+ LexicalReorderingState *next_state = prev->Expand(hypo.GetTranslationOption(), hypo.GetInput(), out);
out->PlusEquals(this, score);
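The sparse-* handling added above means a lexical reordering feature line can carry extra arguments of the form sparse-words-(source|target)-<id>=<word list file> (see the checks in SparseReordering.cpp later in this patch). LexicalReordering strips the "sparse-" prefix before handing the key to SparseReordering, which then splits the remainder on '-' into feature type, side and word-list id. A self-contained sketch of that key handling, with a made-up key and file name:

#include <iostream>
#include <map>
#include <string>

int main() {
  std::map<std::string, std::string> sparseArgs;
  const std::pair<std::string, std::string> arg("sparse-words-source-top", "top100.src");
  if (arg.first.substr(0, 7) == "sparse-") {
    sparseArgs[arg.first.substr(7)] = arg.second;  // key becomes "words-source-top"
  }
  // SparseReordering later splits "words-source-top" into {"words", "source", "top"}:
  // feature type, side, and word-list id.
  std::cout << sparseArgs["words-source-top"] << std::endl;  // prints "top100.src"
  return 0;
}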
diff --git a/moses/FF/LexicalReordering/LexicalReordering.h b/moses/FF/LexicalReordering/LexicalReordering.h
index 4ff0057f0..09d3b73cc 100644
--- a/moses/FF/LexicalReordering/LexicalReordering.h
+++ b/moses/FF/LexicalReordering/LexicalReordering.h
@@ -3,17 +3,20 @@
#include <string>
#include <vector>
+#include <boost/scoped_ptr.hpp>
#include "moses/Factor.h"
#include "moses/Phrase.h"
#include "moses/TypeDef.h"
#include "moses/Util.h"
#include "moses/WordsRange.h"
-#include "LexicalReorderingState.h"
-#include "LexicalReorderingTable.h"
#include "moses/FF/StatefulFeatureFunction.h"
#include "util/exception.hh"
+#include "LexicalReorderingState.h"
+#include "LexicalReorderingTable.h"
+#include "SparseReordering.h"
+
namespace Moses
{
@@ -42,23 +45,23 @@ public:
Scores GetProb(const Phrase& f, const Phrase& e) const;
- virtual FFState* Evaluate(const Hypothesis& cur_hypo,
+ virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
- virtual FFState* EvaluateChart(const ChartHypothesis&,
+ virtual FFState* EvaluateWhenApplied(const ChartHypothesis&,
int /* featureID */,
ScoreComponentCollection*) const {
UTIL_THROW(util::Exception, "LexicalReordering is not valid for chart decoder");
}
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
@@ -69,10 +72,10 @@ private:
bool DecodeDirection(std::string s);
bool DecodeNumFeatureFunctions(std::string s);
- LexicalReorderingConfiguration *m_configuration;
+ boost::scoped_ptr<LexicalReorderingConfiguration> m_configuration;
std::string m_modelTypeString;
std::vector<std::string> m_modelType;
- LexicalReorderingTable* m_table;
+ boost::scoped_ptr<LexicalReorderingTable> m_table;
//std::vector<Direction> m_direction;
std::vector<LexicalReorderingConfiguration::Condition> m_condition;
//std::vector<size_t> m_scoreOffset;
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
index 6e8647837..fa88fdeab 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
@@ -5,11 +5,11 @@
#include "moses/FF/FFState.h"
#include "moses/Hypothesis.h"
#include "moses/WordsRange.h"
-#include "moses/ReorderingStack.h"
#include "moses/TranslationOption.h"
#include "LexicalReordering.h"
#include "LexicalReorderingState.h"
+#include "ReorderingStack.h"
namespace Moses
{
@@ -38,6 +38,14 @@ size_t LexicalReorderingConfiguration::GetNumScoreComponents() const
}
}
+void LexicalReorderingConfiguration::ConfigureSparse
+ (const std::map<std::string,std::string>& sparseArgs, const LexicalReordering* producer)
+{
+ if (sparseArgs.size()) {
+ m_sparse.reset(new SparseReordering(sparseArgs, producer));
+ }
+}
+
void LexicalReorderingConfiguration::SetAdditionalScoreComponents(size_t number)
{
m_additionalScoreComponents = number;
@@ -122,52 +130,52 @@ LexicalReorderingState *LexicalReorderingConfiguration::CreateLexicalReorderingS
return new BidirectionalReorderingState(*this, bwd, fwd, 0);
}
-void LexicalReorderingState::CopyScores(Scores& scores, const TranslationOption &topt, ReorderingType reoType) const
+void LexicalReorderingState::CopyScores(ScoreComponentCollection* accum, const TranslationOption &topt, const InputType& input, ReorderingType reoType) const
{
// don't call this on a bidirectional object
UTIL_THROW_IF2(m_direction != LexicalReorderingConfiguration::Backward && m_direction != LexicalReorderingConfiguration::Forward,
"Unknown direction: " << m_direction);
- const Scores *cachedScores = (m_direction == LexicalReorderingConfiguration::Backward) ?
- topt.GetLexReorderingScores(m_configuration.GetScoreProducer()) : m_prevScore;
-
- // No scores available. TODO: Using a good prior distribution would be nicer.
- if(cachedScores == NULL)
- return;
-
- const Scores &scoreSet = *cachedScores;
- if(m_configuration.CollapseScores())
- scores[m_offset] = scoreSet[m_offset + reoType];
- else {
- std::fill(scores.begin() + m_offset, scores.begin() + m_offset + m_configuration.GetNumberOfTypes(), 0);
- scores[m_offset + reoType] = scoreSet[m_offset + reoType];
+ const TranslationOption* relevantOpt = &topt;
+ if (m_direction != LexicalReorderingConfiguration::Backward) relevantOpt = m_prevOption;
+ const Scores *cachedScores = relevantOpt->GetLexReorderingScores(m_configuration.GetScoreProducer());
+
+ if(cachedScores) {
+ Scores scores(m_configuration.GetScoreProducer()->GetNumScoreComponents(),0);
+
+ const Scores &scoreSet = *cachedScores;
+ if(m_configuration.CollapseScores())
+ scores[m_offset] = scoreSet[m_offset + reoType];
+ else {
+ std::fill(scores.begin() + m_offset, scores.begin() + m_offset + m_configuration.GetNumberOfTypes(), 0);
+ scores[m_offset + reoType] = scoreSet[m_offset + reoType];
+ }
+ accum->PlusEquals(m_configuration.GetScoreProducer(), scores);
}
-}
-void LexicalReorderingState::ClearScores(Scores& scores) const
-{
- if(m_configuration.CollapseScores())
- scores[m_offset] = 0;
- else
- std::fill(scores.begin() + m_offset, scores.begin() + m_offset + m_configuration.GetNumberOfTypes(), 0);
+ const SparseReordering* sparse = m_configuration.GetSparseReordering();
+ if (sparse) sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType, m_direction, accum);
+
}
-int LexicalReorderingState::ComparePrevScores(const Scores *other) const
+
+int LexicalReorderingState::ComparePrevScores(const TranslationOption *other) const
{
- if(m_prevScore == other)
+ const Scores* myPrevScores = m_prevOption->GetLexReorderingScores(m_configuration.GetScoreProducer());
+ const Scores* otherPrevScores = other->GetLexReorderingScores(m_configuration.GetScoreProducer());
+
+ if(myPrevScores == otherPrevScores)
return 0;
// The pointers are NULL if a phrase pair isn't found in the reordering table.
- if(other == NULL)
+ if(otherPrevScores == NULL)
return -1;
- if(m_prevScore == NULL)
+ if(myPrevScores == NULL)
return 1;
- const Scores &my = *m_prevScore;
- const Scores &their = *other;
for(size_t i = m_offset; i < m_offset + m_configuration.GetNumberOfTypes(); i++)
- if(my[i] < their[i])
+ if((*myPrevScores)[i] < (*otherPrevScores)[i])
return -1;
- else if(my[i] > their[i])
+ else if((*myPrevScores)[i] > (*otherPrevScores)[i])
return 1;
return 0;
@@ -192,7 +200,7 @@ int PhraseBasedReorderingState::Compare(const FFState& o) const
const PhraseBasedReorderingState* other = static_cast<const PhraseBasedReorderingState*>(&o);
if (m_prevRange == other->m_prevRange) {
if (m_direction == LexicalReorderingConfiguration::Forward) {
- return ComparePrevScores(other->m_prevScore);
+ return ComparePrevScores(other->m_prevOption);
} else {
return 0;
}
@@ -202,27 +210,23 @@ int PhraseBasedReorderingState::Compare(const FFState& o) const
return 1;
}
-LexicalReorderingState* PhraseBasedReorderingState::Expand(const TranslationOption& topt, Scores& scores) const
+LexicalReorderingState* PhraseBasedReorderingState::Expand(const TranslationOption& topt, const InputType& input,ScoreComponentCollection* scores) const
{
ReorderingType reoType;
const WordsRange currWordsRange = topt.GetSourceWordsRange();
const LexicalReorderingConfiguration::ModelType modelType = m_configuration.GetModelType();
- if (m_direction == LexicalReorderingConfiguration::Forward && m_first) {
- ClearScores(scores);
- } else {
- if (!m_first || m_useFirstBackwardScore) {
- if (modelType == LexicalReorderingConfiguration::MSD) {
- reoType = GetOrientationTypeMSD(currWordsRange);
- } else if (modelType == LexicalReorderingConfiguration::MSLR) {
- reoType = GetOrientationTypeMSLR(currWordsRange);
- } else if (modelType == LexicalReorderingConfiguration::Monotonic) {
- reoType = GetOrientationTypeMonotonic(currWordsRange);
- } else {
- reoType = GetOrientationTypeLeftRight(currWordsRange);
- }
- CopyScores(scores, topt, reoType);
+ if ((m_direction != LexicalReorderingConfiguration::Forward && m_useFirstBackwardScore) || !m_first) {
+ if (modelType == LexicalReorderingConfiguration::MSD) {
+ reoType = GetOrientationTypeMSD(currWordsRange);
+ } else if (modelType == LexicalReorderingConfiguration::MSLR) {
+ reoType = GetOrientationTypeMSLR(currWordsRange);
+ } else if (modelType == LexicalReorderingConfiguration::Monotonic) {
+ reoType = GetOrientationTypeMonotonic(currWordsRange);
+ } else {
+ reoType = GetOrientationTypeLeftRight(currWordsRange);
}
+ CopyScores(scores, topt, input, reoType);
}
return new PhraseBasedReorderingState(this, topt);
@@ -300,10 +304,10 @@ int BidirectionalReorderingState::Compare(const FFState& o) const
return m_forward->Compare(*other.m_forward);
}
-LexicalReorderingState* BidirectionalReorderingState::Expand(const TranslationOption& topt, Scores& scores) const
+LexicalReorderingState* BidirectionalReorderingState::Expand(const TranslationOption& topt, const InputType& input, ScoreComponentCollection* scores) const
{
- LexicalReorderingState *newbwd = m_backward->Expand(topt, scores);
- LexicalReorderingState *newfwd = m_forward->Expand(topt, scores);
+ LexicalReorderingState *newbwd = m_backward->Expand(topt,input, scores);
+ LexicalReorderingState *newfwd = m_forward->Expand(topt, input, scores);
return new BidirectionalReorderingState(m_configuration, newbwd, newfwd, m_offset);
}
@@ -324,7 +328,7 @@ int HierarchicalReorderingBackwardState::Compare(const FFState& o) const
return m_reoStack.Compare(other.m_reoStack);
}
-LexicalReorderingState* HierarchicalReorderingBackwardState::Expand(const TranslationOption& topt, Scores& scores) const
+LexicalReorderingState* HierarchicalReorderingBackwardState::Expand(const TranslationOption& topt, const InputType& input,ScoreComponentCollection* scores) const
{
HierarchicalReorderingBackwardState* nextState = new HierarchicalReorderingBackwardState(this, topt, m_reoStack);
@@ -343,7 +347,7 @@ LexicalReorderingState* HierarchicalReorderingBackwardState::Expand(const Transl
reoType = GetOrientationTypeMonotonic(reoDistance);
}
- CopyScores(scores, topt, reoType);
+ CopyScores(scores, topt, input, reoType);
return nextState;
}
@@ -409,7 +413,7 @@ int HierarchicalReorderingForwardState::Compare(const FFState& o) const
const HierarchicalReorderingForwardState* other = static_cast<const HierarchicalReorderingForwardState*>(&o);
if (m_prevRange == other->m_prevRange) {
- return ComparePrevScores(other->m_prevScore);
+ return ComparePrevScores(other->m_prevOption);
} else if (m_prevRange < other->m_prevRange) {
return -1;
}
@@ -427,7 +431,7 @@ int HierarchicalReorderingForwardState::Compare(const FFState& o) const
// dright: if the next phrase follows the conditioning phrase and other stuff comes in between
// dleft: if the next phrase precedes the conditioning phrase and other stuff comes in between
-LexicalReorderingState* HierarchicalReorderingForwardState::Expand(const TranslationOption& topt, Scores& scores) const
+LexicalReorderingState* HierarchicalReorderingForwardState::Expand(const TranslationOption& topt, const InputType& input,ScoreComponentCollection* scores) const
{
const LexicalReorderingConfiguration::ModelType modelType = m_configuration.GetModelType();
const WordsRange currWordsRange = topt.GetSourceWordsRange();
@@ -438,7 +442,7 @@ LexicalReorderingState* HierarchicalReorderingForwardState::Expand(const Transla
ReorderingType reoType;
if (m_first) {
- ClearScores(scores);
+
} else {
if (modelType == LexicalReorderingConfiguration::MSD) {
reoType = GetOrientationTypeMSD(currWordsRange, coverage);
@@ -450,7 +454,7 @@ LexicalReorderingState* HierarchicalReorderingForwardState::Expand(const Transla
reoType = GetOrientationTypeLeftRight(currWordsRange, coverage);
}
- CopyScores(scores, topt, reoType);
+ CopyScores(scores, topt, input, reoType);
}
return new HierarchicalReorderingForwardState(this, topt);
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h
index 8e237adc1..e309ed7f1 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.h
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.h
@@ -4,22 +4,25 @@
#include <vector>
#include <string>
+#include <boost/scoped_ptr.hpp>
+
#include "moses/Hypothesis.h"
-#include "LexicalReordering.h"
+//#include "LexicalReordering.h"
+#include "moses/ScoreComponentCollection.h"
#include "moses/WordsRange.h"
#include "moses/WordsBitmap.h"
-#include "moses/ReorderingStack.h"
#include "moses/TranslationOption.h"
#include "moses/FF/FFState.h"
+#include "ReorderingStack.h"
namespace Moses
{
class LexicalReorderingState;
class LexicalReordering;
+class SparseReordering;
/** Factory class for lexical reordering states
- * @todo There's a lot of classes for lexicalized reordering. Perhaps put them in a separate dir
*/
class LexicalReorderingConfiguration
{
@@ -31,6 +34,8 @@ public:
LexicalReorderingConfiguration(const std::string &modelType);
+ void ConfigureSparse(const std::map<std::string,std::string>& sparseArgs, const LexicalReordering* producer);
+
LexicalReorderingState *CreateLexicalReorderingState(const InputType &input) const;
size_t GetNumScoreComponents() const;
@@ -62,6 +67,10 @@ public:
return m_collapseScores;
}
+ const SparseReordering* GetSparseReordering() const {
+ return m_sparse.get();
+ }
+
private:
void SetScoreProducer(LexicalReordering* scoreProducer) {
m_scoreProducer = scoreProducer;
@@ -79,6 +88,7 @@ private:
Direction m_direction;
Condition m_condition;
size_t m_additionalScoreComponents;
+ boost::scoped_ptr<SparseReordering> m_sparse;
};
//! Abstract class for lexical reordering model states
@@ -86,34 +96,35 @@ class LexicalReorderingState : public FFState
{
public:
virtual int Compare(const FFState& o) const = 0;
- virtual LexicalReorderingState* Expand(const TranslationOption& hypo, Scores& scores) const = 0;
+ virtual LexicalReorderingState* Expand(const TranslationOption& hypo, const InputType& input, ScoreComponentCollection* scores) const = 0;
static LexicalReorderingState* CreateLexicalReorderingState(const std::vector<std::string>& config,
LexicalReorderingConfiguration::Direction dir, const InputType &input);
+ typedef int ReorderingType;
protected:
- typedef int ReorderingType;
const LexicalReorderingConfiguration &m_configuration;
// The following is the true direction of the object, which can be Backward or Forward even if the Configuration has Bidirectional.
LexicalReorderingConfiguration::Direction m_direction;
size_t m_offset;
- const Scores *m_prevScore;
+ //forward scores are conditioned on prev option, so need to remember it
+ const TranslationOption *m_prevOption;
inline LexicalReorderingState(const LexicalReorderingState *prev, const TranslationOption &topt) :
m_configuration(prev->m_configuration), m_direction(prev->m_direction), m_offset(prev->m_offset),
- m_prevScore(topt.GetLexReorderingScores(m_configuration.GetScoreProducer())) {}
+ m_prevOption(&topt) {}
inline LexicalReorderingState(const LexicalReorderingConfiguration &config, LexicalReorderingConfiguration::Direction dir, size_t offset)
- : m_configuration(config), m_direction(dir), m_offset(offset), m_prevScore(NULL) {}
+ : m_configuration(config), m_direction(dir), m_offset(offset), m_prevOption(NULL) {}
// copy the right scores in the right places, taking into account forward/backward, offset, collapse
- void CopyScores(Scores& scores, const TranslationOption& topt, ReorderingType reoType) const;
- void ClearScores(Scores& scores) const;
- int ComparePrevScores(const Scores *other) const;
+ void CopyScores(ScoreComponentCollection* scores, const TranslationOption& topt, const InputType& input, ReorderingType reoType) const;
+ int ComparePrevScores(const TranslationOption *other) const;
//constants for the different type of reorderings (corresponding to indexes in the table file)
+ public:
static const ReorderingType M = 0; // monotonic
static const ReorderingType NM = 1; // non-monotonic
static const ReorderingType S = 1; // swap
@@ -122,6 +133,7 @@ protected:
static const ReorderingType DR = 3; // discontinuous, right
static const ReorderingType R = 0; // right
static const ReorderingType L = 1; // left
+ static const ReorderingType MAX = 3; //largest possible
};
//! @todo what is this?
@@ -140,7 +152,7 @@ public:
}
virtual int Compare(const FFState& o) const;
- virtual LexicalReorderingState* Expand(const TranslationOption& topt, Scores& scores) const;
+ virtual LexicalReorderingState* Expand(const TranslationOption& topt, const InputType& input, ScoreComponentCollection* scores) const;
};
//! State for the standard Moses implementation of lexical reordering models
@@ -156,7 +168,7 @@ public:
PhraseBasedReorderingState(const PhraseBasedReorderingState *prev, const TranslationOption &topt);
virtual int Compare(const FFState& o) const;
- virtual LexicalReorderingState* Expand(const TranslationOption& topt, Scores& scores) const;
+ virtual LexicalReorderingState* Expand(const TranslationOption& topt,const InputType& input, ScoreComponentCollection* scores) const;
ReorderingType GetOrientationTypeMSD(WordsRange currRange) const;
ReorderingType GetOrientationTypeMSLR(WordsRange currRange) const;
@@ -177,7 +189,7 @@ public:
const TranslationOption &topt, ReorderingStack reoStack);
virtual int Compare(const FFState& o) const;
- virtual LexicalReorderingState* Expand(const TranslationOption& hypo, Scores& scores) const;
+ virtual LexicalReorderingState* Expand(const TranslationOption& hypo, const InputType& input, ScoreComponentCollection* scores) const;
private:
ReorderingType GetOrientationTypeMSD(int reoDistance) const;
@@ -200,7 +212,7 @@ public:
HierarchicalReorderingForwardState(const HierarchicalReorderingForwardState *prev, const TranslationOption &topt);
virtual int Compare(const FFState& o) const;
- virtual LexicalReorderingState* Expand(const TranslationOption& hypo, Scores& scores) const;
+ virtual LexicalReorderingState* Expand(const TranslationOption& hypo, const InputType& input, ScoreComponentCollection* scores) const;
private:
ReorderingType GetOrientationTypeMSD(WordsRange currRange, WordsBitmap coverage) const;
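To show the new Expand()/CopyScores() contract in one place, here is a toy reordering state written against the interface above (illustrative only; the class and its always-monotone behaviour are invented, and it is meant to be constructed with a Backward or Forward direction — the real implementations are PhraseBasedReorderingState and the hierarchical states in this patch):

#include "LexicalReorderingState.h"

namespace Moses
{

class ExampleMonotoneState : public LexicalReorderingState
{
public:
  // initial (empty hypothesis) state
  ExampleMonotoneState(const LexicalReorderingConfiguration &config,
                       LexicalReorderingConfiguration::Direction dir, size_t offset)
    : LexicalReorderingState(config, dir, offset), m_first(true) {}

  // state after applying a translation option
  ExampleMonotoneState(const ExampleMonotoneState *prev, const TranslationOption &topt)
    : LexicalReorderingState(prev, topt), m_first(false) {}

  virtual int Compare(const FFState &o) const {
    return 0; // toy state: every hypothesis recombines
  }

  virtual LexicalReorderingState* Expand(const TranslationOption &topt,
      const InputType &input, ScoreComponentCollection *scores) const {
    if (!m_first) {
      // score every expansion as monotone; CopyScores() adds the dense scores to the
      // accumulator and fires the sparse features when they are configured
      CopyScores(scores, topt, input, M);
    }
    return new ExampleMonotoneState(this, topt);
  }

private:
  bool m_first;
};

}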
diff --git a/moses/ReorderingStack.cpp b/moses/FF/LexicalReordering/ReorderingStack.cpp
index 49a723a36..49a723a36 100644
--- a/moses/ReorderingStack.cpp
+++ b/moses/FF/LexicalReordering/ReorderingStack.cpp
diff --git a/moses/ReorderingStack.h b/moses/FF/LexicalReordering/ReorderingStack.h
index 730b17ce3..5a5b80d16 100644
--- a/moses/ReorderingStack.h
+++ b/moses/FF/LexicalReordering/ReorderingStack.h
@@ -12,7 +12,7 @@
//#include "Phrase.h"
//#include "TypeDef.h"
//#include "Util.h"
-#include "WordsRange.h"
+#include "moses/WordsRange.h"
namespace Moses
{
diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp
new file mode 100644
index 000000000..f62dcde8b
--- /dev/null
+++ b/moses/FF/LexicalReordering/SparseReordering.cpp
@@ -0,0 +1,254 @@
+#include <fstream>
+
+#include "moses/FactorCollection.h"
+#include "moses/InputPath.h"
+#include "moses/Util.h"
+
+#include "util/exception.hh"
+
+#include "util/file_piece.hh"
+#include "util/string_piece.hh"
+#include "util/tokenize_piece.hh"
+
+#include "LexicalReordering.h"
+#include "SparseReordering.h"
+
+
+using namespace std;
+
+namespace Moses
+{
+
+const std::string& SparseReorderingFeatureKey::Name (const string& wordListId) {
+ static string kSep = "-";
+ static string name;
+ ostringstream buf;
+ // type side position id word reotype
+ if (type == Phrase) {
+ buf << "phr";
+ } else if (type == Stack) {
+ buf << "stk";
+ } else if (type == Between) {
+ buf << "btn";
+ }
+ buf << kSep;
+ if (side == Source) {
+ buf << "src";
+ } else if (side == Target) {
+ buf << "tgt";
+ }
+ buf << kSep;
+ if (position == First) {
+ buf << "first";
+ } else if (position == Last) {
+ buf << "last";
+ }
+ buf << kSep;
+ buf << wordListId;
+ buf << kSep;
+ if (isCluster) buf << "cluster_";
+ buf << word->GetString();
+ buf << kSep;
+ buf << reoType;
+ name = buf.str();
+ return name;
+}
+
+SparseReordering::SparseReordering(const map<string,string>& config, const LexicalReordering* producer)
+ : m_producer(producer)
+{
+ static const string kSource= "source";
+ static const string kTarget = "target";
+ for (map<string,string>::const_iterator i = config.begin(); i != config.end(); ++i) {
+ vector<string> fields = Tokenize(i->first, "-");
+ if (fields[0] == "words") {
+ UTIL_THROW_IF(!(fields.size() == 3), util::Exception, "Sparse reordering word list name should be sparse-words-(source|target)-<id>");
+ if (fields[1] == kSource) {
+ ReadWordList(i->second,fields[2], SparseReorderingFeatureKey::Source, &m_sourceWordLists);
+ } else if (fields[1] == kTarget) {
+ ReadWordList(i->second,fields[2],SparseReorderingFeatureKey::Target, &m_targetWordLists);
+ } else {
+ UTIL_THROW(util::Exception, "Sparse reordering requires source or target, not " << fields[1]);
+ }
+ } else if (fields[0] == "clusters") {
+ UTIL_THROW_IF(!(fields.size() == 3), util::Exception, "Sparse reordering cluster name should be sparse-clusters-(source|target)-<id>");
+ if (fields[1] == kSource) {
+ ReadClusterMap(i->second,fields[2], SparseReorderingFeatureKey::Source, &m_sourceClusterMaps);
+ } else if (fields[1] == kTarget) {
+ ReadClusterMap(i->second,fields[2],SparseReorderingFeatureKey::Target, &m_targetClusterMaps);
+ } else {
+ UTIL_THROW(util::Exception, "Sparse reordering requires source or target, not " << fields[1]);
+ }
+
+ } else if (fields[0] == "phrase") {
+ m_usePhrase = true;
+ } else if (fields[0] == "stack") {
+ m_useStack = true;
+ } else if (fields[0] == "between") {
+ m_useBetween = true;
+ } else {
+ UTIL_THROW(util::Exception, "Unable to parse sparse reordering option: " << i->first);
+ }
+ }
+
+}
+
+void SparseReordering::PreCalculateFeatureNames(size_t index, const string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster) {
+ for (size_t type = SparseReorderingFeatureKey::Stack;
+ type <= SparseReorderingFeatureKey::Between; ++type) {
+ for (size_t position = SparseReorderingFeatureKey::First;
+ position <= SparseReorderingFeatureKey::Last; ++position) {
+ for (int reoType = 0; reoType <= LexicalReorderingState::MAX; ++reoType) {
+ SparseReorderingFeatureKey key(
+ index, static_cast<SparseReorderingFeatureKey::Type>(type), factor, isCluster,
+ static_cast<SparseReorderingFeatureKey::Position>(position), side, reoType);
+ m_featureMap.insert(pair<SparseReorderingFeatureKey, FName>(key,m_producer->GetFeatureName(key.Name(id))));
+ }
+ }
+ }
+}
+
+void SparseReordering::ReadWordList(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<WordList>* pWordLists) {
+ ifstream fh(filename.c_str());
+ UTIL_THROW_IF(!fh, util::Exception, "Unable to open: " << filename);
+ string line;
+ pWordLists->push_back(WordList());
+ pWordLists->back().first = id;
+ while (getline(fh,line)) {
+ //TODO: StringPiece
+ const Factor* factor = FactorCollection::Instance().AddFactor(line);
+ pWordLists->back().second.insert(factor);
+ PreCalculateFeatureNames(pWordLists->size()-1, id, side, factor, false);
+
+ }
+}
+
+void SparseReordering::ReadClusterMap(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<ClusterMap>* pClusterMaps) {
+ pClusterMaps->push_back(ClusterMap());
+ pClusterMaps->back().first = id;
+ util::FilePiece file(filename.c_str());
+ StringPiece line;
+ while (true) {
+ try {
+ line = file.ReadLine();
+ } catch (const util::EndOfFileException &e) {
+ break;
+ }
+ util::TokenIter<util::SingleCharacter, true> lineIter(line,util::SingleCharacter('\t'));
+ if (!lineIter) UTIL_THROW(util::Exception, "Malformed cluster line (missing word): '" << line << "'");
+ const Factor* wordFactor = FactorCollection::Instance().AddFactor(*lineIter);
+ ++lineIter;
+ if (!lineIter) UTIL_THROW(util::Exception, "Malformed cluster line (missing cluster id): '" << line << "'");
+ const Factor* idFactor = FactorCollection::Instance().AddFactor(*lineIter);
+ pClusterMaps->back().second[wordFactor] = idFactor;
+ PreCalculateFeatureNames(pClusterMaps->size()-1, id, side, idFactor, true);
+ }
+}
+
+void SparseReordering::AddFeatures(
+ SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,
+ const Word& word, SparseReorderingFeatureKey::Position position,
+ LexicalReorderingState::ReorderingType reoType,
+ ScoreComponentCollection* scores) const {
+
+ const Factor* wordFactor = word.GetFactor(0);
+
+ const vector<WordList>* wordLists;
+ const vector<ClusterMap>* clusterMaps;
+ if (side == SparseReorderingFeatureKey::Source) {
+ wordLists = &m_sourceWordLists;
+ clusterMaps = &m_sourceClusterMaps;
+ } else {
+ wordLists = &m_targetWordLists;
+ clusterMaps = &m_targetClusterMaps;
+ }
+
+ for (size_t id = 0; id < wordLists->size(); ++id) {
+ if ((*wordLists)[id].second.find(wordFactor) == (*wordLists)[id].second.end()) continue;
+ SparseReorderingFeatureKey key(id, type, wordFactor, false, position, side, reoType);
+ FeatureMap::const_iterator fmi = m_featureMap.find(key);
+ assert(fmi != m_featureMap.end());
+ scores->SparsePlusEquals(fmi->second, 1.0);
+ }
+
+ for (size_t id = 0; id < clusterMaps->size(); ++id) {
+ const ClusterMap& clusterMap = (*clusterMaps)[id];
+ boost::unordered_map<const Factor*, const Factor*>::const_iterator clusterIter
+ = clusterMap.second.find(wordFactor);
+ if (clusterIter != clusterMap.second.end()) {
+ SparseReorderingFeatureKey key(id, type, clusterIter->second, true, position, side, reoType);
+ FeatureMap::const_iterator fmi = m_featureMap.find(key);
+ assert(fmi != m_featureMap.end());
+ scores->SparsePlusEquals(fmi->second, 1.0);
+ }
+ }
+
+}
+
+void SparseReordering::CopyScores(
+ const TranslationOption& currentOpt,
+ const TranslationOption* previousOpt,
+ const InputType& input,
+ LexicalReorderingState::ReorderingType reoType,
+ LexicalReorderingConfiguration::Direction direction,
+ ScoreComponentCollection* scores) const
+{
+ if (m_useBetween && direction == LexicalReorderingConfiguration::Backward &&
+ (reoType == LexicalReorderingState::D || reoType == LexicalReorderingState::DL ||
+ reoType == LexicalReorderingState::DR)) {
+ size_t gapStart, gapEnd;
+ //NB: Using a static cast for speed, but could be nasty if
+ //using non-sentence input
+ const Sentence& sentence = static_cast<const Sentence&>(input);
+ const WordsRange& currentRange = currentOpt.GetSourceWordsRange();
+ if (previousOpt) {
+ const WordsRange& previousRange = previousOpt->GetSourceWordsRange();
+ if (previousRange < currentRange) {
+ gapStart = previousRange.GetEndPos() + 1;
+ gapEnd = currentRange.GetStartPos();
+ } else {
+ gapStart = currentRange.GetEndPos() + 1;
+ gapEnd = previousRange.GetStartPos();
+ }
+ } else {
+ //start of sentence
+ gapStart = 0;
+ gapEnd = currentRange.GetStartPos();
+ }
+ assert(gapStart < gapEnd);
+ for (size_t i = gapStart; i < gapEnd; ++i) {
+ AddFeatures(SparseReorderingFeatureKey::Between,
+ SparseReorderingFeatureKey::Source, sentence.GetWord(i),
+ SparseReorderingFeatureKey::First, reoType, scores);
+ }
+ }
+ //std::cerr << "SR " << topt << " " << reoType << " " << direction << std::endl;
+ //phrase (backward)
+ //stack (forward)
+ SparseReorderingFeatureKey::Type type;
+ if (direction == LexicalReorderingConfiguration::Forward) {
+ if (!m_useStack) return;
+ type = SparseReorderingFeatureKey::Stack;
+ } else if (direction == LexicalReorderingConfiguration::Backward) {
+ if (!m_usePhrase) return;
+ type = SparseReorderingFeatureKey::Phrase;
+ } else {
+ //Shouldn't be called for bidirectional
+ //keep compiler happy
+ type = SparseReorderingFeatureKey::Phrase;
+ assert(!"Shouldn't call CopyScores() with bidirectional direction");
+ }
+ const Phrase& sourcePhrase = currentOpt.GetInputPath().GetPhrase();
+ AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(0),
+ SparseReorderingFeatureKey::First, reoType, scores);
+ AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(sourcePhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores);
+ const Phrase& targetPhrase = currentOpt.GetTargetPhrase();
+ AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(0),
+ SparseReorderingFeatureKey::First, reoType, scores);
+ AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(targetPhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores);
+
+
+}
+
+} //namespace
+
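For reference, ReadWordList above expects one surface word per line in the word-list file, and ReadClusterMap expects tab-separated lines of the form word<TAB>cluster-id. The feature names pre-computed by SparseReorderingFeatureKey::Name are the dash-joined fields

  <phr|stk|btn>-<src|tgt>-<first|last>-<wordListId>-[cluster_]<word>-<reoType>

so, purely as an illustration (the list id "names", the word "obama" and the trailing ReorderingType value are made up), a stack feature firing on the first source word might be named stk-src-first-names-obama-1.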
diff --git a/moses/FF/LexicalReordering/SparseReordering.h b/moses/FF/LexicalReordering/SparseReordering.h
new file mode 100644
index 000000000..663785a88
--- /dev/null
+++ b/moses/FF/LexicalReordering/SparseReordering.h
@@ -0,0 +1,133 @@
+#ifndef moses_FF_LexicalReordering_SparseReordering_h
+#define moses_FF_LexicalReordering_SparseReordering_h
+
+/**
+ * Sparse reordering features for phrase-based MT, following Cherry (NAACL, 2013)
+**/
+
+
+#include <functional>
+#include <map>
+#include <string>
+#include <vector>
+
+#include <boost/unordered_set.hpp>
+
+#include "util/murmur_hash.hh"
+#include "util/pool.hh"
+#include "util/string_piece.hh"
+
+#include "moses/FeatureVector.h"
+#include "moses/ScoreComponentCollection.h"
+#include "LexicalReorderingState.h"
+
+/**
+ Configuration of sparse reordering:
+
+ The sparse reordering feature is configured using sparse-* configs in the lexical reordering line.
+ sparse-words-(source|target)-<id>=<filename> -- Features which fire for the words in the list
+ sparse-clusters-(source|target)-<id>=<filename> -- Features which fire for clusters in the list. Format
+ of cluster file TBD
+ sparse-phrase -- Add features which depend on the current phrase (backward)
+ sparse-stack -- Add features which depend on the previous phrase, or
+ top of stack. (forward)
+ sparse-between -- Add features which depend on words between previous phrase
+ (or top of stack) and current phrase.
+**/
+
+namespace Moses
+{
+
+/**
+ * Used to store pre-calculated feature names.
+**/
+struct SparseReorderingFeatureKey {
+ size_t id;
+ enum Type {Stack, Phrase, Between} type;
+ const Factor* word;
+ bool isCluster;
+ enum Position {First, Last} position;
+ enum Side {Source, Target} side;
+ LexicalReorderingState::ReorderingType reoType;
+
+ SparseReorderingFeatureKey(size_t id_, Type type_, const Factor* word_, bool isCluster_,
+ Position position_, Side side_, LexicalReorderingState::ReorderingType reoType_)
+ : id(id_), type(type_), word(word_), isCluster(isCluster_),
+ position(position_), side(side_), reoType(reoType_)
+ {}
+
+ const std::string& Name(const std::string& wordListId) ;
+};
+
+struct HashSparseReorderingFeatureKey : public std::unary_function<SparseReorderingFeatureKey, std::size_t> {
+ std::size_t operator()(const SparseReorderingFeatureKey& key) const {
+ //TODO: can we just hash the memory?
+ //not sure, there could be random padding
+ std::size_t seed = 0;
+ seed = util::MurmurHashNative(&key.id, sizeof(key.id), seed);
+ seed = util::MurmurHashNative(&key.type, sizeof(key.type), seed);
+ seed = util::MurmurHashNative(&key.word, sizeof(key.word), seed);
+ seed = util::MurmurHashNative(&key.isCluster, sizeof(key.isCluster), seed);
+ seed = util::MurmurHashNative(&key.position, sizeof(key.position), seed);
+ seed = util::MurmurHashNative(&key.side, sizeof(key.side), seed);
+ seed = util::MurmurHashNative(&key.reoType, sizeof(key.reoType), seed);
+ return seed;
+ }
+};
+
+struct EqualsSparseReorderingFeatureKey :
+ public std::binary_function<SparseReorderingFeatureKey, SparseReorderingFeatureKey, bool> {
+ bool operator()(const SparseReorderingFeatureKey& left, const SparseReorderingFeatureKey& right) const {
+ //TODO: Can we just compare the memory?
+ return left.id == right.id && left.type == right.type && left.word == right.word &&
+ left.position == right.position && left.side == right.side &&
+ left.reoType == right.reoType;
+ }
+};
+
+class SparseReordering
+{
+public:
+ SparseReordering(const std::map<std::string,std::string>& config, const LexicalReordering* producer);
+
+  // If direction is backward, the options will differ; for forward they will be the same
+ void CopyScores(const TranslationOption& currentOpt,
+ const TranslationOption* previousOpt,
+ const InputType& input,
+ LexicalReorderingState::ReorderingType reoType,
+ LexicalReorderingConfiguration::Direction direction,
+ ScoreComponentCollection* scores) const ;
+
+private:
+ const LexicalReordering* m_producer;
+ typedef std::pair<std::string, boost::unordered_set<const Factor*> > WordList; //id and list
+ std::vector<WordList> m_sourceWordLists;
+ std::vector<WordList> m_targetWordLists;
+ typedef std::pair<std::string, boost::unordered_map<const Factor*, const Factor*> > ClusterMap; //id and map
+ std::vector<ClusterMap> m_sourceClusterMaps;
+ std::vector<ClusterMap> m_targetClusterMaps;
+ bool m_usePhrase;
+ bool m_useBetween;
+ bool m_useStack;
+ typedef boost::unordered_map<SparseReorderingFeatureKey, FName, HashSparseReorderingFeatureKey, EqualsSparseReorderingFeatureKey> FeatureMap;
+ FeatureMap m_featureMap;
+
+ void ReadWordList(const std::string& filename, const std::string& id,
+ SparseReorderingFeatureKey::Side side, std::vector<WordList>* pWordLists);
+ void ReadClusterMap(const std::string& filename, const std::string& id, SparseReorderingFeatureKey::Side side, std::vector<ClusterMap>* pClusterMaps);
+ void PreCalculateFeatureNames(size_t index, const std::string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster);
+
+ void AddFeatures(
+ SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,
+ const Word& word, SparseReorderingFeatureKey::Position position,
+ LexicalReorderingState::ReorderingType reoType,
+ ScoreComponentCollection* scores) const;
+
+};
+
+
+
+} //namespace
+
+
+#endif
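Putting the configuration comment above into practice, a lexical reordering feature line in moses.ini could look roughly like the following. This is only a sketch: apart from the sparse-* keys themselves, everything (the feature name, the type string, the paths, and the use of =true for the flags) is an assumption.

  LexicalReordering name=LexicalReordering0 type=wbe-msd-bidirectional-fe input-factor=0 output-factor=0 path=/path/to/reordering-table.gz sparse-words-source-names=/path/to/source-words.txt sparse-clusters-target-c50=/path/to/target-clusters.txt sparse-phrase=true sparse-stack=true sparse-between=true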
diff --git a/moses/FF/MaxSpanFreeNonTermSource.cpp b/moses/FF/MaxSpanFreeNonTermSource.cpp
index 3951fdd27..9de582635 100644
--- a/moses/FF/MaxSpanFreeNonTermSource.cpp
+++ b/moses/FF/MaxSpanFreeNonTermSource.cpp
@@ -27,7 +27,7 @@ MaxSpanFreeNonTermSource::MaxSpanFreeNonTermSource(const std::string &line)
m_glueTargetLHS.SetFactor(0, factor);
}
-void MaxSpanFreeNonTermSource::Evaluate(const Phrase &source
+void MaxSpanFreeNonTermSource::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
@@ -35,7 +35,7 @@ void MaxSpanFreeNonTermSource::Evaluate(const Phrase &source
targetPhrase.SetRuleSource(source);
}
-void MaxSpanFreeNonTermSource::Evaluate(const InputType &input
+void MaxSpanFreeNonTermSource::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
diff --git a/moses/FF/MaxSpanFreeNonTermSource.h b/moses/FF/MaxSpanFreeNonTermSource.h
index a9eec7b5e..973b374d8 100644
--- a/moses/FF/MaxSpanFreeNonTermSource.h
+++ b/moses/FF/MaxSpanFreeNonTermSource.h
@@ -15,23 +15,23 @@ public:
virtual bool IsUseable(const FactorMask &mask) const
{ return true; }
- virtual void Evaluate(const Phrase &source
+ virtual void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
- virtual void Evaluate(const InputType &input
+ virtual void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const;
- virtual void Evaluate(const Hypothesis& hypo,
+ virtual void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
- virtual void EvaluateChart(const ChartHypothesis &hypo,
+ virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
diff --git a/moses/FF/NieceTerminal.cpp b/moses/FF/NieceTerminal.cpp
index 88c9f86cd..b3a5f8f92 100644
--- a/moses/FF/NieceTerminal.cpp
+++ b/moses/FF/NieceTerminal.cpp
@@ -25,7 +25,7 @@ std::vector<float> NieceTerminal::DefaultWeights() const
return ret;
}
-void NieceTerminal::Evaluate(const Phrase &source
+void NieceTerminal::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
@@ -33,7 +33,7 @@ void NieceTerminal::Evaluate(const Phrase &source
targetPhrase.SetRuleSource(source);
}
-void NieceTerminal::Evaluate(const InputType &input
+void NieceTerminal::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
@@ -71,11 +71,11 @@ void NieceTerminal::Evaluate(const InputType &input
}
-void NieceTerminal::Evaluate(const Hypothesis& hypo,
+void NieceTerminal::EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
-void NieceTerminal::EvaluateChart(const ChartHypothesis &hypo,
+void NieceTerminal::EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
diff --git a/moses/FF/NieceTerminal.h b/moses/FF/NieceTerminal.h
index b7b398fff..7daf2963e 100644
--- a/moses/FF/NieceTerminal.h
+++ b/moses/FF/NieceTerminal.h
@@ -19,19 +19,19 @@ public:
return true;
}
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const;
- void Evaluate(const Hypothesis& hypo,
+ void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const;
- void EvaluateChart(const ChartHypothesis &hypo,
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const;
void SetParameter(const std::string& key, const std::string& value);
diff --git a/moses/FF/OSM-Feature/KenOSM.cpp b/moses/FF/OSM-Feature/KenOSM.cpp
new file mode 100644
index 000000000..e517200c3
--- /dev/null
+++ b/moses/FF/OSM-Feature/KenOSM.cpp
@@ -0,0 +1,32 @@
+#include "KenOSM.h"
+
+namespace Moses
+{
+
+OSMLM* ConstructOSMLM(const std::string &file)
+{
+ lm::ngram::ModelType model_type;
+ if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
+
+ switch(model_type) {
+ case lm::ngram::PROBING:
+ return new KenOSM<lm::ngram::ProbingModel>(file);
+ case lm::ngram::REST_PROBING:
+ return new KenOSM<lm::ngram::RestProbingModel>(file);
+ case lm::ngram::TRIE:
+ return new KenOSM<lm::ngram::TrieModel>(file);
+ case lm::ngram::QUANT_TRIE:
+ return new KenOSM<lm::ngram::QuantTrieModel>(file);
+ case lm::ngram::ARRAY_TRIE:
+ return new KenOSM<lm::ngram::ArrayTrieModel>(file);
+ case lm::ngram::QUANT_ARRAY_TRIE:
+ return new KenOSM<lm::ngram::QuantArrayTrieModel>(file);
+ default:
+ UTIL_THROW2("Unrecognized kenlm model type " << model_type);
+ }
+ } else {
+ return new KenOSM<lm::ngram::ProbingModel>(file);
+ }
+}
+
+} // namespace
diff --git a/moses/FF/OSM-Feature/KenOSM.h b/moses/FF/OSM-Feature/KenOSM.h
new file mode 100644
index 000000000..d3d8672d3
--- /dev/null
+++ b/moses/FF/OSM-Feature/KenOSM.h
@@ -0,0 +1,50 @@
+#pragma once
+
+#include <string>
+#include "lm/model.hh"
+#include <boost/shared_ptr.hpp>
+
+namespace Moses
+{
+
+class KenOSMBase {
+ public:
+ virtual float Score(const lm::ngram::State&, const std::string&,
+ lm::ngram::State&) const = 0;
+
+ virtual const lm::ngram::State &BeginSentenceState() const = 0;
+
+ virtual const lm::ngram::State &NullContextState() const = 0;
+};
+
+template <class KenModel>
+class KenOSM : public KenOSMBase {
+ public:
+ KenOSM(const std::string& file)
+ : m_kenlm(new KenModel(file.c_str())) {}
+
+ virtual float Score(const lm::ngram::State &in_state,
+ const std::string& word,
+ lm::ngram::State &out_state) const {
+ return m_kenlm->Score(in_state, m_kenlm->GetVocabulary().Index(word),
+ out_state);
+ }
+
+ virtual const lm::ngram::State &BeginSentenceState() const {
+ return m_kenlm->BeginSentenceState();
+ }
+
+ virtual const lm::ngram::State &NullContextState() const {
+ return m_kenlm->NullContextState();
+ }
+
+ private:
+ boost::shared_ptr<KenModel> m_kenlm;
+};
+
+typedef KenOSMBase OSMLM;
+
+OSMLM* ConstructOSMLM(const std::string &file);
+
+
+} // namespace
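The wrapper above hides the concrete KenLM template type behind OSMLM, which is what lets OpSequenceModel (changed below) score operation strings without knowing the model type. A minimal usage sketch, mirroring readLanguageModel; the include path, the file name "operations.kenlm" and the helper function are illustrative assumptions:

#include <string>
#include <vector>
#include "moses/FF/OSM-Feature/KenOSM.h"

// Sketch only: scores a sequence of OSM operation tokens via the abstract OSMLM wrapper.
float ScoreOperations(const std::vector<std::string> &operations) {
  Moses::OSMLM *osm = Moses::ConstructOSMLM("operations.kenlm"); // binary type auto-detected; ARPA falls back to probing
  lm::ngram::State state = osm->NullContextState();
  lm::ngram::State next;
  float logProb = 0.0f;
  for (size_t i = 0; i < operations.size(); ++i) {
    logProb += osm->Score(state, operations[i], next); // token is looked up in the KenLM vocabulary inside the wrapper
    state = next;
  }
  delete osm; // mirrors OpSequenceModel's own cleanup of its OSMLM pointer
  return logProb;
}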
diff --git a/moses/FF/OSM-Feature/OpSequenceModel.cpp b/moses/FF/OSM-Feature/OpSequenceModel.cpp
index dfa380a77..6d839f0cc 100644
--- a/moses/FF/OSM-Feature/OpSequenceModel.cpp
+++ b/moses/FF/OSM-Feature/OpSequenceModel.cpp
@@ -19,19 +19,18 @@ OpSequenceModel::OpSequenceModel(const std::string &line)
ReadParameters();
}
-OpSequenceModel::~OpSequenceModel()
-{
- delete OSM;
+OpSequenceModel::~OpSequenceModel() {
+ delete OSM;
}
void OpSequenceModel :: readLanguageModel(const char *lmFile)
{
-
string unkOp = "_TRANS_SLF_";
- OSM = new Model(m_lmPath.c_str());
+ OSM = ConstructOSMLM(m_lmPath);
+
State startState = OSM->NullContextState();
State endState;
- unkOpProb = OSM->Score(startState,OSM->GetVocabulary().Index(unkOp),endState);
+ unkOpProb = OSM->Score(startState,unkOp,endState);
}
@@ -42,7 +41,7 @@ void OpSequenceModel::Load()
-void OpSequenceModel:: Evaluate(const Phrase &source
+void OpSequenceModel:: EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
@@ -87,7 +86,7 @@ void OpSequenceModel:: Evaluate(const Phrase &source
}
-FFState* OpSequenceModel::Evaluate(
+FFState* OpSequenceModel::EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const
@@ -194,7 +193,7 @@ FFState* OpSequenceModel::Evaluate(
// return NULL;
}
-FFState* OpSequenceModel::EvaluateChart(
+FFState* OpSequenceModel::EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const
diff --git a/moses/FF/OSM-Feature/OpSequenceModel.h b/moses/FF/OSM-Feature/OpSequenceModel.h
index 64cab3044..8c71e8152 100644
--- a/moses/FF/OSM-Feature/OpSequenceModel.h
+++ b/moses/FF/OSM-Feature/OpSequenceModel.h
@@ -6,8 +6,7 @@
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/Manager.h"
#include "moses/FF/OSM-Feature/osmHyp.h"
-#include "lm/model.hh"
-
+#include "KenOSM.h"
namespace Moses
{
@@ -16,8 +15,7 @@ class OpSequenceModel : public StatefulFeatureFunction
{
public:
-
- lm::ngram::Model * OSM;
+ OSMLM* OSM;
float unkOpProb;
int sFactor; // Source Factor ...
int tFactor; // Target Factor ...
@@ -29,24 +27,24 @@ public:
void readLanguageModel(const char *);
void Load();
- FFState* Evaluate(
+ FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
- virtual FFState* EvaluateChart(
+ virtual FFState* EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const;
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
diff --git a/moses/FF/OSM-Feature/osmHyp.cpp b/moses/FF/OSM-Feature/osmHyp.cpp
index d492888ed..422b7c933 100644
--- a/moses/FF/OSM-Feature/osmHyp.cpp
+++ b/moses/FF/OSM-Feature/osmHyp.cpp
@@ -121,7 +121,7 @@ void osmHypothesis :: removeReorderingOperations()
operations = tupleSequence;
}
-void osmHypothesis :: calculateOSMProb(Model & ptrOp)
+void osmHypothesis :: calculateOSMProb(OSMLM& ptrOp)
{
opProb = 0;
@@ -130,7 +130,7 @@ void osmHypothesis :: calculateOSMProb(Model & ptrOp)
for (int i = 0; i<operations.size(); i++) {
temp = currState;
- opProb += ptrOp.Score(temp,ptrOp.GetVocabulary().Index(operations[i]),currState);
+ opProb += ptrOp.Score(temp,operations[i],currState);
}
lmState = currState;
diff --git a/moses/FF/OSM-Feature/osmHyp.h b/moses/FF/OSM-Feature/osmHyp.h
index 2459b4d23..88f171188 100644
--- a/moses/FF/OSM-Feature/osmHyp.h
+++ b/moses/FF/OSM-Feature/osmHyp.h
@@ -2,12 +2,13 @@
# include "moses/FF/FFState.h"
# include "moses/Manager.h"
-#include "lm/model.hh"
# include <set>
# include <map>
# include <string>
# include <vector>
+#include "KenOSM.h"
+
namespace Moses
{
@@ -79,7 +80,7 @@ public:
~osmHypothesis() {};
void generateOperations(int & startIndex, int j1 , int contFlag , WordsBitmap & coverageVector , std::string english , std::string german , std::set <int> & targetNullWords , std::vector <std::string> & currF);
void generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes);
- void calculateOSMProb(lm::ngram::Model & ptrOp);
+ void calculateOSMProb(OSMLM& ptrOp);
void computeOSMFeature(int startIndex , WordsBitmap & coverageVector);
void constructCepts(std::vector <int> & align , int startIndex , int endIndex, int targetPhraseLength);
void setPhrases(std::vector <std::string> & val1 , std::vector <std::string> & val2) {
diff --git a/moses/FF/PhraseBoundaryFeature.cpp b/moses/FF/PhraseBoundaryFeature.cpp
index d82181b76..3fdcf27f9 100644
--- a/moses/FF/PhraseBoundaryFeature.cpp
+++ b/moses/FF/PhraseBoundaryFeature.cpp
@@ -66,7 +66,7 @@ void PhraseBoundaryFeature::AddFeatures(
}
-FFState* PhraseBoundaryFeature::Evaluate
+FFState* PhraseBoundaryFeature::EvaluateWhenApplied
(const Hypothesis& cur_hypo, const FFState* prev_state,
ScoreComponentCollection* scores) const
{
diff --git a/moses/FF/PhraseBoundaryFeature.h b/moses/FF/PhraseBoundaryFeature.h
index fbafc6da9..e4c3ca3ba 100644
--- a/moses/FF/PhraseBoundaryFeature.h
+++ b/moses/FF/PhraseBoundaryFeature.h
@@ -44,23 +44,23 @@ public:
virtual const FFState* EmptyHypothesisState(const InputType &) const;
- virtual FFState* Evaluate(const Hypothesis& cur_hypo, const FFState* prev_state,
+ virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
- virtual FFState* EvaluateChart( const ChartHypothesis& /* cur_hypo */,
+ virtual FFState* EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */,
int /* featureID */,
ScoreComponentCollection* ) const {
throw std::logic_error("PhraseBoundaryState not supported in chart decoder, yet");
}
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/PhraseLengthFeature.cpp b/moses/FF/PhraseLengthFeature.cpp
index 43e0d1b2d..7850c374a 100644
--- a/moses/FF/PhraseLengthFeature.cpp
+++ b/moses/FF/PhraseLengthFeature.cpp
@@ -15,7 +15,7 @@ PhraseLengthFeature::PhraseLengthFeature(const std::string &line)
ReadParameters();
}
-void PhraseLengthFeature::Evaluate(const Phrase &source
+void PhraseLengthFeature::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/PhraseLengthFeature.h b/moses/FF/PhraseLengthFeature.h
index ba835f654..4976e2210 100644
--- a/moses/FF/PhraseLengthFeature.h
+++ b/moses/FF/PhraseLengthFeature.h
@@ -24,16 +24,16 @@ public:
return true;
}
- void Evaluate(const Hypothesis& hypo,
+ void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
- void EvaluateChart(const ChartHypothesis& hypo,
+ void EvaluateWhenApplied(const ChartHypothesis& hypo,
ScoreComponentCollection*) const {
throw std::logic_error("PhraseLengthFeature not valid in chart decoder");
}
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
@@ -41,7 +41,7 @@ public:
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- virtual void Evaluate(const Phrase &source
+ virtual void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
diff --git a/moses/FF/PhrasePairFeature.cpp b/moses/FF/PhrasePairFeature.cpp
index 9277e19f2..f359b68f7 100644
--- a/moses/FF/PhrasePairFeature.cpp
+++ b/moses/FF/PhrasePairFeature.cpp
@@ -106,7 +106,7 @@ void PhrasePairFeature::Load()
}
}
-void PhrasePairFeature::Evaluate(
+void PhrasePairFeature::EvaluateWhenApplied(
const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{
diff --git a/moses/FF/PhrasePairFeature.h b/moses/FF/PhrasePairFeature.h
index 7790e9035..8bfac628d 100644
--- a/moses/FF/PhrasePairFeature.h
+++ b/moses/FF/PhrasePairFeature.h
@@ -37,22 +37,22 @@ public:
bool IsUseable(const FactorMask &mask) const;
- void Evaluate(const Hypothesis& hypo,
+ void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const;
- void EvaluateChart(const ChartHypothesis& hypo,
+ void EvaluateWhenApplied(const ChartHypothesis& hypo,
ScoreComponentCollection*) const {
throw std::logic_error("PhrasePairFeature not valid in chart decoder");
}
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/PhrasePenalty.cpp b/moses/FF/PhrasePenalty.cpp
index b3e493707..cd1b735df 100644
--- a/moses/FF/PhrasePenalty.cpp
+++ b/moses/FF/PhrasePenalty.cpp
@@ -1,22 +1,53 @@
-
+#include <vector>
#include "PhrasePenalty.h"
#include "moses/ScoreComponentCollection.h"
+#include "moses/TranslationModel/PhraseDictionary.h"
+#include "util/exception.hh"
+
+using namespace std;
namespace Moses
{
PhrasePenalty::PhrasePenalty(const std::string &line)
- : StatelessFeatureFunction(1, line)
+: StatelessFeatureFunction(1, line)
+, m_perPhraseTable(false)
{
ReadParameters();
}
-void PhrasePenalty::Evaluate(const Phrase &source
+void PhrasePenalty::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{
- scoreBreakdown.Assign(this, 1.0f);
+ if (m_perPhraseTable) {
+ const PhraseDictionary *pt = targetPhrase.GetContainer();
+ if (pt) {
+ size_t ptId = pt->GetId();
+ UTIL_THROW_IF2(ptId >= m_numScoreComponents, "Wrong number of scores");
+
+ vector<float> scores(m_numScoreComponents, 0);
+ scores[ptId] = 1.0f;
+
+ scoreBreakdown.Assign(this, scores);
+ }
+
+ }
+ else {
+ scoreBreakdown.Assign(this, 1.0f);
+ }
}
+void PhrasePenalty::SetParameter(const std::string& key, const std::string& value)
+{
+ if (key == "per-phrase-table") {
+ m_perPhraseTable =Scan<bool>(value);
+ }
+ else {
+ StatelessFeatureFunction::SetParameter(key, value);
+ }
+}
+
+
} // namespace
diff --git a/moses/FF/PhrasePenalty.h b/moses/FF/PhrasePenalty.h
index a4014abf1..b15a80224 100644
--- a/moses/FF/PhrasePenalty.h
+++ b/moses/FF/PhrasePenalty.h
@@ -14,19 +14,19 @@ public:
return true;
}
- virtual void Evaluate(const Phrase &source
+ virtual void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
- void Evaluate(const Hypothesis& hypo,
+ void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
- void EvaluateChart(const ChartHypothesis &hypo,
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
@@ -34,6 +34,10 @@ public:
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
+ void SetParameter(const std::string& key, const std::string& value);
+
+protected:
+ bool m_perPhraseTable;
};
} //namespace
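With the new per-phrase-table mode, PhrasePenalty emits one score per phrase table (indexed by the table's id) instead of a single count. A hypothetical moses.ini fragment for a setup with two phrase tables follows; only per-phrase-table=true comes from the code above, while the feature/weight names and the use of num-features to size the score vector to the number of tables are assumptions.

  [feature]
  PhrasePenalty name=PhrasePenalty0 num-features=2 per-phrase-table=true

  [weight]
  PhrasePenalty0= 0.2 0.2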
diff --git a/moses/FF/ReferenceComparison.h b/moses/FF/ReferenceComparison.h
index 8b0341fd6..62cf15ced 100644
--- a/moses/FF/ReferenceComparison.h
+++ b/moses/FF/ReferenceComparison.h
@@ -15,13 +15,13 @@ public:
virtual bool IsUseable(const FactorMask &mask) const
{ return true; }
- virtual void Evaluate(const Phrase &source
+ virtual void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{}
- virtual void Evaluate(const InputType &input
+ virtual void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
@@ -29,11 +29,11 @@ public:
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- virtual void Evaluate(const Hypothesis& hypo,
+ virtual void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
- virtual void EvaluateChart(const ChartHypothesis &hypo,
+ virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
diff --git a/moses/FF/RuleScope.cpp b/moses/FF/RuleScope.cpp
index e949c3337..ed329c4ca 100644
--- a/moses/FF/RuleScope.cpp
+++ b/moses/FF/RuleScope.cpp
@@ -16,7 +16,7 @@ bool IsAmbiguous(const Word &word, bool sourceSyntax)
return word.IsNonTerminal() && (!sourceSyntax || word == inputDefaultNonTerminal);
}
-void RuleScope::Evaluate(const Phrase &source
+void RuleScope::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/RuleScope.h b/moses/FF/RuleScope.h
index 4ac10c804..a2c9e06f3 100644
--- a/moses/FF/RuleScope.h
+++ b/moses/FF/RuleScope.h
@@ -14,12 +14,12 @@ public:
virtual bool IsUseable(const FactorMask &mask) const
{ return true; }
- virtual void Evaluate(const Phrase &source
+ virtual void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
- virtual void Evaluate(const InputType &input
+ virtual void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
@@ -27,11 +27,11 @@ public:
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- virtual void Evaluate(const Hypothesis& hypo,
+ virtual void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
- virtual void EvaluateChart(const ChartHypothesis &hypo,
+ virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
diff --git a/moses/FF/SetSourcePhrase.cpp b/moses/FF/SetSourcePhrase.cpp
index 0a2eaa4cb..f89683f28 100644
--- a/moses/FF/SetSourcePhrase.cpp
+++ b/moses/FF/SetSourcePhrase.cpp
@@ -10,7 +10,7 @@ SetSourcePhrase::SetSourcePhrase(const std::string &line)
ReadParameters();
}
-void SetSourcePhrase::Evaluate(const Phrase &source
+void SetSourcePhrase::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/SetSourcePhrase.h b/moses/FF/SetSourcePhrase.h
index 0d7ad2ade..81f293dde 100644
--- a/moses/FF/SetSourcePhrase.h
+++ b/moses/FF/SetSourcePhrase.h
@@ -14,12 +14,12 @@ public:
virtual bool IsUseable(const FactorMask &mask) const
{ return true; }
- virtual void Evaluate(const Phrase &source
+ virtual void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
- virtual void Evaluate(const InputType &input
+ virtual void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
@@ -27,11 +27,11 @@ public:
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- virtual void Evaluate(const Hypothesis& hypo,
+ virtual void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
- virtual void EvaluateChart(const ChartHypothesis &hypo,
+ virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
diff --git a/moses/FF/SkeletonChangeInput.cpp b/moses/FF/SkeletonChangeInput.cpp
new file mode 100644
index 000000000..74a85ba5e
--- /dev/null
+++ b/moses/FF/SkeletonChangeInput.cpp
@@ -0,0 +1,92 @@
+#include <vector>
+#include "SkeletonChangeInput.h"
+#include "moses/ScoreComponentCollection.h"
+#include "moses/TargetPhrase.h"
+#include "moses/Sentence.h"
+#include "moses/FactorCollection.h"
+#include "util/exception.hh"
+
+using namespace std;
+
+namespace Moses
+{
+SkeletonChangeInput::SkeletonChangeInput(const std::string &line)
+ :StatelessFeatureFunction(2, line)
+{
+ ReadParameters();
+}
+
+void SkeletonChangeInput::EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
+{
+ // dense scores
+ vector<float> newScores(m_numScoreComponents);
+ newScores[0] = 1.5;
+ newScores[1] = 0.3;
+ scoreBreakdown.PlusEquals(this, newScores);
+
+ // sparse scores
+ scoreBreakdown.PlusEquals(this, "sparse-name", 2.4);
+
+}
+
+void SkeletonChangeInput::EvaluateWithSourceContext(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
+{
+ if (targetPhrase.GetNumNonTerminals()) {
+ vector<float> newScores(m_numScoreComponents);
+ newScores[0] = - std::numeric_limits<float>::infinity();
+ scoreBreakdown.PlusEquals(this, newScores);
+ }
+
+}
+
+void SkeletonChangeInput::EvaluateWhenApplied(const Hypothesis& hypo,
+ ScoreComponentCollection* accumulator) const
+{}
+
+void SkeletonChangeInput::EvaluateWhenApplied(const ChartHypothesis &hypo,
+ ScoreComponentCollection* accumulator) const
+{}
+
+void SkeletonChangeInput::ChangeSource(InputType *&input) const
+{
+  // add factor[1] to each word, created from the first 4 letters of factor[0]
+
+ Sentence *sentence = dynamic_cast<Sentence*>(input);
+ UTIL_THROW_IF2(sentence == NULL, "Not a sentence input");
+
+ FactorCollection &fc = FactorCollection::Instance();
+
+ size_t size = sentence->GetSize();
+ for (size_t i = 0; i < size; ++i) {
+ Word &word = sentence->Phrase::GetWord(i);
+ const Factor *factor0 = word[0];
+
+ std::string str = factor0->GetString().as_string();
+ if (str.length() > 4) {
+ str = str.substr(0, 4);
+ }
+
+ const Factor *factor1 = fc.AddFactor(str);
+ word.SetFactor(1, factor1);
+ }
+}
+
+void SkeletonChangeInput::SetParameter(const std::string& key, const std::string& value)
+{
+ if (key == "arg") {
+ // set value here
+ } else {
+ StatelessFeatureFunction::SetParameter(key, value);
+ }
+}
+
+}
+
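As a concrete illustration of ChangeSource above (the sentence and the '|' factor separator are only for illustration): given the input "the president visited berlin", each word gets a second factor made of its first four characters, so the decoder effectively sees "the|the president|pres visited|visi berlin|berl".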
diff --git a/moses/FF/SkeletonChangeInput.h b/moses/FF/SkeletonChangeInput.h
new file mode 100644
index 000000000..07b19e768
--- /dev/null
+++ b/moses/FF/SkeletonChangeInput.h
@@ -0,0 +1,41 @@
+#pragma once
+
+#include <string>
+#include "StatelessFeatureFunction.h"
+
+namespace Moses
+{
+
+class SkeletonChangeInput : public StatelessFeatureFunction
+{
+public:
+ SkeletonChangeInput(const std::string &line);
+
+ bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
+
+ void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
+
+ void ChangeSource(InputType *&input) const;
+
+ void EvaluateWithSourceContext(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+ void EvaluateWhenApplied(const Hypothesis& hypo,
+ ScoreComponentCollection* accumulator) const;
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
+ ScoreComponentCollection* accumulator) const;
+
+ void SetParameter(const std::string& key, const std::string& value);
+
+};
+
+}
+
diff --git a/moses/FF/SkeletonStatefulFF.cpp b/moses/FF/SkeletonStatefulFF.cpp
index 2dfec5fad..fe81aeeae 100644
--- a/moses/FF/SkeletonStatefulFF.cpp
+++ b/moses/FF/SkeletonStatefulFF.cpp
@@ -23,13 +23,13 @@ SkeletonStatefulFF::SkeletonStatefulFF(const std::string &line)
ReadParameters();
}
-void SkeletonStatefulFF::Evaluate(const Phrase &source
+void SkeletonStatefulFF::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{}
-void SkeletonStatefulFF::Evaluate(const InputType &input
+void SkeletonStatefulFF::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
@@ -37,7 +37,7 @@ void SkeletonStatefulFF::Evaluate(const InputType &input
, ScoreComponentCollection *estimatedFutureScore) const
{}
-FFState* SkeletonStatefulFF::Evaluate(
+FFState* SkeletonStatefulFF::EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const
@@ -56,7 +56,7 @@ FFState* SkeletonStatefulFF::Evaluate(
return new SkeletonState(0);
}
-FFState* SkeletonStatefulFF::EvaluateChart(
+FFState* SkeletonStatefulFF::EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const
diff --git a/moses/FF/SkeletonStatefulFF.h b/moses/FF/SkeletonStatefulFF.h
index 9cbe6b512..6fa26803e 100644
--- a/moses/FF/SkeletonStatefulFF.h
+++ b/moses/FF/SkeletonStatefulFF.h
@@ -30,21 +30,21 @@ public:
return new SkeletonState(0);
}
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const;
- FFState* Evaluate(
+ FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
- FFState* EvaluateChart(
+ FFState* EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const;
diff --git a/moses/FF/SkeletonStatelessFF.cpp b/moses/FF/SkeletonStatelessFF.cpp
index c05e27dec..80c7d130e 100644
--- a/moses/FF/SkeletonStatelessFF.cpp
+++ b/moses/FF/SkeletonStatelessFF.cpp
@@ -13,7 +13,7 @@ SkeletonStatelessFF::SkeletonStatelessFF(const std::string &line)
ReadParameters();
}
-void SkeletonStatelessFF::Evaluate(const Phrase &source
+void SkeletonStatelessFF::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
@@ -29,7 +29,7 @@ void SkeletonStatelessFF::Evaluate(const Phrase &source
}
-void SkeletonStatelessFF::Evaluate(const InputType &input
+void SkeletonStatelessFF::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
@@ -44,11 +44,11 @@ void SkeletonStatelessFF::Evaluate(const InputType &input
}
-void SkeletonStatelessFF::Evaluate(const Hypothesis& hypo,
+void SkeletonStatelessFF::EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
-void SkeletonStatelessFF::EvaluateChart(const ChartHypothesis &hypo,
+void SkeletonStatelessFF::EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
diff --git a/moses/FF/SkeletonStatelessFF.h b/moses/FF/SkeletonStatelessFF.h
index 5adb35f6d..520ec1405 100644
--- a/moses/FF/SkeletonStatelessFF.h
+++ b/moses/FF/SkeletonStatelessFF.h
@@ -15,19 +15,19 @@ public:
return true;
}
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const;
- void Evaluate(const Hypothesis& hypo,
+ void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const;
- void EvaluateChart(const ChartHypothesis &hypo,
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const;
void SetParameter(const std::string& key, const std::string& value);
diff --git a/moses/FF/SoftMatchingFeature.cpp b/moses/FF/SoftMatchingFeature.cpp
index 017e551c4..0475547da 100644
--- a/moses/FF/SoftMatchingFeature.cpp
+++ b/moses/FF/SoftMatchingFeature.cpp
@@ -61,7 +61,7 @@ bool SoftMatchingFeature::Load(const std::string& filePath)
return true;
}
-void SoftMatchingFeature::EvaluateChart(const ChartHypothesis& hypo,
+void SoftMatchingFeature::EvaluateWhenApplied(const ChartHypothesis& hypo,
ScoreComponentCollection* accumulator) const
{
diff --git a/moses/FF/SoftMatchingFeature.h b/moses/FF/SoftMatchingFeature.h
index b823c2426..ff923ea08 100644
--- a/moses/FF/SoftMatchingFeature.h
+++ b/moses/FF/SoftMatchingFeature.h
@@ -19,20 +19,20 @@ public:
return true;
}
- virtual void EvaluateChart(const ChartHypothesis& hypo,
+ virtual void EvaluateWhenApplied(const ChartHypothesis& hypo,
ScoreComponentCollection* accumulator) const;
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const {};
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const {};
- void Evaluate(const Hypothesis& hypo,
+ void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const {};
bool Load(const std::string &filePath);
diff --git a/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp b/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp
new file mode 100644
index 000000000..fe1144465
--- /dev/null
+++ b/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp
@@ -0,0 +1,536 @@
+#include <vector>
+#include <limits>
+#include <assert.h>
+#include "SoftSourceSyntacticConstraintsFeature.h"
+#include "moses/StaticData.h"
+#include "moses/InputFileStream.h"
+#include "moses/ScoreComponentCollection.h"
+#include "moses/Hypothesis.h"
+#include "moses/ChartHypothesis.h"
+#include "moses/ChartManager.h"
+#include "moses/FactorCollection.h"
+#include "moses/TreeInput.h"
+#include "moses/PP/SourceLabelsPhraseProperty.h"
+
+
+using namespace std;
+
+namespace Moses
+{
+
+SoftSourceSyntacticConstraintsFeature::SoftSourceSyntacticConstraintsFeature(const std::string &line)
+ : StatelessFeatureFunction(3, line), m_featureVariant(0)
+{
+ VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
+ ReadParameters();
+ VERBOSE(1, " Done.");
+ VERBOSE(1, " Feature variant: " << m_featureVariant << "." << std::endl);
+}
+
+void SoftSourceSyntacticConstraintsFeature::SetParameter(const std::string& key, const std::string& value)
+{
+ if (key == "sourceLabelSetFile") {
+ m_sourceLabelSetFile = value;
+ } else if (key == "coreSourceLabelSetFile") {
+ m_coreSourceLabelSetFile = value;
+ } else if (key == "targetSourceLeftHandSideJointCountFile") {
+ m_targetSourceLHSJointCountFile = value;
+ } else if (key == "tuneable") {
+ m_tuneable = Scan<bool>(value);
+ } else if (key == "featureVariant") {
+ m_featureVariant = Scan<size_t>(value); // 0: only dense features, 1: no mismatches (also set weights 1 0 0 and tuneable=false), 2: with sparse features, 3: with sparse features for core labels only
+ } else {
+ StatelessFeatureFunction::SetParameter(key, value);
+ }
+}
+
+
+void SoftSourceSyntacticConstraintsFeature::Load()
+{
+ // don't change the loading order!
+ LoadSourceLabelSet();
+ if (m_featureVariant == 3) {
+ LoadCoreSourceLabelSet();
+ }
+ if (!m_targetSourceLHSJointCountFile.empty()) {
+ LoadTargetSourceLeftHandSideJointCountFile();
+ }
+}
+
+void SoftSourceSyntacticConstraintsFeature::LoadSourceLabelSet()
+{
+ VERBOSE(2, GetScoreProducerDescription() << ": Loading source label set from file " << m_sourceLabelSetFile << std::endl);
+ InputFileStream inFile(m_sourceLabelSetFile);
+
+ FactorCollection &factorCollection = FactorCollection::Instance();
+
+ // read source label set
+ std::string line;
+ m_sourceLabels.clear();
+ m_sourceLabelsByIndex.clear();
+ m_sourceLabelIndexesByFactor.clear();
+ while (getline(inFile, line)) {
+ std::istringstream tokenizer(line);
+ std::string label;
+ size_t index;
+ try {
+ tokenizer >> label >> index;
+ } catch (const std::exception &e) {
+ UTIL_THROW2(GetScoreProducerDescription()
+ << ": Error reading source label set file " << m_sourceLabelSetFile << " .");
+ }
+ std::pair< boost::unordered_map<std::string,size_t>::iterator, bool > inserted = m_sourceLabels.insert( std::pair<std::string,size_t>(label,index) );
+ UTIL_THROW_IF2(!inserted.second, GetScoreProducerDescription()
+ << ": Source label set file " << m_sourceLabelSetFile << " should contain each syntactic label only once.");
+
+ if (index >= m_sourceLabelsByIndex.size()) {
+ m_sourceLabelsByIndex.resize(index+1);
+ }
+ m_sourceLabelsByIndex[index] = label;
+ const Factor* sourceLabelFactor = factorCollection.AddFactor(label,true);
+ m_sourceLabelIndexesByFactor[sourceLabelFactor] = index;
+ }
+
+ inFile.Close();
+
+ std::list<std::string> specialLabels;
+ specialLabels.push_back("GlueTop");
+ specialLabels.push_back("GlueX");
+// specialLabels.push_back("XRHS");
+// specialLabels.push_back("XLHS");
+ for (std::list<std::string>::const_iterator iter=specialLabels.begin();
+ iter!=specialLabels.end(); ++iter) {
+ boost::unordered_map<std::string,size_t>::iterator found = m_sourceLabels.find(*iter);
+ UTIL_THROW_IF2(found == m_sourceLabels.end(), GetScoreProducerDescription()
+ << ": Source label set file " << m_sourceLabelSetFile << " should contain an entry for the special label \"" << *iter << "\".");
+ if (!(found->first).compare("GlueTop")) {
+ m_GlueTopLabel = found->second;
+// } else if (!(found->first).compare("XRHS")) {
+// m_XRHSLabel = found->second;
+// } else if (!(found->first).compare("XLHS")) {
+// m_XLHSLabel = found->second;
+ }
+ }
+}
+
+void SoftSourceSyntacticConstraintsFeature::LoadCoreSourceLabelSet()
+{
+ VERBOSE(2, GetScoreProducerDescription() << ": Loading core source label set from file " << m_coreSourceLabelSetFile << std::endl);
+ InputFileStream inFile(m_coreSourceLabelSetFile);
+
+ // read core source label set
+ std::string line;
+ m_coreSourceLabels.clear();
+ while (getline(inFile, line)) {
+ istringstream tokenizer(line);
+ std::string label;
+ tokenizer >> label;
+ boost::unordered_map<std::string,size_t>::iterator foundSourceLabelIndex = m_sourceLabels.find( label );
+ if ( foundSourceLabelIndex != m_sourceLabels.end() ) {
+ m_coreSourceLabels.insert(foundSourceLabelIndex->second);
+ } else {
+ VERBOSE(2, GetScoreProducerDescription()
+ << ": Ignoring unknown source label \"" << label << "\" "
+ << "from core source label set file " << m_coreSourceLabelSetFile << "."
+ << std::endl);
+ }
+ }
+
+ inFile.Close();
+}
+
+void SoftSourceSyntacticConstraintsFeature::LoadTargetSourceLeftHandSideJointCountFile()
+{
+
+ VERBOSE(2, GetScoreProducerDescription() << ": Loading target/source label joint counts from file " << m_targetSourceLHSJointCountFile << std::endl);
+ InputFileStream inFile(m_targetSourceLHSJointCountFile);
+
+ for (boost::unordered_map<const Factor*, std::vector< std::pair<float,float> >* >::iterator iter=m_labelPairProbabilities.begin();
+ iter!=m_labelPairProbabilities.end(); ++iter) {
+ delete iter->second;
+ }
+ m_labelPairProbabilities.clear();
+
+ // read joint counts
+ std::string line;
+ FactorCollection &factorCollection = FactorCollection::Instance();
+ boost::unordered_map<const Factor*,float> targetLHSCounts;
+ std::vector<float> sourceLHSCounts(m_sourceLabels.size(),0.0);
+
+ while (getline(inFile, line)) {
+ istringstream tokenizer(line);
+ std::string targetLabel;
+ std::string sourceLabel;
+ float count;
+ tokenizer >> targetLabel;
+ tokenizer >> sourceLabel;
+ tokenizer >> count;
+
+ boost::unordered_map<std::string,size_t>::iterator foundSourceLabelIndex = m_sourceLabels.find( sourceLabel );
+ UTIL_THROW_IF2(foundSourceLabelIndex == m_sourceLabels.end(), GetScoreProducerDescription()
+ << ": Target/source label joint count file " << m_targetSourceLHSJointCountFile
+ << " contains unknown source label \"" << sourceLabel << "\".");
+
+ const Factor* targetLabelFactor = factorCollection.AddFactor(targetLabel,true);
+
+ sourceLHSCounts[foundSourceLabelIndex->second] += count;
+ std::pair< boost::unordered_map<const Factor*,float >::iterator, bool > insertedTargetLHSCount =
+ targetLHSCounts.insert( std::pair<const Factor*,float>(targetLabelFactor,count) );
+ if (!insertedTargetLHSCount.second) {
+ (insertedTargetLHSCount.first)->second += count;
+ boost::unordered_map<const Factor*, std::vector< std::pair<float,float> >* >::iterator jointCountIt =
+ m_labelPairProbabilities.find( targetLabelFactor );
+ assert(jointCountIt != m_labelPairProbabilities.end());
+ (jointCountIt->second)->at(foundSourceLabelIndex->second).first += count;
+ (jointCountIt->second)->at(foundSourceLabelIndex->second).second += count;
+ } else {
+ std::pair<float,float> init(0.0,0.0);
+ std::vector< std::pair<float,float> >* sourceVector = new std::vector< std::pair<float,float> >(m_sourceLabels.size(),init);
+ sourceVector->at(foundSourceLabelIndex->second) = std::pair<float,float>(count,count);
+ std::pair< boost::unordered_map<const Factor*, std::vector< std::pair<float,float> >* >::iterator, bool > insertedJointCount =
+ m_labelPairProbabilities.insert( std::pair<const Factor*, std::vector< std::pair<float,float> >* >(targetLabelFactor,sourceVector) );
+ assert(insertedJointCount.second);
+ }
+ }
+
+ // normalization
+ for (boost::unordered_map<const Factor*, std::vector< std::pair<float,float> >* >::iterator iter=m_labelPairProbabilities.begin();
+ iter!=m_labelPairProbabilities.end(); ++iter) {
+ float targetLHSCount = 0;
+ boost::unordered_map<const Factor*,float >::const_iterator targetLHSCountIt = targetLHSCounts.find( iter->first );
+ if ( targetLHSCountIt != targetLHSCounts.end() ) {
+ targetLHSCount = targetLHSCountIt->second;
+ }
+ std::vector< std::pair<float,float> > &probabilities = *(iter->second);
+ for (size_t index=0; index<probabilities.size(); ++index) {
+
+ if ( probabilities[index].first != 0 ) {
+ assert(targetLHSCount != 0);
+ probabilities[index].first /= targetLHSCount;
+ }
+ if ( probabilities[index].second != 0 ) {
+ assert(sourceLHSCounts[index] != 0);
+ probabilities[index].second /= sourceLHSCounts[index];
+ }
+ }
+ }
+
+ inFile.Close();
+}
+
+
+void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
+ const ChartHypothesis& hypo,
+ ScoreComponentCollection* accumulator) const
+{
+ // dense scores
+ std::vector<float> newScores(m_numScoreComponents,0); // m_numScoreComponents == 3
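+  // The three dense score components are assigned below:
+  //   [0] input tree match (exact value depends on m_featureVariant),
+  //   [1] binary LHS label mismatch indicator,
+  //   [2] number of right-hand-side non-terminals with an input tree label mismatch.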
+
+ const InputType& input = hypo.GetManager().GetSource();
+ const TreeInput& treeInput = static_cast<const TreeInput&>(input);
+ const StaticData& staticData = StaticData::Instance();
+ const Word& outputDefaultNonTerminal = staticData.GetOutputDefaultNonTerminal();
+
+ size_t nNTs = 1;
+ bool treeInputMismatchLHSBinary = true;
+ size_t treeInputMismatchRHSCount = 0;
+ bool hasCompleteTreeInputMatch = false;
+ float t2sLabelsProb = 1;
+ float s2tLabelsProb = 1;
+ float ruleLabelledProbability = 1;
+
+ // read SourceLabels property
+ const TargetPhrase &currTarPhr = hypo.GetCurrTargetPhrase();
+ const Factor* targetLHS = currTarPhr.GetTargetLHS()[0];
+ bool isGlueGrammarRule = false;
+ bool isUnkRule = false;
+
+ if (const PhraseProperty *property = currTarPhr.GetProperty("SourceLabels")) {
+
+ const SourceLabelsPhraseProperty *sourceLabelsPhraseProperty = static_cast<const SourceLabelsPhraseProperty*>(property);
+
+ nNTs = sourceLabelsPhraseProperty->GetNumberOfNonTerminals();
+ float totalCount = sourceLabelsPhraseProperty->GetTotalCount();
+
+ // prepare for input tree label matching
+ std::vector< boost::unordered_set<size_t> > treeInputLabelsRHS(nNTs-1);
+ boost::unordered_set<size_t> treeInputLabelsLHS;
+
+ // get index map for underlying hypotheses
+ const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
+ currTarPhr.GetAlignNonTerm().GetNonTermIndexMap();
+
+ std::vector<const Factor*> targetLabelsRHS;
+ if (nNTs > 1) { // rule has right-hand side non-terminals, i.e. it's a hierarchical rule
+ size_t nonTerminalNumber = 0;
+
+ for (size_t phrasePos=0; phrasePos<currTarPhr.GetSize(); ++phrasePos) {
+ // consult rule for either word or non-terminal
+ const Word &word = currTarPhr.GetWord(phrasePos);
+ if ( word.IsNonTerminal() ) {
+ // non-terminal: consult subderivation
+ size_t nonTermIndex = nonTermIndexMap[phrasePos];
+ const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndex);
+ targetLabelsRHS.push_back( prevHypo->GetTargetLHS()[0] );
+
+ // retrieve information that is required for input tree label matching (RHS)
+ const WordsRange& prevWordsRange = prevHypo->GetCurrSourceRange();
+ size_t prevStartPos = prevWordsRange.GetStartPos();
+ size_t prevEndPos = prevWordsRange.GetEndPos();
+ const NonTerminalSet& prevTreeInputLabels = treeInput.GetLabelSet(prevStartPos,prevEndPos);
+
+ for (NonTerminalSet::const_iterator prevTreeInputLabelsIt = prevTreeInputLabels.begin();
+ prevTreeInputLabelsIt != prevTreeInputLabels.end(); ++prevTreeInputLabelsIt) {
+ if (*prevTreeInputLabelsIt != outputDefaultNonTerminal) {
+ boost::unordered_map<const Factor*,size_t>::const_iterator foundPrevTreeInputLabel
+ = m_sourceLabelIndexesByFactor.find((*prevTreeInputLabelsIt)[0]);
+ if (foundPrevTreeInputLabel != m_sourceLabelIndexesByFactor.end()) {
+ size_t prevTreeInputLabelIndex = foundPrevTreeInputLabel->second;
+ treeInputLabelsRHS[nonTerminalNumber].insert(prevTreeInputLabelIndex);
+ }
+ }
+ }
+
+ ++nonTerminalNumber;
+ }
+ }
+ }
+
+ // retrieve information that is required for input tree label matching (LHS)
+ const WordsRange& wordsRange = hypo.GetCurrSourceRange();
+ size_t startPos = wordsRange.GetStartPos();
+ size_t endPos = wordsRange.GetEndPos();
+ const NonTerminalSet& treeInputLabels = treeInput.GetLabelSet(startPos,endPos);
+
+ for (NonTerminalSet::const_iterator treeInputLabelsIt = treeInputLabels.begin();
+ treeInputLabelsIt != treeInputLabels.end(); ++treeInputLabelsIt) {
+ if (*treeInputLabelsIt != outputDefaultNonTerminal) {
+ boost::unordered_map<const Factor*,size_t>::const_iterator foundTreeInputLabel
+ = m_sourceLabelIndexesByFactor.find((*treeInputLabelsIt)[0]);
+ if (foundTreeInputLabel != m_sourceLabelIndexesByFactor.end()) {
+ size_t treeInputLabelIndex = foundTreeInputLabel->second;
+ treeInputLabelsLHS.insert(treeInputLabelIndex);
+ }
+ }
+ }
+
+
+ // inspect source-labelled rule items
+
+ std::vector< boost::unordered_set<size_t> > sparseScoredTreeInputLabelsRHS(nNTs-1);
+ boost::unordered_set<size_t> sparseScoredTreeInputLabelsLHS;
+
+ std::vector<bool> sourceLabelSeenAsLHS(m_sourceLabels.size(),false);
+ std::vector<bool> treeInputMatchRHSCountByNonTerminal(nNTs-1,false);
+
+ const std::list<SourceLabelsPhrasePropertyItem> &sourceLabelItems = sourceLabelsPhraseProperty->GetSourceLabelItems();
+
+ for (std::list<SourceLabelsPhrasePropertyItem>::const_iterator sourceLabelItem = sourceLabelItems.begin();
+ sourceLabelItem != sourceLabelItems.end() && !hasCompleteTreeInputMatch; ++sourceLabelItem) {
+
+ const std::list<size_t> &sourceLabelsRHS = sourceLabelItem->GetSourceLabelsRHS();
+ // float sourceLabelsRHSCount = sourceLabelItem->GetSourceLabelsRHSCount();
+ const std::list< std::pair<size_t,float> > &sourceLabelsLHSList = sourceLabelItem->GetSourceLabelsLHSList();
+
+ assert(sourceLabelsRHS.size() == nNTs-1);
+
+ bool currentSourceLabelItemIsCompleteTreeInputMatch = true;
+
+ size_t nonTerminalNumber=0;
+ for (std::list<size_t>::const_iterator sourceLabelsRHSIt = sourceLabelsRHS.begin();
+ sourceLabelsRHSIt != sourceLabelsRHS.end(); ++sourceLabelsRHSIt, ++nonTerminalNumber) {
+
+ if (treeInputLabelsRHS[nonTerminalNumber].find(*sourceLabelsRHSIt) != treeInputLabelsRHS[nonTerminalNumber].end()) {
+
+ treeInputMatchRHSCountByNonTerminal[nonTerminalNumber] = true;
+
+ if ( m_featureVariant == 2 ||
+ (m_featureVariant == 3 && m_coreSourceLabels.find(*sourceLabelsRHSIt) != m_coreSourceLabels.end()) ) {
+ // score sparse features: RHS match
+ if (sparseScoredTreeInputLabelsRHS[nonTerminalNumber].find(*sourceLabelsRHSIt) == sparseScoredTreeInputLabelsRHS[nonTerminalNumber].end()) {
+ // (only if no match has been scored for this tree input label and rule non-terminal with a previous sourceLabelItem)
+ float score_RHS_1 = (float)1/treeInputLabelsRHS[nonTerminalNumber].size();
+ accumulator->PlusEquals(this,
+ std::string("RHS_1_" + m_sourceLabelsByIndex[*sourceLabelsRHSIt]),
+ score_RHS_1);
+ sparseScoredTreeInputLabelsRHS[nonTerminalNumber].insert(*sourceLabelsRHSIt);
+ }
+ }
+
+ } else {
+
+ currentSourceLabelItemIsCompleteTreeInputMatch = false;
+
+ }
+ }
+
+ // LHS source non-terminal labels seen with this RHS
+ bool currentSourceLabelItemHasLHSTreeInputMatch = false;
+ //float ruleLabelledCount = 0;
+ std::list< std::pair<size_t,float> >::const_iterator sourceLabelsLHSIt;
+
+ for (sourceLabelsLHSIt = sourceLabelsLHSList.begin(); sourceLabelsLHSIt != sourceLabelsLHSList.end(); ++sourceLabelsLHSIt) {
+
+ if ( sourceLabelsLHSIt->first == m_GlueTopLabel ) {
+ isGlueGrammarRule = true;
+ }
+
+ if (treeInputLabelsLHS.find(sourceLabelsLHSIt->first) != treeInputLabelsLHS.end()) {
+
+ currentSourceLabelItemHasLHSTreeInputMatch = true;
+
+ if ( m_featureVariant == 2 ||
+ (m_featureVariant == 3 && m_coreSourceLabels.find(sourceLabelsLHSIt->first) != m_coreSourceLabels.end()) ) {
+ // score sparse features: LHS match
+ if (sparseScoredTreeInputLabelsLHS.find(sourceLabelsLHSIt->first) == sparseScoredTreeInputLabelsLHS.end()) {
+          // (only if no match has been scored for this tree input label with a previous sourceLabelItem)
+ float score_LHS_1 = (float)1/treeInputLabelsLHS.size();
+ accumulator->PlusEquals(this,
+ std::string("LHS_1_" + m_sourceLabelsByIndex[sourceLabelsLHSIt->first]),
+ score_LHS_1);
+ sparseScoredTreeInputLabelsLHS.insert(sourceLabelsLHSIt->first);
+ }
+ }
+ break;
+
+ }
+ }
+
+ if (currentSourceLabelItemHasLHSTreeInputMatch) {
+ // input tree matching (LHS)
+ treeInputMismatchLHSBinary = false;
+ } else {
+ currentSourceLabelItemIsCompleteTreeInputMatch = false;
+ }
+
+ if (currentSourceLabelItemIsCompleteTreeInputMatch) {
+ hasCompleteTreeInputMatch = true;
+
+ ruleLabelledProbability = sourceLabelsLHSIt->second / totalCount;
+ std::pair<float,float> probPair = GetLabelPairProbabilities( targetLHS, sourceLabelsLHSIt->first);
+ t2sLabelsProb = probPair.first;
+ s2tLabelsProb = probPair.second;
+ nonTerminalNumber=0;
+ for (std::list<size_t>::const_iterator sourceLabelsRHSIt = sourceLabelsRHS.begin();
+ sourceLabelsRHSIt != sourceLabelsRHS.end(); ++sourceLabelsRHSIt, ++nonTerminalNumber) {
+ probPair = GetLabelPairProbabilities( targetLabelsRHS[nonTerminalNumber], *sourceLabelsRHSIt );
+ t2sLabelsProb += probPair.first;
+ s2tLabelsProb += probPair.second;
+ }
+ t2sLabelsProb /= nNTs;
+ s2tLabelsProb /= nNTs;
+ assert(t2sLabelsProb != 0);
+ assert(s2tLabelsProb != 0);
+ }
+
+ }
+
+ // input tree matching (RHS)
+ if ( !hasCompleteTreeInputMatch ) {
+ treeInputMismatchRHSCount = nNTs-1;
+ for (std::vector<bool>::const_iterator treeInputMatchRHSCountByNonTerminalIt = treeInputMatchRHSCountByNonTerminal.begin();
+ treeInputMatchRHSCountByNonTerminalIt != treeInputMatchRHSCountByNonTerminal.end(); ++treeInputMatchRHSCountByNonTerminalIt) {
+ if (*treeInputMatchRHSCountByNonTerminalIt) {
+ --treeInputMismatchRHSCount;
+ }
+ }
+ }
+
+ // score sparse features: mismatches
+ if ( m_featureVariant == 2 || m_featureVariant == 3 ) {
+
+ // RHS
+
+ for (size_t nonTerminalNumber = 0; nonTerminalNumber < nNTs-1; ++nonTerminalNumber) {
+ // nNTs-1 because nNTs also counts the left-hand side non-terminal
+
+ float score_RHS_0 = (float)1/treeInputLabelsRHS[nonTerminalNumber].size();
+ for (boost::unordered_set<size_t>::const_iterator treeInputLabelsRHSIt = treeInputLabelsRHS[nonTerminalNumber].begin();
+ treeInputLabelsRHSIt != treeInputLabelsRHS[nonTerminalNumber].end(); ++treeInputLabelsRHSIt) {
+
+ if ( m_featureVariant == 2 ||
+ (m_featureVariant == 3 && m_coreSourceLabels.find(*treeInputLabelsRHSIt) != m_coreSourceLabels.end()) ) {
+
+ if (sparseScoredTreeInputLabelsRHS[nonTerminalNumber].find(*treeInputLabelsRHSIt) == sparseScoredTreeInputLabelsRHS[nonTerminalNumber].end()) {
+ // score sparse features: RHS mismatch
+ accumulator->PlusEquals(this,
+ std::string("RHS_0_" + m_sourceLabelsByIndex[*treeInputLabelsRHSIt]),
+ score_RHS_0);
+ }
+ }
+ }
+ }
+
+ // LHS
+
+ float score_LHS_0 = (float)1/treeInputLabelsLHS.size();
+ for (boost::unordered_set<size_t>::const_iterator treeInputLabelsLHSIt = treeInputLabelsLHS.begin();
+ treeInputLabelsLHSIt != treeInputLabelsLHS.end(); ++treeInputLabelsLHSIt) {
+
+ if ( m_featureVariant == 2 ||
+ (m_featureVariant == 3 && m_coreSourceLabels.find(*treeInputLabelsLHSIt) != m_coreSourceLabels.end()) ) {
+
+ if (sparseScoredTreeInputLabelsLHS.find(*treeInputLabelsLHSIt) == sparseScoredTreeInputLabelsLHS.end()) {
+          // score sparse features: LHS mismatch
+ accumulator->PlusEquals(this,
+ std::string("LHS_0_" + m_sourceLabelsByIndex[*treeInputLabelsLHSIt]),
+ score_LHS_0);
+ }
+ }
+ }
+
+ }
+
+ } else {
+
+ // abort with error message if the phrase does not translate an unknown word
+ UTIL_THROW_IF2(!currTarPhr.GetWord(0).IsOOV(), GetScoreProducerDescription()
+ << ": Missing SourceLabels property. "
+ << "Please check phrase table and glue rules.");
+
+ // unknown word
+ isUnkRule = true;
+
+ }
+
+ // add scores
+
+ // input tree matching
+ switch (m_featureVariant) {
+
+ case 0:
+ newScores[0] = hasCompleteTreeInputMatch;
+ break;
+
+ case 1:
+ newScores[0] = ( (hasCompleteTreeInputMatch || isGlueGrammarRule || isUnkRule) ? 0 : std::numeric_limits<float>::min() );
+ break;
+
+ default:
+ newScores[0] = hasCompleteTreeInputMatch;
+ }
+ newScores[1] = treeInputMismatchLHSBinary;
+ newScores[2] = treeInputMismatchRHSCount;
+// newScores[3] = hasCompleteTreeInputMatch ? std::log(t2sLabelsProb) : 0;
+// newScores[4] = hasCompleteTreeInputMatch ? std::log(s2tLabelsProb) : 0;
+// newScores[3] = hasCompleteTreeInputMatch ? std::log(ruleLabelledProbability) : 0;
+
+ accumulator->PlusEquals(this, newScores);
+}
+
+
+std::pair<float,float> SoftSourceSyntacticConstraintsFeature::GetLabelPairProbabilities(
+ const Factor* target,
+ const size_t source) const
+{
+ boost::unordered_map<const Factor*, std::vector< std::pair<float,float> >* >::const_iterator found =
+ m_labelPairProbabilities.find(target);
+ if ( found == m_labelPairProbabilities.end() ) {
+ return std::pair<float,float>(0,0);
+ }
+ return found->second->at(source);
+}
+
+
+}
+
diff --git a/moses/FF/SoftSourceSyntacticConstraintsFeature.h b/moses/FF/SoftSourceSyntacticConstraintsFeature.h
new file mode 100644
index 000000000..725a836eb
--- /dev/null
+++ b/moses/FF/SoftSourceSyntacticConstraintsFeature.h
@@ -0,0 +1,87 @@
+#pragma once
+
+#include <string>
+#include <boost/unordered_map.hpp>
+#include <boost/unordered_set.hpp>
+#include "StatelessFeatureFunction.h"
+#include "FFState.h"
+#include "moses/Factor.h"
+
+namespace Moses
+{
+
+
+class SoftSourceSyntacticConstraintsFeature : public StatelessFeatureFunction
+{
+public:
+ SoftSourceSyntacticConstraintsFeature(const std::string &line);
+
+ ~SoftSourceSyntacticConstraintsFeature() {
+ for (boost::unordered_map<const Factor*, std::vector< std::pair<float,float> >* >::iterator iter=m_labelPairProbabilities.begin();
+ iter!=m_labelPairProbabilities.end(); ++iter) {
+ delete iter->second;
+ }
+ }
+
+ bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
+
+ void SetParameter(const std::string& key, const std::string& value);
+
+ void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
+ {};
+
+ void EvaluateWithSourceContext(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const
+ {};
+
+ void EvaluateWhenApplied(
+ const Hypothesis& cur_hypo,
+ ScoreComponentCollection* accumulator) const
+ {};
+
+ void EvaluateWhenApplied(
+ const ChartHypothesis& cur_hypo,
+ ScoreComponentCollection* accumulator) const;
+
+private:
+ std::string m_sourceLabelSetFile;
+ std::string m_coreSourceLabelSetFile;
+ std::string m_targetSourceLHSJointCountFile;
+ std::string m_unknownLeftHandSideFile;
+ size_t m_featureVariant;
+
+ boost::unordered_map<std::string,size_t> m_sourceLabels;
+ std::vector<std::string> m_sourceLabelsByIndex;
+ boost::unordered_set<size_t> m_coreSourceLabels;
+ boost::unordered_map<const Factor*,size_t> m_sourceLabelIndexesByFactor;
+ size_t m_GlueTopLabel;
+// mutable size_t m_XRHSLabel;
+// mutable size_t m_XLHSLabel;
+
+ boost::unordered_map<const Factor*, std::vector< std::pair<float,float> >* > m_labelPairProbabilities;
+ boost::unordered_map<size_t,float> m_unknownLHSProbabilities;
+ float m_smoothingWeight;
+ float m_unseenLHSSmoothingFactorForUnknowns;
+
+ void Load();
+ void LoadSourceLabelSet();
+ void LoadCoreSourceLabelSet();
+ void LoadTargetSourceLeftHandSideJointCountFile();
+
+ std::pair<float,float> GetLabelPairProbabilities(const Factor* target,
+ const size_t source) const;
+
+};
+
+
+}
+
diff --git a/moses/FF/SourceGHKMTreeInputMatchFeature.cpp b/moses/FF/SourceGHKMTreeInputMatchFeature.cpp
index 0dbb3a7be..38238b10c 100644
--- a/moses/FF/SourceGHKMTreeInputMatchFeature.cpp
+++ b/moses/FF/SourceGHKMTreeInputMatchFeature.cpp
@@ -32,7 +32,7 @@ void SourceGHKMTreeInputMatchFeature::SetParameter(const std::string& key, const
}
// assumes that source-side syntax labels are stored in the target non-terminal field of the rules
-void SourceGHKMTreeInputMatchFeature::Evaluate(const InputType &input
+void SourceGHKMTreeInputMatchFeature::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
diff --git a/moses/FF/SourceGHKMTreeInputMatchFeature.h b/moses/FF/SourceGHKMTreeInputMatchFeature.h
index a1ddae325..743871b1c 100644
--- a/moses/FF/SourceGHKMTreeInputMatchFeature.h
+++ b/moses/FF/SourceGHKMTreeInputMatchFeature.h
@@ -17,22 +17,22 @@ public:
void SetParameter(const std::string& key, const std::string& value);
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const {};
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const;
- void Evaluate(const Hypothesis& hypo,
+ void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const {};
- void EvaluateChart(const ChartHypothesis &hypo,
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const {};
};
diff --git a/moses/FF/SourceWordDeletionFeature.cpp b/moses/FF/SourceWordDeletionFeature.cpp
index 101e40579..e5167b93b 100644
--- a/moses/FF/SourceWordDeletionFeature.cpp
+++ b/moses/FF/SourceWordDeletionFeature.cpp
@@ -63,7 +63,7 @@ bool SourceWordDeletionFeature::IsUseable(const FactorMask &mask) const
return ret;
}
-void SourceWordDeletionFeature::Evaluate(const Phrase &source
+void SourceWordDeletionFeature::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/SourceWordDeletionFeature.h b/moses/FF/SourceWordDeletionFeature.h
index 9b04476af..8211ef0ca 100644
--- a/moses/FF/SourceWordDeletionFeature.h
+++ b/moses/FF/SourceWordDeletionFeature.h
@@ -28,21 +28,21 @@ public:
bool IsUseable(const FactorMask &mask) const;
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- void Evaluate(const Hypothesis& hypo,
+ void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
- void EvaluateChart(const ChartHypothesis &hypo,
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
diff --git a/moses/FF/SpanLength.cpp b/moses/FF/SpanLength.cpp
index 6192334be..7a7c87be8 100644
--- a/moses/FF/SpanLength.cpp
+++ b/moses/FF/SpanLength.cpp
@@ -21,7 +21,7 @@ SpanLength::SpanLength(const std::string &line)
ReadParameters();
}
-void SpanLength::Evaluate(const Phrase &source
+void SpanLength::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
@@ -29,7 +29,7 @@ void SpanLength::Evaluate(const Phrase &source
targetPhrase.SetRuleSource(source);
}
-void SpanLength::Evaluate(const InputType &input
+void SpanLength::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
diff --git a/moses/FF/SpanLength.h b/moses/FF/SpanLength.h
index ba2196f87..dc5564fcd 100644
--- a/moses/FF/SpanLength.h
+++ b/moses/FF/SpanLength.h
@@ -14,12 +14,12 @@ public:
virtual bool IsUseable(const FactorMask &mask) const
{ return true; }
- virtual void Evaluate(const Phrase &source
+ virtual void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
- virtual void Evaluate(const InputType &input
+ virtual void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
@@ -27,11 +27,11 @@ public:
, ScoreComponentCollection *estimatedFutureScore = NULL) const;
- virtual void Evaluate(const Hypothesis& hypo,
+ virtual void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
- virtual void EvaluateChart(const ChartHypothesis &hypo,
+ virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
diff --git a/moses/FF/SparseHieroReorderingFeature.cpp b/moses/FF/SparseHieroReorderingFeature.cpp
new file mode 100644
index 000000000..0c6ac4767
--- /dev/null
+++ b/moses/FF/SparseHieroReorderingFeature.cpp
@@ -0,0 +1,222 @@
+#include <iostream>
+
+#include "moses/ChartHypothesis.h"
+#include "moses/ChartManager.h"
+#include "moses/FactorCollection.h"
+#include "moses/Sentence.h"
+
+#include "util/exception.hh"
+
+#include "SparseHieroReorderingFeature.h"
+
+using namespace std;
+
+namespace Moses
+{
+
+SparseHieroReorderingFeature::SparseHieroReorderingFeature(const std::string &line)
+ :StatelessFeatureFunction(0, line),
+ m_type(SourceCombined),
+ m_sourceFactor(0),
+ m_targetFactor(0),
+ m_sourceVocabFile(""),
+ m_targetVocabFile("")
+{
+
+ /*
+ Configuration of features.
+ factor - Which factor should it apply to
+ type - what type of sparse reordering feature. e.g. block (modelled on Matthias
+ Huck's EAMT 2012 features)
+ word - which words to include, e.g. src_bdry, src_all, tgt_bdry , ...
+ vocab - vocab file to limit it to
+ orientation - e.g. lr, etc.
+ */
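+  // Illustrative (hypothetical) configuration line using the keys handled in
+  // SetParameter() below, e.g.:
+  //   SparseHieroReorderingFeature name=shrf type=SourceCombined input-factor=0 input-vocab-file=vocab.src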
+ cerr << "Constructing a Sparse Reordering feature" << endl;
+ ReadParameters();
+ m_otherFactor = FactorCollection::Instance().AddFactor("##OTHER##");
+ LoadVocabulary(m_sourceVocabFile, m_sourceVocab);
+ LoadVocabulary(m_targetVocabFile, m_targetVocab);
+}
+
+void SparseHieroReorderingFeature::SetParameter(const std::string& key, const std::string& value) {
+ if (key == "input-factor") {
+ m_sourceFactor = Scan<FactorType>(value);
+ } else if (key == "output-factor") {
+ m_targetFactor = Scan<FactorType>(value);
+ } else if (key == "input-vocab-file") {
+ m_sourceVocabFile = value;
+ } else if (key == "output-vocab-file") {
+ m_targetVocabFile = value;
+ } else if (key == "type") {
+ if (value == "SourceCombined") {
+ m_type = SourceCombined;
+ } else if (value == "SourceLeft") {
+ m_type = SourceLeft;
+ } else if (value == "SourceRight") {
+ m_type = SourceRight;
+ } else {
+ UTIL_THROW(util::Exception, "Unknown sparse reordering type " << value);
+ }
+ } else {
+ FeatureFunction::SetParameter(key, value);
+ }
+}
+
+void SparseHieroReorderingFeature::LoadVocabulary(const std::string& filename, Vocab& vocab)
+{
+ if (filename.empty()) return;
+ ifstream in(filename.c_str());
+ UTIL_THROW_IF(!in, util::Exception, "Unable to open vocab file: " << filename);
+ string line;
+ while(getline(in,line)) {
+ vocab.insert(FactorCollection::Instance().AddFactor(line));
+ }
+ in.close();
+}
+
+const Factor* SparseHieroReorderingFeature::GetFactor(const Word& word, const Vocab& vocab, FactorType factorType) const {
+ const Factor* factor = word.GetFactor(factorType);
+ if (vocab.size() && vocab.find(factor) == vocab.end()) return m_otherFactor;
+ return factor;
+}
+
+void SparseHieroReorderingFeature::EvaluateWhenApplied(
+ const ChartHypothesis& cur_hypo ,
+ ScoreComponentCollection* accumulator) const
+{
+ // get index map for underlying hypotheses
+ //const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
+ // cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap();
+
+ //The Huck features. For a rule with source side:
+ // abXcdXef
+ //We first have to split into blocks:
+ // ab X cd X ef
+  //Then we extract features based on the boundary words of the neighbouring blocks.
+ //For the block pair, we use the right word of the left block, and the left
+ //word of the right block.
+
+  //Need to get blocks, and their alignment. Each block has a word range (on the
+  //source), a non-terminal flag, and a set of alignment points in the target phrase.
+
+ //We need to be able to map source word position to target word position, as
+ //much as possible (don't need interior of non-terminals). The alignment info
+ //objects just give us the mappings between *rule* positions. So if we can
+ //map source word position to source rule position, and target rule position
+ //to target word position, then we can map right through.
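+  //Illustrative example: for a rule with source side "a b X c d X e f" the blocks
+  //are [a b] [X] [c d] [X] [e f]. For each adjacent block pair the feature uses the
+  //right boundary word of the left block and the left boundary word of the right
+  //block (taken from the input sentence, so for a non-terminal block this is the
+  //word at the edge of its covered span), yielding sparse features of the form
+  //h_<leftWord>_<rightWord>_M (monotone) or h_<leftWord>_<rightWord>_S (swapped)
+  //for the SourceCombined type; SourceLeft/SourceRight include only one of the words.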
+
+ size_t sourceStart = cur_hypo.GetCurrSourceRange().GetStartPos();
+ size_t sourceSize = cur_hypo.GetCurrSourceRange().GetNumWordsCovered();
+
+ vector<WordsRange> sourceNTSpans;
+ for (size_t prevHypoId = 0; prevHypoId < cur_hypo.GetPrevHypos().size(); ++prevHypoId) {
+ sourceNTSpans.push_back(cur_hypo.GetPrevHypo(prevHypoId)->GetCurrSourceRange());
+ }
+ //put in source order. Is this necessary?
+ sort(sourceNTSpans.begin(), sourceNTSpans.end());
+ //cerr << "Source NTs: ";
+ //for (size_t i = 0; i < sourceNTSpans.size(); ++i) cerr << sourceNTSpans[i] << " ";
+ //cerr << endl;
+
+ typedef pair<WordsRange,bool> Block;//flag indicates NT
+ vector<Block> sourceBlocks;
+ sourceBlocks.push_back(Block(cur_hypo.GetCurrSourceRange(),false));
+ for (vector<WordsRange>::const_iterator i = sourceNTSpans.begin();
+ i != sourceNTSpans.end(); ++i) {
+ const WordsRange& prevHypoRange = *i;
+ Block lastBlock = sourceBlocks.back();
+ sourceBlocks.pop_back();
+ //split this range into before NT, NT and after NT
+ if (prevHypoRange.GetStartPos() > lastBlock.first.GetStartPos()) {
+ sourceBlocks.push_back(Block(WordsRange(lastBlock.first.GetStartPos(),prevHypoRange.GetStartPos()-1),false));
+ }
+ sourceBlocks.push_back(Block(prevHypoRange,true));
+ if (prevHypoRange.GetEndPos() < lastBlock.first.GetEndPos()) {
+ sourceBlocks.push_back(Block(WordsRange(prevHypoRange.GetEndPos()+1,lastBlock.first.GetEndPos()), false));
+ }
+ }
+ /*
+ cerr << "Source Blocks: ";
+ for (size_t i = 0; i < sourceBlocks.size(); ++i) cerr << sourceBlocks[i].first << " "
+ << (sourceBlocks[i].second ? "NT" : "T") << " ";
+ cerr << endl;
+ */
+
+ //Mapping from source word to target rule position
+ vector<size_t> sourceWordToTargetRulePos(sourceSize);
+ map<size_t,size_t> alignMap;
+ alignMap.insert(
+ cur_hypo.GetCurrTargetPhrase().GetAlignTerm().begin(),
+ cur_hypo.GetCurrTargetPhrase().GetAlignTerm().end());
+ alignMap.insert(
+ cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().begin(),
+ cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().end());
+ //vector<size_t> alignMapTerm = cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm()
+ size_t sourceRulePos = 0;
+ //cerr << "SW->RP ";
+ for (vector<Block>::const_iterator sourceBlockIt = sourceBlocks.begin();
+ sourceBlockIt != sourceBlocks.end(); ++sourceBlockIt) {
+ for (size_t sourceWordPos = sourceBlockIt->first.GetStartPos();
+ sourceWordPos <= sourceBlockIt->first.GetEndPos(); ++sourceWordPos) {
+ sourceWordToTargetRulePos[sourceWordPos - sourceStart] = alignMap[sourceRulePos];
+ // cerr << sourceWordPos - sourceStart << "-" << alignMap[sourceRulePos] << " ";
+ if (! sourceBlockIt->second) {
+ //T
+ ++sourceRulePos;
+ }
+ }
+ if ( sourceBlockIt->second) {
+ //NT
+ ++sourceRulePos;
+ }
+ }
+ //cerr << endl;
+
+ //Iterate through block pairs
+ const Sentence& sentence =
+ dynamic_cast<const Sentence&>(cur_hypo.GetManager().GetSource());
+ //const TargetPhrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();
+ for (size_t i = 0; i < sourceBlocks.size()-1; ++i) {
+ Block& leftSourceBlock = sourceBlocks[i];
+ Block& rightSourceBlock = sourceBlocks[i+1];
+ size_t sourceLeftBoundaryPos = leftSourceBlock.first.GetEndPos();
+ size_t sourceRightBoundaryPos = rightSourceBlock.first.GetStartPos();
+ const Word& sourceLeftBoundaryWord = sentence.GetWord(sourceLeftBoundaryPos);
+ const Word& sourceRightBoundaryWord = sentence.GetWord(sourceRightBoundaryPos);
+ sourceLeftBoundaryPos -= sourceStart;
+ sourceRightBoundaryPos -= sourceStart;
+
+ // Need to figure out where these map to on the target.
+ size_t targetLeftRulePos =
+ sourceWordToTargetRulePos[sourceLeftBoundaryPos];
+ size_t targetRightRulePos =
+ sourceWordToTargetRulePos[sourceRightBoundaryPos];
+
+ bool isMonotone = true;
+ if ((sourceLeftBoundaryPos < sourceRightBoundaryPos &&
+ targetLeftRulePos > targetRightRulePos) ||
+ ((sourceLeftBoundaryPos > sourceRightBoundaryPos &&
+ targetLeftRulePos < targetRightRulePos)))
+ {
+ isMonotone = false;
+ }
+ stringstream buf;
+ buf << "h_"; //sparse reordering, Huck
+ if (m_type == SourceLeft || m_type == SourceCombined) {
+ buf << GetFactor(sourceLeftBoundaryWord,m_sourceVocab,m_sourceFactor)->GetString();
+ buf << "_";
+ }
+ if (m_type == SourceRight || m_type == SourceCombined) {
+ buf << GetFactor(sourceRightBoundaryWord,m_sourceVocab,m_sourceFactor)->GetString();
+ buf << "_";
+ }
+ buf << (isMonotone ? "M" : "S");
+ accumulator->PlusEquals(this,buf.str(), 1);
+ }
+// cerr << endl;
+}
+
+
+}
+
diff --git a/moses/FF/SparseHieroReorderingFeature.h b/moses/FF/SparseHieroReorderingFeature.h
new file mode 100644
index 000000000..d631fdec1
--- /dev/null
+++ b/moses/FF/SparseHieroReorderingFeature.h
@@ -0,0 +1,80 @@
+#pragma once
+
+#include <string>
+
+#include <boost/unordered_set.hpp>
+
+#include <util/string_piece.hh>
+
+#include "moses/Factor.h"
+#include "moses/Sentence.h"
+
+#include "StatelessFeatureFunction.h"
+#include "FFState.h"
+
+namespace Moses
+{
+
+class SparseHieroReorderingFeature : public StatelessFeatureFunction
+{
+public:
+ enum Type {
+ SourceCombined,
+ SourceLeft,
+ SourceRight
+ };
+
+ SparseHieroReorderingFeature(const std::string &line);
+
+ bool IsUseable(const FactorMask &mask) const
+ { return true; }
+
+ void SetParameter(const std::string& key, const std::string& value);
+
+ void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
+ {}
+ virtual void EvaluateWithSourceContext(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const
+ {}
+
+ virtual void EvaluateWhenApplied(const Hypothesis& hypo,
+ ScoreComponentCollection* accumulator) const
+ {}
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
+ ScoreComponentCollection* accumulator) const;
+
+
+private:
+
+ typedef boost::unordered_set<const Factor*> Vocab;
+
+ void AddNonTerminalPairFeatures(
+ const Sentence& sentence, const WordsRange& nt1, const WordsRange& nt2,
+ bool isMonotone, ScoreComponentCollection* accumulator) const;
+
+ void LoadVocabulary(const std::string& filename, Vocab& vocab);
+ const Factor* GetFactor(const Word& word, const Vocab& vocab, FactorType factor) const;
+
+ Type m_type;
+ FactorType m_sourceFactor;
+ FactorType m_targetFactor;
+ std::string m_sourceVocabFile;
+ std::string m_targetVocabFile;
+
+ const Factor* m_otherFactor;
+
+ Vocab m_sourceVocab;
+ Vocab m_targetVocab;
+
+};
+
+
+}
+
diff --git a/moses/FF/SparseHieroReorderingFeatureTest.cpp b/moses/FF/SparseHieroReorderingFeatureTest.cpp
new file mode 100644
index 000000000..f05355df9
--- /dev/null
+++ b/moses/FF/SparseHieroReorderingFeatureTest.cpp
@@ -0,0 +1,36 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2013- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+#include <iostream>
+
+#include <boost/test/unit_test.hpp>
+
+#include "SparseHieroReorderingFeature.h"
+
+using namespace Moses;
+using namespace std;
+
+BOOST_AUTO_TEST_SUITE(shrf)
+
+BOOST_AUTO_TEST_CASE(lexical_rule)
+{
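+  // Placeholder test: currently only checks that the feature can be constructed
+  // from a minimal configuration line.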
+ SparseHieroReorderingFeature feature("name=shrf");
+
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/moses/FF/StatefulFeatureFunction.h b/moses/FF/StatefulFeatureFunction.h
index 75b46d827..86bed04ee 100644
--- a/moses/FF/StatefulFeatureFunction.h
+++ b/moses/FF/StatefulFeatureFunction.h
@@ -29,12 +29,12 @@ public:
* hypothesis, you should store it in an FFState object which will be passed
* in as prev_state. If you don't do this, you will get in trouble.
*/
- virtual FFState* Evaluate(
+ virtual FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const = 0;
- virtual FFState* EvaluateChart(
+ virtual FFState* EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const = 0;
diff --git a/moses/FF/StatelessFeatureFunction.h b/moses/FF/StatelessFeatureFunction.h
index fde740115..94029f882 100644
--- a/moses/FF/StatelessFeatureFunction.h
+++ b/moses/FF/StatelessFeatureFunction.h
@@ -23,13 +23,13 @@ public:
/**
* This should be implemented for features that apply to phrase-based models.
**/
- virtual void Evaluate(const Hypothesis& hypo,
+ virtual void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const = 0;
/**
* Same for chart-based features.
**/
- virtual void EvaluateChart(const ChartHypothesis &hypo,
+ virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const = 0;
virtual bool IsStateless() const {
diff --git a/moses/FF/SyntaxRHS.cpp b/moses/FF/SyntaxRHS.cpp
index 292eac004..5168b72d7 100644
--- a/moses/FF/SyntaxRHS.cpp
+++ b/moses/FF/SyntaxRHS.cpp
@@ -14,14 +14,14 @@ SyntaxRHS::SyntaxRHS(const std::string &line)
ReadParameters();
}
-void SyntaxRHS::Evaluate(const Phrase &source
+void SyntaxRHS::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{
}
-void SyntaxRHS::Evaluate(const InputType &input
+void SyntaxRHS::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
@@ -42,11 +42,11 @@ void SyntaxRHS::Evaluate(const InputType &input
}
-void SyntaxRHS::Evaluate(const Hypothesis& hypo,
+void SyntaxRHS::EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
-void SyntaxRHS::EvaluateChart(const ChartHypothesis &hypo,
+void SyntaxRHS::EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
diff --git a/moses/FF/SyntaxRHS.h b/moses/FF/SyntaxRHS.h
index 1f9adcb93..4b9214995 100644
--- a/moses/FF/SyntaxRHS.h
+++ b/moses/FF/SyntaxRHS.h
@@ -15,19 +15,19 @@ public:
return true;
}
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const;
- void Evaluate(const Hypothesis& hypo,
+ void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const;
- void EvaluateChart(const ChartHypothesis &hypo,
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const;
};
diff --git a/moses/FF/TargetBigramFeature.cpp b/moses/FF/TargetBigramFeature.cpp
index 104f986e7..f1da62b7d 100644
--- a/moses/FF/TargetBigramFeature.cpp
+++ b/moses/FF/TargetBigramFeature.cpp
@@ -64,7 +64,7 @@ const FFState* TargetBigramFeature::EmptyHypothesisState(const InputType &/*inpu
return new TargetBigramState(m_bos);
}
-FFState* TargetBigramFeature::Evaluate(const Hypothesis& cur_hypo,
+FFState* TargetBigramFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const
{
diff --git a/moses/FF/TargetBigramFeature.h b/moses/FF/TargetBigramFeature.h
index fe2500ad2..c63f3caa4 100644
--- a/moses/FF/TargetBigramFeature.h
+++ b/moses/FF/TargetBigramFeature.h
@@ -39,22 +39,22 @@ public:
virtual const FFState* EmptyHypothesisState(const InputType &input) const;
- virtual FFState* Evaluate(const Hypothesis& cur_hypo, const FFState* prev_state,
+ virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
- virtual FFState* EvaluateChart( const ChartHypothesis& /* cur_hypo */,
+ virtual FFState* EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */,
int /* featureID */,
ScoreComponentCollection* ) const {
throw std::logic_error("TargetBigramFeature not valid in chart decoder");
}
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/TargetNgramFeature.cpp b/moses/FF/TargetNgramFeature.cpp
index b0abb07a1..a43410990 100644
--- a/moses/FF/TargetNgramFeature.cpp
+++ b/moses/FF/TargetNgramFeature.cpp
@@ -95,7 +95,7 @@ const FFState* TargetNgramFeature::EmptyHypothesisState(const InputType &/*input
return new TargetNgramState(bos);
}
-FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
+FFState* TargetNgramFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const
{
@@ -207,7 +207,7 @@ void TargetNgramFeature::appendNgram(const Word& word, bool& skip, stringstream
}
}
-FFState* TargetNgramFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int featureId, ScoreComponentCollection* accumulator) const
+FFState* TargetNgramFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureId, ScoreComponentCollection* accumulator) const
{
vector<const Word*> contextFactor;
contextFactor.reserve(m_n);
diff --git a/moses/FF/TargetNgramFeature.h b/moses/FF/TargetNgramFeature.h
index 8e91a08b2..e87252670 100644
--- a/moses/FF/TargetNgramFeature.h
+++ b/moses/FF/TargetNgramFeature.h
@@ -186,20 +186,20 @@ public:
virtual const FFState* EmptyHypothesisState(const InputType &input) const;
- virtual FFState* Evaluate(const Hypothesis& cur_hypo, const FFState* prev_state,
+ virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
- virtual FFState* EvaluateChart(const ChartHypothesis& cur_hypo, int featureId,
+ virtual FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureId,
ScoreComponentCollection* accumulator) const;
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/TargetWordInsertionFeature.cpp b/moses/FF/TargetWordInsertionFeature.cpp
index 7bb1ae6e9..c8db6bfe3 100644
--- a/moses/FF/TargetWordInsertionFeature.cpp
+++ b/moses/FF/TargetWordInsertionFeature.cpp
@@ -53,7 +53,7 @@ void TargetWordInsertionFeature::Load()
m_unrestricted = false;
}
-void TargetWordInsertionFeature::Evaluate(const Phrase &source
+void TargetWordInsertionFeature::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/TargetWordInsertionFeature.h b/moses/FF/TargetWordInsertionFeature.h
index eedde61b2..06fa25400 100644
--- a/moses/FF/TargetWordInsertionFeature.h
+++ b/moses/FF/TargetWordInsertionFeature.h
@@ -28,21 +28,21 @@ public:
void Load();
- virtual void Evaluate(const Phrase &source
+ virtual void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- void Evaluate(const Hypothesis& hypo,
+ void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
- void EvaluateChart(const ChartHypothesis &hypo,
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
diff --git a/moses/FF/TreeStructureFeature.cpp b/moses/FF/TreeStructureFeature.cpp
index a5446891a..c0505edd6 100644
--- a/moses/FF/TreeStructureFeature.cpp
+++ b/moses/FF/TreeStructureFeature.cpp
@@ -266,7 +266,7 @@ void TreeStructureFeature::AddNTLabels(TreePointer root) const {
}
}
-FFState* TreeStructureFeature::EvaluateChart(const ChartHypothesis& cur_hypo
+FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
, int featureID /* used to index the state in the previous hypotheses */
, ScoreComponentCollection* accumulator) const
{
diff --git a/moses/FF/TreeStructureFeature.h b/moses/FF/TreeStructureFeature.h
index 0fbf0f9ea..a81d604bb 100644
--- a/moses/FF/TreeStructureFeature.h
+++ b/moses/FF/TreeStructureFeature.h
@@ -152,21 +152,21 @@ public:
return true;
}
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const {};
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const {};
- FFState* Evaluate(
+ FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const {UTIL_THROW(util::Exception, "Not implemented");};
- FFState* EvaluateChart(
+ FFState* EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const;
diff --git a/moses/FF/UnknownWordPenaltyProducer.h b/moses/FF/UnknownWordPenaltyProducer.h
index 3b48f4380..8850641e5 100644
--- a/moses/FF/UnknownWordPenaltyProducer.h
+++ b/moses/FF/UnknownWordPenaltyProducer.h
@@ -31,20 +31,20 @@ public:
}
std::vector<float> DefaultWeights() const;
- void Evaluate(const Hypothesis& hypo,
+ void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
- void EvaluateChart(const ChartHypothesis &hypo,
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/WordPenaltyProducer.cpp b/moses/FF/WordPenaltyProducer.cpp
index 6dea01b72..1e191d040 100644
--- a/moses/FF/WordPenaltyProducer.cpp
+++ b/moses/FF/WordPenaltyProducer.cpp
@@ -17,7 +17,7 @@ WordPenaltyProducer::WordPenaltyProducer(const std::string &line)
s_instance = this;
}
-void WordPenaltyProducer::Evaluate(const Phrase &source
+void WordPenaltyProducer::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/WordPenaltyProducer.h b/moses/FF/WordPenaltyProducer.h
index ffd921677..e62877307 100644
--- a/moses/FF/WordPenaltyProducer.h
+++ b/moses/FF/WordPenaltyProducer.h
@@ -27,17 +27,17 @@ public:
return true;
}
- virtual void Evaluate(const Phrase &source
+ virtual void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
- void Evaluate(const Hypothesis& hypo,
+ void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
- void EvaluateChart(const ChartHypothesis &hypo,
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
diff --git a/moses/FF/WordTranslationFeature.cpp b/moses/FF/WordTranslationFeature.cpp
index 554107c32..7a98ad4c8 100644
--- a/moses/FF/WordTranslationFeature.cpp
+++ b/moses/FF/WordTranslationFeature.cpp
@@ -137,7 +137,7 @@ void WordTranslationFeature::Load()
}
}
-void WordTranslationFeature::Evaluate
+void WordTranslationFeature::EvaluateWhenApplied
(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{
@@ -349,7 +349,7 @@ void WordTranslationFeature::Evaluate
}
}
-void WordTranslationFeature::EvaluateChart(
+void WordTranslationFeature::EvaluateWhenApplied(
const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{
diff --git a/moses/FF/WordTranslationFeature.h b/moses/FF/WordTranslationFeature.h
index 072ba1d6a..c213d8eb3 100644
--- a/moses/FF/WordTranslationFeature.h
+++ b/moses/FF/WordTranslationFeature.h
@@ -48,19 +48,19 @@ public:
return new DummyState();
}
- void Evaluate(const Hypothesis& hypo,
+ void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const;
- void EvaluateChart(const ChartHypothesis &hypo,
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const;
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FactorCollection.cpp b/moses/FactorCollection.cpp
index 5013da417..d701f8409 100644
--- a/moses/FactorCollection.cpp
+++ b/moses/FactorCollection.cpp
@@ -67,6 +67,23 @@ const Factor *FactorCollection::AddFactor(const StringPiece &factorString, bool
return &ret.first->in;
}
+const Factor *FactorCollection::GetFactor(const StringPiece &factorString, bool isNonTerminal)
+{
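+  // Read-only counterpart to AddFactor(): look the string up in the matching set
+  // and return NULL if it has not been added; never inserts a new factor.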
+ FactorFriend to_find;
+ to_find.in.m_string = factorString;
+ to_find.in.m_id = (isNonTerminal) ? m_factorIdNonTerminal : m_factorId;
+  Set & set = (isNonTerminal) ? m_setNonTerminal : m_set;
+  { // read-lock scope
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
+#endif // WITH_THREADS
+ Set::const_iterator i = set.find(to_find);
+ if (i != set.end()) return &i->in;
+ }
+ return NULL;
+}
+
+
FactorCollection::~FactorCollection() {}
TO_STRING_BODY(FactorCollection);
diff --git a/moses/FactorCollection.h b/moses/FactorCollection.h
index 400b6aa64..b5f49f3cf 100644
--- a/moses/FactorCollection.h
+++ b/moses/FactorCollection.h
@@ -114,6 +114,8 @@ public:
return m_factorIdNonTerminal;
}
+ const Factor *GetFactor(const StringPiece &factorString, bool isNonTerminal = false);
+
// TODO: remove calls to this function, replacing them with the simpler AddFactor(factorString)
const Factor *AddFactor(FactorDirection /*direction*/, FactorType /*factorType*/, const StringPiece &factorString, bool isNonTerminal = false) {
return AddFactor(factorString, isNonTerminal);
diff --git a/moses/FeatureVector.cpp b/moses/FeatureVector.cpp
index 536c1a720..ce4a043a6 100644
--- a/moses/FeatureVector.cpp
+++ b/moses/FeatureVector.cpp
@@ -214,10 +214,10 @@ void FVector::save(const string& filename) const
out.close();
}
-void FVector::write(ostream& out) const
+void FVector::write(ostream& out,const string& sep, const string& linesep) const
{
for (const_iterator i = cbegin(); i != cend(); ++i) {
- out << i->first << " " << i->second << endl;
+ out << i->first << sep << i->second << linesep;
}
}
diff --git a/moses/FeatureVector.h b/moses/FeatureVector.h
index 65f8bdc2e..fd5d3340c 100644
--- a/moses/FeatureVector.h
+++ b/moses/FeatureVector.h
@@ -167,7 +167,7 @@ public:
/** Load from file - each line should be 'root[_name] value' */
bool load(const std::string& filename);
void save(const std::string& filename) const;
- void write(std::ostream& out) const ;
+ void write(std::ostream& out, const std::string& sep=" ", const std::string& linesep="\n") const ;
/** Element access */
ProxyFVector operator[](const FName& name);
diff --git a/moses/HypergraphOutput.cpp b/moses/HypergraphOutput.cpp
new file mode 100644
index 000000000..da7e804dc
--- /dev/null
+++ b/moses/HypergraphOutput.cpp
@@ -0,0 +1,248 @@
+// $Id$
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/filesystem.hpp>
+#include <boost/iostreams/device/file.hpp>
+#include <boost/iostreams/filter/bzip2.hpp>
+#include <boost/iostreams/filter/gzip.hpp>
+#include <boost/iostreams/filtering_stream.hpp>
+
+#include <util/exception.hh>
+
+#include "ChartHypothesisCollection.h"
+#include "ChartManager.h"
+#include "HypergraphOutput.h"
+#include "Manager.h"
+
+using namespace std;
+
+namespace Moses {
+
+template<class M>
+HypergraphOutput<M>::HypergraphOutput(size_t precision) :
+ m_precision(precision) {
+ const StaticData& staticData = StaticData::Instance();
+ vector<string> hypergraphParameters = staticData.GetParam("output-search-graph-hypergraph");
+
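+  // Parameter layout for output-search-graph-hypergraph (as parsed below):
+  //   [0] "true"/"false" - append the compression suffix to each per-sentence file name
+  //   [1] compression    - "txt" (default), "gz" or "bz2"
+  //   [2] output directory (defaults to the n-best output directory, or ./hypergraph)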
+ if (hypergraphParameters.size() > 0 && hypergraphParameters[0] == "true") {
+ m_appendSuffix = true;
+ } else {
+ m_appendSuffix = false;
+ }
+
+ string compression;
+ if (hypergraphParameters.size() > 1) {
+ m_compression = hypergraphParameters[1];
+ } else {
+ m_compression = "txt";
+ }
+ UTIL_THROW_IF(m_compression != "txt" && m_compression != "gz" && m_compression != "bz2",
+ util::Exception, "Unknown compression type: " << m_compression);
+
+ if ( hypergraphParameters.size() > 2 ) {
+ m_hypergraphDir = hypergraphParameters[2];
+ } else {
+ string nbestFile = staticData.GetNBestFilePath();
+ if ( ! nbestFile.empty() && nbestFile!="-" && !boost::starts_with(nbestFile,"/dev/stdout") ) {
+ boost::filesystem::path nbestPath(nbestFile);
+
+ // In the Boost filesystem API version 2,
+ // which was the default prior to Boost 1.46,
+ // the filename() method returned a string.
+ //
+ // In the Boost filesystem API version 3,
+ // which is the default starting with Boost 1.46,
+ // the filename() method returns a path object.
+ //
+ // To get a string from the path object,
+ // the native() method must be called.
+ // hypergraphDir = nbestPath.parent_path().filename()
+ //#if BOOST_VERSION >= 104600
+ // .native()
+ //#endif
+ //;
+
+ // Hopefully the following compiles under all versions of Boost.
+ //
+ // If this line gives you compile errors,
+ // contact Lane Schwartz on the Moses mailing list
+ m_hypergraphDir = nbestPath.parent_path().string();
+
+ } else {
+ stringstream hypergraphDirName;
+ hypergraphDirName << boost::filesystem::current_path().string() << "/hypergraph";
+ m_hypergraphDir = hypergraphDirName.str();
+ }
+ }
+
+ if ( ! boost::filesystem::exists(m_hypergraphDir) ) {
+ boost::filesystem::create_directory(m_hypergraphDir);
+ }
+
+ UTIL_THROW_IF(!boost::filesystem::is_directory(m_hypergraphDir),
+ util::Exception, "Cannot output hypergraphs to " << m_hypergraphDir << " because that path exists, but is not a directory");
+
+
+ ofstream weightsOut;
+ stringstream weightsFilename;
+ weightsFilename << m_hypergraphDir << "/weights";
+
+ TRACE_ERR("The weights file is " << weightsFilename.str() << "\n");
+ weightsOut.open(weightsFilename.str().c_str());
+ weightsOut.setf(std::ios::fixed);
+ weightsOut.precision(6);
+ staticData.GetAllWeights().Save(weightsOut);
+ weightsOut.close();
+}
+
+template<class M>
+void HypergraphOutput<M>::Write(const M& manager) const {
+
+ stringstream fileName;
+ fileName << m_hypergraphDir << "/" << manager.GetLineNumber();
+ if ( m_appendSuffix ) {
+ fileName << "." << m_compression;
+ }
+ boost::iostreams::filtering_ostream file;
+
+ if ( m_compression == "gz" ) {
+ file.push( boost::iostreams::gzip_compressor() );
+ } else if ( m_compression == "bz2" ) {
+ file.push( boost::iostreams::bzip2_compressor() );
+ }
+
+ file.push( boost::iostreams::file_sink(fileName.str(), ios_base::out) );
+
+ if (file.is_complete() && file.good()) {
+ file.setf(std::ios::fixed);
+ file.precision(m_precision);
+ manager.OutputSearchGraphAsHypergraph(file);
+ file.flush();
+ } else {
+ TRACE_ERR("Cannot output hypergraph for line " << manager.GetLineNumber()
+ << " because the output file " << fileName.str()
+ << " is not open or not ready for writing"
+ << std::endl);
+ }
+ file.pop();
+}
+
+template class HypergraphOutput<Manager>;
+template class HypergraphOutput<ChartManager>;
+
+
+void ChartSearchGraphWriterMoses::WriteHypos
+ (const ChartHypothesisCollection& hypos, const map<unsigned, bool> &reachable) const {
+
+ ChartHypothesisCollection::const_iterator iter;
+ for (iter = hypos.begin() ; iter != hypos.end() ; ++iter) {
+ ChartHypothesis &mainHypo = **iter;
+ if (StaticData::Instance().GetUnprunedSearchGraph() ||
+ reachable.find(mainHypo.GetId()) != reachable.end()) {
+ (*m_out) << m_lineNumber << " " << mainHypo << endl;
+ }
+
+ const ChartArcList *arcList = mainHypo.GetArcList();
+ if (arcList) {
+ ChartArcList::const_iterator iterArc;
+ for (iterArc = arcList->begin(); iterArc != arcList->end(); ++iterArc) {
+ const ChartHypothesis &arc = **iterArc;
+ if (reachable.find(arc.GetId()) != reachable.end()) {
+ (*m_out) << m_lineNumber << " " << arc << endl;
+ }
+ }
+ }
+ }
+
+}
+void ChartSearchGraphWriterHypergraph::WriteHeader(size_t winners, size_t losers) const {
+
+ (*m_out) << "# target ||| features ||| source-covered" << endl;
+ (*m_out) << winners << " " << (winners+losers) << endl;
+
+}
+
+void ChartSearchGraphWriterHypergraph::WriteHypos(const ChartHypothesisCollection& hypos,
+ const map<unsigned, bool> &reachable) const {
+
+ ChartHypothesisCollection::const_iterator iter;
+ for (iter = hypos.begin() ; iter != hypos.end() ; ++iter) {
+ const ChartHypothesis* mainHypo = *iter;
+ if (!StaticData::Instance().GetUnprunedSearchGraph() &&
+ reachable.find(mainHypo->GetId()) == reachable.end()) {
+ // Ignore unreachable nodes
+ continue;
+ }
+ (*m_out) << "# node " << m_nodeId << endl;
+ m_hypoIdToNodeId[mainHypo->GetId()] = m_nodeId;
+ ++m_nodeId;
+ vector<const ChartHypothesis*> edges;
+ edges.push_back(mainHypo);
+ const ChartArcList *arcList = (*iter)->GetArcList();
+ if (arcList) {
+ ChartArcList::const_iterator iterArc;
+ for (iterArc = arcList->begin(); iterArc != arcList->end(); ++iterArc) {
+ const ChartHypothesis* arc = *iterArc;
+ if (reachable.find(arc->GetId()) != reachable.end()) {
+ edges.push_back(arc);
+ }
+ }
+ }
+ (*m_out) << edges.size() << endl;
+ for (vector<const ChartHypothesis*>::const_iterator ei = edges.begin(); ei != edges.end(); ++ei) {
+ const ChartHypothesis* hypo = *ei;
+ const TargetPhrase& target = hypo->GetCurrTargetPhrase();
+ size_t ntIndex = 0;
+ for (size_t i = 0; i < target.GetSize(); ++i) {
+ const Word& word = target.GetWord(i);
+ if (word.IsNonTerminal()) {
+ size_t hypoId = hypo->GetPrevHypos()[ntIndex++]->GetId();
+ (*m_out) << "[" << m_hypoIdToNodeId[hypoId] << "]";
+ } else {
+ (*m_out) << word.GetFactor(0)->GetString();
+ }
+ (*m_out) << " ";
+ }
+ (*m_out) << " ||| ";
+ ScoreComponentCollection scores = hypo->GetScoreBreakdown();
+ HypoList::const_iterator hi;
+ for (hi = hypo->GetPrevHypos().begin(); hi != hypo->GetPrevHypos().end(); ++hi) {
+ scores.MinusEquals((*hi)->GetScoreBreakdown());
+ }
+ scores.Save(*m_out, false);
+ (*m_out) << " ||| ";
+ (*m_out) << hypo->GetCurrSourceRange().GetNumWordsCovered();
+ (*m_out) << endl;
+
+ }
+ }
+}
+
+
+} //namespace Moses
+
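Note: Write() above selects the compressor purely from the configured suffix and then pushes a file sink, so the whole chain is a boost::iostreams filtering_ostream. A minimal, self-contained sketch of that pattern (the file name, suffix string, and payload are placeholders, not part of the patch):

    #include <boost/iostreams/filtering_stream.hpp>
    #include <boost/iostreams/filter/gzip.hpp>
    #include <boost/iostreams/filter/bzip2.hpp>
    #include <boost/iostreams/device/file.hpp>
    #include <string>

    void WriteCompressed(const std::string &fileName, const std::string &compression) {
      boost::iostreams::filtering_ostream out;
      // Filters must be pushed before the sink; the sink is always pushed last.
      if (compression == "gz") {
        out.push(boost::iostreams::gzip_compressor());
      } else if (compression == "bz2") {
        out.push(boost::iostreams::bzip2_compressor());
      }
      out.push(boost::iostreams::file_sink(fileName, std::ios_base::out));
      if (out.is_complete() && out.good()) {
        out << "payload goes here\n";  // in Write() this is the serialised hypergraph
        out.flush();
      }
      out.pop();  // detach the sink so the file is closed
    }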
diff --git a/moses/HypergraphOutput.h b/moses/HypergraphOutput.h
new file mode 100644
index 000000000..4ec8e2665
--- /dev/null
+++ b/moses/HypergraphOutput.h
@@ -0,0 +1,95 @@
+// $Id$
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_Hypergraph_Output_h
+#define moses_Hypergraph_Output_h
+
+#include <ostream>
+
+/**
+* Manage the output of hypergraphs.
+**/
+
+namespace Moses {
+
+class ChartHypothesisCollection;
+
+template<class M>
+class HypergraphOutput {
+
+public:
+ /** Initialise output directory and create weights file */
+ HypergraphOutput(size_t precision);
+
+ /** Write this hypergraph to file */
+ void Write(const M& manager) const;
+
+private:
+ size_t m_precision;
+ std::string m_hypergraphDir;
+ std::string m_compression;
+ bool m_appendSuffix;
+};
+
+
+/**
+ * Abstract base class for the different types of search graph output for chart Moses.
+**/
+class ChartSearchGraphWriter {
+public:
+ virtual void WriteHeader(size_t winners, size_t losers) const = 0;
+ virtual void WriteHypos(const ChartHypothesisCollection& hypos,
+ const std::map<unsigned, bool> &reachable) const = 0;
+
+};
+
+/** "Moses" format (output search graph style) */
+class ChartSearchGraphWriterMoses : public virtual ChartSearchGraphWriter {
+public:
+ ChartSearchGraphWriterMoses(std::ostream* out, size_t lineNumber) :
+ m_out(out), m_lineNumber(lineNumber) {}
+ virtual void WriteHeader(size_t, size_t) const {/* do nothing */}
+ virtual void WriteHypos(const ChartHypothesisCollection& hypos,
+ const std::map<unsigned, bool> &reachable) const;
+
+private:
+ std::ostream* m_out;
+ size_t m_lineNumber;
+};
+
+/** Modified version of Kenneth Heafield's lazy hypergraph format */
+class ChartSearchGraphWriterHypergraph : public virtual ChartSearchGraphWriter {
+public:
+ ChartSearchGraphWriterHypergraph(std::ostream* out) :
+ m_out(out), m_nodeId(0) {}
+ virtual void WriteHeader(size_t winners, size_t losers) const;
+ virtual void WriteHypos(const ChartHypothesisCollection& hypos,
+ const std::map<unsigned, bool> &reachable) const;
+
+private:
+ std::ostream* m_out;
+ mutable size_t m_nodeId;
+ mutable std::map<size_t,size_t> m_hypoIdToNodeId;
+};
+
+}
+#endif
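Note: based only on the interface declared above, one HypergraphOutput object is meant to be constructed per decoding run (its constructor sets up the output directory and the weights file) and Write() is then called once per sentence. A hedged usage sketch; the precision value and the manager variable are placeholders:

    // Sketch only: assumes a fully initialised Moses::Manager named `manager`.
    Moses::HypergraphOutput<Moses::Manager> hypergraphOutput(6 /* decimal places */);
    // ... decode one sentence with `manager` ...
    hypergraphOutput.Write(manager);  // writes <hypergraph-dir>/<line-number>[.gz|.bz2]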
diff --git a/moses/Hypothesis.cpp b/moses/Hypothesis.cpp
index 400fd0e0f..d77d249e7 100644
--- a/moses/Hypothesis.cpp
+++ b/moses/Hypothesis.cpp
@@ -85,14 +85,13 @@ Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &tran
, m_wordDeleted(false)
, m_totalScore(0.0f)
, m_futureScore(0.0f)
- , m_scoreBreakdown(prevHypo.GetScoreBreakdown())
, m_ffStates(prevHypo.m_ffStates.size())
, m_arcList(NULL)
, m_transOpt(transOpt)
, m_manager(prevHypo.GetManager())
, m_id(m_manager.GetNextHypoId())
{
- m_scoreBreakdown.PlusEquals(transOpt.GetScoreBreakdown());
+ m_currScoreBreakdown.PlusEquals(transOpt.GetScoreBreakdown());
// assert that we are not extending our hypothesis by retranslating something
// that this hypothesis has already translated!
@@ -206,30 +205,30 @@ int Hypothesis::RecombineCompare(const Hypothesis &compare) const
return 0;
}
-void Hypothesis::EvaluateWith(const StatefulFeatureFunction &sfff,
+void Hypothesis::EvaluateWhenApplied(const StatefulFeatureFunction &sfff,
int state_idx)
{
const StaticData &staticData = StaticData::Instance();
if (! staticData.IsFeatureFunctionIgnored( sfff )) {
- m_ffStates[state_idx] = sfff.Evaluate(
+ m_ffStates[state_idx] = sfff.EvaluateWhenApplied(
*this,
m_prevHypo ? m_prevHypo->m_ffStates[state_idx] : NULL,
- &m_scoreBreakdown);
+ &m_currScoreBreakdown);
}
}
-void Hypothesis::EvaluateWith(const StatelessFeatureFunction& slff)
+void Hypothesis::EvaluateWhenApplied(const StatelessFeatureFunction& slff)
{
const StaticData &staticData = StaticData::Instance();
if (! staticData.IsFeatureFunctionIgnored( slff )) {
- slff.Evaluate(*this, &m_scoreBreakdown);
+ slff.EvaluateWhenApplied(*this, &m_currScoreBreakdown);
}
}
/***
* calculate the logarithm of our total translation score (sum up components)
*/
-void Hypothesis::Evaluate(const SquareMatrix &futureScore)
+void Hypothesis::EvaluateWhenApplied(const SquareMatrix &futureScore)
{
IFVERBOSE(2) {
m_manager.GetSentenceStats().StartTimeOtherScore();
@@ -245,7 +244,7 @@ void Hypothesis::Evaluate(const SquareMatrix &futureScore)
StatelessFeatureFunction::GetStatelessFeatureFunctions();
for (unsigned i = 0; i < sfs.size(); ++i) {
const StatelessFeatureFunction &ff = *sfs[i];
- EvaluateWith(ff);
+ EvaluateWhenApplied(ff);
}
const vector<const StatefulFeatureFunction*>& ffs =
@@ -254,9 +253,9 @@ void Hypothesis::Evaluate(const SquareMatrix &futureScore)
const StatefulFeatureFunction &ff = *ffs[i];
const StaticData &staticData = StaticData::Instance();
if (! staticData.IsFeatureFunctionIgnored(ff)) {
- m_ffStates[i] = ff.Evaluate(*this,
+ m_ffStates[i] = ff.EvaluateWhenApplied(*this,
m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL,
- &m_scoreBreakdown);
+ &m_currScoreBreakdown);
}
}
@@ -269,7 +268,8 @@ void Hypothesis::Evaluate(const SquareMatrix &futureScore)
m_futureScore = futureScore.CalcFutureScore( m_sourceCompleted );
// TOTAL
- m_totalScore = m_scoreBreakdown.GetWeightedScore() + m_futureScore;
+ m_totalScore = m_currScoreBreakdown.GetWeightedScore() + m_futureScore;
+ if (m_prevHypo) m_totalScore += m_prevHypo->GetScore();
IFVERBOSE(2) {
m_manager.GetSentenceStats().StopTimeEstimateScore();
@@ -315,7 +315,7 @@ void Hypothesis::PrintHypothesis() const
// TRACE_ERR( "\tlanguage model cost "); // <<m_score[ScoreType::LanguageModelScore]<<endl;
// TRACE_ERR( "\tword penalty "); // <<(m_score[ScoreType::WordPenalty]*weightWordPenalty)<<endl;
TRACE_ERR( "\tscore "<<m_totalScore - m_futureScore<<" + future cost "<<m_futureScore<<" = "<<m_totalScore<<endl);
- TRACE_ERR( "\tunweighted feature scores: " << m_scoreBreakdown << endl);
+ TRACE_ERR( "\tunweighted feature scores: " << m_currScoreBreakdown << endl);
//PrintLMScores();
}
@@ -332,7 +332,7 @@ void Hypothesis::CleanupArcList()
*/
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize();
- bool distinctNBest = staticData.GetDistinctNBest() || staticData.UseMBR() || staticData.GetOutputSearchGraph() || staticData.GetOutputSearchGraphSLF() || staticData.GetOutputSearchGraphHypergraph() || staticData.UseLatticeMBR() ;
+ bool distinctNBest = staticData.GetDistinctNBest() || staticData.GetLatticeSamplesSize() || staticData.UseMBR() || staticData.GetOutputSearchGraph() || staticData.GetOutputSearchGraphSLF() || staticData.GetOutputSearchGraphHypergraph() || staticData.UseLatticeMBR() ;
if (!distinctNBest && m_arcList->size() > nBestSize * 5) {
// prune arc list only if there too many arcs
diff --git a/moses/Hypothesis.h b/moses/Hypothesis.h
index 2c49a8ea2..2b0c98d91 100644
--- a/moses/Hypothesis.h
+++ b/moses/Hypothesis.h
@@ -25,6 +25,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <iostream>
#include <memory>
+
+#include <boost/scoped_ptr.hpp>
+
#include <vector>
#include "Phrase.h"
#include "TypeDef.h"
@@ -77,7 +80,9 @@ protected:
bool m_wordDeleted;
float m_totalScore; /*! score so far */
float m_futureScore; /*! estimated future cost to translate rest of sentence */
- ScoreComponentCollection m_scoreBreakdown; /*! scores for this hypothesis */
+ /*! sum of the scores of this hypothesis and all previous hypotheses. Lazily initialised. */
+ mutable boost::scoped_ptr<ScoreComponentCollection> m_scoreBreakdown;
+ ScoreComponentCollection m_currScoreBreakdown; /*! scores for this hypothesis only */
std::vector<const FFState*> m_ffStates;
const Hypothesis *m_winningHypo;
ArcList *m_arcList; /*! all arcs that end at the same trellis point as this hypothesis */
@@ -137,7 +142,7 @@ public:
return m_currTargetWordsRange.GetNumWordsCovered();
}
- void Evaluate(const SquareMatrix &futureScore);
+ void EvaluateWhenApplied(const SquareMatrix &futureScore);
int GetId()const {
return m_id;
@@ -228,7 +233,14 @@ public:
return m_arcList;
}
const ScoreComponentCollection& GetScoreBreakdown() const {
- return m_scoreBreakdown;
+ if (!m_scoreBreakdown.get()) {
+ m_scoreBreakdown.reset(new ScoreComponentCollection());
+ m_scoreBreakdown->PlusEquals(m_currScoreBreakdown);
+ if (m_prevHypo) {
+ m_scoreBreakdown->PlusEquals(m_prevHypo->GetScoreBreakdown());
+ }
+ }
+ return *(m_scoreBreakdown.get());
}
float GetTotalScore() const {
return m_totalScore;
@@ -244,8 +256,8 @@ public:
}
// Added by oliver.wilson@ed.ac.uk for async lm stuff.
- void EvaluateWith(const StatefulFeatureFunction &sfff, int state_idx);
- void EvaluateWith(const StatelessFeatureFunction &slff);
+ void EvaluateWhenApplied(const StatefulFeatureFunction &sfff, int state_idx);
+ void EvaluateWhenApplied(const StatelessFeatureFunction &slff);
//! target span that trans opt would populate if applied to this hypo. Used for alignment check
size_t GetNextStartPos(const TranslationOption &transOpt) const;
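Note: the change above stops accumulating the full ScoreComponentCollection in every hypothesis; each hypothesis now stores only its own delta (m_currScoreBreakdown), and GetScoreBreakdown() builds and caches the chain sum on demand. A stripped-down sketch of the same lazy-sum pattern, with a plain double standing in for ScoreComponentCollection (all names here are illustrative):

    #include <boost/scoped_ptr.hpp>

    struct ChainNode {
      const ChainNode *prev;     // previous hypothesis in the chain, or NULL
      double currScore;          // score contributed by this node only
      mutable boost::scoped_ptr<double> cachedTotal;  // built on first request

      double Total() const {
        if (!cachedTotal.get()) {
          double sum = currScore;
          if (prev) sum += prev->Total();  // each ancestor also caches its own sum
          cachedTotal.reset(new double(sum));
        }
        return *cachedTotal;
      }
    };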
diff --git a/moses/Incremental.cpp b/moses/Incremental.cpp
index 4e593df7e..06c46b786 100644
--- a/moses/Incremental.cpp
+++ b/moses/Incremental.cpp
@@ -102,7 +102,7 @@ public:
return vertex.BestChild();
}
- void Evaluate(const InputType &input, const InputPath &inputPath) {
+ void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath) {
// TODO for input lattice
}
private:
@@ -327,7 +327,7 @@ void PhraseAndFeatures(const search::Applied final, Phrase &phrase, ScoreCompone
const LanguageModel &model = LanguageModel::GetFirstLM();
model.CalcScore(phrase, full, ignored_ngram, ignored_oov);
- // CalcScore transforms, but EvaluateChart doesn't.
+ // CalcScore transforms, but EvaluateWhenApplied doesn't.
features.Assign(&model, full);
}
diff --git a/moses/Jamfile b/moses/Jamfile
index 190c47eff..a64d8ed7f 100644
--- a/moses/Jamfile
+++ b/moses/Jamfile
@@ -10,7 +10,14 @@ if $(with-dlib) {
dlib = ;
}
-alias headers : ../util//kenutil : : : $(max-factors) $(dlib) ;
+with-lbllm = [ option.get "with-lbllm" ] ;
+if $(with-lbllm) {
+ lbllm2 = <cxxflags>-std=c++0x <define>LM_LBL <include>$(with-lbllm)/src <include>$(with-lbllm)/third_party/eigen ;
+} else {
+ lbllm2 = ;
+}
+
+alias headers : ../util//kenutil : : : $(max-factors) $(dlib) $(lbllm2) ;
alias ThreadPool : ThreadPool.cpp ;
alias Util : Util.cpp Timer.cpp ;
@@ -69,7 +76,7 @@ lib moses :
: #exceptions
ThreadPool.cpp
SyntacticLanguageModel.cpp
- *Test.cpp Mock*.cpp
+ *Test.cpp Mock*.cpp FF/*Test.cpp
FF/Factory.cpp
]
headers FF_Factory.o LM//LM TranslationModel/CompactPT//CompactPT TranslationModel/ProbingPT//ProbingPT synlm ThreadPool
@@ -85,5 +92,5 @@ alias headers-to-install : [ glob-tree *.h ] ;
import testing ;
-unit-test moses_test : [ glob *Test.cpp Mock*.cpp ] moses headers ..//z ../OnDiskPt//OnDiskPt ..//boost_unit_test_framework ;
+unit-test moses_test : [ glob *Test.cpp Mock*.cpp FF/*Test.cpp ] moses headers ..//z ../OnDiskPt//OnDiskPt ..//boost_unit_test_framework ;
diff --git a/moses/LM/Base.cpp b/moses/LM/Base.cpp
index f59b5e31b..db71119d5 100644
--- a/moses/LM/Base.cpp
+++ b/moses/LM/Base.cpp
@@ -69,7 +69,7 @@ void LanguageModel::ReportHistoryOrder(std::ostream &out,const Phrase &phrase) c
// out << "ReportHistoryOrder not implemented";
}
-void LanguageModel::Evaluate(const Phrase &source
+void LanguageModel::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/LM/Base.h b/moses/LM/Base.h
index abae5de24..2be19e5bd 100644
--- a/moses/LM/Base.h
+++ b/moses/LM/Base.h
@@ -87,11 +87,11 @@ public:
virtual void IncrementalCallback(Incremental::Manager &manager) const;
virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const;
- virtual void Evaluate(const Phrase &source
+ virtual void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
- void Evaluate(const InputType &input
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
diff --git a/moses/LM/DALMWrapper.cpp b/moses/LM/DALMWrapper.cpp
index 420efd9e8..68b3050de 100644
--- a/moses/LM/DALMWrapper.cpp
+++ b/moses/LM/DALMWrapper.cpp
@@ -288,7 +288,7 @@ void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float
ngramScore = TransformLMScore(ngramScore);
}
-FFState *LanguageModelDALM::Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const{
+FFState *LanguageModelDALM::EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const{
// In this function, we only compute the LM scores of n-grams that overlap a
// phrase boundary. Phrase-internal scores are taken directly from the
// translation option.
@@ -339,7 +339,7 @@ FFState *LanguageModelDALM::Evaluate(const Hypothesis &hypo, const FFState *ps,
return dalm_state;
}
-FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const{
+FFState *LanguageModelDALM::EvaluateWhenApplied(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const{
// initialize language model context state
DALMChartState *newState = new DALMChartState();
DALM::State &state = newState->GetRightContext();
diff --git a/moses/LM/DALMWrapper.h b/moses/LM/DALMWrapper.h
index c791eeea6..ad53819c0 100644
--- a/moses/LM/DALMWrapper.h
+++ b/moses/LM/DALMWrapper.h
@@ -34,9 +34,9 @@ public:
virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
- virtual FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
+ virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
- virtual FFState *EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const;
+ virtual FFState *EvaluateWhenApplied(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const;
virtual bool IsUseable(const FactorMask &mask) const;
diff --git a/moses/LM/Implementation.cpp b/moses/LM/Implementation.cpp
index ef09fbc77..bd5bd1834 100644
--- a/moses/LM/Implementation.cpp
+++ b/moses/LM/Implementation.cpp
@@ -134,7 +134,7 @@ void LanguageModelImplementation::CalcScore(const Phrase &phrase, float &fullSco
}
}
-FFState *LanguageModelImplementation::Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const
+FFState *LanguageModelImplementation::EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const
{
// In this function, we only compute the LM scores of n-grams that overlap a
// phrase boundary. Phrase-internal scores are taken directly from the
@@ -222,7 +222,7 @@ FFState *LanguageModelImplementation::Evaluate(const Hypothesis &hypo, const FFS
return res;
}
-FFState* LanguageModelImplementation::EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection* out) const
+FFState* LanguageModelImplementation::EvaluateWhenApplied(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection* out) const
{
LanguageModelChartState *ret = new LanguageModelChartState(hypo, featureID, GetNGramOrder());
// data structure for factored context phrase (history and predicted word)
diff --git a/moses/LM/Implementation.h b/moses/LM/Implementation.h
index a39f5e42b..5eb8fb209 100644
--- a/moses/LM/Implementation.h
+++ b/moses/LM/Implementation.h
@@ -89,9 +89,9 @@ public:
void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
- FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
+ FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
- FFState* EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection* accumulator) const;
+ FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection* accumulator) const;
void updateChartScore(float *prefixScore, float *finalScore, float score, size_t wordPos) const;
diff --git a/moses/LM/Jamfile b/moses/LM/Jamfile
index 4f964ddd8..ddca5e92d 100644
--- a/moses/LM/Jamfile
+++ b/moses/LM/Jamfile
@@ -84,12 +84,25 @@ if $(with-ldhtlm) {
local with-nplm = [ option.get "with-nplm" ] ;
if $(with-nplm) {
lib neuralLM : : <search>$(with-nplm)/lib <search>$(with-nplm)/lib64 ;
- obj NeuralLMWrapper.o : NeuralLMWrapper.cpp neuralLM ..//headers : <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen-3.1.4 ;
+ obj NeuralLMWrapper.o : NeuralLMWrapper.cpp neuralLM ..//headers : <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen ;
alias nplm : NeuralLMWrapper.o neuralLM : : : <cxxflags>-fopenmp <linkflags>-fopenmp <define>LM_NEURAL ;
dependencies += nplm ;
lmmacros += LM_NEURAL ;
}
+#LBLLM
+local with-lbllm = [ option.get "with-lbllm" ] ;
+if $(with-lbllm) {
+ lib lbl : : <search>$(with-lbllm)/lib <search>$(with-lbllm)/lib64 ;
+ lib murmurhash : : <search>$(with-lbllm)/lib <search>$(with-lbllm)/lib64 ;
+ obj LBLLM.o : oxlm/LBLLM.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/third_party/eigen ;
+ obj Mapper.o : oxlm/Mapper.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/third_party/eigen ;
+ alias lbllm : LBLLM.o Mapper.o lbl murmurhash /top//boost_filesystem : : : <cxxflags>-std=c++0x <define>LM_LBL ;
+ dependencies += lbllm ;
+ lmmacros += LM_LBL ;
+}
+
+
#DALM
local with-dalm = [ option.get "with-dalm" ] ;
if $(with-dalm) {
diff --git a/moses/LM/Ken.cpp b/moses/LM/Ken.cpp
index 2dfb58c23..e69746084 100644
--- a/moses/LM/Ken.cpp
+++ b/moses/LM/Ken.cpp
@@ -79,7 +79,7 @@ struct KenLMState : public FFState {
//
// FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
//
-// FFState *EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
+// FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
//
// void IncrementalCallback(Incremental::Manager &manager) const {
// manager.LMCallback(*m_ngram, m_lmIdLookup);
@@ -229,7 +229,7 @@ template <class Model> void LanguageModelKen<Model>::CalcScore(const Phrase &phr
fullScore = TransformLMScore(fullScore);
}
-template <class Model> FFState *LanguageModelKen<Model>::Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const
+template <class Model> FFState *LanguageModelKen<Model>::EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const
{
const lm::ngram::State &in_state = static_cast<const KenLMState&>(*ps).state;
@@ -307,7 +307,7 @@ private:
lm::ngram::ChartState m_state;
};
-template <class Model> FFState *LanguageModelKen<Model>::EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *accumulator) const
+template <class Model> FFState *LanguageModelKen<Model>::EvaluateWhenApplied(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *accumulator) const
{
LanguageModelChartStateKenLM *newState = new LanguageModelChartStateKenLM();
lm::ngram::RuleScore<Model> ruleScore(*m_ngram, newState->GetChartState());
diff --git a/moses/LM/Ken.h b/moses/LM/Ken.h
index e5950f591..2f473b697 100644
--- a/moses/LM/Ken.h
+++ b/moses/LM/Ken.h
@@ -55,9 +55,9 @@ public:
virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
- virtual FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
+ virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
- virtual FFState *EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
+ virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
virtual void IncrementalCallback(Incremental::Manager &manager) const;
virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const;
diff --git a/moses/LM/LDHT.cpp b/moses/LM/LDHT.cpp
index 61226208c..1d0331df5 100644
--- a/moses/LM/LDHT.cpp
+++ b/moses/LM/LDHT.cpp
@@ -97,7 +97,7 @@ public:
FFState* Evaluate(const Hypothesis& hypo,
const FFState* input_state,
ScoreComponentCollection* score_output) const;
- FFState* EvaluateChart(const ChartHypothesis& hypo,
+ FFState* EvaluateWhenApplied(const ChartHypothesis& hypo,
int featureID,
ScoreComponentCollection* accumulator) const;
@@ -392,7 +392,7 @@ FFState* LanguageModelLDHT::Evaluate(
return state;
}
-FFState* LanguageModelLDHT::EvaluateChart(
+FFState* LanguageModelLDHT::EvaluateWhenApplied(
const ChartHypothesis& hypo,
int featureID,
ScoreComponentCollection* accumulator) const
diff --git a/moses/LM/NeuralLMWrapper.cpp b/moses/LM/NeuralLMWrapper.cpp
index 467c41846..ab7b5400b 100644
--- a/moses/LM/NeuralLMWrapper.cpp
+++ b/moses/LM/NeuralLMWrapper.cpp
@@ -1,9 +1,9 @@
#include "moses/StaticData.h"
#include "moses/FactorCollection.h"
+#include <boost/functional/hash.hpp>
#include "NeuralLMWrapper.h"
#include "neuralLM.h"
-#include <model.h>
using namespace std;
@@ -12,21 +12,19 @@ namespace Moses
NeuralLMWrapper::NeuralLMWrapper(const std::string &line)
:LanguageModelSingleFactor(line)
{
- // This space intentionally left blank
+ ReadParameters();
}
NeuralLMWrapper::~NeuralLMWrapper()
{
- delete m_neuralLM;
+ delete m_neuralLM_shared;
}
void NeuralLMWrapper::Load()
{
- TRACE_ERR("Loading NeuralLM " << m_filePath << endl);
-
// Set parameters required by ancestor classes
FactorCollection &factorCollection = FactorCollection::Instance();
m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_);
@@ -34,59 +32,41 @@ void NeuralLMWrapper::Load()
m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, EOS_);
m_sentenceEndWord[m_factorType] = m_sentenceEnd;
- m_neuralLM = new nplm::neuralLM();
- m_neuralLM->read(m_filePath);
- m_neuralLM->set_log_base(10);
+ m_neuralLM_shared = new nplm::neuralLM(m_filePath, true);
+ //TODO: config option?
+ m_neuralLM_shared->set_cache(1000000);
+
+ UTIL_THROW_IF2(m_nGramOrder != m_neuralLM_shared->get_order(),
+ "Wrong order of neuralLM: LM has " << m_neuralLM_shared->get_order() << ", but Moses expects " << m_nGramOrder);
- //TODO: Implement this
}
LMResult NeuralLMWrapper::GetValue(const vector<const Word*> &contextFactor, State* finalState) const
{
- unsigned int hashCode = 0;
+ if (!m_neuralLM.get()) {
+ m_neuralLM.reset(new nplm::neuralLM(*m_neuralLM_shared));
+ }
+ size_t hashCode = 0;
+
vector<int> words(contextFactor.size());
-// TRACE_ERR("NeuralLM words:");
- for (size_t i=0, n=contextFactor.size(); i<n; i+=1) {
+ for (size_t i=0, n=contextFactor.size(); i<n; i++) {
const Word* word = contextFactor[i];
const Factor* factor = word->GetFactor(m_factorType);
- const std::string string= factor->GetString().as_string();
+ const std::string string = factor->GetString().as_string();
int neuralLM_wordID = m_neuralLM->lookup_word(string);
words[i] = neuralLM_wordID;
- hashCode += neuralLM_wordID;
-// TRACE_ERR(" " << string << "(" << neuralLM_wordID << ")" );
+ boost::hash_combine(hashCode, neuralLM_wordID);
}
double value = m_neuralLM->lookup_ngram(words);
-// TRACE_ERR("\t=\t" << value);
-// TRACE_ERR(endl);
// Create a new struct to hold the result
LMResult ret;
- ret.score = value;
+ ret.score = FloorScore(value);
ret.unknown = false;
-
- // State* finalState is a void pointer
- //
- // Construct a hash value from the vector of words (contextFactor)
- //
- // The hash value must be the same size as sizeof(void*)
- //
- // TODO Set finalState to the above hash value
-
- // use last word as state info
-// const Factor *factor;
-// size_t hash_value(const Factor &f);
-// if (contextFactor.size()) {
-// factor = contextFactor.back()->GetFactor(m_factorType);
-// } else {
-// factor = NULL;
-// }
-//
-// (*finalState) = (State*) factor;
-
(*finalState) = (State*) hashCode;
return ret;
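Note: the rewritten GetValue() above replaces the old additive hash with boost::hash_combine over the context word ids and stores the result in the opaque State pointer. A minimal sketch of that idiom (function and variable names are made up):

    #include <boost/functional/hash.hpp>
    #include <cstddef>
    #include <vector>

    void *HashContext(const std::vector<int> &wordIds) {
      std::size_t seed = 0;
      for (std::size_t i = 0; i < wordIds.size(); ++i) {
        boost::hash_combine(seed, wordIds[i]);  // order-sensitive, unlike summing the ids
      }
      // Same trick as `(*finalState) = (State*) hashCode;` above: the hash is packed
      // into a pointer-sized value used only for recombination checks.
      return reinterpret_cast<void *>(seed);
    }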
diff --git a/moses/LM/NeuralLMWrapper.h b/moses/LM/NeuralLMWrapper.h
index 6a05aa09a..7207605e1 100644
--- a/moses/LM/NeuralLMWrapper.h
+++ b/moses/LM/NeuralLMWrapper.h
@@ -2,6 +2,8 @@
#include "SingleFactor.h"
+#include <boost/thread/tss.hpp>
+
namespace nplm {
class neuralLM;
}
@@ -9,16 +11,16 @@ namespace nplm {
namespace Moses
{
-/** Implementation of single factor LM using IRST's code.
- */
class NeuralLMWrapper : public LanguageModelSingleFactor
{
protected:
- nplm::neuralLM *m_neuralLM;
+ // big data (vocab, weights, cache) shared among threads
+ nplm::neuralLM *m_neuralLM_shared;
+ // thread-specific nplm for thread-safety
+ mutable boost::thread_specific_ptr<nplm::neuralLM> m_neuralLM;
public:
NeuralLMWrapper(const std::string &line);
- // NeuralLM(const std::string &line);
~NeuralLMWrapper();
virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0) const;
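Note: the header change above splits the model into a shared instance (vocabulary, weights, cache) and a boost::thread_specific_ptr that holds a per-thread copy created on first use, so queries need no locking. A reduced sketch of the pattern with a stand-in Model class instead of nplm::neuralLM:

    #include <boost/thread/tss.hpp>

    class Model {
      // stand-in for nplm::neuralLM: expensive to load, cheap to copy-construct
    };

    class ThreadSafeWrapper {
    public:
      explicit ThreadSafeWrapper(Model *shared) : m_shared(shared) {}

      double Score() const {
        if (!m_local.get()) {
          // First call on this thread: make a thread-private copy of the shared model.
          m_local.reset(new Model(*m_shared));
        }
        // ... query *m_local without any locking ...
        return 0.0;
      }

    private:
      Model *m_shared;                                    // shared by all threads
      mutable boost::thread_specific_ptr<Model> m_local;  // lazily created per thread
    };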
diff --git a/moses/LM/SingleFactor.cpp b/moses/LM/SingleFactor.cpp
index 74b8f4fe5..1efb13f16 100644
--- a/moses/LM/SingleFactor.cpp
+++ b/moses/LM/SingleFactor.cpp
@@ -87,6 +87,17 @@ void LanguageModelSingleFactor::SetParameter(const std::string& key, const std::
}
}
+std::string LanguageModelSingleFactor::DebugContextFactor(const std::vector<const Word*> &contextFactor) const
+{
+ std::string ret;
+ for (size_t i = 0; i < contextFactor.size(); ++i) {
+ const Word &word = *contextFactor[i];
+ ret += word.ToString();
+ }
+
+ return ret;
+}
+
}
diff --git a/moses/LM/SingleFactor.h b/moses/LM/SingleFactor.h
index eeb5cdbef..fd1d893e6 100644
--- a/moses/LM/SingleFactor.h
+++ b/moses/LM/SingleFactor.h
@@ -67,6 +67,8 @@ public:
virtual LMResult GetValueForgotState(const std::vector<const Word*> &contextFactor, FFState &outState) const;
virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL) const = 0;
+
+ std::string DebugContextFactor(const std::vector<const Word*> &contextFactor) const;
};
diff --git a/moses/LM/oxlm/LBLLM.cpp b/moses/LM/oxlm/LBLLM.cpp
new file mode 100644
index 000000000..1bd9d768e
--- /dev/null
+++ b/moses/LM/oxlm/LBLLM.cpp
@@ -0,0 +1,172 @@
+#include "LBLLM.h"
+
+#include <boost/archive/binary_iarchive.hpp>
+#include <boost/archive/binary_oarchive.hpp>
+#include <boost/filesystem.hpp>
+#include <boost/functional/hash.hpp>
+
+#include "moses/FactorCollection.h"
+#include "moses/InputType.h"
+
+using namespace std;
+using namespace oxlm;
+
+namespace Moses
+{
+
+template<class Model>
+LBLLM<Model>::LBLLM(const string &line) : LanguageModelSingleFactor(line) {
+ ReadParameters();
+
+ FactorCollection &factorCollection = FactorCollection::Instance();
+
+ // needed by parent language model classes. Why didn't they set these themselves?
+ m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_);
+ m_sentenceStartWord[m_factorType] = m_sentenceStart;
+
+ m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, EOS_);
+ m_sentenceEndWord[m_factorType] = m_sentenceEnd;
+
+ cacheHits = totalHits = 0;
+}
+
+
+template<class Model>
+LBLLM<Model>::~LBLLM() {
+ if (persistentCache) {
+ double cache_hit_ratio = 100.0 * cacheHits / totalHits;
+ cerr << "Cache hit ratio: " << cache_hit_ratio << endl;
+ }
+}
+
+
+template<class Model>
+void LBLLM<Model>::SetParameter(const string& key, const string& value) {
+ if (key == "persistent-cache") {
+ persistentCache = Scan<bool>(value);
+ } else {
+ LanguageModelSingleFactor::SetParameter(key, value);
+ }
+}
+
+template<class Model>
+void LBLLM<Model>::Load() {
+ model.load(m_filePath);
+
+ Dict dict = model.getDict();
+ mapper = boost::make_shared<OXLMMapper>(dict);
+
+ kSTART = dict.Convert("<s>");
+ kSTOP = dict.Convert("</s>");
+ kUNKNOWN = dict.Convert("<unk>");
+
+ size_t ngram_order = model.getConfig()->ngram_order;
+ UTIL_THROW_IF2(
+ m_nGramOrder != ngram_order,
+ "Wrong order for LBLLM: LM has " << ngram_order << ", but Moses expects " << m_nGramOrder);
+}
+
+template<class Model>
+LMResult LBLLM<Model>::GetValue(
+ const vector<const Word*> &contextFactor, State* finalState) const {
+ if (!cache.get()) {
+ cache.reset(new QueryCache());
+ }
+
+ vector<int> context;
+ int word;
+ mapper->convert(contextFactor, context, word);
+
+ size_t context_width = m_nGramOrder - 1;
+
+ if (!context.empty() && context.back() == kSTART) {
+ context.resize(context_width, kSTART);
+ } else {
+ context.resize(context_width, kUNKNOWN);
+ }
+
+
+ double score;
+ if (persistentCache) {
+ ++totalHits;
+ NGram query(word, context);
+ pair<double, bool> ret = cache->get(query);
+ if (ret.second) {
+ score = ret.first;
+ ++cacheHits;
+ } else {
+ score = model.predict(word, context);
+ cache->put(query, score);
+ }
+ } else {
+ score = model.predict(word, context);
+ }
+
+ LMResult ret;
+ ret.score = score;
+ ret.unknown = (word == kUNKNOWN);
+
+ // calc state from hash of last n-1 words
+ size_t seed = 0;
+ boost::hash_combine(seed, word);
+ for (size_t i = 0; i < context.size() && i < context_width - 1; ++i) {
+ int id = context[i];
+ boost::hash_combine(seed, id);
+ }
+
+ (*finalState) = (State*) seed;
+ return ret;
+}
+
+template<class Model>
+void LBLLM<Model>::InitializeForInput(const InputType& source) {
+ LanguageModelSingleFactor::InitializeForInput(source);
+
+ if (persistentCache) {
+ if (!cache.get()) {
+ cache.reset(new QueryCache());
+ }
+
+ int sentence_id = source.GetTranslationId();
+ string cacheFile = m_filePath + "." + to_string(sentence_id) + ".cache.bin";
+ if (boost::filesystem::exists(cacheFile)) {
+ ifstream f(cacheFile);
+ boost::archive::binary_iarchive iar(f);
+ cerr << "Loading n-gram probability cache from " << cacheFile << endl;
+ iar >> *cache;
+ cerr << "Done loading " << cache->size()
+ << " n-gram probabilities..." << endl;
+ } else {
+ cerr << "Cache file not found" << endl;
+ }
+ }
+}
+
+template<class Model>
+void LBLLM<Model>::CleanUpAfterSentenceProcessing(const InputType& source) {
+ model.clearCache();
+
+ if (persistentCache) {
+ int sentence_id = source.GetTranslationId();
+ string cacheFile = m_filePath + "." + to_string(sentence_id) + ".cache.bin";
+ ofstream f(cacheFile);
+ boost::archive::binary_oarchive oar(f);
+ cerr << "Saving persistent cache to " << cacheFile << endl;
+ oar << *cache;
+ cerr << "Done saving " << cache->size()
+ << " n-gram probabilities..." << endl;
+
+ cache->clear();
+ }
+
+ LanguageModelSingleFactor::CleanUpAfterSentenceProcessing(source);
+}
+
+template class LBLLM<LM>;
+template class LBLLM<FactoredLM>;
+template class LBLLM<FactoredMaxentLM>;
+
+}
+
+
+
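Note: InitializeForInput() and CleanUpAfterSentenceProcessing() above persist the per-sentence n-gram cache with Boost.Serialization binary archives. A minimal sketch of that load/save round trip, using a std::map as a stand-in for oxlm::QueryCache (file name and types are placeholders):

    #include <boost/archive/binary_iarchive.hpp>
    #include <boost/archive/binary_oarchive.hpp>
    #include <boost/serialization/map.hpp>
    #include <boost/serialization/string.hpp>
    #include <fstream>
    #include <map>
    #include <string>

    typedef std::map<std::string, double> Cache;

    void SaveCache(const Cache &cache, const std::string &path) {
      std::ofstream f(path.c_str(), std::ios::binary);
      boost::archive::binary_oarchive oar(f);
      oar << cache;  // mirrors `oar << *cache;` above
    }

    void LoadCache(Cache &cache, const std::string &path) {
      std::ifstream f(path.c_str(), std::ios::binary);
      boost::archive::binary_iarchive iar(f);
      iar >> cache;  // mirrors `iar >> *cache;` above
    }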
diff --git a/moses/LM/oxlm/LBLLM.h b/moses/LM/oxlm/LBLLM.h
new file mode 100644
index 000000000..67759a8bd
--- /dev/null
+++ b/moses/LM/oxlm/LBLLM.h
@@ -0,0 +1,53 @@
+// $Id$
+#pragma once
+
+#include <vector>
+
+#include "moses/LM/SingleFactor.h"
+
+// lbl stuff
+#include "corpus/corpus.h"
+#include "lbl/model.h"
+#include "lbl/query_cache.h"
+
+#include "Mapper.h"
+
+namespace Moses
+{
+
+
+template<class Model>
+class LBLLM : public LanguageModelSingleFactor
+{
+public:
+ LBLLM(const std::string &line);
+
+ ~LBLLM();
+
+ void SetParameter(const std::string& key, const std::string& value);
+
+ void Load();
+
+ virtual LMResult GetValue(
+ const std::vector<const Word*> &contextFactor,
+ State* finalState = 0) const;
+
+ virtual void InitializeForInput(const InputType& source);
+
+ virtual void CleanUpAfterSentenceProcessing(const InputType& source);
+
+protected:
+ Model model;
+ boost::shared_ptr<OXLMMapper> mapper;
+
+ int kSTART;
+ int kSTOP;
+ int kUNKNOWN;
+
+ bool persistentCache;
+ mutable boost::thread_specific_ptr<oxlm::QueryCache> cache;
+ mutable int cacheHits, totalHits;
+};
+
+
+}
diff --git a/moses/LM/oxlm/Mapper.cpp b/moses/LM/oxlm/Mapper.cpp
new file mode 100644
index 000000000..f1363ccf0
--- /dev/null
+++ b/moses/LM/oxlm/Mapper.cpp
@@ -0,0 +1,67 @@
+#include "Mapper.h"
+#include "moses/FactorCollection.h"
+
+using namespace std;
+
+namespace Moses
+{
+OXLMMapper::OXLMMapper(const oxlm::Dict& dict) : dict(dict)
+{
+ for (int i = 0; i < dict.size(); ++i) {
+ const string &str = dict.Convert(i);
+ FactorCollection &fc = FactorCollection::Instance();
+ const Moses::Factor *factor = fc.AddFactor(str, false);
+ moses2lbl[factor] = i;
+
+ //add(i, TD::Convert());
+ }
+
+ kUNKNOWN = this->dict.Convert("<unk>");
+}
+
+int OXLMMapper::convert(const Moses::Factor *factor) const
+{
+ Coll::const_iterator iter;
+ iter = moses2lbl.find(factor);
+ if (iter == moses2lbl.end()) {
+ return kUNKNOWN;
+ }
+ else {
+ int ret = iter->second;
+ return ret;
+ }
+}
+
+std::vector<int> OXLMMapper::convert(const Phrase &phrase) const
+{
+ size_t size = phrase.GetSize();
+ vector<int> ret(size);
+
+ for (size_t i = 0; i < size; ++i) {
+ const Moses::Factor *factor = phrase.GetFactor(i, 0);
+ int id = convert(factor);
+ ret[i] = id;
+ }
+ return ret;
+}
+
+void OXLMMapper::convert(const std::vector<const Word*> &contextFactor, std::vector<int> &ids, int &word) const
+{
+ size_t size = contextFactor.size();
+
+ ids.resize(size - 1);
+
+ for (size_t i = 0; i < size - 1; ++i) {
+ const Moses::Factor *factor = contextFactor[i]->GetFactor(0);
+ int id = convert(factor);
+ ids[i] = id;
+ }
+ std::reverse(ids.begin(), ids.end());
+
+ const Moses::Factor *factor = contextFactor.back()->GetFactor(0);
+ word = convert(factor);
+
+}
+
+} // namespace
+
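Note: the three-argument convert() above splits a Moses context (history words followed by the word being scored) into OxLM ids and reverses the history so that the most recent word comes first. A small illustration of the expected ordering (word ids are invented):

    std::vector<int> ids;
    int word;
    mapper->convert(contextFactor, ids, word);
    // contextFactor = [ w1, w2, w3, w4 ]          (w4 is the word being scored)
    // ids           = [ id(w3), id(w2), id(w1) ]  (history, most recent first)
    // word          = id(w4)
    // Factors absent from the OxLM dictionary map to the id of "<unk>".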
diff --git a/moses/LM/oxlm/Mapper.h b/moses/LM/oxlm/Mapper.h
new file mode 100644
index 000000000..79cbf7b5f
--- /dev/null
+++ b/moses/LM/oxlm/Mapper.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include <map>
+#include "corpus/corpus.h"
+#include "moses/Factor.h"
+#include "moses/Phrase.h"
+
+namespace Moses
+{
+class OXLMMapper
+{
+public:
+ OXLMMapper(const oxlm::Dict& dict);
+
+ int convert(const Moses::Factor *factor) const;
+ std::vector<int> convert(const Phrase &phrase) const;
+ void convert(const std::vector<const Word*> &contextFactor, std::vector<int> &ids, int &word) const;
+
+private:
+ void add(int lbl_id, int cdec_id);
+
+ oxlm::Dict dict;
+ typedef std::map<const Moses::Factor*, int> Coll;
+ Coll moses2lbl;
+ int kUNKNOWN;
+
+};
+
+/**
+ * Wraps the feature values computed from the LBL language model.
+ */
+struct LBLFeatures {
+ LBLFeatures() : LMScore(0), OOVScore(0) {}
+ LBLFeatures(double lm_score, double oov_score)
+ : LMScore(lm_score), OOVScore(oov_score) {}
+ LBLFeatures& operator+=(const LBLFeatures& other) {
+ LMScore += other.LMScore;
+ OOVScore += other.OOVScore;
+ return *this;
+ }
+
+ double LMScore;
+ double OOVScore;
+};
+
+}
diff --git a/moses/Manager.cpp b/moses/Manager.cpp
index 6bc82378e..5ebd0b9c4 100644
--- a/moses/Manager.cpp
+++ b/moses/Manager.cpp
@@ -105,7 +105,9 @@ void Manager::ProcessSentence()
// some reporting on how long this took
IFVERBOSE(1) {
GetSentenceStats().StopTimeCollectOpts();
- TRACE_ERR("Line "<< m_lineNumber << ": Collecting options took " << GetSentenceStats().GetTimeCollectOpts() << " seconds" << endl);
+ TRACE_ERR("Line "<< m_lineNumber << ": Collecting options took "
+ << GetSentenceStats().GetTimeCollectOpts() << " seconds at "
+ << __FILE__ << ":" << __LINE__ << endl);
}
// search for best translation with the specified algorithm
@@ -755,18 +757,12 @@ void Manager::OutputFeatureValuesForHypergraph(const Hypothesis* hypo, std::ostr
{
outputSearchGraphStream.setf(std::ios::fixed);
outputSearchGraphStream.precision(6);
-
- const vector<const StatelessFeatureFunction*>& slf =StatelessFeatureFunction::GetStatelessFeatureFunctions();
- const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
- size_t featureIndex = 1;
- for (size_t i = 0; i < sff.size(); ++i) {
- featureIndex = OutputFeatureValuesForHypergraph(featureIndex, hypo, sff[i], outputSearchGraphStream);
- }
- for (size_t i = 0; i < slf.size(); ++i) {
- {
- featureIndex = OutputFeatureValuesForHypergraph(featureIndex, hypo, slf[i], outputSearchGraphStream);
- }
+ ScoreComponentCollection scores = hypo->GetScoreBreakdown();
+ const Hypothesis *prevHypo = hypo->GetPrevHypo();
+ if (prevHypo) {
+ scores.MinusEquals(prevHypo->GetScoreBreakdown());
}
+ scores.Save(outputSearchGraphStream, false);
}
@@ -831,35 +827,11 @@ size_t Manager::OutputFeatureValuesForSLF(size_t index, bool zeros, const Hypoth
// }
}
-size_t Manager::OutputFeatureValuesForHypergraph(size_t index, const Hypothesis* hypo, const FeatureFunction* ff, std::ostream &outputSearchGraphStream) const
-{
- if (!ff->IsTuneable()) {
- return index;
- }
- ScoreComponentCollection scoreCollection = hypo->GetScoreBreakdown();
- const Hypothesis *prevHypo = hypo->GetPrevHypo();
- if (prevHypo) {
- scoreCollection.MinusEquals( prevHypo->GetScoreBreakdown() );
- }
- vector<float> featureValues = scoreCollection.GetScoresForProducer(ff);
- size_t numScoreComps = featureValues.size();
-
- if (numScoreComps > 1) {
- for (size_t i = 0; i < numScoreComps; ++i) {
- outputSearchGraphStream << ff->GetScoreProducerDescription() << i << "=" << featureValues[i] << " ";
- }
- } else {
- outputSearchGraphStream << ff->GetScoreProducerDescription() << "=" << featureValues[0] << " ";
- }
-
- return index+numScoreComps;
-}
-
/**! Output search graph in hypergraph format of Kenneth Heafield's lazy hypergraph decoder */
-void Manager::OutputSearchGraphAsHypergraph(long translationId, std::ostream &outputSearchGraphStream) const
+void Manager::OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const
{
- VERBOSE(2,"Getting search graph to output as hypergraph for sentence " << translationId << std::endl)
+ VERBOSE(2,"Getting search graph to output as hypergraph for sentence " << m_lineNumber << std::endl)
vector<SearchGraphNode> searchGraph;
GetSearchGraph(searchGraph);
@@ -870,7 +842,7 @@ void Manager::OutputSearchGraphAsHypergraph(long translationId, std::ostream &ou
set<int> terminalNodes;
multimap<int,int> hypergraphIDToArcs;
- VERBOSE(2,"Gathering information about search graph to output as hypergraph for sentence " << translationId << std::endl)
+ VERBOSE(2,"Gathering information about search graph to output as hypergraph for sentence " << m_lineNumber << std::endl)
long numNodes = 0;
long endNode = 0;
@@ -932,15 +904,15 @@ void Manager::OutputSearchGraphAsHypergraph(long translationId, std::ostream &ou
// Print number of nodes and arcs
outputSearchGraphStream << numNodes << " " << numArcs << endl;
- VERBOSE(2,"Search graph to output as hypergraph for sentence " << translationId
+ VERBOSE(2,"Search graph to output as hypergraph for sentence " << m_lineNumber
<< " contains " << numArcs << " arcs and " << numNodes << " nodes" << std::endl)
- VERBOSE(2,"Outputting search graph to output as hypergraph for sentence " << translationId << std::endl)
+ VERBOSE(2,"Outputting search graph to output as hypergraph for sentence " << m_lineNumber << std::endl)
for (int hypergraphHypothesisID=0; hypergraphHypothesisID < endNode; hypergraphHypothesisID+=1) {
if (hypergraphHypothesisID % 100000 == 0) {
- VERBOSE(2,"Processed " << hypergraphHypothesisID << " of " << numNodes << " hypergraph nodes for sentence " << translationId << std::endl);
+ VERBOSE(2,"Processed " << hypergraphHypothesisID << " of " << numNodes << " hypergraph nodes for sentence " << m_lineNumber << std::endl);
}
// int mosesID = hypergraphIDToMosesID[hypergraphHypothesisID];
size_t count = hypergraphIDToArcs.count(hypergraphHypothesisID);
@@ -963,7 +935,7 @@ void Manager::OutputSearchGraphAsHypergraph(long translationId, std::ostream &ou
// int actualHypergraphHypothesisID = mosesIDToHypergraphID[mosesHypothesisID];
UTIL_THROW_IF2(
(hypergraphHypothesisID != mosesIDToHypergraphID[mosesHypothesisID]),
- "Error while writing search lattice as hypergraph for sentence " << translationId << ". " <<
+ "Error while writing search lattice as hypergraph for sentence " << m_lineNumber << ". " <<
"Moses node " << mosesHypothesisID << " was expected to have hypergraph id " << hypergraphHypothesisID <<
", but actually had hypergraph id " << mosesIDToHypergraphID[mosesHypothesisID] <<
". There are " << numNodes << " nodes in the search lattice."
@@ -978,7 +950,7 @@ void Manager::OutputSearchGraphAsHypergraph(long translationId, std::ostream &ou
// VERBOSE(2,"Hypergraph node " << hypergraphHypothesisID << " has parent node " << startNode << std::endl)
UTIL_THROW_IF2(
(startNode >= hypergraphHypothesisID),
- "Error while writing search lattice as hypergraph for sentence" << translationId << ". " <<
+ "Error while writing search lattice as hypergraph for sentence" << m_lineNumber << ". " <<
"The nodes must be output in topological order. The code attempted to violate this restriction."
);
diff --git a/moses/Manager.h b/moses/Manager.h
index ccf57c527..9512bb472 100644
--- a/moses/Manager.h
+++ b/moses/Manager.h
@@ -106,7 +106,6 @@ private:
// Helper functions to output search graph in the hypergraph format of Kenneth Heafield's lazy hypergraph decoder
void OutputFeatureValuesForHypergraph(const Hypothesis* hypo, std::ostream &outputSearchGraphStream) const;
- size_t OutputFeatureValuesForHypergraph(size_t index, const Hypothesis* hypo, const FeatureFunction* ff, std::ostream &outputSearchGraphStream) const;
protected:
@@ -146,13 +145,14 @@ public:
void GetOutputLanguageModelOrder( std::ostream &out, const Hypothesis *hypo );
void GetWordGraph(long translationId, std::ostream &outputWordGraphStream) const;
int GetNextHypoId();
+ size_t GetLineNumber() const {return m_lineNumber;}
#ifdef HAVE_PROTOBUF
void SerializeSearchGraphPB(long translationId, std::ostream& outputStream) const;
#endif
void OutputSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const;
void OutputSearchGraphAsSLF(long translationId, std::ostream &outputSearchGraphStream) const;
- void OutputSearchGraphAsHypergraph(long translationId, std::ostream &outputSearchGraphStream) const;
+ void OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const;
void GetSearchGraph(std::vector<SearchGraphNode>& searchGraph) const;
const InputType& GetSource() const {
return m_source;
diff --git a/moses/MockHypothesis.cpp b/moses/MockHypothesis.cpp
index 81fcb24b8..3f68bd9a8 100644
--- a/moses/MockHypothesis.cpp
+++ b/moses/MockHypothesis.cpp
@@ -62,7 +62,7 @@ MockHypothesisGuard::MockHypothesisGuard(
for (; ti != targetSegments.end() && ai != alignments.end(); ++ti,++ai) {
Hypothesis* prevHypo = m_hypothesis;
WordsRange wordsRange(ai->first,ai->second);
- m_targetPhrases.push_back(TargetPhrase());
+ m_targetPhrases.push_back(TargetPhrase(NULL));
// m_targetPhrases.back().CreateFromString(Input, factors, *ti, "|", NULL);
m_targetPhrases.back().CreateFromString(Input, factors, *ti, NULL);
m_toptions.push_back(new TranslationOption
diff --git a/moses/PDTAimp.cpp b/moses/PDTAimp.cpp
new file mode 100644
index 000000000..f3f870e1d
--- /dev/null
+++ b/moses/PDTAimp.cpp
@@ -0,0 +1,463 @@
+#include "PDTAimp.h"
+
+namespace Moses
+{
+
+PDTAimp::PDTAimp(PhraseDictionaryTreeAdaptor *p)
+ : m_dict(0),
+ m_obj(p),
+ useCache(1),
+ totalE(0),
+ distinctE(0) {
+ m_numInputScores = 0;
+ const StaticData &staticData = StaticData::Instance();
+ m_inputFeature = &InputFeature::Instance();
+
+ if (m_inputFeature) {
+ const PhraseDictionary *firstPt = PhraseDictionary::GetColl()[0];
+ if (firstPt == m_obj) {
+ m_numInputScores = m_inputFeature->GetNumScoreComponents();
+ }
+ }
+}
+
+PDTAimp::~PDTAimp() {
+ CleanUp();
+ delete m_dict;
+
+ if (StaticData::Instance().GetVerboseLevel() >= 2) {
+
+ TRACE_ERR("tgt candidates stats: total="<<totalE<<"; distinct="
+ <<distinctE<<" ("<<distinctE/(0.01*totalE)<<"); duplicates="
+ <<totalE-distinctE<<" ("<<(totalE-distinctE)/(0.01*totalE)
+ <<")\n");
+
+ TRACE_ERR("\npath statistics\n");
+
+ if(path1Best.size()) {
+ TRACE_ERR("1-best: ");
+ std::copy(path1Best.begin()+1,path1Best.end(),
+ std::ostream_iterator<size_t>(std::cerr," \t"));
+ TRACE_ERR("\n");
+ }
+ if(pathCN.size()) {
+ TRACE_ERR("CN (full): ");
+ std::transform(pathCN.begin()+1
+ ,pathCN.end()
+ ,std::ostream_iterator<double>(std::cerr," \t")
+ ,Exp);
+ TRACE_ERR("\n");
+ }
+ if(pathExplored.size()) {
+ TRACE_ERR("CN (explored): ");
+ std::copy(pathExplored.begin()+1,pathExplored.end(),
+ std::ostream_iterator<size_t>(std::cerr," \t"));
+ TRACE_ERR("\n");
+ }
+ }
+
+}
+
+void PDTAimp::CleanUp() {
+ assert(m_dict);
+ m_dict->FreeMemory();
+ for(size_t i=0; i<m_tgtColls.size(); ++i) delete m_tgtColls[i];
+ m_tgtColls.clear();
+ m_cache.clear();
+ m_rangeCache.clear();
+ uniqSrcPhr.clear();
+}
+
+TargetPhraseCollectionWithSourcePhrase const*
+PDTAimp::GetTargetPhraseCollection(Phrase const &src) const {
+
+ assert(m_dict);
+ if(src.GetSize()==0) return 0;
+
+ std::pair<MapSrc2Tgt::iterator,bool> piter;
+ if(useCache) {
+ piter=m_cache.insert(std::make_pair(src,static_cast<TargetPhraseCollectionWithSourcePhrase const*>(0)));
+ if(!piter.second) return piter.first->second;
+ } else if (m_cache.size()) {
+ MapSrc2Tgt::const_iterator i=m_cache.find(src);
+ return (i!=m_cache.end() ? i->second : 0);
+ }
+
+ std::vector<std::string> srcString(src.GetSize());
+ // convert source Phrase into vector of strings
+ for(size_t i=0; i<srcString.size(); ++i) {
+ Factors2String(src.GetWord(i),srcString[i]);
+ }
+
+ // get target phrases in string representation
+ std::vector<StringTgtCand> cands;
+ std::vector<std::string> wacands;
+ m_dict->GetTargetCandidates(srcString,cands,wacands);
+ if(cands.empty()) {
+ return 0;
+ }
+
+ //TODO: Multiple models broken here
+ std::vector<float> weights = StaticData::Instance().GetWeights(m_obj);
+
+ std::vector<TargetPhrase> tCands;
+ tCands.reserve(cands.size());
+
+ std::vector<std::pair<float,size_t> > costs;
+ costs.reserve(cands.size());
+
+ std::vector<Phrase> sourcePhrases;
+ sourcePhrases.reserve(cands.size());
+
+
+ // convert into TargetPhrases
+ for(size_t i=0; i<cands.size(); ++i) {
+ TargetPhrase targetPhrase(m_obj);
+
+ StringTgtCand::Tokens const& factorStrings=cands[i].tokens;
+ Scores const& probVector=cands[i].scores;
+
+ std::vector<float> scoreVector(probVector.size());
+ std::transform(probVector.begin(),probVector.end(),scoreVector.begin(),
+ TransformScore);
+ std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),
+ FloorScore);
+
+ //sparse features.
+ //These are already in log-space
+ for (size_t j = 0; j < cands[i].fnames.size(); ++j) {
+ targetPhrase.GetScoreBreakdown().Assign(m_obj, *cands[i].fnames[j], cands[i].fvalues[j]);
+ }
+
+ CreateTargetPhrase(targetPhrase,factorStrings,scoreVector, Scores(0), &wacands[i], &src);
+
+ costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(),tCands.size()));
+ tCands.push_back(targetPhrase);
+
+ sourcePhrases.push_back(src);
+ }
+
+ TargetPhraseCollectionWithSourcePhrase *rv;
+ rv=PruneTargetCandidates(tCands,costs, sourcePhrases);
+ if(rv->IsEmpty()) {
+ delete rv;
+ return 0;
+ } else {
+ if(useCache) piter.first->second=rv;
+ m_tgtColls.push_back(rv);
+ return rv;
+ }
+
+}
+
+void PDTAimp::Create(const std::vector<FactorType> &input
+ , const std::vector<FactorType> &output
+ , const std::string &filePath
+ , const std::vector<float> &weight
+ ) {
+
+ // set my members
+ m_dict=new PhraseDictionaryTree();
+ m_input=input;
+ m_output=output;
+
+ const StaticData &staticData = StaticData::Instance();
+ m_dict->NeedAlignmentInfo(staticData.NeedAlignmentInfo());
+
+ std::string binFname=filePath+".binphr.idx";
+ if(!FileExists(binFname.c_str())) {
+ UTIL_THROW2( "bin ttable does not exist");
+ //TRACE_ERR( "bin ttable does not exist -> create it\n");
+ //InputFileStream in(filePath);
+ //m_dict->Create(in,filePath);
+ }
+ VERBOSE(1,"reading bin ttable\n");
+// m_dict->Read(filePath);
+ bool res=m_dict->Read(filePath);
+ if (!res) {
+ std::stringstream strme;
+ strme << "bin ttable could not be read correctly\n";
+ UserMessage::Add(strme.str());
+ exit(1);
+ }
+}
+
+
+void PDTAimp::CacheSource(ConfusionNet const& src) {
+ assert(m_dict);
+ const size_t srcSize=src.GetSize();
+
+ std::vector<size_t> exploredPaths(srcSize+1,0);
+ std::vector<double> exPathsD(srcSize+1,-1.0);
+
+ // collect some statistics
+ std::vector<size_t> cnDepths(srcSize,0);
+ for(size_t i=0; i<srcSize; ++i) cnDepths[i]=src[i].size();
+
+ for(size_t len=1; len<=srcSize; ++len)
+ for(size_t i=0; i<=srcSize-len; ++i) {
+ double pd=0.0;
+ for(size_t k=i; k<i+len; ++k) pd+=log(1.0*cnDepths[k]);
+ exPathsD[len]=(exPathsD[len]>=0.0 ? addLogScale(pd,exPathsD[len]) : pd);
+ }
+
+ // update global statistics
+ if(pathCN.size()<=srcSize) pathCN.resize(srcSize+1,-1.0);
+ for(size_t len=1; len<=srcSize; ++len)
+ pathCN[len]=pathCN[len]>=0.0 ? addLogScale(pathCN[len],exPathsD[len]) : exPathsD[len];
+
+ if(path1Best.size()<=srcSize) path1Best.resize(srcSize+1,0);
+ for(size_t len=1; len<=srcSize; ++len) path1Best[len]+=srcSize-len+1;
+
+
+ if (StaticData::Instance().GetVerboseLevel() >= 2 && exPathsD.size()) {
+ TRACE_ERR("path stats for current CN: \nCN (full): ");
+ std::transform(exPathsD.begin()+1
+ ,exPathsD.end()
+ ,std::ostream_iterator<double>(std::cerr," ")
+ ,Exp);
+ TRACE_ERR("\n");
+ }
+
+ typedef StringTgtCand::Tokens sPhrase;
+ typedef std::map<StringTgtCand::Tokens,TScores> E2Costs;
+
+ std::map<Range,E2Costs> cov2cand;
+ std::vector<State> stack;
+ for(Position i=0 ; i < srcSize ; ++i)
+ stack.push_back(State(i, i, m_dict->GetRoot(), std::vector<float>(m_numInputScores,0.0)));
+
+ std::vector<float> weightTrans = StaticData::Instance().GetWeights(m_obj);
+ std::vector<float> weightInput = StaticData::Instance().GetWeights(m_inputFeature);
+ float weightWP = StaticData::Instance().GetWeightWordPenalty();
+
+ while(!stack.empty()) {
+ State curr(stack.back());
+ stack.pop_back();
+
+ UTIL_THROW_IF2(curr.end() >= srcSize, "Confusion net path ends past the last source position");
+ const ConfusionNet::Column &currCol=src[curr.end()];
+ // in a given column, loop over all possibilities
+ for(size_t colidx=0; colidx<currCol.size(); ++colidx) {
+ const Word& w=currCol[colidx].first; // w = the colidx-th alternative in the current column
+ std::string s;
+ Factors2String(w,s);
+ bool isEpsilon=(s=="" || s==EPSILON);
+
+ //assert that we have the right number of link params in this CN option
+ UTIL_THROW_IF2(currCol[colidx].second.denseScores.size() < m_numInputScores,
+ "Incorrect number of input scores");
+
+ // do not start with epsilon (except at first position)
+ if(isEpsilon && curr.begin()==curr.end() && curr.begin()>0) continue;
+
+ // At a given node in the prefix tree, look to see if w defines an edge to
+ // another node (Extend). Stay at the same node if w==EPSILON
+ PPtr nextP = (isEpsilon ? curr.ptr : m_dict->Extend(curr.ptr,s));
+
+ if(nextP) { // w is a word that should be considered
+ Range newRange(curr.begin(),curr.end()+src.GetColumnIncrement(curr.end(),colidx));
+
+ //add together the link scores from the current state and the new arc
+ float inputScoreSum = 0;
+ std::vector<float> newInputScores(m_numInputScores,0.0);
+ if (m_numInputScores) {
+ std::transform(currCol[colidx].second.denseScores.begin(), currCol[colidx].second.denseScores.end(),
+ curr.GetScores().begin(),
+ newInputScores.begin(),
+ std::plus<float>());
+
+
+ //we need to sum up link weights (excluding realWordCount, which isn't in numLinkParams)
+ //if the sum is too low, then we won't expand this.
+ //TODO: dodgy! shouldn't we consider weights here? what about zero-weight params?
+ inputScoreSum = std::accumulate(newInputScores.begin(),newInputScores.begin()+m_numInputScores,0.0);
+ }
+
+ Phrase newSrc(curr.src);
+ if(!isEpsilon) newSrc.AddWord(w);
+ if(newRange.second<srcSize && inputScoreSum>LOWEST_SCORE) {
+ // if there is more room to grow, add a new state onto the queue
+ // to be explored that represents [begin, curEnd+)
+ stack.push_back(State(newRange,nextP,newInputScores));
+ stack.back().src=newSrc;
+ }
+
+ std::vector<StringTgtCand> tcands;
+ // now, look up the target candidates (approx. TargetPhraseCollection) for
+ // the current path through the CN
+ m_dict->GetTargetCandidates(nextP,tcands);
+
+ if(newRange.second>=exploredPaths.size()+newRange.first)
+ exploredPaths.resize(newRange.second-newRange.first+1,0);
+ ++exploredPaths[newRange.second-newRange.first];
+
+ totalE+=tcands.size();
+
+ if(tcands.size()) {
+ E2Costs& e2costs=cov2cand[newRange];
+ Phrase const* srcPtr=uniqSrcPhr(newSrc);
+ for(size_t i=0; i<tcands.size(); ++i) {
+ //put input scores in first - already logged, just drop in directly
+ std::vector<float> transcores(m_obj->GetNumScoreComponents());
+ UTIL_THROW_IF2(transcores.size() != weightTrans.size(),
+ "Incorrect number of translation scores");
+
+ //put in phrase table scores, logging as we insert
+ std::transform(tcands[i].scores.begin()
+ ,tcands[i].scores.end()
+ ,transcores.begin()
+ ,TransformScore);
+
+
+ //tally up
+ float score=std::inner_product(transcores.begin(), transcores.end(), weightTrans.begin(), 0.0f);
+
+ // input feature
+ score +=std::inner_product(newInputScores.begin(), newInputScores.end(), weightInput.begin(), 0.0f);
+
+ //count word penalty
+ score-=tcands[i].tokens.size() * weightWP;
+
+ std::pair<E2Costs::iterator,bool> p=e2costs.insert(std::make_pair(tcands[i].tokens,TScores()));
+
+ if(p.second) ++distinctE;
+
+ TScores & scores=p.first->second;
+ if(p.second || scores.total<score) {
+ scores.total=score;
+ scores.transScore=transcores;
+ scores.inputScores=newInputScores;
+ scores.src=srcPtr;
+ }
+ }
+ }
+ }
+ }
+ } // end while(!stack.empty())
+
+
+ if (StaticData::Instance().GetVerboseLevel() >= 2 && exploredPaths.size()) {
+ TRACE_ERR("CN (explored): ");
+ std::copy(exploredPaths.begin()+1,exploredPaths.end(),
+ std::ostream_iterator<size_t>(std::cerr," "));
+ TRACE_ERR("\n");
+ }
+
+ if(pathExplored.size()<exploredPaths.size())
+ pathExplored.resize(exploredPaths.size(),0);
+ for(size_t len=1; len<=srcSize; ++len)
+ pathExplored[len]+=exploredPaths[len];
+
+
+ m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize(),0));
+
+ for(std::map<Range,E2Costs>::const_iterator i=cov2cand.begin(); i!=cov2cand.end(); ++i) {
+ assert(i->first.first<m_rangeCache.size());
+ assert(i->first.second>0);
+ assert(static_cast<size_t>(i->first.second-1)<m_rangeCache[i->first.first].size());
+ assert(m_rangeCache[i->first.first][i->first.second-1]==0);
+
+ std::vector<TargetPhrase> tCands;
+ tCands.reserve(i->second.size());
+
+ std::vector<std::pair<float,size_t> > costs;
+ costs.reserve(i->second.size());
+
+ std::vector<Phrase> sourcePhrases;
+ sourcePhrases.reserve(i->second.size());
+
+ for(E2Costs::const_iterator j=i->second.begin(); j!=i->second.end(); ++j) {
+ TScores const & scores=j->second;
+ TargetPhrase targetPhrase(m_obj);
+ CreateTargetPhrase(targetPhrase
+ , j ->first
+ , scores.transScore
+ , scores.inputScores
+ , NULL
+ , scores.src);
+ costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(),tCands.size()));
+ tCands.push_back(targetPhrase);
+
+ sourcePhrases.push_back(*scores.src);
+
+ //std::cerr << i->first.first << "-" << i->first.second << ": " << targetPhrase << std::endl;
+ }
+
+ TargetPhraseCollectionWithSourcePhrase *rv=PruneTargetCandidates(tCands, costs, sourcePhrases);
+
+ if(rv->IsEmpty())
+ delete rv;
+ else {
+ m_rangeCache[i->first.first][i->first.second-1]=rv;
+ m_tgtColls.push_back(rv);
+ }
+ }
+ // free memory
+ m_dict->FreeMemory();
+}
+
+void PDTAimp::CreateTargetPhrase(TargetPhrase& targetPhrase,
+ StringTgtCand::Tokens const& factorStrings,
+ Scores const& transVector,
+ Scores const& inputVector,
+ const std::string *alignmentString,
+ Phrase const* srcPtr) const {
+ FactorCollection &factorCollection = FactorCollection::Instance();
+
+ for(size_t k=0; k<factorStrings.size(); ++k) {
+ util::TokenIter<util::MultiCharacter, false> word(*factorStrings[k], StaticData::Instance().GetFactorDelimiter());
+ Word& w=targetPhrase.AddWord();
+ for(size_t l=0; l<m_output.size(); ++l, ++word) {
+ w[m_output[l]]= factorCollection.AddFactor(*word);
+ }
+ }
+
+ if (alignmentString) {
+ targetPhrase.SetAlignmentInfo(*alignmentString);
+ }
+
+ if (m_numInputScores) {
+ targetPhrase.GetScoreBreakdown().Assign(m_inputFeature, inputVector);
+ }
+
+ targetPhrase.GetScoreBreakdown().Assign(m_obj, transVector);
+ targetPhrase.EvaluateInIsolation(*srcPtr, m_obj->GetFeaturesToApply());
+}
+
+TargetPhraseCollectionWithSourcePhrase* PDTAimp::PruneTargetCandidates
+(const std::vector<TargetPhrase> & tCands,
+ std::vector<std::pair<float,size_t> >& costs,
+ const std::vector<Phrase> &sourcePhrases) const {
+ // convert into TargetPhraseCollection
+ UTIL_THROW_IF2(tCands.size() != sourcePhrases.size(),
+ "Number of target phrases must equal number of source phrases");
+
+ TargetPhraseCollectionWithSourcePhrase *rv=new TargetPhraseCollectionWithSourcePhrase;
+
+
+ // set limit to tableLimit or actual size, whatever is smaller
+ std::vector<std::pair<float,size_t> >::iterator nth =
+ costs.begin() + ((m_obj->m_tableLimit>0 && // 0 indicates no limit
+ m_obj->m_tableLimit < costs.size()) ?
+ m_obj->m_tableLimit : costs.size());
+
+ // find the nth phrase according to future cost
+ NTH_ELEMENT3(costs.begin(),nth ,costs.end());
+
+ // add n top phrases to the return list
+ for(std::vector<std::pair<float,size_t> >::iterator
+ it = costs.begin(); it != nth; ++it) {
+ size_t ind = it->second;
+ TargetPhrase *targetPhrase = new TargetPhrase(tCands[ind]);
+ const Phrase &sourcePhrase = sourcePhrases[ind];
+ rv->Add(targetPhrase, sourcePhrase);
+
+ }
+
+ return rv;
+}
+
+}
+
+
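PruneTargetCandidates above keeps only the table-limit best candidates by partially selecting on negated future score; NTH_ELEMENT3 is presumably a thin wrapper around std::nth_element. The following minimal, self-contained sketch shows the same idiom outside the Moses types (all names and numbers here are made up for illustration):

// Standalone sketch of the table-limit pruning idiom:
// keep the N lowest-cost (cost, index) pairs without fully sorting.
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main() {
  // hypothetical candidates and their negated future scores (lower = better)
  std::vector<std::string> cands;
  cands.push_back("a"); cands.push_back("b"); cands.push_back("c");
  cands.push_back("d"); cands.push_back("e");
  float cost[] = {0.9f, 0.1f, 0.5f, 0.3f, 0.7f};
  std::size_t tableLimit = 3;            // 0 would mean "no limit"

  std::vector<std::pair<float, std::size_t> > costs;
  for (std::size_t i = 0; i < cands.size(); ++i)
    costs.push_back(std::make_pair(cost[i], i));

  std::vector<std::pair<float, std::size_t> >::iterator nth =
      costs.begin() + ((tableLimit > 0 && tableLimit < costs.size())
                           ? tableLimit : costs.size());

  // partial selection: everything before 'nth' is among the best
  std::nth_element(costs.begin(), nth, costs.end());

  for (std::vector<std::pair<float, std::size_t> >::iterator it = costs.begin();
       it != nth; ++it)
    std::cout << cands[it->second] << " (cost " << it->first << ")\n";
  return 0;
}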
diff --git a/moses/PDTAimp.h b/moses/PDTAimp.h
index 999fbb1e0..01de1e88a 100644
--- a/moses/PDTAimp.h
+++ b/moses/PDTAimp.h
@@ -38,23 +38,7 @@ class PDTAimp
friend class PhraseDictionaryTreeAdaptor;
protected:
- PDTAimp(PhraseDictionaryTreeAdaptor *p)
- : m_dict(0),
- m_obj(p),
- useCache(1),
- totalE(0),
- distinctE(0) {
- m_numInputScores = 0;
- const StaticData &staticData = StaticData::Instance();
- m_inputFeature = &InputFeature::Instance();
-
- if (m_inputFeature) {
- const PhraseDictionary *firstPt = PhraseDictionary::GetColl()[0];
- if (firstPt == m_obj) {
- m_numInputScores = m_inputFeature->GetNumScoreComponents();
- }
- }
- }
+ PDTAimp(PhraseDictionaryTreeAdaptor *p);
public:
std::vector<FactorType> m_input,m_output;
@@ -77,172 +61,22 @@ public:
std::vector<size_t> path1Best,pathExplored;
std::vector<double> pathCN;
- ~PDTAimp() {
- CleanUp();
- delete m_dict;
-
- if (StaticData::Instance().GetVerboseLevel() >= 2) {
-
- TRACE_ERR("tgt candidates stats: total="<<totalE<<"; distinct="
- <<distinctE<<" ("<<distinctE/(0.01*totalE)<<"); duplicates="
- <<totalE-distinctE<<" ("<<(totalE-distinctE)/(0.01*totalE)
- <<")\n");
-
- TRACE_ERR("\npath statistics\n");
-
- if(path1Best.size()) {
- TRACE_ERR("1-best: ");
- std::copy(path1Best.begin()+1,path1Best.end(),
- std::ostream_iterator<size_t>(std::cerr," \t"));
- TRACE_ERR("\n");
- }
- if(pathCN.size()) {
- TRACE_ERR("CN (full): ");
- std::transform(pathCN.begin()+1
- ,pathCN.end()
- ,std::ostream_iterator<double>(std::cerr," \t")
- ,Exp);
- TRACE_ERR("\n");
- }
- if(pathExplored.size()) {
- TRACE_ERR("CN (explored): ");
- std::copy(pathExplored.begin()+1,pathExplored.end(),
- std::ostream_iterator<size_t>(std::cerr," \t"));
- TRACE_ERR("\n");
- }
- }
-
- }
+ ~PDTAimp();
void Factors2String(Word const& w,std::string& s) const {
s=w.GetString(m_input,false);
}
- void CleanUp() {
- assert(m_dict);
- m_dict->FreeMemory();
- for(size_t i=0; i<m_tgtColls.size(); ++i) delete m_tgtColls[i];
- m_tgtColls.clear();
- m_cache.clear();
- m_rangeCache.clear();
- uniqSrcPhr.clear();
- }
+ void CleanUp();
TargetPhraseCollectionWithSourcePhrase const*
- GetTargetPhraseCollection(Phrase const &src) const {
-
- assert(m_dict);
- if(src.GetSize()==0) return 0;
-
- std::pair<MapSrc2Tgt::iterator,bool> piter;
- if(useCache) {
- piter=m_cache.insert(std::make_pair(src,static_cast<TargetPhraseCollectionWithSourcePhrase const*>(0)));
- if(!piter.second) return piter.first->second;
- } else if (m_cache.size()) {
- MapSrc2Tgt::const_iterator i=m_cache.find(src);
- return (i!=m_cache.end() ? i->second : 0);
- }
-
- std::vector<std::string> srcString(src.GetSize());
- // convert source Phrase into vector of strings
- for(size_t i=0; i<srcString.size(); ++i) {
- Factors2String(src.GetWord(i),srcString[i]);
- }
-
- // get target phrases in string representation
- std::vector<StringTgtCand> cands;
- std::vector<std::string> wacands;
- m_dict->GetTargetCandidates(srcString,cands,wacands);
- if(cands.empty()) {
- return 0;
- }
-
- //TODO: Multiple models broken here
- std::vector<float> weights = StaticData::Instance().GetWeights(m_obj);
-
- std::vector<TargetPhrase> tCands;
- tCands.reserve(cands.size());
-
- std::vector<std::pair<float,size_t> > costs;
- costs.reserve(cands.size());
-
- std::vector<Phrase> sourcePhrases;
- sourcePhrases.reserve(cands.size());
-
-
- // convert into TargetPhrases
- for(size_t i=0; i<cands.size(); ++i) {
- TargetPhrase targetPhrase;
-
- StringTgtCand::Tokens const& factorStrings=cands[i].tokens;
- Scores const& probVector=cands[i].scores;
-
- std::vector<float> scoreVector(probVector.size());
- std::transform(probVector.begin(),probVector.end(),scoreVector.begin(),
- TransformScore);
- std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),
- FloorScore);
-
- //sparse features.
- //These are already in log-space
- for (size_t j = 0; j < cands[i].fnames.size(); ++j) {
- targetPhrase.GetScoreBreakdown().Assign(m_obj, *cands[i].fnames[j], cands[i].fvalues[j]);
- }
-
- CreateTargetPhrase(targetPhrase,factorStrings,scoreVector, Scores(0), &wacands[i], &src);
-
- costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(),tCands.size()));
- tCands.push_back(targetPhrase);
-
- sourcePhrases.push_back(src);
- }
-
- TargetPhraseCollectionWithSourcePhrase *rv;
- rv=PruneTargetCandidates(tCands,costs, sourcePhrases);
- if(rv->IsEmpty()) {
- delete rv;
- return 0;
- } else {
- if(useCache) piter.first->second=rv;
- m_tgtColls.push_back(rv);
- return rv;
- }
-
- }
-
-
+ GetTargetPhraseCollection(Phrase const &src) const;
void Create(const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, const std::string &filePath
- , const std::vector<float> &weight
- ) {
+ , const std::vector<float> &weight);
- // set my members
- m_dict=new PhraseDictionaryTree();
- m_input=input;
- m_output=output;
-
- const StaticData &staticData = StaticData::Instance();
- m_dict->NeedAlignmentInfo(staticData.NeedAlignmentInfo());
-
- std::string binFname=filePath+".binphr.idx";
- if(!FileExists(binFname.c_str())) {
- UTIL_THROW2( "bin ttable does not exist");
- //TRACE_ERR( "bin ttable does not exist -> create it\n");
- //InputFileStream in(filePath);
- //m_dict->Create(in,filePath);
- }
- TRACE_ERR( "reading bin ttable\n");
-// m_dict->Read(filePath);
- bool res=m_dict->Read(filePath);
- if (!res) {
- std::stringstream strme;
- strme << "bin ttable was read in a wrong way\n";
- UserMessage::Add(strme.str());
- exit(1);
- }
- }
typedef PhraseDictionaryTree::PrefixPtr PPtr;
typedef unsigned short Position;
@@ -285,61 +119,13 @@ public:
Scores const& transVector,
Scores const& inputVector,
const std::string *alignmentString,
- Phrase const* srcPtr=0) const {
- FactorCollection &factorCollection = FactorCollection::Instance();
-
- for(size_t k=0; k<factorStrings.size(); ++k) {
- util::TokenIter<util::MultiCharacter, false> word(*factorStrings[k], StaticData::Instance().GetFactorDelimiter());
- Word& w=targetPhrase.AddWord();
- for(size_t l=0; l<m_output.size(); ++l, ++word) {
- w[m_output[l]]= factorCollection.AddFactor(*word);
- }
- }
-
- if (alignmentString) {
- targetPhrase.SetAlignmentInfo(*alignmentString);
- }
-
- if (m_numInputScores) {
- targetPhrase.GetScoreBreakdown().Assign(m_inputFeature, inputVector);
- }
-
- targetPhrase.GetScoreBreakdown().Assign(m_obj, transVector);
- targetPhrase.Evaluate(*srcPtr, m_obj->GetFeaturesToApply());
- }
+ Phrase const* srcPtr=0) const;
TargetPhraseCollectionWithSourcePhrase* PruneTargetCandidates
(const std::vector<TargetPhrase> & tCands,
std::vector<std::pair<float,size_t> >& costs,
- const std::vector<Phrase> &sourcePhrases) const {
- // convert into TargetPhraseCollection
- UTIL_THROW_IF2(tCands.size() != sourcePhrases.size(),
- "Number of target phrases must equal number of source phrases");
-
- TargetPhraseCollectionWithSourcePhrase *rv=new TargetPhraseCollectionWithSourcePhrase;
-
-
- // set limit to tableLimit or actual size, whatever is smaller
- std::vector<std::pair<float,size_t> >::iterator nth =
- costs.begin() + ((m_obj->m_tableLimit>0 && // 0 indicates no limit
- m_obj->m_tableLimit < costs.size()) ?
- m_obj->m_tableLimit : costs.size());
-
- // find the nth phrase according to future cost
- NTH_ELEMENT3(costs.begin(),nth ,costs.end());
+ const std::vector<Phrase> &sourcePhrases) const;
- // add n top phrases to the return list
- for(std::vector<std::pair<float,size_t> >::iterator
- it = costs.begin(); it != nth; ++it) {
- size_t ind = it->second;
- TargetPhrase *targetPhrase = new TargetPhrase(tCands[ind]);
- const Phrase &sourcePhrase = sourcePhrases[ind];
- rv->Add(targetPhrase, sourcePhrase);
-
- }
-
- return rv;
- }
// POD for target phrase scores
struct TScores {
@@ -350,220 +136,7 @@ public:
TScores() : total(0.0),src(0) {}
};
- void CacheSource(ConfusionNet const& src) {
- assert(m_dict);
- const size_t srcSize=src.GetSize();
-
- std::vector<size_t> exploredPaths(srcSize+1,0);
- std::vector<double> exPathsD(srcSize+1,-1.0);
-
- // collect some statistics
- std::vector<size_t> cnDepths(srcSize,0);
- for(size_t i=0; i<srcSize; ++i) cnDepths[i]=src[i].size();
-
- for(size_t len=1; len<=srcSize; ++len)
- for(size_t i=0; i<=srcSize-len; ++i) {
- double pd=0.0;
- for(size_t k=i; k<i+len; ++k) pd+=log(1.0*cnDepths[k]);
- exPathsD[len]=(exPathsD[len]>=0.0 ? addLogScale(pd,exPathsD[len]) : pd);
- }
-
- // update global statistics
- if(pathCN.size()<=srcSize) pathCN.resize(srcSize+1,-1.0);
- for(size_t len=1; len<=srcSize; ++len)
- pathCN[len]=pathCN[len]>=0.0 ? addLogScale(pathCN[len],exPathsD[len]) : exPathsD[len];
-
- if(path1Best.size()<=srcSize) path1Best.resize(srcSize+1,0);
- for(size_t len=1; len<=srcSize; ++len) path1Best[len]+=srcSize-len+1;
-
-
- if (StaticData::Instance().GetVerboseLevel() >= 2 && exPathsD.size()) {
- TRACE_ERR("path stats for current CN: \nCN (full): ");
- std::transform(exPathsD.begin()+1
- ,exPathsD.end()
- ,std::ostream_iterator<double>(std::cerr," ")
- ,Exp);
- TRACE_ERR("\n");
- }
-
- typedef StringTgtCand::Tokens sPhrase;
- typedef std::map<StringTgtCand::Tokens,TScores> E2Costs;
-
- std::map<Range,E2Costs> cov2cand;
- std::vector<State> stack;
- for(Position i=0 ; i < srcSize ; ++i)
- stack.push_back(State(i, i, m_dict->GetRoot(), std::vector<float>(m_numInputScores,0.0)));
-
- std::vector<float> weightTrans = StaticData::Instance().GetWeights(m_obj);
- std::vector<float> weightInput = StaticData::Instance().GetWeights(m_inputFeature);
- float weightWP = StaticData::Instance().GetWeightWordPenalty();
-
- while(!stack.empty()) {
- State curr(stack.back());
- stack.pop_back();
-
- UTIL_THROW_IF2(curr.end() >= srcSize, "Error");
- const ConfusionNet::Column &currCol=src[curr.end()];
- // in a given column, loop over all possibilities
- for(size_t colidx=0; colidx<currCol.size(); ++colidx) {
- const Word& w=currCol[colidx].first; // w=the i^th possibility in column colidx
- std::string s;
- Factors2String(w,s);
- bool isEpsilon=(s=="" || s==EPSILON);
-
- //assert that we have the right number of link params in this CN option
- UTIL_THROW_IF2(currCol[colidx].second.denseScores.size() < m_numInputScores,
- "Incorrect number of input scores");
-
- // do not start with epsilon (except at first position)
- if(isEpsilon && curr.begin()==curr.end() && curr.begin()>0) continue;
-
- // At a given node in the prefix tree, look to see if w defines an edge to
- // another node (Extend). Stay at the same node if w==EPSILON
- PPtr nextP = (isEpsilon ? curr.ptr : m_dict->Extend(curr.ptr,s));
-
- if(nextP) { // w is a word that should be considered
- Range newRange(curr.begin(),curr.end()+src.GetColumnIncrement(curr.end(),colidx));
-
- //add together the link scores from the current state and the new arc
- float inputScoreSum = 0;
- std::vector<float> newInputScores(m_numInputScores,0.0);
- if (m_numInputScores) {
- std::transform(currCol[colidx].second.denseScores.begin(), currCol[colidx].second.denseScores.end(),
- curr.GetScores().begin(),
- newInputScores.begin(),
- std::plus<float>());
-
-
- //we need to sum up link weights (excluding realWordCount, which isn't in numLinkParams)
- //if the sum is too low, then we won't expand this.
- //TODO: dodgy! shouldn't we consider weights here? what about zero-weight params?
- inputScoreSum = std::accumulate(newInputScores.begin(),newInputScores.begin()+m_numInputScores,0.0);
- }
-
- Phrase newSrc(curr.src);
- if(!isEpsilon) newSrc.AddWord(w);
- if(newRange.second<srcSize && inputScoreSum>LOWEST_SCORE) {
- // if there is more room to grow, add a new state onto the queue
- // to be explored that represents [begin, curEnd+)
- stack.push_back(State(newRange,nextP,newInputScores));
- stack.back().src=newSrc;
- }
-
- std::vector<StringTgtCand> tcands;
- // now, look up the target candidates (aprx. TargetPhraseCollection) for
- // the current path through the CN
- m_dict->GetTargetCandidates(nextP,tcands);
-
- if(newRange.second>=exploredPaths.size()+newRange.first)
- exploredPaths.resize(newRange.second-newRange.first+1,0);
- ++exploredPaths[newRange.second-newRange.first];
-
- totalE+=tcands.size();
-
- if(tcands.size()) {
- E2Costs& e2costs=cov2cand[newRange];
- Phrase const* srcPtr=uniqSrcPhr(newSrc);
- for(size_t i=0; i<tcands.size(); ++i) {
- //put input scores in first - already logged, just drop in directly
- std::vector<float> transcores(m_obj->GetNumScoreComponents());
- UTIL_THROW_IF2(transcores.size() != weightTrans.size(),
- "Incorrect number of translation scores");
-
- //put in phrase table scores, logging as we insert
- std::transform(tcands[i].scores.begin()
- ,tcands[i].scores.end()
- ,transcores.begin()
- ,TransformScore);
-
-
- //tally up
- float score=std::inner_product(transcores.begin(), transcores.end(), weightTrans.begin(), 0.0f);
-
- // input feature
- score +=std::inner_product(newInputScores.begin(), newInputScores.end(), weightInput.begin(), 0.0f);
-
- //count word penalty
- score-=tcands[i].tokens.size() * weightWP;
-
- std::pair<E2Costs::iterator,bool> p=e2costs.insert(std::make_pair(tcands[i].tokens,TScores()));
-
- if(p.second) ++distinctE;
-
- TScores & scores=p.first->second;
- if(p.second || scores.total<score) {
- scores.total=score;
- scores.transScore=transcores;
- scores.inputScores=newInputScores;
- scores.src=srcPtr;
- }
- }
- }
- }
- }
- } // end while(!stack.empty())
-
-
- if (StaticData::Instance().GetVerboseLevel() >= 2 && exploredPaths.size()) {
- TRACE_ERR("CN (explored): ");
- std::copy(exploredPaths.begin()+1,exploredPaths.end(),
- std::ostream_iterator<size_t>(std::cerr," "));
- TRACE_ERR("\n");
- }
-
- if(pathExplored.size()<exploredPaths.size())
- pathExplored.resize(exploredPaths.size(),0);
- for(size_t len=1; len<=srcSize; ++len)
- pathExplored[len]+=exploredPaths[len];
-
-
- m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize(),0));
-
- for(std::map<Range,E2Costs>::const_iterator i=cov2cand.begin(); i!=cov2cand.end(); ++i) {
- assert(i->first.first<m_rangeCache.size());
- assert(i->first.second>0);
- assert(static_cast<size_t>(i->first.second-1)<m_rangeCache[i->first.first].size());
- assert(m_rangeCache[i->first.first][i->first.second-1]==0);
-
- std::vector<TargetPhrase> tCands;
- tCands.reserve(i->second.size());
-
- std::vector<std::pair<float,size_t> > costs;
- costs.reserve(i->second.size());
-
- std::vector<Phrase> sourcePhrases;
- sourcePhrases.reserve(i->second.size());
-
- for(E2Costs::const_iterator j=i->second.begin(); j!=i->second.end(); ++j) {
- TScores const & scores=j->second;
- TargetPhrase targetPhrase;
- CreateTargetPhrase(targetPhrase
- , j ->first
- , scores.transScore
- , scores.inputScores
- , NULL
- , scores.src);
- costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(),tCands.size()));
- tCands.push_back(targetPhrase);
-
- sourcePhrases.push_back(*scores.src);
-
- //std::cerr << i->first.first << "-" << i->first.second << ": " << targetPhrase << std::endl;
- }
-
- TargetPhraseCollectionWithSourcePhrase *rv=PruneTargetCandidates(tCands, costs, sourcePhrases);
-
- if(rv->IsEmpty())
- delete rv;
- else {
- m_rangeCache[i->first.first][i->first.second-1]=rv;
- m_tgtColls.push_back(rv);
- }
- }
- // free memory
- m_dict->FreeMemory();
- }
-
+ void CacheSource(ConfusionNet const& src);
size_t GetNumInputScores() const {
return m_numInputScores;
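The removed inline body of GetTargetPhraseCollection above uses an insert-then-fill cache: the map insert doubles as the lookup, and only on a genuine miss is the expensive work done and the freshly inserted slot filled in. A minimal standalone sketch of that idiom, with hypothetical names and a stand-in for the real lookup:

// Standalone sketch of the insert-then-fill cache idiom:
// one map operation serves as both probe and insertion point.
#include <iostream>
#include <map>
#include <string>
#include <utility>

// stand-in for the expensive phrase-table lookup (hypothetical)
static const std::string* Compute(const std::string& key) {
  static std::string result;
  result = "translation-of-" + key;
  return &result;
}

int main() {
  std::map<std::string, const std::string*> cache;

  for (int pass = 0; pass < 2; ++pass) {
    std::pair<std::map<std::string, const std::string*>::iterator, bool> piter =
        cache.insert(std::make_pair(std::string("haus"),
                                    static_cast<const std::string*>(0)));
    if (!piter.second) {                   // already cached (second pass)
      std::cout << "cache hit: " << *piter.first->second << "\n";
      continue;
    }
    piter.first->second = Compute("haus"); // fill the freshly inserted slot
    std::cout << "computed:  " << *piter.first->second << "\n";
  }
  return 0;
}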
diff --git a/moses/PP/Factory.cpp b/moses/PP/Factory.cpp
index 497eabaff..fd146005b 100644
--- a/moses/PP/Factory.cpp
+++ b/moses/PP/Factory.cpp
@@ -8,6 +8,8 @@
#include "moses/PP/SourceLabelsPhraseProperty.h"
#include "moses/PP/TreeStructurePhraseProperty.h"
#include "moses/PP/SpanLengthPhraseProperty.h"
+#include "moses/PP/NonTermContextProperty.h"
+#include "moses/PP/OrientationPhraseProperty.h"
namespace Moses
{
@@ -57,6 +59,8 @@ PhrasePropertyFactory::PhrasePropertyFactory()
MOSES_PNAME2("SourceLabels", SourceLabelsPhraseProperty);
MOSES_PNAME2("Tree",TreeStructurePhraseProperty);
MOSES_PNAME2("SpanLength", SpanLengthPhraseProperty);
+ MOSES_PNAME2("NonTermContext", NonTermContextProperty);
+ MOSES_PNAME2("Orientation", OrientationPhraseProperty);
}
PhrasePropertyFactory::~PhrasePropertyFactory()
diff --git a/moses/PP/NonTermContextProperty.cpp b/moses/PP/NonTermContextProperty.cpp
new file mode 100644
index 000000000..df5e88d8e
--- /dev/null
+++ b/moses/PP/NonTermContextProperty.cpp
@@ -0,0 +1,137 @@
+#include "moses/PP/NonTermContextProperty.h"
+#include <string>
+#include <assert.h>
+#include "moses/Util.h"
+#include "moses/FactorCollection.h"
+
+using namespace std;
+
+namespace Moses
+{
+NonTermContextProperty::NonTermContextProperty()
+{
+}
+
+NonTermContextProperty::~NonTermContextProperty()
+{
+ //RemoveAllInColl(m_probStores);
+}
+
+void NonTermContextProperty::ProcessValue(const std::string &value)
+{
+ vector<string> toks;
+ Tokenize(toks, value);
+
+ FactorCollection &fc = FactorCollection::Instance();
+
+ size_t numNT = Scan<size_t>(toks[0]);
+ m_probStores.resize(numNT);
+
+ size_t ind = 1;
+ while (ind < toks.size()) {
+ vector<const Factor *> factors;
+
+ for (size_t nt = 0; nt < numNT; ++nt) {
+ size_t ntInd = Scan<size_t>(toks[ind]);
+ assert(nt == ntInd);
+ ++ind;
+
+ for (size_t contextInd = 0; contextInd < 4; ++contextInd) {
+ //cerr << "toks[" << ind << "]=" << toks[ind] << endl;
+ const Factor *factor = fc.AddFactor(toks[ind], false);
+ factors.push_back(factor);
+ ++ind;
+ }
+ }
+
+ // done with the context. Just get the count and put it all into data structures
+ // cerr << "count=" << toks[ind] << endl;
+ float count = Scan<float>(toks[ind]);
+ ++ind;
+
+ for (size_t i = 0; i < factors.size(); ++i) {
+ size_t ntInd = i / 4;
+ size_t contextInd = i % 4;
+ const Factor *factor = factors[i];
+ AddToMap(ntInd, contextInd, factor, count);
+ }
+ }
+}
+
+void NonTermContextProperty::AddToMap(size_t ntIndex, size_t index, const Factor *factor, float count)
+{
+ // grow the store vector only when this NT index has not been seen yet
+ if (ntIndex >= m_probStores.size()) {
+ m_probStores.resize(ntIndex + 1);
+ }
+
+ ProbStore &probStore = m_probStores[ntIndex];
+ probStore.AddToMap(index, factor, count);
+}
+
+float NonTermContextProperty::GetProb(size_t ntInd,
+ size_t contextInd,
+ const Factor *factor,
+ float smoothConstant) const
+{
+ UTIL_THROW_IF2(ntInd >= m_probStores.size(), "Invalid nt index=" << ntInd);
+ const ProbStore &probStore = m_probStores[ntInd];
+ float ret = probStore.GetProb(contextInd, factor, smoothConstant);
+ return ret;
+}
+
+//////////////////////////////////////////
+
+void NonTermContextProperty::ProbStore::AddToMap(size_t index, const Factor *factor, float count)
+{
+ Map &map = m_vec[index];
+
+ Map::iterator iter = map.find(factor);
+ if (iter == map.end()) {
+ map[factor] = count;
+ }
+ else {
+ float &currCount = iter->second;
+ currCount += count;
+ }
+
+ m_totalCount += count;
+}
+
+
+float NonTermContextProperty::ProbStore::GetProb(size_t contextInd,
+ const Factor *factor,
+ float smoothConstant) const
+{
+ float count = GetCount(contextInd, factor, smoothConstant);
+ float total = GetTotalCount(contextInd, smoothConstant);
+ float ret = count / total;
+ return ret;
+}
+
+float NonTermContextProperty::ProbStore::GetCount(size_t contextInd,
+ const Factor *factor,
+ float smoothConstant) const
+{
+ const Map &map = m_vec[contextInd];
+
+ float count = smoothConstant;
+ Map::const_iterator iter = map.find(factor);
+ if (iter == map.end()) {
+ // nothing
+ }
+ else {
+ count += iter->second;
+ }
+
+ return count;
+}
+
+float NonTermContextProperty::ProbStore::GetTotalCount(size_t contextInd, float smoothConstant) const
+{
+ const Map &map = m_vec[contextInd];
+ return m_totalCount + smoothConstant * map.size();
+}
+
+
+} // namespace Moses
+
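ProcessValue above walks a flat token stream: the first token is the number of non-terminals, then each record carries, per non-terminal, its index followed by four context words (left outside, left inside, right inside, right outside), and finally a count. A self-contained sketch of that walk; the example string is made up, not taken from a real phrase table:

// Standalone sketch of the NonTermContext property layout:
// [numNT] then repeated { per NT: ntIndex + 4 context words } count
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main() {
  std::string value = "1 0 the cat sat on 5 0 a dog ran away 2";
  std::vector<std::string> toks;
  std::istringstream in(value);
  for (std::string t; in >> t; ) toks.push_back(t);

  std::size_t numNT = 0;
  std::istringstream(toks[0]) >> numNT;

  std::size_t ind = 1;
  while (ind < toks.size()) {
    std::vector<std::string> context;
    for (std::size_t nt = 0; nt < numNT; ++nt) {
      ++ind;                                   // skip the NT index token
      for (int c = 0; c < 4; ++c) context.push_back(toks[ind++]);
    }
    float count = 0.0f;
    std::istringstream(toks[ind++]) >> count;
    std::cout << "count " << count << " for context:";
    for (std::size_t i = 0; i < context.size(); ++i) std::cout << " " << context[i];
    std::cout << "\n";
  }
  return 0;
}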
diff --git a/moses/PP/NonTermContextProperty.h b/moses/PP/NonTermContextProperty.h
new file mode 100644
index 000000000..56db9cb32
--- /dev/null
+++ b/moses/PP/NonTermContextProperty.h
@@ -0,0 +1,73 @@
+
+#pragma once
+
+#include "moses/PP/PhraseProperty.h"
+#include "util/exception.hh"
+#include <string>
+#include <list>
+#include <map>
+#include <vector>
+
+namespace Moses
+{
+class Factor;
+
+class NonTermContextProperty : public PhraseProperty
+{
+public:
+
+ NonTermContextProperty();
+ ~NonTermContextProperty();
+
+ virtual void ProcessValue(const std::string &value);
+
+ virtual const std::string *GetValueString() const {
+ UTIL_THROW2("NonTermContextProperty: value string not available in this phrase property");
+ return NULL;
+ };
+
+ float GetProb(size_t ntInd,
+ size_t contextInd,
+ const Factor *factor,
+ float smoothConstant) const;
+
+protected:
+
+ class ProbStore {
+ typedef std::map<const Factor*, float> Map; // map word -> prob
+ typedef std::vector<Map> Vec; // left outside, left inside, right inside, right outside
+ Vec m_vec;
+ float m_totalCount;
+
+ float GetCount(size_t contextInd,
+ const Factor *factor,
+ float smoothConstant) const;
+ float GetTotalCount(size_t contextInd, float smoothConstant) const;
+
+ public:
+
+ ProbStore()
+ :m_vec(4)
+ ,m_totalCount(0)
+ {}
+
+ float GetProb(size_t contextInd,
+ const Factor *factor,
+ float smoothConstant) const;
+
+ float GetSize(size_t index) const
+ { return m_vec[index].size(); }
+
+ void AddToMap(size_t index, const Factor *factor, float count);
+
+ };
+
+ // by nt index
+ std::vector<ProbStore> m_probStores;
+
+ void AddToMap(size_t ntIndex, size_t index, const Factor *factor, float count);
+
+};
+
+} // namespace Moses
+
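ProbStore::GetProb defined in the .cpp above is plain add-constant smoothing over one of the four context slots: prob = (count(word) + c) / (totalCount + c * distinctWordsInSlot). A tiny self-contained check with invented counts:

// Standalone sketch of the add-constant smoothing in ProbStore::GetProb
#include <iostream>
#include <map>
#include <string>

int main() {
  std::map<std::string, float> slot;     // one context slot, e.g. "left outside"
  slot["the"] = 6.0f;
  slot["a"]   = 3.0f;
  float totalCount = 9.0f;               // sum of all counts added to the store
  float c = 1.0f;                        // smoothConstant

  std::string query = "cat";             // unseen word
  std::map<std::string, float>::const_iterator it = slot.find(query);
  float count = c + (it == slot.end() ? 0.0f : it->second);
  float total = totalCount + c * slot.size();
  std::cout << "P(" << query << ") = " << count / total << "\n";  // 1 / 11
  return 0;
}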
diff --git a/moses/PP/OrientationPhraseProperty.cpp b/moses/PP/OrientationPhraseProperty.cpp
new file mode 100644
index 000000000..653a1bf3b
--- /dev/null
+++ b/moses/PP/OrientationPhraseProperty.cpp
@@ -0,0 +1,26 @@
+#include "moses/PP/OrientationPhraseProperty.h"
+#include <iostream>
+
+
+namespace Moses
+{
+
+void OrientationPhraseProperty::ProcessValue(const std::string &value)
+{
+ // bidirectional MSLR phrase orientation with 2x4 orientation classes:
+ // mono swap dright dleft
+
+ std::istringstream tokenizer(value);
+
+ try {
+ if (! (tokenizer >> m_l2rMonoProbability >> m_l2rSwapProbability >> m_l2rDrightProbability >> m_l2rDleftProbability
+ >> m_r2lMonoProbability >> m_r2lSwapProbability >> m_r2lDrightProbability >> m_r2lDleftProbability)) {
+ UTIL_THROW2("OrientationPhraseProperty: Not able to read value. Flawed property?");
+ }
+ } catch (const std::exception &e) {
+ UTIL_THROW2("OrientationPhraseProperty: Read error. Flawed property?");
+ }
+};
+
+} // namespace Moses
+
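The Orientation property value read above is eight whitespace-separated probabilities: the left-to-right block then the right-to-left block, each ordered mono, swap, dright, dleft. A self-contained sketch of the extraction; the numbers are made up:

// Standalone sketch of parsing the bidirectional MSLR orientation property
#include <iostream>
#include <sstream>
#include <string>

int main() {
  std::string value = "0.6 0.2 0.1 0.1 0.5 0.3 0.1 0.1";
  std::istringstream tok(value);
  float l2r[4], r2l[4];
  if (!(tok >> l2r[0] >> l2r[1] >> l2r[2] >> l2r[3]
            >> r2l[0] >> r2l[1] >> r2l[2] >> r2l[3])) {
    std::cerr << "flawed property\n";
    return 1;
  }
  std::cout << "P(mono | l2r) = " << l2r[0] << "\n";
  return 0;
}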
diff --git a/moses/PP/OrientationPhraseProperty.h b/moses/PP/OrientationPhraseProperty.h
new file mode 100644
index 000000000..32c6ff208
--- /dev/null
+++ b/moses/PP/OrientationPhraseProperty.h
@@ -0,0 +1,65 @@
+
+#pragma once
+
+#include "moses/PP/PhraseProperty.h"
+#include "util/exception.hh"
+#include <string>
+
+namespace Moses
+{
+
+class OrientationPhraseProperty : public PhraseProperty
+{
+public:
+ OrientationPhraseProperty() {};
+
+ virtual void ProcessValue(const std::string &value);
+
+
+ double GetLeftToRightProbabilityMono() const {
+ return m_l2rMonoProbability;
+ };
+
+ double GetLeftToRightProbabilitySwap() const {
+ return m_l2rSwapProbability;
+ };
+
+ double GetLeftToRightProbabilityDright() const {
+ return m_l2rDrightProbability;
+ };
+
+ double GetLeftToRightProbabilityDleft() const {
+ return m_l2rDleftProbability;
+ };
+
+
+ double GetRightToLeftProbabilityMono() const {
+ return m_r2lMonoProbability;
+ };
+
+ double GetRightToLeftProbabilitySwap() const {
+ return m_r2lSwapProbability;
+ };
+
+ double GetRightToLeftProbabilityDright() const {
+ return m_r2lDrightProbability;
+ };
+
+ double GetRightToLeftProbabilityDleft() const {
+ return m_r2lDleftProbability;
+ };
+
+
+ virtual const std::string *GetValueString() const {
+ UTIL_THROW2("OrientationPhraseProperty: value string not available in this phrase property");
+ return NULL;
+ };
+
+protected:
+
+ float m_l2rMonoProbability, m_l2rSwapProbability, m_l2rDrightProbability, m_l2rDleftProbability,
+ m_r2lMonoProbability, m_r2lSwapProbability, m_r2lDrightProbability, m_r2lDleftProbability;
+};
+
+} // namespace Moses
+
diff --git a/moses/PP/SourceLabelsPhraseProperty.cpp b/moses/PP/SourceLabelsPhraseProperty.cpp
index bca5c9a30..8e6a5dd6d 100644
--- a/moses/PP/SourceLabelsPhraseProperty.cpp
+++ b/moses/PP/SourceLabelsPhraseProperty.cpp
@@ -16,12 +16,12 @@ void SourceLabelsPhraseProperty::ProcessValue(const std::string &value)
std::istringstream tokenizer(value);
if (! (tokenizer >> m_nNTs)) { // first token: number of non-terminals (incl. left-hand side)
- UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read number of non-terminals. Flawed property?");
+ UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read number of non-terminals. Flawed property? " << value);
}
assert( m_nNTs > 0 );
if (! (tokenizer >> m_totalCount)) { // second token: overall rule count
- UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read overall rule count. Flawed property?");
+ UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read overall rule count. Flawed property? " << value);
}
assert( m_totalCount > 0.0 );
@@ -32,7 +32,7 @@ void SourceLabelsPhraseProperty::ProcessValue(const std::string &value)
std::priority_queue<float> ruleLabelledCountsPQ;
while (tokenizer.peek() != EOF) {
- try {
+// try {
SourceLabelsPhrasePropertyItem item;
size_t numberOfLHSsGivenRHS = std::numeric_limits<std::size_t>::max();
@@ -46,28 +46,28 @@ void SourceLabelsPhraseProperty::ProcessValue(const std::string &value)
for (size_t i=0; i<m_nNTs-1; ++i) { // RHS source non-terminal labels
size_t sourceLabelRHS;
if (! (tokenizer >> sourceLabelRHS) ) { // RHS source non-terminal label
- UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read right-hand side label index. Flawed property?");
+ UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read right-hand side label index. Flawed property? " << value);
}
item.m_sourceLabelsRHS.push_back(sourceLabelRHS);
}
if (! (tokenizer >> item.m_sourceLabelsRHSCount)) {
- UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read right-hand side count. Flawed property?");
+ UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read right-hand side count. Flawed property? " << value);
}
if (! (tokenizer >> numberOfLHSsGivenRHS)) {
- UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read number of left-hand sides. Flawed property?");
+ UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read number of left-hand sides. Flawed property? " << value);
}
}
for (size_t i=0; i<numberOfLHSsGivenRHS && tokenizer.peek()!=EOF; ++i) { // LHS source non-terminal labels seen with this RHS
size_t sourceLabelLHS;
if (! (tokenizer >> sourceLabelLHS)) { // LHS source non-terminal label
- UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read left-hand side label index. Flawed property?");
+ UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read left-hand side label index. Flawed property? " << value);
}
float ruleSourceLabelledCount;
if (! (tokenizer >> ruleSourceLabelledCount)) {
- UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read count. Flawed property?");
+ UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read count. Flawed property? " << value);
}
item.m_sourceLabelsLHSList.push_back( std::make_pair(sourceLabelLHS,ruleSourceLabelledCount) );
ruleLabelledCountsPQ.push(ruleSourceLabelledCount);
@@ -75,9 +75,9 @@ void SourceLabelsPhraseProperty::ProcessValue(const std::string &value)
m_sourceLabelItems.push_back(item);
- } catch (const std::exception &e) {
- UTIL_THROW2("SourceLabelsPhraseProperty: Read error. Flawed property?");
- }
+// } catch (const std::exception &e) {
+// UTIL_THROW2("SourceLabelsPhraseProperty: Read error. Flawed property?");
+// }
}
// keep only top N label vectors
diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp
index 10ac56627..726ac903f 100644
--- a/moses/Parameter.cpp
+++ b/moses/Parameter.cpp
@@ -36,6 +36,7 @@ using namespace std;
namespace Moses
{
+
/** define allowed parameters */
Parameter::Parameter()
{
@@ -49,7 +50,7 @@ Parameter::Parameter()
AddParam("factor-delimiter", "fd", "specify a different factor delimiter than the default");
AddParam("input-factors", "list of factors in the input");
AddParam("input-file", "i", "location of the input file to be translated");
- AddParam("inputtype", "text (0), confusion network (1), word lattice (2) (default = 0)");
+ AddParam("inputtype", "text (0), confusion network (1), word lattice (2), tree (3) (default = 0)");
AddParam("labeled-n-best-list", "print out labels for each weight type in n-best list. default is true");
AddParam("mark-unknown", "mu", "mark unknown words in output");
AddParam("max-partial-trans-opt", "maximum number of partial translation options per input span (during mapping steps)");
@@ -513,29 +514,29 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
}
UTIL_THROW_IF2(token.size() < 5, "Phrase table must have at least 5 scores");
- PhraseTableImplementation implementation = (PhraseTableImplementation) Scan<int>(token[0]);
+ int implementation = Scan<int>(token[0]);
string ptType;
switch (implementation) {
- case Memory:
+ case 0: // Memory
ptType = "PhraseDictionaryMemory";
break;
- case Binary:
+ case 1: // Binary
ptType = "PhraseDictionaryBinary";
break;
- case OnDisk:
+ case 2: // OnDisk
ptType = "PhraseDictionaryOnDisk";
break;
- case SCFG:
+ case 6: // SCFG
ptType = "PhraseDictionaryMemory";
break;
- case Compact:
+ case 12: // Compact
ptType = "PhraseDictionaryCompact";
break;
- case SuffixArray:
+ case 8: // SuffixArray
ptType = "PhraseDictionarySuffixArray";
break;
- case DSuffixArray:
+ case 14: // DSuffixArray
ptType = "PhraseDictionaryDynSuffixArray";
break;
default:
@@ -591,7 +592,7 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
ptLine << "num-features=" << numScoreComponent << " ";
ptLine << "table-limit=" << maxTargetPhrase[currDict] << " ";
- if (implementation == SuffixArray || implementation == DSuffixArray) {
+ if (implementation == 8 || implementation == 14) {
ptLine << "target-path=" << token[5] << " ";
ptLine << "alignment-path=" << token[6] << " ";
}
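The hunk above drops the PhraseTableImplementation enum constants in favour of the raw integer ids carried by old-style [ttable-file] arguments. The mapping visible in the switch, restated as a self-contained sketch (only the ids that appear in the hunk are covered):

// Standalone sketch of the legacy "implementation id" -> feature-name mapping
#include <iostream>
#include <string>

static std::string PtTypeFromId(int implementation) {
  switch (implementation) {
  case 0:  return "PhraseDictionaryMemory";         // Memory
  case 1:  return "PhraseDictionaryBinary";         // Binary
  case 2:  return "PhraseDictionaryOnDisk";         // OnDisk
  case 6:  return "PhraseDictionaryMemory";         // SCFG
  case 8:  return "PhraseDictionarySuffixArray";    // SuffixArray
  case 12: return "PhraseDictionaryCompact";        // Compact
  case 14: return "PhraseDictionaryDynSuffixArray"; // DSuffixArray
  default: return "";                               // unknown/unsupported
  }
}

int main() {
  std::cout << PtTypeFromId(2) << "\n";  // PhraseDictionaryOnDisk
  return 0;
}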
diff --git a/moses/RuleCubeItem.cpp b/moses/RuleCubeItem.cpp
index 4525d059e..970bac94d 100644
--- a/moses/RuleCubeItem.cpp
+++ b/moses/RuleCubeItem.cpp
@@ -79,7 +79,7 @@ void RuleCubeItem::CreateHypothesis(const ChartTranslationOptions &transOpt,
ChartManager &manager)
{
m_hypothesis = new ChartHypothesis(transOpt, *this, manager);
- m_hypothesis->Evaluate();
+ m_hypothesis->EvaluateWhenApplied();
m_score = m_hypothesis->GetTotalScore();
}
diff --git a/moses/ScoreComponentCollection.cpp b/moses/ScoreComponentCollection.cpp
index 52ec00dd4..eedaa589e 100644
--- a/moses/ScoreComponentCollection.cpp
+++ b/moses/ScoreComponentCollection.cpp
@@ -179,25 +179,31 @@ void ScoreComponentCollection::SparseL2Regularize(float lambda)
m_scores.sparseL2regularize(lambda);
}
-void ScoreComponentCollection::Save(ostream& out) const
+void ScoreComponentCollection::Save(ostream& out, bool multiline) const
{
+ string sep = " ";
+ string linesep = "\n";
+ if (!multiline) {
+ sep = "=";
+ linesep = " ";
+ }
ScoreIndexMap::const_iterator iter = s_scoreIndexes.begin();
for (; iter != s_scoreIndexes.end(); ++iter ) {
string name = iter->first->GetScoreProducerDescription();
IndexPair ip = iter->second; // feature indices
if (ip.second-ip.first == 1) {
- out << name << " " << m_scores[ip.first] << endl;
+ out << name << sep << m_scores[ip.first] << linesep;
} else {
for (size_t i=ip.first; i < ip.second; ++i) {
ostringstream fullname;
fullname << name << "_" << (i + 1 - ip.first);
- out << fullname.str() << " " << m_scores[i] << endl;
+ out << fullname.str() << sep << m_scores[i] << linesep;
}
}
}
// write sparse features
- m_scores.write(out);
+ m_scores.write(out,sep,linesep);
}
void ScoreComponentCollection::Save(const string& filename) const
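The new multiline flag above only swaps the separators: one "name value" pair per line in the default layout, or "name=value" pairs on a single space-separated line otherwise. A self-contained sketch of the two layouts over a toy weight list (feature names here are hypothetical):

// Standalone sketch of the two Save() layouts selected by 'multiline'
#include <iostream>
#include <string>
#include <utility>
#include <vector>

static void Save(std::ostream& out,
                 const std::vector<std::pair<std::string, float> >& scores,
                 bool multiline) {
  std::string sep = " ", linesep = "\n";
  if (!multiline) { sep = "="; linesep = " "; }
  for (std::size_t i = 0; i < scores.size(); ++i)
    out << scores[i].first << sep << scores[i].second << linesep;
}

int main() {
  std::vector<std::pair<std::string, float> > scores;
  scores.push_back(std::make_pair(std::string("LM0"), 0.5f));
  scores.push_back(std::make_pair(std::string("WordPenalty0"), -1.0f));
  Save(std::cout, scores, true);   // "LM0 0.5" and "WordPenalty0 -1" on separate lines
  Save(std::cout, scores, false);  // "LM0=0.5 WordPenalty0=-1 " on one line
  std::cout << "\n";
  return 0;
}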
diff --git a/moses/ScoreComponentCollection.h b/moses/ScoreComponentCollection.h
index 3cddbca67..b44216d29 100644
--- a/moses/ScoreComponentCollection.h
+++ b/moses/ScoreComponentCollection.h
@@ -261,12 +261,21 @@ public:
void PlusEquals(const FeatureFunction* sp, const ScorePair &scorePair);
+ // Add score by index
+ void PlusEquals(size_t index, float score) {
+ m_scores[index] += score;
+ }
+
//For features which have an unbounded number of components
void SparsePlusEquals(const std::string& full_name, float score) {
FName fname(full_name);
m_scores[fname] += score;
}
+ void SparsePlusEquals(const FName& fname, float score) {
+ m_scores[fname] += score;
+ }
+
void Assign(const FeatureFunction* sp, const std::vector<float>& scores);
//! Special version Assign(ScoreProducer, vector<float>)
@@ -279,7 +288,7 @@ public:
m_scores[indexes.first] = score;
}
- // Assign core weight by index
+ // Assign score by index
void Assign(size_t index, float score) {
m_scores[index] = score;
}
@@ -350,6 +359,11 @@ public:
m_scores.capMin(minValue);
}
+ std::pair<size_t,size_t> GetIndexesForProducer(const FeatureFunction* sp) const {
+ IndexPair indexPair = GetIndexes(sp);
+ return indexPair;
+ }
+
//! if a FeatureFunction produces a single score (for example, a language model score)
//! this will return it. If not, this method will throw
float GetScoreForProducer(const FeatureFunction* sp) const {
@@ -379,7 +393,7 @@ public:
size_t SparseL1Regularize(float lambda);
void SparseL2Regularize(float lambda);
void Save(const std::string& filename) const;
- void Save(std::ostream&) const;
+ void Save(std::ostream&, bool multiline=true) const;
void IncrementSparseHopeFeatures() {
m_scores.incrementSparseHopeFeatures();
diff --git a/moses/ScoreComponentCollectionTest.cpp b/moses/ScoreComponentCollectionTest.cpp
index de542d1f6..a238d66b8 100644
--- a/moses/ScoreComponentCollectionTest.cpp
+++ b/moses/ScoreComponentCollectionTest.cpp
@@ -34,16 +34,16 @@ class MockStatelessFeatureFunction : public StatelessFeatureFunction
public:
MockStatelessFeatureFunction(size_t n, const string &line) :
StatelessFeatureFunction(n, line) {}
- void Evaluate(const Hypothesis&, ScoreComponentCollection*) const {}
- void EvaluateChart(const ChartHypothesis&, ScoreComponentCollection*) const {}
- void Evaluate(const InputType &input
+ void EvaluateWhenApplied(const Hypothesis&, ScoreComponentCollection*) const {}
+ void EvaluateWhenApplied(const ChartHypothesis&, ScoreComponentCollection*) const {}
+ void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore) const
{}
- void Evaluate(const Phrase &source
+ void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
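The renames in this test mirror the split of the old overloaded Evaluate calls into three distinctly named hooks: EvaluateInIsolation (at phrase-table load, rule only), EvaluateWithSourceContext (once the input and its path are known), and EvaluateWhenApplied (during search, on a hypothesis). A self-contained sketch with stand-in types, not the real Moses classes or signatures:

// Standalone sketch of the three renamed evaluation hooks
#include <iostream>

struct Phrase {};          // stand-ins for the real Moses types
struct TargetPhrase {};
struct InputSentence {};
struct Hypothesis {};

class ToyFeature {
public:
  // at phrase-table load time: only the rule itself is visible
  void EvaluateInIsolation(const Phrase&, const TargetPhrase&) const {
    std::cout << "scored in isolation\n";
  }
  // once the input is known: rule plus the source span it applies to
  void EvaluateWithSourceContext(const InputSentence&, const TargetPhrase&) const {
    std::cout << "scored with source context\n";
  }
  // during search: the partial hypothesis the rule was applied to
  void EvaluateWhenApplied(const Hypothesis&) const {
    std::cout << "scored when applied\n";
  }
};

int main() {
  ToyFeature ff;
  ff.EvaluateInIsolation(Phrase(), TargetPhrase());
  ff.EvaluateWithSourceContext(InputSentence(), TargetPhrase());
  ff.EvaluateWhenApplied(Hypothesis());
  return 0;
}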
diff --git a/moses/SearchNormal.cpp b/moses/SearchNormal.cpp
index d40324c15..80ff37703 100644
--- a/moses/SearchNormal.cpp
+++ b/moses/SearchNormal.cpp
@@ -288,7 +288,7 @@ void SearchNormal::ExpandHypothesis(const Hypothesis &hypothesis, const Translat
stats.StopTimeBuildHyp();
}
if (newHypo==NULL) return;
- newHypo->Evaluate(m_transOptColl.GetFutureScore());
+ newHypo->EvaluateWhenApplied(m_transOptColl.GetFutureScore());
} else
// early discarding: check if hypothesis is too bad to build
{
diff --git a/moses/SearchNormalBatch.cpp b/moses/SearchNormalBatch.cpp
index 244ebbf05..9700a0694 100644
--- a/moses/SearchNormalBatch.cpp
+++ b/moses/SearchNormalBatch.cpp
@@ -159,13 +159,13 @@ void SearchNormalBatch::EvalAndMergePartialHypos()
++sfff_iter) {
const StatefulFeatureFunction &ff = *(sfff_iter->second);
int state_idx = sfff_iter->first;
- hypo->EvaluateWith(ff, state_idx);
+ hypo->EvaluateWhenApplied(ff, state_idx);
}
std::vector<const StatelessFeatureFunction*>::iterator slff_iter;
for (slff_iter = m_stateless_ffs.begin();
slff_iter != m_stateless_ffs.end();
++slff_iter) {
- hypo->EvaluateWith(**slff_iter);
+ hypo->EvaluateWhenApplied(**slff_iter);
}
}
@@ -190,7 +190,7 @@ void SearchNormalBatch::EvalAndMergePartialHypos()
dlm_iter != m_dlm_ffs.end();
++dlm_iter) {
LanguageModel &lm = *(dlm_iter->second);
- hypo->EvaluateWith(lm, (*dlm_iter).first);
+ hypo->EvaluateWhenApplied(lm, (*dlm_iter).first);
}
// Put completed hypothesis onto its stack.
diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp
index badb189d4..6ab6218b1 100644
--- a/moses/StaticData.cpp
+++ b/moses/StaticData.cpp
@@ -125,6 +125,9 @@ bool StaticData::LoadData(Parameter *parameter)
if (m_inputType == 2) {
s_it = "word lattice";
}
+ if (m_inputType == 3) {
+ s_it = "tree";
+ }
VERBOSE(2,"input type is: "<<s_it<<"\n");
if(m_parameter->GetParam("recover-input-path").size()) {
diff --git a/moses/StaticData.h b/moses/StaticData.h
index 68e1ee60c..c32f92106 100644
--- a/moses/StaticData.h
+++ b/moses/StaticData.h
@@ -448,10 +448,6 @@ public:
return m_allWeights.GetScoresForProducer(sp);
}
- float GetSparseWeight(const FName& featureName) const {
- return m_allWeights.GetSparseWeight(featureName);
- }
-
//Weights for feature with fixed number of values
void SetWeights(const FeatureFunction* sp, const std::vector<float>& weights);
diff --git a/moses/SyntacticLanguageModel.h b/moses/SyntacticLanguageModel.h
index 6e88d85c1..76882a4d1 100644
--- a/moses/SyntacticLanguageModel.h
+++ b/moses/SyntacticLanguageModel.h
@@ -30,7 +30,7 @@ public:
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
- FFState* EvaluateChart(const ChartHypothesis& cur_hypo,
+ FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo,
int featureID,
ScoreComponentCollection* accumulator) const {
throw std::runtime_error("Syntactic LM can only be used with phrase-based decoder.");
diff --git a/moses/TargetPhrase.cpp b/moses/TargetPhrase.cpp
index d356ab2cc..30419e9c7 100644
--- a/moses/TargetPhrase.cpp
+++ b/moses/TargetPhrase.cpp
@@ -38,24 +38,7 @@ using namespace std;
namespace Moses
{
-TargetPhrase::TargetPhrase( std::string out_string)
- :Phrase(0)
- , m_fullScore(0.0)
- , m_futureScore(0.0)
- , m_alignTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
- , m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
- , m_lhsTarget(NULL)
- , m_ruleSource(NULL)
-{
-
- //ACAT
- const StaticData &staticData = StaticData::Instance();
- CreateFromString(Output, staticData.GetInputFactorOrder(), out_string,
- // staticData.GetFactorDelimiter(), // eliminated [UG]
- NULL);
-}
-
-TargetPhrase::TargetPhrase()
+TargetPhrase::TargetPhrase(const PhraseDictionary *pt)
:Phrase()
, m_fullScore(0.0)
, m_futureScore(0.0)
@@ -63,10 +46,11 @@ TargetPhrase::TargetPhrase()
, m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
, m_lhsTarget(NULL)
, m_ruleSource(NULL)
+ , m_container(pt)
{
}
-TargetPhrase::TargetPhrase(const Phrase &phrase)
+TargetPhrase::TargetPhrase(const Phrase &phrase, const PhraseDictionary *pt)
: Phrase(phrase)
, m_fullScore(0.0)
, m_futureScore(0.0)
@@ -74,6 +58,7 @@ TargetPhrase::TargetPhrase(const Phrase &phrase)
, m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
, m_lhsTarget(NULL)
, m_ruleSource(NULL)
+ , m_container(pt)
{
}
@@ -84,6 +69,7 @@ TargetPhrase::TargetPhrase(const TargetPhrase &copy)
, m_scoreBreakdown(copy.m_scoreBreakdown)
, m_alignTerm(copy.m_alignTerm)
, m_alignNonTerm(copy.m_alignNonTerm)
+ , m_container(copy.m_container)
{
if (copy.m_lhsTarget) {
m_lhsTarget = new Word(*copy.m_lhsTarget);
@@ -115,13 +101,13 @@ void TargetPhrase::WriteToRulePB(hgmert::Rule* pb) const
}
#endif
-void TargetPhrase::Evaluate(const Phrase &source)
+void TargetPhrase::EvaluateInIsolation(const Phrase &source)
{
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
- Evaluate(source, ffs);
+ EvaluateInIsolation(source, ffs);
}
-void TargetPhrase::Evaluate(const Phrase &source, const std::vector<FeatureFunction*> &ffs)
+void TargetPhrase::EvaluateInIsolation(const Phrase &source, const std::vector<FeatureFunction*> &ffs)
{
if (ffs.size()) {
const StaticData &staticData = StaticData::Instance();
@@ -129,7 +115,7 @@ void TargetPhrase::Evaluate(const Phrase &source, const std::vector<FeatureFunct
for (size_t i = 0; i < ffs.size(); ++i) {
const FeatureFunction &ff = *ffs[i];
if (! staticData.IsFeatureFunctionIgnored( ff )) {
- ff.Evaluate(source, *this, m_scoreBreakdown, futureScoreBreakdown);
+ ff.EvaluateInIsolation(source, *this, m_scoreBreakdown, futureScoreBreakdown);
}
}
@@ -140,7 +126,7 @@ void TargetPhrase::Evaluate(const Phrase &source, const std::vector<FeatureFunct
}
}
-void TargetPhrase::Evaluate(const InputType &input, const InputPath &inputPath)
+void TargetPhrase::EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath)
{
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
const StaticData &staticData = StaticData::Instance();
@@ -148,7 +134,7 @@ void TargetPhrase::Evaluate(const InputType &input, const InputPath &inputPath)
for (size_t i = 0; i < ffs.size(); ++i) {
const FeatureFunction &ff = *ffs[i];
if (! staticData.IsFeatureFunctionIgnored( ff )) {
- ff.Evaluate(input, inputPath, *this, NULL, m_scoreBreakdown, &futureScoreBreakdown);
+ ff.EvaluateWithSourceContext(input, inputPath, *this, NULL, m_scoreBreakdown, &futureScoreBreakdown);
}
}
float weightedScore = m_scoreBreakdown.GetWeightedScore();
diff --git a/moses/TargetPhrase.h b/moses/TargetPhrase.h
index 1e9e51c79..d23e946c0 100644
--- a/moses/TargetPhrase.h
+++ b/moses/TargetPhrase.h
@@ -41,6 +41,8 @@ namespace Moses
{
class FeatureFunction;
class InputPath;
+class InputPath;
+class PhraseDictionary;
/** represents an entry on the target side of a phrase table (scores, translation, alignment)
*/
@@ -60,22 +62,23 @@ private:
typedef std::map<std::string, boost::shared_ptr<PhraseProperty> > Properties;
Properties m_properties;
+ const PhraseDictionary *m_container;
+
public:
- TargetPhrase();
+ TargetPhrase(const PhraseDictionary *pt = NULL);
TargetPhrase(const TargetPhrase &copy);
- explicit TargetPhrase(std::string out_string);
- explicit TargetPhrase(const Phrase &targetPhrase);
+ explicit TargetPhrase(const Phrase &targetPhrase, const PhraseDictionary *pt);
~TargetPhrase();
// 1st evaluate method. Called during loading of phrase table.
- void Evaluate(const Phrase &source, const std::vector<FeatureFunction*> &ffs);
+ void EvaluateInIsolation(const Phrase &source, const std::vector<FeatureFunction*> &ffs);
// as above, score with ALL FFs
// Used only for OOV processing. Doesn't have a phrase table connected with it
- void Evaluate(const Phrase &source);
+ void EvaluateInIsolation(const Phrase &source);
// 'inputPath' is guaranteed to be the raw substring from the input. No factors were added or taken away
- void Evaluate(const InputType &input, const InputPath &inputPath);
+ void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath);
void SetSparseScore(const FeatureFunction* translationScoreProducer, const StringPiece &sparseString);
@@ -132,6 +135,9 @@ public:
return m_ruleSource;
}
+ const PhraseDictionary *GetContainer() const
+ { return m_container; }
+
// To be set by the FF that needs it, by default the rule source = NULL
// make a copy of the source side of the rule
void SetRuleSource(const Phrase &ruleSource) const;
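With the m_container member added above, a TargetPhrase can now report which phrase table produced it, defaulting to NULL for phrases that have no table (e.g. OOV handling). A minimal sketch with stand-in types:

// Standalone sketch of the new back-pointer from a target phrase to its table
#include <cstddef>
#include <iostream>

struct PhraseDictionary { const char* name; };

class TargetPhrase {
  const PhraseDictionary* m_container;
public:
  explicit TargetPhrase(const PhraseDictionary* pt = NULL) : m_container(pt) {}
  const PhraseDictionary* GetContainer() const { return m_container; }
};

int main() {
  PhraseDictionary pt = { "PhraseDictionaryMemory" };
  TargetPhrase fromTable(&pt);
  TargetPhrase oov;                       // e.g. unknown-word phrase: no table
  std::cout << (fromTable.GetContainer() ? fromTable.GetContainer()->name : "none")
            << " / " << (oov.GetContainer() ? oov.GetContainer()->name : "none")
            << "\n";
  return 0;
}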
diff --git a/moses/TranslationModel/BilingualDynSuffixArray.cpp b/moses/TranslationModel/BilingualDynSuffixArray.cpp
index 4fa930124..cfdbc3aa6 100644
--- a/moses/TranslationModel/BilingualDynSuffixArray.cpp
+++ b/moses/TranslationModel/BilingualDynSuffixArray.cpp
@@ -377,9 +377,9 @@ TrgPhraseFromSntIdx(const PhrasePair& phrasepair) const
TargetPhrase*
BilingualDynSuffixArray::
-GetMosesFactorIDs(const SAPhrase& phrase, const Phrase& sourcePhrase) const
+GetMosesFactorIDs(const SAPhrase& phrase, const Phrase& sourcePhrase, const PhraseDictionary *pt) const
{
- TargetPhrase* targetPhrase = new TargetPhrase();
+ TargetPhrase* targetPhrase = new TargetPhrase(pt);
for(size_t i=0; i < phrase.words.size(); ++i) { // look up trg words
Word& word = m_trgVocab->GetWord( phrase.words[i]);
UTIL_THROW_IF2(word == m_trgVocab->GetkOOVWord(),
diff --git a/moses/TranslationModel/BilingualDynSuffixArray.h b/moses/TranslationModel/BilingualDynSuffixArray.h
index 48d719b7e..5b52b8814 100644
--- a/moses/TranslationModel/BilingualDynSuffixArray.h
+++ b/moses/TranslationModel/BilingualDynSuffixArray.h
@@ -128,7 +128,7 @@ public:
GatherCands(Phrase const& src, map<SAPhrase, vector<float> >& pstats) const;
TargetPhrase*
- GetMosesFactorIDs(const SAPhrase&, const Phrase& sourcePhrase) const;
+ GetMosesFactorIDs(const SAPhrase&, const Phrase& sourcePhrase, const PhraseDictionary *pt) const;
private:
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.h b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.h
index c3672ac47..e2ba6779c 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.h
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.h
@@ -50,6 +50,19 @@ protected:
StackVec m_stackVec;
};
+// struct that caches cellLabel, its end position and score for quicker lookup
+struct ChartCellCache
+{
+ ChartCellCache(size_t endPos, const ChartCellLabel* cellLabel, float score)
+ : endPos(endPos)
+ , cellLabel(cellLabel)
+ , score(score) {}
+
+ size_t endPos;
+ const ChartCellLabel* cellLabel;
+ float score;
+};
+
} // namespace Moses
#endif
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp
index cdcf080b7..c9508873b 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp
@@ -22,10 +22,12 @@
#include "moses/ChartParser.h"
#include "moses/InputType.h"
+#include "moses/Terminal.h"
#include "moses/ChartParserCallback.h"
#include "moses/StaticData.h"
#include "moses/NonTerminal.h"
#include "moses/ChartCellCollection.h"
+#include "moses/FactorCollection.h"
#include "moses/TranslationModel/PhraseDictionaryMemory.h"
using namespace std;
@@ -59,9 +61,13 @@ void ChartRuleLookupManagerMemory::GetChartRuleCollection(
m_lastPos = lastPos;
m_stackVec.clear();
+ m_stackScores.clear();
m_outColl = &outColl;
m_unaryPos = absEndPos-1; // rules ending in this position are unary and should not be added to collection
+ // create/update data structure to quickly look up all chart cells that match start position and label.
+ UpdateCompressedMatrix(startPos, absEndPos, lastPos);
+
const PhraseDictionaryNodeMemory &rootNode = m_ruleTable.GetRootNode();
// size-1 terminal rules
@@ -77,7 +83,7 @@ void ChartRuleLookupManagerMemory::GetChartRuleCollection(
}
// all rules starting with nonterminal
else if (absEndPos > startPos) {
- GetNonTerminalExtension(&rootNode, startPos, absEndPos-1);
+ GetNonTerminalExtension(&rootNode, startPos);
// all (non-unary) rules starting with terminal
if (absEndPos == startPos+1) {
GetTerminalExtension(&rootNode, absEndPos-1);
@@ -94,21 +100,87 @@ void ChartRuleLookupManagerMemory::GetChartRuleCollection(
}
+// Create/update compressed matrix that stores all valid ChartCellLabels for a given start position and label.
+void ChartRuleLookupManagerMemory::UpdateCompressedMatrix(size_t startPos,
+ size_t origEndPos,
+ size_t lastPos) {
+
+ std::vector<size_t> endPosVec;
+ size_t numNonTerms = FactorCollection::Instance().GetNumNonTerminals();
+ m_compressedMatrixVec.resize(lastPos+1);
+
+ // we only need to update cell at [startPos, origEndPos-1] for initial lookup
+ if (startPos < origEndPos) {
+ endPosVec.push_back(origEndPos-1);
+ }
+
+ // update all cells starting from startPos+1 for lookup of rule extensions
+ else if (startPos == origEndPos)
+ {
+ startPos++;
+ for (size_t endPos = startPos; endPos <= lastPos; endPos++) {
+ endPosVec.push_back(endPos);
+ }
+ //re-use data structure for cells with later start position, but remove chart cells that would break max-chart-span
+ for (size_t pos = startPos+1; pos <= lastPos; pos++) {
+ CompressedMatrix & cellMatrix = m_compressedMatrixVec[pos];
+ cellMatrix.resize(numNonTerms);
+ for (size_t i = 0; i < numNonTerms; i++) {
+ if (!cellMatrix[i].empty() && cellMatrix[i].back().endPos > lastPos) {
+ cellMatrix[i].pop_back();
+ }
+ }
+ }
+ }
+
+ if (startPos > lastPos) {
+ return;
+ }
+
+ // populate compressed matrix with all chart cells that start at current start position
+ CompressedMatrix & cellMatrix = m_compressedMatrixVec[startPos];
+ cellMatrix.clear();
+ cellMatrix.resize(numNonTerms);
+ for (std::vector<size_t>::iterator p = endPosVec.begin(); p != endPosVec.end(); ++p) {
+
+ size_t endPos = *p;
+ // target non-terminal labels for the span
+ const ChartCellLabelSet &targetNonTerms = GetTargetLabelSet(startPos, endPos);
+
+ if (targetNonTerms.GetSize() == 0) {
+ continue;
+ }
+
+#if !defined(UNLABELLED_SOURCE)
+ // source non-terminal labels for the span
+ const InputPath &inputPath = GetParser().GetInputPath(startPos, endPos);
+ const std::vector<bool> &sourceNonTermArray = inputPath.GetNonTerminalArray();
+
+ // can this ever be true? Moses seems to pad the non-terminal set of the input with [X]
+ if (inputPath.GetNonTerminalSet().size() == 0) {
+ continue;
+ }
+#endif
+
+ for (size_t i = 0; i < numNonTerms; i++) {
+ const ChartCellLabel *cellLabel = targetNonTerms.Find(i);
+ if (cellLabel != NULL) {
+ float score = cellLabel->GetBestScore(m_outColl);
+ cellMatrix[i].push_back(ChartCellCache(endPos, cellLabel, score));
+ }
+ }
+ }
+}
+
// if a (partial) rule matches, add it to list completed rules (if non-unary and non-empty), and try find expansions that have this partial rule as prefix.
void ChartRuleLookupManagerMemory::AddAndExtend(
const PhraseDictionaryNodeMemory *node,
- size_t endPos,
- const ChartCellLabel *cellLabel) {
-
- // add backpointer
- if (cellLabel != NULL) {
- m_stackVec.push_back(cellLabel);
- }
+ size_t endPos) {
const TargetPhraseCollection &tpc = node->GetTargetPhraseCollection();
// add target phrase collection (except if rule is empty or unary)
if (!tpc.IsEmpty() && endPos != m_unaryPos) {
- m_completedRules[endPos].Add(tpc, m_stackVec, *m_outColl);
+ m_completedRules[endPos].Add(tpc, m_stackVec, m_stackScores, *m_outColl);
}
// get all further extensions of rule (until reaching end of sentence or max-chart-span)
@@ -117,18 +189,12 @@ void ChartRuleLookupManagerMemory::AddAndExtend(
GetTerminalExtension(node, endPos+1);
}
if (!node->GetNonTerminalMap().empty()) {
- for (size_t newEndPos = endPos+1; newEndPos <= m_lastPos; newEndPos++) {
- GetNonTerminalExtension(node, endPos+1, newEndPos);
- }
+ GetNonTerminalExtension(node, endPos+1);
}
}
-
- // remove backpointer
- if (cellLabel != NULL) {
- m_stackVec.pop_back();
- }
}
+
// search all possible terminal extensions of a partial rule (pointed at by node) at a given position
// recursively try to expand partial rules into full rules up to m_lastPos.
void ChartRuleLookupManagerMemory::GetTerminalExtension(
@@ -142,9 +208,10 @@ void ChartRuleLookupManagerMemory::GetTerminalExtension(
if (terminals.size() < 5) {
for (PhraseDictionaryNodeMemory::TerminalMap::const_iterator iter = terminals.begin(); iter != terminals.end(); ++iter) {
const Word & word = iter->first;
- if (word == sourceWord) {
+ if (TerminalEqualityPred()(word, sourceWord)) {
const PhraseDictionaryNodeMemory *child = & iter->second;
- AddAndExtend(child, pos, NULL);
+ AddAndExtend(child, pos);
+ break;
}
}
}
@@ -152,39 +219,26 @@ void ChartRuleLookupManagerMemory::GetTerminalExtension(
else {
const PhraseDictionaryNodeMemory *child = node->GetChild(sourceWord);
if (child != NULL) {
- AddAndExtend(child, pos, NULL);
+ AddAndExtend(child, pos);
}
}
}
-// search all nonterminal possible nonterminal extensions of a partial rule (pointed at by node) for a given span (StartPos, endPos).
+// search all possible nonterminal extensions of a partial rule (pointed at by node) for a variable span (starting from startPos).
// recursively try to expand partial rules into full rules up to m_lastPos.
void ChartRuleLookupManagerMemory::GetNonTerminalExtension(
const PhraseDictionaryNodeMemory *node,
- size_t startPos,
- size_t endPos) {
+ size_t startPos) {
- // target non-terminal labels for the span
- const ChartCellLabelSet &targetNonTerms = GetTargetLabelSet(startPos, endPos);
-
- if (targetNonTerms.GetSize() == 0) {
- return;
- }
-
-#if !defined(UNLABELLED_SOURCE)
- // source non-terminal labels for the span
- const InputPath &inputPath = GetParser().GetInputPath(startPos, endPos);
- const std::vector<bool> &sourceNonTermArray = inputPath.GetNonTerminalArray();
-
- // can this ever be true? Moses seems to pad the non-terminal set of the input with [X]
- if (inputPath.GetNonTerminalSet().size() == 0) {
- return;
- }
-#endif
+ const CompressedMatrix &compressedMatrix = m_compressedMatrixVec[startPos];
// non-terminal labels in phrase dictionary node
const PhraseDictionaryNodeMemory::NonTerminalMap & nonTermMap = node->GetNonTerminalMap();
+ // make room for back pointer
+ m_stackVec.push_back(NULL);
+ m_stackScores.push_back(0);
+
// loop over possible expansions of the rule
PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator p;
PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator end = nonTermMap.end();
@@ -193,37 +247,32 @@ void ChartRuleLookupManagerMemory::GetNonTerminalExtension(
#if defined(UNLABELLED_SOURCE)
const Word &targetNonTerm = p->first;
#else
- const PhraseDictionaryNodeMemory::NonTerminalMapKey &key = p->first;
- const Word &sourceNonTerm = key.first;
- // check if source label matches
- if (! sourceNonTermArray[sourceNonTerm[0]->GetId()]) {
- continue;
- }
- const Word &targetNonTerm = key.second;
+ const Word &targetNonTerm = p->first.second;
#endif
+ const PhraseDictionaryNodeMemory *child = &p->second;
//soft matching of NTs
if (m_isSoftMatching && !m_softMatchingMap[targetNonTerm[0]->GetId()].empty()) {
const std::vector<Word>& softMatches = m_softMatchingMap[targetNonTerm[0]->GetId()];
for (std::vector<Word>::const_iterator softMatch = softMatches.begin(); softMatch != softMatches.end(); ++softMatch) {
- const ChartCellLabel *cellLabel = targetNonTerms.Find(*softMatch);
- if (cellLabel == NULL) {
- continue;
+ const CompressedColumn &matches = compressedMatrix[(*softMatch)[0]->GetId()];
+ for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
+ m_stackVec.back() = match->cellLabel;
+ m_stackScores.back() = match->score;
+ AddAndExtend(child, match->endPos);
}
- // create new rule
- const PhraseDictionaryNodeMemory &child = p->second;
- AddAndExtend(&child, endPos, cellLabel);
}
} // end of soft matches lookup
- const ChartCellLabel *cellLabel = targetNonTerms.Find(targetNonTerm);
- if (cellLabel == NULL) {
- continue;
+ const CompressedColumn &matches = compressedMatrix[targetNonTerm[0]->GetId()];
+ for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
+ m_stackVec.back() = match->cellLabel;
+ m_stackScores.back() = match->score;
+ AddAndExtend(child, match->endPos);
}
- // create new rule
- const PhraseDictionaryNodeMemory &child = p->second;
- AddAndExtend(&child, endPos, cellLabel);
}
+ // remove last back pointer
+ m_stackVec.pop_back();
+ m_stackScores.pop_back();
}
-
} // namespace Moses
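The rewritten GetNonTerminalExtension above no longer pushes and pops a back pointer for every candidate chart cell; it reserves one slot on m_stackVec/m_stackScores, overwrites that slot for each cached match, and pops once when the nonterminal loop is done. A minimal sketch of that slot-reuse pattern, with simplified placeholder types (not the real StackVec/ChartCellLabel/AddAndExtend):

// Minimal sketch of the back-pointer slot reuse; types are simplified stand-ins.
#include <iostream>
#include <vector>

struct Match { int endPos; float score; };

static void Extend(const std::vector<int>& stackVec,
                   const std::vector<float>& stackScores) {
  // stands in for AddAndExtend: the current slot values are consumed here
  std::cout << "rule ends at " << stackVec.back()
            << " with stack score " << stackScores.back() << "\n";
}

int main() {
  std::vector<int> stackVec;       // back pointers (here: just end positions)
  std::vector<float> stackScores;  // best score per back pointer

  // make room for one back pointer, reused for every cached match
  stackVec.push_back(-1);
  stackScores.push_back(0.0f);

  const Match matches[] = { {3, -1.2f}, {4, -2.5f}, {6, -3.1f} };
  for (const Match& m : matches) {
    stackVec.back() = m.endPos;    // overwrite instead of push/pop per match
    stackScores.back() = m.score;
    Extend(stackVec, stackScores);
  }

  // remove the slot once all expansions of this non-terminal are done
  stackVec.pop_back();
  stackScores.pop_back();
}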
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.h b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.h
index 223857bc3..80b6f7246 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.h
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.h
@@ -40,6 +40,10 @@ class WordsRange;
class ChartRuleLookupManagerMemory : public ChartRuleLookupManagerCYKPlus
{
public:
+ typedef std::vector<ChartCellCache> CompressedColumn;
+ typedef std::vector<CompressedColumn> CompressedMatrix;
+
+
ChartRuleLookupManagerMemory(const ChartParser &parser,
const ChartCellCollectionBase &cellColl,
const PhraseDictionaryMemory &ruleTable);
@@ -53,19 +57,21 @@ public:
private:
-void GetTerminalExtension(
+ void GetTerminalExtension(
const PhraseDictionaryNodeMemory *node,
size_t pos);
-void GetNonTerminalExtension(
+ void GetNonTerminalExtension(
const PhraseDictionaryNodeMemory *node,
- size_t startPos,
- size_t endPos);
+ size_t startPos);
void AddAndExtend(
const PhraseDictionaryNodeMemory *node,
+ size_t endPos);
+
+ void UpdateCompressedMatrix(size_t startPos,
size_t endPos,
- const ChartCellLabel *cellLabel);
+ size_t lastPos);
const PhraseDictionaryMemory &m_ruleTable;
@@ -80,8 +86,13 @@ void GetNonTerminalExtension(
size_t m_unaryPos;
StackVec m_stackVec;
+ std::vector<float> m_stackScores;
+ std::vector<const Word*> m_sourceWords;
ChartParserCallback* m_outColl;
+ std::vector<CompressedMatrix> m_compressedMatrixVec;
+
+
};
} // namespace Moses
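The header refers to ChartCellCache, whose declaration is not shown in this portion of the diff. From the way the .cpp code constructs ChartCellCache(endPos, cellLabel, score) and reads match->endPos, match->cellLabel and match->score, it presumably has roughly the following shape; this is an inference, not the actual declaration:

// Assumed shape of ChartCellCache, inferred from its use in the .cpp files.
class ChartCellLabel;  // forward declaration; the real type lives in the chart code

struct ChartCellCache {
  ChartCellCache(size_t endPos_, const ChartCellLabel* cellLabel_, float score_)
    : endPos(endPos_), cellLabel(cellLabel_), score(score_) {}

  size_t endPos;                    // end of the span covered by this cell
  const ChartCellLabel* cellLabel;  // back pointer into the chart
  float score;                      // best (estimated) score for that label
};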
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp
index 7979a050d..010608665 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp
@@ -22,10 +22,12 @@
#include "moses/ChartParser.h"
#include "moses/InputType.h"
+#include "moses/Terminal.h"
#include "moses/ChartParserCallback.h"
#include "moses/StaticData.h"
#include "moses/NonTerminal.h"
#include "moses/ChartCellCollection.h"
+#include "moses/FactorCollection.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h"
using namespace std;
@@ -59,9 +61,13 @@ void ChartRuleLookupManagerMemoryPerSentence::GetChartRuleCollection(
m_lastPos = lastPos;
m_stackVec.clear();
+ m_stackScores.clear();
m_outColl = &outColl;
m_unaryPos = absEndPos-1; // rules ending in this position are unary and should not be added to collection
+ // create/update data structure to quickly look up all chart cells that match start position and label.
+ UpdateCompressedMatrix(startPos, absEndPos, lastPos);
+
const PhraseDictionaryNodeMemory &rootNode = m_ruleTable.GetRootNode(GetParser().GetTranslationId());
// size-1 terminal rules
@@ -77,7 +83,7 @@ void ChartRuleLookupManagerMemoryPerSentence::GetChartRuleCollection(
}
// all rules starting with nonterminal
else if (absEndPos > startPos) {
- GetNonTerminalExtension(&rootNode, startPos, absEndPos-1);
+ GetNonTerminalExtension(&rootNode, startPos);
// all (non-unary) rules starting with terminal
if (absEndPos == startPos+1) {
GetTerminalExtension(&rootNode, absEndPos-1);
@@ -94,21 +100,87 @@ void ChartRuleLookupManagerMemoryPerSentence::GetChartRuleCollection(
}
+// Create/update compressed matrix that stores all valid ChartCellLabels for a given start position and label.
+void ChartRuleLookupManagerMemoryPerSentence::UpdateCompressedMatrix(size_t startPos,
+ size_t origEndPos,
+ size_t lastPos) {
+
+ std::vector<size_t> endPosVec;
+ size_t numNonTerms = FactorCollection::Instance().GetNumNonTerminals();
+ m_compressedMatrixVec.resize(lastPos+1);
+
+ // we only need to update cell at [startPos, origEndPos-1] for initial lookup
+ if (startPos < origEndPos) {
+ endPosVec.push_back(origEndPos-1);
+ }
+
+ // update all cells starting from startPos+1 for lookup of rule extensions
+ else if (startPos == origEndPos)
+ {
+ startPos++;
+ for (size_t endPos = startPos; endPos <= lastPos; endPos++) {
+ endPosVec.push_back(endPos);
+ }
+ //re-use data structure for cells with later start position, but remove chart cells that would break max-chart-span
+ for (size_t pos = startPos+1; pos <= lastPos; pos++) {
+ CompressedMatrix & cellMatrix = m_compressedMatrixVec[pos];
+ cellMatrix.resize(numNonTerms);
+ for (size_t i = 0; i < numNonTerms; i++) {
+ if (!cellMatrix[i].empty() && cellMatrix[i].back().endPos > lastPos) {
+ cellMatrix[i].pop_back();
+ }
+ }
+ }
+ }
+
+ if (startPos > lastPos) {
+ return;
+ }
+
+ // populate compressed matrix with all chart cells that start at current start position
+ CompressedMatrix & cellMatrix = m_compressedMatrixVec[startPos];
+ cellMatrix.clear();
+ cellMatrix.resize(numNonTerms);
+ for (std::vector<size_t>::iterator p = endPosVec.begin(); p != endPosVec.end(); ++p) {
+
+ size_t endPos = *p;
+ // target non-terminal labels for the span
+ const ChartCellLabelSet &targetNonTerms = GetTargetLabelSet(startPos, endPos);
+
+ if (targetNonTerms.GetSize() == 0) {
+ continue;
+ }
+
+#if !defined(UNLABELLED_SOURCE)
+ // source non-terminal labels for the span
+ const InputPath &inputPath = GetParser().GetInputPath(startPos, endPos);
+ const std::vector<bool> &sourceNonTermArray = inputPath.GetNonTerminalArray();
+
+ // can this ever be true? Moses seems to pad the non-terminal set of the input with [X]
+ if (inputPath.GetNonTerminalSet().size() == 0) {
+ continue;
+ }
+#endif
+
+ for (size_t i = 0; i < numNonTerms; i++) {
+ const ChartCellLabel *cellLabel = targetNonTerms.Find(i);
+ if (cellLabel != NULL) {
+ float score = cellLabel->GetBestScore(m_outColl);
+ cellMatrix[i].push_back(ChartCellCache(endPos, cellLabel, score));
+ }
+ }
+ }
+}
+
// if a (partial) rule matches, add it to the list of completed rules (if non-unary and non-empty), and try to find expansions that have this partial rule as prefix.
void ChartRuleLookupManagerMemoryPerSentence::AddAndExtend(
const PhraseDictionaryNodeMemory *node,
- size_t endPos,
- const ChartCellLabel *cellLabel) {
-
- // add backpointer
- if (cellLabel != NULL) {
- m_stackVec.push_back(cellLabel);
- }
+ size_t endPos) {
const TargetPhraseCollection &tpc = node->GetTargetPhraseCollection();
// add target phrase collection (except if rule is empty or unary)
if (!tpc.IsEmpty() && endPos != m_unaryPos) {
- m_completedRules[endPos].Add(tpc, m_stackVec, *m_outColl);
+ m_completedRules[endPos].Add(tpc, m_stackVec, m_stackScores, *m_outColl);
}
// get all further extensions of rule (until reaching end of sentence or max-chart-span)
@@ -117,18 +189,12 @@ void ChartRuleLookupManagerMemoryPerSentence::AddAndExtend(
GetTerminalExtension(node, endPos+1);
}
if (!node->GetNonTerminalMap().empty()) {
- for (size_t newEndPos = endPos+1; newEndPos <= m_lastPos; newEndPos++) {
- GetNonTerminalExtension(node, endPos+1, newEndPos);
- }
+ GetNonTerminalExtension(node, endPos+1);
}
}
-
- // remove backpointer
- if (cellLabel != NULL) {
- m_stackVec.pop_back();
- }
}
+
// search all possible terminal extensions of a partial rule (pointed at by node) at a given position
// recursively try to expand partial rules into full rules up to m_lastPos.
void ChartRuleLookupManagerMemoryPerSentence::GetTerminalExtension(
@@ -142,9 +208,10 @@ void ChartRuleLookupManagerMemoryPerSentence::GetTerminalExtension(
if (terminals.size() < 5) {
for (PhraseDictionaryNodeMemory::TerminalMap::const_iterator iter = terminals.begin(); iter != terminals.end(); ++iter) {
const Word & word = iter->first;
- if (word == sourceWord) {
+ if (TerminalEqualityPred()(word, sourceWord)) {
const PhraseDictionaryNodeMemory *child = & iter->second;
- AddAndExtend(child, pos, NULL);
+ AddAndExtend(child, pos);
+ break;
}
}
}
@@ -152,39 +219,26 @@ void ChartRuleLookupManagerMemoryPerSentence::GetTerminalExtension(
else {
const PhraseDictionaryNodeMemory *child = node->GetChild(sourceWord);
if (child != NULL) {
- AddAndExtend(child, pos, NULL);
+ AddAndExtend(child, pos);
}
}
}
-// search all nonterminal possible nonterminal extensions of a partial rule (pointed at by node) for a given span (StartPos, endPos).
+// search all possible nonterminal extensions of a partial rule (pointed at by node) for a variable span (starting from startPos).
// recursively try to expand partial rules into full rules up to m_lastPos.
void ChartRuleLookupManagerMemoryPerSentence::GetNonTerminalExtension(
const PhraseDictionaryNodeMemory *node,
- size_t startPos,
- size_t endPos) {
+ size_t startPos) {
- // target non-terminal labels for the span
- const ChartCellLabelSet &targetNonTerms = GetTargetLabelSet(startPos, endPos);
-
- if (targetNonTerms.GetSize() == 0) {
- return;
- }
-
-#if !defined(UNLABELLED_SOURCE)
- // source non-terminal labels for the span
- const InputPath &inputPath = GetParser().GetInputPath(startPos, endPos);
- const std::vector<bool> &sourceNonTermArray = inputPath.GetNonTerminalArray();
-
- // can this ever be true? Moses seems to pad the non-terminal set of the input with [X]
- if (inputPath.GetNonTerminalSet().size() == 0) {
- return;
- }
-#endif
+ const CompressedMatrix &compressedMatrix = m_compressedMatrixVec[startPos];
// non-terminal labels in phrase dictionary node
const PhraseDictionaryNodeMemory::NonTerminalMap & nonTermMap = node->GetNonTerminalMap();
+ // make room for back pointer
+ m_stackVec.push_back(NULL);
+ m_stackScores.push_back(0);
+
// loop over possible expansions of the rule
PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator p;
PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator end = nonTermMap.end();
@@ -193,38 +247,32 @@ void ChartRuleLookupManagerMemoryPerSentence::GetNonTerminalExtension(
#if defined(UNLABELLED_SOURCE)
const Word &targetNonTerm = p->first;
#else
- const PhraseDictionaryNodeMemory::NonTerminalMapKey &key = p->first;
- const Word &sourceNonTerm = key.first;
- // check if source label matches
- if (! sourceNonTermArray[sourceNonTerm[0]->GetId()]) {
- continue;
- }
- const Word &targetNonTerm = key.second;
+ const Word &targetNonTerm = p->first.second;
#endif
-
+ const PhraseDictionaryNodeMemory *child = &p->second;
//soft matching of NTs
if (m_isSoftMatching && !m_softMatchingMap[targetNonTerm[0]->GetId()].empty()) {
const std::vector<Word>& softMatches = m_softMatchingMap[targetNonTerm[0]->GetId()];
for (std::vector<Word>::const_iterator softMatch = softMatches.begin(); softMatch != softMatches.end(); ++softMatch) {
- const ChartCellLabel *cellLabel = targetNonTerms.Find(*softMatch);
- if (cellLabel == NULL) {
- continue;
+ const CompressedColumn &matches = compressedMatrix[(*softMatch)[0]->GetId()];
+ for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
+ m_stackVec.back() = match->cellLabel;
+ m_stackScores.back() = match->score;
+ AddAndExtend(child, match->endPos);
}
- // create new rule
- const PhraseDictionaryNodeMemory &child = p->second;
- AddAndExtend(&child, endPos, cellLabel);
}
} // end of soft matches lookup
- const ChartCellLabel *cellLabel = targetNonTerms.Find(targetNonTerm);
- if (cellLabel == NULL) {
- continue;
+ const CompressedColumn &matches = compressedMatrix[targetNonTerm[0]->GetId()];
+ for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
+ m_stackVec.back() = match->cellLabel;
+ m_stackScores.back() = match->score;
+ AddAndExtend(child, match->endPos);
}
- // create new rule
- const PhraseDictionaryNodeMemory &child = p->second;
- AddAndExtend(&child, endPos, cellLabel);
}
+ // remove last back pointer
+ m_stackVec.pop_back();
+ m_stackScores.pop_back();
}
-
} // namespace Moses
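UpdateCompressedMatrix replaces the previous per-span lookups with a cache indexed by start position and target label: m_compressedMatrixVec[startPos][labelId] holds all (endPos, cellLabel, score) entries reachable from startPos under that label, so GetNonTerminalExtension only walks one short vector per label instead of probing every (startPos, endPos) span. A rough illustration of the layout and the lookup it enables, with simplified types (plain size_t label ids instead of Word, no real chart cells):

// Rough sketch of the cache layout built by UpdateCompressedMatrix.
#include <cstdio>
#include <vector>

struct Cache { size_t endPos; float score; };
typedef std::vector<Cache> CompressedColumn;              // all cells for one label
typedef std::vector<CompressedColumn> CompressedMatrix;   // indexed by label id

int main() {
  const size_t numNonTerms = 3;            // e.g. X, NP, VP
  // one matrix per start position; here only startPos == 2 is filled
  std::vector<CompressedMatrix> matrixVec(5, CompressedMatrix(numNonTerms));

  CompressedMatrix& m = matrixVec[2];
  m[1].push_back(Cache{3, -0.5f});         // an NP cell over span [2,3]
  m[1].push_back(Cache{4, -1.7f});         // an NP cell over span [2,4]

  // what GetNonTerminalExtension now does for one target label:
  const size_t npId = 1;
  for (const Cache& c : matrixVec[2][npId])
    std::printf("expand with NP ending at %zu (score %.2f)\n", c.endPos, c.score);
}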
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h
index bd7cd45fe..6cdc73dd4 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h
@@ -40,6 +40,9 @@ class WordsRange;
class ChartRuleLookupManagerMemoryPerSentence : public ChartRuleLookupManagerCYKPlus
{
public:
+ typedef std::vector<ChartCellCache> CompressedColumn;
+ typedef std::vector<CompressedColumn> CompressedMatrix;
+
ChartRuleLookupManagerMemoryPerSentence(const ChartParser &parser,
const ChartCellCollectionBase &cellColl,
const PhraseDictionaryFuzzyMatch &ruleTable);
@@ -53,19 +56,21 @@ public:
private:
-void GetTerminalExtension(
+ void GetTerminalExtension(
const PhraseDictionaryNodeMemory *node,
size_t pos);
-void GetNonTerminalExtension(
+ void GetNonTerminalExtension(
const PhraseDictionaryNodeMemory *node,
- size_t startPos,
- size_t endPos);
+ size_t startPos);
void AddAndExtend(
const PhraseDictionaryNodeMemory *node,
+ size_t endPos);
+
+ void UpdateCompressedMatrix(size_t startPos,
size_t endPos,
- const ChartCellLabel *cellLabel);
+ size_t lastPos);
const PhraseDictionaryFuzzyMatch &m_ruleTable;
@@ -80,8 +85,12 @@ void GetNonTerminalExtension(
size_t m_unaryPos;
StackVec m_stackVec;
+ std::vector<float> m_stackScores;
+ std::vector<const Word*> m_sourceWords;
ChartParserCallback* m_outColl;
+ std::vector<CompressedMatrix> m_compressedMatrixVec;
+
};
} // namespace Moses
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp
index 81cfa8c38..93ddc82db 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp
@@ -77,7 +77,7 @@ TargetPhrase *ChartRuleLookupManagerSkeleton::CreateTargetPhrase(const Word &sou
string str = sourceWord.GetFactor(0)->GetString().as_string();
str = "ChartManagerSkeleton:" + str;
- TargetPhrase *tp = new TargetPhrase();
+ TargetPhrase *tp = new TargetPhrase(&m_skeletonPT);
Word &word = tp->AddWord();
word.CreateFromString(Output, m_skeletonPT.GetOutput(), str, false);
diff --git a/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.cpp b/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.cpp
index 294b93fe2..325650c2e 100644
--- a/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.cpp
+++ b/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.cpp
@@ -77,4 +77,47 @@ void CompletedRuleCollection::Add(const TargetPhraseCollection &tpc,
}
}
+
+// copies some functionality (pruning) from ChartTranslationOptionList::Add
+void CompletedRuleCollection::Add(const TargetPhraseCollection &tpc,
+ const StackVec &stackVec,
+ const std::vector<float> &stackScores,
+ const ChartParserCallback &outColl)
+{
+ if (tpc.IsEmpty()) {
+ return;
+ }
+
+ const TargetPhrase &targetPhrase = **(tpc.begin());
+ float score = std::accumulate(stackScores.begin(), stackScores.end(), targetPhrase.GetFutureScore());
+
+ // If the rule limit has already been reached then don't add the option
+ // unless it is better than at least one existing option.
+ if (m_collection.size() > m_ruleLimit && score < m_scoreThreshold) {
+ return;
+ }
+
+ CompletedRule *completedRule = new CompletedRule(tpc, stackVec, score);
+ m_collection.push_back(completedRule);
+
+ // If the rule limit hasn't been exceeded then update the threshold.
+ if (m_collection.size() <= m_ruleLimit) {
+ m_scoreThreshold = (score < m_scoreThreshold) ? score : m_scoreThreshold;
+ }
+
+ // Prune if bursting
+ if (m_collection.size() == m_ruleLimit * 2) {
+ NTH_ELEMENT4(m_collection.begin(),
+ m_collection.begin() + m_ruleLimit - 1,
+ m_collection.end(),
+ CompletedRuleOrdered());
+ m_scoreThreshold = m_collection[m_ruleLimit-1]->GetScoreEstimate();
+ for (size_t i = 0 + m_ruleLimit; i < m_collection.size(); i++) {
+ delete m_collection[i];
+
+ }
+ m_collection.resize(m_ruleLimit);
+ }
}
+
+}
\ No newline at end of file
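The new Add overload adopts the rule-limit pruning scheme from ChartTranslationOptionList::Add: options keep being inserted until the collection bursts to twice the limit, then an nth_element pass keeps the best m_ruleLimit entries and tightens the score threshold used to reject later, worse options early. A stripped-down sketch of that pattern, using plain float scores instead of CompletedRule objects and std::nth_element instead of the NTH_ELEMENT4 macro (the initial +infinity threshold is an assumption about how m_scoreThreshold is initialised elsewhere):

// Stand-alone sketch of the "prune when bursting" pattern.
#include <algorithm>
#include <cstdio>
#include <functional>
#include <limits>
#include <vector>

int main() {
  const size_t ruleLimit = 4;
  float scoreThreshold = std::numeric_limits<float>::infinity();
  std::vector<float> collection;

  const float incoming[] = { -1, -5, -2, -8, -3, -7, -4, -6, -0.5f, -9 };
  for (float score : incoming) {
    // once the limit is reached, only accept options beating the threshold
    if (collection.size() > ruleLimit && score < scoreThreshold) continue;
    collection.push_back(score);

    // while under the limit, the threshold tracks the worst kept score
    if (collection.size() <= ruleLimit)
      scoreThreshold = std::min(scoreThreshold, score);

    // prune when the collection bursts to twice the limit
    if (collection.size() == ruleLimit * 2) {
      std::nth_element(collection.begin(), collection.begin() + ruleLimit - 1,
                       collection.end(), std::greater<float>());
      scoreThreshold = collection[ruleLimit - 1];
      collection.resize(ruleLimit);
    }
  }
  for (float s : collection) std::printf("%g ", s);
  std::printf("\n");
}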
diff --git a/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.h b/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.h
index ec898d56d..18373d743 100644
--- a/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.h
+++ b/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.h
@@ -22,6 +22,7 @@
#define moses_CompletedRuleCollectionS_h
#include <vector>
+#include <numeric>
#include "moses/StackVec.h"
#include "moses/TargetPhraseCollection.h"
@@ -105,6 +106,11 @@ public:
const StackVec &stackVec,
const ChartParserCallback &outColl);
+ void Add(const TargetPhraseCollection &tpc,
+ const StackVec &stackVec,
+ const std::vector<float> &stackScores,
+ const ChartParserCallback &outColl);
+
private:
std::vector<CompletedRule*> m_collection;
float m_scoreThreshold;
diff --git a/moses/TranslationModel/CompactPT/PhraseDecoder.cpp b/moses/TranslationModel/CompactPT/PhraseDecoder.cpp
index 1a63c0fce..ec3efdbb4 100644
--- a/moses/TranslationModel/CompactPT/PhraseDecoder.cpp
+++ b/moses/TranslationModel/CompactPT/PhraseDecoder.cpp
@@ -418,7 +418,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
}
if(eval) {
- targetPhrase->Evaluate(sourcePhrase);
+ targetPhrase->EvaluateInIsolation(sourcePhrase, m_phraseDictionary.GetFeaturesToApply());
}
if(m_coding == PREnc) {
diff --git a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
index 96a5fda7d..bd212a19e 100644
--- a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
+++ b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
@@ -57,6 +57,8 @@ void PhraseDictionaryCompact::Load()
{
const StaticData &staticData = StaticData::Instance();
+ SetFeaturesToApply();
+
m_weight = staticData.GetWeights(this);
std::string tFilePath = m_filePath;
diff --git a/moses/TranslationModel/CompactPT/StringVector.h b/moses/TranslationModel/CompactPT/StringVector.h
index 06f3c24e0..bb2bc11ef 100644
--- a/moses/TranslationModel/CompactPT/StringVector.h
+++ b/moses/TranslationModel/CompactPT/StringVector.h
@@ -86,7 +86,8 @@ protected:
virtual const ValueT* value_ptr(PosT i) const;
public:
- typedef ValueIteratorRange<typename std::vector<ValueT, Allocator<ValueT> >::const_iterator> range;
+ //typedef ValueIteratorRange<typename std::vector<ValueT, Allocator<ValueT> >::const_iterator> range;
+ typedef ValueIteratorRange<const ValueT *> range;
// ********** RangeIterator **********
@@ -174,8 +175,10 @@ public:
iterator end() const;
PosT length(PosT i) const;
- typename std::vector<ValueT, Allocator<ValueT> >::const_iterator begin(PosT i) const;
- typename std::vector<ValueT, Allocator<ValueT> >::const_iterator end(PosT i) const;
+ //typename std::vector<ValueT, Allocator<ValueT> >::const_iterator begin(PosT i) const;
+ //typename std::vector<ValueT, Allocator<ValueT> >::const_iterator end(PosT i) const;
+ const ValueT* begin(PosT i) const;
+ const ValueT* end(PosT i) const;
void clear() {
m_charArray->clear();
@@ -469,15 +472,19 @@ const ValueT* StringVector<ValueT, PosT, Allocator>::value_ptr(PosT i) const
}
template<typename ValueT, typename PosT, template <typename> class Allocator>
-typename std::vector<ValueT, Allocator<ValueT> >::const_iterator StringVector<ValueT, PosT, Allocator>::begin(PosT i) const
+//typename std::vector<ValueT, Allocator<ValueT> >::const_iterator StringVector<ValueT, PosT, Allocator>::begin(PosT i) const
+const ValueT* StringVector<ValueT, PosT, Allocator>::begin(PosT i) const
{
- return typename std::vector<ValueT, Allocator<ValueT> >::const_iterator(value_ptr(i));
+ //return typename std::vector<ValueT, Allocator<ValueT> >::const_iterator(value_ptr(i));
+ return value_ptr(i);
}
template<typename ValueT, typename PosT, template <typename> class Allocator>
-typename std::vector<ValueT, Allocator<ValueT> >::const_iterator StringVector<ValueT, PosT, Allocator>::end(PosT i) const
+//typename std::vector<ValueT, Allocator<ValueT> >::const_iterator StringVector<ValueT, PosT, Allocator>::end(PosT i) const
+const ValueT* StringVector<ValueT, PosT, Allocator>::end(PosT i) const
{
- return typename std::vector<ValueT, Allocator<ValueT> >::const_iterator(value_ptr(i) + length(i));
+ //return typename std::vector<ValueT, Allocator<ValueT> >::const_iterator(value_ptr(i) + length(i));
+ return value_ptr(i) + length(i);
}
template<typename ValueT, typename PosT, template <typename> class Allocator>
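The switch from std::vector const_iterators to raw const ValueT* pointers in StringVector works because raw pointers already satisfy the random-access iterator requirements; existing callers that build strings or run standard algorithms over begin(i)/end(i) keep compiling. A small stand-alone illustration of that property (the buffer here is an illustrative stand-in for StringVector's internal character array):

// Raw pointers behave like random-access iterators.
#include <algorithm>
#include <iostream>
#include <string>

int main() {
  const char buffer[] = "hello world";   // stands in for the stored characters
  const char* b = buffer;                // what begin(i) now returns
  const char* e = buffer + 5;            // what end(i) now returns

  std::string word(b, e);                // pointer ranges work as iterator pairs
  std::cout << word << " "
            << std::count(b, e, 'l') << "\n";   // and with <algorithm> too
}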
diff --git a/moses/TranslationModel/DynSAInclude/FileHandler.cpp b/moses/TranslationModel/DynSAInclude/FileHandler.cpp
index 4e92ad907..ffde4a0f3 100644
--- a/moses/TranslationModel/DynSAInclude/FileHandler.cpp
+++ b/moses/TranslationModel/DynSAInclude/FileHandler.cpp
@@ -71,13 +71,13 @@ bool FileHandler::setStreamBuffer(bool checkExists)
{
// redirect stdin or stdout if necessary
if (path_ == FileHandler::kStdInDescriptor) {
- UTIL_THROW_IF2(flags_ & std::ios::in == 0,
- "Incorrect flags: " << flags_);
+ UTIL_THROW_IF2((flags_ & std::ios::in) == 0,
+ "Incorrect flags: " << flags_);
std::streambuf* sb = std::cin.rdbuf();
buffer_ = sb;
} else if (path_ == FileHandler::kStdOutDescriptor) {
- UTIL_THROW_IF2(flags_ & std::ios::out == 0,
- "Incorrect flags: " << flags_);
+ UTIL_THROW_IF2((flags_ & std::ios::out) == 0,
+ "Incorrect flags: " << flags_);
std::streambuf* sb = std::cout.rdbuf();
buffer_ = sb;
} else {
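The added parentheses fix a genuine precedence bug: == binds tighter than &, so flags_ & std::ios::in == 0 parses as flags_ & (std::ios::in == 0), which is always 0, and the sanity check could never fire. A tiny demonstration with illustrative flag values:

// Why the parentheses matter: == binds tighter than &.
#include <iostream>

int main() {
  const unsigned inFlag = 0x1;   // stands in for std::ios::in
  const unsigned flags  = 0x4;   // a mode that does NOT include inFlag

  bool oldCheck = flags & inFlag == 0;    // parsed as flags & (inFlag == 0) -> always 0
  bool newCheck = (flags & inFlag) == 0;  // the intended test

  std::cout << "old: " << oldCheck << "  new: " << newCheck << "\n"; // old: 0  new: 1
}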
diff --git a/moses/TranslationModel/PhraseDictionary.cpp b/moses/TranslationModel/PhraseDictionary.cpp
index cd3e85d69..e0251b907 100644
--- a/moses/TranslationModel/PhraseDictionary.cpp
+++ b/moses/TranslationModel/PhraseDictionary.cpp
@@ -24,6 +24,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/InputType.h"
#include "moses/TranslationOption.h"
#include "moses/UserMessage.h"
+#include "moses/DecodeStep.h"
#include "moses/DecodeGraph.h"
#include "moses/InputPath.h"
#include "util/exception.hh"
@@ -48,6 +49,7 @@ PhraseDictionary::PhraseDictionary(const std::string &line)
,m_tableLimit(20) // default
,m_maxCacheSize(DEFAULT_MAX_TRANS_OPT_CACHE_SIZE)
{
+ m_id = s_staticColl.size();
s_staticColl.push_back(this);
}
diff --git a/moses/TranslationModel/PhraseDictionary.h b/moses/TranslationModel/PhraseDictionary.h
index f803a466e..441c94c0b 100644
--- a/moses/TranslationModel/PhraseDictionary.h
+++ b/moses/TranslationModel/PhraseDictionary.h
@@ -87,6 +87,10 @@ public:
return m_tableLimit;
}
+  //! contiguous id for each pt, starting from 0
+ size_t GetId() const
+ { return m_id; }
+
virtual
void
Release(TargetPhraseCollection const* tpc) const;
@@ -167,6 +171,7 @@ protected:
protected:
CacheColl &GetCache() const;
+ size_t m_id;
};
diff --git a/moses/TranslationModel/PhraseDictionaryDynSuffixArray.cpp b/moses/TranslationModel/PhraseDictionaryDynSuffixArray.cpp
index 3d2b0af08..83b78fe5b 100644
--- a/moses/TranslationModel/PhraseDictionaryDynSuffixArray.cpp
+++ b/moses/TranslationModel/PhraseDictionaryDynSuffixArray.cpp
@@ -58,9 +58,9 @@ GetTargetPhraseCollectionLEGACY(const Phrase& src) const
TargetPhraseCollection *ret = new TargetPhraseCollection();
BOOST_FOREACH(pstat_entry & e, pstats) {
- TargetPhrase* tp = m_biSA->GetMosesFactorIDs(e.first, src);
+ TargetPhrase* tp = m_biSA->GetMosesFactorIDs(e.first, src, this);
tp->GetScoreBreakdown().Assign(this,e.second);
- tp->Evaluate(src);
+ tp->EvaluateInIsolation(src);
ret->Add(tp);
}
// return ret;
diff --git a/moses/TranslationModel/PhraseDictionaryMultiModel.cpp b/moses/TranslationModel/PhraseDictionaryMultiModel.cpp
index 9f3996505..f226b8ba4 100644
--- a/moses/TranslationModel/PhraseDictionaryMultiModel.cpp
+++ b/moses/TranslationModel/PhraseDictionaryMultiModel.cpp
@@ -30,16 +30,27 @@ PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line)
{
ReadParameters();
- if (m_mode != "interpolate") {
+ if (m_mode == "interpolate") {
+ size_t numWeights = m_numScoreComponents;
+ UTIL_THROW_IF2(m_pdStr.size() != m_multimodelweights.size() &&
+ m_pdStr.size()*numWeights != m_multimodelweights.size(),
+ "Number of scores and weights are not equal");
+ } else if (m_mode == "all" || m_mode == "all-restrict") {
+ size_t componentWeights = 0;
+ for(size_t i = 0; i < m_numModels; ++i) {
+ const string &ptName = m_pdStr[i];
+ PhraseDictionary *pt = FindPhraseDictionary(ptName);
+ UTIL_THROW_IF2(pt == NULL,
+ "Could not find component phrase table " << ptName);
+ componentWeights += pt->GetNumScoreComponents();
+ }
+ UTIL_THROW_IF2(componentWeights != m_numScoreComponents,
+ "Total number of component model scores is unequal to specified number of scores");
+ } else {
ostringstream msg;
msg << "combination mode unknown: " << m_mode;
throw runtime_error(msg.str());
}
-
- size_t numWeights = m_numScoreComponents;
- UTIL_THROW_IF2(m_pdStr.size() != m_multimodelweights.size() &
- m_pdStr.size()*numWeights != m_multimodelweights.size(),
- "Number of scores and weights are not equal");
}
PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(int type, const std::string &line)
@@ -95,20 +106,23 @@ const TargetPhraseCollection *PhraseDictionaryMultiModel::GetTargetPhraseCollect
multimodelweights = getWeights(m_numScoreComponents, true);
}
- std::map<std::string,multiModelStatistics*>* allStats = new(std::map<std::string,multiModelStatistics*>);
-
- CollectSufficientStatistics(src, allStats);
-
TargetPhraseCollection *ret = NULL;
+
if (m_mode == "interpolate") {
+ std::map<std::string,multiModelStatistics*>* allStats = new(std::map<std::string,multiModelStatistics*>);
+ CollectSufficientStatistics(src, allStats);
ret = CreateTargetPhraseCollectionLinearInterpolation(src, allStats, multimodelweights);
+ RemoveAllInMap(*allStats);
+ delete allStats;
+ } else if (m_mode == "all") {
+ ret = CreateTargetPhraseCollectionAll(src, false);
+ } else if (m_mode == "all-restrict") {
+ ret = CreateTargetPhraseCollectionAll(src, true);
}
ret->NthElement(m_tableLimit); // sort the phrases for pruning later
const_cast<PhraseDictionaryMultiModel*>(this)->CacheForCleanup(ret);
- RemoveAllInMap(*allStats);
- delete allStats;
-
+
return ret;
}
@@ -147,7 +161,7 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
vector<FeatureFunction*> pd_feature;
pd_feature.push_back(m_pd[i]);
const vector<FeatureFunction*> pd_feature_const(pd_feature);
- statistics->targetPhrase->Evaluate(src, pd_feature_const);
+ statistics->targetPhrase->EvaluateInIsolation(src, pd_feature_const);
// zero out scores from original phrase table
statistics->targetPhrase->GetScoreBreakdown().ZeroDenseFeatures(&pd);
@@ -166,7 +180,6 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
}
}
-
TargetPhraseCollection* PhraseDictionaryMultiModel::CreateTargetPhraseCollectionLinearInterpolation(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats, std::vector<std::vector<float> > &multimodelweights) const
{
TargetPhraseCollection *ret = new TargetPhraseCollection();
@@ -186,13 +199,95 @@ TargetPhraseCollection* PhraseDictionaryMultiModel::CreateTargetPhraseCollection
vector<FeatureFunction*> pd_feature;
pd_feature.push_back(const_cast<PhraseDictionaryMultiModel*>(this));
const vector<FeatureFunction*> pd_feature_const(pd_feature);
- statistics->targetPhrase->Evaluate(src, pd_feature_const);
+ statistics->targetPhrase->EvaluateInIsolation(src, pd_feature_const);
ret->Add(new TargetPhrase(*statistics->targetPhrase));
}
return ret;
}
+TargetPhraseCollection* PhraseDictionaryMultiModel::CreateTargetPhraseCollectionAll(const Phrase& src, const bool restricted) const
+{
+ // Collect phrases from all models
+ std::map<std::string, multiModelPhrase*> allPhrases;
+ size_t offset = 0;
+ for(size_t i = 0; i < m_numModels; ++i) {
+ const PhraseDictionary &pd = *m_pd[i];
+
+ TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) pd.GetTargetPhraseCollectionLEGACY(src);
+ if (ret_raw != NULL) {
+
+ TargetPhraseCollection::iterator iterTargetPhrase, iterLast;
+ if (m_tableLimit != 0 && ret_raw->GetSize() > m_tableLimit) {
+ iterLast = ret_raw->begin() + m_tableLimit;
+ } else {
+ iterLast = ret_raw->end();
+ }
+
+ for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != iterLast; ++iterTargetPhrase) {
+ const TargetPhrase* targetPhrase = *iterTargetPhrase;
+ std::vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(&pd);
+
+ std::string targetString = targetPhrase->GetStringRep(m_output);
+ // Phrase not in collection -> add if unrestricted (all) or first model (all-restrict)
+ if (allPhrases.find(targetString) == allPhrases.end()) {
+ // all-restrict and not first model: skip adding unseen phrase
+ if (restricted && i > 0) {
+ continue;
+ }
+
+ multiModelPhrase* phrase = new multiModelPhrase;
+ phrase->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info
+ // p contains scores from all models in order. Values default to zero for models that do not contain phrase.
+ phrase->p.resize(m_numScoreComponents, 0);
+
+ //correct future cost estimates and total score
+ phrase->targetPhrase->GetScoreBreakdown().InvertDenseFeatures(&pd);
+ vector<FeatureFunction*> pd_feature;
+ pd_feature.push_back(m_pd[i]);
+ const vector<FeatureFunction*> pd_feature_const(pd_feature);
+ phrase->targetPhrase->EvaluateInIsolation(src, pd_feature_const);
+ // zero out scores from original phrase table
+ phrase->targetPhrase->GetScoreBreakdown().ZeroDenseFeatures(&pd);
+
+ allPhrases[targetString] = phrase;
+
+ }
+ multiModelPhrase* phrase = allPhrases[targetString];
+
+ for(size_t j = 0; j < pd.GetNumScoreComponents(); ++j) {
+ phrase->p[offset + j] = raw_scores[j];
+ }
+ }
+ }
+ offset += pd.GetNumScoreComponents();
+ }
+
+ // Copy accumulated score vectors to phrases
+ TargetPhraseCollection* ret = new TargetPhraseCollection();
+ for (std::map<std::string, multiModelPhrase*>::const_iterator iter = allPhrases.begin(); iter != allPhrases.end(); ++iter) {
+
+ multiModelPhrase* phrase = iter->second;
+ Scores scoreVector(m_numScoreComponents);
+
+ for(size_t i = 0; i < m_numScoreComponents; ++i) {
+ scoreVector[i] = phrase->p[i];
+ }
+
+ phrase->targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
+
+ //correct future cost estimates and total score
+ vector<FeatureFunction*> pd_feature;
+ pd_feature.push_back(const_cast<PhraseDictionaryMultiModel*>(this));
+ const vector<FeatureFunction*> pd_feature_const(pd_feature);
+ phrase->targetPhrase->EvaluateInIsolation(src, pd_feature_const);
+
+ ret->Add(new TargetPhrase(*phrase->targetPhrase));
+ }
+
+ RemoveAllInMap(allPhrases);
+ return ret;
+}
//TODO: is it worth caching the results as long as weights don't change?
std::vector<std::vector<float> > PhraseDictionaryMultiModel::getWeights(size_t numWeights, bool normalize) const
@@ -323,9 +418,6 @@ void PhraseDictionaryMultiModel::SetTemporaryMultiModelWeightsVector(std::vector
vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string, string> > &phrase_pair_vector)
{
- const StaticData &staticData = StaticData::Instance();
- const string& factorDelimiter = staticData.GetFactorDelimiter();
-
map<pair<string, string>, size_t> phrase_pair_map;
for ( vector<pair<string, string> >::const_iterator iter = phrase_pair_vector.begin(); iter != phrase_pair_vector.end(); ++iter ) {
@@ -344,7 +436,7 @@ vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string,
map<string,multiModelStatistics*>* allStats = new(map<string,multiModelStatistics*>);
Phrase sourcePhrase(0);
- sourcePhrase.CreateFromString(Input, m_input, source_string, factorDelimiter, NULL);
+ sourcePhrase.CreateFromString(Input, m_input, source_string, NULL);
CollectSufficientStatistics(sourcePhrase, allStats); //optimization potential: only call this once per source phrase
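In the new "all" / "all-restrict" modes, CreateTargetPhraseCollectionAll lays the component models' scores side by side: each model writes its raw scores into its own offset range of a vector of length m_numScoreComponents, and the entries stay zero for models that never proposed the phrase. A toy illustration of that offset bookkeeping, using two hypothetical component tables and made-up scores rather than real phrase dictionaries:

// Toy illustration of the combined score-vector layout used by the "all" mode:
// each component model owns a contiguous slice [offset, offset + numScores).
#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
  const size_t numScores[] = {2, 2};   // two hypothetical models, 2 scores each
  const size_t total = 4;              // combined number of score components

  std::map<std::string, std::vector<float> > combined;   // keyed by target string

  // model 0 proposes "the house"
  size_t offset = 0;
  {
    std::vector<float>& p = combined["the house"];
    if (p.empty()) p.resize(total, 0.0f);
    const float raw[] = {0.5f, 0.3f};
    for (size_t j = 0; j < numScores[0]; ++j) p[offset + j] = raw[j];
  }
  offset += numScores[0];

  // model 1 proposes the same phrase with its own scores
  {
    std::vector<float>& p = combined["the house"];
    if (p.empty()) p.resize(total, 0.0f);
    const float raw[] = {0.7f, 0.1f};
    for (size_t j = 0; j < numScores[1]; ++j) p[offset + j] = raw[j];
  }

  for (float v : combined["the house"]) std::printf("%g ", v);   // 0.5 0.3 0.7 0.1
  std::printf("\n");
}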
diff --git a/moses/TranslationModel/PhraseDictionaryMultiModel.h b/moses/TranslationModel/PhraseDictionaryMultiModel.h
index 5886a9d98..41df2e679 100644
--- a/moses/TranslationModel/PhraseDictionaryMultiModel.h
+++ b/moses/TranslationModel/PhraseDictionaryMultiModel.h
@@ -51,6 +51,14 @@ struct multiModelStatisticsOptimization: multiModelStatistics {
class OptimizationObjective;
+struct multiModelPhrase {
+ TargetPhrase *targetPhrase;
+ std::vector<float> p;
+ ~multiModelPhrase() {
+ delete targetPhrase;
+ };
+};
+
/** Implementation of a virtual phrase table constructed from multiple component phrase tables.
*/
class PhraseDictionaryMultiModel: public PhraseDictionary
@@ -66,6 +74,7 @@ public:
void Load();
virtual void CollectSufficientStatistics(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats) const;
virtual TargetPhraseCollection* CreateTargetPhraseCollectionLinearInterpolation(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats, std::vector<std::vector<float> > &multimodelweights) const;
+ virtual TargetPhraseCollection* CreateTargetPhraseCollectionAll(const Phrase& src, const bool restricted = false) const;
std::vector<std::vector<float> > getWeights(size_t numWeights, bool normalize) const;
std::vector<float> normalizeWeights(std::vector<float> &weights) const;
void CacheForCleanup(TargetPhraseCollection* tpc);
diff --git a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp
index 99d3ad256..6a3174a59 100644
--- a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp
+++ b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp
@@ -189,7 +189,7 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase&
vector<FeatureFunction*> pd_feature;
pd_feature.push_back(m_pd[i]);
const vector<FeatureFunction*> pd_feature_const(pd_feature);
- statistics->targetPhrase->Evaluate(src, pd_feature_const);
+ statistics->targetPhrase->EvaluateInIsolation(src, pd_feature_const);
// zero out scores from original phrase table
statistics->targetPhrase->GetScoreBreakdown().ZeroDenseFeatures(&pd);
@@ -251,7 +251,7 @@ TargetPhraseCollection* PhraseDictionaryMultiModelCounts::CreateTargetPhraseColl
vector<FeatureFunction*> pd_feature;
pd_feature.push_back(const_cast<PhraseDictionaryMultiModelCounts*>(this));
const vector<FeatureFunction*> pd_feature_const(pd_feature);
- statistics->targetPhrase->Evaluate(src, pd_feature_const);
+ statistics->targetPhrase->EvaluateInIsolation(src, pd_feature_const);
} catch (AlignmentException& e) {
continue;
}
@@ -489,9 +489,6 @@ void PhraseDictionaryMultiModelCounts::LoadLexicalTable( string &fileName, lexic
vector<float> PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector<pair<string, string> > &phrase_pair_vector)
{
- const StaticData &staticData = StaticData::Instance();
- const string& factorDelimiter = staticData.GetFactorDelimiter();
-
map<pair<string, string>, size_t> phrase_pair_map;
for ( vector<pair<string, string> >::const_iterator iter = phrase_pair_vector.begin(); iter != phrase_pair_vector.end(); ++iter ) {
@@ -510,7 +507,7 @@ vector<float> PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector<pair<s
map<string,multiModelCountsStatistics*>* allStats = new(map<string,multiModelCountsStatistics*>);
Phrase sourcePhrase(0);
- sourcePhrase.CreateFromString(Input, m_input, source_string, factorDelimiter, NULL);
+ sourcePhrase.CreateFromString(Input, m_input, source_string, NULL);
CollectSufficientStatistics(sourcePhrase, fs, allStats); //optimization potential: only call this once per source phrase
diff --git a/moses/TranslationModel/PhraseDictionaryTransliteration.cpp b/moses/TranslationModel/PhraseDictionaryTransliteration.cpp
index 0d9b34f91..c2ffd95da 100644
--- a/moses/TranslationModel/PhraseDictionaryTransliteration.cpp
+++ b/moses/TranslationModel/PhraseDictionaryTransliteration.cpp
@@ -124,7 +124,7 @@ std::vector<TargetPhrase*> PhraseDictionaryTransliteration::CreateTargetPhrases(
Tokenize(toks, line, "\t");
UTIL_THROW_IF2(toks.size() != 2, "Error in transliteration output file. Expecting word\tscore");
- TargetPhrase *tp = new TargetPhrase();
+ TargetPhrase *tp = new TargetPhrase(this);
Word &word = tp->AddWord();
word.CreateFromString(Output, m_output, toks[0], false);
@@ -132,7 +132,7 @@ std::vector<TargetPhrase*> PhraseDictionaryTransliteration::CreateTargetPhrases(
tp->GetScoreBreakdown().PlusEquals(this, score);
// score of all other ff when this rule is being loaded
- tp->Evaluate(sourcePhrase, GetFeaturesToApply());
+ tp->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply());
ret.push_back(tp);
}
diff --git a/moses/TranslationModel/PhraseDictionaryTree.cpp b/moses/TranslationModel/PhraseDictionaryTree.cpp
index 68dd5a59f..c8b7cb5d2 100644
--- a/moses/TranslationModel/PhraseDictionaryTree.cpp
+++ b/moses/TranslationModel/PhraseDictionaryTree.cpp
@@ -3,6 +3,7 @@
#include "moses/FeatureVector.h"
#include "moses/TranslationModel/PhraseDictionaryTree.h"
#include "util/exception.hh"
+#include "moses/StaticData.h"
#include <map>
#include <sstream>
@@ -233,7 +234,8 @@ public:
typedef PhraseDictionaryTree::PrefixPtr PPtr;
void GetTargetCandidates(PPtr p,TgtCands& tgtCands) {
- UTIL_THROW_IF2(p == NULL, "Error");
+ UTIL_THROW_IF2(p == 0L, "Error");
+ // UTIL_THROW_IF2(p == NULL, "Error");
if(p.imp->isRoot()) return;
OFF_T tCandOffset=p.imp->ptr()->getData(p.imp->idx);
@@ -278,7 +280,8 @@ public:
}
PPtr Extend(PPtr p,const std::string& w) {
- UTIL_THROW_IF2(p == NULL, "Error");
+ UTIL_THROW_IF2(p == 0L, "Error");
+ // UTIL_THROW_IF2(p == NULL, "Error");
if(w.empty() || w==EPSILON) return p;
@@ -349,8 +352,8 @@ int PDTimp::Read(const std::string& fn)
sv.Read(ifsv);
tv.Read(iftv);
- TRACE_ERR("binary phrasefile loaded, default OFF_T: "<<PTF::getDefault()
- <<"\n");
+ VERBOSE(1,"binary phrasefile loaded, default OFF_T: "
+ <<PTF::getDefault() <<"\n");
return 1;
}
@@ -660,7 +663,7 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
int PhraseDictionaryTree::Read(const std::string& fn)
{
- TRACE_ERR("size of OFF_T "<<sizeof(OFF_T)<<"\n");
+ VERBOSE(1,"size of OFF_T "<<sizeof(OFF_T)<<"\n");
return imp->Read(fn);
}
diff --git a/moses/TranslationModel/ProbingPT/ProbingPT.cpp b/moses/TranslationModel/ProbingPT/ProbingPT.cpp
index 9859520c1..b854c8c02 100644
--- a/moses/TranslationModel/ProbingPT/ProbingPT.cpp
+++ b/moses/TranslationModel/ProbingPT/ProbingPT.cpp
@@ -153,7 +153,7 @@ TargetPhrase *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase, const ta
const std::vector<unsigned int> &probingPhrase = probingTargetPhrase.target_phrase;
size_t size = probingPhrase.size();
- TargetPhrase *tp = new TargetPhrase();
+ TargetPhrase *tp = new TargetPhrase(this);
// words
for (size_t i = 0; i < size; ++i) {
@@ -181,7 +181,7 @@ TargetPhrase *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase, const ta
*/
// score of all other ff when this rule is being loaded
- tp->Evaluate(sourcePhrase, GetFeaturesToApply());
+ tp->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply());
return tp;
}
diff --git a/moses/TranslationModel/RuleTable/LoaderCompact.cpp b/moses/TranslationModel/RuleTable/LoaderCompact.cpp
index 468eaafbf..299cfe7ea 100644
--- a/moses/TranslationModel/RuleTable/LoaderCompact.cpp
+++ b/moses/TranslationModel/RuleTable/LoaderCompact.cpp
@@ -222,11 +222,11 @@ bool RuleTableLoaderCompact::LoadRuleSection(
// The remaining columns are currently ignored.
// Create and score target phrase.
- TargetPhrase *targetPhrase = new TargetPhrase(targetPhrasePhrase);
+ TargetPhrase *targetPhrase = new TargetPhrase(targetPhrasePhrase, &ruleTable);
targetPhrase->SetAlignNonTerm(alignNonTerm);
targetPhrase->SetTargetLHS(targetLhs);
- targetPhrase->Evaluate(sourcePhrase, ruleTable.GetFeaturesToApply());
+ targetPhrase->EvaluateInIsolation(sourcePhrase, ruleTable.GetFeaturesToApply());
// Insert rule into table.
TargetPhraseCollection &coll = GetOrCreateTargetPhraseCollection(
diff --git a/moses/TranslationModel/RuleTable/LoaderStandard.cpp b/moses/TranslationModel/RuleTable/LoaderStandard.cpp
index 89e173444..f46d3a440 100644
--- a/moses/TranslationModel/RuleTable/LoaderStandard.cpp
+++ b/moses/TranslationModel/RuleTable/LoaderStandard.cpp
@@ -222,7 +222,7 @@ bool RuleTableLoaderStandard::Load(FormatType format
Word *targetLHS;
// create target phrase obj
- TargetPhrase *targetPhrase = new TargetPhrase();
+ TargetPhrase *targetPhrase = new TargetPhrase(&ruleTable);
// targetPhrase->CreateFromString(Output, output, targetPhraseString, factorDelimiter, &targetLHS);
targetPhrase->CreateFromString(Output, output, targetPhraseString, &targetLHS);
// source
@@ -247,7 +247,7 @@ bool RuleTableLoaderStandard::Load(FormatType format
}
targetPhrase->GetScoreBreakdown().Assign(&ruleTable, scoreVector);
- targetPhrase->Evaluate(sourcePhrase, ruleTable.GetFeaturesToApply());
+ targetPhrase->EvaluateInIsolation(sourcePhrase, ruleTable.GetFeaturesToApply());
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase, *targetPhrase, sourceLHS);
phraseColl.Add(targetPhrase);
diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp b/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp
index c778ab3dd..48ed91e4b 100644
--- a/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp
+++ b/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp
@@ -270,7 +270,7 @@ void PhraseDictionaryFuzzyMatch::InitializeForInput(InputType const& inputSenten
sourcePhrase.CreateFromString(Input, m_input, sourcePhraseString, &sourceLHS);
// create target phrase obj
- TargetPhrase *targetPhrase = new TargetPhrase();
+ TargetPhrase *targetPhrase = new TargetPhrase(this);
// targetPhrase->CreateFromString(Output, m_output, targetPhraseString, factorDelimiter, &targetLHS);
targetPhrase->CreateFromString(Output, m_output, targetPhraseString, &targetLHS);
@@ -284,7 +284,7 @@ void PhraseDictionaryFuzzyMatch::InitializeForInput(InputType const& inputSenten
std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),FloorScore);
targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
- targetPhrase->Evaluate(sourcePhrase, GetFeaturesToApply());
+ targetPhrase->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply());
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(rootNode, sourcePhrase, *targetPhrase, sourceLHS);
phraseColl.Add(targetPhrase);
diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h
index 19548411c..4deb800f8 100644
--- a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h
+++ b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h
@@ -69,10 +69,6 @@ public:
~PhraseDictionaryOnDisk();
void Load();
- PhraseTableImplementation GetPhraseTableImplementation() const {
- return OnDisk;
- }
-
// PhraseDictionary impl
virtual ChartRuleLookupManager *CreateRuleLookupManager(
const ChartParser &parser,
diff --git a/moses/TranslationModel/SkeletonPT.cpp b/moses/TranslationModel/SkeletonPT.cpp
index 82667e236..4fc3da96b 100644
--- a/moses/TranslationModel/SkeletonPT.cpp
+++ b/moses/TranslationModel/SkeletonPT.cpp
@@ -53,7 +53,7 @@ TargetPhrase *SkeletonPT::CreateTargetPhrase(const Phrase &sourcePhrase) const
string str = sourcePhrase.GetWord(0).GetFactor(0)->GetString().as_string();
str = "SkeletonPT:" + str;
- TargetPhrase *tp = new TargetPhrase();
+ TargetPhrase *tp = new TargetPhrase(this);
Word &word = tp->AddWord();
word.CreateFromString(Output, m_output, str, false);
@@ -62,7 +62,7 @@ TargetPhrase *SkeletonPT::CreateTargetPhrase(const Phrase &sourcePhrase) const
tp->GetScoreBreakdown().PlusEquals(this, scores);
// score of all other ff when this rule is being loaded
- tp->Evaluate(sourcePhrase, GetFeaturesToApply());
+ tp->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply());
return tp;
}
diff --git a/moses/TranslationModel/UG/Jamfile b/moses/TranslationModel/UG/Jamfile
index 547928423..c36d4a072 100644
--- a/moses/TranslationModel/UG/Jamfile
+++ b/moses/TranslationModel/UG/Jamfile
@@ -9,8 +9,41 @@ $(TOP)/moses/TranslationModel/UG//mmsapt
$(TOP)/util//kenutil
;
-exe lookup_mmsapt :
-lookup_mmsapt.cc
+exe ptable-lookup :
+ptable-lookup.cc
+$(TOP)/moses//moses
+$(TOP)/moses/TranslationModel/UG/generic//generic
+$(TOP)//boost_iostreams
+$(TOP)//boost_program_options
+$(TOP)/moses/TranslationModel/UG/mm//mm
+$(TOP)/moses/TranslationModel/UG//mmsapt
+$(TOP)/util//kenutil
+;
+
+exe sim-pe :
+sim-pe.cc
+$(TOP)/moses//moses
+$(TOP)/moses/TranslationModel/UG/generic//generic
+$(TOP)//boost_iostreams
+$(TOP)//boost_program_options
+$(TOP)/moses/TranslationModel/UG/mm//mm
+$(TOP)/moses/TranslationModel/UG//mmsapt
+$(TOP)/util//kenutil
+;
+
+exe spe-check-coverage :
+spe-check-coverage.cc
+$(TOP)/moses//moses
+$(TOP)/moses/TranslationModel/UG/generic//generic
+$(TOP)//boost_iostreams
+$(TOP)//boost_program_options
+$(TOP)/moses/TranslationModel/UG/mm//mm
+$(TOP)/moses/TranslationModel/UG//mmsapt
+$(TOP)/util//kenutil
+;
+
+exe spe-check-coverage2 :
+spe-check-coverage2.cc
$(TOP)/moses//moses
$(TOP)/moses/TranslationModel/UG/generic//generic
$(TOP)//boost_iostreams
@@ -22,4 +55,4 @@ $(TOP)/util//kenutil
install $(PREFIX)/bin : try-align ;
-fakelib mmsapt : [ glob *.cpp mmsapt*.cc ] ;
+fakelib mmsapt : [ glob *.cpp mmsapt*.cc sapt*.cc ] ;
diff --git a/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc b/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc
new file mode 100644
index 000000000..7dc2cd18f
--- /dev/null
+++ b/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc
@@ -0,0 +1,50 @@
+//-*- c++ -*-
+#include "ug_splice_arglist.h"
+#include "moses/Util.h"
+#include "util/exception.hh"
+#include <boost/foreach.hpp>
+
+namespace Moses {
+
+ void
+ filter_arguments(int const argc_in, char const* const* const argv_in,
+ int & argc_moses, char*** argv_moses,
+ int & argc_other, char*** argv_other,
+ vector<pair<string,int> > const& filter)
+ {
+ *argv_moses = new char*[argc_in];
+ *argv_other = new char*[argc_in];
+ (*argv_moses)[0] = new char[strlen(argv_in[0])+1];
+ strcpy((*argv_moses)[0], argv_in[0]);
+ argc_moses = 1;
+ argc_other = 0;
+ typedef pair<string,int> option;
+ int i = 1;
+ while (i < argc_in)
+ {
+ BOOST_FOREACH(option const& o, filter)
+ {
+ if (o.first == argv_in[i])
+ {
+ (*argv_other)[argc_other] = new char[strlen(argv_in[i])+1];
+ strcpy((*argv_other)[argc_other++],argv_in[i]);
+ for (int k = 0; k < o.second; ++k)
+ {
+ UTIL_THROW_IF2(++i >= argc_in || argv_in[i][0] == '-',
+ "[" << HERE << "] Missing argument for "
+ << "parameter " << o.first << "!");
+ (*argv_other)[argc_other] = new char[strlen(argv_in[i])+1];
+ strcpy((*argv_other)[argc_other++],argv_in[i]);
+ }
+ if (++i >= argc_in) break;
+ }
+ }
+ if (i >= argc_in) break;
+ (*argv_moses)[argc_moses] = new char[strlen(argv_in[i])+1];
+ strcpy((*argv_moses)[argc_moses++], argv_in[i++]);
+ }
+ }
+
+} // namespace Moses
+
+
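filter_arguments splits a combined command line into the part that Moses' LoadParam() should see and the part handled by the calling tool; the filter lists each foreign option together with how many values follow it. A hedged usage sketch follows; the option names "--spe-src" and "--spe-trg" and their argument counts are made up for illustration, and the program must be linked against the new ug_splice_arglist.cc:

// Hypothetical usage of Moses::filter_arguments(); option names are illustrative.
#include <cstdio>
#include <string>
#include <utility>
#include <vector>
#include "moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h"

int main() {
  const char* argv_in[] = { "prog", "-f", "moses.ini", "--spe-src", "src.txt",
                            "--spe-trg", "trg.txt", "-threads", "4" };
  const int argc_in = 9;

  // every "foreign" option and the number of arguments that follow it
  std::vector<std::pair<std::string, int> > filter;
  filter.push_back(std::make_pair("--spe-src", 1));
  filter.push_back(std::make_pair("--spe-trg", 1));

  int argc_moses = 0, argc_other = 0;
  char **argv_moses = NULL, **argv_other = NULL;
  Moses::filter_arguments(argc_in, argv_in, argc_moses, &argv_moses,
                          argc_other, &argv_other, filter);

  // expected split: moses gets "prog -f moses.ini -threads 4",
  // the other list gets "--spe-src src.txt --spe-trg trg.txt"
  for (int i = 0; i < argc_moses; ++i) std::printf("moses: %s\n", argv_moses[i]);
  for (int i = 0; i < argc_other; ++i) std::printf("other: %s\n", argv_other[i]);
}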
diff --git a/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h b/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h
new file mode 100644
index 000000000..e56585e8a
--- /dev/null
+++ b/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h
@@ -0,0 +1,18 @@
+//-*- c++ -*-
+#pragma once
+#include <vector>
+#include <string>
+namespace Moses {
+ using namespace std;
+
+  // Function to splice the argument list (e.g., before handing it over to
+  // the Moses LoadParam() function). /filter/ is a vector of argument names
+  // and the number of arguments that follow each of them.
+ void
+ filter_arguments(int const argc_in, char const* const* const argv_in,
+ int & argc_moses, char*** argv_moses,
+ int & argc_other, char*** argv_other,
+ vector<pair<string,int> > const& filter);
+
+
+} // namespace Moses
diff --git a/moses/TranslationModel/UG/lookup_mmsapt.cc b/moses/TranslationModel/UG/lookup_mmsapt.cc
deleted file mode 100644
index e295f1012..000000000
--- a/moses/TranslationModel/UG/lookup_mmsapt.cc
+++ /dev/null
@@ -1,104 +0,0 @@
-#include "mmsapt.h"
-#include <boost/foreach.hpp>
-#include <boost/format.hpp>
-#include <boost/tokenizer.hpp>
-#include <boost/shared_ptr.hpp>
-#include <algorithm>
-#include <iostream>
-
-using namespace Moses;
-using namespace bitext;
-using namespace std;
-using namespace boost;
-
-vector<FactorType> fo(1,FactorType(0));
-
-class SimplePhrase : public Moses::Phrase
-{
- vector<FactorType> const m_fo; // factor order
-public:
- SimplePhrase(): m_fo(1,FactorType(0)) {}
-
- void init(string const& s)
- {
- istringstream buf(s); string w;
- while (buf >> w)
- {
- Word wrd;
- this->AddWord().CreateFromString(Input,m_fo,StringPiece(w),false,false);
- }
- }
-};
-
-class TargetPhraseIndexSorter
-{
- TargetPhraseCollection const& my_tpc;
- CompareTargetPhrase cmp;
-public:
- TargetPhraseIndexSorter(TargetPhraseCollection const& tpc) : my_tpc(tpc) {}
- bool operator()(size_t a, size_t b) const
- {
- return cmp(*my_tpc[a], *my_tpc[b]);
- }
-};
-
-int main(int argc, char* argv[])
-{
- Parameter params;
- if (!params.LoadParam(argc,argv) || !StaticData::LoadDataStatic(&params, argv[0]))
- exit(1);
-
- Mmsapt* PT = NULL;
- BOOST_FOREACH(PhraseDictionary* pd, PhraseDictionary::GetColl())
- if ((PT = dynamic_cast<Mmsapt*>(pd))) break;
- vector<string> const& fname = PT->GetFeatureNames();
-
- // vector<FeatureFunction*> const& ffs = FeatureFunction::GetFeatureFunctions();
-
- string line;
- while (getline(cin,line))
- {
- SimplePhrase p; p.init(line);
- cout << p << endl;
- TargetPhraseCollection const* trg = PT->GetTargetPhraseCollectionLEGACY(p);
- if (!trg) continue;
- vector<size_t> order(trg->GetSize());
- for (size_t i = 0; i < order.size(); ++i) order[i] = i;
- sort(order.begin(),order.end(),TargetPhraseIndexSorter(*trg));
- size_t k = 0;
- // size_t precision =
- cout.precision(2);
-
- BOOST_FOREACH(size_t i, order)
- {
- Phrase const& phr = static_cast<Phrase const&>(*(*trg)[i]);
- cout << setw(3) << ++k << " " << phr << endl;
- ScoreComponentCollection const& scc = (*trg)[i]->GetScoreBreakdown();
- ScoreComponentCollection::IndexPair idx = scc.GetIndexes(PT);
- FVector const& scores = scc.GetScoresVector();
- cout << " ";
- for (size_t k = idx.first; k < idx.second; ++k)
- cout << " " << format("%10.10s") % fname[k-idx.first];
- cout << endl;
- cout << " ";
- for (size_t k = idx.first; k < idx.second; ++k)
- {
- if (fname[k-idx.first].substr(0,3) == "log")
- {
- if(scores[k] < 0)
- cout << " " << format("%10d") % round(exp(-scores[k]));
- else
- cout << " " << format("%10d") % round(exp(scores[k]));
- }
- else
- cout << " " << format("%10.8f") % exp(scores[k]);
- }
- cout << endl;
- }
- PT->Release(trg);
- }
- exit(0);
-}
-
-
-
diff --git a/moses/TranslationModel/UG/mm/Jamfile b/moses/TranslationModel/UG/mm/Jamfile
index 2cc923581..efeae3321 100644
--- a/moses/TranslationModel/UG/mm/Jamfile
+++ b/moses/TranslationModel/UG/mm/Jamfile
@@ -72,15 +72,15 @@ $(TOP)/moses/TranslationModel/UG/mm//mm
$(TOP)/util//kenutil
;
-exe custom-pt :
-custom-pt.cc
-$(TOP)/moses//moses
-$(TOP)//boost_iostreams
-$(TOP)//boost_program_options
-$(TOP)/moses/TranslationModel/UG/mm//mm
-$(TOP)/moses/TranslationModel/UG/generic//generic
-$(TOP)/util//kenutil
-;
+# exe custom-pt :
+# custom-pt.cc
+# $(TOP)/moses//moses
+# $(TOP)//boost_iostreams
+# $(TOP)//boost_program_options
+# $(TOP)/moses/TranslationModel/UG/mm//mm
+# $(TOP)/moses/TranslationModel/UG/generic//generic
+# $(TOP)/util//kenutil
+# ;
exe calc-coverage :
@@ -98,12 +98,11 @@ mtt-dump
mtt-count-words
symal2mam
mam2symal
-custom-pt
mmlex-build
mmlex-lookup
mam_verify
calc-coverage
;
-fakelib mm : [ glob ug_*.cc tpt_*.cc ] ;
+fakelib mm : [ glob ug_*.cc tpt_*.cc num_read_write.cc ] ;
diff --git a/moses/TranslationModel/UG/mm/custom-pt.cc b/moses/TranslationModel/UG/mm/custom-pt.cc
index 086ef42a6..93c8c0eb0 100644
--- a/moses/TranslationModel/UG/mm/custom-pt.cc
+++ b/moses/TranslationModel/UG/mm/custom-pt.cc
@@ -1,6 +1,6 @@
// build a phrase table for the given input
// #include "ug_lexical_phrase_scorer2.h"
-
+#if 0
#include <stdint.h>
#include <string>
#include <vector>
@@ -23,9 +23,8 @@
#include "ug_typedefs.h"
#include "tpt_pickler.h"
#include "ug_bitext.h"
-#include "../mmsapt_phrase_scorers.h"
#include "ug_lexical_phrase_scorer2.h"
-
+#include "../sapt_phrase_scorers.h"
using namespace std;
using namespace ugdiss;
using namespace Moses;
@@ -46,7 +45,7 @@ float lbsmooth = .005;
PScorePfwd<Token> calc_pfwd;
PScorePbwd<Token> calc_pbwd;
PScoreLex<Token> calc_lex(1.0);
-PScoreWP<Token> apply_wp;
+PScoreWC<Token> apply_wp;
vector<float> fweights;
void
@@ -110,6 +109,7 @@ int main(int argc, char* argv[])
{
// assert(argc == 4);
#if 0
+#if 0
string base = argv[1];
string L1 = argv[2];
string L2 = argv[3];
@@ -131,7 +131,7 @@ int main(int argc, char* argv[])
size_t i;
i = calc_pfwd.init(0,.05,'g');
- i = calc_pbwd.init(i,.05);
+ i = calc_pbwd.init(i,.05,'g');
i = calc_lex.init(i,base+L1+"-"+L2+".lex");
i = apply_wp.init(i);
@@ -182,7 +182,7 @@ int main(int argc, char* argv[])
}
}
}
-
+#endif
exit(0);
}
-
+#endif
diff --git a/moses/TranslationModel/UG/mm/mtt-build.cc b/moses/TranslationModel/UG/mm/mtt-build.cc
index 0518e7161..49fd7f6c2 100644
--- a/moses/TranslationModel/UG/mm/mtt-build.cc
+++ b/moses/TranslationModel/UG/mm/mtt-build.cc
@@ -361,7 +361,7 @@ build_mmTSA(string infile, string outfile)
{
size_t mypid = fork();
if(mypid) return mypid;
- shared_ptr<mmTtrack<Token> > T(new mmTtrack<Token>(infile));
+ boost::shared_ptr<mmTtrack<Token> > T(new mmTtrack<Token>(infile));
bdBitset filter;
filter.resize(T->size(),true);
imTSA<Token> S(T,&filter,(quiet?NULL:&cerr));
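Spelling out boost::shared_ptr here presumably sidesteps the ambiguity that appears once both namespaces std and boost are open and the code is compiled as C++11, where std::shared_ptr exists as well. A minimal reproduction of that ambiguity, purely for illustration:

// Why the explicit boost:: qualification helps under C++11.
#include <boost/shared_ptr.hpp>
#include <memory>

using namespace std;
using namespace boost;

int main() {
  // shared_ptr<int> p(new int(42));      // error: ambiguous std:: vs boost::
  boost::shared_ptr<int> p(new int(42));  // explicit qualification compiles
  return *p == 42 ? 0 : 1;
}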
diff --git a/moses/TranslationModel/UG/mm/num_read_write.cc b/moses/TranslationModel/UG/mm/num_read_write.cc
new file mode 100644
index 000000000..403f7d300
--- /dev/null
+++ b/moses/TranslationModel/UG/mm/num_read_write.cc
@@ -0,0 +1,74 @@
+#include "num_read_write.h"
+namespace ugdiss {
+ typedef unsigned char uchar;
+
+ void
+ numwrite(std::ostream& out, uint16_t const& x)
+ {
+ char buf[2];
+ buf[0] = x%256;
+ buf[1] = (x>>8)%256;
+ out.write(buf,2);
+ }
+
+ void
+ numwrite(std::ostream& out, uint32_t const& x)
+ {
+ char buf[4];
+ buf[0] = x%256;
+ buf[1] = (x>>8)%256;
+ buf[2] = (x>>16)%256;
+ buf[3] = (x>>24)%256;
+ out.write(buf,4);
+ }
+
+ void
+ numwrite(std::ostream& out, uint64_t const& x)
+ {
+ char buf[8];
+ buf[0] = x%256;
+ buf[1] = (x>>8)%256;
+ buf[2] = (x>>16)%256;
+ buf[3] = (x>>24)%256;
+ buf[4] = (x>>32)%256;
+ buf[5] = (x>>40)%256;
+ buf[6] = (x>>48)%256;
+ buf[7] = (x>>56)%256;
+ out.write(buf,8);
+ }
+
+ char const*
+ numread(char const* src, uint16_t & x)
+ {
+ uchar const* d = reinterpret_cast<uchar const*>(src);
+ x = (uint16_t(d[0])<<0) | (uint16_t(d[1])<<8);
+ return src+2;
+ }
+
+ char const*
+ numread(char const* src, uint32_t & x)
+ {
+ uchar const* d = reinterpret_cast<uchar const*>(src);
+ x = ((uint32_t(d[0])<<0) |
+ (uint32_t(d[1])<<8) |
+ (uint32_t(d[2])<<16)|
+ (uint32_t(d[3])<<24));
+ return src+4;
+ }
+
+ char const*
+ numread(char const* src, uint64_t & x)
+ {
+ uchar const* d = reinterpret_cast<uchar const*>(src);
+ x = ((uint64_t(d[0])<<0) |
+ (uint64_t(d[1])<<8) |
+ (uint64_t(d[2])<<16) |
+ (uint64_t(d[3])<<24) |
+ (uint64_t(d[4])<<32) |
+ (uint64_t(d[5])<<40) |
+ (uint64_t(d[6])<<48) |
+ (uint64_t(d[7])<<56));
+ return src+8;
+ }
+
+}
diff --git a/moses/TranslationModel/UG/mm/num_read_write.h b/moses/TranslationModel/UG/mm/num_read_write.h
index 96630f4b0..6fdcecc81 100644
--- a/moses/TranslationModel/UG/mm/num_read_write.h
+++ b/moses/TranslationModel/UG/mm/num_read_write.h
@@ -1,66 +1,78 @@
// -*- c++ -*-
// (c) 2006,2007,2008 Ulrich Germann
-#ifndef __num_read_write_hh
-#define __num_read_write_hh
+// #ifndef __num_read_write_hh
+// #define __num_read_write_hh
+#pragma once
#include <stdint.h>
#include <iostream>
-#include <endian.h>
-#include <byteswap.h>
-#include "tpt_typedefs.h"
+// #include <endian.h>
+// #include <byteswap.h>
+// #include "tpt_typedefs.h"
namespace ugdiss {
+
+ void numwrite(std::ostream& out, uint16_t const& x);
+ void numwrite(std::ostream& out, uint32_t const& x);
+ void numwrite(std::ostream& out, uint64_t const& x);
+
+ char const* numread(char const* src, uint16_t & x);
+ char const* numread(char const* src, uint32_t & x);
+ char const* numread(char const* src, uint64_t & x);
-template<typename uintNumber>
-void
-numwrite(std::ostream& out, uintNumber const& x)
-{
-#if __BYTE_ORDER == __BIG_ENDIAN
- uintNumber y;
- switch (sizeof(uintNumber))
- {
- case 2: y = bswap_16(x); break;
- case 4: y = bswap_32(x); break;
- case 8: y = bswap_64(x); break;
- default: y = x;
- }
- out.write(reinterpret_cast<char*>(&y),sizeof(y));
-#else
- out.write(reinterpret_cast<char const*>(&x),sizeof(x));
-#endif
-}
+// template<typename uintNumber>
+// void
+// numwrite(std::ostream& out, uintNumber const& x)
+// {
+// uchar const* c = reinterpret_cast<uchar const*>(&x);
+// for (size_t i = 0; i < sizeof(x); ++i)
+// out.write(c
+// #if __BYTE_ORDER == __BIG_ENDIAN
+// uintNumber y;
+// switch (sizeof(uintNumber))
+// {
+// case 2: y = bswap_16(x); break;
+// case 4: y = bswap_32(x); break;
+// case 8: y = bswap_64(x); break;
+// default: y = x;
+// }
+// out.write(reinterpret_cast<char*>(&y),sizeof(y));
+// #else
+// out.write(reinterpret_cast<char const*>(&x),sizeof(x));
+// #endif
+// }
-template<typename uintNumber>
-void
-numread(std::istream& in, uintNumber& x)
-{
- in.read(reinterpret_cast<char*>(&x),sizeof(uintNumber));
-#if __BYTE_ORDER == __BIG_ENDIAN
- switch (sizeof(uintNumber))
- {
- case 2: x = bswap_16(x); break;
- case 4: x = bswap_32(x); break;
- case 8: x = bswap_64(x); break;
- default: break;
- }
-#endif
-}
+// template<typename uintNumber>
+// void
+// numread(std::istream& in, uintNumber& x)
+// {
+// in.read(reinterpret_cast<char*>(&x),sizeof(uintNumber));
+// #if __BYTE_ORDER == __BIG_ENDIAN
+// switch (sizeof(uintNumber))
+// {
+// case 2: x = bswap_16(x); break;
+// case 4: x = bswap_32(x); break;
+// case 8: x = bswap_64(x); break;
+// default: break;
+// }
+// #endif
+// }
-template<typename uintNumber>
-char const*
-numread(char const* src, uintNumber& x)
-{
- // ATTENTION: THIS NEEDS TO BE VERIFIED FOR BIG-ENDIAN MACHINES!!!
- x = *reinterpret_cast<uintNumber const*>(src);
-#if __BYTE_ORDER == __BIG_ENDIAN
- switch (sizeof(uintNumber))
- {
- case 2: x = bswap_16(x); break;
- case 4: x = bswap_32(x); break;
- case 8: x = bswap_64(x); break;
- default: break;
- }
-#endif
- return src+sizeof(uintNumber);
-}
+// template<typename uintNumber>
+// char const*
+// numread(char const* src, uintNumber& x)
+// {
+// // ATTENTION: THIS NEEDS TO BE VERIFIED FOR BIG-ENDIAN MACHINES!!!
+// x = *reinterpret_cast<uintNumber const*>(src);
+// #if __BYTE_ORDER == __BIG_ENDIAN
+// switch (sizeof(uintNumber))
+// {
+// case 2: x = bswap_16(x); break;
+// case 4: x = bswap_32(x); break;
+// case 8: x = bswap_64(x); break;
+// default: break;
+// }
+// #endif
+// return src+sizeof(uintNumber);
+// }
} // end of namespace ugdiss
-#endif
+//#endif
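
The replacement num_read_write routines above spell out the byte order explicitly (least-significant byte first) instead of relying on <endian.h>/<byteswap.h>, so data written on one host reads back identically on another. A minimal round-trip sketch, assuming only the declarations added in this patch:

// round_trip.cc -- minimal sketch using the helpers declared above.
// Assumes num_read_write.h/.cc from this patch are on the include/link path.
#include <cassert>
#include <sstream>
#include <string>
#include <stdint.h>
#include "num_read_write.h"

int main()
{
  uint32_t x = 0xDEADBEEF, y = 0;
  std::ostringstream out;
  ugdiss::numwrite(out, x);         // 4 bytes, least-significant byte first
  std::string buf = out.str();
  ugdiss::numread(buf.data(), y);   // reassemble the value byte by byte
  assert(x == y);
  return 0;
}
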
diff --git a/moses/TranslationModel/UG/mm/tpt_pickler.cc b/moses/TranslationModel/UG/mm/tpt_pickler.cc
index 87fad195c..c23913fc2 100644
--- a/moses/TranslationModel/UG/mm/tpt_pickler.cc
+++ b/moses/TranslationModel/UG/mm/tpt_pickler.cc
@@ -268,9 +268,7 @@ namespace ugdiss
}
- template<>
- char const*
- binread<uint16_t>(char const* p, uint16_t& buf)
+ char const *binread(char const* p, uint16_t& buf)
{
static char mask = 127;
buf = (*p)&mask;
@@ -286,9 +284,14 @@ namespace ugdiss
return p;
}
- template<>
- char const*
- binread<uint32_t>(char const* p, uint32_t& buf)
+#ifdef __clang__
+ char const *binread(char const* p, size_t& buf)
+ {
+ return binread(p, (uint32_t&) buf);
+ }
+#endif
+
+ char const *binread(char const* p, uint32_t& buf)
{
static char mask = 127;
@@ -325,9 +328,7 @@ namespace ugdiss
return ++p;
}
- template<>
- char const*
- binread<filepos_type>(char const* p, filepos_type& buf)
+ char const *binread(char const* p, filepos_type& buf)
{
static char mask = 127;
@@ -394,9 +395,7 @@ namespace ugdiss
return ++p;
}
- template<>
- char const*
- binread<float>(char const* p, float& buf)
+ char const *binread(char const* p, float& buf)
{
buf = *reinterpret_cast<float const*>(p);
return p+sizeof(float);
diff --git a/moses/TranslationModel/UG/mm/tpt_pickler.h b/moses/TranslationModel/UG/mm/tpt_pickler.h
index fa603b3b6..e98ba88fe 100644
--- a/moses/TranslationModel/UG/mm/tpt_pickler.h
+++ b/moses/TranslationModel/UG/mm/tpt_pickler.h
@@ -42,6 +42,14 @@ namespace ugdiss
void binread(std::istream& in, std::string &data);
void binread(std::istream& in, float &data);
+ char const *binread(char const* p, uint16_t& buf);
+ char const *binread(char const* p, uint32_t& buf);
+ char const *binread(char const* p, filepos_type& buf);
+ char const *binread(char const* p, float& buf);
+#ifdef __clang__
+ char const *binread(char const* p, size_t& buf);
+#endif
+
std::ostream& write(std::ostream& out, char x);
std::ostream& write(std::ostream& out, unsigned char x);
std::ostream& write(std::ostream& out, short x);
@@ -58,6 +66,7 @@ namespace ugdiss
std::istream& read(std::istream& in, size_t& x);
std::istream& read(std::istream& in, float& x);
+ /*
template<typename WHATEVER>
char const*
binread(char const* p, WHATEVER* buf);
@@ -65,6 +74,7 @@ namespace ugdiss
template<typename numtype>
char const*
binread(char const* p, numtype& buf);
+ */
template<typename K, typename V>
void binwrite(std::ostream& out, std::pair<K,V> const& data);
@@ -93,11 +103,11 @@ namespace ugdiss
template<typename V>
char const* binread(char const* p, std::vector<V>& v)
{
- size_t vsize;
+ size_t vsize;
#ifdef VERIFY_TIGHT_PACKING
assert(p);
#endif
- p = binread(p,vsize);
+ p = binread(p, vsize);
v.resize(vsize);
for (size_t i = 0; i < vsize; ++i)
p = binread(p,v[i]);
@@ -199,9 +209,6 @@ namespace ugdiss
return binread(p,*buf);
}
- template<typename numtype>
- char const*
- binread(char const* p, numtype& buf);
} // end namespace ugdiss
#endif
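
The binread overloads above decode integers stored in 7-bit groups (mask = 127), a variable-length scheme that keeps small values compact. The sketch below shows the general idea with a self-contained encoder/decoder; it is illustrative only and is not guaranteed to be byte-compatible with the tpt_pickler format (the exact continuation-bit convention should be checked against binwrite):

// varint_sketch.cc -- illustrative 7-bit variable-length integer codec in the
// spirit of binread/binwrite above; layout conventions here are assumptions.
#include <cassert>
#include <string>
#include <stdint.h>

std::string encode(uint32_t v)
{
  std::string out;
  while (v >= 128)
    {
      out += char((v & 127) | 128); // low 7 bits, high bit = "more follows"
      v >>= 7;
    }
  out += char(v);                   // final byte has the high bit clear
  return out;
}

char const* decode(char const* p, uint32_t& v)
{
  v = 0;
  int shift = 0;
  while (true)
    {
      unsigned char c = *p++;
      v |= uint32_t(c & 127) << shift;
      if (!(c & 128)) break;        // high bit clear => last byte
      shift += 7;
    }
  return p; // like binread, return the position just past the consumed bytes
}

int main()
{
  uint32_t x = 0;
  decode(encode(300).c_str(), x);
  assert(x == 300);
  return 0;
}
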
diff --git a/moses/TranslationModel/UG/mm/ug_bitext.cc b/moses/TranslationModel/UG/mm/ug_bitext.cc
index c4f5175f3..2a3fe50ec 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext.cc
+++ b/moses/TranslationModel/UG/mm/ug_bitext.cc
@@ -158,187 +158,35 @@ namespace Moses
jstats::
invalidate()
{
- my_rcnt = 0;
+ if (my_wcnt > 0)
+ my_wcnt *= -1;
}
- bool
+ void
jstats::
- valid()
- {
- return my_rcnt != 0;
- }
-
- bool
- PhrasePair::
- operator<=(PhrasePair const& other) const
+ validate()
{
- return this->score <= other.score;
+ if (my_wcnt < 0)
+ my_wcnt *= -1;
}
bool
- PhrasePair::
- operator>=(PhrasePair const& other) const
- {
- return this->score >= other.score;
- }
-
- bool
- PhrasePair::
- operator<(PhrasePair const& other) const
- {
- return this->score < other.score;
- }
-
- bool
- PhrasePair::
- operator>(PhrasePair const& other) const
- {
- return this->score > other.score;
- }
-
- PhrasePair::
- PhrasePair() {}
-
- PhrasePair::
- PhrasePair(PhrasePair const& o)
- : p1(o.p1),
- p2(o.p2),
- raw1(o.raw1),
- raw2(o.raw2),
- sample1(o.sample1),
- sample2(o.sample2),
- good1(o.good1),
- good2(o.good2),
- joint(o.joint),
- fvals(o.fvals),
- aln(o.aln),
- score(o.score)
- {
- for (size_t i = 0; i <= po_other; ++i)
- {
- dfwd[i] = o.dfwd[i];
- dbwd[i] = o.dbwd[i];
- }
- }
-
- void
- PhrasePair::
- init(uint64_t const pid1, pstats const& ps, size_t const numfeats)
+ jstats::
+ valid()
{
- p1 = pid1;
- p2 = 0;
- raw1 = ps.raw_cnt;
- sample1 = ps.sample_cnt;
- sample2 = 0;
- good1 = ps.good;
- good2 = 0;
- raw2 = 0;
- fvals.resize(numfeats);
+ return my_wcnt >= 0;
}
- void
- PhrasePair::
- init(uint64_t const pid1,
- pstats const& ps1,
- pstats const& ps2,
- size_t const numfeats)
- {
- p1 = pid1;
- raw1 = ps1.raw_cnt + ps2.raw_cnt;
- sample1 = ps1.sample_cnt + ps2.sample_cnt;
- sample2 = 0;
- good1 = ps1.good + ps2.good;
- good2 = 0;
- fvals.resize(numfeats);
- }
float
lbop(size_t const tries, size_t const succ, float const confidence)
{
- return
- boost::math::binomial_distribution<>::
- find_lower_bound_on_p(tries, succ, confidence);
+ return (confidence == 0
+ ? float(succ)/tries
+ : (boost::math::binomial_distribution<>::
+ find_lower_bound_on_p(tries, succ, confidence)));
}
- PhrasePair const&
- PhrasePair::
- update(uint64_t const pid2, jstats const& js)
- {
- p2 = pid2;
- raw2 = js.cnt2();
- joint = js.rcnt();
- assert(js.aln().size());
- if (js.aln().size())
- aln = js.aln()[0].second;
- float total_fwd = 0, total_bwd = 0;
- for (int i = po_first; i <= po_other; i++)
- {
- PhraseOrientation po = static_cast<PhraseOrientation>(i);
- total_fwd += js.dcnt_fwd(po)+1;
- total_bwd += js.dcnt_bwd(po)+1;
- }
- for (int i = po_first; i <= po_other; i++)
- {
- PhraseOrientation po = static_cast<PhraseOrientation>(i);
- dfwd[i] = float(js.dcnt_fwd(po)+1)/total_fwd;
- dbwd[i] = float(js.dcnt_bwd(po)+1)/total_bwd;
- }
- return *this;
- }
-
- PhrasePair const&
- PhrasePair::
- update(uint64_t const pid2, jstats const& js1, jstats const& js2)
- {
- p2 = pid2;
- raw2 = js1.cnt2() + js2.cnt2();
- joint = js1.rcnt() + js2.rcnt();
- assert(js1.aln().size() || js2.aln().size());
- if (js1.aln().size())
- aln = js1.aln()[0].second;
- else if (js2.aln().size())
- aln = js2.aln()[0].second;
- for (int i = po_first; i < po_other; i++)
- {
- PhraseOrientation po = static_cast<PhraseOrientation>(i);
- dfwd[i] = float(js1.dcnt_fwd(po) + js2.dcnt_fwd(po) + 1)/(sample1+po_other);
- dbwd[i] = float(js1.dcnt_bwd(po) + js2.dcnt_bwd(po) + 1)/(sample1+po_other);
- }
- return *this;
- }
-
- PhrasePair const&
- PhrasePair::
- update(uint64_t const pid2,
- size_t const raw2extra,
- jstats const& js)
- {
- p2 = pid2;
- raw2 = js.cnt2() + raw2extra;
- joint = js.rcnt();
- assert(js.aln().size());
- if (js.aln().size())
- aln = js.aln()[0].second;
- for (int i = po_first; i <= po_other; i++)
- {
- PhraseOrientation po = static_cast<PhraseOrientation>(i);
- dfwd[i] = float(js.dcnt_fwd(po)+1)/(sample1+po_other);
- dbwd[i] = float(js.dcnt_bwd(po)+1)/(sample1+po_other);
- }
- return *this;
- }
-
- float
- PhrasePair::
- eval(vector<float> const& w)
- {
- assert(w.size() == this->fvals.size());
- this->score = 0;
- for (size_t i = 0; i < w.size(); ++i)
- this->score += w[i] * this->fvals[i];
- return this->score;
- }
-
template<>
sptr<imBitext<L2R_Token<SimpleWordId> > >
imBitext<L2R_Token<SimpleWordId> >::
@@ -355,7 +203,7 @@ namespace Moses
sptr<imBitext<TKN> > ret;
{
- lock_guard<mutex> guard(this->lock);
+ boost::lock_guard<boost::mutex> guard(this->lock);
ret.reset(new imBitext<TKN>(*this));
}
@@ -370,7 +218,8 @@ namespace Moses
uint32_t row,col; char c;
while (ibuf >> row >> c >> col)
{
- assert(c == '-');
+ UTIL_THROW_IF2(c != '-', "[" << HERE << "] "
+ << "Error in alignment information:\n" << a);
binwrite(obuf,row);
binwrite(obuf,col);
}
@@ -638,7 +487,6 @@ namespace Moses
cout << string(90,'-') << endl;
}
-
PhraseOrientation
find_po_fwd(vector<vector<ushort> >& a1,
vector<vector<ushort> >& a2,
@@ -653,13 +501,13 @@ namespace Moses
ushort ns1,ne1,ne2;
if (!expand_phrase_pair(a1,a2,n2,b1,e1,ns1,ne1,ne2))
- {
- return po_other;
- }
+ return po_other;
+
if (ns1 >= e1)
{
for (ushort j = e1; j < ns1; ++j)
- if (a1[j].size()) return po_jfwd;
+ if (a1[j].size())
+ return po_jfwd;
return po_mono;
}
else
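
The reworked lbop above falls back to the raw relative frequency when the confidence parameter is 0 and otherwise returns Boost.Math's lower bound on the binomial success probability. A minimal sketch of the behaviour, with the function body copied from the patch and illustrative inputs:

// lbop_sketch.cc -- sketch of the lower-bound-on-p smoothing used above.
#include <cstddef>
#include <iostream>
#include <boost/math/distributions/binomial.hpp>

float lbop(size_t tries, size_t succ, float confidence)
{
  return (confidence == 0
          ? float(succ) / tries
          : boost::math::binomial_distribution<>::
            find_lower_bound_on_p(tries, succ, confidence));
}

int main()
{
  // 3 successes in 10 samples: with confidence 0 this is exactly 0.3;
  // with a nonzero parameter the estimate is pulled below 0.3
  // (lower for 0.01 than for 0.05).
  std::cout << lbop(10, 3, 0)    << "\n"
            << lbop(10, 3, 0.05) << "\n"
            << lbop(10, 3, 0.01) << "\n";
  return 0;
}
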
diff --git a/moses/TranslationModel/UG/mm/ug_bitext.h b/moses/TranslationModel/UG/mm/ug_bitext.h
index 397253973..01d8187d6 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext.h
+++ b/moses/TranslationModel/UG/mm/ug_bitext.h
@@ -56,6 +56,7 @@ namespace Moses {
class Mmsapt;
namespace bitext
{
+ template<typename TKN> class Bitext;
using namespace ugdiss;
template<typename TKN> class Bitext;
@@ -120,6 +121,7 @@ namespace Moses {
void add(float w, vector<uchar> const& a, uint32_t const cnt2,
uint32_t fwd_orient, uint32_t bwd_orient);
void invalidate();
+ void validate();
bool valid();
uint32_t dcnt_fwd(PhraseOrientation const idx) const;
uint32_t dcnt_bwd(PhraseOrientation const idx) const;
@@ -141,7 +143,7 @@ namespace Moses {
uint32_t ofwd[po_other+1], obwd[po_other+1];
// typedef typename boost::unordered_map<uint64_t, jstats> trg_map_t;
- typedef typename std::map<uint64_t, jstats> trg_map_t;
+ typedef std::map<uint64_t, jstats> trg_map_t;
trg_map_t trg;
pstats();
~pstats();
@@ -157,43 +159,6 @@ namespace Moses {
uint32_t fwd_o, uint32_t bwd_o);
};
- class
- PhrasePair
- {
- public:
- uint64_t p1, p2;
- uint32_t raw1,raw2,sample1,sample2,good1,good2,joint;
- vector<float> fvals;
- float dfwd[po_other+1];
- float dbwd[po_other+1];
- vector<uchar> aln;
- // float avlex12,avlex21; // average lexical probs (Moses std)
- // float znlex1,znlex2; // zens-ney lexical smoothing
- // float colex1,colex2; // based on raw lexical occurrences
- float score;
- PhrasePair();
- PhrasePair(PhrasePair const& o);
- bool operator<(PhrasePair const& other) const;
- bool operator>(PhrasePair const& other) const;
- bool operator<=(PhrasePair const& other) const;
- bool operator>=(PhrasePair const& other) const;
-
- void init(uint64_t const pid1, pstats const& ps, size_t const numfeats);
- void init(uint64_t const pid1, pstats const& ps1, pstats const& ps2,
- size_t const numfeats);
-
- PhrasePair const&
- update(uint64_t const pid2, jstats const& js);
-
- PhrasePair const&
- update(uint64_t const pid2, jstats const& js1, jstats const& js2);
-
- PhrasePair const&
- update(uint64_t const pid2, size_t const raw2extra, jstats const& js);
-
- float eval(vector<float> const& w);
- };
-
template<typename TKN>
class Bitext
@@ -534,6 +499,16 @@ namespace Moses {
aln[k] += s2 - s1;
Token const* o = (j->fwd ? ag.bt.T2 : ag.bt.T1)->sntStart(sid);
float sample_weight = 1./((s2-s1+1)*(e2-e1+1));
+
+ vector<uint64_t> seen;
+ seen.reserve(100);
+ // It is possible that the phrase extraction extracts the same
+ // phrase twice, e.g., when word a co-occurs with sequence b b b
+ // but is aligned only to the middle word. We can only count
+ // each phrase pair once per source phrase occurrence, or else
+ // run the risk of having more joint counts than marginal
+ // counts.
+
for (size_t s = s1; s <= s2; ++s)
{
sptr<iter> b = (j->fwd ? ag.bt.I2 : ag.bt.I1)->find(o+s,e1-s);
@@ -542,7 +517,26 @@ namespace Moses {
// assert(b);
for (size_t i = e1; i <= e2; ++i)
{
- if (! j->stats->add(b->getPid(),sample_weight,aln,
+ uint64_t tpid = b->getPid();
+ size_t s = 0;
+ while (s < seen.size() && seen[s] != tpid) ++s;
+ if (s < seen.size())
+ {
+#if 0
+ size_t sid, off, len;
+ parse_pid(tpid,sid,off,len);
+ cerr << "HA, gotcha! " << sid << ":" << off << " at " << HERE << endl;
+ for (size_t z = 0; z < len; ++z)
+ {
+ id_type tid = ag.bt.T2->sntStart(sid)[off+z].id();
+ cerr << (*ag.bt.V2)[tid] << " ";
+ }
+ cerr << endl;
+#endif
+ continue;
+ }
+ seen.push_back(tpid);
+ if (! j->stats->add(tpid,sample_weight,aln,
b->approxOccurrenceCount(),
po_fwd,po_bwd))
{
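
The comment above explains why a small `seen` vector is threaded through the sampling loop: the same target phrase id can be extracted more than once from a single source phrase occurrence, and counting it twice would let joint counts exceed the marginals. A self-contained sketch of that guard, using hypothetical phrase ids:

// dedup_sketch.cc -- count each target phrase id once per source occurrence,
// with the same linear scan over a small `seen` vector as the patch above
// (the per-occurrence candidate set is tiny, so a std::set would be overkill).
#include <cassert>
#include <cstddef>
#include <vector>
#include <stdint.h>

int main()
{
  // hypothetical target phrase ids produced for ONE source phrase occurrence;
  // 42 shows up twice (e.g. word a aligned into the middle of "b b b")
  uint64_t extracted[] = { 7, 42, 42, 99 };
  std::vector<uint64_t> seen;
  seen.reserve(100);
  size_t joint = 0;
  for (size_t i = 0; i < sizeof(extracted)/sizeof(*extracted); ++i)
    {
      uint64_t tpid = extracted[i];
      size_t s = 0;
      while (s < seen.size() && seen[s] != tpid) ++s;
      if (s < seen.size()) continue;  // already counted for this occurrence
      seen.push_back(tpid);
      ++joint;                        // in the real code: j->stats->add(tpid, ...)
    }
  assert(joint == 3);                 // 42 contributes only once
  return 0;
}
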
diff --git a/moses/TranslationModel/UG/mm/ug_im_tsa.h b/moses/TranslationModel/UG/mm/ug_im_tsa.h
index 1de45d877..6b4a83f72 100644
--- a/moses/TranslationModel/UG/mm/ug_im_tsa.h
+++ b/moses/TranslationModel/UG/mm/ug_im_tsa.h
@@ -52,12 +52,12 @@ namespace ugdiss
public:
imTSA();
- imTSA(shared_ptr<Ttrack<TOKEN> const> c,
+ imTSA(boost::shared_ptr<Ttrack<TOKEN> const> c,
bdBitset const* filt,
ostream* log = NULL);
imTSA(imTSA<TOKEN> const& prior,
- shared_ptr<imTtrack<TOKEN> const> const& crp,
+ boost::shared_ptr<imTtrack<TOKEN> const> const& crp,
vector<id_type> const& newsids, size_t const vsize);
count_type
@@ -140,7 +140,7 @@ namespace ugdiss
// specified in filter
template<typename TOKEN>
imTSA<TOKEN>::
- imTSA(shared_ptr<Ttrack<TOKEN> const> c, bdBitset const* filter, ostream* log)
+ imTSA(boost::shared_ptr<Ttrack<TOKEN> const> c, bdBitset const* filter, ostream* log)
{
assert(c);
this->corpus = c;
@@ -359,7 +359,7 @@ namespace ugdiss
template<typename TOKEN>
imTSA<TOKEN>::
imTSA(imTSA<TOKEN> const& prior,
- shared_ptr<imTtrack<TOKEN> const> const& crp,
+ boost::shared_ptr<imTtrack<TOKEN> const> const& crp,
vector<id_type> const& newsids, size_t const vsize)
{
typename ttrack::Position::LESS<Ttrack<TOKEN> > sorter(crp.get());
diff --git a/moses/TranslationModel/UG/mm/ug_im_ttrack.h b/moses/TranslationModel/UG/mm/ug_im_ttrack.h
index 05066c922..ac49ebcd4 100644
--- a/moses/TranslationModel/UG/mm/ug_im_ttrack.h
+++ b/moses/TranslationModel/UG/mm/ug_im_ttrack.h
@@ -16,6 +16,9 @@
#include "tpt_tokenindex.h"
#include "ug_ttrack_base.h"
#include "tpt_tokenindex.h"
+#include "util/exception.hh"
+#include "moses/Util.h"
+
// #include "ug_vocab.h"
// define the corpus buffer size (in sentences) and the
@@ -49,10 +52,12 @@ namespace ugdiss
typename boost::shared_ptr<imTtrack<Token> >
append<Token>(typename boost::shared_ptr<imTtrack<Token> > const & crp, vector<Token> const & snt);
+ void m_check_token_count(); // debugging function
+
public:
imTtrack(boost::shared_ptr<vector<vector<Token> > > const& d);
- imTtrack(istream& in, TokenIndex const& V, ostream* log);
+ imTtrack(istream& in, TokenIndex const& V, ostream* log = NULL);
imTtrack(size_t reserve = 0);
// imTtrack(istream& in, Vocab& V);
@@ -70,6 +75,22 @@ namespace ugdiss
};
template<typename Token>
+ void
+ imTtrack<Token>::
+ m_check_token_count()
+ { // sanity check
+ size_t check = 0;
+ BOOST_FOREACH(vector<Token> const& s, *myData)
+ check += s.size();
+ UTIL_THROW_IF2(check != this->numToks, "[" << HERE << "]"
+ << " Wrong token count after appending sentence!"
+ << " Counted " << check << " but expected "
+ << this->numToks << " in a total of " << myData->size()
+ << " sentences.");
+
+ }
+
+ template<typename Token>
Token const*
imTtrack<Token>::
sntStart(size_t sid) const // return pointer to beginning of sentence
@@ -110,10 +131,10 @@ namespace ugdiss
template<typename Token>
imTtrack<Token>::
- imTtrack(istream& in, TokenIndex const& V, ostream* log = NULL)
+ imTtrack(istream& in, TokenIndex const& V, ostream* log)
+ : numToks(0)
{
myData.reset(new vector<vector<Token> >());
- numToks = 0;
string line,w;
size_t linectr=0;
boost::unordered_map<string,id_type> H;
@@ -135,6 +156,7 @@ namespace ugdiss
template<typename Token>
imTtrack<Token>::
imTtrack(size_t reserve)
+ : numToks(0)
{
myData.reset(new vector<vector<Token> >());
if (reserve) myData->reserve(reserve);
@@ -143,9 +165,9 @@ namespace ugdiss
template<typename Token>
imTtrack<Token>::
imTtrack(boost::shared_ptr<vector<vector<Token> > > const& d)
+ : numToks(0)
{
myData = d;
- numToks = 0;
BOOST_FOREACH(vector<Token> const& v, *d)
numToks += v.size();
}
@@ -168,10 +190,13 @@ namespace ugdiss
/// add a sentence to the database
template<typename TOKEN>
- shared_ptr<imTtrack<TOKEN> >
- append(shared_ptr<imTtrack<TOKEN> > const& crp, vector<TOKEN> const & snt)
+ boost::shared_ptr<imTtrack<TOKEN> >
+ append(boost::shared_ptr<imTtrack<TOKEN> > const& crp, vector<TOKEN> const & snt)
{
- shared_ptr<imTtrack<TOKEN> > ret;
+#if 1
+ if (crp) crp->m_check_token_count();
+#endif
+ boost::shared_ptr<imTtrack<TOKEN> > ret;
if (crp == NULL)
{
ret.reset(new imTtrack<TOKEN>());
@@ -185,6 +210,11 @@ namespace ugdiss
}
else ret = crp;
ret->myData->push_back(snt);
+ ret->numToks += snt.size();
+
+#if 1
+ ret->m_check_token_count();
+#endif
return ret;
}
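
The new m_check_token_count(), together with the numToks initialisation in the constructors and the update in append, enforces one invariant: the cached token count equals the sum of the stored sentence lengths. A toy sketch of that bookkeeping (stand-in types, ints as tokens):

// token_count_sketch.cc -- sketch of the invariant checked above.
#include <cassert>
#include <cstddef>
#include <vector>

struct TinyTtrack                       // stand-in for imTtrack<Token>
{
  std::vector<std::vector<int> > data;
  size_t numToks;
  TinyTtrack() : numToks(0) {}          // the patch adds this initialisation
  void append(std::vector<int> const& snt)
  {
    data.push_back(snt);
    numToks += snt.size();              // ... and this update on append
    check();
  }
  void check() const                    // mirrors m_check_token_count()
  {
    size_t c = 0;
    for (size_t i = 0; i < data.size(); ++i) c += data[i].size();
    assert(c == numToks);
  }
};

int main()
{
  TinyTtrack t;
  t.append(std::vector<int>(3, 1));
  t.append(std::vector<int>(5, 2));
  assert(t.numToks == 8);
  return 0;
}
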
diff --git a/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h b/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h
index 558b5a7fa..b7e359223 100644
--- a/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h
+++ b/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h
@@ -27,7 +27,6 @@ namespace ugdiss
typedef mm2dTable<id_type,id_type,uint32_t,uint32_t> table_t;
table_t COOC;
void open(string const& fname);
-
template<typename someint>
void
score(TKN const* snt1, size_t const s1, size_t const e1,
@@ -104,7 +103,19 @@ namespace ugdiss
if (COOC.m1(s) == 0 || COOC.m2(t) == 0) return 1.0;
UTIL_THROW_IF2(alpha < 0,"At " << __FILE__ << ":" << __LINE__
<< ": alpha parameter must be >= 0");
- return float(COOC[s][t]+alpha)/(COOC.m1(s)+alpha);
+ float ret = COOC[s][t]+alpha;
+ ret = (ret?ret:1.)/(COOC.m1(s)+alpha);
+ UTIL_THROW_IF2(ret <= 0 || ret > 1, "At " << __FILE__ << ":" << __LINE__
+ << ": result not > 0 and <= 1. alpha = " << alpha << "; "
+ << COOC[s][t] << "/" << COOC.m1(s));
+
+#if 0
+ cerr << "[" << s << "," << t << "] "
+ << COOC.m1(s) << "/"
+ << COOC[s][t] << "/"
+ << COOC.m2(t) << endl;
+#endif
+ return ret;
}
template<typename TKN>
@@ -115,7 +126,11 @@ namespace ugdiss
if (COOC.m1(s) == 0 || COOC.m2(t) == 0) return 1.0;
UTIL_THROW_IF2(alpha < 0,"At " << __FILE__ << ":" << __LINE__
<< ": alpha parameter must be >= 0");
- return float(COOC[s][t]+alpha)/(COOC.m2(t)+alpha);
+ float ret = float(COOC[s][t]+alpha);
+ ret = (ret?ret:1.)/(COOC.m2(t)+alpha);
+ UTIL_THROW_IF2(ret <= 0 || ret > 1, "At " << __FILE__ << ":" << __LINE__
+ << ": result not > 0 and <= 1.");
+ return ret;
}
template<typename TKN>
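
The scorer change above replaces the bare (count+alpha)/(marginal+alpha) ratio with a guarded version: if both the raw count and alpha are zero, the numerator falls back to 1, and the result is checked to lie in (0, 1]. A simplified sketch with a single marginal and a hypothetical helper name:

// lex_smooth_sketch.cc -- sketch of the smoothed lexical probability above,
// simplified to one marginal; plob is a hypothetical name for illustration.
#include <cassert>
#include <cmath>

float plob(float cooc, float marginal, float alpha)
{
  if (marginal == 0) return 1.0f;           // unseen words score 1.0, as above
  float ret = cooc + alpha;
  ret = (ret ? ret : 1.f) / (marginal + alpha);
  assert(ret > 0 && ret <= 1);              // the real code throws instead
  return ret;
}

int main()
{
  assert(std::fabs(plob(3, 10, 0.5f) - 3.5f/10.5f) < 1e-6); // add-alpha ratio
  assert(std::fabs(plob(0, 10, 0.0f) - 0.1f)       < 1e-6); // fallback numerator 1
  assert(plob(0, 0, 0.0f) == 1.0f);                         // no marginal counts
  return 0;
}
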
diff --git a/moses/TranslationModel/UG/mm/ug_mm_ttrack.h b/moses/TranslationModel/UG/mm/ug_mm_ttrack.h
index 2be6e6de5..51ba21778 100644
--- a/moses/TranslationModel/UG/mm/ug_mm_ttrack.h
+++ b/moses/TranslationModel/UG/mm/ug_mm_ttrack.h
@@ -93,7 +93,7 @@ namespace ugdiss
assert(myfile.is_open());
Moses::prime(myfile);
filepos_type idxOffset;
- char* p = myfile.data();
+ const char* p = myfile.data();
id_type numSent,numWords;
p = numread(p,idxOffset);
p = numread(p,numSent);
diff --git a/moses/TranslationModel/UG/mm/ug_phrasepair.cc b/moses/TranslationModel/UG/mm/ug_phrasepair.cc
new file mode 100644
index 000000000..6373f8468
--- /dev/null
+++ b/moses/TranslationModel/UG/mm/ug_phrasepair.cc
@@ -0,0 +1,97 @@
+#include "ug_phrasepair.h"
+namespace Moses {
+ namespace bitext
+ {
+
+#if 0
+ void
+ PhrasePair::
+ init()
+ {
+ p1 = p2 = raw1 = raw2 = sample1 = sample2 = good1 = good2 = joint = 0;
+ }
+
+ void
+ PhrasePair::
+ init(uint64_t const pid1,
+ pstats const& ps1,
+ pstats const& ps2,
+ size_t const numfeats)
+ {
+ p1 = pid1;
+ raw1 = ps1.raw_cnt + ps2.raw_cnt;
+ sample1 = ps1.sample_cnt + ps2.sample_cnt;
+ sample2 = 0;
+ good1 = ps1.good + ps2.good;
+ good2 = 0;
+ joint = 0;
+ fvals.resize(numfeats);
+ }
+
+ PhrasePair const&
+ PhrasePair::
+ update(uint64_t const pid2, jstats const& js1, jstats const& js2)
+ {
+ p2 = pid2;
+ raw2 = js1.cnt2() + js2.cnt2();
+ joint = js1.rcnt() + js2.rcnt();
+ assert(js1.aln().size() || js2.aln().size());
+ if (js1.aln().size())
+ aln = js1.aln()[0].second;
+ else if (js2.aln().size())
+ aln = js2.aln()[0].second;
+ for (int i = po_first; i < po_other; i++)
+ {
+ PhraseOrientation po = static_cast<PhraseOrientation>(i);
+ dfwd[i] = float(js1.dcnt_fwd(po) + js2.dcnt_fwd(po) + 1)/(sample1+po_other);
+ dbwd[i] = float(js1.dcnt_bwd(po) + js2.dcnt_bwd(po) + 1)/(sample1+po_other);
+ }
+ return *this;
+ }
+
+ PhrasePair const&
+ PhrasePair::
+ update(uint64_t const pid2, size_t r2)
+ {
+ p2 = pid2;
+ raw2 = r2;
+ joint = 0;
+ return *this;
+ }
+
+
+ PhrasePair const&
+ PhrasePair::
+ update(uint64_t const pid2,
+ size_t const raw2extra,
+ jstats const& js)
+ {
+ p2 = pid2;
+ raw2 = js.cnt2() + raw2extra;
+ joint = js.rcnt();
+ assert(js.aln().size());
+ if (js.aln().size())
+ aln = js.aln()[0].second;
+ for (int i = po_first; i <= po_other; i++)
+ {
+ PhraseOrientation po = static_cast<PhraseOrientation>(i);
+ dfwd[i] = float(js.dcnt_fwd(po)+1)/(sample1+po_other);
+ dbwd[i] = float(js.dcnt_bwd(po)+1)/(sample1+po_other);
+ }
+ return *this;
+ }
+
+ float
+ PhrasePair::
+ eval(vector<float> const& w)
+ {
+ assert(w.size() == this->fvals.size());
+ this->score = 0;
+ for (size_t i = 0; i < w.size(); ++i)
+ this->score += w[i] * this->fvals[i];
+ return this->score;
+ }
+#endif
+ } // namespace bitext
+} // namespace Moses
+
diff --git a/moses/TranslationModel/UG/mm/ug_phrasepair.h b/moses/TranslationModel/UG/mm/ug_phrasepair.h
new file mode 100644
index 000000000..8cd43dc18
--- /dev/null
+++ b/moses/TranslationModel/UG/mm/ug_phrasepair.h
@@ -0,0 +1,243 @@
+//-*- c++ -*-
+#pragma once
+#include "ug_bitext.h"
+
+using namespace ugdiss;
+using namespace std;
+
+namespace Moses {
+ namespace bitext
+ {
+
+ template<typename Token>
+ string
+ toString(TokenIndex const& V, Token const* x, size_t const len)
+ {
+ if (!len) return "";
+ UTIL_THROW_IF2(!x, HERE << ": Unexpected end of phrase!");
+ ostringstream buf;
+ buf << V[x->id()];
+ size_t i = 1;
+ for (x = x->next(); x && i < len; ++i, x = x->next())
+ buf << " " << V[x->id()];
+ UTIL_THROW_IF2(i != len, HERE << ": Unexpected end of phrase!");
+ return buf.str();
+ }
+
+ template<typename Token>
+ class
+ PhrasePair
+ {
+ public:
+ Token const* start1;
+ Token const* start2;
+ uint32_t len1;
+ uint32_t len2;
+ // uint64_t p1, p2;
+ uint32_t raw1,raw2,sample1,sample2,good1,good2,joint;
+ vector<float> fvals;
+ float dfwd[po_other+1]; // distortion counts // counts or probs?
+ float dbwd[po_other+1]; // distortion counts
+ vector<uchar> aln;
+ float score;
+ PhrasePair() { };
+ PhrasePair(PhrasePair const& o);
+
+ PhrasePair const& operator+=(PhrasePair const& other);
+
+ bool operator<(PhrasePair const& other) const;
+ bool operator>(PhrasePair const& other) const;
+ bool operator<=(PhrasePair const& other) const;
+ bool operator>=(PhrasePair const& other) const;
+
+ void init();
+ void init(Token const* x, uint32_t const len,
+ pstats const* ps = NULL, size_t const numfeats=0);
+
+ // void init(uint64_t const pid1, pstats const& ps, size_t const numfeats);
+ // void init(uint64_t const pid1, pstats const& ps1, pstats const& ps2,
+ // size_t const numfeats);
+
+ // PhrasePair const&
+ // update(uint64_t const pid2, size_t r2 = 0);
+
+ PhrasePair const&
+ update(Token const* x, uint32_t const len, jstats const& js);
+
+ // PhrasePair const&
+ // update(uint64_t const pid2, jstats const& js1, jstats const& js2);
+
+ // PhrasePair const&
+ // update(uint64_t const pid2, size_t const raw2extra, jstats const& js);
+
+ // float
+ // eval(vector<float> const& w);
+
+ class SortByTargetIdSeq
+ {
+ public:
+ int cmp(PhrasePair const& a, PhrasePair const& b) const;
+ bool operator()(PhrasePair const& a, PhrasePair const& b) const;
+ };
+ };
+
+ template<typename Token>
+ void
+ PhrasePair<Token>::
+ init(Token const* x, uint32_t const len,
+ pstats const* ps, size_t const numfeats)
+ {
+ start1 = x; len1 = len;
+ // p1 = pid1;
+ // p2 = 0;
+ if (ps)
+ {
+ raw1 = ps->raw_cnt;
+ sample1 = ps->sample_cnt;
+ good1 = ps->good;
+ }
+ else raw1 = sample1 = good1 = 0;
+ joint = 0;
+ good2 = 0;
+ sample2 = 0;
+ raw2 = 0;
+ fvals.resize(numfeats);
+ }
+
+ template<typename Token>
+ PhrasePair<Token> const&
+ PhrasePair<Token>::
+ update(Token const* x, uint32_t const len, jstats const& js)
+ {
+ // p2 = pid2;
+ start2 = x; len2 = len;
+ raw2 = js.cnt2();
+ joint = js.rcnt();
+ assert(js.aln().size());
+ if (js.aln().size())
+ aln = js.aln()[0].second;
+ float total_fwd = 0, total_bwd = 0;
+ for (int i = po_first; i <= po_other; i++)
+ {
+ PhraseOrientation po = static_cast<PhraseOrientation>(i);
+ total_fwd += js.dcnt_fwd(po)+1;
+ total_bwd += js.dcnt_bwd(po)+1;
+ }
+
+ // should we do that here or leave the raw counts?
+ for (int i = po_first; i <= po_other; i++)
+ {
+ PhraseOrientation po = static_cast<PhraseOrientation>(i);
+ dfwd[i] = float(js.dcnt_fwd(po)+1)/total_fwd;
+ dbwd[i] = float(js.dcnt_bwd(po)+1)/total_bwd;
+ }
+
+ return *this;
+ }
+
+ template<typename Token>
+ bool
+ PhrasePair<Token>::
+ operator<(PhrasePair const& other) const
+ { return this->score < other.score; }
+
+ template<typename Token>
+ bool
+ PhrasePair<Token>::
+ operator>(PhrasePair const& other) const
+ { return this->score > other.score; }
+
+ template<typename Token>
+ bool
+ PhrasePair<Token>::
+ operator<=(PhrasePair const& other) const
+ { return this->score <= other.score; }
+
+ template<typename Token>
+ bool
+ PhrasePair<Token>::
+ operator>=(PhrasePair const& other) const
+ { return this->score >= other.score; }
+
+ template<typename Token>
+ PhrasePair<Token> const&
+ PhrasePair<Token>::
+ operator+=(PhrasePair const& o)
+ {
+ raw1 += o.raw1;
+ raw2 += o.raw2;
+ sample1 += o.sample1;
+ sample2 += o.sample2;
+ good1 += o.good1;
+ good2 += o.good2;
+ joint += o.joint;
+ return *this;
+ }
+
+ template<typename Token>
+ PhrasePair<Token>::
+ PhrasePair(PhrasePair<Token> const& o)
+ : start1(o.start1)
+ , start2(o.start2)
+ , len1(o.len1)
+ , len2(o.len2)
+ , raw1(o.raw1)
+ , raw2(o.raw2)
+ , sample1(o.sample1)
+ , sample2(o.sample2)
+ , good1(o.good1)
+ , good2(o.good2)
+ , joint(o.joint)
+ , fvals(o.fvals)
+ , aln(o.aln)
+ , score(o.score)
+ {
+ for (size_t i = 0; i <= po_other; ++i)
+ {
+ dfwd[i] = o.dfwd[i];
+ dbwd[i] = o.dbwd[i];
+ }
+ }
+
+ template<typename Token>
+ int
+ PhrasePair<Token>::
+ SortByTargetIdSeq::
+ cmp(PhrasePair const& a, PhrasePair const& b) const
+ {
+ size_t i = 0;
+ Token const* x = a.start2;
+ Token const* y = b.start2;
+ while (i < a.len2 && i < b.len2 && x->id() == y->id())
+ {
+ x = x->next();
+ y = y->next();
+ ++i;
+ }
+ if (i == a.len2 && i == b.len2) return 0;
+ if (i == a.len2) return -1;
+ if (i == b.len2) return 1;
+ return x->id() < y->id() ? -1 : 1;
+ }
+
+ template<typename Token>
+ bool
+ PhrasePair<Token>::
+ SortByTargetIdSeq::
+ operator()(PhrasePair const& a, PhrasePair const& b) const
+ {
+ return this->cmp(a,b) < 0;
+ }
+
+ template<typename Token>
+ void
+ PhrasePair<Token>::
+ init()
+ {
+ len1 = len2 = raw1 = raw2 = sample1 = sample2 = good1 = good2 = joint = 0;
+ start1 = start2 = NULL;
+ }
+
+
+ } // namespace bitext
+} // namespace Moses
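
SortByTargetIdSeq above orders phrase pairs lexicographically by the target-side token id sequence, walking Token::next() for up to len2 steps. A standalone sketch of the comparison with a toy Token type (the real Token lives in the tightly packed corpus track):

// sort_by_target_sketch.cc -- standalone sketch of the comparison above.
#include <cassert>
#include <cstddef>
#include <stdint.h>

struct Token
{
  uint32_t myid;
  Token const* nxt;
  uint32_t id() const { return myid; }
  Token const* next() const { return nxt; }
};

int cmp_target(Token const* x, uint32_t lenx, Token const* y, uint32_t leny)
{
  size_t i = 0;
  while (i < lenx && i < leny && x->id() == y->id())
    {
      x = x->next();
      y = y->next();
      ++i;
    }
  if (i == lenx && i == leny) return 0;
  if (i == lenx) return -1;               // x is a proper prefix of y
  if (i == leny) return 1;
  return x->id() < y->id() ? -1 : 1;
}

int main()
{
  Token a2 = { 9, 0 }, a1 = { 5, &a2 };   // id sequence 5 9
  Token b2 = { 7, 0 }, b1 = { 5, &b2 };   // id sequence 5 7
  assert(cmp_target(&a1, 2, &b1, 2) > 0); // 5 9 sorts after 5 7
  assert(cmp_target(&b1, 1, &a1, 2) < 0); // a proper prefix sorts first
  return 0;
}
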
diff --git a/moses/TranslationModel/UG/mm/ug_tsa_base.h b/moses/TranslationModel/UG/mm/ug_tsa_base.h
index a6291ac3c..dc5e270c2 100644
--- a/moses/TranslationModel/UG/mm/ug_tsa_base.h
+++ b/moses/TranslationModel/UG/mm/ug_tsa_base.h
@@ -53,7 +53,7 @@ namespace ugdiss
/* an entry in the array, for iteration over all occurrences of a
* particular sequence */
// typedef boost::dynamic_bitset<uint64_t> bitset;
- typedef shared_ptr<bitvector> bitset_pointer;
+ typedef boost::shared_ptr<bitvector> bitset_pointer;
typedef TKN Token;
typedef BitSetCache<TSA<TKN> > BSC_t;
/* to allow caching of bit vectors that are expensive to create on
@@ -62,7 +62,7 @@ namespace ugdiss
friend class TSA_tree_iterator<TKN>;
protected:
- shared_ptr<Ttrack<TKN> const> corpus; // pointer to the underlying corpus
+ boost::shared_ptr<Ttrack<TKN> const> corpus; // pointer to the underlying corpus
char const* startArray; // beginning ...
char const* endArray; // ... and end ...
// of memory block storing the actual TSA
@@ -139,7 +139,7 @@ namespace ugdiss
getUpperBound(id_type id) const = 0;
public:
- shared_ptr<BSC_t> bsc;
+ boost::shared_ptr<BSC_t> bsc;
char const* arrayStart() const { return startArray; }
char const* arrayEnd() const { return endArray; }
@@ -298,7 +298,7 @@ namespace ugdiss
bitset_pointer
getBitSet(TKN const* startKey, size_t keyLen) const;
- shared_ptr<bitvector>
+ boost::shared_ptr<bitvector>
findTree(TKN const* treeStart, TKN const* treeEnd,
bitvector const* filter) const;
diff --git a/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h b/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h
index 14bf6cdad..ab7f96bf0 100644
--- a/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h
+++ b/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h
@@ -7,6 +7,8 @@
#include "ug_typedefs.h"
#include "tpt_tokenindex.h"
#include <iostream>
+#include "util/exception.hh"
+#include "moses/Util.h"
//#include <cassert>
// #include "ug_bv_iter.h"
@@ -60,10 +62,15 @@ namespace ugdiss
// TSA_tree_iterator(TSA_tree_iterator const& other);
TSA_tree_iterator(TSA<Token> const* s);
+ TSA_tree_iterator(TSA<Token> const* s, TSA_tree_iterator<Token> const& other);
TSA_tree_iterator(TSA<Token> const* r, id_type const* s, size_t const len);
// TSA_tree_iterator(TSA<Token> const* s, Token const& t);
TSA_tree_iterator(TSA<Token> const* s,
Token const* kstart,
+ size_t const len,
+ bool full_match_only=true);
+ TSA_tree_iterator(TSA<Token> const* s,
+ Token const* kstart,
Token const* kend,
bool full_match_only=true);
// TSA_tree_iterator(TSA<Token> const* s,
@@ -150,9 +157,12 @@ namespace ugdiss
double approxOccurrenceCount(int p=-1) const
{
assert(root);
+ if (p < 0) p += lower.size();
double ret = arrayByteSpanSize(p)/root->aveIndexEntrySize();
- assert(ret < root->corpus->numTokens());
if (ret < 25) ret = rawCnt(p);
+ UTIL_THROW_IF2(ret > root->corpus->numTokens(), "[" << HERE << "] "
+ << "Word count mismatch.");
+ assert(ret <= root->corpus->numTokens());
return ret;
}
@@ -320,6 +330,18 @@ namespace ugdiss
template<typename Token>
TSA_tree_iterator<Token>::
+ TSA_tree_iterator(TSA<Token> const* s, TSA_tree_iterator<Token> const& other)
+ : root(s)
+ {
+ Token const* x = other.getToken(0);
+ for (size_t i = 0; i < other.size() && this->extend(x->id()); ++i)
+ x = x->next();
+ };
+
+
+
+ template<typename Token>
+ TSA_tree_iterator<Token>::
TSA_tree_iterator
(TSA<Token> const* r,
id_type const* s,
@@ -385,6 +407,25 @@ namespace ugdiss
template<typename Token>
TSA_tree_iterator<Token>::
TSA_tree_iterator(TSA<Token> const* s, Token const* kstart,
+ size_t const len, bool full_match_only)
+ : root(s)
+ {
+ if (!root) return;
+ size_t i = 0;
+ for (; i < len && kstart && extend(*kstart); ++i)
+ kstart = kstart->next();
+ if (full_match_only && i != len)
+ {
+ lower.clear();
+ upper.clear();
+ }
+ };
+
+ // DEPRECATED: DO NOT USE. Use the one that takes the length
+ // instead of kend.
+ template<typename Token>
+ TSA_tree_iterator<Token>::
+ TSA_tree_iterator(TSA<Token> const* s, Token const* kstart,
Token const* kend, bool full_match_only)
: root(s)
{
@@ -561,8 +602,7 @@ namespace ugdiss
TSA_tree_iterator<Token>::
rawCnt(int p) const
{
- if (p < 0)
- p = lower.size()+p;
+ if (p < 0) p += lower.size();
assert(p>=0);
if (lower.size() == 0) return root->getCorpusSize();
return root->rawCnt(lower[p],upper[p]);
diff --git a/moses/TranslationModel/UG/mmsapt.cpp b/moses/TranslationModel/UG/mmsapt.cpp
index 730a9dc42..e59d4c61a 100644
--- a/moses/TranslationModel/UG/mmsapt.cpp
+++ b/moses/TranslationModel/UG/mmsapt.cpp
@@ -1,13 +1,38 @@
#include "mmsapt.h"
#include <boost/foreach.hpp>
+#include <boost/scoped_ptr.hpp>
#include <boost/tokenizer.hpp>
#include <algorithm>
+#include "moses/TranslationModel/UG/mm/ug_phrasepair.h"
+#include "util/exception.hh"
+#include <set>
namespace Moses
{
using namespace bitext;
using namespace std;
using namespace boost;
+
+
+ // uint64_t
+ // pack_phrasekey(uint64_t const shard_id, uint64_t const snt_id,
+ // uint64_t const offset, uint64_t const len)
+ // {
+ // uint64_t one = 1;
+ // // 8 bits - 256 shards
+ // // 13 bits - max offset
+ // // 11 bits - max len
+ // // 32 bits - max sentence id
+  //   UTIL_THROW_IF2(shard_id >= 256, "[" << HERE << "] "
+  //        << "Shard ID exceeds limit.");
+  //     UTIL_THROW_IF2(snt_id >= 4294967296, "[" << HERE << "] "
+  //        << "Sentence ID exceeds limit.");
+  //     UTIL_THROW_IF2(offset >= 8192, "[" << HERE << "]"
+  //        << "Phrase offset exceeds limit.");
+  //     UTIL_THROW_IF2(len >= 2048, "[" << HERE << "]"
+ // << "Phrase length exceeds limit.");
+ // return ((shard_id<<56)+(snt_id<<24)+(offset<<11)+len);
+ // }
void
fillIdSeq(Phrase const& mophrase, size_t const ifactor,
@@ -23,7 +48,7 @@ namespace Moses
void
- parseLine(string const& line, map<string,string> & params)
+ parseLine(string const& line, map<string,string> & param)
{
char_separator<char> sep("; ");
tokenizer<char_separator<char> > tokens(line,sep);
@@ -32,9 +57,14 @@ namespace Moses
size_t i = t.find_first_not_of(" =");
size_t j = t.find_first_of(" =",i+1);
size_t k = t.find_first_not_of(" =",j+1);
+ UTIL_THROW_IF2(i == string::npos || k == string::npos,
+ "[" << HERE << "] "
+ << "Parameter specification error near '"
+ << t << "' in moses ini line\n"
+ << line);
assert(i != string::npos);
assert(k != string::npos);
- params[t.substr(i,j)] = t.substr(k);
+ param[t.substr(i,j)] = t.substr(k);
}
}
@@ -57,10 +87,13 @@ namespace Moses
Mmsapt::
Mmsapt(string const& line)
: PhraseDictionary(line)
- , m_lex_alpha(1.0)
- , withLogCountFeatures(false)
- , withCoherence(true)
- , m_pfwd_features("g"), withPbwd(true), poolCounts(true)
+ // , m_lex_alpha(1.0)
+ // , withLogCountFeatures(false)
+ // , withCoherence(true)
+ // , m_pfwd_features("g")
+ // , m_pbwd_features("g")
+ // , withPbwd(true)
+ // , poolCounts(true)
, ofactor(1,0)
, m_tpc_ctr(0)
{
@@ -91,78 +124,147 @@ namespace Moses
void
Mmsapt::
+ register_ff(sptr<pscorer> const& ff, vector<sptr<pscorer> > & registry)
+ {
+ registry.push_back(ff);
+ ff->setIndex(m_feature_names.size());
+ for (int i = 0; i < ff->fcnt(); ++i)
+ {
+ m_feature_names.push_back(ff->fname(i));
+ m_is_logval.push_back(ff->isLogVal(i));
+ m_is_integer.push_back(ff->isIntegerValued(i));
+ }
+ }
+
+ bool
+ Mmsapt::
+ isLogVal(int i) const { return m_is_logval.at(i); }
+
+ bool
+ Mmsapt::
+ isInteger(int i) const { return m_is_integer.at(i); }
+
+ void
+ Mmsapt::
init(string const& line)
{
map<string,string>::const_iterator m;
- map<string,string> param;
- parseLine(line,param);
+ parseLine(line,this->param);
+
+ this->m_numScoreComponents = atoi(param["num-features"].c_str());
m = param.find("config");
if (m != param.end())
read_config_file(m->second,param);
-
- bname = param["base"];
+
+ m = param.find("base");
+ if (m != param.end())
+ {
+ bname = m->second;
+ m = param.find("path");
+ UTIL_THROW_IF2((m != param.end() && m->second != bname),
+ "Conflicting aliases for path:\n"
+ << "path=" << string(m->second) << "\n"
+ << "base=" << bname.c_str() );
+ }
+ else bname = param["path"];
L1 = param["L1"];
L2 = param["L2"];
- assert(bname.size());
- assert(L1.size());
- assert(L2.size());
-
- m = param.find("pfwd-denom");
- m_pfwd_denom = m != param.end() ? m->second[0] : 's';
- m = param.find("smooth");
- m_lbop_parameter = m != param.end() ? atof(m->second.c_str()) : .05;
+ UTIL_THROW_IF2(bname.size() == 0, "Missing corpus base name at " << HERE);
+ UTIL_THROW_IF2(L1.size() == 0, "Missing L1 tag at " << HERE);
+ UTIL_THROW_IF2(L2.size() == 0, "Missing L2 tag at " << HERE);
+
+ // set defaults for all parameters if not specified so far
+ pair<string,string> dflt("input-factor","0");
+ input_factor = atoi(param.insert(dflt).first->second.c_str());
+ // shouldn't that be a string?
+
+ dflt = pair<string,string> ("smooth",".01");
+ m_lbop_conf = atof(param.insert(dflt).first->second.c_str());
- m = param.find("max-samples");
- m_default_sample_size = m != param.end() ? atoi(m->second.c_str()) : 1000;
+ dflt = pair<string,string> ("lexalpha","0");
+ m_lex_alpha = atof(param.insert(dflt).first->second.c_str());
- if ((m = param.find("logcnt-features")) != param.end())
- withLogCountFeatures = m->second != "0";
+ dflt = pair<string,string> ("sample","1000");
+ m_default_sample_size = atoi(param.insert(dflt).first->second.c_str());
- if ((m = param.find("coh")) != param.end())
- withCoherence = m->second != "0";
-
- if ((m = param.find("pfwd")) != param.end())
- m_pfwd_features = (m->second == "0" ? "" : m->second);
-
- if (m_pfwd_features == "1")
- m_pfwd_features[0] = m_pfwd_denom;
-
- if ((m = param.find("pbwd")) != param.end())
- withPbwd = m->second != "0";
-
- if ((m = param.find("lexalpha")) != param.end())
- m_lex_alpha = atof(m->second.c_str());
-
- m = param.find("workers");
- m_workers = m != param.end() ? atoi(m->second.c_str()) : 8;
+ dflt = pair<string,string>("workers","8");
+ m_workers = atoi(param.insert(dflt).first->second.c_str());
m_workers = min(m_workers,24UL);
- if ((m = param.find("limit")) != param.end())
- m_tableLimit = atoi(m->second.c_str());
+
+ dflt = pair<string,string>("table-limit","20");
+ m_tableLimit = atoi(param.insert(dflt).first->second.c_str());
- m = param.find("cache-size");
- m_history.reserve(m != param.end()?max(1000,atoi(m->second.c_str())):10000);
+ dflt = pair<string,string>("cache","10000");
+ size_t hsize = max(1000,atoi(param.insert(dflt).first->second.c_str()));
+ m_history.reserve(hsize);
// in plain language: cache size is at least 1000, and 10,000 by default
// this cache keeps track of the most frequently used target phrase collections
// even when not actively in use
-
- this->m_numScoreComponents = atoi(param["num-features"].c_str());
- m = param.find("ifactor");
- input_factor = m != param.end() ? atoi(m->second.c_str()) : 0;
+ // Feature functions are initialized in function Load();
+ param.insert(pair<string,string>("pfwd", "g"));
+ param.insert(pair<string,string>("pbwd", "g"));
+ param.insert(pair<string,string>("logcnt", "0"));
+ param.insert(pair<string,string>("coh", "0"));
+ param.insert(pair<string,string>("rare", "1"));
+ param.insert(pair<string,string>("prov", "1"));
poolCounts = true;
if ((m = param.find("extra")) != param.end())
extra_data = m->second;
+ dflt = pair<string,string>("tuneable","true");
+ m_tuneable = Scan<bool>(param.insert(dflt).first->second.c_str());
+
+ dflt = pair<string,string>("feature-sets","standard");
+ m_feature_set_names = Tokenize(param.insert(dflt).first->second.c_str(), ",");
+
+ // check for unknown parameters
+ vector<string> known_parameters; known_parameters.reserve(50);
+ known_parameters.push_back("L1");
+ known_parameters.push_back("L2");
+ known_parameters.push_back("Mmsapt");
+ known_parameters.push_back("PhraseDictionaryBitextSampling"); // alias for Mmsapt
+ known_parameters.push_back("base"); // alias for path
+ known_parameters.push_back("cache");
+ known_parameters.push_back("coh");
+ known_parameters.push_back("config");
+ known_parameters.push_back("extra");
+ known_parameters.push_back("feature-sets");
+ known_parameters.push_back("input-factor");
+ known_parameters.push_back("lexalpha");
+ // known_parameters.push_back("limit"); // replaced by "table-limit"
+ known_parameters.push_back("logcnt");
+ known_parameters.push_back("name");
+ known_parameters.push_back("num-features");
+ known_parameters.push_back("output-factor");
+ known_parameters.push_back("path");
+ known_parameters.push_back("pbwd");
+ known_parameters.push_back("pfwd");
+ known_parameters.push_back("prov");
+ known_parameters.push_back("rare");
+ known_parameters.push_back("sample");
+ known_parameters.push_back("smooth");
+ known_parameters.push_back("table-limit");
+ known_parameters.push_back("tuneable");
+ known_parameters.push_back("unal");
+ known_parameters.push_back("workers");
+ for (map<string,string>::iterator m = param.begin(); m != param.end(); ++m)
+ {
+ UTIL_THROW_IF2(!binary_search(known_parameters.begin(),
+ known_parameters.end(), m->first),
+ HERE << ": Unknown parameter specification for Mmsapt: "
+ << m->first);
+ }
}
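
Mmsapt::init() above reads each parameter through `param.insert(dflt).first->second`: std::map::insert is a no-op when the key is already present and in either case returns an iterator to the stored element, so one expression yields either the user's value or the default, and the default also ends up recorded in the map. A small sketch of the idiom:

// default_param_sketch.cc -- sketch of the map::insert default idiom above.
#include <cassert>
#include <cstdlib>
#include <map>
#include <string>

int main()
{
  std::map<std::string, std::string> param;
  param["sample"] = "500";                       // user-supplied

  std::pair<std::string, std::string> dflt("sample", "1000");
  int sample = std::atoi(param.insert(dflt).first->second.c_str());
  assert(sample == 500);                         // existing value wins

  dflt = std::make_pair(std::string("workers"), std::string("8"));
  int workers = std::atoi(param.insert(dflt).first->second.c_str());
  assert(workers == 8);                          // default filled in
  assert(param["workers"] == "8");               // ...and recorded in the map
  return 0;
}
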
void
Mmsapt::
- load_extra_data(string bname)
+ load_extra_data(string bname, bool locking = true)
{
// TO DO: ADD CHECKS FOR ROBUSTNESS
// - file existence?
@@ -180,108 +282,139 @@ namespace Moses
while(getline(in2,line)) text2.push_back(line);
while(getline(ina,line)) symal.push_back(line);
- lock_guard<mutex> guard(this->lock);
+ boost::scoped_ptr<boost::lock_guard<boost::mutex> > guard;
+ if (locking) guard.reset(new boost::lock_guard<boost::mutex>(this->lock));
btdyn = btdyn->add(text1,text2,symal);
assert(btdyn);
// cerr << "Loaded " << btdyn->T1->size() << " sentence pairs" << endl;
}
- size_t
+ template<typename fftype>
+ void
Mmsapt::
- add_corpus_specific_features
- (vector<sptr<pscorer > >& ffvec, size_t num_feats)
- {
- float const lbop = m_lbop_parameter; // just for code readability below
- // for the time being, we assume that all phrase probability features
- // use the same confidence parameter for lower-bound-estimation
- for (size_t i = 0; i < m_pfwd_features.size(); ++i)
- {
- UTIL_THROW_IF2(m_pfwd_features[i] != 'g' &&
- m_pfwd_features[i] != 'r' &&
- m_pfwd_features[i] != 's',
- "Can't handle pfwd feature type '"
- << m_pfwd_features[i] << "'.");
- sptr<PScorePfwd<Token> > ff(new PScorePfwd<Token>());
- size_t k = num_feats;
- num_feats = ff->init(num_feats,lbop,m_pfwd_features[i]);
- for (;k < num_feats; ++k) m_feature_names.push_back(ff->fname(k));
- ffvec.push_back(ff);
+ check_ff(string const ffname, vector<sptr<pscorer> >* registry)
+ {
+ string const& spec = param[ffname];
+ if (spec == "" || spec == "0") return;
+ if (registry)
+ {
+ sptr<fftype> ff(new fftype(spec));
+ register_ff(ff, *registry);
}
-
- if (withPbwd)
+ else if (spec[spec.size()-1] == '+') // corpus specific
{
- sptr<PScorePbwd<Token> > ff(new PScorePbwd<Token>());
- size_t k = num_feats;
- num_feats = ff->init(num_feats,lbop);
- for (; k < num_feats; ++k) m_feature_names.push_back(ff->fname(k));
- ffvec.push_back(ff);
+ sptr<fftype> ff(new fftype(spec));
+ register_ff(ff, m_active_ff_fix);
+ ff.reset(new fftype(spec));
+ register_ff(ff, m_active_ff_dyn);
}
-
- if (withLogCountFeatures)
+ else
{
- sptr<PScoreLogCounts<Token> > ff(new PScoreLogCounts<Token>());
- size_t k = num_feats;
- num_feats = ff->init(num_feats);
- for (; k < num_feats; ++k) m_feature_names.push_back(ff->fname(k));
- ffvec.push_back(ff);
+ sptr<fftype> ff(new fftype(spec));
+ register_ff(ff, m_active_ff_common);
}
+ }
- return num_feats;
+ template<typename fftype>
+ void
+ Mmsapt::
+ check_ff(string const ffname, float const xtra, vector<sptr<pscorer> >* registry)
+ {
+ string const& spec = param[ffname];
+ if (spec == "" || spec == "0") return;
+ if (registry)
+ {
+ sptr<fftype> ff(new fftype(xtra,spec));
+ register_ff(ff, *registry);
+ }
+ else if (spec[spec.size()-1] == '+') // corpus specific
+ {
+ sptr<fftype> ff(new fftype(xtra,spec));
+ register_ff(ff, m_active_ff_fix);
+ ff.reset(new fftype(xtra,spec));
+ register_ff(ff, m_active_ff_dyn);
+ }
+ else
+ {
+ sptr<fftype> ff(new fftype(xtra,spec));
+ register_ff(ff, m_active_ff_common);
+ }
}
+ // void
+ // Mmsapt::
+ // add_corpus_specific_features(vector<sptr<pscorer > >& registry)
+ // {
+ // check_ff<PScorePbwd<Token> >("pbwd",m_lbop_conf,registry);
+ // check_ff<PScoreLogCnt<Token> >("logcnt",registry);
+ // }
+
void
Mmsapt::
Load()
{
- btfix.num_workers = this->m_workers;
- btfix.open(bname, L1, L2);
- btfix.setDefaultSampleSize(m_default_sample_size);
-
- size_t num_feats = 0;
-
- // lexical scores are currently always active
- sptr<PScoreLex<Token> > ff(new PScoreLex<Token>(m_lex_alpha));
- size_t k = num_feats;
- num_feats = ff->init(num_feats, bname + L1 + "-" + L2 + ".lex");
- for (; k < num_feats; ++k) m_feature_names.push_back(ff->fname(k));
- m_active_ff_common.push_back(ff);
+ boost::lock_guard<boost::mutex> guard(this->lock);
+
+ // can load only once
+ // UTIL_THROW_IF2(shards.size(),"Mmsapt is already loaded at " << HERE);
+
+ // load feature sets
+ BOOST_FOREACH(string const& fsname, m_feature_set_names)
+ {
+ // standard (default) feature set
+ if (fsname == "standard")
+ {
+ // lexical scores
+ string lexfile = bname + L1 + "-" + L2 + ".lex";
+ sptr<PScoreLex1<Token> > ff(new PScoreLex1<Token>(param["lex_alpha"],lexfile));
+ register_ff(ff,m_active_ff_common);
+
+ // these are always computed on pooled data
+ check_ff<PScoreRareness<Token> > ("rare", &m_active_ff_common);
+ check_ff<PScoreUnaligned<Token> >("unal", &m_active_ff_common);
+ check_ff<PScoreCoherence<Token> >("coh", &m_active_ff_common);
- if (withCoherence)
+ // for these ones either way is possible (specification ends with '+'
+ // if corpus-specific
+ check_ff<PScorePfwd<Token> >("pfwd", m_lbop_conf);
+ check_ff<PScorePbwd<Token> >("pbwd", m_lbop_conf);
+ check_ff<PScoreLogCnt<Token> >("logcnt");
+
+ // These are always corpus-specific
+ check_ff<PScoreProvenance<Token> >("prov", &m_active_ff_fix);
+ check_ff<PScoreProvenance<Token> >("prov", &m_active_ff_dyn);
+ }
+
+ // data source features (copies of phrase and word count specific to
+ // this translation model)
+ else if (fsname == "datasource")
{
- sptr<PScoreCoherence<Token> > ff(new PScoreCoherence<Token>());
- size_t k = num_feats;
- num_feats = ff->init(num_feats);
- for (; k < num_feats; ++k) m_feature_names.push_back(ff->fname(k));
- m_active_ff_common.push_back(ff);
+ sptr<PScorePC<Token> > ffpcnt(new PScorePC<Token>("pcnt"));
+ register_ff(ffpcnt,m_active_ff_common);
+ sptr<PScoreWC<Token> > ffwcnt(new PScoreWC<Token>("wcnt"));
+ register_ff(ffwcnt,m_active_ff_common);
}
+ }
+ // cerr << "Features: " << Join("|",m_feature_names) << endl;
- num_feats = add_corpus_specific_features(m_active_ff_fix,num_feats);
- // cerr << num_feats << "/" << this->m_numScoreComponents
- // << " at " << __FILE__ << ":" << __LINE__ << endl;
- poolCounts = poolCounts && num_feats == this->m_numScoreComponents;
- if (!poolCounts)
- num_feats = add_corpus_specific_features(m_active_ff_dyn, num_feats);
-
-#if 0
- cerr << "MMSAPT provides " << num_feats << " features at "
- << __FILE__ << ":" << __LINE__ << endl;
- BOOST_FOREACH(string const& fname, m_feature_names)
- cerr << fname << endl;
-#endif
- UTIL_THROW_IF2(num_feats != this->m_numScoreComponents,
- "At " << __FILE__ << ":" << __LINE__
- << ": number of feature values provided by Phrase table ("
- << num_feats << ") does not match number specified in "
- << "Moses config file (" << this->m_numScoreComponents
- << ")!\n";);
-
+ UTIL_THROW_IF2(this->m_feature_names.size() != this->m_numScoreComponents,
+ "At " << HERE << ": number of feature values provided by "
+ << "Phrase table (" << this->m_feature_names.size()
+ << ") does not match number specified in Moses config file ("
+ << this->m_numScoreComponents << ")!\n";);
+
+ // Load corpora. For the time being, we can have one memory-mapped static
+ // corpus and one in-memory dynamic corpus
+ // sptr<mmbitext> btfix(new mmbitext());
+ btfix.num_workers = this->m_workers;
+ btfix.open(bname, L1, L2);
+ btfix.setDefaultSampleSize(m_default_sample_size);
+ // shards.push_back(btfix);
- btdyn.reset(new imBitext<Token>(btfix.V1, btfix.V2,m_default_sample_size));
+ btdyn.reset(new imbitext(btfix.V1, btfix.V2, m_default_sample_size));
btdyn->num_workers = this->m_workers;
if (extra_data.size())
- {
- load_extra_data(extra_data);
- }
+ load_extra_data(extra_data,false);
#if 0
// currently not used
@@ -310,258 +443,345 @@ namespace Moses
TargetPhrase*
Mmsapt::
- createTargetPhrase(Phrase const& src,
- Bitext<Token> const& bt,
- PhrasePair const& pp) const
- {
- Word w; uint32_t sid,off,len;
- TargetPhrase* tp = new TargetPhrase();
- parse_pid(pp.p2, sid, off, len);
- Token const* x = bt.T2->sntStart(sid) + off;
- for (uint32_t k = 0; k < len; ++k)
+ mkTPhrase(Phrase const& src,
+ PhrasePair<Token>* fix,
+ PhrasePair<Token>* dyn,
+ sptr<Bitext<Token> > const& dynbt) const
+ {
+ UTIL_THROW_IF2(!fix && !dyn, HERE <<
+ ": Can't create target phrase from nothing.");
+ vector<float> fvals(this->m_numScoreComponents);
+ PhrasePair<Token> pool = fix ? *fix : *dyn;
+ if (fix)
{
- // cerr << (*bt.V2)[x[k].id()] << " at " << __FILE__ << ":" << __LINE__ << endl;
- StringPiece wrd = (*bt.V2)[x[k].id()];
- // if ((off+len) > bt.T2->sntLen(sid))
- // cerr << off << ";" << len << " " << bt.T2->sntLen(sid) << endl;
- assert(off+len <= bt.T2->sntLen(sid));
- w.CreateFromString(Output,ofactor,wrd,false);
- tp->AddWord(w);
+ BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+ (*ff)(btfix, *fix, &fvals);
}
- tp->GetScoreBreakdown().Assign(this, pp.fvals);
- tp->Evaluate(src);
- return tp;
- }
-
- // process phrase stats from a single parallel corpus
- void
- Mmsapt::
- process_pstats
- (Phrase const& src,
- uint64_t const pid1,
- pstats const& stats,
- Bitext<Token> const & bt,
- TargetPhraseCollection* tpcoll
- ) const
- {
- PhrasePair pp;
- pp.init(pid1, stats, this->m_numScoreComponents);
- pstats::trg_map_t::const_iterator t;
- for (t = stats.trg.begin(); t != stats.trg.end(); ++t)
+ if (dyn)
{
- pp.update(t->first,t->second);
- BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
- (*ff)(bt,pp);
- BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
- (*ff)(bt,pp);
- tpcoll->Add(createTargetPhrase(src,bt,pp));
+ BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_dyn)
+ (*ff)(*dynbt, *dyn, &fvals);
}
- }
-
- void
- Mmsapt::
- ScorePPfix(bitext::PhrasePair& pp) const
- {
- BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
- (*ff)(btfix,pp);
- BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
- (*ff)(btfix,pp);
- }
-
- // process phrase stats from a single parallel corpus
- bool
- Mmsapt::
- pool_pstats(Phrase const& src,
- uint64_t const pid1a,
- pstats * statsa,
- Bitext<Token> const & bta,
- uint64_t const pid1b,
- pstats const* statsb,
- Bitext<Token> const & btb,
- TargetPhraseCollection* tpcoll) const
- {
- PhrasePair pp;
- if (statsa && statsb)
- pp.init(pid1b, *statsa, *statsb, this->m_numScoreComponents);
- else if (statsa)
- pp.init(pid1a, *statsa, this->m_numScoreComponents);
- else if (statsb)
- pp.init(pid1b, *statsb, this->m_numScoreComponents);
- else return false; // throw "no stats for pooling available!";
-
- pstats::trg_map_t::const_iterator b;
- pstats::trg_map_t::iterator a;
- if (statsb)
+
+ if (fix && dyn) { pool += *dyn; }
+ else if (fix)
{
- for (b = statsb->trg.begin(); b != statsb->trg.end(); ++b)
- {
- uint32_t sid,off,len;
- parse_pid(b->first, sid, off, len);
- Token const* x = bta.T2->sntStart(sid) + off;
- TSA<Token>::tree_iterator m(bta.I2.get(),x,x+len);
- if (m.size() == len)
- {
- ;
- if (statsa && ((a = statsa->trg.find(m.getPid()))
- != statsa->trg.end()))
- {
- pp.update(b->first,a->second,b->second);
- a->second.invalidate();
- }
- else
- pp.update(b->first,m.approxOccurrenceCount(),
- b->second);
- }
- else pp.update(b->first,b->second);
- BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
- (*ff)(btb,pp);
- BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
- (*ff)(btb,pp);
- tpcoll->Add(createTargetPhrase(src,btb,pp));
- }
+ PhrasePair<Token> zilch; zilch.init();
+ TSA<Token>::tree_iterator m(dynbt->I2.get(), fix->start2, fix->len2);
+ if (m.size() == fix->len2)
+ zilch.raw2 = m.approxOccurrenceCount();
+ pool += zilch;
+ BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_dyn)
+ (*ff)(*dynbt, ff->allowPooling() ? pool : zilch, &fvals);
}
- if (!statsa) return statsb != NULL;
- for (a = statsa->trg.begin(); a != statsa->trg.end(); ++a)
+ else if (dyn)
{
- uint32_t sid,off,len;
- if (!a->second.valid()) continue;
- parse_pid(a->first, sid, off, len);
- if (btb.T2)
- {
- Token const* x = bta.T2->sntStart(sid) + off;
- TSA<Token>::tree_iterator m(btb.I2.get(), x, x+len);
- if (m.size() == len)
- pp.update(a->first,m.approxOccurrenceCount(),a->second);
- else
- pp.update(a->first,a->second);
- }
- else
- pp.update(a->first,a->second);
-#if 0
- // jstats const& j = a->second;
- cerr << bta.T1->pid2str(bta.V1.get(),pp.p1) << " ::: "
- << bta.T2->pid2str(bta.V2.get(),pp.p2) << endl;
- cerr << pp.raw1 << " " << pp.sample1 << " " << pp.good1 << " "
- << pp.joint << " " << pp.raw2 << endl;
-#endif
-
- UTIL_THROW_IF2(pp.raw2 == 0,
- "OOPS" << bta.T1->pid2str(bta.V1.get(),pp.p1) << " ::: "
- << bta.T2->pid2str(bta.V2.get(),pp.p2) << ": "
- << pp.raw1 << " " << pp.sample1 << " "
- << pp.good1 << " " << pp.joint << " "
- << pp.raw2);
+ PhrasePair<Token> zilch; zilch.init();
+ TSA<Token>::tree_iterator m(btfix.I2.get(), dyn->start2, dyn->len2);
+ if (m.size() == dyn->len2)
+ zilch.raw2 = m.approxOccurrenceCount();
+ pool += zilch;
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
- (*ff)(bta,pp);
- BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
- (*ff)(bta,pp);
- tpcoll->Add(createTargetPhrase(src,bta,pp));
+ (*ff)(*dynbt, ff->allowPooling() ? pool : zilch, &fvals);
}
- return true;
- }
-
-
- // process phrase stats from a single parallel corpus
- bool
- Mmsapt::
- combine_pstats
- (Phrase const& src,
- uint64_t const pid1a, pstats * statsa, Bitext<Token> const & bta,
- uint64_t const pid1b, pstats const* statsb, Bitext<Token> const & btb,
- TargetPhraseCollection* tpcoll) const
- {
- PhrasePair ppfix,ppdyn,pool;
- // ppfix: counts from btfix
- // ppdyn: counts from btdyn
- // pool: pooled counts from both
- Word w;
- if (statsa) ppfix.init(pid1a,*statsa,this->m_numScoreComponents);
- if (statsb) ppdyn.init(pid1b,*statsb,this->m_numScoreComponents);
- pstats::trg_map_t::const_iterator b;
- pstats::trg_map_t::iterator a;
-
- if (statsb)
+ if (fix)
{
- pool.init(pid1b,*statsb,0);
- for (b = statsb->trg.begin(); b != statsb->trg.end(); ++b)
- {
- ppdyn.update(b->first,b->second);
- BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_dyn)
- (*ff)(btb,ppdyn);
-
- uint32_t sid,off,len;
- parse_pid(b->first, sid, off, len);
- Token const* x = bta.T2->sntStart(sid) + off;
- TSA<Token>::tree_iterator m(bta.I2.get(),x,x+len);
-
- if (m.size() && statsa &&
- ((a = statsa->trg.find(m.getPid())) != statsa->trg.end()))
- {
- // phrase pair found also in btfix
- ppfix.update(a->first,a->second);
- BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
- (*ff)(bta,ppfix,&ppdyn.fvals);
- BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
- (*ff)(bta,ppfix,&ppdyn.fvals);
- a->second.invalidate();
- }
- else
- {
- // phrase pair was not found in btfix
-
- // ... but the source phrase was
- if (m.size())
- pool.update(b->first,m.approxOccurrenceCount(), b->second);
-
- // ... and not even the source phrase
- else
- pool.update(b->first,b->second);
-
- BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
- (*ff)(btb,pool,&ppdyn.fvals);
- BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
- (*ff)(btb,pool,&ppdyn.fvals);
-
- }
-
- tpcoll->Add(createTargetPhrase(src,btb,ppdyn));
- }
+ BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
+ (*ff)(btfix, pool, &fvals);
}
-
- // now deal with all phraise pairs that are ONLY in btfix
- // (the ones that are in both were dealt with above)
- if (statsa)
+ else
{
- pool.init(pid1a,*statsa,0);
- for (a = statsa->trg.begin(); a != statsa->trg.end(); ++a)
- {
- if (!a->second.valid()) continue; // done above
- ppfix.update(a->first,a->second);
- BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
- (*ff)(bta,ppfix);
- BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
- (*ff)(bta,ppfix);
-
- if (btb.I2)
- {
- uint32_t sid,off,len;
- parse_pid(a->first, sid, off, len);
- Token const* x = bta.T2->sntStart(sid) + off;
- TSA<Token>::tree_iterator m(btb.I2.get(),x,x+len);
- if (m.size())
- pool.update(a->first,m.approxOccurrenceCount(),a->second);
- else
- pool.update(a->first,a->second);
- }
- else pool.update(a->first,a->second);
- BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_dyn)
- (*ff)(btb,pool,&ppfix.fvals);
- }
- if (ppfix.p2)
- tpcoll->Add(createTargetPhrase(src,bta,ppfix));
+ BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
+ (*ff)(*dynbt, pool, &fvals);
+ }
+ TargetPhrase* tp = new TargetPhrase(this);
+ Token const* x = fix ? fix->start2 : dyn->start2;
+ uint32_t len = fix ? fix->len2 : dyn->len2;
+ for (uint32_t k = 0; k < len; ++k, x = x->next())
+ {
+ StringPiece wrd = (*(btfix.V2))[x->id()];
+ Word w; w.CreateFromString(Output,ofactor,wrd,false);
+ tp->AddWord(w);
}
- return (statsa || statsb);
+ tp->GetScoreBreakdown().Assign(this, fvals);
+ tp->EvaluateInIsolation(src);
+ return tp;
}
+
+ // TargetPhrase*
+ // Mmsapt::
+ // mkTPhrase(Phrase const& src,
+ // Bitext<Token> const& bt,
+ // PhrasePair const& pp) const
+ // {
+ // Word w; uint32_t sid,off,len;
+ // TargetPhrase* tp = new TargetPhrase();
+ // parse_pid(pp.p2, sid, off, len);
+ // Token const* x = bt.T2->sntStart(sid) + off;
+ // for (uint32_t k = 0; k < len; ++k)
+ // {
+ // // cerr << (*bt.V2)[x[k].id()] << " at " << __FILE__ << ":" << __LINE__ << endl;
+ // StringPiece wrd = (*bt.V2)[x[k].id()];
+ // // if ((off+len) > bt.T2->sntLen(sid))
+ // // cerr << off << ";" << len << " " << bt.T2->sntLen(sid) << endl;
+ // assert(off+len <= bt.T2->sntLen(sid));
+ // w.CreateFromString(Output,ofactor,wrd,false);
+ // tp->AddWord(w);
+ // }
+ // tp->GetScoreBreakdown().Assign(this, pp.fvals);
+ // tp->Evaluate(src);
+ // return tp;
+ // }
+
+ // // process phrase stats from a single parallel corpus
+ // void
+ // Mmsapt::
+ // process_pstats
+ // (Phrase const& src,
+ // uint64_t const pid1,
+ // pstats const& stats,
+ // Bitext<Token> const & bt,
+ // TargetPhraseCollection* tpcoll
+ // ) const
+ // {
+ // PhrasePair pp;
+ // pp.init(pid1, stats, this->m_numScoreComponents);
+ // pstats::trg_map_t::const_iterator t;
+ // for (t = stats.trg.begin(); t != stats.trg.end(); ++t)
+ // {
+ // pp.update(t->first,t->second);
+ // BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+ // (*ff)(bt,pp);
+ // BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
+ // (*ff)(bt,pp);
+ // tpcoll->Add(mkTPhrase(src,bt,pp));
+ // }
+ // }
+
+ // void
+ // Mmsapt::
+ // ScorePPfix(PhrasePair& pp) const
+ // {
+ // BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+ // (*ff)(btfix,pp);
+ // BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
+ // (*ff)(btfix,pp);
+ // }
+
+// // pool phrase stats from two parallel corpora (fix and dyn)
+// bool
+// Mmsapt::
+// pool_pstats(Phrase const& src,
+// uint64_t const pid1a,
+// pstats * statsa,
+// Bitext<Token> const & bta,
+// uint64_t const pid1b,
+// pstats const* statsb,
+// Bitext<Token> const & btb,
+// TargetPhraseCollection* tpcoll) const
+// {
+// PhrasePair pp;
+// if (statsa && statsb)
+// pp.init(pid1b, *statsa, *statsb, this->m_numScoreComponents);
+// else if (statsa)
+// pp.init(pid1a, *statsa, this->m_numScoreComponents);
+// else if (statsb)
+// pp.init(pid1b, *statsb, this->m_numScoreComponents);
+// else return false; // throw "no stats for pooling available!";
+
+// pstats::trg_map_t::const_iterator b;
+// pstats::trg_map_t::iterator a;
+// if (statsb)
+// {
+// for (b = statsb->trg.begin(); b != statsb->trg.end(); ++b)
+// {
+// uint32_t sid,off,len;
+// parse_pid(b->first, sid, off, len);
+// Token const* x = btb.T2->sntStart(sid) + off;
+// TSA<Token>::tree_iterator m(bta.I2.get(),x,x+len);
+// if (m.size() == len)
+// {
+// ;
+// if (statsa && ((a = statsa->trg.find(m.getPid()))
+// != statsa->trg.end()))
+// {
+// pp.update(b->first,a->second,b->second);
+// a->second.invalidate();
+// }
+// else
+// pp.update(b->first,m.approxOccurrenceCount(),
+// b->second);
+// }
+// else pp.update(b->first,b->second);
+// BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+// (*ff)(btb,pp);
+// BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
+// (*ff)(btb,pp);
+// tpcoll->Add(mkTPhrase(src,btb,pp));
+// }
+// }
+// if (!statsa) return statsb != NULL;
+// for (a = statsa->trg.begin(); a != statsa->trg.end(); ++a)
+// {
+// uint32_t sid,off,len;
+// if (!a->second.valid()) continue;
+// parse_pid(a->first, sid, off, len);
+// if (btb.T2)
+// {
+// Token const* x = bta.T2->sntStart(sid) + off;
+// TSA<Token>::tree_iterator m(btb.I2.get(), x, len);
+// if (m.size() == len)
+// pp.update(a->first,m.approxOccurrenceCount(),a->second);
+// else
+// pp.update(a->first,a->second);
+// }
+// else pp.update(a->first,a->second);
+// #if 0
+// // jstats const& j = a->second;
+// cerr << bta.T1->pid2str(bta.V1.get(),pp.p1) << " ::: "
+// << bta.T2->pid2str(bta.V2.get(),pp.p2) << endl;
+// cerr << pp.raw1 << " " << pp.sample1 << " " << pp.good1 << " "
+// << pp.joint << " " << pp.raw2 << endl;
+// #endif
+
+// UTIL_THROW_IF2(pp.raw2 == 0,
+// "OOPS" << bta.T1->pid2str(bta.V1.get(),pp.p1) << " ::: "
+// << bta.T2->pid2str(bta.V2.get(),pp.p2) << ": "
+// << pp.raw1 << " " << pp.sample1 << " "
+// << pp.good1 << " " << pp.joint << " "
+// << pp.raw2);
+// BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+// (*ff)(bta,pp);
+// BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
+// (*ff)(bta,pp);
+// tpcoll->Add(mkTPhrase(src,bta,pp));
+// }
+// return true;
+// }
+
+
+
+
+  // // combine phrase stats from two parallel corpora (fix and dyn)
+ // bool
+ // Mmsapt::
+ // combine_pstats
+ // (Phrase const& src,
+ // uint64_t const pid1a, pstats * statsa, Bitext<Token> const & bta,
+ // uint64_t const pid1b, pstats const* statsb, Bitext<Token> const & btb,
+ // TargetPhraseCollection* tpcoll) const
+ // {
+ // if (!statsa && !statsb) return false;
+
+ // PhrasePair ppfix,ppdyn,pool; Word w;
+ // // ppfix: counts from btfix
+ // // ppdyn: counts from btdyn
+ // // pool: pooled counts from both
+
+ // pstats::trg_map_t::const_iterator b;
+ // pstats::trg_map_t::iterator a;
+
+
+ // set<uint64_t> check;
+ // if (statsb)
+ // {
+ // ppdyn.init(pid1b,*statsb,this->m_numScoreComponents);
+ // if (statsa)
+ // {
+ // pool.init(pid1b, *statsa, *statsb, 0);
+ // ppfix.init(pid1a,*statsa, 0);
+ // }
+ // else
+ // {
+ // pool.init(pid1b, *statsb,0);
+ // ppfix.init();
+ // }
+
+ // for (b = statsb->trg.begin(); b != statsb->trg.end(); ++b)
+ // {
+ // ppdyn.update(b->first,b->second);
+ // BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_dyn)
+ // (*ff)(btb,ppdyn);
+
+ // uint32_t sid,off,len;
+ // parse_pid(b->first, sid, off, len);
+ // Token const* x = btb.T2->sntStart(sid) + off;
+ // TSA<Token>::tree_iterator m(bta.I2.get(),x,len);
+
+ // Token const* y = m.getToken(0);
+ // for (size_t i = 0; i < len; ++i)
+ // cout << x[i].id() << " " << endl;
+ // for (size_t i = 0; i < m.size(); ++i)
+ // cout << y[i].id() << " " << endl;
+
+ // if (statsa && m.size() &&
+ // ((a = statsa->trg.find(m.getPid())) != statsa->trg.end()))
+ // { // i.e., phrase pair found also in btfix
+ // ppfix.update(a->first,a->second);
+ // pool.update(b->first, b->second, a->second);
+ // BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+ // (*ff)(bta, ppfix, &ppdyn.fvals);
+ // check.insert(a->first);
+ // }
+ // else // phrase pair was not found in btfix
+ // {
+ // if (m.size()) // ... but the source phrase was
+ // {
+ // pool.update(b->first, m.approxOccurrenceCount(), b->second);
+ // ppfix.update(b->first,m.approxOccurrenceCount());
+ // }
+ // else // ... and not even the source phrase
+ // {
+ // pool.update(b->first, b->second);
+ // ppfix.update(b->first,0);
+ // }
+ // BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+ // (*ff)(btb, ff->allowPooling() ? pool : ppfix, &ppdyn.fvals);
+ // }
+ // BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
+ // (*ff)(btb, pool, &ppdyn.fvals);
+ // tpcoll->Add(mkTPhrase(src,btb,ppdyn));
+ // }
+ // }
+
+  // // now deal with all phrase pairs that are ONLY in btfix
+ // // (the ones that are in both were dealt with above)
+ // if (statsa)
+ // {
+ // ppfix.init(pid1a, *statsa, this->m_numScoreComponents);
+ // pool.init(pid1a, *statsa, 0);
+ // ppdyn.init();
+ // for (a = statsa->trg.begin(); a != statsa->trg.end(); ++a)
+ // {
+ // if (check.find(a->first) != check.end())
+ // continue;
+
+ // ppfix.update(a->first, a->second);
+ // BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+ // (*ff)(bta, ppfix);
+
+ // if (btb.I2)
+ // {
+ // uint32_t sid,off,len;
+ // parse_pid(a->first, sid, off, len);
+ // Token const* x = bta.T2->sntStart(sid) + off;
+ // TSA<Token>::tree_iterator m(btb.I2.get(), x, len);
+ // if (m.size())
+ // pool.update(a->first, m.approxOccurrenceCount(), a->second);
+ // else
+ // pool.update(a->first, a->second);
+ // }
+ // else pool.update(a->first, a->second);
+ // BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_dyn)
+ // (*ff)(btb, ff->allowPooling() ? pool : ppdyn, &ppfix.fvals);
+ // BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
+ // (*ff)(bta, pool, &ppfix.fvals);
+ // if (ppfix.p2)
+ // tpcoll->Add(mkTPhrase(src, bta, ppfix));
+ // }
+ // }
+ // return true;
+ // }
Mmsapt::
TargetPhraseCollectionWrapper::
@@ -575,8 +795,34 @@ namespace Moses
{
assert(this->refCount == 0);
}
-
+ template<typename Token>
+ void
+ expand(typename Bitext<Token>::iter const& m,
+ Bitext<Token> const& bt,
+ pstats const& ps, vector<PhrasePair<Token> >& dest)
+ {
+ dest.reserve(ps.trg.size());
+ PhrasePair<Token> pp;
+ pp.init(m.getToken(0), m.size(), &ps, 0);
+ // cout << HERE << " " << toString(*(bt.V1),pp.start1,pp.len1) << endl;
+ pstats::trg_map_t::const_iterator a;
+ for (a = ps.trg.begin(); a != ps.trg.end(); ++a)
+ {
+ uint32_t sid,off,len;
+ parse_pid(a->first, sid, off, len);
+ pp.update(bt.T2->sntStart(sid)+off, len, a->second);
+ dest.push_back(pp);
+ }
+ typename PhrasePair<Token>::SortByTargetIdSeq sorter;
+ sort(dest.begin(), dest.end(),sorter);
+#if 0
+ BOOST_FOREACH(PhrasePair<Token> const& p, dest)
+ cout << toString (*bt.V1,p.start1,p.len1) << " ::: "
+ << toString (*bt.V2,p.start2,p.len2) << " "
+ << p.joint << endl;
+#endif
+ }
// This is not the most efficient way of phrase lookup!
TargetPhraseCollection const*
@@ -585,13 +831,9 @@ namespace Moses
{
// map from Moses Phrase to internal id sequence
vector<id_type> sphrase;
- fillIdSeq(src,input_factor,*btfix.V1,sphrase);
+ fillIdSeq(src,input_factor,*(btfix.V1),sphrase);
if (sphrase.size() == 0) return NULL;
- // lookup in static bitext
- TSA<Token>::tree_iterator mfix(btfix.I1.get(),&sphrase[0],sphrase.size());
-
- // lookup in dynamic bitext
// Reserve a local copy of the dynamic bitext in its current form. /btdyn/
// is set to a new copy of the dynamic bitext every time a sentence pair
// is added. /dyn/ keeps the old bitext around as long as we need it.
@@ -601,12 +843,13 @@ namespace Moses
dyn = btdyn;
}
assert(dyn);
+
+ // lookup phrases in both bitexts
+ TSA<Token>::tree_iterator mfix(btfix.I1.get(), &sphrase[0], sphrase.size());
TSA<Token>::tree_iterator mdyn(dyn->I1.get());
if (dyn->I1.get())
- {
- for (size_t i = 0; mdyn.size() == i && i < sphrase.size(); ++i)
- mdyn.extend(sphrase[i]);
- }
+ for (size_t i = 0; mdyn.size() == i && i < sphrase.size(); ++i)
+ mdyn.extend(sphrase[i]);
#if 0
cerr << src << endl;
@@ -614,43 +857,62 @@ namespace Moses
<< mdyn.size() << " " << mdyn.getPid() << endl;
#endif
- // phrase not found in either
- if (mdyn.size() != sphrase.size() &&
- mfix.size() != sphrase.size())
- return NULL; // not found
+ if (mdyn.size() != sphrase.size() && mfix.size() != sphrase.size())
+ return NULL; // phrase not found in either bitext
// cache lookup:
-
- uint64_t phrasekey;
- if (mfix.size() == sphrase.size())
- phrasekey = (mfix.getPid()<<1);
- else
- phrasekey = (mdyn.getPid()<<1)+1;
-
+ uint64_t phrasekey = (mfix.size() == sphrase.size() ? (mfix.getPid()<<1)
+ : (mdyn.getPid()<<1)+1);
size_t revision = dyn->revision();
{
boost::lock_guard<boost::mutex> guard(this->lock);
tpc_cache_t::iterator c = m_cache.find(phrasekey);
+      // TO DO: revise the revision mechanism: use the length of the
+      // dynamic bitext (in sentences) at the time the PT entry was
+      // stored as its time stamp. For each word in the vocabulary,
+      // also store the time of its most recent occurrence in the
+      // bitext. Only if every word in the phrase has a newer
+      // timestamp than the entry itself does the entry need to be
+      // updated (an illustrative sketch is appended after this
+      // file's changes).
if (c != m_cache.end() && c->second->revision == revision)
return encache(c->second);
}
- // not found or not up to date
+ // OK: pt entry not found or not up to date
+      // lookup and expansion could be done in parallel threads,
+      // but ppdyn is probably small anyway
+      // TO DO: have Bitexts return lists of PhrasePairs instead of
+      // pstats, so that pstats need not be expanded again at every
+      // single lookup, especially for btfix.
sptr<pstats> sfix,sdyn;
- if (mfix.size() == sphrase.size())
- sfix = btfix.lookup(mfix);
- if (mdyn.size() == sphrase.size())
- sdyn = dyn->lookup(mdyn);
+ if (mfix.size() == sphrase.size()) sfix = btfix.lookup(mfix);
+ if (mdyn.size() == sphrase.size()) sdyn = dyn->lookup(mdyn);
+
+ vector<PhrasePair<Token> > ppfix,ppdyn;
+ if (sfix) expand(mfix, btfix, *sfix, ppfix);
+ if (sdyn) expand(mdyn, *dyn, *sdyn, ppdyn);
- TargetPhraseCollectionWrapper*
- ret = new TargetPhraseCollectionWrapper(revision,phrasekey);
- if ((poolCounts &&
- pool_pstats(src, mfix.getPid(),sfix.get(),btfix,
- mdyn.getPid(),sdyn.get(),*dyn,ret))
- || combine_pstats(src, mfix.getPid(),sfix.get(),btfix,
- mdyn.getPid(),sdyn.get(),*dyn,ret))
+ // now we have two lists of Phrase Pairs, let's merge them
+ TargetPhraseCollectionWrapper* ret;
+ ret = new TargetPhraseCollectionWrapper(revision,phrasekey);
+ PhrasePair<Token>::SortByTargetIdSeq sorter;
+ size_t i = 0; size_t k = 0;
+ while (i < ppfix.size() && k < ppdyn.size())
+ {
+ int cmp = sorter.cmp(ppfix[i], ppdyn[k]);
+ if (cmp < 0) ret->Add(mkTPhrase(src,&ppfix[i++],NULL,dyn));
+ else if (cmp == 0) ret->Add(mkTPhrase(src,&ppfix[i++],&ppdyn[k++],dyn));
+ else ret->Add(mkTPhrase(src,NULL,&ppdyn[k++],dyn));
+ }
+ while (i < ppfix.size()) ret->Add(mkTPhrase(src,&ppfix[i++],NULL,dyn));
+ while (k < ppdyn.size()) ret->Add(mkTPhrase(src,NULL,&ppdyn[k++],dyn));
+ if (m_tableLimit) ret->Prune(true, m_tableLimit);
+ else ret->Prune(true,ret->GetSize());
+#if 0
+ if (combine_pstats(src,
+ mfix.getPid(), sfix.get(), btfix,
+ mdyn.getPid(), sdyn.get(), *dyn, ret))
{
- if (m_tableLimit) ret->Prune(true,m_tableLimit);
#if 0
sort(ret->begin(), ret->end(), CompareTargetPhrase());
cout << "SOURCE PHRASE: " << src << endl;
@@ -666,6 +928,9 @@ namespace Moses
}
#endif
}
+#endif
+
+ // put the result in the cache and return
boost::lock_guard<boost::mutex> guard(this->lock);
m_cache[phrasekey] = ret;
return encache(ret);
@@ -707,6 +972,7 @@ namespace Moses
// assert(0);
}
+#if defined(timespec)
bool operator<(timespec const& a, timespec const& b)
{
if (a.tv_sec != b.tv_sec) return a.tv_sec < b.tv_sec;
@@ -718,6 +984,19 @@ namespace Moses
if (a.tv_sec != b.tv_sec) return a.tv_sec > b.tv_sec;
return (a.tv_nsec >= b.tv_nsec);
}
+#endif
+
+ bool operator<(timeval const& a, timeval const& b)
+ {
+ if (a.tv_sec != b.tv_sec) return a.tv_sec < b.tv_sec;
+ return (a.tv_usec < b.tv_usec);
+ }
+
+ bool operator>=(timeval const& a, timeval const& b)
+ {
+ if (a.tv_sec != b.tv_sec) return a.tv_sec > b.tv_sec;
+ return (a.tv_usec >= b.tv_usec);
+ }
void
bubble_up(vector<Mmsapt::TargetPhraseCollectionWrapper*>& v, size_t k)
@@ -748,12 +1027,10 @@ namespace Moses
decache(TargetPhraseCollectionWrapper* ptr) const
{
if (ptr->refCount || ptr->idx >= 0) return;
-
- timespec t; clock_gettime(CLOCK_MONOTONIC,&t);
- timespec r; clock_getres(CLOCK_MONOTONIC,&r);
-
// if (t.tv_nsec < v[0]->tstamp.tv_nsec)
#if 0
+ timespec t; clock_gettime(CLOCK_MONOTONIC,&t);
+ timespec r; clock_getres(CLOCK_MONOTONIC,&r);
float delta = t.tv_sec - ptr->tstamp.tv_sec;
cerr << "deleting old cache entry after "
<< delta << " seconds."
@@ -778,8 +1055,11 @@ namespace Moses
if (!ptr) return NULL;
++ptr->refCount;
++m_tpc_ctr;
+#if defined(timespec)
clock_gettime(CLOCK_MONOTONIC, &ptr->tstamp);
-
+#else
+ gettimeofday(&ptr->tstamp, NULL);
+#endif
// update history
if (m_history.capacity() > 1)
{
@@ -819,6 +1099,7 @@ namespace Moses
TSA<Token>::tree_iterator mfix(btfix.I1.get(),&myphrase[0],myphrase.size());
if (mfix.size() == myphrase.size())
{
+ btfix.prep(mfix);
// cerr << phrase << " " << mfix.approxOccurrenceCount() << endl;
return true;
}
@@ -834,6 +1115,7 @@ namespace Moses
{
for (size_t i = 0; mdyn.size() == i && i < myphrase.size(); ++i)
mdyn.extend(myphrase[i]);
+ if (mdyn.size() == myphrase.size()) dyn->prep(mdyn);
}
return mdyn.size() == myphrase.size();
}
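// --------------------------------------------------------------------
// Illustrative sketch, not part of this patch: one possible shape for
// the per-word time stamp check proposed in the TO DO note in the
// phrase lookup function above.  All names below (CacheFreshnessSketch,
// wordStamp_, needsUpdate, entryRevision) are invented for illustration
// only; Mmsapt and Bitext have no such members in this patch.
#include <vector>
#include <stdint.h>

struct CacheFreshnessSketch
{
  // most recent dynamic-bitext size (in sentences) at which each
  // vocabulary item was last seen
  std::vector<uint64_t> wordStamp_;

  // A cached entry can only have changed if the source phrase occurred
  // in newly added data, which requires every one of its words to have
  // been seen since the entry was stored.
  bool needsUpdate(std::vector<uint32_t> const& phraseIds,
                   uint64_t entryRevision) const
  {
    for (size_t i = 0; i < phraseIds.size(); ++i)
      if (phraseIds[i] >= wordStamp_.size()
          || wordStamp_[phraseIds[i]] <= entryRevision)
        return false; // some word unseen since then: entry is still fresh
    return !phraseIds.empty();
  }
};
// --------------------------------------------------------------------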
diff --git a/moses/TranslationModel/UG/mmsapt.h b/moses/TranslationModel/UG/mmsapt.h
index d2c5d251b..2c088dd18 100644
--- a/moses/TranslationModel/UG/mmsapt.h
+++ b/moses/TranslationModel/UG/mmsapt.h
@@ -19,6 +19,7 @@
#include "moses/TranslationModel/UG/mm/ug_typedefs.h"
#include "moses/TranslationModel/UG/mm/tpt_pickler.h"
#include "moses/TranslationModel/UG/mm/ug_bitext.h"
+#include "moses/TranslationModel/UG/mm/ug_phrasepair.h"
#include "moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h"
#include "moses/InputFileStream.h"
@@ -26,10 +27,11 @@
#include "moses/TargetPhrase.h"
#include <boost/dynamic_bitset.hpp>
#include "moses/TargetPhraseCollection.h"
+#include "util/usage.hh"
#include <map>
#include "moses/TranslationModel/PhraseDictionary.h"
-#include "mmsapt_phrase_scorers.h"
+#include "sapt_phrase_scorers.h"
// TO DO:
// - make lexical phrase scorer take addition to the "dynamic overlay" into account
@@ -47,52 +49,74 @@ namespace Moses
#endif
{
friend class Alignment;
+ map<string,string> param;
public:
typedef L2R_Token<SimpleWordId> Token;
typedef mmBitext<Token> mmbitext;
typedef imBitext<Token> imbitext;
+ typedef Bitext<Token> bitext;
typedef TSA<Token> tsa;
typedef PhraseScorer<Token> pscorer;
private:
+ // vector<sptr<bitext> > shards;
mmbitext btfix;
- sptr<imbitext> btdyn;
+ sptr<imbitext> btdyn;
string bname,extra_data;
string L1;
string L2;
- float m_lbop_parameter;
- float m_lex_alpha;
+ float m_lbop_conf; // confidence level for lbop smoothing
+    float m_lex_alpha; // alpha parameter (j+a)/(m+a) for lexical smoothing
// alpha parameter for lexical smoothing (joint+alpha)/(marg + alpha)
// must be > 0 if dynamic
size_t m_default_sample_size;
size_t m_workers; // number of worker threads for sampling the bitexts
-
- // deprecated!
- char m_pfwd_denom; // denominator for computation of fwd phrase score:
- // 'r' - divide by raw count
- // 's' - divide by sample count
- // 'g' - devide by number of "good" (i.e. coherent) samples
- // size_t num_features;
+ vector<string> m_feature_set_names; // one or more of: standard, datasource
+
+ // // deprecated!
+ // char m_pfwd_denom; // denominator for computation of fwd phrase score:
+ // // 'r' - divide by raw count
+ // // 's' - divide by sample count
+    // //                 'g' - divide by number of "good" (i.e. coherent) samples
+ // // size_t num_features;
size_t input_factor;
size_t output_factor; // we can actually return entire Tokens!
- bool withLogCountFeatures; // add logs of counts as features?
- bool withCoherence;
- string m_pfwd_features; // which pfwd functions to use
+ // bool withLogCountFeatures; // add logs of counts as features?
+ // bool withCoherence;
+ // string m_pfwd_features; // which pfwd functions to use
+ // string m_pbwd_features; // which pbwd functions to use
+
+    // for display / human inspection (ttable dumps):
vector<string> m_feature_names; // names of features activated
+ vector<bool> m_is_logval; // keeps track of which features are log valued
+ vector<bool> m_is_integer; // keeps track of which features are integer valued
+
vector<sptr<pscorer > > m_active_ff_fix; // activated feature functions (fix)
vector<sptr<pscorer > > m_active_ff_dyn; // activated feature functions (dyn)
vector<sptr<pscorer > > m_active_ff_common; // activated feature functions (dyn)
- size_t
- add_corpus_specific_features
- (vector<sptr<pscorer > >& ffvec, size_t num_feats);
+ void
+ register_ff(sptr<pscorer> const& ff, vector<sptr<pscorer> > & registry);
+
+ template<typename fftype>
+ void
+ check_ff(string const ffname,vector<sptr<pscorer> >* registry = NULL);
+ // add feature function if specified
+
+ template<typename fftype>
+ void
+ check_ff(string const ffname, float const xtra, vector<sptr<pscorer> >* registry = NULL);
+ // add feature function if specified
+
+ void
+ add_corpus_specific_features(vector<sptr<pscorer > >& ffvec);
// built-in feature functions
// PScorePfwd<Token> calc_pfwd_fix, calc_pfwd_dyn;
// PScorePbwd<Token> calc_pbwd_fix, calc_pbwd_dyn;
// PScoreLex<Token> calc_lex; // this one I'd like to see as an external ff eventually
- // PScorePP<Token> apply_pp; // apply phrase penalty
+ // PScorePC<Token> apply_pp; // apply phrase penalty
// PScoreLogCounts<Token> add_logcounts_fix;
// PScoreLogCounts<Token> add_logcounts_dyn;
void init(string const& line);
@@ -111,7 +135,11 @@ namespace Moses
size_t const revision; // time stamp from dynamic bitext
uint64_t const key; // phrase key
uint32_t refCount; // reference count
+#if defined(timespec)
timespec tstamp; // last use
+#else
+ timeval tstamp; // last use
+#endif
int idx; // position in history heap
TargetPhraseCollectionWrapper(size_t r, uint64_t const k);
~TargetPhraseCollectionWrapper();
@@ -139,12 +167,24 @@ namespace Moses
mm2dtable_t COOCraw;
TargetPhrase*
- createTargetPhrase
+ mkTPhrase(Phrase const& src,
+ Moses::bitext::PhrasePair<Token>* fix,
+ Moses::bitext::PhrasePair<Token>* dyn,
+ sptr<Bitext<Token> > const& dynbt) const;
+
+ // template<typename Token>
+ // void
+ // expand(typename Bitext<Token>::iter const& m, Bitext<Token> const& bt,
+ // pstats const& pstats, vector<PhrasePair<Token> >& dest);
+
+#if 0
+ TargetPhrase*
+ mkTPhrase
(Phrase const& src,
Bitext<Token> const& bt,
- bitext::PhrasePair const& pp
+ Moses::bitext::PhrasePair const& pp
) const;
-
+#endif
void
process_pstats
(Phrase const& src,
@@ -179,7 +219,7 @@ namespace Moses
) const;
void
- load_extra_data(string bname);
+ load_extra_data(string bname, bool locking);
mutable size_t m_tpc_ctr;
public:
@@ -230,8 +270,14 @@ namespace Moses
vector<string> const&
GetFeatureNames() const;
- void
- ScorePPfix(bitext::PhrasePair& pp) const;
+ // void
+ // ScorePPfix(bitext::PhrasePair& pp) const;
+
+ bool
+ isLogVal(int i) const;
+
+ bool
+ isInteger(int i) const;
private:
};
diff --git a/moses/TranslationModel/UG/mmsapt_align.cc b/moses/TranslationModel/UG/mmsapt_align.cc
index 407df648d..8b6bf1eb2 100644
--- a/moses/TranslationModel/UG/mmsapt_align.cc
+++ b/moses/TranslationModel/UG/mmsapt_align.cc
@@ -1,335 +1,336 @@
#include "mmsapt.h"
+// currently broken
-namespace Moses
-{
- using namespace bitext;
- using namespace std;
- using namespace boost;
+// namespace Moses
+// {
+// using namespace bitext;
+// using namespace std;
+// using namespace boost;
- struct PPgreater
- {
- bool operator()(PhrasePair const& a, PhrasePair const& b)
- {
- return a.score > b.score;
- }
- };
+// struct PPgreater
+// {
+// bool operator()(PhrasePair const& a, PhrasePair const& b)
+// {
+// return a.score > b.score;
+// }
+// };
- void
- Mmsapt::
- setWeights(vector<float> const & w)
- {
- assert(w.size() == this->m_numScoreComponents);
- this->feature_weights = w;
- }
+// void
+// Mmsapt::
+// setWeights(vector<float> const & w)
+// {
+// assert(w.size() == this->m_numScoreComponents);
+// this->feature_weights = w;
+// }
- struct PhraseAlnHyp
- {
- PhrasePair pp;
- ushort s1,e1,s2,e2; // start and end positions
- int prev; // preceding alignment hypothesis
- float score;
- bitvector scov; // source coverage
- PhraseAlnHyp(PhrasePair const& ppx, int slen,
- pair<uint32_t,uint32_t> const& sspan,
- pair<uint32_t,uint32_t> const& tspan)
- : pp(ppx), prev(-1), score(ppx.score), scov(slen)
- {
- s1 = sspan.first; e1 = sspan.second;
- s2 = tspan.first; e2 = tspan.second;
- for (size_t i = s1; i < e1; ++i)
- scov.set(i);
- }
+// struct PhraseAlnHyp
+// {
+// PhrasePair pp;
+// ushort s1,e1,s2,e2; // start and end positions
+// int prev; // preceding alignment hypothesis
+// float score;
+// bitvector scov; // source coverage
+// PhraseAlnHyp(PhrasePair const& ppx, int slen,
+// pair<uint32_t,uint32_t> const& sspan,
+// pair<uint32_t,uint32_t> const& tspan)
+// : pp(ppx), prev(-1), score(ppx.score), scov(slen)
+// {
+// s1 = sspan.first; e1 = sspan.second;
+// s2 = tspan.first; e2 = tspan.second;
+// for (size_t i = s1; i < e1; ++i)
+// scov.set(i);
+// }
- bool operator<(PhraseAlnHyp const& other) const
- {
- return this->score < other.score;
- }
+// bool operator<(PhraseAlnHyp const& other) const
+// {
+// return this->score < other.score;
+// }
- bool operator>(PhraseAlnHyp const& other) const
- {
- return this->score > other.score;
- }
+// bool operator>(PhraseAlnHyp const& other) const
+// {
+// return this->score > other.score;
+// }
- PhraseOrientation
- po_bwd(PhraseAlnHyp const* prev) const
- {
- if (s2 == 0) return po_first;
- assert(prev);
- assert(prev->e2 <= s2);
- if (prev->e2 < s2) return po_other;
- if (prev->e1 == s1) return po_mono;
- if (prev->e1 < s1) return po_jfwd;
- if (prev->s1 == e1) return po_swap;
- if (prev->s1 > e1) return po_jbwd;
- return po_other;
- }
+// PhraseOrientation
+// po_bwd(PhraseAlnHyp const* prev) const
+// {
+// if (s2 == 0) return po_first;
+// assert(prev);
+// assert(prev->e2 <= s2);
+// if (prev->e2 < s2) return po_other;
+// if (prev->e1 == s1) return po_mono;
+// if (prev->e1 < s1) return po_jfwd;
+// if (prev->s1 == e1) return po_swap;
+// if (prev->s1 > e1) return po_jbwd;
+// return po_other;
+// }
- PhraseOrientation
- po_fwd(PhraseAlnHyp const* next) const
- {
- if (!next) return po_last;
- assert(next->s2 >= e2);
- if (next->s2 < e2) return po_other;
- if (next->e1 == s1) return po_swap;
- if (next->e1 < s1) return po_jbwd;
- if (next->s1 == e1) return po_mono;
- if (next->s1 > e1) return po_jfwd;
- return po_other;
- }
+// PhraseOrientation
+// po_fwd(PhraseAlnHyp const* next) const
+// {
+// if (!next) return po_last;
+// assert(next->s2 >= e2);
+// if (next->s2 < e2) return po_other;
+// if (next->e1 == s1) return po_swap;
+// if (next->e1 < s1) return po_jbwd;
+// if (next->s1 == e1) return po_mono;
+// if (next->s1 > e1) return po_jfwd;
+// return po_other;
+// }
- float
- dprob_fwd(PhraseAlnHyp const& next)
- {
- return pp.dfwd[po_fwd(&next)];
- }
+// float
+// dprob_fwd(PhraseAlnHyp const& next)
+// {
+// return pp.dfwd[po_fwd(&next)];
+// }
- float
- dprob_bwd(PhraseAlnHyp const& prev)
- {
- return pp.dbwd[po_bwd(&prev)];
- }
+// float
+// dprob_bwd(PhraseAlnHyp const& prev)
+// {
+// return pp.dbwd[po_bwd(&prev)];
+// }
- };
+// };
- class Alignment
- {
- typedef L2R_Token<SimpleWordId> Token;
- typedef TSA<Token> tsa;
- typedef pair<uint32_t, uint32_t> span;
- typedef vector<vector<uint64_t> > pidmap_t; // span -> phrase ID
- typedef boost::unordered_map<uint64_t,vector<span> > pid2span_t;
- typedef pstats::trg_map_t jStatsTable;
+// class Alignment
+// {
+// typedef L2R_Token<SimpleWordId> Token;
+// typedef TSA<Token> tsa;
+// typedef pair<uint32_t, uint32_t> span;
+// typedef vector<vector<uint64_t> > pidmap_t; // span -> phrase ID
+// typedef boost::unordered_map<uint64_t,vector<span> > pid2span_t;
+// typedef pstats::trg_map_t jStatsTable;
- Mmsapt const& PT;
- vector<id_type> s,t;
- pidmap_t sspan2pid, tspan2pid; // span -> phrase ID
- pid2span_t spid2span,tpid2span;
- vector<vector<sptr<pstats> > > spstats;
+// Mmsapt const& PT;
+// vector<id_type> s,t;
+// pidmap_t sspan2pid, tspan2pid; // span -> phrase ID
+// pid2span_t spid2span,tpid2span;
+// vector<vector<sptr<pstats> > > spstats;
- vector<PhrasePair> PP;
- // position-independent phrase pair info
- public:
- vector<PhraseAlnHyp> PAH;
- vector<vector<int> > tpos2ahyp;
- // maps from target start positions to PhraseAlnHyps starting at
- // that position
+// vector<PhrasePair> PP;
+// // position-independent phrase pair info
+// public:
+// vector<PhraseAlnHyp> PAH;
+// vector<vector<int> > tpos2ahyp;
+// // maps from target start positions to PhraseAlnHyps starting at
+// // that position
- sptr<pstats> getPstats(span const& sspan);
- void fill_tspan_maps();
- void fill_sspan_maps();
- public:
- Alignment(Mmsapt const& pt, string const& src, string const& trg);
- void show(ostream& out);
- void show(ostream& out, PhraseAlnHyp const& ah);
- };
+// sptr<pstats> getPstats(span const& sspan);
+// void fill_tspan_maps();
+// void fill_sspan_maps();
+// public:
+// Alignment(Mmsapt const& pt, string const& src, string const& trg);
+// void show(ostream& out);
+// void show(ostream& out, PhraseAlnHyp const& ah);
+// };
- void
- Alignment::
- show(ostream& out, PhraseAlnHyp const& ah)
- {
-#if 0
- LexicalPhraseScorer2<Token>::table_t const&
- COOCjnt = PT.calc_lex.scorer.COOC;
+// void
+// Alignment::
+// show(ostream& out, PhraseAlnHyp const& ah)
+// {
+// #if 0
+// LexicalPhraseScorer2<Token>::table_t const&
+// COOCjnt = PT.calc_lex.scorer.COOC;
- out << setw(10) << exp(ah.score) << " "
- << PT.btfix.T2->pid2str(PT.btfix.V2.get(), ah.pp.p2)
- << " <=> "
- << PT.btfix.T1->pid2str(PT.btfix.V1.get(), ah.pp.p1);
- vector<uchar> const& a = ah.pp.aln;
- // BOOST_FOREACH(int x,a) cout << "[" << x << "] ";
- for (size_t u = 0; u+1 < a.size(); u += 2)
- out << " " << int(a[u+1]) << "-" << int(a[u]);
+// out << setw(10) << exp(ah.score) << " "
+// << PT.btfix.T2->pid2str(PT.btfix.V2.get(), ah.pp.p2)
+// << " <=> "
+// << PT.btfix.T1->pid2str(PT.btfix.V1.get(), ah.pp.p1);
+// vector<uchar> const& a = ah.pp.aln;
+// // BOOST_FOREACH(int x,a) cout << "[" << x << "] ";
+// for (size_t u = 0; u+1 < a.size(); u += 2)
+// out << " " << int(a[u+1]) << "-" << int(a[u]);
- if (ah.e2-ah.s2 == 1 and ah.e1-ah.s1 == 1)
- out << " " << COOCjnt[s[ah.s1]][t[ah.s2]]
- << "/" << PT.COOCraw[s[ah.s1]][t[ah.s2]]
- << "=" << float(COOCjnt[s[ah.s1]][t[ah.s2]])/PT.COOCraw[s[ah.s1]][t[ah.s2]];
- out << endl;
- // float const* ofwdj = ah.pp.dfwd;
- // float const* obwdj = ah.pp.dbwd;
- // uint32_t const* ofwdm = spstats[ah.s1][ah.e1-ah.s1-1]->ofwd;
- // uint32_t const* obwdm = spstats[ah.s1][ah.e1-ah.s1-1]->obwd;
- // out << " [first: " << ofwdj[po_first]<<"/"<<ofwdm[po_first]
- // << " last: " << ofwdj[po_last]<<"/"<<ofwdm[po_last]
- // << " mono: " << ofwdj[po_mono]<<"/"<<ofwdm[po_mono]
- // << " jfwd: " << ofwdj[po_jfwd]<<"/"<<ofwdm[po_jfwd]
- // << " swap: " << ofwdj[po_swap]<<"/"<<ofwdm[po_swap]
- // << " jbwd: " << ofwdj[po_jbwd]<<"/"<<ofwdm[po_jbwd]
- // << " other: " << ofwdj[po_other]<<"/"<<ofwdm[po_other]
- // << "]" << endl
- // << " [first: " << obwdj[po_first]<<"/"<<obwdm[po_first]
- // << " last: " << obwdj[po_last]<<"/"<<obwdm[po_last]
- // << " mono: " << obwdj[po_mono]<<"/"<<obwdm[po_mono]
- // << " jfwd: " << obwdj[po_jfwd]<<"/"<<obwdm[po_jfwd]
- // << " swap: " << obwdj[po_swap]<<"/"<<obwdm[po_swap]
- // << " jbwd: " << obwdj[po_jbwd]<<"/"<<obwdm[po_jbwd]
- // << " other: " << obwdj[po_other]<<"/"<<obwdm[po_other]
- // << "]" << endl;
-#endif
- }
+// if (ah.e2-ah.s2 == 1 and ah.e1-ah.s1 == 1)
+// out << " " << COOCjnt[s[ah.s1]][t[ah.s2]]
+// << "/" << PT.COOCraw[s[ah.s1]][t[ah.s2]]
+// << "=" << float(COOCjnt[s[ah.s1]][t[ah.s2]])/PT.COOCraw[s[ah.s1]][t[ah.s2]];
+// out << endl;
+// // float const* ofwdj = ah.pp.dfwd;
+// // float const* obwdj = ah.pp.dbwd;
+// // uint32_t const* ofwdm = spstats[ah.s1][ah.e1-ah.s1-1]->ofwd;
+// // uint32_t const* obwdm = spstats[ah.s1][ah.e1-ah.s1-1]->obwd;
+// // out << " [first: " << ofwdj[po_first]<<"/"<<ofwdm[po_first]
+// // << " last: " << ofwdj[po_last]<<"/"<<ofwdm[po_last]
+// // << " mono: " << ofwdj[po_mono]<<"/"<<ofwdm[po_mono]
+// // << " jfwd: " << ofwdj[po_jfwd]<<"/"<<ofwdm[po_jfwd]
+// // << " swap: " << ofwdj[po_swap]<<"/"<<ofwdm[po_swap]
+// // << " jbwd: " << ofwdj[po_jbwd]<<"/"<<ofwdm[po_jbwd]
+// // << " other: " << ofwdj[po_other]<<"/"<<ofwdm[po_other]
+// // << "]" << endl
+// // << " [first: " << obwdj[po_first]<<"/"<<obwdm[po_first]
+// // << " last: " << obwdj[po_last]<<"/"<<obwdm[po_last]
+// // << " mono: " << obwdj[po_mono]<<"/"<<obwdm[po_mono]
+// // << " jfwd: " << obwdj[po_jfwd]<<"/"<<obwdm[po_jfwd]
+// // << " swap: " << obwdj[po_swap]<<"/"<<obwdm[po_swap]
+// // << " jbwd: " << obwdj[po_jbwd]<<"/"<<obwdm[po_jbwd]
+// // << " other: " << obwdj[po_other]<<"/"<<obwdm[po_other]
+// // << "]" << endl;
+// #endif
+// }
- void
- Alignment::
- show(ostream& out)
- {
- // show what we have so far ...
- for (size_t s2 = 0; s2 < t.size(); ++s2)
- {
- VectorIndexSorter<PhraseAlnHyp> foo(PAH);
- sort(tpos2ahyp[s2].begin(), tpos2ahyp[s2].end(), foo);
- for (size_t h = 0; h < tpos2ahyp[s2].size(); ++h)
- show(out,PAH[tpos2ahyp[s2][h]]);
- }
- }
+// void
+// Alignment::
+// show(ostream& out)
+// {
+// // show what we have so far ...
+// for (size_t s2 = 0; s2 < t.size(); ++s2)
+// {
+// VectorIndexSorter<PhraseAlnHyp> foo(PAH);
+// sort(tpos2ahyp[s2].begin(), tpos2ahyp[s2].end(), foo);
+// for (size_t h = 0; h < tpos2ahyp[s2].size(); ++h)
+// show(out,PAH[tpos2ahyp[s2][h]]);
+// }
+// }
- sptr<pstats>
- Alignment::
- getPstats(span const& sspan)
- {
- size_t k = sspan.second - sspan.first - 1;
- if (k < spstats[sspan.first].size())
- return spstats[sspan.first][k];
- else return sptr<pstats>();
- }
+// sptr<pstats>
+// Alignment::
+// getPstats(span const& sspan)
+// {
+// size_t k = sspan.second - sspan.first - 1;
+// if (k < spstats[sspan.first].size())
+// return spstats[sspan.first][k];
+// else return sptr<pstats>();
+// }
- void
- Alignment::
- fill_tspan_maps()
- {
- tspan2pid.assign(t.size(),vector<uint64_t>(t.size(),0));
- for (size_t i = 0; i < t.size(); ++i)
- {
- tsa::tree_iterator m(PT.btfix.I2.get());
- for (size_t k = i; k < t.size() && m.extend(t[k]); ++k)
- {
- uint64_t pid = m.getPid();
- tpid2span[pid].push_back(pair<uint32_t,uint32_t>(i,k+1));
- tspan2pid[i][k] = pid;
- }
- }
- }
+// void
+// Alignment::
+// fill_tspan_maps()
+// {
+// tspan2pid.assign(t.size(),vector<uint64_t>(t.size(),0));
+// for (size_t i = 0; i < t.size(); ++i)
+// {
+// tsa::tree_iterator m(PT.btfix.I2.get());
+// for (size_t k = i; k < t.size() && m.extend(t[k]); ++k)
+// {
+// uint64_t pid = m.getPid();
+// tpid2span[pid].push_back(pair<uint32_t,uint32_t>(i,k+1));
+// tspan2pid[i][k] = pid;
+// }
+// }
+// }
- void
- Alignment::
- fill_sspan_maps()
- {
- sspan2pid.assign(s.size(),vector<uint64_t>(s.size(),0));
- spstats.resize(s.size());
- for (size_t i = 0; i < s.size(); ++i)
- {
- tsa::tree_iterator m(PT.btfix.I1.get());
- for (size_t k = i; k < s.size() && m.extend(s[k]); ++k)
- {
- uint64_t pid = m.getPid();
- sspan2pid[i][k] = pid;
- pid2span_t::iterator p = spid2span.find(pid);
- if (p != spid2span.end())
- {
- int x = p->second[0].first;
- int y = p->second[0].second-1;
- spstats[i].push_back(spstats[x][y-x]);
- }
- else
- {
- spstats[i].push_back(PT.btfix.lookup(m));
- cout << PT.btfix.T1->pid2str(PT.btfix.V1.get(),pid) << " "
- << spstats[i].back()->good << "/" << spstats[i].back()->sample_cnt
- << endl;
- }
- spid2span[pid].push_back(pair<uint32_t,uint32_t>(i,k+1));
- }
- }
- }
+// void
+// Alignment::
+// fill_sspan_maps()
+// {
+// sspan2pid.assign(s.size(),vector<uint64_t>(s.size(),0));
+// spstats.resize(s.size());
+// for (size_t i = 0; i < s.size(); ++i)
+// {
+// tsa::tree_iterator m(PT.btfix.I1.get());
+// for (size_t k = i; k < s.size() && m.extend(s[k]); ++k)
+// {
+// uint64_t pid = m.getPid();
+// sspan2pid[i][k] = pid;
+// pid2span_t::iterator p = spid2span.find(pid);
+// if (p != spid2span.end())
+// {
+// int x = p->second[0].first;
+// int y = p->second[0].second-1;
+// spstats[i].push_back(spstats[x][y-x]);
+// }
+// else
+// {
+// spstats[i].push_back(PT.btfix.lookup(m));
+// cout << PT.btfix.T1->pid2str(PT.btfix.V1.get(),pid) << " "
+// << spstats[i].back()->good << "/" << spstats[i].back()->sample_cnt
+// << endl;
+// }
+// spid2span[pid].push_back(pair<uint32_t,uint32_t>(i,k+1));
+// }
+// }
+// }
- Alignment::
- Alignment(Mmsapt const& pt, string const& src, string const& trg)
- : PT(pt)
- {
- PT.btfix.V1->fillIdSeq(src,s);
- PT.btfix.V2->fillIdSeq(trg,t);
+// Alignment::
+// Alignment(Mmsapt const& pt, string const& src, string const& trg)
+// : PT(pt)
+// {
+// PT.btfix.V1->fillIdSeq(src,s);
+// PT.btfix.V2->fillIdSeq(trg,t);
- // LexicalPhraseScorer2<Token>::table_t const& COOC = PT.calc_lex.scorer.COOC;
- // BOOST_FOREACH(id_type i, t)
- // {
- // cout << (*PT.btfix.V2)[i];
- // if (i < PT.wlex21.size())
- // {
- // BOOST_FOREACH(id_type k, PT.wlex21[i])
- // {
- // size_t j = COOC[k][i];
- // size_t m1 = COOC.m1(k);
- // size_t m2 = COOC.m2(i);
- // if (j*1000 > m1 && j*1000 > m2)
- // cout << " " << (*PT.btfix.V1)[k];
- // }
- // }
- // cout << endl;
- // }
+// // LexicalPhraseScorer2<Token>::table_t const& COOC = PT.calc_lex.scorer.COOC;
+// // BOOST_FOREACH(id_type i, t)
+// // {
+// // cout << (*PT.btfix.V2)[i];
+// // if (i < PT.wlex21.size())
+// // {
+// // BOOST_FOREACH(id_type k, PT.wlex21[i])
+// // {
+// // size_t j = COOC[k][i];
+// // size_t m1 = COOC.m1(k);
+// // size_t m2 = COOC.m2(i);
+// // if (j*1000 > m1 && j*1000 > m2)
+// // cout << " " << (*PT.btfix.V1)[k];
+// // }
+// // }
+// // cout << endl;
+// // }
- fill_tspan_maps();
- fill_sspan_maps();
- tpos2ahyp.resize(t.size());
- // now fill the association score table
- PAH.reserve(1000000);
- typedef pid2span_t::iterator psiter;
- for (psiter L = spid2span.begin(); L != spid2span.end(); ++L)
- {
- if (!L->second.size()) continue; // should never happen anyway
- int i = L->second[0].first;
- int k = L->second[0].second - i -1;
- sptr<pstats> ps = spstats[i][k];
- PhrasePair pp; pp.init(L->first,*ps, PT.m_numScoreComponents);
- jStatsTable & J = ps->trg;
- for (jStatsTable::iterator y = J.begin(); y != J.end(); ++y)
- {
- psiter R = tpid2span.find(y->first);
- if (R == tpid2span.end()) continue;
- pp.update(y->first, y->second);
- PT.ScorePPfix(pp);
- pp.eval(PT.feature_weights);
- PP.push_back(pp);
- BOOST_FOREACH(span const& sspan, L->second)
- {
- BOOST_FOREACH(span const& tspan, R->second)
- {
- tpos2ahyp[tspan.first].push_back(PAH.size());
- PAH.push_back(PhraseAlnHyp(PP.back(),s.size(),sspan,tspan));
- }
- }
- }
- }
- }
+// fill_tspan_maps();
+// fill_sspan_maps();
+// tpos2ahyp.resize(t.size());
+// // now fill the association score table
+// PAH.reserve(1000000);
+// typedef pid2span_t::iterator psiter;
+// for (psiter L = spid2span.begin(); L != spid2span.end(); ++L)
+// {
+// if (!L->second.size()) continue; // should never happen anyway
+// int i = L->second[0].first;
+// int k = L->second[0].second - i -1;
+// sptr<pstats> ps = spstats[i][k];
+// PhrasePair pp; pp.init(L->first,*ps, PT.m_numScoreComponents);
+// jStatsTable & J = ps->trg;
+// for (jStatsTable::iterator y = J.begin(); y != J.end(); ++y)
+// {
+// psiter R = tpid2span.find(y->first);
+// if (R == tpid2span.end()) continue;
+// pp.update(y->first, y->second);
+// PT.ScorePPfix(pp);
+// pp.eval(PT.feature_weights);
+// PP.push_back(pp);
+// BOOST_FOREACH(span const& sspan, L->second)
+// {
+// BOOST_FOREACH(span const& tspan, R->second)
+// {
+// tpos2ahyp[tspan.first].push_back(PAH.size());
+// PAH.push_back(PhraseAlnHyp(PP.back(),s.size(),sspan,tspan));
+// }
+// }
+// }
+// }
+// }
- int
- extend(vector<PhraseAlnHyp> & PAH, int edge, int next)
- {
- if ((PAH[edge].scov & PAH[next].scov).count())
- return -1;
- int ret = PAH.size();
- PAH.push_back(PAH[next]);
- PhraseAlnHyp & h = PAH.back();
- h.prev = edge;
- h.scov |= PAH[edge].scov;
- h.score += log(PAH[edge].dprob_fwd(PAH[next]));
- h.score += log(PAH[next].dprob_bwd(PAH[edge]));
- return ret;
- }
+// int
+// extend(vector<PhraseAlnHyp> & PAH, int edge, int next)
+// {
+// if ((PAH[edge].scov & PAH[next].scov).count())
+// return -1;
+// int ret = PAH.size();
+// PAH.push_back(PAH[next]);
+// PhraseAlnHyp & h = PAH.back();
+// h.prev = edge;
+// h.scov |= PAH[edge].scov;
+// h.score += log(PAH[edge].dprob_fwd(PAH[next]));
+// h.score += log(PAH[next].dprob_bwd(PAH[edge]));
+// return ret;
+// }
- sptr<vector<int> >
- Mmsapt::
- align(string const& src, string const& trg) const
- {
- // For the time being, we consult only the fixed bitext.
- // We might also consider the dynamic bitext. => TO DO.
- Alignment A(*this,src,trg);
- VectorIndexSorter<PhraseAlnHyp> foo(A.PAH);
- vector<size_t> o; foo.GetOrder(o);
- BOOST_FOREACH(int i, o) A.show(cout,A.PAH[i]);
- sptr<vector<int> > aln;
- return aln;
-}
-}
+// sptr<vector<int> >
+// Mmsapt::
+// align(string const& src, string const& trg) const
+// {
+// // For the time being, we consult only the fixed bitext.
+// // We might also consider the dynamic bitext. => TO DO.
+// Alignment A(*this,src,trg);
+// VectorIndexSorter<PhraseAlnHyp> foo(A.PAH);
+// vector<size_t> o; foo.GetOrder(o);
+// BOOST_FOREACH(int i, o) A.show(cout,A.PAH[i]);
+// sptr<vector<int> > aln;
+// return aln;
+// }
+// }
diff --git a/moses/TranslationModel/UG/mmsapt_phrase_scorers.h b/moses/TranslationModel/UG/mmsapt_phrase_scorers.h
deleted file mode 100644
index 36c134da2..000000000
--- a/moses/TranslationModel/UG/mmsapt_phrase_scorers.h
+++ /dev/null
@@ -1,311 +0,0 @@
-// -*- c++ -*-
-#pragma once
-#include "moses/TranslationModel/UG/mm/ug_bitext.h"
-#include "util/exception.hh"
-
-namespace Moses {
- namespace bitext
- {
-
- template<typename Token>
- class
- PhraseScorer
- {
- protected:
- int m_index;
- int m_num_feats;
- vector<string> m_feature_names;
- public:
-
- virtual
- void
- operator()(Bitext<Token> const& pt, PhrasePair& pp, vector<float> * dest=NULL)
- const = 0;
-
- int
- fcnt() const
- { return m_num_feats; }
-
- vector<string> const &
- fnames() const
- { return m_feature_names; }
-
- string const &
- fname(int i) const
- {
- UTIL_THROW_IF2((i < m_index || i >= m_index + m_num_feats),
- "Feature name index out of range at "
- << __FILE__ << ":" << __LINE__);
- return m_feature_names.at(i - m_index);
- }
-
- int
- getIndex() const
- { return m_index; }
- };
-
- ////////////////////////////////////////////////////////////////////////////////
-
- template<typename Token>
- class
- PScorePfwd : public PhraseScorer<Token>
- {
- float conf;
- char denom;
- public:
- PScorePfwd()
- {
- this->m_num_feats = 1;
- }
-
- int
- init(int const i, float const c, char d)
- {
- conf = c;
- denom = d;
- this->m_index = i;
- ostringstream buf;
- buf << format("pfwd-%c%.3f") % denom % c;
- this->m_feature_names.push_back(buf.str());
- return i + this->m_num_feats;
- }
-
- void
- operator()(Bitext<Token> const& bt, PhrasePair & pp,
- vector<float> * dest = NULL) const
- {
- if (!dest) dest = &pp.fvals;
- if (pp.joint > pp.good1)
- {
- cerr<<bt.toString(pp.p1,0)<<" ::: "<<bt.toString(pp.p2,1)<<endl;
- cerr<<pp.joint<<"/"<<pp.good1<<"/"<<pp.raw2<<endl;
- }
- switch (denom)
- {
- case 'g':
- (*dest)[this->m_index] = log(lbop(pp.good1, pp.joint, conf));
- break;
- case 's':
- (*dest)[this->m_index] = log(lbop(pp.sample1, pp.joint, conf));
- break;
- case 'r':
- (*dest)[this->m_index] = log(lbop(pp.raw1, pp.joint, conf));
- }
- }
- };
-
- ////////////////////////////////////////////////////////////////////////////////
-
- template<typename Token>
- class
- PScorePbwd : public PhraseScorer<Token>
- {
- float conf;
- public:
- PScorePbwd()
- {
- this->m_num_feats = 1;
- }
-
- int
- init(int const i, float const c)
- {
- conf = c;
- this->m_index = i;
- ostringstream buf;
- buf << format("pbwd%.3f") % c;
- this->m_feature_names.push_back(buf.str());
- return i + this->m_num_feats;
- }
-
- void
- operator()(Bitext<Token> const& bt, PhrasePair& pp,
- vector<float> * dest = NULL) const
- {
- if (!dest) dest = &pp.fvals;
- (*dest)[this->m_index] = log(lbop(max(pp.raw2,pp.joint),pp.joint,conf));
- }
- };
-
- ////////////////////////////////////////////////////////////////////////////////
-
- template<typename Token>
- class
- PScoreCoherence : public PhraseScorer<Token>
- {
- public:
- PScoreCoherence()
- {
- this->m_num_feats = 1;
- }
-
- int
- init(int const i)
- {
- this->m_index = i;
- this->m_feature_names.push_back(string("coherence"));
- return i + this->m_num_feats;
- }
-
- void
- operator()(Bitext<Token> const& bt, PhrasePair& pp,
- vector<float> * dest = NULL) const
- {
- if (!dest) dest = &pp.fvals;
- (*dest)[this->m_index] = log(pp.good1) - log(pp.sample1);
- }
- };
-
- ////////////////////////////////////////////////////////////////////////////////
-
- template<typename Token>
- class
- PScoreLogCounts : public PhraseScorer<Token>
- {
- float conf;
- public:
- PScoreLogCounts()
- {
- this->m_num_feats = 5;
- }
-
- int
- init(int const i)
- {
- this->m_index = i;
- this->m_feature_names.push_back("log-r1");
- this->m_feature_names.push_back("log-s1");
- this->m_feature_names.push_back("log-g1");
- this->m_feature_names.push_back("log-j");
- this->m_feature_names.push_back("log-r2");
- return i + this->m_num_feats;
- }
-
- void
- operator()(Bitext<Token> const& bt, PhrasePair& pp,
- vector<float> * dest = NULL) const
- {
- if (!dest) dest = &pp.fvals;
- size_t i = this->m_index;
- assert(pp.raw1);
- assert(pp.sample1);
- assert(pp.good1);
- assert(pp.joint);
- assert(pp.raw2);
- (*dest)[i] = -log(pp.raw1);
- (*dest)[++i] = -log(pp.sample1);
- (*dest)[++i] = -log(pp.good1);
- (*dest)[++i] = +log(pp.joint);
- (*dest)[++i] = -log(pp.raw2);
- }
- };
-
- template<typename Token>
- class
- PScoreLex : public PhraseScorer<Token>
- {
- float const m_alpha;
- public:
- LexicalPhraseScorer2<Token> scorer;
-
- PScoreLex(float const a)
- : m_alpha(a)
- { this->m_num_feats = 2; }
-
- int
- init(int const i, string const& fname)
- {
- scorer.open(fname);
- this->m_index = i;
- this->m_feature_names.push_back("lexfwd");
- this->m_feature_names.push_back("lexbwd");
- return i + this->m_num_feats;
- }
-
- void
- operator()(Bitext<Token> const& bt, PhrasePair& pp, vector<float> * dest = NULL) const
- {
- if (!dest) dest = &pp.fvals;
- uint32_t sid1=0,sid2=0,off1=0,off2=0,len1=0,len2=0;
- parse_pid(pp.p1, sid1, off1, len1);
- parse_pid(pp.p2, sid2, off2, len2);
-
-#if 0
- cout << len1 << " " << len2 << endl;
- Token const* t1 = bt.T1->sntStart(sid1);
- for (size_t i = off1; i < off1 + len1; ++i)
- cout << (*bt.V1)[t1[i].id()] << " ";
- cout << __FILE__ << ":" << __LINE__ << endl;
-
- Token const* t2 = bt.T2->sntStart(sid2);
- for (size_t i = off2; i < off2 + len2; ++i)
- cout << (*bt.V2)[t2[i].id()] << " ";
- cout << __FILE__ << ":" << __LINE__ << endl;
-
- BOOST_FOREACH (int a, pp.aln)
- cout << a << " " ;
- cout << __FILE__ << ":" << __LINE__ << "\n" << endl;
-
-#endif
- scorer.score(bt.T1->sntStart(sid1)+off1,0,len1,
- bt.T2->sntStart(sid2)+off2,0,len2,
- pp.aln, m_alpha,
- (*dest)[this->m_index],
- (*dest)[this->m_index+1]);
- }
-
- };
-
- /// Word penalty
- template<typename Token>
- class
- PScoreWP : public PhraseScorer<Token>
- {
- public:
-
- PScoreWP() { this->m_num_feats = 1; }
-
- int
- init(int const i)
- {
- this->m_index = i;
- return i + this->m_num_feats;
- }
-
- void
- operator()(Bitext<Token> const& bt, PhrasePair& pp, vector<float> * dest = NULL) const
- {
- if (!dest) dest = &pp.fvals;
- uint32_t sid2=0,off2=0,len2=0;
- parse_pid(pp.p2, sid2, off2, len2);
- (*dest)[this->m_index] = len2;
- }
-
- };
-
- /// Phrase penalty
- template<typename Token>
- class
- PScorePP : public PhraseScorer<Token>
- {
- public:
-
- PScorePP() { this->m_num_feats = 1; }
-
- int
- init(int const i)
- {
- this->m_index = i;
- return i + this->m_num_feats;
- }
-
- void
- operator()(Bitext<Token> const& bt, PhrasePair& pp, vector<float> * dest = NULL) const
- {
- if (!dest) dest = &pp.fvals;
- (*dest)[this->m_index] = 1;
- }
-
- };
- }
-}
diff --git a/moses/TranslationModel/UG/ptable-lookup.cc b/moses/TranslationModel/UG/ptable-lookup.cc
new file mode 100644
index 000000000..2cbf89b16
--- /dev/null
+++ b/moses/TranslationModel/UG/ptable-lookup.cc
@@ -0,0 +1,123 @@
+#include "mmsapt.h"
+#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
+#include <boost/foreach.hpp>
+#include <boost/format.hpp>
+#include <boost/tokenizer.hpp>
+#include <boost/shared_ptr.hpp>
+#include <algorithm>
+#include <iostream>
+
+using namespace Moses;
+using namespace bitext;
+using namespace std;
+using namespace boost;
+
+vector<FactorType> fo(1,FactorType(0));
+
+class SimplePhrase : public Moses::Phrase
+{
+ vector<FactorType> const m_fo; // factor order
+public:
+ SimplePhrase(): m_fo(1,FactorType(0)) {}
+
+ void init(string const& s)
+ {
+ istringstream buf(s); string w;
+ while (buf >> w)
+ {
+ Word wrd;
+ this->AddWord().CreateFromString(Input,m_fo,StringPiece(w),false,false);
+ }
+ }
+};
+
+class TargetPhraseIndexSorter
+{
+ TargetPhraseCollection const& my_tpc;
+ CompareTargetPhrase cmp;
+public:
+ TargetPhraseIndexSorter(TargetPhraseCollection const& tpc) : my_tpc(tpc) {}
+ bool operator()(size_t a, size_t b) const
+ {
+ return cmp(*my_tpc[a], *my_tpc[b]);
+ }
+};
+
+int main(int argc, char* argv[])
+{
+ Parameter params;
+ if (!params.LoadParam(argc,argv) || !StaticData::LoadDataStatic(&params, argv[0]))
+ exit(1);
+
+ StaticData const& global = StaticData::Instance();
+ global.SetVerboseLevel(0);
+ vector<FactorType> ifo = global.GetInputFactorOrder();
+
+ PhraseDictionary* PT = PhraseDictionary::GetColl()[0];
+ Mmsapt* mmsapt = dynamic_cast<Mmsapt*>(PT);
+ PhraseDictionaryTreeAdaptor* pdta = dynamic_cast<PhraseDictionaryTreeAdaptor*>(PT);
+ // vector<FeatureFunction*> const& ffs = FeatureFunction::GetFeatureFunctions();
+
+ if (!mmsapt && !pdta)
+ {
+ cerr << "Phrase table implementation not supported by this utility." << endl;
+ exit(1);
+ }
+
+ string line;
+ while (true)
+ {
+ Sentence phrase;
+ if (!phrase.Read(cin,ifo)) break;
+ if (pdta)
+ {
+ pdta->InitializeForInput(phrase);
+ // do we also need to call CleanupAfterSentenceProcessing at the end?
+ }
+ Phrase& p = phrase;
+
+ cout << p << endl;
+ TargetPhraseCollection const* trg = PT->GetTargetPhraseCollectionLEGACY(p);
+ if (!trg) continue;
+ vector<size_t> order(trg->GetSize());
+ for (size_t i = 0; i < order.size(); ++i) order[i] = i;
+ sort(order.begin(),order.end(),TargetPhraseIndexSorter(*trg));
+ size_t k = 0;
+ // size_t precision =
+ cout.precision(2);
+
+ vector<string> fname;
+ if (mmsapt)
+ {
+ fname = mmsapt->GetFeatureNames();
+ cout << " ";
+ BOOST_FOREACH(string const& fn, fname)
+ cout << " " << format("%10.10s") % fn;
+ cout << endl;
+ }
+
+ BOOST_FOREACH(size_t i, order)
+ {
+ Phrase const& phr = static_cast<Phrase const&>(*(*trg)[i]);
+ cout << setw(3) << ++k << " " << phr << endl;
+ ScoreComponentCollection const& scc = (*trg)[i]->GetScoreBreakdown();
+ ScoreComponentCollection::IndexPair idx = scc.GetIndexes(PT);
+ FVector const& scores = scc.GetScoresVector();
+ cout << " ";
+ for (size_t k = idx.first; k < idx.second; ++k)
+ {
+ size_t j = k-idx.first;
+ float f = (mmsapt ? mmsapt->isLogVal(j) ? exp(scores[k]) : scores[k]
+ : scores[k] < 0 ? exp(scores[k]) : scores[k]);
+ string fmt = (mmsapt && mmsapt->isInteger(j)) ? "%10d" : "%10.8f";
+ cout << " " << format(fmt) % f;
+ }
+ cout << endl;
+ }
+ PT->Release(trg);
+ }
+ exit(0);
+}
+
+
+
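// Hedged usage note, not part of this patch: ptable-lookup is driven by
// the standard Moses parameter loader, so it would presumably be run
// with a regular decoder configuration and source phrases on stdin,
// along the lines of
//
//   echo "das Haus" | ptable-lookup -f moses.ini
//
// The binary name and the -f flag are assumptions based on the usual
// Moses command line; only Mmsapt and PhraseDictionaryTreeAdaptor
// phrase tables are supported (see the check in main above).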
diff --git a/moses/TranslationModel/UG/sapt_phrase_key.h b/moses/TranslationModel/UG/sapt_phrase_key.h
new file mode 100644
index 000000000..e1ecf1573
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_phrase_key.h
@@ -0,0 +1,13 @@
+//-*- c++ -*-
+#pragma once
+#include <stdint.h>
+
+using namespace std;
+namespace sapt
+{
+ using namespace Moses;
+ using namespace std;
+
+
+
+}
diff --git a/moses/TranslationModel/UG/sapt_phrase_scorers.h b/moses/TranslationModel/UG/sapt_phrase_scorers.h
new file mode 100644
index 000000000..9870ed7f0
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_phrase_scorers.h
@@ -0,0 +1,14 @@
+// -*- c++ -*-
+// Phrase scoring functions for suffix array-based phrase tables
+// written by Ulrich Germann
+#pragma once
+#include "sapt_pscore_unaligned.h" // count # of unaligned words
+#include "sapt_pscore_provenance.h" // reward for joint phrase occ. per corpus
+#include "sapt_pscore_rareness.h" // penalty for rare occurrences (global?)
+#include "sapt_pscore_logcnt.h" // logs of observed counts
+#include "sapt_pscore_lex1.h" // plain vanilla Moses lexical scores
+#include "sapt_pscore_pfwd.h" // fwd phrase prob
+#include "sapt_pscore_pbwd.h" // bwd phrase prob
+#include "sapt_pscore_coherence.h" // coherence feature: good/sample-size
+#include "sapt_pscore_phrasecount.h" // phrase count
+#include "sapt_pscore_wordcount.h" // word count
diff --git a/moses/TranslationModel/UG/sapt_pscore_base.h b/moses/TranslationModel/UG/sapt_pscore_base.h
new file mode 100644
index 000000000..68a491145
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_pscore_base.h
@@ -0,0 +1,103 @@
+// -*- c++ -*-
+// Base classes for suffix array-based phrase scorers
+// written by Ulrich Germann
+#pragma once
+#include "moses/TranslationModel/UG/mm/ug_bitext.h"
+#include "moses/TranslationModel/UG/mm/ug_phrasepair.h"
+#include "util/exception.hh"
+#include "boost/format.hpp"
+
+namespace Moses {
+ namespace bitext
+ {
+
+ // abstract base class that defines the common API for phrase scorers
+ template<typename Token>
+ class
+ PhraseScorer
+ {
+ protected:
+ int m_index;
+ int m_num_feats;
+ string m_tag;
+ vector<string> m_feature_names;
+ public:
+
+ virtual
+ void
+ operator()(Bitext<Token> const& pt,
+ PhrasePair<Token>& pp,
+ vector<float> * dest=NULL)
+ const = 0;
+
+ void
+ setIndex(int const i) { m_index = i; }
+
+ int
+ getIndex() const { return m_index; }
+
+ int
+ fcnt() const { return m_num_feats; }
+
+ vector<string> const &
+ fnames() const { return m_feature_names; }
+
+ string const &
+ fname(int i) const
+ {
+ if (i < 0) i += m_num_feats;
+ UTIL_THROW_IF2(i < 0 || i >= m_num_feats,
+ "Feature name index out of range at " << HERE);
+ return m_feature_names.at(i);
+ }
+
+ virtual
+ bool
+ isLogVal(int i) const { return true; };
+ // is this feature log valued?
+
+ virtual
+ bool
+ isIntegerValued(int i) const { return false; };
+ // is this feature integer valued (e.g., count features)?
+
+ virtual
+ bool
+ allowPooling() const { return true; }
+ // does this feature function allow pooling of counts if
+ // there are no occurrences in the respective corpus?
+
+ };
+
+    // base class for 'families' of phrase scorers that have a single
+    // real-valued parameter
+ template<typename Token>
+ class
+ SingleRealValuedParameterPhraseScorerFamily
+ : public PhraseScorer<Token>
+ {
+ protected:
+ vector<float> m_x;
+
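+      // init() parses a list of real-valued parameters from the spec string,
+      // e.g. (illustrative) "1+10+100": each value x becomes one feature
+      // named <m_tag>-x, so a subclass with m_tag "rare" would get the
+      // features rare-1.00, rare-10.00 and rare-100.00. Any single
+      // non-numeric character may separate the values.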
+ virtual
+ void
+ init(string const specs)
+ {
+ using namespace boost;
+ UTIL_THROW_IF2(this->m_tag.size() == 0,
+ "m_tag must be initialized in constructor");
+ UTIL_THROW_IF2(specs.size() == 0,"empty specification string!");
+ UTIL_THROW_IF2(this->m_feature_names.size(),
+ "PhraseScorer can only be initialized once!");
+ this->m_index = -1;
+ float x; char c;
+ for (istringstream buf(specs); buf>>x; buf>>c)
+ {
+ this->m_x.push_back(x);
+ string fname = (format("%s-%.2f") % this->m_tag % x).str();
+ this->m_feature_names.push_back(fname);
+ }
+ this->m_num_feats = this->m_x.size();
+ }
+ };
+ } // namespace bitext
+} // namespace moses
diff --git a/moses/TranslationModel/UG/sapt_pscore_coherence.h b/moses/TranslationModel/UG/sapt_pscore_coherence.h
new file mode 100644
index 000000000..a3211df54
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_pscore_coherence.h
@@ -0,0 +1,33 @@
+// -*- c++ -*-
+// written by Ulrich Germann
+#pragma once
+#include "moses/TranslationModel/UG/mm/ug_bitext.h"
+#include "util/exception.hh"
+#include "boost/format.hpp"
+
+namespace Moses {
+ namespace bitext
+ {
+ template<typename Token>
+ class
+ PScoreCoherence : public PhraseScorer<Token>
+ {
+ public:
+ PScoreCoherence(string const dummy)
+ {
+ this->m_index = -1;
+ this->m_num_feats = 1;
+ this->m_feature_names.push_back(string("coherence"));
+ }
+
+ void
+ operator()(Bitext<Token> const& bt,
+ PhrasePair<Token>& pp,
+ vector<float> * dest = NULL) const
+ {
+ if (!dest) dest = &pp.fvals;
+ (*dest)[this->m_index] = log(pp.good1) - log(pp.sample1);
+ }
+ };
+ }
+}
diff --git a/moses/TranslationModel/UG/sapt_pscore_lex1.h b/moses/TranslationModel/UG/sapt_pscore_lex1.h
new file mode 100644
index 000000000..be994b0d3
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_pscore_lex1.h
@@ -0,0 +1,70 @@
+// -*- c++ -*-
+// Phrase scorer for forward and backward lexical (word translation) scores
+// written by Ulrich Germann
+
+#include "moses/TranslationModel/UG/mm/ug_bitext.h"
+#include "sapt_pscore_base.h"
+#include <boost/dynamic_bitset.hpp>
+
+namespace Moses {
+ namespace bitext
+ {
+ template<typename Token>
+ class
+ PScoreLex1 : public PhraseScorer<Token>
+ {
+ float m_alpha;
+ public:
+ LexicalPhraseScorer2<Token> scorer;
+
+    PScoreLex1(string const& alphaspec, string const& lexfile)
+ {
+ this->m_index = -1;
+ this->m_num_feats = 2;
+ this->m_feature_names.reserve(2);
+ this->m_feature_names.push_back("lexfwd");
+ this->m_feature_names.push_back("lexbwd");
+      m_alpha = atof(alphaspec.c_str());
+ scorer.open(lexfile);
+ }
+
+ void
+ operator()(Bitext<Token> const& bt,
+ PhrasePair<Token>& pp,
+ vector<float> * dest = NULL) const
+ {
+ if (!dest) dest = &pp.fvals;
+ // uint32_t sid1=0,sid2=0,off1=0,off2=0,len1=0,len2=0;
+ // parse_pid(pp.p1, sid1, off1, len1);
+ // parse_pid(pp.p2, sid2, off2, len2);
+#if 0
+ cout << len1 << " " << len2 << endl;
+ Token const* t1 = bt.T1->sntStart(sid1);
+ for (size_t i = off1; i < off1 + len1; ++i)
+ cout << (*bt.V1)[t1[i].id()] << " ";
+ cout << __FILE__ << ":" << __LINE__ << endl;
+
+ Token const* t2 = bt.T2->sntStart(sid2);
+ for (size_t i = off2; i < off2 + len2; ++i)
+ cout << (*bt.V2)[t2[i].id()] << " ";
+ cout << __FILE__ << ":" << __LINE__ << endl;
+
+ BOOST_FOREACH (int a, pp.aln)
+ cout << a << " " ;
+ cout << __FILE__ << ":" << __LINE__ << "\n" << endl;
+
+ scorer.score(bt.T1->sntStart(sid1)+off1,0,len1,
+ bt.T2->sntStart(sid2)+off2,0,len2,
+ pp.aln, m_alpha,
+ (*dest)[this->m_index],
+ (*dest)[this->m_index+1]);
+#endif
+ scorer.score(pp.start1,0, pp.len1,
+ pp.start2,0, pp.len2, pp.aln, m_alpha,
+ (*dest)[this->m_index],
+ (*dest)[this->m_index+1]);
+ }
+ };
+ } //namespace bitext
+} // namespace Moses
+
diff --git a/moses/TranslationModel/UG/sapt_pscore_logcnt.h b/moses/TranslationModel/UG/sapt_pscore_logcnt.h
new file mode 100644
index 000000000..2790323ed
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_pscore_logcnt.h
@@ -0,0 +1,65 @@
+// -*- c++ -*-
+// Phrase scorer that adds the logarithms of observed phrase pair counts
+// (raw source, sample size, coherent sample, joint, raw target) as features
+// written by Ulrich Germann
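+// The spec string selects which counts to include; e.g. (illustrative)
+// "r1+g1+j" yields the features log-r1, log-g1 and log-j.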
+
+#include "sapt_pscore_base.h"
+#include <boost/dynamic_bitset.hpp>
+
+using namespace std;
+namespace Moses {
+ namespace bitext {
+
+ template<typename Token>
+ class
+ PScoreLogCnt : public PhraseScorer<Token>
+ {
+ string m_specs;
+ public:
+ PScoreLogCnt(string const specs)
+ {
+ this->m_index = -1;
+ this->m_specs = specs;
+ if (specs.find("r1") != string::npos) // raw source phrase counts
+ this->m_feature_names.push_back("log-r1");
+ if (specs.find("s1") != string::npos)
+ this->m_feature_names.push_back("log-s1"); // L1 sample size
+ if (specs.find("g1") != string::npos) // coherent phrases
+ this->m_feature_names.push_back("log-g1");
+ if (specs.find("j") != string::npos) // joint counts
+ this->m_feature_names.push_back("log-j");
+ if (specs.find("r2") != string::npos) // raw target phrase counts
+ this->m_feature_names.push_back("log-r2");
+ this->m_num_feats = this->m_feature_names.size();
+ }
+
+ bool
+ isIntegerValued(int i) const { return true; }
+
+ void
+ operator()(Bitext<Token> const& bt,
+ PhrasePair<Token>& pp,
+ vector<float> * dest = NULL) const
+ {
+ if (!dest) dest = &pp.fvals;
+ assert(pp.raw1);
+ assert(pp.sample1);
+ assert(pp.good1);
+ assert(pp.joint);
+ assert(pp.raw2);
+ size_t i = this->m_index;
+ if (m_specs.find("r1") != string::npos)
+ (*dest)[i++] = log(pp.raw1);
+ if (m_specs.find("s1") != string::npos)
+ (*dest)[i++] = log(pp.sample1);
+ if (m_specs.find("g1") != string::npos)
+ (*dest)[i++] = log(pp.good1);
+ if (m_specs.find("j") != string::npos)
+ (*dest)[i++] = log(pp.joint);
+ if (m_specs.find("r2") != string::npos)
+	(*dest)[i++] = log(pp.raw2);
+ }
+ };
+ } // namespace bitext
+} // namespace Moses
diff --git a/moses/TranslationModel/UG/sapt_pscore_pbwd.h b/moses/TranslationModel/UG/sapt_pscore_pbwd.h
new file mode 100644
index 000000000..f7b4686d7
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_pscore_pbwd.h
@@ -0,0 +1,58 @@
+//-*- c++ -*-
+// written by Ulrich Germann
+#pragma once
+#include "moses/TranslationModel/UG/mm/ug_bitext.h"
+#include "util/exception.hh"
+#include "boost/format.hpp"
+#include "boost/foreach.hpp"
+
+namespace Moses {
+ namespace bitext
+ {
+ template<typename Token>
+ class
+ PScorePbwd : public PhraseScorer<Token>
+ {
+ float conf;
+ string denom;
+
+ public:
+ PScorePbwd(float const c, string d)
+ {
+ this->m_index = -1;
+ conf = c;
+ denom = d;
+ size_t checksum = d.size();
+ BOOST_FOREACH(char const& x, denom)
+ {
+ if (x == '+') { --checksum; continue; }
+ if (x != 'g' && x != 's' && x != 'r') continue;
+ string s = (format("pbwd-%c%.3f") % x % c).str();
+ this->m_feature_names.push_back(s);
+ }
+ this->m_num_feats = this->m_feature_names.size();
+ UTIL_THROW_IF2(this->m_feature_names.size() != checksum,
+ "Unknown parameter in specification '"
+ << d << "' for Pbwd phrase scorer at " << HERE);
+ }
+
+ void
+ operator()(Bitext<Token> const& bt,
+ PhrasePair<Token>& pp,
+ vector<float> * dest = NULL) const
+ {
+ if (!dest) dest = &pp.fvals;
+ // we use the denominator specification to scale the raw counts on the
+ // target side; the clean way would be to counter-sample
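+      // e.g. for x == 'g', the raw target count raw2 is scaled by the
+      // fraction good1/raw1 of coherent source-side occurrences before
+      // the lower-bound estimate is taken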
+ size_t i = this->m_index;
+ BOOST_FOREACH(char const& x, denom)
+ {
+ uint32_t m2 = pp.raw2;
+ if (x == 'g') m2 = round(m2 * float(pp.good1) / pp.raw1);
+ else if (x == 's') m2 = round(m2 * float(pp.sample1) / pp.raw1);
+ (*dest)[i++] = log(lbop(max(m2, pp.joint),pp.joint,conf));
+ }
+ }
+ };
+ } // namespace bitext
+} // namespace Moses
diff --git a/moses/TranslationModel/UG/sapt_pscore_pfwd.h b/moses/TranslationModel/UG/sapt_pscore_pfwd.h
new file mode 100644
index 000000000..ed48a93d2
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_pscore_pfwd.h
@@ -0,0 +1,70 @@
+// -*- c++ -*-
+// written by Ulrich Germann
+#pragma once
+#include "moses/TranslationModel/UG/mm/ug_bitext.h"
+#include "util/exception.hh"
+#include "boost/format.hpp"
+#include "boost/foreach.hpp"
+
+namespace Moses {
+ namespace bitext
+ {
+ template<typename Token>
+ class
+ PScorePfwd : public PhraseScorer<Token>
+ {
+ float conf;
+ string denom;
+
+ public:
+
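+      // The denominator spec d is a list of 'g' (good), 's' (sample) and
+      // 'r' (raw) counts, optionally '+'-separated; e.g. (illustrative)
+      // c=0.05 and d="g+r" yield the features pfwd-g0.050 and pfwd-r0.050.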
+ PScorePfwd(float const c, string d)
+ {
+ this->m_index = -1;
+ conf = c;
+ denom = d;
+ size_t checksum = d.size();
+ BOOST_FOREACH(char const& x, denom)
+ {
+ if (x == '+') { --checksum; continue; }
+ if (x != 'g' && x != 's' && x != 'r') continue;
+ string s = (format("pfwd-%c%.3f") % x % c).str();
+ this->m_feature_names.push_back(s);
+ }
+ this->m_num_feats = this->m_feature_names.size();
+ UTIL_THROW_IF2(this->m_feature_names.size() != checksum,
+ "Unknown parameter in specification '"
+ << d << "' for Pfwd phrase scorer at " << HERE);
+ }
+
+ void
+ operator()(Bitext<Token> const& bt, PhrasePair<Token> & pp,
+ vector<float> * dest = NULL) const
+ {
+ if (!dest) dest = &pp.fvals;
+ if (pp.joint > pp.good1)
+ {
+ pp.joint = pp.good1;
+ // cerr<<bt.toString(pp.p1,0)<<" ::: "<<bt.toString(pp.p2,1)<<endl;
+ // cerr<<pp.joint<<"/"<<pp.good1<<"/"<<pp.raw2<<endl;
+ }
+ size_t i = this->m_index;
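+	// lbop(trials, successes, conf): confidence-based lower-bound
+	// estimate of the proportion successes/trials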
+ BOOST_FOREACH(char const& c, this->denom)
+ {
+ switch (c)
+ {
+ case 'g':
+ (*dest)[i++] = log(lbop(pp.good1, pp.joint, conf));
+ break;
+ case 's':
+ (*dest)[i++] = log(lbop(pp.sample1, pp.joint, conf));
+ break;
+ case 'r':
+ (*dest)[i++] = log(lbop(pp.raw1, pp.joint, conf));
+ }
+ }
+ }
+ };
+ }
+}
+
diff --git a/moses/TranslationModel/UG/sapt_pscore_phrasecount.h b/moses/TranslationModel/UG/sapt_pscore_phrasecount.h
new file mode 100644
index 000000000..e0a6eb48b
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_pscore_phrasecount.h
@@ -0,0 +1,34 @@
+// -*- c++ -*-
+// written by Ulrich Germann
+#pragma once
+#include "moses/TranslationModel/UG/mm/ug_bitext.h"
+#include "util/exception.hh"
+#include "boost/format.hpp"
+#include "sapt_pscore_base.h"
+
+namespace Moses {
+ namespace bitext
+ {
+ template<typename Token>
+ class
+ PScorePC : public PhraseScorer<Token>
+ {
+ public:
+ PScorePC(string const dummy)
+ {
+ this->m_index = -1;
+ this->m_num_feats = 1;
+ this->m_feature_names.push_back(string("phrasecount"));
+ }
+
+ void
+ operator()(Bitext<Token> const& bt,
+ PhrasePair<Token>& pp,
+ vector<float> * dest = NULL) const
+ {
+ if (!dest) dest = &pp.fvals;
+ (*dest)[this->m_index] = 1;
+ }
+ };
+ }
+}
diff --git a/moses/TranslationModel/UG/sapt_pscore_provenance.h b/moses/TranslationModel/UG/sapt_pscore_provenance.h
new file mode 100644
index 000000000..c33b98fe7
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_pscore_provenance.h
@@ -0,0 +1,47 @@
+// -*- c++ -*-
+// Phrase scorer that rewards the number of phrase pair occurrences in a bitext
+// with the asymptotic function j/(j+x) where x > 0 is a function
+// parameter that determines the steepness of the rewards curve
+// written by Ulrich Germann
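+// Illustration: with x = 10, a phrase pair with joint count j = 1 scores
+// 1/11 ~ 0.09, j = 10 scores 0.5, and j = 100 scores ~ 0.91.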
+
+#include "sapt_pscore_base.h"
+#include <boost/dynamic_bitset.hpp>
+
+using namespace std;
+namespace Moses {
+ namespace bitext {
+
+ // asymptotic provenance feature n/(n+x)
+ template<typename Token>
+ class
+ PScoreProvenance : public SingleRealValuedParameterPhraseScorerFamily<Token>
+ {
+ public:
+
+ PScoreProvenance(string const& spec)
+ {
+ this->m_tag = "prov";
+ this->init(spec);
+ }
+
+ bool
+ isLogVal(int i) const { return false; }
+
+ void
+ operator()(Bitext<Token> const& bt,
+ PhrasePair<Token>& pp,
+ vector<float> * dest = NULL) const
+ {
+ if (!dest) dest = &pp.fvals;
+ size_t i = this->m_index;
+ BOOST_FOREACH(float const x, this->m_x)
+ (*dest).at(i++) = pp.joint/(x + pp.joint);
+ }
+
+ bool
+ allowPooling() const
+ { return false; }
+
+ };
+ } // namespace bitext
+} // namespace Moses
diff --git a/moses/TranslationModel/UG/sapt_pscore_rareness.h b/moses/TranslationModel/UG/sapt_pscore_rareness.h
new file mode 100644
index 000000000..58f204c88
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_pscore_rareness.h
@@ -0,0 +1,41 @@
+// -*- c++ -*-
+// Phrase scorer that penalizes rare phrase pairs via the asymptotic
+// function x/(j+x), where j is the joint phrase pair count and x > 0 is a
+// parameter that determines the steepness of the penalty curve
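+// Illustration: with x = 10, a pair seen j = 1 time gets 10/11 ~ 0.91,
+// while a pair seen j = 100 times gets 10/110 ~ 0.09.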
+// written by Ulrich Germann
+
+#include "sapt_pscore_base.h"
+#include <boost/dynamic_bitset.hpp>
+
+using namespace std;
+namespace Moses {
+ namespace bitext {
+
+ // rareness penalty: x/(n+x)
+ template<typename Token>
+ class
+ PScoreRareness : public SingleRealValuedParameterPhraseScorerFamily<Token>
+ {
+ public:
+ PScoreRareness(string const spec)
+ {
+ this->m_tag = "rare";
+ this->init(spec);
+ }
+
+ bool
+ isLogVal(int i) const { return false; }
+
+ void
+ operator()(Bitext<Token> const& bt,
+ PhrasePair<Token>& pp,
+ vector<float> * dest = NULL) const
+ {
+ if (!dest) dest = &pp.fvals;
+ size_t i = this->m_index;
+ BOOST_FOREACH(float const x, this->m_x)
+ (*dest).at(i++) = x/(x + pp.joint);
+ }
+ };
+ } // namespace bitext
+} // namespace Moses
diff --git a/moses/TranslationModel/UG/sapt_pscore_unaligned.h b/moses/TranslationModel/UG/sapt_pscore_unaligned.h
new file mode 100644
index 000000000..bdd2919b4
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_pscore_unaligned.h
@@ -0,0 +1,67 @@
+// -*- c++ -*-
+// Phrase scorer that counts the number of unaligned words in the phrase
+// written by Ulrich Germann
+
+#include "sapt_pscore_base.h"
+#include <boost/dynamic_bitset.hpp>
+
+namespace Moses {
+ namespace bitext
+ {
+ template<typename Token>
+ class
+ PScoreUnaligned : public PhraseScorer<Token>
+ {
+ typedef boost::dynamic_bitset<uint64_t> bitvector;
+ public:
+ PScoreUnaligned(string const spec)
+ {
+ this->m_index = -1;
+ int f = this->m_num_feats = atoi(spec.c_str());
+ UTIL_THROW_IF2(f != 1 && f != 2,"unal parameter must be 1 or 2 at "<<HERE);
+ this->m_feature_names.resize(f);
+ if (f == 1)
+ this->m_feature_names[0] = "unal";
+ else
+ {
+ this->m_feature_names[0] = "unal-s";
+ this->m_feature_names[1] = "unal-t";
+ }
+ }
+
+ bool
+ isLogVal(int i) const { return false; }
+
+ bool
+ isIntegerValued(int i) const { return true; }
+
+ void
+ operator()(Bitext<Token> const& bt,
+ PhrasePair<Token>& pp,
+ vector<float> * dest = NULL) const
+ {
+ if (!dest) dest = &pp.fvals;
+ // uint32_t sid1=0,sid2=0,off1=0,off2=0,len1=0,len2=0;
+ // parse_pid(pp.p1, sid1, off1, len1);
+ // parse_pid(pp.p2, sid2, off2, len2);
+ bitvector check1(pp.len1),check2(pp.len2);
+ for (size_t i = 0; i < pp.aln.size(); )
+ {
+ check1.set(pp.aln[i++]);
+ check2.set(pp.aln.at(i++));
+ }
+
+ if (this->m_num_feats == 1)
+ {
+ (*dest)[this->m_index] = pp.len1 - check1.count();
+ (*dest)[this->m_index] += pp.len2 - check2.count();
+ }
+ else
+ {
+ (*dest)[this->m_index] = pp.len1 - check1.count();
+ (*dest)[this->m_index+1] = pp.len2 - check2.count();
+ }
+ }
+ };
+ } // namespace bitext
+} // namespace Moses
diff --git a/moses/TranslationModel/UG/sapt_pscore_wordcount.h b/moses/TranslationModel/UG/sapt_pscore_wordcount.h
new file mode 100644
index 000000000..3227bb6ba
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_pscore_wordcount.h
@@ -0,0 +1,34 @@
+// -*- c++ -*-
+// written by Ulrich Germann
+#pragma once
+#include "moses/TranslationModel/UG/mm/ug_bitext.h"
+#include "util/exception.hh"
+#include "boost/format.hpp"
+#include "sapt_pscore_base.h"
+
+namespace Moses {
+ namespace bitext
+ {
+ template<typename Token>
+ class
+ PScoreWC : public PhraseScorer<Token>
+ {
+ public:
+ PScoreWC(string const dummy)
+ {
+ this->m_index = -1;
+ this->m_num_feats = 1;
+ this->m_feature_names.push_back(string("wordcount"));
+ }
+
+ void
+ operator()(Bitext<Token> const& bt,
+ PhrasePair<Token>& pp,
+ vector<float> * dest = NULL) const
+ {
+ if (!dest) dest = &pp.fvals;
+ (*dest)[this->m_index] = pp.len2;
+ }
+ };
+ }
+}
diff --git a/moses/TranslationModel/UG/sim-pe.cc b/moses/TranslationModel/UG/sim-pe.cc
new file mode 100644
index 000000000..58a70cab4
--- /dev/null
+++ b/moses/TranslationModel/UG/sim-pe.cc
@@ -0,0 +1,83 @@
+#include "mmsapt.h"
+#include "moses/Manager.h"
+#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
+#include <boost/foreach.hpp>
+#include <boost/format.hpp>
+#include <boost/tokenizer.hpp>
+#include <boost/shared_ptr.hpp>
+#include <algorithm>
+#include <iostream>
+
+using namespace Moses;
+using namespace bitext;
+using namespace std;
+using namespace boost;
+
+vector<FactorType> fo(1,FactorType(0));
+
+ostream&
+operator<<(ostream& out, Hypothesis const* x)
+{
+ vector<const Hypothesis*> H;
+ for (const Hypothesis* h = x; h; h = h->GetPrevHypo())
+ H.push_back(h);
+ for (; H.size(); H.pop_back())
+ {
+ Phrase const& p = H.back()->GetCurrTargetPhrase();
+ for (size_t pos = 0 ; pos < p.GetSize() ; pos++)
+ out << *p.GetFactor(pos, 0) << (H.size() ? " " : "");
+ }
+ return out;
+}
+
+vector<FactorType> ifo;
+size_t lineNumber;
+
+string
+translate(string const& source)
+{
+ StaticData const& global = StaticData::Instance();
+
+ Sentence sentence;
+ istringstream ibuf(source+"\n");
+ sentence.Read(ibuf,ifo);
+
+ Manager manager(lineNumber, sentence, global.GetSearchAlgorithm());
+ manager.ProcessSentence();
+
+ ostringstream obuf;
+ const Hypothesis* h = manager.GetBestHypothesis();
+ obuf << h;
+ return obuf.str();
+
+}
+
+int main(int argc, char* argv[])
+{
+ Parameter params;
+ if (!params.LoadParam(argc,argv) || !StaticData::LoadDataStatic(&params, argv[0]))
+ exit(1);
+
+ StaticData const& global = StaticData::Instance();
+ global.SetVerboseLevel(0);
+ ifo = global.GetInputFactorOrder();
+
+ lineNumber = 0; // TODO: Include sentence request number here?
+ string source, target, alignment;
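+  // For each (source, target, alignment) triple read from stdin:
+  // [S] source, [H] hypothesis with the current model, [T] given target,
+  // [X] hypothesis after adding the triple to the dynamic phrase table.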
+ while (getline(cin,source))
+ {
+ getline(cin,target);
+ getline(cin,alignment);
+ cout << "[S] " << source << endl;
+ cout << "[H] " << translate(source) << endl;
+ cout << "[T] " << target << endl;
+ Mmsapt* pdsa = reinterpret_cast<Mmsapt*>(PhraseDictionary::GetColl()[0]);
+ pdsa->add(source,target,alignment);
+ cout << "[X] " << translate(source) << endl;
+ cout << endl;
+ }
+ exit(0);
+}
+
+
+
diff --git a/moses/TranslationModel/UG/spe-check-coverage.cc b/moses/TranslationModel/UG/spe-check-coverage.cc
new file mode 100644
index 000000000..039b4cd37
--- /dev/null
+++ b/moses/TranslationModel/UG/spe-check-coverage.cc
@@ -0,0 +1,214 @@
+#include "mmsapt.h"
+#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
+#include "moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h"
+#include <boost/foreach.hpp>
+#include <boost/format.hpp>
+#include <boost/tokenizer.hpp>
+#include <boost/shared_ptr.hpp>
+#include <algorithm>
+#include <iostream>
+
+using namespace Moses;
+using namespace bitext;
+using namespace std;
+using namespace boost;
+
+vector<FactorType> fo(1,FactorType(0));
+
+class SimplePhrase : public Moses::Phrase
+{
+ vector<FactorType> const m_fo; // factor order
+public:
+ SimplePhrase(): m_fo(1,FactorType(0)) {}
+
+ void init(string const& s)
+ {
+ istringstream buf(s); string w;
+ while (buf >> w)
+ {
+ Word wrd;
+ this->AddWord().CreateFromString(Input,m_fo,StringPiece(w),false,false);
+ }
+ }
+};
+
+class TargetPhraseIndexSorter
+{
+ TargetPhraseCollection const& my_tpc;
+ CompareTargetPhrase cmp;
+public:
+ TargetPhraseIndexSorter(TargetPhraseCollection const& tpc) : my_tpc(tpc) {}
+ bool operator()(size_t a, size_t b) const
+ {
+ // return cmp(*my_tpc[a], *my_tpc[b]);
+ return (my_tpc[a]->GetScoreBreakdown().GetWeightedScore() >
+ my_tpc[b]->GetScoreBreakdown().GetWeightedScore());
+ }
+};
+
+int main(int argc, char* argv[])
+{
+
+ string vlevel = "alt"; // verbosity level
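+  // roughly: "ALL" = every option for every covered span; "all" = only
+  // options matching the reference target; "alt" = only spans with at
+  // least one dynamically added match; "new" = only the dynamically
+  // added options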
+  vector<pair<string,int> > argfilter(4);
+ argfilter[0] = std::make_pair(string("--spe-src"),1);
+ argfilter[1] = std::make_pair(string("--spe-trg"),1);
+ argfilter[2] = std::make_pair(string("--spe-aln"),1);
+ argfilter[3] = std::make_pair(string("--spe-show"),1);
+
+ char** my_args; int my_acnt;
+ char** mo_args; int mo_acnt;
+ filter_arguments(argc, argv, mo_acnt, &mo_args, my_acnt, &my_args, argfilter);
+
+ ifstream spe_src,spe_trg,spe_aln;
+ // instead of translating show coverage by phrase tables
+ for (int i = 0; i < my_acnt; i += 2)
+ {
+ if (!strcmp(my_args[i],"--spe-src"))
+ spe_src.open(my_args[i+1]);
+ else if (!strcmp(my_args[i],"--spe-trg"))
+ spe_trg.open(my_args[i+1]);
+ else if (!strcmp(my_args[i],"--spe-aln"))
+ spe_aln.open(my_args[i+1]);
+ else if (!strcmp(my_args[i],"--spe-show"))
+ vlevel = my_args[i+1];
+ }
+
+ Parameter params;
+ if (!params.LoadParam(mo_acnt,mo_args) ||
+ !StaticData::LoadDataStatic(&params, mo_args[0]))
+ exit(1);
+
+ StaticData const& global = StaticData::Instance();
+ global.SetVerboseLevel(0);
+ vector<FactorType> ifo = global.GetInputFactorOrder();
+
+ PhraseDictionary* PT = PhraseDictionary::GetColl()[0];
+ Mmsapt* mmsapt = dynamic_cast<Mmsapt*>(PT);
+ if (!mmsapt)
+ {
+ cerr << "Phrase table implementation not supported by this utility." << endl;
+ exit(1);
+ }
+ mmsapt->SetTableLimit(0);
+
+ string srcline,trgline,alnline;
+ cout.precision(2);
+ vector<string> fname = mmsapt->GetFeatureNames();
+ while (getline(spe_src,srcline))
+ {
+ UTIL_THROW_IF2(!getline(spe_trg,trgline), HERE
+ << ": missing data for online updates.");
+ UTIL_THROW_IF2(!getline(spe_aln,alnline), HERE
+ << ": missing data for online updates.");
+ cout << string(80,'-') << "\n" << srcline << "\n" << trgline << "\n" << endl;
+
+ // cout << srcline << " " << HERE << endl;
+ Sentence snt;
+ istringstream buf(srcline+"\n");
+ if (!snt.Read(buf,ifo)) break;
+ // cout << Phrase(snt) << endl;
+ int dynprovidx = -1;
+ for (size_t i = 0; i < fname.size(); ++i)
+ {
+ if (fname[i].substr(0,7) == "prov-1.")
+ dynprovidx = i;
+ }
+ cout << endl;
+ for (size_t i = 0; i < snt.GetSize(); ++i)
+ {
+ for (size_t k = i; k < snt.GetSize(); ++k)
+ {
+ Phrase p = snt.GetSubString(WordsRange(i,k));
+ if (!mmsapt->PrefixExists(p)) break;
+ TargetPhraseCollection const* trg = PT->GetTargetPhraseCollectionLEGACY(p);
+ if (!trg || !trg->GetSize()) continue;
+
+ bool header_done = false;
+ bool has_dynamic_match = vlevel == "all" || vlevel == "ALL";
+ vector<size_t> order; order.reserve(trg->GetSize());
+ size_t stop = trg->GetSize();
+
+ vector<size_t> o2(trg->GetSize());
+ for (size_t i = 0; i < stop; ++i) o2[i] = i;
+ sort(o2.begin(),o2.end(),TargetPhraseIndexSorter(*trg));
+
+ for (size_t r = 0; r < stop; ++r) // r for rank
+ {
+ if (vlevel != "ALL")
+ {
+ Phrase const& phr = static_cast<Phrase const&>(*(*trg)[o2[r]]);
+ ostringstream buf; buf << phr;
+ string tphrase = buf.str();
+ tphrase.erase(tphrase.size()-1);
+ size_t s = trgline.find(tphrase);
+ if (s == string::npos) continue;
+ size_t e = s + tphrase.size();
+ if ((s && trgline[s-1] != ' ') || (e < trgline.size() && trgline[e] != ' '))
+ continue;
+ }
+ order.push_back(r);
+ if (!has_dynamic_match)
+ {
+ ScoreComponentCollection const& scc = (*trg)[o2[r]]->GetScoreBreakdown();
+ ScoreComponentCollection::IndexPair idx = scc.GetIndexes(PT);
+ FVector const& scores = scc.GetScoresVector();
+ has_dynamic_match = scores[idx.first + dynprovidx] > 0;
+ }
+ }
+ if ((vlevel == "alt" || vlevel == "new") && !has_dynamic_match)
+ continue;
+
+
+ BOOST_FOREACH(size_t const& r, order)
+ {
+ ScoreComponentCollection const& scc = (*trg)[o2[r]]->GetScoreBreakdown();
+ ScoreComponentCollection::IndexPair idx = scc.GetIndexes(PT);
+ FVector const& scores = scc.GetScoresVector();
+ float wscore = scc.GetWeightedScore();
+ if (vlevel == "new" && scores[idx.first + dynprovidx] == 0)
+ continue;
+ if (!header_done)
+ {
+ cout << endl;
+ if (trg->GetSize() == 1)
+ cout << p << " (1 translation option)" << endl;
+ else
+ cout << p << " (" << trg->GetSize() << " translation options)" << endl;
+ header_done = true;
+ }
+ Phrase const& phr = static_cast<Phrase const&>(*(*trg)[o2[r]]);
+ cout << setw(3) << r+1 << " " << phr << endl;
+ cout << " ";
+ BOOST_FOREACH(string const& fn, fname)
+ cout << " " << format("%10.10s") % fn;
+ cout << endl;
+ cout << " ";
+ for (size_t x = idx.first; x < idx.second; ++x)
+ {
+ size_t j = x-idx.first;
+ float f = (mmsapt && mmsapt->isLogVal(j)) ? exp(scores[x]) : scores[x];
+ string fmt = (mmsapt && mmsapt->isInteger(j)) ? "%10d" : "%10.8f";
+ if (fname[j].substr(0,3) == "lex") fmt = "%10.3e";
+ if (fname[j].substr(0,7) == "prov-1.")
+ {
+ f = round(f/(1-f));
+ fmt = "%10d";
+ }
+ cout << " " << format(fmt) % (mmsapt->isInteger(j) ? round(f) : f);
+ }
+ cout << " " << format("%10.3e") % exp(wscore)
+ << " " << format("%10.3e") % exp((*trg)[o2[r]]->GetFutureScore()) << endl;
+ }
+ mmsapt->Release(trg);
+ continue;
+ }
+ }
+ mmsapt->add(srcline,trgline,alnline);
+ }
+ // }
+ exit(0);
+}
+
+
+
diff --git a/moses/TranslationModel/UG/spe-check-coverage2.cc b/moses/TranslationModel/UG/spe-check-coverage2.cc
new file mode 100644
index 000000000..fa9ce1c85
--- /dev/null
+++ b/moses/TranslationModel/UG/spe-check-coverage2.cc
@@ -0,0 +1,76 @@
+#include "mmsapt.h"
+#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
+#include "moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h"
+#include <boost/foreach.hpp>
+#include <boost/format.hpp>
+#include <boost/tokenizer.hpp>
+#include <boost/shared_ptr.hpp>
+#include <algorithm>
+#include <iostream>
+
+using namespace Moses;
+using namespace bitext;
+using namespace std;
+using namespace boost;
+
+typedef L2R_Token<SimpleWordId> Token;
+typedef mmBitext<Token> mmbitext;
+typedef imBitext<Token> imbitext;
+typedef Bitext<Token>::iter iter;
+
+mmbitext bg;
+
+void
+show(ostream& out, iter& f)
+{
+ iter b(bg.I2.get(),f.getToken(0),f.size());
+ if (b.size() == f.size())
+ out << setw(12) << int(round(b.approxOccurrenceCount()));
+ else
+ out << string(12,' ');
+ out << " " << setw(5) << int(round(f.approxOccurrenceCount())) << " ";
+ out << f.str(bg.V1.get()) << endl;
+}
+
+
+void
+dump(ostream& out, iter& f)
+{
+ float cnt = f.size() ? f.approxOccurrenceCount() : 0;
+ if (f.down())
+ {
+ cnt = f.approxOccurrenceCount();
+ do { dump(out,f); }
+ while (f.over());
+ f.up();
+ }
+ if (f.size() && cnt < f.approxOccurrenceCount() && f.approxOccurrenceCount() > 1)
+ show(out,f);
+}
+
+
+void
+read_data(string fname, vector<string>& dest)
+{
+ ifstream in(fname.c_str());
+ string line;
+ while (getline(in,line)) dest.push_back(line);
+ in.close();
+}
+
+int main(int argc, char* argv[])
+{
+ bg.open(argv[1],argv[2],argv[3]);
+ sptr<imbitext> fg(new imbitext(bg.V1,bg.V2));
+ vector<string> src,trg,aln;
+ read_data(argv[4],src);
+ read_data(argv[5],trg);
+ read_data(argv[6],aln);
+ fg = fg->add(src,trg,aln);
+ iter mfg(fg->I1.get());
+ dump(cout,mfg);
+ exit(0);
+}
+
+
+
diff --git a/moses/TranslationModel/UG/try-align.cc b/moses/TranslationModel/UG/try-align.cc
index 30c87ccab..483ad2c34 100644
--- a/moses/TranslationModel/UG/try-align.cc
+++ b/moses/TranslationModel/UG/try-align.cc
@@ -2,32 +2,33 @@
using namespace std;
using namespace Moses;
+// currently broken
Mmsapt* PT;
int main(int argc, char* argv[])
{
- string base = argv[1];
- string L1 = argv[2];
- string L2 = argv[3];
- ostringstream buf;
- buf << "Mmsapt name=PT0 output-factor=0 num-features=5 base="
- << base << " L1=" << L1 << " L2=" << L2;
- string configline = buf.str();
- PT = new Mmsapt(configline);
- PT->Load();
- float w[] = { 0.0582634, 0.0518865, 0.0229819, 0.00640856, 0.647506 };
- vector<float> weights(w,w+5);
- PT->setWeights(weights);
- // these values are taken from a moses.ini file;
- // is there a convenient way of accessing them from within mmsapt ???
- string eline,fline;
- // TokenIndex V; V.open("crp/trn/mm/de.tdx");
- while (getline(cin,eline) && getline(cin,fline))
- {
- cout << eline << endl;
- cout << fline << endl;
- PT->align(eline,fline);
- }
- delete PT;
+ // string base = argv[1];
+ // string L1 = argv[2];
+ // string L2 = argv[3];
+ // ostringstream buf;
+ // buf << "Mmsapt name=PT0 output-factor=0 num-features=5 base="
+ // << base << " L1=" << L1 << " L2=" << L2;
+ // string configline = buf.str();
+ // PT = new Mmsapt(configline);
+ // PT->Load();
+ // float w[] = { 0.0582634, 0.0518865, 0.0229819, 0.00640856, 0.647506 };
+ // vector<float> weights(w,w+5);
+ // PT->setWeights(weights);
+ // // these values are taken from a moses.ini file;
+ // // is there a convenient way of accessing them from within mmsapt ???
+ // string eline,fline;
+ // // TokenIndex V; V.open("crp/trn/mm/de.tdx");
+ // while (getline(cin,eline) && getline(cin,fline))
+ // {
+ // cout << eline << endl;
+ // cout << fline << endl;
+ // PT->align(eline,fline);
+ // }
+ // delete PT;
}
diff --git a/moses/TranslationModel/UG/util/tokenindex.dump.cc b/moses/TranslationModel/UG/util/tokenindex.dump.cc
index 55970dbf0..8ab68579d 100644
--- a/moses/TranslationModel/UG/util/tokenindex.dump.cc
+++ b/moses/TranslationModel/UG/util/tokenindex.dump.cc
@@ -7,7 +7,7 @@
* @brief Dumps a TokenIndex (vocab file for TPPT and TPLM) to stdout.
*/
-#include "tpt_tokenindex.h"
+#include "../mm/tpt_tokenindex.h"
#include <iostream>
#include <iomanip>
diff --git a/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp b/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp
index 8766743b3..a91c58343 100644
--- a/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp
+++ b/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp
@@ -345,10 +345,10 @@ string FuzzyMatchWrapper::ExtractTM(WordIndex &wordIndex, long translationId, co
// find the best matches according to letter sed
string best_path = "";
int best_match = -1;
- int best_letter_cost;
+ unsigned int best_letter_cost;
if (lsed_flag) {
best_letter_cost = compute_length( input[sentenceInd] ) * min_match / 100 + 1;
- for(int si=0; si<best_tm.size(); si++) {
+ for(size_t si=0; si<best_tm.size(); si++) {
int s = best_tm[si];
string path;
unsigned int letter_cost = sed( input[sentenceInd], source[s], path, true );
diff --git a/moses/TranslationModel/fuzzy-match/create_xml.cpp b/moses/TranslationModel/fuzzy-match/create_xml.cpp
index 44c1efc9f..a8b6a52cf 100644
--- a/moses/TranslationModel/fuzzy-match/create_xml.cpp
+++ b/moses/TranslationModel/fuzzy-match/create_xml.cpp
@@ -31,8 +31,8 @@ void create_xml(const string &inPath)
ofstream rule((inPath + ".extract").c_str());
ofstream ruleInv((inPath + ".extract.inv").c_str());
- int setenceId;
- float score;
+ // int setenceId;
+ // float score;
string source, target, align, path;
string *input = NULL;
int count;
@@ -47,11 +47,11 @@ void create_xml(const string &inPath)
//cout << inLine << endl;
switch (step) {
case 0:
- setenceId = Scan<int>(inLine);
+ /*setenceId = */ Scan<int>(inLine);
++step;
break;
case 1:
- score = Scan<float>(inLine);
+ /*score = */ Scan<float>(inLine);
++step;
break;
case 2:
@@ -124,7 +124,7 @@ CreateXMLRetValues createXML(int ruleCount, const string &source, const string &
int start_s = 0, start_i = 0;
//cerr << input << endl << source << endl << target << endl << path << endl;
- for ( int p = 0 ; p < path.length() ; p++ ) {
+ for ( int p = 0 ; p < int(path.length()) ; p++ ) {
string action = path.substr(p, 1);
// beginning of a mismatch
@@ -176,7 +176,7 @@ CreateXMLRetValues createXML(int ruleCount, const string &source, const string &
}
// end of sentence? add to end
- if ( start_t == 1000 && i > inputToks.size() - 1 ) {
+ if ( start_t == 1000 && i > int(inputToks.size()) - 1 ) {
start_t = targetsToks.size() - 1;
}
@@ -216,13 +216,13 @@ CreateXMLRetValues createXML(int ruleCount, const string &source, const string &
if ( action != "I" ) {
//cerr << " ->";
- if (s < alignments.m_alignS2T.size()) {
+ if (s < int(alignments.m_alignS2T.size())) {
const std::map<int, int> &targets = alignments.m_alignS2T[s];
//cerr << "s=" << s << endl;
std::map<int, int>::const_iterator iter;
for (iter = targets.begin(); iter != targets.end(); ++iter) {
- int tt = iter->first;
+ // int tt = iter->first;
//cerr << " " << tt;
}
}
@@ -245,7 +245,7 @@ CreateXMLRetValues createXML(int ruleCount, const string &source, const string &
} // for ( int p = 0
//cerr << target << endl;
- for (int i = 0; i < targetBitmap.size(); ++i) {
+ for (size_t i = 0; i < targetBitmap.size(); ++i) {
//cerr << targetBitmap[i];
}
//cerr << endl;
@@ -260,13 +260,13 @@ CreateXMLRetValues createXML(int ruleCount, const string &source, const string &
int rule_pos_s = 0;
map<int, int> ruleAlignS;
- for (int i = 0 ; i < inputBitmap.size() ; ++i ) {
+ for (int i = 0 ; i < int(inputBitmap.size()) ; ++i ) {
if ( inputBitmap[i] ) {
ret.ruleS += inputToks[i] + " ";
ruleAlignS[ alignI2S[i] ] = rule_pos_s++;
}
- for (int j = 0; j < nonTerms.size(); ++j) {
+ for (size_t j = 0; j < nonTerms.size(); ++j) {
map<string, int> &nt = nonTerms[j];
if (i == nt["start_i"]) {
ret.ruleS += "[X][X] ";
@@ -284,7 +284,7 @@ CreateXMLRetValues createXML(int ruleCount, const string &source, const string &
ruleAlignT[t] = rule_pos_t++;
}
- for (int i = 0; i < nonTerms.size(); ++i) {
+ for (size_t i = 0; i < nonTerms.size(); ++i) {
map<string, int> &nt = nonTerms[i];
if (t == nt["start_t"]) {
@@ -300,7 +300,7 @@ CreateXMLRetValues createXML(int ruleCount, const string &source, const string &
for (map<int, int>::const_iterator iter = ruleAlignS.begin(); iter != ruleAlignS.end(); ++iter) {
int s = iter->first;
- if (s < alignments.m_alignS2T.size()) {
+ if (s < int(alignments.m_alignS2T.size())) {
const std::map<int, int> &targets = alignments.m_alignS2T[s];
std::map<int, int>::const_iterator iter;
@@ -316,7 +316,7 @@ CreateXMLRetValues createXML(int ruleCount, const string &source, const string &
//cerr << "numAlign=" << numAlign << endl;
- for (int i = 0; i < nonTerms.size(); ++i) {
+ for (size_t i = 0; i < nonTerms.size(); ++i) {
map<string, int> &nt = nonTerms[i];
ret.ruleAlignment += SPrint(nt["rule_pos_s"]) + "-" + SPrint(nt["rule_pos_t"]) + " ";
++numAlign;
@@ -329,7 +329,7 @@ CreateXMLRetValues createXML(int ruleCount, const string &source, const string &
ret.ruleAlignment = TrimInternal(ret.ruleAlignment);
vector<string> ruleAlignmentToks = Tokenize(ret.ruleAlignment);
- for (int i = 0; i < ruleAlignmentToks.size(); ++i) {
+ for (size_t i = 0; i < ruleAlignmentToks.size(); ++i) {
const string &alignPoint = ruleAlignmentToks[i];
vector<string> toks = Tokenize(alignPoint, "-");
assert(toks.size() == 2);
@@ -338,7 +338,7 @@ CreateXMLRetValues createXML(int ruleCount, const string &source, const string &
ret.ruleAlignmentInv = TrimInternal(ret.ruleAlignmentInv);
// frame
- ret.frame;
+ // ret.frame;
if (frameInput.find(-1) == frameInput.end())
ret.frame = frameInput[-1];
@@ -346,7 +346,7 @@ CreateXMLRetValues createXML(int ruleCount, const string &source, const string &
int start_t = -1;
targetBitmap.push_back(0);
- for (int t = 0 ; t <= targetsToks.size() ; t++ ) {
+ for (size_t t = 0 ; t <= targetsToks.size() ; t++ ) {
// beginning of tm target inclusion
if ( !currently_included && targetBitmap[t] ) {
start_t = t;
@@ -360,7 +360,7 @@ CreateXMLRetValues createXML(int ruleCount, const string &source, const string &
if ( start_t >= 0 ) {
string target = "";
//cerr << "for(tt=$start_t;tt<$t+$TARGET_BITMAP[$t]);\n";
- for (int tt = start_t ; tt < t + targetBitmap[t] ; tt++ ) {
+ for (size_t tt = start_t ; tt < t + targetBitmap[t] ; tt++ ) {
target += targetsToks[tt] + " ";
}
// target = Trim(target); TODO
diff --git a/moses/TranslationOption.cpp b/moses/TranslationOption.cpp
index 3ba73cf9e..5ef8293a2 100644
--- a/moses/TranslationOption.cpp
+++ b/moses/TranslationOption.cpp
@@ -32,7 +32,7 @@ namespace Moses
{
TranslationOption::TranslationOption()
- :m_targetPhrase()
+ :m_targetPhrase(NULL)
,m_inputPath(NULL)
,m_sourceWordsRange(NOT_FOUND, NOT_FOUND)
{
@@ -71,10 +71,10 @@ void TranslationOption::CacheLexReorderingScores(const LexicalReordering &produc
m_lexReorderingScores[&producer] = score;
}
-void TranslationOption::Evaluate(const InputType &input)
+void TranslationOption::EvaluateWithSourceContext(const InputType &input)
{
const InputPath &inputPath = GetInputPath();
- m_targetPhrase.Evaluate(input, inputPath);
+ m_targetPhrase.EvaluateWithSourceContext(input, inputPath);
}
const InputPath &TranslationOption::GetInputPath() const
diff --git a/moses/TranslationOption.h b/moses/TranslationOption.h
index 3bc1797bb..9d2e10780 100644
--- a/moses/TranslationOption.h
+++ b/moses/TranslationOption.h
@@ -135,7 +135,7 @@ public:
return m_targetPhrase.GetScoreBreakdown();
}
- void Evaluate(const InputType &input);
+ void EvaluateWithSourceContext(const InputType &input);
/** returns cached scores */
inline const Scores *GetLexReorderingScores(const LexicalReordering *scoreProducer) const {
diff --git a/moses/TranslationOptionCollection.cpp b/moses/TranslationOptionCollection.cpp
index aed0fb62a..2d547df30 100644
--- a/moses/TranslationOptionCollection.cpp
+++ b/moses/TranslationOptionCollection.cpp
@@ -212,6 +212,12 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const InputPath &inputPa
float unknownScore = FloorScore(TransformScore(0));
const Word &sourceWord = inputPath.GetPhrase().GetWord(0);
+ // hack. Once the OOV FF is a phrase table, get rid of this
+ PhraseDictionary *firstPt = NULL;
+  if (PhraseDictionary::GetColl().size() != 0) {
+ firstPt = PhraseDictionary::GetColl()[0];
+ }
+
// unknown word, add as trans opt
FactorCollection &factorCollection = FactorCollection::Instance();
@@ -231,7 +237,7 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const InputPath &inputPa
// modify the starting bitmap
}
- TargetPhrase targetPhrase;
+ TargetPhrase targetPhrase(firstPt);
if (!(staticData.GetDropUnknown() || isEpsilon) || isDigit) {
// add to dictionary
@@ -266,7 +272,7 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const InputPath &inputPa
m_unksrcs.push_back(&sourcePhrase);
WordsRange range(sourcePos, sourcePos + length - 1);
- targetPhrase.Evaluate(sourcePhrase);
+ targetPhrase.EvaluateInIsolation(sourcePhrase);
TranslationOption *transOpt = new TranslationOption(range, targetPhrase);
transOpt->SetInputPath(inputPath);
@@ -410,7 +416,7 @@ void TranslationOptionCollection::CreateTranslationOptions()
ProcessUnknownWord();
- EvaluateWithSource();
+ EvaluateWithSourceContext();
// Prune
Prune();
@@ -535,7 +541,7 @@ void TranslationOptionCollection::SetInputScore(const InputPath &inputPath, Part
}
}
-void TranslationOptionCollection::EvaluateWithSource()
+void TranslationOptionCollection::EvaluateWithSourceContext()
{
const size_t size = m_source.GetSize();
for (size_t startPos = 0 ; startPos < size ; ++startPos) {
@@ -549,7 +555,7 @@ void TranslationOptionCollection::EvaluateWithSource()
TranslationOptionList::const_iterator iterTransOpt;
for(iterTransOpt = transOptList.begin() ; iterTransOpt != transOptList.end() ; ++iterTransOpt) {
TranslationOption &transOpt = **iterTransOpt;
- transOpt.Evaluate(m_source);
+ transOpt.EvaluateWithSourceContext(m_source);
}
}
}
diff --git a/moses/TranslationOptionCollection.h b/moses/TranslationOptionCollection.h
index 89f27495f..6efec2a8a 100644
--- a/moses/TranslationOptionCollection.h
+++ b/moses/TranslationOptionCollection.h
@@ -96,7 +96,7 @@ protected:
//! implemented by inherited class, called by this class
virtual void ProcessUnknownWord(size_t sourcePos)=0;
- void EvaluateWithSource();
+ void EvaluateWithSourceContext();
void CacheLexReordering();
diff --git a/moses/TranslationOptionCollectionLattice.cpp b/moses/TranslationOptionCollectionLattice.cpp
index fd29466ef..349aa385c 100644
--- a/moses/TranslationOptionCollectionLattice.cpp
+++ b/moses/TranslationOptionCollectionLattice.cpp
@@ -147,7 +147,7 @@ void TranslationOptionCollectionLattice::CreateTranslationOptions()
const TargetPhrase &tp = **iter;
TranslationOption *transOpt = new TranslationOption(range, tp);
transOpt->SetInputPath(path);
- transOpt->Evaluate(m_source);
+ transOpt->EvaluateWithSourceContext(m_source);
Add(transOpt);
}
diff --git a/moses/TreeInput.cpp b/moses/TreeInput.cpp
index 2b246aee5..6b36826b6 100644
--- a/moses/TreeInput.cpp
+++ b/moses/TreeInput.cpp
@@ -5,6 +5,7 @@
#include "Util.h"
#include "XmlOption.h"
#include "FactorCollection.h"
+#include "moses/TranslationModel/PhraseDictionary.h"
using namespace std;
@@ -30,6 +31,12 @@ bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector<XMLParseOutput>
return true;
}
+ // hack. What pt should XML trans opt be assigned to?
+ PhraseDictionary *firstPt = NULL;
+  if (PhraseDictionary::GetColl().size() != 0) {
+ firstPt = PhraseDictionary::GetColl()[0];
+ }
+
// break up input into a vector of xml tags and text
// example: (this), (<b>), (is a), (</b>), (test .)
vector<string> xmlTokens = TokenizeXml(line);
@@ -173,7 +180,7 @@ bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector<XMLParseOutput>
//TRACE_ERR("number of translations: " << altTexts.size() << endl);
for (size_t i=0; i<altTexts.size(); ++i) {
// set target phrase
- TargetPhrase targetPhrase;
+ TargetPhrase targetPhrase(firstPt);
// targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i],factorDelimiter, NULL);
targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i], NULL);
@@ -203,7 +210,7 @@ bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector<XMLParseOutput>
// convert from prob to log-prob
float scoreValue = FloorScore(TransformScore(probValue));
targetPhrase.SetXMLScore(scoreValue);
- targetPhrase.Evaluate(sourcePhrase);
+ targetPhrase.EvaluateInIsolation(sourcePhrase);
// set span and create XmlOption
WordsRange range(startPos+1,endPos);
diff --git a/moses/TypeDef.h b/moses/TypeDef.h
index fb9fd56cb..a5c434d4b 100644
--- a/moses/TypeDef.h
+++ b/moses/TypeDef.h
@@ -31,6 +31,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <BaseTsd.h>
#else
#include <stdint.h>
+
typedef uint32_t UINT32;
typedef uint64_t UINT64;
#endif
@@ -59,7 +60,12 @@ const size_t DEFAULT_MAX_HYPOSTACK_SIZE = 200;
const size_t DEFAULT_MAX_TRANS_OPT_CACHE_SIZE = 10000;
const size_t DEFAULT_MAX_TRANS_OPT_SIZE = 5000;
const size_t DEFAULT_MAX_PART_TRANS_OPT_SIZE = 10000;
-const size_t DEFAULT_MAX_PHRASE_LENGTH = 20;
+#ifdef PT_UG
+// setting to std::numeric_limits<size_t>::max() makes the regression test for (deprecated) PhraseDictionaryDynamicSuffixArray fail.
+ const size_t DEFAULT_MAX_PHRASE_LENGTH = 100000;
+#else
+ const size_t DEFAULT_MAX_PHRASE_LENGTH = 20;
+#endif
const size_t DEFAULT_MAX_CHART_SPAN = 10;
const size_t ARRAY_SIZE_INCR = 10; //amount by which a phrase gets resized when necessary
const float LOWEST_SCORE = -100.0f;
@@ -106,24 +112,6 @@ enum DistortionOrientationOptions {
};
}
-enum PhraseTableImplementation {
- Memory = 0
- ,Binary = 1
- ,OnDisk = 2
- //,GlueRule = 3
- //,Joshua = 4
- //,MemorySourceLabel = 5
- ,SCFG = 6
- //,BerkeleyDb = 7
- ,SuffixArray = 8
- ,Hiero = 9
- ,ALSuffixArray = 10
- ,FuzzyMatch = 11
- ,Compact = 12
- ,Interpolated = 13
- ,DSuffixArray = 14
- ,MemMappedSA = 15
-};
enum InputTypeEnum {
SentenceInput = 0
diff --git a/moses/Util.h b/moses/Util.h
index 3bba71332..24a4e2c28 100644
--- a/moses/Util.h
+++ b/moses/Util.h
@@ -56,8 +56,12 @@ namespace Moses
/** verbose macros
* */
+
#define VERBOSE(level,str) { if (StaticData::Instance().GetVerboseLevel() >= level) { TRACE_ERR(str); } }
#define IFVERBOSE(level) if (StaticData::Instance().GetVerboseLevel() >= level)
+#define XVERBOSE(level,str) { if (StaticData::Instance().GetVerboseLevel() >= level) { TRACE_ERR("[" << __FILE__ << ":" << __LINE__ << "] ");TRACE_ERR(str); } }
+#define HERE __FILE__ << ":" << __LINE__
+
#if __GNUC__ == 4 && __GNUC_MINOR__ == 8 && (__GNUC_PATCHLEVEL__ == 1 || __GNUC_PATCHLEVEL__ == 2)
// gcc nth_element() bug
diff --git a/moses/Word.cpp b/moses/Word.cpp
index 04cbdb6a7..b1ea77059 100644
--- a/moses/Word.cpp
+++ b/moses/Word.cpp
@@ -139,8 +139,7 @@ CreateFromString(FactorDirection direction
<< " contains factor delimiter "
<< StaticData::Instance().GetFactorDelimiter()
<< " too many times.");
-
- UTIL_THROW_IF(i < factorOrder.size(),util::Exception,
+ UTIL_THROW_IF(!isNonTerminal && i < factorOrder.size(),util::Exception,
"Too few factors in string '" << str << "'.");
}
else
diff --git a/moses/XmlOption.cpp b/moses/XmlOption.cpp
index c42b200de..52779eaf9 100644
--- a/moses/XmlOption.cpp
+++ b/moses/XmlOption.cpp
@@ -30,6 +30,10 @@
#include "TargetPhrase.h"
#include "ReorderingConstraint.h"
#include "FactorCollection.h"
+#include "moses/TranslationModel/PhraseDictionary.h"
+#if PT_UG
+#include "TranslationModel/UG/mmsapt.h"
+#endif
namespace Moses
{
@@ -160,6 +164,12 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
const StaticData &staticData = StaticData::Instance();
+ // hack. What pt should XML trans opt be assigned to?
+ PhraseDictionary *firstPt = NULL;
+  if (PhraseDictionary::GetColl().size() != 0) {
+ firstPt = PhraseDictionary::GetColl()[0];
+ }
+
// no xml tag? we're done.
//if (line.find_first_of('<') == string::npos) {
if (line.find(lbrackStr) == string::npos) {
@@ -306,6 +316,38 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
placeholders.push_back(std::pair<size_t, std::string>(startPos, entity));
}
+ // update: add new aligned sentence pair to Mmsapt identified by name
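+  // illustrative example (attribute names as parsed below):
+  //   <update name="PT0" source="das haus" target="the house" alignment="0-0 1-1"/>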
+ else if (tagName == "update") {
+#if PT_UG
+ // get model name and aligned sentence pair
+ string pdName = ParseXmlTagAttribute(tagContent,"name");
+ string source = ParseXmlTagAttribute(tagContent,"source");
+ string target = ParseXmlTagAttribute(tagContent,"target");
+ string alignment = ParseXmlTagAttribute(tagContent,"alignment");
+ // find PhraseDictionary by name
+ const vector<PhraseDictionary*> &pds = PhraseDictionary::GetColl();
+ PhraseDictionary* pd = NULL;
+ for (vector<PhraseDictionary*>::const_iterator i = pds.begin(); i != pds.end(); ++i) {
+ PhraseDictionary* curPd = *i;
+ if (curPd->GetScoreProducerDescription() == pdName) {
+ pd = curPd;
+ break;
+ }
+ }
+ if (pd == NULL) {
+ TRACE_ERR("ERROR: No PhraseDictionary with name " << pdName << ", no update" << endl);
+ return false;
+ }
+ // update model
+ VERBOSE(3,"Updating " << pdName << " ||| " << source << " ||| " << target << " ||| " << alignment << endl);
+ Mmsapt* pdsa = reinterpret_cast<Mmsapt*>(pd);
+ pdsa->add(source, target, alignment);
+#else
+ TRACE_ERR("ERROR: recompile with --with-mm to update PhraseDictionary at runtime" << endl);
+ return false;
+#endif
+ }
+
// default: opening tag that specifies translation options
else {
if (startPos > endPos) {
@@ -361,7 +403,7 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
float scoreValue = FloorScore(TransformScore(probValue));
WordsRange range(startPos + offset,endPos-1 + offset); // span covered by phrase
- TargetPhrase targetPhrase;
+ TargetPhrase targetPhrase(firstPt);
// targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i],factorDelimiter, NULL);
targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i], NULL);
@@ -375,7 +417,7 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
}
targetPhrase.SetXMLScore(scoreValue);
- targetPhrase.Evaluate(sourcePhrase);
+ targetPhrase.EvaluateInIsolation(sourcePhrase);
XmlOption *option = new XmlOption(range,targetPhrase);
assert(option);
diff --git a/phrase-extract/ExtractionPhrasePair.cpp b/phrase-extract/ExtractionPhrasePair.cpp
index 9564b1cfe..ccf0fc275 100644
--- a/phrase-extract/ExtractionPhrasePair.cpp
+++ b/phrase-extract/ExtractionPhrasePair.cpp
@@ -463,6 +463,96 @@ std::string ExtractionPhrasePair::CollectAllLabelsSeparateLHSAndRHS(const std::s
}
+void ExtractionPhrasePair::CollectAllPhraseOrientations(const std::string &key,
+ const std::vector<float> &orientationClassPriorsL2R,
+ const std::vector<float> &orientationClassPriorsR2L,
+ double smoothingFactor,
+ std::ostream &out) const
+{
+ assert(orientationClassPriorsL2R.size()==4 && orientationClassPriorsR2L.size()==4); // mono swap dright dleft
+
+ const PROPERTY_VALUES *allPropertyValues = GetProperty( key );
+
+ if ( allPropertyValues == NULL ) {
+ return;
+ }
+
+ // bidirectional MSLR phrase orientation with 2x4 orientation classes:
+ // mono swap dright dleft
+ std::vector<float> orientationClassCountSumL2R(4,0);
+ std::vector<float> orientationClassCountSumR2L(4,0);
+
+ for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin();
+ iter!=allPropertyValues->end(); ++iter) {
+ std::string l2rOrientationClass, r2lOrientationClass;
+ try {
+ istringstream tokenizer(iter->first);
+ tokenizer >> l2rOrientationClass;
+ tokenizer >> r2lOrientationClass;
+ if ( tokenizer.peek() != EOF ) {
+ UTIL_THROW(util::Exception, "ExtractionPhrasePair"
+ << ": Collecting phrase orientations failed. "
+ << "Too many tokens?");
+ }
+ } catch (const std::exception &e) {
+ UTIL_THROW(util::Exception, "ExtractionPhrasePair"
+ << ": Collecting phrase orientations failed. "
+ << "Flawed property value in extract file?");
+ }
+
+ int l2rOrientationClassId = -1;
+ if (!l2rOrientationClass.compare("mono")) {
+ l2rOrientationClassId = 0;
+ }
+ if (!l2rOrientationClass.compare("swap")) {
+ l2rOrientationClassId = 1;
+ }
+ if (!l2rOrientationClass.compare("dright")) {
+ l2rOrientationClassId = 2;
+ }
+ if (!l2rOrientationClass.compare("dleft")) {
+ l2rOrientationClassId = 3;
+ }
+ if (l2rOrientationClassId == -1) {
+ UTIL_THROW(util::Exception, "ExtractionPhrasePair"
+ << ": Collecting phrase orientations failed. "
+ << "Unknown orientation class \"" << l2rOrientationClass << "\"." );
+ }
+ int r2lOrientationClassId = -1;
+ if (!r2lOrientationClass.compare("mono")) {
+ r2lOrientationClassId = 0;
+ }
+ if (!r2lOrientationClass.compare("swap")) {
+ r2lOrientationClassId = 1;
+ }
+ if (!r2lOrientationClass.compare("dright")) {
+ r2lOrientationClassId = 2;
+ }
+ if (!r2lOrientationClass.compare("dleft")) {
+ r2lOrientationClassId = 3;
+ }
+ if (r2lOrientationClassId == -1) {
+ UTIL_THROW(util::Exception, "ExtractionPhrasePair"
+ << ": Collecting phrase orientations failed. "
+ << "Unknown orientation class \"" << r2lOrientationClass << "\"." );
+ }
+
+ orientationClassCountSumL2R[l2rOrientationClassId] += iter->second;
+ orientationClassCountSumR2L[r2lOrientationClassId] += iter->second;
+ }
+
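+  // emit 8 smoothed relative frequencies (4 L2R classes, then 4 R2L):
+  // p_i = (smoothingFactor * prior_i + countSum_i) / (smoothingFactor + m_count)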
+ for (size_t i=0; i<4; ++i) {
+ if (i>0) {
+ out << " ";
+ }
+ out << (float)( (smoothingFactor*orientationClassPriorsL2R[i] + orientationClassCountSumL2R[i]) / (smoothingFactor + m_count) );
+ }
+ for (size_t i=0; i<4; ++i) {
+ out << " " << (float)( (smoothingFactor*orientationClassPriorsR2L[i] + orientationClassCountSumR2L[i]) / (smoothingFactor + m_count) );
+ }
+}
+
+
}
diff --git a/phrase-extract/ExtractionPhrasePair.h b/phrase-extract/ExtractionPhrasePair.h
index ba23ac1f2..e0f5dc5fb 100644
--- a/phrase-extract/ExtractionPhrasePair.h
+++ b/phrase-extract/ExtractionPhrasePair.h
@@ -131,6 +131,12 @@ public:
boost::unordered_map<std::string, boost::unordered_map<std::string,float>* >& sourceRHSAndLHSJointCounts,
Vocabulary &vcbT) const;
+ void CollectAllPhraseOrientations(const std::string &key,
+ const std::vector<float> &orientationClassPriorsL2R,
+ const std::vector<float> &orientationClassPriorsR2L,
+ double smoothingFactor,
+ std::ostream &out) const;
+
void AddProperties( const std::string &str, float count );
void AddProperty( const std::string &key, const std::string &value, float count )
diff --git a/phrase-extract/Jamfile b/phrase-extract/Jamfile
index 50fed2973..7bf63e587 100644
--- a/phrase-extract/Jamfile
+++ b/phrase-extract/Jamfile
@@ -4,7 +4,7 @@ for local d in $(most-deps) {
obj $(d:B).o : $(d) ;
}
#and stuff them into an alias.
-alias deps : $(most-deps:B).o ..//z ..//boost_iostreams ../moses//ThreadPool ../moses//Util ../util//kenutil ;
+alias deps : $(most-deps:B).o ..//z ..//boost_iostreams ../moses//moses ../moses//ThreadPool ../moses//Util ../util//kenutil ;
#ExtractionPhrasePair.cpp requires that main define some global variables.
#Build the mains that do not need these global variables.
diff --git a/phrase-extract/PhraseExtractionOptions.h b/phrase-extract/PhraseExtractionOptions.h
index 87712d6d3..7132974d4 100644
--- a/phrase-extract/PhraseExtractionOptions.h
+++ b/phrase-extract/PhraseExtractionOptions.h
@@ -18,7 +18,6 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
-/* Created by Rohit Gupta, CDAC, Mumbai, India on 18 July, 2012*/
#include <string>
#include <vector>
diff --git a/phrase-extract/PropertiesConsolidator.cpp b/phrase-extract/PropertiesConsolidator.cpp
new file mode 100644
index 000000000..642c48672
--- /dev/null
+++ b/phrase-extract/PropertiesConsolidator.cpp
@@ -0,0 +1,159 @@
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+
+#include "PropertiesConsolidator.h"
+
+#include <sstream>
+#include <limits>
+#include <vector>
+
+#include "moses/Util.h"
+#include "phrase-extract/InputFileStream.h"
+#include "phrase-extract/OutputFileStream.h"
+
+
+namespace MosesTraining
+{
+
+void PropertiesConsolidator::ActivateSourceLabelsProcessing(const std::string &sourceLabelSetFile)
+{
+ Moses::InputFileStream inFile(sourceLabelSetFile);
+
+ // read source label set
+ m_sourceLabels.clear();
+ std::string line;
+ while (getline(inFile, line)) {
+ std::istringstream tokenizer(line);
+ std::string label;
+ size_t index;
+ try {
+ tokenizer >> label >> index;
+ } catch (const std::exception &e) {
+      UTIL_THROW2("Error reading source label set file " << sourceLabelSetFile << ".");
+ }
+ std::pair< std::map<std::string,size_t>::iterator, bool > inserted = m_sourceLabels.insert( std::pair<std::string,size_t>(label,index) );
+ UTIL_THROW_IF2(!inserted.second,"Source label set file " << sourceLabelSetFile << " should contain each syntactic label only once.");
+ }
+
+ inFile.Close();
+
+ m_sourceLabelsFlag = true;
+}
+
+
+std::string PropertiesConsolidator::ProcessPropertiesString(const std::string &propertiesString) const
+{
+ if ( propertiesString.empty() ) {
+ return propertiesString;
+ }
+
+ std::ostringstream out;
+ std::vector<std::string> toks;
+ Moses::TokenizeMultiCharSeparator(toks, propertiesString, "{{");
+ for (size_t i = 1; i < toks.size(); ++i) {
+ std::string &tok = toks[i];
+ if (tok.empty()) {
+ continue;
+ }
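+    // after splitting on "{{", each tok has the form "Key value}}<text>";
+    // strip everything from the closing braces and split the key from the value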
+ size_t endPos = tok.rfind("}");
+ tok = tok.substr(0, endPos - 1);
+ std::vector<std::string> keyValue = Moses::TokenizeFirstOnly(tok, " ");
+ assert(keyValue.size() == 2);
+
+ if ( !keyValue[0].compare("SourceLabels") ) {
+
+ if ( m_sourceLabelsFlag ) {
+
+ // SourceLabels additional property: replace strings with vocabulary indices
+ out << " {{" << keyValue[0];
+
+ std::istringstream tokenizer(keyValue[1]);
+
+ size_t nNTs;
+ double totalCount;
+
+ if (! (tokenizer >> nNTs)) { // first token: number of non-terminals (incl. left-hand side)
+ UTIL_THROW2("Not able to read number of non-terminals from SourceLabels property. "
+ << "Flawed SourceLabels property?");
+ }
+ assert( nNTs > 0 );
+ out << " " << nNTs;
+
+ if (! (tokenizer >> totalCount)) { // second token: overall rule count
+ UTIL_THROW2("Not able to read overall rule count from SourceLabels property. "
+ << "Flawed SourceLabels property?");
+ }
+ assert( totalCount > 0.0 );
+ out << " " << totalCount;
+
+ while (tokenizer.peek() != EOF) {
+ try {
+
+ size_t numberOfLHSsGivenRHS = std::numeric_limits<std::size_t>::max();
+
+ std::string token;
+
+ if (nNTs > 1) { // rule has right-hand side non-terminals, i.e. it's a hierarchical rule
+ for (size_t i=0; i<nNTs-1; ++i) { // RHS source non-terminal labels
+ tokenizer >> token; // RHS source non-terminal label
+ std::map<std::string,size_t>::const_iterator found = m_sourceLabels.find(token);
+ UTIL_THROW_IF2(found == m_sourceLabels.end(), "Label \"" << token << "\" from the phrase table not found in given label set.");
+ out << " " << found->second;
+ }
+
+ tokenizer >> token; // sourceLabelsRHSCount
+ out << " " << token;
+
+ tokenizer >> numberOfLHSsGivenRHS;
+ out << " " << numberOfLHSsGivenRHS;
+ }
+
+ for (size_t i=0; i<numberOfLHSsGivenRHS && tokenizer.peek()!=EOF; ++i) { // LHS source non-terminal labels seen with this RHS
+ tokenizer >> token; // LHS source non-terminal label
+ std::map<std::string,size_t>::const_iterator found = m_sourceLabels.find(token);
+ UTIL_THROW_IF2(found == m_sourceLabels.end() ,"Label \"" << token << "\" from the phrase table not found in given label set.");
+ out << " " << found->second;
+
+ tokenizer >> token; // ruleSourceLabelledCount
+ out << " " << token;
+ }
+
+ } catch (const std::exception &e) {
+ UTIL_THROW2("Flawed item in SourceLabels property?");
+ }
+ }
+
+ out << "}}";
+
+ } else { // don't process source labels additional property
+ out << " {{" << keyValue[0] << " " << keyValue[1] << "}}";
+ }
+
+ } else {
+
+ // output other additional property
+ out << " {{" << keyValue[0] << " " << keyValue[1] << "}}";
+ }
+ }
+
+ return out.str();
+}
+
+} // namespace MosesTraining
+
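
Illustrative note (not part of the patch): a minimal usage sketch of the new PropertiesConsolidator. The label set file name and the label indices below are assumptions; the file is expected to hold one "label index" pair per line (e.g. "S 0", "NP 1").

    // Hypothetical sketch: consolidate a single SourceLabels property.
    #include <iostream>
    #include <string>
    #include "PropertiesConsolidator.h"

    int main()
    {
      MosesTraining::PropertiesConsolidator consolidator;
      consolidator.ActivateSourceLabelsProcessing("source-labels.txt"); // assumed file name

      // 2 non-terminals (incl. LHS), total count 1, RHS label NP (count 1),
      // one LHS label S seen once with this RHS.
      std::string in = "{{SourceLabels 2 1 NP 1 1 S 1}}";

      // With S=0 and NP=1 in the label set this prints
      // " {{SourceLabels 2 1 1 1 1 0 1}}": label strings become vocabulary indices.
      std::cout << consolidator.ProcessPropertiesString(in) << std::endl;
      return 0;
    }
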
diff --git a/phrase-extract/PropertiesConsolidator.h b/phrase-extract/PropertiesConsolidator.h
new file mode 100644
index 000000000..cc6a7a835
--- /dev/null
+++ b/phrase-extract/PropertiesConsolidator.h
@@ -0,0 +1,48 @@
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+
+
+#pragma once
+
+#include <string>
+#include <map>
+
+
+namespace MosesTraining
+{
+
+class PropertiesConsolidator
+{
+public:
+
+ PropertiesConsolidator() : m_sourceLabelsFlag(false) {};
+
+ void ActivateSourceLabelsProcessing(const std::string &sourceLabelSetFile);
+
+ std::string ProcessPropertiesString(const std::string &propertiesString) const;
+
+private:
+
+ bool m_sourceLabelsFlag;
+ std::map<std::string,size_t> m_sourceLabels;
+
+};
+
+} // namespace MosesTraining
+
diff --git a/phrase-extract/consolidate-main.cpp b/phrase-extract/consolidate-main.cpp
index 43d912b81..592ff7518 100644
--- a/phrase-extract/consolidate-main.cpp
+++ b/phrase-extract/consolidate-main.cpp
@@ -28,6 +28,7 @@
#include "tables-core.h"
#include "InputFileStream.h"
#include "OutputFileStream.h"
+#include "PropertiesConsolidator.h"
using namespace std;
@@ -37,13 +38,14 @@ bool phraseCountFlag = false;
bool lowCountFlag = false;
bool goodTuringFlag = false;
bool kneserNeyFlag = false;
+bool sourceLabelsFlag = false;
bool logProbFlag = false;
inline float maybeLogProb( float a )
{
return logProbFlag ? log(a) : a;
}
-void processFiles( char*, char*, char*, char* );
+void processFiles( char*, char*, char*, char*, char* );
void loadCountOfCounts( char* );
void breakdownCoreAndSparse( string combined, string &core, string &sparse );
bool getLine( istream &fileP, vector< string > &item );
@@ -57,13 +59,14 @@ int main(int argc, char* argv[])
<< "consolidating direct and indirect rule tables\n";
if (argc < 4) {
- cerr << "syntax: consolidate phrase-table.direct phrase-table.indirect phrase-table.consolidated [--Hierarchical] [--OnlyDirect] [--PhraseCount] \n";
+ cerr << "syntax: consolidate phrase-table.direct phrase-table.indirect phrase-table.consolidated [--Hierarchical] [--OnlyDirect] [--PhraseCount] [--GoodTuring counts-of-counts-file] [--KneserNey counts-of-counts-file] [--LowCountFeature] [--SourceLabels source-labels-file] \n";
exit(1);
}
char* &fileNameDirect = argv[1];
char* &fileNameIndirect = argv[2];
char* &fileNameConsolidated = argv[3];
char* fileNameCountOfCounts;
+ char* fileNameSourceLabelSet;
for(int i=4; i<argc; i++) {
if (strcmp(argv[i],"--Hierarchical") == 0) {
@@ -114,13 +117,21 @@ int main(int argc, char* argv[])
} else if (strcmp(argv[i],"--LogProb") == 0) {
logProbFlag = true;
cerr << "using log-probabilities\n";
+ } else if (strcmp(argv[i],"--SourceLabels") == 0) {
+ sourceLabelsFlag = true;
+ if (i+1==argc) {
+ cerr << "ERROR: specify source label set file!\n";
+ exit(1);
+ }
+ fileNameSourceLabelSet = argv[++i];
+ cerr << "processing source labels property\n";
} else {
cerr << "ERROR: unknown option " << argv[i] << endl;
exit(1);
}
}
- processFiles( fileNameDirect, fileNameIndirect, fileNameConsolidated, fileNameCountOfCounts );
+ processFiles( fileNameDirect, fileNameIndirect, fileNameConsolidated, fileNameCountOfCounts, fileNameSourceLabelSet );
}
vector< float > countOfCounts;
@@ -169,7 +180,7 @@ void loadCountOfCounts( char* fileNameCountOfCounts )
if (kneserNey_D3 > 2.9) kneserNey_D3 = 2.9;
}
-void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameConsolidated, char* fileNameCountOfCounts )
+void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameConsolidated, char* fileNameCountOfCounts, char* fileNameSourceLabelSet )
{
if (goodTuringFlag || kneserNeyFlag)
loadCountOfCounts( fileNameCountOfCounts );
@@ -198,6 +209,13 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
exit(1);
}
+ // create properties consolidator
+ // (in case any additional phrase property requires further processing)
+  MosesTraining::PropertiesConsolidator propertiesConsolidator;
+ if (sourceLabelsFlag) {
+ propertiesConsolidator.ActivateSourceLabelsProcessing(fileNameSourceLabelSet);
+ }
+
// loop through all extracted phrase translations
int i=0;
while(true) {
@@ -307,12 +325,13 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
// counts, for debugging
fileConsolidated << "||| " << countE << " " << countF << " " << countEF;
- // count bin feature (as a sparse feature)
+ // sparse features
fileConsolidated << " |||";
if (directSparseScores.compare("") != 0)
fileConsolidated << " " << directSparseScores;
if (indirectSparseScores.compare("") != 0)
fileConsolidated << " " << indirectSparseScores;
+ // count bin feature (as a sparse feature)
if (sparseCountBinFeatureFlag) {
bool foundBin = false;
for(size_t i=0; i < countBin.size(); i++) {
@@ -332,8 +351,13 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
}
// arbitrary key-value pairs
+ fileConsolidated << " |||";
if (itemDirect.size() >= 6) {
- fileConsolidated << " ||| " << itemDirect[5];
+    fileConsolidated << " " << propertiesConsolidator.ProcessPropertiesString(itemDirect[5]);
}
fileConsolidated << endl;
diff --git a/phrase-extract/extract-ghkm/ExtractGHKM.cpp b/phrase-extract/extract-ghkm/ExtractGHKM.cpp
index b86c28586..a8c37fba5 100644
--- a/phrase-extract/extract-ghkm/ExtractGHKM.cpp
+++ b/phrase-extract/extract-ghkm/ExtractGHKM.cpp
@@ -27,6 +27,7 @@
#include "OutputFileStream.h"
#include "Options.h"
#include "ParseTree.h"
+#include "PhraseOrientation.h"
#include "ScfgRule.h"
#include "ScfgRuleWriter.h"
#include "Span.h"
@@ -66,11 +67,12 @@ int ExtractGHKM::Main(int argc, char *argv[])
// Open output files.
OutputFileStream fwdExtractStream;
OutputFileStream invExtractStream;
- std::ofstream glueGrammarStream;
- std::ofstream targetUnknownWordStream;
- std::ofstream sourceUnknownWordStream;
- std::ofstream sourceLabelSetStream;
- std::ofstream unknownWordSoftMatchesStream;
+ OutputFileStream glueGrammarStream;
+ OutputFileStream targetUnknownWordStream;
+ OutputFileStream sourceUnknownWordStream;
+ OutputFileStream sourceLabelSetStream;
+ OutputFileStream unknownWordSoftMatchesStream;
+
std::string fwdFileName = options.extractFile;
std::string invFileName = options.extractFile + std::string(".inv");
if (options.gzOutput) {
@@ -79,6 +81,7 @@ int ExtractGHKM::Main(int argc, char *argv[])
}
OpenOutputFileOrDie(fwdFileName, fwdExtractStream);
OpenOutputFileOrDie(invFileName, invExtractStream);
+
if (!options.glueGrammarFile.empty()) {
OpenOutputFileOrDie(options.glueGrammarFile, glueGrammarStream);
}
@@ -118,7 +121,7 @@ int ExtractGHKM::Main(int argc, char *argv[])
std::string sourceLine;
std::string alignmentLine;
Alignment alignment;
- XmlTreeParser xmlTreeParser(targetLabelSet, targetTopLabelSet);
+ XmlTreeParser targetXmlTreeParser(targetLabelSet, targetTopLabelSet);
// XmlTreeParser sourceXmlTreeParser(sourceLabelSet, sourceTopLabelSet);
ScfgRuleWriter writer(fwdExtractStream, invExtractStream, options);
size_t lineNum = options.sentenceOffset;
@@ -144,7 +147,7 @@ int ExtractGHKM::Main(int argc, char *argv[])
}
std::auto_ptr<ParseTree> targetParseTree;
try {
- targetParseTree = xmlTreeParser.Parse(targetLine);
+ targetParseTree = targetXmlTreeParser.Parse(targetLine);
assert(targetParseTree.get());
} catch (const Exception &e) {
std::ostringstream oss;
@@ -181,7 +184,7 @@ int ExtractGHKM::Main(int argc, char *argv[])
// Read source tokens.
std::vector<std::string> sourceTokens(ReadTokens(sourceLine));
- // Construct a source ParseTree object object from the SyntaxTree object.
+ // Construct a source ParseTree object from the SyntaxTree object.
std::auto_ptr<ParseTree> sourceParseTree;
if (options.sourceLabels) {
@@ -235,11 +238,26 @@ int ExtractGHKM::Main(int argc, char *argv[])
graph.ExtractComposedRules(options);
}
+ // Initialize phrase orientation scoring object
+ PhraseOrientation phraseOrientation( sourceTokens, targetXmlTreeParser.GetWords(), alignment);
+
// Write the rules, subject to scope pruning.
const std::vector<Node *> &targetNodes = graph.GetTargetNodes();
for (std::vector<Node *>::const_iterator p = targetNodes.begin();
p != targetNodes.end(); ++p) {
+
const std::vector<const Subgraph *> &rules = (*p)->GetRules();
+
+ REO_POS l2rOrientation=UNKNOWN, r2lOrientation=UNKNOWN;
+ if (options.phraseOrientation && !rules.empty()) {
+ int sourceSpanBegin = *((*p)->GetSpan().begin());
+ int sourceSpanEnd = *((*p)->GetSpan().rbegin());
+ l2rOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,L2R);
+ r2lOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,R2L);
+ // std::cerr << "span " << sourceSpanBegin << " " << sourceSpanEnd << std::endl;
+ // std::cerr << "phraseOrientation " << phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd) << std::endl;
+ }
+
for (std::vector<const Subgraph *>::const_iterator q = rules.begin();
q != rules.end(); ++q) {
ScfgRule *r = 0;
@@ -251,16 +269,34 @@ int ExtractGHKM::Main(int argc, char *argv[])
// TODO Can scope pruning be done earlier?
if (r->Scope() <= options.maxScope) {
if (!options.treeFragments) {
- writer.Write(*r);
+ writer.Write(*r,false);
} else {
- writer.Write(*r,**q);
+ writer.Write(*r,**q,false);
+ }
+ if (options.phraseOrientation) {
+ fwdExtractStream << " {{Orientation ";
+ phraseOrientation.WriteOrientation(fwdExtractStream,l2rOrientation);
+ fwdExtractStream << " ";
+ phraseOrientation.WriteOrientation(fwdExtractStream,r2lOrientation);
+ fwdExtractStream << "}}";
+ phraseOrientation.IncrementPriorCount(L2R,l2rOrientation,1);
+ phraseOrientation.IncrementPriorCount(R2L,r2lOrientation,1);
}
+ fwdExtractStream << std::endl;
+ invExtractStream << std::endl;
}
delete r;
}
}
}
+ if (options.phraseOrientation) {
+ std::string phraseOrientationPriorsFileName = options.extractFile + std::string(".phraseOrientationPriors");
+ OutputFileStream phraseOrientationPriorsStream;
+ OpenOutputFileOrDie(phraseOrientationPriorsFileName, phraseOrientationPriorsStream);
+ PhraseOrientation::WritePriorCounts(phraseOrientationPriorsStream);
+ }
+
std::map<std::string,size_t> sourceLabels;
if (options.sourceLabels && !options.sourceLabelSetFile.empty()) {
@@ -398,6 +434,8 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
"extract minimal rules only")
("PCFG",
"include score based on PCFG scores in target corpus")
+ ("PhraseOrientation",
+ "output phrase orientation information")
("TreeFragments",
"output parse tree information")
("SourceLabels",
@@ -502,6 +540,9 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
if (vm.count("PCFG")) {
options.pcfg = true;
}
+ if (vm.count("PhraseOrientation")) {
+ options.phraseOrientation = true;
+ }
if (vm.count("TreeFragments")) {
options.treeFragments = true;
}
@@ -576,9 +617,8 @@ void ExtractGHKM::WriteGlueGrammar(
}
}
- std::string sourceTopLabel = "TOPLABEL";
- std::string sourceSLabel = "S";
- std::string sourceSomeLabel = "SOMELABEL";
+ size_t sourceLabelGlueTop = 0;
+ size_t sourceLabelGlueX = 1;
// basic rules
out << "<s> [X] ||| <s> [" << topLabel << "] ||| 1 ||| ||| ||| |||";
@@ -586,7 +626,7 @@ void ExtractGHKM::WriteGlueGrammar(
out << " {{Tree [" << topLabel << " <s>]}}";
}
if (options.sourceLabels) {
- out << " {{SourceLabels 1 1 " << sourceTopLabel << " 1}}";
+ out << " {{SourceLabels 1 1 " << sourceLabelGlueTop << " 1}}";
}
out << std::endl;
@@ -595,7 +635,7 @@ void ExtractGHKM::WriteGlueGrammar(
out << " {{Tree [" << topLabel << " [" << topLabel << "] </s>]}}";
}
if (options.sourceLabels) {
- out << " {{SourceLabels 2 1 " << sourceTopLabel << " 1 1 " << sourceTopLabel << " 1}}";
+ out << " {{SourceLabels 2 1 " << sourceLabelGlueTop << " 1 1 " << sourceLabelGlueTop << " 1}}";
}
out << std::endl;
@@ -607,7 +647,7 @@ void ExtractGHKM::WriteGlueGrammar(
out << " {{Tree [" << topLabel << " <s> [" << i->first << "] </s>]}}";
}
if (options.sourceLabels) {
- out << " {{SourceLabels 2 1 " << sourceSLabel << " 1 1 " << sourceTopLabel << " 1}}";
+ out << " {{SourceLabels 2 1 " << sourceLabelGlueX << " 1 1 " << sourceLabelGlueTop << " 1}}";
}
out << std::endl;
}
@@ -620,7 +660,7 @@ void ExtractGHKM::WriteGlueGrammar(
out << " {{Tree [" << topLabel << " ["<< topLabel << "] [" << *i << "]]}}";
}
if (options.sourceLabels) {
- out << " {{SourceLabels 3 2.718 " << sourceTopLabel << " " << sourceSomeLabel << " 2.718 1 " << sourceTopLabel << " 2.718}}"; // TODO: there should be better options than using "SOMELABEL"
+      out << " {{SourceLabels 3 2.718 " << sourceLabelGlueTop << " " << sourceLabelGlueX << " 2.718 1 " << sourceLabelGlueTop << " 2.718}}";
}
out << std::endl;
}
@@ -631,7 +671,7 @@ void ExtractGHKM::WriteGlueGrammar(
out << " {{Tree [" << topLabel << " [" << topLabel << "] [X]]}}";
}
if (options.sourceLabels) {
- out << " {{SourceLabels 3 1 " << sourceTopLabel << " " << sourceSomeLabel << " 1 1 " << sourceTopLabel << " 1}}"; // TODO: there should be better options than using "SOMELABEL"
+      out << " {{SourceLabels 3 1 " << sourceLabelGlueTop << " " << sourceLabelGlueX << " 1 1 " << sourceLabelGlueTop << " 1}}";
}
out << std::endl;
}
@@ -736,8 +776,7 @@ void ExtractGHKM::WriteUnknownWordSoftMatches(
const std::set<std::string> &labelSet,
std::ostream &out)
{
- std::set<std::string>::const_iterator p = labelSet.begin();
- for (p; p != labelSet.end(); ++p) {
+ for (std::set<std::string>::const_iterator p = labelSet.begin(); p != labelSet.end(); ++p) {
std::string label = *p;
out << "UNK " << label << std::endl;
}
diff --git a/phrase-extract/extract-ghkm/Options.h b/phrase-extract/extract-ghkm/Options.h
index 28a581802..0102e2f64 100644
--- a/phrase-extract/extract-ghkm/Options.h
+++ b/phrase-extract/extract-ghkm/Options.h
@@ -40,6 +40,7 @@ public:
, maxScope(3)
, minimal(false)
, pcfg(false)
+ , phraseOrientation(false)
, treeFragments(false)
, sourceLabels(false)
, sentenceOffset(0)
@@ -64,6 +65,7 @@ public:
int maxScope;
bool minimal;
bool pcfg;
+ bool phraseOrientation;
bool treeFragments;
bool sourceLabels;
std::string sourceLabelSetFile;
diff --git a/phrase-extract/extract-ghkm/PhraseOrientation.cpp b/phrase-extract/extract-ghkm/PhraseOrientation.cpp
new file mode 100644
index 000000000..e7f65be07
--- /dev/null
+++ b/phrase-extract/extract-ghkm/PhraseOrientation.cpp
@@ -0,0 +1,419 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2011 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "PhraseOrientation.h"
+
+#include <iostream>
+#include <sstream>
+#include <limits>
+
+#include <boost/assign/list_of.hpp>
+
+namespace Moses
+{
+namespace GHKM
+{
+
+std::vector<float> PhraseOrientation::m_l2rOrientationPriorCounts = boost::assign::list_of(0)(0)(0)(0)(0);
+std::vector<float> PhraseOrientation::m_r2lOrientationPriorCounts = boost::assign::list_of(0)(0)(0)(0)(0);
+
+PhraseOrientation::PhraseOrientation(const std::vector<std::string> &source,
+ const std::vector<std::string> &target,
+ const Alignment &alignment)
+ : m_source(source)
+ , m_target(target)
+ , m_alignment(alignment)
+{
+
+ int countF = m_source.size();
+ int countE = m_target.size();
+
+ // prepare data structures for alignments
+ std::vector<std::vector<int> > alignedToS;
+ for(int i=0; i<countF; ++i) {
+ std::vector< int > dummy;
+ alignedToS.push_back(dummy);
+ }
+ for(int i=0; i<countE; ++i) {
+ std::vector< int > dummy;
+ m_alignedToT.push_back(dummy);
+ }
+ std::vector<int> alignedCountS(countF,0);
+
+ for (Alignment::const_iterator a=alignment.begin(); a!=alignment.end(); ++a) {
+ m_alignedToT[a->second].push_back(a->first);
+ alignedCountS[a->first]++;
+ alignedToS[a->first].push_back(a->second);
+ }
+
+ for (int startF=0; startF<countF; ++startF) {
+ for (int endF=startF; endF<countF; ++endF) {
+
+ int minE = std::numeric_limits<int>::max();
+ int maxE = -1;
+ for (int fi=startF; fi<=endF; ++fi) {
+ for (size_t i=0; i<alignedToS[fi].size(); ++i) {
+ int ei = alignedToS[fi][i];
+ if (ei<minE) {
+ minE = ei;
+ }
+ if (ei>maxE) {
+ maxE = ei;
+ }
+ }
+ }
+
+ m_minAndMaxAlignedToSourceSpan[ std::pair<int,int>(startF,endF) ] = std::pair<int,int>(minE,maxE);
+ }
+ }
+
+ // check alignments for target phrase startE...endE
+ // loop over continuous phrases which are compatible with the word alignments
+ for (int startE=0; startE<countE; ++startE) {
+ for (int endE=startE; endE<countE; ++endE) {
+
+ int minF = std::numeric_limits<int>::max();
+ int maxF = -1;
+ std::vector< int > usedF = alignedCountS;
+ for (int ei=startE; ei<=endE; ++ei) {
+ for (size_t i=0; i<m_alignedToT[ei].size(); ++i) {
+ int fi = m_alignedToT[ei][i];
+ if (fi<minF) {
+ minF = fi;
+ }
+ if (fi>maxF) {
+ maxF = fi;
+ }
+ usedF[fi]--;
+ }
+ }
+
+ if (maxF >= 0) { // aligned to any source words at all
+
+ // check if source words are aligned to out of bound target words
+ bool out_of_bounds = false;
+ for (int fi=minF; fi<=maxF && !out_of_bounds; ++fi)
+ if (usedF[fi]>0) {
+ // cout << "ouf of bounds: " << fi << "\n";
+ out_of_bounds = true;
+ }
+
+ // cout << "doing if for ( " << minF << "-" << maxF << ", " << startE << "," << endE << ")\n";
+ if (!out_of_bounds) {
+ // start point of source phrase may retreat over unaligned
+ for (int startF=minF;
+ (startF>=0 &&
+ (startF==minF || alignedCountS[startF]==0)); // unaligned
+ startF--) {
+ // end point of source phrase may advance over unaligned
+ for (int endF=maxF;
+ (endF<countF &&
+ (endF==maxF || alignedCountS[endF]==0)); // unaligned
+ endF++) { // at this point we have extracted a phrase
+
+ InsertPhraseVertices(m_topLeft, m_topRight, m_bottomLeft, m_bottomRight,
+ startF, startE, endF, endE);
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+
+void PhraseOrientation::InsertVertex( HSentenceVertices & corners, int x, int y )
+{
+ std::set<int> tmp;
+ tmp.insert(x);
+ std::pair< HSentenceVertices::iterator, bool > ret = corners.insert( std::pair<int, std::set<int> > (y, tmp) );
+ if (ret.second == false) {
+ ret.first->second.insert(x);
+ }
+}
+
+
+void PhraseOrientation::InsertPhraseVertices(HSentenceVertices & topLeft,
+ HSentenceVertices & topRight,
+ HSentenceVertices & bottomLeft,
+ HSentenceVertices & bottomRight,
+ int startF, int startE, int endF, int endE)
+{
+
+ InsertVertex(topLeft, startF, startE);
+ InsertVertex(topRight, endF, startE);
+ InsertVertex(bottomLeft, startF, endE);
+ InsertVertex(bottomRight, endF, endE);
+}
+
+
+const std::string PhraseOrientation::GetOrientationInfoString(int startF, int endF, REO_DIR direction) const
+{
+ boost::unordered_map< std::pair<int,int> , std::pair<int,int> >::const_iterator foundMinMax
+ = m_minAndMaxAlignedToSourceSpan.find( std::pair<int,int>(startF,endF) );
+
+ if ( foundMinMax != m_minAndMaxAlignedToSourceSpan.end() ) {
+ int startE = (foundMinMax->second).first;
+ int endE = (foundMinMax->second).second;
+// std::cerr << "Phrase orientation for"
+// << " startF=" << startF
+// << " endF=" << endF
+// << " startE=" << startE
+// << " endE=" << endE
+// << std::endl;
+ return GetOrientationInfoString(startF, startE, endF, endE, direction);
+ } else {
+ std::cerr << "Error: not able to determine phrase orientation" << std::endl;
+ std::exit(1);
+ }
+}
+
+
+const std::string PhraseOrientation::GetOrientationInfoString(int startF, int startE, int endF, int endE, REO_DIR direction) const
+{
+ REO_POS hierPrevOrient=UNKNOWN, hierNextOrient=UNKNOWN;
+
+ bool connectedLeftTopP = IsAligned( startF-1, startE-1 );
+ bool connectedRightTopP = IsAligned( endF+1, startE-1 );
+ bool connectedLeftTopN = IsAligned( endF+1, endE+1 );
+ bool connectedRightTopN = IsAligned( startF-1, endE+1 );
+
+ if ( direction == L2R || direction == BIDIR )
+ hierPrevOrient = GetOrientHierModel(REO_MSLR,
+ connectedLeftTopP, connectedRightTopP,
+ startF, endF, startE, endE, m_source.size()-1, 0, 1,
+ &ge, &lt,
+ m_bottomRight, m_bottomLeft);
+
+ if ( direction == R2L || direction == BIDIR )
+ hierNextOrient = GetOrientHierModel(REO_MSLR,
+ connectedLeftTopN, connectedRightTopN,
+ endF, startF, endE, startE, 0, m_source.size()-1, -1,
+ &lt, &ge,
+ m_bottomLeft, m_bottomRight);
+
+ switch (direction) {
+ case L2R:
+ return GetOrientationString(hierPrevOrient, REO_MSLR);
+ break;
+ case R2L:
+ return GetOrientationString(hierNextOrient, REO_MSLR);
+ break;
+ case BIDIR:
+ return GetOrientationString(hierPrevOrient, REO_MSLR) + " " + GetOrientationString(hierNextOrient, REO_MSLR);
+ break;
+ default:
+ return GetOrientationString(hierPrevOrient, REO_MSLR) + " " + GetOrientationString(hierNextOrient, REO_MSLR);
+ break;
+ }
+ return "PhraseOrientationERROR";
+}
+
+
+REO_POS PhraseOrientation::GetOrientationInfo(int startF, int endF, REO_DIR direction) const
+{
+ boost::unordered_map< std::pair<int,int> , std::pair<int,int> >::const_iterator foundMinMax
+ = m_minAndMaxAlignedToSourceSpan.find( std::pair<int,int>(startF,endF) );
+
+ if ( foundMinMax != m_minAndMaxAlignedToSourceSpan.end() ) {
+ int startE = (foundMinMax->second).first;
+ int endE = (foundMinMax->second).second;
+// std::cerr << "Phrase orientation for"
+// << " startF=" << startF
+// << " endF=" << endF
+// << " startE=" << startE
+// << " endE=" << endE
+// << std::endl;
+ return GetOrientationInfo(startF, startE, endF, endE, direction);
+ } else {
+ std::cerr << "Error: not able to determine phrase orientation" << std::endl;
+ std::exit(1);
+ }
+}
+
+
+REO_POS PhraseOrientation::GetOrientationInfo(int startF, int startE, int endF, int endE, REO_DIR direction) const
+{
+ if ( direction != L2R && direction != R2L ) {
+ std::cerr << "PhraseOrientation::GetOrientationInfo(): direction should be either L2R or R2L" << std::endl;
+ std::exit(1);
+ }
+
+ bool connectedLeftTopP = IsAligned( startF-1, startE-1 );
+ bool connectedRightTopP = IsAligned( endF+1, startE-1 );
+ bool connectedLeftTopN = IsAligned( endF+1, endE+1 );
+ bool connectedRightTopN = IsAligned( startF-1, endE+1 );
+
+ if ( direction == L2R )
+ return GetOrientHierModel(REO_MSLR,
+ connectedLeftTopP, connectedRightTopP,
+ startF, endF, startE, endE, m_source.size()-1, 0, 1,
+ &ge, &lt,
+ m_bottomRight, m_bottomLeft);
+
+ if ( direction == R2L )
+ return GetOrientHierModel(REO_MSLR,
+ connectedLeftTopN, connectedRightTopN,
+ endF, startF, endE, startE, 0, m_source.size()-1, -1,
+ &lt, &ge,
+ m_bottomLeft, m_bottomRight);
+
+ return UNKNOWN;
+}
+
+
+// to be called with countF-1 instead of countF
+REO_POS PhraseOrientation::GetOrientHierModel(REO_MODEL_TYPE modelType,
+ bool connectedLeftTop, bool connectedRightTop,
+ int startF, int endF, int startE, int endE, int countF, int zero, int unit,
+ bool (*ge)(int, int), bool (*lt)(int, int),
+ const HSentenceVertices & bottomRight, const HSentenceVertices & bottomLeft) const
+{
+ HSentenceVertices::const_iterator it;
+
+ if ((connectedLeftTop && !connectedRightTop) ||
+ ((it = bottomRight.find(startE - unit)) != bottomRight.end() &&
+ it->second.find(startF-unit) != it->second.end()))
+ return LEFT;
+
+ if (modelType == REO_MONO)
+ return UNKNOWN;
+
+ if ((!connectedLeftTop && connectedRightTop) ||
+ ((it = bottomLeft.find(startE - unit)) != bottomLeft.end() &&
+ it->second.find(endF + unit) != it->second.end()))
+ return RIGHT;
+
+ if (modelType == REO_MSD)
+ return UNKNOWN;
+
+ connectedLeftTop = false;
+ for (int indexF=startF-2*unit; (*ge)(indexF, zero) && !connectedLeftTop; indexF=indexF-unit) {
+ if ((connectedLeftTop = ((it = bottomRight.find(startE - unit)) != bottomRight.end() &&
+ it->second.find(indexF) != it->second.end())))
+ return DRIGHT;
+ }
+
+ connectedRightTop = false;
+ for (int indexF=endF+2*unit; (*lt)(indexF, countF) && !connectedRightTop; indexF=indexF+unit) {
+ if ((connectedRightTop = ((it = bottomLeft.find(startE - unit)) != bottomLeft.end() &&
+ it->second.find(indexF) != it->second.end())))
+ return DLEFT;
+ }
+
+ return UNKNOWN;
+}
+
+
+const std::string PhraseOrientation::GetOrientationString(const REO_POS orient, const REO_MODEL_TYPE modelType)
+{
+ std::ostringstream oss;
+ WriteOrientation(oss, orient, modelType);
+ return oss.str();
+}
+
+
+void PhraseOrientation::WriteOrientation(std::ostream& out, const REO_POS orient, const REO_MODEL_TYPE modelType)
+{
+ switch(orient) {
+ case LEFT:
+ out << "mono";
+ break;
+ case RIGHT:
+ out << "swap";
+ break;
+ case DRIGHT:
+ out << "dright";
+ break;
+ case DLEFT:
+ out << "dleft";
+ break;
+ case UNKNOWN:
+ switch(modelType) {
+ case REO_MONO:
+ out << "nomono";
+ break;
+ case REO_MSD:
+ out << "other";
+ break;
+ case REO_MSLR:
+ out << "dright";
+ break;
+ }
+ break;
+ }
+}
+
+
+bool PhraseOrientation::IsAligned(int fi, int ei) const
+{
+ if (ei == -1 && fi == -1)
+ return true;
+
+ if (ei <= -1 || fi <= -1)
+ return false;
+
+ if (ei == (int)m_target.size() && fi == (int)m_source.size())
+ return true;
+
+ if (ei >= (int)m_target.size() || fi >= (int)m_source.size())
+ return false;
+
+ for (size_t i=0; i<m_alignedToT[ei].size(); ++i)
+ if (m_alignedToT[ei][i] == fi)
+ return true;
+
+ return false;
+}
+
+
+void PhraseOrientation::IncrementPriorCount(REO_DIR direction, REO_POS orient, float increment)
+{
+ assert(direction==L2R || direction==R2L);
+ if (direction == L2R) {
+ m_l2rOrientationPriorCounts[orient] += increment;
+ } else if (direction == R2L) {
+ m_r2lOrientationPriorCounts[orient] += increment;
+ }
+}
+
+
+void PhraseOrientation::WritePriorCounts(std::ostream& out, const REO_MODEL_TYPE modelType)
+{
+ std::map<std::string,float> l2rOrientationPriorCountsMap;
+ std::map<std::string,float> r2lOrientationPriorCountsMap;
+ for (int orient=0; orient<=UNKNOWN; ++orient) {
+ l2rOrientationPriorCountsMap[GetOrientationString((REO_POS)orient, modelType)] += m_l2rOrientationPriorCounts[orient];
+ }
+ for (int orient=0; orient<=UNKNOWN; ++orient) {
+ r2lOrientationPriorCountsMap[GetOrientationString((REO_POS)orient, modelType)] += m_r2lOrientationPriorCounts[orient];
+ }
+ for (std::map<std::string,float>::const_iterator l2rOrientationPriorCountsMapIt = l2rOrientationPriorCountsMap.begin();
+ l2rOrientationPriorCountsMapIt != l2rOrientationPriorCountsMap.end(); ++l2rOrientationPriorCountsMapIt) {
+ out << "L2R_" << l2rOrientationPriorCountsMapIt->first << " " << l2rOrientationPriorCountsMapIt->second << std::endl;
+ }
+ for (std::map<std::string,float>::const_iterator r2lOrientationPriorCountsMapIt = r2lOrientationPriorCountsMap.begin();
+ r2lOrientationPriorCountsMapIt != r2lOrientationPriorCountsMap.end(); ++r2lOrientationPriorCountsMapIt) {
+ out << "R2L_" << r2lOrientationPriorCountsMapIt->first << " " << r2lOrientationPriorCountsMapIt->second << std::endl;
+ }
+}
+
+} // namespace GHKM
+} // namespace Moses
+
diff --git a/phrase-extract/extract-ghkm/PhraseOrientation.h b/phrase-extract/extract-ghkm/PhraseOrientation.h
new file mode 100644
index 000000000..6e83929f1
--- /dev/null
+++ b/phrase-extract/extract-ghkm/PhraseOrientation.h
@@ -0,0 +1,102 @@
+
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2011 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#pragma once
+
+#include "Alignment.h"
+
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+#include <boost/unordered_map.hpp>
+
+namespace Moses
+{
+namespace GHKM
+{
+
+enum REO_MODEL_TYPE {REO_MSD, REO_MSLR, REO_MONO};
+enum REO_POS {LEFT, RIGHT, DLEFT, DRIGHT, UNKNOWN};
+enum REO_DIR {L2R, R2L, BIDIR};
+
+// The key of the map is the English index and the value is a set of the source ones
+typedef std::map <int, std::set<int> > HSentenceVertices;
+
+
+class PhraseOrientation
+{
+public:
+
+ PhraseOrientation(const std::vector<std::string> &source,
+ const std::vector<std::string> &target,
+ const Alignment &alignment);
+
+ REO_POS GetOrientationInfo(int startF, int endF, REO_DIR direction) const;
+ REO_POS GetOrientationInfo(int startF, int startE, int endF, int endE, REO_DIR direction) const;
+ const std::string GetOrientationInfoString(int startF, int endF, REO_DIR direction=BIDIR) const;
+ const std::string GetOrientationInfoString(int startF, int startE, int endF, int endE, REO_DIR direction=BIDIR) const;
+ static const std::string GetOrientationString(const REO_POS orient, const REO_MODEL_TYPE modelType=REO_MSLR);
+ static void WriteOrientation(std::ostream& out, const REO_POS orient, const REO_MODEL_TYPE modelType=REO_MSLR);
+ void IncrementPriorCount(REO_DIR direction, REO_POS orient, float increment);
+ static void WritePriorCounts(std::ostream& out, const REO_MODEL_TYPE modelType=REO_MSLR);
+
+private:
+
+ void InsertVertex( HSentenceVertices & corners, int x, int y );
+
+ void InsertPhraseVertices(HSentenceVertices & topLeft,
+ HSentenceVertices & topRight,
+ HSentenceVertices & bottomLeft,
+ HSentenceVertices & bottomRight,
+ int startF, int startE, int endF, int endE);
+
+ REO_POS GetOrientHierModel(REO_MODEL_TYPE modelType,
+ bool connectedLeftTop, bool connectedRightTop,
+ int startF, int endF, int startE, int endE, int countF, int zero, int unit,
+ bool (*ge)(int, int), bool (*lt)(int, int),
+ const HSentenceVertices & bottomRight, const HSentenceVertices & bottomLeft) const;
+
+ bool IsAligned(int fi, int ei) const;
+
+ static bool ge(int first, int second) { return first >= second; };
+ static bool le(int first, int second) { return first <= second; };
+ static bool lt(int first, int second) { return first < second; };
+
+ const std::vector<std::string> &m_source;
+ const std::vector<std::string> &m_target;
+ const Alignment &m_alignment;
+
+ std::vector<std::vector<int> > m_alignedToT;
+
+ HSentenceVertices m_topLeft;
+ HSentenceVertices m_topRight;
+ HSentenceVertices m_bottomLeft;
+ HSentenceVertices m_bottomRight;
+
+ boost::unordered_map< std::pair<int,int> , std::pair<int,int> > m_minAndMaxAlignedToSourceSpan;
+
+ static std::vector<float> m_l2rOrientationPriorCounts;
+ static std::vector<float> m_r2lOrientationPriorCounts;
+};
+
+} // namespace GHKM
+} // namespace Moses
+
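
Illustrative note (not part of the patch): a minimal sketch of how PhraseOrientation can be queried, assuming Alignment is a sequence of (source index, target index) pairs as consumed by the constructor; the sentence pair and alignment are made-up.

    // Hypothetical sketch for a two-word, monotone sentence pair.
    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>
    #include "PhraseOrientation.h"

    int main()
    {
      std::vector<std::string> source;
      source.push_back("das");
      source.push_back("Haus");
      std::vector<std::string> target;
      target.push_back("the");
      target.push_back("house");

      Moses::GHKM::Alignment alignment;          // assumed: (source, target) index pairs
      alignment.push_back(std::make_pair(0, 0)); // das-the
      alignment.push_back(std::make_pair(1, 1)); // Haus-house

      Moses::GHKM::PhraseOrientation orientation(source, target, alignment);

      // Left-to-right orientation of source span [1,1] ("Haus"); the pair is
      // monotone, so this prints "mono".
      Moses::GHKM::REO_POS l2r = orientation.GetOrientationInfo(1, 1, Moses::GHKM::L2R);
      Moses::GHKM::PhraseOrientation::WriteOrientation(std::cout, l2r);
      std::cout << std::endl;
      return 0;
    }
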
diff --git a/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp b/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
index be373b67b..2fba6930b 100644
--- a/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
+++ b/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
@@ -169,14 +169,17 @@ void ScfgRuleWriter::WriteSymbol(const Symbol &symbol, std::ostream &out)
}
}
-void ScfgRuleWriter::Write(const ScfgRule &rule, const Subgraph &g)
+void ScfgRuleWriter::Write(const ScfgRule &rule, const Subgraph &g, bool printEndl)
{
- Write(rule,false);
- m_fwd << " {{Tree ";
- g.PrintTree(m_fwd);
- m_fwd << "}}";
+ Write(rule,false);
+ m_fwd << " {{Tree ";
+ g.PrintTree(m_fwd);
+ m_fwd << "}}";
+
+ if (printEndl) {
m_fwd << std::endl;
m_inv << std::endl;
+ }
}
} // namespace GHKM
diff --git a/phrase-extract/extract-ghkm/ScfgRuleWriter.h b/phrase-extract/extract-ghkm/ScfgRuleWriter.h
index 18f423149..8a8564580 100644
--- a/phrase-extract/extract-ghkm/ScfgRuleWriter.h
+++ b/phrase-extract/extract-ghkm/ScfgRuleWriter.h
@@ -44,7 +44,7 @@ public:
void Write(const ScfgRule &rule, bool printEndl=true);
- void Write(const ScfgRule &rule, const Subgraph &g);
+ void Write(const ScfgRule &rule, const Subgraph &g, bool printEndl=true);
private:
// Disallow copying
diff --git a/phrase-extract/extract-ghkm/XmlTreeParser.h b/phrase-extract/extract-ghkm/XmlTreeParser.h
index e5bf5b463..d0209254f 100644
--- a/phrase-extract/extract-ghkm/XmlTreeParser.h
+++ b/phrase-extract/extract-ghkm/XmlTreeParser.h
@@ -49,6 +49,8 @@ public:
static std::auto_ptr<ParseTree> ConvertTree(const MosesTraining::SyntaxNode &,
const std::vector<std::string> &);
+ const std::vector<std::string>& GetWords() { return m_words; };
+
private:
std::set<std::string> &m_labelSet;
diff --git a/phrase-extract/extract-main.cpp b/phrase-extract/extract-main.cpp
index fe3d99cd2..2f22d8aba 100644
--- a/phrase-extract/extract-main.cpp
+++ b/phrase-extract/extract-main.cpp
@@ -18,6 +18,7 @@
#include <map>
#include <set>
#include <vector>
+#include <limits>
#include "SentenceAlignment.h"
#include "tables-core.h"
@@ -363,8 +364,6 @@ void ExtractTask::extract(SentenceAlignment &sentence)
HSentenceVertices outBottomLeft;
HSentenceVertices outBottomRight;
- HSentenceVertices::const_iterator it;
-
bool relaxLimit = m_options.isHierModel();
bool buildExtraStructure = m_options.isPhraseModel() || m_options.isHierModel();
@@ -375,7 +374,7 @@ void ExtractTask::extract(SentenceAlignment &sentence)
(endE<countE && (relaxLimit || endE<startE+m_options.maxPhraseLength));
endE++) {
- int minF = 9999;
+ int minF = std::numeric_limits<int>::max();
int maxF = -1;
vector< int > usedF = sentence.alignedCountS;
for(int ei=startE; ei<=endE; ei++) {
diff --git a/contrib/other-builds/extract-mixed-syntax/AlignedSentence.cpp b/phrase-extract/extract-mixed-syntax/AlignedSentence.cpp
index 0f00d0bbf..082878c00 100644
--- a/contrib/other-builds/extract-mixed-syntax/AlignedSentence.cpp
+++ b/phrase-extract/extract-mixed-syntax/AlignedSentence.cpp
@@ -153,7 +153,12 @@ void AlignedSentence::CreateConsistentPhrases(const Parameter &params)
continue;
// source phrase has to be within limits
- if( maxS-minS >= params.maxSpan )
+ size_t width = maxS - minS + 1;
+
+ if( width < params.minSpan )
+ continue;
+
+ if( width > params.maxSpan )
continue;
// check if source words are aligned to out of bound target words
diff --git a/contrib/other-builds/extract-mixed-syntax/AlignedSentence.h b/phrase-extract/extract-mixed-syntax/AlignedSentence.h
index 915bdf90c..915bdf90c 100644
--- a/contrib/other-builds/extract-mixed-syntax/AlignedSentence.h
+++ b/phrase-extract/extract-mixed-syntax/AlignedSentence.h
diff --git a/contrib/other-builds/extract-mixed-syntax/AlignedSentenceSyntax.cpp b/phrase-extract/extract-mixed-syntax/AlignedSentenceSyntax.cpp
index 3d63ed044..3d63ed044 100644
--- a/contrib/other-builds/extract-mixed-syntax/AlignedSentenceSyntax.cpp
+++ b/phrase-extract/extract-mixed-syntax/AlignedSentenceSyntax.cpp
diff --git a/contrib/other-builds/extract-mixed-syntax/AlignedSentenceSyntax.h b/phrase-extract/extract-mixed-syntax/AlignedSentenceSyntax.h
index 2e9431996..2e9431996 100644
--- a/contrib/other-builds/extract-mixed-syntax/AlignedSentenceSyntax.h
+++ b/phrase-extract/extract-mixed-syntax/AlignedSentenceSyntax.h
diff --git a/contrib/other-builds/extract-mixed-syntax/ConsistentPhrase.cpp b/phrase-extract/extract-mixed-syntax/ConsistentPhrase.cpp
index bb913da5a..bb913da5a 100644
--- a/contrib/other-builds/extract-mixed-syntax/ConsistentPhrase.cpp
+++ b/phrase-extract/extract-mixed-syntax/ConsistentPhrase.cpp
diff --git a/contrib/other-builds/extract-mixed-syntax/ConsistentPhrase.h b/phrase-extract/extract-mixed-syntax/ConsistentPhrase.h
index 865b4386f..865b4386f 100644
--- a/contrib/other-builds/extract-mixed-syntax/ConsistentPhrase.h
+++ b/phrase-extract/extract-mixed-syntax/ConsistentPhrase.h
diff --git a/contrib/other-builds/extract-mixed-syntax/ConsistentPhrases.cpp b/phrase-extract/extract-mixed-syntax/ConsistentPhrases.cpp
index 8978c88fa..8978c88fa 100644
--- a/contrib/other-builds/extract-mixed-syntax/ConsistentPhrases.cpp
+++ b/phrase-extract/extract-mixed-syntax/ConsistentPhrases.cpp
diff --git a/contrib/other-builds/extract-mixed-syntax/ConsistentPhrases.h b/phrase-extract/extract-mixed-syntax/ConsistentPhrases.h
index 3daf6b7ff..3daf6b7ff 100644
--- a/contrib/other-builds/extract-mixed-syntax/ConsistentPhrases.h
+++ b/phrase-extract/extract-mixed-syntax/ConsistentPhrases.h
diff --git a/contrib/other-builds/extract-mixed-syntax/InputFileStream.cpp b/phrase-extract/extract-mixed-syntax/InputFileStream.cpp
index b52d1f920..b52d1f920 100644
--- a/contrib/other-builds/extract-mixed-syntax/InputFileStream.cpp
+++ b/phrase-extract/extract-mixed-syntax/InputFileStream.cpp
diff --git a/contrib/other-builds/extract-mixed-syntax/InputFileStream.h b/phrase-extract/extract-mixed-syntax/InputFileStream.h
index f10ec2164..f10ec2164 100644
--- a/contrib/other-builds/extract-mixed-syntax/InputFileStream.h
+++ b/phrase-extract/extract-mixed-syntax/InputFileStream.h
diff --git a/phrase-extract/extract-mixed-syntax/Jamfile b/phrase-extract/extract-mixed-syntax/Jamfile
new file mode 100644
index 000000000..520cd65cb
--- /dev/null
+++ b/phrase-extract/extract-mixed-syntax/Jamfile
@@ -0,0 +1,2 @@
+exe extract-mixed-syntax : Main.cpp AlignedSentence.cpp AlignedSentenceSyntax.cpp ConsistentPhrase.cpp ConsistentPhrases.cpp NonTerm.cpp Parameter.cpp Phrase.cpp pugixml.cpp Rule.cpp RulePhrase.cpp Rules.cpp RuleSymbol.cpp SyntaxTree.cpp Word.cpp ..//deps ../..//z ../..//boost_iostreams ../..//boost_program_options ../../moses//moses : <include>.. ;
+
diff --git a/contrib/other-builds/extract-mixed-syntax/Main.cpp b/phrase-extract/extract-mixed-syntax/Main.cpp
index 89875daa9..10656b577 100644
--- a/contrib/other-builds/extract-mixed-syntax/Main.cpp
+++ b/phrase-extract/extract-mixed-syntax/Main.cpp
@@ -25,12 +25,14 @@ int main(int argc, char** argv)
desc.add_options()
("help", "Print help messages")
("MaxSpan", po::value<int>()->default_value(params.maxSpan), "Max (source) span of a rule. ie. number of words in the source")
+ ("MinSpan", po::value<int>()->default_value(params.minSpan), "Min (source) span of a rule.")
("GlueGrammar", po::value<string>()->default_value(params.gluePath), "Output glue grammar to here")
("SentenceOffset", po::value<long>()->default_value(params.sentenceOffset), "Starting sentence id. Not used")
("GZOutput", "Compress extract files")
("MaxNonTerm", po::value<int>()->default_value(params.maxNonTerm), "Maximum number of non-terms allowed per rule")
("MaxHieroNonTerm", po::value<int>()->default_value(params.maxHieroNonTerm), "Maximum number of Hiero non-term. Usually, --MaxNonTerm is the normal constraint")
("MinHoleSource", po::value<int>()->default_value(params.minHoleSource), "Minimum source span for a non-term.")
+ ("MinHoleSourceSyntax", po::value<int>()->default_value(params.minHoleSourceSyntax), "Minimum source span for a syntactic non-term (source or target).")
("SourceSyntax", "Source sentence is a parse tree")
("TargetSyntax", "Target sentence is a parse tree")
@@ -40,8 +42,23 @@ int main(int argc, char** argv)
("MaxSpanFreeNonTermSource", po::value<int>()->default_value(params.maxSpanFreeNonTermSource), "Max number of words covered by beginning/end NT. Default = 0 (no limit)")
("NoNieceTerminal", "Don't extract rule if 1 of the non-term covers the same word as 1 of the terminals")
("MaxScope", po::value<int>()->default_value(params.maxScope), "maximum scope (see Hopkins and Langmead (2010)). Default is HIGH")
+  ("MinScope", po::value<int>()->default_value(params.minScope), "Minimum scope.")
+
("SpanLength", "Property - span length of RHS each non-term")
- ("NonTermContext", "Property - left and right, inside and outside words of each non-term");
+
+ ("NonTermContext", "Property - (source) left and right, inside and outside words of each non-term ")
+ ("NonTermContextTarget", "Property - (target) left and right, inside and outside words of each non-term")
+ ("NonTermContextFactor", po::value<int>()->default_value(params.nonTermContextFactor), "Factor to use for non-term context property.")
+
+ ("NumSourceFactors", po::value<int>()->default_value(params.numSourceFactors), "Number of source factors.")
+ ("NumTargetFactors", po::value<int>()->default_value(params.numTargetFactors), "Number of target factors.")
+
+ ("HieroNonTerm", po::value<string>()->default_value(params.hieroNonTerm), "Hiero non-terminal label, including bracket")
+ ("ScopeSpan", po::value<string>()->default_value(params.scopeSpanStr), "Min and max span for rules of each scope. Format is min,max:min,max...")
+
+ ("NonTermConsecSource", "Allow consecutive non-terms on the source side")
+  ("NonTermConsecSourceMixedSyntax", po::value<int>()->default_value(params.nonTermConsecSourceMixedSyntax), "In mixed syntax mode, which non-terminals may be consecutive. 0=do not allow consecutive non-terms. 1 (default)=hiero+syntax. 2=syntax+syntax. 3=always allow");
+
po::variables_map vm;
try
@@ -69,12 +86,14 @@ int main(int argc, char** argv)
}
if (vm.count("MaxSpan")) params.maxSpan = vm["MaxSpan"].as<int>();
+ if (vm.count("MinSpan")) params.minSpan = vm["MinSpan"].as<int>();
if (vm.count("GZOutput")) params.gzOutput = true;
if (vm.count("GlueGrammar")) params.gluePath = vm["GlueGrammar"].as<string>();
if (vm.count("SentenceOffset")) params.sentenceOffset = vm["SentenceOffset"].as<long>();
if (vm.count("MaxNonTerm")) params.maxNonTerm = vm["MaxNonTerm"].as<int>();
if (vm.count("MaxHieroNonTerm")) params.maxHieroNonTerm = vm["MaxHieroNonTerm"].as<int>();
if (vm.count("MinHoleSource")) params.minHoleSource = vm["MinHoleSource"].as<int>();
+ if (vm.count("MinHoleSourceSyntax")) params.minHoleSourceSyntax = vm["MinHoleSourceSyntax"].as<int>();
if (vm.count("SourceSyntax")) params.sourceSyntax = true;
if (vm.count("TargetSyntax")) params.targetSyntax = true;
@@ -84,10 +103,25 @@ int main(int argc, char** argv)
if (vm.count("MaxSpanFreeNonTermSource")) params.maxSpanFreeNonTermSource = vm["MaxSpanFreeNonTermSource"].as<int>();
if (vm.count("NoNieceTerminal")) params.nieceTerminal = false;
if (vm.count("MaxScope")) params.maxScope = vm["MaxScope"].as<int>();
+ if (vm.count("MinScope")) params.minScope = vm["MinScope"].as<int>();
// properties
if (vm.count("SpanLength")) params.spanLength = true;
if (vm.count("NonTermContext")) params.nonTermContext = true;
+ if (vm.count("NonTermContextTarget")) params.nonTermContextTarget = true;
+ if (vm.count("NonTermContextFactor")) params.nonTermContextFactor = vm["NonTermContextFactor"].as<int>();
+
+ if (vm.count("NumSourceFactors")) params.numSourceFactors = vm["NumSourceFactors"].as<int>();
+ if (vm.count("NumTargetFactors")) params.numTargetFactors = vm["NumTargetFactors"].as<int>();
+
+ if (vm.count("HieroNonTerm")) params.hieroNonTerm = vm["HieroNonTerm"].as<string>();
+ if (vm.count("ScopeSpan")) {
+ params.SetScopeSpan(vm["ScopeSpan"].as<string>());
+ }
+
+ if (vm.count("NonTermConsecSource")) params.nonTermConsecSource = true;
+ if (vm.count("NonTermConsecSourceMixedSyntax")) params.nonTermConsecSourceMixedSyntax = vm["NonTermConsecSourceMixedSyntax"].as<int>();
+
// input files;
string pathTarget = argv[1];
diff --git a/contrib/other-builds/extract-mixed-syntax/Main.h b/phrase-extract/extract-mixed-syntax/Main.h
index 9744ba389..9744ba389 100644
--- a/contrib/other-builds/extract-mixed-syntax/Main.h
+++ b/phrase-extract/extract-mixed-syntax/Main.h
diff --git a/contrib/other-builds/extract-mixed-syntax/NonTerm.cpp b/phrase-extract/extract-mixed-syntax/NonTerm.cpp
index 9e7d0dcaa..5de780a9a 100644
--- a/contrib/other-builds/extract-mixed-syntax/NonTerm.cpp
+++ b/phrase-extract/extract-mixed-syntax/NonTerm.cpp
@@ -61,5 +61,6 @@ bool NonTerm::IsHiero(const Parameter &params) const
{
return IsHiero(Moses::Input, params) && IsHiero(Moses::Output, params);
}
+
int NonTerm::GetWidth(Moses::FactorDirection direction) const
{ return GetConsistentPhrase().GetWidth(direction); }
diff --git a/contrib/other-builds/extract-mixed-syntax/NonTerm.h b/phrase-extract/extract-mixed-syntax/NonTerm.h
index 5b3bb9f04..5b3bb9f04 100644
--- a/contrib/other-builds/extract-mixed-syntax/NonTerm.h
+++ b/phrase-extract/extract-mixed-syntax/NonTerm.h
diff --git a/phrase-extract/extract-mixed-syntax/Parameter.cpp b/phrase-extract/extract-mixed-syntax/Parameter.cpp
new file mode 100644
index 000000000..4f742e774
--- /dev/null
+++ b/phrase-extract/extract-mixed-syntax/Parameter.cpp
@@ -0,0 +1,72 @@
+/*
+ * Parameter.cpp
+ *
+ * Created on: 17 Feb 2014
+ * Author: hieu
+ */
+#include "Parameter.h"
+#include "moses/Util.h"
+#include "util/exception.hh"
+
+using namespace std;
+
+Parameter::Parameter()
+:maxSpan(10)
+,minSpan(0)
+,maxNonTerm(2)
+,maxHieroNonTerm(999)
+,maxSymbolsTarget(999)
+,maxSymbolsSource(5)
+,minHoleSource(2)
+,minHoleSourceSyntax(1)
+,sentenceOffset(0)
+,nonTermConsecSource(false)
+,requireAlignedWord(true)
+,fractionalCounting(true)
+,gzOutput(false)
+
+,hieroNonTerm("[X]")
+,sourceSyntax(false)
+,targetSyntax(false)
+
+,mixedSyntaxType(0)
+,multiLabel(0)
+,nonTermConsecSourceMixed(true)
+,hieroSourceLHS(false)
+,maxSpanFreeNonTermSource(0)
+,nieceTerminal(true)
+,maxScope(UNDEFINED)
+,minScope(0)
+
+,spanLength(false)
+,nonTermContext(false)
+,nonTermContextTarget(false)
+,nonTermContextFactor(0)
+
+,numSourceFactors(1)
+,numTargetFactors(1)
+
+,nonTermConsecSourceMixedSyntax(1)
+{}
+
+Parameter::~Parameter() {
+ // TODO Auto-generated destructor stub
+}
+
+void Parameter::SetScopeSpan(const std::string &str)
+{
+ scopeSpanStr = str;
+ vector<string> toks1;
+ Moses::Tokenize(toks1, str, ":");
+
+ for (size_t i = 0; i < toks1.size(); ++i) {
+ const string &tok1 = toks1[i];
+
+ vector<int> toks2;
+ Moses::Tokenize<int>(toks2, tok1, ",");
+ UTIL_THROW_IF2(toks2.size() != 2, "Format is min,max:min,max... String is " << tok1);
+
+ std::pair<int,int> values(toks2[0], toks2[1]);
+ scopeSpan.push_back(values);
+ }
+}
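
Illustrative note (not part of the patch): SetScopeSpan() parses the --ScopeSpan argument, a colon-separated list of min,max source-width pairs indexed by rule scope; rules whose scope has no entry remain unconstrained (see Rule::Prevalidate below). A minimal sketch of the expected result:

    // Hypothetical example: scope-0 rules limited to widths 1..7, scope-1 rules to 2..5.
    #include <cassert>
    #include <utility>
    #include "Parameter.h"

    int main()
    {
      Parameter params;
      params.SetScopeSpan("1,7:2,5");
      assert(params.scopeSpan.size() == 2);
      assert(params.scopeSpan[0] == std::make_pair(1, 7));
      assert(params.scopeSpan[1] == std::make_pair(2, 5));
      return 0;
    }
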
diff --git a/contrib/other-builds/extract-mixed-syntax/Parameter.h b/phrase-extract/extract-mixed-syntax/Parameter.h
index 1da090c86..1a9018504 100644
--- a/contrib/other-builds/extract-mixed-syntax/Parameter.h
+++ b/phrase-extract/extract-mixed-syntax/Parameter.h
@@ -8,6 +8,7 @@
#include <string>
#include <limits>
+#include <vector>
#define UNDEFINED std::numeric_limits<int>::max()
@@ -18,11 +19,13 @@ public:
virtual ~Parameter();
int maxSpan;
+ int minSpan;
int maxNonTerm;
int maxHieroNonTerm;
int maxSymbolsTarget;
int maxSymbolsSource;
int minHoleSource;
+ int minHoleSourceSyntax;
long sentenceOffset;
@@ -41,11 +44,22 @@ public:
bool hieroSourceLHS;
int maxSpanFreeNonTermSource;
bool nieceTerminal;
- int maxScope;
+ int maxScope, minScope;
- // prperties
+ // properties
bool spanLength;
bool nonTermContext;
+ bool nonTermContextTarget;
+ int nonTermContextFactor;
+
+ int numSourceFactors, numTargetFactors;
+
+ int nonTermConsecSourceMixedSyntax;
+
+ std::string scopeSpanStr;
+ std::vector<std::pair<int,int> > scopeSpan;
+
+ void SetScopeSpan(const std::string &str);
};
diff --git a/contrib/other-builds/extract-mixed-syntax/Phrase.cpp b/phrase-extract/extract-mixed-syntax/Phrase.cpp
index 535e10d6b..535e10d6b 100644
--- a/contrib/other-builds/extract-mixed-syntax/Phrase.cpp
+++ b/phrase-extract/extract-mixed-syntax/Phrase.cpp
diff --git a/contrib/other-builds/extract-mixed-syntax/Phrase.h b/phrase-extract/extract-mixed-syntax/Phrase.h
index 13912cb95..13912cb95 100644
--- a/contrib/other-builds/extract-mixed-syntax/Phrase.h
+++ b/phrase-extract/extract-mixed-syntax/Phrase.h
diff --git a/contrib/other-builds/extract-mixed-syntax/Rule.cpp b/phrase-extract/extract-mixed-syntax/Rule.cpp
index c16d0f8c4..a3e148e6c 100644
--- a/contrib/other-builds/extract-mixed-syntax/Rule.cpp
+++ b/phrase-extract/extract-mixed-syntax/Rule.cpp
@@ -182,41 +182,82 @@ void Rule::Output(std::ostream &out, bool forward, const Parameter &params) cons
out << "}} ";
}
- // non-term context
+ // non-term context (source)
if (forward && params.nonTermContext && m_nonterms.size()) {
out << "{{NonTermContext ";
+ int factor = params.nonTermContextFactor;
+
+ for (size_t i = 0; i < m_nonterms.size(); ++i) {
+ const NonTerm &nonTerm = *m_nonterms[i];
+ const ConsistentPhrase &cp = nonTerm.GetConsistentPhrase();
+ NonTermContext(1, factor, i, cp, out);
+ }
+ out << "}} ";
+ }
+
+ // non-term context (target)
+ if (forward && params.nonTermContextTarget && m_nonterms.size()) {
+ out << "{{NonTermContextTarget ";
+
+ int factor = params.nonTermContextFactor;
+
for (size_t i = 0; i < m_nonterms.size(); ++i) {
const NonTerm &nonTerm = *m_nonterms[i];
const ConsistentPhrase &cp = nonTerm.GetConsistentPhrase();
- NonTermContext(i, cp, out);
+ NonTermContext(2, factor, i, cp, out);
}
out << "}} ";
}
+
}
-void Rule::NonTermContext(size_t ntInd, const ConsistentPhrase &cp, std::ostream &out) const
+void Rule::NonTermContextFactor(int factor, const Word &word, std::ostream &out) const
{
- int startPos = cp.corners[0];
- int endPos = cp.corners[1];
+ out << word.GetString(factor) << " ";
+}
- const Phrase &source = m_alignedSentence.GetPhrase(Moses::Input);
+void Rule::NonTermContext(int sourceTarget, int factor, size_t ntInd, const ConsistentPhrase &cp, std::ostream &out) const
+{
+ int startPos, endPos;
+ const Phrase *phrase;
+ if (sourceTarget == 1) {
+ startPos = cp.corners[0];
+ endPos = cp.corners[1];
+ phrase = &m_alignedSentence.GetPhrase(Moses::Input);
+ }
+ else if (sourceTarget == 2) {
+ startPos = cp.corners[2];
+ endPos = cp.corners[3];
+ phrase = &m_alignedSentence.GetPhrase(Moses::Output);
+ }
+ else {
+ abort();
+ }
+
+ out << ntInd << " ";
+
+ // left outside
if (startPos == 0) {
- out << "<s> ";
+ out << "<s> ";
}
else {
- out << source[startPos - 1]->GetString() << " ";
+ NonTermContextFactor(factor, *phrase->at(startPos - 1), out);
}
- out << source[startPos]->GetString() << " ";
- out << source[endPos]->GetString() << " ";
+ // left inside
+ NonTermContextFactor(factor, *phrase->at(startPos), out);
+
+ // right inside
+ NonTermContextFactor(factor, *phrase->at(endPos), out);
- if (endPos == source.size() - 1) {
- out << "</s> ";
+ // right outside
+ if (endPos == phrase->size() - 1) {
+ out << "</s> ";
}
else {
- out << source[endPos + 1]->GetString() << " ";
+ NonTermContextFactor(factor, *phrase->at(endPos + 1), out);
}
@@ -236,12 +277,20 @@ void Rule::Prevalidate(const Parameter &params)
const NonTerm &lastNonTerm = *m_nonterms.back();
const ConsistentPhrase &cp = lastNonTerm.GetConsistentPhrase();
- int sourceWidth = cp.corners[1] - cp.corners[0] + 1;
- if (sourceWidth < params.minHoleSource) {
+ int sourceWidth = cp.GetWidth(Moses::Input);
+ if (lastNonTerm.IsHiero(params)) {
+ if (sourceWidth < params.minHoleSource) {
+ m_isValid = false;
+ m_canRecurse = false;
+ return;
+ }
+ }
+ else if (sourceWidth < params.minHoleSourceSyntax) {
m_isValid = false;
m_canRecurse = false;
return;
}
+
}
// check number of non-terms
@@ -289,14 +338,31 @@ void Rule::Prevalidate(const Parameter &params)
}
else {
// Hieu's mixed syntax
- if (lastNonTerm.IsHiero(Moses::Input, params)
- && secondLastNonTerm.IsHiero(Moses::Input, params)) {
+ switch (params.nonTermConsecSourceMixedSyntax) {
+ case 0:
m_isValid = false;
m_canRecurse = false;
return;
- }
+ case 1:
+ if (lastNonTerm.IsHiero(Moses::Input, params)
+ && secondLastNonTerm.IsHiero(Moses::Input, params)) {
+ m_isValid = false;
+ m_canRecurse = false;
+ return;
+ }
+ break;
+ case 2:
+ if (lastNonTerm.IsHiero(Moses::Input, params)
+ || secondLastNonTerm.IsHiero(Moses::Input, params)) {
+ m_isValid = false;
+ m_canRecurse = false;
+ return;
+ }
+ break;
+ case 3:
+ break;
+ } // switch
}
-
}
}
@@ -389,18 +455,73 @@ void Rule::Prevalidate(const Parameter &params)
}
}
- if (params.maxScope != UNDEFINED) {
- int scope = CalcScope();
+ if (params.maxScope != UNDEFINED || params.minScope > 0) {
+ int scope = GetScope(params);
if (scope > params.maxScope) {
+ // scope of subsequent rules will be the same or increase
+ // therefore can NOT recurse
m_isValid = false;
m_canRecurse = false;
return;
}
+
+ if (scope < params.minScope) {
+ // scope of subsequent rules may increase
+ // therefore can recurse
+ m_isValid = false;
+ }
+ }
+
+ // min/max span per scope
+ if (params.scopeSpan.size()) {
+ int scope = GetScope(params);
+ if (scope >= params.scopeSpan.size()) {
+ // no constraint on it. It's ok
+ }
+ else {
+ const std::pair<int,int> &constraint = params.scopeSpan[scope];
+ int sourceWidth = m_lhs.GetWidth(Moses::Input);
+ if (sourceWidth < constraint.first || sourceWidth > constraint.second) {
+ m_isValid = false;
+ m_canRecurse = false;
+ return;
+ }
+ }
}
}
-int Rule::CalcScope() const
+int Rule::GetScope(const Parameter &params) const
{
+ size_t scope = 0;
+ bool previousIsAmbiguous = false;
+
+ if (m_source[0]->IsNonTerm()) {
+ scope++;
+ previousIsAmbiguous = true;
+ }
+
+ for (size_t i = 1; i < m_source.GetSize(); ++i) {
+ const RuleSymbol *symbol = m_source[i];
+ bool isAmbiguous = symbol->IsNonTerm();
+ if (isAmbiguous) {
+ // mixed syntax
+ const NonTerm *nt = static_cast<const NonTerm*>(symbol);
+ isAmbiguous = nt->IsHiero(Moses::Input, params);
+ }
+
+ if (isAmbiguous && previousIsAmbiguous) {
+ scope++;
+ }
+ previousIsAmbiguous = isAmbiguous;
+ }
+
+ if (previousIsAmbiguous) {
+ scope++;
+ }
+
+ return scope;
+
+ /*
int scope = 0;
if (m_source.GetSize() > 1) {
const RuleSymbol &front = *m_source.Front();
@@ -414,6 +535,7 @@ int Rule::CalcScope() const
}
}
return scope;
+ */
}
template<typename T>
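
Illustrative note (not part of the patch): Rule::GetScope() above counts an ambiguous symbol at either rule edge plus every position where two ambiguous symbols are adjacent; the first source symbol counts as ambiguous if it is any non-terminal, later symbols only if they are hiero non-terminals. The sketch below reimplements the same counting over a simplified rule in which each symbol is reduced to an is-ambiguous flag.

    // Hypothetical standalone sketch of the scope computation.
    #include <cstddef>
    #include <vector>

    int ScopeOf(const std::vector<bool> &isAmbiguous)
    {
      int scope = 0;
      bool previousIsAmbiguous = false;
      for (std::size_t i = 0; i < isAmbiguous.size(); ++i) {
        if (isAmbiguous[i] && (i == 0 || previousIsAmbiguous)) {
          ++scope;
        }
        previousIsAmbiguous = isAmbiguous[i];
      }
      if (previousIsAmbiguous) {
        ++scope;
      }
      return scope;
    }

    // e.g. "[X] the [X]"   -> flags {true, false, true}  -> scope 2
    //      "the [X] house" -> flags {false, true, false} -> scope 0
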
diff --git a/contrib/other-builds/extract-mixed-syntax/Rule.h b/phrase-extract/extract-mixed-syntax/Rule.h
index e97dc6d7f..15a142b97 100644
--- a/contrib/other-builds/extract-mixed-syntax/Rule.h
+++ b/phrase-extract/extract-mixed-syntax/Rule.h
@@ -79,9 +79,12 @@ protected:
void CreateAlignments(int sourcePos, const RuleSymbol *targetSought);
bool ContainTerm(const ConsistentPhrase &cp, const std::set<const Word*> &terms) const;
- int CalcScope() const; // not yet correctly calculated
+ int GetScope(const Parameter &params) const;
- void NonTermContext(size_t ntInd, const ConsistentPhrase &cp, std::ostream &out) const;
+  void NonTermContext(int sourceTarget, int factor, size_t ntInd, const ConsistentPhrase &cp, std::ostream &out) const;
+ // sourceTarget: 1 = source, 2 = target
+
+ void NonTermContextFactor(int factor, const Word &word, std::ostream &out) const;
};
diff --git a/contrib/other-builds/extract-mixed-syntax/RulePhrase.cpp b/phrase-extract/extract-mixed-syntax/RulePhrase.cpp
index 5c629168b..5c629168b 100644
--- a/contrib/other-builds/extract-mixed-syntax/RulePhrase.cpp
+++ b/phrase-extract/extract-mixed-syntax/RulePhrase.cpp
diff --git a/contrib/other-builds/extract-mixed-syntax/RulePhrase.h b/phrase-extract/extract-mixed-syntax/RulePhrase.h
index 412169b74..412169b74 100644
--- a/contrib/other-builds/extract-mixed-syntax/RulePhrase.h
+++ b/phrase-extract/extract-mixed-syntax/RulePhrase.h
diff --git a/contrib/other-builds/extract-mixed-syntax/RuleSymbol.cpp b/phrase-extract/extract-mixed-syntax/RuleSymbol.cpp
index 933ffc9c2..933ffc9c2 100644
--- a/contrib/other-builds/extract-mixed-syntax/RuleSymbol.cpp
+++ b/phrase-extract/extract-mixed-syntax/RuleSymbol.cpp
diff --git a/contrib/other-builds/extract-mixed-syntax/RuleSymbol.h b/phrase-extract/extract-mixed-syntax/RuleSymbol.h
index c292fcc0d..c292fcc0d 100644
--- a/contrib/other-builds/extract-mixed-syntax/RuleSymbol.h
+++ b/phrase-extract/extract-mixed-syntax/RuleSymbol.h
diff --git a/contrib/other-builds/extract-mixed-syntax/Rules.cpp b/phrase-extract/extract-mixed-syntax/Rules.cpp
index 1b93430e2..1b93430e2 100644
--- a/contrib/other-builds/extract-mixed-syntax/Rules.cpp
+++ b/phrase-extract/extract-mixed-syntax/Rules.cpp
diff --git a/contrib/other-builds/extract-mixed-syntax/Rules.h b/phrase-extract/extract-mixed-syntax/Rules.h
index 6d8cb122d..6d8cb122d 100644
--- a/contrib/other-builds/extract-mixed-syntax/Rules.h
+++ b/phrase-extract/extract-mixed-syntax/Rules.h
diff --git a/contrib/other-builds/extract-mixed-syntax/SyntaxTree.cpp b/phrase-extract/extract-mixed-syntax/SyntaxTree.cpp
index 472444e7c..472444e7c 100644
--- a/contrib/other-builds/extract-mixed-syntax/SyntaxTree.cpp
+++ b/phrase-extract/extract-mixed-syntax/SyntaxTree.cpp
diff --git a/contrib/other-builds/extract-mixed-syntax/SyntaxTree.h b/phrase-extract/extract-mixed-syntax/SyntaxTree.h
index 58f718151..58f718151 100644
--- a/contrib/other-builds/extract-mixed-syntax/SyntaxTree.h
+++ b/phrase-extract/extract-mixed-syntax/SyntaxTree.h
diff --git a/contrib/other-builds/extract-mixed-syntax/Word.cpp b/phrase-extract/extract-mixed-syntax/Word.cpp
index 691266874..8ce4f76c6 100644
--- a/contrib/other-builds/extract-mixed-syntax/Word.cpp
+++ b/phrase-extract/extract-mixed-syntax/Word.cpp
@@ -6,6 +6,7 @@
*/
#include <limits>
#include "Word.h"
+#include "moses/Util.h"
using namespace std;
@@ -54,3 +55,14 @@ int Word::CompareString(const Word &other) const
{
return m_str.compare(other.m_str);
}
+
+std::string Word::GetString(int factor) const
+{
+ vector<string> toks;
+ Moses::Tokenize(toks, m_str, "|");
+
+ assert(factor < toks.size());
+ return toks[factor];
+}
+
+
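The new Word::GetString(int factor) presumes the usual factored-token layout, where the surface form and any further factors are packed into a single string separated by '|'. A minimal sketch of that split using only the standard library (GetFactor is a hypothetical helper, not the Moses::Tokenize call used above):

#include <cassert>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Split a factored token such as "house|NN|house" on '|' and return one factor.
std::string GetFactor(const std::string &token, size_t factor)
{
  std::vector<std::string> factors;
  std::istringstream in(token);
  std::string piece;
  while (std::getline(in, piece, '|')) {
    factors.push_back(piece);
  }
  assert(factor < factors.size());
  return factors[factor];
}

int main()
{
  std::cout << GetFactor("house|NN|house", 1) << "\n"; // prints "NN"
  return 0;
}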
diff --git a/contrib/other-builds/extract-mixed-syntax/Word.h b/phrase-extract/extract-mixed-syntax/Word.h
index 2f4600166..54419ceb0 100644
--- a/contrib/other-builds/extract-mixed-syntax/Word.h
+++ b/phrase-extract/extract-mixed-syntax/Word.h
@@ -24,6 +24,8 @@ public:
std::string GetString() const
{ return m_str; }
+ std::string GetString(int factor) const;
+
int GetPos() const
{ return m_pos; }
diff --git a/contrib/other-builds/extract-mixed-syntax/gzfilebuf.h b/phrase-extract/extract-mixed-syntax/gzfilebuf.h
index 885c661f0..885c661f0 100644
--- a/contrib/other-builds/extract-mixed-syntax/gzfilebuf.h
+++ b/phrase-extract/extract-mixed-syntax/gzfilebuf.h
diff --git a/contrib/other-builds/extract-mixed-syntax/pugiconfig.hpp b/phrase-extract/extract-mixed-syntax/pugiconfig.hpp
index c2196715c..c2196715c 100644
--- a/contrib/other-builds/extract-mixed-syntax/pugiconfig.hpp
+++ b/phrase-extract/extract-mixed-syntax/pugiconfig.hpp
diff --git a/contrib/other-builds/extract-mixed-syntax/pugixml.cpp b/phrase-extract/extract-mixed-syntax/pugixml.cpp
index 4035ab1cf..4035ab1cf 100644
--- a/contrib/other-builds/extract-mixed-syntax/pugixml.cpp
+++ b/phrase-extract/extract-mixed-syntax/pugixml.cpp
diff --git a/contrib/other-builds/extract-mixed-syntax/pugixml.hpp b/phrase-extract/extract-mixed-syntax/pugixml.hpp
index 77b4dcf47..77b4dcf47 100644
--- a/contrib/other-builds/extract-mixed-syntax/pugixml.hpp
+++ b/phrase-extract/extract-mixed-syntax/pugixml.hpp
diff --git a/phrase-extract/extract-ordering-main.cpp b/phrase-extract/extract-ordering-main.cpp
deleted file mode 100644
index b418ba24d..000000000
--- a/phrase-extract/extract-ordering-main.cpp
+++ /dev/null
@@ -1,684 +0,0 @@
-/*
- * extract.cpp
- * Modified by: Rohit Gupta CDAC, Mumbai, India
- * on July 15, 2012 to implement parallel processing
- * Modified by: Nadi Tomeh - LIMSI/CNRS
- * Machine Translation Marathon 2010, Dublin
- */
-
-#include <cstdio>
-#include <iostream>
-#include <fstream>
-#include <vector>
-#include <string>
-#include <stdlib.h>
-#include <assert.h>
-#include <cstring>
-#include <sstream>
-#include <map>
-#include <set>
-#include <vector>
-
-#include "SentenceAlignment.h"
-#include "tables-core.h"
-#include "InputFileStream.h"
-#include "OutputFileStream.h"
-#include "PhraseExtractionOptions.h"
-
-using namespace std;
-using namespace MosesTraining;
-
-namespace MosesTraining
-{
-
-// HPhraseVertex represents a point in the alignment matrix
-typedef pair <int, int> HPhraseVertex;
-
-// Phrase represents a bi-phrase; each bi-phrase is defined by two points in the alignment matrix:
-// bottom-left and top-right
-typedef pair<HPhraseVertex, HPhraseVertex> HPhrase;
-
-// HPhraseVector is a vector of HPhrases
-typedef vector < HPhrase > HPhraseVector;
-
-// SentenceVertices represents, from all extracted phrases, all vertices that have the same positioning
-// The key of the map is the English index and the value is a set of the source ones
-typedef map <int, set<int> > HSentenceVertices;
-
-REO_POS getOrientWordModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool,
- int, int, int, int, int, int, int,
- bool (*)(int, int), bool (*)(int, int));
-REO_POS getOrientPhraseModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool,
- int, int, int, int, int, int, int,
- bool (*)(int, int), bool (*)(int, int),
- const HSentenceVertices &, const HSentenceVertices &);
-REO_POS getOrientHierModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool,
- int, int, int, int, int, int, int,
- bool (*)(int, int), bool (*)(int, int),
- const HSentenceVertices &, const HSentenceVertices &,
- const HSentenceVertices &, const HSentenceVertices &,
- REO_POS);
-
-void insertVertex(HSentenceVertices &, int, int);
-void insertPhraseVertices(HSentenceVertices &, HSentenceVertices &, HSentenceVertices &, HSentenceVertices &,
- int, int, int, int);
-string getOrientString(REO_POS, REO_MODEL_TYPE);
-
-bool ge(int, int);
-bool le(int, int);
-bool lt(int, int);
-
-bool isAligned (SentenceAlignment &, int, int);
-
-int sentenceOffset = 0;
-
-std::vector<std::string> Tokenize(const std::string& str,
- const std::string& delimiters = " \t");
-
-bool flexScoreFlag = false;
-
-}
-
-namespace MosesTraining
-{
-
-class ExtractTask
-{
-public:
- ExtractTask(size_t id, SentenceAlignment &sentence,PhraseExtractionOptions &initoptions, Moses::OutputFileStream &extractFileOrientation)
- :m_sentence(sentence),
- m_options(initoptions),
- m_extractFileOrientation(extractFileOrientation)
- {}
- void Run();
-private:
- void extract(SentenceAlignment &);
- void addPhrase(SentenceAlignment &, int, int, int, int, string &);
- void writePhrasesToFile();
-
- SentenceAlignment &m_sentence;
- const PhraseExtractionOptions &m_options;
- Moses::OutputFileStream &m_extractFileOrientation;
-};
-}
-
-int main(int argc, char* argv[])
-{
- cerr << "PhraseExtract v1.4, written by Philipp Koehn\n"
- << "phrase extraction from an aligned parallel corpus\n";
-
- if (argc < 6) {
- cerr << "syntax: extract en de align extract max-length [orientation [ --model [wbe|phrase|hier]-[msd|mslr|mono] ] ";
- cerr<<"| --OnlyOutputSpanInfo | --NoTTable | --GZOutput | --IncludeSentenceId | --SentenceOffset n | --InstanceWeights filename ]\n";
- exit(1);
- }
-
- Moses::OutputFileStream extractFileOrientation;
- const char* const &fileNameE = argv[1];
- const char* const &fileNameF = argv[2];
- const char* const &fileNameA = argv[3];
- const string fileNameExtract = string(argv[4]);
- PhraseExtractionOptions options(atoi(argv[5]));
-
- for(int i=6; i<argc; i++) {
- if (strcmp(argv[i],"--OnlyOutputSpanInfo") == 0) {
- options.initOnlyOutputSpanInfo(true);
- } else if (strcmp(argv[i],"orientation") == 0 || strcmp(argv[i],"--Orientation") == 0) {
- options.initOrientationFlag(true);
- } else if (strcmp(argv[i],"--FlexibilityScore") == 0) {
- options.initFlexScoreFlag(true);
- } else if (strcmp(argv[i],"--NoTTable") == 0) {
- options.initTranslationFlag(false);
- } else if (strcmp(argv[i], "--IncludeSentenceId") == 0) {
- options.initIncludeSentenceIdFlag(true);
- } else if (strcmp(argv[i], "--SentenceOffset") == 0) {
- if (i+1 >= argc || argv[i+1][0] < '0' || argv[i+1][0] > '9') {
- cerr << "extract: syntax error, used switch --SentenceOffset without a number" << endl;
- exit(1);
- }
- sentenceOffset = atoi(argv[++i]);
- } else if (strcmp(argv[i], "--GZOutput") == 0) {
- options.initGzOutput(true);
- } else if (strcmp(argv[i], "--InstanceWeights") == 0) {
- if (i+1 >= argc) {
- cerr << "extract: syntax error, used switch --InstanceWeights without file name" << endl;
- exit(1);
- }
- options.initInstanceWeightsFile(argv[++i]);
- } else if (strcmp(argv[i], "--Debug") == 0) {
- options.debug = true;
- } else if (strcmp(argv[i], "--MinPhraseLength") == 0) {
- options.minPhraseLength = atoi(argv[++i]);
- } else if (strcmp(argv[i], "--Separator") == 0) {
- options.separator = argv[++i];
- } else if(strcmp(argv[i],"--model") == 0) {
- if (i+1 >= argc) {
- cerr << "extract: syntax error, no model's information provided to the option --model " << endl;
- exit(1);
- }
- char* modelParams = argv[++i];
- char* modelName = strtok(modelParams, "-");
- char* modelType = strtok(NULL, "-");
-
- // REO_MODEL_TYPE intModelType;
-
- if(strcmp(modelName, "wbe") == 0) {
- options.initWordModel(true);
- if(strcmp(modelType, "msd") == 0)
- options.initWordType(REO_MSD);
- else if(strcmp(modelType, "mslr") == 0)
- options.initWordType(REO_MSLR);
- else if(strcmp(modelType, "mono") == 0 || strcmp(modelType, "monotonicity") == 0)
- options.initWordType(REO_MONO);
- else {
- cerr << "extract: syntax error, unknown reordering model type: " << modelType << endl;
- exit(1);
- }
- } else if(strcmp(modelName, "phrase") == 0) {
- options.initPhraseModel(true);
- if(strcmp(modelType, "msd") == 0)
- options.initPhraseType(REO_MSD);
- else if(strcmp(modelType, "mslr") == 0)
- options.initPhraseType(REO_MSLR);
- else if(strcmp(modelType, "mono") == 0 || strcmp(modelType, "monotonicity") == 0)
- options.initPhraseType(REO_MONO);
- else {
- cerr << "extract: syntax error, unknown reordering model type: " << modelType << endl;
- exit(1);
- }
- } else if(strcmp(modelName, "hier") == 0) {
- options.initHierModel(true);
- if(strcmp(modelType, "msd") == 0)
- options.initHierType(REO_MSD);
- else if(strcmp(modelType, "mslr") == 0)
- options.initHierType(REO_MSLR);
- else if(strcmp(modelType, "mono") == 0 || strcmp(modelType, "monotonicity") == 0)
- options.initHierType(REO_MONO);
- else {
- cerr << "extract: syntax error, unknown reordering model type: " << modelType << endl;
- exit(1);
- }
- } else {
- cerr << "extract: syntax error, unknown reordering model: " << modelName << endl;
- exit(1);
- }
-
- options.initAllModelsOutputFlag(true);
- } else {
- cerr << "extract: syntax error, unknown option '" << string(argv[i]) << "'\n";
- exit(1);
- }
- }
-
- // default reordering model if no model selected
- // allows for the old syntax to be used
- if(options.isOrientationFlag() && !options.isAllModelsOutputFlag()) {
- options.initWordModel(true);
- options.initWordType(REO_MSD);
- }
-
- // open input files
- Moses::InputFileStream eFile(fileNameE);
- Moses::InputFileStream fFile(fileNameF);
- Moses::InputFileStream aFile(fileNameA);
-
- istream *eFileP = &eFile;
- istream *fFileP = &fFile;
- istream *aFileP = &aFile;
-
- istream *iwFileP = NULL;
- auto_ptr<Moses::InputFileStream> instanceWeightsFile;
- if (options.getInstanceWeightsFile().length()) {
- instanceWeightsFile.reset(new Moses::InputFileStream(options.getInstanceWeightsFile()));
- iwFileP = instanceWeightsFile.get();
- }
-
- // open output files
- if (options.isOrientationFlag()) {
- string fileNameExtractOrientation = fileNameExtract + ".o" + (options.isGzOutput()?".gz":"");
- extractFileOrientation.Open(fileNameExtractOrientation.c_str());
- }
-
- int i = sentenceOffset;
-
- string englishString, foreignString, alignmentString, weightString;
-
- while(getline(*eFileP, englishString)) {
- i++;
-
- getline(*eFileP, englishString);
- getline(*fFileP, foreignString);
- getline(*aFileP, alignmentString);
- if (iwFileP) {
- getline(*iwFileP, weightString);
- }
-
- if (i%10000 == 0) cerr << "." << flush;
-
- SentenceAlignment sentence;
- // cout << "read in: " << englishString << " & " << foreignString << " & " << alignmentString << endl;
- //az: output src, tgt, and alingment line
- if (options.isOnlyOutputSpanInfo()) {
- cout << "LOG: SRC: " << foreignString << endl;
- cout << "LOG: TGT: " << englishString << endl;
- cout << "LOG: ALT: " << alignmentString << endl;
- cout << "LOG: PHRASES_BEGIN:" << endl;
- }
- if (sentence.create( englishString.c_str(), foreignString.c_str(), alignmentString.c_str(), weightString.c_str(), i, false)) {
- ExtractTask *task = new ExtractTask(i-1, sentence, options, extractFileOrientation);
- task->Run();
- delete task;
-
- }
- if (options.isOnlyOutputSpanInfo()) cout << "LOG: PHRASES_END:" << endl; //az: mark end of phrases
- }
-
- eFile.Close();
- fFile.Close();
- aFile.Close();
-
- //az: only close if we actually opened it
- if (!options.isOnlyOutputSpanInfo()) {
- if (options.isOrientationFlag()) {
- extractFileOrientation.Close();
- }
- }
-}
-
-namespace MosesTraining
-{
-void ExtractTask::Run()
-{
- extract(m_sentence);
-}
-
-void ExtractTask::extract(SentenceAlignment &sentence)
-{
- int countE = sentence.target.size();
- int countF = sentence.source.size();
-
- HPhraseVector inboundPhrases;
-
- HSentenceVertices inTopLeft;
- HSentenceVertices inTopRight;
- HSentenceVertices inBottomLeft;
- HSentenceVertices inBottomRight;
-
- HSentenceVertices outTopLeft;
- HSentenceVertices outTopRight;
- HSentenceVertices outBottomLeft;
- HSentenceVertices outBottomRight;
-
- HSentenceVertices::const_iterator it;
-
- bool relaxLimit = m_options.isHierModel();
- bool buildExtraStructure = m_options.isPhraseModel() || m_options.isHierModel();
-
- // check alignments for target phrase startE...endE
- // loop over extracted phrases which are compatible with the word-alignments
- for(int startE=0; startE<countE; startE++) {
- for(int endE=startE;
- (endE<countE && (relaxLimit || endE<startE+m_options.maxPhraseLength));
- endE++) {
-
- int minF = 9999;
- int maxF = -1;
- vector< int > usedF = sentence.alignedCountS;
- for(int ei=startE; ei<=endE; ei++) {
- for(size_t i=0; i<sentence.alignedToT[ei].size(); i++) {
- int fi = sentence.alignedToT[ei][i];
- if (fi<minF) {
- minF = fi;
- }
- if (fi>maxF) {
- maxF = fi;
- }
- usedF[ fi ]--;
- }
- }
-
- if (maxF >= 0 && // aligned to any source words at all
- (relaxLimit || maxF-minF < m_options.maxPhraseLength)) { // source phrase within limits
-
- // check if source words are aligned to out of bound target words
- bool out_of_bounds = false;
- for(int fi=minF; fi<=maxF && !out_of_bounds; fi++)
- if (usedF[fi]>0) {
- // cout << "ouf of bounds: " << fi << "\n";
- out_of_bounds = true;
- }
-
- // cout << "doing if for ( " << minF << "-" << maxF << ", " << startE << "," << endE << ")\n";
- if (!out_of_bounds) {
- // start point of source phrase may retreat over unaligned
- for(int startF=minF;
- (startF>=0 &&
- (relaxLimit || startF>maxF-m_options.maxPhraseLength) && // within length limit
- (startF==minF || sentence.alignedCountS[startF]==0)); // unaligned
- startF--)
- // end point of source phrase may advance over unaligned
- for(int endF=maxF;
- (endF<countF &&
- (relaxLimit || endF<startF+m_options.maxPhraseLength) && // within length limit
- (endF - startF + 1 > m_options.minPhraseLength) && // within length limit
- (endF==maxF || sentence.alignedCountS[endF]==0)); // unaligned
- endF++) { // at this point we have extracted a phrase
- if(buildExtraStructure) { // phrase || hier
- if(endE-startE < m_options.maxPhraseLength && endF-startF < m_options.maxPhraseLength) { // within limit
- inboundPhrases.push_back(HPhrase(HPhraseVertex(startF,startE),
- HPhraseVertex(endF,endE)));
- insertPhraseVertices(inTopLeft, inTopRight, inBottomLeft, inBottomRight,
- startF, startE, endF, endE);
- } else
- insertPhraseVertices(outTopLeft, outTopRight, outBottomLeft, outBottomRight,
- startF, startE, endF, endE);
- } else {
- string orientationInfo = "";
- if(m_options.isWordModel()) {
- REO_POS wordPrevOrient, wordNextOrient;
- bool connectedLeftTopP = isAligned( sentence, startF-1, startE-1 );
- bool connectedRightTopP = isAligned( sentence, endF+1, startE-1 );
- bool connectedLeftTopN = isAligned( sentence, endF+1, endE+1 );
- bool connectedRightTopN = isAligned( sentence, startF-1, endE+1 );
- wordPrevOrient = getOrientWordModel(sentence, m_options.isWordType(), connectedLeftTopP, connectedRightTopP, startF, endF, startE, endE, countF, 0, 1, &ge, &lt);
- wordNextOrient = getOrientWordModel(sentence, m_options.isWordType(), connectedLeftTopN, connectedRightTopN, endF, startF, endE, startE, 0, countF, -1, &lt, &ge);
- orientationInfo += getOrientString(wordPrevOrient, m_options.isWordType()) + " " + getOrientString(wordNextOrient, m_options.isWordType());
- if(m_options.isAllModelsOutputFlag())
- " | | ";
- }
- addPhrase(sentence, startE, endE, startF, endF, orientationInfo);
- }
- }
- }
- }
- }
- }
-
-
-}
-
-REO_POS getOrientWordModel(SentenceAlignment & sentence, REO_MODEL_TYPE modelType,
- bool connectedLeftTop, bool connectedRightTop,
- int startF, int endF, int startE, int endE, int countF, int zero, int unit,
- bool (*ge)(int, int), bool (*lt)(int, int) )
-{
-
- if( connectedLeftTop && !connectedRightTop)
- return LEFT;
- if(modelType == REO_MONO)
- return UNKNOWN;
- if (!connectedLeftTop && connectedRightTop)
- return RIGHT;
- if(modelType == REO_MSD)
- return UNKNOWN;
- for(int indexF=startF-2*unit; (*ge)(indexF, zero) && !connectedLeftTop; indexF=indexF-unit)
- connectedLeftTop = isAligned(sentence, indexF, startE-unit);
- for(int indexF=endF+2*unit; (*lt)(indexF,countF) && !connectedRightTop; indexF=indexF+unit)
- connectedRightTop = isAligned(sentence, indexF, startE-unit);
- if(connectedLeftTop && !connectedRightTop)
- return DRIGHT;
- else if(!connectedLeftTop && connectedRightTop)
- return DLEFT;
- return UNKNOWN;
-}
-
-// to be called with countF-1 instead of countF
-REO_POS getOrientPhraseModel (SentenceAlignment & sentence, REO_MODEL_TYPE modelType,
- bool connectedLeftTop, bool connectedRightTop,
- int startF, int endF, int startE, int endE, int countF, int zero, int unit,
- bool (*ge)(int, int), bool (*lt)(int, int),
- const HSentenceVertices & inBottomRight, const HSentenceVertices & inBottomLeft)
-{
-
- HSentenceVertices::const_iterator it;
-
- if((connectedLeftTop && !connectedRightTop) ||
- //(startE == 0 && startF == 0) ||
- //(startE == sentence.target.size()-1 && startF == sentence.source.size()-1) ||
- ((it = inBottomRight.find(startE - unit)) != inBottomRight.end() &&
- it->second.find(startF-unit) != it->second.end()))
- return LEFT;
- if(modelType == REO_MONO)
- return UNKNOWN;
- if((!connectedLeftTop && connectedRightTop) ||
- ((it = inBottomLeft.find(startE - unit)) != inBottomLeft.end() && it->second.find(endF + unit) != it->second.end()))
- return RIGHT;
- if(modelType == REO_MSD)
- return UNKNOWN;
- connectedLeftTop = false;
- for(int indexF=startF-2*unit; (*ge)(indexF, zero) && !connectedLeftTop; indexF=indexF-unit)
- if(connectedLeftTop = (it = inBottomRight.find(startE - unit)) != inBottomRight.end() &&
- it->second.find(indexF) != it->second.end())
- return DRIGHT;
- connectedRightTop = false;
- for(int indexF=endF+2*unit; (*lt)(indexF, countF) && !connectedRightTop; indexF=indexF+unit)
- if(connectedRightTop = (it = inBottomLeft.find(startE - unit)) != inBottomLeft.end() &&
- it->second.find(indexF) != it->second.end())
- return DLEFT;
- return UNKNOWN;
-}
-
-// to be called with countF-1 instead of countF
-REO_POS getOrientHierModel (SentenceAlignment & sentence, REO_MODEL_TYPE modelType,
- bool connectedLeftTop, bool connectedRightTop,
- int startF, int endF, int startE, int endE, int countF, int zero, int unit,
- bool (*ge)(int, int), bool (*lt)(int, int),
- const HSentenceVertices & inBottomRight, const HSentenceVertices & inBottomLeft,
- const HSentenceVertices & outBottomRight, const HSentenceVertices & outBottomLeft,
- REO_POS phraseOrient)
-{
-
- HSentenceVertices::const_iterator it;
-
- if(phraseOrient == LEFT ||
- (connectedLeftTop && !connectedRightTop) ||
- // (startE == 0 && startF == 0) ||
- //(startE == sentence.target.size()-1 && startF == sentence.source.size()-1) ||
- ((it = inBottomRight.find(startE - unit)) != inBottomRight.end() &&
- it->second.find(startF-unit) != it->second.end()) ||
- ((it = outBottomRight.find(startE - unit)) != outBottomRight.end() &&
- it->second.find(startF-unit) != it->second.end()))
- return LEFT;
- if(modelType == REO_MONO)
- return UNKNOWN;
- if(phraseOrient == RIGHT ||
- (!connectedLeftTop && connectedRightTop) ||
- ((it = inBottomLeft.find(startE - unit)) != inBottomLeft.end() &&
- it->second.find(endF + unit) != it->second.end()) ||
- ((it = outBottomLeft.find(startE - unit)) != outBottomLeft.end() &&
- it->second.find(endF + unit) != it->second.end()))
- return RIGHT;
- if(modelType == REO_MSD)
- return UNKNOWN;
- if(phraseOrient != UNKNOWN)
- return phraseOrient;
- connectedLeftTop = false;
- for(int indexF=startF-2*unit; (*ge)(indexF, zero) && !connectedLeftTop; indexF=indexF-unit) {
- if((connectedLeftTop = (it = inBottomRight.find(startE - unit)) != inBottomRight.end() &&
- it->second.find(indexF) != it->second.end()) ||
- (connectedLeftTop = (it = outBottomRight.find(startE - unit)) != outBottomRight.end() &&
- it->second.find(indexF) != it->second.end()))
- return DRIGHT;
- }
- connectedRightTop = false;
- for(int indexF=endF+2*unit; (*lt)(indexF, countF) && !connectedRightTop; indexF=indexF+unit) {
- if((connectedRightTop = (it = inBottomLeft.find(startE - unit)) != inBottomLeft.end() &&
- it->second.find(indexF) != it->second.end()) ||
- (connectedRightTop = (it = outBottomLeft.find(startE - unit)) != outBottomLeft.end() &&
- it->second.find(indexF) != it->second.end()))
- return DLEFT;
- }
- return UNKNOWN;
-}
-
-bool isAligned ( SentenceAlignment &sentence, int fi, int ei )
-{
- if (ei == -1 && fi == -1)
- return true;
- if (ei <= -1 || fi <= -1)
- return false;
- if ((size_t)ei == sentence.target.size() && (size_t)fi == sentence.source.size())
- return true;
- if ((size_t)ei >= sentence.target.size() || (size_t)fi >= sentence.source.size())
- return false;
- for(size_t i=0; i<sentence.alignedToT[ei].size(); i++)
- if (sentence.alignedToT[ei][i] == fi)
- return true;
- return false;
-}
-
-bool ge(int first, int second)
-{
- return first >= second;
-}
-
-bool le(int first, int second)
-{
- return first <= second;
-}
-
-bool lt(int first, int second)
-{
- return first < second;
-}
-
-void insertVertex( HSentenceVertices & corners, int x, int y )
-{
- set<int> tmp;
- tmp.insert(x);
- pair< HSentenceVertices::iterator, bool > ret = corners.insert( pair<int, set<int> > (y, tmp) );
- if(ret.second == false) {
- ret.first->second.insert(x);
- }
-}
-
-void insertPhraseVertices(
- HSentenceVertices & topLeft,
- HSentenceVertices & topRight,
- HSentenceVertices & bottomLeft,
- HSentenceVertices & bottomRight,
- int startF, int startE, int endF, int endE)
-{
-
- insertVertex(topLeft, startF, startE);
- insertVertex(topRight, endF, startE);
- insertVertex(bottomLeft, startF, endE);
- insertVertex(bottomRight, endF, endE);
-}
-
-string getOrientString(REO_POS orient, REO_MODEL_TYPE modelType)
-{
- switch(orient) {
- case LEFT:
- return "mono";
- break;
- case RIGHT:
- return "swap";
- break;
- case DRIGHT:
- return "dright";
- break;
- case DLEFT:
- return "dleft";
- break;
- case UNKNOWN:
- switch(modelType) {
- case REO_MONO:
- return "nomono";
- break;
- case REO_MSD:
- return "other";
- break;
- case REO_MSLR:
- return "dright";
- break;
- }
- break;
- }
- return "";
-}
-
-int getClass(const std::string &str)
-{
- size_t pos = str.find("swap");
- if (pos == str.npos) {
- return 0;
- }
- else if (pos == 0) {
- return 1;
- }
- else {
- return 2;
- }
-}
-
-void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE, int startF, int endF , string &orientationInfo)
-{
- if (m_options.isOnlyOutputSpanInfo()) {
- cout << startF << " " << endF << " " << startE << " " << endE << endl;
- return;
- }
-
- const string &sep = m_options.separator;
-
- m_extractFileOrientation << sentence.sentenceID << " " << sep << " ";
- m_extractFileOrientation << getClass(orientationInfo) << " " << sep << " ";
-
- // position
- m_extractFileOrientation << startF << " " << endF << " " << sep << " ";
-
- // start
- m_extractFileOrientation << "<s> ";
- for(int fi=0; fi<startF; fi++) {
- m_extractFileOrientation << sentence.source[fi] << " ";
- }
- m_extractFileOrientation << sep << " ";
-
- // middle
- for(int fi=startF; fi<=endF; fi++) {
- m_extractFileOrientation << sentence.source[fi] << " ";
- }
- m_extractFileOrientation << sep << " ";
-
- // end
- for(int fi=endF+1; fi<sentence.source.size(); fi++) {
- m_extractFileOrientation << sentence.source[fi] << " ";
- }
- m_extractFileOrientation << "</s> ";
-
-
- // target
- /*
- for(int ei=startE; ei<=endE; ei++) {
- m_extractFileOrientation << sentence.target[ei] << " ";
- }
- */
- m_extractFileOrientation << endl;
-}
-
-
-/** tokenise input string to vector of string. each element has been separated by a character in the delimiters argument.
- The separator can only be 1 character long. The default delimiters are space or tab
-*/
-std::vector<std::string> Tokenize(const std::string& str,
- const std::string& delimiters)
-{
- std::vector<std::string> tokens;
- // Skip delimiters at beginning.
- std::string::size_type lastPos = str.find_first_not_of(delimiters, 0);
- // Find first "non-delimiter".
- std::string::size_type pos = str.find_first_of(delimiters, lastPos);
-
- while (std::string::npos != pos || std::string::npos != lastPos) {
- // Found a token, add it to the vector.
- tokens.push_back(str.substr(lastPos, pos - lastPos));
- // Skip delimiters. Note the "not_of"
- lastPos = str.find_first_not_of(delimiters, pos);
- // Find next "non-delimiter"
- pos = str.find_first_of(delimiters, lastPos);
- }
-
- return tokens;
-}
-
-}
diff --git a/phrase-extract/extract-rules-main.cpp b/phrase-extract/extract-rules-main.cpp
index 592946b0d..676c145e2 100644
--- a/phrase-extract/extract-rules-main.cpp
+++ b/phrase-extract/extract-rules-main.cpp
@@ -29,6 +29,7 @@
#include <sstream>
#include <string>
#include <vector>
+#include <limits>
#ifdef WIN32
// Include Visual Leak Detector
@@ -407,7 +408,7 @@ void ExtractTask::extractRules()
// find find aligned source words
// first: find minimum and maximum source word
- int minS = 9999;
+ int minS = std::numeric_limits<int>::max();
int maxS = -1;
vector< int > usedS = m_sentence.alignedCountS;
for(int ti=startT; ti<=endT; ti++) {
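Replacing the sentinel 9999 with std::numeric_limits<int>::max() keeps the "no aligned source word seen yet" marker from colliding with a real position in very long sentences. A minimal sketch of the min/max scan under that sentinel, assuming a plain vector of aligned positions (values are hypothetical):

#include <iostream>
#include <limits>
#include <vector>

int main()
{
  // Aligned source positions for one target span.
  std::vector<int> aligned = {7, 3, 12};

  int minS = std::numeric_limits<int>::max(); // "nothing seen yet" sentinel
  int maxS = -1;
  for (size_t i = 0; i < aligned.size(); ++i) {
    if (aligned[i] < minS) minS = aligned[i];
    if (aligned[i] > maxS) maxS = aligned[i];
  }
  std::cout << minS << " " << maxS << "\n"; // 3 12
  return 0;
}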
diff --git a/phrase-extract/relax-parse-main.cpp b/phrase-extract/relax-parse-main.cpp
index e5feb94d0..b415c4d0e 100644
--- a/phrase-extract/relax-parse-main.cpp
+++ b/phrase-extract/relax-parse-main.cpp
@@ -31,15 +31,15 @@ int main(int argc, char* argv[])
// loop through all sentences
int i=0;
- string inBuffer;
- while(getline(cin, inBuffer)) {
+ string inBufferString;
+ while(cin.peek() != EOF) {
+ getline(cin,inBufferString);
i++;
if (i%1000 == 0) cerr << "." << flush;
if (i%10000 == 0) cerr << ":" << flush;
if (i%100000 == 0) cerr << "!" << flush;
// process into syntax tree representation
- string inBufferString = string( inBuffer );
set< string > labelCollection; // set of labels, not used
map< string, int > topLabelCollection; // count of top labels, not used
SyntaxTree tree;
diff --git a/phrase-extract/score-main.cpp b/phrase-extract/score-main.cpp
index e8ba1d942..7f155f6ed 100644
--- a/phrase-extract/score-main.cpp
+++ b/phrase-extract/score-main.cpp
@@ -46,6 +46,7 @@ LexicalTable lexTable;
bool inverseFlag = false;
bool hierarchicalFlag = false;
bool pcfgFlag = false;
+bool phraseOrientationFlag = false;
bool treeFragmentsFlag = false;
bool sourceSyntaxLabelsFlag = false;
bool sourceSyntaxLabelSetFlag = false;
@@ -69,6 +70,7 @@ bool nonTermContext = false;
int countOfCounts[COC_MAX+1];
int totalDistinct = 0;
float minCountHierarchical = 0;
+bool phraseOrientationPriorsFlag = false;
boost::unordered_map<std::string,float> sourceLHSCounts;
boost::unordered_map<std::string, boost::unordered_map<std::string,float>* > targetLHSAndSourceLHSJointCounts;
@@ -82,6 +84,9 @@ std::set<std::string> targetPreferenceLabelSet;
std::map<std::string,size_t> targetPreferenceLabels;
std::vector<std::string> targetPreferenceLabelsByIndex;
+std::vector<float> orientationClassPriorsL2R(4,0); // mono swap dright dleft
+std::vector<float> orientationClassPriorsR2L(4,0); // mono swap dright dleft
+
Vocabulary vcbT;
Vocabulary vcbS;
@@ -106,6 +111,7 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair, float, int, ostrea
double computeLexicalTranslation( const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource );
double computeUnalignedPenalty( const ALIGNMENT *alignmentTargetToSource );
set<std::string> functionWordList;
+void loadOrientationPriors(const std::string &fileNamePhraseOrientationPriors, std::vector<float> &orientationClassPriorsL2R, std::vector<float> &orientationClassPriorsR2L);
void loadFunctionWords( const string &fileNameFunctionWords );
double computeUnalignedFWPenalty( const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource );
int calcCrossedNonTerm( const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource );
@@ -136,6 +142,7 @@ int main(int argc, char* argv[])
std::string fileNameTargetPreferenceLabelSet;
std::string fileNameLeftHandSideTargetPreferenceLabelCounts;
std::string fileNameLeftHandSideRuleTargetTargetPreferenceLabelCounts;
+ std::string fileNamePhraseOrientationPriors;
std::vector<std::string> featureArgs; // all unknown args passed to feature manager
for(int i=4; i<argc; i++) {
@@ -148,9 +155,12 @@ int main(int argc, char* argv[])
} else if (strcmp(argv[i],"--PCFG") == 0) {
pcfgFlag = true;
std::cerr << "including PCFG scores" << std::endl;
+ } else if (strcmp(argv[i],"--PhraseOrientation") == 0) {
+ phraseOrientationFlag = true;
+ std::cerr << "including phrase orientation information" << std::endl;
} else if (strcmp(argv[i],"--TreeFragments") == 0) {
treeFragmentsFlag = true;
- std::cerr << "including tree fragment information from syntactic parse\n";
+ std::cerr << "including tree fragment information from syntactic parse" << std::endl;
} else if (strcmp(argv[i],"--SourceLabels") == 0) {
sourceSyntaxLabelsFlag = true;
std::cerr << "including source label information" << std::endl;
@@ -216,6 +226,14 @@ int main(int argc, char* argv[])
} else if (strcmp(argv[i],"--CrossedNonTerm") == 0) {
crossedNonTerm = true;
std::cerr << "crossed non-term reordering feature" << std::endl;
+ } else if (strcmp(argv[i],"--PhraseOrientationPriors") == 0) {
+ phraseOrientationPriorsFlag = true;
+ if (i+1==argc) {
+ std::cerr << "ERROR: specify priors file for phrase orientation!" << std::endl;
+ exit(1);
+ }
+ fileNamePhraseOrientationPriors = argv[++i];
+ std::cerr << "smoothing phrase orientation with priors from " << fileNamePhraseOrientationPriors << std::endl;
} else if (strcmp(argv[i],"--SpanLength") == 0) {
spanLength = true;
std::cerr << "span length feature" << std::endl;
@@ -254,6 +272,10 @@ int main(int argc, char* argv[])
for(int i=1; i<=COC_MAX; i++) countOfCounts[i] = 0;
}
+ if (phraseOrientationPriorsFlag) {
+ loadOrientationPriors(fileNamePhraseOrientationPriors,orientationClassPriorsL2R,orientationClassPriorsR2L);
+ }
+
// sorted phrase extraction file
Moses::InputFileStream extractFile(fileNameExtract);
@@ -774,11 +796,6 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
if (kneserNeyFlag)
phraseTableFile << " " << distinctCount;
- if ((treeFragmentsFlag || sourceSyntaxLabelsFlag || targetPreferenceLabelsFlag) &&
- !inverseFlag) {
- phraseTableFile << " |||";
- }
-
phraseTableFile << " |||";
// tree fragments
@@ -832,6 +849,13 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
}
}
+ // phrase orientation
+ if (phraseOrientationFlag && !inverseFlag) {
+ phraseTableFile << " {{Orientation ";
+ phrasePair.CollectAllPhraseOrientations("Orientation",orientationClassPriorsL2R,orientationClassPriorsR2L,0.5,phraseTableFile);
+ phraseTableFile << "}}";
+ }
+
if (spanLength && !inverseFlag) {
string propValue = phrasePair.CollectAllPropertyValues("SpanLength");
if (!propValue.empty()) {
@@ -851,6 +875,94 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
+void loadOrientationPriors(const std::string &fileNamePhraseOrientationPriors,
+ std::vector<float> &orientationClassPriorsL2R,
+ std::vector<float> &orientationClassPriorsR2L)
+{
+ assert(orientationClassPriorsL2R.size()==4 && orientationClassPriorsR2L.size()==4); // mono swap dright dleft
+
+ std::cerr << "Loading phrase orientation priors from " << fileNamePhraseOrientationPriors;
+ ifstream inFile;
+ inFile.open(fileNamePhraseOrientationPriors.c_str());
+ if (inFile.fail()) {
+ std::cerr << " - ERROR: could not open file" << std::endl;
+ exit(1);
+ }
+
+ std::string line;
+ size_t linesRead = 0;
+ float l2rSum = 0;
+ float r2lSum = 0;
+ while (getline(inFile, line)) {
+ istringstream tokenizer(line);
+ std::string key;
+ tokenizer >> key;
+
+ bool l2rFlag = false;
+ bool r2lFlag = false;
+ if (!key.substr(0,4).compare("L2R_")) {
+ l2rFlag = true;
+ }
+ if (!key.substr(0,4).compare("R2L_")) {
+ r2lFlag = true;
+ }
+ if (!l2rFlag && !r2lFlag) {
+ std::cerr << " - ERROR: malformed line in orientation priors file" << std::endl;
+ }
+ key.erase(0,4);
+
+ int orientationClassId = -1;
+ if (!key.compare("mono")) {
+ orientationClassId = 0;
+ }
+ if (!key.compare("swap")) {
+ orientationClassId = 1;
+ }
+ if (!key.compare("dright")) {
+ orientationClassId = 2;
+ }
+ if (!key.compare("dleft")) {
+ orientationClassId = 3;
+ }
+ if (orientationClassId == -1) {
+ std::cerr << " - ERROR: malformed line in orientation priors file" << std::endl;
+ }
+
+ float count;
+ tokenizer >> count;
+
+ if (l2rFlag) {
+ orientationClassPriorsL2R[orientationClassId] += count;
+ l2rSum += count;
+ }
+ if (r2lFlag) {
+ orientationClassPriorsR2L[orientationClassId] += count;
+ r2lSum += count;
+ }
+
+ ++linesRead;
+ }
+
+ // normalization: return prior probabilities, not counts
+ if (l2rSum != 0) {
+ for (std::vector<float>::iterator orientationClassPriorsL2RIt = orientationClassPriorsL2R.begin();
+ orientationClassPriorsL2RIt != orientationClassPriorsL2R.end(); ++orientationClassPriorsL2RIt) {
+ *orientationClassPriorsL2RIt /= l2rSum;
+ }
+ }
+ if (r2lSum != 0) {
+ for (std::vector<float>::iterator orientationClassPriorsR2LIt = orientationClassPriorsR2L.begin();
+ orientationClassPriorsR2LIt != orientationClassPriorsR2L.end(); ++orientationClassPriorsR2LIt) {
+ *orientationClassPriorsR2LIt /= r2lSum;
+ }
+ }
+
+ std::cerr << " - read " << linesRead << " lines from orientation priors file" << std::endl;
+ inFile.close();
+}
+
+
+
bool calcCrossedNonTerm( size_t targetPos, size_t sourcePos, const ALIGNMENT *alignmentTargetToSource )
{
for (size_t currTarget = 0; currTarget < alignmentTargetToSource->size(); ++currTarget) {
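loadOrientationPriors reads lines that pair a key such as L2R_mono or R2L_dleft with a count, accumulates the counts into the four orientation classes (mono, swap, dright, dleft) for each direction, and normalizes each direction so the vectors hold prior probabilities rather than counts; outputPhrasePair then passes them, together with the constant 0.5, to CollectAllPhraseOrientations when writing the {{Orientation ...}} property. A minimal sketch of the parse-and-normalize step for the L2R direction only, assuming the same line format (the file name below is hypothetical):

#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Read lines like "L2R_mono 12345" and return normalized left-to-right class
// priors in the order mono, swap, dright, dleft.  R2L handling is symmetric.
std::vector<float> LoadL2RPriors(const std::string &fileName)
{
  const std::string classes[4] = {"mono", "swap", "dright", "dleft"};
  std::vector<float> priors(4, 0.0f);
  float sum = 0;

  std::ifstream inFile(fileName.c_str());
  std::string key;
  float count;
  while (inFile >> key >> count) {
    if (key.compare(0, 4, "L2R_") != 0) continue; // this sketch ignores R2L_ lines
    for (int c = 0; c < 4; ++c) {
      if (key.substr(4) == classes[c]) {
        priors[c] += count;
        sum += count;
      }
    }
  }
  if (sum != 0) {
    for (int c = 0; c < 4; ++c) priors[c] /= sum; // counts -> probabilities
  }
  return priors;
}

int main()
{
  // Hypothetical file name; the real tool takes it from --PhraseOrientationPriors.
  std::vector<float> p = LoadL2RPriors("phrase-orientation-priors.txt");
  for (size_t i = 0; i < p.size(); ++i) std::cout << p[i] << " ";
  std::cout << "\n";
  return 0;
}

Note that this sketch silently skips unrecognized class names, whereas the code above only prints a warning for malformed lines.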
diff --git a/scripts/OSM/OSM-Train.perl b/scripts/OSM/OSM-Train.perl
index 2d2427bc5..ae5a386fa 100755
--- a/scripts/OSM/OSM-Train.perl
+++ b/scripts/OSM/OSM-Train.perl
@@ -190,3 +190,22 @@ sub open_or_zcat {
open($hdl,$read) or die "Can't read $fn ($read)";
return $hdl;
}
+
+sub safesystem {
+ print STDERR "Executing: @_\n";
+ system(@_);
+ if ($? == -1) {
+ print STDERR "ERROR: Failed to execute: @_\n $!\n";
+ exit(1);
+ }
+ elsif ($? & 127) {
+ printf STDERR "ERROR: Execution of: @_\n died with signal %d, %s coredump\n",
+ ($? & 127), ($? & 128) ? 'with' : 'without';
+ exit(1);
+ }
+ else {
+ my $exitcode = $? >> 8;
+ print STDERR "Exit code: $exitcode\n" if $exitcode;
+ return ! $exitcode;
+ }
+}
\ No newline at end of file
diff --git a/scripts/Transliteration/post-decoding-transliteration.pl b/scripts/Transliteration/post-decoding-transliteration.pl
index 8aca3460d..69fd8bf46 100755
--- a/scripts/Transliteration/post-decoding-transliteration.pl
+++ b/scripts/Transliteration/post-decoding-transliteration.pl
@@ -21,12 +21,12 @@ die("ERROR: wrong syntax when invoking postDecodingTransliteration.perl")
'transliteration-model-dir=s' => \$TRANSLIT_MODEL,
'input-extension=s' => \$INPUT_EXTENSION,
'output-extension=s' => \$OUTPUT_EXTENSION,
- 'decoder=s' => \$DECODER,
+ 'decoder=s' => \$DECODER,
'oov-file=s' => \$OOV_FILE,
'input-file=s' => \$INPUT_FILE,
'output-file=s' => \$OUTPUT_FILE,
'verbose' => \$VERBOSE,
- 'language-model=s' => \$LM_FILE);
+ 'language-model=s' => \$LM_FILE);
# check if the files are in place
die("ERROR: you need to define --moses-src-dir --external-bin-dir, --transliteration-model-dir, --oov-file, --output-file --input-extension, --output-extension, and --language-model")
@@ -38,6 +38,11 @@ die("ERROR: you need to define --moses-src-dir --external-bin-dir, --translitera
defined($INPUT_FILE)&&
defined($EXTERNAL_BIN_DIR)&&
defined($LM_FILE));
+if (! -e $LM_FILE) {
+ my $LM_FILE_WORD = `ls $LM_FILE*word*`;
+ chop($LM_FILE_WORD);
+ $LM_FILE = $LM_FILE_WORD if $LM_FILE_WORD ne "";
+}
die("ERROR: could not find Language Model '$LM_FILE'")
unless -e $LM_FILE;
die("ERROR: could not find Transliteration Model '$TRANSLIT_MODEL'")
diff --git a/scripts/Transliteration/train-transliteration-module.pl b/scripts/Transliteration/train-transliteration-module.pl
index 355232222..7739e2a2b 100755
--- a/scripts/Transliteration/train-transliteration-module.pl
+++ b/scripts/Transliteration/train-transliteration-module.pl
@@ -13,7 +13,7 @@ print STDERR "Training Transliteration Module - Start\n".`date`;
my $ORDER = 5;
my $OUT_DIR = "/tmp/Transliteration-Model.$$";
my $___FACTOR_DELIMITER = "|";
-my ($MOSES_SRC_DIR,$CORPUS_F,$CORPUS_E,$ALIGNMENT,$SRILM_DIR,$FACTOR,$EXTERNAL_BIN_DIR,$INPUT_EXTENSION, $OUTPUT_EXTENSION, $SOURCE_SYNTAX, $TARGET_SYNTAX);
+my ($MOSES_SRC_DIR,$CORPUS_F,$CORPUS_E,$ALIGNMENT,$SRILM_DIR,$FACTOR,$EXTERNAL_BIN_DIR,$INPUT_EXTENSION, $OUTPUT_EXTENSION, $SOURCE_SYNTAX, $TARGET_SYNTAX,$DECODER);
# utilities
my $ZCAT = "gzip -cd";
@@ -31,8 +31,9 @@ die("ERROR: wrong syntax when invoking train-transliteration-module.perl")
'factor=s' => \$FACTOR,
'srilm-dir=s' => \$SRILM_DIR,
'out-dir=s' => \$OUT_DIR,
- 'source-syntax' => \$SOURCE_SYNTAX,
- 'target-syntax' => \$TARGET_SYNTAX);
+ 'decoder=s' => \$DECODER,
+ 'source-syntax' => \$SOURCE_SYNTAX,
+ 'target-syntax' => \$TARGET_SYNTAX);
# check if the files are in place
die("ERROR: you need to define --corpus-e, --corpus-f, --alignment, --srilm-dir, --moses-src-dir --external-bin-dir, --input-extension and --output-extension")
@@ -48,8 +49,9 @@ die("ERROR: could not find input corpus file '$CORPUS_F'")
unless -e $CORPUS_F;
die("ERROR: could not find output corpus file '$CORPUS_E'")
unless -e $CORPUS_E;
-die("ERROR: could not find algnment file '$ALIGNMENT'")
+die("ERROR: could not find alignment file '$ALIGNMENT'")
unless -e $ALIGNMENT;
+$DECODER = "$MOSES_SRC_DIR/bin/moses" unless defined($DECODER);
`mkdir $OUT_DIR`;
@@ -184,7 +186,7 @@ sub train_transliteration_module{
`$MOSES_SRC_DIR/scripts/ems/support/substitute-filtered-tables.perl $OUT_DIR/tuning/filtered/moses.ini < $OUT_DIR/model/moses.ini > $OUT_DIR/tuning/moses.filtered.ini`;
- `$MOSES_SRC_DIR/scripts/training/mert-moses.pl $OUT_DIR/tuning/input $OUT_DIR/tuning/reference $MOSES_SRC_DIR/bin/moses $OUT_DIR/tuning/moses.filtered.ini --nbest 100 --working-dir $OUT_DIR/tuning/tmp --decoder-flags "-threads 16 -drop-unknown -v 0 -distortion-limit 0" --rootdir $MOSES_SRC_DIR/scripts -mertdir $MOSES_SRC_DIR/mert -threads=16 --no-filter-phrase-table`;
+ `$MOSES_SRC_DIR/scripts/training/mert-moses.pl $OUT_DIR/tuning/input $OUT_DIR/tuning/reference $DECODER $OUT_DIR/tuning/moses.filtered.ini --nbest 100 --working-dir $OUT_DIR/tuning/tmp --decoder-flags "-threads 16 -drop-unknown -v 0 -distortion-limit 0" --rootdir $MOSES_SRC_DIR/scripts -mertdir $MOSES_SRC_DIR/mert -threads=16 --no-filter-phrase-table`;
`cp $OUT_DIR/tuning/tmp/moses.ini $OUT_DIR/tuning/moses.ini`;
diff --git a/scripts/ems/example/config.basic b/scripts/ems/example/config.basic
index 1db8154f5..8421a8fa1 100644
--- a/scripts/ems/example/config.basic
+++ b/scripts/ems/example/config.basic
@@ -137,7 +137,7 @@ raw-stem = $wmt12-data/training/undoc.2000.$pair-extension
### tool to be used for language model training
# kenlm training
lm-training = "$moses-script-dir/ems/support/lmplz-wrapper.perl -bin $moses-bin-dir/lmplz"
-settings = "--prune '0 0 1' -T $working-dir/lm/tmp -S 50%"
+settings = "--prune '0 0 1' -T $working-dir/lm -S 20%"
# srilm
#lm-training = $srilm-dir/ngram-count
@@ -299,6 +299,7 @@ script = $moses-script-dir/training/train-model.perl
# * "-mgiza -mgiza-cpus 8" to use mgiza instead of giza
# * "-sort-buffer-size 8G -sort-compress gzip" to reduce on-disk sorting
# * "-sort-parallel 8 -cores 8" to speed up phrase table building
+# * "-parallel" for parallel execution of mkcls and giza
#
#training-options = ""
@@ -371,6 +372,11 @@ alignment-symmetrization-method = grow-diag-final-and
#mml-before-wa = "-proportion 0.9"
#mml-after-wa = "-proportion 0.9"
+### build memory mapped suffix array phrase table
+# (binarizing the reordering table is a good idea, since filtering makes little sense)
+#mmsapt = "num-features=9 pfwd=g+ pbwd=g+ smooth=0 sample=1000 workers=1"
+#binarize-all = $moses-script-dir/training/binarize-model.perl
+
### create a bilingual concordancer for the model
#
#biconcor = $moses-bin-dir/biconcor
@@ -383,11 +389,17 @@ alignment-symmetrization-method = grow-diag-final-and
#operation-sequence-model = "yes"
#operation-sequence-model-order = 5
#operation-sequence-model-settings = "-lmplz '$moses-src-dir/bin/lmplz -S 40% -T $working-dir/model/tmp'"
+#
+# if OSM training should be skipped, point to OSM Model
+#osm-model =
-### if OSM training should be skipped,
-# point to OSM Model
+### unsupervised transliteration module
+# Durrani, Sajjad, Hoang and Koehn (EACL, 2014).
+# "Integrating an Unsupervised Transliteration Model
+# into Statistical Machine Translation."
#
-# osm-model =
+#transliteration-module = "yes"
+#post-decoding-transliteration = "yes"
### lexicalized reordering: specify orientation type
# (default: only distance-based reordering model)
diff --git a/scripts/ems/example/config.factored b/scripts/ems/example/config.factored
index c3a6b2a85..9aff587ff 100644
--- a/scripts/ems/example/config.factored
+++ b/scripts/ems/example/config.factored
@@ -137,7 +137,7 @@ raw-stem = $wmt12-data/training/undoc.2000.$pair-extension
### tool to be used for language model training
# kenlm training
lm-training = "$moses-script-dir/ems/support/lmplz-wrapper.perl -bin $moses-bin-dir/lmplz"
-settings = "--prune '0 0 1' -T $working-dir/lm/tmp -S 50%"
+settings = "--prune '0 0 1' -T $working-dir/lm -S 20%"
# srilm
#lm-training = $srilm-dir/ngram-count
@@ -319,6 +319,7 @@ script = $moses-script-dir/training/train-model.perl
# * "-mgiza -mgiza-cpus 8" to use mgiza instead of giza
# * "-sort-buffer-size 8G -sort-compress gzip" to reduce on-disk sorting
# * "-sort-parallel 8 -cores 8" to speed up phrase table building
+# * "-parallel" for parallel execution of mkcls and giza
#
#training-options = ""
@@ -391,6 +392,11 @@ alignment-symmetrization-method = grow-diag-final-and
#mml-before-wa = "-proportion 0.9"
#mml-after-wa = "-proportion 0.9"
+### build memory mapped suffix array phrase table
+# (binarizing the reordering table is a good idea, since filtering makes little sense)
+#mmsapt = "num-features=9 pfwd=g+ pbwd=g+ smooth=0 sample=1000 workers=1"
+#binarize-all = $moses-script-dir/training/binarize-model.perl
+
### create a bilingual concordancer for the model
#
#biconcor = $moses-bin-dir/biconcor
@@ -403,11 +409,17 @@ alignment-symmetrization-method = grow-diag-final-and
#operation-sequence-model = "yes"
#operation-sequence-model-order = 5
#operation-sequence-model-settings = "-lmplz '$moses-src-dir/bin/lmplz -S 40% -T $working-dir/model/tmp'"
+#
+# if OSM training should be skipped, point to OSM Model
+#osm-model =
-### if OSM training should be skipped,
-# point to OSM Model
+### unsupervised transliteration module
+# Durrani, Sajjad, Hoang and Koehn (EACL, 2014).
+# "Integrating an Unsupervised Transliteration Model
+# into Statistical Machine Translation."
#
-# osm-model =
+#transliteration-module = "yes"
+#post-decoding-transliteration = "yes"
### lexicalized reordering: specify orientation type
# (default: only distance-based reordering model)
diff --git a/scripts/ems/example/config.hierarchical b/scripts/ems/example/config.hierarchical
index 673ad64a9..9d47aa001 100644
--- a/scripts/ems/example/config.hierarchical
+++ b/scripts/ems/example/config.hierarchical
@@ -137,7 +137,7 @@ raw-stem = $wmt12-data/training/undoc.2000.$pair-extension
### tool to be used for language model training
# kenlm training
lm-training = "$moses-script-dir/ems/support/lmplz-wrapper.perl -bin $moses-bin-dir/lmplz"
-settings = "--prune '0 0 1' -T $working-dir/lm/tmp -S 50%"
+settings = "--prune '0 0 1' -T $working-dir/lm -S 20%"
# srilm
#lm-training = $srilm-dir/ngram-count
@@ -299,6 +299,7 @@ script = $moses-script-dir/training/train-model.perl
# * "-mgiza -mgiza-cpus 8" to use mgiza instead of giza
# * "-sort-buffer-size 8G -sort-compress gzip" to reduce on-disk sorting
# * "-sort-parallel 8 -cores 8" to speed up phrase table building
+# * "-parallel" for parallel execution of mkcls and giza
#
#training-options = ""
@@ -371,6 +372,11 @@ alignment-symmetrization-method = grow-diag-final-and
#mml-before-wa = "-proportion 0.9"
#mml-after-wa = "-proportion 0.9"
+### build memory mapped suffix array phrase table
+# (binarizing the reordering table is a good idea, since filtering makes little sense)
+#mmsapt = "num-features=9 pfwd=g+ pbwd=g+ smooth=0 sample=1000 workers=1"
+#binarize-all = $moses-script-dir/training/binarize-model.perl
+
### create a bilingual concordancer for the model
#
#biconcor = $moses-bin-dir/biconcor
@@ -383,11 +389,17 @@ alignment-symmetrization-method = grow-diag-final-and
#operation-sequence-model = "yes"
#operation-sequence-model-order = 5
#operation-sequence-model-settings = "-lmplz '$moses-src-dir/bin/lmplz -S 40% -T $working-dir/model/tmp'"
+#
+# if OSM training should be skipped, point to OSM Model
+#osm-model =
-### if OSM training should be skipped,
-# point to OSM Model
+### unsupervised transliteration module
+# Durrani, Sajjad, Hoang and Koehn (EACL, 2014).
+# "Integrating an Unsupervised Transliteration Model
+# into Statistical Machine Translation."
#
-# osm-model =
+#transliteration-module = "yes"
+#post-decoding-transliteration = "yes"
### lexicalized reordering: specify orientation type
# (default: only distance-based reordering model)
diff --git a/scripts/ems/example/config.syntax b/scripts/ems/example/config.syntax
index 7df60f990..d874e74c0 100644
--- a/scripts/ems/example/config.syntax
+++ b/scripts/ems/example/config.syntax
@@ -141,7 +141,7 @@ raw-stem = $wmt12-data/training/undoc.2000.$pair-extension
### tool to be used for language model training
# kenlm training
lm-training = "$moses-script-dir/ems/support/lmplz-wrapper.perl -bin $moses-bin-dir/lmplz"
-settings = "--prune '0 0 1' -T $working-dir/lm/tmp -S 50%"
+settings = "--prune '0 0 1' -T $working-dir/lm -S 20%"
# srilm
#lm-training = $srilm-dir/ngram-count
@@ -303,6 +303,7 @@ script = $moses-script-dir/training/train-model.perl
# * "-mgiza -mgiza-cpus 8" to use mgiza instead of giza
# * "-sort-buffer-size 8G -sort-compress gzip" to reduce on-disk sorting
# * "-sort-parallel 8 -cores 8" to speed up phrase table building
+# * "-parallel" for parallel execution of mkcls and giza
#
#training-options = ""
@@ -375,6 +376,11 @@ alignment-symmetrization-method = grow-diag-final-and
#mml-before-wa = "-proportion 0.9"
#mml-after-wa = "-proportion 0.9"
+### build memory mapped suffix array phrase table
+# (binarizing the reordering table is a good idea, since filtering makes little sense)
+#mmsapt = "num-features=9 pfwd=g+ pbwd=g+ smooth=0 sample=1000 workers=1"
+#binarize-all = $moses-script-dir/training/binarize-model.perl
+
### create a bilingual concordancer for the model
#
#biconcor = $moses-bin-dir/biconcor
@@ -387,11 +393,17 @@ alignment-symmetrization-method = grow-diag-final-and
#operation-sequence-model = "yes"
#operation-sequence-model-order = 5
#operation-sequence-model-settings = "-lmplz '$moses-src-dir/bin/lmplz -S 40% -T $working-dir/model/tmp'"
+#
+# if OSM training should be skipped, point to OSM Model
+#osm-model =
-### if OSM training should be skipped,
-# point to OSM Model
+### unsupervised transliteration module
+# Durrani, Sajjad, Hoang and Koehn (EACL, 2014).
+# "Integrating an Unsupervised Transliteration Model
+# into Statistical Machine Translation."
#
-# osm-model =
+#transliteration-module = "yes"
+#post-decoding-transliteration = "yes"
### lexicalized reordering: specify orientation type
# (default: only distance-based reordering model)
diff --git a/scripts/ems/example/config.toy b/scripts/ems/example/config.toy
index bd328a18e..195a89fa5 100644
--- a/scripts/ems/example/config.toy
+++ b/scripts/ems/example/config.toy
@@ -131,7 +131,7 @@ raw-stem = $toy-data/nc-5k
### tool to be used for language model training
# kenlm training
lm-training = "$moses-script-dir/ems/support/lmplz-wrapper.perl -bin $moses-bin-dir/lmplz"
-settings = "--prune '0 0 1' -T $working-dir/lm/tmp -S 50%"
+settings = "--prune '0 0 1' -T $working-dir/lm -S 20%"
# srilm
#lm-training = $srilm-dir/ngram-count
@@ -283,6 +283,7 @@ script = $moses-script-dir/training/train-model.perl
# * "-mgiza -mgiza-cpus 8" to use mgiza instead of giza
# * "-sort-buffer-size 8G -sort-compress gzip" to reduce on-disk sorting
# * "-sort-parallel 8 -cores 8" to speed up phrase table building
+# * "-parallel" for parallel execution of mkcls and giza
#
#training-options = ""
@@ -355,6 +356,11 @@ alignment-symmetrization-method = grow-diag-final-and
#mml-before-wa = "-proportion 0.9"
#mml-after-wa = "-proportion 0.9"
+### build memory mapped suffix array phrase table
+# (binarizing the reordering table is a good idea, since filtering makes little sense)
+#mmsapt = "num-features=9 pfwd=g+ pbwd=g+ smooth=0 sample=1000 workers=1"
+#binarize-all = $moses-script-dir/training/binarize-model.perl
+
### create a bilingual concordancer for the model
#
#biconcor = $moses-bin-dir/biconcor
@@ -367,11 +373,17 @@ alignment-symmetrization-method = grow-diag-final-and
#operation-sequence-model = "yes"
#operation-sequence-model-order = 5
#operation-sequence-model-settings = ""
+#
+# if OSM training should be skipped, point to OSM Model
+#osm-model =
-### if OSM training should be skipped,
-# point to OSM Model
+### unsupervised transliteration module
+# Durrani, Sajjad, Hoang and Koehn (EACL, 2014).
+# "Integrating an Unsupervised Transliteration Model
+# into Statistical Machine Translation."
#
-# osm-model =
+#transliteration-module = "yes"
+#post-decoding-transliteration = "yes"
### lexicalized reordering: specify orientation type
# (default: only distance-based reordering model)
diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta
index 9785d8940..f4df557b0 100644
--- a/scripts/ems/experiment.meta
+++ b/scripts/ems/experiment.meta
@@ -22,6 +22,7 @@ clean
rerun-on-change: max-sentence-length $moses-script-dir/training/clean-corpus-n.perl
template: $moses-script-dir/training/clean-corpus-n.perl IN $input-extension $output-extension OUT 1 $max-sentence-length OUT.lines-retained
error: there is a blank factor
+ error: is too long! at
parse
in: clean-stem
out: parsed-stem
@@ -104,7 +105,7 @@ tokenize
train
in: tokenized
out: recase-config
- template: $moses-script-dir/recaser/train-recaser.perl -train-script $TRAINING:script -dir OUT.model -corpus IN -scripts-root-dir $moses-script-dir -config OUT -ngram-count $lm-training
+ template: $moses-script-dir/recaser/train-recaser.perl -train-script $TRAINING:script -dir OUT.model -corpus IN -scripts-root-dir $moses-script-dir -config OUT $recasing-settings
default-name: recasing/moses.ini
tmp-name: recasing/model
ignore-unless: EVALUATION:recaser
@@ -116,6 +117,7 @@ consolidate
out: tokenized-stem
default-name: truecaser/corpus
template: $moses-script-dir/ems/support/consolidate-training-data.perl $input-extension $output-extension OUT IN
+ error: number of lines don't match
train
in: tokenized-stem
out: truecase-model
@@ -159,7 +161,6 @@ mock-parse
factorize
in: mock-parsed-corpus
out: factorized-corpus
- rerun-on-change: TRAINING:output-factors
default-name: lm/factored
pass-unless: factors
parallelizable: yes
@@ -277,7 +278,7 @@ split-tuning
template: $output-splitter -model IN1.$output-extension < IN > OUT
interpolate
in: script split-tuning LM:lm
- rerun-on-change: srilm-dir group
+ rerun-on-change: srilm-dir group weights
out: lm
default-name: lm/interpolated-lm
randomize
@@ -563,7 +564,6 @@ extract-phrases
rerun-on-change: max-phrase-length translation-factors reordering-factors hierarchical-rule-set extract-settings training-options script use-ghkm domain-features baseline-extract lexicalized-reordering
only-existence-matters: domain-features
default-name: model/extract
- ignore-if: suffix-array
build-reordering
in: extracted-phrases
out: reordering-table
@@ -576,7 +576,14 @@ build-ttable
out: phrase-translation-table
rerun-on-change: translation-factors hierarchical-rule-set score-settings training-options script EVALUATION:report-precision-by-coverage include-word-alignment-in-rules domain-features
default-name: model/phrase-table
- ignore-if: suffix-array
+ ignore-if: suffix-array mmsapt
+ final-model: yes
+build-mmsapt
+ in: corpus-mml-postfilter=OR=word-alignment corpus-mml-postfilter=OR=corpus-mml-prefilter=OR=corpus
+ out: phrase-translation-table
+ ignore-unless: mmsapt
+ default-name: model/phrase-table-mmsapt
+ template: $moses-script-dir/training/build-mmsapt.perl --alignment IN.$alignment-symmetrization-method --corpus IN1 --f $input-extension --e $output-extension --dir OUT --settings '$mmsapt'
final-model: yes
sigtest-filter-suffix-array
in: corpus-mml-postfilter=OR=corpus-mml-prefilter=OR=corpus
@@ -635,7 +642,7 @@ create-config
in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table transliteration-table generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains osm-model INTERPOLATED-LM:binlm LM:binlm
out: config
ignore-if: use-hiero
- rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini
+ rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini mmsapt
default-name: model/moses.ini
error: Unknown option
final-model: yes
@@ -1077,7 +1084,7 @@ decode
default-name: evaluation/output
qsub-script: yes
ignore-if: use-hiero
- rerun-on-change: decoder decoder-settings nbest report-segmentation report-precision-by-coverage analyze-search-graph wade
+ rerun-on-change: decoder decoder-settings nbest report-segmentation report-precision-by-coverage analyze-search-graph wade TRAINING:post-decoding-transliteration
error: Translation was not performed correctly
not-error: trans: No such file or directory
final-model: yes
diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl
index 4f67a6d8a..31ae4aa32 100755
--- a/scripts/ems/experiment.perl
+++ b/scripts/ems/experiment.perl
@@ -716,9 +716,11 @@ sub delete_crashed {
for(my $i=0;$i<=$#DO_STEP;$i++) {
my $step_file = &versionize(&step_file($i),$DELETE_CRASHED);
next unless -e $step_file;
- next unless &check_if_crashed($i,$DELETE_CRASHED,"no wait");
- &delete_step($DO_STEP[$i],$DELETE_CRASHED);
- $crashed++;
+ if (! -e $step_file.".DONE" || # interrupted (machine went down)
+ &check_if_crashed($i,$DELETE_CRASHED,"no wait")) { # noted crash
+ &delete_step($DO_STEP[$i],$DELETE_CRASHED);
+ $crashed++;
+ }
}
print "run with -exec to delete steps\n" if $crashed && !$EXECUTE;
print "nothing to do\n" unless $crashed;
@@ -813,7 +815,6 @@ sub delete_output {
if (-d $file) {
print "\tdelete directory $file\n";
`rm -r $file` if $EXECUTE;
- return;
}
# delete regular file that matches exactly
if (-e $file) {
@@ -821,11 +822,20 @@ sub delete_output {
`rm $file` if $EXECUTE;
}
# delete files that have additional extension
+ $file =~ /^(.+)\/([^\/]+)$/;
+ my ($dir,$f) = ($1,$2);
my @FILES = `ls $file.* 2>/dev/null`;
- foreach (@FILES) {
+ foreach (`ls $dir`) {
chop;
- print "\tdelete file $_\n";
- `rm $_` if $EXECUTE;
+ next unless substr($_,0,length($f)) eq $f;
+ if (-e $_) {
+ print "\tdelete file $dir/$_\n";
+ `rm $dir/$_` if $EXECUTE;
+ }
+ else {
+ print "\tdelete directory $dir/$_\n";
+ `rm -r $dir/$_` if $EXECUTE;
+ }
}
}
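
The delete_output change above stops returning early after removing a directory and replaces the `ls $file.*` glob with a directory listing filtered by filename prefix, so versioned step outputs that are themselves directories also get removed. A rough Python sketch of the same prefix-based cleanup (not the actual Perl; the example path is illustrative):

import os
import shutil

def delete_output(path, execute=False):
    # remove the exact file or directory first
    if os.path.isdir(path):
        print("\tdelete directory %s" % path)
        if execute:
            shutil.rmtree(path)
    elif os.path.exists(path):
        print("\tdelete file %s" % path)
        if execute:
            os.remove(path)
    # then remove any sibling whose name starts with the same basename
    directory, base = os.path.split(path)
    for name in os.listdir(directory or "."):
        if name == base or not name.startswith(base):
            continue
        full = os.path.join(directory, name)
        if os.path.isdir(full):
            print("\tdelete directory %s" % full)
            if execute:
                shutil.rmtree(full)
        else:
            print("\tdelete file %s" % full)
            if execute:
                os.remove(full)

# delete_output("steps/1/TRAINING_extract-phrases.3", execute=False)  # dry run, like omitting -exec
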
@@ -1513,9 +1523,9 @@ sub check_if_crashed {
'error','killed','core dumped','can\'t read',
'no such file or directory','unknown option',
'died at','exit code','permission denied',
- 'segmentation fault','abort',
- 'no space left on device',
- 'can\'t locate', 'unrecognized option', 'Exception') {
+ 'segmentation fault','abort',
+ 'no space left on device', ': not found',
+ 'can\'t locate', 'unrecognized option', 'Exception') {
if (/$pattern/i) {
my $not_error = 0;
if (defined($NOT_ERROR{&defined_step_id($i)})) {
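
check_if_crashed above gains ": not found" as an error pattern and keeps a per-step not-error whitelist (for example the decode step's "not-error: trans: No such file or directory"). A small Python sketch of that pattern/whitelist interplay, assuming the stderr log is scanned line by line (not the actual Perl):

import re

ERROR_PATTERNS = ["error", "killed", "core dumped", "can't read",
                  "no such file or directory", "unknown option",
                  "died at", "exit code", "permission denied",
                  "segmentation fault", "abort",
                  "no space left on device", ": not found",
                  "can't locate", "unrecognized option", "exception"]

def crashed(log_lines, not_error=()):
    # a line counts as a crash if it matches an error pattern and no whitelist entry
    digest = []
    for line in log_lines:
        for pattern in ERROR_PATTERNS:
            if re.search(re.escape(pattern), line, re.IGNORECASE):
                if any(ok.lower() in line.lower() for ok in not_error):
                    continue
                digest.append(line.strip())
    return len(digest) > 0

print(crashed(["trans: No such file or directory"],
              not_error=["trans: No such file or directory"]))  # False
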
@@ -1535,7 +1545,6 @@ sub check_if_crashed {
# check if output file empty
my $output = &get_default_file(&deconstruct_name($DO_STEP[$i]));
- print STDERR "".$DO_STEP[$i]." -> $output\n";
# currently only works for single output file
if (-e $output && -z $output) {
push @DIGEST,"output file $output is empty";
@@ -1861,7 +1870,7 @@ sub define_tuning_tune {
$cmd .= " --lambdas \"$lambda\"" if $lambda;
$cmd .= " --continue" if $tune_continue;
$cmd .= " --skip-decoder" if $skip_decoder;
- $cmd .= " --inputtype $tune_inputtype" if $tune_inputtype;
+ $cmd .= " --inputtype $tune_inputtype" if defined($tune_inputtype);
my $qsub_args = &get_qsub_args("TUNING");
$cmd .= " --queue-flags=\"$qsub_args\"" if ($CLUSTER && $qsub_args);
@@ -2152,13 +2161,14 @@ sub define_training_build_transliteration_model {
my ($model, $corpus, $alignment) = &get_output_and_input($step_id);
- my $moses_script_dir = &check_and_get("GENERAL:moses-script-dir");
- my $input_extension = &check_backoff_and_get("TRAINING:input-extension");
- my $output_extension = &check_backoff_and_get("TRAINING:output-extension");
- my $sym_method = &check_and_get("TRAINING:alignment-symmetrization-method");
- my $moses_src_dir = &check_and_get("GENERAL:moses-src-dir");
- my $external_bin_dir = &check_and_get("GENERAL:external-bin-dir");
- my $srilm_dir = &check_and_get("GENERAL:srilm-dir");
+ my $moses_script_dir = &check_and_get("GENERAL:moses-script-dir");
+ my $input_extension = &check_backoff_and_get("TRAINING:input-extension");
+ my $output_extension = &check_backoff_and_get("TRAINING:output-extension");
+ my $sym_method = &check_and_get("TRAINING:alignment-symmetrization-method");
+ my $moses_src_dir = &check_and_get("GENERAL:moses-src-dir");
+ my $external_bin_dir = &check_and_get("GENERAL:external-bin-dir");
+ my $srilm_dir = &check_and_get("TRAINING:srilm-dir");
+ my $decoder = &get("TRAINING:transliteration-decoder");
my $cmd = "$moses_script_dir/Transliteration/train-transliteration-module.pl";
$cmd .= " --corpus-f $corpus.$input_extension";
@@ -2166,6 +2176,7 @@ sub define_training_build_transliteration_model {
$cmd .= " --alignment $alignment.$sym_method";
$cmd .= " --out-dir $model";
$cmd .= " --moses-src-dir $moses_src_dir";
+ $cmd .= " --decoder $decoder" if defined($decoder);
$cmd .= " --external-bin-dir $external_bin_dir";
$cmd .= " --srilm-dir $srilm_dir";
$cmd .= " --input-extension $input_extension";
@@ -2174,7 +2185,7 @@ sub define_training_build_transliteration_model {
$cmd .= " --source-syntax " if &get("GENERAL:input-parser");
$cmd .= " --target-syntax " if &get("GENERAL:output-parser");
- &create_step($step_id, $cmd);
+ &create_step($step_id, $cmd);
}
sub define_training_extract_phrases {
@@ -2194,9 +2205,10 @@ sub define_training_extract_phrases {
$cmd .= "-glue-grammar-file $glue_grammar_file ";
if (&get("GENERAL:output-parser") && (&get("TRAINING:use-unknown-word-labels") || &get("TRAINING:use-unknown-word-soft-matches"))) {
- my $unknown_word_label = &versionize(&long_file_name("unknown-word-label","model",""));
- $cmd .= "-unknown-word-label $unknown_word_label ";
+ my $unknown_word_label = &versionize(&long_file_name("unknown-word-label","model",""));
+ $cmd .= "-unknown-word-label $unknown_word_label ";
}
+
if (&get("GENERAL:output-parser") && &get("TRAINING:use-unknown-word-soft-matches")) {
my $unknown_word_soft_matches = &versionize(&long_file_name("unknown-word-soft-matches","model",""));
$cmd .= "-unknown-word-soft-matches $unknown_word_soft_matches ";
@@ -2209,6 +2221,16 @@ sub define_training_extract_phrases {
if (&get("TRAINING:ghkm-tree-fragments")) {
$cmd .= "-ghkm-tree-fragments ";
}
+
+ if (&get("TRAINING:ghkm-phrase-orientation")) {
+ $cmd .= "-ghkm-phrase-orientation ";
+ my $phrase_orientation_priors_file = &versionize(&long_file_name("phrase-orientation-priors","model",""));
+ $cmd .= "-phrase-orientation-priors-file $phrase_orientation_priors_file ";
+ }
+
+ if (&get("TRAINING:ghkm-source-labels")) {
+ $cmd .= "-ghkm-source-labels ";
+ }
}
my $extract_settings = &get("TRAINING:extract-settings");
@@ -2241,6 +2263,16 @@ sub define_training_build_ttable {
if (&get("TRAINING:ghkm-tree-fragments")) {
$cmd .= "-ghkm-tree-fragments ";
}
+ if (&get("TRAINING:ghkm-phrase-orientation")) {
+ $cmd .= "-ghkm-phrase-orientation ";
+ my $phrase_orientation_priors_file = &versionize(&long_file_name("phrase-orientation-priors","model",""));
+ $cmd .= "-phrase-orientation-priors-file $phrase_orientation_priors_file ";
+ }
+ if (&get("TRAINING:ghkm-source-labels")) {
+ $cmd .= "-ghkm-source-labels ";
+ my $source_labels_file = &versionize(&long_file_name("source-labels","model",""));
+ $cmd .= "-ghkm-source-labels-file $source_labels_file ";
+ }
}
&create_step($step_id,$cmd);
@@ -2349,6 +2381,15 @@ sub get_config_tables {
}
}
+ # memory mapped suffix array phrase table
+ my $mmsapt = &get("TRAINING:mmsapt");
+ if (defined($mmsapt)) {
+ $ptImpl = 11; # mmsapt
+ $mmsapt =~ s/num-features=(\d+) // || die("ERROR: mmsapt setting needs to set num-features");
+ $numFF = $1;
+ $cmd .= "-mmsapt '$mmsapt' ";
+ }
+
# additional settings for factored models
$cmd .= &get_table_name_settings("translation-factors","phrase-translation-table", $phrase_translation_table);
$cmd = trim($cmd);
@@ -2416,6 +2457,12 @@ sub define_training_create_config {
}
}
+ if (&get("TRAINING:ghkm-source-labels")) {
+ $cmd .= "-ghkm-source-labels ";
+ my $source_labels_file = &versionize(&long_file_name("source-labels","model",""));
+ $cmd .= "-ghkm-source-labels-file $source_labels_file ";
+ }
+
# sparse lexical features provide additional content for config file
$cmd .= "-additional-ini-file $sparse_lexical_features.ini " if $sparse_lexical_features;
@@ -2496,10 +2543,19 @@ sub define_interpolated_lm_interpolate {
$interpolation_script, $tuning, @LM) = &get_output_and_input($step_id);
my $srilm_dir = &check_backoff_and_get("INTERPOLATED-LM:srilm-dir");
my $group = &get("INTERPOLATED-LM:group");
+ my $weights = &get("INTERPOLATED-LM:weights");
my $scripts = &check_backoff_and_get("TUNING:moses-script-dir");
my $cmd = "";
+ my %WEIGHT;
+ if (defined($weights)) {
+ foreach (split(/ *, */,$weights)) {
+ /^ *(\S+) *= *(\S+)/ || die("ERROR: wrong interpolation weight specification $_ ($weights)");
+ $WEIGHT{$1} = $2;
+ }
+ }
+
# go through language models by factor and order
my ($icount,$ILM_SETS) = &get_interpolated_lm_sets();
foreach my $factor (keys %{$ILM_SETS}) {
@@ -2508,11 +2564,18 @@ sub define_interpolated_lm_interpolate {
# get list of language model files
my $lm_list = "";
+ my $weight_list = "";
foreach my $id_set (@{$$ILM_SETS{$factor}{$order}}) {
my ($id,$set) = split(/ /,$id_set,2);
$lm_list .= $LM[$id].",";
+ if (defined($weights)) {
+ die("ERROR: no interpolation weight set for $factor:$order:$set (factor:order:set)")
+ unless defined($WEIGHT{"$factor:$order:$set"});
+ $weight_list .= $WEIGHT{"$factor:$order:$set"}.",";
+ }
}
chop($lm_list);
+ chop($weight_list);
# if grouping, identify position in list
my $numbered_string = "";
@@ -2553,6 +2616,7 @@ sub define_interpolated_lm_interpolate {
}
$cmd .= "$interpolation_script --tuning $factored_tuning --name $name --srilm $srilm_dir --lm $lm_list";
$cmd .= " --group \"$numbered_string\"" if defined($group);
+ $cmd .= " --weights \"$weight_list\"" if defined($weights);
$cmd .= "\n";
}
}
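
The interpolation change above accepts an INTERPOLATED-LM:weights setting, maps each "factor:order:set" key to a fixed weight, and dies if any LM in the set has no weight. A short Python sketch of that parsing and lookup; the example weight string is made up for illustration:

def parse_weights(spec):
    # parse "key = value" pairs separated by commas
    weights = {}
    for item in spec.split(","):
        key, sep, value = item.partition("=")
        if not sep:
            raise ValueError("wrong interpolation weight specification: %r" % item)
        weights[key.strip()] = float(value)
    return weights

def weight_list(weights, keys):
    missing = [k for k in keys if k not in weights]
    if missing:
        raise ValueError("no interpolation weight set for %s" % ", ".join(missing))
    return ",".join(str(weights[k]) for k in keys)

w = parse_weights("0:5:europarl = 0.4, 0:5:news = 0.6")
print(weight_list(w, ["0:5:europarl", "0:5:news"]))  # 0.4,0.6
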
@@ -3373,7 +3437,7 @@ sub check_backoff_and_get_array {
# the following two functions deal with getting information about
# files that are passed between steps. this are either specified
# in the meta file (default) or in the configuration file (here called
-# 'specified', in the step management refered to as 'given').
+# 'specified', in the step management referred to as 'given').
sub get_specified_or_default_file {
my ($specified_module,$specified_set,$specified_parameter,
@@ -3418,10 +3482,11 @@ sub get_default_file {
my $name = &construct_name($module,$set,$out);
return &check_backoff_and_get($name);
}
-# print "\t\tpassing $step -> ";
+# print "\t\tpassing $step\n";
$i = $DEPENDENCY[$i][0];
$step = $DO_STEP[$i];
# print "\t\tbacking off to $step\n";
+ ($default_module,$default_set,$default_step) = &deconstruct_name($step);
}
# get file name
diff --git a/scripts/ems/support/interpolate-lm.perl b/scripts/ems/support/interpolate-lm.perl
index 6d6f3fdd1..936091daa 100755
--- a/scripts/ems/support/interpolate-lm.perl
+++ b/scripts/ems/support/interpolate-lm.perl
@@ -12,7 +12,7 @@ binmode(STDERR, ":utf8");
my $SRILM = "/home/pkoehn/moses/srilm/bin/i686-m64";
my $TEMPDIR = "/tmp";
-my ($TUNING,$LM,$NAME,$GROUP,$CONTINUE);
+my ($TUNING,$LM,$NAME,$GROUP,$WEIGHTS,$CONTINUE);
die("interpolate-lm.perl --tuning set --name out-lm --lm lm0,lm1,lm2,lm3 [--srilm srilm-dir --tempdir tempdir --group \"0,1 2,3\"]")
unless &GetOptions('tuning=s' => \$TUNING,
@@ -21,6 +21,7 @@ die("interpolate-lm.perl --tuning set --name out-lm --lm lm0,lm1,lm2,lm3 [--sril
'tempdir=s' => \$TEMPDIR,
'continue' => \$CONTINUE,
'group=s' => \$GROUP,
+ 'weights=s' => \$WEIGHTS,
'lm=s' => \$LM);
# check and set default to unset parameters
@@ -32,6 +33,10 @@ die("ERROR: did not find srilm dir") unless -e $SRILM;
die("ERROR: cannot run ngram") unless -x $SRILM."/ngram";
my @LM = split(/,/,$LM);
+my @WEIGHT;
+@WEIGHT = split(/,/,$WEIGHTS) if defined($WEIGHTS);
+die("ERROR: different number of weights and language models: ".scalar(@WEIGHT)." vs. ".scalar(@LM))
+ if defined($WEIGHTS) && scalar(@WEIGHT) != scalar(@LM);
# establish order
my $order = 0;
@@ -75,7 +80,7 @@ if (!defined($GROUP) && scalar(@LM) > 10) {
# normal interpolation
if (!defined($GROUP)) {
- &interpolate($NAME,@LM);
+ &interpolate($NAME,\@WEIGHT,@LM);
exit;
}
@@ -98,50 +103,59 @@ foreach my $subgroup (split(/ /,$GROUP)) {
my $name = $NAME.".group-".chr(97+($g++));
push @SUB_NAME,$name;
print STDERR "\n=== BUILDING SUB LM $name from\n\t".join("\n\t",@SUB_LM)."\n===\n\n";
- &interpolate($name, @SUB_LM) unless $CONTINUE && -e $name;
+ &interpolate($name, undef, @SUB_LM) unless $CONTINUE && -e $name;
}
for(my $lm_i=0; $lm_i < scalar(@LM); $lm_i++) {
next if defined($ALREADY{$lm_i});
push @SUB_NAME, $LM[$lm_i];
}
print STDERR "\n=== BUILDING FINAL LM ===\n\n";
-&interpolate($NAME, @SUB_NAME);
+&interpolate($NAME, undef, @SUB_NAME);
# main interpolation function
sub interpolate {
- my ($name,@LM) = @_;
+ my ($name,$WEIGHT,@LM) = @_;
die("cannot interpolate more than 10 language models at once: ",join(",",@LM))
if scalar(@LM) > 10;
my $tmp = tempdir(DIR=>$TEMPDIR);
+ my @LAMBDA;
- # compute perplexity
- my $i = 0;
- foreach my $lm (@LM) {
- print STDERR "compute perplexity for $lm\n";
- safesystem("$SRILM/ngram -unk -order $order -lm $lm -ppl $TUNING -debug 2 > $tmp/iplm.$$.$i") or die "Failed to compute perplexity for $lm\n";
- print STDERR `tail -n 2 $tmp/iplm.$$.$i`;
- $i++;
+ # if weights are specified, use them
+ if (defined($WEIGHT) && scalar(@$WEIGHT) == scalar(@LM)) {
+ @LAMBDA = @$WEIGHT;
}
+ # no specified weights -> compute them
+ else {
- # compute lambdas
- print STDERR "computing lambdas...\n";
- my $cmd = "$SRILM/compute-best-mix";
- for(my $i=0;$i<scalar(@LM);$i++) {
- $cmd .= " $tmp/iplm.$$.$i";
- }
- my ($mixout, $mixerr, $mixexitcode) = saferun3($cmd);
- die "Failed to mix models: $mixerr" if $mixexitcode != 0;
- my $mix = $mixout;
- `rm $tmp/iplm.$$.*`;
- $mix =~ /best lambda \(([\d\. e-]+)\)/ || die("ERROR: computing lambdas failed: $mix");
- my @LAMBDA = split(/ /,$1);
+ # compute perplexity
+ my $i = 0;
+ foreach my $lm (@LM) {
+ print STDERR "compute perplexity for $lm\n";
+ safesystem("$SRILM/ngram -unk -order $order -lm $lm -ppl $TUNING -debug 2 > $tmp/iplm.$$.$i") or die "Failed to compute perplexity for $lm\n";
+ print STDERR `tail -n 2 $tmp/iplm.$$.$i`;
+ $i++;
+ }
+ # compute lambdas
+ print STDERR "computing lambdas...\n";
+ my $cmd = "$SRILM/compute-best-mix";
+ for(my $i=0;$i<scalar(@LM);$i++) {
+ $cmd .= " $tmp/iplm.$$.$i";
+ }
+ my ($mixout, $mixerr, $mixexitcode) = saferun3($cmd);
+ die "Failed to mix models: $mixerr" if $mixexitcode != 0;
+ my $mix = $mixout;
+ `rm $tmp/iplm.$$.*`;
+ $mix =~ /best lambda \(([\d\. e-]+)\)/ || die("ERROR: computing lambdas failed: $mix");
+ @LAMBDA = split(/ /,$1);
+ }
+
# create new language model
print STDERR "creating new language model...\n";
- $i = 0;
- $cmd = "$SRILM/ngram -unk -order $order -write-lm $name";
+ my $i = 0;
+ my $cmd = "$SRILM/ngram -unk -order $order -write-lm $name";
foreach my $lm (@LM) {
$cmd .= " -lm " if $i==0;
$cmd .= " -mix-lm " if $i==1;
diff --git a/scripts/generic/extract-parallel.perl b/scripts/generic/extract-parallel.perl
index 7abada1de..433e95b9d 100755
--- a/scripts/generic/extract-parallel.perl
+++ b/scripts/generic/extract-parallel.perl
@@ -29,6 +29,8 @@ my $otherExtractArgs= "";
my $weights = "";
my $baselineExtract;
my $glueFile;
+my $phraseOrientation = 0;
+my $phraseOrientationPriorsFile;
for (my $i = 8; $i < $#ARGV + 1; ++$i)
{
@@ -45,6 +47,11 @@ for (my $i = 8; $i < $#ARGV + 1; ++$i)
$glueFile = $ARGV[++$i];
next;
}
+ $phraseOrientation = 1 if $ARGV[$i] eq "--PhraseOrientation";
+ if ($ARGV[$i] eq '--PhraseOrientationPriors') {
+ $phraseOrientationPriorsFile = $ARGV[++$i];
+ next;
+ }
$otherExtractArgs .= $ARGV[$i] ." ";
}
@@ -212,13 +219,39 @@ foreach (@children) {
waitpid($_, 0);
}
-# glue rules
+# merge glue rules
if (defined($glueFile)) {
my $cmd = "cat $TMPDIR/glue.* | LC_ALL=C sort | uniq > $glueFile";
print STDERR "Merging glue rules: $cmd \n";
print STDERR `$cmd`;
}
+# merge phrase orientation priors (GHKM extraction)
+if ($phraseOrientation && defined($phraseOrientationPriorsFile)) {
+ print STDERR "Merging phrase orientation priors\n";
+
+ my @orientationPriorsCountFiles = glob("$TMPDIR/*.phraseOrientationPriors");
+ my %priorCounts;
+
+ foreach my $filenamePhraseOrientationPriors (@orientationPriorsCountFiles) {
+ if (-f $filenamePhraseOrientationPriors) {
+ open my $infilePhraseOrientationPriors, '<', $filenamePhraseOrientationPriors or die "cannot open $filenamePhraseOrientationPriors: $!";
+ while (my $line = <$infilePhraseOrientationPriors>) {
+ print $line;
+ my ($key, $value) = split / /, $line;
+ $priorCounts{$key} += $value;
+ }
+ close $infilePhraseOrientationPriors;
+ }
+ }
+
+ open my $outPhraseOrientationPriors, '>', $phraseOrientationPriorsFile or die "cannot open $phraseOrientationPriorsFile: $!";
+ foreach my $key (sort keys %priorCounts) {
+ print $outPhraseOrientationPriors $key." ".$priorCounts{$key}."\n";
+ }
+ close($outPhraseOrientationPriors);
+}
+
# delete temporary files
$cmd = "rm -rf $TMPDIR \n";
print STDERR $cmd;
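
The new block above merges the per-split *.phraseOrientationPriors count files by summing the value for each key and writing the totals sorted by key. A compact Python equivalent, assuming the same two-column "key count" file format (a sketch, not the script itself):

import glob
from collections import defaultdict

def merge_priors(tmpdir, out_path):
    counts = defaultdict(float)
    for path in glob.glob(tmpdir + "/*.phraseOrientationPriors"):
        with open(path) as infile:
            for line in infile:
                key, value = line.split()
                counts[key] += float(value)
    with open(out_path, "w") as out:
        for key in sorted(counts):
            out.write("%s %s\n" % (key, counts[key]))

# merge_priors("/tmp/extract.12345", "model/phrase-orientation-priors.1")
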
diff --git a/scripts/generic/moses_sim_pe.py b/scripts/generic/moses_sim_pe.py
new file mode 100755
index 000000000..f77e7de6e
--- /dev/null
+++ b/scripts/generic/moses_sim_pe.py
@@ -0,0 +1,346 @@
+#!/usr/bin/env python
+
+# Written by Michael Denkowski
+
+# This script parallelizes decoding with simulated post-editing via moses XML
+# input (XML entities need to be escaped in tokenization). Memory mapped
+# dynamic phrase tables (Ulrich Germann,
+# www.statmt.org/moses/?n=Moses.AdvancedFeatures#ntoc40) and language models
+# (Kenneth Heafield,
+# http://www.statmt.org/moses/?n=FactoredTraining.BuildingLanguageModel#ntoc19)
+# facilitate memory efficient multi process decoding. Input is divided into
+# batches, each of which is decoded sequentially. Each batch pre-loads the data
+# from previous batches.
+
+# To use in tuning, run mert-moses.pl with --sim-pe=SYMAL where SYMAL is the
+# alignment from input to references. Specify the number of jobs with
+# --decoder-flags="-threads N".
+
+import gzip
+import itertools
+import math
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+import threading
+
+HELP = '''Moses with simulated post-editing
+
+Usage: {} moses-cmd -config moses.ini -input-file text.src -ref text.tgt -symal text.src-tgt.symal [options] [decoder flags]
+
+Options:
+ -threads N: number of decoders to run in parallel (default read from moses.ini, 1 if not present)
+ -n-best-list nbest.out N [distinct]: location and size of N-best list
+ -show-weights: for mert-moses.pl, just call moses and exit
+ -tmp: location of temp directory (default /tmp)
+
+Other options (decoder flags) are passed through to moses-cmd\n'''
+
+# Provides progress bar
+class Progress:
+
+ def __init__(self):
+ self.i = 0
+ self.lock = threading.Lock()
+
+ def inc(self):
+ self.lock.acquire()
+ self.i += 1
+ if self.i % 100 == 0:
+ sys.stderr.write('.')
+ if self.i % 1000 == 0:
+ sys.stderr.write(' [{}]\n'.format(self.i))
+ sys.stderr.flush()
+ self.lock.release()
+
+ def done(self):
+ self.lock.acquire()
+ if self.i % 1000 != 0:
+ sys.stderr.write('\n')
+ self.lock.release()
+
+# Run with atomic (synchronous) I/O
+def atomic_io(cmd, in_file, out_file, err_file, prog=None):
+ with open(in_file, 'r') as inp, open(out_file, 'w') as out, open(err_file, 'w') as err:
+ p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=err)
+ while True:
+ line = inp.readline()
+ if not line:
+ break
+ p.stdin.write(line)
+ out.write(p.stdout.readline())
+ out.flush()
+ if prog:
+ prog.inc()
+ p.stdin.close()
+ p.wait()
+
+# Open plain or gzipped text
+def gzopen(f):
+ return gzip.open(f, 'rb') if f.endswith('.gz') else open(f, 'r')
+
+# Word count
+def wc(f):
+ i = 0
+ for line in gzopen(f):
+ i += 1
+ return i
+
+# Write lines to gzipped file
+def write_gzfile(lines, f):
+ out = gzip.open(f, 'wb')
+ for line in lines:
+ out.write('{}\n'.format(line))
+ out.close()
+
+def main(argv):
+
+ # Defaults
+ moses_ini = None
+ moses_ini_lines = None
+ text_src = None
+ text_tgt = None
+ text_symal = None
+ text_len = None
+ threads_found = False
+ threads = 1
+ n_best_out = None
+ n_best_size = None
+ n_best_distinct = False
+ tmp_dir = '/tmp'
+ xml_found = False
+ xml_input = 'exclusive'
+ show_weights = False
+ mmsapt_dynamic = []
+ mmsapt_static = []
+ mmsapt_l1 = None
+ mmsapt_l2 = None
+
+ # Decoder command
+ cmd = argv[1:]
+
+ # Parse special options and remove from cmd
+ i = 1
+ while i < len(cmd):
+ if cmd[i] in ('-f', '-config'):
+ moses_ini = cmd[i + 1]
+ cmd = cmd[:i] + cmd[i + 2:]
+ elif cmd[i] in ('-i', '-input-file'):
+ text_src = cmd[i + 1]
+ cmd = cmd[:i] + cmd[i + 2:]
+ elif cmd[i] == '-ref':
+ text_tgt = cmd[i + 1]
+ cmd = cmd[:i] + cmd[i + 2:]
+ elif cmd[i] == '-symal':
+ text_symal = cmd[i + 1]
+ cmd = cmd[:i] + cmd[i + 2:]
+ elif cmd[i] in ('-th', '-threads'):
+ threads_found = True
+ threads = int(cmd[i + 1])
+ cmd = cmd[:i] + cmd[i + 2:]
+ elif cmd[i] == '-n-best-list':
+ n_best_out = cmd[i + 1]
+ n_best_size = cmd[i + 2]
+ # Optional "distinct"
+ if i + 3 < len(cmd) and cmd[i + 3] == 'distinct':
+ n_best_distinct = True
+ cmd = cmd[:i] + cmd[i + 4:]
+ else:
+ cmd = cmd[:i] + cmd[i + 3:]
+ elif cmd[i] == '-tmp':
+ tmp_dir = cmd[i + 1]
+ cmd = cmd[:i] + cmd[i + 2:]
+ # Handled specially to make sure XML input is turned on somewhere
+ elif cmd[i] in ('-xi', '-xml-input'):
+ xml_found = True
+ xml_input = cmd[i + 1]
+ cmd = cmd[:i] + cmd[i + 2:]
+ # Handled specially for mert-moses.pl
+ elif cmd[i] == '-show-weights':
+ show_weights = True
+ # Do not remove from cmd
+ i += 1
+ else:
+ i += 1
+
+ # Read moses.ini
+ if moses_ini:
+ moses_ini_lines = [line.strip() for line in open(moses_ini, 'r')]
+ i = 0
+ while i < len(moses_ini_lines):
+ # PhraseDictionaryBitextSampling name=TranslationModel0 output-factor=0 num-features=7 path=corpus. L1=src L2=tgt pfwd=g pbwd=g smooth=0 sample=1000 workers=1
+ if moses_ini_lines[i].startswith('PhraseDictionaryBitextSampling'):
+ for (k, v) in (pair.split('=') for pair in moses_ini_lines[i].split()[1:]):
+ if k == 'name':
+ # Dynamic means update this model
+ if v.startswith('Dynamic'):
+ mmsapt_dynamic.append(v)
+ else:
+ mmsapt_static.append(v)
+ elif k == 'L1':
+ if mmsapt_l1 and v != mmsapt_l1:
+ sys.stderr.write('Error: All PhraseDictionaryBitextSampling entries should have same L1: {} != {}\n'.format(v, mmsapt_l1))
+ sys.exit(1)
+ mmsapt_l1 = v
+ elif k == 'L2':
+ if mmsapt_l2 and v != mmsapt_l2:
+ sys.stderr.write('Error: All PhraseDictionaryBitextSampling entries should have same L2: {} != {}\n'.format(v, mmsapt_l2))
+ sys.exit(1)
+ mmsapt_l2 = v
+ moses_ini_lines[i] += '{mmsapt_extra}'
+ # [threads]
+ # 8
+ elif moses_ini_lines[i] == '[threads]':
+ # Prefer command line over moses.ini
+ if not threads_found:
+ threads = int(moses_ini_lines[i + 1])
+ i += 1
+ # [xml-input]
+ # exclusive
+ elif moses_ini_lines[i] == '[xml-input]':
+ # Prefer command line over moses.ini
+ if not xml_found:
+ xml_found = True
+ xml_input = moses_ini_lines[i + 1]
+ i += 1
+ i += 1
+
+ # If mert-moses.pl passes -show-weights, just call moses
+ if show_weights:
+ # re-append original moses.ini
+ cmd.append('-config')
+ cmd.append(moses_ini)
+ sys.stdout.write(subprocess.check_output(cmd))
+ sys.stdout.flush()
+ sys.exit(0)
+
+ # Input length
+ if text_src:
+ text_len = wc(text_src)
+
+ # Check inputs
+ if not (len(cmd) > 0 and all((moses_ini, text_src, text_tgt, text_symal))):
+ sys.stderr.write(HELP.format(argv[0]))
+ sys.exit(2)
+ if not (os.path.isfile(cmd[0]) and os.access(cmd[0], os.X_OK)):
+ sys.stderr.write('Error: moses-cmd "{}" is not executable\n'.format(cmd[0]))
+ sys.exit(1)
+ if not mmsapt_dynamic:
+ sys.stderr.write('Error: no PhraseDictionaryBitextSampling entries named "Dynamic..." found in {}. See http://www.statmt.org/moses/?n=Moses.AdvancedFeatures#ntoc40\n'.format(moses_ini))
+ sys.exit(1)
+ if wc(text_tgt) != text_len or wc(text_symal) != text_len:
+ sys.stderr.write('Error: length mismatch between "{}", "{}", and "{}"\n'.format(text_src, text_tgt, text_symal))
+ sys.exit(1)
+
+ # Setup
+ work_dir = tempfile.mkdtemp(prefix='moses.', dir=os.path.abspath(tmp_dir))
+ threads = min(threads, text_len)
+ batch_size = int(math.ceil(float(text_len) / threads))
+
+ # Report settings
+ sys.stderr.write('Moses flags: {}\n'.format(' '.join('\'{}\''.format(s) if ' ' in s else s for s in cmd[1:])))
+ for (i, n) in enumerate(mmsapt_dynamic):
+ sys.stderr.write('Dynamic mmsapt {}: {} {} {}\n'.format(i, n, mmsapt_l1, mmsapt_l2))
+ for (i, n) in enumerate(mmsapt_static):
+ sys.stderr.write('Static mmsapt {}: {} {} {}\n'.format(i, n, mmsapt_l1, mmsapt_l2))
+ sys.stderr.write('XML mode: {}\n'.format(xml_input))
+ sys.stderr.write('Inputs: {} {} {} ({})\n'.format(text_src, text_tgt, text_symal, text_len))
+ sys.stderr.write('Jobs: {}\n'.format(threads))
+ sys.stderr.write('Batch size: {}\n'.format(batch_size))
+ if n_best_out:
+ sys.stderr.write('N-best list: {} ({}{})\n'.format(n_best_out, n_best_size, ', distinct' if n_best_distinct else ''))
+ sys.stderr.write('Temp dir: {}\n'.format(work_dir))
+
+ # Accumulate seen lines
+ src_lines = []
+ tgt_lines = []
+ symal_lines = []
+
+ # Current XML source file
+ xml_out = None
+
+ # Split into batches. Each batch after 0 gets extra files with data from previous batches.
+ # Data from previous lines in the current batch is added using XML input.
+ job = -1
+ lc = -1
+ for (src, tgt, symal) in itertools.izip(gzopen(text_src), gzopen(text_tgt), gzopen(text_symal)):
+ (src, tgt, symal) = (src.strip(), tgt.strip(), symal.strip())
+ lc += 1
+ if lc % batch_size == 0:
+ job += 1
+ xml_file = os.path.join(work_dir, 'input.{}.xml'.format(job))
+ extra_src_file = os.path.join(work_dir, 'extra.{}.{}.txt.gz'.format(job, mmsapt_l1))
+ extra_tgt_file = os.path.join(work_dir, 'extra.{}.{}.txt.gz'.format(job, mmsapt_l2))
+ extra_symal_file = os.path.join(work_dir, 'extra.{}.{}-{}.symal.gz'.format(job, mmsapt_l1, mmsapt_l2))
+ if job > 0:
+ xml_out.close()
+ write_gzfile(src_lines, extra_src_file)
+ write_gzfile(tgt_lines, extra_tgt_file)
+ write_gzfile(symal_lines, extra_symal_file)
+ xml_out = open(xml_file, 'w')
+ with open(os.path.join(work_dir, 'moses.{}.ini'.format(job)), 'w') as moses_ini_out:
+ extra = '' if job == 0 else ' extra={}'.format(os.path.join(work_dir, 'extra.{}.'.format(job)))
+ moses_ini_out.write('{}\n'.format('\n'.join(moses_ini_lines).format(mmsapt_extra=extra)))
+ src_lines.append(src)
+ tgt_lines.append(tgt)
+ symal_lines.append(symal)
+ # Lines after first start with update tag including previous translation.
+ # Translation of last line of each batch is included in extra for next batch.
+ xml_tags = []
+ if lc % batch_size != 0:
+ for n in mmsapt_dynamic:
+ # note: space after tag
+ xml_tags.append('<update name="{}" source="{}" target="{}" alignment="{}" /> '.format(n, src_lines[-2], tgt_lines[-2], symal_lines[-2]))
+ xml_out.write('{}{}\n'.format(''.join(xml_tags), src))
+ xml_out.close()
+
+ # Run decoders in parallel
+ workers = []
+ prog = Progress()
+ for i in range(threads):
+ work_cmd = cmd[:]
+ work_cmd.append('-config')
+ work_cmd.append(os.path.join(work_dir, 'moses.{}.ini'.format(i)))
+ # Workers use 1 CPU each
+ work_cmd.append('-threads')
+ work_cmd.append('1')
+ if not xml_found:
+ work_cmd.append('-xml-input')
+ work_cmd.append(xml_input)
+ if n_best_out:
+ work_cmd.append('-n-best-list')
+ work_cmd.append(os.path.join(work_dir, 'nbest.{}'.format(i)))
+ work_cmd.append(str(n_best_size))
+ if n_best_distinct:
+ work_cmd.append('distinct')
+ in_file = os.path.join(work_dir, 'input.{}.xml'.format(i))
+ out_file = os.path.join(work_dir, 'out.{}'.format(i))
+ err_file = os.path.join(work_dir, 'err.{}'.format(i))
+ t = threading.Thread(target=atomic_io, args=(work_cmd, in_file, out_file, err_file, prog))
+ workers.append(t)
+ t.start()
+ # Wait for all to finish
+ for t in workers:
+ t.join()
+ prog.done()
+
+ # Gather N-best lists
+ if n_best_out:
+ with open(n_best_out, 'w') as out:
+ for i in range(threads):
+ for line in open(os.path.join(work_dir, 'nbest.{}'.format(i)), 'r'):
+ entry = line.partition(' ')
+ out.write('{} {}'.format(int(entry[0]) + (i * batch_size), entry[2]))
+
+ # Gather stdout
+ for i in range(threads):
+ for line in open(os.path.join(work_dir, 'out.{}'.format(i)), 'r'):
+ sys.stdout.write(line)
+
+ # Cleanup
+ shutil.rmtree(work_dir)
+
+if __name__ == '__main__':
+ main(sys.argv)
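
Because each of the parallel decoders above numbers its n-best entries from 0, the merge step at the end offsets every sentence id by the batch index times the batch size before writing the combined list. A tiny sketch of that renumbering with made-up batch contents:

def merge_nbest(batches, batch_size):
    # batches: per-decoder n-best lines of the form "id ||| hypothesis ..."; ids restart at 0 per batch
    merged = []
    for i, lines in enumerate(batches):
        for line in lines:
            local_id, _, rest = line.partition(" ")
            merged.append("%d %s" % (int(local_id) + i * batch_size, rest))
    return merged

print(merge_nbest([["0 ||| hyp a", "1 ||| hyp b"], ["0 ||| hyp c"]], batch_size=2))
# ['0 ||| hyp a', '1 ||| hyp b', '2 ||| hyp c']
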
diff --git a/scripts/generic/score-parallel.perl b/scripts/generic/score-parallel.perl
index 213b9e90e..7835d3826 100755
--- a/scripts/generic/score-parallel.perl
+++ b/scripts/generic/score-parallel.perl
@@ -27,10 +27,22 @@ my $scoreCmd = $ARGV[2];
my $extractFile = $ARGV[3]; # 1st arg of extract argument
my $lexFile = $ARGV[4];
my $ptHalf = $ARGV[5]; # output
+my $inverse = 0;
+my $sourceLabelsFile;
my $otherExtractArgs= "";
for (my $i = 6; $i < $#ARGV; ++$i)
{
+ if ($ARGV[$i] eq '--SourceLabels') {
+ $sourceLabelsFile = $ARGV[++$i];
+ $otherExtractArgs .= "--SourceLabels --SourceLabelCountsLHS --SourceLabelSet ";
+ next;
+ }
+ if ($ARGV[$i] eq '--Inverse') {
+ $inverse = 1;
+ $otherExtractArgs .= $ARGV[$i] ." ";
+ next;
+ }
$otherExtractArgs .= $ARGV[$i] ." ";
}
#$scoreCmd $extractFile $lexFile $ptHalf $otherExtractArgs
@@ -258,6 +270,14 @@ if (-e $cocPath)
close(FHCOC);
}
+# merge source label files
+if (!$inverse && defined($sourceLabelsFile))
+{
+ my $cmd = "(echo \"GlueTop 0\"; echo \"GlueX 1\"; cat $TMPDIR/phrase-table.half.*.gz.syntaxLabels.src | LC_ALL=C sort | uniq | perl -pe \"s/\$/ \@{[\$.+1]}/\") > $sourceLabelsFile";
+ print STDERR "Merging source label files: $cmd \n";
+ `$cmd`;
+}
+
$cmd = "rm -rf $TMPDIR \n";
print STDERR $cmd;
systemCheck($cmd);
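
The merge step above builds the source-label set file by reserving indices 0 and 1 for GlueTop and GlueX and then numbering the sorted, de-duplicated labels from the per-split .syntaxLabels.src files starting at 2. The same idea as a short Python sketch, with file discovery simplified to a plain list of label strings:

def build_source_label_file(labels, out_path):
    # labels: non-terminal labels collected from the split phrase-table halves
    with open(out_path, "w") as out:
        out.write("GlueTop 0\n")
        out.write("GlueX 1\n")
        for i, label in enumerate(sorted(set(labels)), start=2):
            out.write("%s %d\n" % (label, i))

# build_source_label_file(["NP", "VP", "NP", "S"], "model/source-labels.1")
# -> GlueTop 0 / GlueX 1 / NP 2 / S 3 / VP 4
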
diff --git a/scripts/server/moses.py b/scripts/server/moses.py
index 155458b9b..a176c473a 100644
--- a/scripts/server/moses.py
+++ b/scripts/server/moses.py
@@ -152,7 +152,7 @@ def find_free_port(p):
class MosesServer(ProcessWrapper):
- def __init__(self,args=["-fd", "\n"]):
+ def __init__(self,args=[]):
self.process = None
mserver_cmd = moses_root+"/bin/mosesserver"
self.cmd = [mserver_cmd] + args
@@ -175,7 +175,10 @@ class MosesServer(ProcessWrapper):
self.cmd.extend(["--server-port", "%d"%self.port])
if debug:
print >>sys.stderr,self.cmd
- self.process = Popen(self.cmd,stderr = sys.stderr)
+ # self.stderr = open("mserver.%d.stderr"%self.port,'w')
+ # self.stdout = open("mserver.%d.stdout"%self.port,'w')
+ # self.process = Popen(self.cmd,stderr = self.stderr,stdout = self.stdout)
+ self.process = Popen(self.cmd)
else:
devnull = open(os.devnull,"w")
self.process = Popen(self.cmd, stderr=devnull, stdout=devnull)
@@ -216,10 +219,13 @@ class MosesServer(ProcessWrapper):
elif type(input) is list:
return [self.translate(x) for x in input]
+
elif type(input) is dict:
return self.proxy.translate(input)
+
else:
raise Exception("Can't handle input of this type!")
+
except:
attempts += 1
print >>sys.stderr, "WAITING", attempts
diff --git a/scripts/server/sim-pe.py b/scripts/server/sim-pe.py
index 340695a56..52d1e314a 100755
--- a/scripts/server/sim-pe.py
+++ b/scripts/server/sim-pe.py
@@ -127,13 +127,40 @@ def translate(proxy, args, line):
param['nbest-distinct'] = True
pass
attempts = 0
- while attempts < 120:
+ while attempts < 20:
+ t1 = time.time()
try:
- return proxy.translate(param)
- except:
- print >>sys.stderr, "Waiting", proxy
- attempts += 1
+ return proxy.translate(param)
+
+ # except xmlrpclib.Fault as e:
+ # except xmlrpclib.ProtocolError as e:
+ # except xmlrpclib.ResponseError as e:
+ except xmlrpclib.Error as e:
+ time.sleep(2) # give all the stderr stuff a chance to be flushed
+ print >>sys.stderr," XMLRPC error:",e
+ print >>sys.stderr, "Input was"
+ print >>sys.stderr, param
+ sys.exit(1)
+
+ except IOError as e:
+ print >>sys.stderr,"I/O error({0}): {1}".format(e.errno, e.strerror)
time.sleep(5)
+
+ except:
+ serverstatus = mserver.process.poll()
+ if serverstatus == None:
+ print >>sys.stderr, "Connection failed after %f seconds"%(time.time()-t1)
+ attempts += 1
+ if attempts > 10:
+ time.sleep(10)
+ else:
+ time.sleep(5)
+ pass
+ else:
+
+ print >>sys.stderr, "Oopsidaisy, server exited with code %d (signal %d)"\
+ %(serverstatus/256,serverstatus%256)
+ pass
pass
pass
raise Exception("Exception: could not reach translation server.")
@@ -210,17 +237,25 @@ if __name__ == "__main__":
pass
pass
- if args.url:
- mserver.connect(args.url)
- else:
- mserver.start(args=mo_args,port=args.port,debug=args.debug)
- pass
-
ref = None
aln = None
if args.ref: ref = read_data(args.ref)
if args.aln: aln = read_data(args.aln)
+ if ref and aln:
+ try:
+ mo_args.index("--serial")
+ except:
+ mo_args.append("--serial")
+ pass
+ pass
+
+ if args.url:
+ mserver.connect(args.url)
+ else:
+ mserver.start(args=mo_args, port=args.port, debug=args.debug)
+ pass
+
if (args.input == "-"):
line = sys.stdin.readline()
idx = 0
diff --git a/scripts/share/nonbreaking_prefixes/README.txt b/scripts/share/nonbreaking_prefixes/README.txt
index 02cdfccb9..2276a1138 100644
--- a/scripts/share/nonbreaking_prefixes/README.txt
+++ b/scripts/share/nonbreaking_prefixes/README.txt
@@ -2,4 +2,7 @@ The language suffix can be found here:
http://www.loc.gov/standards/iso639-2/php/code_list.php
+This code includes data from Daniel Naber's Language Tools (czech abbreviations).
+This code includes data from czech wiktionary (also czech abbreviations).
+
diff --git a/scripts/tokenizer/basic-protected-patterns b/scripts/tokenizer/basic-protected-patterns
new file mode 100644
index 000000000..2d1e53129
--- /dev/null
+++ b/scripts/tokenizer/basic-protected-patterns
@@ -0,0 +1,5 @@
+<\/?\S+\/?>
+<\S+( [a-zA-Z0-9]+\=\"?[^\"]\")+ ?\/?>
+<\S+( [a-zA-Z0-9]+\=\'?[^\']\')+ ?\/?>
+(\w\-\_\.)+\@((\w\-\_)+\.)+[a-zA-Z]{2,}
+(http[s]?|ftp):\/\/[^:\/\s]+(\/\w+)*\/[\w\-\.]+
diff --git a/scripts/tokenizer/normalize-punctuation.perl b/scripts/tokenizer/normalize-punctuation.perl
index 76f58714f..58f568b57 100755
--- a/scripts/tokenizer/normalize-punctuation.perl
+++ b/scripts/tokenizer/normalize-punctuation.perl
@@ -2,7 +2,13 @@
use strict;
-my ($language) = @ARGV;
+my $language = "en";
+while (@ARGV) {
+ $_ = shift;
+ /^-b$/ && ($| = 1, next); # not buffered (flush each line)
+ /^-l$/ && ($language = shift, next);
+ /^[^\-]/ && ($language = $_, next);
+}
while(<STDIN>) {
s/\r//g;
diff --git a/scripts/tokenizer/tokenizer.perl b/scripts/tokenizer/tokenizer.perl
index 9e399519a..224f2319c 100755
--- a/scripts/tokenizer/tokenizer.perl
+++ b/scripts/tokenizer/tokenizer.perl
@@ -232,15 +232,20 @@ sub tokenize
# Find protected patterns
my @protected = ();
foreach my $protected_pattern (@protected_patterns) {
- foreach ($text =~ /($protected_pattern)/) {
- push @protected, $_;
+ my $t = $text;
+ while ($t =~ /($protected_pattern)(.*)$/) {
+ push @protected, $1;
+ $t = $2;
}
}
for (my $i = 0; $i < scalar(@protected); ++$i) {
my $subst = sprintf("THISISPROTECTED%.3d", $i);
- $text =~ s,\Q$protected[$i],$subst,g;
+ $text =~ s,\Q$protected[$i], $subst ,g;
}
+ $text =~ s/ +/ /g;
+ $text =~ s/^ //g;
+ $text =~ s/ $//g;
# seperate out all "other" special characters
$text =~ s/([^\p{IsAlnum}\s\.\'\`\,\-])/ $1 /g;
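
The tokenizer change above walks through the text repeatedly so that every occurrence of a protected pattern is captured (not just the first per pattern), substitutes each match with a space-padded THISISPROTECTEDnnn placeholder, and squeezes the extra spaces afterwards. A rough Python sketch of the placeholder approach; the pattern and input are simplified stand-ins, not the entries shipped in basic-protected-patterns:

import re

def protect(text, patterns):
    protected = []
    # collect every occurrence of every protected pattern, left to right
    for pattern in patterns:
        rest = text
        while True:
            m = re.search("(" + pattern + ")(.*)$", rest)
            if not m:
                break
            protected.append(m.group(1))
            rest = m.group(2)
    # replace each protected span with a padded placeholder, then squeeze spaces
    for i, token in enumerate(protected):
        text = text.replace(token, " THISISPROTECTED%.3d " % i)
    text = re.sub(" +", " ", text).strip()
    return text, protected

text, saved = protect("see <br/> and <img/> here", [r"<\/?\S+\/?>"])
print(text)   # see THISISPROTECTED000 and THISISPROTECTED001 here
print(saved)  # ['<br/>', '<img/>']
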
diff --git a/scripts/training/build-mmsapt.perl b/scripts/training/build-mmsapt.perl
new file mode 100755
index 000000000..2135c12c9
--- /dev/null
+++ b/scripts/training/build-mmsapt.perl
@@ -0,0 +1,22 @@
+#!/usr/bin/perl -w
+
+use strict;
+use Getopt::Long "GetOptions";
+
+my ($DIR,$F,$E,$ALIGNMENT,$CORPUS,$SETTINGS);
+die("ERROR: syntax is --alignment FILE --corpus FILESTEM --f EXT --e EXT --DIR OUTDIR --settings STRING")
+ unless &GetOptions('DIR=s' => \$DIR,
+ 'f=s' => \$F,
+ 'e=s' => \$E,
+ 'corpus=s' => \$CORPUS,
+ 'alignment=s' => \$ALIGNMENT,
+ 'settings=s' => \$SETTINGS)
+ && defined($DIR) && defined($F) && defined($E) && defined($CORPUS) && defined($ALIGNMENT)
+ && -e $ALIGNMENT && -e "$CORPUS.$F" && -e "$CORPUS.$E";
+
+`mkdir $DIR`;
+`/opt/moses/bin/mtt-build < $CORPUS.$F -i -o $DIR/$F`;
+`/opt/moses/bin/mtt-build < $CORPUS.$E -i -o $DIR/$E`;
+`/opt/moses/bin/symal2mam < $ALIGNMENT $DIR/$F-$E.mam`;
+`/opt/moses/bin/mmlex-build $DIR/ $F $E -o $DIR/$F-$E.lex -c $DIR/$F-$E.cooc`;
+
diff --git a/scripts/training/mert-moses.pl b/scripts/training/mert-moses.pl
index d1ac5828a..027d94a77 100755
--- a/scripts/training/mert-moses.pl
+++ b/scripts/training/mert-moses.pl
@@ -127,8 +127,8 @@ my $___NOCASE = 0;
# Use "--nonorm" to non normalize translation before computing scores
my $___NONORM = 0;
-# set 0 if input type is text, set 1 if input type is confusion network
-my $___INPUTTYPE = 0;
+# set 0 if input type is text, set 1 if input type is confusion network, set 3 if input type is parse tree
+my $___INPUTTYPE;
my $mertdir = undef; # path to new mert directory
@@ -160,6 +160,12 @@ my $prev_aggregate_nbl_size = -1; # number of previous step to consider when loa
# and so on
my $maximum_iterations = 25;
+# Simulated post-editing
+my $___MOSES_SIM_PE = "$SCRIPTS_ROOTDIR/generic/moses_sim_pe.py";
+my $___DEV_SYMAL = undef;
+my $dev_symal_abs = undef;
+my $working_dir_abs = undef;
+
use Getopt::Long;
GetOptions(
"working-dir=s" => \$___WORKING_DIR,
@@ -213,7 +219,8 @@ GetOptions(
"batch-mira-args=s" => \$batch_mira_args,
"promix-training=s" => \$__PROMIX_TRAINING,
"promix-table=s" => \@__PROMIX_TABLES,
- "threads=i" => \$__THREADS
+ "threads=i" => \$__THREADS,
+ "spe-symal=s" => \$___DEV_SYMAL
) or exit(1);
# the 4 required parameters can be supplied on the command line directly
@@ -308,6 +315,8 @@ Options:
--threads=NUMBER ... Use multi-threaded mert (must be compiled in).
--historic-interpolation ... Interpolate optimized weights with prior iterations' weight
(parameter sets factor [0;1] given to current weights)
+ --spe-symal=SYMAL ... Use simulated post-editing when decoding.
+ (SYMAL aligns input to refs)
";
exit 1;
}
@@ -467,6 +476,12 @@ if ($___DECODER_FLAGS =~ /(^|\s)-(config|f) /
die "It is forbidden to supply any of -config, -ttable-file, -distortion-file, -generation-file or -lmodel-file in the --decoder-flags.\nPlease use only the --config option to give the config file that lists all the supplementary files.";
}
+# Paths needed for simulated post-editing
+$working_dir_abs = ensure_full_path($___WORKING_DIR);
+if (defined $___DEV_SYMAL) {
+ $dev_symal_abs = ensure_full_path($___DEV_SYMAL);
+}
+
# as weights are normalized in the next steps (by cmert)
# normalize initial LAMBDAs, too
my $need_to_normalize = 1;
@@ -863,8 +878,8 @@ while (1) {
$mira_settings .= "$batch_mira_args ";
}
- $mira_settings .= " --dense-init run$run.$weights_in_file";
- #$mira_settings .= " --dense-init run$run.dense";
+ #$mira_settings .= " --dense-init run$run.$weights_in_file";
+ $mira_settings .= " --dense-init run$run.dense";
if (-e "run$run.sparse-weights") {
$mira_settings .= " --sparse-init run$run.sparse-weights";
}
@@ -1228,14 +1243,24 @@ sub run_decoder {
if (defined $___JOBS && $___JOBS > 0) {
die "Hypergraph mira not supported by moses-parallel" if $___HG_MIRA;
- $decoder_cmd = "$moses_parallel_cmd $pass_old_sge -config $___CONFIG -inputtype $___INPUTTYPE -qsub-prefix mert$run -queue-parameters \"$queue_flags\" -decoder-parameters \"$___DECODER_FLAGS $decoder_config\" $lsamp_cmd -n-best-list \"$filename $___N_BEST_LIST_SIZE\" -input-file $___DEV_F -jobs $___JOBS -decoder $___DECODER > run$run.out";
+ $decoder_cmd = "$moses_parallel_cmd $pass_old_sge -config $___CONFIG";
+ $decoder_cmd .= " -inputtype $___INPUTTYPE" if defined($___INPUTTYPE);
+ $decoder_cmd .= " -qsub-prefix mert$run -queue-parameters \"$queue_flags\" -decoder-parameters \"$___DECODER_FLAGS $decoder_config\" $lsamp_cmd -n-best-list \"$filename $___N_BEST_LIST_SIZE distinct\" -input-file $___DEV_F -jobs $___JOBS -decoder $___DECODER > run$run.out";
} else {
- my $nbest_list_cmd = "-n-best-list $filename $___N_BEST_LIST_SIZE";
+ my $nbest_list_cmd = "-n-best-list $filename $___N_BEST_LIST_SIZE distinct";
if ($___HG_MIRA) {
safesystem("rm -rf $hypergraph_dir");
$nbest_list_cmd = "-output-search-graph-hypergraph true gz";
}
- $decoder_cmd = "$___DECODER $___DECODER_FLAGS -config $___CONFIG -inputtype $___INPUTTYPE $decoder_config $lsamp_cmd $nbest_list_cmd -input-file $___DEV_F > run$run.out";
+ $decoder_cmd = "$___DECODER $___DECODER_FLAGS -config $___CONFIG";
+ $decoder_cmd .= " -inputtype $___INPUTTYPE" if defined($___INPUTTYPE);
+ $decoder_cmd .= " $decoder_config $lsamp_cmd $nbest_list_cmd -input-file $___DEV_F";
+ if (defined $___DEV_SYMAL) {
+ # If simulating post-editing, route command through moses_sim_pe.py
+ # Always use single (first) reference. Simulated post-editing undefined for multiple references.
+ $decoder_cmd = "$___MOSES_SIM_PE $decoder_cmd -ref $references[0] -symal $dev_symal_abs -tmp $working_dir_abs > run$run.out";
+ }
+ $decoder_cmd .= " > run$run.out";
}
print STDERR "Executing: $decoder_cmd \n";
@@ -1309,7 +1334,9 @@ sub get_featlist_from_moses {
print STDERR "Using cached features list: $featlistfn\n";
} else {
print STDERR "Asking moses for feature names and values from $___CONFIG\n";
- my $cmd = "$___DECODER $___DECODER_FLAGS -config $configfn -inputtype $___INPUTTYPE -show-weights > $featlistfn";
+ my $cmd = "$___DECODER $___DECODER_FLAGS -config $configfn";
+ $cmd .= " -inputtype $___INPUTTYPE" if defined($___INPUTTYPE);
+ $cmd .= " -show-weights > $featlistfn";
print STDERR "Executing: $cmd\n";
safesystem($cmd) or die "Failed to run moses with the config $configfn";
}
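
With --spe-symal given, run_decoder above routes the decoder call through moses_sim_pe.py, passing the first reference, the absolute symal path, and a temp directory before redirecting to run$run.out. A small Python sketch of that conditional command assembly; the paths and defaults are placeholders, and the sketch simplifies the quoting and redirection of the real Perl:

def build_decoder_cmd(decoder, config, inputtype=None, dev_symal=None,
                      sim_pe="moses_sim_pe.py", ref="dev.ref.0", workdir="/tmp/mert", run=1):
    cmd = "%s -config %s" % (decoder, config)
    if inputtype is not None:
        cmd += " -inputtype %d" % inputtype
    cmd += " -input-file dev.input"
    if dev_symal is not None:
        # simulated post-editing: wrap the whole decoder call in moses_sim_pe.py
        cmd = "%s %s -ref %s -symal %s -tmp %s" % (sim_pe, cmd, ref, dev_symal, workdir)
    cmd += " > run%d.out" % run
    return cmd

print(build_decoder_cmd("bin/moses", "moses.ini"))
print(build_decoder_cmd("bin/moses", "moses.ini", dev_symal="dev.src-tgt.symal"))
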
diff --git a/scripts/training/train-model.perl b/scripts/training/train-model.perl
index a9ed58535..8f661b812 100755
--- a/scripts/training/train-model.perl
+++ b/scripts/training/train-model.perl
@@ -32,12 +32,12 @@ my($_EXTERNAL_BINDIR, $_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_
$_DECODING_STEPS, $_PARALLEL, $_FACTOR_DELIMITER, @_PHRASE_TABLE,
@_REORDERING_TABLE, @_GENERATION_TABLE, @_GENERATION_TYPE, $_GENERATION_CORPUS,
$_DONT_ZIP, $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG, $_OSM, $_OSM_FACTORS, $_POST_DECODING_TRANSLIT, $_TRANSLITERATION_PHRASE_TABLE,
- $_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_GHKM,$_GHKM_TREE_FRAGMENTS,$_PCFG,@_EXTRACT_OPTIONS,@_SCORE_OPTIONS,
+ $_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_GHKM,$_GHKM_TREE_FRAGMENTS,$_GHKM_PHRASE_ORIENTATION,$_PHRASE_ORIENTATION_PRIORS_FILE,$_GHKM_SOURCE_LABELS,$_GHKM_SOURCE_LABELS_FILE,$_PCFG,@_EXTRACT_OPTIONS,@_SCORE_OPTIONS,
$_ALT_DIRECT_RULE_SCORE_1, $_ALT_DIRECT_RULE_SCORE_2, $_UNKNOWN_WORD_SOFT_MATCHES_FILE,
$_OMIT_WORD_ALIGNMENT,$_FORCE_FACTORED_FILENAMES,
$_MEMSCORE, $_FINAL_ALIGNMENT_MODEL,
$_CONTINUE,$_MAX_LEXICAL_REORDERING,$_DO_STEPS,
- @_ADDITIONAL_INI,$_ADDITIONAL_INI_FILE,
+ @_ADDITIONAL_INI,$_ADDITIONAL_INI_FILE,$_MMSAPT,
@_BASELINE_ALIGNMENT_MODEL, $_BASELINE_EXTRACT, $_BASELINE_ALIGNMENT,
$_DICTIONARY, $_SPARSE_PHRASE_FEATURES, $_EPPEX, $_INSTANCE_WEIGHTS_FILE, $_LMODEL_OOV_FEATURE, $_NUM_LATTICE_FEATURES, $IGNORE, $_FLEXIBILITY_SCORE, $_EXTRACT_COMMAND);
my $_BASELINE_CORPUS = "";
@@ -110,6 +110,10 @@ $_HELP = 1
'unknown-word-soft-matches-file=s' => \$_UNKNOWN_WORD_SOFT_MATCHES_FILE, # give dummy label to unknown word, and allow soft matches to all other labels (with cost determined by sparse features)
'ghkm' => \$_GHKM,
'ghkm-tree-fragments' => \$_GHKM_TREE_FRAGMENTS,
+ 'ghkm-phrase-orientation' => \$_GHKM_PHRASE_ORIENTATION,
+ 'phrase-orientation-priors-file=s' => \$_PHRASE_ORIENTATION_PRIORS_FILE, # currently relevant for GHKM extraction only; phrase orientation for PBT has different implementation
+ 'ghkm-source-labels' => \$_GHKM_SOURCE_LABELS,
+ 'ghkm-source-labels-file=s' => \$_GHKM_SOURCE_LABELS_FILE,
'pcfg' => \$_PCFG,
'alt-direct-rule-score-1' => \$_ALT_DIRECT_RULE_SCORE_1,
'alt-direct-rule-score-2' => \$_ALT_DIRECT_RULE_SCORE_2,
@@ -121,9 +125,10 @@ $_HELP = 1
'no-word-alignment' => \$_OMIT_WORD_ALIGNMENT,
'config=s' => \$_CONFIG,
'osm-model=s' => \$_OSM,
- 'osm-setting=s' => \$_OSM_FACTORS,
- 'post-decoding-translit=s' => \$_POST_DECODING_TRANSLIT,
- 'transliteration-phrase-table=s' => \$_TRANSLITERATION_PHRASE_TABLE,
+ 'osm-setting=s' => \$_OSM_FACTORS,
+ 'post-decoding-translit=s' => \$_POST_DECODING_TRANSLIT,
+ 'transliteration-phrase-table=s' => \$_TRANSLITERATION_PHRASE_TABLE,
+ 'mmsapt=s' => \$_MMSAPT,
'max-lexical-reordering' => \$_MAX_LEXICAL_REORDERING,
'do-steps=s' => \$_DO_STEPS,
'memscore:s' => \$_MEMSCORE,
@@ -1424,8 +1429,15 @@ sub extract_phrase {
$cmd .= " --PCFG" if $_PCFG;
$cmd .= " --UnpairedExtractFormat" if $_ALT_DIRECT_RULE_SCORE_1 || $_ALT_DIRECT_RULE_SCORE_2;
$cmd .= " --ConditionOnTargetLHS" if $_ALT_DIRECT_RULE_SCORE_1;
- $cmd .= " --TreeFragments" if $_GHKM_TREE_FRAGMENTS;
- if (!defined($_GHKM)) {
+ if (defined($_GHKM))
+ {
+ $cmd .= " --TreeFragments" if $_GHKM_TREE_FRAGMENTS;
+ $cmd .= " --PhraseOrientation" if $_GHKM_PHRASE_ORIENTATION;
+ $cmd .= " --PhraseOrientationPriors $_PHRASE_ORIENTATION_PRIORS_FILE" if defined($_PHRASE_ORIENTATION_PRIORS_FILE);
+ $cmd .= " --SourceLabels" if $_GHKM_SOURCE_LABELS;
+ }
+ else
+ {
$cmd .= " --SourceSyntax" if $_SOURCE_SYNTAX;
$cmd .= " --TargetSyntax" if $_TARGET_SYNTAX;
$cmd .= " --MaxSpan $max_length";
@@ -1549,6 +1561,9 @@ sub score_phrase_phrase_extract {
my $NEG_LOG_PROB = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /NegLogProb/);
my $NO_LEX = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /NoLex/);
my $MIN_COUNT_HIERARCHICAL = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /MinCountHierarchical ([\d\.]+)/) ? $1 : undef;
+ my $SOURCE_LABELS = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /SourceLabels/);
+ my $SOURCE_LABEL_COUNTS_LHS = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /SourceLabelCountsLHS/);
+ my $SOURCE_LABEL_SET = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /SourceLabelSet/);
my $SPAN_LENGTH = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /SpanLength/);
my $CORE_SCORE_OPTIONS = "";
$CORE_SCORE_OPTIONS .= " --LogProb" if $LOG_PROB;
@@ -1556,6 +1571,9 @@ sub score_phrase_phrase_extract {
$CORE_SCORE_OPTIONS .= " --NoLex" if $NO_LEX;
$CORE_SCORE_OPTIONS .= " --Singleton" if $SINGLETON;
$CORE_SCORE_OPTIONS .= " --CrossedNonTerm" if $CROSSEDNONTERM;
+ $CORE_SCORE_OPTIONS .= " --SourceLabels" if $SOURCE_LABELS;
+ $CORE_SCORE_OPTIONS .= " --SourceLabelCountsLHS " if $SOURCE_LABEL_COUNTS_LHS;
+ $CORE_SCORE_OPTIONS .= " --SourceLabelSet " if $SOURCE_LABEL_SET;
my $substep = 1;
my $isParent = 1;
@@ -1596,6 +1614,9 @@ sub score_phrase_phrase_extract {
$cmd .= " --UnpairedExtractFormat" if $_ALT_DIRECT_RULE_SCORE_1 || $_ALT_DIRECT_RULE_SCORE_2;
$cmd .= " --ConditionOnTargetLHS" if $_ALT_DIRECT_RULE_SCORE_1;
$cmd .= " --TreeFragments" if $_GHKM_TREE_FRAGMENTS;
+ $cmd .= " --PhraseOrientation" if $_GHKM_PHRASE_ORIENTATION;
+ $cmd .= " --PhraseOrientationPriors $_PHRASE_ORIENTATION_PRIORS_FILE" if $_GHKM_PHRASE_ORIENTATION && defined($_PHRASE_ORIENTATION_PRIORS_FILE);
+ $cmd .= " --SourceLabels $_GHKM_SOURCE_LABELS_FILE" if $_GHKM_SOURCE_LABELS && defined($_GHKM_SOURCE_LABELS_FILE);
$cmd .= " $DOMAIN" if $DOMAIN;
$cmd .= " $CORE_SCORE_OPTIONS" if defined($_SCORE_OPTIONS);
$cmd .= " --FlexibilityScore=$FLEX_SCORER" if $_FLEXIBILITY_SCORE;
@@ -1646,6 +1667,7 @@ sub score_phrase_phrase_extract {
$cmd .= " --SparseCountBinFeature $SPARSE_COUNT_BIN" if $SPARSE_COUNT_BIN;
$cmd .= " --GoodTuring $ttable_file.half.f2e.gz.coc" if $GOOD_TURING;
$cmd .= " --KneserNey $ttable_file.half.f2e.gz.coc" if $KNESER_NEY;
+ $cmd .= " --SourceLabels $_GHKM_SOURCE_LABELS_FILE" if $_GHKM_SOURCE_LABELS && defined($_GHKM_SOURCE_LABELS_FILE);
$cmd .= " | gzip -c > $ttable_file.gz";
@@ -1950,14 +1972,20 @@ sub create_ini {
$phrase_table_impl_name = "PhraseDictionaryOnDisk" if $phrase_table_impl==2;
$phrase_table_impl_name = "PhraseDictionaryMemory" if $phrase_table_impl==6;
$phrase_table_impl_name = "PhraseDictionaryALSuffixArray" if $phrase_table_impl==10;
+ $phrase_table_impl_name = "Mmsapt" if $phrase_table_impl==11;
+ $file .= "/" if $phrase_table_impl==11 && $file !~ /\/$/;
- #table limit
+ # table limit (maximum number of translation options per input phrase)
my $table_limit = 0;
if ($i == 0) {
$table_limit = 20;
}
+
# sum up...
- $feature_spec .= "$phrase_table_impl_name name=TranslationModel$i table-limit=$table_limit num-features=$basic_weight_count path=$file input-factor=$input_factor output-factor=$output_factor\n";
+ $feature_spec .= "$phrase_table_impl_name name=TranslationModel$i num-features=$basic_weight_count ".($phrase_table_impl==11?"base":"path")."=$file input-factor=$input_factor output-factor=$output_factor";
+ $feature_spec .= " L1=$___F L2=$___E ".$_MMSAPT if defined($_MMSAPT); # extra settings for memory mapped suffix array phrase table
+ $feature_spec .= " table-limit=$table_limit" unless defined($_MMSAPT);
+ $feature_spec .= "\n";
$weight_spec .= "TranslationModel$i=";
for(my $j=0;$j<$basic_weight_count;$j++) { $weight_spec .= " 0.2"; }
$weight_spec .= "\n";
@@ -1970,8 +1998,7 @@ sub create_ini {
exit 1 if $i < $stepsused{"T"}; # fatal to define less
}
- if ($_TRANSLITERATION_PHRASE_TABLE){
-
+ if ($_TRANSLITERATION_PHRASE_TABLE) {
$feature_spec .= "PhraseDictionaryMemory name=TranslationModel$i table-limit=100 num-features=4 path=$_TRANSLITERATION_PHRASE_TABLE input-factor=0 output-factor=0\n";
$weight_spec .= "TranslationModel$i= 0.2 0.2 0.2 0.2\n";
$i++;
@@ -2146,6 +2173,7 @@ sub create_ini {
print INI "WordPenalty\n";
print INI "PhrasePenalty\n";
print INI "SoftMatchingFeature name=SM0 path=$_UNKNOWN_WORD_SOFT_MATCHES_FILE\n" if $_TARGET_SYNTAX && defined($_UNKNOWN_WORD_SOFT_MATCHES_FILE);
+ print INI "SoftSourceSyntacticConstraintsFeature sourceLabelSetFile=$_GHKM_SOURCE_LABELS_FILE\n" if $_GHKM_SOURCE_LABELS && defined($_GHKM_SOURCE_LABELS_FILE);
print INI $feature_spec;
print INI "\n# dense weights for feature functions\n";
@@ -2153,6 +2181,7 @@ sub create_ini {
print INI "UnknownWordPenalty0= 1\n";
print INI "WordPenalty0= -1\n";
print INI "PhrasePenalty0= 0.2\n";
+ print INI "SoftSourceSyntacticConstraintsFeature0= 0.3 -0.3 -0.3\n" if $_GHKM_SOURCE_LABELS && defined($_GHKM_SOURCE_LABELS_FILE);
print INI $weight_spec;
close(INI);
}
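
create_ini above switches to implementation 11 ("Mmsapt") when --mmsapt is given: the model directory gets a trailing slash, the key becomes base= instead of path=, L1/L2 and the raw mmsapt settings are appended, and table-limit is omitted. A hedged Python sketch of how the resulting feature line is assembled; the factor values and the settings string below are illustrative only:

def ttable_feature_line(i, impl, path, num_features, f_ext, e_ext,
                        mmsapt=None, table_limit=20, in_factor=0, out_factor=0):
    name = {2: "PhraseDictionaryOnDisk", 6: "PhraseDictionaryMemory", 11: "Mmsapt"}[impl]
    if impl == 11 and not path.endswith("/"):
        path += "/"
    key = "base" if impl == 11 else "path"
    line = "%s name=TranslationModel%d num-features=%d %s=%s input-factor=%d output-factor=%d" % (
        name, i, num_features, key, path, in_factor, out_factor)
    if mmsapt is not None:
        line += " L1=%s L2=%s %s" % (f_ext, e_ext, mmsapt)   # extra mmsapt settings, no table-limit
    else:
        line += " table-limit=%d" % table_limit
    return line

print(ttable_feature_line(0, 11, "model/phrase-table-mmsapt.1", 7, "fr", "en",
                          mmsapt="pfwd=g pbwd=g smooth=0 sample=1000 workers=1"))
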
diff --git a/scripts/training/wrappers/make-factor-brown-cluster-mkcls.perl b/scripts/training/wrappers/make-factor-brown-cluster-mkcls.perl
index 60f341de8..13aa7f912 100755
--- a/scripts/training/wrappers/make-factor-brown-cluster-mkcls.perl
+++ b/scripts/training/wrappers/make-factor-brown-cluster-mkcls.perl
@@ -2,7 +2,7 @@
use strict;
-my ($cluster_file,$in,$out,$tmp) = @ARGV;
+my ($lowercase, $cluster_file,$in,$out,$tmp) = @ARGV;
my $CLUSTER = &read_cluster_from_mkcls($cluster_file);
@@ -17,7 +17,10 @@ while(<IN>) {
s/ $//;
my $first = 1;
foreach my $word (split) {
- my $cluster = defined($$CLUSTER{$word}) ? $$CLUSTER{$word} : "<unk>";
+ if ($lowercase) {
+ $word = lc($word);
+ }
+ my $cluster = defined($$CLUSTER{$word}) ? $$CLUSTER{$word} : "0";
print OUT " " unless $first;
print OUT $cluster;
$first = 0;
diff --git a/util/exception.hh b/util/exception.hh
index 2fb00667f..4e50a6f3a 100644
--- a/util/exception.hh
+++ b/util/exception.hh
@@ -1,5 +1,5 @@
-#ifndef UTIL_EXCEPTION__
-#define UTIL_EXCEPTION__
+#ifndef UTIL_EXCEPTION_H
+#define UTIL_EXCEPTION_H
#include <exception>
#include <limits>
@@ -84,7 +84,7 @@ template <class Except, class Data> typename Except::template ExceptionTag<Excep
UTIL_THROW_BACKEND(NULL, Exception, , Modify);
#define UTIL_THROW2(Modify) \
- UTIL_THROW_BACKEND(NULL, util::Exception, , Modify);
+ UTIL_THROW_BACKEND(NULL, util::Exception, , Modify);
#if __GNUC__ >= 3
#define UTIL_UNLIKELY(x) __builtin_expect (!!(x), 0)
@@ -146,4 +146,4 @@ inline std::size_t CheckOverflow(uint64_t value) {
} // namespace util
-#endif // UTIL_EXCEPTION__
+#endif // UTIL_EXCEPTION_H
diff --git a/util/read_compressed.cc b/util/read_compressed.cc
index 71ef0e251..cee98040b 100644
--- a/util/read_compressed.cc
+++ b/util/read_compressed.cc
@@ -374,7 +374,6 @@ ReadBase *ReadFactory(int fd, uint64_t &raw_amount, const void *already_data, co
header.resize(original + got);
}
if (header.empty()) {
- hold.release();
return new Complete();
}
switch (DetectMagic(&header[0], header.size())) {
@@ -435,4 +434,15 @@ std::size_t ReadCompressed::Read(void *to, std::size_t amount) {
return internal_->Read(to, amount, *this);
}
+std::size_t ReadCompressed::ReadOrEOF(void *const to_in, std::size_t amount) {
+ uint8_t *to = reinterpret_cast<uint8_t*>(to_in);
+ while (amount) {
+ std::size_t got = Read(to, amount);
+ if (!got) break;
+ to += got;
+ amount -= got;
+ }
+ return to - reinterpret_cast<uint8_t*>(to_in);
+}
+
} // namespace util
diff --git a/util/read_compressed.hh b/util/read_compressed.hh
index 763e6bbd3..767ee94b2 100644
--- a/util/read_compressed.hh
+++ b/util/read_compressed.hh
@@ -62,6 +62,10 @@ class ReadCompressed {
std::size_t Read(void *to, std::size_t amount);
+ // Repeatedly call read to fill a buffer unless EOF is hit.
+ // Return number of bytes read.
+ std::size_t ReadOrEOF(void *const to, std::size_t amount);
+
uint64_t RawAmount() const { return raw_amount_; }
private: