Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/moses
diff options
context:
space:
mode:
authorUlrich Germann <ugermann@inf.ed.ac.uk>2015-02-06 04:30:00 +0300
committerUlrich Germann <ugermann@inf.ed.ac.uk>2015-02-06 04:30:00 +0300
commitbe5799dca34027849fc40a38a63459e164f27add (patch)
tree140e865a962c546c12e2322ab76a56699e7338c6 /moses
parent80a9f84422f3b7ce3ddf0bcfcbe2e8d06bba9e98 (diff)
parent8b61f396a7558bf628c2e94a9583023b9ae34a8c (diff)
Merge branch 'master' of https://github.com/moses-smt/mosesdecoder
Conflicts: moses/TranslationOptionCollection.cpp moses/TranslationOptionCollectionLattice.cpp moses/TranslationOptionCollectionLattice.h moses/TranslationOptionList.h
Diffstat (limited to 'moses')
-rw-r--r--moses/AlignmentInfo.cpp12
-rw-r--r--moses/AlignmentInfo.h5
-rw-r--r--moses/AlignmentInfoCollection.h9
-rw-r--r--moses/BaseManager.cpp63
-rw-r--r--moses/BaseManager.h77
-rw-r--r--moses/BitmapContainer.cpp36
-rw-r--r--moses/BitmapContainer.h4
-rw-r--r--moses/ChartCell.cpp4
-rw-r--r--moses/ChartCell.h4
-rw-r--r--moses/ChartCellLabelSet.h18
-rw-r--r--moses/ChartHypothesis.cpp109
-rw-r--r--moses/ChartHypothesis.h54
-rw-r--r--moses/ChartHypothesisCollection.cpp4
-rw-r--r--moses/ChartKBestExtractor.cpp47
-rw-r--r--moses/ChartKBestExtractor.h11
-rw-r--r--moses/ChartManager.cpp600
-rw-r--r--moses/ChartManager.h84
-rw-r--r--moses/ChartParser.cpp13
-rw-r--r--moses/ChartParser.h10
-rw-r--r--moses/ChartRuleLookupManager.h2
-rw-r--r--moses/ChartTranslationOption.cpp4
-rw-r--r--moses/ChartTranslationOption.h5
-rw-r--r--moses/ChartTranslationOptionList.cpp45
-rw-r--r--moses/ChartTranslationOptions.cpp38
-rw-r--r--moses/ChartTranslationOptions.h5
-rw-r--r--moses/ConfusionNet.cpp507
-rw-r--r--moses/ConfusionNet.h2
-rw-r--r--moses/DecodeGraph.cpp3
-rw-r--r--moses/DecodeGraph.h8
-rw-r--r--moses/DecodeStep.h10
-rw-r--r--moses/DecodeStepTranslation.cpp10
-rw-r--r--moses/ExportInterface.cpp215
-rw-r--r--moses/ExportInterface.h42
-rw-r--r--moses/FF/BleuScoreFeature.cpp7
-rw-r--r--moses/FF/BleuScoreFeature.h34
-rw-r--r--moses/FF/ConstrainedDecoding.cpp29
-rw-r--r--moses/FF/ConstrainedDecoding.h32
-rw-r--r--moses/FF/ControlRecombination.cpp2
-rw-r--r--moses/FF/ControlRecombination.h29
-rw-r--r--moses/FF/CountNonTerms.cpp60
-rw-r--r--moses/FF/CountNonTerms.h35
-rw-r--r--moses/FF/CoveredReferenceFeature.cpp41
-rw-r--r--moses/FF/CoveredReferenceFeature.h24
-rw-r--r--moses/FF/DecodeFeature.cpp2
-rw-r--r--moses/FF/DecodeFeature.h51
-rw-r--r--moses/FF/DistortionScoreProducer.cpp3
-rw-r--r--moses/FF/DistortionScoreProducer.h33
-rw-r--r--moses/FF/DynamicCacheBasedLanguageModel.cpp458
-rw-r--r--moses/FF/DynamicCacheBasedLanguageModel.h164
-rw-r--r--moses/FF/ExternalFeature.h29
-rw-r--r--moses/FF/Factory.cpp69
-rw-r--r--moses/FF/FeatureFunction.cpp15
-rw-r--r--moses/FF/FeatureFunction.h40
-rw-r--r--moses/FF/GlobalLexicalModel.cpp9
-rw-r--r--moses/FF/GlobalLexicalModel.h30
-rw-r--r--moses/FF/GlobalLexicalModelUnlimited.cpp14
-rw-r--r--moses/FF/GlobalLexicalModelUnlimited.h31
-rw-r--r--moses/FF/HyperParameterAsWeight.cpp2
-rw-r--r--moses/FF/HyperParameterAsWeight.h37
-rw-r--r--moses/FF/InputFeature.cpp14
-rw-r--r--moses/FF/InputFeature.h30
-rw-r--r--moses/FF/InternalTree.cpp370
-rw-r--r--moses/FF/InternalTree.h227
-rw-r--r--moses/FF/LexicalReordering/LexicalReordering.cpp14
-rw-r--r--moses/FF/LexicalReordering/LexicalReordering.h41
-rw-r--r--moses/FF/LexicalReordering/LexicalReorderingState.cpp14
-rw-r--r--moses/FF/LexicalReordering/LexicalReorderingState.h2
-rw-r--r--moses/FF/LexicalReordering/LexicalReorderingTable.cpp6
-rw-r--r--moses/FF/LexicalReordering/SparseReordering.cpp65
-rw-r--r--moses/FF/LexicalReordering/SparseReordering.h30
-rw-r--r--moses/FF/MaxSpanFreeNonTermSource.cpp50
-rw-r--r--moses/FF/MaxSpanFreeNonTermSource.h59
-rw-r--r--moses/FF/NieceTerminal.cpp78
-rw-r--r--moses/FF/NieceTerminal.h29
-rw-r--r--moses/FF/OSM-Feature/KenOSM.cpp38
-rw-r--r--moses/FF/OSM-Feature/KenOSM.h60
-rw-r--r--moses/FF/OSM-Feature/OpSequenceModel.cpp15
-rw-r--r--moses/FF/OSM-Feature/OpSequenceModel.h23
-rw-r--r--moses/FF/PhraseBoundaryFeature.h30
-rw-r--r--moses/FF/PhraseLengthFeature.cpp6
-rw-r--r--moses/FF/PhraseLengthFeature.h29
-rw-r--r--moses/FF/PhraseOrientationFeature.cpp898
-rw-r--r--moses/FF/PhraseOrientationFeature.h391
-rw-r--r--moses/FF/PhrasePairFeature.cpp10
-rw-r--r--moses/FF/PhrasePairFeature.h31
-rw-r--r--moses/FF/PhrasePenalty.cpp36
-rw-r--r--moses/FF/PhrasePenalty.h33
-rw-r--r--moses/FF/ReferenceComparison.cpp2
-rw-r--r--moses/FF/ReferenceComparison.h64
-rw-r--r--moses/FF/RuleScope.cpp35
-rw-r--r--moses/FF/RuleScope.h48
-rw-r--r--moses/FF/SetSourcePhrase.cpp10
-rw-r--r--moses/FF/SetSourcePhrase.h40
-rw-r--r--moses/FF/SkeletonChangeInput.cpp50
-rw-r--r--moses/FF/SkeletonChangeInput.h24
-rw-r--r--moses/FF/SkeletonStatefulFF.cpp20
-rw-r--r--moses/FF/SkeletonStatefulFF.h24
-rw-r--r--moses/FF/SkeletonStatelessFF.cpp36
-rw-r--r--moses/FF/SkeletonStatelessFF.h25
-rw-r--r--moses/FF/SkeletonTranslationOptionListFeature.h67
-rw-r--r--moses/FF/SoftMatchingFeature.cpp67
-rw-r--r--moses/FF/SoftMatchingFeature.h25
-rw-r--r--moses/FF/SoftSourceSyntacticConstraintsFeature.cpp377
-rw-r--r--moses/FF/SoftSourceSyntacticConstraintsFeature.h51
-rw-r--r--moses/FF/SourceGHKMTreeInputMatchFeature.cpp10
-rw-r--r--moses/FF/SourceGHKMTreeInputMatchFeature.h25
-rw-r--r--moses/FF/SourceWordDeletionFeature.cpp7
-rw-r--r--moses/FF/SourceWordDeletionFeature.h36
-rw-r--r--moses/FF/SpanLength.cpp60
-rw-r--r--moses/FF/SpanLength.h60
-rw-r--r--moses/FF/SparseHieroReorderingFeature.cpp59
-rw-r--r--moses/FF/SparseHieroReorderingFeature.h43
-rw-r--r--moses/FF/StatefulFeatureFunction.h5
-rw-r--r--moses/FF/StatelessFeatureFunction.h9
-rw-r--r--moses/FF/SyntaxRHS.cpp36
-rw-r--r--moses/FF/SyntaxRHS.h29
-rw-r--r--moses/FF/TargetBigramFeature.cpp4
-rw-r--r--moses/FF/TargetBigramFeature.h30
-rw-r--r--moses/FF/TargetNgramFeature.cpp8
-rw-r--r--moses/FF/TargetNgramFeature.h29
-rw-r--r--moses/FF/TargetWordInsertionFeature.cpp6
-rw-r--r--moses/FF/TargetWordInsertionFeature.h34
-rw-r--r--moses/FF/TreeStructureFeature.cpp39
-rw-r--r--moses/FF/TreeStructureFeature.h43
-rw-r--r--moses/FF/UnalignedWordCountFeature.cpp85
-rw-r--r--moses/FF/UnalignedWordCountFeature.h47
-rw-r--r--moses/FF/UnknownWordPenaltyProducer.h37
-rw-r--r--moses/FF/VW/ThreadLocalByFeatureStorage.h82
-rw-r--r--moses/FF/VW/VW.h436
-rw-r--r--moses/FF/VW/VWFeatureBase.cpp12
-rw-r--r--moses/FF/VW/VWFeatureBase.h122
-rw-r--r--moses/FF/VW/VWFeatureSource.h43
-rw-r--r--moses/FF/VW/VWFeatureSourceBagOfWords.h34
-rw-r--r--moses/FF/VW/VWFeatureSourceExternalFeatures.h63
-rw-r--r--moses/FF/VW/VWFeatureSourceIndicator.h42
-rw-r--r--moses/FF/VW/VWFeatureSourcePhraseInternal.h39
-rw-r--r--moses/FF/VW/VWFeatureSourceWindow.h53
-rw-r--r--moses/FF/VW/VWFeatureTarget.h41
-rw-r--r--moses/FF/VW/VWFeatureTargetIndicator.h31
-rw-r--r--moses/FF/VW/VWFeatureTargetPhraseInternal.h33
-rw-r--r--moses/FF/VW/VWFeatureTargetPhraseScores.h52
-rw-r--r--moses/FF/WordPenaltyProducer.cpp6
-rw-r--r--moses/FF/WordPenaltyProducer.h40
-rw-r--r--moses/FF/WordTranslationFeature.cpp29
-rw-r--r--moses/FF/WordTranslationFeature.h35
-rw-r--r--moses/FactorCollection.cpp6
-rw-r--r--moses/FeatureVector.cpp9
-rw-r--r--moses/FeatureVector.h1
-rw-r--r--moses/File.h22
-rw-r--r--moses/ForestInput.cpp250
-rw-r--r--moses/ForestInput.h90
-rw-r--r--moses/GenerationDictionary.cpp1
-rw-r--r--moses/GenerationDictionary.h2
-rw-r--r--moses/HypergraphOutput.cpp44
-rw-r--r--moses/HypergraphOutput.h43
-rw-r--r--moses/Hypothesis.cpp185
-rw-r--r--moses/Hypothesis.h17
-rw-r--r--moses/HypothesisStackCubePruning.cpp2
-rw-r--r--moses/IOWrapper.cpp1430
-rw-r--r--moses/IOWrapper.h153
-rw-r--r--moses/Incremental.cpp246
-rw-r--r--moses/Incremental.h62
-rw-r--r--moses/InputPath.cpp22
-rw-r--r--moses/InputPath.h9
-rw-r--r--moses/Jamfile22
-rw-r--r--moses/LM/Backward.cpp52
-rw-r--r--moses/LM/Base.cpp6
-rw-r--r--moses/LM/Base.h23
-rw-r--r--moses/LM/BilingualLM.cpp253
-rw-r--r--moses/LM/BilingualLM.h66
-rw-r--r--moses/LM/DALMWrapper.cpp755
-rw-r--r--moses/LM/DALMWrapper.h61
-rw-r--r--moses/LM/IRST.cpp204
-rw-r--r--moses/LM/IRST.h42
-rw-r--r--moses/LM/Implementation.cpp29
-rw-r--r--moses/LM/Jamfile6
-rw-r--r--moses/LM/Ken.cpp101
-rw-r--r--moses/LM/LDHT.cpp4
-rw-r--r--moses/LM/MaxEntSRI.cpp2
-rw-r--r--moses/LM/NeuralLMWrapper.cpp2
-rw-r--r--moses/LM/NeuralLMWrapper.h5
-rw-r--r--moses/LM/SRI.cpp2
-rw-r--r--moses/LM/SingleFactor.cpp16
-rw-r--r--moses/LM/bilingual-lm/BiLM_NPLM.cpp69
-rw-r--r--moses/LM/bilingual-lm/BiLM_NPLM.h15
-rw-r--r--moses/LM/oxlm/OxLM.cpp38
-rw-r--r--moses/LM/oxlm/OxLM.h18
-rw-r--r--moses/LM/oxlm/OxLMMapper.cpp30
-rw-r--r--moses/LM/oxlm/OxLMMapper.h22
-rw-r--r--moses/LM/oxlm/OxLMParallelMapper.cpp17
-rw-r--r--moses/LM/oxlm/OxLMParallelMapper.h16
-rw-r--r--moses/LM/oxlm/SourceOxLM.cpp46
-rw-r--r--moses/LM/oxlm/SourceOxLM.h20
-rw-r--r--moses/LVoc.h2
-rw-r--r--moses/LatticeMBR.cpp6
-rw-r--r--moses/LatticeMBR.h6
-rw-r--r--moses/Manager.cpp642
-rw-r--r--moses/Manager.h53
-rw-r--r--moses/MockHypothesis.cpp2
-rw-r--r--moses/PCNTools.h8
-rw-r--r--moses/PDTAimp.cpp52
-rw-r--r--moses/PP/CountsPhraseProperty.cpp10
-rw-r--r--moses/PP/CountsPhraseProperty.h4
-rw-r--r--moses/PP/Factory.cpp14
-rw-r--r--moses/PP/NonTermContextProperty.cpp128
-rw-r--r--moses/PP/NonTermContextProperty.h46
-rw-r--r--moses/PP/OrientationPhraseProperty.cpp8
-rw-r--r--moses/PP/OrientationPhraseProperty.h20
-rw-r--r--moses/PP/PhraseProperty.cpp4
-rw-r--r--moses/PP/PhraseProperty.h14
-rw-r--r--moses/PP/SourceLabelsPhraseProperty.cpp60
-rw-r--r--moses/PP/SourceLabelsPhraseProperty.h17
-rw-r--r--moses/PP/SpanLengthPhraseProperty.cpp119
-rw-r--r--moses/PP/SpanLengthPhraseProperty.h22
-rw-r--r--moses/Parameter.cpp316
-rw-r--r--moses/Parameter.h35
-rw-r--r--moses/PartialTranslOptColl.cpp6
-rw-r--r--moses/Phrase.cpp6
-rw-r--r--moses/Phrase.h9
-rw-r--r--moses/PrefixTreeMap.cpp16
-rw-r--r--moses/RuleCube.cpp4
-rw-r--r--moses/RuleCube.h9
-rw-r--r--moses/ScoreComponentCollection.cpp59
-rw-r--r--moses/ScoreComponentCollection.h32
-rw-r--r--moses/ScoreComponentCollectionTest.cpp24
-rw-r--r--moses/Search.cpp5
-rw-r--r--moses/Search.h2
-rw-r--r--moses/SearchCubePruning.cpp2
-rw-r--r--moses/SearchCubePruning.h2
-rw-r--r--moses/SearchNormal.cpp16
-rw-r--r--moses/SearchNormal.h2
-rw-r--r--moses/SearchNormalBatch.cpp4
-rw-r--r--moses/SearchNormalBatch.h2
-rw-r--r--moses/Sentence.cpp43
-rw-r--r--moses/Sentence.h6
-rw-r--r--moses/StaticData.cpp789
-rw-r--r--moses/StaticData.h82
-rw-r--r--moses/SyntacticLanguageModel.h4
-rw-r--r--moses/Syntax/BoundedPriorityContainer.h40
-rw-r--r--moses/Syntax/Cube.cpp22
-rw-r--r--moses/Syntax/Cube.h18
-rw-r--r--moses/Syntax/CubeQueue.h12
-rw-r--r--moses/Syntax/F2S/DerivationWriter.cpp101
-rw-r--r--moses/Syntax/F2S/DerivationWriter.h36
-rw-r--r--moses/Syntax/F2S/Forest.cpp34
-rw-r--r--moses/Syntax/F2S/Forest.h51
-rw-r--r--moses/Syntax/F2S/GlueRuleSynthesizer.cpp85
-rw-r--r--moses/Syntax/F2S/GlueRuleSynthesizer.h37
-rw-r--r--moses/Syntax/F2S/HyperPath.cpp20
-rw-r--r--moses/Syntax/F2S/HyperPath.h35
-rw-r--r--moses/Syntax/F2S/HyperPathLoader.cpp172
-rw-r--r--moses/Syntax/F2S/HyperPathLoader.h70
-rw-r--r--moses/Syntax/F2S/HyperTree.cpp70
-rw-r--r--moses/Syntax/F2S/HyperTree.h79
-rw-r--r--moses/Syntax/F2S/HyperTreeCreator.h32
-rw-r--r--moses/Syntax/F2S/HyperTreeLoader.cpp148
-rw-r--r--moses/Syntax/F2S/HyperTreeLoader.h31
-rw-r--r--moses/Syntax/F2S/Manager-inl.h318
-rw-r--r--moses/Syntax/F2S/Manager.h69
-rw-r--r--moses/Syntax/F2S/PHyperedgeToSHyperedgeBundle.h32
-rw-r--r--moses/Syntax/F2S/PVertexToStackMap.h20
-rw-r--r--moses/Syntax/F2S/RuleMatcher.h24
-rw-r--r--moses/Syntax/F2S/RuleMatcherCallback.h46
-rw-r--r--moses/Syntax/F2S/RuleMatcherHyperTree-inl.h192
-rw-r--r--moses/Syntax/F2S/RuleMatcherHyperTree.h78
-rw-r--r--moses/Syntax/F2S/TopologicalSorter.cpp55
-rw-r--r--moses/Syntax/F2S/TopologicalSorter.h34
-rw-r--r--moses/Syntax/F2S/TreeFragmentTokenizer.cpp93
-rw-r--r--moses/Syntax/F2S/TreeFragmentTokenizer.h73
-rw-r--r--moses/Syntax/KBestExtractor.cpp74
-rw-r--r--moses/Syntax/KBestExtractor.h13
-rw-r--r--moses/Syntax/Manager.cpp236
-rw-r--r--moses/Syntax/Manager.h63
-rw-r--r--moses/Syntax/NonTerminalMap.h40
-rw-r--r--moses/Syntax/PHyperedge.h7
-rw-r--r--moses/Syntax/PLabel.h15
-rw-r--r--moses/Syntax/PVertex.h10
-rw-r--r--moses/Syntax/RuleTable.h4
-rw-r--r--moses/Syntax/RuleTableFF.cpp22
-rw-r--r--moses/Syntax/RuleTableFF.h15
-rw-r--r--moses/Syntax/S2T/DerivationWriter.cpp2
-rw-r--r--moses/Syntax/S2T/DerivationWriter.h4
-rw-r--r--moses/Syntax/S2T/Manager-inl.h68
-rw-r--r--moses/Syntax/S2T/Manager.h20
-rw-r--r--moses/Syntax/S2T/OovHandler-inl.h8
-rw-r--r--moses/Syntax/S2T/OovHandler.h5
-rw-r--r--moses/Syntax/S2T/PChart.h17
-rw-r--r--moses/Syntax/S2T/PHyperedgeToSHyperedgeBundle.h9
-rw-r--r--moses/Syntax/S2T/ParserCallback.h40
-rw-r--r--moses/Syntax/S2T/Parsers/Parser.h4
-rw-r--r--moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser-inl.h51
-rw-r--r--moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser.h8
-rw-r--r--moses/Syntax/S2T/Parsers/Scope3Parser/Parser-inl.h16
-rw-r--r--moses/Syntax/S2T/Parsers/Scope3Parser/Parser.h6
-rw-r--r--moses/Syntax/S2T/Parsers/Scope3Parser/PatternApplicationTrie.cpp16
-rw-r--r--moses/Syntax/S2T/Parsers/Scope3Parser/PatternApplicationTrie.h10
-rw-r--r--moses/Syntax/S2T/Parsers/Scope3Parser/SentenceMap.h2
-rw-r--r--moses/Syntax/S2T/Parsers/Scope3Parser/SymbolRangeCalculator.cpp8
-rw-r--r--moses/Syntax/S2T/Parsers/Scope3Parser/SymbolRangeCalculator.h4
-rw-r--r--moses/Syntax/S2T/Parsers/Scope3Parser/TailLattice.h8
-rw-r--r--moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeBuilder.cpp6
-rw-r--r--moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeBuilder.h4
-rw-r--r--moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeSearcher.h8
-rw-r--r--moses/Syntax/S2T/RuleTrie.h4
-rw-r--r--moses/Syntax/S2T/RuleTrieCYKPlus.cpp30
-rw-r--r--moses/Syntax/S2T/RuleTrieCYKPlus.h26
-rw-r--r--moses/Syntax/S2T/RuleTrieCreator.h6
-rw-r--r--moses/Syntax/S2T/RuleTrieLoader.cpp14
-rw-r--r--moses/Syntax/S2T/RuleTrieLoader.h2
-rw-r--r--moses/Syntax/S2T/RuleTrieScope3.cpp10
-rw-r--r--moses/Syntax/S2T/RuleTrieScope3.h42
-rw-r--r--moses/Syntax/S2T/SChart.h13
-rw-r--r--moses/Syntax/SHyperedge.cpp42
-rw-r--r--moses/Syntax/SHyperedge.h11
-rw-r--r--moses/Syntax/SHyperedgeBundle.h3
-rw-r--r--moses/Syntax/SHyperedgeBundleScorer.h9
-rw-r--r--moses/Syntax/SLabel.h18
-rw-r--r--moses/Syntax/SVertex.h3
-rw-r--r--moses/Syntax/SVertexRecombinationOrderer.h11
-rw-r--r--moses/Syntax/SVertexStack.h10
-rw-r--r--moses/Syntax/SymbolEqualityPred.h2
-rw-r--r--moses/Syntax/SymbolHasher.h2
-rw-r--r--moses/Syntax/T2S/GlueRuleSynthesizer.cpp77
-rw-r--r--moses/Syntax/T2S/GlueRuleSynthesizer.h35
-rw-r--r--moses/Syntax/T2S/HyperTree.h81
-rw-r--r--moses/Syntax/T2S/InputTree.h38
-rw-r--r--moses/Syntax/T2S/InputTreeBuilder.cpp171
-rw-r--r--moses/Syntax/T2S/InputTreeBuilder.h39
-rw-r--r--moses/Syntax/T2S/InputTreeToForest.cpp52
-rw-r--r--moses/Syntax/T2S/InputTreeToForest.h19
-rw-r--r--moses/Syntax/T2S/Manager-inl.h301
-rw-r--r--moses/Syntax/T2S/Manager.h67
-rw-r--r--moses/Syntax/T2S/RuleMatcher.h24
-rw-r--r--moses/Syntax/T2S/RuleMatcherSCFG-inl.h107
-rw-r--r--moses/Syntax/T2S/RuleMatcherSCFG.h42
-rw-r--r--moses/Syntax/T2S/RuleTrie.cpp139
-rw-r--r--moses/Syntax/T2S/RuleTrie.h90
-rw-r--r--moses/Syntax/T2S/RuleTrieCreator.h32
-rw-r--r--moses/Syntax/T2S/RuleTrieLoader.cpp154
-rw-r--r--moses/Syntax/T2S/RuleTrieLoader.h31
-rw-r--r--moses/TabbedSentence.cpp72
-rw-r--r--moses/TabbedSentence.h87
-rw-r--r--moses/TargetPhrase.cpp85
-rw-r--r--moses/TargetPhrase.h33
-rw-r--r--moses/TargetPhraseCollection.h23
-rw-r--r--moses/Timer.cpp16
-rw-r--r--moses/TrainingTask.h47
-rw-r--r--moses/TranslationAnalysis.h7
-rw-r--r--moses/TranslationModel/BilingualDynSuffixArray.cpp6
-rw-r--r--moses/TranslationModel/BilingualDynSuffixArray.h4
-rw-r--r--moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.h11
-rw-r--r--moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp271
-rw-r--r--moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.h6
-rw-r--r--moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp271
-rw-r--r--moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h10
-rw-r--r--moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp17
-rw-r--r--moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.h2
-rw-r--r--moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp4
-rw-r--r--moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h2
-rw-r--r--moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.cpp57
-rw-r--r--moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.h31
-rw-r--r--moses/TranslationModel/CYKPlusParser/DotChartInMemory.h3
-rw-r--r--moses/TranslationModel/CompactPT/BlockHashIndex.cpp8
-rw-r--r--moses/TranslationModel/CompactPT/CanonicalHuffman.h8
-rw-r--r--moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp10
-rw-r--r--moses/TranslationModel/CompactPT/ListCoders.h3
-rw-r--r--moses/TranslationModel/CompactPT/PhraseDecoder.h1
-rw-r--r--moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp3
-rw-r--r--moses/TranslationModel/CompactPT/PhraseTableCreator.cpp95
-rw-r--r--moses/TranslationModel/CompactPT/PhraseTableCreator.h8
-rw-r--r--moses/TranslationModel/CompactPT/StringVectorTemp.h430
-rw-r--r--moses/TranslationModel/DynSAInclude/FileHandler.cpp4
-rw-r--r--moses/TranslationModel/DynSAInclude/RandLMFilter.h10
-rw-r--r--moses/TranslationModel/DynSAInclude/onlineRLM.h6
-rw-r--r--moses/TranslationModel/DynSAInclude/params.cpp4
-rw-r--r--moses/TranslationModel/DynSAInclude/quantizer.h6
-rw-r--r--moses/TranslationModel/DynSAInclude/vocab.cpp2
-rw-r--r--moses/TranslationModel/PhraseDictionary.cpp70
-rw-r--r--moses/TranslationModel/PhraseDictionary.h11
-rw-r--r--moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp759
-rw-r--r--moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h179
-rw-r--r--moses/TranslationModel/PhraseDictionaryMemory.cpp11
-rw-r--r--moses/TranslationModel/PhraseDictionaryMultiModel.cpp22
-rw-r--r--moses/TranslationModel/PhraseDictionaryMultiModel.h1
-rw-r--r--moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp10
-rw-r--r--moses/TranslationModel/PhraseDictionaryMultiModelCounts.h1
-rw-r--r--moses/TranslationModel/PhraseDictionaryNodeMemory.cpp14
-rw-r--r--moses/TranslationModel/PhraseDictionaryScope3.cpp1
-rw-r--r--moses/TranslationModel/PhraseDictionaryTransliteration.cpp169
-rw-r--r--moses/TranslationModel/PhraseDictionaryTree.cpp32
-rw-r--r--moses/TranslationModel/PhraseDictionaryTreeAdaptor.cpp3
-rw-r--r--moses/TranslationModel/ProbingPT/ProbingPT.cpp145
-rw-r--r--moses/TranslationModel/ProbingPT/hash.cpp39
-rw-r--r--moses/TranslationModel/ProbingPT/huffmanish.cpp671
-rw-r--r--moses/TranslationModel/ProbingPT/huffmanish.hh4
-rw-r--r--moses/TranslationModel/ProbingPT/line_splitter.cpp92
-rw-r--r--moses/TranslationModel/ProbingPT/probing_hash_utils.cpp56
-rw-r--r--moses/TranslationModel/ProbingPT/quering.cpp318
-rw-r--r--moses/TranslationModel/ProbingPT/quering.hh4
-rw-r--r--moses/TranslationModel/ProbingPT/storing.cpp280
-rw-r--r--moses/TranslationModel/ProbingPT/storing.hh4
-rw-r--r--moses/TranslationModel/ProbingPT/tests/tokenization_tests.cpp346
-rw-r--r--moses/TranslationModel/ProbingPT/tests/vocabid_test.cpp81
-rw-r--r--moses/TranslationModel/ProbingPT/vocabid.cpp41
-rw-r--r--moses/TranslationModel/RuleTable/LoaderCompact.cpp13
-rw-r--r--moses/TranslationModel/RuleTable/LoaderFactory.cpp5
-rw-r--r--moses/TranslationModel/RuleTable/LoaderStandard.cpp20
-rw-r--r--moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp3
-rw-r--r--moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp30
-rw-r--r--moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp26
-rw-r--r--moses/TranslationModel/Scope3Parser/Parser.cpp3
-rw-r--r--moses/TranslationModel/Scope3Parser/Parser.h2
-rw-r--r--moses/TranslationModel/SkeletonPT.cpp6
-rw-r--r--moses/TranslationModel/UG/Jamfile9
-rw-r--r--moses/TranslationModel/UG/generic/file_io/ug_stream.cpp12
-rw-r--r--moses/TranslationModel/UG/mm/mtt-build.cc4
-rw-r--r--moses/TranslationModel/UG/spe-check-coverage.cc8
-rw-r--r--moses/TranslationModel/fuzzy-match/create_xml.cpp6
-rw-r--r--moses/TranslationOption.cpp4
-rw-r--r--moses/TranslationOption.h4
-rw-r--r--moses/TranslationOptionCollection.cpp3
-rw-r--r--moses/TranslationOptionCollection.h7
-rw-r--r--moses/TranslationOptionCollectionConfusionNet.cpp34
-rw-r--r--moses/TranslationOptionCollectionLattice.cpp97
-rw-r--r--moses/TranslationOptionCollectionLattice.h8
-rw-r--r--moses/TranslationOptionList.h4
-rw-r--r--moses/TranslationTask.cpp424
-rw-r--r--moses/TranslationTask.h44
-rw-r--r--moses/TreeInput.cpp15
-rw-r--r--moses/TreeInput.h7
-rw-r--r--moses/TrellisPath.cpp2
-rw-r--r--moses/TrellisPath.h2
-rw-r--r--moses/TypeDef.h18
-rw-r--r--moses/UserMessage.cpp62
-rw-r--r--moses/UserMessage.h54
-rw-r--r--moses/Util.cpp174
-rw-r--r--moses/Util.h83
-rw-r--r--moses/Word.cpp98
-rw-r--r--moses/Word.h2
-rw-r--r--moses/WordLattice.cpp29
-rw-r--r--moses/WordsBitmap.cpp2
-rw-r--r--moses/WordsBitmap.h2
-rw-r--r--moses/WordsRange.h1
-rw-r--r--moses/XmlOption.cpp125
444 files changed, 18777 insertions, 8525 deletions
diff --git a/moses/AlignmentInfo.cpp b/moses/AlignmentInfo.cpp
index b059a9ffd..97efc25eb 100644
--- a/moses/AlignmentInfo.cpp
+++ b/moses/AlignmentInfo.cpp
@@ -21,6 +21,7 @@
#include "AlignmentInfo.h"
#include "TypeDef.h"
#include "StaticData.h"
+#include "Util.h"
#include "util/exception.hh"
namespace Moses
@@ -40,6 +41,17 @@ AlignmentInfo::AlignmentInfo(const std::vector<unsigned char> &aln)
BuildNonTermIndexMaps();
}
+AlignmentInfo::AlignmentInfo(const std::string &str)
+{
+ std::vector<std::string> points = Tokenize(str, " ");
+ std::vector<std::string>::const_iterator iter;
+ for (iter = points.begin(); iter != points.end(); iter++) {
+ std::vector<size_t> point = Tokenize<size_t>(*iter, "-");
+ UTIL_THROW_IF2(point.size() != 2, "Bad format of word alignment point: " << *iter);
+ Add(point[0], point[1]);
+ }
+}
+
void AlignmentInfo::BuildNonTermIndexMaps()
{
if (m_collection.empty()) {
diff --git a/moses/AlignmentInfo.h b/moses/AlignmentInfo.h
index 895dde8a1..c74ff340c 100644
--- a/moses/AlignmentInfo.h
+++ b/moses/AlignmentInfo.h
@@ -41,6 +41,7 @@ class AlignmentInfo
friend struct AlignmentInfoOrderer;
friend struct AlignmentInfoHasher;
friend class AlignmentInfoCollection;
+ friend class VW;
public:
typedef std::set<std::pair<size_t,size_t> > CollType;
@@ -95,6 +96,10 @@ private:
//! AlignmentInfo objects should only be created by an AlignmentInfoCollection
explicit AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs);
explicit AlignmentInfo(const std::vector<unsigned char> &aln);
+
+ // used only by VW to load word alignment between sentences
+ explicit AlignmentInfo(const std::string &str);
+
void BuildNonTermIndexMaps();
CollType m_collection;
diff --git a/moses/AlignmentInfoCollection.h b/moses/AlignmentInfoCollection.h
index 1db0a2268..92462d3b8 100644
--- a/moses/AlignmentInfoCollection.h
+++ b/moses/AlignmentInfoCollection.h
@@ -46,14 +46,13 @@ public:
* contains such an object then returns a pointer to it; otherwise a new
* one is inserted.
*/
- private:
+private:
const AlignmentInfo* Add(AlignmentInfo const& ainfo);
- public:
+public:
template<typename ALNREP>
- AlignmentInfo const *
- Add(ALNREP const & aln)
- {
+ AlignmentInfo const *
+ Add(ALNREP const & aln) {
return this->Add(AlignmentInfo(aln));
}
diff --git a/moses/BaseManager.cpp b/moses/BaseManager.cpp
new file mode 100644
index 000000000..2c57e8336
--- /dev/null
+++ b/moses/BaseManager.cpp
@@ -0,0 +1,63 @@
+#include <vector>
+
+#include "StaticData.h"
+#include "BaseManager.h"
+#include "moses/FF/StatelessFeatureFunction.h"
+#include "moses/FF/StatefulFeatureFunction.h"
+
+using namespace std;
+
+namespace Moses
+{
+/***
+ * print surface factor only for the given phrase
+ */
+void BaseManager::OutputSurface(std::ostream &out, const Phrase &phrase,
+ const std::vector<FactorType> &outputFactorOrder,
+ bool reportAllFactors) const
+{
+ UTIL_THROW_IF2(outputFactorOrder.size() == 0,
+ "Cannot be empty phrase");
+ if (reportAllFactors == true) {
+ out << phrase;
+ } else {
+ size_t size = phrase.GetSize();
+ for (size_t pos = 0 ; pos < size ; pos++) {
+ const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
+ out << *factor;
+ UTIL_THROW_IF2(factor == NULL,
+ "Empty factor 0 at position " << pos);
+
+ for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
+ const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
+ UTIL_THROW_IF2(factor == NULL,
+ "Empty factor " << i << " at position " << pos);
+
+ out << "|" << *factor;
+ }
+ out << " ";
+ }
+ }
+}
+
+// Emulates the old operator<<(ostream &, const DottedRule &) function. The
+// output format is a bit odd (reverse order and double spacing between symbols)
+// but there are scripts and tools that expect the output of -T to look like
+// that.
+void BaseManager::WriteApplicationContext(std::ostream &out,
+ const ApplicationContext &context) const
+{
+ assert(!context.empty());
+ ApplicationContext::const_reverse_iterator p = context.rbegin();
+ while (true) {
+ out << p->second << "=" << p->first << " ";
+ if (++p == context.rend()) {
+ break;
+ }
+ out << " ";
+ }
+}
+
+} // namespace
+
+
diff --git a/moses/BaseManager.h b/moses/BaseManager.h
new file mode 100644
index 000000000..c0b6d22c1
--- /dev/null
+++ b/moses/BaseManager.h
@@ -0,0 +1,77 @@
+#pragma once
+
+#include <iostream>
+#include <string>
+#include "ScoreComponentCollection.h"
+#include "InputType.h"
+
+namespace Moses
+{
+class ScoreComponentCollection;
+class FeatureFunction;
+class OutputCollector;
+
+class BaseManager
+{
+protected:
+ const InputType &m_source; /**< source sentence to be translated */
+
+ BaseManager(const InputType &source)
+ :m_source(source) {
+ }
+
+ // output
+ typedef std::vector<std::pair<Moses::Word, Moses::WordsRange> > ApplicationContext;
+ typedef std::set< std::pair<size_t, size_t> > Alignments;
+
+ void OutputSurface(std::ostream &out,
+ const Phrase &phrase,
+ const std::vector<FactorType> &outputFactorOrder,
+ bool reportAllFactors) const;
+ void WriteApplicationContext(std::ostream &out,
+ const ApplicationContext &context) const;
+
+ template <class T>
+ void ShiftOffsets(std::vector<T> &offsets, T shift) const {
+ T currPos = shift;
+ for (size_t i = 0; i < offsets.size(); ++i) {
+ if (offsets[i] == 0) {
+ offsets[i] = currPos;
+ ++currPos;
+ } else {
+ currPos += offsets[i];
+ }
+ }
+ }
+
+public:
+ virtual ~BaseManager() {
+ }
+
+ //! the input sentence being decoded
+ const InputType& GetSource() const {
+ return m_source;
+ }
+
+ virtual void Decode() = 0;
+ // outputs
+ virtual void OutputBest(OutputCollector *collector) const = 0;
+ virtual void OutputNBest(OutputCollector *collector) const = 0;
+ virtual void OutputLatticeSamples(OutputCollector *collector) const = 0;
+ virtual void OutputAlignment(OutputCollector *collector) const = 0;
+ virtual void OutputDetailedTranslationReport(OutputCollector *collector) const = 0;
+ virtual void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const = 0;
+ virtual void OutputWordGraph(OutputCollector *collector) const = 0;
+ virtual void OutputSearchGraph(OutputCollector *collector) const = 0;
+ virtual void OutputUnknowns(OutputCollector *collector) const = 0;
+ virtual void OutputSearchGraphSLF() const = 0;
+ virtual void OutputSearchGraphHypergraph() const = 0;
+
+ /***
+ * to be called after processing a sentence
+ */
+ virtual void CalcDecoderStatistics() const = 0;
+
+};
+
+}
diff --git a/moses/BitmapContainer.cpp b/moses/BitmapContainer.cpp
index 061a5953f..40ec74153 100644
--- a/moses/BitmapContainer.cpp
+++ b/moses/BitmapContainer.cpp
@@ -56,19 +56,15 @@ public:
m_transOptRange(transOptRange) {
m_totalWeightDistortion = 0;
const StaticData &staticData = StaticData::Instance();
- const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
- std::vector<FeatureFunction*>::const_iterator iter;
+
+ const std::vector<const DistortionScoreProducer*> &ffs = DistortionScoreProducer::GetDistortionFeatureFunctions();
+ std::vector<const DistortionScoreProducer*>::const_iterator iter;
for (iter = ffs.begin(); iter != ffs.end(); ++iter) {
- const FeatureFunction *ff = *iter;
+ const DistortionScoreProducer *ff = *iter;
- const DistortionScoreProducer *model = dynamic_cast<const DistortionScoreProducer*>(ff);
- if (model) {
- float weight =staticData.GetAllWeights().GetScoreForProducer(model);
- m_totalWeightDistortion += weight;
- }
+ float weight =staticData.GetAllWeights().GetScoreForProducer(ff);
+ m_totalWeightDistortion += weight;
}
-
-
}
const WordsRange* m_transOptRange;
@@ -166,16 +162,16 @@ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
if (m_translations.size() > 1) {
UTIL_THROW_IF2(m_translations.Get(0)->GetFutureScore() < m_translations.Get(1)->GetFutureScore(),
- "Non-monotonic future score: "
- << m_translations.Get(0)->GetFutureScore() << " vs. "
- << m_translations.Get(1)->GetFutureScore());
+ "Non-monotonic future score: "
+ << m_translations.Get(0)->GetFutureScore() << " vs. "
+ << m_translations.Get(1)->GetFutureScore());
}
if (m_hypotheses.size() > 1) {
UTIL_THROW_IF2(m_hypotheses[0]->GetTotalScore() < m_hypotheses[1]->GetTotalScore(),
- "Non-monotonic total score"
- << m_hypotheses[0]->GetTotalScore() << " vs. "
- << m_hypotheses[1]->GetTotalScore());
+ "Non-monotonic total score"
+ << m_hypotheses[0]->GetTotalScore() << " vs. "
+ << m_hypotheses[1]->GetTotalScore());
}
HypothesisScoreOrdererWithDistortion orderer (&transOptRange);
@@ -223,7 +219,7 @@ Hypothesis *BackwardsEdge::CreateHypothesis(const Hypothesis &hypothesis, const
bool
BackwardsEdge::SeenPosition(const size_t x, const size_t y)
{
- std::set< int >::iterator iter = m_seenPosition.find((x<<16) + y);
+ boost::unordered_set< int >::iterator iter = m_seenPosition.find((x<<16) + y);
return (iter != m_seenPosition.end());
}
@@ -450,9 +446,9 @@ BitmapContainer::ProcessBestHypothesis()
if (!Empty()) {
HypothesisQueueItem *check = Dequeue(true);
UTIL_THROW_IF2(item->GetHypothesis()->GetTotalScore() < check->GetHypothesis()->GetTotalScore(),
- "Non-monotonic total score: "
- << item->GetHypothesis()->GetTotalScore() << " vs. "
- << check->GetHypothesis()->GetTotalScore());
+ "Non-monotonic total score: "
+ << item->GetHypothesis()->GetTotalScore() << " vs. "
+ << check->GetHypothesis()->GetTotalScore());
}
// Logging for the criminally insane
diff --git a/moses/BitmapContainer.h b/moses/BitmapContainer.h
index 51f1659ef..500059081 100644
--- a/moses/BitmapContainer.h
+++ b/moses/BitmapContainer.h
@@ -33,6 +33,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "TypeDef.h"
#include "WordsBitmap.h"
+#include <boost/unordered_set.hpp>
+
namespace Moses
{
@@ -165,7 +167,7 @@ private:
const SquareMatrix &m_futurescore;
std::vector< const Hypothesis* > m_hypotheses;
- std::set< int > m_seenPosition;
+ boost::unordered_set< int > m_seenPosition;
// We don't want to instantiate "empty" objects.
BackwardsEdge();
diff --git a/moses/ChartCell.cpp b/moses/ChartCell.cpp
index 1d84ba71b..c942375e2 100644
--- a/moses/ChartCell.cpp
+++ b/moses/ChartCell.cpp
@@ -84,8 +84,8 @@ void ChartCell::PruneToSize()
* \param transOptList list of applicable rules to create hypotheses for the cell
* \param allChartCells entire chart - needed to look up underlying hypotheses
*/
-void ChartCell::ProcessSentence(const ChartTranslationOptionList &transOptList
- , const ChartCellCollection &allChartCells)
+void ChartCell::Decode(const ChartTranslationOptionList &transOptList
+ , const ChartCellCollection &allChartCells)
{
const StaticData &staticData = StaticData::Instance();
diff --git a/moses/ChartCell.h b/moses/ChartCell.h
index 99bc90866..d9213d5e1 100644
--- a/moses/ChartCell.h
+++ b/moses/ChartCell.h
@@ -96,8 +96,8 @@ public:
ChartCell(size_t startPos, size_t endPos, ChartManager &manager);
~ChartCell();
- void ProcessSentence(const ChartTranslationOptionList &transOptList
- ,const ChartCellCollection &allChartCells);
+ void Decode(const ChartTranslationOptionList &transOptList
+ ,const ChartCellCollection &allChartCells);
//! Get all hypotheses in the cell that have the specified constituent label
const HypoList *GetSortedHypotheses(const Word &constituentLabel) const {
diff --git a/moses/ChartCellLabelSet.h b/moses/ChartCellLabelSet.h
index d946058ba..4977c941f 100644
--- a/moses/ChartCellLabelSet.h
+++ b/moses/ChartCellLabelSet.h
@@ -45,9 +45,9 @@ public:
typedef MapType::iterator iterator;
ChartCellLabelSet(const WordsRange &coverage)
- : m_coverage(coverage)
- , m_map(FactorCollection::Instance().GetNumNonTerminals(), NULL)
- , m_size(0) { }
+ : m_coverage(coverage)
+ , m_map(FactorCollection::Instance().GetNumNonTerminals(), NULL)
+ , m_size(0) { }
~ChartCellLabelSet() {
RemoveAllInColl(m_map);
@@ -84,8 +84,7 @@ public:
if (ChartCellExists(idx)) {
ChartCellLabel::Stack & s = m_map[idx]->MutableStack();
s.cube = stack;
- }
- else {
+ } else {
ChartCellLabel::Stack s;
s.cube = stack;
m_size++;
@@ -99,8 +98,7 @@ public:
if (m_map.at(idx) != NULL) {
return true;
}
- }
- catch (const std::out_of_range& oor) {
+ } catch (const std::out_of_range& oor) {
m_map.resize(FactorCollection::Instance().GetNumNonTerminals(), NULL);
}
return false;
@@ -118,8 +116,7 @@ public:
size_t idx = w[0]->GetId();
try {
return m_map.at(idx);
- }
- catch (const std::out_of_range& oor) {
+ } catch (const std::out_of_range& oor) {
return NULL;
}
}
@@ -127,8 +124,7 @@ public:
const ChartCellLabel *Find(size_t idx) const {
try {
return m_map.at(idx);
- }
- catch (const std::out_of_range& oor) {
+ } catch (const std::out_of_range& oor) {
return NULL;
}
}
diff --git a/moses/ChartHypothesis.cpp b/moses/ChartHypothesis.cpp
index 8339ee5b2..0d62e33bf 100644
--- a/moses/ChartHypothesis.cpp
+++ b/moses/ChartHypothesis.cpp
@@ -71,7 +71,6 @@ ChartHypothesis::ChartHypothesis(const ChartTranslationOptions &transOpt,
ChartHypothesis::ChartHypothesis(const ChartHypothesis &pred,
const ChartKBestExtractor & /*unused*/)
:m_currSourceWordsRange(pred.m_currSourceWordsRange)
- ,m_scoreBreakdown(pred.m_scoreBreakdown)
,m_totalScore(pred.m_totalScore)
,m_arcList(NULL)
,m_winningHypo(NULL)
@@ -124,12 +123,12 @@ void ChartHypothesis::GetOutputPhrase(Phrase &outPhrase) const
if (sourcePosSet.size() == 1) {
const std::vector<const Word*> *ruleSourceFromInputPath = GetTranslationOption().GetSourceRuleFromInputPath();
UTIL_THROW_IF2(ruleSourceFromInputPath == NULL,
- "No source rule");
+ "No source rule");
size_t sourcePos = *sourcePosSet.begin();
const Word *sourceWord = ruleSourceFromInputPath->at(sourcePos);
UTIL_THROW_IF2(sourceWord == NULL,
- "No source word");
+ "No source word");
const Factor *factor = sourceWord->GetFactor(placeholderFactor);
if (factor) {
outPhrase.Back()[0] = factor;
@@ -149,37 +148,36 @@ Phrase ChartHypothesis::GetOutputPhrase() const
return outPhrase;
}
-void ChartHypothesis::GetOutputPhrase(int leftRightMost, int numWords, Phrase &outPhrase) const
+/** TODO: this method isn't used anywhere. Remove? */
+void ChartHypothesis::GetOutputPhrase(size_t leftRightMost, size_t numWords, Phrase &outPhrase) const
{
const TargetPhrase &tp = GetCurrTargetPhrase();
- int targetSize = tp.GetSize();
- for (int i = 0; i < targetSize; ++i) {
- int pos;
- if (leftRightMost == 1) {
- pos = i;
- }
- else if (leftRightMost == 2) {
- pos = targetSize - i - 1;
- }
- else {
- abort();
- }
-
- const Word &word = tp.GetWord(pos);
-
- if (word.IsNonTerminal()) {
- // non-term. fill out with prev hypo
- size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[pos];
- const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd];
- prevHypo->GetOutputPhrase(outPhrase);
- } else {
- outPhrase.AddWord(word);
- }
-
- if (outPhrase.GetSize() >= numWords) {
- return;
- }
+ size_t targetSize = tp.GetSize();
+ for (size_t i = 0; i < targetSize; ++i) {
+ size_t pos;
+ if (leftRightMost == 1) {
+ pos = i;
+ } else if (leftRightMost == 2) {
+ pos = targetSize - i - 1;
+ } else {
+ abort();
+ }
+
+ const Word &word = tp.GetWord(pos);
+
+ if (word.IsNonTerminal()) {
+ // non-term. fill out with prev hypo
+ size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[pos];
+ const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd];
+ prevHypo->GetOutputPhrase(outPhrase);
+ } else {
+ outPhrase.AddWord(word);
+ }
+
+ if (outPhrase.GetSize() >= numWords) {
+ return;
+ }
}
}
@@ -209,24 +207,10 @@ int ChartHypothesis::RecombineCompare(const ChartHypothesis &compare) const
return 0;
}
-/** calculate total score
- * @todo this should be in ScoreBreakdown
- */
+/** calculate total score */
void ChartHypothesis::EvaluateWhenApplied()
{
const StaticData &staticData = StaticData::Instance();
- // total scores from prev hypos
- std::vector<const ChartHypothesis*>::iterator iter;
- for (iter = m_prevHypos.begin(); iter != m_prevHypos.end(); ++iter) {
- const ChartHypothesis &prevHypo = **iter;
- const ScoreComponentCollection &scoreBreakdown = prevHypo.GetScoreBreakdown();
-
- m_scoreBreakdown.PlusEquals(scoreBreakdown);
- }
-
- // scores from current translation rule. eg. translation models & word penalty
- const ScoreComponentCollection &scoreBreakdown = GetTranslationOption().GetScores();
- m_scoreBreakdown.PlusEquals(scoreBreakdown);
// compute values of stateless feature functions that were not
// cached in the translation option-- there is no principled distinction
@@ -234,7 +218,7 @@ void ChartHypothesis::EvaluateWhenApplied()
StatelessFeatureFunction::GetStatelessFeatureFunctions();
for (unsigned i = 0; i < sfs.size(); ++i) {
if (! staticData.IsFeatureFunctionIgnored( *sfs[i] )) {
- sfs[i]->EvaluateWhenApplied(*this,&m_scoreBreakdown);
+ sfs[i]->EvaluateWhenApplied(*this,&m_currScoreBreakdown);
}
}
@@ -242,31 +226,42 @@ void ChartHypothesis::EvaluateWhenApplied()
StatefulFeatureFunction::GetStatefulFeatureFunctions();
for (unsigned i = 0; i < ffs.size(); ++i) {
if (! staticData.IsFeatureFunctionIgnored( *ffs[i] )) {
- m_ffStates[i] = ffs[i]->EvaluateWhenApplied(*this,i,&m_scoreBreakdown);
+ m_ffStates[i] = ffs[i]->EvaluateWhenApplied(*this,i,&m_currScoreBreakdown);
}
}
- m_totalScore = m_scoreBreakdown.GetWeightedScore();
+ // total score from current translation rule
+ m_totalScore = GetTranslationOption().GetScores().GetWeightedScore();
+ m_totalScore += m_currScoreBreakdown.GetWeightedScore();
+
+ // total scores from prev hypos
+ for (std::vector<const ChartHypothesis*>::const_iterator iter = m_prevHypos.begin(); iter != m_prevHypos.end(); ++iter) {
+ const ChartHypothesis &prevHypo = **iter;
+ m_totalScore += prevHypo.GetTotalScore();
+ }
}
void ChartHypothesis::AddArc(ChartHypothesis *loserHypo)
{
if (!m_arcList) {
- if (loserHypo->m_arcList) { // we don't have an arcList, but loser does
+ if (loserHypo->m_arcList) {
+ // we don't have an arcList, but loser does
this->m_arcList = loserHypo->m_arcList; // take ownership, we'll delete
loserHypo->m_arcList = 0; // prevent a double deletion
} else {
this->m_arcList = new ChartArcList();
}
} else {
- if (loserHypo->m_arcList) { // both have an arc list: merge. delete loser
+ if (loserHypo->m_arcList) {
+ // both have an arc list: merge. delete loser
size_t my_size = m_arcList->size();
size_t add_size = loserHypo->m_arcList->size();
this->m_arcList->resize(my_size + add_size, 0);
std::memcpy(&(*m_arcList)[0] + my_size, &(*loserHypo->m_arcList)[0], add_size * sizeof(ChartHypothesis *));
delete loserHypo->m_arcList;
loserHypo->m_arcList = 0;
- } else { // loserHypo doesn't have any arcs
+ } else {
+ // loserHypo doesn't have any arcs
// DO NOTHING
}
}
@@ -274,7 +269,7 @@ void ChartHypothesis::AddArc(ChartHypothesis *loserHypo)
}
// sorting helper
-struct CompareChartChartHypothesisTotalScore {
+struct CompareChartHypothesisTotalScore {
bool operator()(const ChartHypothesis* hypo1, const ChartHypothesis* hypo2) const {
return hypo1->GetTotalScore() > hypo2->GetTotalScore();
}
@@ -297,10 +292,10 @@ void ChartHypothesis::CleanupArcList()
if (!distinctNBest && m_arcList->size() > nBestSize) {
// prune arc list only if there too many arcs
- NTH_ELEMENT4(m_arcList->begin()
- , m_arcList->begin() + nBestSize - 1
- , m_arcList->end()
- , CompareChartChartHypothesisTotalScore());
+ NTH_ELEMENT4(m_arcList->begin()
+ , m_arcList->begin() + nBestSize - 1
+ , m_arcList->end()
+ , CompareChartHypothesisTotalScore());
// delete bad ones
ChartArcList::iterator iter;
diff --git a/moses/ChartHypothesis.h b/moses/ChartHypothesis.h
index 8dc26e721..25216c04c 100644
--- a/moses/ChartHypothesis.h
+++ b/moses/ChartHypothesis.h
@@ -21,6 +21,7 @@
#pragma once
#include <vector>
+#include <boost/scoped_ptr.hpp>
#include "Util.h"
#include "WordsRange.h"
#include "ScoreComponentCollection.h"
@@ -45,7 +46,7 @@ typedef std::vector<ChartHypothesis*> ChartArcList;
class ChartHypothesis
{
friend std::ostream& operator<<(std::ostream&, const ChartHypothesis&);
- friend class ChartKBestExtractor;
+// friend class ChartKBestExtractor;
protected:
#ifdef USE_HYPO_POOL
@@ -56,7 +57,10 @@ protected:
WordsRange m_currSourceWordsRange;
std::vector<const FFState*> m_ffStates; /*! stateful feature function states */
- ScoreComponentCollection m_scoreBreakdown /*! detailed score break-down by components (for instance language model, word penalty, etc) */
+ /*! sum of scores of this hypothesis, and previous hypotheses. Lazily initialised. */
+ mutable boost::scoped_ptr<ScoreComponentCollection> m_scoreBreakdown;
+ mutable boost::scoped_ptr<ScoreComponentCollection> m_deltaScoreBreakdown;
+ ScoreComponentCollection m_currScoreBreakdown /*! scores for this hypothesis only */
,m_lmNGram
,m_lmPrefix;
float m_totalScore;
@@ -76,9 +80,6 @@ protected:
//! not implemented
ChartHypothesis(const ChartHypothesis &copy);
- //! only used by ChartKBestExtractor
- ChartHypothesis(const ChartHypothesis &, const ChartKBestExtractor &);
-
public:
#ifdef USE_HYPO_POOL
void *operator new(size_t /* num_bytes */) {
@@ -100,23 +101,26 @@ public:
ChartHypothesis(const ChartTranslationOptions &, const RuleCubeItem &item,
ChartManager &manager);
+ //! only used by ChartKBestExtractor
+ ChartHypothesis(const ChartHypothesis &, const ChartKBestExtractor &);
+
~ChartHypothesis();
unsigned GetId() const {
return m_id;
}
- const ChartTranslationOption &GetTranslationOption()const {
+ const ChartTranslationOption &GetTranslationOption() const {
return *m_transOpt;
}
//! Get the rule that created this hypothesis
- const TargetPhrase &GetCurrTargetPhrase()const {
+ const TargetPhrase &GetCurrTargetPhrase() const {
return m_transOpt->GetPhrase();
}
//! the source range that this hypothesis spans
- const WordsRange &GetCurrSourceRange()const {
+ const WordsRange &GetCurrSourceRange() const {
return m_currSourceWordsRange;
}
@@ -140,7 +144,7 @@ public:
// get leftmost/rightmost words only
// leftRightMost: 1=left, 2=right
- void GetOutputPhrase(int leftRightMost, int numWords, Phrase &outPhrase) const;
+ void GetOutputPhrase(size_t leftRightMost, size_t numWords, Phrase &outPhrase) const;
int RecombineCompare(const ChartHypothesis &compare) const;
@@ -152,11 +156,41 @@ public:
//! get the unweighted score for each feature function
const ScoreComponentCollection &GetScoreBreakdown() const {
- return m_scoreBreakdown;
+ // Note: never call this method before m_currScoreBreakdown is fully computed
+ if (!m_scoreBreakdown.get()) {
+ m_scoreBreakdown.reset(new ScoreComponentCollection());
+ // score breakdown from current translation rule
+ if (m_transOpt) {
+ m_scoreBreakdown->PlusEquals(GetTranslationOption().GetScores());
+ }
+ m_scoreBreakdown->PlusEquals(m_currScoreBreakdown);
+ // score breakdowns from prev hypos
+ for (std::vector<const ChartHypothesis*>::const_iterator iter = m_prevHypos.begin(); iter != m_prevHypos.end(); ++iter) {
+ const ChartHypothesis &prevHypo = **iter;
+ m_scoreBreakdown->PlusEquals(prevHypo.GetScoreBreakdown());
+ }
+ }
+ return *(m_scoreBreakdown.get());
+ }
+
+ //! get the unweighted score delta for each feature function
+ const ScoreComponentCollection &GetDeltaScoreBreakdown() const {
+ // Note: never call this method before m_currScoreBreakdown is fully computed
+ if (!m_deltaScoreBreakdown.get()) {
+ m_deltaScoreBreakdown.reset(new ScoreComponentCollection());
+ // score breakdown from current translation rule
+ if (m_transOpt) {
+ m_deltaScoreBreakdown->PlusEquals(GetTranslationOption().GetScores());
+ }
+ m_deltaScoreBreakdown->PlusEquals(m_currScoreBreakdown);
+ // delta: score breakdowns from prev hypos _not_ added
+ }
+ return *(m_deltaScoreBreakdown.get());
}
//! Get the weighted total score
float GetTotalScore() const {
+ // scores from current translation rule. eg. translation models & word penalty
return m_totalScore;
}
diff --git a/moses/ChartHypothesisCollection.cpp b/moses/ChartHypothesisCollection.cpp
index d5a3b3d53..d53211f34 100644
--- a/moses/ChartHypothesisCollection.cpp
+++ b/moses/ChartHypothesisCollection.cpp
@@ -90,7 +90,7 @@ bool ChartHypothesisCollection::AddHypothesis(ChartHypothesis *hypo, ChartManage
HCType::iterator &iterExisting = addRet.first;
ChartHypothesis *hypoExisting = *iterExisting;
UTIL_THROW_IF2(iterExisting == m_hypos.end(),
- "Adding a hypothesis should have returned a valid iterator");
+ "Adding a hypothesis should have returned a valid iterator");
//StaticData::Instance().GetSentenceStats().AddRecombination(*hypo, **iterExisting);
@@ -256,7 +256,7 @@ void ChartHypothesisCollection::PruneToSize(ChartManager &manager)
ChartHypothesis *hypo = *iter;
HCType::iterator iterFindHypo = m_hypos.find(hypo);
UTIL_THROW_IF2(iterFindHypo == m_hypos.end(),
- "Adding a hypothesis should have returned a valid iterator");
+ "Adding a hypothesis should have returned a valid iterator");
Remove(iterFindHypo);
}
diff --git a/moses/ChartKBestExtractor.cpp b/moses/ChartKBestExtractor.cpp
index e916ba7db..60e4e7f2b 100644
--- a/moses/ChartKBestExtractor.cpp
+++ b/moses/ChartKBestExtractor.cpp
@@ -34,8 +34,8 @@ namespace Moses
// Extract the k-best list from the search graph.
void ChartKBestExtractor::Extract(
- const std::vector<const ChartHypothesis*> &topLevelHypos, std::size_t k,
- KBestVec &kBestList)
+ const std::vector<const ChartHypothesis*> &topLevelHypos, std::size_t k,
+ KBestVec &kBestList)
{
kBestList.clear();
if (topLevelHypos.empty()) {
@@ -47,7 +47,7 @@ void ChartKBestExtractor::Extract(
std::vector<const ChartHypothesis*>::const_iterator p = topLevelHypos.begin();
const ChartHypothesis &bestTopLevelHypo = **p;
boost::scoped_ptr<ChartHypothesis> supremeHypo(
- new ChartHypothesis(bestTopLevelHypo, *this));
+ new ChartHypothesis(bestTopLevelHypo, *this));
// Do the same for each alternative top-level hypothesis, but add the new
// ChartHypothesis objects as arcs from supremeHypo, as if they had been
@@ -70,8 +70,8 @@ void ChartKBestExtractor::Extract(
// each derivation.
kBestList.reserve(targetVertex->kBestList.size());
for (std::vector<boost::weak_ptr<Derivation> >::const_iterator
- q = targetVertex->kBestList.begin();
- q != targetVertex->kBestList.end(); ++q) {
+ q = targetVertex->kBestList.begin();
+ q != targetVertex->kBestList.end(); ++q) {
const boost::shared_ptr<Derivation> d(*q);
assert(d);
assert(d->subderivations.size() == 1);
@@ -124,6 +124,28 @@ Phrase ChartKBestExtractor::GetOutputPhrase(const Derivation &d)
return ret;
}
+// Generate the score breakdown of the derivation d.
+boost::shared_ptr<ScoreComponentCollection>
+ChartKBestExtractor::GetOutputScoreBreakdown(const Derivation &d)
+{
+ const ChartHypothesis &hypo = d.edge.head->hypothesis;
+ boost::shared_ptr<ScoreComponentCollection> scoreBreakdown(new ScoreComponentCollection());
+ scoreBreakdown->PlusEquals(hypo.GetDeltaScoreBreakdown());
+ const TargetPhrase &phrase = hypo.GetCurrTargetPhrase();
+ const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
+ phrase.GetAlignNonTerm().GetNonTermIndexMap();
+ for (std::size_t pos = 0; pos < phrase.GetSize(); ++pos) {
+ const Word &word = phrase.GetWord(pos);
+ if (word.IsNonTerminal()) {
+ std::size_t nonTermInd = nonTermIndexMap[pos];
+ const Derivation &subderivation = *d.subderivations[nonTermInd];
+ scoreBreakdown->PlusEquals(*GetOutputScoreBreakdown(subderivation));
+ }
+ }
+
+ return scoreBreakdown;
+}
+
// Generate the target tree of the derivation d.
TreePointer ChartKBestExtractor::GetOutputTree(const Derivation &d)
{
@@ -147,15 +169,14 @@ TreePointer ChartKBestExtractor::GetOutputTree(const Derivation &d)
mytree->Combine(previous_trees);
return mytree;
- }
- else {
+ } else {
UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found");
}
}
// Create an unweighted hyperarc corresponding to the given ChartHypothesis.
ChartKBestExtractor::UnweightedHyperarc ChartKBestExtractor::CreateEdge(
- const ChartHypothesis &h)
+ const ChartHypothesis &h)
{
UnweightedHyperarc edge;
edge.head = FindOrCreateVertex(h);
@@ -191,7 +212,7 @@ ChartKBestExtractor::FindOrCreateVertex(const ChartHypothesis &h)
}
boost::shared_ptr<Derivation> bestDerivation(new Derivation(bestEdge));
#ifndef NDEBUG
- std::pair<DerivationSet::iterator, bool> q =
+ std::pair<DerivationSet::iterator, bool> q =
#endif
m_derivations.insert(bestDerivation);
assert(q.second);
@@ -286,7 +307,6 @@ ChartKBestExtractor::Derivation::Derivation(const UnweightedHyperarc &e)
boost::shared_ptr<Derivation> sub(pred.kBestList[0]);
subderivations.push_back(sub);
}
- scoreBreakdown = edge.head->hypothesis.GetScoreBreakdown();
score = edge.head->hypothesis.GetTotalScore();
}
@@ -298,15 +318,14 @@ ChartKBestExtractor::Derivation::Derivation(const Derivation &d, std::size_t i)
backPointers = d.backPointers;
subderivations = d.subderivations;
std::size_t j = ++backPointers[i];
- scoreBreakdown = d.scoreBreakdown;
+ score = d.score;
// Deduct the score of the old subderivation.
- scoreBreakdown.MinusEquals(subderivations[i]->scoreBreakdown);
+ score -= subderivations[i]->score;
// Update the subderivation pointer.
boost::shared_ptr<Derivation> newSub(edge.tail[i]->kBestList[j]);
subderivations[i] = newSub;
// Add the score of the new subderivation.
- scoreBreakdown.PlusEquals(subderivations[i]->scoreBreakdown);
- score = scoreBreakdown.GetWeightedScore();
+ score += subderivations[i]->score;
}
} // namespace Moses
diff --git a/moses/ChartKBestExtractor.h b/moses/ChartKBestExtractor.h
index 1b348a0a4..01c928175 100644
--- a/moses/ChartKBestExtractor.h
+++ b/moses/ChartKBestExtractor.h
@@ -26,6 +26,7 @@
#include <boost/unordered_set.hpp>
#include <boost/weak_ptr.hpp>
+#include <boost/shared_ptr.hpp>
#include <queue>
#include <vector>
@@ -56,7 +57,6 @@ public:
UnweightedHyperarc edge;
std::vector<std::size_t> backPointers;
std::vector<boost::shared_ptr<Derivation> > subderivations;
- ScoreComponentCollection scoreBreakdown;
float score;
};
@@ -71,8 +71,8 @@ public:
struct Vertex {
typedef std::priority_queue<boost::weak_ptr<Derivation>,
- std::vector<boost::weak_ptr<Derivation> >,
- DerivationOrderer> DerivationQueue;
+ std::vector<boost::weak_ptr<Derivation> >,
+ DerivationOrderer> DerivationQueue;
Vertex(const ChartHypothesis &h) : hypothesis(h), visited(false) {}
@@ -90,11 +90,12 @@ public:
std::size_t k, KBestVec &);
static Phrase GetOutputPhrase(const Derivation &);
+ static boost::shared_ptr<ScoreComponentCollection> GetOutputScoreBreakdown(const Derivation &);
static TreePointer GetOutputTree(const Derivation &);
private:
typedef boost::unordered_map<const ChartHypothesis *,
- boost::shared_ptr<Vertex> > VertexMap;
+ boost::shared_ptr<Vertex> > VertexMap;
struct DerivationHasher {
std::size_t operator()(const boost::shared_ptr<Derivation> &d) const {
@@ -116,7 +117,7 @@ private:
};
typedef boost::unordered_set<boost::shared_ptr<Derivation>, DerivationHasher,
- DerivationEqualityPred> DerivationSet;
+ DerivationEqualityPred> DerivationSet;
UnweightedHyperarc CreateEdge(const ChartHypothesis &);
boost::shared_ptr<Vertex> FindOrCreateVertex(const ChartHypothesis &);
diff --git a/moses/ChartManager.cpp b/moses/ChartManager.cpp
index 56bc8529d..d183c97e6 100644
--- a/moses/ChartManager.cpp
+++ b/moses/ChartManager.cpp
@@ -29,10 +29,13 @@
#include "StaticData.h"
#include "DecodeStep.h"
#include "TreeInput.h"
+#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/WordPenaltyProducer.h"
+#include "moses/OutputCollector.h"
+#include "moses/ChartKBestExtractor.h"
+#include "moses/HypergraphOutput.h"
using namespace std;
-using namespace Moses;
namespace Moses
{
@@ -43,7 +46,7 @@ extern bool g_mosesDebug;
* \param system which particular set of models to use.
*/
ChartManager::ChartManager(InputType const& source)
- :m_source(source)
+ :BaseManager(source)
,m_hypoStackColl(source, *this)
,m_start(clock())
,m_hypothesisId(0)
@@ -62,7 +65,7 @@ ChartManager::~ChartManager()
}
//! decode the sentence. This contains the main laps. Basically, the CKY++ algorithm
-void ChartManager::ProcessSentence()
+void ChartManager::Decode()
{
VERBOSE(1,"Translating: " << m_source << endl);
@@ -90,7 +93,7 @@ void ChartManager::ProcessSentence()
// decode
ChartCell &cell = m_hypoStackColl.Get(range);
- cell.ProcessSentence(m_translationOptionList, m_hypoStackColl);
+ cell.Decode(m_translationOptionList, m_hypoStackColl);
m_translationOptionList.Clear();
cell.PruneToSize();
@@ -171,9 +174,9 @@ const ChartHypothesis *ChartManager::GetBestHypothesis() const
* \param onlyDistinct whether to check for distinct output sentence or not (default - don't check, just return top n-paths)
*/
void ChartManager::CalcNBest(
- std::size_t n,
- std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > &nBestList,
- bool onlyDistinct) const
+ std::size_t n,
+ std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > &nBestList,
+ bool onlyDistinct) const
{
nBestList.clear();
if (n == 0 || m_source.GetSize() == 0) {
@@ -184,7 +187,7 @@ void ChartManager::CalcNBest(
WordsRange range(0, m_source.GetSize()-1);
const ChartCell &lastCell = m_hypoStackColl.Get(range);
boost::scoped_ptr<const std::vector<const ChartHypothesis*> > topLevelHypos(
- lastCell.GetAllSortedHypotheses());
+ lastCell.GetAllSortedHypotheses());
if (!topLevelHypos) {
return;
}
@@ -287,14 +290,591 @@ void ChartManager::FindReachableHypotheses(
}
}
-void ChartManager::OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const {
+void ChartManager::OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const
+{
ChartSearchGraphWriterHypergraph writer(&outputSearchGraphStream);
WriteSearchGraph(writer);
}
-void ChartManager::OutputSearchGraphMoses(std::ostream &outputSearchGraphStream) const {
+void ChartManager::OutputSearchGraphMoses(std::ostream &outputSearchGraphStream) const
+{
ChartSearchGraphWriterMoses writer(&outputSearchGraphStream, m_source.GetTranslationId());
WriteSearchGraph(writer);
}
+void ChartManager::OutputBest(OutputCollector *collector) const
+{
+ const ChartHypothesis *bestHypo = GetBestHypothesis();
+ if (collector && bestHypo) {
+ const size_t translationId = m_source.GetTranslationId();
+ const ChartHypothesis *bestHypo = GetBestHypothesis();
+ OutputBestHypo(collector, bestHypo, translationId);
+ }
+}
+
+void ChartManager::OutputNBest(OutputCollector *collector) const
+{
+ const StaticData &staticData = StaticData::Instance();
+ size_t nBestSize = staticData.GetNBestSize();
+ if (nBestSize > 0) {
+ const size_t translationId = m_source.GetTranslationId();
+
+ VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
+ std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
+ CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
+ OutputNBestList(collector, nBestList, translationId);
+ IFVERBOSE(2) {
+ PrintUserTime("N-Best Hypotheses Generation Time:");
+ }
+ }
+
+}
+
+void ChartManager::OutputNBestList(OutputCollector *collector,
+ const ChartKBestExtractor::KBestVec &nBestList,
+ long translationId) const
+{
+ const StaticData &staticData = StaticData::Instance();
+ const std::vector<Moses::FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
+
+ std::ostringstream out;
+
+ if (collector->OutputIsCout()) {
+ // Set precision only if we're writing the n-best list to cout. This is to
+ // preserve existing behaviour, but should probably be done either way.
+ FixPrecision(out);
+ }
+
+ bool includeWordAlignment =
+ StaticData::Instance().PrintAlignmentInfoInNbest();
+
+ bool PrintNBestTrees = StaticData::Instance().PrintNBestTrees();
+
+ for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin();
+ p != nBestList.end(); ++p) {
+ const ChartKBestExtractor::Derivation &derivation = **p;
+
+ // get the derivation's target-side yield
+ Phrase outputPhrase = ChartKBestExtractor::GetOutputPhrase(derivation);
+
+ // delete <s> and </s>
+ UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
+ "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
+ outputPhrase.RemoveWord(0);
+ outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
+
+ // print the translation ID, surface factors, and scores
+ out << translationId << " ||| ";
+ OutputSurface(out, outputPhrase, outputFactorOrder, false);
+ out << " ||| ";
+ boost::shared_ptr<ScoreComponentCollection> scoreBreakdown = ChartKBestExtractor::GetOutputScoreBreakdown(derivation);
+ scoreBreakdown->OutputAllFeatureScores(out);
+ out << " ||| " << derivation.score;
+
+ // optionally, print word alignments
+ if (includeWordAlignment) {
+ out << " ||| ";
+ Alignments align;
+ OutputAlignmentNBest(align, derivation, 0);
+ for (Alignments::const_iterator q = align.begin(); q != align.end();
+ ++q) {
+ out << q->first << "-" << q->second << " ";
+ }
+ }
+
+ // optionally, print tree
+ if (PrintNBestTrees) {
+ TreePointer tree = ChartKBestExtractor::GetOutputTree(derivation);
+ out << " ||| " << tree->GetString();
+ }
+
+ out << std::endl;
+ }
+
+ assert(collector);
+ collector->Write(translationId, out.str());
+}
+
+size_t ChartManager::CalcSourceSize(const Moses::ChartHypothesis *hypo) const
+{
+ size_t ret = hypo->GetCurrSourceRange().GetNumWordsCovered();
+ const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
+ for (size_t i = 0; i < prevHypos.size(); ++i) {
+ size_t childSize = prevHypos[i]->GetCurrSourceRange().GetNumWordsCovered();
+ ret -= (childSize - 1);
+ }
+ return ret;
+}
+
+size_t ChartManager::OutputAlignmentNBest(
+ Alignments &retAlign,
+ const Moses::ChartKBestExtractor::Derivation &derivation,
+ size_t startTarget) const
+{
+ const ChartHypothesis &hypo = derivation.edge.head->hypothesis;
+
+ size_t totalTargetSize = 0;
+ size_t startSource = hypo.GetCurrSourceRange().GetStartPos();
+
+ const TargetPhrase &tp = hypo.GetCurrTargetPhrase();
+
+ size_t thisSourceSize = CalcSourceSize(&hypo);
+
+ // position of each terminal word in translation rule, irrespective of alignment
+ // if non-term, number is undefined
+ vector<size_t> sourceOffsets(thisSourceSize, 0);
+ vector<size_t> targetOffsets(tp.GetSize(), 0);
+
+ const AlignmentInfo &aiNonTerm = hypo.GetCurrTargetPhrase().GetAlignNonTerm();
+ vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
+ const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();
+
+ UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
+ "Error");
+
+ size_t targetInd = 0;
+ for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
+ if (tp.GetWord(targetPos).IsNonTerminal()) {
+ UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
+ size_t sourceInd = targetPos2SourceInd[targetPos];
+ size_t sourcePos = sourceInd2pos[sourceInd];
+
+ const Moses::ChartKBestExtractor::Derivation &subderivation =
+ *derivation.subderivations[sourceInd];
+
+ // calc source size
+ size_t sourceSize = subderivation.edge.head->hypothesis.GetCurrSourceRange().GetNumWordsCovered();
+ sourceOffsets[sourcePos] = sourceSize;
+
+ // calc target size.
+ // Recursively look thru child hypos
+ size_t currStartTarget = startTarget + totalTargetSize;
+ size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
+ currStartTarget);
+ targetOffsets[targetPos] = targetSize;
+
+ totalTargetSize += targetSize;
+ ++targetInd;
+ } else {
+ ++totalTargetSize;
+ }
+ }
+
+ // convert position within translation rule to absolute position within
+ // source sentence / output sentence
+ ShiftOffsets(sourceOffsets, startSource);
+ ShiftOffsets(targetOffsets, startTarget);
+
+ // get alignments from this hypo
+ const AlignmentInfo &aiTerm = hypo.GetCurrTargetPhrase().GetAlignTerm();
+
+ // add to output arg, offsetting by source & target
+ AlignmentInfo::const_iterator iter;
+ for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
+ const std::pair<size_t,size_t> &align = *iter;
+ size_t relSource = align.first;
+ size_t relTarget = align.second;
+ size_t absSource = sourceOffsets[relSource];
+ size_t absTarget = targetOffsets[relTarget];
+
+ pair<size_t, size_t> alignPoint(absSource, absTarget);
+ pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
+ UTIL_THROW_IF2(!ret.second, "Error");
+ }
+
+ return totalTargetSize;
+}
+
+void ChartManager::OutputAlignment(OutputCollector *collector) const
+{
+ if (collector == NULL) {
+ return;
+ }
+
+ ostringstream out;
+
+ const ChartHypothesis *hypo = GetBestHypothesis();
+ if (hypo) {
+ Alignments retAlign;
+ OutputAlignment(retAlign, hypo, 0);
+
+ // output alignments
+ Alignments::const_iterator iter;
+ for (iter = retAlign.begin(); iter != retAlign.end(); ++iter) {
+ const pair<size_t, size_t> &alignPoint = *iter;
+ out << alignPoint.first << "-" << alignPoint.second << " ";
+ }
+ }
+ out << endl;
+
+ collector->Write(m_source.GetTranslationId(), out.str());
+
+}
+
+size_t ChartManager::OutputAlignment(Alignments &retAlign,
+ const Moses::ChartHypothesis *hypo,
+ size_t startTarget) const
+{
+ size_t totalTargetSize = 0;
+ size_t startSource = hypo->GetCurrSourceRange().GetStartPos();
+
+ const TargetPhrase &tp = hypo->GetCurrTargetPhrase();
+
+ size_t thisSourceSize = CalcSourceSize(hypo);
+
+ // position of each terminal word in translation rule, irrespective of alignment
+ // if non-term, number is undefined
+ vector<size_t> sourceOffsets(thisSourceSize, 0);
+ vector<size_t> targetOffsets(tp.GetSize(), 0);
+
+ const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
+
+ const AlignmentInfo &aiNonTerm = hypo->GetCurrTargetPhrase().GetAlignNonTerm();
+ vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
+ const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();
+
+ UTIL_THROW_IF2(sourceInd2pos.size() != prevHypos.size(), "Error");
+
+ size_t targetInd = 0;
+ for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
+ if (tp.GetWord(targetPos).IsNonTerminal()) {
+ UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
+ size_t sourceInd = targetPos2SourceInd[targetPos];
+ size_t sourcePos = sourceInd2pos[sourceInd];
+
+ const ChartHypothesis *prevHypo = prevHypos[sourceInd];
+
+ // calc source size
+ size_t sourceSize = prevHypo->GetCurrSourceRange().GetNumWordsCovered();
+ sourceOffsets[sourcePos] = sourceSize;
+
+ // calc target size.
+ // Recursively look thru child hypos
+ size_t currStartTarget = startTarget + totalTargetSize;
+ size_t targetSize = OutputAlignment(retAlign, prevHypo, currStartTarget);
+ targetOffsets[targetPos] = targetSize;
+
+ totalTargetSize += targetSize;
+ ++targetInd;
+ } else {
+ ++totalTargetSize;
+ }
+ }
+
+ // convert position within translation rule to absolute position within
+ // source sentence / output sentence
+ ShiftOffsets(sourceOffsets, startSource);
+ ShiftOffsets(targetOffsets, startTarget);
+
+ // get alignments from this hypo
+ const AlignmentInfo &aiTerm = hypo->GetCurrTargetPhrase().GetAlignTerm();
+
+ // add to output arg, offsetting by source & target
+ AlignmentInfo::const_iterator iter;
+ for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
+ const std::pair<size_t,size_t> &align = *iter;
+ size_t relSource = align.first;
+ size_t relTarget = align.second;
+ size_t absSource = sourceOffsets[relSource];
+ size_t absTarget = targetOffsets[relTarget];
+
+ pair<size_t, size_t> alignPoint(absSource, absTarget);
+ pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
+ UTIL_THROW_IF2(!ret.second, "Error");
+
+ }
+
+ return totalTargetSize;
+}
+
+void ChartManager::OutputDetailedTranslationReport(OutputCollector *collector) const
+{
+ if (collector) {
+ OutputDetailedTranslationReport(collector,
+ GetBestHypothesis(),
+ static_cast<const Sentence&>(m_source),
+ m_source.GetTranslationId());
+ }
+}
+
+void ChartManager::OutputDetailedTranslationReport(
+ OutputCollector *collector,
+ const ChartHypothesis *hypo,
+ const Sentence &sentence,
+ long translationId) const
+{
+ if (hypo == NULL) {
+ return;
+ }
+ std::ostringstream out;
+ ApplicationContext applicationContext;
+
+ OutputTranslationOptions(out, applicationContext, hypo, sentence, translationId);
+ collector->Write(translationId, out.str());
+
+ //DIMw
+ const StaticData &staticData = StaticData::Instance();
+
+ if (staticData.IsDetailedAllTranslationReportingEnabled()) {
+ const Sentence &sentence = dynamic_cast<const Sentence &>(m_source);
+ size_t nBestSize = staticData.GetNBestSize();
+ std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
+ CalcNBest(nBestSize, nBestList, staticData.GetDistinctNBest());
+ OutputDetailedAllTranslationReport(collector, nBestList, sentence, translationId);
+ }
+
+}
+
+void ChartManager::OutputTranslationOptions(std::ostream &out,
+ ApplicationContext &applicationContext,
+ const ChartHypothesis *hypo,
+ const Sentence &sentence,
+ long translationId) const
+{
+ if (hypo != NULL) {
+ OutputTranslationOption(out, applicationContext, hypo, sentence, translationId);
+ out << std::endl;
+ }
+
+ // recursive
+ const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
+ std::vector<const ChartHypothesis*>::const_iterator iter;
+ for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
+ const ChartHypothesis *prevHypo = *iter;
+ OutputTranslationOptions(out, applicationContext, prevHypo, sentence, translationId);
+ }
+}
+
+void ChartManager::OutputTranslationOption(std::ostream &out,
+ ApplicationContext &applicationContext,
+ const ChartHypothesis *hypo,
+ const Sentence &sentence,
+ long translationId) const
+{
+ ReconstructApplicationContext(*hypo, sentence, applicationContext);
+ out << "Trans Opt " << translationId
+ << " " << hypo->GetCurrSourceRange()
+ << ": ";
+ WriteApplicationContext(out, applicationContext);
+ out << ": " << hypo->GetCurrTargetPhrase().GetTargetLHS()
+ << "->" << hypo->GetCurrTargetPhrase()
+ << " " << hypo->GetTotalScore() << hypo->GetScoreBreakdown();
+}
+
+// Given a hypothesis and sentence, reconstructs the 'application context' --
+// the source RHS symbols of the SCFG rule that was applied, plus their spans.
+void ChartManager::ReconstructApplicationContext(const ChartHypothesis &hypo,
+ const Sentence &sentence,
+ ApplicationContext &context) const
+{
+ context.clear();
+ const std::vector<const ChartHypothesis*> &prevHypos = hypo.GetPrevHypos();
+ std::vector<const ChartHypothesis*>::const_iterator p = prevHypos.begin();
+ std::vector<const ChartHypothesis*>::const_iterator end = prevHypos.end();
+ const WordsRange &span = hypo.GetCurrSourceRange();
+ size_t i = span.GetStartPos();
+ while (i <= span.GetEndPos()) {
+ if (p == end || i < (*p)->GetCurrSourceRange().GetStartPos()) {
+ // Symbol is a terminal.
+ const Word &symbol = sentence.GetWord(i);
+ context.push_back(std::make_pair(symbol, WordsRange(i, i)));
+ ++i;
+ } else {
+ // Symbol is a non-terminal.
+ const Word &symbol = (*p)->GetTargetLHS();
+ const WordsRange &range = (*p)->GetCurrSourceRange();
+ context.push_back(std::make_pair(symbol, range));
+ i = range.GetEndPos()+1;
+ ++p;
+ }
+ }
+}
+
+void ChartManager::OutputUnknowns(OutputCollector *collector) const
+{
+ if (collector) {
+ long translationId = m_source.GetTranslationId();
+ const std::vector<Phrase*> &oovs = GetParser().GetUnknownSources();
+
+ std::ostringstream out;
+ for (std::vector<Phrase*>::const_iterator p = oovs.begin();
+ p != oovs.end(); ++p) {
+ out << *p;
+ }
+ out << std::endl;
+ collector->Write(translationId, out.str());
+ }
+
+}
+
+void ChartManager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const
+{
+ const ChartHypothesis *hypo = GetBestHypothesis();
+ if (collector == NULL || hypo == NULL) {
+ return;
+ }
+
+ std::ostringstream out;
+ ApplicationContext applicationContext;
+
+ const Sentence &sentence = dynamic_cast<const Sentence &>(m_source);
+ const size_t translationId = m_source.GetTranslationId();
+
+ OutputTreeFragmentsTranslationOptions(out, applicationContext, hypo, sentence, translationId);
+
+ //Tree of full sentence
+ const StatefulFeatureFunction* treeStructure = StaticData::Instance().GetTreeStructure();
+ if (treeStructure != NULL) {
+ const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
+ for( size_t i=0; i<sff.size(); i++ ) {
+ if (sff[i] == treeStructure) {
+ const TreeState* tree = dynamic_cast<const TreeState*>(hypo->GetFFState(i));
+ out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n";
+ break;
+ }
+ }
+ }
+
+ collector->Write(translationId, out.str());
+
+}
+
+void ChartManager::OutputTreeFragmentsTranslationOptions(std::ostream &out,
+ ApplicationContext &applicationContext,
+ const ChartHypothesis *hypo,
+ const Sentence &sentence,
+ long translationId) const
+{
+
+ if (hypo != NULL) {
+ OutputTranslationOption(out, applicationContext, hypo, sentence, translationId);
+
+ const TargetPhrase &currTarPhr = hypo->GetCurrTargetPhrase();
+
+ out << " ||| ";
+ if (const PhraseProperty *property = currTarPhr.GetProperty("Tree")) {
+ out << " " << *property->GetValueString();
+ } else {
+ out << " " << "noTreeInfo";
+ }
+ out << std::endl;
+ }
+
+ // recursive
+ const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
+ std::vector<const ChartHypothesis*>::const_iterator iter;
+ for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
+ const ChartHypothesis *prevHypo = *iter;
+ OutputTreeFragmentsTranslationOptions(out, applicationContext, prevHypo, sentence, translationId);
+ }
+}
+
+void ChartManager::OutputSearchGraph(OutputCollector *collector) const
+{
+ if (collector) {
+ long translationId = m_source.GetTranslationId();
+ std::ostringstream out;
+ OutputSearchGraphMoses( out);
+ collector->Write(translationId, out.str());
+ }
+}
+
+//DIMw
+void ChartManager::OutputDetailedAllTranslationReport(
+ OutputCollector *collector,
+ const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
+ const Sentence &sentence,
+ long translationId) const
+{
+ std::ostringstream out;
+ ApplicationContext applicationContext;
+
+ const ChartCellCollection& cells = GetChartCellCollection();
+ size_t size = GetSource().GetSize();
+ for (size_t width = 1; width <= size; ++width) {
+ for (size_t startPos = 0; startPos <= size-width; ++startPos) {
+ size_t endPos = startPos + width - 1;
+ WordsRange range(startPos, endPos);
+ const ChartCell& cell = cells.Get(range);
+ const HypoList* hyps = cell.GetAllSortedHypotheses();
+ out << "Chart Cell [" << startPos << ".." << endPos << "]" << endl;
+ HypoList::const_iterator iter;
+ size_t c = 1;
+ for (iter = hyps->begin(); iter != hyps->end(); ++iter) {
+ out << "----------------Item " << c++ << " ---------------------"
+ << endl;
+ OutputTranslationOptions(out, applicationContext, *iter,
+ sentence, translationId);
+ }
+ }
+ }
+ collector->Write(translationId, out.str());
+}
+
+void ChartManager::OutputSearchGraphHypergraph() const
+{
+ const StaticData &staticData = StaticData::Instance();
+ if (staticData.GetOutputSearchGraphHypergraph()) {
+ HypergraphOutput<ChartManager> hypergraphOutputChart(PRECISION);
+ hypergraphOutputChart.Write(*this);
+ }
+}
+
+void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothesis *hypo, long translationId) const
+{
+ if (!collector)
+ return;
+ std::ostringstream out;
+ FixPrecision(out);
+ if (hypo != NULL) {
+ VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl);
+ VERBOSE(3,"Best path: ");
+ Backtrack(hypo);
+ VERBOSE(3,"0" << std::endl);
+
+ if (StaticData::Instance().GetOutputHypoScore()) {
+ out << hypo->GetTotalScore() << " ";
+ }
+
+ if (StaticData::Instance().IsPathRecoveryEnabled()) {
+ out << "||| ";
+ }
+ Phrase outPhrase(ARRAY_SIZE_INCR);
+ hypo->GetOutputPhrase(outPhrase);
+
+ // delete 1st & last
+ UTIL_THROW_IF2(outPhrase.GetSize() < 2,
+ "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
+
+ outPhrase.RemoveWord(0);
+ outPhrase.RemoveWord(outPhrase.GetSize() - 1);
+
+ const std::vector<FactorType> outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
+ string output = outPhrase.GetStringRep(outputFactorOrder);
+ out << output << endl;
+ } else {
+ VERBOSE(1, "NO BEST TRANSLATION" << endl);
+
+ if (StaticData::Instance().GetOutputHypoScore()) {
+ out << "0 ";
+ }
+
+ out << endl;
+ }
+ collector->Write(translationId, out.str());
+}
+
+void ChartManager::Backtrack(const ChartHypothesis *hypo) const
+{
+ const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
+
+ vector<const ChartHypothesis*>::const_iterator iter;
+ for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
+ const ChartHypothesis *prevHypo = *iter;
+
+ VERBOSE(3,prevHypo->GetId() << " <= ");
+ Backtrack(prevHypo);
+ }
+}
+
} // namespace Moses
diff --git a/moses/ChartManager.h b/moses/ChartManager.h
index 9ad4f4b85..745a792cb 100644
--- a/moses/ChartManager.h
+++ b/moses/ChartManager.h
@@ -25,12 +25,13 @@
#include <boost/unordered_map.hpp>
#include "ChartCell.h"
#include "ChartCellCollection.h"
-#include "InputType.h"
#include "WordsRange.h"
#include "SentenceStats.h"
#include "ChartTranslationOptionList.h"
#include "ChartParser.h"
#include "ChartKBestExtractor.h"
+#include "BaseManager.h"
+#include "moses/Syntax/KBestExtractor.h"
#include <boost/shared_ptr.hpp>
@@ -42,10 +43,9 @@ class ChartSearchGraphWriter;
/** Holds everything you need to decode 1 sentence with the hierachical/syntax decoder
*/
-class ChartManager
+class ChartManager : public BaseManager
{
private:
- InputType const& m_source; /**< source sentence to be translated */
ChartCellCollection m_hypoStackColl;
std::auto_ptr<SentenceStats> m_sentenceStats;
clock_t m_start; /**< starting time, used for logging */
@@ -56,14 +56,56 @@ private:
ChartTranslationOptionList m_translationOptionList; /**< pre-computed list of translation options for the phrases in this sentence */
/* auxilliary functions for SearchGraphs */
- void FindReachableHypotheses(
- const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable , size_t* winners, size_t* losers) const;
+ void FindReachableHypotheses(
+ const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable , size_t* winners, size_t* losers) const;
void WriteSearchGraph(const ChartSearchGraphWriter& writer) const;
+ // output
+ void OutputNBestList(OutputCollector *collector,
+ const ChartKBestExtractor::KBestVec &nBestList,
+ long translationId) const;
+ size_t CalcSourceSize(const Moses::ChartHypothesis *hypo) const;
+ size_t OutputAlignmentNBest(Alignments &retAlign,
+ const Moses::ChartKBestExtractor::Derivation &derivation,
+ size_t startTarget) const;
+ size_t OutputAlignment(Alignments &retAlign,
+ const Moses::ChartHypothesis *hypo,
+ size_t startTarget) const;
+ void OutputDetailedTranslationReport(
+ OutputCollector *collector,
+ const ChartHypothesis *hypo,
+ const Sentence &sentence,
+ long translationId) const;
+ void OutputTranslationOptions(std::ostream &out,
+ ApplicationContext &applicationContext,
+ const ChartHypothesis *hypo,
+ const Sentence &sentence,
+ long translationId) const;
+ void OutputTranslationOption(std::ostream &out,
+ ApplicationContext &applicationContext,
+ const ChartHypothesis *hypo,
+ const Sentence &sentence,
+ long translationId) const;
+ void ReconstructApplicationContext(const ChartHypothesis &hypo,
+ const Sentence &sentence,
+ ApplicationContext &context) const;
+ void OutputTreeFragmentsTranslationOptions(std::ostream &out,
+ ApplicationContext &applicationContext,
+ const ChartHypothesis *hypo,
+ const Sentence &sentence,
+ long translationId) const;
+ void OutputDetailedAllTranslationReport(
+ OutputCollector *collector,
+ const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
+ const Sentence &sentence,
+ long translationId) const;
+ void OutputBestHypo(OutputCollector *collector, const ChartHypothesis *hypo, long translationId) const;
+ void Backtrack(const ChartHypothesis *hypo) const;
+
public:
ChartManager(InputType const& source);
~ChartManager();
- void ProcessSentence();
+ void Decode();
void AddXmlChartOptions();
const ChartHypothesis *GetBestHypothesis() const;
void CalcNBest(size_t n, std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > &nBestList, bool onlyDistinct=false) const;
@@ -74,12 +116,6 @@ public:
/** Output in (modified) Kenneth hypergraph format */
void OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const;
-
- //! the input sentence being decoded
- const InputType& GetSource() const {
- return m_source;
- }
-
//! debug data collected when decoding sentence
SentenceStats& GetSentenceStats() const {
return *m_sentenceStats;
@@ -90,10 +126,6 @@ public:
return m_hypoStackColl;
}
- /***
- * to be called after processing a sentence (which may consist of more than just calling ProcessSentence() )
- * currently an empty function
- */
void CalcDecoderStatistics() const {
}
@@ -106,7 +138,25 @@ public:
return m_hypothesisId++;
}
- const ChartParser &GetParser() const { return m_parser; }
+ const ChartParser &GetParser() const {
+ return m_parser;
+ }
+
+ // outputs
+ void OutputBest(OutputCollector *collector) const;
+ void OutputNBest(OutputCollector *collector) const;
+ void OutputLatticeSamples(OutputCollector *collector) const {
+ }
+ void OutputAlignment(OutputCollector *collector) const;
+ void OutputDetailedTranslationReport(OutputCollector *collector) const;
+ void OutputUnknowns(OutputCollector *collector) const;
+ void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const;
+ void OutputWordGraph(OutputCollector *collector) const {
+ }
+ void OutputSearchGraph(OutputCollector *collector) const;
+ void OutputSearchGraphSLF() const {
+ }
+ void OutputSearchGraphHypergraph() const;
};
diff --git a/moses/ChartParser.cpp b/moses/ChartParser.cpp
index a79e1bc68..40f18e7e8 100644
--- a/moses/ChartParser.cpp
+++ b/moses/ChartParser.cpp
@@ -188,10 +188,11 @@ void ChartParser::Create(const WordsRange &wordsRange, ChartParserCallback &to)
size_t maxSpan = decodeGraph.GetMaxChartSpan();
size_t last = m_source.GetSize()-1;
if (maxSpan != 0) {
- last = min(last, wordsRange.GetStartPos()+maxSpan);
+ last = min(last, wordsRange.GetStartPos()+maxSpan);
}
if (maxSpan == 0 || wordsRange.GetNumWordsCovered() <= maxSpan) {
- ruleLookupManager.GetChartRuleCollection(wordsRange, last, to);
+ const InputPath &inputPath = GetInputPath(wordsRange);
+ ruleLookupManager.GetChartRuleCollection(inputPath, last, to);
}
}
@@ -211,7 +212,7 @@ void ChartParser::CreateInputPaths(const InputType &input)
m_inputPathMatrix.resize(size);
UTIL_THROW_IF2(input.GetType() != SentenceInput && input.GetType() != TreeInputType,
- "Input must be a sentence or a tree, not lattice or confusion networks");
+ "Input must be a sentence or a tree, not lattice or confusion networks");
for (size_t phaseSize = 1; phaseSize <= size; ++phaseSize) {
for (size_t startPos = 0; startPos < size - phaseSize + 1; ++startPos) {
size_t endPos = startPos + phaseSize -1;
@@ -236,7 +237,7 @@ void ChartParser::CreateInputPaths(const InputType &input)
}
}
-const InputPath &ChartParser::GetInputPath(WordsRange &range) const
+const InputPath &ChartParser::GetInputPath(const WordsRange &range) const
{
return GetInputPath(range.GetStartPos(), range.GetEndPos());
}
@@ -245,7 +246,7 @@ const InputPath &ChartParser::GetInputPath(size_t startPos, size_t endPos) const
{
size_t offset = endPos - startPos;
UTIL_THROW_IF2(offset >= m_inputPathMatrix[startPos].size(),
- "Out of bound: " << offset);
+ "Out of bound: " << offset);
return *m_inputPathMatrix[startPos][offset];
}
@@ -253,7 +254,7 @@ InputPath &ChartParser::GetInputPath(size_t startPos, size_t endPos)
{
size_t offset = endPos - startPos;
UTIL_THROW_IF2(offset >= m_inputPathMatrix[startPos].size(),
- "Out of bound: " << offset);
+ "Out of bound: " << offset);
return *m_inputPathMatrix[startPos][offset];
}
/*
diff --git a/moses/ChartParser.h b/moses/ChartParser.h
index acd30179d..e438cf8ad 100644
--- a/moses/ChartParser.h
+++ b/moses/ChartParser.h
@@ -48,7 +48,9 @@ public:
void Process(const Word &sourceWord, const WordsRange &range, ChartParserCallback &to);
- const std::vector<Phrase*> &GetUnknownSources() const { return m_unksrcs; }
+ const std::vector<Phrase*> &GetUnknownSources() const {
+ return m_unksrcs;
+ }
private:
std::vector<Phrase*> m_unksrcs;
@@ -68,8 +70,10 @@ public:
long GetTranslationId() const;
size_t GetSize() const;
const InputPath &GetInputPath(size_t startPos, size_t endPos) const;
- const InputPath &GetInputPath(WordsRange &range) const;
- const std::vector<Phrase*> &GetUnknownSources() const { return m_unknown.GetUnknownSources(); }
+ const InputPath &GetInputPath(const WordsRange &range) const;
+ const std::vector<Phrase*> &GetUnknownSources() const {
+ return m_unknown.GetUnknownSources();
+ }
private:
ChartParserUnknown m_unknown;
diff --git a/moses/ChartRuleLookupManager.h b/moses/ChartRuleLookupManager.h
index 94263b22e..b62ec157d 100644
--- a/moses/ChartRuleLookupManager.h
+++ b/moses/ChartRuleLookupManager.h
@@ -65,7 +65,7 @@ public:
* \param outColl return argument
*/
virtual void GetChartRuleCollection(
- const WordsRange &range,
+ const InputPath &inputPath,
size_t lastPos, // last position to consider if using lookahead
ChartParserCallback &outColl) = 0;
diff --git a/moses/ChartTranslationOption.cpp b/moses/ChartTranslationOption.cpp
index 332b26a15..65cb2afbd 100644
--- a/moses/ChartTranslationOption.cpp
+++ b/moses/ChartTranslationOption.cpp
@@ -11,8 +11,8 @@ ChartTranslationOption::ChartTranslationOption(const TargetPhrase &targetPhrase)
}
void ChartTranslationOption::EvaluateWithSourceContext(const InputType &input,
- const InputPath &inputPath,
- const StackVec &stackVec)
+ const InputPath &inputPath,
+ const StackVec &stackVec)
{
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
diff --git a/moses/ChartTranslationOption.h b/moses/ChartTranslationOption.h
index 06a6f797e..0b902f811 100644
--- a/moses/ChartTranslationOption.h
+++ b/moses/ChartTranslationOption.h
@@ -29,6 +29,7 @@ public:
const InputPath *GetInputPath() const {
return m_inputPath;
}
+
void SetInputPath(const InputPath *inputPath) {
m_inputPath = inputPath;
}
@@ -45,8 +46,8 @@ public:
}
void EvaluateWithSourceContext(const InputType &input,
- const InputPath &inputPath,
- const StackVec &stackVec);
+ const InputPath &inputPath,
+ const StackVec &stackVec);
};
}
diff --git a/moses/ChartTranslationOptionList.cpp b/moses/ChartTranslationOptionList.cpp
index 71a8b4b0f..8d3d9b3ab 100644
--- a/moses/ChartTranslationOptionList.cpp
+++ b/moses/ChartTranslationOptionList.cpp
@@ -106,10 +106,10 @@ void ChartTranslationOptionList::Add(const TargetPhraseCollection &tpc,
// Prune if bursting
if (m_ruleLimit && m_size == m_ruleLimit * 2) {
- NTH_ELEMENT4(m_collection.begin(),
- m_collection.begin() + m_ruleLimit - 1,
- m_collection.begin() + m_size,
- ChartTranslationOptionOrderer());
+ NTH_ELEMENT4(m_collection.begin(),
+ m_collection.begin() + m_ruleLimit - 1,
+ m_collection.begin() + m_size,
+ ChartTranslationOptionOrderer());
m_scoreThreshold = m_collection[m_ruleLimit-1]->GetEstimateOfBestScore();
m_size = m_ruleLimit;
}
@@ -133,9 +133,9 @@ void ChartTranslationOptionList::ApplyThreshold()
// Reduce the list to the best m_ruleLimit options. The remaining
// options can be overwritten on subsequent calls to Add().
NTH_ELEMENT4(m_collection.begin(),
- m_collection.begin()+m_ruleLimit,
- m_collection.begin()+m_size,
- ChartTranslationOptionOrderer());
+ m_collection.begin()+m_ruleLimit,
+ m_collection.begin()+m_size,
+ ChartTranslationOptionOrderer());
m_size = m_ruleLimit;
}
@@ -161,11 +161,11 @@ void ChartTranslationOptionList::ApplyThreshold()
float ChartTranslationOptionList::GetBestScore(const ChartCellLabel *chartCell) const
{
- const HypoList *stack = chartCell->GetStack().cube;
- assert(stack);
- assert(!stack->empty());
- const ChartHypothesis &bestHypo = **(stack->begin());
- return bestHypo.GetTotalScore();
+ const HypoList *stack = chartCell->GetStack().cube;
+ assert(stack);
+ assert(!stack->empty());
+ const ChartHypothesis &bestHypo = **(stack->begin());
+ return bestHypo.GetTotalScore();
}
void ChartTranslationOptionList::EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath)
@@ -182,12 +182,11 @@ void ChartTranslationOptionList::EvaluateWithSourceContext(const InputType &inpu
for (size_t i = 0; i < m_size; ++i) {
ChartTranslationOptions *transOpts = m_collection[i];
if (transOpts->GetSize() == 0) {
- //delete transOpts;
- ++numDiscard;
- }
- else if (numDiscard) {
- SwapTranslationOptions(i - numDiscard, i);
- //m_collection[] = transOpts;
+ //delete transOpts;
+ ++numDiscard;
+ } else if (numDiscard) {
+ SwapTranslationOptions(i - numDiscard, i);
+ //m_collection[] = transOpts;
}
}
@@ -205,11 +204,11 @@ void ChartTranslationOptionList::SwapTranslationOptions(size_t a, size_t b)
std::ostream& operator<<(std::ostream &out, const ChartTranslationOptionList &obj)
{
- for (size_t i = 0; i < obj.m_collection.size(); ++i) {
- const ChartTranslationOptions &transOpts = *obj.m_collection[i];
- out << transOpts << endl;
- }
- return out;
+ for (size_t i = 0; i < obj.m_collection.size(); ++i) {
+ const ChartTranslationOptions &transOpts = *obj.m_collection[i];
+ out << transOpts << endl;
+ }
+ return out;
}
}
diff --git a/moses/ChartTranslationOptions.cpp b/moses/ChartTranslationOptions.cpp
index 44aa67619..03a869109 100644
--- a/moses/ChartTranslationOptions.cpp
+++ b/moses/ChartTranslationOptions.cpp
@@ -51,6 +51,18 @@ ChartTranslationOptions::~ChartTranslationOptions()
}
+//! functor to compare (chart) hypotheses by (descending) score
+class ChartTranslationOptionScoreOrderer
+{
+public:
+ bool operator()(const boost::shared_ptr<ChartTranslationOption> &transOptA
+ , const boost::shared_ptr<ChartTranslationOption> &transOptB) const {
+ const ScoreComponentCollection &scoresA = transOptA->GetScores();
+ const ScoreComponentCollection &scoresB = transOptB->GetScores();
+ return scoresA.GetWeightedScore() > scoresB.GetWeightedScore();
+ }
+};
+
void ChartTranslationOptions::EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath)
{
SetInputPath(&inputPath);
@@ -71,15 +83,23 @@ void ChartTranslationOptions::EvaluateWithSourceContext(const InputType &input,
ChartTranslationOption *transOpt = m_collection[i].get();
if (transOpt->GetScores().GetWeightedScore() == - std::numeric_limits<float>::infinity()) {
- ++numDiscard;
- }
- else if (numDiscard) {
- m_collection[i - numDiscard] = m_collection[i];
+ ++numDiscard;
+ } else if (numDiscard) {
+ m_collection[i - numDiscard] = m_collection[i];
}
}
size_t newSize = m_collection.size() - numDiscard;
m_collection.resize(newSize);
+
+ // sort if necessary
+ const StaticData &staticData = StaticData::Instance();
+ if (staticData.RequireSortingAfterSourceContext()) {
+ std::sort(m_collection.begin()
+ , m_collection.begin() + newSize
+ , ChartTranslationOptionScoreOrderer());
+ }
+
}
void ChartTranslationOptions::SetInputPath(const InputPath *inputPath)
@@ -135,12 +155,12 @@ void ChartTranslationOptions::CreateSourceRuleFromInputPath()
std::ostream& operator<<(std::ostream &out, const ChartTranslationOptions &obj)
{
- for (size_t i = 0; i < obj.m_collection.size(); ++i) {
- const ChartTranslationOption &transOpt = *obj.m_collection[i];
- out << transOpt << endl;
- }
+ for (size_t i = 0; i < obj.m_collection.size(); ++i) {
+ const ChartTranslationOption &transOpt = *obj.m_collection[i];
+ out << transOpt << endl;
+ }
- return out;
+ return out;
}
}
diff --git a/moses/ChartTranslationOptions.h b/moses/ChartTranslationOptions.h
index cdead7889..73c378eb0 100644
--- a/moses/ChartTranslationOptions.h
+++ b/moses/ChartTranslationOptions.h
@@ -59,8 +59,9 @@ public:
static float CalcEstimateOfBestScore(const TargetPhraseCollection &,
const StackVec &);
- size_t GetSize() const
- { return m_collection.size(); }
+ size_t GetSize() const {
+ return m_collection.size();
+ }
//! @todo dunno
const StackVec &GetStackVec() const {
diff --git a/moses/ConfusionNet.cpp b/moses/ConfusionNet.cpp
index d9270bd1b..ce0d5e1c0 100644
--- a/moses/ConfusionNet.cpp
+++ b/moses/ConfusionNet.cpp
@@ -8,305 +8,302 @@
#include "TranslationOptionCollectionConfusionNet.h"
#include "StaticData.h"
#include "Sentence.h"
-#include "UserMessage.h"
#include "moses/FF/InputFeature.h"
#include "util/exception.hh"
namespace Moses
{
- struct CNStats {
- size_t created,destr,read,colls,words;
-
- CNStats() : created(0),destr(0),read(0),colls(0),words(0) {}
- ~CNStats() {
- print(std::cerr);
- }
+struct CNStats {
+ size_t created,destr,read,colls,words;
- void createOne() {
- ++created;
- }
- void destroyOne() {
- ++destr;
- }
-
- void collect(const ConfusionNet& cn) {
- ++read;
- colls+=cn.GetSize();
- for(size_t i=0; i<cn.GetSize(); ++i)
- words+=cn[i].size();
- }
- void print(std::ostream& out) const {
- if(created>0) {
- out<<"confusion net statistics:\n"
- " created:\t"<<created<<"\n"
- " destroyed:\t"<<destr<<"\n"
- " succ. read:\t"<<read<<"\n"
- " columns:\t"<<colls<<"\n"
- " words:\t"<<words<<"\n"
- " avg. word/column:\t"<<words/(1.0*colls)<<"\n"
- " avg. cols/sent:\t"<<colls/(1.0*read)<<"\n"
- "\n\n";
- }
- }
- };
-
- CNStats stats;
-
- size_t
- ConfusionNet::
- GetColumnIncrement(size_t i, size_t j) const
- {
- (void) i;
- (void) j;
- return 1;
+ CNStats() : created(0),destr(0),read(0),colls(0),words(0) {}
+ ~CNStats() {
+ print(std::cerr);
}
- ConfusionNet::
- ConfusionNet()
- : InputType()
- {
- stats.createOne();
+ void createOne() {
+ ++created;
+ }
+ void destroyOne() {
+ ++destr;
+ }
- const StaticData& staticData = StaticData::Instance();
- if (staticData.IsChart()) {
- m_defaultLabelSet.insert(StaticData::Instance().GetInputDefaultNonTerminal());
+ void collect(const ConfusionNet& cn) {
+ ++read;
+ colls+=cn.GetSize();
+ for(size_t i=0; i<cn.GetSize(); ++i)
+ words+=cn[i].size();
+ }
+ void print(std::ostream& out) const {
+ if(created>0) {
+ out<<"confusion net statistics:\n"
+ " created:\t"<<created<<"\n"
+ " destroyed:\t"<<destr<<"\n"
+ " succ. read:\t"<<read<<"\n"
+ " columns:\t"<<colls<<"\n"
+ " words:\t"<<words<<"\n"
+ " avg. word/column:\t"<<words/(1.0*colls)<<"\n"
+ " avg. cols/sent:\t"<<colls/(1.0*read)<<"\n"
+ "\n\n";
}
- UTIL_THROW_IF2(&InputFeature::Instance() == NULL, "Input feature must be specified");
}
+};
- ConfusionNet::
- ~ConfusionNet()
- {
- stats.destroyOne();
- }
+CNStats stats;
- ConfusionNet::
- ConfusionNet(Sentence const& s)
- {
- data.resize(s.GetSize());
- for(size_t i=0; i<s.GetSize(); ++i) {
- ScorePair scorePair;
- std::pair<Word, ScorePair > temp = std::make_pair(s.GetWord(i), scorePair);
- data[i].push_back(temp);
- }
+size_t
+ConfusionNet::
+GetColumnIncrement(size_t i, size_t j) const
+{
+ (void) i;
+ (void) j;
+ return 1;
+}
+
+ConfusionNet::
+ConfusionNet()
+ : InputType()
+{
+ stats.createOne();
+
+ const StaticData& staticData = StaticData::Instance();
+ if (staticData.IsChart()) {
+ m_defaultLabelSet.insert(StaticData::Instance().GetInputDefaultNonTerminal());
}
+ UTIL_THROW_IF2(&InputFeature::Instance() == NULL, "Input feature must be specified");
+}
- bool
- ConfusionNet::
- ReadF(std::istream& in, const std::vector<FactorType>& factorOrder, int format)
- {
- VERBOSE(2, "read confusion net with format "<<format<<"\n");
- switch(format) {
- case 0:
- return ReadFormat0(in,factorOrder);
- case 1:
- return ReadFormat1(in,factorOrder);
- default:
- std::stringstream strme;
- strme << "ERROR: unknown format '"<<format
- <<"' in ConfusionNet::Read";
- UserMessage::Add(strme.str());
- }
- return false;
+ConfusionNet::
+~ConfusionNet()
+{
+ stats.destroyOne();
+}
+
+ConfusionNet::
+ConfusionNet(Sentence const& s)
+{
+ data.resize(s.GetSize());
+ for(size_t i=0; i<s.GetSize(); ++i) {
+ ScorePair scorePair;
+ std::pair<Word, ScorePair > temp = std::make_pair(s.GetWord(i), scorePair);
+ data[i].push_back(temp);
}
+}
- int
- ConfusionNet::
- Read(std::istream& in,
- const std::vector<FactorType>& factorOrder)
- {
- int rv=ReadF(in,factorOrder,0);
- if(rv) stats.collect(*this);
- return rv;
+bool
+ConfusionNet::
+ReadF(std::istream& in, const std::vector<FactorType>& factorOrder, int format)
+{
+ VERBOSE(2, "read confusion net with format "<<format<<"\n");
+ switch(format) {
+ case 0:
+ return ReadFormat0(in,factorOrder);
+ case 1:
+ return ReadFormat1(in,factorOrder);
+ default:
+ std::cerr << "ERROR: unknown format '"<<format
+ <<"' in ConfusionNet::Read";
}
+ return false;
+}
+
+int
+ConfusionNet::
+Read(std::istream& in,
+ const std::vector<FactorType>& factorOrder)
+{
+ int rv=ReadF(in,factorOrder,0);
+ if(rv) stats.collect(*this);
+ return rv;
+}
#if 0
- // Deprecated due to code duplication;
- // use Word::CreateFromString() instead
- void
- ConfusionNet::
- String2Word(const std::string& s,Word& w,
- const std::vector<FactorType>& factorOrder)
- {
- std::vector<std::string> factorStrVector = Tokenize(s, "|");
- for(size_t i=0; i<factorOrder.size(); ++i)
- w.SetFactor(factorOrder[i],
- FactorCollection::Instance().AddFactor
- (Input,factorOrder[i], factorStrVector[i]));
- }
+// Deprecated due to code duplication;
+// use Word::CreateFromString() instead
+void
+ConfusionNet::
+String2Word(const std::string& s,Word& w,
+ const std::vector<FactorType>& factorOrder)
+{
+ std::vector<std::string> factorStrVector = Tokenize(s, "|");
+ for(size_t i=0; i<factorOrder.size(); ++i)
+ w.SetFactor(factorOrder[i],
+ FactorCollection::Instance().AddFactor
+ (Input,factorOrder[i], factorStrVector[i]));
+}
#endif
- bool
- ConfusionNet::
- ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
- {
- Clear();
-
- // const StaticData &staticData = StaticData::Instance();
- const InputFeature &inputFeature = InputFeature::Instance();
- size_t numInputScores = inputFeature.GetNumInputScores();
- size_t numRealWordCount = inputFeature.GetNumRealWordsInInput();
-
- size_t totalCount = numInputScores + numRealWordCount;
- bool addRealWordCount = (numRealWordCount > 0);
-
- std::string line;
- while(getline(in,line)) {
- std::istringstream is(line);
- std::string word;
-
- Column col;
- while(is>>word) {
- Word w;
- // String2Word(word,w,factorOrder);
- w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
- std::vector<float> probs(totalCount, 0.0);
- for(size_t i=0; i < numInputScores; i++) {
- double prob;
- if (!(is>>prob)) {
- TRACE_ERR("ERROR: unable to parse CN input - bad link probability, or wrong number of scores\n");
- return false;
- }
- if(prob<0.0) {
- VERBOSE(1, "WARN: negative prob: "<<prob<<" ->set to 0.0\n");
- prob=0.0;
- } else if (prob>1.0) {
- VERBOSE(1, "WARN: prob > 1.0 : "<<prob<<" -> set to 1.0\n");
- prob=1.0;
- }
- probs[i] = (std::max(static_cast<float>(log(prob)),LOWEST_SCORE));
-
- }
- //store 'real' word count in last feature if we have one more weight than we do arc scores and not epsilon
- if (addRealWordCount && word!=EPSILON && word!="")
- probs.back() = -1.0;
-
- ScorePair scorePair(probs);
-
- col.push_back(std::make_pair(w,scorePair));
+bool
+ConfusionNet::
+ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
+{
+ Clear();
+
+ // const StaticData &staticData = StaticData::Instance();
+ const InputFeature &inputFeature = InputFeature::Instance();
+ size_t numInputScores = inputFeature.GetNumInputScores();
+ size_t numRealWordCount = inputFeature.GetNumRealWordsInInput();
+
+ size_t totalCount = numInputScores + numRealWordCount;
+ bool addRealWordCount = (numRealWordCount > 0);
+
+ std::string line;
+ while(getline(in,line)) {
+ std::istringstream is(line);
+ std::string word;
+
+ Column col;
+ while(is>>word) {
+ Word w;
+ // String2Word(word,w,factorOrder);
+ w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
+ std::vector<float> probs(totalCount, 0.0);
+ for(size_t i=0; i < numInputScores; i++) {
+ double prob;
+ if (!(is>>prob)) {
+ TRACE_ERR("ERROR: unable to parse CN input - bad link probability, or wrong number of scores\n");
+ return false;
+ }
+ if(prob<0.0) {
+ VERBOSE(1, "WARN: negative prob: "<<prob<<" ->set to 0.0\n");
+ prob=0.0;
+ } else if (prob>1.0) {
+ VERBOSE(1, "WARN: prob > 1.0 : "<<prob<<" -> set to 1.0\n");
+ prob=1.0;
+ }
+ probs[i] = (std::max(static_cast<float>(log(prob)),LOWEST_SCORE));
+
}
- if(col.size()) {
- data.push_back(col);
- ShrinkToFit(data.back());
- } else break;
+ //store 'real' word count in last feature if we have one more weight than we do arc scores and not epsilon
+ if (addRealWordCount && word!=EPSILON && word!="")
+ probs.back() = -1.0;
+
+ ScorePair scorePair(probs);
+
+ col.push_back(std::make_pair(w,scorePair));
}
- return !data.empty();
+ if(col.size()) {
+ data.push_back(col);
+ ShrinkToFit(data.back());
+ } else break;
}
+ return !data.empty();
+}
- bool
- ConfusionNet::
- ReadFormat1(std::istream& in, const std::vector<FactorType>& factorOrder)
- {
- Clear();
- std::string line;
+bool
+ConfusionNet::
+ReadFormat1(std::istream& in, const std::vector<FactorType>& factorOrder)
+{
+ Clear();
+ std::string line;
+ if(!getline(in,line)) return 0;
+ size_t s;
+ if(getline(in,line)) s=atoi(line.c_str());
+ else return 0;
+ data.resize(s);
+ for(size_t i=0; i<data.size(); ++i) {
if(!getline(in,line)) return 0;
- size_t s;
- if(getline(in,line)) s=atoi(line.c_str());
- else return 0;
- data.resize(s);
- for(size_t i=0; i<data.size(); ++i) {
- if(!getline(in,line)) return 0;
- std::istringstream is(line);
- if(!(is>>s)) return 0;
- std::string word;
- double prob;
- data[i].resize(s);
- for(size_t j=0; j<s; ++j)
- if(is>>word>>prob) {
- //TODO: we are only reading one prob from this input format, should read many... but this function is unused anyway. -JS
- data[i][j].second.denseScores = std::vector<float> (1);
- data[i][j].second.denseScores.push_back((float) log(prob));
- if(data[i][j].second.denseScores[0]<0) {
- VERBOSE(1, "WARN: neg costs: "<<data[i][j].second.denseScores[0]<<" -> set to 0\n");
- data[i][j].second.denseScores[0]=0.0;
- }
- // String2Word(word,data[i][j].first,factorOrder);
- Word& w = data[i][j].first;
- w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
- } else return 0;
- }
- return !data.empty();
+ std::istringstream is(line);
+ if(!(is>>s)) return 0;
+ std::string word;
+ double prob;
+ data[i].resize(s);
+ for(size_t j=0; j<s; ++j)
+ if(is>>word>>prob) {
+ //TODO: we are only reading one prob from this input format, should read many... but this function is unused anyway. -JS
+ data[i][j].second.denseScores = std::vector<float> (1);
+ data[i][j].second.denseScores.push_back((float) log(prob));
+ if(data[i][j].second.denseScores[0]<0) {
+ VERBOSE(1, "WARN: neg costs: "<<data[i][j].second.denseScores[0]<<" -> set to 0\n");
+ data[i][j].second.denseScores[0]=0.0;
+ }
+ // String2Word(word,data[i][j].first,factorOrder);
+ Word& w = data[i][j].first;
+ w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
+ } else return 0;
}
+ return !data.empty();
+}
+
+void ConfusionNet::Print(std::ostream& out) const
+{
+ out<<"conf net: "<<data.size()<<"\n";
+ for(size_t i=0; i<data.size(); ++i) {
+ out<<i<<" -- ";
+ for(size_t j=0; j<data[i].size(); ++j) {
+ out<<"("<<data[i][j].first.ToString()<<", ";
+
+ // dense
+ std::vector<float>::const_iterator iterDense;
+ for(iterDense = data[i][j].second.denseScores.begin();
+ iterDense < data[i][j].second.denseScores.end();
+ ++iterDense) {
+ out<<", "<<*iterDense;
+ }
- void ConfusionNet::Print(std::ostream& out) const
- {
- out<<"conf net: "<<data.size()<<"\n";
- for(size_t i=0; i<data.size(); ++i) {
- out<<i<<" -- ";
- for(size_t j=0; j<data[i].size(); ++j) {
- out<<"("<<data[i][j].first.ToString()<<", ";
-
- // dense
- std::vector<float>::const_iterator iterDense;
- for(iterDense = data[i][j].second.denseScores.begin();
- iterDense < data[i][j].second.denseScores.end();
- ++iterDense) {
- out<<", "<<*iterDense;
- }
-
- // sparse
- std::map<StringPiece, float>::const_iterator iterSparse;
- for(iterSparse = data[i][j].second.sparseScores.begin();
- iterSparse != data[i][j].second.sparseScores.end();
- ++iterSparse) {
- out << ", " << iterSparse->first << "=" << iterSparse->second;
- }
-
- out<<") ";
+ // sparse
+ std::map<StringPiece, float>::const_iterator iterSparse;
+ for(iterSparse = data[i][j].second.sparseScores.begin();
+ iterSparse != data[i][j].second.sparseScores.end();
+ ++iterSparse) {
+ out << ", " << iterSparse->first << "=" << iterSparse->second;
}
- out<<"\n";
+
+ out<<") ";
}
- out<<"\n\n";
+ out<<"\n";
}
+ out<<"\n\n";
+}
#ifdef _WIN32
#pragma warning(disable:4716)
#endif
- Phrase
- ConfusionNet::
- GetSubString(const WordsRange&) const
- {
- UTIL_THROW2("ERROR: call to ConfusionNet::GetSubString\n");
- //return Phrase(Input);
- }
+Phrase
+ConfusionNet::
+GetSubString(const WordsRange&) const
+{
+ UTIL_THROW2("ERROR: call to ConfusionNet::GetSubString\n");
+ //return Phrase(Input);
+}
- std::string
- ConfusionNet::
- GetStringRep(const std::vector<FactorType> /* factorsToPrint */) const //not well defined yet
- {
- TRACE_ERR("ERROR: call to ConfusionNet::GeStringRep\n");
- return "";
- }
+std::string
+ConfusionNet::
+GetStringRep(const std::vector<FactorType> /* factorsToPrint */) const //not well defined yet
+{
+ TRACE_ERR("ERROR: call to ConfusionNet::GeStringRep\n");
+ return "";
+}
#ifdef _WIN32
#pragma warning(disable:4716)
#endif
- const Word& ConfusionNet::GetWord(size_t) const
- {
- UTIL_THROW2("ERROR: call to ConfusionNet::GetFactorArray\n");
- }
+const Word& ConfusionNet::GetWord(size_t) const
+{
+ UTIL_THROW2("ERROR: call to ConfusionNet::GetFactorArray\n");
+}
#ifdef _WIN32
#pragma warning(default:4716)
#endif
- std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn)
- {
- cn.Print(out);
- return out;
- }
+std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn)
+{
+ cn.Print(out);
+ return out;
+}
- TranslationOptionCollection*
- ConfusionNet::
- CreateTranslationOptionCollection() const
- {
- size_t maxNoTransOptPerCoverage
- = StaticData::Instance().GetMaxNoTransOptPerCoverage();
- float translationOptionThreshold
- = StaticData::Instance().GetTranslationOptionThreshold();
- TranslationOptionCollection *rv
- = new TranslationOptionCollectionConfusionNet
- (*this, maxNoTransOptPerCoverage, translationOptionThreshold);
- assert(rv);
- return rv;
- }
+TranslationOptionCollection*
+ConfusionNet::
+CreateTranslationOptionCollection() const
+{
+ size_t maxNoTransOptPerCoverage
+ = StaticData::Instance().GetMaxNoTransOptPerCoverage();
+ float translationOptionThreshold
+ = StaticData::Instance().GetTranslationOptionThreshold();
+ TranslationOptionCollection *rv
+ = new TranslationOptionCollectionConfusionNet
+ (*this, maxNoTransOptPerCoverage, translationOptionThreshold);
+ assert(rv);
+ return rv;
+}
}
diff --git a/moses/ConfusionNet.h b/moses/ConfusionNet.h
index 0372227ef..48fbaef62 100644
--- a/moses/ConfusionNet.h
+++ b/moses/ConfusionNet.h
@@ -45,7 +45,7 @@ public:
const Column& GetColumn(size_t i) const {
UTIL_THROW_IF2(i >= data.size(),
- "Out of bounds. Trying to access " << i << " when vector only contains " << data.size());
+ "Out of bounds. Trying to access " << i << " when vector only contains " << data.size());
return data[i];
}
const Column& operator[](size_t i) const {
diff --git a/moses/DecodeGraph.cpp b/moses/DecodeGraph.cpp
index c687c8d21..c7b897d88 100644
--- a/moses/DecodeGraph.cpp
+++ b/moses/DecodeGraph.cpp
@@ -33,7 +33,8 @@ DecodeGraph::~DecodeGraph()
}
//! Add another decode step to the graph
-void DecodeGraph::Add(DecodeStep *decodeStep) {
+void DecodeGraph::Add(DecodeStep *decodeStep)
+{
m_steps.push_back(decodeStep);
decodeStep->SetContainer(this);
}
diff --git a/moses/DecodeGraph.h b/moses/DecodeGraph.h
index aa5c7ace9..1be823dc3 100644
--- a/moses/DecodeGraph.h
+++ b/moses/DecodeGraph.h
@@ -49,8 +49,8 @@ public:
DecodeGraph(size_t id)
: m_id(id)
, m_maxChartSpan(NOT_FOUND)
- , m_backoff(0)
- {}
+ , m_backoff(0) {
+ }
// for chart decoding
DecodeGraph(size_t id, size_t maxChartSpan)
@@ -78,7 +78,7 @@ public:
}
size_t GetMaxChartSpan() const {
- UTIL_THROW_IF2(m_maxChartSpan == NOT_FOUND, "Max chart span not specified");
+ UTIL_THROW_IF2(m_maxChartSpan == NOT_FOUND, "Max chart span not specified");
return m_maxChartSpan;
}
@@ -86,7 +86,7 @@ public:
return m_backoff;
}
- void SetBackoff(size_t backoff){
+ void SetBackoff(size_t backoff) {
m_backoff = backoff;
}
diff --git a/moses/DecodeStep.h b/moses/DecodeStep.h
index d47616c25..ed41a1158 100644
--- a/moses/DecodeStep.h
+++ b/moses/DecodeStep.h
@@ -105,10 +105,12 @@ public:
void RemoveFeature(const FeatureFunction *ff);
- void SetContainer(const DecodeGraph *container)
- { m_container = container; }
- const DecodeGraph *GetContainer() const
- { return m_container; }
+ void SetContainer(const DecodeGraph *container) {
+ m_container = container;
+ }
+ const DecodeGraph *GetContainer() const {
+ return m_container;
+ }
};
diff --git a/moses/DecodeStepTranslation.cpp b/moses/DecodeStepTranslation.cpp
index e7dbba4f3..936f3a4b8 100644
--- a/moses/DecodeStepTranslation.cpp
+++ b/moses/DecodeStepTranslation.cpp
@@ -198,11 +198,11 @@ const InputPath &DecodeStepTranslation::GetInputPathLEGACY(
const Word *wordIP = NULL;
for (size_t i = 0; i < phraseFromIP.GetSize(); ++i) {
- const Word &tempWord = phraseFromIP.GetWord(i);
- if (!tempWord.IsEpsilon()) {
- wordIP = &tempWord;
- break;
- }
+ const Word &tempWord = phraseFromIP.GetWord(i);
+ if (!tempWord.IsEpsilon()) {
+ wordIP = &tempWord;
+ break;
+ }
}
// const WordsRange &range = inputPath.GetWordsRange();
diff --git a/moses/ExportInterface.cpp b/moses/ExportInterface.cpp
new file mode 100644
index 000000000..87affdbed
--- /dev/null
+++ b/moses/ExportInterface.cpp
@@ -0,0 +1,215 @@
+// $Id: ExportInterface.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2009 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+/**
+ * Moses interface for main function, for single-threaded and multi-threaded.
+ **/
+#include <exception>
+#include <fstream>
+#include <sstream>
+#include <vector>
+
+#include "util/usage.hh"
+
+#ifdef WIN32
+// Include Visual Leak Detector
+//#include <vld.h>
+#endif
+
+#include "IOWrapper.h"
+#include "Hypothesis.h"
+#include "Manager.h"
+#include "StaticData.h"
+#include "TypeDef.h"
+#include "Util.h"
+#include "Timer.h"
+#include "TranslationModel/PhraseDictionary.h"
+#include "FF/StatefulFeatureFunction.h"
+#include "FF/StatelessFeatureFunction.h"
+#include "TranslationTask.h"
+
+#ifdef HAVE_PROTOBUF
+#include "hypergraph.pb.h"
+#endif
+
+#ifdef PT_UG
+#include <boost/foreach.hpp>
+#include "TranslationModel/UG/mmsapt.h"
+#include "TranslationModel/UG/generic/program_options/ug_splice_arglist.h"
+#endif
+
+using namespace std;
+using namespace Moses;
+
+namespace Moses
+{
+
+void OutputFeatureWeightsForHypergraph(std::ostream &outputSearchGraphStream)
+{
+ outputSearchGraphStream.setf(std::ios::fixed);
+ outputSearchGraphStream.precision(6);
+ StaticData::Instance().GetAllWeights().Save(outputSearchGraphStream);
+}
+
+
+} //namespace
+
+/** Called by main function of the command line version of the decoder **/
+int decoder_main(int argc, char** argv)
+{
+ try {
+
+#ifdef HAVE_PROTOBUF
+ GOOGLE_PROTOBUF_VERIFY_VERSION;
+#endif
+
+ // echo command line, if verbose
+ IFVERBOSE(1) {
+ TRACE_ERR("command: ");
+ for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" ");
+ TRACE_ERR(endl);
+ }
+
+ // set number of significant decimals in output
+ FixPrecision(cout);
+ FixPrecision(cerr);
+
+ // load all the settings into the Parameter class
+ // (stores them as strings, or array of strings)
+ Parameter params;
+ if (!params.LoadParam(argc,argv)) {
+ exit(1);
+ }
+
+
+ // initialize all "global" variables, which are stored in StaticData
+ // note: this also loads models such as the language model, etc.
+ if (!StaticData::LoadDataStatic(&params, argv[0])) {
+ exit(1);
+ }
+
+ // setting "-show-weights" -> just dump out weights and exit
+ if (params.isParamSpecified("show-weights")) {
+ ShowWeights();
+ exit(0);
+ }
+
+ // shorthand for accessing information in StaticData
+ const StaticData& staticData = StaticData::Instance();
+
+
+ //initialise random numbers
+ srand(time(NULL));
+
+ // set up read/writing class
+ IFVERBOSE(1) {
+ PrintUserTime("Created input-output object");
+ }
+
+ IOWrapper* ioWrapper = new IOWrapper();
+ if (ioWrapper == NULL) {
+ cerr << "Error; Failed to create IO object" << endl;
+ exit(1);
+ }
+
+ // check on weights
+ const ScoreComponentCollection& weights = staticData.GetAllWeights();
+ IFVERBOSE(2) {
+ TRACE_ERR("The global weight vector looks like this: ");
+ TRACE_ERR(weights);
+ TRACE_ERR("\n");
+ }
+
+#ifdef WITH_THREADS
+ ThreadPool pool(staticData.ThreadCount());
+#endif
+
+ // main loop over set of input sentences
+ InputType* source = NULL;
+ size_t lineCount = staticData.GetStartTranslationId();
+ while(ioWrapper->ReadInput(staticData.GetInputType(),source)) {
+ source->SetTranslationId(lineCount);
+ IFVERBOSE(1) {
+ ResetUserTime();
+ }
+
+ FeatureFunction::CallChangeSource(source);
+
+ // set up task of translating one sentence
+ TranslationTask* task = new TranslationTask(source, *ioWrapper);
+
+ // execute task
+#ifdef WITH_THREADS
+#ifdef PT_UG
+ bool spe = params.isParamSpecified("spe-src");
+ if (spe) {
+ // simulated post-editing: always run single-threaded!
+ task->Run();
+ delete task;
+ string src,trg,aln;
+ UTIL_THROW_IF2(!getline(*ioWrapper->spe_src,src), "[" << HERE << "] "
+ << "missing update data for simulated post-editing.");
+ UTIL_THROW_IF2(!getline(*ioWrapper->spe_trg,trg), "[" << HERE << "] "
+ << "missing update data for simulated post-editing.");
+ UTIL_THROW_IF2(!getline(*ioWrapper->spe_aln,aln), "[" << HERE << "] "
+ << "missing update data for simulated post-editing.");
+ BOOST_FOREACH (PhraseDictionary* pd, PhraseDictionary::GetColl()) {
+ Mmsapt* sapt = dynamic_cast<Mmsapt*>(pd);
+ if (sapt) sapt->add(src,trg,aln);
+ VERBOSE(1,"[" << HERE << " added src] " << src << endl);
+ VERBOSE(1,"[" << HERE << " added trg] " << trg << endl);
+ VERBOSE(1,"[" << HERE << " added aln] " << aln << endl);
+ }
+ } else
+#endif
+ pool.Submit(task);
+#else
+ task->Run();
+ delete task;
+#endif
+
+ source = NULL; //make sure it doesn't get deleted
+ ++lineCount;
+ }
+
+ // we are done, finishing up
+#ifdef WITH_THREADS
+ pool.Stop(true); //flush remaining jobs
+#endif
+
+ delete ioWrapper;
+ FeatureFunction::Destroy();
+
+ } catch (const std::exception &e) {
+ std::cerr << "Exception: " << e.what() << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ IFVERBOSE(1) util::PrintUsage(std::cerr);
+
+#ifndef EXIT_RETURN
+ //This avoids that destructors are called (it can take a long time)
+ exit(EXIT_SUCCESS);
+#else
+ return EXIT_SUCCESS;
+#endif
+}
+
diff --git a/moses/ExportInterface.h b/moses/ExportInterface.h
new file mode 100644
index 000000000..8f5b3b7f6
--- /dev/null
+++ b/moses/ExportInterface.h
@@ -0,0 +1,42 @@
+#pragma once
+// $Id$
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (c) 2006 University of Edinburgh
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+ * Neither the name of the University of Edinburgh nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+// example file on how to use moses library
+
+
+#include "moses/StaticData.h"
+
+class IOWrapper;
+
+int decoder_main(int argc, char* argv[]);
+
diff --git a/moses/FF/BleuScoreFeature.cpp b/moses/FF/BleuScoreFeature.cpp
index 0d0a20797..5be3b0b6b 100644
--- a/moses/FF/BleuScoreFeature.cpp
+++ b/moses/FF/BleuScoreFeature.cpp
@@ -1,7 +1,6 @@
#include "BleuScoreFeature.h"
#include "moses/StaticData.h"
-#include "moses/UserMessage.h"
#include "moses/Hypothesis.h"
#include "moses/FactorCollection.h"
#include "util/exception.hh"
@@ -118,7 +117,7 @@ void BleuScoreFeature::SetParameter(const std::string& key, const std::string& v
}
string line;
while (getline(in,line)) {
- /* if (GetSearchAlgorithm() == ChartDecoding) {
+ /* if (GetSearchAlgorithm() == CYKPlus) {
stringstream tmp;
tmp << "<s> " << line << " </s>";
line = tmp.str();
@@ -503,8 +502,8 @@ void BleuScoreFeature::GetClippedNgramMatchesAndCounts(Phrase& phrase,
* phrase translated.
*/
FFState* BleuScoreFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo,
- const FFState* prev_state,
- ScoreComponentCollection* accumulator) const
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const
{
if (!m_enabled) return new BleuScoreState();
diff --git a/moses/FF/BleuScoreFeature.h b/moses/FF/BleuScoreFeature.h
index cdba578ac..e1a7f09c7 100644
--- a/moses/FF/BleuScoreFeature.h
+++ b/moses/FF/BleuScoreFeature.h
@@ -62,7 +62,7 @@ class BleuScoreFeature : public StatefulFeatureFunction
{
public:
static const std::vector<BleuScoreFeature*>& GetColl() {
- return s_staticColl;
+ return s_staticColl;
}
typedef boost::unordered_map<size_t, RefValue > RefCounts;
@@ -116,23 +116,27 @@ public:
size_t skip = 0) const;
FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo,
- const FFState* prev_state,
- ScoreComponentCollection* accumulator) const;
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const;
FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo,
- int featureID,
- ScoreComponentCollection* accumulator) const;
+ int featureID,
+ ScoreComponentCollection* accumulator) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
bool Enabled() const {
return m_enabled;
diff --git a/moses/FF/ConstrainedDecoding.cpp b/moses/FF/ConstrainedDecoding.cpp
index bfe412913..89ad2c9d6 100644
--- a/moses/FF/ConstrainedDecoding.cpp
+++ b/moses/FF/ConstrainedDecoding.cpp
@@ -43,7 +43,7 @@ ConstrainedDecoding::ConstrainedDecoding(const std::string &line)
void ConstrainedDecoding::Load()
{
const StaticData &staticData = StaticData::Instance();
- bool addBeginEndWord = (staticData.GetSearchAlgorithm() == ChartDecoding) || (staticData.GetSearchAlgorithm() == ChartIncremental);
+ bool addBeginEndWord = (staticData.GetSearchAlgorithm() == CYKPlus) || (staticData.GetSearchAlgorithm() == ChartIncremental);
for(size_t i = 0; i < m_paths.size(); ++i) {
InputFileStream constraintFile(m_paths[i]);
@@ -51,7 +51,7 @@ void ConstrainedDecoding::Load()
long sentenceID = staticData.GetStartTranslationId() - 1;
while (getline(constraintFile, line)) {
vector<string> vecStr = Tokenize(line, "\t");
-
+
Phrase phrase(0);
if (vecStr.size() == 1) {
sentenceID++;
@@ -64,7 +64,7 @@ void ConstrainedDecoding::Load()
} else {
UTIL_THROW(util::Exception, "Reference file not loaded");
}
-
+
if (addBeginEndWord) {
phrase.InitStartEndWord();
}
@@ -76,7 +76,7 @@ void ConstrainedDecoding::Load()
std::vector<float> ConstrainedDecoding::DefaultWeights() const
{
UTIL_THROW_IF2(m_numScoreComponents != 1,
- "ConstrainedDecoding must only have 1 score");
+ "ConstrainedDecoding must only have 1 score");
vector<float> ret(1, 1);
return ret;
}
@@ -109,7 +109,7 @@ FFState* ConstrainedDecoding::EvaluateWhenApplied(
assert(ref);
ConstrainedDecodingState *ret = new ConstrainedDecodingState(hypo);
- const Phrase &outputPhrase = ret->GetPhrase();
+ const Phrase &outputPhrase = ret->GetPhrase();
size_t searchPos = NOT_FOUND;
size_t i = 0;
@@ -125,16 +125,14 @@ FFState* ConstrainedDecoding::EvaluateWhenApplied(
// translated entire sentence.
bool match = (searchPos == 0) && (size == outputPhrase.GetSize());
if (!m_negate) {
- score = match ? 0 : - ( m_soft ? 1 : std::numeric_limits<float>::infinity());
- }
- else {
- score = !match ? 0 : - ( m_soft ? 1 : std::numeric_limits<float>::infinity());
+ score = match ? 0 : - ( m_soft ? 1 : std::numeric_limits<float>::infinity());
+ } else {
+ score = !match ? 0 : - ( m_soft ? 1 : std::numeric_limits<float>::infinity());
}
} else if (m_negate) {
// keep all derivations
score = 0;
- }
- else {
+ } else {
score = (searchPos != NOT_FOUND) ? 0 : - ( m_soft ? 1 : std::numeric_limits<float>::infinity());
}
@@ -165,7 +163,7 @@ FFState* ConstrainedDecoding::EvaluateWhenApplied(
size = (*ref)[i].GetSize();
i++;
}
-
+
float score;
if (hypo.GetCurrSourceRange().GetStartPos() == 0 &&
hypo.GetCurrSourceRange().GetEndPos() == source.GetSize() - 1) {
@@ -173,10 +171,9 @@ FFState* ConstrainedDecoding::EvaluateWhenApplied(
bool match = (searchPos == 0) && (size == outputPhrase.GetSize());
if (!m_negate) {
- score = match ? 0 : - ( m_soft ? 1 : std::numeric_limits<float>::infinity());
- }
- else {
- score = !match ? 0 : - ( m_soft ? 1 : std::numeric_limits<float>::infinity());
+ score = match ? 0 : - ( m_soft ? 1 : std::numeric_limits<float>::infinity());
+ } else {
+ score = !match ? 0 : - ( m_soft ? 1 : std::numeric_limits<float>::infinity());
}
} else if (m_negate) {
// keep all derivations
diff --git a/moses/FF/ConstrainedDecoding.h b/moses/FF/ConstrainedDecoding.h
index ca007f21d..67833a1b4 100644
--- a/moses/FF/ConstrainedDecoding.h
+++ b/moses/FF/ConstrainedDecoding.h
@@ -11,8 +11,8 @@ namespace Moses
class ConstrainedDecodingState : public FFState
{
public:
- ConstrainedDecodingState()
- {}
+ ConstrainedDecodingState() {
+ }
ConstrainedDecodingState(const Hypothesis &hypo);
ConstrainedDecodingState(const ChartHypothesis &hypo);
@@ -42,19 +42,23 @@ public:
}
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
-
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
+
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
-
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
diff --git a/moses/FF/ControlRecombination.cpp b/moses/FF/ControlRecombination.cpp
index 85e88ac94..f7231d9b0 100644
--- a/moses/FF/ControlRecombination.cpp
+++ b/moses/FF/ControlRecombination.cpp
@@ -51,7 +51,7 @@ int ControlRecombinationState::Compare(const FFState& other) const
std::vector<float> ControlRecombination::DefaultWeights() const
{
UTIL_THROW_IF2(m_numScoreComponents,
- "ControlRecombination should not have any scores");
+ "ControlRecombination should not have any scores");
vector<float> ret(0);
return ret;
}
diff --git a/moses/FF/ControlRecombination.h b/moses/FF/ControlRecombination.h
index 095cc6b29..f221f772f 100644
--- a/moses/FF/ControlRecombination.h
+++ b/moses/FF/ControlRecombination.h
@@ -20,8 +20,8 @@ class ControlRecombinationState : public FFState
{
public:
ControlRecombinationState(const ControlRecombination &ff)
- :m_ff(ff)
- {}
+ :m_ff(ff) {
+ }
ControlRecombinationState(const Hypothesis &hypo, const ControlRecombination &ff);
ControlRecombinationState(const ChartHypothesis &hypo, const ControlRecombination &ff);
@@ -58,17 +58,22 @@ public:
}
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
diff --git a/moses/FF/CountNonTerms.cpp b/moses/FF/CountNonTerms.cpp
index 03c7b7315..17d1c9c20 100644
--- a/moses/FF/CountNonTerms.cpp
+++ b/moses/FF/CountNonTerms.cpp
@@ -8,18 +8,18 @@ using namespace std;
namespace Moses
{
CountNonTerms::CountNonTerms(const std::string &line)
-:StatelessFeatureFunction(line)
-,m_all(true)
-,m_sourceSyntax(false)
-,m_targetSyntax(false)
+ :StatelessFeatureFunction(line)
+ ,m_all(true)
+ ,m_sourceSyntax(false)
+ ,m_targetSyntax(false)
{
ReadParameters();
}
void CountNonTerms::EvaluateInIsolation(const Phrase &sourcePhrase
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
const StaticData &staticData = StaticData::Instance();
@@ -27,33 +27,33 @@ void CountNonTerms::EvaluateInIsolation(const Phrase &sourcePhrase
size_t indScore = 0;
if (m_all) {
- for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
- const Word &word = targetPhrase.GetWord(i);
- if (word.IsNonTerminal()) {
- ++scores[indScore];
- }
- }
- ++indScore;
+ for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
+ const Word &word = targetPhrase.GetWord(i);
+ if (word.IsNonTerminal()) {
+ ++scores[indScore];
+ }
+ }
+ ++indScore;
}
if (m_targetSyntax) {
- for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
- const Word &word = targetPhrase.GetWord(i);
- if (word.IsNonTerminal() && word != staticData.GetOutputDefaultNonTerminal()) {
- ++scores[indScore];
- }
- }
- ++indScore;
+ for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
+ const Word &word = targetPhrase.GetWord(i);
+ if (word.IsNonTerminal() && word != staticData.GetOutputDefaultNonTerminal()) {
+ ++scores[indScore];
+ }
+ }
+ ++indScore;
}
if (m_sourceSyntax) {
- for (size_t i = 0; i < sourcePhrase.GetSize(); ++i) {
- const Word &word = sourcePhrase.GetWord(i);
- if (word.IsNonTerminal() && word != staticData.GetInputDefaultNonTerminal()) {
- ++scores[indScore];
- }
- }
- ++indScore;
+ for (size_t i = 0; i < sourcePhrase.GetSize(); ++i) {
+ const Word &word = sourcePhrase.GetWord(i);
+ if (word.IsNonTerminal() && word != staticData.GetInputDefaultNonTerminal()) {
+ ++scores[indScore];
+ }
+ }
+ ++indScore;
}
scoreBreakdown.PlusEquals(this, scores);
@@ -64,9 +64,9 @@ void CountNonTerms::SetParameter(const std::string& key, const std::string& valu
if (key == "all") {
m_all = Scan<bool>(value);
} else if (key == "source-syntax") {
- m_sourceSyntax = Scan<bool>(value);
+ m_sourceSyntax = Scan<bool>(value);
} else if (key == "target-syntax") {
- m_targetSyntax = Scan<bool>(value);
+ m_targetSyntax = Scan<bool>(value);
} else {
StatelessFeatureFunction::SetParameter(key, value);
}
diff --git a/moses/FF/CountNonTerms.h b/moses/FF/CountNonTerms.h
index c4e1467e9..2e29f2aaa 100644
--- a/moses/FF/CountNonTerms.h
+++ b/moses/FF/CountNonTerms.h
@@ -9,30 +9,35 @@ class CountNonTerms : public StatelessFeatureFunction
{
public:
CountNonTerms(const std::string &line);
- bool IsUseable(const FactorMask &mask) const
- { return true; }
+ bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(
const ChartHypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void SetParameter(const std::string& key, const std::string& value);
diff --git a/moses/FF/CoveredReferenceFeature.cpp b/moses/FF/CoveredReferenceFeature.cpp
index 3a2482d0d..dd2c890d7 100644
--- a/moses/FF/CoveredReferenceFeature.cpp
+++ b/moses/FF/CoveredReferenceFeature.cpp
@@ -22,44 +22,44 @@ int CoveredReferenceState::Compare(const FFState& other) const
const CoveredReferenceState &otherState = static_cast<const CoveredReferenceState&>(other);
if (m_coveredRef.size() != otherState.m_coveredRef.size()) {
- return (m_coveredRef.size() < otherState.m_coveredRef.size()) ? -1 : +1;
+ return (m_coveredRef.size() < otherState.m_coveredRef.size()) ? -1 : +1;
} else {
multiset<string>::const_iterator thisIt, otherIt;
for (thisIt = m_coveredRef.begin(), otherIt = otherState.m_coveredRef.begin();
- thisIt != m_coveredRef.end();
- thisIt++, otherIt++) {
+ thisIt != m_coveredRef.end();
+ thisIt++, otherIt++) {
if (*thisIt != *otherIt) return thisIt->compare(*otherIt);
}
}
return 0;
// return m_coveredRef == otherState.m_coveredRef;
-
+
// if (m_coveredRef == otherState.m_coveredRef)
// return 0;
// return (m_coveredRef.size() < otherState.m_coveredRef.size()) ? -1 : +1;
}
void CoveredReferenceFeature::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{}
void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
long id = input.GetTranslationId();
boost::unordered_map<long, std::multiset<string> >::const_iterator refIt = m_refs.find(id);
multiset<string> wordsInPhrase = GetWordsInPhrase(targetPhrase);
multiset<string> covered;
set_intersection(wordsInPhrase.begin(), wordsInPhrase.end(),
- refIt->second.begin(), refIt->second.end(),
- inserter(covered, covered.begin()));
+ refIt->second.begin(), refIt->second.end(),
+ inserter(covered, covered.begin()));
vector<float> scores;
scores.push_back(covered.size());
@@ -67,7 +67,8 @@ void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input
estimatedFutureScore->Assign(this, scores);
}
-void CoveredReferenceFeature::Load() {
+void CoveredReferenceFeature::Load()
+{
InputFileStream refFile(m_path);
std::string line;
const StaticData &staticData = StaticData::Instance();
@@ -76,7 +77,7 @@ void CoveredReferenceFeature::Load() {
vector<string> words = Tokenize(line, " ");
multiset<string> wordSet;
// TODO make Tokenize work with other containers than vector
- copy(words.begin(), words.end(), inserter(wordSet, wordSet.begin()));
+ copy(words.begin(), words.end(), inserter(wordSet, wordSet.begin()));
m_refs.insert(make_pair(sentenceID++, wordSet));
}
}
@@ -107,15 +108,15 @@ FFState* CoveredReferenceFeature::EvaluateWhenApplied(
boost::unordered_map<long, std::multiset<string> >::const_iterator refIt = m_refs.find(id);
if (refIt == m_refs.end()) UTIL_THROW(util::Exception, "Sentence id out of range: " + SPrint<long>(id));
set_difference(refIt->second.begin(), refIt->second.end(),
- ret->m_coveredRef.begin(), ret->m_coveredRef.end(),
- inserter(remaining, remaining.begin()));
+ ret->m_coveredRef.begin(), ret->m_coveredRef.end(),
+ inserter(remaining, remaining.begin()));
// which of the remaining words are present in the current phrase
multiset<string> wordsInPhrase = GetWordsInPhrase(cur_hypo.GetCurrTargetPhrase());
multiset<string> newCovered;
set_intersection(wordsInPhrase.begin(), wordsInPhrase.end(),
- remaining.begin(), remaining.end(),
- inserter(newCovered, newCovered.begin()));
+ remaining.begin(), remaining.end(),
+ inserter(newCovered, newCovered.begin()));
vector<float> estimateScore =
cur_hypo.GetCurrTargetPhrase().GetScoreBreakdown().GetScoresForProducer(this);
diff --git a/moses/FF/CoveredReferenceFeature.h b/moses/FF/CoveredReferenceFeature.h
index a6cdd6f99..d5873f33e 100644
--- a/moses/FF/CoveredReferenceFeature.h
+++ b/moses/FF/CoveredReferenceFeature.h
@@ -37,8 +37,7 @@ class CoveredReferenceFeature : public StatefulFeatureFunction
public:
CoveredReferenceFeature(const std::string &line)
- :StatefulFeatureFunction(1, line)
- {
+ :StatefulFeatureFunction(1, line) {
m_tuneable = true;
ReadParameters();
}
@@ -53,15 +52,20 @@ public:
}
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
diff --git a/moses/FF/DecodeFeature.cpp b/moses/FF/DecodeFeature.cpp
index 6581c7ab0..11c8653ea 100644
--- a/moses/FF/DecodeFeature.cpp
+++ b/moses/FF/DecodeFeature.cpp
@@ -38,7 +38,7 @@ DecodeFeature::DecodeFeature(const std::string &line)
}
DecodeFeature::DecodeFeature(size_t numScoreComponents
- , const std::string &line)
+ , const std::string &line)
: StatelessFeatureFunction(numScoreComponents, line)
, m_container(NULL)
{
diff --git a/moses/FF/DecodeFeature.h b/moses/FF/DecodeFeature.h
index a93eb9ba3..19c9b3161 100644
--- a/moses/FF/DecodeFeature.h
+++ b/moses/FF/DecodeFeature.h
@@ -43,12 +43,12 @@ public:
DecodeFeature(const std::string &line);
DecodeFeature(size_t numScoreComponents
- , const std::string &line);
+ , const std::string &line);
DecodeFeature(size_t numScoreComponents
- , const std::vector<FactorType> &input
- , const std::vector<FactorType> &output
- , const std::string &line);
+ , const std::vector<FactorType> &input
+ , const std::vector<FactorType> &output
+ , const std::string &line);
//! returns output factor types as specified by the ini file
const FactorMask& GetOutputFactorMask() const;
@@ -63,29 +63,34 @@ public:
void SetParameter(const std::string& key, const std::string& value);
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const Syntax::SHyperedge &hyperedge,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
- void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
- void SetContainer(const DecodeStep *container)
- { m_container = container; }
+ void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
+
+ void SetContainer(const DecodeStep *container) {
+ m_container = container;
+ }
const DecodeGraph &GetDecodeGraph() const;
diff --git a/moses/FF/DistortionScoreProducer.cpp b/moses/FF/DistortionScoreProducer.cpp
index 5995fe213..e1571d2a9 100644
--- a/moses/FF/DistortionScoreProducer.cpp
+++ b/moses/FF/DistortionScoreProducer.cpp
@@ -22,9 +22,12 @@ struct DistortionState_traditional : public FFState {
}
};
+std::vector<const DistortionScoreProducer*> DistortionScoreProducer::s_staticColl;
+
DistortionScoreProducer::DistortionScoreProducer(const std::string &line)
: StatefulFeatureFunction(1, line)
{
+ s_staticColl.push_back(this);
ReadParameters();
}
diff --git a/moses/FF/DistortionScoreProducer.h b/moses/FF/DistortionScoreProducer.h
index aa2c18b95..218fb9b40 100644
--- a/moses/FF/DistortionScoreProducer.h
+++ b/moses/FF/DistortionScoreProducer.h
@@ -16,7 +16,14 @@ class WordsRange;
*/
class DistortionScoreProducer : public StatefulFeatureFunction
{
+protected:
+ static std::vector<const DistortionScoreProducer*> s_staticColl;
+
public:
+ static const std::vector<const DistortionScoreProducer*>& GetDistortionFeatureFunctions() {
+ return s_staticColl;
+ }
+
DistortionScoreProducer(const std::string &line);
bool IsUseable(const FactorMask &mask) const {
@@ -41,18 +48,22 @@ public:
}
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
- void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
+ void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
};
}
diff --git a/moses/FF/DynamicCacheBasedLanguageModel.cpp b/moses/FF/DynamicCacheBasedLanguageModel.cpp
new file mode 100644
index 000000000..f4e98b3c5
--- /dev/null
+++ b/moses/FF/DynamicCacheBasedLanguageModel.cpp
@@ -0,0 +1,458 @@
+#include <utility>
+#include "moses/StaticData.h"
+#include "moses/InputFileStream.h"
+#include "DynamicCacheBasedLanguageModel.h"
+
+namespace Moses
+{
+
+std::map< const std::string, DynamicCacheBasedLanguageModel * > DynamicCacheBasedLanguageModel::s_instance_map;
+DynamicCacheBasedLanguageModel *DynamicCacheBasedLanguageModel::s_instance = NULL;
+
+DynamicCacheBasedLanguageModel::DynamicCacheBasedLanguageModel(const std::string &line)
+ : StatelessFeatureFunction(1, line)
+{
+ VERBOSE(2,"Initializing DynamicCacheBasedLanguageModel feature..." << std::endl);
+
+ m_query_type = CBLM_QUERY_TYPE_ALLSUBSTRINGS;
+ m_score_type = CBLM_SCORE_TYPE_HYPERBOLA;
+ m_maxAge = 1000;
+ m_name = "default";
+ m_constant = false;
+
+ ReadParameters();
+ UTIL_THROW_IF2(s_instance_map.find(m_name) != s_instance_map.end(), "Only 1 DynamicCacheBasedLanguageModel feature named " + m_name + " is allowed");
+ s_instance_map[m_name] = this;
+ s_instance = this; //for back compatibility
+
+ SetPreComputedScores();
+}
+
+DynamicCacheBasedLanguageModel::~DynamicCacheBasedLanguageModel() {};
+
+void DynamicCacheBasedLanguageModel::SetPreComputedScores()
+{
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
+#endif
+ precomputedScores.clear();
+ for (unsigned int i=0; i<m_maxAge; i++) {
+ precomputedScores.push_back(decaying_score(i));
+ }
+
+ if ( m_score_type == CBLM_SCORE_TYPE_HYPERBOLA
+ || m_score_type == CBLM_SCORE_TYPE_POWER
+ || m_score_type == CBLM_SCORE_TYPE_EXPONENTIAL
+ || m_score_type == CBLM_SCORE_TYPE_COSINE ) {
+ precomputedScores.push_back(decaying_score(m_maxAge));
+ } else { // m_score_type = CBLM_SCORE_TYPE_XXXXXXXXX_REWARD
+ precomputedScores.push_back(0.0);
+ }
+ m_lower_score = precomputedScores[m_maxAge];
+ VERBOSE(3, "SetPreComputedScores(): lower_age:|" << m_maxAge << "| lower_score:|" << m_lower_score << "|" << std::endl);
+}
+
+float DynamicCacheBasedLanguageModel::GetPreComputedScores(const unsigned int age)
+{
+ VERBOSE(2, "float DynamicCacheBasedLanguageModel::GetPreComputedScores" << std::endl);
+ VERBOSE(2, "age:|"<< age << "|" << std::endl);
+
+ if (age < m_maxAge) {
+ return precomputedScores.at(age);
+ } else {
+ VERBOSE(2, "is to big reduced to m)_maxAge:|"<< m_maxAge << "|" << std::endl);
+ return precomputedScores.at(m_maxAge);
+ }
+}
+
+void DynamicCacheBasedLanguageModel::SetParameter(const std::string& key, const std::string& value)
+{
+ VERBOSE(2, "DynamicCacheBasedLanguageModel::SetParameter key:|" << key << "| value:|" << value << "|" << std::endl);
+ if (key == "cblm-query-type") {
+ SetQueryType(Scan<size_t>(value));
+ } else if (key == "cblm-score-type") {
+ SetScoreType(Scan<size_t>(value));
+ } else if (key == "cblm-max-age") {
+ SetMaxAge(Scan<unsigned int>(value));
+ } else if (key == "cblm-file") {
+ m_initfiles = Scan<std::string>(value);
+ } else if (key == "cblm-name") {
+ m_name = Scan<std::string>(value);
+ } else if (key == "cblm-constant") {
+ m_constant = Scan<bool>(value);
+ } else {
+ StatelessFeatureFunction::SetParameter(key, value);
+ }
+}
+
+void DynamicCacheBasedLanguageModel::EvaluateInIsolation(const Phrase &sp
+ , const TargetPhrase &tp
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
+{
+ float score = m_lower_score;
+ switch(m_query_type) {
+ case CBLM_QUERY_TYPE_WHOLESTRING:
+ score = Evaluate_Whole_String(tp);
+ break;
+ case CBLM_QUERY_TYPE_ALLSUBSTRINGS:
+ score = Evaluate_All_Substrings(tp);
+ break;
+ default:
+ UTIL_THROW_IF2(false, "This score type (" << m_query_type << ") is unknown.");
+ }
+
+ scoreBreakdown.Assign(this, score);
+}
+
+float DynamicCacheBasedLanguageModel::Evaluate_Whole_String(const TargetPhrase& tp) const
+{
+ //consider all words in the TargetPhrase as one n-gram
+ // and compute the decaying_score for the whole n-gram
+ // and return this value
+
+ decaying_cache_t::const_iterator it;
+ float score = m_lower_score;
+
+ std::string w = "";
+ size_t endpos = tp.GetSize();
+ for (size_t pos = 0 ; pos < endpos ; ++pos) {
+ w += tp.GetWord(pos).GetFactor(0)->GetString().as_string();
+ if (pos < endpos - 1) {
+ w += " ";
+ }
+ }
+ it = m_cache.find(w);
+
+ VERBOSE(4,"cblm::Evaluate_Whole_String: searching w:|" << w << "|" << std::endl);
+ if (it != m_cache.end()) { //found!
+ score = ((*it).second).second;
+ VERBOSE(4,"cblm::Evaluate_Whole_String: found w:|" << w << "|" << std::endl);
+ }
+
+ VERBOSE(4,"cblm::Evaluate_Whole_String: returning score:|" << score << "|" << std::endl);
+ return score;
+}
+
+float DynamicCacheBasedLanguageModel::Evaluate_All_Substrings(const TargetPhrase& tp) const
+{
+ //loop over all n-grams in the TargetPhrase (no matter of n)
+ //and compute the decaying_score for all words
+ //and return their sum
+
+ decaying_cache_t::const_iterator it;
+ float score = 0.0;
+
+ for (size_t startpos = 0 ; startpos < tp.GetSize() ; ++startpos) {
+ std::string w = "";
+ for (size_t endpos = startpos; endpos < tp.GetSize() ; ++endpos) {
+ w += tp.GetWord(endpos).GetFactor(0)->GetString().as_string();
+ it = m_cache.find(w);
+
+ if (it != m_cache.end()) { //found!
+ score += ((*it).second).second;
+ VERBOSE(3,"cblm::Evaluate_All_Substrings: found w:|" << w << "| actual score:|" << ((*it).second).second << "| score:|" << score << "|" << std::endl);
+ } else {
+ score += m_lower_score;
+ }
+
+ if (endpos == startpos) {
+ w += " ";
+ }
+
+ }
+ }
+ VERBOSE(3,"cblm::Evaluate_All_Substrings: returning score:|" << score << "|" << std::endl);
+ return score;
+}
+
+void DynamicCacheBasedLanguageModel::Print() const
+{
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
+#endif
+ decaying_cache_t::const_iterator it;
+ std::cout << "Content of the cache of Cache-Based Language Model" << std::endl;
+ std::cout << "Size of the cache of Cache-Based Language Model:|" << m_cache.size() << "|" << std::endl;
+ for ( it=m_cache.begin() ; it != m_cache.end(); it++ ) {
+ std::cout << "word:|" << (*it).first << "| age:|" << ((*it).second).first << "| score:|" << ((*it).second).second << "|" << std::endl;
+ }
+}
+
+void DynamicCacheBasedLanguageModel::Decay()
+{
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
+#endif
+ decaying_cache_t::iterator it;
+
+ unsigned int age;
+ float score;
+ for ( it=m_cache.begin() ; it != m_cache.end(); it++ ) {
+ age=((*it).second).first + 1;
+ if (age > m_maxAge) {
+ m_cache.erase(it);
+ it--;
+ } else {
+ score = GetPreComputedScores(age);
+// score = decaying_score(age);
+ decaying_cache_value_t p (age, score);
+ (*it).second = p;
+ }
+ }
+}
+
+void DynamicCacheBasedLanguageModel::Update(std::vector<std::string> words, int age)
+{
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
+#endif
+ VERBOSE(3,"words.size():|" << words.size() << "|" << std::endl);
+ for (size_t j=0; j<words.size(); j++) {
+ words[j] = Trim(words[j]);
+// VERBOSE(3,"CacheBasedLanguageModel::Update word[" << j << "]:"<< words[j] << " age:" << age << " decaying_score(age):" << decaying_score(age) << std::endl);
+// decaying_cache_value_t p (age,decaying_score(age));
+ VERBOSE(3,"CacheBasedLanguageModel::Update word[" << j << "]:"<< words[j] << " age:" << age << " GetPreComputedScores(age):" << GetPreComputedScores(age) << std::endl);
+ decaying_cache_value_t p (age,GetPreComputedScores(age));
+ std::pair<std::string, decaying_cache_value_t> e (words[j],p);
+ m_cache.erase(words[j]); //always erase the element (do nothing if the entry does not exist)
+ m_cache.insert(e); //insert the entry
+ }
+}
+
+void DynamicCacheBasedLanguageModel::ClearEntries(std::string &entries)
+{
+ if (entries != "") {
+ VERBOSE(3,"entries:|" << entries << "|" << std::endl);
+ std::vector<std::string> elements = TokenizeMultiCharSeparator(entries, "||");
+ VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl);
+ ClearEntries(elements);
+ }
+}
+
+void DynamicCacheBasedLanguageModel::ClearEntries(std::vector<std::string> words)
+{
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
+#endif
+ VERBOSE(3,"words.size():|" << words.size() << "|" << std::endl);
+ for (size_t j=0; j<words.size(); j++) {
+ words[j] = Trim(words[j]);
+ VERBOSE(3,"CacheBasedLanguageModel::ClearEntries word[" << j << "]:"<< words[j] << std::endl);
+ m_cache.erase(words[j]); //always erase the element (do nothing if the entry does not exist)
+ }
+}
+
+void DynamicCacheBasedLanguageModel::Insert(std::string &entries)
+{
+ if (entries != "") {
+ VERBOSE(3,"entries:|" << entries << "|" << std::endl);
+ std::vector<std::string> elements = TokenizeMultiCharSeparator(entries, "||");
+ VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl);
+ Insert(elements);
+ }
+}
+
+void DynamicCacheBasedLanguageModel::Insert(std::vector<std::string> ngrams)
+{
+ VERBOSE(3,"DynamicCacheBasedLanguageModel Insert ngrams.size():|" << ngrams.size() << "|" << std::endl);
+ if (m_constant == false) {
+ Decay();
+ }
+ Update(ngrams,1);
+ IFVERBOSE(3) Print();
+}
+
+void DynamicCacheBasedLanguageModel::ExecuteDlt(std::map<std::string, std::string> dlt_meta)
+{
+ if (dlt_meta.find("cblm") != dlt_meta.end()) {
+ Insert(dlt_meta["cblm"]);
+ }
+ if (dlt_meta.find("cblm-command") != dlt_meta.end()) {
+ Execute(dlt_meta["cblm-command"]);
+ }
+ if (dlt_meta.find("cblm-file") != dlt_meta.end()) {
+ Load(dlt_meta["cblm-file"]);
+ }
+ if (dlt_meta.find("cblm-clear-entries") != dlt_meta.end()) {
+ ClearEntries(dlt_meta["cblm-clear-entries"]);
+ }
+ if (dlt_meta.find("cblm-clear-all") != dlt_meta.end()) {
+ Clear();
+ }
+
+}
+
+void DynamicCacheBasedLanguageModel::Execute(std::string command)
+{
+ VERBOSE(2,"DynamicCacheBasedLanguageModel::Execute(std::string command:|" << command << "|" << std::endl);
+ std::vector<std::string> commands = Tokenize(command, "||");
+ Execute(commands);
+}
+
+void DynamicCacheBasedLanguageModel::Execute(std::vector<std::string> commands)
+{
+ for (size_t j=0; j<commands.size(); j++) {
+ Execute_Single_Command(commands[j]);
+ }
+ IFVERBOSE(2) Print();
+}
+
+void DynamicCacheBasedLanguageModel::Execute_Single_Command(std::string command)
+{
+ VERBOSE(2,"CacheBasedLanguageModel::Execute_Single_Command(std::string command:|" << command << "|" << std::endl);
+ if (command == "clear") {
+ VERBOSE(2,"CacheBasedLanguageModel Execute command:|"<< command << "|. Cache cleared." << std::endl);
+ Clear();
+ } else if (command == "settype_wholestring") {
+ VERBOSE(2,"CacheBasedLanguageModel Execute command:|"<< command << "|. Query type set to " << CBLM_QUERY_TYPE_WHOLESTRING << " (CBLM_QUERY_TYPE_WHOLESTRING)." << std::endl);
+ SetQueryType(CBLM_QUERY_TYPE_WHOLESTRING);
+ } else if (command == "settype_allsubstrings") {
+ VERBOSE(2,"CacheBasedLanguageModel Execute command:|"<< command << "|. Query type set to " << CBLM_QUERY_TYPE_ALLSUBSTRINGS << " (CBLM_QUERY_TYPE_ALLSUBSTRINGS)." << std::endl);
+ SetQueryType(CBLM_QUERY_TYPE_ALLSUBSTRINGS);
+ } else {
+ VERBOSE(2,"CacheBasedLanguageModel Execute command:|"<< command << "| is unknown. Skipped." << std::endl);
+ }
+}
+
+void DynamicCacheBasedLanguageModel::Clear()
+{
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
+#endif
+ m_cache.clear();
+}
+
+void DynamicCacheBasedLanguageModel::Load()
+{
+// SetPreComputedScores();
+ VERBOSE(2,"DynamicCacheBasedLanguageModel::Load()" << std::endl);
+ Load(m_initfiles);
+}
+
+void DynamicCacheBasedLanguageModel::Load(const std::string filestr)
+{
+ VERBOSE(2,"DynamicCacheBasedLanguageModel::Load(const std::string filestr)" << std::endl);
+// std::vector<std::string> files = Tokenize(m_initfiles, "||");
+ std::vector<std::string> files = Tokenize(filestr, "||");
+ Load_Multiple_Files(files);
+}
+
+
+void DynamicCacheBasedLanguageModel::Load_Multiple_Files(std::vector<std::string> files)
+{
+ VERBOSE(2,"DynamicCacheBasedLanguageModel::Load_Multiple_Files(std::vector<std::string> files)" << std::endl);
+ for(size_t j = 0; j < files.size(); ++j) {
+ Load_Single_File(files[j]);
+ }
+}
+
+void DynamicCacheBasedLanguageModel::Load_Single_File(const std::string file)
+{
+ VERBOSE(2,"DynamicCacheBasedLanguageModel::Load_Single_File(const std::string file)" << std::endl);
+ //file format
+ //age || n-gram
+ //age || n-gram || n-gram || n-gram || ...
+ //....
+ //each n-gram is a sequence of n words (no matter of n)
+ //
+ //there is no limit on the size of n
+ //
+ //entries can be repeated, but the last entry overwrites the previous
+
+
+ VERBOSE(2,"Loading data from the cache file " << file << std::endl);
+ InputFileStream cacheFile(file);
+
+ std::string line;
+ int age;
+ std::vector<std::string> words;
+
+ while (getline(cacheFile, line)) {
+ std::vector<std::string> vecStr = TokenizeMultiCharSeparator( line , "||" );
+ if (vecStr.size() >= 2) {
+ age = Scan<int>(vecStr[0]);
+ vecStr.erase(vecStr.begin());
+ Update(vecStr,age);
+ } else {
+ UTIL_THROW_IF2(false, "The format of the loaded file is wrong: " << line);
+ }
+ }
+ IFVERBOSE(2) Print();
+}
+
+void DynamicCacheBasedLanguageModel::SetQueryType(size_t type)
+{
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
+#endif
+
+ m_query_type = type;
+ if ( m_query_type != CBLM_QUERY_TYPE_WHOLESTRING
+ && m_query_type != CBLM_QUERY_TYPE_ALLSUBSTRINGS ) {
+ VERBOSE(2, "This query type " << m_query_type << " is unknown. Instead used " << CBLM_QUERY_TYPE_ALLSUBSTRINGS << "." << std::endl);
+ m_query_type = CBLM_QUERY_TYPE_ALLSUBSTRINGS;
+ }
+ VERBOSE(2, "CacheBasedLanguageModel QueryType: " << m_query_type << std::endl);
+
+};
+
+void DynamicCacheBasedLanguageModel::SetScoreType(size_t type)
+{
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
+#endif
+ m_score_type = type;
+ if ( m_score_type != CBLM_SCORE_TYPE_HYPERBOLA
+ && m_score_type != CBLM_SCORE_TYPE_POWER
+ && m_score_type != CBLM_SCORE_TYPE_EXPONENTIAL
+ && m_score_type != CBLM_SCORE_TYPE_COSINE
+ && m_score_type != CBLM_SCORE_TYPE_HYPERBOLA_REWARD
+ && m_score_type != CBLM_SCORE_TYPE_POWER_REWARD
+ && m_score_type != CBLM_SCORE_TYPE_EXPONENTIAL_REWARD ) {
+ VERBOSE(2, "This score type " << m_score_type << " is unknown. Instead used " << CBLM_SCORE_TYPE_HYPERBOLA << "." << std::endl);
+ m_score_type = CBLM_SCORE_TYPE_HYPERBOLA;
+ }
+ VERBOSE(2, "CacheBasedLanguageModel ScoreType: " << m_score_type << std::endl);
+};
+
+void DynamicCacheBasedLanguageModel::SetMaxAge(unsigned int age)
+{
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
+#endif
+ m_maxAge = age;
+ VERBOSE(2, "CacheBasedLanguageModel MaxAge: " << m_maxAge << std::endl);
+};
+
+float DynamicCacheBasedLanguageModel::decaying_score(const unsigned int age)
+{
+ float sc;
+ switch(m_score_type) {
+ case CBLM_SCORE_TYPE_HYPERBOLA:
+ sc = (float) 1.0/age - 1.0;
+ break;
+ case CBLM_SCORE_TYPE_POWER:
+ sc = (float) pow(age, -0.25) - 1.0;
+ break;
+ case CBLM_SCORE_TYPE_EXPONENTIAL:
+ sc = (age == 1) ? 0.0 : (float) exp( 1.0/age ) / exp(1.0) - 1.0;
+ break;
+ case CBLM_SCORE_TYPE_COSINE:
+ sc = (float) cos( (age-1) * (PI/2) / m_maxAge ) - 1.0;
+ break;
+ case CBLM_SCORE_TYPE_HYPERBOLA_REWARD:
+ sc = (float) 1.0/age;
+ break;
+ case CBLM_SCORE_TYPE_POWER_REWARD:
+ sc = (float) pow(age, -0.25);
+ break;
+ case CBLM_SCORE_TYPE_EXPONENTIAL_REWARD:
+ sc = (age == 1) ? 1.0 : (float) exp( 1.0/age ) / exp(1.0);
+ break;
+ default:
+ sc = -1.0;
+ }
+ return sc;
+}
+}
diff --git a/moses/FF/DynamicCacheBasedLanguageModel.h b/moses/FF/DynamicCacheBasedLanguageModel.h
new file mode 100644
index 000000000..5d9d17517
--- /dev/null
+++ b/moses/FF/DynamicCacheBasedLanguageModel.h
@@ -0,0 +1,164 @@
+// $Id$
+
+#ifndef moses_DynamicCacheBasedLanguageModel_h
+#define moses_DynamicCacheBasedLanguageModel_h
+
+#include "moses/Util.h"
+#include "FeatureFunction.h"
+
+#ifdef WITH_THREADS
+#include <boost/thread/shared_mutex.hpp>
+#include <boost/thread/locks.hpp>
+#endif
+
+typedef std::pair<int, float> decaying_cache_value_t;
+typedef std::map<std::string, decaying_cache_value_t > decaying_cache_t;
+
+#define CBLM_QUERY_TYPE_UNDEFINED (-1)
+#define CBLM_QUERY_TYPE_ALLSUBSTRINGS 0
+#define CBLM_QUERY_TYPE_WHOLESTRING 1
+
+#define CBLM_SCORE_TYPE_UNDEFINED (-1)
+#define CBLM_SCORE_TYPE_HYPERBOLA 0
+#define CBLM_SCORE_TYPE_POWER 1
+#define CBLM_SCORE_TYPE_EXPONENTIAL 2
+#define CBLM_SCORE_TYPE_COSINE 3
+#define CBLM_SCORE_TYPE_HYPERBOLA_REWARD 10
+#define CBLM_SCORE_TYPE_POWER_REWARD 11
+#define CBLM_SCORE_TYPE_EXPONENTIAL_REWARD 12
+#define PI 3.14159265
+
+namespace Moses
+{
+
+class WordsRange;
+
+/** Calculates score for the Dynamic Cache-Based pseudo LM
+ */
+class DynamicCacheBasedLanguageModel : public StatelessFeatureFunction
+{
+ // data structure for the cache;
+ // the key is the word and the value is the decaying score
+ decaying_cache_t m_cache;
+ size_t m_query_type; //way of querying the cache
+ size_t m_score_type; //way of scoring entries of the cache
+ std::string m_initfiles; // vector of files loaded in the initialization phase
+ std::string m_name; // internal name to identify this instance of the Cache-based pseudo LM
+ float m_lower_score; //lower_bound_score for no match
+ bool m_constant; //flag for setting a non-decaying cache
+ std::vector<float> precomputedScores;
+ unsigned int m_maxAge;
+
+#ifdef WITH_THREADS
+ //multiple readers - single writer lock
+ mutable boost::shared_mutex m_cacheLock;
+#endif
+
+ float decaying_score(unsigned int age);
+ void SetPreComputedScores();
+ float GetPreComputedScores(const unsigned int age);
+
+ float Evaluate_Whole_String( const TargetPhrase&) const;
+ float Evaluate_All_Substrings( const TargetPhrase&) const;
+
+ void Decay();
+ void Update(std::vector<std::string> words, int age);
+
+ void ClearEntries(std::vector<std::string> entries);
+
+ void Execute(std::vector<std::string> commands);
+ void Execute_Single_Command(std::string command);
+
+ void Load_Multiple_Files(std::vector<std::string> files);
+ void Load_Single_File(const std::string file);
+
+ void Insert(std::vector<std::string> ngrams);
+
+// void EvaluateInIsolation(const Phrase&, const TargetPhrase&, ScoreComponentCollection&, ScoreComponentCollection& ) const;
+ void Print() const;
+
+protected:
+ static DynamicCacheBasedLanguageModel* s_instance;
+ static std::map< const std::string, DynamicCacheBasedLanguageModel* > s_instance_map;
+
+public:
+ DynamicCacheBasedLanguageModel(const std::string &line);
+ ~DynamicCacheBasedLanguageModel();
+
+ inline const std::string GetName() {
+ return m_name;
+ };
+ inline void SetName(const std::string name) {
+ m_name = name;
+ }
+
+ static const DynamicCacheBasedLanguageModel* Instance(const std::string& name) {
+ if (s_instance_map.find(name) == s_instance_map.end()) {
+ return NULL;
+ }
+ return s_instance_map[name];
+ }
+
+ static DynamicCacheBasedLanguageModel* InstanceNonConst(const std::string& name) {
+ if (s_instance_map.find(name) == s_instance_map.end()) {
+ return NULL;
+ }
+ return s_instance_map[name];
+ }
+
+
+
+ static const DynamicCacheBasedLanguageModel& Instance() {
+ return *s_instance;
+ }
+ static DynamicCacheBasedLanguageModel& InstanceNonConst() {
+ return *s_instance;
+ }
+
+ bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
+
+ void Load();
+ void Load(const std::string filestr);
+ void Execute(std::string command);
+ void SetParameter(const std::string& key, const std::string& value);
+ void ExecuteDlt(std::map<std::string, std::string> dlt_meta);
+
+ void ClearEntries(std::string &entries);
+ void Insert(std::string &entries);
+ void Clear();
+
+ virtual void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
+
+ void EvaluateWithSourceContext(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
+ void EvaluateWhenApplied(const Hypothesis& hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
+
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
+
+ void SetQueryType(size_t type);
+ void SetScoreType(size_t type);
+ void SetMaxAge(unsigned int age);
+};
+
+}
+
+#endif
diff --git a/moses/FF/ExternalFeature.h b/moses/FF/ExternalFeature.h
index 888fef951..6c0fb829e 100644
--- a/moses/FF/ExternalFeature.h
+++ b/moses/FF/ExternalFeature.h
@@ -18,8 +18,8 @@ protected:
public:
ExternalFeatureState(int stateSize)
:m_stateSize(stateSize)
- ,m_data(NULL)
- {}
+ ,m_data(NULL) {
+ }
ExternalFeatureState(int stateSize, void *data);
~ExternalFeatureState() {
@@ -52,17 +52,22 @@ public:
void SetParameter(const std::string& key, const std::string& value);
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index b88d01b71..28a19c91c 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -2,15 +2,17 @@
#include "moses/StaticData.h"
#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
-#include "moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h"
#include "moses/TranslationModel/PhraseDictionaryMemory.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModel.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
-#include "moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h"
#include "moses/TranslationModel/PhraseDictionaryDynSuffixArray.h"
#include "moses/TranslationModel/PhraseDictionaryScope3.h"
#include "moses/TranslationModel/PhraseDictionaryTransliteration.h"
+#include "moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h"
+
+#include "moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h"
+#include "moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h"
#include "moses/FF/LexicalReordering/LexicalReordering.h"
@@ -39,10 +41,12 @@
#include "moses/FF/CoveredReferenceFeature.h"
#include "moses/FF/TreeStructureFeature.h"
#include "moses/FF/SoftMatchingFeature.h"
+#include "moses/FF/DynamicCacheBasedLanguageModel.h"
#include "moses/FF/SourceGHKMTreeInputMatchFeature.h"
#include "moses/FF/HyperParameterAsWeight.h"
#include "moses/FF/SetSourcePhrase.h"
#include "moses/FF/PhraseOrientationFeature.h"
+#include "moses/FF/UnalignedWordCountFeature.h"
#include "CountNonTerms.h"
#include "ReferenceComparison.h"
#include "RuleScope.h"
@@ -54,11 +58,24 @@
#include "moses/FF/SkeletonStatelessFF.h"
#include "moses/FF/SkeletonStatefulFF.h"
#include "moses/LM/SkeletonLM.h"
+#include "moses/FF/SkeletonTranslationOptionListFeature.h"
#include "moses/LM/BilingualLM.h"
#include "SkeletonChangeInput.h"
#include "moses/TranslationModel/SkeletonPT.h"
#include "moses/Syntax/RuleTableFF.h"
+#ifdef HAVE_VW
+#include "moses/FF/VW/VW.h"
+#include "moses/FF/VW/VWFeatureSourceBagOfWords.h"
+#include "moses/FF/VW/VWFeatureSourceIndicator.h"
+#include "moses/FF/VW/VWFeatureSourcePhraseInternal.h"
+#include "moses/FF/VW/VWFeatureSourceWindow.h"
+#include "moses/FF/VW/VWFeatureTargetIndicator.h"
+#include "moses/FF/VW/VWFeatureSourceExternalFeatures.h"
+#include "moses/FF/VW/VWFeatureTargetPhraseInternal.h"
+#include "moses/FF/VW/VWFeatureTargetPhraseScores.h"
+#endif
+
#ifdef HAVE_CMPH
#include "moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h"
#endif
@@ -177,6 +194,7 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(PhraseDictionaryALSuffixArray);
MOSES_FNAME(PhraseDictionaryDynSuffixArray);
MOSES_FNAME(PhraseDictionaryTransliteration);
+ MOSES_FNAME(PhraseDictionaryDynamicCacheBased);
MOSES_FNAME(PhraseDictionaryFuzzyMatch);
MOSES_FNAME2("RuleTable", Syntax::RuleTableFF);
@@ -207,6 +225,7 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(SoftSourceSyntacticConstraintsFeature);
MOSES_FNAME(TreeStructureFeature);
MOSES_FNAME(SoftMatchingFeature);
+ MOSES_FNAME(DynamicCacheBasedLanguageModel);
MOSES_FNAME(HyperParameterAsWeight);
MOSES_FNAME(SetSourcePhrase);
MOSES_FNAME(CountNonTerms);
@@ -218,13 +237,27 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(SpanLength);
MOSES_FNAME(SyntaxRHS);
MOSES_FNAME(PhraseOrientationFeature);
+ MOSES_FNAME(UnalignedWordCountFeature);
MOSES_FNAME(SkeletonStatelessFF);
MOSES_FNAME(SkeletonStatefulFF);
MOSES_FNAME(SkeletonLM);
MOSES_FNAME(SkeletonChangeInput);
+ MOSES_FNAME(SkeletonTranslationOptionListFeature);
MOSES_FNAME(SkeletonPT);
+#ifdef HAVE_VW
+ MOSES_FNAME(VW);
+ MOSES_FNAME(VWFeatureSourceBagOfWords);
+ MOSES_FNAME(VWFeatureSourceIndicator);
+ MOSES_FNAME(VWFeatureSourcePhraseInternal);
+ MOSES_FNAME(VWFeatureSourceWindow);
+ MOSES_FNAME(VWFeatureTargetPhraseInternal);
+ MOSES_FNAME(VWFeatureTargetIndicator);
+ MOSES_FNAME(VWFeatureSourceExternalFeatures);
+ MOSES_FNAME(VWFeatureTargetPhraseScores);
+#endif
+
#ifdef HAVE_CMPH
MOSES_FNAME(PhraseDictionaryCompact);
#endif
@@ -293,22 +326,22 @@ void FeatureRegistry::Construct(const std::string &name, const std::string &line
void FeatureRegistry::PrintFF() const
{
- vector<string> ffs;
- std::cerr << "Available feature functions:" << std::endl;
- Map::const_iterator iter;
- for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
- const string &ffName = iter->first;
- ffs.push_back(ffName);
- }
-
- vector<string>::const_iterator iterVec;
- std::sort(ffs.begin(), ffs.end());
- for (iterVec = ffs.begin(); iterVec != ffs.end(); ++iterVec) {
- const string &ffName = *iterVec;
- std::cerr << ffName << " ";
- }
-
- std::cerr << std::endl;
+ vector<string> ffs;
+ std::cerr << "Available feature functions:" << std::endl;
+ Map::const_iterator iter;
+ for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
+ const string &ffName = iter->first;
+ ffs.push_back(ffName);
+ }
+
+ vector<string>::const_iterator iterVec;
+ std::sort(ffs.begin(), ffs.end());
+ for (iterVec = ffs.begin(); iterVec != ffs.end(); ++iterVec) {
+ const string &ffName = *iterVec;
+ std::cerr << ffName << " ";
+ }
+
+ std::cerr << std::endl;
}
} // namespace Moses
diff --git a/moses/FF/FeatureFunction.cpp b/moses/FF/FeatureFunction.cpp
index 5d4e0f91e..71f4ff568 100644
--- a/moses/FF/FeatureFunction.cpp
+++ b/moses/FF/FeatureFunction.cpp
@@ -7,6 +7,7 @@
#include "moses/Manager.h"
#include "moses/TranslationOption.h"
#include "moses/Util.h"
+#include "moses/FF/DistortionScoreProducer.h"
using namespace std;
@@ -37,14 +38,16 @@ void FeatureFunction::Destroy()
void FeatureFunction::CallChangeSource(InputType *&input)
{
for (size_t i = 0; i < s_staticColl.size(); ++i) {
- const FeatureFunction &ff = *s_staticColl[i];
- ff.ChangeSource(input);
+ const FeatureFunction &ff = *s_staticColl[i];
+ ff.ChangeSource(input);
}
}
FeatureFunction::
FeatureFunction(const std::string& line)
: m_tuneable(true)
+ , m_requireSortingAfterSourceContext(false)
+ , m_verbosity(std::numeric_limits<std::size_t>::max())
, m_numScoreComponents(1)
{
Initialize(line);
@@ -54,6 +57,8 @@ FeatureFunction::
FeatureFunction(size_t numScoreComponents,
const std::string& line)
: m_tuneable(true)
+ , m_requireSortingAfterSourceContext(false)
+ , m_verbosity(std::numeric_limits<std::size_t>::max())
, m_numScoreComponents(numScoreComponents)
{
Initialize(line);
@@ -83,7 +88,7 @@ void FeatureFunction::ParseLine(const std::string &line)
for (size_t i = 1; i < toks.size(); ++i) {
vector<string> args = TokenizeFirstOnly(toks[i], "=");
UTIL_THROW_IF2(args.size() != 2,
- "Incorrect format for feature function arg: " << toks[i]);
+ "Incorrect format for feature function arg: " << toks[i]);
pair<set<string>::iterator,bool> ret = keys.insert(args[0]);
UTIL_THROW_IF2(!ret.second, "Duplicate key in line " << line);
@@ -115,6 +120,10 @@ void FeatureFunction::SetParameter(const std::string& key, const std::string& va
{
if (key == "tuneable") {
m_tuneable = Scan<bool>(value);
+ } else if (key == "require-sorting-after-source-context") {
+ m_requireSortingAfterSourceContext = Scan<bool>(value);
+ } else if (key == "verbosity") {
+ m_verbosity = Scan<size_t>(value);
} else if (key == "filterable") { //ignore
} else {
UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value);
diff --git a/moses/FF/FeatureFunction.h b/moses/FF/FeatureFunction.h
index b30815e05..767270140 100644
--- a/moses/FF/FeatureFunction.h
+++ b/moses/FF/FeatureFunction.h
@@ -12,6 +12,7 @@ namespace Moses
class Phrase;
class TargetPhrase;
+class TranslationOptionList;
class TranslationOption;
class Hypothesis;
class ChartHypothesis;
@@ -22,6 +23,7 @@ class WordsRange;
class FactorMask;
class InputPath;
class StackVec;
+class DistortionScoreProducer;
/** base class for all feature functions.
*/
@@ -34,6 +36,8 @@ protected:
std::string m_description, m_argLine;
std::vector<std::vector<std::string> > m_args;
bool m_tuneable;
+ bool m_requireSortingAfterSourceContext;
+ size_t m_verbosity;
size_t m_numScoreComponents;
//In case there's multiple producers with the same description
static std::multiset<std::string> description_counts;
@@ -45,6 +49,7 @@ public:
static const std::vector<FeatureFunction*>& GetFeatureFunctions() {
return s_staticColl;
}
+
static FeatureFunction &FindFeatureFunction(const std::string& name);
static void Destroy();
@@ -84,6 +89,11 @@ public:
virtual bool IsTuneable() const {
return m_tuneable;
}
+
+ virtual bool RequireSortingAfterSourceContext() const {
+ return m_requireSortingAfterSourceContext;
+ }
+
virtual std::vector<float> DefaultWeights() const;
//! Called before search and collecting of translation options
@@ -107,13 +117,13 @@ public:
// may have more factors than actually need, but not guaranteed.
// For SCFG decoding, the source contains non-terminals, NOT the raw source from the input sentence
virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const = 0;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const = 0;
// override this method if you want to change the input before decoding
- virtual void ChangeSource(InputType *&input) const
- {}
+ virtual void ChangeSource(InputType *&input) const {
+ }
// This method is called once all the translation options are retrieved from the phrase table, and
// just before search.
@@ -123,11 +133,21 @@ public:
// For pb models, stackvec is NULL.
// No FF should set estimatedFutureScore in both overloads!
virtual void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const = 0;
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const = 0;
+
+ // This method is called once all the translation options are retrieved from the phrase table, and
+ // just before search.
+ // 'inputPath' is guaranteed to be the raw substring from the input. No factors were added or taken away
+ // 'stackVec' is a vector of chart cells that the RHS non-terms cover.
+ // It is guaranteed to be in the same order as the non-terms in the source phrase.
+ // For pb models, stackvec is NULL.
+ // No FF should set estimatedFutureScore in both overloads!
+ virtual void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const = 0;
virtual void SetParameter(const std::string& key, const std::string& value);
virtual void ReadParameters();
diff --git a/moses/FF/GlobalLexicalModel.cpp b/moses/FF/GlobalLexicalModel.cpp
index 5c603bc51..d52d62a49 100644
--- a/moses/FF/GlobalLexicalModel.cpp
+++ b/moses/FF/GlobalLexicalModel.cpp
@@ -3,7 +3,6 @@
#include "moses/StaticData.h"
#include "moses/InputFileStream.h"
#include "moses/TranslationOption.h"
-#include "moses/UserMessage.h"
#include "moses/FactorCollection.h"
#include "util/exception.hh"
@@ -166,11 +165,11 @@ float GlobalLexicalModel::GetFromCacheOrScorePhrase( const TargetPhrase& targetP
}
void GlobalLexicalModel::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
- scoreBreakdown.PlusEquals( this, GetFromCacheOrScorePhrase(targetPhrase) );
+ scoreBreakdown.PlusEquals( this, GetFromCacheOrScorePhrase(targetPhrase) );
}
bool GlobalLexicalModel::IsUseable(const FactorMask &mask) const
diff --git a/moses/FF/GlobalLexicalModel.h b/moses/FF/GlobalLexicalModel.h
index 65b5cf2b8..a936c2e92 100644
--- a/moses/FF/GlobalLexicalModel.h
+++ b/moses/FF/GlobalLexicalModel.h
@@ -71,24 +71,28 @@ public:
bool IsUseable(const FactorMask &mask) const;
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
};
diff --git a/moses/FF/GlobalLexicalModelUnlimited.cpp b/moses/FF/GlobalLexicalModelUnlimited.cpp
index c8dbd5883..a757c1c4e 100644
--- a/moses/FF/GlobalLexicalModelUnlimited.cpp
+++ b/moses/FF/GlobalLexicalModelUnlimited.cpp
@@ -2,7 +2,6 @@
#include <fstream>
#include "moses/StaticData.h"
#include "moses/InputFileStream.h"
-#include "moses/UserMessage.h"
#include "moses/Hypothesis.h"
#include "util/string_piece_hash.hh"
@@ -14,7 +13,7 @@ GlobalLexicalModelUnlimited::GlobalLexicalModelUnlimited(const std::string &line
:StatelessFeatureFunction(0, line)
{
UTIL_THROW(util::Exception,
- "GlobalLexicalModelUnlimited hasn't been refactored for new feature function framework yet"); // TODO need to update arguments to key=value
+ "GlobalLexicalModelUnlimited hasn't been refactored for new feature function framework yet"); // TODO need to update arguments to key=value
const vector<string> modelSpec = Tokenize(line);
@@ -28,8 +27,8 @@ GlobalLexicalModelUnlimited::GlobalLexicalModelUnlimited(const std::string &line
// read optional punctuation and bias specifications
if (spec.size() > 0) {
if (spec.size() != 2 && spec.size() != 3 && spec.size() != 4 && spec.size() != 6) {
- UserMessage::Add("Format of glm feature is <factor-src>-<factor-tgt> [ignore-punct] [use-bias] "
- "[context-type] [filename-src filename-tgt]");
+ std::cerr << "Format of glm feature is <factor-src>-<factor-tgt> [ignore-punct] [use-bias] "
+ << "[context-type] [filename-src filename-tgt]";
//return false;
}
@@ -49,7 +48,7 @@ GlobalLexicalModelUnlimited::GlobalLexicalModelUnlimited(const std::string &line
factors = Tokenize(modelSpec[i],"-");
if ( factors.size() != 2 ) {
- UserMessage::Add("Wrong factor definition for global lexical model unlimited: " + modelSpec[i]);
+ std::cerr << "Wrong factor definition for global lexical model unlimited: " << modelSpec[i];
//return false;
}
@@ -61,7 +60,10 @@ GlobalLexicalModelUnlimited::GlobalLexicalModelUnlimited(const std::string &line
if (restricted) {
cerr << "loading word translation word lists from " << filenameSource << " and " << filenameTarget << endl;
if (!glmu->Load(filenameSource, filenameTarget)) {
- UserMessage::Add("Unable to load word lists for word translation feature from files " + filenameSource + " and " + filenameTarget);
+ std::cerr << "Unable to load word lists for word translation feature from files "
+ << filenameSource
+ << " and "
+ << filenameTarget;
//return false;
}
}
diff --git a/moses/FF/GlobalLexicalModelUnlimited.h b/moses/FF/GlobalLexicalModelUnlimited.h
index 096254613..33c0d0010 100644
--- a/moses/FF/GlobalLexicalModelUnlimited.h
+++ b/moses/FF/GlobalLexicalModelUnlimited.h
@@ -82,26 +82,31 @@ public:
//TODO: This implements the old interface, but cannot be updated because
//it appears to be stateful
void EvaluateWhenApplied(const Hypothesis& cur_hypo,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
void EvaluateWhenApplied(const ChartHypothesis& /* cur_hypo */,
- int /* featureID */,
- ScoreComponentCollection* ) const {
+ int /* featureID */,
+ ScoreComponentCollection* ) const {
throw std::logic_error("GlobalLexicalModelUnlimited not supported in chart decoder, yet");
}
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
void AddFeature(ScoreComponentCollection* accumulator,
StringPiece sourceTrigger, StringPiece sourceWord, StringPiece targetTrigger,
diff --git a/moses/FF/HyperParameterAsWeight.cpp b/moses/FF/HyperParameterAsWeight.cpp
index 2fd0f2acb..a2c068530 100644
--- a/moses/FF/HyperParameterAsWeight.cpp
+++ b/moses/FF/HyperParameterAsWeight.cpp
@@ -7,7 +7,7 @@ namespace Moses
{
HyperParameterAsWeight::HyperParameterAsWeight(const std::string &line)
-:StatelessFeatureFunction(2, line)
+ :StatelessFeatureFunction(2, line)
{
ReadParameters();
diff --git a/moses/FF/HyperParameterAsWeight.h b/moses/FF/HyperParameterAsWeight.h
index aaad21c14..cd8d66821 100644
--- a/moses/FF/HyperParameterAsWeight.h
+++ b/moses/FF/HyperParameterAsWeight.h
@@ -14,33 +14,38 @@ class HyperParameterAsWeight : public StatelessFeatureFunction
public:
HyperParameterAsWeight(const std::string &line);
- virtual bool IsUseable(const FactorMask &mask) const
- { return true; }
+ virtual bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
virtual void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ virtual void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
virtual void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
/**
* Same for chart-based features.
**/
virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
};
diff --git a/moses/FF/InputFeature.cpp b/moses/FF/InputFeature.cpp
index 61753c595..39535f58f 100644
--- a/moses/FF/InputFeature.cpp
+++ b/moses/FF/InputFeature.cpp
@@ -18,14 +18,14 @@ InputFeature::InputFeature(const std::string &line)
{
m_numInputScores = this->m_numScoreComponents;
ReadParameters();
-
+
UTIL_THROW_IF2(s_instance, "Can only have 1 input feature");
s_instance = this;
}
void InputFeature::Load()
{
-
+
const PhraseDictionary *pt = PhraseDictionary::GetColl()[0];
const PhraseDictionaryTreeAdaptor *ptBin = dynamic_cast<const PhraseDictionaryTreeAdaptor*>(pt);
@@ -45,11 +45,11 @@ void InputFeature::SetParameter(const std::string& key, const std::string& value
}
void InputFeature::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
if (m_legacy) {
//binary phrase-table does input feature itself
diff --git a/moses/FF/InputFeature.h b/moses/FF/InputFeature.h
index ad4fe398a..c7b7237aa 100644
--- a/moses/FF/InputFeature.h
+++ b/moses/FF/InputFeature.h
@@ -42,24 +42,28 @@ public:
}
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
};
diff --git a/moses/FF/InternalTree.cpp b/moses/FF/InternalTree.cpp
index 2537cc50f..9e974d0cd 100644
--- a/moses/FF/InternalTree.cpp
+++ b/moses/FF/InternalTree.cpp
@@ -3,228 +3,242 @@
namespace Moses
{
-InternalTree::InternalTree(const std::string & line, const bool terminal):
- m_value_nt(0),
- m_isTerminal(terminal)
- {
+InternalTree::InternalTree(const std::string & line, size_t start, size_t len, const bool terminal):
+ m_value_nt(0),
+ m_isTerminal(terminal)
+{
- size_t found = line.find_first_of("[] ");
+ if (len > 0) {
+ m_value.assign(line, start, len);
+ }
+}
- if (found == line.npos) {
- m_value = line;
- }
+InternalTree::InternalTree(const std::string & line, const bool terminal):
+ m_value_nt(0),
+ m_isTerminal(terminal)
+{
- else {
- AddSubTree(line, 0);
- }
-}
+ size_t found = line.find_first_of("[] ");
-size_t InternalTree::AddSubTree(const std::string & line, size_t pos) {
-
- std::string value;
- char token = 0;
-
- while (token != ']' && pos != std::string::npos)
- {
- size_t oldpos = pos;
- pos = line.find_first_of("[] ", pos);
- if (pos == std::string::npos) break;
- token = line[pos];
- value = line.substr(oldpos,pos-oldpos);
-
- if (token == '[') {
- if (m_value.size() > 0) {
- m_children.push_back(boost::make_shared<InternalTree>(value,false));
- pos = m_children.back()->AddSubTree(line, pos+1);
- }
- else {
- if (value.size() > 0) {
- m_value = value;
- }
- pos = AddSubTree(line, pos+1);
- }
- }
- else if (token == ' ' || token == ']') {
- if (value.size() > 0 && !(m_value.size() > 0)) {
- m_value = value;
- }
- else if (value.size() > 0) {
- m_isTerminal = false;
- m_children.push_back(boost::make_shared<InternalTree>(value,true));
- }
- if (token == ' ') {
- pos++;
- }
- }
+ if (found == line.npos) {
+ m_value = line;
+ } else {
+ AddSubTree(line, 0);
+ }
+}
- if (m_children.size() > 0) {
- m_isTerminal = false;
- }
- }
+size_t InternalTree::AddSubTree(const std::string & line, size_t pos)
+{
- if (pos == std::string::npos) {
- return line.size();
- }
- return std::min(line.size(),pos+1);
+ char token = 0;
+ size_t len = 0;
+
+ while (token != ']' && pos != std::string::npos) {
+ size_t oldpos = pos;
+ pos = line.find_first_of("[] ", pos);
+ if (pos == std::string::npos) break;
+ token = line[pos];
+ len = pos-oldpos;
+
+ if (token == '[') {
+ if (!m_value.empty()) {
+ m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, false));
+ pos = m_children.back()->AddSubTree(line, pos+1);
+ } else {
+ if (len > 0) {
+ m_value.assign(line, oldpos, len);
+ }
+ pos = AddSubTree(line, pos+1);
+ }
+ } else if (token == ' ' || token == ']') {
+ if (len > 0 && m_value.empty()) {
+ m_value.assign(line, oldpos, len);
+ } else if (len > 0) {
+ m_isTerminal = false;
+ m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, true));
+ }
+ if (token == ' ') {
+ pos++;
+ }
+ }
+
+ if (!m_children.empty()) {
+ m_isTerminal = false;
+ }
+ }
+
+ if (pos == std::string::npos) {
+ return line.size();
+ }
+ return std::min(line.size(),pos+1);
}
-std::string InternalTree::GetString(bool start) const {
+std::string InternalTree::GetString(bool start) const
+{
- std::string ret = "";
- if (!start) {
- ret += " ";
- }
+ std::string ret = "";
+ if (!start) {
+ ret += " ";
+ }
- if (!m_isTerminal) {
- ret += "[";
- }
+ if (!m_isTerminal) {
+ ret += "[";
+ }
- ret += m_value;
- for (std::vector<TreePointer>::const_iterator it = m_children.begin(); it != m_children.end(); ++it)
- {
- ret += (*it)->GetString(false);
- }
+ ret += m_value;
+ for (std::vector<TreePointer>::const_iterator it = m_children.begin(); it != m_children.end(); ++it) {
+ ret += (*it)->GetString(false);
+ }
- if (!m_isTerminal) {
- ret += "]";
- }
- return ret;
+ if (!m_isTerminal) {
+ ret += "]";
+ }
+ return ret;
}
-void InternalTree::Combine(const std::vector<TreePointer> &previous) {
+void InternalTree::Combine(const std::vector<TreePointer> &previous)
+{
- std::vector<TreePointer>::iterator it;
- bool found = false;
- leafNT next_leafNT(this);
- for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) {
- found = next_leafNT(it);
- if (found) {
- *it = *it_prev;
- }
- else {
- std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
- }
- }
+ std::vector<TreePointer>::iterator it;
+ bool found = false;
+ leafNT next_leafNT(this);
+ for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) {
+ found = next_leafNT(it);
+ if (found) {
+ *it = *it_prev;
+ } else {
+ std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
+ }
+ }
}
-bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const {
- for (it = m_children.begin(); it != m_children.end(); ++it) {
- if ((*it)->GetLabel() == label) {
- return true;
- }
+bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
+{
+ for (it = m_children.begin(); it != m_children.end(); ++it) {
+ if ((*it)->GetLabel() == label) {
+ return true;
}
- return false;
+ }
+ return false;
}
-bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const {
- for (it = m_children.begin(); it != m_children.end(); ++it) {
- if ((*it)->GetLabel() == label) {
- return true;
- }
- std::vector<TreePointer>::const_iterator it2;
- if ((*it)->RecursiveSearch(label, it2)) {
- it = it2;
- return true;
- }
+bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
+{
+ for (it = m_children.begin(); it != m_children.end(); ++it) {
+ if ((*it)->GetLabel() == label) {
+ return true;
+ }
+ std::vector<TreePointer>::const_iterator it2;
+ if ((*it)->RecursiveSearch(label, it2)) {
+ it = it2;
+ return true;
}
- return false;
+ }
+ return false;
}
-bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const {
- for (it = m_children.begin(); it != m_children.end(); ++it) {
- if ((*it)->GetLabel() == label) {
- parent = this;
- return true;
- }
- std::vector<TreePointer>::const_iterator it2;
- if ((*it)->RecursiveSearch(label, it2, parent)) {
- it = it2;
- return true;
- }
- }
- return false;
+bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
+{
+ for (it = m_children.begin(); it != m_children.end(); ++it) {
+ if ((*it)->GetLabel() == label) {
+ parent = this;
+ return true;
+ }
+ std::vector<TreePointer>::const_iterator it2;
+ if ((*it)->RecursiveSearch(label, it2, parent)) {
+ it = it2;
+ return true;
+ }
+ }
+ return false;
}
-bool InternalTree::FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const {
- for (it = m_children.begin(); it != m_children.end(); ++it) {
- if ((*it)->GetNTLabel() == label) {
- return true;
- }
+bool InternalTree::FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const
+{
+ for (it = m_children.begin(); it != m_children.end(); ++it) {
+ if ((*it)->GetNTLabel() == label) {
+ return true;
}
- return false;
+ }
+ return false;
}
-bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const {
- for (it = m_children.begin(); it != m_children.end(); ++it) {
- if ((*it)->GetNTLabel() == label) {
- return true;
- }
- std::vector<TreePointer>::const_iterator it2;
- if ((*it)->RecursiveSearch(label, it2)) {
- it = it2;
- return true;
- }
+bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const
+{
+ for (it = m_children.begin(); it != m_children.end(); ++it) {
+ if ((*it)->GetNTLabel() == label) {
+ return true;
}
- return false;
+ std::vector<TreePointer>::const_iterator it2;
+ if ((*it)->RecursiveSearch(label, it2)) {
+ it = it2;
+ return true;
+ }
+ }
+ return false;
}
-bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const {
- for (it = m_children.begin(); it != m_children.end(); ++it) {
- if ((*it)->GetNTLabel() == label) {
- parent = this;
- return true;
- }
- std::vector<TreePointer>::const_iterator it2;
- if ((*it)->RecursiveSearch(label, it2, parent)) {
- it = it2;
- return true;
- }
- }
- return false;
+bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
+{
+ for (it = m_children.begin(); it != m_children.end(); ++it) {
+ if ((*it)->GetNTLabel() == label) {
+ parent = this;
+ return true;
+ }
+ std::vector<TreePointer>::const_iterator it2;
+ if ((*it)->RecursiveSearch(label, it2, parent)) {
+ it = it2;
+ return true;
+ }
+ }
+ return false;
}
-bool InternalTree::FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const {
- for (it = m_children.begin(); it != m_children.end(); ++it) {
- if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
- return true;
- }
+bool InternalTree::FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const
+{
+ for (it = m_children.begin(); it != m_children.end(); ++it) {
+ if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
+ return true;
}
- return false;
+ }
+ return false;
}
-bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const {
- for (it = m_children.begin(); it != m_children.end(); ++it) {
- if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
- return true;
- }
- std::vector<TreePointer>::const_iterator it2;
- if ((*it)->RecursiveSearch(labels, it2)) {
- it = it2;
- return true;
- }
+bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const
+{
+ for (it = m_children.begin(); it != m_children.end(); ++it) {
+ if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
+ return true;
}
- return false;
-}
-
-bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const {
- for (it = m_children.begin(); it != m_children.end(); ++it) {
- if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
- parent = this;
- return true;
- }
- std::vector<TreePointer>::const_iterator it2;
- if ((*it)->RecursiveSearch(labels, it2, parent)) {
- it = it2;
- return true;
- }
+ std::vector<TreePointer>::const_iterator it2;
+ if ((*it)->RecursiveSearch(labels, it2)) {
+ it = it2;
+ return true;
}
- return false;
+ }
+ return false;
}
+bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
+{
+ for (it = m_children.begin(); it != m_children.end(); ++it) {
+ if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
+ parent = this;
+ return true;
+ }
+ std::vector<TreePointer>::const_iterator it2;
+ if ((*it)->RecursiveSearch(labels, it2, parent)) {
+ it = it2;
+ return true;
+ }
+ }
+ return false;
}
+
+} \ No newline at end of file
diff --git a/moses/FF/InternalTree.h b/moses/FF/InternalTree.h
index 19006cdd3..722c5832f 100644
--- a/moses/FF/InternalTree.h
+++ b/moses/FF/InternalTree.h
@@ -19,78 +19,79 @@ typedef int NTLabel;
class InternalTree
{
-std::string m_value;
-NTLabel m_value_nt;
-std::vector<TreePointer> m_children;
-bool m_isTerminal;
+ std::string m_value;
+ NTLabel m_value_nt;
+ std::vector<TreePointer> m_children;
+ bool m_isTerminal;
public:
- InternalTree(const std::string & line, const bool terminal = false);
- InternalTree(const InternalTree & tree):
- m_value(tree.m_value),
- m_isTerminal(tree.m_isTerminal) {
- const std::vector<TreePointer> & children = tree.m_children;
- for (std::vector<TreePointer>::const_iterator it = children.begin(); it != children.end(); it++) {
- m_children.push_back(boost::make_shared<InternalTree>(**it));
- }
- }
- size_t AddSubTree(const std::string & line, size_t start);
-
- std::string GetString(bool start = true) const;
- void Combine(const std::vector<TreePointer> &previous);
- const std::string & GetLabel() const {
- return m_value;
+ InternalTree(const std::string & line, size_t start, size_t len, const bool terminal);
+ InternalTree(const std::string & line, const bool terminal = false);
+ InternalTree(const InternalTree & tree):
+ m_value(tree.m_value),
+ m_isTerminal(tree.m_isTerminal) {
+ const std::vector<TreePointer> & children = tree.m_children;
+ for (std::vector<TreePointer>::const_iterator it = children.begin(); it != children.end(); it++) {
+ m_children.push_back(boost::make_shared<InternalTree>(**it));
}
+ }
+ size_t AddSubTree(const std::string & line, size_t start);
- // optionally identify label by int instead of string;
- // allows abstraction if multiple nonterminal strings should map to same label.
- const NTLabel & GetNTLabel() const {
- return m_value_nt;
- }
+ std::string GetString(bool start = true) const;
+ void Combine(const std::vector<TreePointer> &previous);
+ const std::string & GetLabel() const {
+ return m_value;
+ }
- void SetNTLabel(NTLabel value) {
- m_value_nt = value;
- }
+ // optionally identify label by int instead of string;
+ // allows abstraction if multiple nonterminal strings should map to same label.
+ const NTLabel & GetNTLabel() const {
+ return m_value_nt;
+ }
- size_t GetLength() const {
- return m_children.size();
- }
- std::vector<TreePointer> & GetChildren() {
- return m_children;
- }
+ void SetNTLabel(NTLabel value) {
+ m_value_nt = value;
+ }
- bool IsTerminal() const {
- return m_isTerminal;
- }
+ size_t GetLength() const {
+ return m_children.size();
+ }
+ std::vector<TreePointer> & GetChildren() {
+ return m_children;
+ }
- bool IsLeafNT() const {
- return (!m_isTerminal && m_children.size() == 0);
- }
+ bool IsTerminal() const {
+ return m_isTerminal;
+ }
+
+ bool IsLeafNT() const {
+ return (!m_isTerminal && m_children.size() == 0);
+ }
- // different methods to search a tree (either just direct children (FlatSearch) or all children (RecursiveSearch)) for constituents.
- // can be used for formulating syntax constraints.
+ // different methods to search a tree (either just direct children (FlatSearch) or all children (RecursiveSearch)) for constituents.
+ // can be used for formulating syntax constraints.
- // if found, 'it' is iterator to first tree node that matches search string
- bool FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
- bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
+ // if found, 'it' is iterator to first tree node that matches search string
+ bool FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
+ bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
- // if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
- bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
+ // if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
+ bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
- // use NTLabel for search to reduce number of string comparisons / deal with synonymous labels
- // if found, 'it' is iterator to first tree node that matches search string
- bool FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
- bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
+ // use NTLabel for search to reduce number of string comparisons / deal with synonymous labels
+ // if found, 'it' is iterator to first tree node that matches search string
+ bool FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
+ bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
- // if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
- bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
+ // if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
+ bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
- // pass vector of possible labels to search
- // if found, 'it' is iterator to first tree node that matches search string
- bool FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
- bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
+ // pass vector of possible labels to search
+ // if found, 'it' is iterator to first tree node that matches search string
+ bool FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
+ bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
- // if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
- bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
+ // if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
+ bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
};
@@ -100,77 +101,79 @@ class TreeState : public FFState
TreePointer m_tree;
public:
TreeState(TreePointer tree)
- :m_tree(tree)
- {}
+ :m_tree(tree) {
+ }
TreePointer GetTree() const {
- return m_tree;
+ return m_tree;
}
- int Compare(const FFState& other) const {return 0;};
+ int Compare(const FFState& other) const {
+ return 0;
+ };
};
// Python-like generator that yields next nonterminal leaf on every call
-$generator(leafNT) {
- std::vector<TreePointer>::iterator it;
- InternalTree* tree;
- leafNT(InternalTree* root = 0): tree(root) {}
- $emit(std::vector<TreePointer>::iterator)
- for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
- if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
- $yield(it);
- }
- else if ((*it)->GetLength() > 0) {
- if ((*it).get()) { // normal pointer to same object that TreePointer points to
- $restart(tree = (*it).get());
- }
- }
+$generator(leafNT)
+{
+ std::vector<TreePointer>::iterator it;
+ InternalTree* tree;
+ leafNT(InternalTree* root = 0): tree(root) {}
+ $emit(std::vector<TreePointer>::iterator)
+ for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
+ if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
+ $yield(it);
+ } else if ((*it)->GetLength() > 0) {
+ if ((*it).get()) { // normal pointer to same object that TreePointer points to
+ $restart(tree = (*it).get());
+ }
}
- $stop;
+ }
+ $stop;
};
// Python-like generator that yields the parent of the next nonterminal leaf on every call
-$generator(leafNTParent) {
- std::vector<TreePointer>::iterator it;
- InternalTree* tree;
- leafNTParent(InternalTree* root = 0): tree(root) {}
- $emit(InternalTree*)
- for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
- if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
- $yield(tree);
- }
- else if ((*it)->GetLength() > 0) {
- if ((*it).get()) {
- $restart(tree = (*it).get());
- }
- }
+$generator(leafNTParent)
+{
+ std::vector<TreePointer>::iterator it;
+ InternalTree* tree;
+ leafNTParent(InternalTree* root = 0): tree(root) {}
+ $emit(InternalTree*)
+ for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
+ if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
+ $yield(tree);
+ } else if ((*it)->GetLength() > 0) {
+ if ((*it).get()) {
+ $restart(tree = (*it).get());
+ }
}
- $stop;
+ }
+ $stop;
};
// Python-like generator that yields the next nonterminal leaf on every call, and also stores the path from the root of the tree to the nonterminal
-$generator(leafNTPath) {
- std::vector<TreePointer>::iterator it;
- InternalTree* tree;
- std::vector<InternalTree*> * path;
- leafNTPath(InternalTree* root = NULL, std::vector<InternalTree*> * orig = NULL): tree(root), path(orig) {}
- $emit(std::vector<TreePointer>::iterator)
- path->push_back(tree);
- for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
- if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
- path->push_back((*it).get());
- $yield(it);
- path->pop_back();
- }
- else if ((*it)->GetLength() > 0) {
- if ((*it).get()) {
- $restart(tree = (*it).get());
- }
- }
+$generator(leafNTPath)
+{
+ std::vector<TreePointer>::iterator it;
+ InternalTree* tree;
+ std::vector<InternalTree*> * path;
+ leafNTPath(InternalTree* root = NULL, std::vector<InternalTree*> * orig = NULL): tree(root), path(orig) {}
+ $emit(std::vector<TreePointer>::iterator)
+ path->push_back(tree);
+ for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
+ if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
+ path->push_back((*it).get());
+ $yield(it);
+ path->pop_back();
+ } else if ((*it)->GetLength() > 0) {
+ if ((*it).get()) {
+ $restart(tree = (*it).get());
+ }
}
- path->pop_back();
- $stop;
+ }
+ path->pop_back();
+ $stop;
};
diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp
index b19d82304..0630d1077 100644
--- a/moses/FF/LexicalReordering/LexicalReordering.cpp
+++ b/moses/FF/LexicalReordering/LexicalReordering.cpp
@@ -1,4 +1,5 @@
#include <sstream>
+#include <boost/algorithm/string/predicate.hpp>
#include "moses/FF/FFState.h"
#include "LexicalReordering.h"
@@ -6,6 +7,7 @@
#include "moses/StaticData.h"
using namespace std;
+using namespace boost::algorithm;
namespace Moses
{
@@ -15,7 +17,7 @@ LexicalReordering::LexicalReordering(const std::string &line)
std::cerr << "Initializing LexicalReordering.." << std::endl;
map<string,string> sparseArgs;
- m_haveDefaultScores = false;
+ m_haveDefaultScores = false;
for (size_t i = 0; i < m_args.size(); ++i) {
const vector<string> &args = m_args[i];
@@ -29,14 +31,14 @@ LexicalReordering::LexicalReordering(const std::string &line)
m_factorsE =Tokenize<FactorType>(args[1]);
} else if (args[0] == "path") {
m_filePath = args[1];
- } else if (args[0].substr(0,7) == "sparse-") {
+ } else if (starts_with(args[0], "sparse-")) {
sparseArgs[args[0].substr(7)] = args[1];
} else if (args[0] == "default-scores") {
vector<string> tokens = Tokenize(args[1],",");
for(size_t i=0; i<tokens.size(); i++) {
m_defaultScores.push_back( TransformScore( Scan<float>(tokens[i]) ) );
}
- m_haveDefaultScores = true;
+ m_haveDefaultScores = true;
} else {
UTIL_THROW(util::Exception,"Unknown argument " + args[0]);
}
@@ -84,14 +86,16 @@ Scores LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const
}
FFState* LexicalReordering::EvaluateWhenApplied(const Hypothesis& hypo,
- const FFState* prev_state,
- ScoreComponentCollection* out) const
+ const FFState* prev_state,
+ ScoreComponentCollection* out) const
{
+ VERBOSE(3,"LexicalReordering::Evaluate(const Hypothesis& hypo,...) START" << std::endl);
Scores score(GetNumScoreComponents(), 0);
const LexicalReorderingState *prev = dynamic_cast<const LexicalReorderingState *>(prev_state);
LexicalReorderingState *next_state = prev->Expand(hypo.GetTranslationOption(), hypo.GetInput(), out);
out->PlusEquals(this, score);
+ VERBOSE(3,"LexicalReordering::Evaluate(const Hypothesis& hypo,...) END" << std::endl);
return next_state;
}
diff --git a/moses/FF/LexicalReordering/LexicalReordering.h b/moses/FF/LexicalReordering/LexicalReordering.h
index 2e73f8736..444a5a68c 100644
--- a/moses/FF/LexicalReordering/LexicalReordering.h
+++ b/moses/FF/LexicalReordering/LexicalReordering.h
@@ -46,28 +46,37 @@ public:
Scores GetProb(const Phrase& f, const Phrase& e) const;
virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo,
- const FFState* prev_state,
- ScoreComponentCollection* accumulator) const;
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const;
virtual FFState* EvaluateWhenApplied(const ChartHypothesis&,
- int /* featureID */,
- ScoreComponentCollection*) const {
+ int /* featureID */,
+ ScoreComponentCollection*) const {
UTIL_THROW(util::Exception, "LexicalReordering is not valid for chart decoder");
}
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
- bool GetHaveDefaultScores() { return m_haveDefaultScores; }
- float GetDefaultScore( size_t i ) { return m_defaultScores[i]; }
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
+ bool GetHaveDefaultScores() {
+ return m_haveDefaultScores;
+ }
+ float GetDefaultScore( size_t i ) {
+ return m_defaultScores[i];
+ }
private:
bool DecodeCondition(std::string s);
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
index 847409496..567d1b713 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
@@ -39,7 +39,7 @@ size_t LexicalReorderingConfiguration::GetNumScoreComponents() const
}
void LexicalReorderingConfiguration::ConfigureSparse
- (const std::map<std::string,std::string>& sparseArgs, const LexicalReordering* producer)
+(const std::map<std::string,std::string>& sparseArgs, const LexicalReordering* producer)
{
if (sparseArgs.size()) {
m_sparse.reset(new SparseReordering(sparseArgs, producer));
@@ -89,13 +89,13 @@ LexicalReorderingConfiguration::LexicalReorderingConfiguration(const std::string
} else if (config[i] == "allff") {
m_collapseScores = false;
} else {
- UserMessage::Add("Illegal part in the lexical reordering configuration string: "+config[i]);
+ std::cerr << "Illegal part in the lexical reordering configuration string: " << config[i] << std::endl;
exit(1);
}
}
if (m_modelType == None) {
- UserMessage::Add("You need to specify the type of the reordering model (msd, monotonicity,...)");
+ std::cerr << "You need to specify the type of the reordering model (msd, monotonicity,...)" << std::endl;
exit(1);
}
}
@@ -134,7 +134,7 @@ void LexicalReorderingState::CopyScores(ScoreComponentCollection* accum, const
{
// don't call this on a bidirectional object
UTIL_THROW_IF2(m_direction != LexicalReorderingConfiguration::Backward && m_direction != LexicalReorderingConfiguration::Forward,
- "Unknown direction: " << m_direction);
+ "Unknown direction: " << m_direction);
const TranslationOption* relevantOpt = &topt;
if (m_direction != LexicalReorderingConfiguration::Backward) relevantOpt = m_prevOption;
const Scores *cachedScores = relevantOpt->GetLexReorderingScores(m_configuration.GetScoreProducer());
@@ -146,8 +146,7 @@ void LexicalReorderingState::CopyScores(ScoreComponentCollection* accum, const
const Scores &scoreSet = *cachedScores;
if(m_configuration.CollapseScores()) {
scores[m_offset] = scoreSet[m_offset + reoType];
- }
- else {
+ } else {
std::fill(scores.begin() + m_offset, scores.begin() + m_offset + m_configuration.GetNumberOfTypes(), 0);
scores[m_offset + reoType] = scoreSet[m_offset + reoType];
}
@@ -158,8 +157,7 @@ void LexicalReorderingState::CopyScores(ScoreComponentCollection* accum, const
Scores scores(m_configuration.GetScoreProducer()->GetNumScoreComponents(),0);
if(m_configuration.CollapseScores()) {
scores[m_offset] = m_configuration.GetScoreProducer()->GetDefaultScore(m_offset + reoType);
- }
- else {
+ } else {
scores[m_offset + reoType] = m_configuration.GetScoreProducer()->GetDefaultScore(m_offset + reoType);
}
accum->PlusEquals(m_configuration.GetScoreProducer(), scores);
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h
index e309ed7f1..79537f119 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.h
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.h
@@ -124,7 +124,7 @@ protected:
int ComparePrevScores(const TranslationOption *other) const;
//constants for the different type of reorderings (corresponding to indexes in the table file)
- public:
+public:
static const ReorderingType M = 0; // monotonic
static const ReorderingType NM = 1; // non-monotonic
static const ReorderingType S = 1; // swap
diff --git a/moses/FF/LexicalReordering/LexicalReorderingTable.cpp b/moses/FF/LexicalReordering/LexicalReorderingTable.cpp
index e10fc3833..2cb9dfc5d 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingTable.cpp
+++ b/moses/FF/LexicalReordering/LexicalReorderingTable.cpp
@@ -268,7 +268,7 @@ Scores LexicalReorderingTableTree::GetScore(const Phrase& f, const Phrase& e, co
}
if(m_FactorsC.empty()) {
- UTIL_THROW_IF2(1 != cands.size(), "Error");
+ UTIL_THROW_IF2(1 != cands.size(), "Error");
return cands[0].GetScore(0);
} else {
score = auxFindScoreForContext(cands, c);
@@ -283,7 +283,7 @@ Scores LexicalReorderingTableTree::GetScore(const Phrase& f, const Phrase& e, co
Scores LexicalReorderingTableTree::auxFindScoreForContext(const Candidates& cands, const Phrase& context)
{
if(m_FactorsC.empty()) {
- UTIL_THROW_IF2(cands.size() > 1, "Error");
+ UTIL_THROW_IF2(cands.size() > 1, "Error");
return (1 == cands.size())?(cands[0].GetScore(0)):(Scores());
} else {
@@ -384,7 +384,7 @@ bool LexicalReorderingTableTree::Create(std::istream& inFile,
} else {
//sanity check ALL lines must have same number of tokens
UTIL_THROW_IF2(numTokens != tokens.size(),
- "Lines do not have the same number of tokens");
+ "Lines do not have the same number of tokens");
}
size_t phrase = 0;
for(; phrase < numKeyTokens; ++phrase) {
diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp
index f62dcde8b..1561ef0af 100644
--- a/moses/FF/LexicalReordering/SparseReordering.cpp
+++ b/moses/FF/LexicalReordering/SparseReordering.cpp
@@ -16,10 +16,11 @@
using namespace std;
-namespace Moses
+namespace Moses
{
-const std::string& SparseReorderingFeatureKey::Name (const string& wordListId) {
+const std::string& SparseReorderingFeatureKey::Name (const string& wordListId)
+{
static string kSep = "-";
static string name;
ostringstream buf;
@@ -55,7 +56,7 @@ const std::string& SparseReorderingFeatureKey::Name (const string& wordListId) {
}
SparseReordering::SparseReordering(const map<string,string>& config, const LexicalReordering* producer)
- : m_producer(producer)
+ : m_producer(producer)
{
static const string kSource= "source";
static const string kTarget = "target";
@@ -93,22 +94,24 @@ SparseReordering::SparseReordering(const map<string,string>& config, const Lexic
}
-void SparseReordering::PreCalculateFeatureNames(size_t index, const string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster) {
+void SparseReordering::PreCalculateFeatureNames(size_t index, const string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster)
+{
for (size_t type = SparseReorderingFeatureKey::Stack;
- type <= SparseReorderingFeatureKey::Between; ++type) {
+ type <= SparseReorderingFeatureKey::Between; ++type) {
for (size_t position = SparseReorderingFeatureKey::First;
- position <= SparseReorderingFeatureKey::Last; ++position) {
+ position <= SparseReorderingFeatureKey::Last; ++position) {
for (int reoType = 0; reoType <= LexicalReorderingState::MAX; ++reoType) {
SparseReorderingFeatureKey key(
index, static_cast<SparseReorderingFeatureKey::Type>(type), factor, isCluster,
- static_cast<SparseReorderingFeatureKey::Position>(position), side, reoType);
+ static_cast<SparseReorderingFeatureKey::Position>(position), side, reoType);
m_featureMap.insert(pair<SparseReorderingFeatureKey, FName>(key,m_producer->GetFeatureName(key.Name(id))));
}
}
}
}
-void SparseReordering::ReadWordList(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<WordList>* pWordLists) {
+void SparseReordering::ReadWordList(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<WordList>* pWordLists)
+{
ifstream fh(filename.c_str());
UTIL_THROW_IF(!fh, util::Exception, "Unable to open: " << filename);
string line;
@@ -118,12 +121,13 @@ void SparseReordering::ReadWordList(const string& filename, const string& id, Sp
//TODO: StringPiece
const Factor* factor = FactorCollection::Instance().AddFactor(line);
pWordLists->back().second.insert(factor);
- PreCalculateFeatureNames(pWordLists->size()-1, id, side, factor, false);
+ PreCalculateFeatureNames(pWordLists->size()-1, id, side, factor, false);
}
}
-void SparseReordering::ReadClusterMap(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<ClusterMap>* pClusterMaps) {
+void SparseReordering::ReadClusterMap(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<ClusterMap>* pClusterMaps)
+{
pClusterMaps->push_back(ClusterMap());
pClusterMaps->back().first = id;
util::FilePiece file(filename.c_str());
@@ -141,15 +145,16 @@ void SparseReordering::ReadClusterMap(const string& filename, const string& id,
if (!lineIter) UTIL_THROW(util::Exception, "Malformed cluster line (missing cluster id): '" << line << "'");
const Factor* idFactor = FactorCollection::Instance().AddFactor(*lineIter);
pClusterMaps->back().second[wordFactor] = idFactor;
- PreCalculateFeatureNames(pClusterMaps->size()-1, id, side, idFactor, true);
+ PreCalculateFeatureNames(pClusterMaps->size()-1, id, side, idFactor, true);
}
}
void SparseReordering::AddFeatures(
- SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,
- const Word& word, SparseReorderingFeatureKey::Position position,
- LexicalReorderingState::ReorderingType reoType,
- ScoreComponentCollection* scores) const {
+ SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,
+ const Word& word, SparseReorderingFeatureKey::Position position,
+ LexicalReorderingState::ReorderingType reoType,
+ ScoreComponentCollection* scores) const
+{
const Factor* wordFactor = word.GetFactor(0);
@@ -174,7 +179,7 @@ void SparseReordering::AddFeatures(
for (size_t id = 0; id < clusterMaps->size(); ++id) {
const ClusterMap& clusterMap = (*clusterMaps)[id];
boost::unordered_map<const Factor*, const Factor*>::const_iterator clusterIter
- = clusterMap.second.find(wordFactor);
+ = clusterMap.second.find(wordFactor);
if (clusterIter != clusterMap.second.end()) {
SparseReorderingFeatureKey key(id, type, clusterIter->second, true, position, side, reoType);
FeatureMap::const_iterator fmi = m_featureMap.find(key);
@@ -186,18 +191,18 @@ void SparseReordering::AddFeatures(
}
void SparseReordering::CopyScores(
- const TranslationOption& currentOpt,
- const TranslationOption* previousOpt,
- const InputType& input,
- LexicalReorderingState::ReorderingType reoType,
- LexicalReorderingConfiguration::Direction direction,
- ScoreComponentCollection* scores) const
+ const TranslationOption& currentOpt,
+ const TranslationOption* previousOpt,
+ const InputType& input,
+ LexicalReorderingState::ReorderingType reoType,
+ LexicalReorderingConfiguration::Direction direction,
+ ScoreComponentCollection* scores) const
{
if (m_useBetween && direction == LexicalReorderingConfiguration::Backward &&
(reoType == LexicalReorderingState::D || reoType == LexicalReorderingState::DL ||
- reoType == LexicalReorderingState::DR)) {
+ reoType == LexicalReorderingState::DR)) {
size_t gapStart, gapEnd;
- //NB: Using a static cast for speed, but could be nasty if
+ //NB: Using a static cast for speed, but could be nasty if
//using non-sentence input
const Sentence& sentence = static_cast<const Sentence&>(input);
const WordsRange& currentRange = currentOpt.GetSourceWordsRange();
@@ -217,9 +222,9 @@ void SparseReordering::CopyScores(
}
assert(gapStart < gapEnd);
for (size_t i = gapStart; i < gapEnd; ++i) {
- AddFeatures(SparseReorderingFeatureKey::Between,
- SparseReorderingFeatureKey::Source, sentence.GetWord(i),
- SparseReorderingFeatureKey::First, reoType, scores);
+ AddFeatures(SparseReorderingFeatureKey::Between,
+ SparseReorderingFeatureKey::Source, sentence.GetWord(i),
+ SparseReorderingFeatureKey::First, reoType, scores);
}
}
//std::cerr << "SR " << topt << " " << reoType << " " << direction << std::endl;
@@ -240,11 +245,11 @@ void SparseReordering::CopyScores(
}
const Phrase& sourcePhrase = currentOpt.GetInputPath().GetPhrase();
AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(0),
- SparseReorderingFeatureKey::First, reoType, scores);
+ SparseReorderingFeatureKey::First, reoType, scores);
AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(sourcePhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores);
- const Phrase& targetPhrase = currentOpt.GetTargetPhrase();
+ const Phrase& targetPhrase = currentOpt.GetTargetPhrase();
AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(0),
- SparseReorderingFeatureKey::First, reoType, scores);
+ SparseReorderingFeatureKey::First, reoType, scores);
AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(targetPhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores);
diff --git a/moses/FF/LexicalReordering/SparseReordering.h b/moses/FF/LexicalReordering/SparseReordering.h
index 663785a88..50ec96a0d 100644
--- a/moses/FF/LexicalReordering/SparseReordering.h
+++ b/moses/FF/LexicalReordering/SparseReordering.h
@@ -23,7 +23,7 @@
/**
Configuration of sparse reordering:
-
+
The sparse reordering feature is configured using sparse-* configs in the lexical reordering line.
sparse-words-(source|target)-<id>=<filename> -- Features which fire for the words in the list
sparse-clusters-(source|target)-<id>=<filename> -- Features which fire for clusters in the list. Format
@@ -38,7 +38,7 @@
namespace Moses
{
-/**
+/**
* Used to store pre-calculated feature names.
**/
struct SparseReorderingFeatureKey {
@@ -51,17 +51,17 @@ struct SparseReorderingFeatureKey {
LexicalReorderingState::ReorderingType reoType;
SparseReorderingFeatureKey(size_t id_, Type type_, const Factor* word_, bool isCluster_,
- Position position_, Side side_, LexicalReorderingState::ReorderingType reoType_)
+ Position position_, Side side_, LexicalReorderingState::ReorderingType reoType_)
: id(id_), type(type_), word(word_), isCluster(isCluster_),
- position(position_), side(side_), reoType(reoType_)
- {}
+ position(position_), side(side_), reoType(reoType_) {
+ }
- const std::string& Name(const std::string& wordListId) ;
+ const std::string& Name(const std::string& wordListId) ;
};
struct HashSparseReorderingFeatureKey : public std::unary_function<SparseReorderingFeatureKey, std::size_t> {
std::size_t operator()(const SparseReorderingFeatureKey& key) const {
- //TODO: can we just hash the memory?
+ //TODO: can we just hash the memory?
//not sure, there could be random padding
std::size_t seed = 0;
seed = util::MurmurHashNative(&key.id, sizeof(key.id), seed);
@@ -76,7 +76,7 @@ struct HashSparseReorderingFeatureKey : public std::unary_function<SparseReorder
};
struct EqualsSparseReorderingFeatureKey :
- public std::binary_function<SparseReorderingFeatureKey, SparseReorderingFeatureKey, bool> {
+ public std::binary_function<SparseReorderingFeatureKey, SparseReorderingFeatureKey, bool> {
bool operator()(const SparseReorderingFeatureKey& left, const SparseReorderingFeatureKey& right) const {
//TODO: Can we just compare the memory?
return left.id == right.id && left.type == right.type && left.word == right.word &&
@@ -89,14 +89,14 @@ class SparseReordering
{
public:
SparseReordering(const std::map<std::string,std::string>& config, const LexicalReordering* producer);
-
+
//If direction is backward the options will be different, for forward they will be the same
void CopyScores(const TranslationOption& currentOpt,
const TranslationOption* previousOpt,
const InputType& input,
- LexicalReorderingState::ReorderingType reoType,
- LexicalReorderingConfiguration::Direction direction,
- ScoreComponentCollection* scores) const ;
+ LexicalReorderingState::ReorderingType reoType,
+ LexicalReorderingConfiguration::Direction direction,
+ ScoreComponentCollection* scores) const ;
private:
const LexicalReordering* m_producer;
@@ -113,14 +113,14 @@ private:
FeatureMap m_featureMap;
void ReadWordList(const std::string& filename, const std::string& id,
- SparseReorderingFeatureKey::Side side, std::vector<WordList>* pWordLists);
+ SparseReorderingFeatureKey::Side side, std::vector<WordList>* pWordLists);
void ReadClusterMap(const std::string& filename, const std::string& id, SparseReorderingFeatureKey::Side side, std::vector<ClusterMap>* pClusterMaps);
void PreCalculateFeatureNames(size_t index, const std::string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster);
void AddFeatures(
SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,
- const Word& word, SparseReorderingFeatureKey::Position position,
- LexicalReorderingState::ReorderingType reoType,
+ const Word& word, SparseReorderingFeatureKey::Position position,
+ LexicalReorderingState::ReorderingType reoType,
ScoreComponentCollection* scores) const;
};
diff --git a/moses/FF/MaxSpanFreeNonTermSource.cpp b/moses/FF/MaxSpanFreeNonTermSource.cpp
index 9de582635..7799c0b2a 100644
--- a/moses/FF/MaxSpanFreeNonTermSource.cpp
+++ b/moses/FF/MaxSpanFreeNonTermSource.cpp
@@ -14,10 +14,10 @@ using namespace std;
namespace Moses
{
MaxSpanFreeNonTermSource::MaxSpanFreeNonTermSource(const std::string &line)
-:StatelessFeatureFunction(1, line)
-,m_maxSpan(2)
-,m_glueTargetLHSStr("S")
-,m_glueTargetLHS(true)
+ :StatelessFeatureFunction(1, line)
+ ,m_maxSpan(2)
+ ,m_glueTargetLHSStr("S")
+ ,m_glueTargetLHS(true)
{
m_tuneable = false;
ReadParameters();
@@ -28,25 +28,25 @@ MaxSpanFreeNonTermSource::MaxSpanFreeNonTermSource(const std::string &line)
}
void MaxSpanFreeNonTermSource::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
targetPhrase.SetRuleSource(source);
}
void MaxSpanFreeNonTermSource::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
const Word &targetLHS = targetPhrase.GetTargetLHS();
if (targetLHS == m_glueTargetLHS) {
- // don't delete glue rules
- return;
+ // don't delete glue rules
+ return;
}
const Phrase *source = targetPhrase.GetRuleSource();
@@ -54,17 +54,17 @@ void MaxSpanFreeNonTermSource::EvaluateWithSourceContext(const InputType &input
float score = 0;
if (source->Front().IsNonTerminal()) {
- const ChartCellLabel &cell = *stackVec->front();
- if (cell.GetCoverage().GetNumWordsCovered() > m_maxSpan) {
- score = - std::numeric_limits<float>::infinity();
- }
+ const ChartCellLabel &cell = *stackVec->front();
+ if (cell.GetCoverage().GetNumWordsCovered() > m_maxSpan) {
+ score = - std::numeric_limits<float>::infinity();
+ }
}
if (source->Back().IsNonTerminal()) {
- const ChartCellLabel &cell = *stackVec->back();
- if (cell.GetCoverage().GetNumWordsCovered() > m_maxSpan) {
- score = - std::numeric_limits<float>::infinity();
- }
+ const ChartCellLabel &cell = *stackVec->back();
+ if (cell.GetCoverage().GetNumWordsCovered() > m_maxSpan) {
+ score = - std::numeric_limits<float>::infinity();
+ }
}
@@ -76,7 +76,7 @@ void MaxSpanFreeNonTermSource::EvaluateWithSourceContext(const InputType &input
void MaxSpanFreeNonTermSource::SetParameter(const std::string& key, const std::string& value)
{
if (key == "max-span") {
- m_maxSpan = Scan<int>(value);
+ m_maxSpan = Scan<int>(value);
} else {
StatelessFeatureFunction::SetParameter(key, value);
}
@@ -84,8 +84,8 @@ void MaxSpanFreeNonTermSource::SetParameter(const std::string& key, const std::s
std::vector<float> MaxSpanFreeNonTermSource::DefaultWeights() const
{
- std::vector<float> ret(1, 1);
- return ret;
+ std::vector<float> ret(1, 1);
+ return ret;
}
}
diff --git a/moses/FF/MaxSpanFreeNonTermSource.h b/moses/FF/MaxSpanFreeNonTermSource.h
index 973b374d8..411b2d51d 100644
--- a/moses/FF/MaxSpanFreeNonTermSource.h
+++ b/moses/FF/MaxSpanFreeNonTermSource.h
@@ -10,33 +10,38 @@ namespace Moses
class MaxSpanFreeNonTermSource : public StatelessFeatureFunction
{
public:
- MaxSpanFreeNonTermSource(const std::string &line);
-
- virtual bool IsUseable(const FactorMask &mask) const
- { return true; }
-
- virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
-
- virtual void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
-
- virtual void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
-
- virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
-
- void SetParameter(const std::string& key, const std::string& value);
- std::vector<float> DefaultWeights() const;
+ MaxSpanFreeNonTermSource(const std::string &line);
+
+ virtual bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
+
+ virtual void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
+
+ virtual void EvaluateWithSourceContext(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
+ virtual void EvaluateWhenApplied(const Hypothesis& hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
+
+ virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
+
+ void SetParameter(const std::string& key, const std::string& value);
+ std::vector<float> DefaultWeights() const;
protected:
int m_maxSpan;
diff --git a/moses/FF/NieceTerminal.cpp b/moses/FF/NieceTerminal.cpp
index b3a5f8f92..3473790c1 100644
--- a/moses/FF/NieceTerminal.cpp
+++ b/moses/FF/NieceTerminal.cpp
@@ -20,25 +20,25 @@ NieceTerminal::NieceTerminal(const std::string &line)
std::vector<float> NieceTerminal::DefaultWeights() const
{
UTIL_THROW_IF2(m_numScoreComponents != 1,
- "NieceTerminal must only have 1 score");
+ "NieceTerminal must only have 1 score");
vector<float> ret(1, 1);
return ret;
}
void NieceTerminal::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
targetPhrase.SetRuleSource(source);
}
void NieceTerminal::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
assert(stackVec);
@@ -47,32 +47,32 @@ void NieceTerminal::EvaluateWithSourceContext(const InputType &input
std::set<Word> terms;
for (size_t i = 0; i < ruleSource->GetSize(); ++i) {
- const Word &word = ruleSource->GetWord(i);
- if (!word.IsNonTerminal()) {
- terms.insert(word);
- }
+ const Word &word = ruleSource->GetWord(i);
+ if (!word.IsNonTerminal()) {
+ terms.insert(word);
+ }
}
for (size_t i = 0; i < stackVec->size(); ++i) {
- const ChartCellLabel &cell = *stackVec->at(i);
- const WordsRange &ntRange = cell.GetCoverage();
- bool containTerm = ContainTerm(input, ntRange, terms);
-
- if (containTerm) {
- //cerr << "ruleSource=" << *ruleSource << " ";
- //cerr << "ntRange=" << ntRange << endl;
-
- // non-term contains 1 of the terms in the rule.
- float score = m_hardConstraint ? - std::numeric_limits<float>::infinity() : 1;
- scoreBreakdown.PlusEquals(this, score);
- return;
- }
+ const ChartCellLabel &cell = *stackVec->at(i);
+ const WordsRange &ntRange = cell.GetCoverage();
+ bool containTerm = ContainTerm(input, ntRange, terms);
+
+ if (containTerm) {
+ //cerr << "ruleSource=" << *ruleSource << " ";
+ //cerr << "ntRange=" << ntRange << endl;
+
+ // non-term contains 1 of the terms in the rule.
+ float score = m_hardConstraint ? - std::numeric_limits<float>::infinity() : 1;
+ scoreBreakdown.PlusEquals(this, score);
+ return;
+ }
}
}
void NieceTerminal::EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
+ ScoreComponentCollection* accumulator) const
{}
void NieceTerminal::EvaluateWhenApplied(const ChartHypothesis &hypo,
@@ -80,26 +80,26 @@ void NieceTerminal::EvaluateWhenApplied(const ChartHypothesis &hypo,
{}
bool NieceTerminal::ContainTerm(const InputType &input,
- const WordsRange &ntRange,
- const std::set<Word> &terms) const
+ const WordsRange &ntRange,
+ const std::set<Word> &terms) const
{
- std::set<Word>::const_iterator iter;
+ std::set<Word>::const_iterator iter;
- for (size_t pos = ntRange.GetStartPos(); pos <= ntRange.GetEndPos(); ++pos) {
- const Word &word = input.GetWord(pos);
- iter = terms.find(word);
+ for (size_t pos = ntRange.GetStartPos(); pos <= ntRange.GetEndPos(); ++pos) {
+ const Word &word = input.GetWord(pos);
+ iter = terms.find(word);
- if (iter != terms.end()) {
- return true;
- }
- }
- return false;
+ if (iter != terms.end()) {
+ return true;
+ }
+ }
+ return false;
}
void NieceTerminal::SetParameter(const std::string& key, const std::string& value)
{
if (key == "hard-constraint") {
- m_hardConstraint = Scan<bool>(value);
+ m_hardConstraint = Scan<bool>(value);
} else {
StatelessFeatureFunction::SetParameter(key, value);
}
diff --git a/moses/FF/NieceTerminal.h b/moses/FF/NieceTerminal.h
index 7daf2963e..2ee019443 100644
--- a/moses/FF/NieceTerminal.h
+++ b/moses/FF/NieceTerminal.h
@@ -20,19 +20,24 @@ public:
}
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
void SetParameter(const std::string& key, const std::string& value);
std::vector<float> DefaultWeights() const;
@@ -40,8 +45,8 @@ public:
protected:
bool m_hardConstraint;
bool ContainTerm(const InputType &input,
- const WordsRange &ntRange,
- const std::set<Word> &terms) const;
+ const WordsRange &ntRange,
+ const std::set<Word> &terms) const;
};
}
diff --git a/moses/FF/OSM-Feature/KenOSM.cpp b/moses/FF/OSM-Feature/KenOSM.cpp
index e517200c3..4047406e5 100644
--- a/moses/FF/OSM-Feature/KenOSM.cpp
+++ b/moses/FF/OSM-Feature/KenOSM.cpp
@@ -5,28 +5,28 @@ namespace Moses
OSMLM* ConstructOSMLM(const std::string &file)
{
- lm::ngram::ModelType model_type;
- if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
+ lm::ngram::ModelType model_type;
+ if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
- switch(model_type) {
- case lm::ngram::PROBING:
- return new KenOSM<lm::ngram::ProbingModel>(file);
- case lm::ngram::REST_PROBING:
- return new KenOSM<lm::ngram::RestProbingModel>(file);
- case lm::ngram::TRIE:
- return new KenOSM<lm::ngram::TrieModel>(file);
- case lm::ngram::QUANT_TRIE:
- return new KenOSM<lm::ngram::QuantTrieModel>(file);
- case lm::ngram::ARRAY_TRIE:
- return new KenOSM<lm::ngram::ArrayTrieModel>(file);
- case lm::ngram::QUANT_ARRAY_TRIE:
- return new KenOSM<lm::ngram::QuantArrayTrieModel>(file);
- default:
- UTIL_THROW2("Unrecognized kenlm model type " << model_type);
- }
- } else {
+ switch(model_type) {
+ case lm::ngram::PROBING:
return new KenOSM<lm::ngram::ProbingModel>(file);
+ case lm::ngram::REST_PROBING:
+ return new KenOSM<lm::ngram::RestProbingModel>(file);
+ case lm::ngram::TRIE:
+ return new KenOSM<lm::ngram::TrieModel>(file);
+ case lm::ngram::QUANT_TRIE:
+ return new KenOSM<lm::ngram::QuantTrieModel>(file);
+ case lm::ngram::ARRAY_TRIE:
+ return new KenOSM<lm::ngram::ArrayTrieModel>(file);
+ case lm::ngram::QUANT_ARRAY_TRIE:
+ return new KenOSM<lm::ngram::QuantArrayTrieModel>(file);
+ default:
+ UTIL_THROW2("Unrecognized kenlm model type " << model_type);
}
+ } else {
+ return new KenOSM<lm::ngram::ProbingModel>(file);
+ }
}
} // namespace
diff --git a/moses/FF/OSM-Feature/KenOSM.h b/moses/FF/OSM-Feature/KenOSM.h
index d3d8672d3..a50589edc 100644
--- a/moses/FF/OSM-Feature/KenOSM.h
+++ b/moses/FF/OSM-Feature/KenOSM.h
@@ -7,39 +7,41 @@
namespace Moses
{
-class KenOSMBase {
- public:
- virtual float Score(const lm::ngram::State&, const std::string&,
- lm::ngram::State&) const = 0;
-
- virtual const lm::ngram::State &BeginSentenceState() const = 0;
-
- virtual const lm::ngram::State &NullContextState() const = 0;
+class KenOSMBase
+{
+public:
+ virtual float Score(const lm::ngram::State&, const std::string&,
+ lm::ngram::State&) const = 0;
+
+ virtual const lm::ngram::State &BeginSentenceState() const = 0;
+
+ virtual const lm::ngram::State &NullContextState() const = 0;
};
template <class KenModel>
-class KenOSM : public KenOSMBase {
- public:
- KenOSM(const std::string& file)
+class KenOSM : public KenOSMBase
+{
+public:
+ KenOSM(const std::string& file)
: m_kenlm(new KenModel(file.c_str())) {}
-
- virtual float Score(const lm::ngram::State &in_state,
- const std::string& word,
- lm::ngram::State &out_state) const {
- return m_kenlm->Score(in_state, m_kenlm->GetVocabulary().Index(word),
- out_state);
- }
-
- virtual const lm::ngram::State &BeginSentenceState() const {
- return m_kenlm->BeginSentenceState();
- }
-
- virtual const lm::ngram::State &NullContextState() const {
- return m_kenlm->NullContextState();
- }
-
- private:
- boost::shared_ptr<KenModel> m_kenlm;
+
+ virtual float Score(const lm::ngram::State &in_state,
+ const std::string& word,
+ lm::ngram::State &out_state) const {
+ return m_kenlm->Score(in_state, m_kenlm->GetVocabulary().Index(word),
+ out_state);
+ }
+
+ virtual const lm::ngram::State &BeginSentenceState() const {
+ return m_kenlm->BeginSentenceState();
+ }
+
+ virtual const lm::ngram::State &NullContextState() const {
+ return m_kenlm->NullContextState();
+ }
+
+private:
+ boost::shared_ptr<KenModel> m_kenlm;
};
typedef KenOSMBase OSMLM;
diff --git a/moses/FF/OSM-Feature/OpSequenceModel.cpp b/moses/FF/OSM-Feature/OpSequenceModel.cpp
index 6d839f0cc..43ed5f346 100644
--- a/moses/FF/OSM-Feature/OpSequenceModel.cpp
+++ b/moses/FF/OSM-Feature/OpSequenceModel.cpp
@@ -19,15 +19,16 @@ OpSequenceModel::OpSequenceModel(const std::string &line)
ReadParameters();
}
-OpSequenceModel::~OpSequenceModel() {
- delete OSM;
+OpSequenceModel::~OpSequenceModel()
+{
+ delete OSM;
}
void OpSequenceModel :: readLanguageModel(const char *lmFile)
{
string unkOp = "_TRANS_SLF_";
OSM = ConstructOSMLM(m_lmPath);
-
+
State startState = OSM->NullContextState();
State endState;
unkOpProb = OSM->Score(startState,unkOp,endState);
@@ -42,9 +43,9 @@ void OpSequenceModel::Load()
void OpSequenceModel:: EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
osmHypothesis obj;
@@ -198,7 +199,7 @@ FFState* OpSequenceModel::EvaluateWhenApplied(
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const
{
- UTIL_THROW2("Chart decoding not support by UTIL_THROW2");
+ UTIL_THROW2("Chart decoding not support by UTIL_THROW2");
}
diff --git a/moses/FF/OSM-Feature/OpSequenceModel.h b/moses/FF/OSM-Feature/OpSequenceModel.h
index 8c71e8152..b59eb681a 100644
--- a/moses/FF/OSM-Feature/OpSequenceModel.h
+++ b/moses/FF/OSM-Feature/OpSequenceModel.h
@@ -38,16 +38,21 @@ public:
ScoreComponentCollection* accumulator) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
virtual const FFState* EmptyHypothesisState(const InputType &input) const;
diff --git a/moses/FF/PhraseBoundaryFeature.h b/moses/FF/PhraseBoundaryFeature.h
index e4c3ca3ba..a5b55e1ef 100644
--- a/moses/FF/PhraseBoundaryFeature.h
+++ b/moses/FF/PhraseBoundaryFeature.h
@@ -45,26 +45,30 @@ public:
virtual const FFState* EmptyHypothesisState(const InputType &) const;
virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
virtual FFState* EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */,
- int /* featureID */,
- ScoreComponentCollection* ) const {
+ int /* featureID */,
+ ScoreComponentCollection* ) const {
throw std::logic_error("PhraseBoundaryState not supported in chart decoder, yet");
}
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
void SetParameter(const std::string& key, const std::string& value);
diff --git a/moses/FF/PhraseLengthFeature.cpp b/moses/FF/PhraseLengthFeature.cpp
index 7850c374a..0eb0740b8 100644
--- a/moses/FF/PhraseLengthFeature.cpp
+++ b/moses/FF/PhraseLengthFeature.cpp
@@ -16,9 +16,9 @@ PhraseLengthFeature::PhraseLengthFeature(const std::string &line)
}
void PhraseLengthFeature::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
// get length of source and target phrase
size_t targetLength = targetPhrase.GetSize();
diff --git a/moses/FF/PhraseLengthFeature.h b/moses/FF/PhraseLengthFeature.h
index 9233aa3e7..9e576946f 100644
--- a/moses/FF/PhraseLengthFeature.h
+++ b/moses/FF/PhraseLengthFeature.h
@@ -25,25 +25,28 @@ public:
}
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis& hypo,
- ScoreComponentCollection*) const
- {}
+ ScoreComponentCollection*) const {
+ }
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
};
diff --git a/moses/FF/PhraseOrientationFeature.cpp b/moses/FF/PhraseOrientationFeature.cpp
index 0f6d8bcb1..d2d4f881c 100644
--- a/moses/FF/PhraseOrientationFeature.cpp
+++ b/moses/FF/PhraseOrientationFeature.cpp
@@ -1,4 +1,13 @@
-#include <vector>
+//
+// REFERENCE
+// ---------
+// When using this feature, please cite:
+//
+// Matthias Huck, Joern Wuebker, Felix Rietig, and Hermann Ney.
+// A Phrase Orientation Model for Hierarchical Machine Translation.
+// In ACL 2013 Eighth Workshop on Statistical Machine Translation (WMT 2013), pages 452-463, Sofia, Bulgaria, August 2013.
+//
+
#include "PhraseOrientationFeature.h"
#include "moses/InputFileStream.h"
#include "moses/ScoreComponentCollection.h"
@@ -6,196 +15,807 @@
#include "moses/Hypothesis.h"
#include "moses/ChartHypothesis.h"
#include "moses/ChartManager.h"
-#include "moses/FactorCollection.h"
-#include "moses/PP/OrientationPhraseProperty.h"
#include "phrase-extract/extract-ghkm/Alignment.h"
-using namespace std;
namespace Moses
{
+
+const std::string PhraseOrientationFeature::MORIENT("M");
+const std::string PhraseOrientationFeature::SORIENT("S");
+const std::string PhraseOrientationFeature::DORIENT("D");
+
+
PhraseOrientationFeature::PhraseOrientationFeature(const std::string &line)
- : StatelessFeatureFunction(8, line)
+ : StatefulFeatureFunction(6, line)
+ , m_glueTargetLHSStr("Q")
+ , m_distinguishStates(true)
+ , m_useSparseWord(false)
+ , m_useSparseNT(false)
+ , m_offsetR2LScores(m_numScoreComponents/2)
+ , m_weightsVector(StaticData::Instance().GetAllWeights().GetScoresForProducer(this))
+ , m_useTargetWordList(false)
+ , m_useSourceWordList(false)
{
VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
ReadParameters();
- VERBOSE(1, " Done.");
+ FactorCollection &factorCollection = FactorCollection::Instance();
+ m_glueTargetLHS = factorCollection.AddFactor(m_glueTargetLHSStr, true);
+ VERBOSE(1, " Done." << std::endl);
}
+
void PhraseOrientationFeature::SetParameter(const std::string& key, const std::string& value)
{
- if (key == "tuneable") {
- m_tuneable = Scan<bool>(value);
+ if (key == "glueTargetLHS") {
+ m_glueTargetLHSStr = value;
+ } else if (key == "distinguishStates") {
+ m_distinguishStates = Scan<bool>(value);
+ } else if (key == "sparseWord") {
+ m_useSparseWord = Scan<bool>(value);
+ } else if (key == "sparseNT") {
+ m_useSparseNT = Scan<bool>(value);
+ } else if (key == "targetWordList") {
+ m_filenameTargetWordList = value;
+ } else if (key == "sourceWordList") {
+ m_filenameSourceWordList = value;
} else {
- StatelessFeatureFunction::SetParameter(key, value);
+ StatefulFeatureFunction::SetParameter(key, value);
+ }
+}
+
+
+void PhraseOrientationFeature::Load()
+{
+ if ( !m_filenameTargetWordList.empty() ) {
+ LoadWordList(m_filenameTargetWordList,m_targetWordList);
+ m_useTargetWordList = true;
+ }
+ if ( !m_filenameSourceWordList.empty() ) {
+ LoadWordList(m_filenameSourceWordList,m_sourceWordList);
+ m_useSourceWordList = true;
}
}
-void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+
+void PhraseOrientationFeature::LoadWordList(const std::string& filename,
+ boost::unordered_set<const Factor*>& list)
+{
+ FEATUREVERBOSE(2, "Loading word list from file " << filename << std::endl);
+ FactorCollection &factorCollection = FactorCollection::Instance();
+ list.clear();
+ std::string line;
+ InputFileStream inFile(filename);
+
+ while (getline(inFile, line)) {
+ const Factor *factor = factorCollection.AddFactor(line, false);
+ list.insert(factor);
+ }
+
+ inFile.Close();
+}
+
+
+void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
+ const TargetPhrase &targetPhrase,
+ ScoreComponentCollection &scoreBreakdown,
+ ScoreComponentCollection &estimatedFutureScore) const
{
targetPhrase.SetRuleSource(source);
+
+ if (const PhraseProperty *property = targetPhrase.GetProperty("Orientation")) {
+ const OrientationPhraseProperty *orientationPhraseProperty = static_cast<const OrientationPhraseProperty*>(property);
+ LookaheadScore(orientationPhraseProperty, scoreBreakdown);
+ } else {
+ // abort with error message if the phrase does not translate an unknown word
+ UTIL_THROW_IF2(!targetPhrase.GetWord(0).IsOOV(), GetScoreProducerDescription()
+ << ": Missing Orientation property. "
+ << "Please check phrase table and glue rules.");
+ }
+}
+
+
+void PhraseOrientationFeature::LookaheadScore(const OrientationPhraseProperty *orientationPhraseProperty,
+ ScoreComponentCollection &scoreBreakdown,
+ bool subtract) const
+{
+ size_t ffScoreIndex = scoreBreakdown.GetIndexes(this).first;
+
+ std::vector<float> scoresL2R;
+ scoresL2R.push_back( TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityMono()) );
+ scoresL2R.push_back( TransformScore(orientationPhraseProperty->GetLeftToRightProbabilitySwap()) );
+ scoresL2R.push_back( TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous()) );
+ size_t heuristicScoreIndexL2R = GetHeuristicScoreIndex(scoresL2R, 0);
+
+ if (subtract) {
+ scoreBreakdown.PlusEquals(ffScoreIndex+heuristicScoreIndexL2R,
+ -scoresL2R[heuristicScoreIndexL2R]);
+ } else {
+ scoreBreakdown.PlusEquals(ffScoreIndex+heuristicScoreIndexL2R,
+ scoresL2R[heuristicScoreIndexL2R]);
+ }
+
+ std::vector<float> scoresR2L;
+ scoresR2L.push_back( TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityMono()) );
+ scoresR2L.push_back( TransformScore(orientationPhraseProperty->GetRightToLeftProbabilitySwap()) );
+ scoresR2L.push_back( TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous()) );
+ size_t heuristicScoreIndexR2L = GetHeuristicScoreIndex(scoresR2L, m_offsetR2LScores);
+
+ if (subtract) {
+ scoreBreakdown.PlusEquals(ffScoreIndex+m_offsetR2LScores+heuristicScoreIndexR2L,
+ -scoresR2L[heuristicScoreIndexR2L]);
+ } else {
+ scoreBreakdown.PlusEquals(ffScoreIndex+m_offsetR2LScores+heuristicScoreIndexR2L,
+ scoresR2L[heuristicScoreIndexR2L]);
+ }
}
-void PhraseOrientationFeature::EvaluateWhenApplied(
+FFState* PhraseOrientationFeature::EvaluateWhenApplied(
const ChartHypothesis& hypo,
+ int featureID, // used to index the state in the previous hypotheses
ScoreComponentCollection* accumulator) const
{
// Dense scores
- std::vector<float> newScores(m_numScoreComponents,0); // m_numScoreComponents == 8
+ std::vector<float> newScores(m_numScoreComponents,0);
// Read Orientation property
const TargetPhrase &currTarPhr = hypo.GetCurrTargetPhrase();
+ const Factor* currTarPhrLHS = currTarPhr.GetTargetLHS()[0];
const Phrase *currSrcPhr = currTarPhr.GetRuleSource();
// const Factor* targetLHS = currTarPhr.GetTargetLHS()[0];
// bool isGlueGrammarRule = false;
- std::map<size_t,size_t> alignMap;
- alignMap.insert(
- currTarPhr.GetAlignTerm().begin(),
- currTarPhr.GetAlignTerm().end());
- alignMap.insert(
- currTarPhr.GetAlignNonTerm().begin(),
- currTarPhr.GetAlignNonTerm().end());
+ // State: used to propagate orientation probabilities in case of boundary non-terminals
+ PhraseOrientationFeatureState *state = new PhraseOrientationFeatureState(m_distinguishStates,m_useSparseWord,m_useSparseNT);
- Moses::GHKM::Alignment alignment;
- std::vector<int> alignmentNTs(currTarPhr.GetSize(),-1); // TODO: can be smaller (number of right-hand side non-terminals)
+ IFFEATUREVERBOSE(2) {
+ FEATUREVERBOSE(2, *currSrcPhr << std::endl);
+ FEATUREVERBOSE(2, currTarPhr << std::endl);
- for (AlignmentInfo::const_iterator it=currTarPhr.GetAlignTerm().begin();
- it!=currTarPhr.GetAlignTerm().end(); ++it) {
- alignment.push_back(std::make_pair(it->first, it->second));
-// std::cerr << "alignTerm " << it->first << " " << it->second << std::endl;
- }
+ for (AlignmentInfo::const_iterator it=currTarPhr.GetAlignTerm().begin();
+ it!=currTarPhr.GetAlignTerm().end(); ++it) {
+ FEATUREVERBOSE(2, "alignTerm " << it->first << " " << it->second << std::endl);
+ }
- for (AlignmentInfo::const_iterator it=currTarPhr.GetAlignNonTerm().begin();
- it!=currTarPhr.GetAlignNonTerm().end(); ++it) {
- alignment.push_back(std::make_pair(it->first, it->second));
- alignmentNTs[it->second] = it->first;
-// std::cerr << "alignNonTerm " << it->first << " " << it->second << std::endl;
+ for (AlignmentInfo::const_iterator it=currTarPhr.GetAlignNonTerm().begin();
+ it!=currTarPhr.GetAlignNonTerm().end(); ++it) {
+ FEATUREVERBOSE(2, "alignNonTerm " << it->first << " " << it->second << std::endl);
+ }
}
// Initialize phrase orientation scoring object
- Moses::GHKM::PhraseOrientation phraseOrientation(currSrcPhr->GetSize(), currTarPhr.GetSize(), alignment);
- // TODO: Efficiency! This should be precomputed.
-
-// std::cerr << *currSrcPhr << std::endl;
-// std::cerr << currTarPhr << std::endl;
-// std::cerr << currSrcPhr->GetSize() << std::endl;
-// std::cerr << currTarPhr.GetSize() << std::endl;
-
+ Moses::GHKM::PhraseOrientation phraseOrientation(currSrcPhr->GetSize(), currTarPhr.GetSize(),
+ currTarPhr.GetAlignTerm(), currTarPhr.GetAlignNonTerm());
+
// Get index map for underlying hypotheses
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
- currTarPhr.GetAlignNonTerm().GetNonTermIndexMap();
+ currTarPhr.GetAlignNonTerm().GetNonTermIndexMap();
// Determine & score orientations
- size_t nonTerminalNumber = 0;
-
- for (size_t phrasePos=0; phrasePos<currTarPhr.GetSize(); ++phrasePos) {
- // consult rule for either word or non-terminal
- const Word &word = currTarPhr.GetWord(phrasePos);
- if ( word.IsNonTerminal() ) {
- // non-terminal: consult subderivation
- size_t nonTermIndex = nonTermIndexMap[phrasePos];
- const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndex);
- const TargetPhrase &prevTarPhr = prevHypo->GetCurrTargetPhrase();
- if (const PhraseProperty *property = prevTarPhr.GetProperty("Orientation")) {
- const OrientationPhraseProperty *orientationPhraseProperty = static_cast<const OrientationPhraseProperty*>(property);
-
-// std::cerr << "L2R_Mono " << orientationPhraseProperty->GetLeftToRightProbabilityMono();
-// std::cerr << " L2R_Swap " << orientationPhraseProperty->GetLeftToRightProbabilitySwap();
-// std::cerr << " L2R_Dright " << orientationPhraseProperty->GetLeftToRightProbabilityDright();
-// std::cerr << " L2R_Dleft " << orientationPhraseProperty->GetLeftToRightProbabilityDleft();
-// std::cerr << " R2L_Mono " << orientationPhraseProperty->GetRightToLeftProbabilityMono();
-// std::cerr << " R2L_Swap " << orientationPhraseProperty->GetRightToLeftProbabilitySwap();
-// std::cerr << " R2L_Dright " << orientationPhraseProperty->GetRightToLeftProbabilityDright();
-// std::cerr << " R2L_Dleft " << orientationPhraseProperty->GetRightToLeftProbabilityDleft();
-// std::cerr << std::endl;
-
- Moses::GHKM::REO_POS l2rOrientation=Moses::GHKM::UNKNOWN, r2lOrientation=Moses::GHKM::UNKNOWN;
- int sourceIndex = alignmentNTs[phrasePos];
-// std::cerr << "targetIndex " << phrasePos << " sourceIndex " << sourceIndex << std::endl;
- l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::L2R);
- r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::R2L);
-
-// std::cerr << "l2rOrientation ";
- switch(l2rOrientation) {
- case Moses::GHKM::LEFT:
- newScores[0] += std::log(orientationPhraseProperty->GetLeftToRightProbabilityMono());
-// std::cerr << "mono" << std::endl;
- break;
- case Moses::GHKM::RIGHT:
- newScores[1] += std::log(orientationPhraseProperty->GetLeftToRightProbabilitySwap());
-// std::cerr << "swap" << std::endl;
- break;
- case Moses::GHKM::DRIGHT:
- newScores[2] += std::log(orientationPhraseProperty->GetLeftToRightProbabilityDright());
-// std::cerr << "dright" << std::endl;
- break;
- case Moses::GHKM::DLEFT:
- newScores[3] += std::log(orientationPhraseProperty->GetLeftToRightProbabilityDleft());
-// std::cerr << "dleft" << std::endl;
- break;
- case Moses::GHKM::UNKNOWN:
- // modelType == Moses::GHKM::REO_MSLR
- newScores[2] += std::log(orientationPhraseProperty->GetLeftToRightProbabilityDright());
-// std::cerr << "unknown->dright" << std::endl;
- break;
- default:
- UTIL_THROW2(GetScoreProducerDescription()
- << ": Unsupported orientation type.");
- break;
- }
-
-// std::cerr << "r2lOrientation ";
- switch(r2lOrientation) {
- case Moses::GHKM::LEFT:
- newScores[4] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityMono());
-// std::cerr << "mono" << std::endl;
- break;
- case Moses::GHKM::RIGHT:
- newScores[5] += std::log(orientationPhraseProperty->GetRightToLeftProbabilitySwap());
-// std::cerr << "swap" << std::endl;
- break;
- case Moses::GHKM::DRIGHT:
- newScores[6] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityDright());
-// std::cerr << "dright" << std::endl;
- break;
- case Moses::GHKM::DLEFT:
- newScores[7] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityDleft());
-// std::cerr << "dleft" << std::endl;
- break;
- case Moses::GHKM::UNKNOWN:
- // modelType == Moses::GHKM::REO_MSLR
- newScores[6] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityDright());
-// std::cerr << "unknown->dright" << std::endl;
- break;
- default:
- UTIL_THROW2(GetScoreProducerDescription()
- << ": Unsupported orientation type.");
- break;
- }
-
- // TODO: Handle degenerate cases (boundary non-terminals)
-
- } else {
- // abort with error message if the phrase does not translate an unknown word
- UTIL_THROW_IF2(!prevTarPhr.GetWord(0).IsOOV(), GetScoreProducerDescription()
- << ": Missing Orientation property. "
- << "Please check phrase table and glue rules.");
+ for (AlignmentInfo::const_iterator it=currTarPhr.GetAlignNonTerm().begin();
+ it!=currTarPhr.GetAlignNonTerm().end(); ++it) {
+ size_t sourceIndex = it->first;
+ size_t targetIndex = it->second;
+ size_t nonTermIndex = nonTermIndexMap[targetIndex];
+
+ FEATUREVERBOSE(2, "Scoring nonTermIndex= " << nonTermIndex << " targetIndex= " << targetIndex << " sourceIndex= " << sourceIndex << std::endl);
+
+ // consult subderivation
+ const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndex);
+ const TargetPhrase &prevTarPhr = prevHypo->GetCurrTargetPhrase();
+ const Factor* prevTarPhrLHS = prevTarPhr.GetTargetLHS()[0];
+
+ if (const PhraseProperty *property = prevTarPhr.GetProperty("Orientation")) {
+ const OrientationPhraseProperty *orientationPhraseProperty = static_cast<const OrientationPhraseProperty*>(property);
+
+ FEATUREVERBOSE(5, "orientationPhraseProperty: "
+ << "L2R_Mono " << orientationPhraseProperty->GetLeftToRightProbabilityMono()
+ << " L2R_Swap " << orientationPhraseProperty->GetLeftToRightProbabilitySwap()
+ << " L2R_Dright " << orientationPhraseProperty->GetLeftToRightProbabilityDright()
+ << " L2R_Dleft " << orientationPhraseProperty->GetLeftToRightProbabilityDleft()
+ << " R2L_Mono " << orientationPhraseProperty->GetRightToLeftProbabilityMono()
+ << " R2L_Swap " << orientationPhraseProperty->GetRightToLeftProbabilitySwap()
+ << " R2L_Dright " << orientationPhraseProperty->GetRightToLeftProbabilityDright()
+ << " R2L_Dleft " << orientationPhraseProperty->GetRightToLeftProbabilityDleft()
+ << std::endl);
+
+ LookaheadScore(orientationPhraseProperty, *accumulator, true);
+
+ const PhraseOrientationFeatureState* prevState =
+ static_cast<const PhraseOrientationFeatureState*>(prevHypo->GetFFState(featureID));
+
+
+ // LEFT-TO-RIGHT DIRECTION
+
+ Moses::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::PhraseOrientation::REO_DIR_L2R);
+
+ IFFEATUREVERBOSE(2) {
+ FEATUREVERBOSE(2, "l2rOrientation ");
+ switch (l2rOrientation) {
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
+ FEATUREVERBOSE2(2, "mono" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
+ FEATUREVERBOSE2(2, "swap" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
+ FEATUREVERBOSE2(2, "dleft" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
+ FEATUREVERBOSE2(2, "dright" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
+ // modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
+ FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
+ break;
+ default:
+ UTIL_THROW2(GetScoreProducerDescription()
+ << ": Unsupported orientation type.");
+ break;
+ }
+ }
+
+ bool delayedScoringL2R = false;
+
+ if ( ((targetIndex == 0) || !phraseOrientation.TargetSpanIsAligned(0,targetIndex)) // boundary non-terminal in rule-initial position (left boundary)
+ && (currTarPhrLHS != m_glueTargetLHS) ) { // and not glue rule
+ // delay left-to-right scoring
+
+ FEATUREVERBOSE(3, "Left boundary");
+ if (targetIndex != 0) {
+ FEATUREVERBOSE2(3, " (with targetIndex!=0)");
+ }
+ FEATUREVERBOSE2(3, std::endl);
+
+ bool previousSourceSpanIsAligned = ( (sourceIndex > 0) && phraseOrientation.SourceSpanIsAligned(0,sourceIndex-1) );
+ bool followingSourceSpanIsAligned = ( (sourceIndex < currSrcPhr->GetSize()-1) && phraseOrientation.SourceSpanIsAligned(sourceIndex,currSrcPhr->GetSize()-1) );
+
+ FEATUREVERBOSE(4, "previousSourceSpanIsAligned = " << previousSourceSpanIsAligned << std::endl);
+ FEATUREVERBOSE(4, "followingSourceSpanIsAligned = " << followingSourceSpanIsAligned << std::endl;);
+
+ if (previousSourceSpanIsAligned && followingSourceSpanIsAligned) {
+ // discontinuous
+ l2rOrientation = Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
+ } else {
+ FEATUREVERBOSE(3, "Delaying left-to-right scoring" << std::endl);
+
+ delayedScoringL2R = true;
+ std::bitset<3> possibleFutureOrientationsL2R(0x7);
+ possibleFutureOrientationsL2R[0] = !previousSourceSpanIsAligned;
+ possibleFutureOrientationsL2R[1] = !followingSourceSpanIsAligned;
+
+ // add heuristic scores
+
+ std::vector<float> scoresL2R;
+ scoresL2R.push_back( TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityMono()) );
+ scoresL2R.push_back( TransformScore(orientationPhraseProperty->GetLeftToRightProbabilitySwap()) );
+ scoresL2R.push_back( TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous()) );
+
+ size_t heuristicScoreIndexL2R = GetHeuristicScoreIndex(scoresL2R, 0, possibleFutureOrientationsL2R);
+
+ newScores[heuristicScoreIndexL2R] += scoresL2R[heuristicScoreIndexL2R];
+ state->SetLeftBoundaryL2R(scoresL2R, heuristicScoreIndexL2R, possibleFutureOrientationsL2R, prevTarPhrLHS, prevState);
+
+ if ( (possibleFutureOrientationsL2R & prevState->m_leftBoundaryNonTerminalL2RPossibleFutureOrientations) == 0x4 ) {
+ // recursive: discontinuous orientation
+ FEATUREVERBOSE(5, "previous state: L2R discontinuous orientation "
+ << possibleFutureOrientationsL2R << " & " << prevState->m_leftBoundaryNonTerminalL2RPossibleFutureOrientations
+ << " = " << (possibleFutureOrientationsL2R & prevState->m_leftBoundaryNonTerminalL2RPossibleFutureOrientations)
+ << std::endl);
+ LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x4, newScores, accumulator);
+ state->m_leftBoundaryRecursionGuard = true; // prevent subderivation from being scored recursively multiple times
}
+ }
+ }
+
+ if (!delayedScoringL2R) {
+
+ if ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
+
+ newScores[0] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityMono());
+ // if sub-derivation has left-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);
+
+ } else if ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
+
+ newScores[1] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilitySwap());
+ // if sub-derivation has left-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);
+
+ } else if ( ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
+ ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
+ ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
- ++nonTerminalNumber;
+ newScores[2] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous());
+ // if sub-derivation has left-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x4, newScores, accumulator);
+
+ } else {
+
+ UTIL_THROW2(GetScoreProducerDescription()
+ << ": Unsupported orientation type.");
+ }
+
+ // sparse scores
+ if ( m_useSparseWord ) {
+ SparseWordL2RScore(prevHypo,accumulator,ToString(l2rOrientation));
+ }
+ if ( m_useSparseNT ) {
+ SparseNonTerminalL2RScore(prevTarPhrLHS,accumulator,ToString(l2rOrientation));
+ }
+ }
+
+
+ // RIGHT-TO-LEFT DIRECTION
+
+ Moses::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::PhraseOrientation::REO_DIR_R2L);
+
+ IFFEATUREVERBOSE(2) {
+ FEATUREVERBOSE(2, "r2lOrientation ");
+ switch (r2lOrientation) {
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
+ FEATUREVERBOSE2(2, "mono" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
+ FEATUREVERBOSE2(2, "swap" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
+ FEATUREVERBOSE2(2, "dleft" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
+ FEATUREVERBOSE2(2, "dright" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
+ // modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
+ FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
+ break;
+ default:
+ UTIL_THROW2(GetScoreProducerDescription()
+ << ": Unsupported orientation type.");
+ break;
+ }
+ }
+
+ bool delayedScoringR2L = false;
+
+ if ( ((targetIndex == currTarPhr.GetSize()-1) || !phraseOrientation.TargetSpanIsAligned(targetIndex,currTarPhr.GetSize()-1)) // boundary non-terminal in rule-final position (right boundary)
+ && (currTarPhrLHS != m_glueTargetLHS) ) { // and not glue rule
+ // delay right-to-left scoring
+
+ FEATUREVERBOSE(3, "Right boundary");
+ if (targetIndex != currTarPhr.GetSize()-1) {
+ FEATUREVERBOSE2(3, " (with targetIndex!=currTarPhr.GetSize()-1)");
+ }
+ FEATUREVERBOSE2(3, std::endl);
+
+ bool previousSourceSpanIsAligned = ( (sourceIndex > 0) && phraseOrientation.SourceSpanIsAligned(0,sourceIndex-1) );
+ bool followingSourceSpanIsAligned = ( (sourceIndex < currSrcPhr->GetSize()-1) && phraseOrientation.SourceSpanIsAligned(sourceIndex,currSrcPhr->GetSize()-1) );
+
+ FEATUREVERBOSE(4, "previousSourceSpanIsAligned = " << previousSourceSpanIsAligned << std::endl);
+ FEATUREVERBOSE(4, "followingSourceSpanIsAligned = " << followingSourceSpanIsAligned << std::endl;);
+
+ if (previousSourceSpanIsAligned && followingSourceSpanIsAligned) {
+ // discontinuous
+ r2lOrientation = Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
+ } else {
+ FEATUREVERBOSE(3, "Delaying right-to-left scoring" << std::endl);
+
+ delayedScoringR2L = true;
+ std::bitset<3> possibleFutureOrientationsR2L(0x7);
+ possibleFutureOrientationsR2L[0] = !followingSourceSpanIsAligned;
+ possibleFutureOrientationsR2L[1] = !previousSourceSpanIsAligned;
+
+ // add heuristic scores
+
+ std::vector<float> scoresR2L;
+ scoresR2L.push_back( TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityMono()) );
+ scoresR2L.push_back( TransformScore(orientationPhraseProperty->GetRightToLeftProbabilitySwap()) );
+ scoresR2L.push_back( TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous()) );
+
+ size_t heuristicScoreIndexR2L = GetHeuristicScoreIndex(scoresR2L, m_offsetR2LScores, possibleFutureOrientationsR2L);
+
+ newScores[m_offsetR2LScores+heuristicScoreIndexR2L] += scoresR2L[heuristicScoreIndexR2L];
+ state->SetRightBoundaryR2L(scoresR2L, heuristicScoreIndexR2L, possibleFutureOrientationsR2L, prevTarPhrLHS, prevState);
+
+ if ( (possibleFutureOrientationsR2L & prevState->m_rightBoundaryNonTerminalR2LPossibleFutureOrientations) == 0x4 ) {
+ // recursive: discontinuous orientation
+ FEATUREVERBOSE(5, "previous state: R2L discontinuous orientation "
+ << possibleFutureOrientationsR2L << " & " << prevState->m_rightBoundaryNonTerminalR2LPossibleFutureOrientations
+ << " = " << (possibleFutureOrientationsR2L & prevState->m_rightBoundaryNonTerminalR2LPossibleFutureOrientations)
+ << std::endl);
+ RightBoundaryR2LScoreRecursive(featureID, prevState, 0x4, newScores, accumulator);
+ state->m_rightBoundaryRecursionGuard = true; // prevent subderivation from being scored recursively multiple times
+ }
+ }
}
+
+ if (!delayedScoringR2L) {
+
+ if ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
+
+ newScores[m_offsetR2LScores+0] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityMono());
+ // if sub-derivation has right-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ RightBoundaryR2LScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);
+
+ } else if ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
+
+ newScores[m_offsetR2LScores+1] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilitySwap());
+ // if sub-derivation has right-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ RightBoundaryR2LScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);
+
+ } else if ( ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
+ ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
+ ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
+
+ newScores[m_offsetR2LScores+2] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous());
+ // if sub-derivation has right-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ RightBoundaryR2LScoreRecursive(featureID, prevState, 0x4, newScores, accumulator);
+
+ } else {
+
+ UTIL_THROW2(GetScoreProducerDescription()
+ << ": Unsupported orientation type.");
+ }
+
+ // sparse scores
+ if ( m_useSparseWord ) {
+ SparseWordR2LScore(prevHypo,accumulator,ToString(r2lOrientation));
+ }
+ if ( m_useSparseNT ) {
+ SparseNonTerminalR2LScore(prevTarPhrLHS,accumulator,ToString(r2lOrientation));
+ }
+ }
+
+ } else {
+ // abort with error message if the phrase does not translate an unknown word
+ UTIL_THROW_IF2(!prevTarPhr.GetWord(0).IsOOV(), GetScoreProducerDescription()
+ << ": Missing Orientation property. "
+ << "Please check phrase table and glue rules.");
+ }
}
accumulator->PlusEquals(this, newScores);
+
+ return state;
+}
+
+
+size_t PhraseOrientationFeature::GetHeuristicScoreIndex(const std::vector<float>& scores,
+ size_t weightsVectorOffset,
+ const std::bitset<3> possibleFutureOrientations) const
+{
+ std::vector<float> weightedScores;
+ for ( size_t i=0; i<3; ++i ) {
+ weightedScores.push_back( m_weightsVector[weightsVectorOffset+i] * scores[i] );
+ }
+
+ size_t heuristicScoreIndex = 0;
+ for (size_t i=1; i<3; ++i) {
+ if (possibleFutureOrientations[i]) {
+ if (weightedScores[i] > weightedScores[heuristicScoreIndex]) {
+ heuristicScoreIndex = i;
+ }
+ }
+ }
+
+ IFFEATUREVERBOSE(5) {
+ FEATUREVERBOSE(5, "Heuristic score computation: "
+ << "heuristicScoreIndex= " << heuristicScoreIndex);
+ for (size_t i=0; i<3; ++i)
+ FEATUREVERBOSE2(5, " m_weightsVector[" << weightsVectorOffset+i << "]= " << m_weightsVector[weightsVectorOffset+i]);
+ for (size_t i=0; i<3; ++i)
+ FEATUREVERBOSE2(5, " scores[" << i << "]= " << scores[i]);
+ for (size_t i=0; i<3; ++i)
+ FEATUREVERBOSE2(5, " weightedScores[" << i << "]= " << weightedScores[i]);
+ for (size_t i=0; i<3; ++i)
+ FEATUREVERBOSE2(5, " possibleFutureOrientations[" << i << "]= " << possibleFutureOrientations[i]);
+ if ( possibleFutureOrientations == 0x7 ) {
+ FEATUREVERBOSE2(5, " (all orientations possible)");
+ }
+ FEATUREVERBOSE2(5, std::endl);
+ }
+
+ return heuristicScoreIndex;
+}
+
+
+void PhraseOrientationFeature::LeftBoundaryL2RScoreRecursive(int featureID,
+ const PhraseOrientationFeatureState *state,
+ const std::bitset<3> orientation,
+ std::vector<float>& newScores,
+ ScoreComponentCollection* scoreBreakdown) const
+ // TODO: passing both newScores and scoreBreakdown seems redundant (scoreBreakdown needed for sparse scores)
+{
+ if (state->m_leftBoundaryIsSet) {
+ const std::string* recursiveOrientationString;
+
+ // subtract heuristic score from subderivation
+ newScores[state->m_leftBoundaryNonTerminalL2RHeuristicScoreIndex] -= state->m_leftBoundaryNonTerminalL2RScores[state->m_leftBoundaryNonTerminalL2RHeuristicScoreIndex];
+
+ // add actual score
+ std::bitset<3> recursiveOrientation = orientation;
+ if ( (orientation == 0x4) || (orientation == 0x0) ) {
+ // discontinuous
+ recursiveOrientationString = &DORIENT;
+ newScores[2] += state->GetLeftBoundaryL2RScoreDiscontinuous();
+ } else {
+ recursiveOrientation &= state->m_leftBoundaryNonTerminalL2RPossibleFutureOrientations;
+ if ( recursiveOrientation == 0x1 ) {
+ // monotone
+ recursiveOrientationString = &MORIENT;
+ newScores[0] += state->GetLeftBoundaryL2RScoreMono();
+ } else if ( recursiveOrientation == 0x2 ) {
+ // swap
+ recursiveOrientationString = &SORIENT;
+ newScores[1] += state->GetLeftBoundaryL2RScoreSwap();
+ } else if ( recursiveOrientation == 0x4 ) {
+ // discontinuous
+ recursiveOrientationString = &DORIENT;
+ newScores[2] += state->GetLeftBoundaryL2RScoreDiscontinuous();
+ } else if ( recursiveOrientation == 0x0 ) {
+ // discontinuous
+ recursiveOrientationString = &DORIENT;
+ newScores[2] += state->GetLeftBoundaryL2RScoreDiscontinuous();
+ } else {
+ UTIL_THROW2(GetScoreProducerDescription()
+ << ": Error in recursive scoring.");
+ }
+ }
+
+ if ( m_useSparseNT ) {
+ SparseNonTerminalL2RScore(state->m_leftBoundaryNonTerminalSymbol,scoreBreakdown,recursiveOrientationString);
+ }
+
+ FEATUREVERBOSE(6, "Left boundary recursion: " << orientation << " & " << state->m_leftBoundaryNonTerminalL2RPossibleFutureOrientations << " = " << recursiveOrientation
+ << " --- Subtracted heuristic score: " << state->m_leftBoundaryNonTerminalL2RScores[state->m_leftBoundaryNonTerminalL2RHeuristicScoreIndex] << std::endl);
+
+ if (!state->m_leftBoundaryRecursionGuard) {
+ // recursive call
+ const PhraseOrientationFeatureState* prevState = state->m_leftBoundaryPrevState;
+ LeftBoundaryL2RScoreRecursive(featureID, prevState, recursiveOrientation, newScores, scoreBreakdown);
+ } else {
+ FEATUREVERBOSE(6, "m_leftBoundaryRecursionGuard" << std::endl);
+ }
+ }
+}
+
+
+void PhraseOrientationFeature::RightBoundaryR2LScoreRecursive(int featureID,
+ const PhraseOrientationFeatureState *state,
+ const std::bitset<3> orientation,
+ std::vector<float>& newScores,
+ ScoreComponentCollection* scoreBreakdown) const
+ // TODO: passing both newScores and scoreBreakdown seems redundant (scoreBreakdown needed for sparse scores)
+{
+ if (state->m_rightBoundaryIsSet) {
+ const std::string* recursiveOrientationString;
+
+ // subtract heuristic score from subderivation
+ newScores[m_offsetR2LScores+state->m_rightBoundaryNonTerminalR2LHeuristicScoreIndex] -= state->m_rightBoundaryNonTerminalR2LScores[state->m_rightBoundaryNonTerminalR2LHeuristicScoreIndex];
+
+ // add actual score
+ std::bitset<3> recursiveOrientation = orientation;
+ if ( (orientation == 0x4) || (orientation == 0x0) ) {
+ // discontinuous
+ recursiveOrientationString = &DORIENT;
+ newScores[m_offsetR2LScores+2] += state->GetRightBoundaryR2LScoreDiscontinuous();
+ } else {
+ recursiveOrientation &= state->m_rightBoundaryNonTerminalR2LPossibleFutureOrientations;
+ if ( recursiveOrientation == 0x1 ) {
+ // monotone
+ recursiveOrientationString = &MORIENT;
+ newScores[m_offsetR2LScores+0] += state->GetRightBoundaryR2LScoreMono();
+ } else if ( recursiveOrientation == 0x2 ) {
+ // swap
+ recursiveOrientationString = &SORIENT;
+ newScores[m_offsetR2LScores+1] += state->GetRightBoundaryR2LScoreSwap();
+ } else if ( recursiveOrientation == 0x4 ) {
+ // discontinuous
+ recursiveOrientationString = &DORIENT;
+ newScores[m_offsetR2LScores+2] += state->GetRightBoundaryR2LScoreDiscontinuous();
+ } else if ( recursiveOrientation == 0x0 ) {
+ // discontinuous
+ recursiveOrientationString = &DORIENT;
+ newScores[m_offsetR2LScores+2] += state->GetRightBoundaryR2LScoreDiscontinuous();
+ } else {
+ UTIL_THROW2(GetScoreProducerDescription()
+ << ": Error in recursive scoring.");
+ }
+ }
+
+ if ( m_useSparseNT ) {
+ SparseNonTerminalR2LScore(state->m_rightBoundaryNonTerminalSymbol,scoreBreakdown,recursiveOrientationString);
+ }
+
+ FEATUREVERBOSE(6, "Right boundary recursion: " << orientation << " & " << state->m_rightBoundaryNonTerminalR2LPossibleFutureOrientations << " = " << recursiveOrientation
+ << " --- Subtracted heuristic score: " << state->m_rightBoundaryNonTerminalR2LScores[state->m_rightBoundaryNonTerminalR2LHeuristicScoreIndex] << std::endl);
+
+ if (!state->m_rightBoundaryRecursionGuard) {
+ // recursive call
+ const PhraseOrientationFeatureState* prevState = state->m_rightBoundaryPrevState;
+ RightBoundaryR2LScoreRecursive(featureID, prevState, recursiveOrientation, newScores, scoreBreakdown);
+ } else {
+ FEATUREVERBOSE(6, "m_rightBoundaryRecursionGuard" << std::endl);
+ }
+ }
}
-
+
+void PhraseOrientationFeature::SparseWordL2RScore(const ChartHypothesis* hypo,
+ ScoreComponentCollection* scoreBreakdown,
+ const std::string* o) const
+{
+ // target word
+
+ const ChartHypothesis* currHypo = hypo;
+ const TargetPhrase* targetPhrase = &currHypo->GetCurrTargetPhrase();
+ const Word* targetWord = &targetPhrase->GetWord(0);
+
+ // TODO: boundary words in the feature state?
+ while ( targetWord->IsNonTerminal() ) {
+ const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
+ targetPhrase->GetAlignNonTerm().GetNonTermIndexMap();
+ size_t nonTermIndex = nonTermIndexMap[0];
+ currHypo = currHypo->GetPrevHypo(nonTermIndex);
+ targetPhrase = &currHypo->GetCurrTargetPhrase();
+ targetWord = &targetPhrase->GetWord(0);
+ }
+
+ const std::string& targetWordString = (*targetWord)[0]->GetString().as_string();
+ if (targetWordString != "<s>" && targetWordString != "</s>") {
+ if ( !m_useTargetWordList || m_targetWordList.find((*targetWord)[0]) != m_targetWordList.end() ) {
+ scoreBreakdown->PlusEquals(this,
+ "L2R"+*o+"_tw_"+targetWordString,
+ 1);
+ FEATUREVERBOSE(3, "Sparse: L2R"+*o+"_tw_"+targetWordString << std::endl);
+ } else {
+ scoreBreakdown->PlusEquals(this,
+ "L2R"+*o+"_tw_OTHER",
+ 1);
+ FEATUREVERBOSE(3, "Sparse: L2R"+*o+"_tw_OTHER" << std::endl);
+ }
+ }
+
+ // source word
+
+ WordsRange sourceSpan = hypo->GetCurrSourceRange();
+ const InputType& input = hypo->GetManager().GetSource();
+ const Sentence& sourceSentence = static_cast<const Sentence&>(input);
+ const Word& sourceWord = sourceSentence.GetWord(sourceSpan.GetStartPos());
+
+ const std::string& sourceWordString = sourceWord[0]->GetString().as_string();
+ if (sourceWordString != "<s>" && sourceWordString != "</s>") {
+ if ( !m_useSourceWordList || m_sourceWordList.find(sourceWord[0]) != m_sourceWordList.end() ) {
+ scoreBreakdown->PlusEquals(this,
+ "L2R"+*o+"_sw_"+sourceWordString,
+ 1);
+ FEATUREVERBOSE(3, "Sparse: L2R"+*o+"_sw_"+sourceWordString << std::endl);
+ } else {
+ scoreBreakdown->PlusEquals(this,
+ "L2R"+*o+"_sw_OTHER",
+ 1);
+ FEATUREVERBOSE(3, "Sparse: L2R"+*o+"_sw_OTHER" << std::endl);
+ }
+ }
+}
+
+
+void PhraseOrientationFeature::SparseWordR2LScore(const ChartHypothesis* hypo,
+ ScoreComponentCollection* scoreBreakdown,
+ const std::string* o) const
+{
+ // target word
+
+ const ChartHypothesis* currHypo = hypo;
+ const TargetPhrase* targetPhrase = &currHypo->GetCurrTargetPhrase();
+ const Word* targetWord = &targetPhrase->GetWord(targetPhrase->GetSize()-1);
+
+ // TODO: boundary words in the feature state?
+ while ( targetWord->IsNonTerminal() ) {
+ const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
+ targetPhrase->GetAlignNonTerm().GetNonTermIndexMap();
+ size_t nonTermIndex = nonTermIndexMap[targetPhrase->GetSize()-1];
+ currHypo = currHypo->GetPrevHypo(nonTermIndex);
+ targetPhrase = &currHypo->GetCurrTargetPhrase();
+ targetWord = &targetPhrase->GetWord(targetPhrase->GetSize()-1);
+ }
+
+ const std::string& targetWordString = (*targetWord)[0]->GetString().as_string();
+ if (targetWordString != "<s>" && targetWordString != "</s>") {
+ if ( !m_useTargetWordList || m_targetWordList.find((*targetWord)[0]) != m_targetWordList.end() ) {
+ scoreBreakdown->PlusEquals(this,
+ "R2L"+*o+"_tw_"+targetWordString,
+ 1);
+ FEATUREVERBOSE(3, "Sparse: R2L"+*o+"_tw_"+targetWordString << std::endl);
+ } else {
+ scoreBreakdown->PlusEquals(this,
+ "R2L"+*o+"_tw_OTHER",
+ 1);
+ FEATUREVERBOSE(3, "Sparse: R2L"+*o+"_tw_OTHER" << std::endl);
+ }
+ }
+
+ // source word
+
+ WordsRange sourceSpan = hypo->GetCurrSourceRange();
+ const InputType& input = hypo->GetManager().GetSource();
+ const Sentence& sourceSentence = static_cast<const Sentence&>(input);
+ const Word& sourceWord = sourceSentence.GetWord(sourceSpan.GetEndPos());
+
+ const std::string& sourceWordString = sourceWord[0]->GetString().as_string();
+ if (sourceWordString != "<s>" && sourceWordString != "</s>") {
+ if ( !m_useSourceWordList || m_sourceWordList.find(sourceWord[0]) != m_sourceWordList.end() ) {
+ scoreBreakdown->PlusEquals(this,
+ "R2L"+*o+"_sw_"+sourceWordString,
+ 1);
+ FEATUREVERBOSE(3, "Sparse: R2L"+*o+"_sw_"+sourceWordString << std::endl);
+ } else {
+ scoreBreakdown->PlusEquals(this,
+ "R2L"+*o+"_sw_OTHER",
+ 1);
+ FEATUREVERBOSE(3, "Sparse: R2L"+*o+"_sw_OTHER" << std::endl);
+ }
+ }
+}
+
+
+void PhraseOrientationFeature::SparseNonTerminalL2RScore(const Factor* nonTerminalSymbol,
+ ScoreComponentCollection* scoreBreakdown,
+ const std::string* o) const
+{
+ if ( nonTerminalSymbol != m_glueTargetLHS ) {
+ const std::string& nonTerminalString = nonTerminalSymbol->GetString().as_string();
+ scoreBreakdown->PlusEquals(this,
+ "L2R"+*o+"_n_"+nonTerminalString,
+ 1);
+ FEATUREVERBOSE(3, "Sparse: L2R"+*o+"_n_"+nonTerminalString << std::endl);
+ }
+}
+
+
+void PhraseOrientationFeature::SparseNonTerminalR2LScore(const Factor* nonTerminalSymbol,
+ ScoreComponentCollection* scoreBreakdown,
+ const std::string* o) const
+{
+ if ( nonTerminalSymbol != m_glueTargetLHS ) {
+ const std::string& nonTerminalString = nonTerminalSymbol->GetString().as_string();
+ scoreBreakdown->PlusEquals(this,
+ "R2L"+*o+"_n_"+nonTerminalString,
+ 1);
+ FEATUREVERBOSE(3, "Sparse: R2L"+*o+"_n_"+nonTerminalString << std::endl);
+ }
+}
+
+
+const std::string* PhraseOrientationFeature::ToString(const Moses::GHKM::PhraseOrientation::REO_CLASS o) const
+{
+ if ( o == Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
+ return &MORIENT;
+
+ } else if ( o == Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
+ return &SORIENT;
+
+ } else if ( ( o == Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
+ ( o == Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
+ ( o == Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
+ return &DORIENT;
+
+ } else {
+ UTIL_THROW2(GetScoreProducerDescription()
+ << ": Unsupported orientation type.");
+ }
+ return NULL;
+}
+
+
}
diff --git a/moses/FF/PhraseOrientationFeature.h b/moses/FF/PhraseOrientationFeature.h
index a367bc58d..9abbe9a8e 100644
--- a/moses/FF/PhraseOrientationFeature.h
+++ b/moses/FF/PhraseOrientationFeature.h
@@ -1,18 +1,305 @@
+//
+// REFERENCE
+// ---------
+// When using this feature, please cite:
+//
+// Matthias Huck, Joern Wuebker, Felix Rietig, and Hermann Ney.
+// A Phrase Orientation Model for Hierarchical Machine Translation.
+// In ACL 2013 Eighth Workshop on Statistical Machine Translation (WMT 2013), pages 452-463, Sofia, Bulgaria, August 2013.
+//
+
#pragma once
+#include <bitset>
#include <string>
-#include "StatelessFeatureFunction.h"
+#include <vector>
+#include "StatefulFeatureFunction.h"
#include "FFState.h"
#include "moses/Factor.h"
#include "phrase-extract/extract-ghkm/PhraseOrientation.h"
+#include "moses/PP/OrientationPhraseProperty.h"
+#include <boost/unordered_set.hpp>
+
namespace Moses
{
+class PhraseOrientationFeatureState : public FFState
+{
+public:
+
+ friend class PhraseOrientationFeature;
+
+ PhraseOrientationFeatureState(bool distinguishStates, bool useSparseWord, bool useSparseNT)
+ : m_leftBoundaryNonTerminalL2RScores(3,0)
+ , m_rightBoundaryNonTerminalR2LScores(3,0)
+ , m_leftBoundaryNonTerminalL2RPossibleFutureOrientations(0x7)
+ , m_rightBoundaryNonTerminalR2LPossibleFutureOrientations(0x7)
+ , m_leftBoundaryRecursionGuard(false)
+ , m_rightBoundaryRecursionGuard(false)
+ , m_leftBoundaryIsSet(false)
+ , m_rightBoundaryIsSet(false)
+ , m_distinguishStates(distinguishStates)
+ , m_useSparseWord(useSparseWord)
+ , m_useSparseNT(useSparseNT)
+ {}
+
+ void SetLeftBoundaryL2R(const std::vector<float> &scores,
+ size_t heuristicScoreIndex,
+ std::bitset<3> &possibleFutureOrientations,
+ const Factor* leftBoundaryNonTerminalSymbol,
+ const PhraseOrientationFeatureState* prevState) {
+ for (size_t i=0; i<3; ++i) {
+ m_leftBoundaryNonTerminalL2RScores[i] = scores[i];
+ m_leftBoundaryNonTerminalL2RPossibleFutureOrientations[i] = possibleFutureOrientations[i];
+ }
+ m_leftBoundaryNonTerminalL2RHeuristicScoreIndex = heuristicScoreIndex;
+ m_leftBoundaryNonTerminalSymbol = leftBoundaryNonTerminalSymbol;
+ m_leftBoundaryPrevState = prevState;
+ m_leftBoundaryIsSet = true;
+ }
+
+ void SetRightBoundaryR2L(const std::vector<float> &scores,
+ size_t heuristicScoreIndex,
+ std::bitset<3> &possibleFutureOrientations,
+ const Factor* rightBoundaryNonTerminalSymbol,
+ const PhraseOrientationFeatureState* prevState) {
+ for (size_t i=0; i<3; ++i) {
+ m_rightBoundaryNonTerminalR2LScores[i] = scores[i];
+ m_rightBoundaryNonTerminalR2LPossibleFutureOrientations[i] = possibleFutureOrientations[i];
+ }
+ m_rightBoundaryNonTerminalR2LHeuristicScoreIndex = heuristicScoreIndex;
+ m_rightBoundaryNonTerminalSymbol = rightBoundaryNonTerminalSymbol;
+ m_rightBoundaryPrevState = prevState;
+ m_rightBoundaryIsSet = true;
+ }
+
+ float GetLeftBoundaryL2RScoreMono() const {
+ return m_leftBoundaryNonTerminalL2RScores[0];
+ }
+
+ float GetLeftBoundaryL2RScoreSwap() const {
+ return m_leftBoundaryNonTerminalL2RScores[1];
+ }
+
+ float GetLeftBoundaryL2RScoreDiscontinuous() const {
+ return m_leftBoundaryNonTerminalL2RScores[2];
+ }
+
+
+ float GetRightBoundaryR2LScoreMono() const {
+ return m_rightBoundaryNonTerminalR2LScores[0];
+ }
+
+ float GetRightBoundaryR2LScoreSwap() const {
+ return m_rightBoundaryNonTerminalR2LScores[1];
+ }
+
+ float GetRightBoundaryR2LScoreDiscontinuous() const {
+ return m_rightBoundaryNonTerminalR2LScores[2];
+ }
+
+
+ int Compare(const FFState& other) const {
+ if (!m_distinguishStates) {
+ return 0;
+ }
+
+ const PhraseOrientationFeatureState &otherState = static_cast<const PhraseOrientationFeatureState&>(other);
+
+ if (!m_leftBoundaryIsSet && !otherState.m_leftBoundaryIsSet &&
+ !m_rightBoundaryIsSet && !otherState.m_rightBoundaryIsSet) {
+ return 0;
+ }
+ if (m_leftBoundaryIsSet && !otherState.m_leftBoundaryIsSet) {
+ return 1;
+ }
+ if (!m_leftBoundaryIsSet && otherState.m_leftBoundaryIsSet) {
+ return -1;
+ }
+ if (m_rightBoundaryIsSet && !otherState.m_rightBoundaryIsSet) {
+ return 1;
+ }
+ if (!m_rightBoundaryIsSet && otherState.m_rightBoundaryIsSet) {
+ return -1;
+ }
+
+ if (m_leftBoundaryIsSet) {
+ int compareLeft = CompareLeftBoundaryRecursive(*this, otherState, m_useSparseNT);
+ if (compareLeft != 0) {
+ return compareLeft;
+ }
+ }
+ if (m_rightBoundaryIsSet) {
+ int compareRight = CompareRightBoundaryRecursive(*this, otherState, m_useSparseNT);
+ if (compareRight != 0) {
+ return compareRight;
+ }
+ }
+
+ return 0;
+ };
+
+protected:
+
+ static int CompareLeftBoundaryRecursive(const PhraseOrientationFeatureState& state, const PhraseOrientationFeatureState& otherState, bool useSparseNT) {
+ if (!state.m_leftBoundaryIsSet && !otherState.m_leftBoundaryIsSet) {
+ return 0;
+ }
+ if (state.m_leftBoundaryIsSet && !otherState.m_leftBoundaryIsSet) {
+ return 1;
+ }
+ if (!state.m_leftBoundaryIsSet && otherState.m_leftBoundaryIsSet) {
+ return -1;
+ }
+
+ if (useSparseNT) {
+ if ( otherState.m_leftBoundaryNonTerminalSymbol < state.m_leftBoundaryNonTerminalSymbol ) {
+ return 1;
+ }
+ if ( state.m_leftBoundaryNonTerminalSymbol < otherState.m_leftBoundaryNonTerminalSymbol ) {
+ return -1;
+ }
+ }
+
+ if ( otherState.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex < state.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex ) {
+ return 1;
+ }
+ if ( state.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex < otherState.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex ) {
+ return -1;
+ }
+ if ( Smaller(otherState.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations, state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations) ) {
+ return 1;
+ }
+ if ( Smaller(state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations, otherState.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations) ) {
+ return -1;
+ }
+ for (size_t i=0; i<state.m_leftBoundaryNonTerminalL2RScores.size(); ++i) {
+ // compare only for possible future orientations
+ // (possible future orientations of state and otherState are the same at this point due to the previous two conditional blocks)
+ if (state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations[i]) {
+ if (state.m_leftBoundaryNonTerminalL2RScores[i] > otherState.m_leftBoundaryNonTerminalL2RScores[i]) {
+ return 1;
+ }
+ if (state.m_leftBoundaryNonTerminalL2RScores[i] < otherState.m_leftBoundaryNonTerminalL2RScores[i]) {
+ return -1;
+ }
+ }
+ }
+
+ if (state.m_leftBoundaryRecursionGuard && otherState.m_leftBoundaryRecursionGuard) {
+ return 0;
+ }
+ if (state.m_leftBoundaryRecursionGuard && !otherState.m_leftBoundaryRecursionGuard) {
+ return 1;
+ }
+ if (!state.m_leftBoundaryRecursionGuard && otherState.m_leftBoundaryRecursionGuard) {
+ return -1;
+ }
+
+ const PhraseOrientationFeatureState *prevState = state.m_leftBoundaryPrevState;
+ const PhraseOrientationFeatureState *otherPrevState = otherState.m_leftBoundaryPrevState;
-class PhraseOrientationFeature : public StatelessFeatureFunction
+ return CompareLeftBoundaryRecursive(*prevState, *otherPrevState, useSparseNT);
+ };
+
+ static int CompareRightBoundaryRecursive(const PhraseOrientationFeatureState& state, const PhraseOrientationFeatureState& otherState, bool useSparseNT) {
+ if (!state.m_rightBoundaryIsSet && !otherState.m_rightBoundaryIsSet) {
+ return 0;
+ }
+ if (state.m_rightBoundaryIsSet && !otherState.m_rightBoundaryIsSet) {
+ return 1;
+ }
+ if (!state.m_rightBoundaryIsSet && otherState.m_rightBoundaryIsSet) {
+ return -1;
+ }
+
+ if (useSparseNT) {
+ if ( otherState.m_rightBoundaryNonTerminalSymbol < state.m_rightBoundaryNonTerminalSymbol ) {
+ return 1;
+ }
+ if ( state.m_rightBoundaryNonTerminalSymbol < otherState.m_rightBoundaryNonTerminalSymbol ) {
+ return -1;
+ }
+ }
+
+ if ( otherState.m_rightBoundaryNonTerminalR2LHeuristicScoreIndex < state.m_rightBoundaryNonTerminalR2LHeuristicScoreIndex ) {
+ return 1;
+ }
+ if ( state.m_rightBoundaryNonTerminalR2LHeuristicScoreIndex < otherState.m_rightBoundaryNonTerminalR2LHeuristicScoreIndex ) {
+ return -1;
+ }
+ if ( Smaller(otherState.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations, state.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations) ) {
+ return 1;
+ }
+ if ( Smaller(state.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations, otherState.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations) ) {
+ return -1;
+ }
+ for (size_t i=0; i<state.m_rightBoundaryNonTerminalR2LScores.size(); ++i) {
+ // compare only for possible future orientations
+ // (possible future orientations of state and otherState are the same at this point due to the previous two conditional blocks)
+ if ( state.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations[i]) {
+ if (state.m_rightBoundaryNonTerminalR2LScores[i] > otherState.m_rightBoundaryNonTerminalR2LScores[i]) {
+ return 1;
+ }
+ if (state.m_rightBoundaryNonTerminalR2LScores[i] < otherState.m_rightBoundaryNonTerminalR2LScores[i]) {
+ return -1;
+ }
+ }
+ }
+
+ if (state.m_rightBoundaryRecursionGuard && otherState.m_rightBoundaryRecursionGuard) {
+ return 0;
+ }
+ if (state.m_rightBoundaryRecursionGuard && !otherState.m_rightBoundaryRecursionGuard) {
+ return 1;
+ }
+ if (!state.m_rightBoundaryRecursionGuard && otherState.m_rightBoundaryRecursionGuard) {
+ return -1;
+ }
+
+ const PhraseOrientationFeatureState *prevState = state.m_rightBoundaryPrevState;
+ const PhraseOrientationFeatureState *otherPrevState = otherState.m_rightBoundaryPrevState;
+
+ return CompareRightBoundaryRecursive(*prevState, *otherPrevState, useSparseNT);
+ };
+
+ template<std::size_t N> static bool Smaller(const std::bitset<N>& x, const std::bitset<N>& y) {
+ for (size_t i=0; i<N; ++i) {
+ if (x[i] ^ y[i])
+ return y[i];
+ }
+ return false;
+ }
+
+ std::vector<float> m_leftBoundaryNonTerminalL2RScores;
+ std::vector<float> m_rightBoundaryNonTerminalR2LScores;
+
+ size_t m_leftBoundaryNonTerminalL2RHeuristicScoreIndex;
+ size_t m_rightBoundaryNonTerminalR2LHeuristicScoreIndex;
+
+ std::bitset<3> m_leftBoundaryNonTerminalL2RPossibleFutureOrientations;
+ std::bitset<3> m_rightBoundaryNonTerminalR2LPossibleFutureOrientations;
+
+ bool m_leftBoundaryRecursionGuard;
+ bool m_rightBoundaryRecursionGuard;
+ bool m_leftBoundaryIsSet;
+ bool m_rightBoundaryIsSet;
+ const PhraseOrientationFeatureState* m_leftBoundaryPrevState;
+ const PhraseOrientationFeatureState* m_rightBoundaryPrevState;
+ const bool m_distinguishStates;
+ const bool m_useSparseWord;
+ const bool m_useSparseNT;
+ const Factor* m_leftBoundaryNonTerminalSymbol;
+ const Factor* m_rightBoundaryNonTerminalSymbol;
+};
+
+
+
+class PhraseOrientationFeature : public StatefulFeatureFunction
{
public:
+
PhraseOrientationFeature(const std::string &line);
~PhraseOrientationFeature() {
@@ -22,30 +309,106 @@ public:
return true;
}
+ virtual const FFState* EmptyHypothesisState(const InputType &input) const {
+ return new PhraseOrientationFeatureState(m_distinguishStates,m_useSparseWord,m_useSparseNT);
+ }
+
void SetParameter(const std::string& key, const std::string& value);
+
+ void Load();
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const
{};
- void EvaluateWhenApplied(
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const
+ {}
+
+ FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
- ScoreComponentCollection* accumulator) const
- {};
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const {
+ UTIL_THROW2(GetScoreProducerDescription()
+ << ": EvaluateWhenApplied(const Hypothesis&, ...) not implemented");
+ return new PhraseOrientationFeatureState(m_distinguishStates,m_useSparseWord,m_useSparseNT);
+ };
- void EvaluateWhenApplied(
+ FFState* EvaluateWhenApplied(
const ChartHypothesis& cur_hypo,
+ int featureID, // used to index the state in the previous hypotheses
ScoreComponentCollection* accumulator) const;
+protected:
+
+ void LoadWordList(const std::string& filename,
+ boost::unordered_set<const Factor*>& list);
+
+ void LookaheadScore(const OrientationPhraseProperty *orientationPhraseProperty,
+ ScoreComponentCollection &scoreBreakdown,
+ bool subtract=false) const;
+
+ size_t GetHeuristicScoreIndex(const std::vector<float>& scores,
+ size_t weightsVectorOffset,
+ const std::bitset<3> possibleFutureOrientations = 0x7) const;
+
+ void LeftBoundaryL2RScoreRecursive(int featureID,
+ const PhraseOrientationFeatureState *state,
+ const std::bitset<3> orientation,
+ std::vector<float>& newScores,
+ ScoreComponentCollection* scoreBreakdown) const;
+
+ void RightBoundaryR2LScoreRecursive(int featureID,
+ const PhraseOrientationFeatureState *state,
+ const std::bitset<3> orientation,
+ std::vector<float>& newScores,
+ ScoreComponentCollection* scoreBreakdown) const;
+
+ void SparseWordL2RScore(const ChartHypothesis* hypo,
+ ScoreComponentCollection* scoreBreakdown,
+ const std::string* o) const;
+
+ void SparseWordR2LScore(const ChartHypothesis* hypo,
+ ScoreComponentCollection* scoreBreakdown,
+ const std::string* o) const;
+
+ void SparseNonTerminalL2RScore(const Factor* nonTerminalSymbol,
+ ScoreComponentCollection* scoreBreakdown,
+ const std::string* o) const;
+
+ void SparseNonTerminalR2LScore(const Factor* nonTerminalSymbol,
+ ScoreComponentCollection* scoreBreakdown,
+ const std::string* o) const;
+
+ const std::string* ToString(const Moses::GHKM::PhraseOrientation::REO_CLASS o) const;
+
+ static const std::string MORIENT;
+ static const std::string SORIENT;
+ static const std::string DORIENT;
+
+ std::string m_glueTargetLHSStr;
+ const Factor* m_glueTargetLHS;
+ bool m_distinguishStates;
+ bool m_useSparseWord;
+ bool m_useSparseNT;
+ size_t m_offsetR2LScores;
+ const std::vector<float> m_weightsVector;
+ std::string m_filenameTargetWordList;
+ boost::unordered_set<const Factor*> m_targetWordList;
+ bool m_useTargetWordList;
+ std::string m_filenameSourceWordList;
+ boost::unordered_set<const Factor*> m_sourceWordList;
+ bool m_useSourceWordList;
+
};
diff --git a/moses/FF/PhrasePairFeature.cpp b/moses/FF/PhrasePairFeature.cpp
index 6daab7e25..0bf5f71f9 100644
--- a/moses/FF/PhrasePairFeature.cpp
+++ b/moses/FF/PhrasePairFeature.cpp
@@ -107,11 +107,11 @@ void PhrasePairFeature::Load()
}
void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
const Phrase& source = inputPath.GetPhrase();
if (m_simple) {
diff --git a/moses/FF/PhrasePairFeature.h b/moses/FF/PhrasePairFeature.h
index b0f380d0a..ff22340e9 100644
--- a/moses/FF/PhrasePairFeature.h
+++ b/moses/FF/PhrasePairFeature.h
@@ -41,25 +41,28 @@ public:
bool IsUseable(const FactorMask &mask) const;
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
-
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis& hypo,
- ScoreComponentCollection*) const
- {}
+ ScoreComponentCollection*) const {
+ }
};
diff --git a/moses/FF/PhrasePenalty.cpp b/moses/FF/PhrasePenalty.cpp
index cd1b735df..e4ee294fa 100644
--- a/moses/FF/PhrasePenalty.cpp
+++ b/moses/FF/PhrasePenalty.cpp
@@ -9,41 +9,39 @@ using namespace std;
namespace Moses
{
PhrasePenalty::PhrasePenalty(const std::string &line)
-: StatelessFeatureFunction(1, line)
-, m_perPhraseTable(false)
+ : StatelessFeatureFunction(1, line)
+ , m_perPhraseTable(false)
{
ReadParameters();
}
void PhrasePenalty::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
if (m_perPhraseTable) {
- const PhraseDictionary *pt = targetPhrase.GetContainer();
- if (pt) {
- size_t ptId = pt->GetId();
- UTIL_THROW_IF2(ptId >= m_numScoreComponents, "Wrong number of scores");
+ const PhraseDictionary *pt = targetPhrase.GetContainer();
+ if (pt) {
+ size_t ptId = pt->GetId();
+ UTIL_THROW_IF2(ptId >= m_numScoreComponents, "Wrong number of scores");
- vector<float> scores(m_numScoreComponents, 0);
- scores[ptId] = 1.0f;
+ vector<float> scores(m_numScoreComponents, 0);
+ scores[ptId] = 1.0f;
- scoreBreakdown.Assign(this, scores);
- }
+ scoreBreakdown.Assign(this, scores);
+ }
- }
- else {
- scoreBreakdown.Assign(this, 1.0f);
+ } else {
+ scoreBreakdown.Assign(this, 1.0f);
}
}
void PhrasePenalty::SetParameter(const std::string& key, const std::string& value)
{
if (key == "per-phrase-table") {
- m_perPhraseTable =Scan<bool>(value);
- }
- else {
+ m_perPhraseTable =Scan<bool>(value);
+ } else {
StatelessFeatureFunction::SetParameter(key, value);
}
}
diff --git a/moses/FF/PhrasePenalty.h b/moses/FF/PhrasePenalty.h
index 80635b4e1..044184755 100644
--- a/moses/FF/PhrasePenalty.h
+++ b/moses/FF/PhrasePenalty.h
@@ -15,28 +15,31 @@ public:
}
virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const Syntax::SHyperedge &hyperedge,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
void SetParameter(const std::string& key, const std::string& value);
protected:
diff --git a/moses/FF/ReferenceComparison.cpp b/moses/FF/ReferenceComparison.cpp
index b11d133c2..80dcbd234 100644
--- a/moses/FF/ReferenceComparison.cpp
+++ b/moses/FF/ReferenceComparison.cpp
@@ -3,7 +3,7 @@
namespace Moses
{
ReferenceComparison::ReferenceComparison(const std::string &line)
-:StatelessFeatureFunction(0, line)
+ :StatelessFeatureFunction(0, line)
{
}
diff --git a/moses/FF/ReferenceComparison.h b/moses/FF/ReferenceComparison.h
index 62cf15ced..c28cdc5d7 100644
--- a/moses/FF/ReferenceComparison.h
+++ b/moses/FF/ReferenceComparison.h
@@ -10,35 +10,41 @@ namespace Moses
class ReferenceComparison : public StatelessFeatureFunction
{
public:
- ReferenceComparison(const std::string &line);
-
- virtual bool IsUseable(const FactorMask &mask) const
- { return true; }
-
- virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
-
- virtual void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
-
- virtual void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
-
- virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
-
- std::vector<float> DefaultWeights() const
- { return std::vector<float>(); }
+ ReferenceComparison(const std::string &line);
+
+ virtual bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
+
+ virtual void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
+
+ virtual void EvaluateWithSourceContext(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
+ virtual void EvaluateWhenApplied(const Hypothesis& hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
+
+ virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
+
+ std::vector<float> DefaultWeights() const {
+ return std::vector<float>();
+ }
protected:
diff --git a/moses/FF/RuleScope.cpp b/moses/FF/RuleScope.cpp
index ed329c4ca..5f38a59ae 100644
--- a/moses/FF/RuleScope.cpp
+++ b/moses/FF/RuleScope.cpp
@@ -5,8 +5,8 @@
namespace Moses
{
RuleScope::RuleScope(const std::string &line)
-:StatelessFeatureFunction(1, line)
-,m_sourceSyntax(true)
+ :StatelessFeatureFunction(1, line)
+ ,m_sourceSyntax(true)
{
}
@@ -17,9 +17,9 @@ bool IsAmbiguous(const Word &word, bool sourceSyntax)
}
void RuleScope::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
// adjacent non-term count as 1 ammbiguity, rather than 2 as in rule scope
// source can't be empty, right?
@@ -27,23 +27,22 @@ void RuleScope::EvaluateInIsolation(const Phrase &source
int count = 0;
for (size_t i = 0; i < source.GetSize() - 0; ++i) {
- const Word &word = source.GetWord(i);
- bool ambiguous = IsAmbiguous(word, m_sourceSyntax);
- if (ambiguous) {
- ++count;
- }
- else {
- if (count > 0) {
- score += count;
- }
- count = -1;
- }
+ const Word &word = source.GetWord(i);
+ bool ambiguous = IsAmbiguous(word, m_sourceSyntax);
+ if (ambiguous) {
+ ++count;
+ } else {
+ if (count > 0) {
+ score += count;
+ }
+ count = -1;
+ }
}
// 1st & last always adjacent to ambiguity
++count;
if (count > 0) {
- score += count;
+ score += count;
}
scoreBreakdown.PlusEquals(this, score);
@@ -52,7 +51,7 @@ void RuleScope::EvaluateInIsolation(const Phrase &source
void RuleScope::SetParameter(const std::string& key, const std::string& value)
{
if (key == "source-syntax") {
- m_sourceSyntax = Scan<bool>(value);
+ m_sourceSyntax = Scan<bool>(value);
} else {
StatelessFeatureFunction::SetParameter(key, value);
}
diff --git a/moses/FF/RuleScope.h b/moses/FF/RuleScope.h
index a2c9e06f3..8bf7b7670 100644
--- a/moses/FF/RuleScope.h
+++ b/moses/FF/RuleScope.h
@@ -9,33 +9,39 @@ namespace Moses
class RuleScope : public StatelessFeatureFunction
{
public:
- RuleScope(const std::string &line);
+ RuleScope(const std::string &line);
- virtual bool IsUseable(const FactorMask &mask) const
- { return true; }
+ virtual bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
- virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ virtual void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
- virtual void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ virtual void EvaluateWithSourceContext(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
- virtual void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
- virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
- void SetParameter(const std::string& key, const std::string& value);
+ virtual void EvaluateWhenApplied(const Hypothesis& hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
+
+ virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
+
+ void SetParameter(const std::string& key, const std::string& value);
protected:
bool m_sourceSyntax;
diff --git a/moses/FF/SetSourcePhrase.cpp b/moses/FF/SetSourcePhrase.cpp
index f89683f28..115affa52 100644
--- a/moses/FF/SetSourcePhrase.cpp
+++ b/moses/FF/SetSourcePhrase.cpp
@@ -4,18 +4,18 @@
namespace Moses
{
SetSourcePhrase::SetSourcePhrase(const std::string &line)
-:StatelessFeatureFunction(0, line)
+ :StatelessFeatureFunction(0, line)
{
m_tuneable = false;
ReadParameters();
}
void SetSourcePhrase::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
- targetPhrase.SetRuleSource(source);
+ targetPhrase.SetRuleSource(source);
}
}
diff --git a/moses/FF/SetSourcePhrase.h b/moses/FF/SetSourcePhrase.h
index 81f293dde..e34e618f2 100644
--- a/moses/FF/SetSourcePhrase.h
+++ b/moses/FF/SetSourcePhrase.h
@@ -11,32 +11,38 @@ class SetSourcePhrase : public StatelessFeatureFunction
public:
SetSourcePhrase(const std::string &line);
- virtual bool IsUseable(const FactorMask &mask) const
- { return true; }
+ virtual bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
virtual void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
virtual void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
- std::vector<float> DefaultWeights() const
- { return std::vector<float>(); }
+ std::vector<float> DefaultWeights() const {
+ return std::vector<float>();
+ }
};
diff --git a/moses/FF/SkeletonChangeInput.cpp b/moses/FF/SkeletonChangeInput.cpp
index 74a85ba5e..7ab267d96 100644
--- a/moses/FF/SkeletonChangeInput.cpp
+++ b/moses/FF/SkeletonChangeInput.cpp
@@ -17,9 +17,9 @@ SkeletonChangeInput::SkeletonChangeInput(const std::string &line)
}
void SkeletonChangeInput::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
// dense scores
vector<float> newScores(m_numScoreComponents);
@@ -33,26 +33,30 @@ void SkeletonChangeInput::EvaluateInIsolation(const Phrase &source
}
void SkeletonChangeInput::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
- if (targetPhrase.GetNumNonTerminals()) {
- vector<float> newScores(m_numScoreComponents);
- newScores[0] = - std::numeric_limits<float>::infinity();
- scoreBreakdown.PlusEquals(this, newScores);
- }
+ if (targetPhrase.GetNumNonTerminals()) {
+ vector<float> newScores(m_numScoreComponents);
+ newScores[0] = - std::numeric_limits<float>::infinity();
+ scoreBreakdown.PlusEquals(this, newScores);
+ }
}
+void SkeletonChangeInput::EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const
+{}
+
void SkeletonChangeInput::EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
+ ScoreComponentCollection* accumulator) const
{}
void SkeletonChangeInput::EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
+ ScoreComponentCollection* accumulator) const
{}
void SkeletonChangeInput::ChangeSource(InputType *&input) const
@@ -66,16 +70,16 @@ void SkeletonChangeInput::ChangeSource(InputType *&input) const
size_t size = sentence->GetSize();
for (size_t i = 0; i < size; ++i) {
- Word &word = sentence->Phrase::GetWord(i);
- const Factor *factor0 = word[0];
+ Word &word = sentence->Phrase::GetWord(i);
+ const Factor *factor0 = word[0];
- std::string str = factor0->GetString().as_string();
- if (str.length() > 4) {
- str = str.substr(0, 4);
- }
+ std::string str = factor0->GetString().as_string();
+ if (str.length() > 4) {
+ str = str.substr(0, 4);
+ }
- const Factor *factor1 = fc.AddFactor(str);
- word.SetFactor(1, factor1);
+ const Factor *factor1 = fc.AddFactor(str);
+ word.SetFactor(1, factor1);
}
}
diff --git a/moses/FF/SkeletonChangeInput.h b/moses/FF/SkeletonChangeInput.h
index 07b19e768..23ede5c97 100644
--- a/moses/FF/SkeletonChangeInput.h
+++ b/moses/FF/SkeletonChangeInput.h
@@ -16,22 +16,26 @@ public:
}
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void ChangeSource(InputType *&input) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const;
+
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
void SetParameter(const std::string& key, const std::string& value);
diff --git a/moses/FF/SkeletonStatefulFF.cpp b/moses/FF/SkeletonStatefulFF.cpp
index fe81aeeae..931556007 100644
--- a/moses/FF/SkeletonStatefulFF.cpp
+++ b/moses/FF/SkeletonStatefulFF.cpp
@@ -24,17 +24,21 @@ SkeletonStatefulFF::SkeletonStatefulFF(const std::string &line)
}
void SkeletonStatefulFF::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{}
void SkeletonStatefulFF::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
+{}
+
+void SkeletonStatefulFF::EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const
{}
FFState* SkeletonStatefulFF::EvaluateWhenApplied(
diff --git a/moses/FF/SkeletonStatefulFF.h b/moses/FF/SkeletonStatefulFF.h
index 6fa26803e..cc0bc07a0 100644
--- a/moses/FF/SkeletonStatefulFF.h
+++ b/moses/FF/SkeletonStatefulFF.h
@@ -12,8 +12,8 @@ class SkeletonState : public FFState
int m_targetLen;
public:
SkeletonState(int targetLen)
- :m_targetLen(targetLen)
- {}
+ :m_targetLen(targetLen) {
+ }
int Compare(const FFState& other) const;
};
@@ -31,15 +31,19 @@ public:
}
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const;
+
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
diff --git a/moses/FF/SkeletonStatelessFF.cpp b/moses/FF/SkeletonStatelessFF.cpp
index 80c7d130e..209409a8e 100644
--- a/moses/FF/SkeletonStatelessFF.cpp
+++ b/moses/FF/SkeletonStatelessFF.cpp
@@ -14,9 +14,9 @@ SkeletonStatelessFF::SkeletonStatelessFF(const std::string &line)
}
void SkeletonStatelessFF::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
// dense scores
vector<float> newScores(m_numScoreComponents);
@@ -30,26 +30,30 @@ void SkeletonStatelessFF::EvaluateInIsolation(const Phrase &source
}
void SkeletonStatelessFF::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
- if (targetPhrase.GetNumNonTerminals()) {
- vector<float> newScores(m_numScoreComponents);
- newScores[0] = - std::numeric_limits<float>::infinity();
- scoreBreakdown.PlusEquals(this, newScores);
- }
-
+ if (targetPhrase.GetNumNonTerminals()) {
+ vector<float> newScores(m_numScoreComponents);
+ newScores[0] = - std::numeric_limits<float>::infinity();
+ scoreBreakdown.PlusEquals(this, newScores);
+ }
}
+void SkeletonStatelessFF::EvaluateTranslationOptionListWithSourceContext(const InputType &input
+
+ , const TranslationOptionList &translationOptionList) const
+{}
+
void SkeletonStatelessFF::EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
+ ScoreComponentCollection* accumulator) const
{}
void SkeletonStatelessFF::EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
+ ScoreComponentCollection* accumulator) const
{}
void SkeletonStatelessFF::SetParameter(const std::string& key, const std::string& value)
diff --git a/moses/FF/SkeletonStatelessFF.h b/moses/FF/SkeletonStatelessFF.h
index 520ec1405..7fb6634c2 100644
--- a/moses/FF/SkeletonStatelessFF.h
+++ b/moses/FF/SkeletonStatelessFF.h
@@ -16,19 +16,24 @@ public:
}
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const;
+
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
+
void SetParameter(const std::string& key, const std::string& value);
diff --git a/moses/FF/SkeletonTranslationOptionListFeature.h b/moses/FF/SkeletonTranslationOptionListFeature.h
new file mode 100644
index 000000000..1d88717e1
--- /dev/null
+++ b/moses/FF/SkeletonTranslationOptionListFeature.h
@@ -0,0 +1,67 @@
+#pragma once
+
+#include <string>
+#include "StatelessFeatureFunction.h"
+
+namespace Moses
+{
+
+class SkeletonTranslationOptionListFeature : public StatelessFeatureFunction
+{
+public:
+ SkeletonTranslationOptionListFeature(const std::string &line)
+ :StatelessFeatureFunction(1, line) {
+ ReadParameters();
+ }
+
+ bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
+
+ void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
+
+ void EvaluateWithSourceContext(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ vector<float> newScores(m_numScoreComponents);
+ newScores[0] = translationOptionList.size();
+
+ TranslationOptionList::const_iterator iterTransOpt;
+ for(iterTransOpt = translationOptionList.begin() ;
+ iterTransOpt != translationOptionList.end() ; ++iterTransOpt) {
+ TranslationOption &transOpt = **iterTransOpt;
+
+ ScoreComponentCollection &scoreBreakDown = transOpt.GetScoreBreakdown();
+ scoreBreakDown.PlusEquals(this, newScores);
+
+ transOpt.UpdateScore();
+ }
+ }
+
+ void EvaluateWhenApplied(const Hypothesis& hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
+
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
+
+
+ void SetParameter(const std::string& key, const std::string& value) {
+ }
+
+};
+
+}
+
diff --git a/moses/FF/SoftMatchingFeature.cpp b/moses/FF/SoftMatchingFeature.cpp
index 0475547da..b2d8e7ea5 100644
--- a/moses/FF/SoftMatchingFeature.cpp
+++ b/moses/FF/SoftMatchingFeature.cpp
@@ -24,8 +24,8 @@ void SoftMatchingFeature::SetParameter(const std::string& key, const std::string
m_tuneable = Scan<bool>(value);
} else if (key == "filterable") { //ignore
} else if (key == "path") {
- const std::string filePath = value;
- Load(filePath);
+ const std::string filePath = value;
+ Load(filePath);
} else {
UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value);
}
@@ -35,34 +35,34 @@ void SoftMatchingFeature::SetParameter(const std::string& key, const std::string
bool SoftMatchingFeature::Load(const std::string& filePath)
{
- StaticData &staticData = StaticData::InstanceNonConst();
+ StaticData &staticData = StaticData::InstanceNonConst();
- InputFileStream inStream(filePath);
- std::string line;
- while(getline(inStream, line)) {
- std::vector<std::string> tokens = Tokenize(line);
- UTIL_THROW_IF2(tokens.size() != 2, "Error: wrong format of SoftMatching file: must have two nonterminals per line");
+ InputFileStream inStream(filePath);
+ std::string line;
+ while(getline(inStream, line)) {
+ std::vector<std::string> tokens = Tokenize(line);
+ UTIL_THROW_IF2(tokens.size() != 2, "Error: wrong format of SoftMatching file: must have two nonterminals per line");
- // no soft matching necessary if LHS and RHS are the same
- if (tokens[0] == tokens[1]) {
- continue;
- }
+ // no soft matching necessary if LHS and RHS are the same
+ if (tokens[0] == tokens[1]) {
+ continue;
+ }
- Word LHS, RHS;
- LHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), tokens[0], true);
- RHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), tokens[1], true);
+ Word LHS, RHS;
+ LHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), tokens[0], true);
+ RHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), tokens[1], true);
- m_softMatches[RHS[0]->GetId()].push_back(LHS);
- GetOrSetFeatureName(RHS, LHS);
- }
+ m_softMatches[RHS[0]->GetId()].push_back(LHS);
+ GetOrSetFeatureName(RHS, LHS);
+ }
- staticData.SetSoftMatches(m_softMatches);
+ staticData.SetSoftMatches(m_softMatches);
- return true;
+ return true;
}
void SoftMatchingFeature::EvaluateWhenApplied(const ChartHypothesis& hypo,
- ScoreComponentCollection* accumulator) const
+ ScoreComponentCollection* accumulator) const
{
const TargetPhrase& target = hypo.GetCurrTargetPhrase();
@@ -87,7 +87,8 @@ void SoftMatchingFeature::EvaluateWhenApplied(const ChartHypothesis& hypo,
}
// when loading, or when we notice that non-terminals have been added after loading, we resize vectors
-void SoftMatchingFeature::ResizeCache() const {
+void SoftMatchingFeature::ResizeCache() const
+{
FactorCollection& fc = FactorCollection::Instance();
size_t numNonTerminals = fc.GetNumNonTerminals();
@@ -98,7 +99,8 @@ void SoftMatchingFeature::ResizeCache() const {
}
-const std::string& SoftMatchingFeature::GetOrSetFeatureName(const Word& RHS, const Word& LHS) const {
+const std::string& SoftMatchingFeature::GetOrSetFeatureName(const Word& RHS, const Word& LHS) const
+{
try {
#ifdef WITH_THREADS //try read-only lock
boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
@@ -107,23 +109,22 @@ const std::string& SoftMatchingFeature::GetOrSetFeatureName(const Word& RHS, con
if (!name.empty()) {
return name;
}
- }
- catch (const std::out_of_range& oor) {
+ } catch (const std::out_of_range& oor) {
#ifdef WITH_THREADS //need to resize cache; write lock
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif
ResizeCache();
}
#ifdef WITH_THREADS //need to update cache; write lock
- boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
+ boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif
- std::string &name = m_nameCache[RHS[0]->GetId()][LHS[0]->GetId()];
- const std::vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
- std::string LHS_string = LHS.GetString(outputFactorOrder, false);
- std::string RHS_string = RHS.GetString(outputFactorOrder, false);
- name = LHS_string + "->" + RHS_string;
- return name;
- }
+ std::string &name = m_nameCache[RHS[0]->GetId()][LHS[0]->GetId()];
+ const std::vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
+ std::string LHS_string = LHS.GetString(outputFactorOrder, false);
+ std::string RHS_string = RHS.GetString(outputFactorOrder, false);
+ name = LHS_string + "->" + RHS_string;
+ return name;
+}
}
diff --git a/moses/FF/SoftMatchingFeature.h b/moses/FF/SoftMatchingFeature.h
index ff923ea08..d524a1d07 100644
--- a/moses/FF/SoftMatchingFeature.h
+++ b/moses/FF/SoftMatchingFeature.h
@@ -20,20 +20,25 @@ public:
}
virtual void EvaluateWhenApplied(const ChartHypothesis& hypo,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const {};
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {};
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const {};
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {};
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const {};
+ ScoreComponentCollection* accumulator) const {};
bool Load(const std::string &filePath);
diff --git a/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp b/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp
index fe1144465..af8e89a3a 100644
--- a/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp
+++ b/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp
@@ -18,38 +18,51 @@ using namespace std;
namespace Moses
{
+
SoftSourceSyntacticConstraintsFeature::SoftSourceSyntacticConstraintsFeature(const std::string &line)
- : StatelessFeatureFunction(3, line), m_featureVariant(0)
+ : StatelessFeatureFunction(6, line)
+ , m_useCoreSourceLabels(false)
+ , m_useLogprobs(true)
+ , m_useSparse(false)
+ , m_noMismatches(false)
{
VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
ReadParameters();
VERBOSE(1, " Done.");
- VERBOSE(1, " Feature variant: " << m_featureVariant << "." << std::endl);
+ VERBOSE(1, " Config:");
+ VERBOSE(1, " Log probabilities"); if ( m_useLogprobs ) { VERBOSE(1, " active."); } else { VERBOSE(1, " inactive."); }
+ VERBOSE(1, " Sparse scores"); if ( m_useSparse ) { VERBOSE(1, " active."); } else { VERBOSE(1, " inactive."); }
+ VERBOSE(1, " Core labels"); if ( m_useCoreSourceLabels ) { VERBOSE(1, " active."); } else { VERBOSE(1, " inactive."); }
+ VERBOSE(1, " No mismatches"); if ( m_noMismatches ) { VERBOSE(1, " active."); } else { VERBOSE(1, " inactive."); }
+ VERBOSE(1, std::endl);
}
+
void SoftSourceSyntacticConstraintsFeature::SetParameter(const std::string& key, const std::string& value)
{
if (key == "sourceLabelSetFile") {
m_sourceLabelSetFile = value;
} else if (key == "coreSourceLabelSetFile") {
m_coreSourceLabelSetFile = value;
+ m_useCoreSourceLabels = true;
} else if (key == "targetSourceLeftHandSideJointCountFile") {
m_targetSourceLHSJointCountFile = value;
- } else if (key == "tuneable") {
- m_tuneable = Scan<bool>(value);
- } else if (key == "featureVariant") {
- m_featureVariant = Scan<size_t>(value); // 0: only dense features, 1: no mismatches (also set weights 1 0 0 and tuneable=false), 2: with sparse features, 3: with sparse features for core labels only
+ } else if (key == "noMismatches") {
+ m_noMismatches = Scan<bool>(value); // for a hard constraint, allow no mismatches (also set: weights 1 0 0 0 0 0, tuneable=false)
+ } else if (key == "logProbabilities") {
+ m_useLogprobs = Scan<bool>(value);
+ } else if (key == "sparse") {
+ m_useSparse = Scan<bool>(value);
} else {
StatelessFeatureFunction::SetParameter(key, value);
}
}
-
void SoftSourceSyntacticConstraintsFeature::Load()
{
// don't change the loading order!
LoadSourceLabelSet();
- if (m_featureVariant == 3) {
+ if (!m_coreSourceLabelSetFile.empty()) {
LoadCoreSourceLabelSet();
}
if (!m_targetSourceLHSJointCountFile.empty()) {
@@ -59,7 +72,7 @@ void SoftSourceSyntacticConstraintsFeature::Load()
void SoftSourceSyntacticConstraintsFeature::LoadSourceLabelSet()
{
- VERBOSE(2, GetScoreProducerDescription() << ": Loading source label set from file " << m_sourceLabelSetFile << std::endl);
+ FEATUREVERBOSE(2, "Loading source label set from file " << m_sourceLabelSetFile << std::endl);
InputFileStream inFile(m_sourceLabelSetFile);
FactorCollection &factorCollection = FactorCollection::Instance();
@@ -68,6 +81,10 @@ void SoftSourceSyntacticConstraintsFeature::LoadSourceLabelSet()
std::string line;
m_sourceLabels.clear();
m_sourceLabelsByIndex.clear();
+ m_sourceLabelsByIndex_RHS_1.clear();
+ m_sourceLabelsByIndex_RHS_0.clear();
+ m_sourceLabelsByIndex_LHS_1.clear();
+ m_sourceLabelsByIndex_LHS_0.clear();
m_sourceLabelIndexesByFactor.clear();
while (getline(inFile, line)) {
std::istringstream tokenizer(line);
@@ -76,17 +93,25 @@ void SoftSourceSyntacticConstraintsFeature::LoadSourceLabelSet()
try {
tokenizer >> label >> index;
} catch (const std::exception &e) {
- UTIL_THROW2(GetScoreProducerDescription()
+ UTIL_THROW2(GetScoreProducerDescription()
<< ": Error reading source label set file " << m_sourceLabelSetFile << " .");
}
std::pair< boost::unordered_map<std::string,size_t>::iterator, bool > inserted = m_sourceLabels.insert( std::pair<std::string,size_t>(label,index) );
UTIL_THROW_IF2(!inserted.second, GetScoreProducerDescription()
<< ": Source label set file " << m_sourceLabelSetFile << " should contain each syntactic label only once.");
-
- if (index >= m_sourceLabelsByIndex.size()) {
+
+ if (index >= m_sourceLabelsByIndex.size()) {
m_sourceLabelsByIndex.resize(index+1);
+ m_sourceLabelsByIndex_RHS_1.resize(index+1);
+ m_sourceLabelsByIndex_RHS_0.resize(index+1);
+ m_sourceLabelsByIndex_LHS_1.resize(index+1);
+ m_sourceLabelsByIndex_LHS_0.resize(index+1);
}
m_sourceLabelsByIndex[index] = label;
+ m_sourceLabelsByIndex_RHS_1[index] = "RHS_1_" + label;
+ m_sourceLabelsByIndex_RHS_0[index] = "RHS_0_" + label;
+ m_sourceLabelsByIndex_LHS_1[index] = "LHS_1_" + label;
+ m_sourceLabelsByIndex_LHS_0[index] = "LHS_0_" + label;
const Factor* sourceLabelFactor = factorCollection.AddFactor(label,true);
m_sourceLabelIndexesByFactor[sourceLabelFactor] = index;
}
@@ -113,36 +138,42 @@ void SoftSourceSyntacticConstraintsFeature::LoadSourceLabelSet()
}
}
+
void SoftSourceSyntacticConstraintsFeature::LoadCoreSourceLabelSet()
{
- VERBOSE(2, GetScoreProducerDescription() << ": Loading core source label set from file " << m_coreSourceLabelSetFile << std::endl);
- InputFileStream inFile(m_coreSourceLabelSetFile);
-
+ FEATUREVERBOSE(2, "Loading core source label set from file " << m_coreSourceLabelSetFile << std::endl);
// read core source label set
+ LoadLabelSet(m_coreSourceLabelSetFile, m_coreSourceLabels);
+}
+
+void SoftSourceSyntacticConstraintsFeature::LoadLabelSet(std::string &filename,
+ boost::unordered_set<size_t> &labelSet)
+{
+ FEATUREVERBOSE(2, "Loading core source label set from file " << m_coreSourceLabelSetFile << std::endl);
+ InputFileStream inFile(filename);
std::string line;
- m_coreSourceLabels.clear();
+ labelSet.clear();
while (getline(inFile, line)) {
istringstream tokenizer(line);
std::string label;
tokenizer >> label;
boost::unordered_map<std::string,size_t>::iterator foundSourceLabelIndex = m_sourceLabels.find( label );
if ( foundSourceLabelIndex != m_sourceLabels.end() ) {
- m_coreSourceLabels.insert(foundSourceLabelIndex->second);
+ labelSet.insert(foundSourceLabelIndex->second);
} else {
- VERBOSE(2, GetScoreProducerDescription()
- << ": Ignoring unknown source label \"" << label << "\" "
- << "from core source label set file " << m_coreSourceLabelSetFile << "."
- << std::endl);
+ FEATUREVERBOSE(2, "Ignoring unknown source label \"" << label << "\" "
+ << "from core source label set file " << filename << "."
+ << std::endl);
}
}
-
inFile.Close();
}
+
void SoftSourceSyntacticConstraintsFeature::LoadTargetSourceLeftHandSideJointCountFile()
{
- VERBOSE(2, GetScoreProducerDescription() << ": Loading target/source label joint counts from file " << m_targetSourceLHSJointCountFile << std::endl);
+ FEATUREVERBOSE(2, "Loading target/source label joint counts from file " << m_targetSourceLHSJointCountFile << std::endl);
InputFileStream inFile(m_targetSourceLHSJointCountFile);
for (boost::unordered_map<const Factor*, std::vector< std::pair<float,float> >* >::iterator iter=m_labelPairProbabilities.begin();
@@ -174,11 +205,11 @@ void SoftSourceSyntacticConstraintsFeature::LoadTargetSourceLeftHandSideJointCou
const Factor* targetLabelFactor = factorCollection.AddFactor(targetLabel,true);
sourceLHSCounts[foundSourceLabelIndex->second] += count;
- std::pair< boost::unordered_map<const Factor*,float >::iterator, bool > insertedTargetLHSCount =
+ std::pair< boost::unordered_map<const Factor*,float >::iterator, bool > insertedTargetLHSCount =
targetLHSCounts.insert( std::pair<const Factor*,float>(targetLabelFactor,count) );
if (!insertedTargetLHSCount.second) {
(insertedTargetLHSCount.first)->second += count;
- boost::unordered_map<const Factor*, std::vector< std::pair<float,float> >* >::iterator jointCountIt =
+ boost::unordered_map<const Factor*, std::vector< std::pair<float,float> >* >::iterator jointCountIt =
m_labelPairProbabilities.find( targetLabelFactor );
assert(jointCountIt != m_labelPairProbabilities.end());
(jointCountIt->second)->at(foundSourceLabelIndex->second).first += count;
@@ -187,7 +218,7 @@ void SoftSourceSyntacticConstraintsFeature::LoadTargetSourceLeftHandSideJointCou
std::pair<float,float> init(0.0,0.0);
std::vector< std::pair<float,float> >* sourceVector = new std::vector< std::pair<float,float> >(m_sourceLabels.size(),init);
sourceVector->at(foundSourceLabelIndex->second) = std::pair<float,float>(count,count);
- std::pair< boost::unordered_map<const Factor*, std::vector< std::pair<float,float> >* >::iterator, bool > insertedJointCount =
+ std::pair< boost::unordered_map<const Factor*, std::vector< std::pair<float,float> >* >::iterator, bool > insertedJointCount =
m_labelPairProbabilities.insert( std::pair<const Factor*, std::vector< std::pair<float,float> >* >(targetLabelFactor,sourceVector) );
assert(insertedJointCount.second);
}
@@ -219,14 +250,33 @@ void SoftSourceSyntacticConstraintsFeature::LoadTargetSourceLeftHandSideJointCou
}
-void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
- const ChartHypothesis& hypo,
- ScoreComponentCollection* accumulator) const
+void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
+ assert(stackVec);
+
+ IFFEATUREVERBOSE(2) {
+ FEATUREVERBOSE(2, targetPhrase << std::endl);
+ FEATUREVERBOSE(2, inputPath << std::endl);
+ for (size_t i = 0; i < stackVec->size(); ++i) {
+ const ChartCellLabel &cell = *stackVec->at(i);
+ const WordsRange &ntRange = cell.GetCoverage();
+ FEATUREVERBOSE(2, "stackVec[ " << i << " ] : " << ntRange.GetStartPos() << " - " << ntRange.GetEndPos() << std::endl);
+ }
+
+ for (AlignmentInfo::const_iterator it=targetPhrase.GetAlignNonTerm().begin();
+ it!=targetPhrase.GetAlignNonTerm().end(); ++it) {
+ FEATUREVERBOSE(2, "alignNonTerm " << it->first << " " << it->second << std::endl);
+ }
+ }
+
// dense scores
- std::vector<float> newScores(m_numScoreComponents,0); // m_numScoreComponents == 3
+ std::vector<float> newScores(m_numScoreComponents,0);
- const InputType& input = hypo.GetManager().GetSource();
const TreeInput& treeInput = static_cast<const TreeInput&>(input);
const StaticData& staticData = StaticData::Instance();
const Word& outputDefaultNonTerminal = staticData.GetOutputDefaultNonTerminal();
@@ -237,76 +287,85 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
bool hasCompleteTreeInputMatch = false;
float t2sLabelsProb = 1;
float s2tLabelsProb = 1;
- float ruleLabelledProbability = 1;
+ float ruleLabelledProbability = 0.0;
+ float treeInputMatchProbRHS = 0.0;
+ float treeInputMatchProbLHS = 0.0;
// read SourceLabels property
- const TargetPhrase &currTarPhr = hypo.GetCurrTargetPhrase();
- const Factor* targetLHS = currTarPhr.GetTargetLHS()[0];
+ const Factor* targetLHS = targetPhrase.GetTargetLHS()[0];
bool isGlueGrammarRule = false;
bool isUnkRule = false;
- if (const PhraseProperty *property = currTarPhr.GetProperty("SourceLabels")) {
+ if (const PhraseProperty *property = targetPhrase.GetProperty("SourceLabels")) {
- const SourceLabelsPhraseProperty *sourceLabelsPhraseProperty = static_cast<const SourceLabelsPhraseProperty*>(property);
+ const SourceLabelsPhraseProperty *sourceLabelsPhraseProperty = static_cast<const SourceLabelsPhraseProperty*>(property);
- nNTs = sourceLabelsPhraseProperty->GetNumberOfNonTerminals();
+ nNTs = sourceLabelsPhraseProperty->GetNumberOfNonTerminals();
float totalCount = sourceLabelsPhraseProperty->GetTotalCount();
-
+
// prepare for input tree label matching
std::vector< boost::unordered_set<size_t> > treeInputLabelsRHS(nNTs-1);
boost::unordered_set<size_t> treeInputLabelsLHS;
// get index map for underlying hypotheses
- const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
- currTarPhr.GetAlignNonTerm().GetNonTermIndexMap();
+// const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
+// targetPhrase.GetAlignNonTerm().GetNonTermIndexMap();
+ const WordsRange& wordsRange = inputPath.GetWordsRange();
+ size_t startPos = wordsRange.GetStartPos();
+ size_t endPos = wordsRange.GetEndPos();
+ const Phrase *sourcePhrase = targetPhrase.GetRuleSource();
- std::vector<const Factor*> targetLabelsRHS;
+// std::vector<const Factor*> targetLabelsRHS;
if (nNTs > 1) { // rule has right-hand side non-terminals, i.e. it's a hierarchical rule
size_t nonTerminalNumber = 0;
-
- for (size_t phrasePos=0; phrasePos<currTarPhr.GetSize(); ++phrasePos) {
+ size_t sourceSentPos = startPos;
+
+ for (size_t sourcePhrasePos=0; sourcePhrasePos<sourcePhrase->GetSize(); ++sourcePhrasePos) {
// consult rule for either word or non-terminal
- const Word &word = currTarPhr.GetWord(phrasePos);
+ const Word &word = sourcePhrase->GetWord(sourcePhrasePos);
+ size_t symbolStartPos = sourceSentPos;
+ size_t symbolEndPos = sourceSentPos;
if ( word.IsNonTerminal() ) {
// non-terminal: consult subderivation
- size_t nonTermIndex = nonTermIndexMap[phrasePos];
- const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndex);
- targetLabelsRHS.push_back( prevHypo->GetTargetLHS()[0] );
+// size_t nonTermIndex = nonTermIndexMap[phrasePos];
+// targetLabelsRHS.push_back( word[0] );
// retrieve information that is required for input tree label matching (RHS)
- const WordsRange& prevWordsRange = prevHypo->GetCurrSourceRange();
- size_t prevStartPos = prevWordsRange.GetStartPos();
- size_t prevEndPos = prevWordsRange.GetEndPos();
- const NonTerminalSet& prevTreeInputLabels = treeInput.GetLabelSet(prevStartPos,prevEndPos);
-
- for (NonTerminalSet::const_iterator prevTreeInputLabelsIt = prevTreeInputLabels.begin();
- prevTreeInputLabelsIt != prevTreeInputLabels.end(); ++prevTreeInputLabelsIt) {
- if (*prevTreeInputLabelsIt != outputDefaultNonTerminal) {
- boost::unordered_map<const Factor*,size_t>::const_iterator foundPrevTreeInputLabel
- = m_sourceLabelIndexesByFactor.find((*prevTreeInputLabelsIt)[0]);
- if (foundPrevTreeInputLabel != m_sourceLabelIndexesByFactor.end()) {
- size_t prevTreeInputLabelIndex = foundPrevTreeInputLabel->second;
- treeInputLabelsRHS[nonTerminalNumber].insert(prevTreeInputLabelIndex);
- }
+ const ChartCellLabel &cell = *stackVec->at(nonTerminalNumber);
+ const WordsRange& prevWordsRange = cell.GetCoverage();
+ symbolStartPos = prevWordsRange.GetStartPos();
+ symbolEndPos = prevWordsRange.GetEndPos();
+ }
+
+ const NonTerminalSet& treeInputLabels = treeInput.GetLabelSet(symbolStartPos,symbolEndPos);
+
+ for (NonTerminalSet::const_iterator treeInputLabelsIt = treeInputLabels.begin();
+ treeInputLabelsIt != treeInputLabels.end(); ++treeInputLabelsIt) {
+ if (*treeInputLabelsIt != outputDefaultNonTerminal) {
+ boost::unordered_map<const Factor*,size_t>::const_iterator foundTreeInputLabel
+ = m_sourceLabelIndexesByFactor.find((*treeInputLabelsIt)[0]);
+ if (foundTreeInputLabel != m_sourceLabelIndexesByFactor.end()) {
+ size_t treeInputLabelIndex = foundTreeInputLabel->second;
+ treeInputLabelsRHS[sourcePhrasePos].insert(treeInputLabelIndex);
}
}
-
+ }
+
+ if ( word.IsNonTerminal() ) {
++nonTerminalNumber;
}
+ sourceSentPos = symbolEndPos + 1;
}
}
// retrieve information that is required for input tree label matching (LHS)
- const WordsRange& wordsRange = hypo.GetCurrSourceRange();
- size_t startPos = wordsRange.GetStartPos();
- size_t endPos = wordsRange.GetEndPos();
const NonTerminalSet& treeInputLabels = treeInput.GetLabelSet(startPos,endPos);
for (NonTerminalSet::const_iterator treeInputLabelsIt = treeInputLabels.begin();
treeInputLabelsIt != treeInputLabels.end(); ++treeInputLabelsIt) {
if (*treeInputLabelsIt != outputDefaultNonTerminal) {
- boost::unordered_map<const Factor*,size_t>::const_iterator foundTreeInputLabel
- = m_sourceLabelIndexesByFactor.find((*treeInputLabelsIt)[0]);
+ boost::unordered_map<const Factor*,size_t>::const_iterator foundTreeInputLabel
+ = m_sourceLabelIndexesByFactor.find((*treeInputLabelsIt)[0]);
if (foundTreeInputLabel != m_sourceLabelIndexesByFactor.end()) {
size_t treeInputLabelIndex = foundTreeInputLabel->second;
treeInputLabelsLHS.insert(treeInputLabelIndex);
@@ -314,7 +373,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
}
}
-
+
// inspect source-labelled rule items
std::vector< boost::unordered_set<size_t> > sparseScoredTreeInputLabelsRHS(nNTs-1);
@@ -322,6 +381,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
std::vector<bool> sourceLabelSeenAsLHS(m_sourceLabels.size(),false);
std::vector<bool> treeInputMatchRHSCountByNonTerminal(nNTs-1,false);
+ std::vector<float> treeInputMatchProbRHSByNonTerminal(nNTs-1,0.0);
const std::list<SourceLabelsPhrasePropertyItem> &sourceLabelItems = sourceLabelsPhraseProperty->GetSourceLabelItems();
@@ -329,8 +389,8 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
sourceLabelItem != sourceLabelItems.end() && !hasCompleteTreeInputMatch; ++sourceLabelItem) {
const std::list<size_t> &sourceLabelsRHS = sourceLabelItem->GetSourceLabelsRHS();
- // float sourceLabelsRHSCount = sourceLabelItem->GetSourceLabelsRHSCount();
const std::list< std::pair<size_t,float> > &sourceLabelsLHSList = sourceLabelItem->GetSourceLabelsLHSList();
+ float sourceLabelsRHSCount = sourceLabelItem->GetSourceLabelsRHSCount();
assert(sourceLabelsRHS.size() == nNTs-1);
@@ -343,16 +403,17 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
if (treeInputLabelsRHS[nonTerminalNumber].find(*sourceLabelsRHSIt) != treeInputLabelsRHS[nonTerminalNumber].end()) {
treeInputMatchRHSCountByNonTerminal[nonTerminalNumber] = true;
+ treeInputMatchProbRHSByNonTerminal[nonTerminalNumber] += sourceLabelsRHSCount; // to be normalized later on
- if ( m_featureVariant == 2 ||
- (m_featureVariant == 3 && m_coreSourceLabels.find(*sourceLabelsRHSIt) != m_coreSourceLabels.end()) ) {
+ if ( m_useSparse &&
+ (!m_useCoreSourceLabels || m_coreSourceLabels.find(*sourceLabelsRHSIt) != m_coreSourceLabels.end()) ) {
// score sparse features: RHS match
if (sparseScoredTreeInputLabelsRHS[nonTerminalNumber].find(*sourceLabelsRHSIt) == sparseScoredTreeInputLabelsRHS[nonTerminalNumber].end()) {
- // (only if no match has been scored for this tree input label and rule non-terminal with a previous sourceLabelItem)
+ // (only if no match has been scored for this tree input label and rule non-terminal with a previous sourceLabelItem)
float score_RHS_1 = (float)1/treeInputLabelsRHS[nonTerminalNumber].size();
- accumulator->PlusEquals(this,
- std::string("RHS_1_" + m_sourceLabelsByIndex[*sourceLabelsRHSIt]),
- score_RHS_1);
+ scoreBreakdown.PlusEquals(this,
+ m_sourceLabelsByIndex_RHS_1[*sourceLabelsRHSIt],
+ score_RHS_1);
sparseScoredTreeInputLabelsRHS[nonTerminalNumber].insert(*sourceLabelsRHSIt);
}
}
@@ -364,12 +425,8 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
}
}
- // LHS source non-terminal labels seen with this RHS
- bool currentSourceLabelItemHasLHSTreeInputMatch = false;
- //float ruleLabelledCount = 0;
- std::list< std::pair<size_t,float> >::const_iterator sourceLabelsLHSIt;
-
- for (sourceLabelsLHSIt = sourceLabelsLHSList.begin(); sourceLabelsLHSIt != sourceLabelsLHSList.end(); ++sourceLabelsLHSIt) {
+ for (std::list< std::pair<size_t,float> >::const_iterator sourceLabelsLHSIt = sourceLabelsLHSList.begin();
+ sourceLabelsLHSIt != sourceLabelsLHSList.end(); ++sourceLabelsLHSIt) {
if ( sourceLabelsLHSIt->first == m_GlueTopLabel ) {
isGlueGrammarRule = true;
@@ -377,53 +434,60 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
if (treeInputLabelsLHS.find(sourceLabelsLHSIt->first) != treeInputLabelsLHS.end()) {
- currentSourceLabelItemHasLHSTreeInputMatch = true;
+ treeInputMismatchLHSBinary = false;
+ treeInputMatchProbLHS += sourceLabelsLHSIt->second; // to be normalized later on
- if ( m_featureVariant == 2 ||
- (m_featureVariant == 3 && m_coreSourceLabels.find(sourceLabelsLHSIt->first) != m_coreSourceLabels.end()) ) {
+ if ( m_useSparse &&
+ (!m_useCoreSourceLabels || m_coreSourceLabels.find(sourceLabelsLHSIt->first) != m_coreSourceLabels.end()) ) {
// score sparse features: LHS match
if (sparseScoredTreeInputLabelsLHS.find(sourceLabelsLHSIt->first) == sparseScoredTreeInputLabelsLHS.end()) {
- // (only if no match has been scored for this tree input label and rule non-terminal with a previous sourceLabelItem)
+ // (only if no match has been scored for this tree input label and rule non-terminal with a previous sourceLabelItem)
float score_LHS_1 = (float)1/treeInputLabelsLHS.size();
- accumulator->PlusEquals(this,
- std::string("LHS_1_" + m_sourceLabelsByIndex[sourceLabelsLHSIt->first]),
- score_LHS_1);
+ scoreBreakdown.PlusEquals(this,
+ m_sourceLabelsByIndex_LHS_1[sourceLabelsLHSIt->first],
+ score_LHS_1);
sparseScoredTreeInputLabelsLHS.insert(sourceLabelsLHSIt->first);
}
}
- break;
+
+ if ( currentSourceLabelItemIsCompleteTreeInputMatch ) {
+ ruleLabelledProbability += sourceLabelsLHSIt->second; // to be normalized later on
+ hasCompleteTreeInputMatch = true;
+ }
}
}
- if (currentSourceLabelItemHasLHSTreeInputMatch) {
- // input tree matching (LHS)
- treeInputMismatchLHSBinary = false;
- } else {
- currentSourceLabelItemIsCompleteTreeInputMatch = false;
- }
+// if ( hasCompleteTreeInputMatch ) {
+//
+// std::pair<float,float> probPair = GetLabelPairProbabilities( targetLHS, sourceLabelsLHSIt->first);
+// t2sLabelsProb = probPair.first;
+// s2tLabelsProb = probPair.second;
+// nonTerminalNumber=0;
+// for (std::list<size_t>::const_iterator sourceLabelsRHSIt = sourceLabelsRHS.begin();
+// sourceLabelsRHSIt != sourceLabelsRHS.end(); ++sourceLabelsRHSIt, ++nonTerminalNumber) {
+// probPair = GetLabelPairProbabilities( targetLabelsRHS[nonTerminalNumber], *sourceLabelsRHSIt );
+// t2sLabelsProb += probPair.first;
+// s2tLabelsProb += probPair.second;
+// }
+// t2sLabelsProb /= nNTs;
+// s2tLabelsProb /= nNTs;
+// assert(t2sLabelsProb != 0);
+// assert(s2tLabelsProb != 0);
+// }
- if (currentSourceLabelItemIsCompleteTreeInputMatch) {
- hasCompleteTreeInputMatch = true;
-
- ruleLabelledProbability = sourceLabelsLHSIt->second / totalCount;
- std::pair<float,float> probPair = GetLabelPairProbabilities( targetLHS, sourceLabelsLHSIt->first);
- t2sLabelsProb = probPair.first;
- s2tLabelsProb = probPair.second;
- nonTerminalNumber=0;
- for (std::list<size_t>::const_iterator sourceLabelsRHSIt = sourceLabelsRHS.begin();
- sourceLabelsRHSIt != sourceLabelsRHS.end(); ++sourceLabelsRHSIt, ++nonTerminalNumber) {
- probPair = GetLabelPairProbabilities( targetLabelsRHS[nonTerminalNumber], *sourceLabelsRHSIt );
- t2sLabelsProb += probPair.first;
- s2tLabelsProb += probPair.second;
- }
- t2sLabelsProb /= nNTs;
- s2tLabelsProb /= nNTs;
- assert(t2sLabelsProb != 0);
- assert(s2tLabelsProb != 0);
- }
+ }
+ // normalization
+ for (std::vector<float>::iterator treeInputMatchProbRHSByNonTerminalIt = treeInputMatchProbRHSByNonTerminal.begin();
+ treeInputMatchProbRHSByNonTerminalIt != treeInputMatchProbRHSByNonTerminal.end(); ++treeInputMatchProbRHSByNonTerminalIt) {
+ *treeInputMatchProbRHSByNonTerminalIt /= totalCount;
+ if ( *treeInputMatchProbRHSByNonTerminalIt != 0 ) {
+ treeInputMatchProbRHS += ( m_useLogprobs ? TransformScore(*treeInputMatchProbRHSByNonTerminalIt) : *treeInputMatchProbRHSByNonTerminalIt );
+ }
}
+ treeInputMatchProbLHS /= totalCount;
+ ruleLabelledProbability /= totalCount;
// input tree matching (RHS)
if ( !hasCompleteTreeInputMatch ) {
@@ -437,25 +501,24 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
}
// score sparse features: mismatches
- if ( m_featureVariant == 2 || m_featureVariant == 3 ) {
+ if ( m_useSparse ) {
// RHS
for (size_t nonTerminalNumber = 0; nonTerminalNumber < nNTs-1; ++nonTerminalNumber) {
- // nNTs-1 because nNTs also counts the left-hand side non-terminal
+ // nNTs-1 because nNTs also counts the left-hand side non-terminal
float score_RHS_0 = (float)1/treeInputLabelsRHS[nonTerminalNumber].size();
for (boost::unordered_set<size_t>::const_iterator treeInputLabelsRHSIt = treeInputLabelsRHS[nonTerminalNumber].begin();
treeInputLabelsRHSIt != treeInputLabelsRHS[nonTerminalNumber].end(); ++treeInputLabelsRHSIt) {
- if ( m_featureVariant == 2 ||
- (m_featureVariant == 3 && m_coreSourceLabels.find(*treeInputLabelsRHSIt) != m_coreSourceLabels.end()) ) {
+ if ( !m_useCoreSourceLabels || m_coreSourceLabels.find(*treeInputLabelsRHSIt) != m_coreSourceLabels.end() ) {
if (sparseScoredTreeInputLabelsRHS[nonTerminalNumber].find(*treeInputLabelsRHSIt) == sparseScoredTreeInputLabelsRHS[nonTerminalNumber].end()) {
// score sparse features: RHS mismatch
- accumulator->PlusEquals(this,
- std::string("RHS_0_" + m_sourceLabelsByIndex[*treeInputLabelsRHSIt]),
- score_RHS_0);
+ scoreBreakdown.PlusEquals(this,
+ m_sourceLabelsByIndex_RHS_0[*treeInputLabelsRHSIt],
+ score_RHS_0);
}
}
}
@@ -463,67 +526,75 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
// LHS
- float score_LHS_0 = (float)1/treeInputLabelsLHS.size();
- for (boost::unordered_set<size_t>::const_iterator treeInputLabelsLHSIt = treeInputLabelsLHS.begin();
- treeInputLabelsLHSIt != treeInputLabelsLHS.end(); ++treeInputLabelsLHSIt) {
+ if ( m_useSparse ) {
+
+ float score_LHS_0 = (float)1/treeInputLabelsLHS.size();
+ for (boost::unordered_set<size_t>::const_iterator treeInputLabelsLHSIt = treeInputLabelsLHS.begin();
+ treeInputLabelsLHSIt != treeInputLabelsLHS.end(); ++treeInputLabelsLHSIt) {
- if ( m_featureVariant == 2 ||
- (m_featureVariant == 3 && m_coreSourceLabels.find(*treeInputLabelsLHSIt) != m_coreSourceLabels.end()) ) {
+ if ( !m_useCoreSourceLabels || m_coreSourceLabels.find(*treeInputLabelsLHSIt) != m_coreSourceLabels.end() ) {
- if (sparseScoredTreeInputLabelsLHS.find(*treeInputLabelsLHSIt) == sparseScoredTreeInputLabelsLHS.end()) {
- // score sparse features: RHS mismatch
- accumulator->PlusEquals(this,
- std::string("LHS_0_" + m_sourceLabelsByIndex[*treeInputLabelsLHSIt]),
- score_LHS_0);
+ if (sparseScoredTreeInputLabelsLHS.find(*treeInputLabelsLHSIt) == sparseScoredTreeInputLabelsLHS.end()) {
+ // score sparse features: RHS mismatch
+ scoreBreakdown.PlusEquals(this,
+ m_sourceLabelsByIndex_LHS_0[*treeInputLabelsLHSIt],
+ score_LHS_0);
+ }
}
}
}
}
-
+
} else {
// abort with error message if the phrase does not translate an unknown word
- UTIL_THROW_IF2(!currTarPhr.GetWord(0).IsOOV(), GetScoreProducerDescription()
+ UTIL_THROW_IF2(!targetPhrase.GetWord(0).IsOOV(), GetScoreProducerDescription()
<< ": Missing SourceLabels property. "
<< "Please check phrase table and glue rules.");
// unknown word
isUnkRule = true;
+// ruleLabelledProbability = 1;
}
// add scores
// input tree matching
- switch (m_featureVariant) {
-
- case 0:
- newScores[0] = hasCompleteTreeInputMatch;
- break;
-
- case 1:
- newScores[0] = ( (hasCompleteTreeInputMatch || isGlueGrammarRule || isUnkRule) ? 0 : std::numeric_limits<float>::min() );
- break;
-
- default:
- newScores[0] = hasCompleteTreeInputMatch;
+ newScores[0] = !hasCompleteTreeInputMatch;
+ if ( m_noMismatches ) {
+ newScores[0] = ( (hasCompleteTreeInputMatch || isGlueGrammarRule || isUnkRule) ? 0 : -std::numeric_limits<float>::infinity() );
}
newScores[1] = treeInputMismatchLHSBinary;
newScores[2] = treeInputMismatchRHSCount;
-// newScores[3] = hasCompleteTreeInputMatch ? std::log(t2sLabelsProb) : 0;
-// newScores[4] = hasCompleteTreeInputMatch ? std::log(s2tLabelsProb) : 0;
-// newScores[3] = hasCompleteTreeInputMatch ? std::log(ruleLabelledProbability) : 0;
- accumulator->PlusEquals(this, newScores);
+// newScores[3] = hasCompleteTreeInputMatch ? TransformScore(ruleLabelledProbability) : 0;
+// newScores[4] = hasCompleteTreeInputMatch ? TransformScore(t2sLabelsProb) : 0;
+// newScores[5] = hasCompleteTreeInputMatch ? TransformScore(s2tLabelsProb) : 0;
+
+ if ( m_useLogprobs ) {
+ if ( ruleLabelledProbability != 0 ) {
+ ruleLabelledProbability = TransformScore(ruleLabelledProbability);
+ }
+ if ( treeInputMatchProbLHS != 0 ) {
+ treeInputMatchProbLHS = TransformScore(treeInputMatchProbLHS);
+ }
+ }
+
+ newScores[3] = ruleLabelledProbability;
+ newScores[4] = treeInputMatchProbLHS;
+ newScores[5] = treeInputMatchProbRHS;
+
+ scoreBreakdown.PlusEquals(this, newScores);
}
-
+
std::pair<float,float> SoftSourceSyntacticConstraintsFeature::GetLabelPairProbabilities(
- const Factor* target,
+ const Factor* target,
const size_t source) const
{
- boost::unordered_map<const Factor*, std::vector< std::pair<float,float> >* >::const_iterator found =
+ boost::unordered_map<const Factor*, std::vector< std::pair<float,float> >* >::const_iterator found =
m_labelPairProbabilities.find(target);
if ( found == m_labelPairProbabilities.end() ) {
return std::pair<float,float>(0,0);
@@ -531,6 +602,6 @@ std::pair<float,float> SoftSourceSyntacticConstraintsFeature::GetLabelPairProbab
return found->second->at(source);
}
-
+
}
diff --git a/moses/FF/SoftSourceSyntacticConstraintsFeature.h b/moses/FF/SoftSourceSyntacticConstraintsFeature.h
index 725a836eb..e73993df1 100644
--- a/moses/FF/SoftSourceSyntacticConstraintsFeature.h
+++ b/moses/FF/SoftSourceSyntacticConstraintsFeature.h
@@ -13,7 +13,9 @@ namespace Moses
class SoftSourceSyntacticConstraintsFeature : public StatelessFeatureFunction
{
+
public:
+
SoftSourceSyntacticConstraintsFeature(const std::string &line);
~SoftSourceSyntacticConstraintsFeature() {
@@ -28,20 +30,26 @@ public:
}
void SetParameter(const std::string& key, const std::string& value);
+
+ void Load();
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {};
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ targetPhrase.SetRuleSource(source);
+ };
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {};
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const
+ {}
void EvaluateWhenApplied(
const Hypothesis& cur_hypo,
@@ -50,17 +58,27 @@ public:
void EvaluateWhenApplied(
const ChartHypothesis& cur_hypo,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const
+ {};
+
+
+protected:
-private:
std::string m_sourceLabelSetFile;
std::string m_coreSourceLabelSetFile;
std::string m_targetSourceLHSJointCountFile;
std::string m_unknownLeftHandSideFile;
- size_t m_featureVariant;
+ bool m_useCoreSourceLabels;
+ bool m_useLogprobs;
+ bool m_useSparse;
+ bool m_noMismatches;
boost::unordered_map<std::string,size_t> m_sourceLabels;
std::vector<std::string> m_sourceLabelsByIndex;
+ std::vector<std::string> m_sourceLabelsByIndex_RHS_1;
+ std::vector<std::string> m_sourceLabelsByIndex_RHS_0;
+ std::vector<std::string> m_sourceLabelsByIndex_LHS_1;
+ std::vector<std::string> m_sourceLabelsByIndex_LHS_0;
boost::unordered_set<size_t> m_coreSourceLabels;
boost::unordered_map<const Factor*,size_t> m_sourceLabelIndexesByFactor;
size_t m_GlueTopLabel;
@@ -72,13 +90,14 @@ private:
float m_smoothingWeight;
float m_unseenLHSSmoothingFactorForUnknowns;
- void Load();
void LoadSourceLabelSet();
void LoadCoreSourceLabelSet();
void LoadTargetSourceLeftHandSideJointCountFile();
- std::pair<float,float> GetLabelPairProbabilities(const Factor* target,
- const size_t source) const;
+ void LoadLabelSet(std::string &filename, boost::unordered_set<size_t> &labelSet);
+
+ std::pair<float,float> GetLabelPairProbabilities(const Factor* target,
+ const size_t source) const;
};
diff --git a/moses/FF/SourceGHKMTreeInputMatchFeature.cpp b/moses/FF/SourceGHKMTreeInputMatchFeature.cpp
index 38238b10c..15509f1e4 100644
--- a/moses/FF/SourceGHKMTreeInputMatchFeature.cpp
+++ b/moses/FF/SourceGHKMTreeInputMatchFeature.cpp
@@ -33,11 +33,11 @@ void SourceGHKMTreeInputMatchFeature::SetParameter(const std::string& key, const
// assumes that source-side syntax labels are stored in the target non-terminal field of the rules
void SourceGHKMTreeInputMatchFeature::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
const WordsRange& wordsRange = inputPath.GetWordsRange();
size_t startPos = wordsRange.GetStartPos();
diff --git a/moses/FF/SourceGHKMTreeInputMatchFeature.h b/moses/FF/SourceGHKMTreeInputMatchFeature.h
index 743871b1c..15f1877de 100644
--- a/moses/FF/SourceGHKMTreeInputMatchFeature.h
+++ b/moses/FF/SourceGHKMTreeInputMatchFeature.h
@@ -18,22 +18,27 @@ public:
void SetParameter(const std::string& key, const std::string& value);
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const {};
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {};
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const {};
+ ScoreComponentCollection* accumulator) const {};
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const {};
+ ScoreComponentCollection* accumulator) const {};
};
diff --git a/moses/FF/SourceWordDeletionFeature.cpp b/moses/FF/SourceWordDeletionFeature.cpp
index dcbba6a0a..91a3137b4 100644
--- a/moses/FF/SourceWordDeletionFeature.cpp
+++ b/moses/FF/SourceWordDeletionFeature.cpp
@@ -6,7 +6,6 @@
#include "moses/ChartHypothesis.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/TranslationOption.h"
-#include "moses/UserMessage.h"
#include "moses/Util.h"
#include "util/string_piece_hash.hh"
@@ -21,8 +20,9 @@ SourceWordDeletionFeature::SourceWordDeletionFeature(const std::string &line)
:StatelessFeatureFunction(0, line),
m_unrestricted(true)
{
- std::cerr << "Initializing source word deletion feature.." << std::endl;
+ VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
ReadParameters();
+ VERBOSE(1, " Done." << std::endl);
}
void SourceWordDeletionFeature::SetParameter(const std::string& key, const std::string& value)
@@ -42,8 +42,7 @@ void SourceWordDeletionFeature::Load()
return;
}
- cerr << "loading source word deletion word list from " << m_filename << endl;
-
+ FEATUREVERBOSE(1, "Loading source word deletion word list from " << m_filename << std::endl);
ifstream inFile(m_filename.c_str());
UTIL_THROW_IF2(!inFile, "Can't open file " << m_filename);
diff --git a/moses/FF/SourceWordDeletionFeature.h b/moses/FF/SourceWordDeletionFeature.h
index 8211ef0ca..5ec87b21a 100644
--- a/moses/FF/SourceWordDeletionFeature.h
+++ b/moses/FF/SourceWordDeletionFeature.h
@@ -1,5 +1,4 @@
-#ifndef moses_SourceWordDeletionFeature_h
-#define moses_SourceWordDeletionFeature_h
+#pragma once
#include <string>
#include <boost/unordered_set.hpp>
@@ -29,22 +28,28 @@ public:
bool IsUseable(const FactorMask &mask) const;
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
+
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void ComputeFeatures(const Phrase &source,
const TargetPhrase& targetPhrase,
@@ -56,4 +61,3 @@ public:
}
-#endif // moses_SourceWordDeletionFeature_h
diff --git a/moses/FF/SpanLength.cpp b/moses/FF/SpanLength.cpp
index 7a7c87be8..0e14069ee 100644
--- a/moses/FF/SpanLength.cpp
+++ b/moses/FF/SpanLength.cpp
@@ -14,33 +14,33 @@ using namespace std;
namespace Moses
{
SpanLength::SpanLength(const std::string &line)
-:StatelessFeatureFunction(1, line)
-,m_smoothingMethod(None)
-,m_const(0)
+ :StatelessFeatureFunction(1, line)
+ ,m_smoothingMethod(None)
+ ,m_const(0)
{
ReadParameters();
}
void SpanLength::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
targetPhrase.SetRuleSource(source);
}
void SpanLength::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
assert(stackVec);
const PhraseProperty *property = targetPhrase.GetProperty("SpanLength");
if (property == NULL) {
- return;
+ return;
}
const SpanLengthPhraseProperty *slProp = static_cast<const SpanLengthPhraseProperty*>(property);
@@ -50,17 +50,17 @@ void SpanLength::EvaluateWithSourceContext(const InputType &input
float score = 0;
for (size_t i = 0; i < stackVec->size(); ++i) {
- const ChartCellLabel &cell = *stackVec->at(i);
- const WordsRange &ntRange = cell.GetCoverage();
- size_t sourceWidth = ntRange.GetNumWordsCovered();
- float prob = slProp->GetProb(i, sourceWidth, m_const);
- score += TransformScore(prob);
+ const ChartCellLabel &cell = *stackVec->at(i);
+ const WordsRange &ntRange = cell.GetCoverage();
+ size_t sourceWidth = ntRange.GetNumWordsCovered();
+ float prob = slProp->GetProb(i, sourceWidth, m_const);
+ score += TransformScore(prob);
}
if (score < -100.0f) {
float weight = StaticData::Instance().GetWeight(this);
if (weight < 0) {
- score = -100;
+ score = -100;
}
}
@@ -71,20 +71,16 @@ void SpanLength::EvaluateWithSourceContext(const InputType &input
void SpanLength::SetParameter(const std::string& key, const std::string& value)
{
if (key == "smoothing") {
- if (value == "plus-constant") {
- m_smoothingMethod = PlusConst;
- }
- else if (value == "none") {
- m_smoothingMethod = None;
- }
- else {
- UTIL_THROW(util::Exception, "Unknown smoothing type " << value);
- }
- }
- else if (key == "constant") {
- m_const = Scan<float>(value);
- }
- else {
+ if (value == "plus-constant") {
+ m_smoothingMethod = PlusConst;
+ } else if (value == "none") {
+ m_smoothingMethod = None;
+ } else {
+ UTIL_THROW(util::Exception, "Unknown smoothing type " << value);
+ }
+ } else if (key == "constant") {
+ m_const = Scan<float>(value);
+ } else {
StatelessFeatureFunction::SetParameter(key, value);
}
}
diff --git a/moses/FF/SpanLength.h b/moses/FF/SpanLength.h
index dc5564fcd..b3998e462 100644
--- a/moses/FF/SpanLength.h
+++ b/moses/FF/SpanLength.h
@@ -9,43 +9,47 @@ namespace Moses
class SpanLength : public StatelessFeatureFunction
{
public:
- SpanLength(const std::string &line);
+ SpanLength(const std::string &line);
- virtual bool IsUseable(const FactorMask &mask) const
- { return true; }
+ virtual bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
- virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ virtual void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
- virtual void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+ virtual void EvaluateWithSourceContext(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
- virtual void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
- virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ virtual void EvaluateWhenApplied(const Hypothesis& hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
- void SetParameter(const std::string& key, const std::string& value);
+ virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
+
+ void SetParameter(const std::string& key, const std::string& value);
protected:
- enum SmoothingMethod
- {
- None,
- PlusConst,
- };
- SmoothingMethod m_smoothingMethod;
-
- float m_const;
+ enum SmoothingMethod {
+ None,
+ PlusConst,
+ };
+ SmoothingMethod m_smoothingMethod;
+
+ float m_const;
};
}
diff --git a/moses/FF/SparseHieroReorderingFeature.cpp b/moses/FF/SparseHieroReorderingFeature.cpp
index 0c6ac4767..ee9d4b719 100644
--- a/moses/FF/SparseHieroReorderingFeature.cpp
+++ b/moses/FF/SparseHieroReorderingFeature.cpp
@@ -16,11 +16,11 @@ namespace Moses
SparseHieroReorderingFeature::SparseHieroReorderingFeature(const std::string &line)
:StatelessFeatureFunction(0, line),
- m_type(SourceCombined),
- m_sourceFactor(0),
- m_targetFactor(0),
- m_sourceVocabFile(""),
- m_targetVocabFile("")
+ m_type(SourceCombined),
+ m_sourceFactor(0),
+ m_targetFactor(0),
+ m_sourceVocabFile(""),
+ m_targetVocabFile("")
{
/*
@@ -39,7 +39,8 @@ SparseHieroReorderingFeature::SparseHieroReorderingFeature(const std::string &li
LoadVocabulary(m_targetVocabFile, m_targetVocab);
}
-void SparseHieroReorderingFeature::SetParameter(const std::string& key, const std::string& value) {
+void SparseHieroReorderingFeature::SetParameter(const std::string& key, const std::string& value)
+{
if (key == "input-factor") {
m_sourceFactor = Scan<FactorType>(value);
} else if (key == "output-factor") {
@@ -70,12 +71,13 @@ void SparseHieroReorderingFeature::LoadVocabulary(const std::string& filename, V
UTIL_THROW_IF(!in, util::Exception, "Unable to open vocab file: " << filename);
string line;
while(getline(in,line)) {
- vocab.insert(FactorCollection::Instance().AddFactor(line));
+ vocab.insert(FactorCollection::Instance().AddFactor(line));
}
in.close();
}
-const Factor* SparseHieroReorderingFeature::GetFactor(const Word& word, const Vocab& vocab, FactorType factorType) const {
+const Factor* SparseHieroReorderingFeature::GetFactor(const Word& word, const Vocab& vocab, FactorType factorType) const
+{
const Factor* factor = word.GetFactor(factorType);
if (vocab.size() && vocab.find(factor) == vocab.end()) return m_otherFactor;
return factor;
@@ -88,21 +90,21 @@ void SparseHieroReorderingFeature::EvaluateWhenApplied(
// get index map for underlying hypotheses
//const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
// cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap();
-
+
//The Huck features. For a rule with source side:
// abXcdXef
//We first have to split into blocks:
// ab X cd X ef
//Then we extract features based in the boundary words of the neighbouring blocks
- //For the block pair, we use the right word of the left block, and the left
+ //For the block pair, we use the right word of the left block, and the left
//word of the right block.
- //Need to get blocks, and their alignment. Each block has a word range (on the
+ //Need to get blocks, and their alignment. Each block has a word range (on the
// on the source), a non-terminal flag, and a set of alignment points in the target phrase
//We need to be able to map source word position to target word position, as
//much as possible (don't need interior of non-terminals). The alignment info
- //objects just give us the mappings between *rule* positions. So if we can
+ //objects just give us the mappings between *rule* positions. So if we can
//map source word position to source rule position, and target rule position
//to target word position, then we can map right through.
@@ -114,16 +116,16 @@ void SparseHieroReorderingFeature::EvaluateWhenApplied(
sourceNTSpans.push_back(cur_hypo.GetPrevHypo(prevHypoId)->GetCurrSourceRange());
}
//put in source order. Is this necessary?
- sort(sourceNTSpans.begin(), sourceNTSpans.end());
+ sort(sourceNTSpans.begin(), sourceNTSpans.end());
//cerr << "Source NTs: ";
//for (size_t i = 0; i < sourceNTSpans.size(); ++i) cerr << sourceNTSpans[i] << " ";
//cerr << endl;
typedef pair<WordsRange,bool> Block;//flag indicates NT
- vector<Block> sourceBlocks;
+ vector<Block> sourceBlocks;
sourceBlocks.push_back(Block(cur_hypo.GetCurrSourceRange(),false));
- for (vector<WordsRange>::const_iterator i = sourceNTSpans.begin();
- i != sourceNTSpans.end(); ++i) {
+ for (vector<WordsRange>::const_iterator i = sourceNTSpans.begin();
+ i != sourceNTSpans.end(); ++i) {
const WordsRange& prevHypoRange = *i;
Block lastBlock = sourceBlocks.back();
sourceBlocks.pop_back();
@@ -155,12 +157,12 @@ void SparseHieroReorderingFeature::EvaluateWhenApplied(
//vector<size_t> alignMapTerm = cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm()
size_t sourceRulePos = 0;
//cerr << "SW->RP ";
- for (vector<Block>::const_iterator sourceBlockIt = sourceBlocks.begin();
- sourceBlockIt != sourceBlocks.end(); ++sourceBlockIt) {
+ for (vector<Block>::const_iterator sourceBlockIt = sourceBlocks.begin();
+ sourceBlockIt != sourceBlocks.end(); ++sourceBlockIt) {
for (size_t sourceWordPos = sourceBlockIt->first.GetStartPos();
- sourceWordPos <= sourceBlockIt->first.GetEndPos(); ++sourceWordPos) {
+ sourceWordPos <= sourceBlockIt->first.GetEndPos(); ++sourceWordPos) {
sourceWordToTargetRulePos[sourceWordPos - sourceStart] = alignMap[sourceRulePos];
- // cerr << sourceWordPos - sourceStart << "-" << alignMap[sourceRulePos] << " ";
+ // cerr << sourceWordPos - sourceStart << "-" << alignMap[sourceRulePos] << " ";
if (! sourceBlockIt->second) {
//T
++sourceRulePos;
@@ -174,7 +176,7 @@ void SparseHieroReorderingFeature::EvaluateWhenApplied(
//cerr << endl;
//Iterate through block pairs
- const Sentence& sentence =
+ const Sentence& sentence =
dynamic_cast<const Sentence&>(cur_hypo.GetManager().GetSource());
//const TargetPhrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();
for (size_t i = 0; i < sourceBlocks.size()-1; ++i) {
@@ -186,19 +188,18 @@ void SparseHieroReorderingFeature::EvaluateWhenApplied(
const Word& sourceRightBoundaryWord = sentence.GetWord(sourceRightBoundaryPos);
sourceLeftBoundaryPos -= sourceStart;
sourceRightBoundaryPos -= sourceStart;
-
+
// Need to figure out where these map to on the target.
- size_t targetLeftRulePos =
+ size_t targetLeftRulePos =
sourceWordToTargetRulePos[sourceLeftBoundaryPos];
- size_t targetRightRulePos =
+ size_t targetRightRulePos =
sourceWordToTargetRulePos[sourceRightBoundaryPos];
bool isMonotone = true;
if ((sourceLeftBoundaryPos < sourceRightBoundaryPos &&
- targetLeftRulePos > targetRightRulePos) ||
- ((sourceLeftBoundaryPos > sourceRightBoundaryPos &&
- targetLeftRulePos < targetRightRulePos)))
- {
+ targetLeftRulePos > targetRightRulePos) ||
+ ((sourceLeftBoundaryPos > sourceRightBoundaryPos &&
+ targetLeftRulePos < targetRightRulePos))) {
isMonotone = false;
}
stringstream buf;
@@ -208,7 +209,7 @@ void SparseHieroReorderingFeature::EvaluateWhenApplied(
buf << "_";
}
if (m_type == SourceRight || m_type == SourceCombined) {
- buf << GetFactor(sourceRightBoundaryWord,m_sourceVocab,m_sourceFactor)->GetString();
+ buf << GetFactor(sourceRightBoundaryWord,m_sourceVocab,m_sourceFactor)->GetString();
buf << "_";
}
buf << (isMonotone ? "M" : "S");
diff --git a/moses/FF/SparseHieroReorderingFeature.h b/moses/FF/SparseHieroReorderingFeature.h
index d631fdec1..945402412 100644
--- a/moses/FF/SparseHieroReorderingFeature.h
+++ b/moses/FF/SparseHieroReorderingFeature.h
@@ -24,31 +24,36 @@ public:
SourceRight
};
- SparseHieroReorderingFeature(const std::string &line);
+ SparseHieroReorderingFeature(const std::string &line);
- bool IsUseable(const FactorMask &mask) const
- { return true; }
+ bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
void SetParameter(const std::string& key, const std::string& value);
- void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
virtual void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
virtual void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
private:
@@ -57,7 +62,7 @@ private:
void AddNonTerminalPairFeatures(
const Sentence& sentence, const WordsRange& nt1, const WordsRange& nt2,
- bool isMonotone, ScoreComponentCollection* accumulator) const;
+ bool isMonotone, ScoreComponentCollection* accumulator) const;
void LoadVocabulary(const std::string& filename, Vocab& vocab);
const Factor* GetFactor(const Word& word, const Vocab& vocab, FactorType factor) const;
@@ -69,7 +74,7 @@ private:
std::string m_targetVocabFile;
const Factor* m_otherFactor;
-
+
Vocab m_sourceVocab;
Vocab m_targetVocab;
diff --git a/moses/FF/StatefulFeatureFunction.h b/moses/FF/StatefulFeatureFunction.h
index 950b122e9..08b7c607d 100644
--- a/moses/FF/StatefulFeatureFunction.h
+++ b/moses/FF/StatefulFeatureFunction.h
@@ -44,7 +44,10 @@ public:
virtual FFState* EvaluateWhenApplied(
const Syntax::SHyperedge& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
- ScoreComponentCollection* accumulator) const { assert(false); return 0; /* FIXME */ }
+ ScoreComponentCollection* accumulator) const {
+ assert(false);
+ return 0; /* FIXME */
+ }
//! return the state associated with the empty hypothesis for a given sentence
virtual const FFState* EmptyHypothesisState(const InputType &input) const = 0;
diff --git a/moses/FF/StatelessFeatureFunction.h b/moses/FF/StatelessFeatureFunction.h
index abf029f0c..e5d3f3812 100644
--- a/moses/FF/StatelessFeatureFunction.h
+++ b/moses/FF/StatelessFeatureFunction.h
@@ -22,20 +22,23 @@ public:
StatelessFeatureFunction(const std::string &line);
StatelessFeatureFunction(size_t numScoreComponents, const std::string &line);
+
/**
* This should be implemented for features that apply to phrase-based models.
**/
virtual void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const = 0;
+ ScoreComponentCollection* accumulator) const = 0;
/**
* Same for chart-based features.
**/
virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const = 0;
+ ScoreComponentCollection* accumulator) const = 0;
virtual void EvaluateWhenApplied(const Syntax::SHyperedge &,
- ScoreComponentCollection*) const { assert(false); }
+ ScoreComponentCollection*) const {
+ assert(false);
+ }
virtual bool IsStateless() const {
return true;
diff --git a/moses/FF/SyntaxRHS.cpp b/moses/FF/SyntaxRHS.cpp
index 24b3bf062..80f9b21bc 100644
--- a/moses/FF/SyntaxRHS.cpp
+++ b/moses/FF/SyntaxRHS.cpp
@@ -9,36 +9,36 @@ using namespace std;
namespace Moses
{
SyntaxRHS::SyntaxRHS(const std::string &line)
-:StatelessFeatureFunction(1, line)
+ :StatelessFeatureFunction(1, line)
{
ReadParameters();
}
void SyntaxRHS::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
}
void SyntaxRHS::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
- assert(stackVec);
- for (size_t i = 0; i < stackVec->size(); ++i) {
- const ChartCellLabel &cell = *stackVec->at(i);
+ assert(stackVec);
+ for (size_t i = 0; i < stackVec->size(); ++i) {
+ const ChartCellLabel &cell = *stackVec->at(i);
- }
+ }
- if (targetPhrase.GetNumNonTerminals()) {
- vector<float> newScores(m_numScoreComponents);
- newScores[0] = - std::numeric_limits<float>::infinity();
- scoreBreakdown.PlusEquals(this, newScores);
- }
+ if (targetPhrase.GetNumNonTerminals()) {
+ vector<float> newScores(m_numScoreComponents);
+ newScores[0] = - std::numeric_limits<float>::infinity();
+ scoreBreakdown.PlusEquals(this, newScores);
+ }
}
diff --git a/moses/FF/SyntaxRHS.h b/moses/FF/SyntaxRHS.h
index 4413aef72..46911ccd9 100644
--- a/moses/FF/SyntaxRHS.h
+++ b/moses/FF/SyntaxRHS.h
@@ -16,22 +16,27 @@ public:
}
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
};
diff --git a/moses/FF/TargetBigramFeature.cpp b/moses/FF/TargetBigramFeature.cpp
index f1da62b7d..6816410f8 100644
--- a/moses/FF/TargetBigramFeature.cpp
+++ b/moses/FF/TargetBigramFeature.cpp
@@ -65,8 +65,8 @@ const FFState* TargetBigramFeature::EmptyHypothesisState(const InputType &/*inpu
}
FFState* TargetBigramFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo,
- const FFState* prev_state,
- ScoreComponentCollection* accumulator) const
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const
{
const TargetBigramState* tbState = dynamic_cast<const TargetBigramState*>(prev_state);
assert(tbState);
diff --git a/moses/FF/TargetBigramFeature.h b/moses/FF/TargetBigramFeature.h
index c63f3caa4..f6e965808 100644
--- a/moses/FF/TargetBigramFeature.h
+++ b/moses/FF/TargetBigramFeature.h
@@ -40,25 +40,29 @@ public:
virtual const FFState* EmptyHypothesisState(const InputType &input) const;
virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
virtual FFState* EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */,
- int /* featureID */,
- ScoreComponentCollection* ) const {
+ int /* featureID */,
+ ScoreComponentCollection* ) const {
throw std::logic_error("TargetBigramFeature not valid in chart decoder");
}
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
void SetParameter(const std::string& key, const std::string& value);
diff --git a/moses/FF/TargetNgramFeature.cpp b/moses/FF/TargetNgramFeature.cpp
index a43410990..8414e1bc2 100644
--- a/moses/FF/TargetNgramFeature.cpp
+++ b/moses/FF/TargetNgramFeature.cpp
@@ -61,7 +61,7 @@ void TargetNgramFeature::SetParameter(const std::string& key, const std::string&
} else if (key == "lower-ngrams") {
m_lower_ngrams = Scan<bool>(value);
} else if (key == "file") {
- m_file = value;
+ m_file = value;
} else {
StatefulFeatureFunction::SetParameter(key, value);
}
@@ -74,7 +74,7 @@ void TargetNgramFeature::Load()
if (m_file == "*") return; //allow all
ifstream inFile(m_file.c_str());
if (!inFile) {
- UTIL_THROW(util::Exception, "Couldn't open file" << m_file);
+ UTIL_THROW(util::Exception, "Couldn't open file" << m_file);
}
std::string line;
@@ -96,8 +96,8 @@ const FFState* TargetNgramFeature::EmptyHypothesisState(const InputType &/*input
}
FFState* TargetNgramFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo,
- const FFState* prev_state,
- ScoreComponentCollection* accumulator) const
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const
{
const TargetNgramState* tnState = static_cast<const TargetNgramState*>(prev_state);
assert(tnState);
diff --git a/moses/FF/TargetNgramFeature.h b/moses/FF/TargetNgramFeature.h
index e87252670..2e9e71db0 100644
--- a/moses/FF/TargetNgramFeature.h
+++ b/moses/FF/TargetNgramFeature.h
@@ -187,23 +187,28 @@ public:
virtual const FFState* EmptyHypothesisState(const InputType &input) const;
virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
virtual FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureId,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
void SetParameter(const std::string& key, const std::string& value);
diff --git a/moses/FF/TargetWordInsertionFeature.cpp b/moses/FF/TargetWordInsertionFeature.cpp
index 09a7b4472..215fbf7a0 100644
--- a/moses/FF/TargetWordInsertionFeature.cpp
+++ b/moses/FF/TargetWordInsertionFeature.cpp
@@ -6,7 +6,6 @@
#include "moses/ChartHypothesis.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/TranslationOption.h"
-#include "moses/UserMessage.h"
#include "util/string_piece_hash.hh"
#include "util/exception.hh"
@@ -19,8 +18,9 @@ TargetWordInsertionFeature::TargetWordInsertionFeature(const std::string &line)
:StatelessFeatureFunction(0, line),
m_unrestricted(true)
{
- std::cerr << "Initializing target word insertion feature.." << std::endl;
+ VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
ReadParameters();
+ VERBOSE(1, " Done." << std::endl);
}
void TargetWordInsertionFeature::SetParameter(const std::string& key, const std::string& value)
@@ -39,7 +39,7 @@ void TargetWordInsertionFeature::Load()
if (m_filename.empty())
return;
- cerr << "loading target word insertion word list from " << m_filename << endl;
+ FEATUREVERBOSE(1, "Loading target word insertion word list from " << m_filename << std::endl);
ifstream inFile(m_filename.c_str());
UTIL_THROW_IF2(!inFile, "could not open file " << m_filename);
diff --git a/moses/FF/TargetWordInsertionFeature.h b/moses/FF/TargetWordInsertionFeature.h
index 06fa25400..77d009227 100644
--- a/moses/FF/TargetWordInsertionFeature.h
+++ b/moses/FF/TargetWordInsertionFeature.h
@@ -1,5 +1,4 @@
-#ifndef moses_TargetWordInsertionFeature_h
-#define moses_TargetWordInsertionFeature_h
+#pragma once
#include <string>
#include <boost/unordered_set.hpp>
@@ -29,22 +28,26 @@ public:
void Load();
virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
void ComputeFeatures(const Phrase &source,
const TargetPhrase& targetPhrase,
@@ -56,4 +59,3 @@ public:
}
-#endif // moses_TargetWordInsertionFeature_h
diff --git a/moses/FF/TreeStructureFeature.cpp b/moses/FF/TreeStructureFeature.cpp
index 38c21109d..e558b06bc 100644
--- a/moses/FF/TreeStructureFeature.cpp
+++ b/moses/FF/TreeStructureFeature.cpp
@@ -8,7 +8,8 @@
namespace Moses
{
-void TreeStructureFeature::Load() {
+void TreeStructureFeature::Load()
+{
// syntactic constraints can be hooked in here.
m_constraints = NULL;
@@ -20,34 +21,35 @@ void TreeStructureFeature::Load() {
// define NT labels (ints) that are mapped from strings for quicker comparison.
-void TreeStructureFeature::AddNTLabels(TreePointer root) const {
- std::string label = root->GetLabel();
+void TreeStructureFeature::AddNTLabels(TreePointer root) const
+{
+ std::string label = root->GetLabel();
- if (root->IsTerminal()) {
- return;
- }
+ if (root->IsTerminal()) {
+ return;
+ }
- std::map<std::string, NTLabel>::const_iterator it = m_labelset->string_to_label.find(label);
- if (it != m_labelset->string_to_label.end()) {
- root->SetNTLabel(it->second);
- }
+ std::map<std::string, NTLabel>::const_iterator it = m_labelset->string_to_label.find(label);
+ if (it != m_labelset->string_to_label.end()) {
+ root->SetNTLabel(it->second);
+ }
- std::vector<TreePointer> children = root->GetChildren();
- for (std::vector<TreePointer>::const_iterator it2 = children.begin(); it2 != children.end(); ++it2) {
- AddNTLabels(*it2);
- }
+ std::vector<TreePointer> children = root->GetChildren();
+ for (std::vector<TreePointer>::const_iterator it2 = children.begin(); it2 != children.end(); ++it2) {
+ AddNTLabels(*it2);
+ }
}
FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
- , int featureID /* used to index the state in the previous hypotheses */
- , ScoreComponentCollection* accumulator) const
+ , int featureID /* used to index the state in the previous hypotheses */
+ , ScoreComponentCollection* accumulator) const
{
if (const PhraseProperty *property = cur_hypo.GetCurrTargetPhrase().GetProperty("Tree")) {
const std::string *tree = property->GetValueString();
TreePointer mytree (boost::make_shared<InternalTree>(*tree));
if (m_labelset) {
- AddNTLabels(mytree);
+ AddNTLabels(mytree);
}
//get subtrees (in target order)
@@ -69,8 +71,7 @@ FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hy
mytree->Combine(previous_trees);
return new TreeState(mytree);
- }
- else {
+ } else {
UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found");
}
diff --git a/moses/FF/TreeStructureFeature.h b/moses/FF/TreeStructureFeature.h
index 8dae3e001..d5ec4edda 100644
--- a/moses/FF/TreeStructureFeature.h
+++ b/moses/FF/TreeStructureFeature.h
@@ -14,10 +14,9 @@ typedef int NTLabel;
// mapping from string nonterminal label to int representation.
// allows abstraction if multiple nonterminal strings should map to same label.
-struct LabelSet
-{
+struct LabelSet {
public:
- std::map<std::string, NTLabel> string_to_label;
+ std::map<std::string, NTLabel> string_to_label;
};
@@ -26,8 +25,8 @@ public:
class SyntaxConstraints
{
public:
- virtual void SyntacticRules(TreePointer root, const std::vector<TreePointer> &previous, const FeatureFunction* sp, ScoreComponentCollection* accumulator) = 0;
- virtual ~SyntaxConstraints() {};
+ virtual void SyntacticRules(TreePointer root, const std::vector<TreePointer> &previous, const FeatureFunction* sp, ScoreComponentCollection* accumulator) = 0;
+ virtual ~SyntaxConstraints() {};
};
@@ -38,9 +37,11 @@ class TreeStructureFeature : public StatefulFeatureFunction
public:
TreeStructureFeature(const std::string &line)
:StatefulFeatureFunction(0, line) {
- ReadParameters();
- }
- ~TreeStructureFeature() {delete m_constraints;};
+ ReadParameters();
+ }
+ ~TreeStructureFeature() {
+ delete m_constraints;
+ };
virtual const FFState* EmptyHypothesisState(const InputType &input) const {
return new TreeState(TreePointer());
@@ -53,19 +54,27 @@ public:
}
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const {};
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {};
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const {};
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {};
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
+
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
- ScoreComponentCollection* accumulator) const {UTIL_THROW(util::Exception, "Not implemented");};
+ ScoreComponentCollection* accumulator) const {
+ UTIL_THROW(util::Exception, "Not implemented");
+ };
FFState* EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
diff --git a/moses/FF/UnalignedWordCountFeature.cpp b/moses/FF/UnalignedWordCountFeature.cpp
new file mode 100644
index 000000000..83a2ac0c3
--- /dev/null
+++ b/moses/FF/UnalignedWordCountFeature.cpp
@@ -0,0 +1,85 @@
+#include "UnalignedWordCountFeature.h"
+#include "moses/Phrase.h"
+#include "moses/TargetPhrase.h"
+#include "moses/ScoreComponentCollection.h"
+#include "moses/StaticData.h"
+#include "moses/Util.h"
+
+namespace Moses
+{
+
+using namespace std;
+
+UnalignedWordCountFeature::UnalignedWordCountFeature(const std::string &line)
+ : StatelessFeatureFunction(2, line)
+{
+ VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
+ ReadParameters();
+ VERBOSE(1, " Done." << std::endl);
+}
+
+void UnalignedWordCountFeature::EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
+{
+ const size_t ffScoreIndex(scoreBreakdown.GetIndexes(this).first);
+
+ const AlignmentInfo &alignmentInfo = targetPhrase.GetAlignTerm();
+ const size_t sourceLength = source.GetSize();
+ const size_t targetLength = targetPhrase.GetSize();
+
+ std::vector<bool> alignedSource(sourceLength, false);
+ std::vector<bool> alignedTarget(targetLength, false);
+
+ for (AlignmentInfo::const_iterator alignmentPoint = alignmentInfo.begin(); alignmentPoint != alignmentInfo.end(); ++alignmentPoint)
+ {
+ alignedSource[ alignmentPoint->first ] = true;
+ alignedTarget[ alignmentPoint->second ] = true;
+ }
+
+ size_t sourceUnalignedCount = 0;
+
+ for (size_t j=0; j<sourceLength; ++j) {
+ if (!alignedSource[j]) {
+ if (!source.GetWord(j).IsNonTerminal()) {
+ ++sourceUnalignedCount;
+ }
+ }
+ }
+
+ size_t targetUnalignedCount = 0;
+
+ for (size_t i=0; i<targetLength; i++) {
+ if (!alignedTarget[i]) {
+ if (!targetPhrase.GetWord(i).IsNonTerminal()) {
+ ++targetUnalignedCount;
+ }
+ }
+ }
+
+ scoreBreakdown.PlusEquals(ffScoreIndex, sourceUnalignedCount);
+ scoreBreakdown.PlusEquals(ffScoreIndex+1, targetUnalignedCount);
+
+ IFFEATUREVERBOSE(2) {
+ FEATUREVERBOSE(2, source << std::endl);
+ FEATUREVERBOSE(2, targetPhrase << std::endl);
+
+ for (AlignmentInfo::const_iterator it=targetPhrase.GetAlignTerm().begin();
+ it!=targetPhrase.GetAlignTerm().end(); ++it) {
+ FEATUREVERBOSE(2, "alignTerm " << it->first << " " << it->second << std::endl);
+ }
+
+ for (AlignmentInfo::const_iterator it=targetPhrase.GetAlignNonTerm().begin();
+ it!=targetPhrase.GetAlignNonTerm().end(); ++it) {
+ FEATUREVERBOSE(2, "alignNonTerm " << it->first << " " << it->second << std::endl);
+ }
+
+ FEATUREVERBOSE(2, "sourceLength= " << sourceLength << std::endl);
+ FEATUREVERBOSE(2, "targetLength= " << targetLength << std::endl);
+ FEATUREVERBOSE(2, "sourceUnalignedCount= " << sourceUnalignedCount << std::endl);
+ FEATUREVERBOSE(2, "targetUnalignedCount= " << targetUnalignedCount << std::endl);
+ }
+}
+
+}
diff --git a/moses/FF/UnalignedWordCountFeature.h b/moses/FF/UnalignedWordCountFeature.h
new file mode 100644
index 000000000..f0efbb70e
--- /dev/null
+++ b/moses/FF/UnalignedWordCountFeature.h
@@ -0,0 +1,47 @@
+#pragma once
+
+#include "StatelessFeatureFunction.h"
+#include "moses/FactorCollection.h"
+#include "moses/AlignmentInfo.h"
+
+namespace Moses
+{
+
+class UnalignedWordCountFeature : public StatelessFeatureFunction
+{
+public:
+ UnalignedWordCountFeature(const std::string &line);
+
+ bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
+
+ void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
+
+ void EvaluateWithSourceContext(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
+ void EvaluateWhenApplied(const Hypothesis& hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
+
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
+
+};
+
+}
+
diff --git a/moses/FF/UnknownWordPenaltyProducer.h b/moses/FF/UnknownWordPenaltyProducer.h
index 1aa6cbbcf..bef6bd58c 100644
--- a/moses/FF/UnknownWordPenaltyProducer.h
+++ b/moses/FF/UnknownWordPenaltyProducer.h
@@ -32,26 +32,31 @@ public:
std::vector<float> DefaultWeights() const;
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const Syntax::SHyperedge &hyperedge,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
};
diff --git a/moses/FF/VW/ThreadLocalByFeatureStorage.h b/moses/FF/VW/ThreadLocalByFeatureStorage.h
new file mode 100644
index 000000000..37bc0f712
--- /dev/null
+++ b/moses/FF/VW/ThreadLocalByFeatureStorage.h
@@ -0,0 +1,82 @@
+#pragma once
+
+#include <string>
+#include <cstdlib>
+#include <vector>
+#include <map>
+
+#include <boost/thread/tss.hpp>
+#include <boost/shared_ptr.hpp>
+
+#include "moses/FF/FeatureFunction.h"
+
+namespace Moses
+{
+
+template <class Value>
+struct DefaultFactory {
+ typedef boost::shared_ptr<Value> ValuePtr;
+
+ ValuePtr operator()() {
+ return ValuePtr(new Value());
+ }
+};
+
+template<class Value, class Factory = DefaultFactory<Value> >
+class ThreadLocalByFeatureStorage
+{
+public:
+ typedef boost::shared_ptr<Value> ValuePtr;
+ typedef std::map<std::string, ValuePtr> NameValueMap;
+ typedef boost::thread_specific_ptr<NameValueMap> TSNameValueMap;
+
+ ThreadLocalByFeatureStorage(FeatureFunction* ff,
+ Factory factory = Factory())
+ : m_ff(ff), m_factory(factory) {}
+
+ virtual ~ThreadLocalByFeatureStorage() {} // provide empty virtual dtor
+
+ virtual ValuePtr GetStored() {
+ if(!m_nameMap.get())
+ m_nameMap.reset(new NameValueMap());
+
+ typename NameValueMap::iterator it
+ = m_nameMap->find(m_ff->GetScoreProducerDescription());
+
+ if(it == m_nameMap->end()) {
+ std::pair<typename NameValueMap::iterator, bool> ret;
+ ret = m_nameMap->insert(
+ std::make_pair(m_ff->GetScoreProducerDescription(), m_factory()));
+
+ return ret.first->second;
+ } else {
+ return it->second;
+ }
+ }
+
+ virtual const ValuePtr GetStored() const {
+ UTIL_THROW_IF2(!m_nameMap.get(),
+ "No thread local storage has been created for: "
+ << m_ff->GetScoreProducerDescription());
+
+ typename NameValueMap::const_iterator it
+ = m_nameMap->find(m_ff->GetScoreProducerDescription());
+
+ UTIL_THROW_IF2(it == m_nameMap->end(),
+ "No features stored for: "
+ << m_ff->GetScoreProducerDescription());
+
+ return it->second;
+ }
+
+private:
+ FeatureFunction* m_ff;
+ Factory m_factory;
+ static TSNameValueMap m_nameMap;
+};
+
+template <class Value, class Factory>
+typename ThreadLocalByFeatureStorage<Value, Factory>::TSNameValueMap
+ThreadLocalByFeatureStorage<Value, Factory>::m_nameMap;
+
+}
diff --git a/moses/FF/VW/VW.h b/moses/FF/VW/VW.h
new file mode 100644
index 000000000..9be44c8b6
--- /dev/null
+++ b/moses/FF/VW/VW.h
@@ -0,0 +1,436 @@
+#pragma once
+
+#include <string>
+#include <map>
+#include <limits>
+
+#include "moses/FF/StatelessFeatureFunction.h"
+#include "moses/PP/CountsPhraseProperty.h"
+#include "moses/TranslationOptionList.h"
+#include "moses/TranslationOption.h"
+#include "moses/Util.h"
+#include "moses/TypeDef.h"
+#include "moses/StaticData.h"
+#include "moses/Phrase.h"
+#include "moses/AlignmentInfo.h"
+
+#include "Normalizer.h"
+#include "Classifier.h"
+#include "VWFeatureBase.h"
+#include "TabbedSentence.h"
+#include "ThreadLocalByFeatureStorage.h"
+
+namespace Moses
+{
+
+const std::string VW_DUMMY_LABEL = "1111"; // VW does not use the actual label, other classifiers might
+
+/**
+ * Helper class for storing alignment constraints.
+ */
+class Constraint
+{
+public:
+ // Default state is "unconstrained": min above any index, max below any.
+ Constraint() : m_min(std::numeric_limits<int>::max()), m_max(-1) {}
+
+ Constraint(int min, int max) : m_min(min), m_max(max) {}
+
+ /**
+ * We are aligned to point => our min cannot be larger, our max cannot be smaller.
+ */
+ void Update(int point) {
+ if (m_min > point) m_min = point;
+ if (m_max < point) m_max = point;
+ }
+
+ // True once at least one alignment point has been observed
+ // (m_max only leaves -1 via Update or the two-argument constructor).
+ bool IsSet() const {
+ return m_max != -1;
+ }
+
+ int GetMin() const {
+ return m_min;
+ }
+
+ int GetMax() const {
+ return m_max;
+ }
+
+private:
+ int m_min, m_max;
+};
+
+/**
+ * VW thread-specific data about target sentence.
+ */
+/**
+ * VW thread-specific data about target sentence. Owns the target Phrase and
+ * the sentence-level AlignmentInfo assigned by VW::InitializeForInput.
+ */
+struct VWTargetSentence {
+ VWTargetSentence() : m_sentence(NULL), m_alignment(NULL) {}
+
+ void Clear() {
+ // delete is a no-op on NULL; reset the pointers afterwards so Clear()
+ // is idempotent and the destructor cannot double-delete after a
+ // Clear() that was not followed by a reassignment.
+ delete m_sentence;
+ m_sentence = NULL;
+ delete m_alignment;
+ m_alignment = NULL;
+ }
+
+ ~VWTargetSentence() {
+ Clear();
+ }
+
+ // Pre-computes, for every source and target position, the min/max aligned
+ // position on the other side. Requires m_sentence and m_alignment to be set.
+ void SetConstraints(size_t sourceSize) {
+ // initialize to unconstrained
+ m_sourceConstraints.assign(sourceSize, Constraint());
+ m_targetConstraints.assign(m_sentence->GetSize(), Constraint());
+
+ // set constraints according to alignment points
+ AlignmentInfo::const_iterator it;
+ for (it = m_alignment->begin(); it != m_alignment->end(); it++) {
+ int src = it->first;
+ int tgt = it->second;
+
+ m_sourceConstraints[src].Update(tgt);
+ m_targetConstraints[tgt].Update(src);
+ }
+ }
+
+ Phrase *m_sentence;
+ AlignmentInfo *m_alignment;
+ std::vector<Constraint> m_sourceConstraints, m_targetConstraints;
+};
+
+typedef ThreadLocalByFeatureStorage<Discriminative::Classifier, Discriminative::ClassifierFactory &> TLSClassifier;
+typedef ThreadLocalByFeatureStorage<VWTargetSentence> TLSTargetSentence;
+
+class VW : public StatelessFeatureFunction, public TLSTargetSentence
+{
+public:
+ // Parses the moses.ini line, then builds the classifier factory:
+ // training mode writes a model to m_modelPath, prediction mode loads it
+ // with the extra VW options string.
+ VW(const std::string &line)
+ : StatelessFeatureFunction(1, line)
+ , TLSTargetSentence(this)
+ , m_train(false) {
+ ReadParameters();
+ // NOTE(review): classifierFactory is heap-allocated and only referenced
+ // by m_tlsClassifier; it is never deleted — presumably tolerated as a
+ // process-lifetime object. Confirm before tightening ownership.
+ Discriminative::ClassifierFactory *classifierFactory = m_train
+ ? new Discriminative::ClassifierFactory(m_modelPath)
+ : new Discriminative::ClassifierFactory(m_modelPath, m_vwOptions);
+
+ m_tlsClassifier = new TLSClassifier(this, *classifierFactory);
+
+ // m_normalizer may already have been set by SetParameter("loss", ...).
+ if (! m_normalizer) {
+ VERBOSE(1, "VW :: No loss function specified, assuming logistic loss.\n");
+ m_normalizer = (Discriminative::Normalizer *) new Discriminative::LogisticLossNormalizer();
+ }
+ }
+
+ virtual ~VW() {
+ delete m_tlsClassifier;
+ delete m_normalizer;
+ }
+
+ bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
+
+ // VW scores whole translation option lists (see
+ // EvaluateTranslationOptionListWithSourceContext); the per-phrase hooks
+ // are intentionally empty.
+ void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
+
+ void EvaluateWithSourceContext(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ // Scores all translation options of one source span with the VW
+ // classifier. In training mode each option becomes a training example
+ // labelled by IsCorrectTranslationOption(); in prediction mode the
+ // classifier losses are normalized over the list and added to each
+ // option's score breakdown.
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ Discriminative::Classifier &classifier = *m_tlsClassifier->GetStored();
+
+ if (translationOptionList.size() == 0)
+ return; // nothing to do
+
+ VERBOSE(2, "VW :: Evaluating translation options\n");
+
+ const std::vector<VWFeatureBase*>& sourceFeatures = VWFeatureBase::GetSourceFeatures(GetScoreProducerDescription());
+
+ // all options in the list share the same source span and input path
+ const WordsRange &sourceRange = translationOptionList.Get(0)->GetSourceWordsRange();
+ const InputPath &inputPath = translationOptionList.Get(0)->GetInputPath();
+
+ // optionally update translation options using leave-one-out
+ std::vector<bool> keep = (m_leaveOneOut.size() > 0)
+ ? LeaveOneOut(translationOptionList)
+ : std::vector<bool>(translationOptionList.size(), true);
+
+ std::vector<float> losses(translationOptionList.size());
+ std::vector<float>::iterator iterLoss;
+ TranslationOptionList::const_iterator iterTransOpt;
+ std::vector<bool>::const_iterator iterKeep;
+
+ if (m_train) {
+ // check which translation options are correct in advance
+ bool seenCorrect = false;
+ for(iterTransOpt = translationOptionList.begin(), iterLoss = losses.begin(), iterKeep = keep.begin() ;
+ iterTransOpt != translationOptionList.end() ; ++iterTransOpt, ++iterLoss, ++iterKeep) {
+ bool isCorrect = IsCorrectTranslationOption(**iterTransOpt);
+ *iterLoss = isCorrect ? 0.0 : 1.0;
+ if (isCorrect && *iterKeep) seenCorrect = true;
+ }
+
+ // do not train if there are no positive examples
+ if (! seenCorrect) {
+ VERBOSE(2, "VW :: skipping topt collection, no correct translation for span\n");
+ return;
+ }
+ }
+
+ // label-independent (source-side) features, extracted once per span
+ for(size_t i = 0; i < sourceFeatures.size(); ++i)
+ (*sourceFeatures[i])(input, inputPath, sourceRange, classifier);
+
+ const std::vector<VWFeatureBase*>& targetFeatures = VWFeatureBase::GetTargetFeatures(GetScoreProducerDescription());
+
+ // BUGFIX: iterKeep must advance in lock-step with the other iterators;
+ // previously it was never incremented here, so only keep[0] was tested.
+ for(iterTransOpt = translationOptionList.begin(), iterLoss = losses.begin(), iterKeep = keep.begin() ;
+ iterTransOpt != translationOptionList.end() ; ++iterTransOpt, ++iterLoss, ++iterKeep) {
+
+ if (! *iterKeep)
+ continue;
+
+ const TargetPhrase &targetPhrase = (*iterTransOpt)->GetTargetPhrase();
+ for(size_t i = 0; i < targetFeatures.size(); ++i)
+ (*targetFeatures[i])(input, inputPath, targetPhrase, classifier);
+
+ if (! m_train) {
+ *iterLoss = classifier.Predict(MakeTargetLabel(targetPhrase));
+ } else {
+ classifier.Train(MakeTargetLabel(targetPhrase), *iterLoss);
+ }
+ }
+
+ // normalize losses over the whole list (e.g. logistic loss)
+ (*m_normalizer)(losses);
+
+ // BUGFIX: same lock-step advance of iterKeep as above.
+ for(iterTransOpt = translationOptionList.begin(), iterLoss = losses.begin(), iterKeep = keep.begin() ;
+ iterTransOpt != translationOptionList.end() ; ++iterTransOpt, ++iterLoss, ++iterKeep) {
+ if (! *iterKeep)
+ continue;
+
+ TranslationOption &transOpt = **iterTransOpt;
+
+ std::vector<float> newScores(m_numScoreComponents);
+ newScores[0] = FloorScore(TransformScore(*iterLoss));
+
+ ScoreComponentCollection &scoreBreakDown = transOpt.GetScoreBreakdown();
+ scoreBreakDown.PlusEquals(this, newScores);
+
+ transOpt.UpdateScore();
+ }
+ }
+
+ // VW is applied at the translation-option level; nothing to do when a
+ // hypothesis is expanded.
+ void EvaluateWhenApplied(const Hypothesis& hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
+
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
+
+
+ // Handles the VW-specific moses.ini keys; everything else is delegated
+ // to StatelessFeatureFunction.
+ void SetParameter(const std::string& key, const std::string& value) {
+ if (key == "train") {
+ m_train = Scan<bool>(value);
+ } else if (key == "path") {
+ m_modelPath = value;
+ } else if (key == "vw-options") {
+ m_vwOptions = value;
+ } else if (key == "leave-one-out-from") {
+ m_leaveOneOut = value;
+ } else if (key == "loss") {
+ // avoid leaking a previously assigned normalizer if "loss" is given
+ // more than once (m_normalizer starts out NULL, so delete is safe)
+ delete m_normalizer;
+ m_normalizer = value == "logistic"
+ ? (Discriminative::Normalizer *) new Discriminative::LogisticLossNormalizer()
+ : (Discriminative::Normalizer *) new Discriminative::SquaredLossNormalizer();
+ } else {
+ StatelessFeatureFunction::SetParameter(key, value);
+ }
+ }
+
+ // Training-mode setup for one input sentence: parses the TabbedSentence
+ // columns (target sentence, word alignment) into the thread-local
+ // VWTargetSentence and pre-computes alignment constraints.
+ virtual void InitializeForInput(InputType const& source) {
+ // tabbed sentence is assumed only in training
+ if (! m_train)
+ return;
+
+ UTIL_THROW_IF2(source.GetType() != TabbedSentenceInput, "This feature function requires the TabbedSentence input type");
+
+ const TabbedSentence& tabbedSentence = static_cast<const TabbedSentence&>(source);
+ UTIL_THROW_IF2(tabbedSentence.GetColumns().size() < 2, "TabbedSentence must contain target<tab>alignment");
+
+
+ // target sentence represented as a phrase
+ Phrase *target = new Phrase();
+ target->CreateFromString(
+ Output
+ , StaticData::Instance().GetOutputFactorOrder()
+ , tabbedSentence.GetColumns()[0]
+ , NULL);
+
+ // word alignment between source and target sentence
+ // we don't store alignment info in AlignmentInfoCollection because we keep alignments of whole
+ // sentences, not phrases
+ AlignmentInfo *alignment = new AlignmentInfo(tabbedSentence.GetColumns()[1]);
+
+ // ownership of target and alignment passes to the thread-local storage
+ VWTargetSentence &targetSent = *GetStored();
+ targetSent.Clear();
+ targetSent.m_sentence = target;
+ targetSent.m_alignment = alignment;
+
+ //std::cerr << static_cast<const Phrase&>(tabbedSentence) << std::endl;
+ //std::cerr << *target << std::endl;
+ //std::cerr << *alignment << std::endl;
+
+
+ // pre-compute max- and min- aligned points for faster translation option checking
+ targetSent.SetConstraints(source.GetSize());
+ }
+
+
+private:
+ // All examples share one dummy label; VW itself does not use the label
+ // string (see VW_DUMMY_LABEL), other classifiers might.
+ std::string MakeTargetLabel(const TargetPhrase &targetPhrase) const {
+ return VW_DUMMY_LABEL;
+ }
+
+ // Decides whether a translation option is consistent with the reference
+ // target sentence and word alignment stored for the current input:
+ // computes the aligned target span of the source span, extends it over
+ // adjacent unaligned words, and checks whether the target phrase matches
+ // the reference at some start position within that window.
+ bool IsCorrectTranslationOption(const TranslationOption &topt) const {
+
+ //std::cerr << topt.GetSourceWordsRange() << std::endl;
+
+ int sourceStart = topt.GetSourceWordsRange().GetStartPos();
+ int sourceEnd = topt.GetSourceWordsRange().GetEndPos();
+
+ const VWTargetSentence &targetSentence = *GetStored();
+
+ // [targetStart, targetEnd] spans aligned target words
+ int targetStart = targetSentence.m_sentence->GetSize();
+ int targetEnd = -1;
+
+ // get the left-most and right-most alignment point within source span
+ for(int i = sourceStart; i <= sourceEnd; ++i) {
+ if(targetSentence.m_sourceConstraints[i].IsSet()) {
+ if(targetStart > targetSentence.m_sourceConstraints[i].GetMin())
+ targetStart = targetSentence.m_sourceConstraints[i].GetMin();
+ if(targetEnd < targetSentence.m_sourceConstraints[i].GetMax())
+ targetEnd = targetSentence.m_sourceConstraints[i].GetMax();
+ }
+ }
+ // there was no alignment
+ if(targetEnd == -1)
+ return false;
+
+ //std::cerr << "Shorter: " << targetStart << " " << targetEnd << std::endl;
+
+ // [targetStart2, targetEnd2] spans unaligned words left and right of [targetStart, targetEnd]
+ int targetStart2 = targetStart;
+ for(int i = targetStart2; i >= 0 && !targetSentence.m_targetConstraints[i].IsSet(); --i)
+ targetStart2 = i;
+
+ int targetEnd2 = targetEnd;
+ for(int i = targetEnd2; i < targetSentence.m_sentence->GetSize() && !targetSentence.m_targetConstraints[i].IsSet(); ++i)
+ targetEnd2 = i;
+
+ //std::cerr << "Longer: " << targetStart2 << " " << targetEnd2 << std::endl;
+
+ const TargetPhrase &tphrase = topt.GetTargetPhrase();
+ //std::cerr << tphrase << std::endl;
+
+ // if target phrase is shorter than inner span return false
+ if(tphrase.GetSize() < targetEnd - targetStart + 1)
+ return false;
+
+ // if target phrase is longer than outer span return false
+ if(tphrase.GetSize() > targetEnd2 - targetStart2 + 1)
+ return false;
+
+ // for each possible starting point
+ for(int tempStart = targetStart2; tempStart <= targetStart; tempStart++) {
+ bool found = true;
+ // check if the target phrase is within longer span
+ for(int i = tempStart; i <= targetEnd2 && i < tphrase.GetSize() + tempStart; ++i) {
+ if(tphrase.GetWord(i - tempStart) != targetSentence.m_sentence->GetWord(i)) {
+ found = false;
+ break;
+ }
+ }
+ // return true if there was a match
+ if(found) {
+ //std::cerr << "Found" << std::endl;
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ // For each translation option, discounts the current sentence's own
+ // occurrence from the phrase-table counts (leave-one-out) and recomputes
+ // the P(f|e)/P(e|f) scores of the phrase table named by m_leaveOneOut.
+ // Returns a keep-mask: false marks options that only co-occurred once
+ // with the source phrase and should be skipped. Training mode only.
+ std::vector<bool> LeaveOneOut(const TranslationOptionList &topts) const {
+ UTIL_THROW_IF2(m_leaveOneOut.size() == 0 || ! m_train, "LeaveOneOut called in wrong setting!");
+
+ float sourceRawCount = 0.0;
+ const float ONE = 1.0001; // slightly above 1 to be safe against float rounding when subtracting counts
+
+ std::vector<bool> keepOpt;
+
+ TranslationOptionList::const_iterator iterTransOpt;
+ for(iterTransOpt = topts.begin(); iterTransOpt != topts.end(); ++iterTransOpt) {
+ const TargetPhrase &targetPhrase = (*iterTransOpt)->GetTargetPhrase();
+
+ // extract raw counts from phrase-table property
+ const CountsPhraseProperty *property = static_cast<const CountsPhraseProperty *>(targetPhrase.GetProperty("Counts"));
+ if (! property) {
+ VERBOSE(1, "VW :: Counts not found for topt! Is this an OOV?\n");
+ // keep all translation opts without updating, this is either OOV or bad usage...
+ keepOpt.assign(topts.size(), true);
+ return keepOpt;
+ }
+
+ // source marginal is the same for all options; fetch it only once
+ if (sourceRawCount == 0.0) {
+ sourceRawCount = property->GetSourceMarginal() - ONE; // discount one occurrence of the source phrase
+ if (sourceRawCount <= 0) {
+ // no translation options survived, source phrase was a singleton
+ keepOpt.assign(topts.size(), false);
+ return keepOpt;
+ }
+ }
+
+ float discount = IsCorrectTranslationOption(**iterTransOpt) ? ONE : 0.0;
+ float target = property->GetTargetMarginal() - discount;
+ float joint = property->GetJointCount() - discount;
+ if (discount != 0.0) VERBOSE(2, "VW :: leaving one out!\n");
+
+ if (joint > 0) {
+ // topt survived leaving one out, update its scores
+ const FeatureFunction *feature = &FindFeatureFunction(m_leaveOneOut);
+ std::vector<float> scores = targetPhrase.GetScoreBreakdown().GetScoresForProducer(feature);
+ UTIL_THROW_IF2(scores.size() != 4, "Unexpected number of scores in feature " << m_leaveOneOut);
+ scores[0] = TransformScore(joint / target); // P(f|e)
+ scores[2] = TransformScore(joint / sourceRawCount); // P(e|f)
+
+ ScoreComponentCollection &scoreBreakDown = (*iterTransOpt)->GetScoreBreakdown();
+ scoreBreakDown.Assign(feature, scores);
+ (*iterTransOpt)->UpdateScore();
+ keepOpt.push_back(true);
+ } else {
+ // they only occurred together once, discard topt
+ VERBOSE(2, "VW :: discarded topt when leaving one out\n");
+ keepOpt.push_back(false);
+ }
+ }
+
+ return keepOpt;
+ }
+
+ bool m_train; // false means predict
+ std::string m_modelPath; // classifier model file (see "path" parameter)
+ std::string m_vwOptions; // extra options passed to VW in prediction mode
+
+ // optionally contains feature name of a phrase table where we recompute scores with leaving one out
+ std::string m_leaveOneOut;
+
+ // both owned by this object and deleted in the destructor
+ Discriminative::Normalizer *m_normalizer = NULL;
+ TLSClassifier *m_tlsClassifier;
+};
+
+}
+
diff --git a/moses/FF/VW/VWFeatureBase.cpp b/moses/FF/VW/VWFeatureBase.cpp
new file mode 100644
index 000000000..874544203
--- /dev/null
+++ b/moses/FF/VW/VWFeatureBase.cpp
@@ -0,0 +1,12 @@
+
+#include <string>
+
+#include "VWFeatureBase.h"
+
+namespace Moses
+{
+// Definitions of the static registries declared in VWFeatureBase.h, mapping
+// a parent classifier name (e.g. "VW0") to the feature objects it uses.
+std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_features;
+std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_sourceFeatures;
+std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_targetFeatures;
+}
+
diff --git a/moses/FF/VW/VWFeatureBase.h b/moses/FF/VW/VWFeatureBase.h
new file mode 100644
index 000000000..04eb6974a
--- /dev/null
+++ b/moses/FF/VW/VWFeatureBase.h
@@ -0,0 +1,122 @@
+#pragma once
+
+#include <string>
+#include <boost/thread/tss.hpp>
+
+#include "Classifier.h"
+#include "moses/TypeDef.h"
+#include "moses/Util.h"
+#include "moses/FF/StatelessFeatureFunction.h"
+
+namespace Moses
+{
+
+// Base class for all VW classifier features. Each concrete feature registers
+// itself (via UpdateRegister, called from the leaf constructor) with one or
+// more parent VW classifiers, named "VW0" by default or via used-by=...
+class VWFeatureBase : public StatelessFeatureFunction
+{
+public:
+ // isSource decides whether UpdateRegister files this feature under the
+ // source-dependent or target-dependent registry.
+ VWFeatureBase(const std::string &line, bool isSource = true)
+ : StatelessFeatureFunction(0, line), m_usedBy(1, "VW0"), m_isSource(isSource) {
+ // defaults
+ m_sourceFactors.push_back(0);
+ m_targetFactors.push_back(0);
+ }
+
+ bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
+
+ // Official hooks should do nothing. This is a hack to be able to define
+ // classifier features in the moses.ini configuration file.
+ void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {}
+ void EvaluateWithSourceContext(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {}
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {}
+ void EvaluateWhenApplied(const Hypothesis& hypo,
+ ScoreComponentCollection* accumulator) const {}
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
+ ScoreComponentCollection* accumulator) const {}
+
+
+ // Common parameters for classifier features, both source and target features
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ if (key == "used-by") {
+ ParseUsedBy(value);
+ } else if (key == "source-factors") {
+ Tokenize<FactorType>(m_sourceFactors, value, ",");
+ } else if (key == "target-factors") {
+ Tokenize<FactorType>(m_targetFactors, value, ",");
+ } else {
+ StatelessFeatureFunction::SetParameter(key, value);
+ }
+ }
+
+ // Return all classifier features, regardless of type
+ // (throws if no feature registered under the given classifier name)
+ static const std::vector<VWFeatureBase*>& GetFeatures(std::string name = "VW0") {
+ UTIL_THROW_IF2(s_features.count(name) == 0, "No features registered for parent classifier: " + name);
+ return s_features[name];
+ }
+
+ // Return only source-dependent classifier features
+ static const std::vector<VWFeatureBase*>& GetSourceFeatures(std::string name = "VW0") {
+ UTIL_THROW_IF2(s_sourceFeatures.count(name) == 0, "No source features registered for parent classifier: " + name);
+ return s_sourceFeatures[name];
+ }
+
+ // Return only target-dependent classifier features
+ static const std::vector<VWFeatureBase*>& GetTargetFeatures(std::string name = "VW0") {
+ UTIL_THROW_IF2(s_targetFeatures.count(name) == 0, "No target features registered for parent classifier: " + name);
+ return s_targetFeatures[name];
+ }
+
+ // Overload to process source-dependent data, create features once for every
+ // source sentence word range.
+ virtual void operator()(const InputType &input
+ , const InputPath &inputPath
+ , const WordsRange &sourceRange
+ , Discriminative::Classifier &classifier) const = 0;
+
+ // Overload to process target-dependent features, create features once for
+ // every target phrase. One source word range will have at least one target
+ // phrase, but may have more.
+ virtual void operator()(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , Discriminative::Classifier &classifier) const = 0;
+
+protected:
+ std::vector<FactorType> m_sourceFactors, m_targetFactors;
+
+ // Must be called at the end of every leaf constructor, after
+ // ReadParameters(), so that m_usedBy and m_isSource are final.
+ void UpdateRegister() {
+ for(std::vector<std::string>::const_iterator it = m_usedBy.begin();
+ it != m_usedBy.end(); it++) {
+ s_features[*it].push_back(this);
+ if(m_isSource)
+ s_sourceFeatures[*it].push_back(this);
+ else
+ s_targetFeatures[*it].push_back(this);
+ }
+ }
+
+private:
+ // Replaces the default {"VW0"} with the comma-separated classifier names.
+ void ParseUsedBy(const std::string &usedBy) {
+ m_usedBy.clear();
+ Tokenize(m_usedBy, usedBy, ",");
+ }
+
+ std::vector<std::string> m_usedBy;
+ bool m_isSource;
+ static std::map<std::string, std::vector<VWFeatureBase*> > s_features;
+ static std::map<std::string, std::vector<VWFeatureBase*> > s_sourceFeatures;
+ static std::map<std::string, std::vector<VWFeatureBase*> > s_targetFeatures;
+};
+
+}
+
diff --git a/moses/FF/VW/VWFeatureSource.h b/moses/FF/VW/VWFeatureSource.h
new file mode 100644
index 000000000..564f4a3b6
--- /dev/null
+++ b/moses/FF/VW/VWFeatureSource.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#include <string>
+#include "VWFeatureBase.h"
+#include "moses/InputType.h"
+
+namespace Moses
+{
+
+// Inherit from this for source-dependent classifier features. They will
+// automatically register with the classifier class named VW0 or one or more
+// names specified by the used-by=name1,name2,... parameter.
+//
+// The classifier gets a full list by calling
+// VWFeatureBase::GetSourceFeatures(GetScoreProducerDescription())
+
+
+class VWFeatureSource : public VWFeatureBase
+{
+public:
+ VWFeatureSource(const std::string &line)
+ : VWFeatureBase(line, true) {
+ }
+
+ // Gets its pure virtual functions from VWFeatureBase
+
+ // Target-dependent overload is a no-op for source features.
+ virtual void operator()(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , Discriminative::Classifier &classifier) const {
+ }
+
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ VWFeatureBase::SetParameter(key, value);
+ }
+
+protected:
+ // Surface form of the source word at pos, using the configured factors.
+ inline std::string GetWord(const InputType &input, size_t pos) const {
+ return input.GetWord(pos).GetString(m_sourceFactors, false);
+ }
+};
+
+}
diff --git a/moses/FF/VW/VWFeatureSourceBagOfWords.h b/moses/FF/VW/VWFeatureSourceBagOfWords.h
new file mode 100644
index 000000000..12bcaecb8
--- /dev/null
+++ b/moses/FF/VW/VWFeatureSourceBagOfWords.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include <string>
+#include "VWFeatureSource.h"
+
+namespace Moses
+{
+
+// Bag-of-words over the WHOLE source sentence (sourceRange is ignored):
+// one label-independent feature "bow^<word>" per input position.
+class VWFeatureSourceBagOfWords : public VWFeatureSource
+{
+public:
+ VWFeatureSourceBagOfWords(const std::string &line)
+ : VWFeatureSource(line) {
+ ReadParameters();
+
+ // Call this last
+ VWFeatureBase::UpdateRegister();
+ }
+
+ void operator()(const InputType &input
+ , const InputPath &inputPath
+ , const WordsRange &sourceRange
+ , Discriminative::Classifier &classifier) const {
+ for (size_t i = 0; i < input.GetSize(); i++) {
+ classifier.AddLabelIndependentFeature("bow^" + GetWord(input, i));
+ }
+ }
+
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ VWFeatureSource::SetParameter(key, value);
+ }
+};
+
+}
diff --git a/moses/FF/VW/VWFeatureSourceExternalFeatures.h b/moses/FF/VW/VWFeatureSourceExternalFeatures.h
new file mode 100644
index 000000000..09abe517b
--- /dev/null
+++ b/moses/FF/VW/VWFeatureSourceExternalFeatures.h
@@ -0,0 +1,63 @@
+#pragma once
+
+#include <string>
+#include <cstdlib>
+
+#include "ThreadLocalByFeatureStorage.h"
+#include "VWFeatureSource.h"
+#include "TabbedSentence.h"
+
+namespace Moses
+{
+
+// Assuming a given column of TabbedSentence contains space separated source features
+// Assuming a given column of TabbedSentence contains space separated source features
+class VWFeatureSourceExternalFeatures : public VWFeatureSource
+{
+public:
+ VWFeatureSourceExternalFeatures(const std::string &line)
+ : VWFeatureSource(line), m_tls(this), m_column(0) {
+ ReadParameters();
+
+ // Call this last
+ VWFeatureBase::UpdateRegister();
+ }
+
+ // Emits one "srcext^<token>" feature per token parsed in
+ // InitializeForInput; sourceRange is ignored (sentence-level features).
+ void operator()(const InputType &input
+ , const InputPath &inputPath
+ , const WordsRange &sourceRange
+ , Discriminative::Classifier &classifier) const {
+ const Features& features = *m_tls.GetStored();
+ for (size_t i = 0; i < features.size(); i++) {
+ classifier.AddLabelIndependentFeature("srcext^" + features[i]);
+ }
+ }
+
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ if(key == "column")
+ m_column = Scan<size_t>(value);
+ else
+ VWFeatureSource::SetParameter(key, value);
+ }
+
+ // Parses the configured TabbedSentence column into the thread-local
+ // token list used by operator() above.
+ virtual void InitializeForInput(InputType const& source) {
+ UTIL_THROW_IF2(source.GetType() != TabbedSentenceInput,
+ "This feature function requires the TabbedSentence input type");
+
+ const TabbedSentence& tabbedSentence = static_cast<const TabbedSentence&>(source);
+ const std::string &column = tabbedSentence.GetColumn(m_column);
+
+ Features& features = *m_tls.GetStored();
+ features.clear();
+
+ Tokenize(features, column, " ");
+ }
+
+private:
+ typedef std::vector<std::string> Features;
+ typedef ThreadLocalByFeatureStorage<Features> TLSFeatures;
+
+ TLSFeatures m_tls;
+ size_t m_column; // which TabbedSentence column holds the features
+};
+
+}
diff --git a/moses/FF/VW/VWFeatureSourceIndicator.h b/moses/FF/VW/VWFeatureSourceIndicator.h
new file mode 100644
index 000000000..784f2657e
--- /dev/null
+++ b/moses/FF/VW/VWFeatureSourceIndicator.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <string>
+#include <algorithm>
+#include "VWFeatureSource.h"
+#include "moses/Util.h"
+
+namespace Moses
+{
+
+// Single indicator feature for the full source phrase: "sind^w1 w2 ... wn".
+class VWFeatureSourceIndicator : public VWFeatureSource
+{
+public:
+ VWFeatureSourceIndicator(const std::string &line)
+ : VWFeatureSource(line) {
+ ReadParameters();
+
+ // Call this last
+ VWFeatureBase::UpdateRegister();
+ }
+
+ void operator()(const InputType &input
+ , const InputPath &inputPath
+ , const WordsRange &sourceRange
+ , Discriminative::Classifier &classifier) const {
+ size_t begin = sourceRange.GetStartPos();
+ size_t end = sourceRange.GetEndPos() + 1; // exclusive
+
+ std::vector<std::string> words(end - begin);
+
+ for (size_t i = 0; i < end - begin; i++)
+ words[i] = GetWord(input, begin + i);
+
+ classifier.AddLabelIndependentFeature("sind^" + Join(" ", words));
+ }
+
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ VWFeatureSource::SetParameter(key, value);
+ }
+};
+
+}
diff --git a/moses/FF/VW/VWFeatureSourcePhraseInternal.h b/moses/FF/VW/VWFeatureSourcePhraseInternal.h
new file mode 100644
index 000000000..6b6f6f933
--- /dev/null
+++ b/moses/FF/VW/VWFeatureSourcePhraseInternal.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include <string>
+#include <algorithm>
+#include "VWFeatureSource.h"
+#include "moses/Util.h"
+
+namespace Moses
+{
+
+// One "sin^<word>" feature per word inside the source phrase span.
+class VWFeatureSourcePhraseInternal : public VWFeatureSource
+{
+public:
+ VWFeatureSourcePhraseInternal(const std::string &line)
+ : VWFeatureSource(line) {
+ ReadParameters();
+
+ // Call this last
+ VWFeatureBase::UpdateRegister();
+ }
+
+ void operator()(const InputType &input
+ , const InputPath &inputPath
+ , const WordsRange &sourceRange
+ , Discriminative::Classifier &classifier) const {
+ size_t begin = sourceRange.GetStartPos();
+ size_t end = sourceRange.GetEndPos() + 1; // exclusive
+
+ while (begin < end) {
+ classifier.AddLabelIndependentFeature("sin^" + GetWord(input, begin++));
+ }
+ }
+
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ VWFeatureSource::SetParameter(key, value);
+ }
+};
+
+}
diff --git a/moses/FF/VW/VWFeatureSourceWindow.h b/moses/FF/VW/VWFeatureSourceWindow.h
new file mode 100644
index 000000000..844b7efb1
--- /dev/null
+++ b/moses/FF/VW/VWFeatureSourceWindow.h
@@ -0,0 +1,53 @@
+#pragma once
+
+#include <string>
+#include <algorithm>
+#include "VWFeatureSource.h"
+#include "moses/Util.h"
+
+namespace Moses
+{
+
+// Context-window features around the source span: "c^<offset>^<word>" for up
+// to m_size words on each side (negative offsets left, positive right).
+class VWFeatureSourceWindow : public VWFeatureSource
+{
+public:
+ VWFeatureSourceWindow(const std::string &line)
+ : VWFeatureSource(line), m_size(DEFAULT_WINDOW_SIZE) {
+ ReadParameters();
+
+ // Call this last
+ VWFeatureBase::UpdateRegister();
+ }
+
+ void operator()(const InputType &input
+ , const InputPath &inputPath
+ , const WordsRange &sourceRange
+ , Discriminative::Classifier &classifier) const {
+ int begin = sourceRange.GetStartPos();
+ int end = sourceRange.GetEndPos() + 1; // exclusive
+ int inputLen = input.GetSize();
+
+ // left context: offsets -m_size..-1, clipped at sentence start
+ for (int i = std::max(0, begin - m_size); i < begin; i++) {
+ classifier.AddLabelIndependentFeature("c^" + SPrint(i - begin) + "^" + GetWord(input, i));
+ }
+
+ // right context: offsets 1..m_size, clipped at sentence end
+ for (int i = end; i < std::min(end + m_size, inputLen); i++) {
+ classifier.AddLabelIndependentFeature("c^" + SPrint(i - end + 1) + "^" + GetWord(input, i));
+ }
+ }
+
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ if (key == "size") {
+ m_size = Scan<size_t>(value);
+ } else {
+ VWFeatureSource::SetParameter(key, value);
+ }
+ }
+
+private:
+ static const int DEFAULT_WINDOW_SIZE = 3;
+
+ int m_size; // number of context words taken on each side
+};
+
+}
diff --git a/moses/FF/VW/VWFeatureTarget.h b/moses/FF/VW/VWFeatureTarget.h
new file mode 100644
index 000000000..d56306aa8
--- /dev/null
+++ b/moses/FF/VW/VWFeatureTarget.h
@@ -0,0 +1,41 @@
+#pragma once
+
+#include <string>
+#include "VWFeatureBase.h"
+
+namespace Moses
+{
+
+// Inherit from this for target-dependent classifier features. They will
+// automatically register with the classifier class named VW0 or one or more
+// names specified by the used-by=name1,name2,... parameter.
+//
+// The classifier gets a full list by calling
+// VWFeatureBase::GetTargetFeatures(GetScoreProducerDescription())
+
+class VWFeatureTarget : public VWFeatureBase
+{
+public:
+ VWFeatureTarget(const std::string &line)
+ : VWFeatureBase(line, false) {
+ }
+
+ // Gets its pure virtual functions from VWFeatureBase
+
+ // Source-dependent overload is a no-op for target features.
+ virtual void operator()(const InputType &input
+ , const InputPath &inputPath
+ , const WordsRange &sourceRange
+ , Discriminative::Classifier &classifier) const {
+ }
+
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ VWFeatureBase::SetParameter(key, value);
+ }
+
+protected:
+ // Surface form of the target word at pos, using the configured factors.
+ inline std::string GetWord(const TargetPhrase &phrase, size_t pos) const {
+ return phrase.GetWord(pos).GetString(m_targetFactors, false);
+ }
+};
+
+}
diff --git a/moses/FF/VW/VWFeatureTargetIndicator.h b/moses/FF/VW/VWFeatureTargetIndicator.h
new file mode 100644
index 000000000..39d8a37a0
--- /dev/null
+++ b/moses/FF/VW/VWFeatureTargetIndicator.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include <string>
+#include "VWFeatureTarget.h"
+
+namespace Moses
+{
+
+// Single label-dependent indicator feature for the whole target phrase.
+class VWFeatureTargetIndicator : public VWFeatureTarget
+{
+public:
+ VWFeatureTargetIndicator(const std::string &line)
+ : VWFeatureTarget(line) {
+ ReadParameters();
+
+ VWFeatureBase::UpdateRegister();
+ }
+
+ void operator()(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , Discriminative::Classifier &classifier) const {
+ classifier.AddLabelDependentFeature("tind^" + targetPhrase.GetStringRep(m_targetFactors));
+ }
+
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ VWFeatureTarget::SetParameter(key, value);
+ }
+};
+
+}
diff --git a/moses/FF/VW/VWFeatureTargetPhraseInternal.h b/moses/FF/VW/VWFeatureTargetPhraseInternal.h
new file mode 100644
index 000000000..e376a1ed3
--- /dev/null
+++ b/moses/FF/VW/VWFeatureTargetPhraseInternal.h
@@ -0,0 +1,33 @@
+#pragma once
+
+#include <string>
+#include "VWFeatureTarget.h"
+
+namespace Moses
+{
+
+// One "tin^<word>" label-dependent feature per word of the target phrase.
+class VWFeatureTargetPhraseInternal : public VWFeatureTarget
+{
+public:
+ VWFeatureTargetPhraseInternal(const std::string &line)
+ : VWFeatureTarget(line) {
+ ReadParameters();
+
+ VWFeatureBase::UpdateRegister();
+ }
+
+ void operator()(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , Discriminative::Classifier &classifier) const {
+ for (size_t i = 0; i < targetPhrase.GetSize(); i++) {
+ classifier.AddLabelDependentFeature("tin^" + GetWord(targetPhrase, i));
+ }
+ }
+
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ VWFeatureTarget::SetParameter(key, value);
+ }
+};
+
+}
diff --git a/moses/FF/VW/VWFeatureTargetPhraseScores.h b/moses/FF/VW/VWFeatureTargetPhraseScores.h
new file mode 100644
index 000000000..5a4519fb1
--- /dev/null
+++ b/moses/FF/VW/VWFeatureTargetPhraseScores.h
@@ -0,0 +1,52 @@
+#pragma once
+
+#include <string>
+#include <set>
+#include <boost/lexical_cast.hpp>
+
+#include "VWFeatureTarget.h"
+
+namespace Moses
+{
+
+// Exposes the target phrase's existing feature-function scores as real-valued
+// label-dependent features "<ff-name>^<index>"; optionally restricted to the
+// feature functions listed in use=name1,name2,...
+class VWFeatureTargetPhraseScores : public VWFeatureTarget
+{
+public:
+ VWFeatureTargetPhraseScores(const std::string &line)
+ : VWFeatureTarget(line) {
+ ReadParameters();
+
+ VWFeatureBase::UpdateRegister();
+ }
+
+ void operator()(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , Discriminative::Classifier &classifier) const {
+ std::vector<FeatureFunction*> features = FeatureFunction::GetFeatureFunctions();
+ for (size_t i = 0; i < features.size(); i++) {
+ std::string fname = features[i]->GetScoreProducerDescription();
+ // empty m_fnames means "use all feature functions"
+ if(!m_fnames.empty() && m_fnames.count(fname) == 0)
+ continue;
+
+ std::vector<float> scores = targetPhrase.GetScoreBreakdown().GetScoresForProducer(features[i]);
+ for(size_t j = 0; j < scores.size(); ++j)
+ classifier.AddLabelDependentFeature(fname + "^" + boost::lexical_cast<std::string>(j), scores[j]);
+ }
+ }
+
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ if(key == "use") {
+ std::vector<std::string> names;
+ Tokenize(names, value, ",");
+ m_fnames.insert(names.begin(), names.end());
+ } else
+ VWFeatureTarget::SetParameter(key, value);
+ }
+
+private:
+ std::set<std::string> m_fnames; // feature functions to include; empty = all
+
+};
+
+}
diff --git a/moses/FF/WordPenaltyProducer.cpp b/moses/FF/WordPenaltyProducer.cpp
index 1e191d040..835ee8e96 100644
--- a/moses/FF/WordPenaltyProducer.cpp
+++ b/moses/FF/WordPenaltyProducer.cpp
@@ -18,9 +18,9 @@ WordPenaltyProducer::WordPenaltyProducer(const std::string &line)
}
void WordPenaltyProducer::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
float score = - (float) targetPhrase.GetNumTerminals();
scoreBreakdown.Assign(this, score);
diff --git a/moses/FF/WordPenaltyProducer.h b/moses/FF/WordPenaltyProducer.h
index 19e331843..5d4005533 100644
--- a/moses/FF/WordPenaltyProducer.h
+++ b/moses/FF/WordPenaltyProducer.h
@@ -28,26 +28,36 @@ public:
}
virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const Syntax::SHyperedge &hyperedge,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
+
+ /*
+ virtual void Evaluate(const InputType &source
+ , ScoreComponentCollection &scoreBreakdown) const;
+ */
};
}
diff --git a/moses/FF/WordTranslationFeature.cpp b/moses/FF/WordTranslationFeature.cpp
index 5111e677e..4c0233885 100644
--- a/moses/FF/WordTranslationFeature.cpp
+++ b/moses/FF/WordTranslationFeature.cpp
@@ -7,7 +7,6 @@
#include "moses/ChartHypothesis.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/TranslationOption.h"
-#include "moses/UserMessage.h"
#include "moses/InputPath.h"
#include "util/string_piece_hash.hh"
#include "util/exception.hh"
@@ -26,30 +25,30 @@ WordTranslationFeature::WordTranslationFeature(const std::string &line)
,m_ignorePunctuation(false)
,m_domainTrigger(false)
{
- std::cerr << "Initializing word translation feature.. " << endl;
+ VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
ReadParameters();
- if (m_simple == 1) std::cerr << "using simple word translations.. ";
- if (m_sourceContext == 1) std::cerr << "using source context.. ";
- if (m_targetContext == 1) std::cerr << "using target context.. ";
- if (m_domainTrigger == 1) std::cerr << "using domain triggers.. ";
+ if (m_simple == 1) VERBOSE(1, " Using simple word translations.");
+ if (m_sourceContext == 1) VERBOSE(1, " Using source context.");
+ if (m_targetContext == 1) VERBOSE(1, " Using target context.");
+ if (m_domainTrigger == 1) VERBOSE(1, " Using domain triggers.");
// compile a list of punctuation characters
if (m_ignorePunctuation) {
- std::cerr << "ignoring punctuation for triggers.. ";
+ VERBOSE(1, " Ignoring punctuation for triggers.");
char punctuation[] = "\"'!?¿·()#_,.:;•&@‑/\\0123456789~=";
for (size_t i=0; i < sizeof(punctuation)-1; ++i) {
m_punctuationHash[punctuation[i]] = 1;
}
}
- std::cerr << "done." << std::endl;
+ VERBOSE(1, " Done." << std::endl);
// TODO not sure about this
/*
if (weight[0] != 1) {
AddSparseProducer(wordTranslationFeature);
- cerr << "wt sparse producer weight: " << weight[0] << endl;
+ VERBOSE(1, "wt sparse producer weight: " << weight[0] << std::endl);
if (m_mira)
m_metaFeatureProducer = new MetaFeatureProducer("wt");
}
@@ -95,7 +94,7 @@ void WordTranslationFeature::Load()
return;
} //else if (tokens.size() == 8) {
- cerr << "loading word translation word lists from " << m_filePathSource << " and " << m_filePathTarget << endl;
+ FEATUREVERBOSE(1, "Loading word translation word lists from " << m_filePathSource << " and " << m_filePathTarget << std::endl);
if (m_domainTrigger) {
// domain trigger terms for each input document
ifstream inFileSource(m_filePathSource.c_str());
@@ -138,11 +137,11 @@ void WordTranslationFeature::Load()
}
void WordTranslationFeature::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
const Sentence& sentence = static_cast<const Sentence&>(input);
const AlignmentInfo &alignment = targetPhrase.GetAlignTerm();
diff --git a/moses/FF/WordTranslationFeature.h b/moses/FF/WordTranslationFeature.h
index 63e3749c7..9ca41da46 100644
--- a/moses/FF/WordTranslationFeature.h
+++ b/moses/FF/WordTranslationFeature.h
@@ -1,5 +1,4 @@
-#ifndef moses_WordTranslationFeature_h
-#define moses_WordTranslationFeature_h
+#pragma once
#include <string>
#include <boost/unordered_set.hpp>
@@ -49,28 +48,32 @@ public:
}
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
};
}
-#endif // moses_WordTranslationFeature_h
diff --git a/moses/FactorCollection.cpp b/moses/FactorCollection.cpp
index d701f8409..7b370ff36 100644
--- a/moses/FactorCollection.cpp
+++ b/moses/FactorCollection.cpp
@@ -59,8 +59,7 @@ const Factor *FactorCollection::AddFactor(const StringPiece &factorString, bool
if (isNonTerminal) {
m_factorIdNonTerminal++;
UTIL_THROW_IF2(m_factorIdNonTerminal >= moses_MaxNumNonterminals, "Number of non-terminals exceeds maximum size reserved. Adjust parameter moses_MaxNumNonterminals, then recompile");
- }
- else {
+ } else {
m_factorId++;
}
}
@@ -73,7 +72,8 @@ const Factor *FactorCollection::GetFactor(const StringPiece &factorString, bool
to_find.in.m_string = factorString;
to_find.in.m_id = (isNonTerminal) ? m_factorIdNonTerminal : m_factorId;
Set & set = (isNonTerminal) ? m_set : m_setNonTerminal;
- { // read=lock scope
+ {
+ // read=lock scope
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
#endif // WITH_THREADS
diff --git a/moses/FeatureVector.cpp b/moses/FeatureVector.cpp
index ce4a043a6..89c90cfc2 100644
--- a/moses/FeatureVector.cpp
+++ b/moses/FeatureVector.cpp
@@ -385,6 +385,15 @@ void FVector::sparsePlusEquals(const FVector& rhs)
set(i->first, get(i->first) + i->second);
}
+// add only core features
+void FVector::corePlusEquals(const FVector& rhs)
+{
+ if (rhs.m_coreFeatures.size() > m_coreFeatures.size())
+ resize(rhs.m_coreFeatures.size());
+ for (size_t i = 0; i < rhs.m_coreFeatures.size(); ++i)
+ m_coreFeatures[i] += rhs.m_coreFeatures[i];
+}
+
// assign only core features
void FVector::coreAssign(const FVector& rhs)
{
diff --git a/moses/FeatureVector.h b/moses/FeatureVector.h
index fd5d3340c..839e2a5d4 100644
--- a/moses/FeatureVector.h
+++ b/moses/FeatureVector.h
@@ -235,6 +235,7 @@ public:
void capMin(FValue minValue);
void sparsePlusEquals(const FVector& rhs);
+ void corePlusEquals(const FVector& rhs);
void coreAssign(const FVector& rhs);
void incrementSparseHopeFeatures();
diff --git a/moses/File.h b/moses/File.h
index d3336c7e5..cd15165e3 100644
--- a/moses/File.h
+++ b/moses/File.h
@@ -12,7 +12,6 @@
#include <sstream>
#include <vector>
#include "util/exception.hh"
-#include "UserMessage.h"
#include "TypeDef.h"
#include "Util.h"
@@ -44,7 +43,7 @@ static const OFF_T InvalidOffT=-1;
template<typename T> inline size_t fWrite(FILE* f,const T& t)
{
if(fwrite(&t,sizeof(t),1,f)!=1) {
- UTIL_THROW2("ERROR:: fwrite!");
+ UTIL_THROW2("ERROR:: fwrite!");
}
return sizeof(t);
}
@@ -52,7 +51,7 @@ template<typename T> inline size_t fWrite(FILE* f,const T& t)
template<typename T> inline void fRead(FILE* f,T& t)
{
if(fread(&t,sizeof(t),1,f)!=1) {
- UTIL_THROW2("ERROR: fread!");
+ UTIL_THROW2("ERROR: fread!");
}
}
@@ -61,7 +60,7 @@ template<typename T> inline size_t fWrite(FILE* f,const T* b,const T* e)
UINT32 s=std::distance(b,e);
size_t rv=fWrite(f,s);
if(fwrite(b,sizeof(T),s,f)!=s) {
- UTIL_THROW2("ERROR: fwrite!");
+ UTIL_THROW2("ERROR: fwrite!");
}
return rv+sizeof(T)*s;
}
@@ -71,7 +70,7 @@ template<typename T> inline size_t fWrite(FILE* f,const T b,const T e)
UINT32 s=std::distance(b,e);
size_t rv=fWrite(f,s);
if(fwrite(&(*b),sizeof(T),s,f)!=s) {
- UTIL_THROW2("ERROR: fwrite!");
+ UTIL_THROW2("ERROR: fwrite!");
}
return rv+sizeof(T)*s;
}
@@ -81,7 +80,7 @@ template<typename C> inline size_t fWriteVector(FILE* f,const C& v)
UINT32 s=v.size();
size_t rv=fWrite(f,s);
if(fwrite(&v[0],sizeof(typename C::value_type),s,f)!=s) {
- UTIL_THROW2("ERROR: fwrite!");
+ UTIL_THROW2("ERROR: fwrite!");
}
return rv+sizeof(typename C::value_type)*s;
}
@@ -93,7 +92,7 @@ template<typename C> inline void fReadVector(FILE* f, C& v)
v.resize(s);
size_t r=fread(&(*v.begin()),sizeof(typename C::value_type),s,f);
if(r!=s) {
- UTIL_THROW2("ERROR: freadVec! "<<r<<" "<<s);
+ UTIL_THROW2("ERROR: freadVec! "<<r<<" "<<s);
}
}
@@ -112,7 +111,7 @@ inline void fReadString(FILE* f,std::string& e)
fRead(f,s);
char* a=new char[s+1];
if(fread(a,sizeof(char),s,f)!=s) {
- UTIL_THROW2("ERROR: fread!");
+ UTIL_THROW2("ERROR: fread!");
}
a[s]='\0';
e.assign(a);
@@ -148,8 +147,8 @@ inline OFF_T fTell(FILE* f)
inline void fSeek(FILE* f,OFF_T o)
{
if(FSEEKO(f,o,SEEK_SET)<0) {
- std::stringstream strme;
- strme << "ERROR: could not fseeko position " << o <<"\n";
+ std::stringstream strme;
+ strme << "ERROR: could not fseeko position " << o <<"\n";
if(o==InvalidOffT) strme << "You tried to seek for 'InvalidOffT'!\n";
UTIL_THROW2(strme.str());
}
@@ -160,8 +159,7 @@ inline FILE* fOpen(const char* fn,const char* m)
if(FILE* f=fopen(fn,m))
return f;
else {
- UserMessage::Add(std::string("ERROR: could not open file ") + fn + " with mode " + m + "\n");
- UTIL_THROW(util::Exception, "Couldn't open file " << fn);
+ UTIL_THROW(util::Exception, "ERROR: could not open file " << fn << " with mode " << m);
return NULL;
}
}
diff --git a/moses/ForestInput.cpp b/moses/ForestInput.cpp
new file mode 100644
index 000000000..2a0452ec8
--- /dev/null
+++ b/moses/ForestInput.cpp
@@ -0,0 +1,250 @@
+#include "ForestInput.h"
+
+#include <algorithm>
+
+#include <boost/make_shared.hpp>
+
+#include "util/tokenize_piece.hh"
+
+#include "moses/Syntax/F2S/Forest.h"
+#include "moses/TranslationModel/PhraseDictionary.h"
+
+#include "FactorCollection.h"
+#include "StaticData.h"
+#include "Util.h"
+
+namespace Moses
+{
+
+//! populate this InputType with data from in stream
+int ForestInput::Read(std::istream &in,
+ const std::vector<FactorType>& factorOrder)
+{
+ using Syntax::F2S::Forest;
+
+ m_forest = boost::make_shared<Forest>();
+ m_rootVertex = NULL;
+ m_vertexSet.clear();
+
+ std::string line;
+ if (std::getline(in, line, '\n').eof()) {
+ return 0;
+ }
+
+ // The first line contains the sentence number. We ignore this and skip
+ // straight to the second line, which contains the sentence string.
+ std::string sentence;
+ std::getline(in, sentence);
+
+ // If the next line is blank then there was a parse failure. Otherwise,
+ // the next line and any subsequent non-blank lines contain hyperedges.
+ std::getline(in, line);
+ if (line == "") {
+ // Parse failure. We treat this as an empty sentence.
+ sentence = "";
+ // The next line will be blank too.
+ std::getline(in, line);
+ } else {
+ do {
+ ParseHyperedgeLine(line, factorOrder);
+ std::getline(in, line);
+ } while (line != "");
+ }
+
+ // Do base class Read().
+ // TODO Check if this is actually necessary. TreeInput does it, but I'm
+ // not sure ForestInput needs to.
+ std::stringstream strme;
+ strme << "<s> " << sentence << " </s>" << std::endl;
+ Sentence::Read(strme, factorOrder);
+
+ // Find the maximum end position of any vertex (0 if forest is empty).
+ std::size_t maxEnd = FindMaxEnd(*m_forest);
+
+ // Determine which vertices are the top vertices.
+ std::vector<Forest::Vertex *> topVertices;
+ if (!m_forest->vertices.empty()) {
+ FindTopVertices(*m_forest, topVertices);
+ assert(topVertices.size() >= 1);
+ }
+
+ // Add <s> vertex.
+ Forest::Vertex *startSymbol = NULL;
+ {
+ Word symbol;
+ symbol.CreateFromString(Input, factorOrder, "<s>", false);
+ Syntax::PVertex pvertex(WordsRange(0, 0), symbol);
+ startSymbol = new Forest::Vertex(pvertex);
+ m_forest->vertices.push_back(startSymbol);
+ }
+
+ // Add </s> vertex.
+ Forest::Vertex *endSymbol = NULL;
+ {
+ Word symbol;
+ symbol.CreateFromString(Input, factorOrder, "</s>", false);
+ Syntax::PVertex pvertex(WordsRange(maxEnd+1, maxEnd+1), symbol);
+ endSymbol = new Forest::Vertex(pvertex);
+ m_forest->vertices.push_back(endSymbol);
+ }
+
+ // Add root vertex.
+ {
+ Word symbol;
+ symbol.CreateFromString(Input, factorOrder, "Q", true);
+ Syntax::PVertex pvertex(WordsRange(0, maxEnd+1), symbol);
+ m_rootVertex = new Forest::Vertex(pvertex);
+ m_forest->vertices.push_back(m_rootVertex);
+ }
+
+ // Add root's incoming hyperedges.
+ if (topVertices.empty()) {
+ Forest::Hyperedge *e = new Forest::Hyperedge();
+ e->head = m_rootVertex;
+ e->tail.push_back(startSymbol);
+ e->tail.push_back(endSymbol);
+ m_rootVertex->incoming.push_back(e);
+ } else {
+ // Add a hyperedge between [Q] and each top vertex.
+ for (std::vector<Forest::Vertex *>::const_iterator
+ p = topVertices.begin(); p != topVertices.end(); ++p) {
+ Forest::Hyperedge *e = new Forest::Hyperedge();
+ e->head = m_rootVertex;
+ e->tail.push_back(startSymbol);
+ e->tail.push_back(*p);
+ e->tail.push_back(endSymbol);
+ m_rootVertex->incoming.push_back(e);
+ }
+ }
+
+ return 1;
+}
+
+Syntax::F2S::Forest::Vertex *ForestInput::AddOrDeleteVertex(Forest::Vertex *v)
+{
+ std::pair<VertexSet::iterator, bool> ret = m_vertexSet.insert(v);
+ if (ret.second) {
+ m_forest->vertices.push_back(*ret.first);
+ } else {
+ delete v;
+ }
+ return *ret.first;
+}
+
+std::size_t ForestInput::FindMaxEnd(const Forest &forest)
+{
+ std::size_t maxEnd = 0;
+ for (std::vector<Forest::Vertex *>::const_iterator
+ p = forest.vertices.begin(); p != forest.vertices.end(); ++p) {
+ maxEnd = std::max(maxEnd, (*p)->pvertex.span.GetEndPos());
+ }
+ return maxEnd;
+}
+
+void ForestInput::FindTopVertices(Forest &forest,
+ std::vector<Forest::Vertex *> &topVertices)
+{
+ topVertices.clear();
+
+ // The set of all vertices.
+ std::set<Forest::Vertex *> all;
+
+ // The set of all vertices that are the predecessor of another vertex.
+ std::set<Forest::Vertex *> preds;
+
+ // Populate the all and preds sets.
+ for (std::vector<Forest::Vertex *>::const_iterator
+ p = forest.vertices.begin(); p != forest.vertices.end(); ++p) {
+ all.insert(*p);
+ for (std::vector<Forest::Hyperedge *>::const_iterator
+ q = (*p)->incoming.begin(); q != (*p)->incoming.end(); ++q) {
+ for (std::vector<Forest::Vertex*>::const_iterator
+ r = (*q)->tail.begin(); r != (*q)->tail.end(); ++r) {
+ preds.insert(*r);
+ }
+ }
+ }
+
+ // The top vertices are the vertices that are in all but not in preds.
+ std::set_difference(all.begin(), all.end(), preds.begin(), preds.end(),
+ std::back_inserter(topVertices));
+}
+
+void ForestInput::ParseHyperedgeLine(
+ const std::string &line, const std::vector<FactorType>& factorOrder)
+{
+ using Syntax::F2S::Forest;
+
+ const util::AnyCharacter delimiter(" \t");
+ util::TokenIter<util::AnyCharacter, true> p(line, delimiter);
+ Forest::Vertex *v = AddOrDeleteVertex(ParseVertex(*p, factorOrder));
+ Forest::Hyperedge *e = new Forest::Hyperedge();
+ e->head = v;
+ ++p;
+ if (*p != "=>") {
+ // FIXME
+ //throw Exception("");
+ }
+ for (++p; *p != "|||"; ++p) {
+ v = ParseVertex(*p, factorOrder);
+ if (!v->pvertex.symbol.IsNonTerminal()) {
+ // Egret does not give start/end for terminals.
+ v->pvertex.span = WordsRange(e->head->pvertex.span.GetStartPos(),
+ e->head->pvertex.span.GetStartPos());
+ }
+ e->tail.push_back(AddOrDeleteVertex(v));
+ }
+ ++p;
+ std::string tmp;
+ p->CopyToString(&tmp);
+ e->weight = std::atof(tmp.c_str());
+ e->head->incoming.push_back(e);
+}
+
+Syntax::F2S::Forest::Vertex *ForestInput::ParseVertex(
+ const StringPiece &s, const std::vector<FactorType>& factorOrder)
+{
+ using Syntax::F2S::Forest;
+
+ Word symbol;
+ std::size_t pos = s.rfind('[');
+ if (pos == std::string::npos) {
+ symbol.CreateFromString(Input, factorOrder, s, false);
+ // Create vertex: caller will fill in span.
+ WordsRange span(0, 0);
+ return new Forest::Vertex(Syntax::PVertex(span, symbol));
+ }
+ symbol.CreateFromString(Input, factorOrder, s.substr(0, pos), true);
+ std::size_t begin = pos + 1;
+ pos = s.find(',', begin+1);
+ std::string tmp;
+ s.substr(begin, pos-begin).CopyToString(&tmp);
+ std::size_t start = std::atoi(tmp.c_str());
+ s.substr(pos+1, s.size()-pos-2).CopyToString(&tmp);
+ std::size_t end = std::atoi(tmp.c_str());
+ // Create vertex: offset span by 1 to allow for <s> in first position.
+ WordsRange span(start+1, end+1);
+ return new Forest::Vertex(Syntax::PVertex(span, symbol));
+}
+
+//! Output debugging info to stream out
+void ForestInput::Print(std::ostream &out) const
+{
+ out << *this << "\n";
+}
+
+//! create trans options specific to this InputType
+TranslationOptionCollection* ForestInput::
+CreateTranslationOptionCollection() const
+{
+
+ return NULL;
+}
+
+// FIXME
+std::ostream& operator<<(std::ostream &out, const ForestInput &)
+{
+ return out;
+}
+
+} // namespace Moses
diff --git a/moses/ForestInput.h b/moses/ForestInput.h
new file mode 100644
index 000000000..6a2327cd2
--- /dev/null
+++ b/moses/ForestInput.h
@@ -0,0 +1,90 @@
+#ifndef moses_ForestInput_h
+#define moses_ForestInput_h
+
+#include <string>
+#include <vector>
+
+#include <boost/shared_ptr.hpp>
+
+#include <util/string_piece.hh>
+
+#include "moses/Syntax/F2S/Forest.h"
+
+#include "Sentence.h"
+
+namespace Moses
+{
+
+class ForestInput : public Sentence
+{
+ public:
+ friend std::ostream &operator<<(std::ostream&, const ForestInput &);
+
+ ForestInput() : m_rootVertex(NULL) {}
+
+ InputTypeEnum GetType() const {
+ return ForestInputType;
+ }
+
+ //! populate this InputType with data from in stream
+ virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
+
+ //! Output debugging info to stream out
+ virtual void Print(std::ostream&) const;
+
+ //! create trans options specific to this InputType
+ virtual TranslationOptionCollection*
+ CreateTranslationOptionCollection() const;
+
+ boost::shared_ptr<const Syntax::F2S::Forest> GetForest() const
+ {
+ return m_forest;
+ }
+
+ const Syntax::F2S::Forest::Vertex *GetRootVertex() const
+ {
+ return m_rootVertex;
+ }
+
+ private:
+ typedef Syntax::F2S::Forest Forest;
+
+ struct VertexSetHash {
+ std::size_t operator()(const Forest::Vertex *v) const {
+ std::size_t seed = 0;
+ boost::hash_combine(seed, v->pvertex.symbol);
+ boost::hash_combine(seed, v->pvertex.span.GetStartPos());
+ boost::hash_combine(seed, v->pvertex.span.GetEndPos());
+ return seed;
+ }
+ };
+
+ struct VertexSetPred {
+ bool operator()(const Forest::Vertex *v, const Forest::Vertex *w) const {
+ return v->pvertex == w->pvertex;
+ }
+ };
+
+ typedef boost::unordered_set<Forest::Vertex *, VertexSetHash,
+ VertexSetPred> VertexSet;
+
+ Forest::Vertex *AddOrDeleteVertex(Forest::Vertex *);
+
+ std::size_t FindMaxEnd(const Forest &);
+
+ void FindTopVertices(Forest &, std::vector<Forest::Vertex *> &);
+
+ void ParseHyperedgeLine(const std::string &,
+ const std::vector<FactorType> &);
+
+ Forest::Vertex *ParseVertex(const StringPiece &,
+ const std::vector<FactorType> &);
+
+ boost::shared_ptr<Forest> m_forest;
+ Forest::Vertex *m_rootVertex;
+ VertexSet m_vertexSet;
+};
+
+} // namespace Moses
+
+#endif
diff --git a/moses/GenerationDictionary.cpp b/moses/GenerationDictionary.cpp
index 7ff04748e..67e3b9108 100644
--- a/moses/GenerationDictionary.cpp
+++ b/moses/GenerationDictionary.cpp
@@ -27,7 +27,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Util.h"
#include "InputFileStream.h"
#include "StaticData.h"
-#include "UserMessage.h"
#include "util/exception.hh"
using namespace std;
diff --git a/moses/GenerationDictionary.h b/moses/GenerationDictionary.h
index 257162ae1..67bbe1e91 100644
--- a/moses/GenerationDictionary.h
+++ b/moses/GenerationDictionary.h
@@ -55,7 +55,7 @@ protected:
public:
static const std::vector<GenerationDictionary*>& GetColl() {
- return s_staticColl;
+ return s_staticColl;
}
GenerationDictionary(const std::string &line);
diff --git a/moses/HypergraphOutput.cpp b/moses/HypergraphOutput.cpp
index 5c689eaee..47c564882 100644
--- a/moses/HypergraphOutput.cpp
+++ b/moses/HypergraphOutput.cpp
@@ -41,13 +41,19 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
-namespace Moses {
+namespace Moses
+{
template<class M>
HypergraphOutput<M>::HypergraphOutput(size_t precision) :
- m_precision(precision) {
+ m_precision(precision)
+{
const StaticData& staticData = StaticData::Instance();
- vector<string> hypergraphParameters = staticData.GetParam("output-search-graph-hypergraph");
+ vector<string> hypergraphParameters;
+ const PARAM_VEC *params = staticData.GetParameter().GetParam("output-search-graph-hypergraph");
+ if (params) {
+ hypergraphParameters = *params;
+ }
if (hypergraphParameters.size() > 0 && hypergraphParameters[0] == "true") {
m_appendSuffix = true;
@@ -62,7 +68,7 @@ HypergraphOutput<M>::HypergraphOutput(size_t precision) :
m_compression = "txt";
}
UTIL_THROW_IF(m_compression != "txt" && m_compression != "gz" && m_compression != "bz2",
- util::Exception, "Unknown compression type: " << m_compression);
+ util::Exception, "Unknown compression type: " << m_compression);
if ( hypergraphParameters.size() > 2 ) {
m_hypergraphDir = hypergraphParameters[2];
@@ -105,7 +111,7 @@ HypergraphOutput<M>::HypergraphOutput(size_t precision) :
}
UTIL_THROW_IF(!boost::filesystem::is_directory(m_hypergraphDir),
- util::Exception, "Cannot output hypergraphs to " << m_hypergraphDir << " because that path exists, but is not a directory");
+ util::Exception, "Cannot output hypergraphs to " << m_hypergraphDir << " because that path exists, but is not a directory");
ofstream weightsOut;
@@ -121,7 +127,8 @@ HypergraphOutput<M>::HypergraphOutput(size_t precision) :
}
template<class M>
-void HypergraphOutput<M>::Write(const M& manager) const {
+void HypergraphOutput<M>::Write(const M& manager) const
+{
stringstream fileName;
fileName << m_hypergraphDir << "/" << manager.GetSource().GetTranslationId();
@@ -134,7 +141,7 @@ void HypergraphOutput<M>::Write(const M& manager) const {
file.push( boost::iostreams::gzip_compressor() );
} else if ( m_compression == "bz2" ) {
file.push( boost::iostreams::bzip2_compressor() );
- }
+ }
file.push( boost::iostreams::file_sink(fileName.str(), ios_base::out) );
@@ -145,9 +152,9 @@ void HypergraphOutput<M>::Write(const M& manager) const {
file.flush();
} else {
TRACE_ERR("Cannot output hypergraph for line " << manager.GetSource().GetTranslationId()
- << " because the output file " << fileName.str()
- << " is not open or not ready for writing"
- << std::endl);
+ << " because the output file " << fileName.str()
+ << " is not open or not ready for writing"
+ << std::endl);
}
file.pop();
}
@@ -157,7 +164,8 @@ template class HypergraphOutput<ChartManager>;
void ChartSearchGraphWriterMoses::WriteHypos
- (const ChartHypothesisCollection& hypos, const map<unsigned, bool> &reachable) const {
+(const ChartHypothesisCollection& hypos, const map<unsigned, bool> &reachable) const
+{
ChartHypothesisCollection::const_iterator iter;
for (iter = hypos.begin() ; iter != hypos.end() ; ++iter) {
@@ -180,7 +188,8 @@ void ChartSearchGraphWriterMoses::WriteHypos
}
}
-void ChartSearchGraphWriterHypergraph::WriteHeader(size_t winners, size_t losers) const {
+void ChartSearchGraphWriterHypergraph::WriteHeader(size_t winners, size_t losers) const
+{
(*m_out) << "# target ||| features ||| source-covered" << endl;
(*m_out) << winners << " " << (winners+losers) << endl;
@@ -188,13 +197,14 @@ void ChartSearchGraphWriterHypergraph::WriteHeader(size_t winners, size_t losers
}
void ChartSearchGraphWriterHypergraph::WriteHypos(const ChartHypothesisCollection& hypos,
- const map<unsigned, bool> &reachable) const {
-
+ const map<unsigned, bool> &reachable) const
+{
+
ChartHypothesisCollection::const_iterator iter;
for (iter = hypos.begin() ; iter != hypos.end() ; ++iter) {
const ChartHypothesis* mainHypo = *iter;
- if (!StaticData::Instance().GetUnprunedSearchGraph() &&
- reachable.find(mainHypo->GetId()) == reachable.end()) {
+ if (!StaticData::Instance().GetUnprunedSearchGraph() &&
+ reachable.find(mainHypo->GetId()) == reachable.end()) {
//Ignore non reachable nodes
continue;
}
@@ -242,7 +252,7 @@ void ChartSearchGraphWriterHypergraph::WriteHypos(const ChartHypothesisCollectio
}
}
}
-
+
} //namespace Moses
diff --git a/moses/HypergraphOutput.h b/moses/HypergraphOutput.h
index 4ec8e2665..6503a9a28 100644
--- a/moses/HypergraphOutput.h
+++ b/moses/HypergraphOutput.h
@@ -29,12 +29,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
* Manage the output of hypergraphs.
**/
-namespace Moses {
+namespace Moses
+{
class ChartHypothesisCollection;
template<class M>
-class HypergraphOutput {
+class HypergraphOutput
+{
public:
/** Initialise output directory and create weights file */
@@ -51,42 +53,47 @@ private:
};
-/**
+/**
* ABC for different types of search graph output for chart Moses.
**/
-class ChartSearchGraphWriter {
+class ChartSearchGraphWriter
+{
public:
virtual void WriteHeader(size_t winners, size_t losers) const = 0;
virtual void WriteHypos(const ChartHypothesisCollection& hypos,
- const std::map<unsigned, bool> &reachable) const = 0;
+ const std::map<unsigned, bool> &reachable) const = 0;
};
/** "Moses" format (osg style) */
-class ChartSearchGraphWriterMoses : public virtual ChartSearchGraphWriter {
+class ChartSearchGraphWriterMoses : public virtual ChartSearchGraphWriter
+{
public:
- ChartSearchGraphWriterMoses(std::ostream* out, size_t lineNumber) :
- m_out(out), m_lineNumber(lineNumber) {}
- virtual void WriteHeader(size_t, size_t) const {/* do nothing */}
+ ChartSearchGraphWriterMoses(std::ostream* out, size_t lineNumber) :
+ m_out(out), m_lineNumber(lineNumber) {}
+ virtual void WriteHeader(size_t, size_t) const {
+ /* do nothing */
+ }
virtual void WriteHypos(const ChartHypothesisCollection& hypos,
- const std::map<unsigned, bool> &reachable) const;
-
+ const std::map<unsigned, bool> &reachable) const;
+
private:
- std::ostream* m_out;
+ std::ostream* m_out;
size_t m_lineNumber;
};
/** Modified version of Kenneth's lazy hypergraph format */
-class ChartSearchGraphWriterHypergraph : public virtual ChartSearchGraphWriter {
+class ChartSearchGraphWriterHypergraph : public virtual ChartSearchGraphWriter
+{
public:
- ChartSearchGraphWriterHypergraph(std::ostream* out) :
- m_out(out), m_nodeId(0) {}
+ ChartSearchGraphWriterHypergraph(std::ostream* out) :
+ m_out(out), m_nodeId(0) {}
virtual void WriteHeader(size_t winners, size_t losers) const;
virtual void WriteHypos(const ChartHypothesisCollection& hypos,
- const std::map<unsigned, bool> &reachable) const;
-
+ const std::map<unsigned, bool> &reachable) const;
+
private:
- std::ostream* m_out;
+ std::ostream* m_out;
mutable size_t m_nodeId;
mutable std::map<size_t,size_t> m_hypoIdToNodeId;
};
diff --git a/moses/Hypothesis.cpp b/moses/Hypothesis.cpp
index d77d249e7..a8777ed9f 100644
--- a/moses/Hypothesis.cpp
+++ b/moses/Hypothesis.cpp
@@ -32,6 +32,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "StaticData.h"
#include "InputType.h"
#include "Manager.h"
+#include "IOWrapper.h"
#include "moses/FF/FFState.h"
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/StatelessFeatureFunction.h"
@@ -47,7 +48,7 @@ ObjectPool<Hypothesis> Hypothesis::s_objectPool("Hypothesis", 300000);
Hypothesis::Hypothesis(Manager& manager, InputType const& source, const TranslationOption &initialTransOpt)
: m_prevHypo(NULL)
- , m_sourceCompleted(source.GetSize(), manager.m_source.m_sourceCompleted)
+ , m_sourceCompleted(source.GetSize(), manager.GetSource().m_sourceCompleted)
, m_sourceInput(source)
, m_currSourceWordsRange(
m_sourceCompleted.GetFirstGapPos()>0 ? 0 : NOT_FOUND,
@@ -206,7 +207,7 @@ int Hypothesis::RecombineCompare(const Hypothesis &compare) const
}
void Hypothesis::EvaluateWhenApplied(const StatefulFeatureFunction &sfff,
- int state_idx)
+ int state_idx)
{
const StaticData &staticData = StaticData::Instance();
if (! staticData.IsFeatureFunctionIgnored( sfff )) {
@@ -254,8 +255,8 @@ void Hypothesis::EvaluateWhenApplied(const SquareMatrix &futureScore)
const StaticData &staticData = StaticData::Instance();
if (! staticData.IsFeatureFunctionIgnored(ff)) {
m_ffStates[i] = ff.EvaluateWhenApplied(*this,
- m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL,
- &m_currScoreBreakdown);
+ m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL,
+ &m_currScoreBreakdown);
}
}
@@ -336,10 +337,10 @@ void Hypothesis::CleanupArcList()
if (!distinctNBest && m_arcList->size() > nBestSize * 5) {
// prune arc list only if there too many arcs
- NTH_ELEMENT4(m_arcList->begin()
- , m_arcList->begin() + nBestSize - 1
- , m_arcList->end()
- , CompareHypothesisTotalScore());
+ NTH_ELEMENT4(m_arcList->begin()
+ , m_arcList->begin() + nBestSize - 1
+ , m_arcList->end()
+ , CompareHypothesisTotalScore());
// delete bad ones
ArcList::iterator iter;
@@ -428,5 +429,173 @@ std::string Hypothesis::GetTargetPhraseStringRep() const
return GetTargetPhraseStringRep(allFactors);
}
+void Hypothesis::OutputAlignment(std::ostream &out) const
+{
+ std::vector<const Hypothesis *> edges;
+ const Hypothesis *currentHypo = this;
+ while (currentHypo) {
+ edges.push_back(currentHypo);
+ currentHypo = currentHypo->GetPrevHypo();
+ }
+
+ OutputAlignment(out, edges);
+
+}
+
+void Hypothesis::OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
+{
+ size_t targetOffset = 0;
+
+ for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
+ const Hypothesis &edge = *edges[currEdge];
+ const TargetPhrase &tp = edge.GetCurrTargetPhrase();
+ size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos();
+
+ OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset);
+
+ targetOffset += tp.GetSize();
+ }
+ // Removing std::endl here breaks -alignment-output-file, so stop doing that, please :)
+ // Or fix it somewhere else.
+ out << std::endl;
+}
+
+void Hypothesis::OutputAlignment(ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset)
+{
+ typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
+ AlignVec alignments = ai.GetSortedAlignments();
+
+ AlignVec::const_iterator it;
+ for (it = alignments.begin(); it != alignments.end(); ++it) {
+ const std::pair<size_t,size_t> &alignment = **it;
+ out << alignment.first + sourceOffset << "-" << alignment.second + targetOffset << " ";
+ }
+
+}
+
+void Hypothesis::OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo)
+{
+ if (hypo->GetPrevHypo()) {
+ OutputInput(map, hypo->GetPrevHypo());
+ map[hypo->GetCurrSourceWordsRange().GetStartPos()] = &hypo->GetTranslationOption().GetInputPath().GetPhrase();
+ }
+}
+
+void Hypothesis::OutputInput(std::ostream& os) const
+{
+ size_t len = this->GetInput().GetSize();
+ std::vector<const Phrase*> inp_phrases(len, 0);
+ OutputInput(inp_phrases, this);
+ for (size_t i=0; i<len; ++i)
+ if (inp_phrases[i]) os << *inp_phrases[i];
+}
+
+void Hypothesis::OutputBestSurface(std::ostream &out, const std::vector<FactorType> &outputFactorOrder,
+ char reportSegmentation, bool reportAllFactors) const
+{
+ if (m_prevHypo) {
+ // recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence
+ m_prevHypo->OutputBestSurface(out, outputFactorOrder, reportSegmentation, reportAllFactors);
+ }
+ OutputSurface(out, *this, outputFactorOrder, reportSegmentation, reportAllFactors);
+}
+
+//////////////////////////////////////////////////////////////////////////
+/***
+ * print surface factor only for the given phrase
+ */
+void Hypothesis::OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
+ char reportSegmentation, bool reportAllFactors) const
+{
+ UTIL_THROW_IF2(outputFactorOrder.size() == 0,
+ "Must specific at least 1 output factor");
+ const TargetPhrase& phrase = edge.GetCurrTargetPhrase();
+ bool markUnknown = StaticData::Instance().GetMarkUnknown();
+ if (reportAllFactors == true) {
+ out << phrase;
+ } else {
+ FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor();
+
+ std::map<size_t, const Factor*> placeholders;
+ if (placeholderFactor != NOT_FOUND) {
+ // creates map of target position -> factor for placeholders
+ placeholders = GetPlaceholders(edge, placeholderFactor);
+ }
+
+ size_t size = phrase.GetSize();
+ for (size_t pos = 0 ; pos < size ; pos++) {
+ const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
+
+ if (placeholders.size()) {
+ // do placeholders
+ std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos);
+ if (iter != placeholders.end()) {
+ factor = iter->second;
+ }
+ }
+
+ UTIL_THROW_IF2(factor == NULL,
+ "No factor 0 at position " << pos);
+
+ //preface surface form with UNK if marking unknowns
+ const Word &word = phrase.GetWord(pos);
+ if(markUnknown && word.IsOOV()) {
+ out << "UNK" << *factor;
+ } else {
+ out << *factor;
+ }
+
+ for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
+ const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
+ UTIL_THROW_IF2(factor == NULL,
+ "No factor " << i << " at position " << pos);
+
+ out << "|" << *factor;
+ }
+ out << " ";
+ }
+ }
+
+ // trace ("report segmentation") option "-t" / "-tt"
+ if (reportSegmentation > 0 && phrase.GetSize() > 0) {
+ const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
+ const int sourceStart = sourceRange.GetStartPos();
+ const int sourceEnd = sourceRange.GetEndPos();
+ out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt"
+ if (reportSegmentation == 2) {
+ out << ",wa=";
+ const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
+ Hypothesis::OutputAlignment(out, ai, 0, 0);
+ out << ",total=";
+ out << edge.GetScore() - edge.GetPrevHypo()->GetScore();
+ out << ",";
+ ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
+ scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
+ scoreBreakdown.OutputAllFeatureScores(out);
+ }
+ out << "| ";
+ }
+}
+
+std::map<size_t, const Factor*> Hypothesis::GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const
+{
+ const InputPath &inputPath = hypo.GetTranslationOption().GetInputPath();
+ const Phrase &inputPhrase = inputPath.GetPhrase();
+
+ std::map<size_t, const Factor*> ret;
+
+ for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) {
+ const Factor *factor = inputPhrase.GetFactor(sourcePos, placeholderFactor);
+ if (factor) {
+ std::set<size_t> targetPos = hypo.GetTranslationOption().GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(sourcePos);
+ UTIL_THROW_IF2(targetPos.size() != 1,
+ "Placeholder should be aligned to 1, and only 1, word");
+ ret[*targetPos.begin()] = factor;
+ }
+ }
+
+ return ret;
+}
+
}
diff --git a/moses/Hypothesis.h b/moses/Hypothesis.h
index 2b0c98d91..481329338 100644
--- a/moses/Hypothesis.h
+++ b/moses/Hypothesis.h
@@ -81,7 +81,7 @@ protected:
float m_totalScore; /*! score so far */
float m_futureScore; /*! estimated future cost to translate rest of sentence */
/*! sum of scores of this hypothesis, and previous hypotheses. Lazily initialised. */
- mutable boost::scoped_ptr<ScoreComponentCollection> m_scoreBreakdown;
+ mutable boost::scoped_ptr<ScoreComponentCollection> m_scoreBreakdown;
ScoreComponentCollection m_currScoreBreakdown; /*! scores for this hypothesis only */
std::vector<const FFState*> m_ffStates;
const Hypothesis *m_winningHypo;
@@ -269,6 +269,21 @@ public:
const TranslationOption &GetTranslationOption() const {
return m_transOpt;
}
+
+ void OutputAlignment(std::ostream &out) const;
+ static void OutputAlignment(std::ostream &out, const std::vector<const Hypothesis *> &edges);
+ static void OutputAlignment(std::ostream &out, const Moses::AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset);
+
+ void OutputInput(std::ostream& os) const;
+ static void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo);
+
+ void OutputBestSurface(std::ostream &out, const std::vector<Moses::FactorType> &outputFactorOrder, char reportSegmentation, bool reportAllFactors) const;
+ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
+ char reportSegmentation, bool reportAllFactors) const;
+
+ // creates a map of TARGET positions which should be replaced by word using placeholder
+ std::map<size_t, const Moses::Factor*> GetPlaceholders(const Moses::Hypothesis &hypo, Moses::FactorType placeholderFactor) const;
+
};
std::ostream& operator<<(std::ostream& out, const Hypothesis& hypothesis);
diff --git a/moses/HypothesisStackCubePruning.cpp b/moses/HypothesisStackCubePruning.cpp
index febb56e9a..c39aa8641 100644
--- a/moses/HypothesisStackCubePruning.cpp
+++ b/moses/HypothesisStackCubePruning.cpp
@@ -145,7 +145,7 @@ void HypothesisStackCubePruning::AddInitial(Hypothesis *hypo)
{
std::pair<iterator, bool> addRet = Add(hypo);
UTIL_THROW_IF2(!addRet.second,
- "Should have added hypothesis " << *hypo);
+ "Should have added hypothesis " << *hypo);
const WordsBitmap &bitmap = hypo->GetWordsBitmap();
m_bitmapAccessor[bitmap] = new BitmapContainer(bitmap, *this);
diff --git a/moses/IOWrapper.cpp b/moses/IOWrapper.cpp
index 82c591318..53a78b4bf 100644
--- a/moses/IOWrapper.cpp
+++ b/moses/IOWrapper.cpp
@@ -37,9 +37,9 @@ POSSIBILITY OF SUCH DAMAGE.
#include <boost/algorithm/string.hpp>
#include "moses/Syntax/KBestExtractor.h"
+#include "moses/Syntax/PVertex.h"
#include "moses/Syntax/SHyperedge.h"
#include "moses/Syntax/S2T/DerivationWriter.h"
-#include "moses/Syntax/PVertex.h"
#include "moses/Syntax/SVertex.h"
#include "moses/TypeDef.h"
@@ -53,6 +53,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/StatelessFeatureFunction.h"
#include "moses/TreeInput.h"
+#include "moses/ForestInput.h"
#include "moses/ConfusionNet.h"
#include "moses/WordLattice.h"
#include "moses/Incremental.h"
@@ -68,17 +69,8 @@ using namespace std;
namespace Moses
{
-IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
- , const std::vector<FactorType> &outputFactorOrder
- , const FactorMask &inputFactorUsed
- , size_t nBestSize
- , const std::string &nBestFilePath
- , const std::string &inputFilePath)
- :m_inputFactorOrder(inputFactorOrder)
- ,m_outputFactorOrder(outputFactorOrder)
- ,m_inputFactorUsed(inputFactorUsed)
- ,m_inputFilePath(inputFilePath)
- ,m_nBestStream(NULL)
+IOWrapper::IOWrapper()
+ :m_nBestStream(NULL)
,m_outputWordGraphStream(NULL)
,m_outputSearchGraphStream(NULL)
@@ -105,12 +97,18 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
{
const StaticData &staticData = StaticData::Instance();
- if (inputFilePath.empty()) {
- m_inputFile = NULL;
- m_inputStream = &cin;
- }
- else {
- m_inputFile = new InputFileStream(inputFilePath);
+ m_inputFactorOrder = &staticData.GetInputFactorOrder();
+
+ size_t nBestSize = staticData.GetNBestSize();
+ string nBestFilePath = staticData.GetNBestFilePath();
+
+ staticData.GetParameter().SetParameter<string>(m_inputFilePath, "input-file", "");
+ if (m_inputFilePath.empty()) {
+ m_inputFile = NULL;
+ m_inputStream = &cin;
+ } else {
+ VERBOSE(2,"IO from File" << endl);
+ m_inputFile = new InputFileStream(m_inputFilePath);
m_inputStream = m_inputFile;
}
@@ -132,10 +130,11 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
// search graph output
if (staticData.GetOutputSearchGraph()) {
string fileName;
- if (staticData.GetOutputSearchGraphExtended())
- fileName = staticData.GetParam("output-search-graph-extended")[0];
- else
- fileName = staticData.GetParam("output-search-graph")[0];
+ if (staticData.GetOutputSearchGraphExtended()) {
+ staticData.GetParameter().SetParameter<string>(fileName, "output-search-graph-extended", "");
+ } else {
+ staticData.GetParameter().SetParameter<string>(fileName, "output-search-graph", "");
+ }
std::ofstream *file = new std::ofstream;
m_outputSearchGraphStream = file;
file->open(fileName.c_str());
@@ -146,18 +145,20 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
m_unknownsCollector = new Moses::OutputCollector(m_unknownsStream);
UTIL_THROW_IF2(!m_unknownsStream->good(),
"File for unknowns words could not be opened: " <<
- staticData.GetOutputUnknownsFile());
+ staticData.GetOutputUnknownsFile());
}
if (!staticData.GetAlignmentOutputFile().empty()) {
m_alignmentInfoStream = new std::ofstream(staticData.GetAlignmentOutputFile().c_str());
m_alignmentInfoCollector = new Moses::OutputCollector(m_alignmentInfoStream);
UTIL_THROW_IF2(!m_alignmentInfoStream->good(),
- "File for alignment output could not be opened: " << staticData.GetAlignmentOutputFile());
+ "File for alignment output could not be opened: " << staticData.GetAlignmentOutputFile());
}
if (staticData.GetOutputSearchGraph()) {
- string fileName = staticData.GetParam("output-search-graph")[0];
+ string fileName;
+ staticData.GetParameter().SetParameter<string>(fileName, "output-search-graph", "");
+
std::ofstream *file = new std::ofstream;
m_outputSearchGraphStream = file;
file->open(fileName.c_str());
@@ -179,7 +180,9 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
// wordgraph output
if (staticData.GetOutputWordGraph()) {
- string fileName = staticData.GetParam("output-word-graph")[0];
+ string fileName;
+ staticData.GetParameter().SetParameter<string>(fileName, "output-word-graph", "");
+
std::ofstream *file = new std::ofstream;
m_outputWordGraphStream = file;
file->open(fileName.c_str());
@@ -206,10 +209,10 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
m_singleBestOutputCollector = new Moses::OutputCollector(&std::cout);
}
- if (staticData.GetParameter().isParamSpecified("spe-src")) {
- spe_src = new ifstream(staticData.GetParam("spe-src")[0].c_str());
- spe_trg = new ifstream(staticData.GetParam("spe-trg")[0].c_str());
- spe_aln = new ifstream(staticData.GetParam("spe-aln")[0].c_str());
+ if (staticData.GetParameter().GetParam("spe-src")) {
+ spe_src = new ifstream(staticData.GetParameter().GetParam("spe-src")->at(0).c_str());
+ spe_trg = new ifstream(staticData.GetParameter().GetParam("spe-trg")->at(0).c_str());
+ spe_aln = new ifstream(staticData.GetParameter().GetParam("spe-aln")->at(0).c_str());
}
}
@@ -244,7 +247,7 @@ InputType*
IOWrapper::
GetInput(InputType* inputType)
{
- if(inputType->Read(*m_inputStream, m_inputFactorOrder)) {
+ if(inputType->Read(*m_inputStream, *m_inputFactorOrder)) {
return inputType;
} else {
delete inputType;
@@ -252,977 +255,6 @@ GetInput(InputType* inputType)
}
}
-void IOWrapper::FixPrecision(std::ostream &stream, size_t size)
-{
- stream.setf(std::ios::fixed);
- stream.precision(size);
-}
-
-std::map<size_t, const Factor*> IOWrapper::GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor)
-{
- const InputPath &inputPath = hypo.GetTranslationOption().GetInputPath();
- const Phrase &inputPhrase = inputPath.GetPhrase();
-
- std::map<size_t, const Factor*> ret;
-
- for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) {
- const Factor *factor = inputPhrase.GetFactor(sourcePos, placeholderFactor);
- if (factor) {
- std::set<size_t> targetPos = hypo.GetTranslationOption().GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(sourcePos);
- UTIL_THROW_IF2(targetPos.size() != 1,
- "Placeholder should be aligned to 1, and only 1, word");
- ret[*targetPos.begin()] = factor;
- }
- }
-
- return ret;
-}
-
-
-void IOWrapper::OutputBestHypo(const ChartHypothesis *hypo, long translationId)
-{
- if (!m_singleBestOutputCollector)
- return;
- std::ostringstream out;
- FixPrecision(out);
- if (hypo != NULL) {
- VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl);
- VERBOSE(3,"Best path: ");
- Backtrack(hypo);
- VERBOSE(3,"0" << std::endl);
-
- if (StaticData::Instance().GetOutputHypoScore()) {
- out << hypo->GetTotalScore() << " ";
- }
-
- if (StaticData::Instance().IsPathRecoveryEnabled()) {
- out << "||| ";
- }
- Phrase outPhrase(ARRAY_SIZE_INCR);
- hypo->GetOutputPhrase(outPhrase);
-
- // delete 1st & last
- UTIL_THROW_IF2(outPhrase.GetSize() < 2,
- "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
-
- outPhrase.RemoveWord(0);
- outPhrase.RemoveWord(outPhrase.GetSize() - 1);
-
- const std::vector<FactorType> outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
- string output = outPhrase.GetStringRep(outputFactorOrder);
- out << output << endl;
- } else {
- VERBOSE(1, "NO BEST TRANSLATION" << endl);
-
- if (StaticData::Instance().GetOutputHypoScore()) {
- out << "0 ";
- }
-
- out << endl;
- }
- m_singleBestOutputCollector->Write(translationId, out.str());
-}
-
-void IOWrapper::OutputBestHypo(search::Applied applied, long translationId)
-{
- if (!m_singleBestOutputCollector) return;
- std::ostringstream out;
- FixPrecision(out);
- if (StaticData::Instance().GetOutputHypoScore()) {
- out << applied.GetScore() << ' ';
- }
- Phrase outPhrase;
- Incremental::ToPhrase(applied, outPhrase);
- // delete 1st & last
- UTIL_THROW_IF2(outPhrase.GetSize() < 2,
- "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
- outPhrase.RemoveWord(0);
- outPhrase.RemoveWord(outPhrase.GetSize() - 1);
- out << outPhrase.GetStringRep(StaticData::Instance().GetOutputFactorOrder());
- out << '\n';
- m_singleBestOutputCollector->Write(translationId, out.str());
-
- VERBOSE(1,"BEST TRANSLATION: " << outPhrase << "[total=" << applied.GetScore() << "]" << endl);
-}
-
-void IOWrapper::OutputBestNone(long translationId)
-{
- if (!m_singleBestOutputCollector) return;
- if (StaticData::Instance().GetOutputHypoScore()) {
- m_singleBestOutputCollector->Write(translationId, "0 \n");
- } else {
- m_singleBestOutputCollector->Write(translationId, "\n");
- }
-}
-
-void IOWrapper::Backtrack(const ChartHypothesis *hypo)
-{
- const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
-
- vector<const ChartHypothesis*>::const_iterator iter;
- for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
- const ChartHypothesis *prevHypo = *iter;
-
- VERBOSE(3,prevHypo->GetId() << " <= ");
- Backtrack(prevHypo);
- }
-}
-
-void IOWrapper::OutputDetailedTranslationReport(
- const search::Applied *applied,
- const Sentence &sentence,
- long translationId)
-{
- if (applied == NULL) {
- return;
- }
- std::ostringstream out;
- ApplicationContext applicationContext;
-
- OutputTranslationOptions(out, applicationContext, applied, sentence, translationId);
- UTIL_THROW_IF2(m_detailedTranslationCollector == NULL,
- "No ouput file for detailed reports specified");
- m_detailedTranslationCollector->Write(translationId, out.str());
-}
-
-void IOWrapper::OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const ChartHypothesis *hypo, const Sentence &sentence, long translationId)
-{
- if (hypo != NULL) {
- OutputTranslationOption(out, applicationContext, hypo, sentence, translationId);
- out << std::endl;
- }
-
- // recursive
- const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
- std::vector<const ChartHypothesis*>::const_iterator iter;
- for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
- const ChartHypothesis *prevHypo = *iter;
- OutputTranslationOptions(out, applicationContext, prevHypo, sentence, translationId);
- }
-}
-
-
-void IOWrapper::OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const search::Applied *applied, const Sentence &sentence, long translationId)
-{
- if (applied != NULL) {
- OutputTranslationOption(out, applicationContext, applied, sentence, translationId);
- out << std::endl;
- }
-
- // recursive
- const search::Applied *child = applied->Children();
- for (size_t i = 0; i < applied->GetArity(); i++) {
- OutputTranslationOptions(out, applicationContext, child++, sentence, translationId);
- }
-}
-
-void IOWrapper::OutputTranslationOption(std::ostream &out, ApplicationContext &applicationContext, const ChartHypothesis *hypo, const Sentence &sentence, long translationId)
-{
- ReconstructApplicationContext(*hypo, sentence, applicationContext);
- out << "Trans Opt " << translationId
- << " " << hypo->GetCurrSourceRange()
- << ": ";
- WriteApplicationContext(out, applicationContext);
- out << ": " << hypo->GetCurrTargetPhrase().GetTargetLHS()
- << "->" << hypo->GetCurrTargetPhrase()
- << " " << hypo->GetTotalScore() << hypo->GetScoreBreakdown();
-}
-
-void IOWrapper::OutputTranslationOption(std::ostream &out, ApplicationContext &applicationContext, const search::Applied *applied, const Sentence &sentence, long translationId)
-{
- ReconstructApplicationContext(applied, sentence, applicationContext);
- const TargetPhrase &phrase = *static_cast<const TargetPhrase*>(applied->GetNote().vp);
- out << "Trans Opt " << translationId
- << " " << applied->GetRange()
- << ": ";
- WriteApplicationContext(out, applicationContext);
- out << ": " << phrase.GetTargetLHS()
- << "->" << phrase
- << " " << applied->GetScore(); // << hypo->GetScoreBreakdown() TODO: missing in incremental search hypothesis
-}
-
-// Given a hypothesis and sentence, reconstructs the 'application context' --
-// the source RHS symbols of the SCFG rule that was applied, plus their spans.
-void IOWrapper::ReconstructApplicationContext(const ChartHypothesis &hypo,
- const Sentence &sentence,
- ApplicationContext &context)
-{
- context.clear();
- const std::vector<const ChartHypothesis*> &prevHypos = hypo.GetPrevHypos();
- std::vector<const ChartHypothesis*>::const_iterator p = prevHypos.begin();
- std::vector<const ChartHypothesis*>::const_iterator end = prevHypos.end();
- const WordsRange &span = hypo.GetCurrSourceRange();
- size_t i = span.GetStartPos();
- while (i <= span.GetEndPos()) {
- if (p == end || i < (*p)->GetCurrSourceRange().GetStartPos()) {
- // Symbol is a terminal.
- const Word &symbol = sentence.GetWord(i);
- context.push_back(std::make_pair(symbol, WordsRange(i, i)));
- ++i;
- } else {
- // Symbol is a non-terminal.
- const Word &symbol = (*p)->GetTargetLHS();
- const WordsRange &range = (*p)->GetCurrSourceRange();
- context.push_back(std::make_pair(symbol, range));
- i = range.GetEndPos()+1;
- ++p;
- }
- }
-}
-
-// Given a hypothesis and sentence, reconstructs the 'application context' --
-// the source RHS symbols of the SCFG rule that was applied, plus their spans.
-void IOWrapper::ReconstructApplicationContext(const search::Applied *applied,
- const Sentence &sentence,
- ApplicationContext &context)
-{
- context.clear();
- const WordsRange &span = applied->GetRange();
- const search::Applied *child = applied->Children();
- size_t i = span.GetStartPos();
- size_t j = 0;
-
- while (i <= span.GetEndPos()) {
- if (j == applied->GetArity() || i < child->GetRange().GetStartPos()) {
- // Symbol is a terminal.
- const Word &symbol = sentence.GetWord(i);
- context.push_back(std::make_pair(symbol, WordsRange(i, i)));
- ++i;
- } else {
- // Symbol is a non-terminal.
- const Word &symbol = static_cast<const TargetPhrase*>(child->GetNote().vp)->GetTargetLHS();
- const WordsRange &range = child->GetRange();
- context.push_back(std::make_pair(symbol, range));
- i = range.GetEndPos()+1;
- ++child;
- ++j;
- }
- }
-}
-
-// Emulates the old operator<<(ostream &, const DottedRule &) function. The
-// output format is a bit odd (reverse order and double spacing between symbols)
-// but there are scripts and tools that expect the output of -T to look like
-// that.
-void IOWrapper::WriteApplicationContext(std::ostream &out,
- const ApplicationContext &context)
-{
- assert(!context.empty());
- ApplicationContext::const_reverse_iterator p = context.rbegin();
- while (true) {
- out << p->second << "=" << p->first << " ";
- if (++p == context.rend()) {
- break;
- }
- out << " ";
- }
-}
-
-void IOWrapper::OutputDetailedTreeFragmentsTranslationReport(
- const ChartHypothesis *hypo,
- const Sentence &sentence,
- long translationId)
-{
- if (hypo == NULL) {
- return;
- }
- std::ostringstream out;
- ApplicationContext applicationContext;
-
- OutputTreeFragmentsTranslationOptions(out, applicationContext, hypo, sentence, translationId);
- UTIL_THROW_IF2(m_detailTreeFragmentsOutputCollector == NULL,
- "No output file for tree fragments specified");
-
- //Tree of full sentence
- const StatefulFeatureFunction* treeStructure = StaticData::Instance().GetTreeStructure();
- if (treeStructure != NULL) {
- const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
- for( size_t i=0; i<sff.size(); i++ ) {
- if (sff[i] == treeStructure) {
- const TreeState* tree = dynamic_cast<const TreeState*>(hypo->GetFFState(i));
- out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n";
- break;
- }
- }
- }
-
- m_detailTreeFragmentsOutputCollector->Write(translationId, out.str());
-
-}
-
-void IOWrapper::OutputDetailedTreeFragmentsTranslationReport(
- const search::Applied *applied,
- const Sentence &sentence,
- long translationId)
-{
- if (applied == NULL) {
- return;
- }
- std::ostringstream out;
- ApplicationContext applicationContext;
-
- OutputTreeFragmentsTranslationOptions(out, applicationContext, applied, sentence, translationId);
- UTIL_THROW_IF2(m_detailTreeFragmentsOutputCollector == NULL,
- "No output file for tree fragments specified");
-
- //Tree of full sentence
- //TODO: incremental search doesn't support stateful features
-
- m_detailTreeFragmentsOutputCollector->Write(translationId, out.str());
-
-}
-
-void IOWrapper::OutputTreeFragmentsTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const ChartHypothesis *hypo, const Sentence &sentence, long translationId)
-{
-
- if (hypo != NULL) {
- OutputTranslationOption(out, applicationContext, hypo, sentence, translationId);
-
- const TargetPhrase &currTarPhr = hypo->GetCurrTargetPhrase();
-
- out << " ||| ";
- if (const PhraseProperty *property = currTarPhr.GetProperty("Tree")) {
- out << " " << *property->GetValueString();
- } else {
- out << " " << "noTreeInfo";
- }
- out << std::endl;
- }
-
- // recursive
- const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
- std::vector<const ChartHypothesis*>::const_iterator iter;
- for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
- const ChartHypothesis *prevHypo = *iter;
- OutputTreeFragmentsTranslationOptions(out, applicationContext, prevHypo, sentence, translationId);
- }
-}
-
-void IOWrapper::OutputTreeFragmentsTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const search::Applied *applied, const Sentence &sentence, long translationId)
-{
-
- if (applied != NULL) {
- OutputTranslationOption(out, applicationContext, applied, sentence, translationId);
-
- const TargetPhrase &currTarPhr = *static_cast<const TargetPhrase*>(applied->GetNote().vp);
-
- out << " ||| ";
- if (const PhraseProperty *property = currTarPhr.GetProperty("Tree")) {
- out << " " << *property->GetValueString();
- } else {
- out << " " << "noTreeInfo";
- }
- out << std::endl;
- }
-
- // recursive
- const search::Applied *child = applied->Children();
- for (size_t i = 0; i < applied->GetArity(); i++) {
- OutputTreeFragmentsTranslationOptions(out, applicationContext, child++, sentence, translationId);
- }
-}
-
-void IOWrapper::OutputNBestList(const std::vector<search::Applied> &nbest, long translationId)
-{
- std::ostringstream out;
- // wtf? copied from the original OutputNBestList
- if (m_nBestOutputCollector->OutputIsCout()) {
- FixPrecision(out);
- }
- Phrase outputPhrase;
- ScoreComponentCollection features;
- for (std::vector<search::Applied>::const_iterator i = nbest.begin(); i != nbest.end(); ++i) {
- Incremental::PhraseAndFeatures(*i, outputPhrase, features);
- // <s> and </s>
- UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
- "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
-
- outputPhrase.RemoveWord(0);
- outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
- out << translationId << " ||| ";
- OutputSurface(out, outputPhrase, m_outputFactorOrder, false);
- out << " ||| ";
- OutputAllFeatureScores(features, out);
- out << " ||| " << i->GetScore() << '\n';
- }
- out << std::flush;
- assert(m_nBestOutputCollector);
- m_nBestOutputCollector->Write(translationId, out.str());
-}
-
-/***
- * print surface factor only for the given phrase
- */
-void IOWrapper::OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<FactorType> &outputFactorOrder, bool reportAllFactors)
-{
- UTIL_THROW_IF2(outputFactorOrder.size() == 0,
- "Cannot be empty phrase");
- if (reportAllFactors == true) {
- out << phrase;
- } else {
- size_t size = phrase.GetSize();
- for (size_t pos = 0 ; pos < size ; pos++) {
- const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
- out << *factor;
- UTIL_THROW_IF2(factor == NULL,
- "Empty factor 0 at position " << pos);
-
- for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
- const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
- UTIL_THROW_IF2(factor == NULL,
- "Empty factor " << i << " at position " << pos);
-
- out << "|" << *factor;
- }
- out << " ";
- }
- }
-}
-
-void IOWrapper::OutputAlignment(size_t translationId , const Moses::ChartHypothesis *hypo)
-{
- ostringstream out;
-
- if (hypo) {
- Alignments retAlign;
- OutputAlignment(retAlign, hypo, 0);
-
- // output alignments
- Alignments::const_iterator iter;
- for (iter = retAlign.begin(); iter != retAlign.end(); ++iter) {
- const pair<size_t, size_t> &alignPoint = *iter;
- out << alignPoint.first << "-" << alignPoint.second << " ";
- }
- }
- out << endl;
-
- m_alignmentInfoCollector->Write(translationId, out.str());
-}
-
-size_t IOWrapper::OutputAlignment(Alignments &retAlign, const Moses::ChartHypothesis *hypo, size_t startTarget)
-{
- size_t totalTargetSize = 0;
- size_t startSource = hypo->GetCurrSourceRange().GetStartPos();
-
- const TargetPhrase &tp = hypo->GetCurrTargetPhrase();
-
- size_t thisSourceSize = CalcSourceSize(hypo);
-
- // position of each terminal word in translation rule, irrespective of alignment
- // if non-term, number is undefined
- vector<size_t> sourceOffsets(thisSourceSize, 0);
- vector<size_t> targetOffsets(tp.GetSize(), 0);
-
- const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
-
- const AlignmentInfo &aiNonTerm = hypo->GetCurrTargetPhrase().GetAlignNonTerm();
- vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
- const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();
-
- UTIL_THROW_IF2(sourceInd2pos.size() != prevHypos.size(), "Error");
-
- size_t targetInd = 0;
- for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
- if (tp.GetWord(targetPos).IsNonTerminal()) {
- UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
- size_t sourceInd = targetPos2SourceInd[targetPos];
- size_t sourcePos = sourceInd2pos[sourceInd];
-
- const ChartHypothesis *prevHypo = prevHypos[sourceInd];
-
- // calc source size
- size_t sourceSize = prevHypo->GetCurrSourceRange().GetNumWordsCovered();
- sourceOffsets[sourcePos] = sourceSize;
-
- // calc target size.
- // Recursively look thru child hypos
- size_t currStartTarget = startTarget + totalTargetSize;
- size_t targetSize = OutputAlignment(retAlign, prevHypo, currStartTarget);
- targetOffsets[targetPos] = targetSize;
-
- totalTargetSize += targetSize;
- ++targetInd;
- } else {
- ++totalTargetSize;
- }
- }
-
- // convert position within translation rule to absolute position within
- // source sentence / output sentence
- ShiftOffsets(sourceOffsets, startSource);
- ShiftOffsets(targetOffsets, startTarget);
-
- // get alignments from this hypo
- const AlignmentInfo &aiTerm = hypo->GetCurrTargetPhrase().GetAlignTerm();
-
- // add to output arg, offsetting by source & target
- AlignmentInfo::const_iterator iter;
- for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
- const std::pair<size_t,size_t> &align = *iter;
- size_t relSource = align.first;
- size_t relTarget = align.second;
- size_t absSource = sourceOffsets[relSource];
- size_t absTarget = targetOffsets[relTarget];
-
- pair<size_t, size_t> alignPoint(absSource, absTarget);
- pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
- UTIL_THROW_IF2(!ret.second, "Error");
-
- }
-
- return totalTargetSize;
-}
-
-size_t IOWrapper::CalcSourceSize(const Moses::ChartHypothesis *hypo)
-{
- size_t ret = hypo->GetCurrSourceRange().GetNumWordsCovered();
- const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
- for (size_t i = 0; i < prevHypos.size(); ++i) {
- size_t childSize = prevHypos[i]->GetCurrSourceRange().GetNumWordsCovered();
- ret -= (childSize - 1);
- }
- return ret;
-}
-
-void IOWrapper::OutputDetailedTranslationReport(
- const ChartHypothesis *hypo,
- const Sentence &sentence,
- long translationId)
-{
- if (hypo == NULL) {
- return;
- }
- std::ostringstream out;
- ApplicationContext applicationContext;
-
- OutputTranslationOptions(out, applicationContext, hypo, sentence, translationId);
- UTIL_THROW_IF2(m_detailedTranslationCollector == NULL,
- "No ouput file for detailed reports specified");
- m_detailedTranslationCollector->Write(translationId, out.str());
-}
-
-//DIMw
-void IOWrapper::OutputDetailedAllTranslationReport(
- const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
- const ChartManager &manager,
- const Sentence &sentence,
- long translationId)
-{
- std::ostringstream out;
- ApplicationContext applicationContext;
-
- const ChartCellCollection& cells = manager.GetChartCellCollection();
- size_t size = manager.GetSource().GetSize();
- for (size_t width = 1; width <= size; ++width) {
- for (size_t startPos = 0; startPos <= size-width; ++startPos) {
- size_t endPos = startPos + width - 1;
- WordsRange range(startPos, endPos);
- const ChartCell& cell = cells.Get(range);
- const HypoList* hyps = cell.GetAllSortedHypotheses();
- out << "Chart Cell [" << startPos << ".." << endPos << "]" << endl;
- HypoList::const_iterator iter;
- size_t c = 1;
- for (iter = hyps->begin(); iter != hyps->end(); ++iter) {
- out << "----------------Item " << c++ << " ---------------------"
- << endl;
- OutputTranslationOptions(out, applicationContext, *iter,
- sentence, translationId);
- }
- }
- }
- UTIL_THROW_IF2(m_detailedTranslationCollector == NULL,
- "No output file for details specified");
- m_detailedTranslationCollector->Write(translationId, out.str());
-}
-
-void IOWrapper::OutputUnknowns(const std::vector<Moses::Phrase*> &unknowns,
- long translationId)
-{
- std::ostringstream out;
- for (std::size_t i = 0; i < unknowns.size(); ++i) {
- out << *(unknowns[i]);
- }
- out << std::endl;
- m_unknownsCollector->Write(translationId, out.str());
-}
-
-void IOWrapper::OutputNBestList(const ChartKBestExtractor::KBestVec &nBestList,
- long translationId)
-{
- std::ostringstream out;
-
- if (m_nBestOutputCollector->OutputIsCout()) {
- // Set precision only if we're writing the n-best list to cout. This is to
- // preserve existing behaviour, but should probably be done either way.
- FixPrecision(out);
- }
-
- bool includeWordAlignment =
- StaticData::Instance().PrintAlignmentInfoInNbest();
-
- bool PrintNBestTrees = StaticData::Instance().PrintNBestTrees();
-
- for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin();
- p != nBestList.end(); ++p) {
- const ChartKBestExtractor::Derivation &derivation = **p;
-
- // get the derivation's target-side yield
- Phrase outputPhrase = ChartKBestExtractor::GetOutputPhrase(derivation);
-
- // delete <s> and </s>
- UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
- "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
- outputPhrase.RemoveWord(0);
- outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
-
- // print the translation ID, surface factors, and scores
- out << translationId << " ||| ";
- OutputSurface(out, outputPhrase, m_outputFactorOrder, false);
- out << " ||| ";
- OutputAllFeatureScores(derivation.scoreBreakdown, out);
- out << " ||| " << derivation.score;
-
- // optionally, print word alignments
- if (includeWordAlignment) {
- out << " ||| ";
- Alignments align;
- OutputAlignmentNBest(align, derivation, 0);
- for (Alignments::const_iterator q = align.begin(); q != align.end();
- ++q) {
- out << q->first << "-" << q->second << " ";
- }
- }
-
- // optionally, print tree
- if (PrintNBestTrees) {
- TreePointer tree = ChartKBestExtractor::GetOutputTree(derivation);
- out << " ||| " << tree->GetString();
- }
-
- out << std::endl;
- }
-
- assert(m_nBestOutputCollector);
- m_nBestOutputCollector->Write(translationId, out.str());
-}
-
-size_t IOWrapper::OutputAlignmentNBest(
- Alignments &retAlign,
- const Moses::ChartKBestExtractor::Derivation &derivation,
- size_t startTarget)
-{
- const ChartHypothesis &hypo = derivation.edge.head->hypothesis;
-
- size_t totalTargetSize = 0;
- size_t startSource = hypo.GetCurrSourceRange().GetStartPos();
-
- const TargetPhrase &tp = hypo.GetCurrTargetPhrase();
-
- size_t thisSourceSize = CalcSourceSize(&hypo);
-
- // position of each terminal word in translation rule, irrespective of alignment
- // if non-term, number is undefined
- vector<size_t> sourceOffsets(thisSourceSize, 0);
- vector<size_t> targetOffsets(tp.GetSize(), 0);
-
- const AlignmentInfo &aiNonTerm = hypo.GetCurrTargetPhrase().GetAlignNonTerm();
- vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
- const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();
-
- UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
- "Error");
-
- size_t targetInd = 0;
- for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
- if (tp.GetWord(targetPos).IsNonTerminal()) {
- UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
- size_t sourceInd = targetPos2SourceInd[targetPos];
- size_t sourcePos = sourceInd2pos[sourceInd];
-
- const Moses::ChartKBestExtractor::Derivation &subderivation =
- *derivation.subderivations[sourceInd];
-
- // calc source size
- size_t sourceSize = subderivation.edge.head->hypothesis.GetCurrSourceRange().GetNumWordsCovered();
- sourceOffsets[sourcePos] = sourceSize;
-
- // calc target size.
- // Recursively look thru child hypos
- size_t currStartTarget = startTarget + totalTargetSize;
- size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
- currStartTarget);
- targetOffsets[targetPos] = targetSize;
-
- totalTargetSize += targetSize;
- ++targetInd;
- } else {
- ++totalTargetSize;
- }
- }
-
- // convert position within translation rule to absolute position within
- // source sentence / output sentence
- ShiftOffsets(sourceOffsets, startSource);
- ShiftOffsets(targetOffsets, startTarget);
-
- // get alignments from this hypo
- const AlignmentInfo &aiTerm = hypo.GetCurrTargetPhrase().GetAlignTerm();
-
- // add to output arg, offsetting by source & target
- AlignmentInfo::const_iterator iter;
- for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
- const std::pair<size_t,size_t> &align = *iter;
- size_t relSource = align.first;
- size_t relTarget = align.second;
- size_t absSource = sourceOffsets[relSource];
- size_t absTarget = targetOffsets[relTarget];
-
- pair<size_t, size_t> alignPoint(absSource, absTarget);
- pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
- UTIL_THROW_IF2(!ret.second, "Error");
- }
-
- return totalTargetSize;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/***
- * print surface factor only for the given phrase
- */
-void IOWrapper::OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
- char reportSegmentation, bool reportAllFactors)
-{
- UTIL_THROW_IF2(outputFactorOrder.size() == 0,
- "Must specific at least 1 output factor");
- const TargetPhrase& phrase = edge.GetCurrTargetPhrase();
- bool markUnknown = StaticData::Instance().GetMarkUnknown();
- if (reportAllFactors == true) {
- out << phrase;
- } else {
- FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor();
-
- std::map<size_t, const Factor*> placeholders;
- if (placeholderFactor != NOT_FOUND) {
- // creates map of target position -> factor for placeholders
- placeholders = GetPlaceholders(edge, placeholderFactor);
- }
-
- size_t size = phrase.GetSize();
- for (size_t pos = 0 ; pos < size ; pos++) {
- const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
-
- if (placeholders.size()) {
- // do placeholders
- std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos);
- if (iter != placeholders.end()) {
- factor = iter->second;
- }
- }
-
- UTIL_THROW_IF2(factor == NULL,
- "No factor 0 at position " << pos);
-
- //preface surface form with UNK if marking unknowns
- const Word &word = phrase.GetWord(pos);
- if(markUnknown && word.IsOOV()) {
- out << "UNK" << *factor;
- } else {
- out << *factor;
- }
-
- for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
- const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
- UTIL_THROW_IF2(factor == NULL,
- "No factor " << i << " at position " << pos);
-
- out << "|" << *factor;
- }
- out << " ";
- }
- }
-
- // trace ("report segmentation") option "-t" / "-tt"
- if (reportSegmentation > 0 && phrase.GetSize() > 0) {
- const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
- const int sourceStart = sourceRange.GetStartPos();
- const int sourceEnd = sourceRange.GetEndPos();
- out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt"
- if (reportSegmentation == 2) {
- out << ",wa=";
- const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
- OutputAlignment(out, ai, 0, 0);
- out << ",total=";
- out << edge.GetScore() - edge.GetPrevHypo()->GetScore();
- out << ",";
- ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
- scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
- OutputAllFeatureScores(scoreBreakdown, out);
- }
- out << "| ";
- }
-}
-
-void IOWrapper::OutputBestSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<FactorType> &outputFactorOrder,
- char reportSegmentation, bool reportAllFactors)
-{
- if (hypo != NULL) {
- // recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence
- OutputBestSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors);
- OutputSurface(out, *hypo, outputFactorOrder, reportSegmentation, reportAllFactors);
- }
-}
-
-void IOWrapper::OutputAlignment(ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset)
-{
- typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
- AlignVec alignments = ai.GetSortedAlignments();
-
- AlignVec::const_iterator it;
- for (it = alignments.begin(); it != alignments.end(); ++it) {
- const std::pair<size_t,size_t> &alignment = **it;
- out << alignment.first + sourceOffset << "-" << alignment.second + targetOffset << " ";
- }
-
-}
-
-void IOWrapper::OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
-{
- size_t targetOffset = 0;
-
- for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
- const Hypothesis &edge = *edges[currEdge];
- const TargetPhrase &tp = edge.GetCurrTargetPhrase();
- size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos();
-
- OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset);
-
- targetOffset += tp.GetSize();
- }
- // Removing std::endl here breaks -alignment-output-file, so stop doing that, please :)
- // Or fix it somewhere else.
- out << std::endl;
-}
-
-void IOWrapper::OutputAlignment(std::ostream &out, const Moses::Hypothesis *hypo)
-{
- std::vector<const Hypothesis *> edges;
- const Hypothesis *currentHypo = hypo;
- while (currentHypo) {
- edges.push_back(currentHypo);
- currentHypo = currentHypo->GetPrevHypo();
- }
-
- OutputAlignment(out, edges);
-
-}
-
-void IOWrapper::OutputAlignment(OutputCollector* collector, size_t lineNo , const vector<const Hypothesis *> &edges)
-{
- ostringstream out;
- OutputAlignment(out, edges);
-
- collector->Write(lineNo,out.str());
-}
-
-void IOWrapper::OutputAlignment(OutputCollector* collector, size_t lineNo , const Hypothesis *hypo)
-{
- if (collector) {
- std::vector<const Hypothesis *> edges;
- const Hypothesis *currentHypo = hypo;
- while (currentHypo) {
- edges.push_back(currentHypo);
- currentHypo = currentHypo->GetPrevHypo();
- }
-
- OutputAlignment(collector,lineNo, edges);
- }
-}
-
-void IOWrapper::OutputAlignment(OutputCollector* collector, size_t lineNo , const TrellisPath &path)
-{
- if (collector) {
- OutputAlignment(collector,lineNo, path.GetEdges());
- }
-}
-
-void IOWrapper::OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/, char reportSegmentation, bool reportAllFactors, std::ostream &out)
-{
- const std::vector<const Hypothesis *> &edges = path.GetEdges();
-
- for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
- const Hypothesis &edge = *edges[currEdge];
- OutputSurface(out, edge, StaticData::Instance().GetOutputFactorOrder(), reportSegmentation, reportAllFactors);
- }
- out << endl;
-}
-
-void IOWrapper::Backtrack(const Hypothesis *hypo)
-{
-
- if (hypo->GetPrevHypo() != NULL) {
- VERBOSE(3,hypo->GetId() << " <= ");
- Backtrack(hypo->GetPrevHypo());
- }
-}
-
-void IOWrapper::OutputBestHypo(const std::vector<Word>& mbrBestHypo, long /*translationId*/, char /*reportSegmentation*/, bool /*reportAllFactors*/, ostream& out)
-{
-
- for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) {
- const Factor *factor = mbrBestHypo[i].GetFactor(StaticData::Instance().GetOutputFactorOrder()[0]);
- UTIL_THROW_IF2(factor == NULL,
- "No factor 0 at position " << i);
- if (i>0) out << " " << *factor;
- else out << *factor;
- }
- out << endl;
-}
-
-
-void IOWrapper::OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo)
-{
- if (hypo->GetPrevHypo()) {
- OutputInput(map, hypo->GetPrevHypo());
- map[hypo->GetCurrSourceWordsRange().GetStartPos()] = &hypo->GetTranslationOption().GetInputPath().GetPhrase();
- }
-}
-
-void IOWrapper::OutputInput(std::ostream& os, const Hypothesis* hypo)
-{
- size_t len = hypo->GetInput().GetSize();
- std::vector<const Phrase*> inp_phrases(len, 0);
- OutputInput(inp_phrases, hypo);
- for (size_t i=0; i<len; ++i)
- if (inp_phrases[i]) os << *inp_phrases[i];
-}
-
-void IOWrapper::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, char reportSegmentation, bool reportAllFactors)
-{
- if (hypo != NULL) {
- VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl);
- VERBOSE(3,"Best path: ");
- Backtrack(hypo);
- VERBOSE(3,"0" << std::endl);
- if (!m_surpressSingleBestOutput) {
- if (StaticData::Instance().GetOutputHypoScore()) {
- cout << hypo->GetTotalScore() << " ";
- }
-
- if (StaticData::Instance().IsPathRecoveryEnabled()) {
- OutputInput(cout, hypo);
- cout << "||| ";
- }
- OutputBestSurface(cout, hypo, m_outputFactorOrder, reportSegmentation, reportAllFactors);
- cout << endl;
- }
- } else {
- VERBOSE(1, "NO BEST TRANSLATION" << endl);
- if (!m_surpressSingleBestOutput) {
- cout << endl;
- }
- }
-}
-
bool IOWrapper::ReadInput(InputTypeEnum inputType, InputType*& source)
{
delete source;
@@ -1239,399 +271,17 @@ bool IOWrapper::ReadInput(InputTypeEnum inputType, InputType*& source)
case TreeInputType:
source = GetInput(new TreeInput);
break;
+ case TabbedSentenceInput:
+ source = GetInput(new TabbedSentence);
+ break;
+ case ForestInputType:
+ source = GetInput(new ForestInput);
+ break;
default:
TRACE_ERR("Unknown input type: " << inputType << "\n");
}
return (source ? true : false);
}
-void IOWrapper::OutputNBest(std::ostream& out
- , const Moses::TrellisPathList &nBestList
- , const std::vector<Moses::FactorType>& outputFactorOrder
- , long translationId
- , char reportSegmentation)
-{
- const StaticData &staticData = StaticData::Instance();
- bool reportAllFactors = staticData.GetReportAllFactorsNBest();
- bool includeSegmentation = staticData.NBestIncludesSegmentation();
- bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
-
- TrellisPathList::const_iterator iter;
- for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
- const TrellisPath &path = **iter;
- const std::vector<const Hypothesis *> &edges = path.GetEdges();
-
- // print the surface factor of the translation
- out << translationId << " ||| ";
- for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
- const Hypothesis &edge = *edges[currEdge];
- OutputSurface(out, edge, outputFactorOrder, reportSegmentation, reportAllFactors);
- }
- out << " |||";
-
- // print scores with feature names
- OutputAllFeatureScores(path.GetScoreBreakdown(), out );
-
- // total
- out << " ||| " << path.GetTotalScore();
-
- //phrase-to-phrase segmentation
- if (includeSegmentation) {
- out << " |||";
- for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
- const Hypothesis &edge = *edges[currEdge];
- const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
- WordsRange targetRange = path.GetTargetWordsRange(edge);
- out << " " << sourceRange.GetStartPos();
- if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
- out << "-" << sourceRange.GetEndPos();
- }
- out<< "=" << targetRange.GetStartPos();
- if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
- out<< "-" << targetRange.GetEndPos();
- }
- }
- }
-
- if (includeWordAlignment) {
- out << " ||| ";
- for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
- const Hypothesis &edge = *edges[currEdge];
- const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
- WordsRange targetRange = path.GetTargetWordsRange(edge);
- const int sourceOffset = sourceRange.GetStartPos();
- const int targetOffset = targetRange.GetStartPos();
- const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
-
- OutputAlignment(out, ai, sourceOffset, targetOffset);
-
- }
- }
-
- if (StaticData::Instance().IsPathRecoveryEnabled()) {
- out << " ||| ";
- OutputInput(out, edges[0]);
- }
-
- out << endl;
- }
-
- out << std::flush;
-}
-
-void IOWrapper::OutputAllFeatureScores(const Moses::ScoreComponentCollection &features
- , std::ostream &out)
-{
- std::string lastName = "";
- const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
- for( size_t i=0; i<sff.size(); i++ ) {
- const StatefulFeatureFunction *ff = sff[i];
- if (ff->GetScoreProducerDescription() != "BleuScoreFeature"
- && ff->IsTuneable()) {
- OutputFeatureScores( out, features, ff, lastName );
- }
- }
- const vector<const StatelessFeatureFunction*>& slf = StatelessFeatureFunction::GetStatelessFeatureFunctions();
- for( size_t i=0; i<slf.size(); i++ ) {
- const StatelessFeatureFunction *ff = slf[i];
- if (ff->IsTuneable()) {
- OutputFeatureScores( out, features, ff, lastName );
- }
- }
-}
-
-void IOWrapper::OutputFeatureScores( std::ostream& out
- , const ScoreComponentCollection &features
- , const FeatureFunction *ff
- , std::string &lastName )
-{
- const StaticData &staticData = StaticData::Instance();
- bool labeledOutput = staticData.IsLabeledNBestList();
-
- // regular features (not sparse)
- if (ff->GetNumScoreComponents() != 0) {
- if( labeledOutput && lastName != ff->GetScoreProducerDescription() ) {
- lastName = ff->GetScoreProducerDescription();
- out << " " << lastName << "=";
- }
- vector<float> scores = features.GetScoresForProducer( ff );
- for (size_t j = 0; j<scores.size(); ++j) {
- out << " " << scores[j];
- }
- }
-
- // sparse features
- const FVector scores = features.GetVectorForProducer( ff );
- for(FVector::FNVmap::const_iterator i = scores.cbegin(); i != scores.cend(); i++) {
- out << " " << i->first << "= " << i->second;
- }
-}
-
-void IOWrapper::OutputLatticeMBRNBest(std::ostream& out, const vector<LatticeMBRSolution>& solutions,long translationId)
-{
- for (vector<LatticeMBRSolution>::const_iterator si = solutions.begin(); si != solutions.end(); ++si) {
- out << translationId;
- out << " |||";
- const vector<Word> mbrHypo = si->GetWords();
- for (size_t i = 0 ; i < mbrHypo.size() ; i++) {
- const Factor *factor = mbrHypo[i].GetFactor(StaticData::Instance().GetOutputFactorOrder()[0]);
- if (i>0) out << " " << *factor;
- else out << *factor;
- }
- out << " |||";
- out << " map: " << si->GetMapScore();
- out << " w: " << mbrHypo.size();
- const vector<float>& ngramScores = si->GetNgramScores();
- for (size_t i = 0; i < ngramScores.size(); ++i) {
- out << " " << ngramScores[i];
- }
- out << " ||| " << si->GetScore();
-
- out << endl;
- }
-}
-
-
-void IOWrapper::OutputLatticeMBRNBestList(const vector<LatticeMBRSolution>& solutions,long translationId)
-{
- OutputLatticeMBRNBest(*m_nBestStream, solutions,translationId);
-}
-
-IOWrapper *IOWrapper::GetIOWrapper(const StaticData &staticData)
-{
- IOWrapper *ioWrapper;
- const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder()
- ,&outputFactorOrder = staticData.GetOutputFactorOrder();
- FactorMask inputFactorUsed(inputFactorOrder);
-
- // io
- string inputPath;
- if (staticData.GetParam("input-file").size() == 1) {
- VERBOSE(2,"IO from File" << endl);
- inputPath = staticData.GetParam("input-file")[0];
- }
- ioWrapper = new IOWrapper(inputFactorOrder, outputFactorOrder, inputFactorUsed
- , staticData.GetNBestSize()
- , staticData.GetNBestFilePath()
- , inputPath);
-
- IFVERBOSE(1)
- PrintUserTime("Created input-output object");
-
- return ioWrapper;
-}
-
-
-////////////////////////////
-#include "moses/Syntax/PVertex.h"
-#include "moses/Syntax/S2T/DerivationWriter.h"
-
-void IOWrapper::OutputDetailedTranslationReport(const Syntax::SHyperedge *best,
- long translationId)
-{
- if (best == NULL) {
- return;
- }
- std::ostringstream out;
- Syntax::S2T::DerivationWriter::Write(*best, translationId, out);
- UTIL_THROW_IF2(m_detailedTranslationCollector == NULL,
- "No ouput file for detailed reports specified");
- m_detailedTranslationCollector->Write(translationId, out.str());
-}
-
-void IOWrapper::OutputBestHypo(const Syntax::SHyperedge *best,
- long translationId)
-{
- if (!m_singleBestOutputCollector) {
- return;
- }
- std::ostringstream out;
- IOWrapper::FixPrecision(out);
- if (best == NULL) {
- VERBOSE(1, "NO BEST TRANSLATION" << std::endl);
- if (StaticData::Instance().GetOutputHypoScore()) {
- out << "0 ";
- }
- } else {
- if (StaticData::Instance().GetOutputHypoScore()) {
- out << best->score << " ";
- }
- Phrase yield = Syntax::GetOneBestTargetYield(*best);
- // delete 1st & last
- UTIL_THROW_IF2(yield.GetSize() < 2,
- "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
- yield.RemoveWord(0);
- yield.RemoveWord(yield.GetSize()-1);
- out << yield.GetStringRep(StaticData::Instance().GetOutputFactorOrder());
- out << '\n';
- }
- m_singleBestOutputCollector->Write(translationId, out.str());
-}
-
-void IOWrapper::OutputNBestList(
- const Syntax::KBestExtractor::KBestVec &nBestList, long translationId)
-{
- std::ostringstream out;
-
- if (m_nBestOutputCollector->OutputIsCout()) {
- // Set precision only if we're writing the n-best list to cout. This is to
- // preserve existing behaviour, but should probably be done either way.
- IOWrapper::FixPrecision(out);
- }
-
- bool includeWordAlignment =
- StaticData::Instance().PrintAlignmentInfoInNbest();
-
- bool PrintNBestTrees = StaticData::Instance().PrintNBestTrees();
-
- for (Syntax::KBestExtractor::KBestVec::const_iterator p = nBestList.begin();
- p != nBestList.end(); ++p) {
- const Syntax::KBestExtractor::Derivation &derivation = **p;
-
- // get the derivation's target-side yield
- Phrase outputPhrase = Syntax::KBestExtractor::GetOutputPhrase(derivation);
-
- // delete <s> and </s>
- UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
- "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
- outputPhrase.RemoveWord(0);
- outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
-
- // print the translation ID, surface factors, and scores
- out << translationId << " ||| ";
- OutputSurface(out, outputPhrase, m_outputFactorOrder, false);
- out << " ||| ";
- OutputAllFeatureScores(derivation.scoreBreakdown, out);
- out << " ||| " << derivation.score;
-
- // optionally, print word alignments
- if (includeWordAlignment) {
- out << " ||| ";
- Alignments align;
- OutputAlignmentNBest(align, derivation, 0);
- for (Alignments::const_iterator q = align.begin(); q != align.end();
- ++q) {
- out << q->first << "-" << q->second << " ";
- }
- }
-
- // optionally, print tree
- if (PrintNBestTrees) {
- TreePointer tree = Syntax::KBestExtractor::GetOutputTree(derivation);
- out << " ||| " << tree->GetString();
- }
-
- out << std::endl;
- }
-
- assert(m_nBestOutputCollector);
- m_nBestOutputCollector->Write(translationId, out.str());
-}
-
-size_t IOWrapper::CalcSourceSize(const Syntax::KBestExtractor::Derivation &d) const
-{
- using namespace Moses::Syntax;
-
- const Syntax::SHyperedge &shyperedge = d.edge->shyperedge;
- size_t ret = shyperedge.head->pvertex->span.GetNumWordsCovered();
- for (size_t i = 0; i < shyperedge.tail.size(); ++i) {
- size_t childSize = shyperedge.tail[i]->pvertex->span.GetNumWordsCovered();
- ret -= (childSize - 1);
- }
- return ret;
-}
-
-size_t IOWrapper::OutputAlignmentNBest(
- Alignments &retAlign,
- const Syntax::KBestExtractor::Derivation &derivation,
- size_t startTarget)
-{
- const Syntax::SHyperedge &shyperedge = derivation.edge->shyperedge;
-
- size_t totalTargetSize = 0;
- size_t startSource = shyperedge.head->pvertex->span.GetStartPos();
-
- const TargetPhrase &tp = *(shyperedge.translation);
-
- size_t thisSourceSize = CalcSourceSize(derivation);
-
- // position of each terminal word in translation rule, irrespective of alignment
- // if non-term, number is undefined
- vector<size_t> sourceOffsets(thisSourceSize, 0);
- vector<size_t> targetOffsets(tp.GetSize(), 0);
-
- const AlignmentInfo &aiNonTerm = shyperedge.translation->GetAlignNonTerm();
- vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
- const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();
-
- UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
- "Error");
-
- size_t targetInd = 0;
- for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
- if (tp.GetWord(targetPos).IsNonTerminal()) {
- UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
- size_t sourceInd = targetPos2SourceInd[targetPos];
- size_t sourcePos = sourceInd2pos[sourceInd];
-
- const Moses::Syntax::KBestExtractor::Derivation &subderivation =
- *derivation.subderivations[sourceInd];
-
- // calc source size
- size_t sourceSize =
- subderivation.edge->head->svertex.pvertex->span.GetNumWordsCovered();
- sourceOffsets[sourcePos] = sourceSize;
-
- // calc target size.
- // Recursively look thru child hypos
- size_t currStartTarget = startTarget + totalTargetSize;
- size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
- currStartTarget);
- targetOffsets[targetPos] = targetSize;
-
- totalTargetSize += targetSize;
- ++targetInd;
- } else {
- ++totalTargetSize;
- }
- }
-
- // convert position within translation rule to absolute position within
- // source sentence / output sentence
- ShiftOffsets(sourceOffsets, startSource);
- ShiftOffsets(targetOffsets, startTarget);
-
- // get alignments from this hypo
- const AlignmentInfo &aiTerm = shyperedge.translation->GetAlignTerm();
-
- // add to output arg, offsetting by source & target
- AlignmentInfo::const_iterator iter;
- for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
- const std::pair<size_t,size_t> &align = *iter;
- size_t relSource = align.first;
- size_t relTarget = align.second;
- size_t absSource = sourceOffsets[relSource];
- size_t absTarget = targetOffsets[relTarget];
-
- pair<size_t, size_t> alignPoint(absSource, absTarget);
- pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
- UTIL_THROW_IF2(!ret.second, "Error");
- }
-
- return totalTargetSize;
-}
-
-void IOWrapper::OutputUnknowns(const std::set<Moses::Word> &unknowns,
- long translationId)
-{
- std::ostringstream out;
- for (std::set<Moses::Word>::const_iterator p = unknowns.begin();
- p != unknowns.end(); ++p) {
- out << *p;
- }
- out << std::endl;
- m_unknownsCollector->Write(translationId, out.str());
-}
-
} // namespace
diff --git a/moses/IOWrapper.h b/moses/IOWrapper.h
index 4253871b3..8a0a8cc54 100644
--- a/moses/IOWrapper.h
+++ b/moses/IOWrapper.h
@@ -41,6 +41,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "moses/TypeDef.h"
#include "moses/Sentence.h"
+#include "moses/TabbedSentence.h"
#include "moses/FactorTypeSet.h"
#include "moses/FactorCollection.h"
#include "moses/Hypothesis.h"
@@ -73,22 +74,18 @@ class IOWrapper
{
protected:
- const std::vector<Moses::FactorType> &m_inputFactorOrder;
- const std::vector<Moses::FactorType> &m_outputFactorOrder;
- const Moses::FactorMask &m_inputFactorUsed;
- std::string m_inputFilePath;
- Moses::InputFileStream *m_inputFile;
- std::istream *m_inputStream;
+ const std::vector<Moses::FactorType> *m_inputFactorOrder;
+ std::string m_inputFilePath;
+ Moses::InputFileStream *m_inputFile;
+ std::istream *m_inputStream;
std::ostream *m_nBestStream;
std::ostream *m_outputWordGraphStream;
+ std::ostream *m_outputSearchGraphStream;
std::ostream *m_detailedTranslationReportingStream;
+ std::ostream *m_unknownsStream;
+ std::ostream *m_detailedTreeFragmentsTranslationReportingStream;
std::ofstream *m_alignmentInfoStream;
- std::ostream *m_unknownsStream;
- std::ostream *m_outputSearchGraphStream;
std::ofstream *m_latticeSamplesStream;
- std::ostream *m_detailedTreeFragmentsTranslationReportingStream;
-
- bool m_surpressSingleBestOutput;
Moses::OutputCollector *m_singleBestOutputCollector;
Moses::OutputCollector *m_nBestOutputCollector;
@@ -100,81 +97,16 @@ protected:
Moses::OutputCollector *m_latticeSamplesCollector;
Moses::OutputCollector *m_detailTreeFragmentsOutputCollector;
- // CHART
- typedef std::vector<std::pair<Moses::Word, Moses::WordsRange> > ApplicationContext;
- typedef std::set< std::pair<size_t, size_t> > Alignments;
-
- void Backtrack(const ChartHypothesis *hypo);
- void OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
- void OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const search::Applied *applied, const Moses::Sentence &sentence, long translationId);
- void OutputTranslationOption(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
- void OutputTranslationOption(std::ostream &out, ApplicationContext &applicationContext, const search::Applied *applied, const Moses::Sentence &sentence, long translationId);
-
- void ReconstructApplicationContext(const Moses::ChartHypothesis &hypo,
- const Moses::Sentence &sentence,
- ApplicationContext &context);
- void ReconstructApplicationContext(const search::Applied *applied,
- const Moses::Sentence &sentence,
- ApplicationContext &context);
- void WriteApplicationContext(std::ostream &out,
- const ApplicationContext &context);
- void OutputTreeFragmentsTranslationOptions(std::ostream &out,
- ApplicationContext &applicationContext,
- const Moses::ChartHypothesis *hypo,
- const Moses::Sentence &sentence,
- long translationId);
- void OutputTreeFragmentsTranslationOptions(std::ostream &out,
- ApplicationContext &applicationContext,
- const search::Applied *applied,
- const Moses::Sentence &sentence,
- long translationId);
-
- void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<FactorType> &outputFactorOrder, bool reportAllFactors);
- void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
- char reportSegmentation, bool reportAllFactors);
-
- size_t OutputAlignment(Alignments &retAlign, const Moses::ChartHypothesis *hypo, size_t startTarget);
- size_t OutputAlignmentNBest(Alignments &retAlign,
- const Moses::ChartKBestExtractor::Derivation &derivation,
- size_t startTarget);
- std::size_t OutputAlignmentNBest(Alignments &retAlign, const Moses::Syntax::KBestExtractor::Derivation &derivation, std::size_t startTarget);
-
- size_t CalcSourceSize(const Moses::ChartHypothesis *hypo);
- size_t CalcSourceSize(const Syntax::KBestExtractor::Derivation &d) const;
-
- template <class T>
- void ShiftOffsets(std::vector<T> &offsets, T shift)
- {
- T currPos = shift;
- for (size_t i = 0; i < offsets.size(); ++i) {
- if (offsets[i] == 0) {
- offsets[i] = currPos;
- ++currPos;
- } else {
- currPos += offsets[i];
- }
- }
- }
+ bool m_surpressSingleBestOutput;
+
public:
- static IOWrapper *GetIOWrapper(const Moses::StaticData &staticData);
- static void FixPrecision(std::ostream &, size_t size=3);
-
- IOWrapper(const std::vector<Moses::FactorType> &inputFactorOrder
- , const std::vector<Moses::FactorType> &outputFactorOrder
- , const Moses::FactorMask &inputFactorUsed
- , size_t nBestSize
- , const std::string &nBestFilePath
- , const std::string &inputFilePath = "");
+ IOWrapper();
~IOWrapper();
Moses::InputType* GetInput(Moses::InputType *inputType);
bool ReadInput(Moses::InputTypeEnum inputType, Moses::InputType*& source);
- void OutputBestHypo(const Moses::Hypothesis *hypo, long translationId, char reportSegmentation, bool reportAllFactors);
- void OutputLatticeMBRNBestList(const std::vector<LatticeMBRSolution>& solutions,long translationId);
- void Backtrack(const Moses::Hypothesis *hypo);
-
Moses::OutputCollector *GetSingleBestOutputCollector() {
return m_singleBestOutputCollector;
}
@@ -207,66 +139,9 @@ public:
return m_latticeSamplesCollector;
}
- // CHART
- void OutputBestHypo(const Moses::ChartHypothesis *hypo, long translationId);
- void OutputBestHypo(search::Applied applied, long translationId);
- void OutputBestHypo(const Moses::Syntax::SHyperedge *, long translationId);
-
- void OutputBestNone(long translationId);
-
- void OutputNBestList(const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList, long translationId);
- void OutputNBestList(const std::vector<search::Applied> &nbest, long translationId);
- void OutputNBestList(const Moses::Syntax::KBestExtractor::KBestVec &nBestList, long translationId);
-
- void OutputDetailedTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
- void OutputDetailedTranslationReport(const search::Applied *applied, const Moses::Sentence &sentence, long translationId);
- void OutputDetailedTranslationReport(const Moses::Syntax::SHyperedge *, long translationId);
-
- void OutputDetailedAllTranslationReport(const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList, const Moses::ChartManager &manager, const Moses::Sentence &sentence, long translationId);
-
- void OutputAlignment(size_t translationId , const Moses::ChartHypothesis *hypo);
- void OutputUnknowns(const std::vector<Moses::Phrase*> &, long);
- void OutputUnknowns(const std::set<Moses::Word> &, long);
-
- void OutputDetailedTreeFragmentsTranslationReport(const Moses::ChartHypothesis *hypo,
- const Moses::Sentence &sentence,
- long translationId);
- void OutputDetailedTreeFragmentsTranslationReport(const search::Applied *applied,
- const Moses::Sentence &sentence,
- long translationId);
-
- // phrase-based
- void OutputBestSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder, char reportSegmentation, bool reportAllFactors);
- void OutputLatticeMBRNBest(std::ostream& out, const std::vector<LatticeMBRSolution>& solutions,long translationId);
- void OutputBestHypo(const std::vector<Moses::Word>& mbrBestHypo, long /*translationId*/,
- char reportSegmentation, bool reportAllFactors, std::ostream& out);
- void OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/,char reportSegmentation, bool reportAllFactors, std::ostream &out);
- void OutputInput(std::ostream& os, const Moses::Hypothesis* hypo);
- void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo);
-
- void OutputAlignment(Moses::OutputCollector* collector, size_t lineNo, const Moses::Hypothesis *hypo);
- void OutputAlignment(Moses::OutputCollector* collector, size_t lineNo, const Moses::TrellisPath &path);
- void OutputAlignment(OutputCollector* collector, size_t lineNo , const std::vector<const Hypothesis *> &edges);
-
- static void OutputAlignment(std::ostream &out, const Moses::Hypothesis *hypo);
- static void OutputAlignment(std::ostream &out, const std::vector<const Hypothesis *> &edges);
- static void OutputAlignment(std::ostream &out, const Moses::AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset);
-
- void OutputNBest(std::ostream& out
- , const Moses::TrellisPathList &nBestList
- , const std::vector<Moses::FactorType>& outputFactorOrder
- , long translationId
- , char reportSegmentation);
-
- static void OutputAllFeatureScores(const Moses::ScoreComponentCollection &features
- , std::ostream &out);
- static void OutputFeatureScores( std::ostream& out
- , const Moses::ScoreComponentCollection &features
- , const Moses::FeatureFunction *ff
- , std::string &lastName );
-
- // creates a map of TARGET positions which should be replaced by word using placeholder
- std::map<size_t, const Moses::Factor*> GetPlaceholders(const Moses::Hypothesis &hypo, Moses::FactorType placeholderFactor);
+ Moses::OutputCollector *GetDetailTreeFragmentsOutputCollector() {
+ return m_detailTreeFragmentsOutputCollector;
+ }
// post editing
std::ifstream *spe_src, *spe_trg, *spe_aln;
diff --git a/moses/Incremental.cpp b/moses/Incremental.cpp
index 06c46b786..49573664e 100644
--- a/moses/Incremental.cpp
+++ b/moses/Incremental.cpp
@@ -8,6 +8,7 @@
#include "moses/StaticData.h"
#include "moses/Util.h"
#include "moses/LM/Base.h"
+#include "moses/OutputCollector.h"
#include "lm/model.hh"
#include "search/applied.hh"
@@ -162,7 +163,7 @@ template <class Model> void Fill<Model>::AddPhraseOOV(TargetPhrase &phrase, std:
{
std::vector<lm::WordIndex> words;
UTIL_THROW_IF2(phrase.GetSize() > 1,
- "OOV target phrase should be 0 or 1 word in length");
+ "OOV target phrase should be 0 or 1 word in length");
if (phrase.GetSize())
words.push_back(Convert(phrase.GetWord(0)));
@@ -182,9 +183,9 @@ template <class Model> void Fill<Model>::AddPhraseOOV(TargetPhrase &phrase, std:
// for pruning
template <class Model> float Fill<Model>::GetBestScore(const ChartCellLabel *chartCell) const
{
- search::PartialVertex vertex = chartCell->GetStack().incr->RootAlternate();
- UTIL_THROW_IF2(vertex.Empty(), "hypothesis with empty stack");
- return vertex.Bound();
+ search::PartialVertex vertex = chartCell->GetStack().incr->RootAlternate();
+ UTIL_THROW_IF2(vertex.Empty(), "hypothesis with empty stack");
+ return vertex.Bound();
}
// TODO: factors (but chart doesn't seem to support factors anyway).
@@ -203,7 +204,7 @@ struct ChartCellBaseFactory {
} // namespace
Manager::Manager(const InputType &source) :
- source_(source),
+ BaseManager(source),
cells_(source, ChartCellBaseFactory()),
parser_(source, cells_),
n_best_(search::NBestConfig(StaticData::Instance().GetNBestSize())) {}
@@ -220,7 +221,7 @@ template <class Model, class Best> search::History Manager::PopulateBest(const M
search::Config config(abstract.GetWeight() * M_LN10, data.GetCubePruningPopLimit(), search::NBestConfig(data.GetNBestSize()));
search::Context<Model> context(config, model);
- size_t size = source_.GetSize();
+ size_t size = m_source.GetSize();
boost::object_pool<search::Vertex> vertex_pool(std::max<size_t>(size * size / 2, 32));
for (int startPos = size-1; startPos >= 0; --startPos) {
@@ -272,12 +273,243 @@ template void Manager::LMCallback<lm::ngram::QuantTrieModel>(const lm::ngram::Qu
template void Manager::LMCallback<lm::ngram::ArrayTrieModel>(const lm::ngram::ArrayTrieModel &model, const std::vector<lm::WordIndex> &words);
template void Manager::LMCallback<lm::ngram::QuantArrayTrieModel>(const lm::ngram::QuantArrayTrieModel &model, const std::vector<lm::WordIndex> &words);
-const std::vector<search::Applied> &Manager::ProcessSentence()
+void Manager::Decode()
{
LanguageModel::GetFirstLM().IncrementalCallback(*this);
+}
+
+const std::vector<search::Applied> &Manager::GetNBest() const
+{
return *completed_nbest_;
}
+void Manager::OutputBest(OutputCollector *collector) const
+{
+ const long translationId = m_source.GetTranslationId();
+ const std::vector<search::Applied> &nbest = GetNBest();
+ if (!nbest.empty()) {
+ OutputBestHypo(collector, nbest[0], translationId);
+ } else {
+ OutputBestNone(collector, translationId);
+ }
+
+}
+
+
+void Manager::OutputNBest(OutputCollector *collector) const
+{
+ if (collector == NULL) {
+ return;
+ }
+
+ OutputNBestList(collector, *completed_nbest_, m_source.GetTranslationId());
+}
+
+void Manager::OutputNBestList(OutputCollector *collector, const std::vector<search::Applied> &nbest, long translationId) const
+{
+ const StaticData &staticData = StaticData::Instance();
+ const std::vector<Moses::FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
+
+ std::ostringstream out;
+ // wtf? copied from the original OutputNBestList
+ if (collector->OutputIsCout()) {
+ FixPrecision(out);
+ }
+ Phrase outputPhrase;
+ ScoreComponentCollection features;
+ for (std::vector<search::Applied>::const_iterator i = nbest.begin(); i != nbest.end(); ++i) {
+ Incremental::PhraseAndFeatures(*i, outputPhrase, features);
+ // <s> and </s>
+ UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
+ "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
+
+ outputPhrase.RemoveWord(0);
+ outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
+ out << translationId << " ||| ";
+ OutputSurface(out, outputPhrase, outputFactorOrder, false);
+ out << " ||| ";
+ features.OutputAllFeatureScores(out);
+ out << " ||| " << i->GetScore() << '\n';
+ }
+ out << std::flush;
+ assert(collector);
+ collector->Write(translationId, out.str());
+}
+
+void Manager::OutputDetailedTranslationReport(OutputCollector *collector) const
+{
+ if (collector && !completed_nbest_->empty()) {
+ const search::Applied &applied = completed_nbest_->at(0);
+ OutputDetailedTranslationReport(collector,
+ &applied,
+ static_cast<const Sentence&>(m_source),
+ m_source.GetTranslationId());
+ }
+
+}
+
+void Manager::OutputDetailedTranslationReport(
+ OutputCollector *collector,
+ const search::Applied *applied,
+ const Sentence &sentence,
+ long translationId) const
+{
+ if (applied == NULL) {
+ return;
+ }
+ std::ostringstream out;
+ ApplicationContext applicationContext;
+
+ OutputTranslationOptions(out, applicationContext, applied, sentence, translationId);
+ collector->Write(translationId, out.str());
+}
+
+void Manager::OutputTranslationOptions(std::ostream &out,
+ ApplicationContext &applicationContext,
+ const search::Applied *applied,
+ const Sentence &sentence, long translationId) const
+{
+ if (applied != NULL) {
+ OutputTranslationOption(out, applicationContext, applied, sentence, translationId);
+ out << std::endl;
+ }
+
+ // recursive
+ const search::Applied *child = applied->Children();
+ for (size_t i = 0; i < applied->GetArity(); i++) {
+ OutputTranslationOptions(out, applicationContext, child++, sentence, translationId);
+ }
+}
+
+void Manager::OutputTranslationOption(std::ostream &out,
+ ApplicationContext &applicationContext,
+ const search::Applied *applied,
+ const Sentence &sentence,
+ long translationId) const
+{
+ ReconstructApplicationContext(applied, sentence, applicationContext);
+ const TargetPhrase &phrase = *static_cast<const TargetPhrase*>(applied->GetNote().vp);
+ out << "Trans Opt " << translationId
+ << " " << applied->GetRange()
+ << ": ";
+ WriteApplicationContext(out, applicationContext);
+ out << ": " << phrase.GetTargetLHS()
+ << "->" << phrase
+ << " " << applied->GetScore(); // << hypo->GetScoreBreakdown() TODO: missing in incremental search hypothesis
+}
+
+// Given a hypothesis and sentence, reconstructs the 'application context' --
+// the source RHS symbols of the SCFG rule that was applied, plus their spans.
+void Manager::ReconstructApplicationContext(const search::Applied *applied,
+ const Sentence &sentence,
+ ApplicationContext &context) const
+{
+ context.clear();
+ const WordsRange &span = applied->GetRange();
+ const search::Applied *child = applied->Children();
+ size_t i = span.GetStartPos();
+ size_t j = 0;
+
+ while (i <= span.GetEndPos()) {
+ if (j == applied->GetArity() || i < child->GetRange().GetStartPos()) {
+ // Symbol is a terminal.
+ const Word &symbol = sentence.GetWord(i);
+ context.push_back(std::make_pair(symbol, WordsRange(i, i)));
+ ++i;
+ } else {
+ // Symbol is a non-terminal.
+ const Word &symbol = static_cast<const TargetPhrase*>(child->GetNote().vp)->GetTargetLHS();
+ const WordsRange &range = child->GetRange();
+ context.push_back(std::make_pair(symbol, range));
+ i = range.GetEndPos()+1;
+ ++child;
+ ++j;
+ }
+ }
+}
+
+void Manager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const
+{
+ if (collector == NULL || Completed().empty()) {
+ return;
+ }
+
+ const search::Applied *applied = &Completed()[0];
+ const Sentence &sentence = dynamic_cast<const Sentence &>(m_source);
+ const size_t translationId = m_source.GetTranslationId();
+
+ std::ostringstream out;
+ ApplicationContext applicationContext;
+
+ OutputTreeFragmentsTranslationOptions(out, applicationContext, applied, sentence, translationId);
+
+ //Tree of full sentence
+ //TODO: incremental search doesn't support stateful features
+
+ collector->Write(translationId, out.str());
+
+}
+
+void Manager::OutputTreeFragmentsTranslationOptions(std::ostream &out,
+ ApplicationContext &applicationContext,
+ const search::Applied *applied,
+ const Sentence &sentence,
+ long translationId) const
+{
+
+ if (applied != NULL) {
+ OutputTranslationOption(out, applicationContext, applied, sentence, translationId);
+
+ const TargetPhrase &currTarPhr = *static_cast<const TargetPhrase*>(applied->GetNote().vp);
+
+ out << " ||| ";
+ if (const PhraseProperty *property = currTarPhr.GetProperty("Tree")) {
+ out << " " << *property->GetValueString();
+ } else {
+ out << " " << "noTreeInfo";
+ }
+ out << std::endl;
+ }
+
+ // recursive
+ const search::Applied *child = applied->Children();
+ for (size_t i = 0; i < applied->GetArity(); i++) {
+ OutputTreeFragmentsTranslationOptions(out, applicationContext, child++, sentence, translationId);
+ }
+}
+
+void Manager::OutputBestHypo(OutputCollector *collector, search::Applied applied, long translationId) const
+{
+ if (collector == NULL) return;
+ std::ostringstream out;
+ FixPrecision(out);
+ if (StaticData::Instance().GetOutputHypoScore()) {
+ out << applied.GetScore() << ' ';
+ }
+ Phrase outPhrase;
+ Incremental::ToPhrase(applied, outPhrase);
+ // delete 1st & last
+ UTIL_THROW_IF2(outPhrase.GetSize() < 2,
+ "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
+ outPhrase.RemoveWord(0);
+ outPhrase.RemoveWord(outPhrase.GetSize() - 1);
+ out << outPhrase.GetStringRep(StaticData::Instance().GetOutputFactorOrder());
+ out << '\n';
+ collector->Write(translationId, out.str());
+
+ VERBOSE(1,"BEST TRANSLATION: " << outPhrase << "[total=" << applied.GetScore() << "]" << std::endl);
+}
+
+void Manager::OutputBestNone(OutputCollector *collector, long translationId) const
+{
+ if (collector == NULL) return;
+ if (StaticData::Instance().GetOutputHypoScore()) {
+ collector->Write(translationId, "0 \n");
+ } else {
+ collector->Write(translationId, "\n");
+ }
+}
+
namespace
{
diff --git a/moses/Incremental.h b/moses/Incremental.h
index 20040bf45..c1f5e40b3 100644
--- a/moses/Incremental.h
+++ b/moses/Incremental.h
@@ -7,6 +7,8 @@
#include "moses/ChartCellCollection.h"
#include "moses/ChartParser.h"
+#include "BaseManager.h"
+
#include <vector>
#include <string>
@@ -19,7 +21,7 @@ class LanguageModel;
namespace Incremental
{
-class Manager
+class Manager : public BaseManager
{
public:
Manager(const InputType &source);
@@ -28,17 +30,38 @@ public:
template <class Model> void LMCallback(const Model &model, const std::vector<lm::WordIndex> &words);
- const std::vector<search::Applied> &ProcessSentence();
+ void Decode();
+
+ const std::vector<search::Applied> &GetNBest() const;
// Call to get the same value as ProcessSentence returned.
const std::vector<search::Applied> &Completed() const {
return *completed_nbest_;
}
+ // output
+ void OutputBest(OutputCollector *collector) const;
+ void OutputNBest(OutputCollector *collector) const;
+ void OutputDetailedTranslationReport(OutputCollector *collector) const;
+ void OutputNBestList(OutputCollector *collector, const std::vector<search::Applied> &nbest, long translationId) const;
+ void OutputLatticeSamples(OutputCollector *collector) const {
+ }
+ void OutputAlignment(OutputCollector *collector) const {
+ }
+ void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const;
+ void OutputWordGraph(OutputCollector *collector) const {
+ }
+ void OutputSearchGraph(OutputCollector *collector) const {
+ }
+ void OutputSearchGraphSLF() const {
+ }
+ void OutputSearchGraphHypergraph() const {
+ }
+
+
private:
template <class Model, class Best> search::History PopulateBest(const Model &model, const std::vector<lm::WordIndex> &words, Best &out);
- const InputType &source_;
ChartCellCollectionBase cells_;
ChartParser parser_;
@@ -51,6 +74,39 @@ private:
search::NBest n_best_;
const std::vector<search::Applied> *completed_nbest_;
+
+ // outputs
+ void OutputDetailedTranslationReport(
+ OutputCollector *collector,
+ const search::Applied *applied,
+ const Sentence &sentence,
+ long translationId) const;
+ void OutputTranslationOptions(std::ostream &out,
+ ApplicationContext &applicationContext,
+ const search::Applied *applied,
+ const Sentence &sentence,
+ long translationId) const;
+ void OutputTranslationOption(std::ostream &out,
+ ApplicationContext &applicationContext,
+ const search::Applied *applied,
+ const Sentence &sentence,
+ long translationId) const;
+ void ReconstructApplicationContext(const search::Applied *applied,
+ const Sentence &sentence,
+ ApplicationContext &context) const;
+ void OutputTreeFragmentsTranslationOptions(std::ostream &out,
+ ApplicationContext &applicationContext,
+ const search::Applied *applied,
+ const Sentence &sentence,
+ long translationId) const;
+ void OutputBestHypo(OutputCollector *collector, search::Applied applied, long translationId) const;
+ void OutputBestNone(OutputCollector *collector, long translationId) const;
+
+ void OutputUnknowns(OutputCollector *collector) const {
+ }
+ void CalcDecoderStatistics() const {
+ }
+
};
// Just get the phrase.
diff --git a/moses/InputPath.cpp b/moses/InputPath.cpp
index 523b03d53..ab7c9c782 100644
--- a/moses/InputPath.cpp
+++ b/moses/InputPath.cpp
@@ -33,14 +33,14 @@ InputPath(const Phrase &phrase, const NonTerminalSet &sourceNonTerms,
InputPath::~InputPath()
{
- // Since there is no way for the Phrase Dictionaries to tell in
- // which (sentence) context phrases were looked up, we tell them
+ // Since there is no way for the Phrase Dictionaries to tell in
+ // which (sentence) context phrases were looked up, we tell them
// now that the phrase isn't needed any more by this inputPath
typedef std::pair<const TargetPhraseCollection*, const void* > entry;
std::map<const PhraseDictionary*, entry>::const_iterator iter;
for (iter = m_targetPhrases.begin(); iter != m_targetPhrases.end(); ++iter)
iter->first->Release(iter->second.first);
-
+
delete m_inputScore;
}
@@ -86,11 +86,11 @@ size_t InputPath::GetTotalRuleSize() const
std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> >::const_iterator iter;
for (iter = m_targetPhrases.begin(); iter != m_targetPhrases.end(); ++iter) {
// const PhraseDictionary *pt = iter->first;
- const TargetPhraseCollection *tpColl = iter->second.first;
+ const TargetPhraseCollection *tpColl = iter->second.first;
- if (tpColl) {
- ret += tpColl->GetSize();
- }
+ if (tpColl) {
+ ret += tpColl->GetSize();
+ }
}
return ret;
@@ -100,7 +100,6 @@ std::ostream& operator<<(std::ostream& out, const InputPath& obj)
{
out << &obj << " " << obj.GetWordsRange() << " " << obj.GetPrevPath() << " " << obj.GetPhrase();
- out << "pt: ";
std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> >::const_iterator iter;
for (iter = obj.m_targetPhrases.begin(); iter != obj.m_targetPhrases.end(); ++iter) {
const PhraseDictionary *pt = iter->first;
@@ -108,10 +107,9 @@ std::ostream& operator<<(std::ostream& out, const InputPath& obj)
out << pt << "=";
if (tpColl) {
- cerr << tpColl->GetSize() << " ";
- }
- else {
- cerr << "NULL ";
+ cerr << tpColl->GetSize() << " ";
+ } else {
+ cerr << "NULL ";
}
}
diff --git a/moses/InputPath.h b/moses/InputPath.h
index eed8fd495..c67d88795 100644
--- a/moses/InputPath.h
+++ b/moses/InputPath.h
@@ -54,8 +54,8 @@ public:
: m_prevPath(NULL)
, m_range(NOT_FOUND, NOT_FOUND)
, m_inputScore(NULL)
- , m_nextNode(NOT_FOUND)
- {}
+ , m_nextNode(NOT_FOUND) {
+ }
InputPath(const Phrase &phrase, const NonTerminalSet &sourceNonTerms, const WordsRange &range, const InputPath *prevNode
,const ScorePair *inputScore);
@@ -92,8 +92,9 @@ public:
, const TargetPhraseCollection *targetPhrases
, const void *ptNode);
const TargetPhraseCollection *GetTargetPhrases(const PhraseDictionary &phraseDictionary) const;
- const TargetPhrases &GetTargetPhrases() const
- { return m_targetPhrases; }
+ const TargetPhrases &GetTargetPhrases() const {
+ return m_targetPhrases;
+ }
// pointer to internal node in phrase-table. Since this is implementation dependent, this is a void*
const void *GetPtNode(const PhraseDictionary &phraseDictionary) const;
diff --git a/moses/Jamfile b/moses/Jamfile
index 3be041e60..224ec530a 100644
--- a/moses/Jamfile
+++ b/moses/Jamfile
@@ -17,7 +17,12 @@ if $(with-oxlm) {
oxlm = ;
}
-alias headers : ../util//kenutil : : : $(max-factors) $(dlib) $(oxlm) ;
+local classifier = ;
+if [ option.get "with-vw" ] {
+ classifier += ..//vw//classifier ;
+}
+
+alias headers : ../util//kenutil $(classifier) : : : $(max-factors) $(dlib) $(oxlm) ;
alias ThreadPool : ThreadPool.cpp ;
alias Util : Util.cpp Timer.cpp ;
@@ -60,14 +65,23 @@ if [ option.get "with-mm" : no : yes ] = yes
alias mmlib ;
}
+local with-vw = [ option.get "with-vw" ] ;
+if $(with-vw) {
+ alias vwfiles : [ glob FF/VW/*.cpp ] ;
+} else {
+ alias vwfiles ;
+}
+
lib moses :
[ glob
*.cpp
Syntax/*.cpp
+ Syntax/F2S/*.cpp
Syntax/S2T/*.cpp
Syntax/S2T/Parsers/*.cpp
Syntax/S2T/Parsers/RecursiveCYKPlusParser/*.cpp
Syntax/S2T/Parsers/Scope3Parser/*.cpp
+ Syntax/T2S/*.cpp
TranslationModel/*.cpp
TranslationModel/fuzzy-match/*.cpp
TranslationModel/DynSAInclude/*.cpp
@@ -85,11 +99,11 @@ lib moses :
SyntacticLanguageModel.cpp
*Test.cpp Mock*.cpp FF/*Test.cpp
FF/Factory.cpp
-]
+] vwfiles
headers FF_Factory.o LM//LM TranslationModel/CompactPT//CompactPT TranslationModel/ProbingPT//ProbingPT synlm ThreadPool
..//search ../util/double-conversion//double-conversion ..//z ../OnDiskPt//OnDiskPt
-$(TOP)//boost_iostreams mmlib
+$(TOP)//boost_filesystem $(TOP)//boost_iostreams mmlib
:
<threading>single:<source>../util//rt
;
@@ -99,5 +113,5 @@ alias headers-to-install : [ glob-tree *.h ] ;
import testing ;
-unit-test moses_test : [ glob *Test.cpp Mock*.cpp FF/*Test.cpp ] moses headers ..//z ../OnDiskPt//OnDiskPt ..//boost_unit_test_framework ;
+unit-test moses_test : [ glob *Test.cpp Mock*.cpp FF/*Test.cpp ] ..//boost_filesystem moses headers ..//z ../OnDiskPt//OnDiskPt ..//boost_unit_test_framework ;
diff --git a/moses/LM/Backward.cpp b/moses/LM/Backward.cpp
index 1a275459c..2fb7451b5 100644
--- a/moses/LM/Backward.cpp
+++ b/moses/LM/Backward.cpp
@@ -131,8 +131,8 @@ template <class Model> void BackwardLanguageModel<Model>::CalcScore(const Phrase
lm::ngram::RuleScore<Model> scorer(*m_ngram, discarded_sadly);
UTIL_THROW_IF2(m_beginSentenceFactor == phrase.GetWord(0).GetFactor(m_factorType),
- "BackwardLanguageModel does not currently support rules that include <s>"
- );
+ "BackwardLanguageModel does not currently support rules that include <s>"
+ );
float before_boundary = 0.0f;
@@ -144,8 +144,8 @@ template <class Model> void BackwardLanguageModel<Model>::CalcScore(const Phrase
for (position = lastWord; position >= 0; position-=1) {
const Word &word = phrase.GetWord(position);
UTIL_THROW_IF2(word.IsNonTerminal(),
- "BackwardLanguageModel does not currently support rules that include non-terminals "
- );
+ "BackwardLanguageModel does not currently support rules that include non-terminals "
+ );
lm::WordIndex index = TranslateID(word);
scorer.Terminal(index);
@@ -259,8 +259,8 @@ template <class Model> FFState *BackwardLanguageModel<Model>::Evaluate(const Phr
for (int position=std::min( lastWord, ngramBoundary - 1); position >= 0; position-=1) {
const Word &word = phrase.GetWord(position);
UTIL_THROW_IF2(word.IsNonTerminal(),
- "BackwardLanguageModel does not currently support rules that include non-terminals "
- );
+ "BackwardLanguageModel does not currently support rules that include non-terminals "
+ );
lm::WordIndex index = TranslateID(word);
scorer.Terminal(index);
@@ -285,27 +285,27 @@ template <class Model> FFState *BackwardLanguageModel<Model>::Evaluate(const Phr
LanguageModel *ConstructBackwardLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
{
- lm::ngram::ModelType model_type;
- if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
- switch(model_type) {
- case lm::ngram::PROBING:
- return new BackwardLanguageModel<lm::ngram::ProbingModel>(line, file, factorType, lazy);
- case lm::ngram::REST_PROBING:
- return new BackwardLanguageModel<lm::ngram::RestProbingModel>(line, file, factorType, lazy);
- case lm::ngram::TRIE:
- return new BackwardLanguageModel<lm::ngram::TrieModel>(line, file, factorType, lazy);
- case lm::ngram::QUANT_TRIE:
- return new BackwardLanguageModel<lm::ngram::QuantTrieModel>(line, file, factorType, lazy);
- case lm::ngram::ARRAY_TRIE:
- return new BackwardLanguageModel<lm::ngram::ArrayTrieModel>(line, file, factorType, lazy);
- case lm::ngram::QUANT_ARRAY_TRIE:
- return new BackwardLanguageModel<lm::ngram::QuantArrayTrieModel>(line, file, factorType, lazy);
- default:
- UTIL_THROW2("Unrecognized kenlm model type " << model_type);
- }
- } else {
- return new BackwardLanguageModel<lm::ngram::ProbingModel>(line, file, factorType, lazy);
+ lm::ngram::ModelType model_type;
+ if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
+ switch(model_type) {
+ case lm::ngram::PROBING:
+ return new BackwardLanguageModel<lm::ngram::ProbingModel>(line, file, factorType, lazy);
+ case lm::ngram::REST_PROBING:
+ return new BackwardLanguageModel<lm::ngram::RestProbingModel>(line, file, factorType, lazy);
+ case lm::ngram::TRIE:
+ return new BackwardLanguageModel<lm::ngram::TrieModel>(line, file, factorType, lazy);
+ case lm::ngram::QUANT_TRIE:
+ return new BackwardLanguageModel<lm::ngram::QuantTrieModel>(line, file, factorType, lazy);
+ case lm::ngram::ARRAY_TRIE:
+ return new BackwardLanguageModel<lm::ngram::ArrayTrieModel>(line, file, factorType, lazy);
+ case lm::ngram::QUANT_ARRAY_TRIE:
+ return new BackwardLanguageModel<lm::ngram::QuantArrayTrieModel>(line, file, factorType, lazy);
+ default:
+ UTIL_THROW2("Unrecognized kenlm model type " << model_type);
}
+ } else {
+ return new BackwardLanguageModel<lm::ngram::ProbingModel>(line, file, factorType, lazy);
+ }
}
} // namespace Moses
diff --git a/moses/LM/Base.cpp b/moses/LM/Base.cpp
index db71119d5..76a6336c3 100644
--- a/moses/LM/Base.cpp
+++ b/moses/LM/Base.cpp
@@ -70,9 +70,9 @@ void LanguageModel::ReportHistoryOrder(std::ostream &out,const Phrase &phrase) c
}
void LanguageModel::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
// contains factors used by this LM
float fullScore, nGramScore;
diff --git a/moses/LM/Base.h b/moses/LM/Base.h
index 2be19e5bd..eb0a98ca1 100644
--- a/moses/LM/Base.h
+++ b/moses/LM/Base.h
@@ -88,16 +88,21 @@ public:
virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const;
virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
+
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
};
diff --git a/moses/LM/BilingualLM.cpp b/moses/LM/BilingualLM.cpp
index bc0e61a67..97f3d55e9 100644
--- a/moses/LM/BilingualLM.cpp
+++ b/moses/LM/BilingualLM.cpp
@@ -4,7 +4,8 @@
using namespace std;
-namespace Moses {
+namespace Moses
+{
int BilingualLMState::Compare(const FFState& other) const
{
@@ -17,17 +18,19 @@ int BilingualLMState::Compare(const FFState& other) const
////////////////////////////////////////////////////////////////
BilingualLM::BilingualLM(const std::string &line)
- : StatefulFeatureFunction(1, line),
- word_factortype(0) {
+ : StatefulFeatureFunction(1, line),
+ word_factortype(0)
+{
FactorCollection& factorFactory = FactorCollection::Instance(); //Factor Factory to use for BOS_ and EOS_
BOS_factor = factorFactory.AddFactor(BOS_);
BOS_word.SetFactor(0, BOS_factor);
EOS_factor = factorFactory.AddFactor(EOS_);
EOS_word.SetFactor(0, EOS_factor);
-
+
}
-void BilingualLM::Load(){
+void BilingualLM::Load()
+{
ReadParameters();
loadModel();
}
@@ -35,14 +38,15 @@ void BilingualLM::Load(){
//Populates words with amount words from the targetPhrase from the previous hypothesis where
//words[0] is the last word of the previous hypothesis, words[1] is the second last etc...
void BilingualLM::requestPrevTargetNgrams(
- const Hypothesis &cur_hypo, int amount, std::vector<int> &words) const {
+ const Hypothesis &cur_hypo, int amount, std::vector<int> &words) const
+{
const Hypothesis * prev_hyp = cur_hypo.GetPrevHypo();
int found = 0;
while (prev_hyp && found != amount) {
const TargetPhrase& currTargetPhrase = prev_hyp->GetCurrTargetPhrase();
- for (int i = currTargetPhrase.GetSize() - 1; i> -1; i--){
- if (found != amount){
+ for (int i = currTargetPhrase.GetSize() - 1; i> -1; i--) {
+ if (found != amount) {
const Word& word = currTargetPhrase.GetWord(i);
words[found] = getNeuralLMId(word, false);
found++;
@@ -55,18 +59,19 @@ void BilingualLM::requestPrevTargetNgrams(
}
int neuralLM_wordID = getNeuralLMId(BOS_word, false);
- for (int i = found; i < amount; i++){
+ for (int i = found; i < amount; i++) {
words[i] = neuralLM_wordID;
}
}
-//Populates the words vector with target_ngrams sized that also contains the current word we are looking at.
+//Populates the words vector with target_ngrams sized that also contains the current word we are looking at.
//(in effect target_ngrams + 1)
void BilingualLM::getTargetWords(
- const Hypothesis &cur_hypo,
- const TargetPhrase &targetPhrase,
- int current_word_index,
- std::vector<int> &words) const {
+ const Hypothesis &cur_hypo,
+ const TargetPhrase &targetPhrase,
+ int current_word_index,
+ std::vector<int> &words) const
+{
//Check if we need to look at previous target phrases
int additional_needed = current_word_index - target_ngrams;
if (additional_needed < 0) {
@@ -87,7 +92,7 @@ void BilingualLM::getTargetWords(
}
} else {
//We haven't added any words, proceed as before
- for (int i = current_word_index - target_ngrams; i <= current_word_index; i++){
+ for (int i = current_word_index - target_ngrams; i <= current_word_index; i++) {
const Word& word = targetPhrase.GetWord(i);
words.push_back(getNeuralLMId(word, false));
}
@@ -97,7 +102,8 @@ void BilingualLM::getTargetWords(
//Returns source words in the way NeuralLM expects them.
size_t BilingualLM::selectMiddleAlignment(
- const set<size_t>& alignment_links) const {
+ const set<size_t>& alignment_links) const
+{
set<size_t>::iterator it = alignment_links.begin();
for (int i = 0; i < (alignment_links.size() - 1) / 2; ++i) {
@@ -108,11 +114,12 @@ size_t BilingualLM::selectMiddleAlignment(
}
void BilingualLM::getSourceWords(
- const TargetPhrase &targetPhrase,
- int targetWordIdx,
- const Sentence &source_sent,
- const WordsRange &sourceWordRange,
- std::vector<int> &words) const {
+ const TargetPhrase &targetPhrase,
+ int targetWordIdx,
+ const Sentence &source_sent,
+ const WordsRange &sourceWordRange,
+ std::vector<int> &words) const
+{
//Get source context
//Get alignment for the word we require
@@ -123,7 +130,7 @@ void BilingualLM::getSourceWords(
std::set<size_t> last_word_al;
for (int j = 0; j < targetPhrase.GetSize(); j++) {
// Find the nearest aligned word with preference for right.
- if ((targetWordIdx + j) < targetPhrase.GetSize()){
+ if ((targetWordIdx + j) < targetPhrase.GetSize()) {
last_word_al = alignments.GetAlignmentsForTarget(targetWordIdx + j);
if (!last_word_al.empty()) {
break;
@@ -146,7 +153,7 @@ void BilingualLM::getSourceWords(
//It should never be the case the the word_al size would be zero, but several times this has happened because
//of a corrupt phrase table. It is best to have this check here, as it makes debugging the problem a lot easier.
UTIL_THROW_IF2(last_word_al.size() == 0,
- "A target phrase with no alignments detected! " << targetPhrase << "Check if there is something wrong with your phrase table.");
+ "A target phrase with no alignments detected! " << targetPhrase << "Check if there is something wrong with your phrase table.");
size_t source_center_index = selectMiddleAlignment(last_word_al);
// We have found the alignment. Now determine how much to shift by to get the actual source word index.
size_t phrase_start_pos = sourceWordRange.GetStartPos();
@@ -156,7 +163,8 @@ void BilingualLM::getSourceWords(
appendSourceWordsToVector(source_sent, words, source_word_mid_idx);
}
-size_t BilingualLM::getState(const Hypothesis& cur_hypo) const {
+size_t BilingualLM::getState(const Hypothesis& cur_hypo) const
+{
const TargetPhrase &targetPhrase = cur_hypo.GetCurrTargetPhrase();
size_t hashCode = 0;
@@ -190,25 +198,26 @@ size_t BilingualLM::getState(const Hypothesis& cur_hypo) const {
}
void BilingualLM::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {}
void BilingualLM::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
}
FFState* BilingualLM::EvaluateWhenApplied(
- const Hypothesis& cur_hypo,
- const FFState* prev_state,
- ScoreComponentCollection* accumulator) const {
+ const Hypothesis& cur_hypo,
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const
+{
Manager& manager = cur_hypo.GetManager();
const Sentence& source_sent = static_cast<const Sentence&>(manager.GetSource());
@@ -223,9 +232,9 @@ FFState* BilingualLM::EvaluateWhenApplied(
const WordsRange& sourceWordRange = cur_hypo.GetCurrSourceWordsRange(); //Source words range to calculate offsets
// For each word in the current target phrase get its LM score.
- for (int i = 0; i < currTargetPhrase.GetSize(); i++){
+ for (int i = 0; i < currTargetPhrase.GetSize(); i++) {
getSourceWords(
- currTargetPhrase, i, source_sent, sourceWordRange, source_words);
+ currTargetPhrase, i, source_sent, sourceWordRange, source_words);
getTargetWords(cur_hypo, currTargetPhrase, i, target_words);
value += Score(source_words, target_words);
@@ -234,36 +243,33 @@ FFState* BilingualLM::EvaluateWhenApplied(
target_words.clear();
}
- size_t new_state = getState(cur_hypo);
+ size_t new_state = getState(cur_hypo);
accumulator->PlusEquals(this, value);
return new BilingualLMState(new_state);
}
-void BilingualLM::getAllTargetIdsChart(const ChartHypothesis& cur_hypo, size_t featureID, std::vector<int>& wordIds) const {
+void BilingualLM::getAllTargetIdsChart(const ChartHypothesis& cur_hypo, size_t featureID, std::vector<int>& wordIds) const
+{
const TargetPhrase targetPhrase = cur_hypo.GetCurrTargetPhrase();
- int next_nonterminal_index = 0;
- for (int i = 0; i < targetPhrase.GetSize(); i++){
- if (targetPhrase.GetWord(i).IsNonTerminal()){ //Nonterminal get from prev state
- const ChartHypothesis * prev_hypo = cur_hypo.GetPrevHypo(next_nonterminal_index);
+ for (int i = 0; i < targetPhrase.GetSize(); i++) {
+ if (targetPhrase.GetWord(i).IsNonTerminal()) { //Nonterminal get from prev state
+ const ChartHypothesis * prev_hypo = cur_hypo.GetPrevHypo(targetPhrase.GetAlignNonTerm().GetNonTermIndexMap()[i]);
const BilingualLMState * prev_state = static_cast<const BilingualLMState *>(prev_hypo->GetFFState(featureID));
const std::vector<int> prevWordIDs = prev_state->GetWordIdsVector();
- for (std::vector<int>::const_iterator it = prevWordIDs.begin(); it!= prevWordIDs.end(); it++){
+ for (std::vector<int>::const_iterator it = prevWordIDs.begin(); it!= prevWordIDs.end(); it++) {
wordIds.push_back(*it);
}
- next_nonterminal_index++;
} else {
wordIds.push_back(getNeuralLMId(targetPhrase.GetWord(i), false));
}
}
-}
+}
-void BilingualLM::getAllAlignments(const ChartHypothesis& cur_hypo, size_t featureID, std::vector<int>& word_alignemnts) const {
+void BilingualLM::getAllAlignments(const ChartHypothesis& cur_hypo, size_t featureID, std::vector<int>& word_alignments) const
+{
const TargetPhrase targetPhrase = cur_hypo.GetCurrTargetPhrase();
- int next_nonterminal_index = 0;
- int nonterm_length = 0; //Account for the size of nonterminals when calculating the alignment.
- int source_phrase_start_pos = cur_hypo.GetCurrSourceRange().GetStartPos();
int source_word_mid_idx; //The word alignment
//Get source sent
@@ -271,75 +277,95 @@ void BilingualLM::getAllAlignments(const ChartHypothesis& cur_hypo, size_t featu
const Sentence& source_sent = static_cast<const Sentence&>(manager.GetSource());
const AlignmentInfo& alignments = targetPhrase.GetAlignTerm();
- for (int i = 0; i < targetPhrase.GetSize(); i++){
+ // get absolute position in source sentence for each source word in rule
+ std::vector<int> absolute_source_position (cur_hypo.GetCurrSourceRange().GetNumWordsCovered(), 0); //we actually only need number of source symbols in rule; can we get this number cheaply?
+
+ absolute_source_position[0] = cur_hypo.GetCurrSourceRange().GetStartPos();
+ // get last absolute position of each source nonterminal symbol
+ for (int i = 0; i < targetPhrase.GetSize(); i++) {
+ if (targetPhrase.GetWord(i).IsNonTerminal()) {
+ const ChartHypothesis * prev_hypo = cur_hypo.GetPrevHypo(targetPhrase.GetAlignNonTerm().GetNonTermIndexMap()[i]);
+ absolute_source_position[targetPhrase.GetAlignNonTerm().GetNonTermIndexMap2()[i]] = prev_hypo->GetCurrSourceRange().GetEndPos();
+ }
+ }
+
+ // set absolute position of all source terminal symbols based on absolute position of previous symbol
+ for (int i = 0; i != absolute_source_position.size(); i++) {
+ if (i && absolute_source_position[i] == 0) {
+ absolute_source_position[i] = absolute_source_position[i-1] + 1;
+ }
+ }
+
+ for (int i = 0; i < targetPhrase.GetSize(); i++) {
//Sometimes we have to traverse more than one target words because of
//unaligned words. This is O(n^2) in worst case, but usually closer to O(n)
- if (targetPhrase.GetWord(i).IsNonTerminal()){
+ if (targetPhrase.GetWord(i).IsNonTerminal()) {
//If we have a non terminal we can get the alignments from the previous state
- const ChartHypothesis * prev_hypo = cur_hypo.GetPrevHypo(next_nonterminal_index);
+ const ChartHypothesis * prev_hypo = cur_hypo.GetPrevHypo(targetPhrase.GetAlignNonTerm().GetNonTermIndexMap()[i]);
const BilingualLMState * prev_state = static_cast<const BilingualLMState *>(prev_hypo->GetFFState(featureID));
const std::vector<int> prevWordAls = prev_state->GetWordAlignmentVector();
- nonterm_length += prevWordAls.size();
- for (std::vector<int>::const_iterator it = prevWordAls.begin(); it!= prevWordAls.end(); it++){
- word_alignemnts.push_back(*it);
+ for (std::vector<int>::const_iterator it = prevWordAls.begin(); it!= prevWordAls.end(); it++) {
+ word_alignments.push_back(*it);
}
- next_nonterminal_index++;
} else {
- std::set<size_t> word_al; //Keep word alignments
bool resolvedIndexis = false; //If we are aligning to an existing nonterm we don't need to calculate offsets
- for (int j = 0; j < targetPhrase.GetSize(); j++){
- //Try to get alignment from the current word and if it is unaligned,
- //try from the first word to the right and then to the left
- if ((i+j) < targetPhrase.GetSize()) {
- if (targetPhrase.GetWord(i + j).IsNonTerminal()) {
- const ChartHypothesis * prev_hypo = cur_hypo.GetPrevHypo(next_nonterminal_index);
- const BilingualLMState * prev_state = static_cast<const BilingualLMState *>(prev_hypo->GetFFState(featureID));
- const std::vector<int>& word_alignments = prev_state->GetWordAlignmentVector();
- source_word_mid_idx = word_alignments.front(); // The first word on the right of our word
- resolvedIndexis = true;
- break;
- }
- word_al = alignments.GetAlignmentsForTarget(i + j);
- if (!word_al.empty()) {
- break;
- }
- }
-
- if ((i - j) >= 0) {
- if (targetPhrase.GetWord(i - j).IsNonTerminal()) {
- const ChartHypothesis * prev_hypo = cur_hypo.GetPrevHypo(next_nonterminal_index - 1); //We need to look at the nonterm on the left.
- const BilingualLMState * prev_state = static_cast<const BilingualLMState *>(prev_hypo->GetFFState(featureID));
- const std::vector<int>& word_alignments = prev_state->GetWordAlignmentVector();
- source_word_mid_idx = word_alignments.back(); // The first word on the left of our word
- resolvedIndexis = true;
- break;
+ std::set<size_t> word_al = alignments.GetAlignmentsForTarget(i);
+ if (word_al.empty()) {
+ for (int j = 1; j < targetPhrase.GetSize(); j++) {
+ //Try to get alignment from the current word and if it is unaligned,
+ //try from the first word to the right and then to the left
+ if ((i+j) < targetPhrase.GetSize()) {
+ //TODO: this will always succeed, even if first word in previous hypo is unaligned. should it?
+ if (targetPhrase.GetWord(i + j).IsNonTerminal()) {
+ const ChartHypothesis * prev_hypo = cur_hypo.GetPrevHypo(targetPhrase.GetAlignNonTerm().GetNonTermIndexMap()[i+j]);
+ const BilingualLMState * prev_state = static_cast<const BilingualLMState *>(prev_hypo->GetFFState(featureID));
+ source_word_mid_idx = prev_state->GetWordAlignmentVector().front(); // The first word on the right of our word
+ resolvedIndexis = true;
+ break;
+ }
+ word_al = alignments.GetAlignmentsForTarget(i + j);
+ if (!word_al.empty()) {
+ break;
+ }
}
- word_al = alignments.GetAlignmentsForTarget(i - j);
- if (!word_al.empty()) {
- break;
+ if ((i - j) >= 0) {
+ //TODO: this will always succeed, even if last word in previous hypo is unaligned. should it?
+ if (targetPhrase.GetWord(i - j).IsNonTerminal()) {
+ const ChartHypothesis * prev_hypo = cur_hypo.GetPrevHypo(targetPhrase.GetAlignNonTerm().GetNonTermIndexMap()[i-j]);
+ const BilingualLMState * prev_state = static_cast<const BilingualLMState *>(prev_hypo->GetFFState(featureID));
+ source_word_mid_idx = prev_state->GetWordAlignmentVector().back(); // The first word on the left of our word
+ resolvedIndexis = true;
+ break;
+ }
+
+ word_al = alignments.GetAlignmentsForTarget(i - j);
+ if (!word_al.empty()) {
+ break;
+ }
}
}
}
- if (!resolvedIndexis){
+ if (!resolvedIndexis) {
//It should never be the case the the word_al size would be zero, but several times this has happened because
//of a corrupt phrase table. It is best to have this check here, as it makes debugging the problem a lot easier.
UTIL_THROW_IF2(word_al.size() == 0,
- "A target phrase with no alignments detected! " << targetPhrase << "Check if there is something wrong with your phrase table.");
+ "A target phrase with no alignments detected! " << targetPhrase << "Check if there is something wrong with your phrase table.");
size_t source_center_index = selectMiddleAlignment(word_al);
// We have found the alignment. Now determine how much to shift by to get the actual source word index.
- source_word_mid_idx = source_phrase_start_pos + (int)source_center_index + nonterm_length;
+ source_word_mid_idx = absolute_source_position[source_center_index];
}
- word_alignemnts.push_back(source_word_mid_idx);
+ word_alignments.push_back(source_word_mid_idx);
}
}
}
-size_t BilingualLM::getStateChart(std::vector<int>& neuralLMids) const {
+size_t BilingualLM::getStateChart(std::vector<int>& neuralLMids) const
+{
size_t hashCode = 0;
- for (int i = neuralLMids.size() - target_ngrams; i < neuralLMids.size(); i++){
+ for (int i = neuralLMids.size() - target_ngrams; i < neuralLMids.size(); i++) {
int neuralLM_wordID;
if (i < 0) {
neuralLM_wordID = getNeuralLMId(BOS_word, false);
@@ -352,10 +378,11 @@ size_t BilingualLM::getStateChart(std::vector<int>& neuralLMids) const {
}
void BilingualLM::getTargetWordsChart(
- std::vector<int>& neuralLMids,
- int current_word_index,
- std::vector<int>& words,
- bool sentence_begin) const {
+ std::vector<int>& neuralLMids,
+ int current_word_index,
+ std::vector<int>& words,
+ bool sentence_begin) const
+{
for (int i = current_word_index - target_ngrams; i <= current_word_index; i++) {
if (i < 0) {
@@ -370,7 +397,8 @@ void BilingualLM::getTargetWordsChart(
}
}
-void BilingualLM::appendSourceWordsToVector(const Sentence &source_sent, std::vector<int> &words, int source_word_mid_idx) const {
+void BilingualLM::appendSourceWordsToVector(const Sentence &source_sent, std::vector<int> &words, int source_word_mid_idx) const
+{
//Define begin and end indexes of the lookup. Cases for even and odd ngrams
//This can result in indexes which span larger than the length of the source phrase.
//In this case we just
@@ -401,14 +429,15 @@ void BilingualLM::appendSourceWordsToVector(const Sentence &source_sent, std::ve
}
FFState* BilingualLM::EvaluateWhenApplied(
- const ChartHypothesis& cur_hypo,
- int featureID, /* - used to index the state in the previous hypotheses */
- ScoreComponentCollection* accumulator) const {
+ const ChartHypothesis& cur_hypo,
+ int featureID, /* - used to index the state in the previous hypotheses */
+ ScoreComponentCollection* accumulator) const
+{
//Init vectors
std::vector<int> source_words;
source_words.reserve(source_ngrams);
std::vector<int> target_words;
- target_words.reserve(target_ngrams);
+ target_words.reserve(target_ngrams+1);
float value = 0; //NeuralLM score
const TargetPhrase& currTargetPhrase = cur_hypo.GetCurrTargetPhrase();
@@ -417,23 +446,21 @@ FFState* BilingualLM::EvaluateWhenApplied(
std::vector<int> alignments;
//Estimate size and reserve vectors to avoid reallocation
int future_size = currTargetPhrase.GetNumTerminals();
- for (int i =0; i<currTargetPhrase.GetNumNonTerminals(); i++){
+ for (int i =0; i<currTargetPhrase.GetNumNonTerminals(); i++) {
const ChartHypothesis * prev_hypo = cur_hypo.GetPrevHypo(i); //We need to look at the nonterm on the left.
- const BilingualLMState * prev_state = static_cast<const BilingualLMState *>(prev_hypo->GetFFState(featureID));
- const std::vector<int>& wordIds = prev_state->GetWordIdsVector();
- future_size += wordIds.size();
+ future_size += prev_hypo->GetCurrTargetPhrase().GetSize();
}
neuralLMids.reserve(future_size);
- neuralLMids.reserve(future_size);
+ alignments.reserve(future_size);
getAllTargetIdsChart(cur_hypo, featureID, neuralLMids);
getAllAlignments(cur_hypo, featureID, alignments);
bool sentence_begin = false; //Check if this hypothesis' target words are located in the beginning of the sentence
- if (neuralLMids[0] == getNeuralLMId(BOS_word, true)){
+ if (neuralLMids[0] == getNeuralLMId(BOS_word, false)) {
sentence_begin = true;
}
-
+
//Get source sentence
const ChartManager& manager = cur_hypo.GetManager();
const Sentence& source_sent = static_cast<const Sentence&>(manager.GetSource());
@@ -453,13 +480,15 @@ FFState* BilingualLM::EvaluateWhenApplied(
}
size_t new_state = getStateChart(neuralLMids);
- accumulator->Assign(this, value);
+ accumulator->PlusEquals(this, -accumulator->GetScoreForProducer(this));
+ accumulator->PlusEquals(this, value);
return new BilingualLMState(new_state, alignments, neuralLMids);
}
-void BilingualLM::SetParameter(const std::string& key, const std::string& value) {
- if (key == "filepath") {
+void BilingualLM::SetParameter(const std::string& key, const std::string& value)
+{
+ if (key == "path") {
m_filePath = value;
} else {
StatefulFeatureFunction::SetParameter(key, value);
diff --git a/moses/LM/BilingualLM.h b/moses/LM/BilingualLM.h
index 9f7235956..67a6c2ea1 100644
--- a/moses/LM/BilingualLM.h
+++ b/moses/LM/BilingualLM.h
@@ -21,13 +21,13 @@ class BilingualLMState : public FFState
std::vector<int> neuralLM_ids; //Carry the neuralLMids of the previous target phrase to avoid calling GetWholePhrase. Hiero only.
public:
BilingualLMState(size_t hash)
- :m_hash(hash)
- {}
+ :m_hash(hash) {
+ }
BilingualLMState(size_t hash, std::vector<int>& word_alignments_vec, std::vector<int>& neural_ids)
:m_hash(hash)
, word_alignments(word_alignments_vec)
- , neuralLM_ids(neural_ids)
- {}
+ , neuralLM_ids(neural_ids) {
+ }
const std::vector<int>& GetWordAlignmentVector() const {
return word_alignments;
@@ -40,8 +40,9 @@ public:
int Compare(const FFState& other) const;
};
-class BilingualLM : public StatefulFeatureFunction {
- private:
+class BilingualLM : public StatefulFeatureFunction
+{
+private:
virtual float Score(std::vector<int>& source_words, std::vector<int>& target_words) const = 0;
virtual int getNeuralLMId(const Word& word, bool is_source_word) const = 0;
@@ -53,19 +54,19 @@ class BilingualLM : public StatefulFeatureFunction {
size_t selectMiddleAlignment(const std::set<size_t>& alignment_links) const;
void getSourceWords(
- const TargetPhrase &targetPhrase,
- int targetWordIdx,
- const Sentence &source_sent,
- const WordsRange &sourceWordRange,
- std::vector<int> &words) const;
+ const TargetPhrase &targetPhrase,
+ int targetWordIdx,
+ const Sentence &source_sent,
+ const WordsRange &sourceWordRange,
+ std::vector<int> &words) const;
void appendSourceWordsToVector(const Sentence &source_sent, std::vector<int> &words, int source_word_mid_idx) const;
void getTargetWords(
- const Hypothesis &cur_hypo,
- const TargetPhrase &targetPhrase,
- int current_word_index,
- std::vector<int> &words) const;
+ const Hypothesis &cur_hypo,
+ const TargetPhrase &targetPhrase,
+ int current_word_index,
+ std::vector<int> &words) const;
size_t getState(const Hypothesis &cur_hypo) const;
@@ -112,28 +113,31 @@ public:
void Load();
void EvaluateInIsolation(
- const Phrase &source,
- const TargetPhrase &targetPhrase,
- ScoreComponentCollection &scoreBreakdown,
- ScoreComponentCollection &estimatedFutureScore) const;
+ const Phrase &source,
+ const TargetPhrase &targetPhrase,
+ ScoreComponentCollection &scoreBreakdown,
+ ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(
- const InputType &input,
- const InputPath &inputPath,
- const TargetPhrase &targetPhrase,
- const StackVec *stackVec,
- ScoreComponentCollection &scoreBreakdown,
- ScoreComponentCollection *estimatedFutureScore = NULL) const;
+ const InputType &input,
+ const InputPath &inputPath,
+ const TargetPhrase &targetPhrase,
+ const StackVec *stackVec,
+ ScoreComponentCollection &scoreBreakdown,
+ ScoreComponentCollection *estimatedFutureScore = NULL) const;
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {};
FFState* EvaluateWhenApplied(
- const Hypothesis& cur_hypo,
- const FFState* prev_state,
- ScoreComponentCollection* accumulator) const;
+ const Hypothesis& cur_hypo,
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const;
FFState* EvaluateWhenApplied(
- const ChartHypothesis& cur_hypo ,
- int featureID, /* - used to index the state in the previous hypotheses */
- ScoreComponentCollection* accumulator) const;
+ const ChartHypothesis& cur_hypo ,
+ int featureID, /* - used to index the state in the previous hypotheses */
+ ScoreComponentCollection* accumulator) const;
void SetParameter(const std::string& key, const std::string& value);
};
diff --git a/moses/LM/DALMWrapper.cpp b/moses/LM/DALMWrapper.cpp
index 68b3050de..638060b0e 100644
--- a/moses/LM/DALMWrapper.cpp
+++ b/moses/LM/DALMWrapper.cpp
@@ -16,24 +16,25 @@
using namespace std;
/////////////////////////
-void read_ini(const char *inifile, string &model, string &words, string &wordstxt){
- ifstream ifs(inifile);
- string line;
-
- getline(ifs, line);
- while(ifs){
- unsigned int pos = line.find("=");
- string key = line.substr(0, pos);
- string value = line.substr(pos+1, line.size()-pos);
- if(key=="MODEL"){
- model = value;
- }else if(key=="WORDS"){
- words = value;
- }else if(key=="WORDSTXT"){
- wordstxt = value;
- }
- getline(ifs, line);
- }
+void read_ini(const char *inifile, string &model, string &words, string &wordstxt)
+{
+ ifstream ifs(inifile);
+ string line;
+
+ getline(ifs, line);
+ while(ifs) {
+ unsigned int pos = line.find("=");
+ string key = line.substr(0, pos);
+ string value = line.substr(pos+1, line.size()-pos);
+ if(key=="MODEL") {
+ model = value;
+ } else if(key=="WORDS") {
+ words = value;
+ } else if(key=="WORDSTXT") {
+ wordstxt = value;
+ }
+ getline(ifs, line);
+ }
}
/////////////////////////
@@ -43,140 +44,140 @@ namespace Moses
class DALMState : public FFState
{
private:
- DALM::State state;
+ DALM::State state;
public:
- DALMState(){
- }
-
- DALMState(const DALMState &from){
- state = from.state;
- }
-
- virtual ~DALMState(){
- }
-
- void reset(const DALMState &from){
- state = from.state;
- }
-
- virtual int Compare(const FFState& other) const{
- const DALMState &o = static_cast<const DALMState &>(other);
- if(state.get_count() < o.state.get_count()) return -1;
- else if(state.get_count() > o.state.get_count()) return 1;
- else return state.compare(o.state);
- }
-
- DALM::State &get_state(){
- return state;
- }
-
- void refresh(){
- state.refresh();
- }
+ DALMState() {
+ }
+
+ DALMState(const DALMState &from) {
+ state = from.state;
+ }
+
+ virtual ~DALMState() {
+ }
+
+ void reset(const DALMState &from) {
+ state = from.state;
+ }
+
+ virtual int Compare(const FFState& other) const {
+ const DALMState &o = static_cast<const DALMState &>(other);
+ if(state.get_count() < o.state.get_count()) return -1;
+ else if(state.get_count() > o.state.get_count()) return 1;
+ else return state.compare(o.state);
+ }
+
+ DALM::State &get_state() {
+ return state;
+ }
+
+ void refresh() {
+ state.refresh();
+ }
};
class DALMChartState : public FFState
{
private:
- DALM::Fragment prefixFragments[DALM_MAX_ORDER-1];
- unsigned char prefixLength;
- DALM::State rightContext;
- bool isLarge;
- size_t hypoSize;
+ DALM::Fragment prefixFragments[DALM_MAX_ORDER-1];
+ unsigned char prefixLength;
+ DALM::State rightContext;
+ bool isLarge;
+ size_t hypoSize;
public:
- DALMChartState()
- : prefixLength(0),
- isLarge(false)
- {}
-
- /*
- DALMChartState(const DALMChartState &other)
- : prefixLength(other.prefixLength),
- rightContext(other.rightContext),
- isLarge(other.isLarge)
- {
- std::copy(
- other.prefixFragments,
- other.prefixFragments+other.prefixLength,
- prefixFragments
- );
- }
- */
-
- virtual ~DALMChartState(){
- }
-
- /*
- DALMChartState &operator=(const DALMChartState &other){
- prefixLength = other.prefixLength;
- std::copy(
- other.prefixFragments,
- other.prefixFragments+other.prefixLength,
- prefixFragments
- );
- rightContext = other.rightContext;
- isLarge=other.isLarge;
-
- return *this;
- }
- */
-
- inline unsigned char GetPrefixLength() const{
- return prefixLength;
- }
-
- inline unsigned char &GetPrefixLength(){
- return prefixLength;
- }
-
- inline const DALM::Fragment *GetPrefixFragments() const{
- return prefixFragments;
- }
-
- inline DALM::Fragment *GetPrefixFragments(){
- return prefixFragments;
- }
-
- inline const DALM::State &GetRightContext() const{
- return rightContext;
- }
-
- inline DALM::State &GetRightContext() {
- return rightContext;
- }
-
- inline bool LargeEnough() const{
- return isLarge;
- }
-
- inline void SetAsLarge() {
- isLarge=true;
- }
-
- inline size_t &GetHypoSize() {
- return hypoSize;
- }
- inline size_t GetHypoSize() const {
- return hypoSize;
- }
-
- virtual int Compare(const FFState& other) const{
- const DALMChartState &o = static_cast<const DALMChartState &>(other);
- if(prefixLength < o.prefixLength) return -1;
- if(prefixLength > o.prefixLength) return 1;
- if(prefixLength!=0){
- const DALM::Fragment &f = prefixFragments[prefixLength-1];
- const DALM::Fragment &of = o.prefixFragments[prefixLength-1];
- int ret = DALM::compare_fragments(f,of);
- if(ret != 0) return ret;
- }
- if(isLarge != o.isLarge) return (int)isLarge - (int)o.isLarge;
- if(rightContext.get_count() < o.rightContext.get_count()) return -1;
- if(rightContext.get_count() > o.rightContext.get_count()) return 1;
- return rightContext.compare(o.rightContext);
- }
+ DALMChartState()
+ : prefixLength(0),
+ isLarge(false) {
+ }
+
+ /*
+ DALMChartState(const DALMChartState &other)
+ : prefixLength(other.prefixLength),
+ rightContext(other.rightContext),
+ isLarge(other.isLarge)
+ {
+ std::copy(
+ other.prefixFragments,
+ other.prefixFragments+other.prefixLength,
+ prefixFragments
+ );
+ }
+ */
+
+ virtual ~DALMChartState() {
+ }
+
+ /*
+ DALMChartState &operator=(const DALMChartState &other){
+ prefixLength = other.prefixLength;
+ std::copy(
+ other.prefixFragments,
+ other.prefixFragments+other.prefixLength,
+ prefixFragments
+ );
+ rightContext = other.rightContext;
+ isLarge=other.isLarge;
+
+ return *this;
+ }
+ */
+
+ inline unsigned char GetPrefixLength() const {
+ return prefixLength;
+ }
+
+ inline unsigned char &GetPrefixLength() {
+ return prefixLength;
+ }
+
+ inline const DALM::Fragment *GetPrefixFragments() const {
+ return prefixFragments;
+ }
+
+ inline DALM::Fragment *GetPrefixFragments() {
+ return prefixFragments;
+ }
+
+ inline const DALM::State &GetRightContext() const {
+ return rightContext;
+ }
+
+ inline DALM::State &GetRightContext() {
+ return rightContext;
+ }
+
+ inline bool LargeEnough() const {
+ return isLarge;
+ }
+
+ inline void SetAsLarge() {
+ isLarge=true;
+ }
+
+ inline size_t &GetHypoSize() {
+ return hypoSize;
+ }
+ inline size_t GetHypoSize() const {
+ return hypoSize;
+ }
+
+ virtual int Compare(const FFState& other) const {
+ const DALMChartState &o = static_cast<const DALMChartState &>(other);
+ if(prefixLength < o.prefixLength) return -1;
+ if(prefixLength > o.prefixLength) return 1;
+ if(prefixLength!=0) {
+ const DALM::Fragment &f = prefixFragments[prefixLength-1];
+ const DALM::Fragment &of = o.prefixFragments[prefixLength-1];
+ int ret = DALM::compare_fragments(f,of);
+ if(ret != 0) return ret;
+ }
+ if(isLarge != o.isLarge) return (int)isLarge - (int)o.isLarge;
+ if(rightContext.get_count() < o.rightContext.get_count()) return -1;
+ if(rightContext.get_count() > o.rightContext.get_count()) return 1;
+ return rightContext.compare(o.rightContext);
+ }
};
LanguageModelDALM::LanguageModelDALM(const std::string &line)
@@ -191,62 +192,64 @@ LanguageModelDALM::LanguageModelDALM(const std::string &line)
LanguageModelDALM::~LanguageModelDALM()
{
- delete m_logger;
- delete m_vocab;
- delete m_lm;
+ delete m_logger;
+ delete m_vocab;
+ delete m_lm;
}
void LanguageModelDALM::Load()
{
- /////////////////////
- // READING INIFILE //
- /////////////////////
- string inifile= m_filePath + "/dalm.ini";
-
- string model; // Path to the double-array file.
- string words; // Path to the vocabulary file.
- string wordstxt; //Path to the vocabulary file in text format.
- read_ini(inifile.c_str(), model, words, wordstxt);
-
- model = m_filePath + "/" + model;
- words = m_filePath + "/" + words;
- wordstxt = m_filePath + "/" + wordstxt;
-
- UTIL_THROW_IF(model.empty() || words.empty() || wordstxt.empty(),
- util::FileOpenException,
- "Failed to read DALM ini file " << m_filePath << ". Probably doesn't exist");
-
- ////////////////
- // LOADING LM //
- ////////////////
-
- // Preparing a logger object.
- m_logger = new DALM::Logger(stderr);
- m_logger->setLevel(DALM::LOGGER_INFO);
-
- // Load the vocabulary file.
- m_vocab = new DALM::Vocabulary(words, *m_logger);
-
- // Load the language model.
- m_lm = new DALM::LM(model, *m_vocab, m_nGramOrder, *m_logger);
-
- wid_start = m_vocab->lookup(BOS_);
- wid_end = m_vocab->lookup(EOS_);
-
- // vocab mapping
- CreateVocabMapping(wordstxt);
-
- FactorCollection &collection = FactorCollection::Instance();
- m_beginSentenceFactor = collection.AddFactor(BOS_);
+ /////////////////////
+ // READING INIFILE //
+ /////////////////////
+ string inifile= m_filePath + "/dalm.ini";
+
+ string model; // Path to the double-array file.
+ string words; // Path to the vocabulary file.
+ string wordstxt; //Path to the vocabulary file in text format.
+ read_ini(inifile.c_str(), model, words, wordstxt);
+
+ model = m_filePath + "/" + model;
+ words = m_filePath + "/" + words;
+ wordstxt = m_filePath + "/" + wordstxt;
+
+ UTIL_THROW_IF(model.empty() || words.empty() || wordstxt.empty(),
+ util::FileOpenException,
+ "Failed to read DALM ini file " << m_filePath << ". Probably doesn't exist");
+
+ ////////////////
+ // LOADING LM //
+ ////////////////
+
+ // Preparing a logger object.
+ m_logger = new DALM::Logger(stderr);
+ m_logger->setLevel(DALM::LOGGER_INFO);
+
+ // Load the vocabulary file.
+ m_vocab = new DALM::Vocabulary(words, *m_logger);
+
+ // Load the language model.
+ m_lm = new DALM::LM(model, *m_vocab, m_nGramOrder, *m_logger);
+
+ wid_start = m_vocab->lookup(BOS_);
+ wid_end = m_vocab->lookup(EOS_);
+
+ // vocab mapping
+ CreateVocabMapping(wordstxt);
+
+ FactorCollection &collection = FactorCollection::Instance();
+ m_beginSentenceFactor = collection.AddFactor(BOS_);
}
-const FFState *LanguageModelDALM::EmptyHypothesisState(const InputType &/*input*/) const{
- DALMState *s = new DALMState();
- m_lm->init_state(s->get_state());
- return s;
+const FFState *LanguageModelDALM::EmptyHypothesisState(const InputType &/*input*/) const
+{
+ DALMState *s = new DALMState();
+ m_lm->init_state(s->get_state());
+ return s;
}
-void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const{
+void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
+{
fullScore = 0;
ngramScore = 0;
@@ -254,18 +257,18 @@ void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float
size_t phraseSize = phrase.GetSize();
if (!phraseSize) return;
-
+
size_t currPos = 0;
size_t hist_count = 0;
- DALM::State state;
-
- if(phrase.GetWord(0).GetFactor(m_factorType) == m_beginSentenceFactor){
- m_lm->init_state(state);
- currPos++;
- hist_count++;
- }
-
- float score;
+ DALM::State state;
+
+ if(phrase.GetWord(0).GetFactor(m_factorType) == m_beginSentenceFactor) {
+ m_lm->init_state(state);
+ currPos++;
+ hist_count++;
+ }
+
+ float score;
while (currPos < phraseSize) {
const Word &word = phrase.GetWord(currPos);
hist_count++;
@@ -274,9 +277,9 @@ void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float
state.refresh();
hist_count = 0;
} else {
- DALM::VocabId wid = GetVocabId(word.GetFactor(m_factorType));
- score = m_lm->query(wid, state);
- fullScore += score;
+ DALM::VocabId wid = GetVocabId(word.GetFactor(m_factorType));
+ score = m_lm->query(wid, state);
+ fullScore += score;
if (hist_count >= m_nGramOrder) ngramScore += score;
if (wid==m_vocab->unk()) ++oovCount;
}
@@ -284,41 +287,42 @@ void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float
currPos++;
}
- fullScore = TransformLMScore(fullScore);
- ngramScore = TransformLMScore(ngramScore);
+ fullScore = TransformLMScore(fullScore);
+ ngramScore = TransformLMScore(ngramScore);
}
-FFState *LanguageModelDALM::EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const{
+FFState *LanguageModelDALM::EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const
+{
// In this function, we only compute the LM scores of n-grams that overlap a
// phrase boundary. Phrase-internal scores are taken directly from the
// translation option.
- const DALMState *dalm_ps = static_cast<const DALMState *>(ps);
-
+ const DALMState *dalm_ps = static_cast<const DALMState *>(ps);
+
// Empty phrase added? nothing to be done
- if (hypo.GetCurrTargetLength() == 0){
+ if (hypo.GetCurrTargetLength() == 0) {
return dalm_ps ? new DALMState(*dalm_ps) : NULL;
}
-
+
const std::size_t begin = hypo.GetCurrTargetWordsRange().GetStartPos();
//[begin, end) in STL-like fashion.
const std::size_t end = hypo.GetCurrTargetWordsRange().GetEndPos() + 1;
const std::size_t adjust_end = std::min(end, begin + m_nGramOrder - 1);
-
+
DALMState *dalm_state = new DALMState(*dalm_ps);
- DALM::State &state = dalm_state->get_state();
+ DALM::State &state = dalm_state->get_state();
float score = 0.0;
- for(std::size_t position=begin; position < adjust_end; position++){
- score += m_lm->query(GetVocabId(hypo.GetWord(position).GetFactor(m_factorType)), state);
+ for(std::size_t position=begin; position < adjust_end; position++) {
+ score += m_lm->query(GetVocabId(hypo.GetWord(position).GetFactor(m_factorType)), state);
}
-
+
if (hypo.IsSourceCompleted()) {
// Score end of sentence.
std::vector<DALM::VocabId> indices(m_nGramOrder-1);
const DALM::VocabId *last = LastIDs(hypo, &indices.front());
m_lm->set_state(&indices.front(), (last-&indices.front()), state);
-
- score += m_lm->query(wid_end, state);
+
+ score += m_lm->query(wid_end, state);
} else if (adjust_end < end) {
// Get state after adding a long phrase.
std::vector<DALM::VocabId> indices(m_nGramOrder-1);
@@ -326,7 +330,7 @@ FFState *LanguageModelDALM::EvaluateWhenApplied(const Hypothesis &hypo, const FF
m_lm->set_state(&indices.front(), (last-&indices.front()), state);
}
- score = TransformLMScore(score);
+ score = TransformLMScore(score);
if (OOVFeatureEnabled()) {
std::vector<float> scores(2);
scores[0] = score;
@@ -335,56 +339,54 @@ FFState *LanguageModelDALM::EvaluateWhenApplied(const Hypothesis &hypo, const FF
} else {
out->PlusEquals(this, score);
}
-
+
return dalm_state;
}
-FFState *LanguageModelDALM::EvaluateWhenApplied(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const{
+FFState *LanguageModelDALM::EvaluateWhenApplied(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const
+{
// initialize language model context state
- DALMChartState *newState = new DALMChartState();
- DALM::State &state = newState->GetRightContext();
+ DALMChartState *newState = new DALMChartState();
+ DALM::State &state = newState->GetRightContext();
- DALM::Fragment *prefixFragments = newState->GetPrefixFragments();
- unsigned char &prefixLength = newState->GetPrefixLength();
- size_t &hypoSizeAll = newState->GetHypoSize();
+ DALM::Fragment *prefixFragments = newState->GetPrefixFragments();
+ unsigned char &prefixLength = newState->GetPrefixLength();
+ size_t &hypoSizeAll = newState->GetHypoSize();
// initial language model scores
- float hypoScore = 0.0; // total hypothesis score.
+ float hypoScore = 0.0; // diffs of scores.
- const TargetPhrase &targetPhrase = hypo.GetCurrTargetPhrase();
- size_t hypoSize = targetPhrase.GetSize();
- hypoSizeAll = hypoSize;
+ const TargetPhrase &targetPhrase = hypo.GetCurrTargetPhrase();
+ size_t hypoSize = targetPhrase.GetSize();
+ hypoSizeAll = hypoSize;
// get index map for underlying hypotheses
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
targetPhrase.GetAlignNonTerm().GetNonTermIndexMap();
- size_t phrasePos = 0;
-
- // begginig of sentence.
- if(hypoSize > 0){
- const Word &word = targetPhrase.GetWord(0);
- if(word.GetFactor(m_factorType) == m_beginSentenceFactor){
- m_lm->init_state(state);
- // state is finalized.
- newState->SetAsLarge();
- phrasePos++;
- }else if(word.IsNonTerminal()){
+ size_t phrasePos = 0;
+
+ // begginig of sentence.
+ if(hypoSize > 0) {
+ const Word &word = targetPhrase.GetWord(0);
+ if(word.GetFactor(m_factorType) == m_beginSentenceFactor) {
+ m_lm->init_state(state);
+ // state is finalized.
+ newState->SetAsLarge();
+ phrasePos++;
+ } else if(word.IsNonTerminal()) {
// special case: rule starts with non-terminal -> copy everything
const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndexMap[0]);
const DALMChartState* prevState =
static_cast<const DALMChartState*>(prevHypo->GetFFState(featureID));
- // copy chart state
- (*newState) = (*prevState);
- hypoSizeAll = hypoSize+prevState->GetHypoSize()-1;
-
- // get hypoScore
- hypoScore = UntransformLMScore(prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0]);
+ // copy chart state
+ (*newState) = (*prevState);
+ hypoSizeAll = hypoSize+prevState->GetHypoSize()-1;
- phrasePos++;
- }
+ phrasePos++;
+ }
}
// loop over rule
@@ -395,35 +397,34 @@ FFState *LanguageModelDALM::EvaluateWhenApplied(const ChartHypothesis& hypo, int
// regular word
if (!word.IsNonTerminal()) {
- EvaluateTerminal(
- word, hypoScore,
- newState, state,
- prefixFragments, prefixLength
- );
+ EvaluateTerminal(
+ word, hypoScore,
+ newState, state,
+ prefixFragments, prefixLength
+ );
}
// non-terminal, add phrase from underlying hypothesis
// internal non-terminal
else {
// look up underlying hypothesis
- const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndexMap[phrasePos]);
- const DALMChartState* prevState =
- static_cast<const DALMChartState*>(prevHypo->GetFFState(featureID));
- size_t prevTargetPhraseLength = prevHypo->GetCurrTargetPhrase().GetSize();
- float prevHypoScore = UntransformLMScore(prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0]);
- hypoSizeAll += prevState->GetHypoSize()-1;
-
- EvaluateNonTerminal(
- word, hypoScore,
- newState, state,
- prefixFragments, prefixLength,
- prevState, prevTargetPhraseLength, prevHypoScore
- );
+ const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndexMap[phrasePos]);
+ const DALMChartState* prevState =
+ static_cast<const DALMChartState*>(prevHypo->GetFFState(featureID));
+ size_t prevTargetPhraseLength = prevHypo->GetCurrTargetPhrase().GetSize();
+ hypoSizeAll += prevState->GetHypoSize()-1;
+
+ EvaluateNonTerminal(
+ word, hypoScore,
+ newState, state,
+ prefixFragments, prefixLength,
+ prevState, prevTargetPhraseLength
+ );
}
}
// assign combined score to score breakdown
- out->Assign(this, TransformLMScore(hypoScore));
+ out->PlusEquals(this, TransformLMScore(hypoScore));
return newState;
}
@@ -437,80 +438,81 @@ void LanguageModelDALM::CreateVocabMapping(const std::string &wordstxt)
{
InputFileStream vocabStrm(wordstxt);
- std::vector< std::pair<std::size_t, DALM::VocabId> > vlist;
+ std::vector< std::pair<std::size_t, DALM::VocabId> > vlist;
string line;
- std::size_t max_fid = 0;
+ std::size_t max_fid = 0;
while(getline(vocabStrm, line)) {
- const Factor *factor = FactorCollection::Instance().AddFactor(line);
- std::size_t fid = factor->GetId();
- DALM::VocabId wid = m_vocab->lookup(line.c_str());
+ const Factor *factor = FactorCollection::Instance().AddFactor(line);
+ std::size_t fid = factor->GetId();
+ DALM::VocabId wid = m_vocab->lookup(line.c_str());
- vlist.push_back(std::pair<std::size_t, DALM::VocabId>(fid, wid));
- if(max_fid < fid) max_fid = fid;
+ vlist.push_back(std::pair<std::size_t, DALM::VocabId>(fid, wid));
+ if(max_fid < fid) max_fid = fid;
}
- for(std::size_t i = 0; i < m_vocabMap.size(); i++){
- m_vocabMap[i] = m_vocab->unk();
- }
+ for(std::size_t i = 0; i < m_vocabMap.size(); i++) {
+ m_vocabMap[i] = m_vocab->unk();
+ }
- m_vocabMap.resize(max_fid+1, m_vocab->unk());
- std::vector< std::pair<std::size_t, DALM::VocabId> >::iterator it = vlist.begin();
- while(it != vlist.end()){
- std::pair<std::size_t, DALM::VocabId> &entry = *it;
- m_vocabMap[entry.first] = entry.second;
+ m_vocabMap.resize(max_fid+1, m_vocab->unk());
+ std::vector< std::pair<std::size_t, DALM::VocabId> >::iterator it = vlist.begin();
+ while(it != vlist.end()) {
+ std::pair<std::size_t, DALM::VocabId> &entry = *it;
+ m_vocabMap[entry.first] = entry.second;
- ++it;
- }
+ ++it;
+ }
}
DALM::VocabId LanguageModelDALM::GetVocabId(const Factor *factor) const
{
- std::size_t fid = factor->GetId();
- return (m_vocabMap.size() > fid)? m_vocabMap[fid] : m_vocab->unk();
+ std::size_t fid = factor->GetId();
+ return (m_vocabMap.size() > fid)? m_vocabMap[fid] : m_vocab->unk();
}
void LanguageModelDALM::SetParameter(const std::string& key, const std::string& value)
{
if (key == "factor") {
m_factorType = Scan<FactorType>(value);
- } else if (key == "order") {
- m_nGramOrder = Scan<size_t>(value);
- } else if (key == "path") {
- m_filePath = value;
+ } else if (key == "order") {
+ m_nGramOrder = Scan<size_t>(value);
+ } else if (key == "path") {
+ m_filePath = value;
} else {
LanguageModel::SetParameter(key, value);
}
- m_ContextSize = m_nGramOrder-1;
+ m_ContextSize = m_nGramOrder-1;
}
void LanguageModelDALM::EvaluateTerminal(
- const Word &word,
- float &hypoScore,
- DALMChartState *newState,
- DALM::State &state,
- DALM::Fragment *prefixFragments,
- unsigned char &prefixLength) const{
-
- DALM::VocabId wid = GetVocabId(word.GetFactor(m_factorType));
- if (newState->LargeEnough()) {
- float score = m_lm->query(wid, state);
- hypoScore += score;
- }else{
- float score = m_lm->query(wid, state, prefixFragments[prefixLength]);
-
- if(score > 0){
- hypoScore -= score;
- newState->SetAsLarge();
- }else if(state.get_count()<=prefixLength){
- hypoScore += score;
- prefixLength++;
- newState->SetAsLarge();
- }else{
- hypoScore += score;
- prefixLength++;
- if(prefixLength >= m_ContextSize) newState->SetAsLarge();
- }
- }
+ const Word &word,
+ float &hypoScore,
+ DALMChartState *newState,
+ DALM::State &state,
+ DALM::Fragment *prefixFragments,
+ unsigned char &prefixLength) const
+{
+
+ DALM::VocabId wid = GetVocabId(word.GetFactor(m_factorType));
+ if (newState->LargeEnough()) {
+ float score = m_lm->query(wid, state);
+ hypoScore += score;
+ } else {
+ float score = m_lm->query(wid, state, prefixFragments[prefixLength]);
+
+ if(score > 0) {
+ hypoScore -= score;
+ newState->SetAsLarge();
+ } else if(state.get_count()<=prefixLength) {
+ hypoScore += score;
+ prefixLength++;
+ newState->SetAsLarge();
+ } else {
+ hypoScore += score;
+ prefixLength++;
+ if(prefixLength >= m_ContextSize) newState->SetAsLarge();
+ }
+ }
}
void LanguageModelDALM::EvaluateNonTerminal(
@@ -520,74 +522,73 @@ void LanguageModelDALM::EvaluateNonTerminal(
DALM::State &state,
DALM::Fragment *prefixFragments,
unsigned char &prefixLength,
- const DALMChartState *prevState,
- size_t prevTargetPhraseLength,
- float prevHypoScore
- ) const{
+ const DALMChartState *prevState,
+ size_t prevTargetPhraseLength
+) const
+{
const unsigned char prevPrefixLength = prevState->GetPrefixLength();
- const DALM::Fragment *prevPrefixFragments = prevState->GetPrefixFragments();
- hypoScore += prevHypoScore;
-
- if(prevPrefixLength == 0){
- newState->SetAsLarge();
- hypoScore += state.sum_bows(0, state.get_count());
- state = prevState->GetRightContext();
- return;
- }
- if(!state.has_context()){
- newState->SetAsLarge();
- state = prevState->GetRightContext();
- return;
- }
- DALM::Gap gap(state);
+ const DALM::Fragment *prevPrefixFragments = prevState->GetPrefixFragments();
+
+ if(prevPrefixLength == 0) {
+ newState->SetAsLarge();
+ hypoScore += state.sum_bows(0, state.get_count());
+ state = prevState->GetRightContext();
+ return;
+ }
+ if(!state.has_context()) {
+ newState->SetAsLarge();
+ state = prevState->GetRightContext();
+ return;
+ }
+ DALM::Gap gap(state);
// score its prefix
for(size_t prefixPos = 0; prefixPos < prevPrefixLength; prefixPos++) {
- const DALM::Fragment &f = prevPrefixFragments[prefixPos];
- if (newState->LargeEnough()) {
- float score = m_lm->query(f, state, gap);
- hypoScore += score;
-
- if(!gap.is_extended()){
- state = prevState->GetRightContext();
- return;
- }else if(state.get_count() <= prefixPos+1){
- state = prevState->GetRightContext();
- return;
- }
- } else {
- DALM::Fragment &fnew = prefixFragments[prefixLength];
- float score = m_lm->query(f, state, gap, fnew);
- hypoScore += score;
-
- if(!gap.is_extended()){
- newState->SetAsLarge();
- state = prevState->GetRightContext();
- return;
- }else if(state.get_count() <= prefixPos+1){
- if(!gap.is_finalized()) prefixLength++;
- newState->SetAsLarge();
- state = prevState->GetRightContext();
- return;
- }else if(gap.is_finalized()){
- newState->SetAsLarge();
- }else{
- prefixLength++;
- if(prefixLength >= m_ContextSize) newState->SetAsLarge();
- }
- }
- gap.succ();
+ const DALM::Fragment &f = prevPrefixFragments[prefixPos];
+ if (newState->LargeEnough()) {
+ float score = m_lm->query(f, state, gap);
+ hypoScore += score;
+
+ if(!gap.is_extended()) {
+ state = prevState->GetRightContext();
+ return;
+ } else if(state.get_count() <= prefixPos+1) {
+ state = prevState->GetRightContext();
+ return;
+ }
+ } else {
+ DALM::Fragment &fnew = prefixFragments[prefixLength];
+ float score = m_lm->query(f, state, gap, fnew);
+ hypoScore += score;
+
+ if(!gap.is_extended()) {
+ newState->SetAsLarge();
+ state = prevState->GetRightContext();
+ return;
+ } else if(state.get_count() <= prefixPos+1) {
+ if(!gap.is_finalized()) prefixLength++;
+ newState->SetAsLarge();
+ state = prevState->GetRightContext();
+ return;
+ } else if(gap.is_finalized()) {
+ newState->SetAsLarge();
+ } else {
+ prefixLength++;
+ if(prefixLength >= m_ContextSize) newState->SetAsLarge();
+ }
+ }
+ gap.succ();
}
// check if we are dealing with a large sub-phrase
if (prevState->LargeEnough()) {
newState->SetAsLarge();
- if(prevPrefixLength < prevState->GetHypoSize()){
- hypoScore += state.sum_bows(prevPrefixLength, state.get_count());
- }
- // copy language model state
- state = prevState->GetRightContext();
+ if(prevPrefixLength < prevState->GetHypoSize()) {
+ hypoScore += state.sum_bows(prevPrefixLength, state.get_count());
+ }
+ // copy language model state
+ state = prevState->GetRightContext();
} else {
m_lm->set_state(state, prevState->GetRightContext(), prevPrefixFragments, gap);
}
diff --git a/moses/LM/DALMWrapper.h b/moses/LM/DALMWrapper.h
index ad53819c0..fe724e7c8 100644
--- a/moses/LM/DALMWrapper.h
+++ b/moses/LM/DALMWrapper.h
@@ -27,7 +27,7 @@ class LanguageModelDALM : public LanguageModel
public:
LanguageModelDALM(const std::string &line);
virtual ~LanguageModelDALM();
-
+
void Load();
virtual const FFState *EmptyHypothesisState(const InputType &/*input*/) const;
@@ -40,8 +40,8 @@ public:
virtual bool IsUseable(const FactorMask &mask) const;
- virtual void SetParameter(const std::string& key, const std::string& value);
-
+ virtual void SetParameter(const std::string& key, const std::string& value);
+
protected:
const Factor *m_beginSentenceFactor;
@@ -49,17 +49,17 @@ protected:
std::string m_filePath;
size_t m_nGramOrder; //! max n-gram length contained in this LM
- size_t m_ContextSize;
+ size_t m_ContextSize;
- DALM::Logger *m_logger;
- DALM::Vocabulary *m_vocab;
- DALM::LM *m_lm;
- DALM::VocabId wid_start, wid_end;
+ DALM::Logger *m_logger;
+ DALM::Vocabulary *m_vocab;
+ DALM::LM *m_lm;
+ DALM::VocabId wid_start, wid_end;
- mutable std::vector<DALM::VocabId> m_vocabMap;
+ mutable std::vector<DALM::VocabId> m_vocabMap;
- void CreateVocabMapping(const std::string &wordstxt);
- DALM::VocabId GetVocabId(const Factor *factor) const;
+ void CreateVocabMapping(const std::string &wordstxt);
+ DALM::VocabId GetVocabId(const Factor *factor) const;
private:
// Convert last words of hypothesis into vocab ids, returning an end pointer.
@@ -77,26 +77,25 @@ private:
}
}
- void EvaluateTerminal(
- const Word &word,
- float &hypoScore,
- DALMChartState *newState,
- DALM::State &state,
- DALM::Fragment *prefixFragments,
- unsigned char &prefixLength
- ) const;
-
- void EvaluateNonTerminal(
- const Word &word,
- float &hypoScore,
- DALMChartState *newState,
- DALM::State &state,
- DALM::Fragment *prefixFragments,
- unsigned char &prefixLength,
- const DALMChartState *prevState,
- size_t prevTargetPhraseLength,
- float prevHypoScore
- ) const;
+ void EvaluateTerminal(
+ const Word &word,
+ float &hypoScore,
+ DALMChartState *newState,
+ DALM::State &state,
+ DALM::Fragment *prefixFragments,
+ unsigned char &prefixLength
+ ) const;
+
+ void EvaluateNonTerminal(
+ const Word &word,
+ float &hypoScore,
+ DALMChartState *newState,
+ DALM::State &state,
+ DALM::Fragment *prefixFragments,
+ unsigned char &prefixLength,
+ const DALMChartState *prevState,
+ size_t prevTargetPhraseLength
+ ) const;
};
}
diff --git a/moses/LM/IRST.cpp b/moses/LM/IRST.cpp
index 44c5d8c4c..4d591e9e9 100644
--- a/moses/LM/IRST.cpp
+++ b/moses/LM/IRST.cpp
@@ -26,21 +26,44 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "n_gram.h"
#include "lmContainer.h"
+using namespace irstlm;
+
#include "IRST.h"
+#include "moses/LM/PointerState.h"
#include "moses/TypeDef.h"
#include "moses/Util.h"
#include "moses/FactorCollection.h"
#include "moses/Phrase.h"
#include "moses/InputFileStream.h"
#include "moses/StaticData.h"
-#include "moses/UserMessage.h"
using namespace std;
namespace Moses
{
+
+class IRSTLMState : public PointerState
+{
+public:
+ IRSTLMState():PointerState(NULL) {}
+ IRSTLMState(const void* lms):PointerState(lms) {}
+ IRSTLMState(const IRSTLMState& copy_from):PointerState(copy_from.lmstate) {}
+
+ IRSTLMState& operator=( const IRSTLMState& rhs )
+ {
+ lmstate = rhs.lmstate;
+ return *this;
+ }
+
+ const void* GetState() const
+ {
+ return lmstate;
+ }
+};
+
LanguageModelIRST::LanguageModelIRST(const std::string &line)
:LanguageModelSingleFactor(line)
+ ,m_lmtb_dub(0), m_lmtb_size(0)
{
const StaticData &staticData = StaticData::Instance();
int threadCount = staticData.ThreadCount();
@@ -50,6 +73,10 @@ LanguageModelIRST::LanguageModelIRST(const std::string &line)
ReadParameters();
+ VERBOSE(4, GetScoreProducerDescription() << " LanguageModelIRST::LanguageModelIRST() m_lmtb_dub:|" << m_lmtb_dub << "|" << std::endl);
+ VERBOSE(4, GetScoreProducerDescription() << " LanguageModelIRST::LanguageModelIRST() m_filePath:|" << m_filePath << "|" << std::endl);
+ VERBOSE(4, GetScoreProducerDescription() << " LanguageModelIRST::LanguageModelIRST() m_factorType:|" << m_factorType << "|" << std::endl);
+ VERBOSE(4, GetScoreProducerDescription() << " LanguageModelIRST::LanguageModelIRST() m_lmtb_size:|" << m_lmtb_size << "|" << std::endl);
}
LanguageModelIRST::~LanguageModelIRST()
@@ -64,19 +91,23 @@ LanguageModelIRST::~LanguageModelIRST()
}
-void LanguageModelIRST::Load()
+bool LanguageModelIRST::IsUseable(const FactorMask &mask) const
{
- cerr << "In LanguageModelIRST::Load: nGramOrder = " << m_nGramOrder << "\n";
+ bool ret = mask[m_factorType];
+ return ret;
+}
+void LanguageModelIRST::Load()
+{
FactorCollection &factorCollection = FactorCollection::Instance();
m_lmtb = m_lmtb->CreateLanguageModel(m_filePath);
- m_lmtb->setMaxLoadedLevel(1000);
+ if (m_lmtb_size > 0) m_lmtb->setMaxLoadedLevel(m_lmtb_size);
m_lmtb->load(m_filePath);
d=m_lmtb->getDict();
d->incflag(1);
- m_lmtb_size=m_lmtb->maxlevel();
+ m_nGramOrder = m_lmtb_size = m_lmtb->maxlevel();
// LM can be ok, just outputs warnings
// Mauro: in the original, the following two instructions are wrongly switched:
@@ -85,7 +116,7 @@ void LanguageModelIRST::Load()
CreateFactors(factorCollection);
- VERBOSE(1, "IRST: m_unknownId=" << m_unknownId << std::endl);
+ VERBOSE(1, GetScoreProducerDescription() << " LanguageModelIRST::Load() m_unknownId=" << m_unknownId << std::endl);
//install caches to save time (only if PS_CACHE_ENABLE is defined through compilation flags)
m_lmtb->init_caches(m_lmtb_size>2?m_lmtb_size-1:2);
@@ -113,6 +144,8 @@ void LanguageModelIRST::CreateFactors(FactorCollection &factorCollection)
m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_);
factorId = m_sentenceStart->GetId();
+ const std::string bs = BOS_;
+ const std::string es = EOS_;
m_lmtb_sentenceStart=lmIdMap[factorId] = GetLmID(BOS_);
maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
m_sentenceStartWord[m_factorType] = m_sentenceStart;
@@ -138,6 +171,11 @@ int LanguageModelIRST::GetLmID( const std::string &str ) const
return d->encode( str.c_str() ); // at the level of micro tags
}
+int LanguageModelIRST::GetLmID( const Word &word ) const
+{
+ return GetLmID( word.GetFactor(m_factorType) );
+}
+
int LanguageModelIRST::GetLmID( const Factor *factor ) const
{
size_t factorId = factor->GetId();
@@ -153,7 +191,7 @@ int LanguageModelIRST::GetLmID( const Factor *factor ) const
///di cui non sia stato ancora calcolato il suo codice target abbia
///comunque un factorID noto (e quindi minore di m_lmIdLookup.size())
///E' necessario dunque identificare questi casi di indeterminatezza
- ///del codice target. Attualamente, questo controllo e' stato implementato
+ ///del codice target. Attualmente, questo controllo e' stato implementato
///impostando a m_empty tutti i termini che non hanno ancora
//ricevuto un codice target effettivo
///////////
@@ -194,10 +232,137 @@ int LanguageModelIRST::GetLmID( const Factor *factor ) const
}
}
-LMResult LanguageModelIRST::GetValue(const vector<const Word*> &contextFactor, State* finalState) const
+const FFState* LanguageModelIRST::EmptyHypothesisState(const InputType &/*input*/) const
+{
+ std::auto_ptr<IRSTLMState> ret(new IRSTLMState());
+
+ return ret.release();
+}
+
+void LanguageModelIRST::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
+{
+ fullScore = 0;
+ ngramScore = 0;
+ oovCount = 0;
+
+ if ( !phrase.GetSize() ) return;
+
+ int _min = min(m_lmtb_size - 1, (int) phrase.GetSize());
+
+ int codes[m_lmtb_size];
+ int idx = 0;
+ codes[idx] = m_lmtb_sentenceStart;
+ ++idx;
+ int position = 0;
+
+ char* msp = NULL;
+ float before_boundary = 0.0;
+ for (; position < _min; ++position)
+ {
+ codes[idx] = GetLmID(phrase.GetWord(position));
+ if (codes[idx] == m_unknownId) ++oovCount;
+ before_boundary += m_lmtb->clprob(codes,idx+1,NULL,NULL,&msp);
+ ++idx;
+ }
+
+ ngramScore = 0.0;
+ int end_loop = (int) phrase.GetSize();
+
+ for (; position < end_loop; ++position) {
+ for (idx = 1; idx < m_lmtb_size; ++idx)
+ {
+ codes[idx-1] = codes[idx];
+ }
+ codes[idx-1] = GetLmID(phrase.GetWord(position));
+ if (codes[idx-1] == m_unknownId) ++oovCount;
+ ngramScore += m_lmtb->clprob(codes,idx,NULL,NULL,&msp);
+ }
+ before_boundary = TransformLMScore(before_boundary);
+ ngramScore = TransformLMScore(ngramScore);
+ fullScore = ngramScore + before_boundary;
+}
+
+FFState* LanguageModelIRST::EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const
{
- FactorType factorType = GetFactorType();
+ if (!hypo.GetCurrTargetLength()) {
+ std::auto_ptr<IRSTLMState> ret(new IRSTLMState(ps));
+ return ret.release();
+ }
+
+ //[begin, end) in STL-like fashion.
+ const int begin = (const int) hypo.GetCurrTargetWordsRange().GetStartPos();
+ const int end = (const int) hypo.GetCurrTargetWordsRange().GetEndPos() + 1;
+ const int adjust_end = (const int) std::min(end, begin + m_lmtb_size - 1);
+
+ //set up context
+ //fill the farthest positions with sentenceStart symbols, if "empty" positions are available
+ //so that the vector looks like = "<s> <s> context_word context_word" for a two-word context and a LM of order 5
+ int codes[m_lmtb_size];
+ int idx=m_lmtb_size-1;
+ int position = (const int) begin;
+ while (position >= 0) {
+ codes[idx] = GetLmID(hypo.GetWord(position));
+ --idx;
+ --position;
+ }
+ while (idx>=0){
+ codes[idx] = m_lmtb_sentenceStart;
+ --idx;
+ }
+
+ char* msp = NULL;
+ float score = m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp);
+
+ position = (const int) begin+1;
+ while (position < adjust_end){
+ for (idx=1; idx<m_lmtb_size; idx++){
+ codes[idx-1] = codes[idx];
+ }
+ codes[idx-1] = GetLmID(hypo.GetWord(position));
+ score += m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp);
+ ++position;
+ }
+
+ //adding probability of having sentenceEnd symbol, after this phrase;
+ //this could happen only when all source words are covered
+ if (hypo.IsSourceCompleted()) {
+ idx=m_lmtb_size-1;
+ codes[idx] = m_lmtb_sentenceEnd;
+ --idx;
+ position = (const int) end - 1;
+ while (position >= 0 && idx >= 0) {
+ codes[idx] = GetLmID(hypo.GetWord(position));
+ --idx;
+ --position;
+ }
+ while (idx>=0){
+ codes[idx] = m_lmtb_sentenceStart;
+ --idx;
+ }
+ score += m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp);
+ }else{
+ // need to set the LM state
+
+ if (adjust_end < end) { //the LMstate of this target phrase refers to the last m_lmtb_size-1 words
+ position = (const int) end - 1;
+ for (idx=m_lmtb_size-1; idx>0; --idx){
+ codes[idx] = GetLmID(hypo.GetWord(position));
+ }
+ codes[idx] = m_lmtb_sentenceStart;
+ msp = (char *) m_lmtb->cmaxsuffptr(codes,m_lmtb_size);
+ }
+ }
+ score = TransformLMScore(score);
+ out->PlusEquals(this, score);
+
+ std::auto_ptr<IRSTLMState> ret(new IRSTLMState(msp));
+
+ return ret.release();
+}
+
+LMResult LanguageModelIRST::GetValue(const vector<const Word*> &contextFactor, State* finalState) const
+{
// set up context
size_t count = contextFactor.size();
if (count < 0) {
@@ -215,23 +380,24 @@ LMResult LanguageModelIRST::GetValue(const vector<const Word*> &contextFactor, S
if (count < (size_t) m_lmtb_size) codes[idx++] = m_lmtb_sentenceStart;
for (size_t i = 0 ; i < count ; i++) {
- codes[idx++] = GetLmID((*contextFactor[i])[factorType]);
+ codes[idx] = GetLmID(*contextFactor[i]);
+ ++idx;
}
+
LMResult result;
result.unknown = (codes[idx - 1] == m_unknownId);
char* msp = NULL;
- unsigned int ilen;
- result.score = m_lmtb->clprob(codes,idx,NULL,NULL,&msp,&ilen);
+ result.score = m_lmtb->clprob(codes,idx,NULL,NULL,&msp);
if (finalState) *finalState=(State *) msp;
result.score = TransformLMScore(result.score);
+
return result;
}
-
-bool LMCacheCleanup(size_t sentences_done, size_t m_lmcache_cleanup_threshold)
+bool LMCacheCleanup(const int sentences_done, const size_t m_lmcache_cleanup_threshold)
{
if (sentences_done==-1) return true;
if (m_lmcache_cleanup_threshold)
@@ -262,5 +428,15 @@ void LanguageModelIRST::CleanUpAfterSentenceProcessing(const InputType& source)
}
}
+void LanguageModelIRST::SetParameter(const std::string& key, const std::string& value)
+{
+ if (key == "dub") {
+ m_lmtb_dub = Scan<unsigned int>(value);
+ } else {
+ LanguageModelSingleFactor::SetParameter(key, value);
+ }
+ m_lmtb_size = m_nGramOrder;
+}
+
}
diff --git a/moses/LM/IRST.h b/moses/LM/IRST.h
index 9b895073b..4e9da7226 100644
--- a/moses/LM/IRST.h
+++ b/moses/LM/IRST.h
@@ -24,17 +24,32 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <string>
#include <vector>
+
#include "moses/Factor.h"
+#include "moses/LM/SingleFactor.h"
+#include "moses/Hypothesis.h"
#include "moses/TypeDef.h"
+
#include "moses/Util.h"
-#include "SingleFactor.h"
+//this is required because:
+//- IRSTLM package uses the namespace irstlm
+//- the compilation of "IRST.cpp" requires "using namespace irstlm", which is defined in any file of the IRSTLM package
+// but conflicts with these foward declaration of class lmContainer
+//- for files in moses/LM the IRSTLM include directory is set
+// but not for the rest of files
+#ifdef LM_IRST
class lmContainer; // irst lm container for any lm type
class ngram;
class dictionary;
+#endif
+
namespace Moses
{
+
+//class LanguageModel;
+class FFState;
class Phrase;
/** Implementation of single factor LM using IRST's code.
@@ -50,31 +65,50 @@ protected:
int m_empty; //code of an empty position
int m_lmtb_sentenceStart; //lmtb symbols to initialize ngram with
int m_lmtb_sentenceEnd; //lmt symbol to initialize ngram with
- int m_lmtb_size; //max ngram stored in the table
int m_lmtb_dub; //dictionary upperboud
+ int m_lmtb_size; //max ngram stored in the table
+
+ dictionary* d;
std::string m_mapFilePath;
void CreateFactors(FactorCollection &factorCollection);
+
+ int GetLmID( const Word &word ) const;
int GetLmID( const std::string &str ) const;
int GetLmID( const Factor *factor ) const;
- dictionary* d;
public:
LanguageModelIRST(const std::string &line);
~LanguageModelIRST();
+
+ void SetParameter(const std::string& key, const std::string& value);
+
+ bool IsUseable(const FactorMask &mask) const;
+
void Load();
+ const FFState *EmptyHypothesisState(const InputType &/*input*/) const;
virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL) const;
+
+ virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
+
+ virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
+
+/*
+ virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
+
+ virtual FFState *EvaluateWhenApplied(const Syntax::SHyperedge& hyperedge, int featureID, ScoreComponentCollection *accumulator) const;
+*/
+
void InitializeForInput(InputType const& source);
void CleanUpAfterSentenceProcessing(const InputType& source);
void set_dictionary_upperbound(int dub) {
m_lmtb_size=dub ;
-//m_lmtb->set_dictionary_upperbound(dub);
};
};
diff --git a/moses/LM/Implementation.cpp b/moses/LM/Implementation.cpp
index 9e6746454..62c9616c0 100644
--- a/moses/LM/Implementation.cpp
+++ b/moses/LM/Implementation.cpp
@@ -114,7 +114,7 @@ void LanguageModelImplementation::CalcScore(const Phrase &phrase, float &fullSco
} else {
ShiftOrPush(contextFactor, word);
UTIL_THROW_IF2(contextFactor.size() > GetNGramOrder(),
- "Can only calculate LM score of phrases up to the n-gram order");
+ "Can only calculate LM score of phrases up to the n-gram order");
if (word == GetSentenceStartWord()) {
// do nothing, don't include prob for <s> unigram
@@ -253,8 +253,8 @@ FFState* LanguageModelImplementation::EvaluateWhenApplied(const ChartHypothesis&
// beginning of sentence symbol <s>? -> just update state
if (word == GetSentenceStartWord()) {
- UTIL_THROW_IF2(phrasePos != 0,
- "Sentence start symbol must be at the beginning of sentence");
+ UTIL_THROW_IF2(phrasePos != 0,
+ "Sentence start symbol must be at the beginning of sentence");
delete lmState;
lmState = NewState( GetBeginSentenceState() );
}
@@ -280,7 +280,7 @@ FFState* LanguageModelImplementation::EvaluateWhenApplied(const ChartHypothesis&
// get prefixScore and finalizedScore
prefixScore = prevState->GetPrefixScore();
- finalizedScore = prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0] - prefixScore;
+ finalizedScore = -prefixScore;
// get language model state
delete lmState;
@@ -308,13 +308,10 @@ FFState* LanguageModelImplementation::EvaluateWhenApplied(const ChartHypothesis&
updateChartScore( &prefixScore, &finalizedScore, GetValueGivenState(contextFactor, *lmState).score, ++wordPos );
}
+ finalizedScore -= prevState->GetPrefixScore();
+
// check if we are dealing with a large sub-phrase
if (subPhraseLength > GetNGramOrder() - 1) {
- // add its finalized language model score
- finalizedScore +=
- prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0] // full score
- - prevState->GetPrefixScore(); // - prefix score
-
// copy language model state
delete lmState;
lmState = NewState( prevState->GetRightContext() );
@@ -337,15 +334,15 @@ FFState* LanguageModelImplementation::EvaluateWhenApplied(const ChartHypothesis&
}
}
- // assign combined score to score breakdown
+ // add combined score to score breakdown
if (OOVFeatureEnabled()) {
vector<float> scores(2);
- scores[0] = prefixScore + finalizedScore;
- scores[1] = out->GetScoresForProducer(this)[1];
- out->Assign(this, scores);
- }
- else {
- out->Assign(this, prefixScore + finalizedScore);
+ scores[0] = prefixScore + finalizedScore - hypo.GetTranslationOption().GetScores().GetScoresForProducer(this)[0];
+ // scores[1] = out->GetScoresForProducer(this)[1];
+ scores[1] = 0;
+ out->PlusEquals(this, scores);
+ } else {
+ out->PlusEquals(this, prefixScore + finalizedScore - hypo.GetTranslationOption().GetScores().GetScoresForProducer(this)[0]);
}
ret->Set(prefixScore, lmState);
diff --git a/moses/LM/Jamfile b/moses/LM/Jamfile
index 3d68d161b..826e0b7f1 100644
--- a/moses/LM/Jamfile
+++ b/moses/LM/Jamfile
@@ -18,9 +18,9 @@ if $(with-irstlm) {
alias irst : IRST.o irstlm : : : <define>LM_IRST ;
dependencies += irst ;
lmmacros += LM_IRST ;
- echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
- echo "!!! You are linking with the IRSTLM library; be sure the release is >= 5.70.02 !!!" ;
- echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
+ echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
+ echo "!!! You are linking with the IRSTLM library; be sure the release is >= 5.80.07 !!!" ;
+ echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
}
#SRILM
diff --git a/moses/LM/Ken.cpp b/moses/LM/Ken.cpp
index 7346be3a3..71f300481 100644
--- a/moses/LM/Ken.cpp
+++ b/moses/LM/Ken.cpp
@@ -24,12 +24,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <memory>
#include <stdlib.h>
#include <boost/shared_ptr.hpp>
+#include <boost/lexical_cast.hpp>
#include "lm/binary_format.hh"
#include "lm/enumerate_vocab.hh"
#include "lm/left.hh"
#include "lm/model.hh"
#include "util/exception.hh"
+#include "util/tokenize_piece.hh"
#include "Ken.h"
#include "Base.h"
@@ -42,7 +44,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/StaticData.h"
#include "moses/ChartHypothesis.h"
#include "moses/Incremental.h"
-#include "moses/UserMessage.h"
#include "moses/Syntax/SVertex.h"
using namespace std;
@@ -329,8 +330,7 @@ template <class Model> FFState *LanguageModelKen<Model>::EvaluateWhenApplied(con
// Non-terminal is first so we can copy instead of rescoring.
const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndexMap[phrasePos]);
const lm::ngram::ChartState &prevState = static_cast<const LanguageModelChartStateKenLM*>(prevHypo->GetFFState(featureID))->GetChartState();
- float prob = UntransformLMScore(prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0]);
- ruleScore.BeginNonTerminal(prevState, prob);
+ ruleScore.BeginNonTerminal(prevState);
phrasePos++;
}
}
@@ -340,8 +340,7 @@ template <class Model> FFState *LanguageModelKen<Model>::EvaluateWhenApplied(con
if (word.IsNonTerminal()) {
const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndexMap[phrasePos]);
const lm::ngram::ChartState &prevState = static_cast<const LanguageModelChartStateKenLM*>(prevHypo->GetFFState(featureID))->GetChartState();
- float prob = UntransformLMScore(prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0]);
- ruleScore.NonTerminal(prevState, prob);
+ ruleScore.NonTerminal(prevState);
} else {
ruleScore.Terminal(TranslateID(word));
}
@@ -349,14 +348,15 @@ template <class Model> FFState *LanguageModelKen<Model>::EvaluateWhenApplied(con
float score = ruleScore.Finish();
score = TransformLMScore(score);
+ score -= hypo.GetTranslationOption().GetScores().GetScoresForProducer(this)[0];
+
if (OOVFeatureEnabled()) {
std::vector<float> scores(2);
scores[0] = score;
scores[1] = 0.0;
- accumulator->Assign(this, scores);
- }
- else {
- accumulator->Assign(this, score);
+ accumulator->PlusEquals(this, scores);
+ } else {
+ accumulator->PlusEquals(this, score);
}
return newState;
}
@@ -365,7 +365,7 @@ template <class Model> FFState *LanguageModelKen<Model>::EvaluateWhenApplied(con
{
LanguageModelChartStateKenLM *newState = new LanguageModelChartStateKenLM();
lm::ngram::RuleScore<Model> ruleScore(*m_ngram, newState->GetChartState());
- const TargetPhrase &target = *hyperedge.translation;
+ const TargetPhrase &target = *hyperedge.label.translation;
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
target.GetAlignNonTerm().GetNonTermIndexMap2();
@@ -382,7 +382,8 @@ template <class Model> FFState *LanguageModelKen<Model>::EvaluateWhenApplied(con
// Non-terminal is first so we can copy instead of rescoring.
const Syntax::SVertex *pred = hyperedge.tail[nonTermIndexMap[phrasePos]];
const lm::ngram::ChartState &prevState = static_cast<const LanguageModelChartStateKenLM*>(pred->state[featureID])->GetChartState();
- float prob = UntransformLMScore(pred->best->scoreBreakdown.GetScoresForProducer(this)[0]);
+ float prob = UntransformLMScore(
+ pred->best->label.scoreBreakdown.GetScoresForProducer(this)[0]);
ruleScore.BeginNonTerminal(prevState, prob);
phrasePos++;
}
@@ -393,7 +394,8 @@ template <class Model> FFState *LanguageModelKen<Model>::EvaluateWhenApplied(con
if (word.IsNonTerminal()) {
const Syntax::SVertex *pred = hyperedge.tail[nonTermIndexMap[phrasePos]];
const lm::ngram::ChartState &prevState = static_cast<const LanguageModelChartStateKenLM*>(pred->state[featureID])->GetChartState();
- float prob = UntransformLMScore(pred->best->scoreBreakdown.GetScoresForProducer(this)[0]);
+ float prob = UntransformLMScore(
+ pred->best->label.scoreBreakdown.GetScoresForProducer(this)[0]);
ruleScore.NonTerminal(prevState, prob);
} else {
ruleScore.Terminal(TranslateID(word));
@@ -439,29 +441,32 @@ bool LanguageModelKen<Model>::IsUseable(const FactorMask &mask) const
return ret;
}
-
LanguageModel *ConstructKenLM(const std::string &line)
{
FactorType factorType = 0;
string filePath;
bool lazy = false;
- vector<string> toks = Tokenize(line);
- for (size_t i = 1; i < toks.size(); ++i) {
- vector<string> args = Tokenize(toks[i], "=");
- UTIL_THROW_IF2(args.size() != 2,
- "Incorrect format of KenLM property: " << toks[i]);
-
- if (args[0] == "factor") {
- factorType = Scan<FactorType>(args[1]);
- } else if (args[0] == "order") {
- //nGramOrder = Scan<size_t>(args[1]);
- } else if (args[0] == "path") {
- filePath = args[1];
- } else if (args[0] == "lazyken") {
- lazy = Scan<bool>(args[1]);
- } else if (args[0] == "name") {
+ util::TokenIter<util::SingleCharacter, true> argument(line, ' ');
+ ++argument; // KENLM
+
+ for (; argument; ++argument) {
+ const char *equals = std::find(argument->data(), argument->data() + argument->size(), '=');
+ UTIL_THROW_IF2(equals == argument->data() + argument->size(),
+ "Expected = in KenLM argument " << *argument);
+ StringPiece name(argument->data(), equals - argument->data());
+ StringPiece value(equals + 1, argument->data() + argument->size() - equals - 1);
+ if (name == "factor") {
+ factorType = boost::lexical_cast<FactorType>(value);
+ } else if (name == "order") {
+ // Ignored
+ } else if (name == "path") {
+ filePath.assign(value.data(), value.size());
+ } else if (name == "lazyken") {
+ lazy = boost::lexical_cast<bool>(value);
+ } else {
// that's ok. do nothing, passes onto LM constructor
+ //UTIL_THROW2("Unknown KenLM argument " << name);
}
}
@@ -470,29 +475,27 @@ LanguageModel *ConstructKenLM(const std::string &line)
LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
{
- lm::ngram::ModelType model_type;
- if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
-
- switch(model_type) {
- case lm::ngram::PROBING:
- return new LanguageModelKen<lm::ngram::ProbingModel>(line, file, factorType, lazy);
- case lm::ngram::REST_PROBING:
- return new LanguageModelKen<lm::ngram::RestProbingModel>(line, file, factorType, lazy);
- case lm::ngram::TRIE:
- return new LanguageModelKen<lm::ngram::TrieModel>(line, file, factorType, lazy);
- case lm::ngram::QUANT_TRIE:
- return new LanguageModelKen<lm::ngram::QuantTrieModel>(line, file, factorType, lazy);
- case lm::ngram::ARRAY_TRIE:
- return new LanguageModelKen<lm::ngram::ArrayTrieModel>(line, file, factorType, lazy);
- case lm::ngram::QUANT_ARRAY_TRIE:
- return new LanguageModelKen<lm::ngram::QuantArrayTrieModel>(line, file, factorType, lazy);
- default:
- UTIL_THROW2("Unrecognized kenlm model type " << model_type);
- }
- } else {
+ lm::ngram::ModelType model_type;
+ if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
+ switch(model_type) {
+ case lm::ngram::PROBING:
return new LanguageModelKen<lm::ngram::ProbingModel>(line, file, factorType, lazy);
+ case lm::ngram::REST_PROBING:
+ return new LanguageModelKen<lm::ngram::RestProbingModel>(line, file, factorType, lazy);
+ case lm::ngram::TRIE:
+ return new LanguageModelKen<lm::ngram::TrieModel>(line, file, factorType, lazy);
+ case lm::ngram::QUANT_TRIE:
+ return new LanguageModelKen<lm::ngram::QuantTrieModel>(line, file, factorType, lazy);
+ case lm::ngram::ARRAY_TRIE:
+ return new LanguageModelKen<lm::ngram::ArrayTrieModel>(line, file, factorType, lazy);
+ case lm::ngram::QUANT_ARRAY_TRIE:
+ return new LanguageModelKen<lm::ngram::QuantArrayTrieModel>(line, file, factorType, lazy);
+ default:
+ UTIL_THROW2("Unrecognized kenlm model type " << model_type);
}
+ } else {
+ return new LanguageModelKen<lm::ngram::ProbingModel>(line, file, factorType, lazy);
+ }
}
}
-
diff --git a/moses/LM/LDHT.cpp b/moses/LM/LDHT.cpp
index 1d0331df5..7a3b0ebd5 100644
--- a/moses/LM/LDHT.cpp
+++ b/moses/LM/LDHT.cpp
@@ -98,8 +98,8 @@ public:
const FFState* input_state,
ScoreComponentCollection* score_output) const;
FFState* EvaluateWhenApplied(const ChartHypothesis& hypo,
- int featureID,
- ScoreComponentCollection* accumulator) const;
+ int featureID,
+ ScoreComponentCollection* accumulator) const;
virtual void IssueRequestsFor(Hypothesis& hypo,
const FFState* input_state);
diff --git a/moses/LM/MaxEntSRI.cpp b/moses/LM/MaxEntSRI.cpp
index 15f3f03fa..3e7f4df44 100644
--- a/moses/LM/MaxEntSRI.cpp
+++ b/moses/LM/MaxEntSRI.cpp
@@ -162,7 +162,7 @@ LMResult LanguageModelMaxEntSRI::GetValue(const vector<const Word*> &contextFact
ngram[count] = Vocab_None;
UTIL_THROW_IF2((*contextFactor[count-1])[factorType] == NULL,
- "No factor " << factorType << " at position " << (count-1));
+ "No factor " << factorType << " at position " << (count-1));
// call sri lm fn
VocabIndex lmId = GetLmID((*contextFactor[count-1])[factorType]);
ret = GetValue(lmId, ngram+1);
diff --git a/moses/LM/NeuralLMWrapper.cpp b/moses/LM/NeuralLMWrapper.cpp
index 95b2bf13b..22ff90bb9 100644
--- a/moses/LM/NeuralLMWrapper.cpp
+++ b/moses/LM/NeuralLMWrapper.cpp
@@ -10,7 +10,7 @@ using namespace std;
namespace Moses
{
NeuralLMWrapper::NeuralLMWrapper(const std::string &line)
-:LanguageModelSingleFactor(line)
+ :LanguageModelSingleFactor(line)
{
ReadParameters();
}
diff --git a/moses/LM/NeuralLMWrapper.h b/moses/LM/NeuralLMWrapper.h
index 2b80fb303..bd6635a7c 100644
--- a/moses/LM/NeuralLMWrapper.h
+++ b/moses/LM/NeuralLMWrapper.h
@@ -4,8 +4,9 @@
#include <boost/thread/tss.hpp>
-namespace nplm {
- class neuralLM;
+namespace nplm
+{
+class neuralLM;
}
namespace Moses
diff --git a/moses/LM/SRI.cpp b/moses/LM/SRI.cpp
index f3aee2864..fb60a4adb 100644
--- a/moses/LM/SRI.cpp
+++ b/moses/LM/SRI.cpp
@@ -162,7 +162,7 @@ LMResult LanguageModelSRI::GetValue(const vector<const Word*> &contextFactor, St
ngram[count] = Vocab_None;
UTIL_THROW_IF2((*contextFactor[count-1])[factorType] == NULL,
- "No factor " << factorType << " at position " << (count-1));
+ "No factor " << factorType << " at position " << (count-1));
// call sri lm fn
VocabIndex lmId = GetLmID((*contextFactor[count-1])[factorType]);
ret = GetValue(lmId, ngram+1);
diff --git a/moses/LM/SingleFactor.cpp b/moses/LM/SingleFactor.cpp
index 1efb13f16..8f9e952d3 100644
--- a/moses/LM/SingleFactor.cpp
+++ b/moses/LM/SingleFactor.cpp
@@ -48,8 +48,8 @@ LanguageModelSingleFactor::LanguageModelSingleFactor(const std::string &line)
LanguageModelSingleFactor::~LanguageModelSingleFactor()
{
- delete m_nullContextState;
- delete m_beginSentenceState;
+ delete m_nullContextState;
+ delete m_beginSentenceState;
}
const FFState *LanguageModelSingleFactor::GetNullContextState() const
@@ -89,13 +89,13 @@ void LanguageModelSingleFactor::SetParameter(const std::string& key, const std::
std::string LanguageModelSingleFactor::DebugContextFactor(const std::vector<const Word*> &contextFactor) const
{
- std::string ret;
- for (size_t i = 0; i < contextFactor.size(); ++i) {
- const Word &word = *contextFactor[i];
- ret += word.ToString();
- }
+ std::string ret;
+ for (size_t i = 0; i < contextFactor.size(); ++i) {
+ const Word &word = *contextFactor[i];
+ ret += word.ToString();
+ }
- return ret;
+ return ret;
}
}
diff --git a/moses/LM/bilingual-lm/BiLM_NPLM.cpp b/moses/LM/bilingual-lm/BiLM_NPLM.cpp
index 190aade1c..eb47d7360 100644
--- a/moses/LM/bilingual-lm/BiLM_NPLM.cpp
+++ b/moses/LM/bilingual-lm/BiLM_NPLM.cpp
@@ -2,33 +2,36 @@
#include "neuralLM.h"
#include "vocabulary.h"
-namespace Moses {
+namespace Moses
+{
BilingualLM_NPLM::BilingualLM_NPLM(const std::string &line)
- : BilingualLM(line),
- premultiply(true),
- factored(false),
- neuralLM_cache(1000000) {
-
- if (!NULL_overwrite) {
- NULL_string = "<null>"; //Default null value for nplm
- }
- FactorCollection& factorFactory = FactorCollection::Instance(); // To add null word.
- const Factor* NULL_factor = factorFactory.AddFactor(NULL_string);
- NULL_word.SetFactor(0, NULL_factor);
- }
-
-float BilingualLM_NPLM::Score(std::vector<int>& source_words, std::vector<int>& target_words) const {
+ : BilingualLM(line),
+ premultiply(true),
+ factored(false),
+ neuralLM_cache(1000000)
+{
+
+ NULL_string = "<null>"; //Default null value for nplm
+ FactorCollection& factorFactory = FactorCollection::Instance(); // To add null word.
+ const Factor* NULL_factor = factorFactory.AddFactor(NULL_string);
+ NULL_word.SetFactor(0, NULL_factor);
+}
+
+float BilingualLM_NPLM::Score(std::vector<int>& source_words, std::vector<int>& target_words) const
+{
source_words.reserve(source_ngrams+target_ngrams+1);
source_words.insert( source_words.end(), target_words.begin(), target_words.end() );
return FloorScore(m_neuralLM->lookup_ngram(source_words));
}
-const Word& BilingualLM_NPLM::getNullWord() const {
+const Word& BilingualLM_NPLM::getNullWord() const
+{
return NULL_word;
}
-int BilingualLM_NPLM::getNeuralLMId(const Word& word, bool is_source_word) const {
+int BilingualLM_NPLM::getNeuralLMId(const Word& word, bool is_source_word) const
+{
initSharedPointer();
//Decide if we are doing source or target side first.
@@ -47,34 +50,36 @@ int BilingualLM_NPLM::getNeuralLMId(const Word& word, bool is_source_word) const
it = neuralLMids->find(factor);
//If we know the word return immediately
- if (it != neuralLMids->end()){
+ if (it != neuralLMids->end()) {
return it->second;
}
//If we don't know the word and we aren't factored, return the word.
if (!factored) {
- return unknown_word_id;
- }
+ return unknown_word_id;
+ }
//Else try to get a pos_factor
const Factor* pos_factor = word.GetFactor(pos_factortype);
it = neuralLMids->find(pos_factor);
- if (it != neuralLMids->end()){
+ if (it != neuralLMids->end()) {
return it->second;
} else {
return unknown_word_id;
}
}
-void BilingualLM_NPLM::initSharedPointer() const {
+void BilingualLM_NPLM::initSharedPointer() const
+{
if (!m_neuralLM.get()) {
m_neuralLM.reset(new nplm::neuralLM(*m_neuralLM_shared));
}
}
-void BilingualLM_NPLM::SetParameter(const std::string& key, const std::string& value) {
- if (key == "target_ngrams") {
- target_ngrams = Scan<int>(value);
- } else if (key == "source_ngrams") {
- source_ngrams = Scan<int>(value);
+void BilingualLM_NPLM::SetParameter(const std::string& key, const std::string& value)
+{
+ if (key == "order") {
+ target_ngrams = Scan<int>(value)-1;
+ } else if (key == "source_window") {
+ source_ngrams = Scan<int>(value)*2+1;
} else if (key == "factored") {
factored = Scan<bool>(value);
} else if (key == "pos_factor") {
@@ -87,6 +92,7 @@ void BilingualLM_NPLM::SetParameter(const std::string& key, const std::string& v
neuralLM_cache = atoi(value.c_str());
} else if (key == "premultiply") {
premultiply = Scan<bool>(value);
+ //TODO: doesn't currently do anything (constructor doesn't know about parameters)
} else if (key == "null_word") {
NULL_string = value;
NULL_overwrite = true;
@@ -95,7 +101,8 @@ void BilingualLM_NPLM::SetParameter(const std::string& key, const std::string& v
}
}
-void BilingualLM_NPLM::loadModel() {
+void BilingualLM_NPLM::loadModel()
+{
m_neuralLM_shared = new nplm::neuralLM();
m_neuralLM_shared->read(m_filePath);
if (premultiply) {
@@ -104,9 +111,9 @@ void BilingualLM_NPLM::loadModel() {
int ngram_order = target_ngrams + source_ngrams + 1;
UTIL_THROW_IF2(
- ngram_order != m_neuralLM_shared->get_order(),
- "Wrong order of neuralLM: LM has " << m_neuralLM_shared->get_order() <<
- ", but Moses expects " << ngram_order);
+ ngram_order != m_neuralLM_shared->get_order(),
+ "Wrong order of neuralLM: LM has " << m_neuralLM_shared->get_order() <<
+ ", but Moses expects " << ngram_order);
m_neuralLM_shared->set_cache(neuralLM_cache); //Default 1000000
diff --git a/moses/LM/bilingual-lm/BiLM_NPLM.h b/moses/LM/bilingual-lm/BiLM_NPLM.h
index 9a3167455..e291e4d22 100644
--- a/moses/LM/bilingual-lm/BiLM_NPLM.h
+++ b/moses/LM/bilingual-lm/BiLM_NPLM.h
@@ -3,17 +3,20 @@
#include <utility> //make_pair
#include <fstream> //Read vocabulary files
-namespace nplm {
- class neuralLM;
+namespace nplm
+{
+class neuralLM;
}
-namespace Moses {
+namespace Moses
+{
-class BilingualLM_NPLM : public BilingualLM {
- public:
+class BilingualLM_NPLM : public BilingualLM
+{
+public:
BilingualLM_NPLM(const std::string &line);
- private:
+private:
float Score(std::vector<int>& source_words, std::vector<int>& target_words) const;
int getNeuralLMId(const Word& word, bool is_source_word) const;
diff --git a/moses/LM/oxlm/OxLM.cpp b/moses/LM/oxlm/OxLM.cpp
index 5047a0344..7700a9237 100644
--- a/moses/LM/oxlm/OxLM.cpp
+++ b/moses/LM/oxlm/OxLM.cpp
@@ -16,9 +16,10 @@ namespace Moses
template<class Model>
OxLM<Model>::OxLM(const string &line)
- : LanguageModelSingleFactor(line), normalized(true),
- posBackOff(false), posFactorType(1),
- persistentCache(false) {
+ : LanguageModelSingleFactor(line), normalized(true),
+ posBackOff(false), posFactorType(1),
+ persistentCache(false)
+{
ReadParameters();
FactorCollection &factorCollection = FactorCollection::Instance();
@@ -35,7 +36,8 @@ OxLM<Model>::OxLM(const string &line)
template<class Model>
-OxLM<Model>::~OxLM() {
+OxLM<Model>::~OxLM()
+{
if (persistentCache) {
if (cache.get()) {
string cache_file = m_filePath + ".phrases.cache.bin";
@@ -49,7 +51,8 @@ OxLM<Model>::~OxLM() {
template<class Model>
-void OxLM<Model>::SetParameter(const string& key, const string& value) {
+void OxLM<Model>::SetParameter(const string& key, const string& value)
+{
if (key == "normalized") {
normalized = Scan<bool>(value);
} else if (key == "persistent-cache") {
@@ -66,7 +69,8 @@ void OxLM<Model>::SetParameter(const string& key, const string& value) {
}
template<class Model>
-void OxLM<Model>::Load() {
+void OxLM<Model>::Load()
+{
model.load(m_filePath);
boost::shared_ptr<Vocabulary> vocab = model.getVocab();
@@ -78,12 +82,13 @@ void OxLM<Model>::Load() {
size_t ngram_order = model.getConfig()->ngram_order;
UTIL_THROW_IF2(
- m_nGramOrder != ngram_order,
- "Wrong order for OxLM: LM has " << ngram_order << ", but Moses expects " << m_nGramOrder);
+ m_nGramOrder != ngram_order,
+ "Wrong order for OxLM: LM has " << ngram_order << ", but Moses expects " << m_nGramOrder);
}
template<class Model>
-double OxLM<Model>::GetScore(int word, const vector<int>& context) const {
+double OxLM<Model>::GetScore(int word, const vector<int>& context) const
+{
if (normalized) {
return model.getLogProb(word, context);
} else {
@@ -93,7 +98,8 @@ double OxLM<Model>::GetScore(int word, const vector<int>& context) const {
template<class Model>
LMResult OxLM<Model>::GetValue(
- const vector<const Word*> &contextFactor, State* finalState) const {
+ const vector<const Word*> &contextFactor, State* finalState) const
+{
if (!cache.get()) {
cache.reset(new QueryCache());
string cache_file = m_filePath + ".phrases.cache.bin";
@@ -144,7 +150,8 @@ LMResult OxLM<Model>::GetValue(
}
template<class Model>
-void OxLM<Model>::loadPersistentCache(const string& cache_file) const {
+void OxLM<Model>::loadPersistentCache(const string& cache_file) const
+{
if (boost::filesystem::exists(cache_file)) {
ifstream f(cache_file);
boost::archive::binary_iarchive iar(f);
@@ -158,7 +165,8 @@ void OxLM<Model>::loadPersistentCache(const string& cache_file) const {
}
template<class Model>
-void OxLM<Model>::savePersistentCache(const string& cache_file) const {
+void OxLM<Model>::savePersistentCache(const string& cache_file) const
+{
ofstream f(cache_file);
boost::archive::binary_oarchive oar(f);
cerr << "Saving persistent cache to " << cache_file << endl;
@@ -168,7 +176,8 @@ void OxLM<Model>::savePersistentCache(const string& cache_file) const {
}
template<class Model>
-void OxLM<Model>::InitializeForInput(const InputType& source) {
+void OxLM<Model>::InitializeForInput(const InputType& source)
+{
LanguageModelSingleFactor::InitializeForInput(source);
if (persistentCache) {
@@ -183,7 +192,8 @@ void OxLM<Model>::InitializeForInput(const InputType& source) {
}
template<class Model>
-void OxLM<Model>::CleanUpAfterSentenceProcessing(const InputType& source) {
+void OxLM<Model>::CleanUpAfterSentenceProcessing(const InputType& source)
+{
// Thread safe: the model cache is thread specific.
model.clearCache();
diff --git a/moses/LM/oxlm/OxLM.h b/moses/LM/oxlm/OxLM.h
index a528d0882..446758b2a 100644
--- a/moses/LM/oxlm/OxLM.h
+++ b/moses/LM/oxlm/OxLM.h
@@ -11,12 +11,14 @@
#include "OxLMMapper.h"
-namespace Moses {
+namespace Moses
+{
template<class Model>
-class OxLM : public LanguageModelSingleFactor {
- public:
- OxLM(const std::string &line);
+class OxLM : public LanguageModelSingleFactor
+{
+public:
+ OxLM(const std::string &line);
~OxLM();
@@ -25,21 +27,21 @@ class OxLM : public LanguageModelSingleFactor {
void Load();
virtual LMResult GetValue(
- const std::vector<const Word*> &contextFactor,
- State* finalState = 0) const;
+ const std::vector<const Word*> &contextFactor,
+ State* finalState = 0) const;
virtual void InitializeForInput(const InputType& source);
virtual void CleanUpAfterSentenceProcessing(const InputType& source);
- private:
+private:
double GetScore(int word, const vector<int>& context) const;
void loadPersistentCache(const string& cache_file) const;
void savePersistentCache(const string& cache_file) const;
- protected:
+protected:
Model model;
boost::shared_ptr<OxLMMapper> mapper;
diff --git a/moses/LM/oxlm/OxLMMapper.cpp b/moses/LM/oxlm/OxLMMapper.cpp
index f2953b4e9..0c0ca8062 100644
--- a/moses/LM/oxlm/OxLMMapper.cpp
+++ b/moses/LM/oxlm/OxLMMapper.cpp
@@ -4,13 +4,15 @@
using namespace std;
-namespace Moses {
+namespace Moses
+{
OxLMMapper::OxLMMapper(
- const boost::shared_ptr<oxlm::Vocabulary>& vocab,
- bool pos_back_off,
- const FactorType& pos_factor_type)
- : posBackOff(pos_back_off), posFactorType(pos_factor_type) {
+ const boost::shared_ptr<oxlm::Vocabulary>& vocab,
+ bool pos_back_off,
+ const FactorType& pos_factor_type)
+ : posBackOff(pos_back_off), posFactorType(pos_factor_type)
+{
for (int i = 0; i < vocab->size(); ++i) {
const string &str = vocab->convert(i);
FactorCollection &fc = FactorCollection::Instance();
@@ -21,9 +23,10 @@ OxLMMapper::OxLMMapper(
kUNKNOWN = vocab->convert("<unk>");
}
-int OxLMMapper::convert(const Word& word) const {
+int OxLMMapper::convert(const Word& word) const
+{
const Moses::Factor* word_factor = word.GetFactor(0);
- Coll::const_iterator iter = moses2Oxlm.find(word_factor);
+ Coll::const_iterator iter = moses2Oxlm.find(word_factor);
if (posBackOff && iter == moses2Oxlm.end()) {
const Moses::Factor* pos_factor = word.GetFactor(posFactorType);
iter = moses2Oxlm.find(pos_factor);
@@ -33,15 +36,16 @@ int OxLMMapper::convert(const Word& word) const {
}
void OxLMMapper::convert(
- const vector<const Word*>& contextFactor,
- vector<int> &ids, int &word) const {
+ const vector<const Word*>& contextFactor,
+ vector<int> &ids, int &word) const
+{
ids.clear();
- for (size_t i = 0; i < contextFactor.size() - 1; ++i) {
+ for (size_t i = 0; i < contextFactor.size() - 1; ++i) {
ids.push_back(convert(*contextFactor[i]));
- }
- std::reverse(ids.begin(), ids.end());
+ }
+ std::reverse(ids.begin(), ids.end());
- word = convert(*contextFactor.back());
+ word = convert(*contextFactor.back());
}
} // namespace Moses
diff --git a/moses/LM/oxlm/OxLMMapper.h b/moses/LM/oxlm/OxLMMapper.h
index 1aef7af88..07184ed72 100644
--- a/moses/LM/oxlm/OxLMMapper.h
+++ b/moses/LM/oxlm/OxLMMapper.h
@@ -7,23 +7,25 @@
#include "moses/Factor.h"
#include "moses/Phrase.h"
-namespace Moses {
+namespace Moses
+{
-class OxLMMapper {
- public:
+class OxLMMapper
+{
+public:
OxLMMapper(
- const boost::shared_ptr<oxlm::Vocabulary>& vocab,
- bool pos_back_off,
- const FactorType& pos_factor_type);
+ const boost::shared_ptr<oxlm::Vocabulary>& vocab,
+ bool pos_back_off,
+ const FactorType& pos_factor_type);
int convert(const Word& word) const;
void convert(
- const std::vector<const Word*> &contextFactor,
- std::vector<int> &ids,
- int &word) const;
+ const std::vector<const Word*> &contextFactor,
+ std::vector<int> &ids,
+ int &word) const;
- protected:
+protected:
bool posBackOff;
FactorType posFactorType;
diff --git a/moses/LM/oxlm/OxLMParallelMapper.cpp b/moses/LM/oxlm/OxLMParallelMapper.cpp
index 3bfd4be04..c8fe692cc 100644
--- a/moses/LM/oxlm/OxLMParallelMapper.cpp
+++ b/moses/LM/oxlm/OxLMParallelMapper.cpp
@@ -6,15 +6,17 @@
using namespace std;
-namespace Moses {
+namespace Moses
+{
OxLMParallelMapper::OxLMParallelMapper(
- const boost::shared_ptr<oxlm::Vocabulary>& vocab,
- bool pos_back_off,
- const FactorType& pos_factor_type)
- : OxLMMapper(vocab, pos_back_off, pos_factor_type) {
+ const boost::shared_ptr<oxlm::Vocabulary>& vocab,
+ bool pos_back_off,
+ const FactorType& pos_factor_type)
+ : OxLMMapper(vocab, pos_back_off, pos_factor_type)
+{
boost::shared_ptr<oxlm::ParallelVocabulary> parallel_vocab =
- dynamic_pointer_cast<oxlm::ParallelVocabulary>(vocab);
+ dynamic_pointer_cast<oxlm::ParallelVocabulary>(vocab);
assert(parallel_vocab != nullptr);
for (int i = 0; i < parallel_vocab->sourceSize(); ++i) {
@@ -27,7 +29,8 @@ OxLMParallelMapper::OxLMParallelMapper(
kSOURCE_UNKNOWN = parallel_vocab->convertSource("<unk>");
}
-int OxLMParallelMapper::convertSource(const Word& word) const {
+int OxLMParallelMapper::convertSource(const Word& word) const
+{
const Moses::Factor* word_factor = word.GetFactor(0);
Coll::const_iterator iter = moses2SourceOxlm.find(word_factor);
if (posBackOff && iter == moses2SourceOxlm.end()) {
diff --git a/moses/LM/oxlm/OxLMParallelMapper.h b/moses/LM/oxlm/OxLMParallelMapper.h
index 9fbcfa2a3..79cf72e8f 100644
--- a/moses/LM/oxlm/OxLMParallelMapper.h
+++ b/moses/LM/oxlm/OxLMParallelMapper.h
@@ -2,18 +2,20 @@
#include "moses/LM/oxlm/OxLMMapper.h"
-namespace Moses {
+namespace Moses
+{
-class OxLMParallelMapper : public OxLMMapper {
- public:
+class OxLMParallelMapper : public OxLMMapper
+{
+public:
OxLMParallelMapper(
- const boost::shared_ptr<oxlm::Vocabulary>& vocab,
- bool pos_back_off,
- const FactorType& pos_factor_type);
+ const boost::shared_ptr<oxlm::Vocabulary>& vocab,
+ bool pos_back_off,
+ const FactorType& pos_factor_type);
int convertSource(const Word& word) const;
- private:
+private:
Coll moses2SourceOxlm;
int kSOURCE_UNKNOWN;
};
diff --git a/moses/LM/oxlm/SourceOxLM.cpp b/moses/LM/oxlm/SourceOxLM.cpp
index 4a6991eb2..408208e3b 100644
--- a/moses/LM/oxlm/SourceOxLM.cpp
+++ b/moses/LM/oxlm/SourceOxLM.cpp
@@ -7,17 +7,20 @@
using namespace std;
using namespace oxlm;
-namespace Moses {
+namespace Moses
+{
SourceOxLM::SourceOxLM(const string &line)
- : BilingualLM(line), posBackOff(false), posFactorType(1),
- persistentCache(false), cacheHits(0), totalHits(0) {
- FactorCollection& factorFactory = FactorCollection::Instance(); // To add null word.
- const Factor* NULL_factor = factorFactory.AddFactor("<unk>");
- NULL_word.SetFactor(0, NULL_factor);
- }
-
-SourceOxLM::~SourceOxLM() {
+ : BilingualLM(line), posBackOff(false), posFactorType(1),
+ persistentCache(false), cacheHits(0), totalHits(0)
+{
+ FactorCollection& factorFactory = FactorCollection::Instance(); // To add null word.
+ const Factor* NULL_factor = factorFactory.AddFactor("<unk>");
+ NULL_word.SetFactor(0, NULL_factor);
+}
+
+SourceOxLM::~SourceOxLM()
+{
if (persistentCache) {
double cache_hit_ratio = 100.0 * cacheHits / totalHits;
cerr << "Cache hit ratio: " << cache_hit_ratio << endl;
@@ -25,8 +28,9 @@ SourceOxLM::~SourceOxLM() {
}
float SourceOxLM::Score(
- vector<int>& source_words,
- vector<int>& target_words) const {
+ vector<int>& source_words,
+ vector<int>& target_words) const
+{
// OxLM expects the context in the following format:
// [t_{n-1}, t_{n-2}, ..., t_{n-m}, s_{a_n-sm}, s_{a_n-sm+1}, ..., s_{a_n+sm}]
// where n is the index for the current target word, m is the target order,
@@ -61,15 +65,18 @@ float SourceOxLM::Score(
return score;
}
-int SourceOxLM::getNeuralLMId(const Word& word, bool is_source_word) const {
+int SourceOxLM::getNeuralLMId(const Word& word, bool is_source_word) const
+{
return is_source_word ? mapper->convertSource(word) : mapper->convert(word);
}
-const Word& SourceOxLM::getNullWord() const {
+const Word& SourceOxLM::getNullWord() const
+{
return NULL_word;
}
-void SourceOxLM::loadModel() {
+void SourceOxLM::loadModel()
+{
model.load(m_filePath);
boost::shared_ptr<ModelData> config = model.getConfig();
@@ -78,10 +85,11 @@ void SourceOxLM::loadModel() {
boost::shared_ptr<Vocabulary> vocab = model.getVocab();
mapper = boost::make_shared<OxLMParallelMapper>(
- vocab, posBackOff, posFactorType);
+ vocab, posBackOff, posFactorType);
}
-void SourceOxLM::SetParameter(const string& key, const string& value) {
+void SourceOxLM::SetParameter(const string& key, const string& value)
+{
if (key == "persistent-cache") {
persistentCache = Scan<bool>(value);
} else if (key == "pos-back-off") {
@@ -93,7 +101,8 @@ void SourceOxLM::SetParameter(const string& key, const string& value) {
}
}
-void SourceOxLM::InitializeForInput(const InputType& source) {
+void SourceOxLM::InitializeForInput(const InputType& source)
+{
BilingualLM::InitializeForInput(source);
if (persistentCache) {
@@ -116,7 +125,8 @@ void SourceOxLM::InitializeForInput(const InputType& source) {
}
}
-void SourceOxLM::CleanUpAfterSentenceProcessing(const InputType& source) {
+void SourceOxLM::CleanUpAfterSentenceProcessing(const InputType& source)
+{
// Thread safe: the model cache is thread specific.
model.clearCache();
diff --git a/moses/LM/oxlm/SourceOxLM.h b/moses/LM/oxlm/SourceOxLM.h
index 3af48489f..945bbda6e 100644
--- a/moses/LM/oxlm/SourceOxLM.h
+++ b/moses/LM/oxlm/SourceOxLM.h
@@ -8,23 +8,25 @@
#include "moses/LM/BilingualLM.h"
#include "moses/LM/oxlm/OxLMParallelMapper.h"
-namespace Moses {
+namespace Moses
+{
-class SourceOxLM : public BilingualLM {
- public:
- SourceOxLM(const std::string &line);
+class SourceOxLM : public BilingualLM
+{
+public:
+ SourceOxLM(const std::string &line);
~SourceOxLM();
- private:
+private:
virtual float Score(
- std::vector<int>& source_words,
- std::vector<int>& target_words) const;
+ std::vector<int>& source_words,
+ std::vector<int>& target_words) const;
virtual int getNeuralLMId(const Word& word, bool is_source_word) const;
virtual void loadModel();
-
+
const Word& getNullWord() const;
void SetParameter(const std::string& key, const std::string& value);
@@ -33,7 +35,7 @@ class SourceOxLM : public BilingualLM {
void CleanUpAfterSentenceProcessing(const InputType& source);
- protected:
+protected:
oxlm::SourceFactoredLM model;
boost::shared_ptr<OxLMParallelMapper> mapper;
diff --git a/moses/LVoc.h b/moses/LVoc.h
index fb0294cde..47ecbc439 100644
--- a/moses/LVoc.h
+++ b/moses/LVoc.h
@@ -44,7 +44,7 @@ public:
return p.first->second;
}
Key const& symbol(LabelId i) const {
- assert(static_cast<size_t>(i)<data.size());
+ assert(static_cast<size_t>(i)<data.size());
return data[i];
}
diff --git a/moses/LatticeMBR.cpp b/moses/LatticeMBR.cpp
index 9ea21d5db..b6fa14757 100644
--- a/moses/LatticeMBR.cpp
+++ b/moses/LatticeMBR.cpp
@@ -499,7 +499,7 @@ bool ascendingCoverageCmp(const Hypothesis* a, const Hypothesis* b)
return a->GetWordsBitmap().GetNumWordsCovered() < b->GetWordsBitmap().GetNumWordsCovered();
}
-void getLatticeMBRNBest(Manager& manager, TrellisPathList& nBestList,
+void getLatticeMBRNBest(const Manager& manager, const TrellisPathList& nBestList,
vector<LatticeMBRSolution>& solutions, size_t n)
{
const StaticData& staticData = StaticData::Instance();
@@ -546,7 +546,7 @@ void getLatticeMBRNBest(Manager& manager, TrellisPathList& nBestList,
VERBOSE(2,"LMBR Score: " << solutions[0].GetScore() << endl);
}
-vector<Word> doLatticeMBR(Manager& manager, TrellisPathList& nBestList)
+vector<Word> doLatticeMBR(const Manager& manager, const TrellisPathList& nBestList)
{
vector<LatticeMBRSolution> solutions;
@@ -554,7 +554,7 @@ vector<Word> doLatticeMBR(Manager& manager, TrellisPathList& nBestList)
return solutions.at(0).GetWords();
}
-const TrellisPath doConsensusDecoding(Manager& manager, TrellisPathList& nBestList)
+const TrellisPath doConsensusDecoding(const Manager& manager, const TrellisPathList& nBestList)
{
static const int BLEU_ORDER = 4;
static const float SMOOTH = 1;
diff --git a/moses/LatticeMBR.h b/moses/LatticeMBR.h
index 47d6da3c4..5fa47949d 100644
--- a/moses/LatticeMBR.h
+++ b/moses/LatticeMBR.h
@@ -137,15 +137,15 @@ void pruneLatticeFB(Lattice & connectedHyp, std::map < const Moses::Hypothesis*,
const std::vector< float> & estimatedScores, const Moses::Hypothesis*, size_t edgeDensity,float scale);
//Use the ngram scores to rerank the nbest list, return at most n solutions
-void getLatticeMBRNBest(Moses::Manager& manager, Moses::TrellisPathList& nBestList, std::vector<LatticeMBRSolution>& solutions, size_t n);
+void getLatticeMBRNBest(const Moses::Manager& manager, const Moses::TrellisPathList& nBestList, std::vector<LatticeMBRSolution>& solutions, size_t n);
//calculate expectated ngram counts, clipping at 1 (ie calculating posteriors) if posteriors==true.
void calcNgramExpectations(Lattice & connectedHyp, std::map<const Moses::Hypothesis*, std::vector<Edge> >& incomingEdges, std::map<Moses::Phrase,
float>& finalNgramScores, bool posteriors);
void GetOutputFactors(const Moses::TrellisPath &path, std::vector <Moses::Word> &translation);
void extract_ngrams(const std::vector<Moses::Word >& sentence, std::map < Moses::Phrase, int > & allngrams);
bool ascendingCoverageCmp(const Moses::Hypothesis* a, const Moses::Hypothesis* b);
-std::vector<Moses::Word> doLatticeMBR(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
-const Moses::TrellisPath doConsensusDecoding(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
+std::vector<Moses::Word> doLatticeMBR(const Moses::Manager& manager, const Moses::TrellisPathList& nBestList);
+const Moses::TrellisPath doConsensusDecoding(const Moses::Manager& manager, const Moses::TrellisPathList& nBestList);
//std::vector<Moses::Word> doConsensusDecoding(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
}
diff --git a/moses/Manager.cpp b/moses/Manager.cpp
index 7a27dcaaf..854b9cf1d 100644
--- a/moses/Manager.cpp
+++ b/moses/Manager.cpp
@@ -39,9 +39,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "TranslationOption.h"
#include "TranslationOptionCollection.h"
#include "Timer.h"
+#include "moses/OutputCollector.h"
#include "moses/FF/DistortionScoreProducer.h"
#include "moses/LM/Base.h"
#include "moses/TranslationModel/PhraseDictionary.h"
+#include "moses/TranslationAnalysis.h"
+#include "moses/HypergraphOutput.h"
+#include "moses/mbr.h"
+#include "moses/LatticeMBR.h"
#ifdef HAVE_PROTOBUF
#include "hypergraph.pb.h"
@@ -54,13 +59,16 @@ using namespace std;
namespace Moses
{
-Manager::Manager(InputType const& source, SearchAlgorithm searchAlgorithm)
- :m_transOptColl(source.CreateTranslationOptionCollection())
- ,m_search(Search::CreateSearch(*this, source, searchAlgorithm, *m_transOptColl))
+Manager::Manager(InputType const& source)
+ :BaseManager(source)
+ ,m_transOptColl(source.CreateTranslationOptionCollection())
,interrupted_flag(0)
,m_hypoId(0)
- ,m_source(source)
{
+ const StaticData &staticData = StaticData::Instance();
+ SearchAlgorithm searchAlgorithm = staticData.GetSearchAlgorithm();
+ m_search = Search::CreateSearch(*this, source, searchAlgorithm, *m_transOptColl);
+
StaticData::Instance().InitializeForInput(m_source);
}
@@ -77,7 +85,7 @@ Manager::~Manager()
* Main decoder loop that translates a sentence by expanding
* hypotheses stack by stack, until the end of the sentence.
*/
-void Manager::ProcessSentence()
+void Manager::Decode()
{
// initialize statistics
ResetSentenceStats(m_source);
@@ -105,16 +113,16 @@ void Manager::ProcessSentence()
IFVERBOSE(1) {
GetSentenceStats().StopTimeCollectOpts();
TRACE_ERR("Line "<< m_source.GetTranslationId() << ": Collecting options took "
- << GetSentenceStats().GetTimeCollectOpts() << " seconds at "
- << __FILE__ << ":" << __LINE__ << endl);
+ << GetSentenceStats().GetTimeCollectOpts() << " seconds at "
+ << __FILE__ << ":" << __LINE__ << endl);
}
// search for best translation with the specified algorithm
Timer searchTime;
searchTime.start();
- m_search->ProcessSentence();
+ m_search->Decode();
VERBOSE(1, "Line " << m_source.GetTranslationId() << ": Search took " << searchTime << " seconds" << endl);
- IFVERBOSE(2) {
+ IFVERBOSE(2) {
GetSentenceStats().StopTimeTotal();
TRACE_ERR(GetSentenceStats());
}
@@ -183,11 +191,11 @@ void Manager::printDivergentHypothesis(long translationId, const Hypothesis* hyp
}
-void
+void
Manager::
-printThisHypothesis(long translationId, const Hypothesis* hypo,
- const vector <const TargetPhrase*> & remainingPhrases,
- float remainingScore, ostream& outputStream) const
+printThisHypothesis(long translationId, const Hypothesis* hypo,
+ const vector <const TargetPhrase*> & remainingPhrases,
+ float remainingScore, ostream& outputStream) const
{
outputStream << translationId << " ||| ";
@@ -326,12 +334,12 @@ void Manager::CalcLatticeSamples(size_t count, TrellisPathList &ret) const
if (i->forward >= 0) {
map<int,const Hypothesis*>::const_iterator idToHypIter = idToHyp.find(i->forward);
UTIL_THROW_IF2(idToHypIter == idToHyp.end(),
- "Couldn't find hypothesis " << i->forward);
+ "Couldn't find hypothesis " << i->forward);
const Hypothesis* nextHypo = idToHypIter->second;
outgoingHyps[hypo].insert(nextHypo);
map<int,float>::const_iterator fscoreIter = fscores.find(nextHypo->GetId());
UTIL_THROW_IF2(fscoreIter == fscores.end(),
- "Couldn't find scores for hypothsis " << nextHypo->GetId());
+ "Couldn't find scores for hypothsis " << nextHypo->GetId());
edgeScores[Edge(hypo->GetId(),nextHypo->GetId())] =
i->fscore - fscoreIter->second;
}
@@ -349,17 +357,17 @@ void Manager::CalcLatticeSamples(size_t count, TrellisPathList &ret) const
outgoingHyps.find(i->hypo);
UTIL_THROW_IF2(outIter == outgoingHyps.end(),
- "Couldn't find hypothesis " << i->hypo->GetId());
+ "Couldn't find hypothesis " << i->hypo->GetId());
float sigma = 0;
for (set<const Hypothesis*>::const_iterator j = outIter->second.begin();
j != outIter->second.end(); ++j) {
map<const Hypothesis*, float>::const_iterator succIter = sigmas.find(*j);
UTIL_THROW_IF2(succIter == sigmas.end(),
- "Couldn't find hypothesis " << (*j)->GetId());
+ "Couldn't find hypothesis " << (*j)->GetId());
map<Edge,float>::const_iterator edgeScoreIter =
edgeScores.find(Edge(i->hypo->GetId(),(*j)->GetId()));
UTIL_THROW_IF2(edgeScoreIter == edgeScores.end(),
- "Couldn't find edge for hypothesis " << (*j)->GetId());
+ "Couldn't find edge for hypothesis " << (*j)->GetId());
float term = edgeScoreIter->second + succIter->second; // Add sigma(*j)
if (sigma == 0) {
sigma = term;
@@ -392,10 +400,10 @@ void Manager::CalcLatticeSamples(size_t count, TrellisPathList &ret) const
j != outIter->second.end(); ++j) {
candidates.push_back(*j);
UTIL_THROW_IF2(sigmas.find(*j) == sigmas.end(),
- "Hypothesis " << (*j)->GetId() << " not found");
+ "Hypothesis " << (*j)->GetId() << " not found");
Edge edge(path.back()->GetId(),(*j)->GetId());
UTIL_THROW_IF2(edgeScores.find(edge) == edgeScores.end(),
- "Edge not found");
+ "Edge not found");
candidateScores.push_back(sigmas[*j] + edgeScores[edge]);
if (scoreTotal == 0) {
scoreTotal = candidateScores.back();
@@ -470,7 +478,7 @@ void Manager::CalcDecoderStatistics() const
}
}
-void OutputWordGraph(std::ostream &outputWordGraphStream, const Hypothesis *hypo, size_t &linkId)
+void Manager::OutputWordGraph(std::ostream &outputWordGraphStream, const Hypothesis *hypo, size_t &linkId) const
{
const Hypothesis *prevHypo = hypo->GetPrevHypo();
@@ -550,13 +558,14 @@ void OutputWordGraph(std::ostream &outputWordGraphStream, const Hypothesis *hypo
outputWordGraphStream << endl;
}
-void Manager::GetOutputLanguageModelOrder( std::ostream &out, const Hypothesis *hypo ) {
+void Manager::GetOutputLanguageModelOrder( std::ostream &out, const Hypothesis *hypo ) const
+{
Phrase translation;
hypo->GetOutputPhrase(translation);
const std::vector<const StatefulFeatureFunction*> &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions();
for (size_t i = 0; i < statefulFFs.size(); ++i) {
const StatefulFeatureFunction *ff = statefulFFs[i];
- if (const LanguageModel *lm = dynamic_cast<const LanguageModel*>(ff)) {
+ if (const LanguageModel *lm = dynamic_cast<const LanguageModel*>(ff)) {
lm->ReportHistoryOrder(out, translation);
}
}
@@ -565,8 +574,19 @@ void Manager::GetOutputLanguageModelOrder( std::ostream &out, const Hypothesis *
void Manager::GetWordGraph(long translationId, std::ostream &outputWordGraphStream) const
{
const StaticData &staticData = StaticData::Instance();
- string fileName = staticData.GetParam("output-word-graph")[0];
- bool outputNBest = Scan<bool>(staticData.GetParam("output-word-graph")[1]);
+ const PARAM_VEC *params;
+
+ string fileName;
+ bool outputNBest = false;
+ params = staticData.GetParameter().GetParam("output-word-graph");
+ if (params && params->size()) {
+ fileName = params->at(0);
+
+ if (params->size() == 2) {
+ outputNBest = Scan<bool>(params->at(1));
+ }
+ }
+
const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
outputWordGraphStream << "VERSION=1.0" << endl
@@ -1314,7 +1334,7 @@ void Manager::SerializeSearchGraphPB(
for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
const Hypothesis *loserHypo = *iterArcList;
UTIL_THROW_IF2(!connected[loserHypo->GetId()],
- "Hypothesis " << loserHypo->GetId() << " is not connected");
+ "Hypothesis " << loserHypo->GetId() << " is not connected");
Hypergraph_Edge* edge = hg.add_edges();
SerializeEdgeInfo(loserHypo, edge);
edge->set_head_node(headNodeIdx);
@@ -1436,4 +1456,574 @@ SentenceStats& Manager::GetSentenceStats() const
}
+void Manager::OutputBest(OutputCollector *collector) const
+{
+ const StaticData &staticData = StaticData::Instance();
+ long translationId = m_source.GetTranslationId();
+
+ Timer additionalReportingTime;
+
+ // apply decision rule and output best translation(s)
+ if (collector) {
+ ostringstream out;
+ ostringstream debug;
+ FixPrecision(debug,PRECISION);
+
+ // all derivations - send them to debug stream
+ if (staticData.PrintAllDerivations()) {
+ additionalReportingTime.start();
+ PrintAllDerivations(translationId, debug);
+ additionalReportingTime.stop();
+ }
+
+ Timer decisionRuleTime;
+ decisionRuleTime.start();
+
+ // MAP decoding: best hypothesis
+ const Hypothesis* bestHypo = NULL;
+ if (!staticData.UseMBR()) {
+ bestHypo = GetBestHypothesis();
+ if (bestHypo) {
+ if (StaticData::Instance().GetOutputHypoScore()) {
+ out << bestHypo->GetTotalScore() << ' ';
+ }
+ if (staticData.IsPathRecoveryEnabled()) {
+ bestHypo->OutputInput(out);
+ out << "||| ";
+ }
+
+ const PARAM_VEC *params = staticData.GetParameter().GetParam("print-id");
+ if (params && params->size() && Scan<bool>(params->at(0)) ) {
+ out << translationId << " ";
+ }
+
+ if (staticData.GetReportSegmentation() == 2) {
+ GetOutputLanguageModelOrder(out, bestHypo);
+ }
+ bestHypo->OutputBestSurface(
+ out,
+ staticData.GetOutputFactorOrder(),
+ staticData.GetReportSegmentation(),
+ staticData.GetReportAllFactors());
+ if (staticData.PrintAlignmentInfo()) {
+ out << "||| ";
+ bestHypo->OutputAlignment(out);
+ }
+
+ IFVERBOSE(1) {
+ debug << "BEST TRANSLATION: " << *bestHypo << endl;
+ }
+ } else {
+ VERBOSE(1, "NO BEST TRANSLATION" << endl);
+ }
+
+ out << endl;
+ } // if (!staticData.UseMBR())
+
+ // MBR decoding (n-best MBR, lattice MBR, consensus)
+ else {
+ // we first need the n-best translations
+ size_t nBestSize = staticData.GetMBRSize();
+ if (nBestSize <= 0) {
+ cerr << "ERROR: negative size for number of MBR candidate translations not allowed (option mbr-size)" << endl;
+ exit(1);
+ }
+ TrellisPathList nBestList;
+ CalcNBest(nBestSize, nBestList,true);
+ VERBOSE(2,"size of n-best: " << nBestList.GetSize() << " (" << nBestSize << ")" << endl);
+ IFVERBOSE(2) {
+ PrintUserTime("calculated n-best list for (L)MBR decoding");
+ }
+
+ // lattice MBR
+ if (staticData.UseLatticeMBR()) {
+ if (staticData.IsNBestEnabled()) {
+ //lattice mbr nbest
+ vector<LatticeMBRSolution> solutions;
+ size_t n = min(nBestSize, staticData.GetNBestSize());
+ getLatticeMBRNBest(*this,nBestList,solutions,n);
+ OutputLatticeMBRNBest(m_latticeNBestOut, solutions, translationId);
+ } else {
+ //Lattice MBR decoding
+ vector<Word> mbrBestHypo = doLatticeMBR(*this,nBestList);
+ OutputBestHypo(mbrBestHypo, translationId, staticData.GetReportSegmentation(),
+ staticData.GetReportAllFactors(),out);
+ IFVERBOSE(2) {
+ PrintUserTime("finished Lattice MBR decoding");
+ }
+ }
+ }
+
+ // consensus decoding
+ else if (staticData.UseConsensusDecoding()) {
+ const TrellisPath &conBestHypo = doConsensusDecoding(*this,nBestList);
+ OutputBestHypo(conBestHypo, translationId,
+ staticData.GetReportSegmentation(),
+ staticData.GetReportAllFactors(),out);
+ OutputAlignment(m_alignmentOut, conBestHypo);
+ IFVERBOSE(2) {
+ PrintUserTime("finished Consensus decoding");
+ }
+ }
+
+ // n-best MBR decoding
+ else {
+ const TrellisPath &mbrBestHypo = doMBR(nBestList);
+ OutputBestHypo(mbrBestHypo, translationId,
+ staticData.GetReportSegmentation(),
+ staticData.GetReportAllFactors(),out);
+ OutputAlignment(m_alignmentOut, mbrBestHypo);
+ IFVERBOSE(2) {
+ PrintUserTime("finished MBR decoding");
+ }
+ }
+ }
+
+ // report best translation to output collector
+ collector->Write(translationId,out.str(),debug.str());
+
+ decisionRuleTime.stop();
+ VERBOSE(1, "Line " << translationId << ": Decision rule took " << decisionRuleTime << " seconds total" << endl);
+ } // if (m_ioWrapper.GetSingleBestOutputCollector())
+
+}
+
+void Manager::OutputNBest(OutputCollector *collector) const
+{
+ if (collector == NULL) {
+ return;
+ }
+
+ const StaticData &staticData = StaticData::Instance();
+ long translationId = m_source.GetTranslationId();
+
+ if (staticData.UseLatticeMBR()) {
+ if (staticData.IsNBestEnabled()) {
+ collector->Write(translationId, m_latticeNBestOut.str());
+ }
+ } else {
+ TrellisPathList nBestList;
+ ostringstream out;
+ CalcNBest(staticData.GetNBestSize(), nBestList,staticData.GetDistinctNBest());
+ OutputNBest(out, nBestList, staticData.GetOutputFactorOrder(), m_source.GetTranslationId(),
+ staticData.GetReportSegmentation());
+ collector->Write(m_source.GetTranslationId(), out.str());
+ }
+
+}
+
+void Manager::OutputNBest(std::ostream& out
+ , const Moses::TrellisPathList &nBestList
+ , const std::vector<Moses::FactorType>& outputFactorOrder
+ , long translationId
+ , char reportSegmentation) const
+{
+ const StaticData &staticData = StaticData::Instance();
+ bool reportAllFactors = staticData.GetReportAllFactorsNBest();
+ bool includeSegmentation = staticData.NBestIncludesSegmentation();
+ bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
+
+ TrellisPathList::const_iterator iter;
+ for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
+ const TrellisPath &path = **iter;
+ const std::vector<const Hypothesis *> &edges = path.GetEdges();
+
+ // print the surface factor of the translation
+ out << translationId << " ||| ";
+ for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
+ const Hypothesis &edge = *edges[currEdge];
+ OutputSurface(out, edge, outputFactorOrder, reportSegmentation, reportAllFactors);
+ }
+ out << " |||";
+
+ // print scores with feature names
+ path.GetScoreBreakdown().OutputAllFeatureScores(out );
+
+ // total
+ out << " ||| " << path.GetTotalScore();
+
+ //phrase-to-phrase segmentation
+ if (includeSegmentation) {
+ out << " |||";
+ for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
+ const Hypothesis &edge = *edges[currEdge];
+ const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
+ WordsRange targetRange = path.GetTargetWordsRange(edge);
+ out << " " << sourceRange.GetStartPos();
+ if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
+ out << "-" << sourceRange.GetEndPos();
+ }
+ out<< "=" << targetRange.GetStartPos();
+ if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
+ out<< "-" << targetRange.GetEndPos();
+ }
+ }
+ }
+
+ if (includeWordAlignment) {
+ out << " ||| ";
+ for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
+ const Hypothesis &edge = *edges[currEdge];
+ const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
+ WordsRange targetRange = path.GetTargetWordsRange(edge);
+ const int sourceOffset = sourceRange.GetStartPos();
+ const int targetOffset = targetRange.GetStartPos();
+ const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
+
+ OutputAlignment(out, ai, sourceOffset, targetOffset);
+
+ }
+ }
+
+ if (StaticData::Instance().IsPathRecoveryEnabled()) {
+ out << " ||| ";
+ OutputInput(out, edges[0]);
+ }
+
+ out << endl;
+ }
+
+ out << std::flush;
+}
+
+//////////////////////////////////////////////////////////////////////////
+/***
+ * print surface factor only for the given phrase
+ */
+void Manager::OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
+ char reportSegmentation, bool reportAllFactors) const
+{
+ UTIL_THROW_IF2(outputFactorOrder.size() == 0,
+ "Must specific at least 1 output factor");
+ const TargetPhrase& phrase = edge.GetCurrTargetPhrase();
+ bool markUnknown = StaticData::Instance().GetMarkUnknown();
+ if (reportAllFactors == true) {
+ out << phrase;
+ } else {
+ FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor();
+
+ std::map<size_t, const Factor*> placeholders;
+ if (placeholderFactor != NOT_FOUND) {
+ // creates map of target position -> factor for placeholders
+ placeholders = GetPlaceholders(edge, placeholderFactor);
+ }
+
+ size_t size = phrase.GetSize();
+ for (size_t pos = 0 ; pos < size ; pos++) {
+ const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
+
+ if (placeholders.size()) {
+ // do placeholders
+ std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos);
+ if (iter != placeholders.end()) {
+ factor = iter->second;
+ }
+ }
+
+ UTIL_THROW_IF2(factor == NULL,
+ "No factor 0 at position " << pos);
+
+ //preface surface form with UNK if marking unknowns
+ const Word &word = phrase.GetWord(pos);
+ if(markUnknown && word.IsOOV()) {
+ out << "UNK" << *factor;
+ } else {
+ out << *factor;
+ }
+
+ for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
+ const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
+ UTIL_THROW_IF2(factor == NULL,
+ "No factor " << i << " at position " << pos);
+
+ out << "|" << *factor;
+ }
+ out << " ";
+ }
+ }
+
+ // trace ("report segmentation") option "-t" / "-tt"
+ if (reportSegmentation > 0 && phrase.GetSize() > 0) {
+ const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
+ const int sourceStart = sourceRange.GetStartPos();
+ const int sourceEnd = sourceRange.GetEndPos();
+ out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt"
+ if (reportSegmentation == 2) {
+ out << ",wa=";
+ const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
+ OutputAlignment(out, ai, 0, 0);
+ out << ",total=";
+ out << edge.GetScore() - edge.GetPrevHypo()->GetScore();
+ out << ",";
+ ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
+ scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
+ scoreBreakdown.OutputAllFeatureScores(out);
+ }
+ out << "| ";
+ }
}
+
+void Manager::OutputAlignment(ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset) const
+{
+ typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
+ AlignVec alignments = ai.GetSortedAlignments();
+
+ AlignVec::const_iterator it;
+ for (it = alignments.begin(); it != alignments.end(); ++it) {
+ const std::pair<size_t,size_t> &alignment = **it;
+ out << alignment.first + sourceOffset << "-" << alignment.second + targetOffset << " ";
+ }
+
+}
+
+void Manager::OutputInput(std::ostream& os, const Hypothesis* hypo) const
+{
+ size_t len = hypo->GetInput().GetSize();
+ std::vector<const Phrase*> inp_phrases(len, 0);
+ OutputInput(inp_phrases, hypo);
+ for (size_t i=0; i<len; ++i)
+ if (inp_phrases[i]) os << *inp_phrases[i];
+}
+
+void Manager::OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo) const
+{
+ if (hypo->GetPrevHypo()) {
+ OutputInput(map, hypo->GetPrevHypo());
+ map[hypo->GetCurrSourceWordsRange().GetStartPos()] = &hypo->GetTranslationOption().GetInputPath().GetPhrase();
+ }
+}
+
+std::map<size_t, const Factor*> Manager::GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const
+{
+ const InputPath &inputPath = hypo.GetTranslationOption().GetInputPath();
+ const Phrase &inputPhrase = inputPath.GetPhrase();
+
+ std::map<size_t, const Factor*> ret;
+
+ for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) {
+ const Factor *factor = inputPhrase.GetFactor(sourcePos, placeholderFactor);
+ if (factor) {
+ std::set<size_t> targetPos = hypo.GetTranslationOption().GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(sourcePos);
+ UTIL_THROW_IF2(targetPos.size() != 1,
+ "Placeholder should be aligned to 1, and only 1, word");
+ ret[*targetPos.begin()] = factor;
+ }
+ }
+
+ return ret;
+}
+
+void Manager::OutputLatticeSamples(OutputCollector *collector) const
+{
+ const StaticData &staticData = StaticData::Instance();
+ if (collector) {
+ TrellisPathList latticeSamples;
+ ostringstream out;
+ CalcLatticeSamples(staticData.GetLatticeSamplesSize(), latticeSamples);
+ OutputNBest(out,latticeSamples, staticData.GetOutputFactorOrder(), m_source.GetTranslationId(),
+ staticData.GetReportSegmentation());
+ collector->Write(m_source.GetTranslationId(), out.str());
+ }
+
+}
+
+void Manager::OutputAlignment(OutputCollector *collector) const
+{
+ if (collector == NULL) {
+ return;
+ }
+
+ if (!m_alignmentOut.str().empty()) {
+ collector->Write(m_source.GetTranslationId(), m_alignmentOut.str());
+ } else {
+ std::vector<const Hypothesis *> edges;
+ const Hypothesis *currentHypo = GetBestHypothesis();
+ while (currentHypo) {
+ edges.push_back(currentHypo);
+ currentHypo = currentHypo->GetPrevHypo();
+ }
+
+ OutputAlignment(collector,m_source.GetTranslationId(), edges);
+ }
+}
+
+void Manager::OutputAlignment(OutputCollector* collector, size_t lineNo , const vector<const Hypothesis *> &edges) const
+{
+ ostringstream out;
+ OutputAlignment(out, edges);
+
+ collector->Write(lineNo,out.str());
+}
+
+void Manager::OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges) const
+{
+ size_t targetOffset = 0;
+
+ for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
+ const Hypothesis &edge = *edges[currEdge];
+ const TargetPhrase &tp = edge.GetCurrTargetPhrase();
+ size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos();
+
+ OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset);
+
+ targetOffset += tp.GetSize();
+ }
+ // Removing std::endl here breaks -alignment-output-file, so stop doing that, please :)
+ // Or fix it somewhere else.
+ out << std::endl;
+}
+
+void Manager::OutputDetailedTranslationReport(OutputCollector *collector) const
+{
+ if (collector) {
+ ostringstream out;
+ FixPrecision(out,PRECISION);
+ TranslationAnalysis::PrintTranslationAnalysis(out, GetBestHypothesis());
+ collector->Write(m_source.GetTranslationId(),out.str());
+ }
+
+}
+
+void Manager::OutputUnknowns(OutputCollector *collector) const
+{
+ if (collector) {
+ long translationId = m_source.GetTranslationId();
+ const vector<const Phrase*>& unknowns = m_transOptColl->GetUnknownSources();
+ ostringstream out;
+ for (size_t i = 0; i < unknowns.size(); ++i) {
+ out << *(unknowns[i]);
+ }
+ out << endl;
+ collector->Write(translationId, out.str());
+ }
+
+}
+
+void Manager::OutputWordGraph(OutputCollector *collector) const
+{
+ if (collector) {
+ long translationId = m_source.GetTranslationId();
+ ostringstream out;
+ FixPrecision(out,PRECISION);
+ GetWordGraph(translationId, out);
+ collector->Write(translationId, out.str());
+ }
+}
+
+void Manager::OutputSearchGraph(OutputCollector *collector) const
+{
+ if (collector) {
+ long translationId = m_source.GetTranslationId();
+ ostringstream out;
+ FixPrecision(out,PRECISION);
+ OutputSearchGraph(translationId, out);
+ collector->Write(translationId, out.str());
+
+#ifdef HAVE_PROTOBUF
+ const StaticData &staticData = StaticData::Instance();
+ if (staticData.GetOutputSearchGraphPB()) {
+ ostringstream sfn;
+ sfn << staticData.GetParam("output-search-graph-pb")[0] << '/' << translationId << ".pb" << ends;
+ string fn = sfn.str();
+ VERBOSE(2, "Writing search graph to " << fn << endl);
+ fstream output(fn.c_str(), ios::trunc | ios::binary | ios::out);
+ SerializeSearchGraphPB(translationId, output);
+ }
+#endif
+ }
+
+}
+
+void Manager::OutputSearchGraphSLF() const
+{
+ const StaticData &staticData = StaticData::Instance();
+ long translationId = m_source.GetTranslationId();
+
+ // Output search graph in HTK standard lattice format (SLF)
+ bool slf = staticData.GetOutputSearchGraphSLF();
+ if (slf) {
+ stringstream fileName;
+
+ string dir;
+ staticData.GetParameter().SetParameter<string>(dir, "output-search-graph-slf", "");
+
+ fileName << dir << "/" << translationId << ".slf";
+ ofstream *file = new ofstream;
+ file->open(fileName.str().c_str());
+ if (file->is_open() && file->good()) {
+ ostringstream out;
+ FixPrecision(out,PRECISION);
+ OutputSearchGraphAsSLF(translationId, out);
+ *file << out.str();
+ file -> flush();
+ } else {
+ TRACE_ERR("Cannot output HTK standard lattice for line " << translationId << " because the output file is not open or not ready for writing" << endl);
+ }
+ delete file;
+ }
+
+}
+
+void Manager::OutputSearchGraphHypergraph() const
+{
+ const StaticData &staticData = StaticData::Instance();
+ if (staticData.GetOutputSearchGraphHypergraph()) {
+ HypergraphOutput<Manager> hypergraphOutput(PRECISION);
+ hypergraphOutput.Write(*this);
+ }
+}
+
+void Manager::OutputLatticeMBRNBest(std::ostream& out, const vector<LatticeMBRSolution>& solutions,long translationId) const
+{
+ for (vector<LatticeMBRSolution>::const_iterator si = solutions.begin(); si != solutions.end(); ++si) {
+ out << translationId;
+ out << " |||";
+ const vector<Word> mbrHypo = si->GetWords();
+ for (size_t i = 0 ; i < mbrHypo.size() ; i++) {
+ const Factor *factor = mbrHypo[i].GetFactor(StaticData::Instance().GetOutputFactorOrder()[0]);
+ if (i>0) out << " " << *factor;
+ else out << *factor;
+ }
+ out << " |||";
+ out << " map: " << si->GetMapScore();
+ out << " w: " << mbrHypo.size();
+ const vector<float>& ngramScores = si->GetNgramScores();
+ for (size_t i = 0; i < ngramScores.size(); ++i) {
+ out << " " << ngramScores[i];
+ }
+ out << " ||| " << si->GetScore();
+
+ out << endl;
+ }
+}
+
+void Manager::OutputBestHypo(const std::vector<Word>& mbrBestHypo, long /*translationId*/, char /*reportSegmentation*/, bool /*reportAllFactors*/, ostream& out) const
+{
+
+ for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) {
+ const Factor *factor = mbrBestHypo[i].GetFactor(StaticData::Instance().GetOutputFactorOrder()[0]);
+ UTIL_THROW_IF2(factor == NULL,
+ "No factor 0 at position " << i);
+ if (i>0) out << " " << *factor;
+ else out << *factor;
+ }
+ out << endl;
+}
+
+void Manager::OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/, char reportSegmentation, bool reportAllFactors, std::ostream &out) const
+{
+ const std::vector<const Hypothesis *> &edges = path.GetEdges();
+
+ for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
+ const Hypothesis &edge = *edges[currEdge];
+ OutputSurface(out, edge, StaticData::Instance().GetOutputFactorOrder(), reportSegmentation, reportAllFactors);
+ }
+ out << endl;
+}
+
+void Manager::OutputAlignment(std::ostringstream &out, const TrellisPath &path) const
+{
+ Hypothesis::OutputAlignment(out, path.GetEdges());
+}
+
+} // namespace
diff --git a/moses/Manager.h b/moses/Manager.h
index ef4612de1..4de0f5f95 100644
--- a/moses/Manager.h
+++ b/moses/Manager.h
@@ -34,6 +34,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "WordsBitmap.h"
#include "Search.h"
#include "SearchCubePruning.h"
+#include "BaseManager.h"
namespace Moses
{
@@ -41,6 +42,7 @@ namespace Moses
class SentenceStats;
class TrellisPath;
class TranslationOptionCollection;
+class LatticeMBRSolution;
/** Used to output the search graph */
struct SearchGraphNode {
@@ -91,7 +93,7 @@ struct SearchGraphNode {
* the appropriate stack, or re-combined with existing hypotheses
**/
-class Manager
+class Manager : public BaseManager
{
Manager();
Manager(Manager const&);
@@ -110,7 +112,6 @@ private:
protected:
// data
-// InputType const& m_source; /**< source sentence to be translated */
TranslationOptionCollection *m_transOptColl; /**< pre-computed list of translation options for the phrases in this sentence */
Search *m_search;
@@ -126,14 +127,35 @@ protected:
std::map< int, bool >* pConnected,
std::vector< const Hypothesis* >* pConnectedList) const;
+ // output
+ // nbest
+ mutable std::ostringstream m_latticeNBestOut;
+ mutable std::ostringstream m_alignmentOut;
+
+ void OutputNBest(std::ostream& out
+ , const Moses::TrellisPathList &nBestList
+ , const std::vector<Moses::FactorType>& outputFactorOrder
+ , long translationId
+ , char reportSegmentation) const;
+ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
+ char reportSegmentation, bool reportAllFactors) const;
+ void OutputAlignment(std::ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset) const;
+ void OutputInput(std::ostream& os, const Hypothesis* hypo) const;
+ void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo) const;
+ std::map<size_t, const Factor*> GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const;
+ void OutputAlignment(OutputCollector* collector, size_t lineNo , const std::vector<const Hypothesis *> &edges) const;
+ void OutputAlignment(std::ostream &out, const std::vector<const Hypothesis *> &edges) const;
+
+ void OutputWordGraph(std::ostream &outputWordGraphStream, const Hypothesis *hypo, size_t &linkId) const;
+
+ void OutputAlignment(std::ostringstream &out, const TrellisPath &path) const;
public:
- InputType const& m_source; /**< source sentence to be translated */
- Manager(InputType const& source, SearchAlgorithm searchAlgorithm);
+ Manager(InputType const& source);
~Manager();
const TranslationOptionCollection* getSntTranslationOptions();
- void ProcessSentence();
+ void Decode();
const Hypothesis *GetBestHypothesis() const;
const Hypothesis *GetActualBestHypothesis() const;
void CalcNBest(size_t count, TrellisPathList &ret,bool onlyDistinct=0) const;
@@ -141,10 +163,15 @@ public:
void PrintAllDerivations(long translationId, std::ostream& outputStream) const;
void printDivergentHypothesis(long translationId, const Hypothesis* hypo, const std::vector <const TargetPhrase*> & remainingPhrases, float remainingScore , std::ostream& outputStream) const;
void printThisHypothesis(long translationId, const Hypothesis* hypo, const std::vector <const TargetPhrase* > & remainingPhrases, float remainingScore , std::ostream& outputStream) const;
- void GetOutputLanguageModelOrder( std::ostream &out, const Hypothesis *hypo );
+ void GetOutputLanguageModelOrder( std::ostream &out, const Hypothesis *hypo ) const;
void GetWordGraph(long translationId, std::ostream &outputWordGraphStream) const;
int GetNextHypoId();
+ void OutputLatticeMBRNBest(std::ostream& out, const std::vector<LatticeMBRSolution>& solutions,long translationId) const;
+ void OutputBestHypo(const std::vector<Moses::Word>& mbrBestHypo, long /*translationId*/,
+ char reportSegmentation, bool reportAllFactors, std::ostream& out) const;
+ void OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/,char reportSegmentation, bool reportAllFactors, std::ostream &out) const;
+
#ifdef HAVE_PROTOBUF
void SerializeSearchGraphPB(long translationId, std::ostream& outputStream) const;
#endif
@@ -170,6 +197,20 @@ public:
void GetForwardBackwardSearchGraph(std::map< int, bool >* pConnected,
std::vector< const Hypothesis* >* pConnectedList, std::map < const Hypothesis*, std::set < const Hypothesis* > >* pOutgoingHyps, std::vector< float>* pFwdBwdScores) const;
+ // outputs
+ void OutputBest(OutputCollector *collector) const;
+ void OutputNBest(OutputCollector *collector) const;
+ void OutputAlignment(OutputCollector *collector) const;
+ void OutputLatticeSamples(OutputCollector *collector) const;
+ void OutputDetailedTranslationReport(OutputCollector *collector) const;
+ void OutputUnknowns(OutputCollector *collector) const;
+ void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const {
+ }
+ void OutputWordGraph(OutputCollector *collector) const;
+ void OutputSearchGraph(OutputCollector *collector) const;
+ void OutputSearchGraphSLF() const;
+ void OutputSearchGraphHypergraph() const;
+
};
}
diff --git a/moses/MockHypothesis.cpp b/moses/MockHypothesis.cpp
index c18b58a5e..e25fe4e52 100644
--- a/moses/MockHypothesis.cpp
+++ b/moses/MockHypothesis.cpp
@@ -41,7 +41,7 @@ MockHypothesisGuard::MockHypothesisGuard(
m_wp("WordPenalty"),
m_uwp("UnknownWordPenalty"),
m_dist("Distortion"),
- m_manager(m_sentence,Normal)
+ m_manager(m_sentence)
{
BOOST_CHECK_EQUAL(alignments.size(), targetSegments.size());
diff --git a/moses/PCNTools.h b/moses/PCNTools.h
index c36ed67e3..69f9a5488 100644
--- a/moses/PCNTools.h
+++ b/moses/PCNTools.h
@@ -35,8 +35,8 @@ namespace PCN
{
struct CNAlt {
- CNAlt()
- {}
+ CNAlt() {
+ }
CNAlt(const std::string &word,
const std::vector<float> &denseFeatures,
const std::map<std::string, float> &sparseFeatures,
@@ -44,8 +44,8 @@ struct CNAlt {
:m_word(word)
,m_denseFeatures(denseFeatures)
,m_sparseFeatures(sparseFeatures)
- ,m_next(next)
- {}
+ ,m_next(next) {
+ }
std::string m_word;
std::vector<float> m_denseFeatures;
diff --git a/moses/PDTAimp.cpp b/moses/PDTAimp.cpp
index 34f65da4c..1b9e789ce 100644
--- a/moses/PDTAimp.cpp
+++ b/moses/PDTAimp.cpp
@@ -8,7 +8,8 @@ PDTAimp::PDTAimp(PhraseDictionaryTreeAdaptor *p)
m_obj(p),
useCache(1),
totalE(0),
- distinctE(0) {
+ distinctE(0)
+{
m_numInputScores = 0;
m_inputFeature = &InputFeature::Instance();
@@ -20,7 +21,8 @@ PDTAimp::PDTAimp(PhraseDictionaryTreeAdaptor *p)
}
}
-PDTAimp::~PDTAimp() {
+PDTAimp::~PDTAimp()
+{
CleanUp();
delete m_dict;
@@ -57,7 +59,8 @@ PDTAimp::~PDTAimp() {
}
-void PDTAimp::CleanUp() {
+void PDTAimp::CleanUp()
+{
assert(m_dict);
m_dict->FreeMemory();
for(size_t i=0; i<m_tgtColls.size(); ++i) delete m_tgtColls[i];
@@ -68,9 +71,10 @@ void PDTAimp::CleanUp() {
}
TargetPhraseCollectionWithSourcePhrase const*
-PDTAimp::GetTargetPhraseCollection(Phrase const &src) const {
+PDTAimp::GetTargetPhraseCollection(Phrase const &src) const
+{
- assert(m_dict);
+ assert(m_dict);
if(src.GetSize()==0) return 0;
std::pair<MapSrc2Tgt::iterator,bool> piter;
@@ -150,10 +154,11 @@ PDTAimp::GetTargetPhraseCollection(Phrase const &src) const {
}
void PDTAimp::Create(const std::vector<FactorType> &input
- , const std::vector<FactorType> &output
- , const std::string &filePath
- , const std::vector<float> &weight
- ) {
+ , const std::vector<FactorType> &output
+ , const std::string &filePath
+ , const std::vector<float> &weight
+ )
+{
// set my members
m_dict=new PhraseDictionaryTree();
@@ -174,16 +179,15 @@ void PDTAimp::Create(const std::vector<FactorType> &input
// m_dict->Read(filePath);
bool res=m_dict->Read(filePath);
if (!res) {
- std::stringstream strme;
- strme << "bin ttable was read in a wrong way\n";
- UserMessage::Add(strme.str());
+ std::cerr << "bin ttable was read in a wrong way\n";
exit(1);
}
}
-void PDTAimp::CacheSource(ConfusionNet const& src) {
- assert(m_dict);
+void PDTAimp::CacheSource(ConfusionNet const& src)
+{
+ assert(m_dict);
const size_t srcSize=src.GetSize();
std::vector<size_t> exploredPaths(srcSize+1,0);
@@ -245,7 +249,7 @@ void PDTAimp::CacheSource(ConfusionNet const& src) {
//assert that we have the right number of link params in this CN option
UTIL_THROW_IF2(currCol[colidx].second.denseScores.size() < m_numInputScores,
- "Incorrect number of input scores");
+ "Incorrect number of input scores");
// do not start with epsilon (except at first position)
if(isEpsilon && curr.begin()==curr.end() && curr.begin()>0) continue;
@@ -300,7 +304,7 @@ void PDTAimp::CacheSource(ConfusionNet const& src) {
//put input scores in first - already logged, just drop in directly
std::vector<float> transcores(m_obj->GetNumScoreComponents());
UTIL_THROW_IF2(transcores.size() != weightTrans.size(),
- "Incorrect number of translation scores");
+ "Incorrect number of translation scores");
//put in phrase table scores, logging as we insert
std::transform(tcands[i].scores.begin()
@@ -397,11 +401,12 @@ void PDTAimp::CacheSource(ConfusionNet const& src) {
}
void PDTAimp::CreateTargetPhrase(TargetPhrase& targetPhrase,
- StringTgtCand::Tokens const& factorStrings,
- Scores const& transVector,
- Scores const& inputVector,
- const std::string *alignmentString,
- Phrase const* srcPtr) const {
+ StringTgtCand::Tokens const& factorStrings,
+ Scores const& transVector,
+ Scores const& inputVector,
+ const std::string *alignmentString,
+ Phrase const* srcPtr) const
+{
FactorCollection &factorCollection = FactorCollection::Instance();
for(size_t k=0; k<factorStrings.size(); ++k) {
@@ -427,10 +432,11 @@ void PDTAimp::CreateTargetPhrase(TargetPhrase& targetPhrase,
TargetPhraseCollectionWithSourcePhrase* PDTAimp::PruneTargetCandidates
(const std::vector<TargetPhrase> & tCands,
std::vector<std::pair<float,size_t> >& costs,
- const std::vector<Phrase> &sourcePhrases) const {
+ const std::vector<Phrase> &sourcePhrases) const
+{
// convert into TargetPhraseCollection
UTIL_THROW_IF2(tCands.size() != sourcePhrases.size(),
- "Number of target phrases must equal number of source phrases");
+ "Number of target phrases must equal number of source phrases");
TargetPhraseCollectionWithSourcePhrase *rv=new TargetPhraseCollectionWithSourcePhrase;
diff --git a/moses/PP/CountsPhraseProperty.cpp b/moses/PP/CountsPhraseProperty.cpp
index b64366733..00bc08011 100644
--- a/moses/PP/CountsPhraseProperty.cpp
+++ b/moses/PP/CountsPhraseProperty.cpp
@@ -27,11 +27,11 @@ void CountsPhraseProperty::ProcessValue(const std::string &value)
std::ostream& operator<<(std::ostream &out, const CountsPhraseProperty &obj)
{
- out << "Count property="
- << obj.GetTargetMarginal() << " "
- << obj.GetSourceMarginal() << " "
- << obj.GetJointCount();
- return out;
+ out << "Count property="
+ << obj.GetTargetMarginal() << " "
+ << obj.GetSourceMarginal() << " "
+ << obj.GetJointCount();
+ return out;
}
} // namespace Moses
diff --git a/moses/PP/CountsPhraseProperty.h b/moses/PP/CountsPhraseProperty.h
index 4f6fbcfa8..7f3137085 100644
--- a/moses/PP/CountsPhraseProperty.h
+++ b/moses/PP/CountsPhraseProperty.h
@@ -47,9 +47,9 @@ public:
return m_jointCount;
}
- virtual const std::string *GetValueString() const {
+ virtual const std::string *GetValueString() const {
UTIL_THROW2("CountsPhraseProperty: value string not available in this phrase property");
- return NULL;
+ return NULL;
};
protected:
diff --git a/moses/PP/Factory.cpp b/moses/PP/Factory.cpp
index fd146005b..cc393b18d 100644
--- a/moses/PP/Factory.cpp
+++ b/moses/PP/Factory.cpp
@@ -87,13 +87,13 @@ boost::shared_ptr<PhraseProperty> PhrasePropertyFactory::ProduceProperty(const s
void PhrasePropertyFactory::PrintPP() const
{
- std::cerr << "Registered phrase properties:" << std::endl;
- Registry::const_iterator iter;
- for (iter = m_registry.begin(); iter != m_registry.end(); ++iter) {
- const std::string &ppName = iter->first;
- std::cerr << ppName << " ";
- }
- std::cerr << std::endl;
+ std::cerr << "Registered phrase properties:" << std::endl;
+ Registry::const_iterator iter;
+ for (iter = m_registry.begin(); iter != m_registry.end(); ++iter) {
+ const std::string &ppName = iter->first;
+ std::cerr << ppName << " ";
+ }
+ std::cerr << std::endl;
}
} // namespace Moses
diff --git a/moses/PP/NonTermContextProperty.cpp b/moses/PP/NonTermContextProperty.cpp
index df5e88d8e..d1ea6a554 100644
--- a/moses/PP/NonTermContextProperty.cpp
+++ b/moses/PP/NonTermContextProperty.cpp
@@ -14,7 +14,7 @@ NonTermContextProperty::NonTermContextProperty()
NonTermContextProperty::~NonTermContextProperty()
{
- //RemoveAllInColl(m_probStores);
+ //RemoveAllInColl(m_probStores);
}
void NonTermContextProperty::ProcessValue(const std::string &value)
@@ -29,39 +29,39 @@ void NonTermContextProperty::ProcessValue(const std::string &value)
size_t ind = 1;
while (ind < toks.size()) {
- vector<const Factor *> factors;
-
- for (size_t nt = 0; nt < numNT; ++nt) {
- size_t ntInd = Scan<size_t>(toks[ind]);
- assert(nt == ntInd);
- ++ind;
-
- for (size_t contextInd = 0; contextInd < 4; ++contextInd) {
- //cerr << "toks[" << ind << "]=" << toks[ind] << endl;
- const Factor *factor = fc.AddFactor(toks[ind], false);
- factors.push_back(factor);
- ++ind;
- }
- }
-
- // done with the context. Just get the count and put it all into data structures
- // cerr << "count=" << toks[ind] << endl;
- float count = Scan<float>(toks[ind]);
- ++ind;
-
- for (size_t i = 0; i < factors.size(); ++i) {
- size_t ntInd = i / 4;
- size_t contextInd = i % 4;
- const Factor *factor = factors[i];
- AddToMap(ntInd, contextInd, factor, count);
- }
+ vector<const Factor *> factors;
+
+ for (size_t nt = 0; nt < numNT; ++nt) {
+ size_t ntInd = Scan<size_t>(toks[ind]);
+ assert(nt == ntInd);
+ ++ind;
+
+ for (size_t contextInd = 0; contextInd < 4; ++contextInd) {
+ //cerr << "toks[" << ind << "]=" << toks[ind] << endl;
+ const Factor *factor = fc.AddFactor(toks[ind], false);
+ factors.push_back(factor);
+ ++ind;
+ }
+ }
+
+ // done with the context. Just get the count and put it all into data structures
+ // cerr << "count=" << toks[ind] << endl;
+ float count = Scan<float>(toks[ind]);
+ ++ind;
+
+ for (size_t i = 0; i < factors.size(); ++i) {
+ size_t ntInd = i / 4;
+ size_t contextInd = i % 4;
+ const Factor *factor = factors[i];
+ AddToMap(ntInd, contextInd, factor, count);
+ }
}
}
void NonTermContextProperty::AddToMap(size_t ntIndex, size_t index, const Factor *factor, float count)
{
if (ntIndex <= m_probStores.size()) {
- m_probStores.resize(ntIndex + 1);
+ m_probStores.resize(ntIndex + 1);
}
ProbStore &probStore = m_probStores[ntIndex];
@@ -69,38 +69,37 @@ void NonTermContextProperty::AddToMap(size_t ntIndex, size_t index, const Factor
}
float NonTermContextProperty::GetProb(size_t ntInd,
- size_t contextInd,
- const Factor *factor,
- float smoothConstant) const
+ size_t contextInd,
+ const Factor *factor,
+ float smoothConstant) const
{
- UTIL_THROW_IF2(ntInd >= m_probStores.size(), "Invalid nt index=" << ntInd);
- const ProbStore &probStore = m_probStores[ntInd];
- float ret = probStore.GetProb(contextInd, factor, smoothConstant);
- return ret;
+ UTIL_THROW_IF2(ntInd >= m_probStores.size(), "Invalid nt index=" << ntInd);
+ const ProbStore &probStore = m_probStores[ntInd];
+ float ret = probStore.GetProb(contextInd, factor, smoothConstant);
+ return ret;
}
//////////////////////////////////////////
void NonTermContextProperty::ProbStore::AddToMap(size_t index, const Factor *factor, float count)
{
- Map &map = m_vec[index];
-
- Map::iterator iter = map.find(factor);
- if (iter == map.end()) {
- map[factor] = count;
- }
- else {
- float &currCount = iter->second;
- currCount += count;
- }
-
- m_totalCount += count;
+ Map &map = m_vec[index];
+
+ Map::iterator iter = map.find(factor);
+ if (iter == map.end()) {
+ map[factor] = count;
+ } else {
+ float &currCount = iter->second;
+ currCount += count;
+ }
+
+ m_totalCount += count;
}
float NonTermContextProperty::ProbStore::GetProb(size_t contextInd,
- const Factor *factor,
- float smoothConstant) const
+ const Factor *factor,
+ float smoothConstant) const
{
float count = GetCount(contextInd, factor, smoothConstant);
float total = GetTotalCount(contextInd, smoothConstant);
@@ -109,27 +108,26 @@ float NonTermContextProperty::ProbStore::GetProb(size_t contextInd,
}
float NonTermContextProperty::ProbStore::GetCount(size_t contextInd,
- const Factor *factor,
- float smoothConstant) const
+ const Factor *factor,
+ float smoothConstant) const
{
- const Map &map = m_vec[contextInd];
-
- float count = smoothConstant;
- Map::const_iterator iter = map.find(factor);
- if (iter == map.end()) {
- // nothing
- }
- else {
- count += iter->second;
- }
-
- return count;
+ const Map &map = m_vec[contextInd];
+
+ float count = smoothConstant;
+ Map::const_iterator iter = map.find(factor);
+ if (iter == map.end()) {
+ // nothing
+ } else {
+ count += iter->second;
+ }
+
+ return count;
}
float NonTermContextProperty::ProbStore::GetTotalCount(size_t contextInd, float smoothConstant) const
{
- const Map &map = m_vec[contextInd];
- return m_totalCount + smoothConstant * map.size();
+ const Map &map = m_vec[contextInd];
+ return m_totalCount + smoothConstant * map.size();
}
diff --git a/moses/PP/NonTermContextProperty.h b/moses/PP/NonTermContextProperty.h
index 56db9cb32..efe007099 100644
--- a/moses/PP/NonTermContextProperty.h
+++ b/moses/PP/NonTermContextProperty.h
@@ -27,38 +27,40 @@ public:
};
float GetProb(size_t ntInd,
- size_t contextInd,
- const Factor *factor,
- float smoothConstant) const;
+ size_t contextInd,
+ const Factor *factor,
+ float smoothConstant) const;
protected:
- class ProbStore {
- typedef std::map<const Factor*, float> Map; // map word -> prob
- typedef std::vector<Map> Vec; // left outside, left inside, right inside, right outside
- Vec m_vec;
- float m_totalCount;
+ class ProbStore
+ {
+ typedef std::map<const Factor*, float> Map; // map word -> prob
+ typedef std::vector<Map> Vec; // left outside, left inside, right inside, right outside
+ Vec m_vec;
+ float m_totalCount;
- float GetCount(size_t contextInd,
- const Factor *factor,
- float smoothConstant) const;
- float GetTotalCount(size_t contextInd, float smoothConstant) const;
+ float GetCount(size_t contextInd,
+ const Factor *factor,
+ float smoothConstant) const;
+ float GetTotalCount(size_t contextInd, float smoothConstant) const;
public:
- ProbStore()
- :m_vec(4)
- ,m_totalCount(0)
- {}
+ ProbStore()
+ :m_vec(4)
+ ,m_totalCount(0) {
+ }
- float GetProb(size_t contextInd,
- const Factor *factor,
- float smoothConstant) const;
+ float GetProb(size_t contextInd,
+ const Factor *factor,
+ float smoothConstant) const;
- float GetSize(size_t index) const
- { return m_vec[index].size(); }
+ float GetSize(size_t index) const {
+ return m_vec[index].size();
+ }
- void AddToMap(size_t index, const Factor *factor, float count);
+ void AddToMap(size_t index, const Factor *factor, float count);
};
diff --git a/moses/PP/OrientationPhraseProperty.cpp b/moses/PP/OrientationPhraseProperty.cpp
index 653a1bf3b..4088a0d8b 100644
--- a/moses/PP/OrientationPhraseProperty.cpp
+++ b/moses/PP/OrientationPhraseProperty.cpp
@@ -7,14 +7,14 @@ namespace Moses
void OrientationPhraseProperty::ProcessValue(const std::string &value)
{
- // bidirectional MSLR phrase orientation with 2x4 orientation classes:
- // mono swap dright dleft
+ // bidirectional MSLR phrase orientation with 2x4 orientation classes:
+ // mono swap dleft dright
std::istringstream tokenizer(value);
try {
- if (! (tokenizer >> m_l2rMonoProbability >> m_l2rSwapProbability >> m_l2rDrightProbability >> m_l2rDleftProbability
- >> m_r2lMonoProbability >> m_r2lSwapProbability >> m_r2lDrightProbability >> m_r2lDleftProbability)) {
+ if (! (tokenizer >> m_l2rMonoProbability >> m_l2rSwapProbability >> m_l2rDleftProbability >> m_l2rDrightProbability
+ >> m_r2lMonoProbability >> m_r2lSwapProbability >> m_r2lDleftProbability >> m_r2lDrightProbability)) {
UTIL_THROW2("OrientationPhraseProperty: Not able to read value. Flawed property?");
}
} catch (const std::exception &e) {
diff --git a/moses/PP/OrientationPhraseProperty.h b/moses/PP/OrientationPhraseProperty.h
index 32c6ff208..d682e1c59 100644
--- a/moses/PP/OrientationPhraseProperty.h
+++ b/moses/PP/OrientationPhraseProperty.h
@@ -24,12 +24,16 @@ public:
return m_l2rSwapProbability;
};
+ double GetLeftToRightProbabilityDleft() const {
+ return m_l2rDleftProbability;
+ };
+
double GetLeftToRightProbabilityDright() const {
return m_l2rDrightProbability;
};
- double GetLeftToRightProbabilityDleft() const {
- return m_l2rDleftProbability;
+ double GetLeftToRightProbabilityDiscontinuous() const {
+ return m_l2rDleftProbability + m_l2rDrightProbability;
};
@@ -41,18 +45,22 @@ public:
return m_r2lSwapProbability;
};
+ double GetRightToLeftProbabilityDleft() const {
+ return m_r2lDleftProbability;
+ };
+
double GetRightToLeftProbabilityDright() const {
return m_r2lDrightProbability;
};
- double GetRightToLeftProbabilityDleft() const {
- return m_r2lDleftProbability;
+ double GetRightToLeftProbabilityDiscontinuous() const {
+ return m_r2lDleftProbability + m_r2lDrightProbability;
};
- virtual const std::string *GetValueString() const {
+ virtual const std::string *GetValueString() const {
UTIL_THROW2("OrientationPhraseProperty: value string not available in this phrase property");
- return NULL;
+ return NULL;
};
protected:
diff --git a/moses/PP/PhraseProperty.cpp b/moses/PP/PhraseProperty.cpp
index 614b39c60..69e3c3374 100644
--- a/moses/PP/PhraseProperty.cpp
+++ b/moses/PP/PhraseProperty.cpp
@@ -5,8 +5,8 @@ namespace Moses
std::ostream& operator<<(std::ostream &out, const PhraseProperty &obj)
{
- out << "Base phrase property";
- return out;
+ out << "Base phrase property";
+ return out;
}
}
diff --git a/moses/PP/PhraseProperty.h b/moses/PP/PhraseProperty.h
index b7437369b..76c294481 100644
--- a/moses/PP/PhraseProperty.h
+++ b/moses/PP/PhraseProperty.h
@@ -14,15 +14,21 @@ class PhraseProperty
public:
PhraseProperty() : m_value(NULL) {};
- ~PhraseProperty() { if ( m_value != NULL ) delete m_value; };
+ virtual ~PhraseProperty() {
+ if ( m_value != NULL ) delete m_value;
+ };
- virtual void ProcessValue(const std::string &value) { m_value = new std::string(value); };
+ virtual void ProcessValue(const std::string &value) {
+ m_value = new std::string(value);
+ };
- virtual const std::string *GetValueString() const { return m_value; };
+ virtual const std::string *GetValueString() const {
+ return m_value;
+ };
protected:
- std::string *m_value;
+ std::string *m_value;
};
diff --git a/moses/PP/SourceLabelsPhraseProperty.cpp b/moses/PP/SourceLabelsPhraseProperty.cpp
index 8e6a5dd6d..efe5ae741 100644
--- a/moses/PP/SourceLabelsPhraseProperty.cpp
+++ b/moses/PP/SourceLabelsPhraseProperty.cpp
@@ -27,53 +27,53 @@ void SourceLabelsPhraseProperty::ProcessValue(const std::string &value)
- // read source-labelled rule items
+ // read source-labelled rule items
std::priority_queue<float> ruleLabelledCountsPQ;
while (tokenizer.peek() != EOF) {
// try {
- SourceLabelsPhrasePropertyItem item;
- size_t numberOfLHSsGivenRHS = std::numeric_limits<std::size_t>::max();
+ SourceLabelsPhrasePropertyItem item;
+ size_t numberOfLHSsGivenRHS = std::numeric_limits<std::size_t>::max();
- if (m_nNTs == 1) {
+ if (m_nNTs == 1) {
- item.m_sourceLabelsRHSCount = m_totalCount;
+ item.m_sourceLabelsRHSCount = m_totalCount;
- } else { // rule has right-hand side non-terminals, i.e. it's a hierarchical rule
+ } else { // rule has right-hand side non-terminals, i.e. it's a hierarchical rule
- for (size_t i=0; i<m_nNTs-1; ++i) { // RHS source non-terminal labels
- size_t sourceLabelRHS;
- if (! (tokenizer >> sourceLabelRHS) ) { // RHS source non-terminal label
- UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read right-hand side label index. Flawed property? " << value);
- }
- item.m_sourceLabelsRHS.push_back(sourceLabelRHS);
+ for (size_t i=0; i<m_nNTs-1; ++i) { // RHS source non-terminal labels
+ size_t sourceLabelRHS;
+ if (! (tokenizer >> sourceLabelRHS) ) { // RHS source non-terminal label
+ UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read right-hand side label index. Flawed property? " << value);
}
+ item.m_sourceLabelsRHS.push_back(sourceLabelRHS);
+ }
- if (! (tokenizer >> item.m_sourceLabelsRHSCount)) {
- UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read right-hand side count. Flawed property? " << value);
- }
+ if (! (tokenizer >> item.m_sourceLabelsRHSCount)) {
+ UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read right-hand side count. Flawed property? " << value);
+ }
- if (! (tokenizer >> numberOfLHSsGivenRHS)) {
- UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read number of left-hand sides. Flawed property? " << value);
- }
+ if (! (tokenizer >> numberOfLHSsGivenRHS)) {
+ UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read number of left-hand sides. Flawed property? " << value);
}
+ }
- for (size_t i=0; i<numberOfLHSsGivenRHS && tokenizer.peek()!=EOF; ++i) { // LHS source non-terminal labels seen with this RHS
- size_t sourceLabelLHS;
- if (! (tokenizer >> sourceLabelLHS)) { // LHS source non-terminal label
- UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read left-hand side label index. Flawed property? " << value);
- }
- float ruleSourceLabelledCount;
- if (! (tokenizer >> ruleSourceLabelledCount)) {
- UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read count. Flawed property? " << value);
- }
- item.m_sourceLabelsLHSList.push_back( std::make_pair(sourceLabelLHS,ruleSourceLabelledCount) );
- ruleLabelledCountsPQ.push(ruleSourceLabelledCount);
+ for (size_t i=0; i<numberOfLHSsGivenRHS && tokenizer.peek()!=EOF; ++i) { // LHS source non-terminal labels seen with this RHS
+ size_t sourceLabelLHS;
+ if (! (tokenizer >> sourceLabelLHS)) { // LHS source non-terminal label
+ UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read left-hand side label index. Flawed property? " << value);
}
+ float ruleSourceLabelledCount;
+ if (! (tokenizer >> ruleSourceLabelledCount)) {
+ UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read count. Flawed property? " << value);
+ }
+ item.m_sourceLabelsLHSList.push_back( std::make_pair(sourceLabelLHS,ruleSourceLabelledCount) );
+ ruleLabelledCountsPQ.push(ruleSourceLabelledCount);
+ }
- m_sourceLabelItems.push_back(item);
+ m_sourceLabelItems.push_back(item);
// } catch (const std::exception &e) {
// UTIL_THROW2("SourceLabelsPhraseProperty: Read error. Flawed property?");
diff --git a/moses/PP/SourceLabelsPhraseProperty.h b/moses/PP/SourceLabelsPhraseProperty.h
index 39b43ad3e..d9ec82776 100644
--- a/moses/PP/SourceLabelsPhraseProperty.h
+++ b/moses/PP/SourceLabelsPhraseProperty.h
@@ -9,28 +9,25 @@
namespace Moses
{
-// Note that we require label tokens (strings) in the corresponding property values of phrase table entries
+// Note that we require label tokens (strings) in the corresponding property values of phrase table entries
// to be replaced beforehand by indices (size_t) of a label vocabulary. (TODO: change that?)
class SourceLabelsPhrasePropertyItem
{
-friend class SourceLabelsPhraseProperty;
+ friend class SourceLabelsPhraseProperty;
public:
SourceLabelsPhrasePropertyItem() {};
- float GetSourceLabelsRHSCount() const
- {
+ float GetSourceLabelsRHSCount() const {
return m_sourceLabelsRHSCount;
};
- const std::list<size_t> &GetSourceLabelsRHS() const
- {
+ const std::list<size_t> &GetSourceLabelsRHS() const {
return m_sourceLabelsRHS;
};
- const std::list< std::pair<size_t,float> > &GetSourceLabelsLHSList() const
- {
+ const std::list< std::pair<size_t,float> > &GetSourceLabelsLHSList() const {
return m_sourceLabelsLHSList;
};
@@ -60,9 +57,9 @@ public:
return m_sourceLabelItems;
};
- virtual const std::string *GetValueString() const {
+ virtual const std::string *GetValueString() const {
UTIL_THROW2("SourceLabelsPhraseProperty: value string not available in this phrase property");
- return NULL;
+ return NULL;
};
protected:
diff --git a/moses/PP/SpanLengthPhraseProperty.cpp b/moses/PP/SpanLengthPhraseProperty.cpp
index d45c7b919..3a3fb3586 100644
--- a/moses/PP/SpanLengthPhraseProperty.cpp
+++ b/moses/PP/SpanLengthPhraseProperty.cpp
@@ -18,22 +18,21 @@ void SpanLengthPhraseProperty::ProcessValue(const std::string &value)
set< vector<string> > indices;
for (size_t i = 0; i < toks.size(); ++i) {
- const string &span = toks[i];
-
- // is it a ntIndex,sourceSpan,targetSpan or count ?
- vector<string> toks;
- Tokenize<string>(toks, span, ",");
- UTIL_THROW_IF2(toks.size() != 1 && toks.size() != 3, "Incorrect format for SpanLength: " << span);
-
- if (toks.size() == 1) {
- float count = Scan<float>(toks[0]);
- Populate(indices, count);
-
- indices.clear();
- }
- else {
- indices.insert(toks);
- }
+ const string &span = toks[i];
+
+ // is it a ntIndex,sourceSpan,targetSpan or count ?
+ vector<string> toks;
+ Tokenize<string>(toks, span, ",");
+ UTIL_THROW_IF2(toks.size() != 1 && toks.size() != 3, "Incorrect format for SpanLength: " << span);
+
+ if (toks.size() == 1) {
+ float count = Scan<float>(toks[0]);
+ Populate(indices, count);
+
+ indices.clear();
+ } else {
+ indices.insert(toks);
+ }
}
// totals
@@ -45,11 +44,11 @@ void SpanLengthPhraseProperty::Populate(const set< vector<string> > &indices, fl
{
set< vector<string> >::const_iterator iter;
for (iter = indices.begin(); iter != indices.end(); ++iter) {
- const vector<string> &toksStr = *iter;
- vector<size_t> toks = Scan<size_t>(toksStr);
- UTIL_THROW_IF2(toks.size() != 3, "Incorrect format for SpanLength. Size is " << toks.size());
+ const vector<string> &toksStr = *iter;
+ vector<size_t> toks = Scan<size_t>(toksStr);
+ UTIL_THROW_IF2(toks.size() != 3, "Incorrect format for SpanLength. Size is " << toks.size());
- Populate(toks, count);
+ Populate(toks, count);
}
}
@@ -59,8 +58,8 @@ void SpanLengthPhraseProperty::Populate(const std::vector<size_t> &toks, float c
size_t sourceLength = toks[1];
size_t targetLength = toks[2];
if (ntInd >= m_source.size() ) {
- m_source.resize(ntInd + 1);
- m_target.resize(ntInd + 1);
+ m_source.resize(ntInd + 1);
+ m_target.resize(ntInd + 1);
}
Map &sourceMap = m_source[ntInd].first;
@@ -74,54 +73,52 @@ void SpanLengthPhraseProperty::Populate(Map &map, size_t span, float count)
Map::iterator iter;
iter = map.find(span);
if (iter != map.end()) {
- float &value = iter->second;
- value += count;
- }
- else {
- map[span] = count;
+ float &value = iter->second;
+ value += count;
+ } else {
+ map[span] = count;
}
}
void SpanLengthPhraseProperty::CalcTotals(Vec &vec)
{
- for (size_t i = 0; i < vec.size(); ++i) {
- float total = 0;
-
- const Map &map = vec[i].first;
- Map::const_iterator iter;
- for (iter = map.begin(); iter != map.end(); ++iter) {
- float count = iter->second;
- total += count;
- }
-
- vec[i].second = total;
- }
+ for (size_t i = 0; i < vec.size(); ++i) {
+ float total = 0;
+
+ const Map &map = vec[i].first;
+ Map::const_iterator iter;
+ for (iter = map.begin(); iter != map.end(); ++iter) {
+ float count = iter->second;
+ total += count;
+ }
+
+ vec[i].second = total;
+ }
}
float SpanLengthPhraseProperty::GetProb(size_t ntInd, size_t sourceWidth, float smoothing) const
{
- float count;
-
- const std::pair<Map, float> &data = m_source[ntInd];
- const Map &map = data.first;
-
- if (map.size() == 0) {
- // should this ever be reached? there shouldn't be any span length proprty so FF shouldn't call this
- return 1.0f;
- }
-
- Map::const_iterator iter = map.find(sourceWidth);
- if (iter == map.end()) {
- count = 0;
- }
- else {
- count = iter->second;
- }
- count += smoothing;
-
- float total = data.second + smoothing * (float) map.size();
- float ret = count / total;
- return ret;
+ float count;
+
+ const std::pair<Map, float> &data = m_source[ntInd];
+ const Map &map = data.first;
+
+ if (map.size() == 0) {
+ // should this ever be reached? there shouldn't be any span length proprty so FF shouldn't call this
+ return 1.0f;
+ }
+
+ Map::const_iterator iter = map.find(sourceWidth);
+ if (iter == map.end()) {
+ count = 0;
+ } else {
+ count = iter->second;
+ }
+ count += smoothing;
+
+ float total = data.second + smoothing * (float) map.size();
+ float ret = count / total;
+ return ret;
}
}
diff --git a/moses/PP/SpanLengthPhraseProperty.h b/moses/PP/SpanLengthPhraseProperty.h
index 982c3ca0d..490b7db72 100644
--- a/moses/PP/SpanLengthPhraseProperty.h
+++ b/moses/PP/SpanLengthPhraseProperty.h
@@ -13,22 +13,22 @@ namespace Moses
class SpanLengthPhraseProperty : public PhraseProperty
{
public:
- SpanLengthPhraseProperty();
+ SpanLengthPhraseProperty();
- void ProcessValue(const std::string &value);
+ void ProcessValue(const std::string &value);
- float GetProb(size_t ntInd, size_t sourceWidth, float smoothing) const;
+ float GetProb(size_t ntInd, size_t sourceWidth, float smoothing) const;
protected:
- // fractional counts
- typedef std::map<size_t, float> Map;
- typedef std::vector<std::pair<Map, float> > Vec;
- Vec m_source, m_target;
+ // fractional counts
+ typedef std::map<size_t, float> Map;
+ typedef std::vector<std::pair<Map, float> > Vec;
+ Vec m_source, m_target;
- void Populate(const std::set< std::vector<std::string> > &indices, float count);
- void Populate(const std::vector<size_t> &toks, float count);
- void Populate(Map &map, size_t span, float count);
+ void Populate(const std::set< std::vector<std::string> > &indices, float count);
+ void Populate(const std::vector<size_t> &toks, float count);
+ void Populate(Map &map, size_t span, float count);
- void CalcTotals(Vec &vec);
+ void CalcTotals(Vec &vec);
};
} // namespace Moses
diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp
index 85d28bdb0..c5677b73b 100644
--- a/moses/Parameter.cpp
+++ b/moses/Parameter.cpp
@@ -25,14 +25,15 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <fstream>
#include <sstream>
#include <algorithm>
+#include <boost/algorithm/string/predicate.hpp>
#include "Parameter.h"
#include "Util.h"
#include "InputFileStream.h"
#include "StaticData.h"
-#include "UserMessage.h"
#include "util/exception.hh"
using namespace std;
+using namespace boost::algorithm;
namespace Moses
{
@@ -104,7 +105,7 @@ Parameter::Parameter()
AddParam("output-search-graph", "osg", "Output connected hypotheses of search into specified filename");
AddParam("output-search-graph-extended", "osgx", "Output connected hypotheses of search into specified filename, in extended format");
AddParam("unpruned-search-graph", "usg", "When outputting chart search graph, do not exclude dead ends. Note: stack pruning may have eliminated some hypotheses");
- AddParam("output-search-graph-slf", "slf", "Output connected hypotheses of search into specified directory, one file per sentence, in HTK standard lattice format (SLF) - the flag should be followed byy a directory name, which must exist");
+ AddParam("output-search-graph-slf", "slf", "Output connected hypotheses of search into specified directory, one file per sentence, in HTK standard lattice format (SLF) - the flag should be followed by a directory name, which must exist");
AddParam("output-search-graph-hypergraph", "Output connected hypotheses of search into specified directory, one file per sentence, in a hypergraph format (see Kenneth Heafield's lazy hypergraph decoder). This flag is followed by 3 values: 'true (gz|txt|bz) directory-name'");
AddParam("include-lhs-in-search-graph", "lhssg", "When outputting chart search graph, include the label of the LHS of the rule (useful when using syntax)");
#ifdef HAVE_PROTOBUF
@@ -112,7 +113,6 @@ Parameter::Parameter()
#endif
AddParam("cube-pruning-pop-limit", "cbp", "How many hypotheses should be popped for each stack. (default = 1000)");
AddParam("cube-pruning-diversity", "cbd", "How many hypotheses should be created for each coverage. (default = 0)");
- AddParam("search-algorithm", "Which search algorithm to use. 0=normal stack, 1=cube pruning, 2=cube growing. (default = 0)");
AddParam("description", "Source language, target language, description");
AddParam("max-chart-span", "maximum num. of source word chart rules can consume (default 10)");
AddParam("non-terminals", "list of non-term symbols, space separated");
@@ -121,7 +121,7 @@ Parameter::Parameter()
AddParam("output-hypo-score", "Output the hypo score to stdout with the output string. For search error analysis. Default is false");
AddParam("unknown-lhs", "file containing target lhs of unknown words. 1 per line: LHS prob");
AddParam("cube-pruning-lazy-scoring", "cbls", "Don't fully score a hypothesis until it is popped");
- AddParam("search-algorithm", "Which search algorithm to use. 0=normal stack, 1=cube pruning, 2=cube growing, 4=stack with batched lm requests (default = 0)");
+ AddParam("search-algorithm", "Which search algorithm to use. 0=normal stack, 1=cube pruning, 3=chart (with cube pruning), 4=stack with batched lm requests, 5=chart (with incremental search), 6=string-to-tree, 7=tree-to-string, 8=tree-to-string (SCFG-based), 9=forest-to-string (default = 0)");
AddParam("link-param-count", "Number of parameters on word links when using confusion networks or lattices (default = 1)");
AddParam("description", "Source language, target language, description");
@@ -161,6 +161,7 @@ Parameter::Parameter()
AddParam("weight-pp", "pp", "DEPRECATED. DO NOT USE. weight for phrase pair feature");
AddParam("weight-pb", "pb", "DEPRECATED. DO NOT USE. weight for phrase boundary feature");
AddParam("weight-t", "tm", "DEPRECATED. DO NOT USE. weights for translation model components");
+ AddParam("weight-p", "w", "DEPRECATED. DO NOT USE. weight for phrase penalty");
AddParam("weight-w", "w", "DEPRECATED. DO NOT USE. weight for word penalty");
AddParam("weight-u", "u", "DEPRECATED. DO NOT USE. weight for unknown word penalty");
AddParam("weight-e", "e", "DEPRECATED. DO NOT USE. weight for word deletion");
@@ -198,14 +199,17 @@ Parameter::Parameter()
AddParam("feature-name-overwrite", "Override feature name (NOT arguments). Eg. SRILM-->KENLM, PhraseDictionaryMemory-->PhraseDictionaryScope3");
AddParam("feature", "All the feature functions should be here");
+
AddParam("print-id", "prefix translations with id. Default if false");
+ AddParam("print-passthrough", "output the sgml tag <passthrough> without any computation on that. Default is false");
+ AddParam("print-passthrough-in-n-best", "output the sgml tag <passthrough> without any computation on that in each entry of the n-best-list. Default is false");
+
AddParam("alternate-weight-setting", "aws", "alternate set of weights to used per xml specification");
AddParam("placeholder-factor", "Which source factor to use to store the original text for placeholders. The factor must not be used by a translation or gen model");
AddParam("no-cache", "Disable all phrase-table caching. Default = false (ie. enable caching)");
AddParam("default-non-term-for-empty-range-only", "Don't add [X] to all ranges, just ranges where there isn't a source non-term. Default = false (ie. add [X] everywhere)");
- AddParam("s2t", "Use specialized string-to-tree decoder.");
AddParam("s2t-parsing-algorithm", "Which S2T parsing algorithm to use. 0=recursive CYK+, 1=scope-3 (default = 0)");
AddParam("spe-src", "Simulated post-editing. Source filename");
@@ -217,9 +221,15 @@ Parameter::~Parameter()
{
}
-const PARAM_VEC &Parameter::GetParam(const std::string &paramName)
+const PARAM_VEC *Parameter::GetParam(const std::string &paramName) const
{
- return m_setting[paramName];
+ PARAM_MAP::const_iterator iter = m_setting.find( paramName );
+ if (iter == m_setting.end()) {
+ return NULL;
+ } else {
+ return &iter->second;
+ }
+
}
/** initialize a parameter, sub of constructor */
@@ -262,8 +272,9 @@ bool Parameter::isOption(const char* token)
if (! token) return false;
std::string tokenString(token);
size_t length = tokenString.size();
- if (length > 0 && tokenString.substr(0,1) != "-") return false;
- if (length > 1 && tokenString.substr(1,1).find_first_not_of("0123456789") == 0) return true;
+ if (length <= 1) return false;
+ if (!starts_with(tokenString, "-")) return false;
+ if (tokenString.substr(1,1).find_first_not_of("0123456789") == 0) return true;
return false;
}
@@ -286,12 +297,12 @@ bool Parameter::LoadParam(int argc, char* argv[])
PrintFF();
cerr << endl;
- UserMessage::Add("No configuration file was specified. Use -config or -f");
+ cerr << "No configuration file was specified. Use -config or -f";
cerr << endl;
return false;
} else {
if (!ReadConfigFile(configPath)) {
- UserMessage::Add("Could not read "+configPath);
+ std::cerr << "Could not read " << configPath;
return false;
}
}
@@ -330,27 +341,29 @@ bool Parameter::LoadParam(int argc, char* argv[])
}
// don't mix old and new format
- if ((isParamSpecified("feature") || isParamSpecified("weight"))
- && (isParamSpecified("weight-slm") || isParamSpecified("weight-bl") || isParamSpecified("weight-d") ||
- isParamSpecified("weight-dlm") || isParamSpecified("weight-lrl") || isParamSpecified("weight-generation") ||
- isParamSpecified("weight-i") || isParamSpecified("weight-l") || isParamSpecified("weight-lex") ||
- isParamSpecified("weight-glm") || isParamSpecified("weight-wt") || isParamSpecified("weight-pp") ||
- isParamSpecified("weight-pb") || isParamSpecified("weight-t") || isParamSpecified("weight-w") ||
- isParamSpecified("weight-u") || isParamSpecified("weight-e") ||
- isParamSpecified("dlm-mode") || isParamSpecified("generation-file") || isParamSpecified("global-lexical-file") ||
- isParamSpecified("glm-feature") || isParamSpecified("lmodel-file") || isParamSpecified("lmodel-dub") ||
- isParamSpecified("slmodel-file") || isParamSpecified("slmodel-factor") ||
- isParamSpecified("slmodel-beam") || isParamSpecified("ttable-file") || isParamSpecified("phrase-pair-feature") ||
- isParamSpecified("phrase-boundary-source-feature") || isParamSpecified("phrase-boundary-target-feature") || isParamSpecified("phrase-length-feature") ||
- isParamSpecified("target-word-insertion-feature") || isParamSpecified("source-word-deletion-feature") || isParamSpecified("word-translation-feature")
+ if ((GetParam("feature") || GetParam("weight"))
+ && (GetParam("weight-slm") || GetParam("weight-bl") || GetParam("weight-d") ||
+ GetParam("weight-dlm") || GetParam("weight-lrl") || GetParam("weight-generation") ||
+ GetParam("weight-i") || GetParam("weight-l") || GetParam("weight-lex") ||
+ GetParam("weight-glm") || GetParam("weight-wt") || GetParam("weight-pp") ||
+ GetParam("weight-pb") || GetParam("weight-t") || GetParam("weight-w") ||
+ GetParam("weight-p") ||
+ GetParam("weight-u") || GetParam("weight-e") ||
+ GetParam("dlm-mode") || GetParam("generation-file") || GetParam("global-lexical-file") ||
+ GetParam("glm-feature") || GetParam("lmodel-file") || GetParam("lmodel-dub") ||
+ GetParam("slmodel-file") || GetParam("slmodel-factor") ||
+ GetParam("slmodel-beam") || GetParam("ttable-file") || GetParam("phrase-pair-feature") ||
+ GetParam("phrase-boundary-source-feature") || GetParam("phrase-boundary-target-feature") || GetParam("phrase-length-feature") ||
+ GetParam("target-word-insertion-feature") || GetParam("source-word-deletion-feature") || GetParam("word-translation-feature")
)
) {
UTIL_THROW(util::Exception, "Don't mix old and new ini file format");
}
// convert old weights args to new format
- if (!isParamSpecified("feature"))
+ if (GetParam("feature") == NULL) {
ConvertWeightArgs();
+ }
CreateWeightsMap();
WeightOverwrite();
@@ -361,13 +374,13 @@ bool Parameter::LoadParam(int argc, char* argv[])
string paramSwitch = (string) argv[i];
string paramName = paramSwitch.substr(1);
if (m_valid.find(paramName) == m_valid.end()) {
- UserMessage::Add("illegal switch: " + paramSwitch);
+ std::cerr << "illegal switch: " << paramSwitch;
noErrorFlag = false;
}
}
}
- //Save("/Users/mnadejde/Documents/workspace/MTM13/DATA/mtmGHKM/moses.ini.new");
+ //Save("/tmp/moses.ini.new");
// check if parameters make sense
return Validate() && noErrorFlag;
@@ -375,19 +388,16 @@ bool Parameter::LoadParam(int argc, char* argv[])
void Parameter::AddFeaturesCmd()
{
- if (!isParamSpecified("feature-add")) {
- return;
- }
-
- const PARAM_VEC &params = GetParam("feature-add");
+ const PARAM_VEC *params = GetParam("feature-add");
+ if (params) {
+ PARAM_VEC::const_iterator iter;
+ for (iter = params->begin(); iter != params->end(); ++iter) {
+ const string &line = *iter;
+ AddFeature(line);
+ }
- PARAM_VEC::const_iterator iter;
- for (iter = params.begin(); iter != params.end(); ++iter) {
- const string &line = *iter;
- AddFeature(line);
+ m_setting.erase("feature-add");
}
-
- m_setting.erase("feature-add");
}
std::vector<float> Parameter::GetWeights(const std::string &name)
@@ -461,9 +471,12 @@ void Parameter::ConvertWeightArgsSingleWeight(const string &oldWeightName, const
void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
{
+ const PARAM_VEC *params;
+
// process input weights 1st
- if (isParamSpecified("weight-i")) {
- vector<float> inputWeights = Scan<float>(m_setting["weight-i"]);
+ params = GetParam("weight-i");
+ if (params) {
+ vector<float> inputWeights = Scan<float>(*params);
PARAM_VEC &numInputScores = m_setting["input-scores"];
if (inputWeights.size() == 1) {
UTIL_THROW_IF2(numInputScores.size() != 0, "No [input-scores] section allowed");
@@ -485,28 +498,33 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
size_t numRealWordsInInput = 0;
map<string, size_t> ptIndices;
- if (GetParam("input-scores").size()) {
- numInputScores = Scan<size_t>(GetParam("input-scores")[0]);
- }
+ params = GetParam("input-scores");
+ if (params) {
+ numInputScores = Scan<size_t>(params->at(0));
- if (GetParam("input-scores").size() > 1) {
- numRealWordsInInput = Scan<size_t>(GetParam("input-scores")[1]);
+ if (params->size() > 1) {
+ numRealWordsInInput = Scan<size_t>(params->at(1));
+ }
}
// load phrase translation tables
- if (GetParam("ttable-file").size() > 0) {
+ params = GetParam("ttable-file");
+ if (params) {
// weights
- const vector<string> &translationVector = GetParam("ttable-file");
- vector<size_t> maxTargetPhrase = Scan<size_t>(GetParam("ttable-limit"));
+ const vector<string> translationVector = *params;
+
+ vector<size_t> maxTargetPhrase;
+ params = GetParam("ttable-limit");
+ if (params) {
+ maxTargetPhrase = Scan<size_t>(*params);
+ }
if(maxTargetPhrase.size() == 1 && translationVector.size() > 1) {
VERBOSE(1, "Using uniform ttable-limit of " << maxTargetPhrase[0] << " for all translation tables." << endl);
for(size_t i = 1; i < translationVector.size(); i++)
maxTargetPhrase.push_back(maxTargetPhrase[0]);
} else if(maxTargetPhrase.size() != 1 && maxTargetPhrase.size() < translationVector.size()) {
- stringstream strme;
- strme << "You specified " << translationVector.size() << " translation tables, but only " << maxTargetPhrase.size() << " ttable-limits.";
- UserMessage::Add(strme.str());
+ std::cerr << "You specified " << translationVector.size() << " translation tables, but only " << maxTargetPhrase.size() << " ttable-limits.";
return;
}
@@ -520,7 +538,7 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
vector<string> token = Tokenize(translationVector[currDict]);
if(currDict == 0 && token.size() == 4) {
- UserMessage::Add("Phrase table specification in old 4-field format. No longer supported");
+ std::cerr << "Phrase table specification in old 4-field format. No longer supported";
return;
}
UTIL_THROW_IF2(token.size() < 5, "Phrase table must have at least 5 scores");
@@ -550,6 +568,9 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
case 14: // DSuffixArray
ptType = "PhraseDictionaryDynSuffixArray";
break;
+ case 15: // DCacheBased:
+ ptType = "PhraseDictionaryDynamicCacheBased";
+ break;
default:
break;
}
@@ -568,8 +589,8 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
vector<float> weights(numFF);
for (size_t currFF = 0; currFF < numFF; ++currFF) {
- UTIL_THROW_IF2(currOldInd >= oldWeights.size(),
- "Errors converting old phrase-table weights to new weights");
+ UTIL_THROW_IF2(currOldInd >= oldWeights.size(),
+ "Errors converting old phrase-table weights to new weights");
float weight = Scan<float>(oldWeights[currOldInd]);
weights[currFF] = weight;
@@ -631,37 +652,36 @@ void Parameter::ConvertWeightArgsDistortion()
const string oldLexReordingName = "distortion-file";
// distortion / lex distortion
- const PARAM_VEC &oldWeights = GetParam(oldWeightName);
+ const PARAM_VEC *oldWeights = GetParam(oldWeightName);
- if (oldWeights.size() > 0) {
- if (!isParamSpecified("search-algorithm") ||
- (GetParam("search-algorithm").size() > 0
- && (Trim(GetParam("search-algorithm")[0]) == "0"
- ||Trim(GetParam("search-algorithm")[0]) == "1"
- )
+ if (oldWeights) {
+ const PARAM_VEC *searchAlgo = GetParam("search-algorithm");
+ if (searchAlgo == NULL ||
+ (searchAlgo->size() > 0
+ && (Trim(searchAlgo->at(0)) == "0" || Trim(searchAlgo->at(0)) == "1")
)
) {
// phrase-based. Add distance distortion to list of features
AddFeature("Distortion");
- SetWeight("Distortion", 0, Scan<float>(oldWeights[0]));
+ SetWeight("Distortion", 0, Scan<float>(oldWeights->at(0)));
}
// everything but the last is lex reordering model
size_t currOldInd = 1;
- const PARAM_VEC &lextable = GetParam(oldLexReordingName);
+ const PARAM_VEC *lextable = GetParam(oldLexReordingName);
- for (size_t indTable = 0; indTable < lextable.size(); ++indTable) {
- const string &line = lextable[indTable];
+ for (size_t indTable = 0; lextable && indTable < lextable->size(); ++indTable) {
+ const string &line = lextable->at(indTable);
vector<string> toks = Tokenize(line);
size_t numFF = Scan<size_t>(toks[2]);
vector<float> weights(numFF);
for (size_t currFF = 0; currFF < numFF; ++currFF) {
- UTIL_THROW_IF2(currOldInd >= oldWeights.size(),
- "Errors converting old distortion weights to new weights");
- float weight = Scan<float>(oldWeights[currOldInd]);
+ UTIL_THROW_IF2(oldWeights && currOldInd >= oldWeights->size(),
+ "Errors converting old distortion weights to new weights");
+ float weight = Scan<float>(oldWeights->at(currOldInd));
weights[currFF] = weight;
++currOldInd;
@@ -674,8 +694,8 @@ void Parameter::ConvertWeightArgsDistortion()
vector<FactorType> factors = Tokenize<FactorType>(toks[0], "-");
UTIL_THROW_IF2(factors.size() != 2,
- "Error in old factor specification for lexicalized reordering model: "
- << toks[0]);
+ "Error in old factor specification for lexicalized reordering model: "
+ << toks[0]);
strme << "input-factor=" << factors[0]
<< " output-factor=" << factors[1] << " ";
@@ -695,21 +715,23 @@ void Parameter::ConvertWeightArgsLM()
{
const string oldWeightName = "weight-l";
const string oldFeatureName = "lmodel-file";
+ const PARAM_VEC *params;
bool isChartDecoding = true;
- if (!isParamSpecified("search-algorithm") ||
- (GetParam("search-algorithm").size() > 0
- && (Trim(GetParam("search-algorithm")[0]) == "0"
- ||Trim(GetParam("search-algorithm")[0]) == "1"
- )
+
+ params = GetParam("search-algorithm");
+ if (params == NULL ||
+ (params->size() > 0
+ && (Trim(params->at(0)) == "0" || Trim(params->at(0)) == "1")
)
) {
isChartDecoding = false;
}
vector<int> oovWeights;
- if (isParamSpecified("lmodel-oov-feature")) {
- oovWeights = Scan<int>(m_setting["lmodel-oov-feature"]);
+ params = GetParam("lmodel-oov-feature");
+ if (params) {
+ oovWeights = Scan<int>(*params);
}
PARAM_MAP::iterator iterMap;
@@ -739,7 +761,7 @@ void Parameter::ConvertWeightArgsLM()
newFeatureName = "KENLM";
break;
default:
- UTIL_THROW2("Unkown language model type id:" << lmType);
+ UTIL_THROW2("Unkown language model type id:" << lmType);
}
size_t numFF = 1;
@@ -748,8 +770,8 @@ void Parameter::ConvertWeightArgsLM()
vector<float> weightsLM(numFF);
for (size_t currFF = 0; currFF < numFF; ++currFF) {
- UTIL_THROW_IF2(currOldInd >= weights.size(),
- "Errors converting old LM weights to new weights");
+ UTIL_THROW_IF2(currOldInd >= weights.size(),
+ "Errors converting old LM weights to new weights");
weightsLM[currFF] = Scan<float>(weights[currOldInd]);
if (isChartDecoding) {
weightsLM[currFF] = UntransformLMScore(weightsLM[currFF]);
@@ -800,8 +822,8 @@ void Parameter::ConvertWeightArgsGeneration(const std::string &oldWeightName, co
vector<float> weights(numFF);
for (size_t currFF = 0; currFF < numFF; ++currFF) {
- UTIL_THROW_IF2(currOldInd >= oldWeights.size(),
- "Errors converting old generation weights to new weights");
+ UTIL_THROW_IF2(currOldInd >= oldWeights.size(),
+ "Errors converting old generation weights to new weights");
float weight = Scan<float>(oldWeights[currOldInd]);
weights[currFF] = weight;
@@ -829,11 +851,10 @@ void Parameter::ConvertWeightArgsWordPenalty()
const std::string newWeightName = "WordPenalty";
bool isChartDecoding = true;
- if (!isParamSpecified("search-algorithm") ||
- (GetParam("search-algorithm").size() > 0
- && (Trim(GetParam("search-algorithm")[0]) == "0"
- ||Trim(GetParam("search-algorithm")[0]) == "1"
- )
+ const PARAM_VEC *searchAlgo = GetParam("search-algorithm");
+ if (searchAlgo == NULL ||
+ (searchAlgo->size() > 0
+ && (Trim(searchAlgo->at(0)) == "0" || Trim(searchAlgo->at(0)) == "1")
)
) {
isChartDecoding = false;
@@ -860,10 +881,11 @@ void Parameter::ConvertWeightArgsWordPenalty()
void Parameter::ConvertPhrasePenalty()
{
string oldWeightName = "weight-p";
- if (isParamSpecified(oldWeightName)) {
- UTIL_THROW_IF2(m_setting[oldWeightName].size() != 1,
- "There should be only 1 phrase-penalty weight");
- float weight = Scan<float>(m_setting[oldWeightName][0]);
+ const PARAM_VEC *params = GetParam(oldWeightName);
+ if (params) {
+ UTIL_THROW_IF2(params->size() != 1,
+ "There should be only 1 phrase-penalty weight");
+ float weight = Scan<float>(params->at(0));
AddFeature("PhrasePenalty");
SetWeight("PhrasePenalty", 0, weight);
@@ -875,7 +897,7 @@ void Parameter::ConvertWeightArgs()
{
// can't handle discr LM. must do it manually 'cos of bigram/n-gram split
UTIL_THROW_IF2( m_setting.count("weight-dlm") != 0,
- "Can't handle discr LM. must do it manually 'cos of bigram/n-gram split");
+ "Can't handle discr LM. must do it manually 'cos of bigram/n-gram split");
// check that old & new format aren't mixed
if (m_setting.count("weight") &&
@@ -922,20 +944,20 @@ void Parameter::CreateWeightsMap()
void Parameter::CreateWeightsMap(const PARAM_VEC &vec)
{
for (size_t i = 0; i < vec.size(); ++i) {
- const string &line = vec[i];
- vector<string> toks = Tokenize(line);
- UTIL_THROW_IF2(toks.size() < 2,
- "Error in format of weights: " << line);
-
- string name = toks[0];
- name = name.substr(0, name.size() - 1);
-
- vector<float> weights(toks.size() - 1);
- for (size_t i = 1; i < toks.size(); ++i) {
- float weight = Scan<float>(toks[i]);
- weights[i - 1] = weight;
- }
- m_weights[name] = weights;
+ const string &line = vec[i];
+ vector<string> toks = Tokenize(line);
+ UTIL_THROW_IF2(toks.size() < 2,
+ "Error in format of weights: " << line);
+
+ string name = toks[0];
+ name = name.substr(0, name.size() - 1);
+
+ vector<float> weights(toks.size() - 1);
+ for (size_t i = 1; i < toks.size(); ++i) {
+ float weight = Scan<float>(toks[i]);
+ weights[i - 1] = weight;
+ }
+ m_weights[name] = weights;
}
}
@@ -948,7 +970,7 @@ void Parameter::WeightOverwrite()
// should only be on 1 line
UTIL_THROW_IF2(vec.size() != 1,
- "Weight override should only be on 1 line");
+ "Weight override should only be on 1 line");
string name("");
vector<float> weights;
@@ -956,7 +978,7 @@ void Parameter::WeightOverwrite()
for (size_t i = 0; i < toks.size(); ++i) {
const string &tok = toks[i];
- if (tok.substr(tok.size() - 1, 1) == "=") {
+ if (starts_with(tok, "=")) {
// start of new feature
if (name != "") {
@@ -987,51 +1009,30 @@ bool Parameter::Validate()
const std::string &key = iterParams->first;
if (m_valid.find(key) == m_valid.end()) {
- UserMessage::Add("Unknown parameter " + key);
+ std::cerr << "Unknown parameter " << key;
noErrorFlag = false;
}
}
if (m_setting["lmodel-dub"].size() > 0) {
if (m_setting["lmodel-file"].size() != m_setting["lmodel-dub"].size()) {
- stringstream errorMsg("");
- errorMsg << "Config and parameters specify "
- << static_cast<int>(m_setting["lmodel-file"].size())
- << " language model files (lmodel-file), but "
- << static_cast<int>(m_setting["lmodel-dub"].size())
- << " LM upperbounds (lmodel-dub)"
- << endl;
- UserMessage::Add(errorMsg.str());
+ std::cerr << "Config and parameters specify "
+ << static_cast<int>(m_setting["lmodel-file"].size())
+ << " language model files (lmodel-file), but "
+ << static_cast<int>(m_setting["lmodel-dub"].size())
+ << " LM upperbounds (lmodel-dub)"
+ << endl;
noErrorFlag = false;
}
}
- /*
- const vector<float> &lmWeights = GetWeights("LM");
- if (m_setting["lmodel-file"].size() * (m_setting.find("lmodel-oov-feature") != m_setting.end() ? 2 : 1)
- != lmWeights.size()) {
- stringstream errorMsg("");
- errorMsg << "Config and parameters specify "
- << static_cast<int>(m_setting["lmodel-file"].size())
- << " language model files (lmodel-file), but "
- << static_cast<int>(lmWeights.size())
- << " weights (weight-l)";
- errorMsg << endl << "You might be giving '-lmodel-file TYPE FACTOR ORDER FILENAME' but you should be giving these four as a single argument, i.e. '-lmodel-file \"TYPE FACTOR ORDER FILENAME\"'";
- errorMsg << endl << "You should also remember that each language model requires 2 weights, if and only if lmodel-oov-feature is on.";
- UserMessage::Add(errorMsg.str());
- noErrorFlag = false;
- }
- */
-
// do files exist?
// input file
if (noErrorFlag && m_setting["input-file"].size() == 1) {
noErrorFlag = FileExists(m_setting["input-file"][0]);
if (!noErrorFlag) {
- stringstream errorMsg("");
- errorMsg << endl << "Input file " << m_setting["input-file"][0] << " does not exist";
- UserMessage::Add(errorMsg.str());
+ std::cerr << endl << "Input file " << m_setting["input-file"][0] << " does not exist";
}
}
// generation tables
@@ -1079,11 +1080,9 @@ bool Parameter::FilesExist(const string &paramName, int fieldNo, std::vector<std
tokenizeIndex = static_cast<size_t>(fieldNo);
if (tokenizeIndex >= vec.size()) {
- stringstream errorMsg("");
- errorMsg << "Expected at least " << (tokenizeIndex+1) << " tokens per entry in '"
- << paramName << "', but only found "
- << vec.size();
- UserMessage::Add(errorMsg.str());
+ std::cerr << "Expected at least " << (tokenizeIndex+1) << " tokens per entry in '"
+ << paramName << "', but only found "
+ << vec.size();
return false;
}
const string &pathStr = vec[tokenizeIndex];
@@ -1093,9 +1092,7 @@ bool Parameter::FilesExist(const string &paramName, int fieldNo, std::vector<std
fileFound|=FileExists(pathStr + extensions[i]);
}
if(!fileFound) {
- stringstream errorMsg("");
- errorMsg << "File " << pathStr << " does not exist";
- UserMessage::Add(errorMsg.str());
+ std::cerr << "File " << pathStr << " does not exist";
return false;
}
}
@@ -1112,9 +1109,7 @@ string Parameter::FindParam(const string &paramSwitch, int argc, char* argv[])
if (i+1 < argc) {
return argv[i+1];
} else {
- stringstream errorMsg("");
- errorMsg << "Option " << paramSwitch << " requires a parameter!";
- UserMessage::Add(errorMsg.str());
+ std::cerr << "Option " << paramSwitch << " requires a parameter!";
// TODO return some sort of error, not the empty string
}
}
@@ -1317,8 +1312,8 @@ void Parameter::OverwriteParam(const string &paramName, PARAM_VEC values)
if (m_setting[paramName].size() > 1) {
VERBOSE(2," (the parameter had " << m_setting[paramName].size() << " previous values)");
UTIL_THROW_IF2(m_setting[paramName].size() != values.size(),
- "Number of weight override for " << paramName
- << " is not the same as the original number of weights");
+ "Number of weight override for " << paramName
+ << " is not the same as the original number of weights");
} else {
VERBOSE(2," (the parameter does not have previous values)");
m_setting[paramName].resize(values.size());
@@ -1373,6 +1368,27 @@ void Parameter::Save(const std::string path)
file.close();
}
+template<>
+void Parameter::SetParameter<bool>(bool &parameter, const std::string &parameterName, const bool &defaultValue) const
+{
+ const PARAM_VEC *params = GetParam(parameterName);
+
+ // default value if nothing is specified
+ parameter = defaultValue;
+ if (params == NULL) {
+ return;
+ }
+
+ // if parameter is just specified as, e.g. "-parameter" set it true
+ if (params->size() == 0) {
+ parameter = true;
+ }
+ // if paramter is specified "-parameter true" or "-parameter false"
+ else if (params->size() == 1) {
+ parameter = Scan<bool>( params->at(0));
+ }
}
+} // namespace
+
diff --git a/moses/Parameter.h b/moses/Parameter.h
index 7e5e75496..c6b08dd85 100644
--- a/moses/Parameter.h
+++ b/moses/Parameter.h
@@ -27,6 +27,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <map>
#include <vector>
#include "TypeDef.h"
+#include "Util.h"
namespace Moses
{
@@ -73,6 +74,7 @@ protected:
void ConvertWeightArgsLM();
void ConvertWeightArgsDistortion();
void ConvertWeightArgsGeneration(const std::string &oldWeightName, const std::string &newWeightName);
+ void ConvertWeightArgsPhrasePenalty();
void ConvertWeightArgsWordPenalty();
void ConvertPhrasePenalty();
void CreateWeightsMap();
@@ -90,33 +92,18 @@ public:
void Explain();
/** return a vector of strings holding the whitespace-delimited values on the ini-file line corresponding to the given parameter name */
- const PARAM_VEC &GetParam(const std::string &paramName);
+ const PARAM_VEC *GetParam(const std::string &paramName) const;
/** check if parameter is defined (either in moses.ini or as switch) */
bool isParamSpecified(const std::string &paramName) const {
return m_setting.find( paramName ) != m_setting.end();
}
- const std::string GetFullName(std::string abbr) {
- return m_fullname[abbr];
- }
-
- const std::string GetAbbreviation(std::string full) {
- return m_abbreviation[full];
- }
- const PARAM_VEC &GetParamShortName(const std::string &paramName) {
- return GetParam(GetFullName(paramName));
- }
-
void OverwriteParam(const std::string &paramName, PARAM_VEC values);
- void OverwriteParamShortName(const std::string &paramShortName, PARAM_VEC values) {
- OverwriteParam(GetFullName(paramShortName),values);
- }
-
std::vector<float> GetWeights(const std::string &name);
std::map<std::string, std::vector<float> > GetAllWeights() const {
- return m_weights;
+ return m_weights;
}
std::set<std::string> GetWeightNames() const;
@@ -125,8 +112,22 @@ public:
}
void Save(const std::string path);
+
+ template<typename T>
+ void SetParameter(T &var, const std::string &name, const T &defaultValue) const {
+ const PARAM_VEC *params = GetParam(name);
+ if (params && params->size()) {
+ var = Scan<T>( params->at(0));
+ } else {
+ var = defaultValue;
+ }
+ }
+
};
+template<>
+void Parameter::SetParameter<bool>(bool &var, const std::string &name, const bool &defaultValue) const;
+
}
#endif
diff --git a/moses/PartialTranslOptColl.cpp b/moses/PartialTranslOptColl.cpp
index 709075c66..b0f906f2e 100644
--- a/moses/PartialTranslOptColl.cpp
+++ b/moses/PartialTranslOptColl.cpp
@@ -83,9 +83,9 @@ void PartialTranslOptColl::Prune()
// find nth element
NTH_ELEMENT4(m_list.begin(),
- m_list.begin() + m_maxSize,
- m_list.end(),
- ComparePartialTranslationOption);
+ m_list.begin() + m_maxSize,
+ m_list.end(),
+ ComparePartialTranslationOption);
m_worstScore = m_list[ m_maxSize-1 ]->GetFutureScore();
// delete the rest
diff --git a/moses/Phrase.cpp b/moses/Phrase.cpp
index 0aa8e4980..fe69ce008 100644
--- a/moses/Phrase.cpp
+++ b/moses/Phrase.cpp
@@ -210,8 +210,8 @@ void Phrase::CreateFromString(FactorDirection direction
size_t nextPos = annotatedWord.find('[', 1);
UTIL_THROW_IF2(nextPos == string::npos,
- "Incorrect formatting of non-terminal. Should have 2 non-terms, eg. [X][X]. "
- << "Current string: " << annotatedWord);
+ "Incorrect formatting of non-terminal. Should have 2 non-terms, eg. [X][X]. "
+ << "Current string: " << annotatedWord);
if (direction == Input)
annotatedWord = annotatedWord.substr(1, nextPos - 2);
@@ -381,7 +381,7 @@ void Phrase::InitStartEndWord()
size_t Phrase::Find(const Phrase &sought, int maxUnknown) const
{
if (GetSize() < sought.GetSize()) {
- // sought phrase too big
+ // sought phrase too big
return NOT_FOUND;
}
diff --git a/moses/Phrase.h b/moses/Phrase.h
index f6eb661de..947e50905 100644
--- a/moses/Phrase.h
+++ b/moses/Phrase.h
@@ -140,8 +140,9 @@ public:
}
size_t GetNumTerminals() const;
- size_t GetNumNonTerminals() const
- { return GetSize() - GetNumTerminals(); }
+ size_t GetNumNonTerminals() const {
+ return GetSize() - GetNumTerminals();
+ }
//! whether the 2D vector is a substring of this phrase
bool Contains(const std::vector< std::vector<std::string> > &subPhraseVector
@@ -165,8 +166,8 @@ public:
}
void RemoveWord(size_t pos) {
- UTIL_THROW_IF2(pos >= m_words.size(),
- "Referencing position " << pos << " out of bound");
+ UTIL_THROW_IF2(pos >= m_words.size(),
+ "Referencing position " << pos << " out of bound");
m_words.erase(m_words.begin() + pos);
}
diff --git a/moses/PrefixTreeMap.cpp b/moses/PrefixTreeMap.cpp
index ee7565d8b..1719ebdba 100644
--- a/moses/PrefixTreeMap.cpp
+++ b/moses/PrefixTreeMap.cpp
@@ -65,7 +65,8 @@ void Candidates::readBin(FILE* f)
const LabelId PrefixTreeMap::MagicWord = std::numeric_limits<LabelId>::max() - 1;
//////////////////////////////////////////////////////////////////
-PrefixTreeMap::~PrefixTreeMap() {
+PrefixTreeMap::~PrefixTreeMap()
+{
if(m_FileSrc) {
fClose(m_FileSrc);
}
@@ -99,8 +100,7 @@ WordVoc &ReadVoc(std::map<std::string,WordVoc> &vocs, const std::string& filenam
WordVoc &voc = vocs[filename];
voc.Read(filename);
return voc;
- }
- else {
+ } else {
return vi->second;
}
}
@@ -162,7 +162,7 @@ void PrefixTreeMap::GetCandidates(const IPhrase& key, Candidates* cands)
return;
}
UTIL_THROW_IF2(m_Data[key[0]]->findKey(key[0]) >= m_Data[key[0]]->size(),
- "Key not found: " << key[0]);
+ "Key not found: " << key[0]);
OFF_T candOffset = m_Data[key[0]]->find(key);
if(candOffset == InvalidOffT) {
@@ -189,7 +189,7 @@ void PrefixTreeMap::GetCandidates(const PPimp& p, Candidates* cands)
std::vector< std::string const * > PrefixTreeMap::ConvertPhrase(const IPhrase& p, unsigned int voc) const
{
UTIL_THROW_IF2(voc >= m_Voc.size() || m_Voc[voc] == 0,
- "Invalid vocab id: " << voc);
+ "Invalid vocab id: " << voc);
std::vector< std::string const * > result;
result.reserve(p.size());
for(IPhrase::const_iterator i = p.begin(); i != p.end(); ++i) {
@@ -201,7 +201,7 @@ std::vector< std::string const * > PrefixTreeMap::ConvertPhrase(const IPhrase& p
IPhrase PrefixTreeMap::ConvertPhrase(const std::vector< std::string >& p, unsigned int voc) const
{
UTIL_THROW_IF2(voc >= m_Voc.size() || m_Voc[voc] == 0,
- "Invalid vocab id: " << voc);
+ "Invalid vocab id: " << voc);
IPhrase result;
result.reserve(p.size());
for(size_t i = 0; i < p.size(); ++i) {
@@ -213,14 +213,14 @@ IPhrase PrefixTreeMap::ConvertPhrase(const std::vector< std::string >& p, unsign
LabelId PrefixTreeMap::ConvertWord(const std::string& w, unsigned int voc) const
{
UTIL_THROW_IF2(voc >= m_Voc.size() || m_Voc[voc] == 0,
- "Invalid vocab id: " << voc);
+ "Invalid vocab id: " << voc);
return m_Voc[voc]->index(w);
}
std::string PrefixTreeMap::ConvertWord(LabelId w, unsigned int voc) const
{
UTIL_THROW_IF2(voc >= m_Voc.size() || m_Voc[voc] == 0,
- "Invalid vocab id: " << voc);
+ "Invalid vocab id: " << voc);
if(w == PrefixTreeMap::MagicWord) {
return "|||";
} else if (w == InvalidLabelId) {
diff --git a/moses/RuleCube.cpp b/moses/RuleCube.cpp
index 451f80219..3a33ba5e5 100644
--- a/moses/RuleCube.cpp
+++ b/moses/RuleCube.cpp
@@ -103,7 +103,7 @@ void RuleCube::CreateNeighbor(const RuleCubeItem &item, int dimensionIndex,
std::ostream& operator<<(std::ostream &out, const RuleCube &obj)
{
- out << obj.GetItemSetSize();
- return out;
+ out << obj.GetItemSetSize();
+ return out;
}
}
diff --git a/moses/RuleCube.h b/moses/RuleCube.h
index c2e4a9e33..204279f0f 100644
--- a/moses/RuleCube.h
+++ b/moses/RuleCube.h
@@ -74,7 +74,7 @@ class RuleCubeItemEqualityPred
public:
bool operator()(const RuleCubeItem *p, const RuleCubeItem *q) const {
bool ret = p->GetHypothesisDimensions() == q->GetHypothesisDimensions() &&
- p->GetTranslationDimension() == q->GetTranslationDimension();
+ p->GetTranslationDimension() == q->GetTranslationDimension();
return ret;
}
};
@@ -92,7 +92,7 @@ public:
~RuleCube();
float GetTopScore() const {
- UTIL_THROW_IF2(m_queue.empty(), "Empty queue, nothing to pop");
+ UTIL_THROW_IF2(m_queue.empty(), "Empty queue, nothing to pop");
RuleCubeItem *item = m_queue.top();
return item->GetScore();
}
@@ -107,8 +107,9 @@ public:
return m_transOpt;
}
- size_t GetItemSetSize() const
- { return m_covered.size(); }
+ size_t GetItemSetSize() const {
+ return m_covered.size();
+ }
private:
typedef boost::unordered_set<RuleCubeItem*,
diff --git a/moses/ScoreComponentCollection.cpp b/moses/ScoreComponentCollection.cpp
index eedaa589e..a1c864692 100644
--- a/moses/ScoreComponentCollection.cpp
+++ b/moses/ScoreComponentCollection.cpp
@@ -1,10 +1,14 @@
// $Id$
#include <vector>
+#include <boost/algorithm/string/predicate.hpp>
#include "util/exception.hh"
#include "ScoreComponentCollection.h"
#include "StaticData.h"
+#include "moses/FF/StatelessFeatureFunction.h"
+#include "moses/FF/StatefulFeatureFunction.h"
using namespace std;
+using namespace boost::algorithm;
namespace Moses
{
@@ -85,7 +89,7 @@ void ScoreComponentCollection::MultiplyEquals(const FeatureFunction* sp, float s
for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
std::stringstream name;
name << i->first;
- if (name.str().substr( 0, prefix.length() ).compare( prefix ) == 0)
+ if (starts_with(name.str(), prefix))
m_scores[i->first] = i->second * scalar;
}
}
@@ -98,7 +102,7 @@ size_t ScoreComponentCollection::GetNumberWeights(const FeatureFunction* sp)
for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
std::stringstream name;
name << i->first;
- if (name.str().substr( 0, prefix.length() ).compare( prefix ) == 0)
+ if (starts_with(name.str(), prefix))
weights++;
}
return weights;
@@ -184,8 +188,8 @@ void ScoreComponentCollection::Save(ostream& out, bool multiline) const
string sep = " ";
string linesep = "\n";
if (!multiline) {
- sep = "=";
- linesep = " ";
+ sep = "=";
+ linesep = " ";
}
ScoreIndexMap::const_iterator iter = s_scoreIndexes.begin();
for (; iter != s_scoreIndexes.end(); ++iter ) {
@@ -283,7 +287,7 @@ FVector ScoreComponentCollection::GetVectorForProducer(const FeatureFunction* sp
for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
std::stringstream name;
name << i->first;
- if (name.str().substr( 0, prefix.length() ).compare( prefix ) == 0)
+ if (starts_with(name.str(), prefix))
fv[i->first] = i->second;
}
return fv;
@@ -301,6 +305,51 @@ void ScoreComponentCollection::PlusEquals(const FeatureFunction* sp, const Score
}
}
+void ScoreComponentCollection::OutputAllFeatureScores(std::ostream &out) const
+{
+ std::string lastName = "";
+ const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
+ for( size_t i=0; i<sff.size(); i++ ) {
+ const StatefulFeatureFunction *ff = sff[i];
+ if (ff->IsTuneable()) {
+ OutputFeatureScores( out, ff, lastName );
+ }
+ }
+ const vector<const StatelessFeatureFunction*>& slf = StatelessFeatureFunction::GetStatelessFeatureFunctions();
+ for( size_t i=0; i<slf.size(); i++ ) {
+ const StatelessFeatureFunction *ff = slf[i];
+ if (ff->IsTuneable()) {
+ OutputFeatureScores( out, ff, lastName );
+ }
+ }
+}
+
+void ScoreComponentCollection::OutputFeatureScores( std::ostream& out
+ , const FeatureFunction *ff
+ , std::string &lastName ) const
+{
+ const StaticData &staticData = StaticData::Instance();
+ bool labeledOutput = staticData.IsLabeledNBestList();
+
+ // regular features (not sparse)
+ if (ff->GetNumScoreComponents() != 0) {
+ if( labeledOutput && lastName != ff->GetScoreProducerDescription() ) {
+ lastName = ff->GetScoreProducerDescription();
+ out << " " << lastName << "=";
+ }
+ vector<float> scores = GetScoresForProducer( ff );
+ for (size_t j = 0; j<scores.size(); ++j) {
+ out << " " << scores[j];
+ }
+ }
+
+ // sparse features
+ const FVector scores = GetVectorForProducer( ff );
+ for(FVector::FNVmap::const_iterator i = scores.cbegin(); i != scores.cend(); i++) {
+ out << " " << i->first << "= " << i->second;
+ }
+}
+
}
diff --git a/moses/ScoreComponentCollection.h b/moses/ScoreComponentCollection.h
index b44216d29..ce285b59e 100644
--- a/moses/ScoreComponentCollection.h
+++ b/moses/ScoreComponentCollection.h
@@ -49,11 +49,11 @@ struct ScorePair {
std::vector<float> denseScores;
std::map<StringPiece, float> sparseScores;
- ScorePair()
- {}
+ ScorePair() {
+ }
ScorePair(const std::vector<float> &other)
- :denseScores(other)
- {}
+ :denseScores(other) {
+ }
void PlusEquals(const ScorePair &other);
void PlusEquals(const StringPiece &key, float value);
@@ -106,9 +106,9 @@ public:
if (indexIter == s_scoreIndexes.end()) {
std::stringstream strme;
strme << "ERROR: FeatureFunction: " << sp->GetScoreProducerDescription() <<
- " not registered with ScoreIndexMap" << std::endl;
+ " not registered with ScoreIndexMap" << std::endl;
strme << "You must call ScoreComponentCollection.RegisterScoreProducer() " <<
- " for every FeatureFunction" << std::endl;
+ " for every FeatureFunction" << std::endl;
UTIL_THROW2(strme.str());
}
return indexIter->second;
@@ -200,6 +200,11 @@ public:
m_scores.sparsePlusEquals(rhs.m_scores);
}
+ // add only core features
+ void CorePlusEquals(const ScoreComponentCollection& rhs) {
+ m_scores.corePlusEquals(rhs.m_scores);
+ }
+
void PlusEquals(const FVector& scores) {
m_scores += scores;
}
@@ -237,7 +242,7 @@ public:
void PlusEquals(const FeatureFunction* sp, const std::vector<float>& scores) {
IndexPair indexes = GetIndexes(sp);
UTIL_THROW_IF2(scores.size() != indexes.second - indexes.first,
- "Number of scores is incorrect");
+ "Number of scores is incorrect");
for (size_t i = 0; i < scores.size(); ++i) {
m_scores[i + indexes.first] += scores[i];
}
@@ -249,7 +254,7 @@ public:
void PlusEquals(const FeatureFunction* sp, float score) {
IndexPair indexes = GetIndexes(sp);
UTIL_THROW_IF2(1 != indexes.second - indexes.first,
- "Number of scores is incorrect");
+ "Number of scores is incorrect");
m_scores[indexes.first] += score;
}
@@ -284,7 +289,7 @@ public:
void Assign(const FeatureFunction* sp, float score) {
IndexPair indexes = GetIndexes(sp);
UTIL_THROW_IF2(1 != indexes.second - indexes.first,
- "Feature function must must only contain 1 score");
+ "Feature function must only contain 1 score");
m_scores[indexes.first] = score;
}
@@ -315,7 +320,7 @@ public:
float PartialInnerProduct(const FeatureFunction* sp, const std::vector<float>& rhs) const {
std::vector<float> lhs = GetScoresForProducer(sp);
UTIL_THROW_IF2(lhs.size() != rhs.size(),
- "Number of weights must match number of scores");
+ "Number of weights must match number of scores");
return std::inner_product(lhs.begin(), lhs.end(), rhs.begin(), 0.0f);
}
@@ -369,7 +374,7 @@ public:
float GetScoreForProducer(const FeatureFunction* sp) const {
IndexPair indexes = GetIndexes(sp);
UTIL_THROW_IF2(indexes.second - indexes.first != 1,
- "Feature function must must only contain 1 score");
+ "Feature function must only contain 1 score");
return m_scores[indexes.first];
}
@@ -429,6 +434,11 @@ public:
m_scores.merge(other.m_scores);
}
+ void OutputAllFeatureScores(std::ostream &out) const;
+ void OutputFeatureScores( std::ostream& out
+ , const Moses::FeatureFunction *ff
+ , std::string &lastName ) const;
+
#ifdef MPI_ENABLE
public:
friend class boost::serialization::access;
diff --git a/moses/ScoreComponentCollectionTest.cpp b/moses/ScoreComponentCollectionTest.cpp
index a238d66b8..87c4f03b7 100644
--- a/moses/ScoreComponentCollectionTest.cpp
+++ b/moses/ScoreComponentCollectionTest.cpp
@@ -37,17 +37,21 @@ public:
void EvaluateWhenApplied(const Hypothesis&, ScoreComponentCollection*) const {}
void EvaluateWhenApplied(const ChartHypothesis&, ScoreComponentCollection*) const {}
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
};
diff --git a/moses/Search.cpp b/moses/Search.cpp
index 030071021..8f947f622 100644
--- a/moses/Search.cpp
+++ b/moses/Search.cpp
@@ -2,7 +2,6 @@
#include "SearchCubePruning.h"
#include "SearchNormal.h"
#include "SearchNormalBatch.h"
-#include "UserMessage.h"
#include "util/exception.hh"
namespace Moses
@@ -25,12 +24,10 @@ Search *Search::CreateSearch(Manager& manager, const InputType &source,
return new SearchNormal(manager,source, transOptColl);
case CubePruning:
return new SearchCubePruning(manager, source, transOptColl);
- case CubeGrowing:
- return NULL;
case NormalBatch:
return new SearchNormalBatch(manager, source, transOptColl);
default:
- UTIL_THROW2("ERROR: search. Aborting\n");
+ UTIL_THROW2("ERROR: search. Aborting\n");
return NULL;
}
}
diff --git a/moses/Search.h b/moses/Search.h
index 90427f3c2..164cc33ef 100644
--- a/moses/Search.h
+++ b/moses/Search.h
@@ -30,7 +30,7 @@ public:
virtual const Hypothesis *GetBestHypothesis() const = 0;
//! Decode the sentence according to the specified search algorithm.
- virtual void ProcessSentence() = 0;
+ virtual void Decode() = 0;
explicit Search(Manager& manager);
virtual ~Search() {}
diff --git a/moses/SearchCubePruning.cpp b/moses/SearchCubePruning.cpp
index 60fcef108..6c981276e 100644
--- a/moses/SearchCubePruning.cpp
+++ b/moses/SearchCubePruning.cpp
@@ -64,7 +64,7 @@ SearchCubePruning::~SearchCubePruning()
* Main decoder loop that translates a sentence by expanding
* hypotheses stack by stack, until the end of the sentence.
*/
-void SearchCubePruning::ProcessSentence()
+void SearchCubePruning::Decode()
{
const StaticData &staticData = StaticData::Instance();
diff --git a/moses/SearchCubePruning.h b/moses/SearchCubePruning.h
index d959c9e64..334204004 100644
--- a/moses/SearchCubePruning.h
+++ b/moses/SearchCubePruning.h
@@ -35,7 +35,7 @@ public:
SearchCubePruning(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl);
~SearchCubePruning();
- void ProcessSentence();
+ void Decode();
void OutputHypoStackSize();
void OutputHypoStack(int stack);
diff --git a/moses/SearchNormal.cpp b/moses/SearchNormal.cpp
index 2314d85f7..786b554c6 100644
--- a/moses/SearchNormal.cpp
+++ b/moses/SearchNormal.cpp
@@ -49,7 +49,7 @@ SearchNormal::~SearchNormal()
* Main decoder loop that translates a sentence by expanding
* hypotheses stack by stack, until the end of the sentence.
*/
-void SearchNormal::ProcessSentence()
+void SearchNormal::Decode()
{
const StaticData &staticData = StaticData::Instance();
SentenceStats &stats = m_manager.GetSentenceStats();
@@ -397,13 +397,13 @@ void SearchNormal::OutputHypoStackSize()
void SearchNormal::OutputHypoStack()
{
- // all stacks
- int i = 0;
- vector < HypothesisStack* >::iterator iterStack;
- for (iterStack = m_hypoStackColl.begin() ; iterStack != m_hypoStackColl.end() ; ++iterStack) {
- HypothesisStackNormal &hypoColl = *static_cast<HypothesisStackNormal*>(*iterStack);
- TRACE_ERR( "Stack " << i++ << ": " << endl << hypoColl << endl);
- }
+ // all stacks
+ int i = 0;
+ vector < HypothesisStack* >::iterator iterStack;
+ for (iterStack = m_hypoStackColl.begin() ; iterStack != m_hypoStackColl.end() ; ++iterStack) {
+ HypothesisStackNormal &hypoColl = *static_cast<HypothesisStackNormal*>(*iterStack);
+ TRACE_ERR( "Stack " << i++ << ": " << endl << hypoColl << endl);
+ }
}
}
diff --git a/moses/SearchNormal.h b/moses/SearchNormal.h
index d76e102c2..2d43187b6 100644
--- a/moses/SearchNormal.h
+++ b/moses/SearchNormal.h
@@ -36,7 +36,7 @@ public:
SearchNormal(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl);
~SearchNormal();
- void ProcessSentence();
+ void Decode();
void OutputHypoStackSize();
void OutputHypoStack();
diff --git a/moses/SearchNormalBatch.cpp b/moses/SearchNormalBatch.cpp
index 9700a0694..612a5deea 100644
--- a/moses/SearchNormalBatch.cpp
+++ b/moses/SearchNormalBatch.cpp
@@ -40,7 +40,7 @@ SearchNormalBatch::~SearchNormalBatch()
* Main decoder loop that translates a sentence by expanding
* hypotheses stack by stack, until the end of the sentence.
*/
-void SearchNormalBatch::ProcessSentence()
+void SearchNormalBatch::Decode()
{
const StaticData &staticData = StaticData::Instance();
SentenceStats &stats = m_manager.GetSentenceStats();
@@ -140,7 +140,7 @@ ExpandHypothesis(const Hypothesis &hypothesis,
}
m_partial_hypos.push_back(newHypo);
} else {
- UTIL_THROW2("can't use early discarding with batch decoding!");
+ UTIL_THROW2("can't use early discarding with batch decoding!");
}
}
diff --git a/moses/SearchNormalBatch.h b/moses/SearchNormalBatch.h
index 8cb982649..1e8acc579 100644
--- a/moses/SearchNormalBatch.h
+++ b/moses/SearchNormalBatch.h
@@ -35,7 +35,7 @@ public:
SearchNormalBatch(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl);
~SearchNormalBatch();
- void ProcessSentence();
+ void Decode();
};
diff --git a/moses/Sentence.cpp b/moses/Sentence.cpp
index 6754321bd..58d650aa3 100644
--- a/moses/Sentence.cpp
+++ b/moses/Sentence.cpp
@@ -26,6 +26,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Sentence.h"
#include "TranslationOptionCollectionText.h"
#include "StaticData.h"
+#include "moses/FF/DynamicCacheBasedLanguageModel.h"
+#include "moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h"
#include "ChartTranslationOptions.h"
#include "Util.h"
#include "XmlOption.h"
@@ -126,8 +128,41 @@ int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
this->SetSpecifiesWeightSetting(false);
}
+ std::vector< std::map<std::string, std::string> > dlt_meta = ProcessAndStripDLT(line);
+
+ PhraseDictionaryDynamicCacheBased* cbtm = NULL;
+ DynamicCacheBasedLanguageModel* cblm = NULL;
+ std::vector< std::map<std::string, std::string> >::iterator dlt_meta_it = dlt_meta.begin();
+ for (dlt_meta_it = dlt_meta.begin(); dlt_meta_it != dlt_meta.end(); ++dlt_meta_it) {
+
+ if ((*dlt_meta_it).find("type") != (*dlt_meta_it).end()) {
+ if ((*dlt_meta_it)["type"] == "cbtm") {
+ std::string id = "default";
+ if ((*dlt_meta_it).find("id") != (*dlt_meta_it).end()) {
+ id = (*dlt_meta_it)["id"];
+ }
+ cbtm = PhraseDictionaryDynamicCacheBased::InstanceNonConst(id);
+ if (cbtm) cbtm->ExecuteDlt(*dlt_meta_it);
+ }
+ if ((*dlt_meta_it)["type"] == "cblm") {
+ std::string id = "default";
+ if ((*dlt_meta_it).find("id") != (*dlt_meta_it).end()) {
+ id = (*dlt_meta_it)["id"];
+ }
+ cblm = DynamicCacheBasedLanguageModel::InstanceNonConst(id);
+ if (cblm) cblm->ExecuteDlt(*dlt_meta_it);
+ }
+ }
+ }
+
+ // if the sentence is specified as "<passthrough tag1=""/>"
+ if (staticData.IsPassthroughEnabled() || staticData.IsPassthroughInNBestEnabled()) {
+ std::string passthrough = PassthroughSGML(line,"passthrough");
+ this->SetPassthroughInformation(passthrough);
+ }
+
+
// parse XML markup in translation line
- //const StaticData &staticData = StaticData::Instance();
std::vector< size_t > xmlWalls;
std::vector< std::pair<size_t, std::string> > placeholders;
@@ -312,10 +347,10 @@ std::vector <ChartTranslationOptions*> Sentence::GetXmlChartTranslationOptions()
return ret;
}
-void
+void
Sentence::
-CreateFromString(const std::vector<FactorType> &factorOrder,
- const std::string &phraseString)
+CreateFromString(const std::vector<FactorType> &factorOrder,
+ const std::string &phraseString)
// , const std::string &factorDelimiter)
{
// Phrase::CreateFromString(Input, factorOrder, phraseString, factorDelimiter, NULL);
diff --git a/moses/Sentence.h b/moses/Sentence.h
index 231ee85a5..4f206c0d4 100644
--- a/moses/Sentence.h
+++ b/moses/Sentence.h
@@ -91,13 +91,13 @@ public:
void GetXmlTranslationOptions(std::vector <TranslationOption*> &list, size_t startPos, size_t endPos) const;
std::vector <ChartTranslationOptions*> GetXmlChartTranslationOptions() const;
- int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
+ virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
void Print(std::ostream& out) const;
TranslationOptionCollection* CreateTranslationOptionCollection() const;
- void CreateFromString(const std::vector<FactorType> &factorOrder
- , const std::string &phraseString); // , const std::string &factorDelimiter);
+ virtual void CreateFromString(const std::vector<FactorType> &factorOrder
+ , const std::string &phraseString); // , const std::string &factorDelimiter);
const NonTerminalSet &GetLabelSet(size_t /*startPos*/, size_t /*endPos*/) const {
return m_defaultLabelSet;
diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp
index 63b2be844..94d5381f5 100644
--- a/moses/StaticData.cpp
+++ b/moses/StaticData.cpp
@@ -1,5 +1,5 @@
// $Id$
-// vim:tabstop=2
+// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
@@ -21,11 +21,15 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <string>
+#include <boost/algorithm/string/predicate.hpp>
+#include "moses/FF/Factory.h"
#include "TypeDef.h"
#include "moses/FF/WordPenaltyProducer.h"
#include "moses/FF/UnknownWordPenaltyProducer.h"
#include "moses/FF/InputFeature.h"
+#include "moses/FF/DynamicCacheBasedLanguageModel.h"
+#include "moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h"
#include "DecodeStepTranslation.h"
#include "DecodeStepGeneration.h"
@@ -34,7 +38,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Util.h"
#include "FactorCollection.h"
#include "Timer.h"
-#include "UserMessage.h"
#include "TranslationOption.h"
#include "DecodeGraph.h"
#include "InputFileStream.h"
@@ -48,6 +51,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#endif
using namespace std;
+using namespace boost::algorithm;
namespace Moses
{
@@ -58,16 +62,13 @@ StaticData StaticData::s_instance;
StaticData::StaticData()
:m_sourceStartPosMattersForRecombination(false)
,m_inputType(SentenceInput)
- ,m_detailedTranslationReportingFilePath()
- ,m_detailedTreeFragmentsTranslationReportingFilePath()
,m_onlyDistinctNBest(false)
,m_needAlignmentInfo(false)
- ,m_factorDelimiter("|") // default delimiter between factors
,m_lmEnableOOVFeature(false)
,m_isAlwaysCreateDirectTranslationOption(false)
,m_currentWeightSetting("default")
+ ,m_requireSortingAfterSourceContext(false)
,m_treeStructure(NULL)
- ,m_useS2TDecoder(false)
{
m_xmlBrackets.first="<";
m_xmlBrackets.second=">";
@@ -103,22 +104,20 @@ bool StaticData::LoadData(Parameter *parameter)
ResetUserTime();
m_parameter = parameter;
+ const PARAM_VEC *params;
+
// verbose level
- m_verboseLevel = 1;
- if (m_parameter->GetParam("verbose").size() == 1) {
- m_verboseLevel = Scan<size_t>( m_parameter->GetParam("verbose")[0]);
- }
+ m_parameter->SetParameter(m_verboseLevel, "verbose", (size_t) 1);
// to cube or not to cube
- m_searchAlgorithm = (m_parameter->GetParam("search-algorithm").size() > 0) ?
- (SearchAlgorithm) Scan<size_t>(m_parameter->GetParam("search-algorithm")[0]) : Normal;
+ m_parameter->SetParameter(m_searchAlgorithm, "search-algorithm", Normal);
if (IsChart())
LoadChartDecodingParameters();
// input type has to be specified BEFORE loading the phrase tables!
- if(m_parameter->GetParam("inputtype").size())
- m_inputType= (InputTypeEnum) Scan<int>(m_parameter->GetParam("inputtype")[0]);
+ m_parameter->SetParameter(m_inputType, "inputtype", SentenceInput);
+
std::string s_it = "text input";
if (m_inputType == 1) {
s_it = "confusion net";
@@ -131,92 +130,97 @@ bool StaticData::LoadData(Parameter *parameter)
}
VERBOSE(2,"input type is: "<<s_it<<"\n");
- if(m_parameter->GetParam("recover-input-path").size()) {
- m_recoverPath = Scan<bool>(m_parameter->GetParam("recover-input-path")[0]);
- if (m_recoverPath && m_inputType == SentenceInput) {
- TRACE_ERR("--recover-input-path should only be used with confusion net or word lattice input!\n");
- m_recoverPath = false;
- }
+ m_parameter->SetParameter(m_recoverPath, "recover-input-path", false);
+ if (m_recoverPath && m_inputType == SentenceInput) {
+ TRACE_ERR("--recover-input-path should only be used with confusion net or word lattice input!\n");
+ m_recoverPath = false;
}
// factor delimiter
- if (m_parameter->GetParam("factor-delimiter").size() > 0) {
- m_factorDelimiter = m_parameter->GetParam("factor-delimiter")[0];
- if (m_factorDelimiter == "none")
- m_factorDelimiter = "";
+ m_parameter->SetParameter<string>(m_factorDelimiter, "factor-delimiter", "|");
+ if (m_factorDelimiter == "none") {
+ m_factorDelimiter = "";
}
- SetBooleanParameter( &m_continuePartialTranslation, "continue-partial-translation", false );
- SetBooleanParameter( &m_outputHypoScore, "output-hypo-score", false );
+ m_parameter->SetParameter( m_continuePartialTranslation, "continue-partial-translation", false );
+ m_parameter->SetParameter( m_outputHypoScore, "output-hypo-score", false );
//word-to-word alignment
// alignments
- SetBooleanParameter( &m_PrintAlignmentInfo, "print-alignment-info", false );
+ m_parameter->SetParameter( m_PrintAlignmentInfo, "print-alignment-info", false );
if (m_PrintAlignmentInfo) {
m_needAlignmentInfo = true;
}
- if(m_parameter->GetParam("sort-word-alignment").size()) {
- m_wordAlignmentSort = (WordAlignmentSort) Scan<size_t>(m_parameter->GetParam("sort-word-alignment")[0]);
- }
+ m_parameter->SetParameter(m_wordAlignmentSort, "sort-word-alignment", NoSort);
- SetBooleanParameter( &m_PrintAlignmentInfoNbest, "print-alignment-info-in-n-best", false );
+ m_parameter->SetParameter( m_PrintAlignmentInfoNbest, "print-alignment-info-in-n-best", false );
if (m_PrintAlignmentInfoNbest) {
m_needAlignmentInfo = true;
}
- if (m_parameter->GetParam("alignment-output-file").size() > 0) {
- m_alignmentOutputFile = Scan<std::string>(m_parameter->GetParam("alignment-output-file")[0]);
+ params = m_parameter->GetParam("alignment-output-file");
+ if (params && params->size()) {
+ m_alignmentOutputFile = Scan<std::string>(params->at(0));
m_needAlignmentInfo = true;
}
+ m_parameter->SetParameter( m_PrintID, "print-id", false );
+ m_parameter->SetParameter( m_PrintPassthroughInformation, "print-passthrough", false );
+ m_parameter->SetParameter( m_PrintPassthroughInformationInNBest, "print-passthrough-in-n-best", false );
+
// n-best
- if (m_parameter->GetParam("n-best-list").size() >= 2) {
- m_nBestFilePath = m_parameter->GetParam("n-best-list")[0];
- m_nBestSize = Scan<size_t>( m_parameter->GetParam("n-best-list")[1] );
- m_onlyDistinctNBest=(m_parameter->GetParam("n-best-list").size()>2
- && m_parameter->GetParam("n-best-list")[2]=="distinct");
- } else if (m_parameter->GetParam("n-best-list").size() == 1) {
- UserMessage::Add(string("wrong format for switch -n-best-list file size"));
- return false;
+ params = m_parameter->GetParam("n-best-list");
+ if (params) {
+ if (params->size() >= 2) {
+ m_nBestFilePath = params->at(0);
+ m_nBestSize = Scan<size_t>( params->at(1) );
+ m_onlyDistinctNBest=(params->size()>2 && params->at(2)=="distinct");
+ } else {
+ std::cerr << "wrong format for switch -n-best-list file size [distinct]";
+ return false;
+ }
} else {
m_nBestSize = 0;
}
- if (m_parameter->GetParam("n-best-factor").size() > 0) {
- m_nBestFactor = Scan<size_t>( m_parameter->GetParam("n-best-factor")[0]);
- } else {
- m_nBestFactor = 20;
- }
+
+ m_parameter->SetParameter<size_t>(m_nBestFactor, "n-best-factor", 20);
//lattice samples
- if (m_parameter->GetParam("lattice-samples").size() ==2 ) {
- m_latticeSamplesFilePath = m_parameter->GetParam("lattice-samples")[0];
- m_latticeSamplesSize = Scan<size_t>(m_parameter->GetParam("lattice-samples")[1]);
- } else if (m_parameter->GetParam("lattice-samples").size() != 0 ) {
- UserMessage::Add(string("wrong format for switch -lattice-samples file size"));
- return false;
+ params = m_parameter->GetParam("lattice-samples");
+ if (params) {
+ if (params->size() ==2 ) {
+ m_latticeSamplesFilePath = params->at(0);
+ m_latticeSamplesSize = Scan<size_t>(params->at(1));
+ } else {
+ std::cerr <<"wrong format for switch -lattice-samples file size";
+ return false;
+ }
} else {
m_latticeSamplesSize = 0;
}
// word graph
- if (m_parameter->GetParam("output-word-graph").size() == 2)
+ params = m_parameter->GetParam("output-word-graph");
+ if (params && params->size() == 2)
m_outputWordGraph = true;
else
m_outputWordGraph = false;
// search graph
- if (m_parameter->GetParam("output-search-graph").size() > 0) {
- if (m_parameter->GetParam("output-search-graph").size() != 1) {
- UserMessage::Add(string("ERROR: wrong format for switch -output-search-graph file"));
+ params = m_parameter->GetParam("output-search-graph");
+ if (params && params->size()) {
+ if (params->size() != 1) {
+ std::cerr << "ERROR: wrong format for switch -output-search-graph file";
return false;
}
m_outputSearchGraph = true;
}
// ... in extended format
- else if (m_parameter->GetParam("output-search-graph-extended").size() > 0) {
- if (m_parameter->GetParam("output-search-graph-extended").size() != 1) {
- UserMessage::Add(string("ERROR: wrong format for switch -output-search-graph-extended file"));
+ else if (m_parameter->GetParam("output-search-graph-extended") &&
+ m_parameter->GetParam("output-search-graph-extended")->size()) {
+ if (m_parameter->GetParam("output-search-graph-extended")->size() != 1) {
+ std::cerr << "ERROR: wrong format for switch -output-search-graph-extended file";
return false;
}
m_outputSearchGraph = true;
@@ -224,69 +228,67 @@ bool StaticData::LoadData(Parameter *parameter)
} else {
m_outputSearchGraph = false;
}
- if (m_parameter->GetParam("output-search-graph-slf").size() > 0) {
+
+ params = m_parameter->GetParam("output-search-graph-slf");
+ if (params && params->size()) {
m_outputSearchGraphSLF = true;
} else {
m_outputSearchGraphSLF = false;
}
- if (m_parameter->GetParam("output-search-graph-hypergraph").size() > 0) {
+
+ params = m_parameter->GetParam("output-search-graph-hypergraph");
+ if (params && params->size()) {
m_outputSearchGraphHypergraph = true;
} else {
m_outputSearchGraphHypergraph = false;
}
+
#ifdef HAVE_PROTOBUF
- if (m_parameter->GetParam("output-search-graph-pb").size() > 0) {
- if (m_parameter->GetParam("output-search-graph-pb").size() != 1) {
- UserMessage::Add(string("ERROR: wrong format for switch -output-search-graph-pb path"));
+ params = m_parameter->GetParam("output-search-graph-pb");
+ if (params && params->size()) {
+ if (params->size() != 1) {
+ cerr << "ERROR: wrong format for switch -output-search-graph-pb path";
return false;
}
m_outputSearchGraphPB = true;
} else
m_outputSearchGraphPB = false;
#endif
- SetBooleanParameter( &m_unprunedSearchGraph, "unpruned-search-graph", false );
- SetBooleanParameter( &m_includeLHSInSearchGraph, "include-lhs-in-search-graph", false );
- if (m_parameter->isParamSpecified("output-unknowns")) {
+ m_parameter->SetParameter( m_unprunedSearchGraph, "unpruned-search-graph", false );
+ m_parameter->SetParameter( m_includeLHSInSearchGraph, "include-lhs-in-search-graph", false );
- if (m_parameter->GetParam("output-unknowns").size() == 1) {
- m_outputUnknownsFile =Scan<string>(m_parameter->GetParam("output-unknowns")[0]);
- } else {
- UserMessage::Add(string("need to specify exactly one file name for unknowns"));
- return false;
- }
- }
+ m_parameter->SetParameter<string>(m_outputUnknownsFile, "output-unknowns", "");
// include feature names in the n-best list
- SetBooleanParameter( &m_labeledNBestList, "labeled-n-best-list", true );
+ m_parameter->SetParameter( m_labeledNBestList, "labeled-n-best-list", true );
// include word alignment in the n-best list
- SetBooleanParameter( &m_nBestIncludesSegmentation, "include-segmentation-in-n-best", false );
+ m_parameter->SetParameter( m_nBestIncludesSegmentation, "include-segmentation-in-n-best", false );
// printing source phrase spans
- SetBooleanParameter( &m_reportSegmentation, "report-segmentation", false );
- SetBooleanParameter( &m_reportSegmentationEnriched, "report-segmentation-enriched", false );
+ m_parameter->SetParameter( m_reportSegmentation, "report-segmentation", false );
+ m_parameter->SetParameter( m_reportSegmentationEnriched, "report-segmentation-enriched", false );
// print all factors of output translations
- SetBooleanParameter( &m_reportAllFactors, "report-all-factors", false );
+ m_parameter->SetParameter( m_reportAllFactors, "report-all-factors", false );
// print all factors of output translations
- SetBooleanParameter( &m_reportAllFactorsNBest, "report-all-factors-in-n-best", false );
+ m_parameter->SetParameter( m_reportAllFactorsNBest, "report-all-factors-in-n-best", false );
//input factors
- const vector<string> &inputFactorVector = m_parameter->GetParam("input-factors");
- for(size_t i=0; i<inputFactorVector.size(); i++) {
- m_inputFactorOrder.push_back(Scan<FactorType>(inputFactorVector[i]));
+ params = m_parameter->GetParam("input-factors");
+ if (params) {
+ m_inputFactorOrder = Scan<FactorType>(*params);
}
if(m_inputFactorOrder.empty()) {
- UserMessage::Add(string("no input factor specified in config file"));
- return false;
+ m_inputFactorOrder.push_back(0);
}
//output factors
- const vector<string> &outputFactorVector = m_parameter->GetParam("output-factors");
- for(size_t i=0; i<outputFactorVector.size(); i++) {
- m_outputFactorOrder.push_back(Scan<FactorType>(outputFactorVector[i]));
+ params = m_parameter->GetParam("output-factors");
+ if (params) {
+ m_outputFactorOrder = Scan<FactorType>(*params);
}
if(m_outputFactorOrder.empty()) {
// default. output factor 0
@@ -294,234 +296,182 @@ bool StaticData::LoadData(Parameter *parameter)
}
//source word deletion
- SetBooleanParameter( &m_wordDeletionEnabled, "phrase-drop-allowed", false );
+ m_parameter->SetParameter(m_wordDeletionEnabled, "phrase-drop-allowed", false );
//Disable discarding
- SetBooleanParameter(&m_disableDiscarding, "disable-discarding", false);
+ m_parameter->SetParameter(m_disableDiscarding, "disable-discarding", false);
+
+ //Print Translation Options
+ m_parameter->SetParameter(m_printTranslationOptions, "print-translation-option", false );
//Print All Derivations
- SetBooleanParameter( &m_printAllDerivations , "print-all-derivations", false );
+ m_parameter->SetParameter(m_printAllDerivations , "print-all-derivations", false );
// additional output
- if (m_parameter->isParamSpecified("translation-details")) {
- const vector<string> &args = m_parameter->GetParam("translation-details");
- if (args.size() == 1) {
- m_detailedTranslationReportingFilePath = args[0];
- } else {
- UserMessage::Add(string("the translation-details option requires exactly one filename argument"));
- return false;
- }
- }
- if (m_parameter->isParamSpecified("tree-translation-details")) {
- const vector<string> &args = m_parameter->GetParam("tree-translation-details");
- if (args.size() == 1) {
- m_detailedTreeFragmentsTranslationReportingFilePath = args[0];
- } else {
- UserMessage::Add(string("the tree-translation-details option requires exactly one filename argument"));
- return false;
- }
- }
+ m_parameter->SetParameter<string>(m_detailedTranslationReportingFilePath, "translation-details", "");
+ m_parameter->SetParameter<string>(m_detailedTreeFragmentsTranslationReportingFilePath, "tree-translation-details", "");
//DIMw
- if (m_parameter->isParamSpecified("translation-all-details")) {
- const vector<string> &args = m_parameter->GetParam("translation-all-details");
- if (args.size() == 1) {
- m_detailedAllTranslationReportingFilePath = args[0];
- } else {
- UserMessage::Add(string("the translation-all-details option requires exactly one filename argument"));
- return false;
- }
- }
+ m_parameter->SetParameter<string>(m_detailedAllTranslationReportingFilePath, "translation-all-details", "");
// reordering constraints
- m_maxDistortion = (m_parameter->GetParam("distortion-limit").size() > 0) ?
- Scan<int>(m_parameter->GetParam("distortion-limit")[0])
- : -1;
- SetBooleanParameter( &m_reorderingConstraint, "monotone-at-punctuation", false );
+ m_parameter->SetParameter(m_maxDistortion, "distortion-limit", -1);
+
+ m_parameter->SetParameter(m_reorderingConstraint, "monotone-at-punctuation", false );
// settings for pruning
- m_maxHypoStackSize = (m_parameter->GetParam("stack").size() > 0)
- ? Scan<size_t>(m_parameter->GetParam("stack")[0]) : DEFAULT_MAX_HYPOSTACK_SIZE;
+ m_parameter->SetParameter(m_maxHypoStackSize, "stack", DEFAULT_MAX_HYPOSTACK_SIZE);
m_minHypoStackDiversity = 0;
- if (m_parameter->GetParam("stack-diversity").size() > 0) {
+ params = m_parameter->GetParam("stack-diversity");
+ if (params && params->size()) {
if (m_maxDistortion > 15) {
- UserMessage::Add("stack diversity > 0 is not allowed for distortion limits larger than 15");
+ std::cerr << "stack diversity > 0 is not allowed for distortion limits larger than 15";
return false;
}
if (m_inputType == WordLatticeInput) {
- UserMessage::Add("stack diversity > 0 is not allowed for lattice input");
+ std::cerr << "stack diversity > 0 is not allowed for lattice input";
return false;
}
- m_minHypoStackDiversity = Scan<size_t>(m_parameter->GetParam("stack-diversity")[0]);
+ m_minHypoStackDiversity = Scan<size_t>(params->at(0));
}
- m_beamWidth = (m_parameter->GetParam("beam-threshold").size() > 0) ?
- TransformScore(Scan<float>(m_parameter->GetParam("beam-threshold")[0]))
- : TransformScore(DEFAULT_BEAM_WIDTH);
- m_earlyDiscardingThreshold = (m_parameter->GetParam("early-discarding-threshold").size() > 0) ?
- TransformScore(Scan<float>(m_parameter->GetParam("early-discarding-threshold")[0]))
- : TransformScore(DEFAULT_EARLY_DISCARDING_THRESHOLD);
- m_translationOptionThreshold = (m_parameter->GetParam("translation-option-threshold").size() > 0) ?
- TransformScore(Scan<float>(m_parameter->GetParam("translation-option-threshold")[0]))
- : TransformScore(DEFAULT_TRANSLATION_OPTION_THRESHOLD);
-
- m_maxNoTransOptPerCoverage = (m_parameter->GetParam("max-trans-opt-per-coverage").size() > 0)
- ? Scan<size_t>(m_parameter->GetParam("max-trans-opt-per-coverage")[0]) : DEFAULT_MAX_TRANS_OPT_SIZE;
+ m_parameter->SetParameter(m_beamWidth, "beam-threshold", DEFAULT_BEAM_WIDTH);
+ m_beamWidth = TransformScore(m_beamWidth);
- m_maxNoPartTransOpt = (m_parameter->GetParam("max-partial-trans-opt").size() > 0)
- ? Scan<size_t>(m_parameter->GetParam("max-partial-trans-opt")[0]) : DEFAULT_MAX_PART_TRANS_OPT_SIZE;
+ m_parameter->SetParameter(m_earlyDiscardingThreshold, "early-discarding-threshold", DEFAULT_EARLY_DISCARDING_THRESHOLD);
+ m_earlyDiscardingThreshold = TransformScore(m_earlyDiscardingThreshold);
- m_maxPhraseLength = (m_parameter->GetParam("max-phrase-length").size() > 0)
- ? Scan<size_t>(m_parameter->GetParam("max-phrase-length")[0]) : DEFAULT_MAX_PHRASE_LENGTH;
+ m_parameter->SetParameter(m_translationOptionThreshold, "translation-option-threshold", DEFAULT_TRANSLATION_OPTION_THRESHOLD);
+ m_translationOptionThreshold = TransformScore(m_translationOptionThreshold);
- m_cubePruningPopLimit = (m_parameter->GetParam("cube-pruning-pop-limit").size() > 0)
- ? Scan<size_t>(m_parameter->GetParam("cube-pruning-pop-limit")[0]) : DEFAULT_CUBE_PRUNING_POP_LIMIT;
+ m_parameter->SetParameter(m_maxNoTransOptPerCoverage, "max-trans-opt-per-coverage", DEFAULT_MAX_TRANS_OPT_SIZE);
+ m_parameter->SetParameter(m_maxNoPartTransOpt, "max-partial-trans-opt", DEFAULT_MAX_PART_TRANS_OPT_SIZE);
+ m_parameter->SetParameter(m_maxPhraseLength, "max-phrase-length", DEFAULT_MAX_PHRASE_LENGTH);
+ m_parameter->SetParameter(m_cubePruningPopLimit, "cube-pruning-pop-limit", DEFAULT_CUBE_PRUNING_POP_LIMIT);
+ m_parameter->SetParameter(m_cubePruningDiversity, "cube-pruning-diversity", DEFAULT_CUBE_PRUNING_DIVERSITY);
- m_cubePruningDiversity = (m_parameter->GetParam("cube-pruning-diversity").size() > 0)
- ? Scan<size_t>(m_parameter->GetParam("cube-pruning-diversity")[0]) : DEFAULT_CUBE_PRUNING_DIVERSITY;
-
- SetBooleanParameter(&m_cubePruningLazyScoring, "cube-pruning-lazy-scoring", false);
+ m_parameter->SetParameter(m_cubePruningLazyScoring, "cube-pruning-lazy-scoring", false);
// early distortion cost
- SetBooleanParameter( &m_useEarlyDistortionCost, "early-distortion-cost", false );
+ m_parameter->SetParameter(m_useEarlyDistortionCost, "early-distortion-cost", false );
// unknown word processing
- SetBooleanParameter( &m_dropUnknown, "drop-unknown", false );
- SetBooleanParameter( &m_markUnknown, "mark-unknown", false );
+ m_parameter->SetParameter(m_dropUnknown, "drop-unknown", false );
+ m_parameter->SetParameter(m_markUnknown, "mark-unknown", false );
- SetBooleanParameter( &m_lmEnableOOVFeature, "lmodel-oov-feature", false);
+ m_parameter->SetParameter(m_lmEnableOOVFeature, "lmodel-oov-feature", false);
// minimum Bayes risk decoding
- SetBooleanParameter( &m_mbr, "minimum-bayes-risk", false );
- m_mbrSize = (m_parameter->GetParam("mbr-size").size() > 0) ?
- Scan<size_t>(m_parameter->GetParam("mbr-size")[0]) : 200;
- m_mbrScale = (m_parameter->GetParam("mbr-scale").size() > 0) ?
- Scan<float>(m_parameter->GetParam("mbr-scale")[0]) : 1.0f;
+ m_parameter->SetParameter(m_mbr, "minimum-bayes-risk", false );
+ m_parameter->SetParameter<size_t>(m_mbrSize, "mbr-size", 200);
+ m_parameter->SetParameter(m_mbrScale, "mbr-scale", 1.0f);
//lattice mbr
- SetBooleanParameter( &m_useLatticeMBR, "lminimum-bayes-risk", false );
+ m_parameter->SetParameter(m_useLatticeMBR, "lminimum-bayes-risk", false );
if (m_useLatticeMBR && m_mbr) {
- cerr << "Errror: Cannot use both n-best mbr and lattice mbr together" << endl;
+ cerr << "Error: Cannot use both n-best mbr and lattice mbr together" << endl;
exit(1);
}
//mira training
- SetBooleanParameter( &m_mira, "mira", false );
+ m_parameter->SetParameter(m_mira, "mira", false );
// lattice MBR
if (m_useLatticeMBR) m_mbr = true;
- m_lmbrPruning = (m_parameter->GetParam("lmbr-pruning-factor").size() > 0) ?
- Scan<size_t>(m_parameter->GetParam("lmbr-pruning-factor")[0]) : 30;
- m_lmbrThetas = Scan<float>(m_parameter->GetParam("lmbr-thetas"));
- SetBooleanParameter( &m_useLatticeHypSetForLatticeMBR, "lattice-hypo-set", false );
- m_lmbrPrecision = (m_parameter->GetParam("lmbr-p").size() > 0) ?
- Scan<float>(m_parameter->GetParam("lmbr-p")[0]) : 0.8f;
- m_lmbrPRatio = (m_parameter->GetParam("lmbr-r").size() > 0) ?
- Scan<float>(m_parameter->GetParam("lmbr-r")[0]) : 0.6f;
- m_lmbrMapWeight = (m_parameter->GetParam("lmbr-map-weight").size() >0) ?
- Scan<float>(m_parameter->GetParam("lmbr-map-weight")[0]) : 0.0f;
+ m_parameter->SetParameter<size_t>(m_lmbrPruning, "lmbr-pruning-factor", 30);
+ m_parameter->SetParameter(m_lmbrPrecision, "lmbr-p", 0.8f);
+ m_parameter->SetParameter(m_lmbrPRatio, "lmbr-r", 0.6f);
+ m_parameter->SetParameter(m_lmbrMapWeight, "lmbr-map-weight", 0.0f);
+ m_parameter->SetParameter(m_useLatticeHypSetForLatticeMBR, "lattice-hypo-set", false );
+
+ params = m_parameter->GetParam("lmbr-thetas");
+ if (params) {
+ m_lmbrThetas = Scan<float>(*params);
+ }
//consensus decoding
- SetBooleanParameter( &m_useConsensusDecoding, "consensus-decoding", false );
+ m_parameter->SetParameter(m_useConsensusDecoding, "consensus-decoding", false );
if (m_useConsensusDecoding && m_mbr) {
cerr<< "Error: Cannot use consensus decoding together with mbr" << endl;
exit(1);
}
if (m_useConsensusDecoding) m_mbr=true;
- SetBooleanParameter( &m_defaultNonTermOnlyForEmptyRange, "default-non-term-for-empty-range-only", false );
- SetBooleanParameter( &m_printNBestTrees, "n-best-trees", false );
+ m_parameter->SetParameter(m_defaultNonTermOnlyForEmptyRange, "default-non-term-for-empty-range-only", false );
+ m_parameter->SetParameter(m_printNBestTrees, "n-best-trees", false );
// S2T decoder
- SetBooleanParameter( &m_useS2TDecoder, "s2t", false );
- m_s2tParsingAlgorithm = (m_parameter->GetParam("s2t-parsing-algorithm").size() > 0) ?
- (S2TParsingAlgorithm) Scan<size_t>(m_parameter->GetParam("s2t-parsing-algorithm")[0]) : RecursiveCYKPlus;
+ m_parameter->SetParameter(m_s2tParsingAlgorithm, "s2t-parsing-algorithm", RecursiveCYKPlus);
// Compact phrase table and reordering model
- SetBooleanParameter( &m_minphrMemory, "minphr-memory", false );
- SetBooleanParameter( &m_minlexrMemory, "minlexr-memory", false );
+ m_parameter->SetParameter(m_minphrMemory, "minphr-memory", false );
+ m_parameter->SetParameter(m_minlexrMemory, "minlexr-memory", false );
- m_timeout_threshold = (m_parameter->GetParam("time-out").size() > 0) ?
- Scan<size_t>(m_parameter->GetParam("time-out")[0]) : -1;
+ m_parameter->SetParameter<size_t>(m_timeout_threshold, "time-out", -1);
m_timeout = (GetTimeoutThreshold() == (size_t)-1) ? false : true;
-
- m_lmcache_cleanup_threshold = (m_parameter->GetParam("clean-lm-cache").size() > 0) ?
- Scan<size_t>(m_parameter->GetParam("clean-lm-cache")[0]) : 1;
+ m_parameter->SetParameter<size_t>(m_lmcache_cleanup_threshold, "clean-lm-cache", 1);
m_threadCount = 1;
- const std::vector<std::string> &threadInfo = m_parameter->GetParam("threads");
- if (!threadInfo.empty()) {
- if (threadInfo[0] == "all") {
+ params = m_parameter->GetParam("threads");
+ if (params && params->size()) {
+ if (params->at(0) == "all") {
#ifdef WITH_THREADS
m_threadCount = boost::thread::hardware_concurrency();
if (!m_threadCount) {
- UserMessage::Add("-threads all specified but Boost doesn't know how many cores there are");
+ std::cerr << "-threads all specified but Boost doesn't know how many cores there are";
return false;
}
#else
- UserMessage::Add("-threads all specified but moses not built with thread support");
+ std::cerr << "-threads all specified but moses not built with thread support";
return false;
#endif
} else {
- m_threadCount = Scan<int>(threadInfo[0]);
+ m_threadCount = Scan<int>(params->at(0));
if (m_threadCount < 1) {
- UserMessage::Add("Specify at least one thread.");
+ std::cerr << "Specify at least one thread.";
return false;
}
#ifndef WITH_THREADS
if (m_threadCount > 1) {
- UserMessage::Add(std::string("Error: Thread count of ") + threadInfo[0] + " but moses not built with thread support");
+ std::cerr << "Error: Thread count of " << params->at(0) << " but moses not built with thread support";
return false;
}
#endif
}
}
- m_startTranslationId = (m_parameter->GetParam("start-translation-id").size() > 0) ?
- Scan<long>(m_parameter->GetParam("start-translation-id")[0]) : 0;
+ m_parameter->SetParameter<long>(m_startTranslationId, "start-translation-id", 0);
// use of xml in input
- if (m_parameter->GetParam("xml-input").size() == 0) m_xmlInputType = XmlPassThrough;
- else if (m_parameter->GetParam("xml-input")[0]=="exclusive") m_xmlInputType = XmlExclusive;
- else if (m_parameter->GetParam("xml-input")[0]=="inclusive") m_xmlInputType = XmlInclusive;
- else if (m_parameter->GetParam("xml-input")[0]=="constraint") m_xmlInputType = XmlConstraint;
- else if (m_parameter->GetParam("xml-input")[0]=="ignore") m_xmlInputType = XmlIgnore;
- else if (m_parameter->GetParam("xml-input")[0]=="pass-through") m_xmlInputType = XmlPassThrough;
- else {
- UserMessage::Add("invalid xml-input value, must be pass-through, exclusive, inclusive, constraint, or ignore");
- return false;
- }
+ m_parameter->SetParameter<XmlInputType>(m_xmlInputType, "xml-input", XmlPassThrough);
// specify XML tags opening and closing brackets for XML option
- if (m_parameter->GetParam("xml-brackets").size() > 0) {
- std::vector<std::string> brackets = Tokenize(m_parameter->GetParam("xml-brackets")[0]);
+ params = m_parameter->GetParam("xml-brackets");
+ if (params && params->size()) {
+ std::vector<std::string> brackets = Tokenize(params->at(0));
if(brackets.size()!=2) {
cerr << "invalid xml-brackets value, must specify exactly 2 blank-delimited strings for XML tags opening and closing brackets" << endl;
exit(1);
}
m_xmlBrackets.first= brackets[0];
m_xmlBrackets.second=brackets[1];
- VERBOSE(1,"XML tags opening and closing brackets for XML input are: "
- << m_xmlBrackets.first << " and " << m_xmlBrackets.second << endl);
+ VERBOSE(1,"XML tags opening and closing brackets for XML input are: "
+ << m_xmlBrackets.first << " and " << m_xmlBrackets.second << endl);
}
- if (m_parameter->GetParam("placeholder-factor").size() > 0) {
- m_placeHolderFactor = Scan<FactorType>(m_parameter->GetParam("placeholder-factor")[0]);
- } else {
- m_placeHolderFactor = NOT_FOUND;
- }
+ m_parameter->SetParameter(m_placeHolderFactor, "placeholder-factor", NOT_FOUND);
std::map<std::string, std::string> featureNameOverride = OverrideFeatureNames();
// all features
map<string, int> featureIndexMap;
- const vector<string> &features = m_parameter->GetParam("feature");
- for (size_t i = 0; i < features.size(); ++i) {
- const string &line = Trim(features[i]);
+ params = m_parameter->GetParam("feature");
+ for (size_t i = 0; params && i < params->size(); ++i) {
+ const string &line = Trim(params->at(i));
VERBOSE(1,"line=" << line << endl);
if (line.empty())
continue;
@@ -531,26 +481,25 @@ bool StaticData::LoadData(Parameter *parameter)
string &feature = toks[0];
std::map<std::string, std::string>::const_iterator iter = featureNameOverride.find(feature);
if (iter == featureNameOverride.end()) {
- // feature name not override
- m_registry.Construct(feature, line);
- }
- else {
- // replace feature name with new name
- string newName = iter->second;
- feature = newName;
- string newLine = Join(" ", toks);
- m_registry.Construct(newName, newLine);
+      // feature name not overridden
+ m_registry.Construct(feature, line);
+ } else {
+ // replace feature name with new name
+ string newName = iter->second;
+ feature = newName;
+ string newLine = Join(" ", toks);
+ m_registry.Construct(newName, newLine);
}
}
NoCache();
OverrideFeatures();
- if (!m_parameter->isParamSpecified("show-weights")) {
+ if (m_parameter->GetParam("show-weights") == NULL) {
LoadFeatureFunctions();
}
- if (!LoadDecodeGraphs()) return false;
+ LoadDecodeGraphs();
if (!CheckWeights()) {
@@ -560,15 +509,12 @@ bool StaticData::LoadData(Parameter *parameter)
//Add any other features here.
//Load extra feature weights
- vector<string> extraWeightConfig = m_parameter->GetParam("weight-file");
- if (extraWeightConfig.size()) {
- if (extraWeightConfig.size() != 1) {
- UserMessage::Add("One argument should be supplied for weight-file");
- return false;
- }
+ string weightFile;
+ m_parameter->SetParameter<string>(weightFile, "weight-file", "");
+ if (!weightFile.empty()) {
ScoreComponentCollection extraWeights;
- if (!extraWeights.Load(extraWeightConfig[0])) {
- UserMessage::Add("Unable to load weights from " + extraWeightConfig[0]);
+ if (!extraWeights.Load(weightFile)) {
+ std::cerr << "Unable to load weights from " << weightFile;
return false;
}
m_allWeights.PlusEquals(extraWeights);
@@ -578,7 +524,8 @@ bool StaticData::LoadData(Parameter *parameter)
LoadSparseWeightsFromConfig();
// alternate weight settings
- if (m_parameter->GetParam("alternate-weight-setting").size() > 0) {
+ params = m_parameter->GetParam("alternate-weight-setting");
+ if (params && params->size()) {
if (!LoadAlternateWeightSettings()) {
return false;
}
@@ -586,25 +533,6 @@ bool StaticData::LoadData(Parameter *parameter)
return true;
}
-void StaticData::SetBooleanParameter( bool *parameter, string parameterName, bool defaultValue )
-{
- // default value if nothing is specified
- *parameter = defaultValue;
- if (! m_parameter->isParamSpecified( parameterName ) ) {
- return;
- }
-
- // if parameter is just specified as, e.g. "-parameter" set it true
- if (m_parameter->GetParam( parameterName ).size() == 0) {
- *parameter = true;
- }
-
- // if paramter is specified "-parameter true" or "-parameter false"
- else if (m_parameter->GetParam( parameterName ).size() == 1) {
- *parameter = Scan<bool>( m_parameter->GetParam( parameterName )[0]);
- }
-}
-
void StaticData::SetWeight(const FeatureFunction* sp, float weight)
{
m_allWeights.Resize();
@@ -620,13 +548,7 @@ void StaticData::SetWeights(const FeatureFunction* sp, const std::vector<float>&
void StaticData::LoadNonTerminals()
{
string defaultNonTerminals;
-
- if (m_parameter->GetParam("non-terminals").size() == 0) {
- defaultNonTerminals = "X";
- } else {
- vector<std::string> tokens = Tokenize(m_parameter->GetParam("non-terminals")[0]);
- defaultNonTerminals = tokens[0];
- }
+ m_parameter->SetParameter<string>(defaultNonTerminals, "non-terminals", "X");
FactorCollection &factorCollection = FactorCollection::Instance();
@@ -638,22 +560,23 @@ void StaticData::LoadNonTerminals()
const Factor *targetFactor = factorCollection.AddFactor(Output, 0, defaultNonTerminals, true);
m_outputDefaultNonTerminal.SetFactor(0, targetFactor);
- // for unknwon words
- if (m_parameter->GetParam("unknown-lhs").size() == 0) {
+ // for unknown words
+ const PARAM_VEC *params = m_parameter->GetParam("unknown-lhs");
+ if (params == NULL || params->size() == 0) {
UnknownLHSEntry entry(defaultNonTerminals, 0.0f);
m_unknownLHS.push_back(entry);
} else {
- const string &filePath = m_parameter->GetParam("unknown-lhs")[0];
+ const string &filePath = params->at(0);
InputFileStream inStream(filePath);
string line;
while(getline(inStream, line)) {
vector<string> tokens = Tokenize(line);
UTIL_THROW_IF2(tokens.size() != 2,
- "Incorrect unknown LHS format: " << line);
+ "Incorrect unknown LHS format: " << line);
UnknownLHSEntry entry(tokens[0], Scan<float>(tokens[1]));
m_unknownLHS.push_back(entry);
- // const Factor *targetFactor =
+ // const Factor *targetFactor =
factorCollection.AddFactor(Output, 0, tokens[0], true);
}
@@ -666,20 +589,47 @@ void StaticData::LoadChartDecodingParameters()
LoadNonTerminals();
// source label overlap
- if (m_parameter->GetParam("source-label-overlap").size() > 0) {
- m_sourceLabelOverlap = (SourceLabelOverlap) Scan<int>(m_parameter->GetParam("source-label-overlap")[0]);
- } else {
- m_sourceLabelOverlap = SourceLabelOverlapAdd;
+ m_parameter->SetParameter(m_sourceLabelOverlap, "source-label-overlap", SourceLabelOverlapAdd);
+ m_parameter->SetParameter(m_ruleLimit, "rule-limit", DEFAULT_MAX_TRANS_OPT_SIZE);
+
+}
+
+void StaticData::LoadDecodeGraphs()
+{
+ vector<string> mappingVector;
+ vector<size_t> maxChartSpans;
+
+ const PARAM_VEC *params;
+
+ params = m_parameter->GetParam("mapping");
+ if (params && params->size()) {
+ mappingVector = *params;
+ }
+
+ params = m_parameter->GetParam("max-chart-span");
+ if (params && params->size()) {
+ maxChartSpans = Scan<size_t>(*params);
}
- m_ruleLimit = (m_parameter->GetParam("rule-limit").size() > 0)
- ? Scan<size_t>(m_parameter->GetParam("rule-limit")[0]) : DEFAULT_MAX_TRANS_OPT_SIZE;
+ vector<string> toks = Tokenize(mappingVector[0]);
+ if (toks.size() == 3) {
+ // eg 0 T 0
+ LoadDecodeGraphsOld(mappingVector, maxChartSpans);
+ } else if (toks.size() == 2) {
+ if (toks[0] == "T" || toks[0] == "G") {
+ // eg. T 0
+ LoadDecodeGraphsOld(mappingVector, maxChartSpans);
+ } else {
+ // eg. 0 TM1
+ LoadDecodeGraphsNew(mappingVector, maxChartSpans);
+ }
+ } else {
+ UTIL_THROW(util::Exception, "Malformed mapping");
+ }
}
-bool StaticData::LoadDecodeGraphs()
+void StaticData::LoadDecodeGraphsOld(const vector<string> &mappingVector, const vector<size_t> &maxChartSpans)
{
- const vector<string> &mappingVector = m_parameter->GetParam("mapping");
- const vector<size_t> &maxChartSpans = Scan<size_t>(m_parameter->GetParam("max-chart-span"));
const vector<PhraseDictionary*>& pts = PhraseDictionary::GetColl();
const vector<GenerationDictionary*>& gens = GenerationDictionary::GetColl();
@@ -693,15 +643,17 @@ bool StaticData::LoadDecodeGraphs()
DecodeType decodeType;
size_t index;
if (token.size() == 2) {
+ // eg. T 0
decodeGraphInd = 0;
decodeType = token[0] == "T" ? Translate : Generate;
index = Scan<size_t>(token[1]);
} else if (token.size() == 3) {
+ // eg. 0 T 0
// For specifying multiple translation model
decodeGraphInd = Scan<size_t>(token[0]);
//the vectorList index can only increment by one
UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd && decodeGraphInd != prevDecodeGraphInd + 1,
- "Malformed mapping");
+ "Malformed mapping");
if (decodeGraphInd > prevDecodeGraphInd) {
prev = NULL;
}
@@ -736,8 +688,8 @@ bool StaticData::LoadDecodeGraphs()
}
decodeStep = new DecodeStepGeneration(gens[index], prev, *featuresRemaining);
break;
- case InsertNullFertilityWord:
- UTIL_THROW(util::Exception, "Please implement NullFertilityInsertion.");
+ default:
+ UTIL_THROW(util::Exception, "Unknown decode step");
break;
}
@@ -765,98 +717,97 @@ bool StaticData::LoadDecodeGraphs()
// set maximum n-gram size for backoff approach to decoding paths
// default is always use subsequent paths (value = 0)
// if specified, record maxmimum unseen n-gram size
- const vector<string> &backoffVector = m_parameter->GetParam("decoding-graph-backoff");
- for(size_t i=0; i<m_decodeGraphs.size() && i<backoffVector.size(); i++) {
- DecodeGraph &decodeGraph = *m_decodeGraphs[i];
+ const vector<string> *backoffVector = m_parameter->GetParam("decoding-graph-backoff");
+ for(size_t i=0; i<m_decodeGraphs.size() && backoffVector && i<backoffVector->size(); i++) {
+ DecodeGraph &decodeGraph = *m_decodeGraphs[i];
- if (i < backoffVector.size()) {
- decodeGraph.SetBackoff(Scan<size_t>(backoffVector[i]));
- }
+ if (i < backoffVector->size()) {
+ decodeGraph.SetBackoff(Scan<size_t>(backoffVector->at(i)));
+ }
}
-
- return true;
}
-void StaticData::ReLoadParameter()
+void StaticData::LoadDecodeGraphsNew(const std::vector<std::string> &mappingVector, const std::vector<size_t> &maxChartSpans)
{
- UTIL_THROW(util::Exception, "completely redo. Too many hardcoded ff"); // TODO completely redo. Too many hardcoded ff
- /*
- m_verboseLevel = 1;
- if (m_parameter->GetParam("verbose").size() == 1) {
- m_verboseLevel = Scan<size_t>( m_parameter->GetParam("verbose")[0]);
- }
-
- // check whether "weight-u" is already set
- if (m_parameter->isParamShortNameSpecified("u")) {
- if (m_parameter->GetParamShortName("u").size() < 1 ) {
- PARAM_VEC w(1,"1.0");
- m_parameter->OverwriteParamShortName("u", w);
- }
- }
+ const std::vector<FeatureFunction*> *featuresRemaining = &FeatureFunction::GetFeatureFunctions();
+ DecodeStep *prev = 0;
+ size_t prevDecodeGraphInd = 0;
- //loop over all ScoreProducer to update weights
+ for(size_t i=0; i<mappingVector.size(); i++) {
+ vector<string> token = Tokenize(mappingVector[i]);
+ size_t decodeGraphInd;
- std::vector<const ScoreProducer*>::const_iterator iterSP;
- for (iterSP = transSystem.GetFeatureFunctions().begin() ; iterSP != transSystem.GetFeatureFunctions().end() ; ++iterSP) {
- std::string paramShortName = (*iterSP)->GetScoreProducerWeightShortName();
- vector<float> Weights = Scan<float>(m_parameter->GetParamShortName(paramShortName));
+ decodeGraphInd = Scan<size_t>(token[0]);
+ //the vectorList index can only increment by one
+ UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd && decodeGraphInd != prevDecodeGraphInd + 1,
+ "Malformed mapping");
+ if (decodeGraphInd > prevDecodeGraphInd) {
+ prev = NULL;
+ }
- if (paramShortName == "d") { //basic distortion model takes the first weight
- if ((*iterSP)->GetScoreProducerDescription() == "Distortion") {
- Weights.resize(1); //take only the first element
- } else { //lexicalized reordering model takes the other
- Weights.erase(Weights.begin()); //remove the first element
- }
- // std::cerr << "this is the Distortion Score Producer -> " << (*iterSP)->GetScoreProducerDescription() << std::cerr;
- // std::cerr << "this is the Distortion Score Producer; it has " << (*iterSP)->GetNumScoreComponents() << " weights"<< std::cerr;
- // std::cerr << Weights << std::endl;
- } else if (paramShortName == "tm") {
- continue;
+ if (prevDecodeGraphInd < decodeGraphInd) {
+ featuresRemaining = &FeatureFunction::GetFeatureFunctions();
}
- SetWeights(*iterSP, Weights);
- }
- // std::cerr << "There are " << m_phraseDictionary.size() << " m_phraseDictionaryfeatures" << std::endl;
+ FeatureFunction &ff = FeatureFunction::FindFeatureFunction(token[1]);
- const vector<float> WeightsTM = Scan<float>(m_parameter->GetParamShortName("tm"));
- // std::cerr << "WeightsTM: " << WeightsTM << std::endl;
+ DecodeStep* decodeStep = NULL;
+ if (typeid(ff) == typeid(PhraseDictionary)) {
+ decodeStep = new DecodeStepTranslation(&static_cast<PhraseDictionary&>(ff), prev, *featuresRemaining);
+ } else if (typeid(ff) == typeid(GenerationDictionary)) {
+ decodeStep = new DecodeStepGeneration(&static_cast<GenerationDictionary&>(ff), prev, *featuresRemaining);
+ } else {
+ UTIL_THROW(util::Exception, "Unknown decode step");
+ }
- const vector<float> WeightsLM = Scan<float>(m_parameter->GetParamShortName("lm"));
- // std::cerr << "WeightsLM: " << WeightsLM << std::endl;
+ featuresRemaining = &decodeStep->GetFeaturesRemaining();
- size_t index_WeightTM = 0;
- for(size_t i=0; i<transSystem.GetPhraseDictionaries().size(); ++i) {
- PhraseDictionaryFeature &phraseDictionaryFeature = *m_phraseDictionary[i];
+ UTIL_THROW_IF2(decodeStep == NULL, "Null decode step");
+ if (m_decodeGraphs.size() < decodeGraphInd + 1) {
+ DecodeGraph *decodeGraph;
+ if (IsChart()) {
+ size_t maxChartSpan = (decodeGraphInd < maxChartSpans.size()) ? maxChartSpans[decodeGraphInd] : DEFAULT_MAX_CHART_SPAN;
+ VERBOSE(1,"max-chart-span: " << maxChartSpans[decodeGraphInd] << endl);
+ decodeGraph = new DecodeGraph(m_decodeGraphs.size(), maxChartSpan);
+ } else {
+ decodeGraph = new DecodeGraph(m_decodeGraphs.size());
+ }
- // std::cerr << "phraseDictionaryFeature.GetNumScoreComponents():" << phraseDictionaryFeature.GetNumScoreComponents() << std::endl;
- // std::cerr << "phraseDictionaryFeature.GetNumInputScores():" << phraseDictionaryFeature.GetNumInputScores() << std::endl;
+ m_decodeGraphs.push_back(decodeGraph); // TODO max chart span
+ }
- vector<float> tmp_weights;
- for(size_t j=0; j<phraseDictionaryFeature.GetNumScoreComponents(); ++j)
- tmp_weights.push_back(WeightsTM[index_WeightTM++]);
+ m_decodeGraphs[decodeGraphInd]->Add(decodeStep);
+ prev = decodeStep;
+ prevDecodeGraphInd = decodeGraphInd;
+ }
- // std::cerr << tmp_weights << std::endl;
+ // set maximum n-gram size for backoff approach to decoding paths
+ // default is always use subsequent paths (value = 0)
+ // if specified, record maximum unseen n-gram size
+ const vector<string> *backoffVector = m_parameter->GetParam("decoding-graph-backoff");
+ for(size_t i=0; i<m_decodeGraphs.size() && backoffVector && i<backoffVector->size(); i++) {
+ DecodeGraph &decodeGraph = *m_decodeGraphs[i];
- SetWeights(&phraseDictionaryFeature, tmp_weights);
+ if (i < backoffVector->size()) {
+ decodeGraph.SetBackoff(Scan<size_t>(backoffVector->at(i)));
+ }
}
- */
+
}
void StaticData::ReLoadBleuScoreFeatureParameter(float weight)
{
- assert(false);
- /*
//loop over ScoreProducers to update weights of BleuScoreFeature
+ const std::vector<FeatureFunction*> &producers = FeatureFunction::GetFeatureFunctions();
+ for(size_t i=0; i<producers.size(); ++i) {
+ FeatureFunction *ff = producers[i];
+ std::string ffName = ff->GetScoreProducerDescription();
- std::vector<const ScoreProducer*>::const_iterator iterSP;
- for (iterSP = transSystem.GetFeatureFunctions().begin() ; iterSP != transSystem.GetFeatureFunctions().end() ; ++iterSP) {
- std::string paramShortName = (*iterSP)->GetScoreProducerWeightShortName();
- if (paramShortName == "bl") {
- SetWeight(*iterSP, weight);
+ if (ffName == "BleuScoreFeature") {
+ SetWeight(ff, weight);
break;
}
}
- */
}
// ScoreComponentCollection StaticData::GetAllWeightsScoreComponentCollection() const {}
@@ -895,11 +846,6 @@ float StaticData::GetWeightWordPenalty() const
return weightWP;
}
-float StaticData::GetWeightUnknownWordPenalty() const
-{
- return GetWeight(&UnknownWordPenaltyProducer::Instance());
-}
-
void StaticData::InitializeForInput(const InputType& source) const
{
const std::vector<FeatureFunction*> &producers = FeatureFunction::GetFeatureFunctions();
@@ -927,13 +873,16 @@ void StaticData::CleanUpAfterSentenceProcessing(const InputType& source) const
void StaticData::LoadFeatureFunctions()
{
- const std::vector<FeatureFunction*> &ffs
- = FeatureFunction::GetFeatureFunctions();
+ const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
std::vector<FeatureFunction*>::const_iterator iter;
for (iter = ffs.begin(); iter != ffs.end(); ++iter) {
FeatureFunction *ff = *iter;
bool doLoad = true;
+ if (ff->RequireSortingAfterSourceContext()) {
+ m_requireSortingAfterSourceContext = true;
+ }
+
// if (PhraseDictionary *ffCast = dynamic_cast<PhraseDictionary*>(ff)) {
if (dynamic_cast<PhraseDictionary*>(ff)) {
doLoad = false;
@@ -982,8 +931,7 @@ bool StaticData::CheckWeights() const
VERBOSE(1,fname << "\n");
if (featureNames.find(fname) != featureNames.end()) {
weightNames.erase(iter++);
- }
- else {
+ } else {
++iter;
}
}
@@ -1002,7 +950,8 @@ bool StaticData::CheckWeights() const
}
-void StaticData::LoadSparseWeightsFromConfig() {
+void StaticData::LoadSparseWeightsFromConfig()
+{
set<string> featureNames;
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
for (size_t i = 0; i < ffs.size(); ++i) {
@@ -1017,7 +966,7 @@ void StaticData::LoadSparseWeightsFromConfig() {
// this indicates that it is sparse feature
if (featureNames.find(iter->first) == featureNames.end()) {
UTIL_THROW_IF2(iter->second.size() != 1, "ERROR: only one weight per sparse feature allowed: " << iter->first);
- m_allWeights.Assign(iter->first, iter->second[0]);
+ m_allWeights.Assign(iter->first, iter->second[0]);
}
}
@@ -1032,7 +981,11 @@ bool StaticData::LoadAlternateWeightSettings()
return false;
}
- const vector<string> &weightSpecification = m_parameter->GetParam("alternate-weight-setting");
+ vector<string> weightSpecification;
+ const PARAM_VEC *params = m_parameter->GetParam("alternate-weight-setting");
+ if (params && params->size()) {
+ weightSpecification = *params;
+ }
// get mapping from feature names to feature functions
map<string,FeatureFunction*> nameToFF;
@@ -1056,7 +1009,7 @@ bool StaticData::LoadAlternateWeightSettings()
currentId = args[1];
VERBOSE(1,"alternate weight setting " << currentId << endl);
UTIL_THROW_IF2(m_weightSetting.find(currentId) != m_weightSetting.end(),
- "Duplicate alternate weight id: " << currentId);
+ "Duplicate alternate weight id: " << currentId);
m_weightSetting[ currentId ] = new ScoreComponentCollection;
// other specifications
@@ -1065,12 +1018,12 @@ bool StaticData::LoadAlternateWeightSettings()
// sparse weights
if (args[0] == "weight-file") {
if (args.size() != 2) {
- UserMessage::Add("One argument should be supplied for weight-file");
+ std::cerr << "One argument should be supplied for weight-file";
return false;
}
ScoreComponentCollection extraWeights;
if (!extraWeights.Load(args[1])) {
- UserMessage::Add("Unable to load weights from " + args[1]);
+ std::cerr << "Unable to load weights from " << args[1];
return false;
}
m_weightSetting[ currentId ]->PlusEquals(extraWeights);
@@ -1082,8 +1035,7 @@ bool StaticData::LoadAlternateWeightSettings()
vector<string> featureFunctionName = Tokenize(args[1], ",");
for(size_t k=0; k<featureFunctionName.size(); k++) {
// check if a valid nane
- map<string,FeatureFunction*>::iterator ffLookUp
- = nameToFF.find(featureFunctionName[k]);
+ map<string,FeatureFunction*>::iterator ffLookUp = nameToFF.find(featureFunctionName[k]);
if (ffLookUp == nameToFF.end()) {
cerr << "ERROR: alternate weight setting " << currentId
<< " specifies to ignore feature function " << featureFunctionName[k]
@@ -1102,7 +1054,7 @@ bool StaticData::LoadAlternateWeightSettings()
UTIL_THROW_IF2(currentId.empty(), "No alternative weights specified");
vector<string> tokens = Tokenize(weightSpecification[i]);
UTIL_THROW_IF2(tokens.size() < 2
- , "Incorrect format for alternate weights: " << weightSpecification[i]);
+ , "Incorrect format for alternate weights: " << weightSpecification[i]);
// get name and weight values
string name = tokens[0];
@@ -1131,36 +1083,39 @@ bool StaticData::LoadAlternateWeightSettings()
void StaticData::NoCache()
{
- bool noCache;
- SetBooleanParameter( &noCache, "no-cache", false );
-
- if (noCache) {
- const std::vector<PhraseDictionary*> &pts = PhraseDictionary::GetColl();
- for (size_t i = 0; i < pts.size(); ++i) {
- PhraseDictionary &pt = *pts[i];
- pt.SetParameter("cache-size", "0");
- }
- }
+ bool noCache;
+ m_parameter->SetParameter(noCache, "no-cache", false );
+
+ if (noCache) {
+ const std::vector<PhraseDictionary*> &pts = PhraseDictionary::GetColl();
+ for (size_t i = 0; i < pts.size(); ++i) {
+ PhraseDictionary &pt = *pts[i];
+ pt.SetParameter("cache-size", "0");
+ }
+ }
}
std::map<std::string, std::string> StaticData::OverrideFeatureNames()
{
- std::map<std::string, std::string> ret;
-
- const PARAM_VEC &params = m_parameter->GetParam("feature-name-overwrite");
- if (params.size()) {
- UTIL_THROW_IF2(params.size() != 1, "Only provide 1 line in the section [feature-name-overwrite]");
- vector<string> toks = Tokenize(params[0]);
- UTIL_THROW_IF2(toks.size() % 2 != 0, "Format of -feature-name-overwrite must be [old-name new-name]*");
-
- for (size_t i = 0; i < toks.size(); i += 2) {
- const string &oldName = toks[i];
- const string &newName = toks[i+1];
- ret[oldName] = newName;
- }
- }
-
- if (m_useS2TDecoder) {
+ std::map<std::string, std::string> ret;
+
+ const PARAM_VEC *params = m_parameter->GetParam("feature-name-overwrite");
+ if (params && params->size()) {
+ UTIL_THROW_IF2(params->size() != 1, "Only provide 1 line in the section [feature-name-overwrite]");
+ vector<string> toks = Tokenize(params->at(0));
+ UTIL_THROW_IF2(toks.size() % 2 != 0, "Format of -feature-name-overwrite must be [old-name new-name]*");
+
+ for (size_t i = 0; i < toks.size(); i += 2) {
+ const string &oldName = toks[i];
+ const string &newName = toks[i+1];
+ ret[oldName] = newName;
+ }
+ }
+
+ // FIXME Does this make sense for F2S? Perhaps it should be changed once
+ // FIXME the pipeline uses RuleTable consistently.
+ if (m_searchAlgorithm == SyntaxS2T || m_searchAlgorithm == SyntaxT2S ||
+ m_searchAlgorithm == SyntaxT2S_SCFG || m_searchAlgorithm == SyntaxF2S) {
// Automatically override PhraseDictionary{Memory,Scope3}. This will
// have to change if the FF parameters diverge too much in the future,
// but for now it makes switching between the old and new decoders much
@@ -1169,14 +1124,14 @@ std::map<std::string, std::string> StaticData::OverrideFeatureNames()
ret["PhraseDictionaryScope3"] = "RuleTable";
}
- return ret;
+ return ret;
}
void StaticData::OverrideFeatures()
{
- const PARAM_VEC &params = m_parameter->GetParam("feature-overwrite");
- for (size_t i = 0; i < params.size(); ++i) {
- const string &str = params[i];
+ const PARAM_VEC *params = m_parameter->GetParam("feature-overwrite");
+ for (size_t i = 0; params && i < params->size(); ++i) {
+ const string &str = params->at(i);
vector<string> toks = Tokenize(str);
UTIL_THROW_IF2(toks.size() <= 1, "Incorrect format for feature override: " << str);
@@ -1221,24 +1176,24 @@ void StaticData::ResetWeights(const std::string &denseWeights, const std::string
vector<float> weights;
vector<string> toks = Tokenize(denseWeights);
for (size_t i = 0; i < toks.size(); ++i) {
- const string &tok = toks[i];
-
- if (tok.substr(tok.size() - 1, 1) == "=") {
- // start of new feature
-
- if (name != "") {
- // save previous ff
- const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(name);
- m_allWeights.Assign(&ff, weights);
- weights.clear();
- }
-
- name = tok.substr(0, tok.size() - 1);
- } else {
- // a weight for curr ff
- float weight = Scan<float>(toks[i]);
- weights.push_back(weight);
- }
+ const string &tok = toks[i];
+
+ if (ends_with(tok, "=")) {
+ // start of new feature
+
+ if (name != "") {
+ // save previous ff
+ const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(name);
+ m_allWeights.Assign(&ff, weights);
+ weights.clear();
+ }
+
+ name = tok.substr(0, tok.size() - 1);
+ } else {
+ // a weight for curr ff
+ float weight = Scan<float>(toks[i]);
+ weights.push_back(weight);
+ }
}
const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(name);
@@ -1248,14 +1203,14 @@ void StaticData::ResetWeights(const std::string &denseWeights, const std::string
InputFileStream sparseStrme(sparseFile);
string line;
while (getline(sparseStrme, line)) {
- vector<string> toks = Tokenize(line);
- UTIL_THROW_IF2(toks.size() != 2, "Incorrect sparse weight format. Should be FFName_spareseName weight");
+ vector<string> toks = Tokenize(line);
+ UTIL_THROW_IF2(toks.size() != 2, "Incorrect sparse weight format. Should be FFName_spareseName weight");
- vector<string> names = Tokenize(toks[0], "_");
- UTIL_THROW_IF2(names.size() != 2, "Incorrect sparse weight name. Should be FFName_spareseName");
+ vector<string> names = Tokenize(toks[0], "_");
+ UTIL_THROW_IF2(names.size() != 2, "Incorrect sparse weight name. Should be FFName_spareseName");
- const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(names[0]);
- m_allWeights.Assign(&ff, names[1], Scan<float>(toks[1]));
+ const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(names[0]);
+ m_allWeights.Assign(&ff, names[1], Scan<float>(toks[1]));
}
}
diff --git a/moses/StaticData.h b/moses/StaticData.h
index feb6c8c85..193f79aad 100644
--- a/moses/StaticData.h
+++ b/moses/StaticData.h
@@ -31,7 +31,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <utility>
#include <fstream>
#include <string>
-#include "UserMessage.h"
#ifdef WITH_THREADS
#include <boost/thread.hpp>
@@ -51,6 +50,9 @@ class InputType;
class DecodeGraph;
class DecodeStep;
+class DynamicCacheBasedLanguageModel;
+class PhraseDictionaryDynamicCacheBased;
+
typedef std::pair<std::string, float> UnknownLHSEntry;
typedef std::vector<UnknownLHSEntry> UnknownLHSList;
@@ -97,18 +99,20 @@ protected:
, m_maxNoPartTransOpt
, m_maxPhraseLength;
- std::string m_nBestFilePath, m_latticeSamplesFilePath;
- bool m_labeledNBestList,m_nBestIncludesSegmentation;
+ std::string m_nBestFilePath, m_latticeSamplesFilePath;
+ bool m_labeledNBestList,m_nBestIncludesSegmentation;
bool m_dropUnknown; //! false = treat unknown words as unknowns, and translate them as themselves; true = drop (ignore) them
bool m_markUnknown; //! false = treat unknown words as unknowns, and translate them as themselves; true = mark and (ignore) them
bool m_wordDeletionEnabled;
bool m_disableDiscarding;
bool m_printAllDerivations;
+ bool m_printTranslationOptions;
bool m_sourceStartPosMattersForRecombination;
bool m_recoverPath;
bool m_outputHypoScore;
+ bool m_requireSortingAfterSourceContext;
SearchAlgorithm m_searchAlgorithm;
InputTypeEnum m_inputType;
@@ -130,6 +134,10 @@ protected:
bool m_needAlignmentInfo;
bool m_PrintAlignmentInfoNbest;
+ bool m_PrintID;
+ bool m_PrintPassthroughInformation;
+ bool m_PrintPassthroughInformationInNBest;
+
std::string m_alignmentOutputFile;
std::string m_factorDelimiter; //! by default, |, but it can be changed
@@ -199,7 +207,6 @@ protected:
FactorType m_placeHolderFactor;
bool m_useLegacyPT;
bool m_defaultNonTermOnlyForEmptyRange;
- bool m_useS2TDecoder;
S2TParsingAlgorithm m_s2tParsingAlgorithm;
bool m_printNBestTrees;
@@ -211,11 +218,10 @@ protected:
void LoadChartDecodingParameters();
void LoadNonTerminals();
- //! helper fn to set bool param from ini file/command line
- void SetBooleanParameter(bool *paramter, std::string parameterName, bool defaultValue);
-
//! load decoding steps
- bool LoadDecodeGraphs();
+ void LoadDecodeGraphs();
+ void LoadDecodeGraphsOld(const std::vector<std::string> &mappingVector, const std::vector<size_t> &maxChartSpans);
+ void LoadDecodeGraphsNew(const std::vector<std::string> &mappingVector, const std::vector<size_t> &maxChartSpans);
void NoCache();
@@ -264,10 +270,6 @@ public:
bool LoadData(Parameter *parameter);
void ClearData();
- const PARAM_VEC &GetParam(const std::string &paramName) const {
- return m_parameter->GetParam(paramName);
- }
-
const Parameter &GetParameter() const {
return *m_parameter;
}
@@ -321,6 +323,15 @@ public:
size_t IsPathRecoveryEnabled() const {
return m_recoverPath;
}
+ bool IsIDEnabled() const {
+ return m_PrintID;
+ }
+ bool IsPassthroughEnabled() const {
+ return m_PrintPassthroughInformation;
+ }
+ bool IsPassthroughInNBestEnabled() const {
+ return m_PrintPassthroughInformationInNBest;
+ }
int GetMaxDistortion() const {
return m_maxDistortion;
}
@@ -389,10 +400,6 @@ public:
return m_minlexrMemory;
}
- const std::vector<std::string> &GetDescription() const {
- return m_parameter->GetParam("description");
- }
-
// for mert
size_t GetNBestSize() const {
return m_nBestSize;
@@ -430,7 +437,7 @@ public:
return m_searchAlgorithm;
}
bool IsChart() const {
- return m_searchAlgorithm == ChartDecoding || m_searchAlgorithm == ChartIncremental;
+ return m_searchAlgorithm == CYKPlus || m_searchAlgorithm == ChartIncremental;
}
const ScoreComponentCollection& GetAllWeights() const {
@@ -569,6 +576,10 @@ public:
return m_xmlBrackets;
}
+ bool PrintTranslationOptions() const {
+ return m_printTranslationOptions;
+ }
+
bool PrintAllDerivations() const {
return m_printAllDerivations;
}
@@ -602,7 +613,6 @@ public:
return m_continuePartialTranslation;
}
- void ReLoadParameter();
void ReLoadBleuScoreFeatureParameter(float weight);
Parameter* GetParameter() {
@@ -689,7 +699,7 @@ public:
// model must support alternate weight settings
if (!GetHasAlternateWeightSettings()) {
- UserMessage::Add("Warning: Input specifies weight setting, but model does not support alternate weight settings.");
+ std::cerr << "Warning: Input specifies weight setting, but model does not support alternate weight settings.";
return;
}
@@ -700,10 +710,8 @@ public:
// if not found, resort to default
if (i == m_weightSetting.end()) {
- std::stringstream strme;
- strme << "Warning: Specified weight setting " << settingName
- << " does not exist in model, using default weight setting instead";
- UserMessage::Add(strme.str());
+ std::cerr << "Warning: Specified weight setting " << settingName
+ << " does not exist in model, using default weight setting instead";
i = m_weightSetting.find( "default" );
m_currentWeightSetting = "default";
}
@@ -713,7 +721,6 @@ public:
}
float GetWeightWordPenalty() const;
- float GetWeightUnknownWordPenalty() const;
const std::vector<DecodeGraph*>& GetDecodeGraphs() const {
return m_decodeGraphs;
@@ -736,11 +743,13 @@ public:
return m_placeHolderFactor;
}
- const FeatureRegistry &GetFeatureRegistry() const
- { return m_registry; }
+ const FeatureRegistry &GetFeatureRegistry() const {
+ return m_registry;
+ }
- const PhrasePropertyFactory &GetPhrasePropertyFactory() const
- { return m_phrasePropertyFactory; }
+ const PhrasePropertyFactory &GetPhrasePropertyFactory() const {
+ return m_phrasePropertyFactory;
+ }
/** check whether we should be using the old code to support binary phrase-table.
** eventually, we'll stop support the binary phrase-table and delete this legacy code
@@ -758,24 +767,21 @@ public:
return m_softMatchesMap;
}
-
void ResetWeights(const std::string &denseWeights, const std::string &sparseFile);
// need global access for output of tree structure
const StatefulFeatureFunction* GetTreeStructure() const {
- return m_treeStructure;
+ return m_treeStructure;
}
void SetTreeStructure(const StatefulFeatureFunction* treeStructure) {
- m_treeStructure = treeStructure;
+ m_treeStructure = treeStructure;
}
- bool GetDefaultNonTermOnlyForEmptyRange() const
- { return m_defaultNonTermOnlyForEmptyRange; }
-
- bool UseS2TDecoder() const {
- return m_useS2TDecoder;
+ bool GetDefaultNonTermOnlyForEmptyRange() const {
+ return m_defaultNonTermOnlyForEmptyRange;
}
+
S2TParsingAlgorithm GetS2TParsingAlgorithm() const {
return m_s2tParsingAlgorithm;
}
@@ -784,6 +790,10 @@ public:
return m_printNBestTrees;
}
+ bool RequireSortingAfterSourceContext() const {
+ return m_requireSortingAfterSourceContext;
+ }
+
};
}
diff --git a/moses/SyntacticLanguageModel.h b/moses/SyntacticLanguageModel.h
index 76882a4d1..ad11d29bf 100644
--- a/moses/SyntacticLanguageModel.h
+++ b/moses/SyntacticLanguageModel.h
@@ -31,8 +31,8 @@ public:
ScoreComponentCollection* accumulator) const;
FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo,
- int featureID,
- ScoreComponentCollection* accumulator) const {
+ int featureID,
+ ScoreComponentCollection* accumulator) const {
throw std::runtime_error("Syntactic LM can only be used with phrase-based decoder.");
}
diff --git a/moses/Syntax/BoundedPriorityContainer.h b/moses/Syntax/BoundedPriorityContainer.h
index 9afc1b75d..192f9ff2e 100644
--- a/moses/Syntax/BoundedPriorityContainer.h
+++ b/moses/Syntax/BoundedPriorityContainer.h
@@ -24,20 +24,30 @@ namespace Syntax
template<typename T>
class BoundedPriorityContainer
{
- public:
+public:
typedef typename std::vector<T>::iterator Iterator;
typedef typename std::vector<T>::const_iterator ConstIterator;
BoundedPriorityContainer(std::size_t);
- Iterator Begin() { return m_elements.begin(); }
- Iterator End() { return m_elements.begin()+m_size; }
+ Iterator Begin() {
+ return m_elements.begin();
+ }
+ Iterator End() {
+ return m_elements.begin()+m_size;
+ }
- ConstIterator Begin() const { return m_elements.begin(); }
- ConstIterator End() const { return m_elements.begin()+m_size; }
+ ConstIterator Begin() const {
+ return m_elements.begin();
+ }
+ ConstIterator End() const {
+ return m_elements.begin()+m_size;
+ }
// Return the number of elements currently held.
- std::size_t Size() const { return m_size; }
+ std::size_t Size() const {
+ return m_size;
+ }
// 'Lazily' clear the container by setting the size to 0 (allowing elements
// to be overwritten).
@@ -46,7 +56,12 @@ class BoundedPriorityContainer
// TODO Alternative, is to clear m_queue by assigning an empty queue value
// TODO but that might incur an alloc-related overhead when the new underlying
// TODO has to be regrown.
- void LazyClear() { m_size = 0; while (!m_queue.empty()) { m_queue.pop(); } }
+ void LazyClear() {
+ m_size = 0;
+ while (!m_queue.empty()) {
+ m_queue.pop();
+ }
+ }
// Insert the given object iff
// i) the container is not full yet, or
@@ -67,17 +82,16 @@ class BoundedPriorityContainer
// Determine if an object with the given priority would be accepted for
// insertion based on the current contents of the container.
- bool WouldAccept(float priority)
- {
+ bool WouldAccept(float priority) {
return m_size < m_limit || priority > m_queue.top().first;
}
- private:
+private:
typedef std::pair<float, int> PriorityIndexPair;
class PriorityIndexPairOrderer
{
- public:
+ public:
bool operator()(const PriorityIndexPair &p,
const PriorityIndexPair &q) const {
return p.first > q.first;
@@ -87,8 +101,8 @@ class BoundedPriorityContainer
// Min-priority queue. The queue stores the indices of the elements, not
// the elements themselves to keep down the costs of heap maintenance.
typedef std::priority_queue<PriorityIndexPair,
- std::vector<PriorityIndexPair>,
- PriorityIndexPairOrderer> Queue;
+ std::vector<PriorityIndexPair>,
+ PriorityIndexPairOrderer> Queue;
// The elements are stored in a vector. Note that the size of this vector
// can be greater than m_size (after a call to LazyClear).
diff --git a/moses/Syntax/Cube.cpp b/moses/Syntax/Cube.cpp
index 4fcf50829..08d75529f 100644
--- a/moses/Syntax/Cube.cpp
+++ b/moses/Syntax/Cube.cpp
@@ -14,7 +14,7 @@ namespace Syntax
{
Cube::Cube(const SHyperedgeBundle &bundle)
- : m_bundle(bundle)
+ : m_bundle(bundle)
{
// Create the SHyperedge for the 'corner' of the cube.
std::vector<int> coordinates(bundle.stacks.size()+1, 0);
@@ -94,7 +94,7 @@ SHyperedge *Cube::CreateHyperedge(const std::vector<int> &coordinates)
head->best = hyperedge;
head->pvertex = 0; // FIXME???
head->state.resize(
- StatefulFeatureFunction::GetStatefulFeatureFunctions().size());
+ StatefulFeatureFunction::GetStatefulFeatureFunctions().size());
hyperedge->head = head;
hyperedge->tail.resize(coordinates.size()-1);
@@ -102,11 +102,16 @@ SHyperedge *Cube::CreateHyperedge(const std::vector<int> &coordinates)
boost::shared_ptr<SVertex> pred = (*m_bundle.stacks[i])[coordinates[i]];
hyperedge->tail[i] = pred.get();
if (pred->best) {
- hyperedge->scoreBreakdown.PlusEquals(pred->best->scoreBreakdown);
+ hyperedge->label.scoreBreakdown.PlusEquals(
+ pred->best->label.scoreBreakdown);
}
}
- hyperedge->translation = *(m_bundle.translations->begin()+coordinates.back());
- hyperedge->scoreBreakdown.PlusEquals(hyperedge->translation->GetScoreBreakdown());
+
+ hyperedge->label.translation =
+ *(m_bundle.translations->begin()+coordinates.back());
+
+ hyperedge->label.scoreBreakdown.PlusEquals(
+ hyperedge->label.translation->GetScoreBreakdown());
const StaticData &staticData = StaticData::Instance();
@@ -116,7 +121,7 @@ SHyperedge *Cube::CreateHyperedge(const std::vector<int> &coordinates)
StatelessFeatureFunction::GetStatelessFeatureFunctions();
for (unsigned i = 0; i < sfs.size(); ++i) {
if (!staticData.IsFeatureFunctionIgnored(*sfs[i])) {
- sfs[i]->EvaluateWhenApplied(*hyperedge, &hyperedge->scoreBreakdown);
+ sfs[i]->EvaluateWhenApplied(*hyperedge, &hyperedge->label.scoreBreakdown);
}
}
@@ -125,11 +130,12 @@ SHyperedge *Cube::CreateHyperedge(const std::vector<int> &coordinates)
for (unsigned i = 0; i < ffs.size(); ++i) {
if (!staticData.IsFeatureFunctionIgnored(*ffs[i])) {
head->state[i] =
- ffs[i]->EvaluateWhenApplied(*hyperedge, i, &hyperedge->scoreBreakdown);
+ ffs[i]->EvaluateWhenApplied(*hyperedge, i,
+ &hyperedge->label.scoreBreakdown);
}
}
- hyperedge->score = hyperedge->scoreBreakdown.GetWeightedScore();
+ hyperedge->label.score = hyperedge->label.scoreBreakdown.GetWeightedScore();
return hyperedge;
}
diff --git a/moses/Syntax/Cube.h b/moses/Syntax/Cube.h
index a28440834..b18968ca9 100644
--- a/moses/Syntax/Cube.h
+++ b/moses/Syntax/Cube.h
@@ -19,31 +19,35 @@ namespace Syntax
// best-first order.
class Cube
{
- public:
+public:
Cube(const SHyperedgeBundle &);
~Cube();
SHyperedge *Pop();
- SHyperedge *Top() const { return m_queue.top().first; }
+ SHyperedge *Top() const {
+ return m_queue.top().first;
+ }
- bool IsEmpty() const { return m_queue.empty(); }
+ bool IsEmpty() const {
+ return m_queue.empty();
+ }
- private:
+private:
typedef boost::unordered_set<std::vector<int> > CoordinateSet;
typedef std::pair<SHyperedge *, const std::vector<int> *> QueueItem;
class QueueItemOrderer
{
- public:
+ public:
bool operator()(const QueueItem &p, const QueueItem &q) const {
- return p.first->score < q.first->score;
+ return p.first->label.score < q.first->label.score;
}
};
typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
- QueueItemOrderer> Queue;
+ QueueItemOrderer> Queue;
SHyperedge *CreateHyperedge(const std::vector<int> &);
void CreateNeighbour(const std::vector<int> &);
diff --git a/moses/Syntax/CubeQueue.h b/moses/Syntax/CubeQueue.h
index 304e59409..15a9bb211 100644
--- a/moses/Syntax/CubeQueue.h
+++ b/moses/Syntax/CubeQueue.h
@@ -14,7 +14,7 @@ namespace Syntax
class CubeQueue
{
- public:
+public:
template<typename InputIterator>
CubeQueue(InputIterator, InputIterator);
@@ -22,14 +22,16 @@ class CubeQueue
SHyperedge *Pop();
- bool IsEmpty() const { return m_queue.empty(); }
+ bool IsEmpty() const {
+ return m_queue.empty();
+ }
- private:
+private:
class CubeOrderer
{
- public:
+ public:
bool operator()(const Cube *p, const Cube *q) const {
- return p->Top()->score < q->Top()->score;
+ return p->Top()->label.score < q->Top()->label.score;
}
};
diff --git a/moses/Syntax/F2S/DerivationWriter.cpp b/moses/Syntax/F2S/DerivationWriter.cpp
new file mode 100644
index 000000000..efa3c3d47
--- /dev/null
+++ b/moses/Syntax/F2S/DerivationWriter.cpp
@@ -0,0 +1,101 @@
+#include "DerivationWriter.h"
+
+#include "moses/Factor.h"
+#include "moses/Syntax/PVertex.h"
+#include "moses/Syntax/SHyperedge.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+// 1-best version.
+void DerivationWriter::Write(const SHyperedge &shyperedge,
+ std::size_t sentNum, std::ostream &out)
+{
+ WriteLine(shyperedge, sentNum, out);
+ for (std::size_t i = 0; i < shyperedge.tail.size(); ++i) {
+ const SVertex &pred = *(shyperedge.tail[i]);
+ if (pred.best) {
+ Write(*pred.best, sentNum, out);
+ }
+ }
+}
+
+// k-best derivation.
+void DerivationWriter::Write(const KBestExtractor::Derivation &derivation,
+ std::size_t sentNum, std::ostream &out)
+{
+ WriteLine(derivation.edge->shyperedge, sentNum, out);
+ for (std::size_t i = 0; i < derivation.subderivations.size(); ++i) {
+ Write(*(derivation.subderivations[i]), sentNum, out);
+ }
+}
+
+void DerivationWriter::WriteLine(const SHyperedge &shyperedge,
+ std::size_t sentNum, std::ostream &out)
+{
+ // Sentence number.
+ out << sentNum << " |||";
+
+ // Source LHS.
+ out << " ";
+ WriteSymbol(shyperedge.head->pvertex->symbol, out);
+ out << " ->";
+
+ // Source RHS symbols.
+ for (std::size_t i = 0; i < shyperedge.tail.size(); ++i) {
+ const Word &symbol = shyperedge.tail[i]->pvertex->symbol;
+ out << " ";
+ WriteSymbol(symbol, out);
+ }
+ out << " |||";
+
+ // Target RHS.
+ out << " [X] ->";
+
+ // Target RHS symbols.
+ const TargetPhrase &phrase = *(shyperedge.label.translation);
+ for (std::size_t i = 0; i < phrase.GetSize(); ++i) {
+ const Word &symbol = phrase.GetWord(i);
+ out << " ";
+ if (symbol.IsNonTerminal()) {
+ out << "[X]";
+ } else {
+ WriteSymbol(symbol, out);
+ }
+ }
+ out << " |||";
+
+ // Non-terminal alignments
+ const AlignmentInfo &a = phrase.GetAlignNonTerm();
+ for (AlignmentInfo::const_iterator p = a.begin(); p != a.end(); ++p) {
+ out << " " << p->first << "-" << p->second;
+ }
+ out << " |||";
+
+ // Spans covered by source RHS symbols.
+ for (std::size_t i = 0; i < shyperedge.tail.size(); ++i) {
+ const SVertex *child = shyperedge.tail[i];
+ const WordsRange &span = child->pvertex->span;
+ out << " " << span.GetStartPos() << ".." << span.GetEndPos();
+ }
+
+ out << "\n";
+}
+
+void DerivationWriter::WriteSymbol(const Word &symbol, std::ostream &out)
+{
+ const Factor *f = symbol[0];
+ if (symbol.IsNonTerminal()) {
+ out << "[" << f->GetString() << "]";
+ } else {
+ out << f->GetString();
+ }
+}
+
+} // namespace F2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/F2S/DerivationWriter.h b/moses/Syntax/F2S/DerivationWriter.h
new file mode 100644
index 000000000..76ca14313
--- /dev/null
+++ b/moses/Syntax/F2S/DerivationWriter.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#include <ostream>
+
+#include "moses/Syntax/KBestExtractor.h"
+#include "moses/Word.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+struct SHyperedge;
+
+namespace F2S
+{
+
+// Writes a string representation of a derivation to a std::ostream. This is
+// used by the -translation-details / -T option.
+// TODO Merge this with S2T::DerivationWriter.
+class DerivationWriter
+{
+ public:
+ // 1-best version.
+ static void Write(const SHyperedge&, std::size_t, std::ostream &);
+
+ // k-best version.
+ static void Write(const KBestExtractor::Derivation &, std::size_t,
+ std::ostream &);
+ private:
+ static void WriteLine(const SHyperedge &, std::size_t, std::ostream &);
+ static void WriteSymbol(const Word &, std::ostream &);
+};
+
+} // namespace F2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/F2S/Forest.cpp b/moses/Syntax/F2S/Forest.cpp
new file mode 100644
index 000000000..e130d5ec2
--- /dev/null
+++ b/moses/Syntax/F2S/Forest.cpp
@@ -0,0 +1,34 @@
+#include "Forest.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+Forest::~Forest()
+{
+ Clear();
+}
+
+void Forest::Clear()
+{
+ for (std::vector<Vertex *>::iterator p = vertices.begin();
+ p != vertices.end(); ++p) {
+ delete *p;
+ }
+ vertices.clear();
+}
+
+Forest::Vertex::~Vertex()
+{
+ for (std::vector<Hyperedge *>::iterator p = incoming.begin();
+ p != incoming.end(); ++p) {
+ delete *p;
+ }
+}
+
+} // F2S
+} // Syntax
+} // Moses
diff --git a/moses/Syntax/F2S/Forest.h b/moses/Syntax/F2S/Forest.h
new file mode 100644
index 000000000..6673b43be
--- /dev/null
+++ b/moses/Syntax/F2S/Forest.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#include "vector"
+
+#include "moses/Syntax/PVertex.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+class Forest
+{
+ public:
+ struct Vertex;
+
+ struct Hyperedge {
+ Vertex *head;
+ std::vector<Vertex *> tail;
+ float weight;
+ };
+
+ struct Vertex {
+ Vertex(const PVertex &v) : pvertex(v) {}
+ ~Vertex(); // Deletes incoming hyperedges.
+ PVertex pvertex;
+ std::vector<Hyperedge *> incoming;
+ };
+
+ // Constructor.
+ Forest() {}
+
+ // Destructor (deletes vertices).
+ ~Forest();
+
+ // Delete all vertices.
+ void Clear();
+
+ std::vector<Vertex *> vertices;
+
+ private:
+ // Copying is not allowed.
+ Forest(const Forest &);
+ Forest &operator=(const Forest &);
+};
+
+} // namespace F2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/F2S/GlueRuleSynthesizer.cpp b/moses/Syntax/F2S/GlueRuleSynthesizer.cpp
new file mode 100644
index 000000000..7c7d35beb
--- /dev/null
+++ b/moses/Syntax/F2S/GlueRuleSynthesizer.cpp
@@ -0,0 +1,85 @@
+#include "GlueRuleSynthesizer.h"
+
+#include <sstream>
+
+#include "moses/FF/UnknownWordPenaltyProducer.h"
+#include "moses/StaticData.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+GlueRuleSynthesizer::GlueRuleSynthesizer(HyperTree &trie)
+ : m_hyperTree(trie)
+{
+ const std::vector<FactorType> &inputFactorOrder =
+ StaticData::Instance().GetInputFactorOrder();
+ Word *lhs = NULL;
+ m_dummySourcePhrase.CreateFromString(Input, inputFactorOrder, "hello", &lhs);
+ delete lhs;
+}
+
+void GlueRuleSynthesizer::SynthesizeRule(const Forest::Hyperedge &e)
+{
+ HyperPath source;
+ SynthesizeHyperPath(e, source);
+ TargetPhrase *tp = SynthesizeTargetPhrase(e);
+ TargetPhraseCollection &tpc = GetOrCreateTargetPhraseCollection(m_hyperTree,
+ source);
+ tpc.Add(tp);
+}
+
+void GlueRuleSynthesizer::SynthesizeHyperPath(const Forest::Hyperedge &e,
+ HyperPath &path)
+{
+ path.nodeSeqs.clear();
+ path.nodeSeqs.resize(2);
+ path.nodeSeqs[0].push_back(e.head->pvertex.symbol[0]->GetId());
+ for (std::vector<Forest::Vertex*>::const_iterator p = e.tail.begin();
+ p != e.tail.end(); ++p) {
+ const Forest::Vertex &child = **p;
+ path.nodeSeqs[1].push_back(child.pvertex.symbol[0]->GetId());
+ }
+}
+
+TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase(
+ const Forest::Hyperedge &e)
+{
+ const StaticData &staticData = StaticData::Instance();
+
+ const UnknownWordPenaltyProducer &unknownWordPenaltyProducer =
+ UnknownWordPenaltyProducer::Instance();
+
+ TargetPhrase *targetPhrase = new TargetPhrase();
+
+ std::ostringstream alignmentSS;
+ for (std::size_t i = 0; i < e.tail.size(); ++i) {
+ const Word &symbol = e.tail[i]->pvertex.symbol;
+ if (symbol.IsNonTerminal()) {
+ targetPhrase->AddWord(staticData.GetOutputDefaultNonTerminal());
+ } else {
+ // TODO Check this
+ Word &targetWord = targetPhrase->AddWord();
+ targetWord.CreateUnknownWord(symbol);
+ }
+ alignmentSS << i << "-" << i << " ";
+ }
+
+ // Assign the lowest possible score so that glue rules are only used when
+ // absolutely required.
+ float score = LOWEST_SCORE;
+ targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, score);
+ targetPhrase->EvaluateInIsolation(m_dummySourcePhrase);
+ Word *targetLhs = new Word(staticData.GetOutputDefaultNonTerminal());
+ targetPhrase->SetTargetLHS(targetLhs);
+ targetPhrase->SetAlignmentInfo(alignmentSS.str());
+
+ return targetPhrase;
+}
+
+} // F2S
+} // Syntax
+} // Moses
diff --git a/moses/Syntax/F2S/GlueRuleSynthesizer.h b/moses/Syntax/F2S/GlueRuleSynthesizer.h
new file mode 100644
index 000000000..77b454f87
--- /dev/null
+++ b/moses/Syntax/F2S/GlueRuleSynthesizer.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include "moses/Phrase.h"
+#include "moses/TargetPhrase.h"
+
+#include "HyperTree.h"
+#include "HyperTreeCreator.h"
+#include "Forest.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+class GlueRuleSynthesizer : public HyperTreeCreator
+{
+ public:
+ GlueRuleSynthesizer(HyperTree &);
+
+ // Synthesize the minimal, monotone rule that can be applied to the given
+ // hyperedge and add it to the rule trie.
+ void SynthesizeRule(const Forest::Hyperedge &);
+
+ private:
+ void SynthesizeHyperPath(const Forest::Hyperedge &, HyperPath &);
+
+ TargetPhrase *SynthesizeTargetPhrase(const Forest::Hyperedge &);
+
+ HyperTree &m_hyperTree;
+ Phrase m_dummySourcePhrase;
+};
+
+} // F2S
+} // Syntax
+} // Moses
diff --git a/moses/Syntax/F2S/HyperPath.cpp b/moses/Syntax/F2S/HyperPath.cpp
new file mode 100644
index 000000000..e60b4f411
--- /dev/null
+++ b/moses/Syntax/F2S/HyperPath.cpp
@@ -0,0 +1,20 @@
+#include "HyperPath.h"
+
+#include <limits>
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+const std::size_t HyperPath::kEpsilon =
+ std::numeric_limits<std::size_t>::max()-1;
+
+const std::size_t HyperPath::kComma =
+ std::numeric_limits<std::size_t>::max()-2;
+
+} // namespace F2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/F2S/HyperPath.h b/moses/Syntax/F2S/HyperPath.h
new file mode 100644
index 000000000..4a11990e8
--- /dev/null
+++ b/moses/Syntax/F2S/HyperPath.h
@@ -0,0 +1,35 @@
+#pragma once
+
+#include <vector>
+
+#include "moses/Factor.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+// A HyperPath for representing the source-side tree fragment of a
+// tree-to-string rule. See this paper:
+//
+// Hui Zhang, Min Zhang, Haizhou Li, and Chew Lim Tan
+// "Fast Translation Rule Matching for Syntax-based Statistical Machine
+// Translation"
+// In proceedings of EMNLP 2009
+//
+struct HyperPath
+{
+ public:
+ typedef std::vector<std::size_t> NodeSeq;
+
+ static const std::size_t kEpsilon;
+ static const std::size_t kComma;
+
+ std::vector<NodeSeq> nodeSeqs;
+};
+
+} // namespace F2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/F2S/HyperPathLoader.cpp b/moses/Syntax/F2S/HyperPathLoader.cpp
new file mode 100644
index 000000000..e4f22ae07
--- /dev/null
+++ b/moses/Syntax/F2S/HyperPathLoader.cpp
@@ -0,0 +1,172 @@
+#include "HyperPathLoader.h"
+
+#include "TreeFragmentTokenizer.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+HyperPathLoader::HyperPathLoader(FactorDirection direction,
+ const std::vector<FactorType> &factorOrder)
+ : m_direction(direction)
+ , m_factorOrder(factorOrder)
+{
+}
+
+void HyperPathLoader::Load(const StringPiece &s, HyperPath &path)
+{
+ path.nodeSeqs.clear();
+ // Tokenize the string and store the tokens in m_tokenSeq.
+ m_tokenSeq.clear();
+ for (TreeFragmentTokenizer p(s); p != TreeFragmentTokenizer(); ++p) {
+ m_tokenSeq.push_back(*p);
+ }
+ // Determine the height of the tree fragment.
+ int height = DetermineHeight();
+ // Ensure path contains the correct number of elements.
+ path.nodeSeqs.resize(height+1);
+ // Generate the fragment's NodeTuple sequence and store it in m_nodeTupleSeq.
+ GenerateNodeTupleSeq(height);
+ // Fill the HyperPath.
+ for (int depth = 0; depth <= height; ++depth) {
+ int prevParent = -1;
+// TODO Generate one node tuple sequence for each depth instead of one
+// TODO sequence that contains node tuples at every depth
+ for (std::vector<NodeTuple>::const_iterator p = m_nodeTupleSeq.begin();
+ p != m_nodeTupleSeq.end(); ++p) {
+ const NodeTuple &tuple = *p;
+ if (tuple.depth != depth) {
+ continue;
+ }
+ if (prevParent != -1 && tuple.parent != prevParent) {
+ path.nodeSeqs[depth].push_back(HyperPath::kComma);
+ }
+ path.nodeSeqs[depth].push_back(tuple.symbol);
+ prevParent = tuple.parent;
+ }
+ }
+}
+
+int HyperPathLoader::DetermineHeight() const
+{
+ int height = 0;
+ int maxHeight = 0;
+ std::size_t numTokens = m_tokenSeq.size();
+ for (std::size_t i = 0; i < numTokens; ++i) {
+ if (m_tokenSeq[i].type == TreeFragmentToken_LSB) {
+ assert(i+2 < numTokens);
+ // Does this bracket indicate the start of a subtree or the start of
+ // a non-terminal leaf?
+ if (m_tokenSeq[i+2].type != TreeFragmentToken_RSB) { // It's a subtree.
+ maxHeight = std::max(++height, maxHeight);
+ } else { // It's a non-terminal leaf: jump to its end.
+ i += 2;
+ }
+ } else if (m_tokenSeq[i].type == TreeFragmentToken_RSB) {
+ --height;
+ }
+ }
+ return maxHeight;
+}
+
+void HyperPathLoader::GenerateNodeTupleSeq(int height)
+{
+ m_nodeTupleSeq.clear();
+
+ // Initialize the stack of parent indices.
+ assert(m_parentStack.empty());
+ m_parentStack.push(-1);
+
+ // Initialize a temporary tuple that tracks the state as we iterate over
+ // the tree fragment tokens.
+ NodeTuple tuple;
+ tuple.index = -1;
+ tuple.parent = -1;
+ tuple.depth = -1;
+ tuple.symbol = HyperPath::kEpsilon;
+
+ // Iterate over the tree fragment tokens.
+ std::size_t numTokens = m_tokenSeq.size();
+ for (std::size_t i = 0; i < numTokens; ++i) {
+ if (m_tokenSeq[i].type == TreeFragmentToken_LSB) {
+ assert(i+2 < numTokens);
+ // Does this bracket indicate the start of a subtree or the start of
+ // a non-terminal leaf?
+ if (m_tokenSeq[i+2].type != TreeFragmentToken_RSB) { // It's a subtree.
+ ++tuple.index;
+ tuple.parent = m_parentStack.top();
+ m_parentStack.push(tuple.index);
+ ++tuple.depth;
+ tuple.symbol = AddNonTerminalFactor(m_tokenSeq[++i].value)->GetId();
+ m_nodeTupleSeq.push_back(tuple);
+ } else { // It's a non-terminal leaf.
+ ++tuple.index;
+ tuple.parent = m_parentStack.top();
+ ++tuple.depth;
+ tuple.symbol = AddNonTerminalFactor(m_tokenSeq[++i].value)->GetId();
+ m_nodeTupleSeq.push_back(tuple);
+ // Add virtual nodes if required.
+ if (tuple.depth < height) {
+ int origDepth = tuple.depth;
+ m_parentStack.push(tuple.index);
+ for (int depth = origDepth+1; depth <= height; ++depth) {
+ ++tuple.index;
+ tuple.parent = m_parentStack.top();
+ m_parentStack.push(tuple.index);
+ tuple.depth = depth;
+ tuple.symbol = HyperPath::kEpsilon;
+ m_nodeTupleSeq.push_back(tuple);
+ }
+ for (int depth = origDepth; depth <= height; ++depth) {
+ m_parentStack.pop();
+ }
+ tuple.depth = origDepth;
+ }
+ --tuple.depth;
+ // Skip over the closing bracket.
+ ++i;
+ }
+ } else if (m_tokenSeq[i].type == TreeFragmentToken_WORD) {
+ // Token i is a word that doesn't follow a bracket. This must be a
+ // terminal since all non-terminals are either non-leaves (which follow
+ // an opening bracket) or are enclosed in brackets.
+ ++tuple.index;
+ tuple.parent = m_parentStack.top();
+ ++tuple.depth;
+ tuple.symbol = AddTerminalFactor(m_tokenSeq[i].value)->GetId();
+ m_nodeTupleSeq.push_back(tuple);
+ // Add virtual nodes if required.
+ if (m_tokenSeq[i+1].type == TreeFragmentToken_RSB &&
+ tuple.depth < height) {
+ int origDepth = tuple.depth;
+ m_parentStack.push(tuple.index);
+ for (int depth = origDepth+1; depth <= height; ++depth) {
+ ++tuple.index;
+ tuple.parent = m_parentStack.top();
+ m_parentStack.push(tuple.index);
+ tuple.depth = depth;
+ tuple.symbol = HyperPath::kEpsilon;
+ m_nodeTupleSeq.push_back(tuple);
+ }
+ for (int depth = origDepth; depth <= height; ++depth) {
+ m_parentStack.pop();
+ }
+ tuple.depth = origDepth;
+ }
+ --tuple.depth;
+ } else if (m_tokenSeq[i].type == TreeFragmentToken_RSB) {
+ m_parentStack.pop();
+ --tuple.depth;
+ }
+ }
+
+ // Remove the -1 parent index.
+ m_parentStack.pop();
+}
+
+} // namespace F2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/F2S/HyperPathLoader.h b/moses/Syntax/F2S/HyperPathLoader.h
new file mode 100644
index 000000000..27cd7c306
--- /dev/null
+++ b/moses/Syntax/F2S/HyperPathLoader.h
@@ -0,0 +1,70 @@
+#pragma once
+
+#include <stack>
+#include <vector>
+
+#include "util/string_piece.hh"
+
+#include "moses/FactorCollection.h"
+#include "moses/TypeDef.h"
+
+#include "HyperPath.h"
+#include "TreeFragmentTokenizer.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+// Parses a string representation of a tree fragment, adding the terminals
+// and non-terminals to FactorCollection::Instance() and building a
+// HyperPath object.
+//
+// This class is designed to be used during rule table loading. Since every
+// rule has a tree fragment on the source-side, Load() may be called millions
+// of times. The algorithm therefore sacrifices readability for speed and
+// shoehorns everything into two passes over the input token sequence.
+//
+class HyperPathLoader
+{
+ public:
+ HyperPathLoader(FactorDirection, const std::vector<FactorType> &);
+
+ void Load(const StringPiece &, HyperPath &);
+
+ private:
+ struct NodeTuple {
+ int index; // Preorder index of the node.
+ int parent; // Preorder index of the node's parent.
+ int depth; // Depth of the node.
+ std::size_t symbol; // Either the factor ID of a tree terminal/non-terminal
+ // or for virtual nodes, HyperPath::kEpsilon.
+ };
+
+ // Determine the height of the current tree fragment (stored in m_tokenSeq).
+ int DetermineHeight() const;
+
+ // Generate the preorder sequence of NodeTuples for the current tree fragment,
+ // including virtual nodes.
+ void GenerateNodeTupleSeq(int height);
+
+ const Factor *AddTerminalFactor(const StringPiece &s) {
+ return FactorCollection::Instance().AddFactor(s, false);
+ }
+
+ const Factor *AddNonTerminalFactor(const StringPiece &s) {
+ return FactorCollection::Instance().AddFactor(s, true);
+ }
+
+ FactorDirection m_direction;
+ const std::vector<FactorType> &m_factorOrder;
+ std::vector<TreeFragmentToken> m_tokenSeq;
+ std::vector<NodeTuple> m_nodeTupleSeq;
+ std::stack<int> m_parentStack;
+};
+
+} // namespace F2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/F2S/HyperTree.cpp b/moses/Syntax/F2S/HyperTree.cpp
new file mode 100644
index 000000000..cf28f275e
--- /dev/null
+++ b/moses/Syntax/F2S/HyperTree.cpp
@@ -0,0 +1,70 @@
+#include "HyperTree.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+void HyperTree::Node::Prune(std::size_t tableLimit)
+{
+  // Recursively prune child nodes.
+ for (Map::iterator p = m_map.begin(); p != m_map.end(); ++p) {
+ p->second.Prune(tableLimit);
+ }
+ // Prune TargetPhraseCollection at this node.
+ m_targetPhraseCollection.Prune(true, tableLimit);
+}
+
+void HyperTree::Node::Sort(std::size_t tableLimit)
+{
+  // Recursively sort child nodes.
+ for (Map::iterator p = m_map.begin(); p != m_map.end(); ++p) {
+ p->second.Sort(tableLimit);
+ }
+ // Sort TargetPhraseCollection at this node.
+ m_targetPhraseCollection.Sort(true, tableLimit);
+}
+
+HyperTree::Node *HyperTree::Node::GetOrCreateChild(
+ const HyperPath::NodeSeq &nodeSeq)
+{
+ return &m_map[nodeSeq];
+}
+
+const HyperTree::Node *HyperTree::Node::GetChild(
+ const HyperPath::NodeSeq &nodeSeq) const
+{
+ Map::const_iterator p = m_map.find(nodeSeq);
+ return (p == m_map.end()) ? NULL : &p->second;
+}
+
+TargetPhraseCollection &HyperTree::GetOrCreateTargetPhraseCollection(
+ const HyperPath &hyperPath)
+{
+ Node &node = GetOrCreateNode(hyperPath);
+ return node.GetTargetPhraseCollection();
+}
+
+HyperTree::Node &HyperTree::GetOrCreateNode(const HyperPath &hyperPath)
+{
+ const std::size_t height = hyperPath.nodeSeqs.size();
+ Node *node = &m_root;
+ for (std::size_t i = 0; i < height; ++i) {
+ const HyperPath::NodeSeq &nodeSeq = hyperPath.nodeSeqs[i];
+ node = node->GetOrCreateChild(nodeSeq);
+ }
+ return *node;
+}
+
+void HyperTree::SortAndPrune(std::size_t tableLimit)
+{
+ if (tableLimit) {
+ m_root.Sort(tableLimit);
+ }
+}
+
+} // namespace F2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/F2S/HyperTree.h b/moses/Syntax/F2S/HyperTree.h
new file mode 100644
index 000000000..75706712f
--- /dev/null
+++ b/moses/Syntax/F2S/HyperTree.h
@@ -0,0 +1,79 @@
+#pragma once
+
+#include <map>
+#include <vector>
+
+#include <boost/unordered_map.hpp>
+
+#include "moses/Syntax/RuleTable.h"
+#include "moses/TargetPhraseCollection.h"
+
+#include "HyperPath.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+// A HyperTree for representing a tree-to-string rule table. See this paper:
+//
+// Hui Zhang, Min Zhang, Haizhou Li, and Chew Lim Tan
+// "Fast Translation Rule Matching for Syntax-based Statistical Machine
+// Translation"
+// In proceedings of EMNLP 2009
+//
+class HyperTree : public RuleTable
+{
+ public:
+ class Node
+ {
+ public:
+ typedef boost::unordered_map<HyperPath::NodeSeq, Node> Map;
+
+ bool IsLeaf() const { return m_map.empty(); }
+
+ bool HasRules() const { return !m_targetPhraseCollection.IsEmpty(); }
+
+ void Prune(std::size_t tableLimit);
+ void Sort(std::size_t tableLimit);
+
+ Node *GetOrCreateChild(const HyperPath::NodeSeq &);
+
+ const Node *GetChild(const HyperPath::NodeSeq &) const;
+
+ const TargetPhraseCollection &GetTargetPhraseCollection() const {
+ return m_targetPhraseCollection;
+ }
+
+ TargetPhraseCollection &GetTargetPhraseCollection() {
+ return m_targetPhraseCollection;
+ }
+
+ const Map &GetMap() const { return m_map; }
+
+ private:
+ Map m_map;
+ TargetPhraseCollection m_targetPhraseCollection;
+ };
+
+ HyperTree(const RuleTableFF *ff) : RuleTable(ff) {}
+
+ const Node &GetRootNode() const { return m_root; }
+
+ private:
+ friend class HyperTreeCreator;
+
+ TargetPhraseCollection &GetOrCreateTargetPhraseCollection(const HyperPath &);
+
+ Node &GetOrCreateNode(const HyperPath &);
+
+ void SortAndPrune(std::size_t);
+
+ Node m_root;
+};
+
+} // namespace F2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/F2S/HyperTreeCreator.h b/moses/Syntax/F2S/HyperTreeCreator.h
new file mode 100644
index 000000000..bbae6e5c7
--- /dev/null
+++ b/moses/Syntax/F2S/HyperTreeCreator.h
@@ -0,0 +1,32 @@
+#pragma once
+
+#include "HyperTree.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+// Base for classes that create a HyperTree (currently HyperTreeLoader and
+// GlueRuleSynthesizer). HyperTreeCreator is a friend of HyperTree.
+class HyperTreeCreator
+{
+ protected:
+ // Provide access to HyperTree's private SortAndPrune function.
+ void SortAndPrune(HyperTree &trie, std::size_t limit) {
+ trie.SortAndPrune(limit);
+ }
+
+ // Provide access to HyperTree's private GetOrCreateTargetPhraseCollection
+ // function.
+ TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
+ HyperTree &trie, const HyperPath &fragment) {
+ return trie.GetOrCreateTargetPhraseCollection(fragment);
+ }
+};
+
+} // namespace F2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/F2S/HyperTreeLoader.cpp b/moses/Syntax/F2S/HyperTreeLoader.cpp
new file mode 100644
index 000000000..8dcadef55
--- /dev/null
+++ b/moses/Syntax/F2S/HyperTreeLoader.cpp
@@ -0,0 +1,148 @@
+#include "HyperTreeLoader.h"
+
+#include <sys/stat.h>
+#include <stdlib.h>
+
+#include <fstream>
+#include <string>
+#include <iterator>
+#include <algorithm>
+#include <iostream>
+
+#include "moses/FactorCollection.h"
+#include "moses/Word.h"
+#include "moses/Util.h"
+#include "moses/InputFileStream.h"
+#include "moses/StaticData.h"
+#include "moses/WordsRange.h"
+#include "moses/ChartTranslationOptionList.h"
+#include "moses/FactorCollection.h"
+#include "moses/Syntax/RuleTableFF.h"
+#include "util/file_piece.hh"
+#include "util/string_piece.hh"
+#include "util/tokenize_piece.hh"
+#include "util/double-conversion/double-conversion.h"
+#include "util/exception.hh"
+
+#include "HyperPath.h"
+#include "HyperPathLoader.h"
+#include "HyperTree.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
+ const std::vector<FactorType> &output,
+ const std::string &inFile,
+ const RuleTableFF &ff,
+ HyperTree &trie)
+{
+ PrintUserTime(std::string("Start loading HyperTree"));
+
+ const StaticData &staticData = StaticData::Instance();
+ const std::string &factorDelimiter = staticData.GetFactorDelimiter();
+
+ std::size_t count = 0;
+
+ std::ostream *progress = NULL;
+ IFVERBOSE(1) progress = &std::cerr;
+ util::FilePiece in(inFile.c_str(), progress);
+
+ // reused variables
+ std::vector<float> scoreVector;
+ StringPiece line;
+
+ double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan");
+
+ HyperPathLoader hyperPathLoader(Input, input);
+
+ Phrase dummySourcePhrase;
+ {
+ Word *lhs = NULL;
+ dummySourcePhrase.CreateFromString(Input, input, "hello", &lhs);
+ delete lhs;
+ }
+
+ while(true) {
+ try {
+ line = in.ReadLine();
+ } catch (const util::EndOfFileException &e) {
+ break;
+ }
+
+ util::TokenIter<util::MultiCharacter> pipes(line, "|||");
+ StringPiece sourceString(*pipes);
+ StringPiece targetString(*++pipes);
+ StringPiece scoreString(*++pipes);
+
+ StringPiece alignString;
+ if (++pipes) {
+ StringPiece temp(*pipes);
+ alignString = temp;
+ }
+
+ if (++pipes) {
+ StringPiece str(*pipes); //counts
+ }
+
+ scoreVector.clear();
+ for (util::TokenIter<util::AnyCharacter, true> s(scoreString, " \t"); s; ++s) {
+ int processed;
+ float score = converter.StringToFloat(s->data(), s->length(), &processed);
+ UTIL_THROW_IF2(isnan(score), "Bad score " << *s << " on line " << count);
+ scoreVector.push_back(FloorScore(TransformScore(score)));
+ }
+ const std::size_t numScoreComponents = ff.GetNumScoreComponents();
+ if (scoreVector.size() != numScoreComponents) {
+ UTIL_THROW2("Size of scoreVector != number (" << scoreVector.size() << "!="
+ << numScoreComponents << ") of score components on line " << count);
+ }
+
+ // Source-side
+ HyperPath sourceFragment;
+ hyperPathLoader.Load(sourceString, sourceFragment);
+
+ // Target-side
+ TargetPhrase *targetPhrase = new TargetPhrase(&ff);
+ Word *targetLHS = NULL;
+ targetPhrase->CreateFromString(Output, output, targetString, &targetLHS);
+ targetPhrase->SetTargetLHS(targetLHS);
+ targetPhrase->SetAlignmentInfo(alignString);
+
+ if (++pipes) {
+ StringPiece sparseString(*pipes);
+ targetPhrase->SetSparseScore(&ff, sparseString);
+ }
+
+ if (++pipes) {
+ StringPiece propertiesString(*pipes);
+ targetPhrase->SetProperties(propertiesString);
+ }
+
+ targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector);
+ targetPhrase->EvaluateInIsolation(dummySourcePhrase,
+ ff.GetFeaturesToApply());
+
+ // Add rule to trie.
+ TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(
+ trie, sourceFragment);
+ phraseColl.Add(targetPhrase);
+
+ count++;
+ }
+
+ // sort and prune each target phrase collection
+ if (ff.GetTableLimit()) {
+ SortAndPrune(trie, ff.GetTableLimit());
+ }
+
+ return true;
+}
+
+} // namespace F2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/F2S/HyperTreeLoader.h b/moses/Syntax/F2S/HyperTreeLoader.h
new file mode 100644
index 000000000..b760834d3
--- /dev/null
+++ b/moses/Syntax/F2S/HyperTreeLoader.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include <istream>
+#include <vector>
+
+#include "moses/TypeDef.h"
+#include "moses/Syntax/RuleTableFF.h"
+
+#include "HyperTree.h"
+#include "HyperTreeCreator.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+class HyperTreeLoader : public HyperTreeCreator
+{
+ public:
+ bool Load(const std::vector<FactorType> &input,
+ const std::vector<FactorType> &output,
+ const std::string &inFile,
+ const RuleTableFF &,
+ HyperTree &);
+};
+
+} // namespace F2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/F2S/Manager-inl.h b/moses/Syntax/F2S/Manager-inl.h
new file mode 100644
index 000000000..5eb722cf7
--- /dev/null
+++ b/moses/Syntax/F2S/Manager-inl.h
@@ -0,0 +1,318 @@
+#pragma once
+
+#include "moses/DecodeGraph.h"
+#include "moses/ForestInput.h"
+#include "moses/StaticData.h"
+#include "moses/Syntax/BoundedPriorityContainer.h"
+#include "moses/Syntax/CubeQueue.h"
+#include "moses/Syntax/PHyperedge.h"
+#include "moses/Syntax/RuleTable.h"
+#include "moses/Syntax/RuleTableFF.h"
+#include "moses/Syntax/SHyperedgeBundle.h"
+#include "moses/Syntax/SVertex.h"
+#include "moses/Syntax/SVertexRecombinationOrderer.h"
+#include "moses/Syntax/SymbolEqualityPred.h"
+#include "moses/Syntax/SymbolHasher.h"
+#include "moses/Syntax/T2S/InputTree.h"
+#include "moses/Syntax/T2S/InputTreeBuilder.h"
+#include "moses/Syntax/T2S/InputTreeToForest.h"
+#include "moses/TreeInput.h"
+
+#include "DerivationWriter.h"
+#include "GlueRuleSynthesizer.h"
+#include "HyperTree.h"
+#include "RuleMatcherCallback.h"
+#include "TopologicalSorter.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+template<typename RuleMatcher>
+Manager<RuleMatcher>::Manager(const InputType &source)
+ : Syntax::Manager(source)
+{
+ if (const ForestInput *p = dynamic_cast<const ForestInput*>(&source)) {
+ m_forest = p->GetForest();
+ m_rootVertex = p->GetRootVertex();
+ } else if (const TreeInput *p = dynamic_cast<const TreeInput*>(&source)) {
+ T2S::InputTreeBuilder builder;
+ T2S::InputTree tmpTree;
+ builder.Build(*p, "Q", tmpTree);
+ boost::shared_ptr<Forest> forest = boost::make_shared<Forest>();
+ m_rootVertex = T2S::InputTreeToForest(tmpTree, *forest);
+ m_forest = forest;
+ }
+}
+
+template<typename RuleMatcher>
+void Manager<RuleMatcher>::Decode()
+{
+ const StaticData &staticData = StaticData::Instance();
+
+ // Get various pruning-related constants.
+ const std::size_t popLimit = staticData.GetCubePruningPopLimit();
+ const std::size_t ruleLimit = staticData.GetRuleLimit();
+ const std::size_t stackLimit = staticData.GetMaxHypoStackSize();
+
+ // Initialize the stacks.
+ InitializeStacks();
+
+ // Initialize the rule matchers.
+ InitializeRuleMatchers();
+
+ // Create a callback to process the PHyperedges produced by the rule matchers.
+ RuleMatcherCallback callback(m_stackMap, ruleLimit);
+
+ // Create a glue rule synthesizer.
+ GlueRuleSynthesizer glueRuleSynthesizer(*m_glueRuleTrie);
+
+ // Sort the input forest's vertices into bottom-up topological order.
+ std::vector<const Forest::Vertex *> sortedVertices;
+ TopologicalSorter sorter;
+ sorter.Sort(*m_forest, sortedVertices);
+
+ // Visit each vertex of the input forest in topological order.
+ for (std::vector<const Forest::Vertex *>::const_iterator
+ p = sortedVertices.begin(); p != sortedVertices.end(); ++p) {
+ const Forest::Vertex &vertex = **p;
+
+ // Skip terminal vertices.
+ if (vertex.incoming.empty()) {
+ continue;
+ }
+
+ // Call the rule matchers to generate PHyperedges for this vertex and
+ // convert each one to a SHyperedgeBundle (via the callback). The
+ // callback prunes the SHyperedgeBundles and keeps the best ones (up
+ // to ruleLimit).
+ callback.ClearContainer();
+ for (typename std::vector<boost::shared_ptr<RuleMatcher> >::iterator
+ q = m_mainRuleMatchers.begin(); q != m_mainRuleMatchers.end(); ++q) {
+ (*q)->EnumerateHyperedges(vertex, callback);
+ }
+
+ // Retrieve the (pruned) set of SHyperedgeBundles from the callback.
+ const BoundedPriorityContainer<SHyperedgeBundle> &bundles =
+ callback.GetContainer();
+
+ // Check if any rules were matched. If not then for each incoming
+ // hyperedge, synthesize a glue rule that is guaranteed to match.
+ if (bundles.Size() == 0) {
+ for (std::vector<Forest::Hyperedge *>::const_iterator p =
+ vertex.incoming.begin(); p != vertex.incoming.end(); ++p) {
+ glueRuleSynthesizer.SynthesizeRule(**p);
+ }
+ m_glueRuleMatcher->EnumerateHyperedges(vertex, callback);
+ // FIXME This assertion occasionally fails -- why?
+ // assert(bundles.Size() == vertex.incoming.size());
+ }
+
+ // Use cube pruning to extract SHyperedges from SHyperedgeBundles and
+ // collect the SHyperedges in a buffer.
+ CubeQueue cubeQueue(bundles.Begin(), bundles.End());
+ std::size_t count = 0;
+ std::vector<SHyperedge*> buffer;
+ while (count < popLimit && !cubeQueue.IsEmpty()) {
+ SHyperedge *hyperedge = cubeQueue.Pop();
+ // FIXME See corresponding code in S2T::Manager
+ // BEGIN{HACK}
+ hyperedge->head->pvertex = &(vertex.pvertex);
+ // END{HACK}
+ buffer.push_back(hyperedge);
+ ++count;
+ }
+
+ // Recombine SVertices and sort into a stack.
+ SVertexStack &stack = m_stackMap[&(vertex.pvertex)];
+ RecombineAndSort(buffer, stack);
+
+ // Prune stack.
+ if (stackLimit > 0 && stack.size() > stackLimit) {
+ stack.resize(stackLimit);
+ }
+ }
+}
+
+template<typename RuleMatcher>
+void Manager<RuleMatcher>::InitializeRuleMatchers()
+{
+ const std::vector<RuleTableFF*> &ffs = RuleTableFF::Instances();
+ for (std::size_t i = 0; i < ffs.size(); ++i) {
+ RuleTableFF *ff = ffs[i];
+ // This may change in the future, but currently we assume that every
+ // RuleTableFF is associated with a static, file-based rule table of
+ // some sort and that the table should have been loaded into a RuleTable
+ // by this point.
+ const RuleTable *table = ff->GetTable();
+ assert(table);
+ RuleTable *nonConstTable = const_cast<RuleTable*>(table);
+ HyperTree *trie = dynamic_cast<HyperTree*>(nonConstTable);
+ assert(trie);
+ boost::shared_ptr<RuleMatcher> p(new RuleMatcher(*trie));
+ m_mainRuleMatchers.push_back(p);
+ }
+
+ // Create an additional rule trie + matcher for glue rules (which are
+ // synthesized on demand).
+ // FIXME Add a hidden RuleTableFF for the glue rule trie(?)
+ m_glueRuleTrie.reset(new HyperTree(ffs[0]));
+ m_glueRuleMatcher = boost::shared_ptr<RuleMatcher>(
+ new RuleMatcher(*m_glueRuleTrie));
+}
+
+template<typename RuleMatcher>
+void Manager<RuleMatcher>::InitializeStacks()
+{
+ // Check that m_forest has been initialized.
+ assert(!m_forest->vertices.empty());
+
+ for (std::vector<Forest::Vertex *>::const_iterator
+ p = m_forest->vertices.begin(); p != m_forest->vertices.end(); ++p) {
+ const Forest::Vertex &vertex = **p;
+
+ // Create an empty stack.
+ SVertexStack &stack = m_stackMap[&(vertex.pvertex)];
+
+ // For terminals only, add a single SVertex.
+ if (vertex.incoming.empty()) {
+ boost::shared_ptr<SVertex> v(new SVertex());
+ v->best = 0;
+ v->pvertex = &(vertex.pvertex);
+ stack.push_back(v);
+ }
+ }
+}
+
+
+template<typename RuleMatcher>
+const SHyperedge *Manager<RuleMatcher>::GetBestSHyperedge() const
+{
+ PVertexToStackMap::const_iterator p = m_stackMap.find(&m_rootVertex->pvertex);
+ assert(p != m_stackMap.end());
+ const SVertexStack &stack = p->second;
+ assert(!stack.empty());
+ return stack[0]->best;
+}
+
+template<typename RuleMatcher>
+void Manager<RuleMatcher>::ExtractKBest(
+ std::size_t k,
+ std::vector<boost::shared_ptr<KBestExtractor::Derivation> > &kBestList,
+ bool onlyDistinct) const
+{
+ kBestList.clear();
+ if (k == 0 || m_source.GetSize() == 0) {
+ return;
+ }
+
+ // Get the top-level SVertex stack.
+ PVertexToStackMap::const_iterator p = m_stackMap.find(&m_rootVertex->pvertex);
+ assert(p != m_stackMap.end());
+ const SVertexStack &stack = p->second;
+ assert(!stack.empty());
+
+ KBestExtractor extractor;
+
+ if (!onlyDistinct) {
+ // Return the k-best list as is, including duplicate translations.
+ extractor.Extract(stack, k, kBestList);
+ return;
+ }
+
+ // Determine how many derivations to extract. If the k-best list is
+ // restricted to distinct translations then this limit should be bigger
+ // than k. The k-best factor determines how much bigger the limit should be,
+ // with 0 being 'unlimited.' This actually sets a large-ish limit in case
+ // too many translations are identical.
+ const StaticData &staticData = StaticData::Instance();
+ const std::size_t nBestFactor = staticData.GetNBestFactor();
+ std::size_t numDerivations = (nBestFactor == 0) ? k*1000 : k*nBestFactor;
+
+ // Extract the derivations.
+ KBestExtractor::KBestVec bigList;
+ bigList.reserve(numDerivations);
+ extractor.Extract(stack, numDerivations, bigList);
+
+ // Copy derivations into kBestList, skipping ones with repeated translations.
+ std::set<Phrase> distinct;
+ for (KBestExtractor::KBestVec::const_iterator p = bigList.begin();
+ kBestList.size() < k && p != bigList.end(); ++p) {
+ boost::shared_ptr<KBestExtractor::Derivation> derivation = *p;
+ Phrase translation = KBestExtractor::GetOutputPhrase(*derivation);
+ if (distinct.insert(translation).second) {
+ kBestList.push_back(derivation);
+ }
+ }
+}
+
+// TODO Move this function into parent directory (Recombiner class?) and
+// TODO share with S2T
+template<typename RuleMatcher>
+void Manager<RuleMatcher>::RecombineAndSort(
+ const std::vector<SHyperedge*> &buffer, SVertexStack &stack)
+{
+ // Step 1: Create a map containing a single instance of each distinct vertex
+ // (where distinctness is defined by the state value). The hyperedges'
+ // head pointers are updated to point to the vertex instances in the map and
+ // any 'duplicate' vertices are deleted.
+// TODO Set?
+ typedef std::map<SVertex *, SVertex *, SVertexRecombinationOrderer> Map;
+ Map map;
+ for (std::vector<SHyperedge*>::const_iterator p = buffer.begin();
+ p != buffer.end(); ++p) {
+ SHyperedge *h = *p;
+ SVertex *v = h->head;
+ assert(v->best == h);
+ assert(v->recombined.empty());
+ std::pair<Map::iterator, bool> result = map.insert(Map::value_type(v, v));
+ if (result.second) {
+ continue; // v's recombination value hasn't been seen before.
+ }
+ // v is a duplicate (according to the recombination rules).
+ // Compare the score of h against the score of the best incoming hyperedge
+ // for the stored vertex.
+ SVertex *storedVertex = result.first->second;
+ if (h->label.score > storedVertex->best->label.score) {
+ // h's score is better.
+ storedVertex->recombined.push_back(storedVertex->best);
+ storedVertex->best = h;
+ } else {
+ storedVertex->recombined.push_back(h);
+ }
+ h->head->best = 0;
+ delete h->head;
+ h->head = storedVertex;
+ }
+
+ // Step 2: Copy the vertices from the map to the stack.
+ stack.clear();
+ stack.reserve(map.size());
+ for (Map::const_iterator p = map.begin(); p != map.end(); ++p) {
+ stack.push_back(boost::shared_ptr<SVertex>(p->first));
+ }
+
+ // Step 3: Sort the vertices in the stack.
+ std::sort(stack.begin(), stack.end(), SVertexStackContentOrderer());
+}
+
+template<typename RuleMatcher>
+void Manager<RuleMatcher>::OutputDetailedTranslationReport(
+ OutputCollector *collector) const
+{
+ const SHyperedge *best = GetBestSHyperedge();
+ if (best == NULL || collector == NULL) {
+ return;
+ }
+ long translationId = m_source.GetTranslationId();
+ std::ostringstream out;
+ DerivationWriter::Write(*best, translationId, out);
+ collector->Write(translationId, out.str());
+}
+
+} // F2S
+} // Syntax
+} // Moses
diff --git a/moses/Syntax/F2S/Manager.h b/moses/Syntax/F2S/Manager.h
new file mode 100644
index 000000000..1705d4f64
--- /dev/null
+++ b/moses/Syntax/F2S/Manager.h
@@ -0,0 +1,69 @@
+#pragma once
+
+#include <set>
+#include <vector>
+
+#include <boost/shared_ptr.hpp>
+#include <boost/unordered_map.hpp>
+
+#include "moses/InputType.h"
+#include "moses/Syntax/KBestExtractor.h"
+#include "moses/Syntax/Manager.h"
+#include "moses/Syntax/SVertexStack.h"
+#include "moses/Word.h"
+
+#include "Forest.h"
+#include "HyperTree.h"
+#include "PVertexToStackMap.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+
+struct SHyperedge;
+
+namespace F2S
+{
+
+template<typename RuleMatcher>
+class Manager : public Syntax::Manager
+{
+ public:
+ Manager(const InputType &);
+
+ void Decode();
+
+ // Get the SHyperedge for the 1-best derivation.
+ const SHyperedge *GetBestSHyperedge() const;
+
+ void ExtractKBest(
+ std::size_t k,
+ std::vector<boost::shared_ptr<KBestExtractor::Derivation> > &kBestList,
+ bool onlyDistinct=false) const;
+
+ void OutputDetailedTranslationReport(OutputCollector *collector) const;
+
+ private:
+ const Forest::Vertex &FindRootNode(const Forest &);
+
+ void InitializeRuleMatchers();
+
+ void InitializeStacks();
+
+ void RecombineAndSort(const std::vector<SHyperedge*> &, SVertexStack &);
+
+ boost::shared_ptr<const Forest> m_forest;
+ const Forest::Vertex *m_rootVertex;
+ PVertexToStackMap m_stackMap;
+ boost::shared_ptr<HyperTree> m_glueRuleTrie;
+ std::vector<boost::shared_ptr<RuleMatcher> > m_mainRuleMatchers;
+ boost::shared_ptr<RuleMatcher> m_glueRuleMatcher;
+};
+
+} // F2S
+} // Syntax
+} // Moses
+
+// Implementation
+#include "Manager-inl.h"
diff --git a/moses/Syntax/F2S/PHyperedgeToSHyperedgeBundle.h b/moses/Syntax/F2S/PHyperedgeToSHyperedgeBundle.h
new file mode 100644
index 000000000..81c6f3da7
--- /dev/null
+++ b/moses/Syntax/F2S/PHyperedgeToSHyperedgeBundle.h
@@ -0,0 +1,32 @@
+#pragma once
+
+#include "moses/Syntax/PHyperedge.h"
+#include "moses/Syntax/PVertex.h"
+#include "moses/Syntax/SHyperedgeBundle.h"
+
+#include "PVertexToStackMap.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+// Given a PHyperedge object and a PVertexToStackMap, produces an SHyperedgeBundle object.
+inline void PHyperedgeToSHyperedgeBundle(const PHyperedge &hyperedge,
+ const PVertexToStackMap &stackMap,
+ SHyperedgeBundle &bundle) {
+ bundle.translations = hyperedge.label.translations;
+ bundle.stacks.clear();
+ for (std::vector<PVertex*>::const_iterator p = hyperedge.tail.begin();
+ p != hyperedge.tail.end(); ++p) {
+ PVertexToStackMap::const_iterator q = stackMap.find(*p);
+ const SVertexStack &stack = q->second;
+ bundle.stacks.push_back(&stack);
+ }
+}
+
+} // F2S
+} // Syntax
+} // Moses
diff --git a/moses/Syntax/F2S/PVertexToStackMap.h b/moses/Syntax/F2S/PVertexToStackMap.h
new file mode 100644
index 000000000..9e3142492
--- /dev/null
+++ b/moses/Syntax/F2S/PVertexToStackMap.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include <boost/unordered_map.hpp>
+
+#include "moses/Syntax/PVertex.h"
+#include "moses/Syntax/SVertexStack.h"
+
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+typedef boost::unordered_map<const PVertex *, SVertexStack> PVertexToStackMap;
+
+} // namespace F2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/F2S/RuleMatcher.h b/moses/Syntax/F2S/RuleMatcher.h
new file mode 100644
index 000000000..ac3a4c065
--- /dev/null
+++ b/moses/Syntax/F2S/RuleMatcher.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include "Forest.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+// Base class for rule matchers.
+template<typename Callback>
+class RuleMatcher
+{
+ public:
+ virtual ~RuleMatcher() {}
+
+ virtual void EnumerateHyperedges(const Forest::Vertex &, Callback &) = 0;
+};
+
+} // F2S
+} // Syntax
+} // Moses
diff --git a/moses/Syntax/F2S/RuleMatcherCallback.h b/moses/Syntax/F2S/RuleMatcherCallback.h
new file mode 100644
index 000000000..c240b87db
--- /dev/null
+++ b/moses/Syntax/F2S/RuleMatcherCallback.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include "moses/Syntax/BoundedPriorityContainer.h"
+#include "moses/Syntax/PHyperedge.h"
+#include "moses/Syntax/PVertex.h"
+#include "moses/Syntax/SHyperedgeBundle.h"
+#include "moses/Syntax/SHyperedgeBundleScorer.h"
+
+#include "PHyperedgeToSHyperedgeBundle.h"
+#include "PVertexToStackMap.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+class RuleMatcherCallback {
+ private:
+ typedef BoundedPriorityContainer<SHyperedgeBundle> Container;
+
+ public:
+ RuleMatcherCallback(const PVertexToStackMap &stackMap, std::size_t ruleLimit)
+ : m_stackMap(stackMap)
+ , m_container(ruleLimit) {}
+
+ void operator()(const PHyperedge &hyperedge) {
+ PHyperedgeToSHyperedgeBundle(hyperedge, m_stackMap, m_tmpBundle);
+ float score = SHyperedgeBundleScorer::Score(m_tmpBundle);
+ m_container.SwapIn(m_tmpBundle, score);
+ }
+
+ void ClearContainer() { m_container.LazyClear(); }
+
+ const Container &GetContainer() { return m_container; }
+
+ private:
+ const PVertexToStackMap &m_stackMap;
+ SHyperedgeBundle m_tmpBundle;
+ BoundedPriorityContainer<SHyperedgeBundle> m_container;
+};
+
+} // F2S
+} // Syntax
+} // Moses
diff --git a/moses/Syntax/F2S/RuleMatcherHyperTree-inl.h b/moses/Syntax/F2S/RuleMatcherHyperTree-inl.h
new file mode 100644
index 000000000..456594873
--- /dev/null
+++ b/moses/Syntax/F2S/RuleMatcherHyperTree-inl.h
@@ -0,0 +1,192 @@
+#pragma once
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+template<typename Callback>
+RuleMatcherHyperTree<Callback>::RuleMatcherHyperTree(const HyperTree &ruleTrie)
+ : m_ruleTrie(ruleTrie)
+{
+}
+
+template<typename Callback>
+void RuleMatcherHyperTree<Callback>::EnumerateHyperedges(
+ const Forest::Vertex &v, Callback &callback)
+{
+ const HyperTree::Node &root = m_ruleTrie.GetRootNode();
+ HyperPath::NodeSeq nodeSeq(1, v.pvertex.symbol[0]->GetId());
+ const HyperTree::Node *child = root.GetChild(nodeSeq);
+ if (!child) {
+ return;
+ }
+
+ m_hyperedge.head = const_cast<PVertex*>(&v.pvertex);
+
+ // Initialize the queue.
+ MatchItem item;
+ item.annotatedFNS.fns = FNS(1, &v);
+ item.trieNode = child;
+ m_queue.push(item);
+
+ while (!m_queue.empty()) {
+ MatchItem item = m_queue.front();
+ m_queue.pop();
+ if (item.trieNode->HasRules()) {
+ const FNS &fns = item.annotatedFNS.fns;
+ m_hyperedge.tail.clear();
+ for (FNS::const_iterator p = fns.begin(); p != fns.end(); ++p) {
+ const Forest::Vertex *v = *p;
+ m_hyperedge.tail.push_back(const_cast<PVertex *>(&(v->pvertex)));
+ }
+ m_hyperedge.label.translations =
+ &(item.trieNode->GetTargetPhraseCollection());
+ callback(m_hyperedge);
+ }
+ PropagateNextLexel(item);
+ }
+}
+
+template<typename Callback>
+void RuleMatcherHyperTree<Callback>::PropagateNextLexel(const MatchItem &item)
+{
+ std::vector<AnnotatedFNS> tfns;
+ std::vector<AnnotatedFNS> rfns;
+ std::vector<AnnotatedFNS> rfns2;
+
+ const HyperTree::Node &trieNode = *(item.trieNode);
+ const HyperTree::Node::Map &map = trieNode.GetMap();
+
+ for (HyperTree::Node::Map::const_iterator p = map.begin();
+ p != map.end(); ++p) {
+ const HyperPath::NodeSeq &edgeLabel = p->first;
+ const HyperTree::Node &child = p->second;
+
+ const int numSubSeqs = CountCommas(edgeLabel) + 1;
+
+ std::size_t pos = 0;
+ for (int i = 0; i < numSubSeqs; ++i) {
+ const FNS &fns = item.annotatedFNS.fns;
+ tfns.clear();
+ if (edgeLabel[pos] == HyperPath::kEpsilon) {
+ AnnotatedFNS x;
+ x.fns = FNS(1, fns[i]);
+ tfns.push_back(x);
+ pos += 2;
+ } else {
+ const int subSeqLength = SubSeqLength(edgeLabel, pos);
+ const std::vector<Forest::Hyperedge*> &incoming = fns[i]->incoming;
+ for (std::vector<Forest::Hyperedge *>::const_iterator q =
+ incoming.begin(); q != incoming.end(); ++q) {
+ const Forest::Hyperedge &edge = **q;
+ if (MatchChildren(edge.tail, edgeLabel, pos, subSeqLength)) {
+ tfns.resize(tfns.size()+1);
+ tfns.back().fns.assign(edge.tail.begin(), edge.tail.end());
+ tfns.back().fragment.push_back(&edge);
+ }
+ }
+ pos += subSeqLength + 1;
+ }
+ if (tfns.empty()) {
+ rfns.clear();
+ break;
+ } else if (i == 0) {
+ rfns.swap(tfns);
+ } else {
+ CartesianProduct(rfns, tfns, rfns2);
+ rfns.swap(rfns2);
+ }
+ }
+
+ for (typename std::vector<AnnotatedFNS>::const_iterator q = rfns.begin();
+ q != rfns.end(); ++q) {
+ MatchItem newItem;
+ newItem.annotatedFNS.fns = q->fns;
+ newItem.annotatedFNS.fragment = item.annotatedFNS.fragment;
+ newItem.annotatedFNS.fragment.insert(newItem.annotatedFNS.fragment.end(),
+ q->fragment.begin(),
+ q->fragment.end());
+ newItem.trieNode = &child;
+ m_queue.push(newItem);
+ }
+ }
+}
+
+template<typename Callback>
+void RuleMatcherHyperTree<Callback>::CartesianProduct(
+ const std::vector<AnnotatedFNS> &x,
+ const std::vector<AnnotatedFNS> &y,
+ std::vector<AnnotatedFNS> &z)
+{
+ z.clear();
+ z.reserve(x.size() * y.size());
+ for (typename std::vector<AnnotatedFNS>::const_iterator p = x.begin();
+ p != x.end(); ++p) {
+ const AnnotatedFNS &a = *p;
+ for (typename std::vector<AnnotatedFNS>::const_iterator q = y.begin();
+ q != y.end(); ++q) {
+ const AnnotatedFNS &b = *q;
+ // Create a new AnnotatedFNS.
+ z.resize(z.size()+1);
+ AnnotatedFNS &c = z.back();
+ // Combine frontier node sequences from a and b.
+ c.fns.reserve(a.fns.size() + b.fns.size());
+ c.fns.assign(a.fns.begin(), a.fns.end());
+ c.fns.insert(c.fns.end(), b.fns.begin(), b.fns.end());
+ // Combine tree fragments from a and b.
+ c.fragment.reserve(a.fragment.size() + b.fragment.size());
+ c.fragment.assign(a.fragment.begin(), a.fragment.end());
+ c.fragment.insert(c.fragment.end(), b.fragment.begin(), b.fragment.end());
+ }
+ }
+}
+
+template<typename Callback>
+bool RuleMatcherHyperTree<Callback>::MatchChildren(
+ const std::vector<Forest::Vertex *> &children,
+ const HyperPath::NodeSeq &edgeLabel,
+ std::size_t pos,
+ std::size_t subSeqSize)
+{
+ if (children.size() != subSeqSize) {
+ return false;
+ }
+ for (int i = 0; i < subSeqSize; ++i) {
+ if (edgeLabel[pos+i] != children[i]->pvertex.symbol[0]->GetId()) {
+ return false;
+ }
+ }
+ return true;
+}
+
+template<typename Callback>
+int RuleMatcherHyperTree<Callback>::CountCommas(const HyperPath::NodeSeq &seq)
+{
+ int count = 0;
+ for (std::vector<std::size_t>::const_iterator p = seq.begin();
+ p != seq.end(); ++p) {
+ if (*p == HyperPath::kComma) {
+ ++count;
+ }
+ }
+ return count;
+}
+
+template<typename Callback>
+int RuleMatcherHyperTree<Callback>::SubSeqLength(const HyperPath::NodeSeq &seq,
+ int pos)
+{
+ int length = 0;
+ while (pos != seq.size() && seq[pos] != HyperPath::kComma) {
+ ++pos;
+ ++length;
+ }
+ return length;
+}
+
+} // namespace F2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/F2S/RuleMatcherHyperTree.h b/moses/Syntax/F2S/RuleMatcherHyperTree.h
new file mode 100644
index 000000000..406d794ed
--- /dev/null
+++ b/moses/Syntax/F2S/RuleMatcherHyperTree.h
@@ -0,0 +1,78 @@
+#pragma once
+
+#include "moses/Syntax/PHyperedge.h"
+
+#include "Forest.h"
+#include "HyperTree.h"
+#include "RuleMatcher.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+// Rule matcher based on the algorithm from this paper:
+//
+// Hui Zhang, Min Zhang, Haizhou Li, and Chew Lim Tan
+// "Fast Translation Rule Matching for Syntax-based Statistical Machine
+// Translation"
+// In proceedings of EMNLP 2009
+//
+template<typename Callback>
+class RuleMatcherHyperTree : public RuleMatcher<Callback>
+{
+ public:
+ RuleMatcherHyperTree(const HyperTree &);
+
+ ~RuleMatcherHyperTree() {}
+
+ void EnumerateHyperedges(const Forest::Vertex &, Callback &);
+
+ private:
+ // Frontier node sequence.
+ typedef std::vector<const Forest::Vertex *> FNS;
+
+ // An AnnotatedFNS is a FNS annotated with the set of forest hyperedges that
+ // constitute the tree fragment from which it was derived.
+ struct AnnotatedFNS {
+ FNS fns;
+ std::vector<const Forest::Hyperedge *> fragment;
+ };
+
+ // A MatchItem is like the FP structure in Zhang et al. (2009), but it also
+ // records the set of forest hyperedges that constitute the matched tree
+ // fragment.
+ struct MatchItem {
+ AnnotatedFNS annotatedFNS;
+ const HyperTree::Node *trieNode;
+ };
+
+ // Implements the Cartesian product operation from line 16 of Algorithm 4
+ // (Zhang et al., 2009), which in this implementation also involves
+ // combining the fragment information associated with the FNS objects.
+ void CartesianProduct(const std::vector<AnnotatedFNS> &,
+ const std::vector<AnnotatedFNS> &,
+ std::vector<AnnotatedFNS> &);
+
+ int CountCommas(const HyperPath::NodeSeq &);
+
+ bool MatchChildren(const std::vector<Forest::Vertex *> &,
+ const HyperPath::NodeSeq &, std::size_t, std::size_t);
+
+ void PropagateNextLexel(const MatchItem &);
+
+ int SubSeqLength(const HyperPath::NodeSeq &, int);
+
+ const HyperTree &m_ruleTrie;
+ PHyperedge m_hyperedge;
+ std::queue<MatchItem> m_queue; // Called "SFP" in Zhang et al. (2009)
+};
+
+} // namespace F2S
+} // namespace Syntax
+} // namespace Moses
+
+// Implementation
+#include "RuleMatcherHyperTree-inl.h"
diff --git a/moses/Syntax/F2S/TopologicalSorter.cpp b/moses/Syntax/F2S/TopologicalSorter.cpp
new file mode 100644
index 000000000..4821177b3
--- /dev/null
+++ b/moses/Syntax/F2S/TopologicalSorter.cpp
@@ -0,0 +1,55 @@
+#include "TopologicalSorter.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+void TopologicalSorter::Sort(const Forest &forest,
+ std::vector<const Forest::Vertex *> &permutation)
+{
+ permutation.clear();
+ BuildPredSets(forest);
+ m_visited.clear();
+ for (std::vector<Forest::Vertex *>::const_iterator
+ p = forest.vertices.begin(); p != forest.vertices.end(); ++p) {
+ if (m_visited.find(*p) == m_visited.end()) {
+ Visit(**p, permutation);
+ }
+ }
+}
+
+void TopologicalSorter::BuildPredSets(const Forest &forest)
+{
+ m_predSets.clear();
+ for (std::vector<Forest::Vertex *>::const_iterator
+ p = forest.vertices.begin(); p != forest.vertices.end(); ++p) {
+ const Forest::Vertex *head = *p;
+ for (std::vector<Forest::Hyperedge *>::const_iterator
+ q = head->incoming.begin(); q != head->incoming.end(); ++q) {
+ for (std::vector<Forest::Vertex *>::const_iterator
+ r = (*q)->tail.begin(); r != (*q)->tail.end(); ++r) {
+ m_predSets[head].insert(*r);
+ }
+ }
+ }
+}
+
+void TopologicalSorter::Visit(const Forest::Vertex &v,
+ std::vector<const Forest::Vertex *> &permutation)
+{
+ m_visited.insert(&v);
+ const VertexSet &predSet = m_predSets[&v];
+ for (VertexSet::const_iterator p = predSet.begin(); p != predSet.end(); ++p) {
+ if (m_visited.find(*p) == m_visited.end()) {
+ Visit(**p, permutation);
+ }
+ }
+ permutation.push_back(&v);
+}
+
+} // namespace F2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/F2S/TopologicalSorter.h b/moses/Syntax/F2S/TopologicalSorter.h
new file mode 100644
index 000000000..9dbb874ec
--- /dev/null
+++ b/moses/Syntax/F2S/TopologicalSorter.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include <vector>
+
+#include <boost/unordered_map.hpp>
+#include <boost/unordered_set.hpp>
+
+#include "Forest.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+class TopologicalSorter
+{
+ public:
+ void Sort(const Forest &, std::vector<const Forest::Vertex *> &);
+
+ private:
+ typedef boost::unordered_set<const Forest::Vertex *> VertexSet;
+
+ void BuildPredSets(const Forest &);
+ void Visit(const Forest::Vertex &, std::vector<const Forest::Vertex *> &);
+
+ boost::unordered_set<const Forest::Vertex *> m_visited;
+ boost::unordered_map<const Forest::Vertex *, VertexSet> m_predSets;
+};
+
+} // namespace F2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/F2S/TreeFragmentTokenizer.cpp b/moses/Syntax/F2S/TreeFragmentTokenizer.cpp
new file mode 100644
index 000000000..1d10a47ad
--- /dev/null
+++ b/moses/Syntax/F2S/TreeFragmentTokenizer.cpp
@@ -0,0 +1,93 @@
+#include "TreeFragmentTokenizer.h"
+
+#include <cctype>
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+TreeFragmentToken::TreeFragmentToken(TreeFragmentTokenType t,
+ StringPiece v, std::size_t p)
+ : type(t)
+ , value(v)
+ , pos(p) {
+}
+
+TreeFragmentTokenizer::TreeFragmentTokenizer()
+ : value_(TreeFragmentToken_EOS, "", -1) {
+}
+
+TreeFragmentTokenizer::TreeFragmentTokenizer(const StringPiece &s)
+ : str_(s)
+ , value_(TreeFragmentToken_EOS, "", -1)
+ , iter_(s.begin())
+ , end_(s.end())
+ , pos_(0) {
+ ++(*this);
+}
+
+TreeFragmentTokenizer &TreeFragmentTokenizer::operator++() {
+ while (iter_ != end_ && (*iter_ == ' ' || *iter_ == '\t')) {
+ ++iter_;
+ ++pos_;
+ }
+
+ if (iter_ == end_) {
+ value_ = TreeFragmentToken(TreeFragmentToken_EOS, "", pos_);
+ return *this;
+ }
+
+ if (*iter_ == '[') {
+ value_ = TreeFragmentToken(TreeFragmentToken_LSB, "[", pos_);
+ ++iter_;
+ ++pos_;
+ } else if (*iter_ == ']') {
+ value_ = TreeFragmentToken(TreeFragmentToken_RSB, "]", pos_);
+ ++iter_;
+ ++pos_;
+ } else {
+ std::size_t start = pos_;
+ while (true) {
+ ++iter_;
+ ++pos_;
+ if (iter_ == end_ || *iter_ == ' ' || *iter_ == '\t') {
+ break;
+ }
+ if (*iter_ == '[' || *iter_ == ']') {
+ break;
+ }
+ }
+ StringPiece word = str_.substr(start, pos_-start);
+ value_ = TreeFragmentToken(TreeFragmentToken_WORD, word, start);
+ }
+
+ return *this;
+}
+
+TreeFragmentTokenizer TreeFragmentTokenizer::operator++(int) {
+ TreeFragmentTokenizer tmp(*this);
+ ++*this;
+ return tmp;
+}
+
+bool operator==(const TreeFragmentTokenizer &lhs,
+ const TreeFragmentTokenizer &rhs) {
+ if (lhs.value_.type == TreeFragmentToken_EOS ||
+ rhs.value_.type == TreeFragmentToken_EOS) {
+ return lhs.value_.type == TreeFragmentToken_EOS &&
+ rhs.value_.type == TreeFragmentToken_EOS;
+ }
+ return lhs.iter_ == rhs.iter_;
+}
+
+bool operator!=(const TreeFragmentTokenizer &lhs,
+ const TreeFragmentTokenizer &rhs) {
+ return !(lhs == rhs);
+}
+
+} // namespace F2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/F2S/TreeFragmentTokenizer.h b/moses/Syntax/F2S/TreeFragmentTokenizer.h
new file mode 100644
index 000000000..3924c9bed
--- /dev/null
+++ b/moses/Syntax/F2S/TreeFragmentTokenizer.h
@@ -0,0 +1,73 @@
+#pragma once
+
+#include "util/string_piece.hh"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+enum TreeFragmentTokenType {
+ TreeFragmentToken_EOS,
+ TreeFragmentToken_LSB,
+ TreeFragmentToken_RSB,
+ TreeFragmentToken_WORD
+};
+
+struct TreeFragmentToken {
+ public:
+ TreeFragmentToken(TreeFragmentTokenType, StringPiece, std::size_t);
+ TreeFragmentTokenType type;
+ StringPiece value;
+ std::size_t pos;
+};
+
+// Tokenizes tree fragment strings in Moses format.
+//
+// For example, the string "[NP [NP [NN a]] [NP]]" is tokenized to the sequence:
+//
+// 1 LSB "["
+// 2 WORD "NP"
+// 3 LSB "["
+// 4 WORD "NP"
+// 5 LSB "["
+// 6 WORD "NN"
+// 7 WORD "a"
+// 8 RSB "]"
+// 9 RSB "]"
+// 10 LSB "["
+// 11 WORD "NP"
+// 12 RSB "]"
+// 13 RSB "]"
+// 14 EOS undefined
+//
+class TreeFragmentTokenizer {
+ public:
+ TreeFragmentTokenizer();
+ TreeFragmentTokenizer(const StringPiece &);
+
+ const TreeFragmentToken &operator*() const { return value_; }
+ const TreeFragmentToken *operator->() const { return &value_; }
+
+ TreeFragmentTokenizer &operator++();
+ TreeFragmentTokenizer operator++(int);
+
+ friend bool operator==(const TreeFragmentTokenizer &,
+ const TreeFragmentTokenizer &);
+
+ friend bool operator!=(const TreeFragmentTokenizer &,
+ const TreeFragmentTokenizer &);
+
+ private:
+ StringPiece str_;
+ TreeFragmentToken value_;
+ StringPiece::const_iterator iter_;
+ StringPiece::const_iterator end_;
+ std::size_t pos_;
+};
+
+} // namespace F2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/KBestExtractor.cpp b/moses/Syntax/KBestExtractor.cpp
index 335d80409..db08a46c0 100644
--- a/moses/Syntax/KBestExtractor.cpp
+++ b/moses/Syntax/KBestExtractor.cpp
@@ -14,8 +14,8 @@ namespace Syntax
// Extract the k-best list from the search graph.
void KBestExtractor::Extract(
- const std::vector<boost::shared_ptr<SVertex> > &topLevelVertices,
- std::size_t k, KBestVec &kBestList)
+ const std::vector<boost::shared_ptr<SVertex> > &topLevelVertices,
+ std::size_t k, KBestVec &kBestList)
{
kBestList.clear();
if (topLevelVertices.empty()) {
@@ -25,31 +25,32 @@ void KBestExtractor::Extract(
// Create a new SVertex, supremeVertex, that has the best top-level SVertex as
// its predecessor and has the same score.
std::vector<boost::shared_ptr<SVertex> >::const_iterator p =
- topLevelVertices.begin();
+ topLevelVertices.begin();
SVertex &bestTopLevelVertex = **p;
boost::scoped_ptr<SVertex> supremeVertex(new SVertex());
supremeVertex->pvertex = 0;
supremeVertex->best = new SHyperedge();
supremeVertex->best->head = supremeVertex.get();
supremeVertex->best->tail.push_back(&bestTopLevelVertex);
- supremeVertex->best->score = bestTopLevelVertex.best->score;
- supremeVertex->best->scoreBreakdown = bestTopLevelVertex.best->scoreBreakdown;
- supremeVertex->best->translation = 0;
+ supremeVertex->best->label.score = bestTopLevelVertex.best->label.score;
+ supremeVertex->best->label.scoreBreakdown =
+ bestTopLevelVertex.best->label.scoreBreakdown;
+ supremeVertex->best->label.translation = 0;
// For each alternative top-level SVertex, add a new incoming hyperedge to
// supremeVertex.
for (++p; p != topLevelVertices.end(); ++p) {
// Check that the first item in topLevelVertices really was the best.
- UTIL_THROW_IF2((*p)->best->score > bestTopLevelVertex.best->score,
+ UTIL_THROW_IF2((*p)->best->label.score > bestTopLevelVertex.best->label.score,
"top-level SVertices are not correctly sorted");
// Note: there's no need for a smart pointer here: supremeVertex will take
// ownership of altEdge.
SHyperedge *altEdge = new SHyperedge();
altEdge->head = supremeVertex.get();
altEdge->tail.push_back((*p).get());
- altEdge->score = (*p)->best->score;
- altEdge->scoreBreakdown = (*p)->best->scoreBreakdown;
- altEdge->translation = 0;
+ altEdge->label.score = (*p)->best->label.score;
+ altEdge->label.scoreBreakdown = (*p)->best->label.scoreBreakdown;
+ altEdge->label.translation = 0;
supremeVertex->recombined.push_back(altEdge);
}
@@ -61,8 +62,8 @@ void KBestExtractor::Extract(
// each derivation.
kBestList.reserve(targetVertex->kBestList.size());
for (std::vector<boost::weak_ptr<Derivation> >::const_iterator
- q = targetVertex->kBestList.begin();
- q != targetVertex->kBestList.end(); ++q) {
+ q = targetVertex->kBestList.begin();
+ q != targetVertex->kBestList.end(); ++q) {
const boost::shared_ptr<Derivation> d(*q);
assert(d);
assert(d->subderivations.size() == 1);
@@ -77,7 +78,7 @@ Phrase KBestExtractor::GetOutputPhrase(const Derivation &d)
Phrase ret(ARRAY_SIZE_INCR);
- const TargetPhrase &phrase = *(d.edge->shyperedge.translation);
+ const TargetPhrase &phrase = *(d.edge->shyperedge.label.translation);
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
phrase.GetAlignNonTerm().GetNonTermIndexMap();
for (std::size_t pos = 0; pos < phrase.GetSize(); ++pos) {
@@ -94,24 +95,24 @@ Phrase KBestExtractor::GetOutputPhrase(const Derivation &d)
}
// FIXME
UTIL_THROW2("placeholders are not currently supported by the S2T decoder");
-/*
- std::set<std::size_t> sourcePosSet =
- phrase.GetAlignTerm().GetAlignmentsForTarget(pos);
- if (sourcePosSet.size() == 1) {
- const std::vector<const Word*> *ruleSourceFromInputPath =
- hypo.GetTranslationOption().GetSourceRuleFromInputPath();
- UTIL_THROW_IF2(ruleSourceFromInputPath == NULL,
- "Source Words in of the rules hasn't been filled out");
- std::size_t sourcePos = *sourcePosSet.begin();
- const Word *sourceWord = ruleSourceFromInputPath->at(sourcePos);
- UTIL_THROW_IF2(sourceWord == NULL,
- "Null source word at position " << sourcePos);
- const Factor *factor = sourceWord->GetFactor(placeholderFactor);
- if (factor) {
- ret.Back()[0] = factor;
- }
- }
-*/
+ /*
+ std::set<std::size_t> sourcePosSet =
+ phrase.GetAlignTerm().GetAlignmentsForTarget(pos);
+ if (sourcePosSet.size() == 1) {
+ const std::vector<const Word*> *ruleSourceFromInputPath =
+ hypo.GetTranslationOption().GetSourceRuleFromInputPath();
+ UTIL_THROW_IF2(ruleSourceFromInputPath == NULL,
+ "Source Words in of the rules hasn't been filled out");
+ std::size_t sourcePos = *sourcePosSet.begin();
+ const Word *sourceWord = ruleSourceFromInputPath->at(sourcePos);
+ UTIL_THROW_IF2(sourceWord == NULL,
+ "Null source word at position " << sourcePos);
+ const Factor *factor = sourceWord->GetFactor(placeholderFactor);
+ if (factor) {
+ ret.Back()[0] = factor;
+ }
+ }
+ */
}
}
@@ -121,7 +122,7 @@ Phrase KBestExtractor::GetOutputPhrase(const Derivation &d)
// Generate the target tree of the derivation d.
TreePointer KBestExtractor::GetOutputTree(const Derivation &d)
{
- const TargetPhrase &phrase = *(d.edge->shyperedge.translation);
+ const TargetPhrase &phrase = *(d.edge->shyperedge.label.translation);
if (const PhraseProperty *property = phrase.GetProperty("Tree")) {
const std::string *tree = property->GetValueString();
TreePointer mytree (boost::make_shared<InternalTree>(*tree));
@@ -140,8 +141,7 @@ TreePointer KBestExtractor::GetOutputTree(const Derivation &d)
mytree->Combine(previous_trees);
return mytree;
- }
- else {
+ } else {
UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found");
}
}
@@ -180,7 +180,7 @@ KBestExtractor::FindOrCreateVertex(const SVertex &v)
}
boost::shared_ptr<Derivation> bestDerivation(new Derivation(bestEdge));
#ifndef NDEBUG
- std::pair<DerivationSet::iterator, bool> q =
+ std::pair<DerivationSet::iterator, bool> q =
#endif
m_derivations.insert(bestDerivation);
assert(q.second);
@@ -291,8 +291,8 @@ KBestExtractor::Derivation::Derivation(const boost::shared_ptr<KHyperedge> &e)
boost::shared_ptr<Derivation> sub(pred.kBestList[0]);
subderivations.push_back(sub);
}
- score = edge->shyperedge.score;
- scoreBreakdown = edge->shyperedge.scoreBreakdown;
+ score = edge->shyperedge.label.score;
+ scoreBreakdown = edge->shyperedge.label.scoreBreakdown;
}
// Construct a Derivation that neighbours an existing Derivation.
diff --git a/moses/Syntax/KBestExtractor.h b/moses/Syntax/KBestExtractor.h
index 21fb6f737..15cf0e3c8 100644
--- a/moses/Syntax/KBestExtractor.h
+++ b/moses/Syntax/KBestExtractor.h
@@ -5,6 +5,7 @@
#include <queue>
#include <vector>
+#include <boost/unordered_map.hpp>
#include <boost/unordered_set.hpp>
#include <boost/weak_ptr.hpp>
@@ -27,7 +28,7 @@ namespace Syntax
//
class KBestExtractor
{
- public:
+public:
struct KVertex;
struct KHyperedge {
@@ -60,8 +61,8 @@ class KBestExtractor
struct KVertex {
typedef std::priority_queue<boost::weak_ptr<Derivation>,
- std::vector<boost::weak_ptr<Derivation> >,
- DerivationOrderer> DerivationQueue;
+ std::vector<boost::weak_ptr<Derivation> >,
+ DerivationOrderer> DerivationQueue;
KVertex(const SVertex &v) : svertex(v), visited(false) {}
@@ -81,9 +82,9 @@ class KBestExtractor
static Phrase GetOutputPhrase(const Derivation &);
static TreePointer GetOutputTree(const Derivation &);
- private:
+private:
typedef boost::unordered_map<const SVertex *,
- boost::shared_ptr<KVertex> > VertexMap;
+ boost::shared_ptr<KVertex> > VertexMap;
struct DerivationHasher {
std::size_t operator()(const boost::shared_ptr<Derivation> &d) const {
@@ -103,7 +104,7 @@ class KBestExtractor
};
typedef boost::unordered_set<boost::shared_ptr<Derivation>, DerivationHasher,
- DerivationEqualityPred> DerivationSet;
+ DerivationEqualityPred> DerivationSet;
boost::shared_ptr<KVertex> FindOrCreateVertex(const SVertex &);
void GetCandidates(boost::shared_ptr<KVertex>, std::size_t);
diff --git a/moses/Syntax/Manager.cpp b/moses/Syntax/Manager.cpp
new file mode 100644
index 000000000..ff8503808
--- /dev/null
+++ b/moses/Syntax/Manager.cpp
@@ -0,0 +1,236 @@
+#include "Manager.h"
+
+#include <sstream>
+
+#include "moses/OutputCollector.h"
+#include "moses/StaticData.h"
+
+#include "PVertex.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+
+Manager::Manager(const InputType &source)
+ : Moses::BaseManager(source)
+{
+}
+
+void Manager::OutputBest(OutputCollector *collector) const
+{
+ if (!collector) {
+ return;
+ }
+ std::ostringstream out;
+ FixPrecision(out);
+ const SHyperedge *best = GetBestSHyperedge();
+ if (best == NULL) {
+ VERBOSE(1, "NO BEST TRANSLATION" << std::endl);
+ if (StaticData::Instance().GetOutputHypoScore()) {
+ out << "0 ";
+ }
+ } else {
+ if (StaticData::Instance().GetOutputHypoScore()) {
+ out << best->label.score << " ";
+ }
+ Phrase yield = GetOneBestTargetYield(*best);
+ // delete 1st & last
+ UTIL_THROW_IF2(yield.GetSize() < 2,
+ "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
+ yield.RemoveWord(0);
+ yield.RemoveWord(yield.GetSize()-1);
+ out << yield.GetStringRep(StaticData::Instance().GetOutputFactorOrder());
+ out << '\n';
+ }
+ collector->Write(m_source.GetTranslationId(), out.str());
+}
+
+void Manager::OutputNBest(OutputCollector *collector) const
+{
+ if (collector) {
+ const StaticData &staticData = StaticData::Instance();
+ long translationId = m_source.GetTranslationId();
+
+ KBestExtractor::KBestVec nBestList;
+ ExtractKBest(staticData.GetNBestSize(), nBestList,
+ staticData.GetDistinctNBest());
+ OutputNBestList(collector, nBestList, translationId);
+ }
+}
+
+void Manager::OutputUnknowns(OutputCollector *collector) const
+{
+ if (collector) {
+ long translationId = m_source.GetTranslationId();
+
+ std::ostringstream out;
+ for (std::set<Moses::Word>::const_iterator p = m_oovs.begin();
+ p != m_oovs.end(); ++p) {
+ out << *p;
+ }
+ out << std::endl;
+ collector->Write(translationId, out.str());
+ }
+}
+
+void Manager::OutputNBestList(OutputCollector *collector,
+ const KBestExtractor::KBestVec &nBestList,
+ long translationId) const
+{
+ const StaticData &staticData = StaticData::Instance();
+
+ const std::vector<FactorType> &outputFactorOrder =
+ staticData.GetOutputFactorOrder();
+
+ std::ostringstream out;
+
+ if (collector->OutputIsCout()) {
+ // Set precision only if we're writing the n-best list to cout. This is to
+ // preserve existing behaviour, but should probably be done either way.
+ FixPrecision(out);
+ }
+
+ bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
+ bool PrintNBestTrees = staticData.PrintNBestTrees();
+
+ for (KBestExtractor::KBestVec::const_iterator p = nBestList.begin();
+ p != nBestList.end(); ++p) {
+ const KBestExtractor::Derivation &derivation = **p;
+
+ // get the derivation's target-side yield
+ Phrase outputPhrase = KBestExtractor::GetOutputPhrase(derivation);
+
+ // delete <s> and </s>
+ UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
+ "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
+ outputPhrase.RemoveWord(0);
+ outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
+
+ // print the translation ID, surface factors, and scores
+ out << translationId << " ||| ";
+ OutputSurface(out, outputPhrase, outputFactorOrder, false);
+ out << " ||| ";
+ derivation.scoreBreakdown.OutputAllFeatureScores(out);
+ out << " ||| " << derivation.score;
+
+ // optionally, print word alignments
+ if (includeWordAlignment) {
+ out << " ||| ";
+ Alignments align;
+ OutputAlignmentNBest(align, derivation, 0);
+ for (Alignments::const_iterator q = align.begin(); q != align.end();
+ ++q) {
+ out << q->first << "-" << q->second << " ";
+ }
+ }
+
+ // optionally, print tree
+ if (PrintNBestTrees) {
+ TreePointer tree = KBestExtractor::GetOutputTree(derivation);
+ out << " ||| " << tree->GetString();
+ }
+
+ out << std::endl;
+ }
+
+ assert(collector);
+ collector->Write(translationId, out.str());
+}
+
+std::size_t Manager::OutputAlignmentNBest(
+ Alignments &retAlign,
+ const KBestExtractor::Derivation &derivation,
+ std::size_t startTarget) const
+{
+ const SHyperedge &shyperedge = derivation.edge->shyperedge;
+
+ std::size_t totalTargetSize = 0;
+ std::size_t startSource = shyperedge.head->pvertex->span.GetStartPos();
+
+ const TargetPhrase &tp = *(shyperedge.label.translation);
+
+ std::size_t thisSourceSize = CalcSourceSize(derivation);
+
+ // position of each terminal word in translation rule, irrespective of
+ // alignment if non-term, number is undefined
+ std::vector<std::size_t> sourceOffsets(thisSourceSize, 0);
+ std::vector<std::size_t> targetOffsets(tp.GetSize(), 0);
+
+ const AlignmentInfo &aiNonTerm =
+ shyperedge.label.translation->GetAlignNonTerm();
+ std::vector<std::size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
+ const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd =
+ aiNonTerm.GetNonTermIndexMap();
+
+ UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
+ "Error");
+
+ std::size_t targetInd = 0;
+ for (std::size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
+ if (tp.GetWord(targetPos).IsNonTerminal()) {
+ UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
+ std::size_t sourceInd = targetPos2SourceInd[targetPos];
+ std::size_t sourcePos = sourceInd2pos[sourceInd];
+
+ const KBestExtractor::Derivation &subderivation =
+ *derivation.subderivations[sourceInd];
+
+ // calc source size
+ std::size_t sourceSize =
+ subderivation.edge->head->svertex.pvertex->span.GetNumWordsCovered();
+ sourceOffsets[sourcePos] = sourceSize;
+
+ // calc target size.
+ // Recursively look thru child hypos
+ std::size_t currStartTarget = startTarget + totalTargetSize;
+ std::size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
+ currStartTarget);
+ targetOffsets[targetPos] = targetSize;
+
+ totalTargetSize += targetSize;
+ ++targetInd;
+ } else {
+ ++totalTargetSize;
+ }
+ }
+
+ // convert position within translation rule to absolute position within
+ // source sentence / output sentence
+ ShiftOffsets(sourceOffsets, startSource);
+ ShiftOffsets(targetOffsets, startTarget);
+
+ // get alignments from this hypo
+ const AlignmentInfo &aiTerm = shyperedge.label.translation->GetAlignTerm();
+
+ // add to output arg, offsetting by source & target
+ AlignmentInfo::const_iterator iter;
+ for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
+ const std::pair<std::size_t, std::size_t> &align = *iter;
+ std::size_t relSource = align.first;
+ std::size_t relTarget = align.second;
+ std::size_t absSource = sourceOffsets[relSource];
+ std::size_t absTarget = targetOffsets[relTarget];
+
+ std::pair<std::size_t, std::size_t> alignPoint(absSource, absTarget);
+ std::pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
+ UTIL_THROW_IF2(!ret.second, "Error");
+ }
+
+ return totalTargetSize;
+}
+
+std::size_t Manager::CalcSourceSize(const KBestExtractor::Derivation &d) const
+{
+ const SHyperedge &shyperedge = d.edge->shyperedge;
+ std::size_t ret = shyperedge.head->pvertex->span.GetNumWordsCovered();
+ for (std::size_t i = 0; i < shyperedge.tail.size(); ++i) {
+ std::size_t childSize =
+ shyperedge.tail[i]->pvertex->span.GetNumWordsCovered();
+ ret -= (childSize - 1);
+ }
+ return ret;
+}
+
+} // Syntax
+} // Moses
diff --git a/moses/Syntax/Manager.h b/moses/Syntax/Manager.h
new file mode 100644
index 000000000..8d814f604
--- /dev/null
+++ b/moses/Syntax/Manager.h
@@ -0,0 +1,63 @@
+#pragma once
+
+#include "moses/InputType.h"
+#include "moses/BaseManager.h"
+
+#include "KBestExtractor.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+
+// Common base class for Moses::Syntax managers.
+class Manager : public BaseManager
+{
+public:
+ Manager(const InputType &);
+
+ // Virtual functions from Moses::BaseManager that are implemented the same
+ // way for all Syntax managers.
+ void OutputBest(OutputCollector *collector) const;
+ void OutputNBest(OutputCollector *collector) const;
+ void OutputUnknowns(OutputCollector *collector) const;
+
+ // Virtual functions from Moses::BaseManager that are no-ops for all Syntax
+ // managers.
+ void OutputAlignment(OutputCollector *collector) const {}
+ void OutputDetailedTreeFragmentsTranslationReport(
+ OutputCollector *collector) const {}
+ void OutputLatticeSamples(OutputCollector *collector) const {}
+ void OutputSearchGraph(OutputCollector *collector) const {}
+ void OutputSearchGraphHypergraph() const {}
+ void OutputSearchGraphSLF() const {}
+ void OutputWordGraph(OutputCollector *collector) const {}
+ void OutputDetailedTranslationReport(OutputCollector *collector) const {}
+
+ void CalcDecoderStatistics() const {}
+
+ // Syntax-specific virtual functions that derived classes must implement.
+ virtual void ExtractKBest(
+ std::size_t k,
+ std::vector<boost::shared_ptr<KBestExtractor::Derivation> > &kBestList,
+ bool onlyDistinct=false) const = 0;
+ virtual const SHyperedge *GetBestSHyperedge() const = 0;
+
+protected:
+ std::set<Word> m_oovs;
+
+private:
+ // Syntax-specific helper functions used to implement OutputNBest.
+ void OutputNBestList(OutputCollector *collector,
+ const KBestExtractor::KBestVec &nBestList,
+ long translationId) const;
+
+ std::size_t OutputAlignmentNBest(Alignments &retAlign,
+ const KBestExtractor::Derivation &d,
+ std::size_t startTarget) const;
+
+ std::size_t CalcSourceSize(const KBestExtractor::Derivation &d) const;
+};
+
+} // Syntax
+} // Moses
diff --git a/moses/Syntax/NonTerminalMap.h b/moses/Syntax/NonTerminalMap.h
index ff7ce2508..b645212c5 100644
--- a/moses/Syntax/NonTerminalMap.h
+++ b/moses/Syntax/NonTerminalMap.h
@@ -23,42 +23,56 @@ namespace Syntax
template<typename T>
class NonTerminalMap
{
- private:
+private:
typedef boost::unordered_map<Word, T, SymbolHasher, SymbolEqualityPred> Map;
typedef std::vector<T*> Vec;
- public:
+public:
typedef typename Map::iterator Iterator;
typedef typename Map::const_iterator ConstIterator;
NonTerminalMap()
- : m_vec(FactorCollection::Instance().GetNumNonTerminals(), NULL) {}
+ : m_vec(FactorCollection::Instance().GetNumNonTerminals(), NULL) {}
- Iterator Begin() { return m_map.begin(); }
- Iterator End() { return m_map.end(); }
+ Iterator Begin() {
+ return m_map.begin();
+ }
+ Iterator End() {
+ return m_map.end();
+ }
- ConstIterator Begin() const { return m_map.begin(); }
- ConstIterator End() const { return m_map.end(); }
+ ConstIterator Begin() const {
+ return m_map.begin();
+ }
+ ConstIterator End() const {
+ return m_map.end();
+ }
- std::size_t Size() const { return m_map.size(); }
+ std::size_t Size() const {
+ return m_map.size();
+ }
- bool IsEmpty() const { return m_map.empty(); }
+ bool IsEmpty() const {
+ return m_map.empty();
+ }
std::pair<Iterator, bool> Insert(const Word &, const T &);
- T *Find(const Word &w) const { return m_vec[w[0]->GetId()]; }
+ T *Find(const Word &w) const {
+ return m_vec[w[0]->GetId()];
+ }
- private:
+private:
Map m_map;
Vec m_vec;
};
template<typename T>
std::pair<typename NonTerminalMap<T>::Iterator, bool> NonTerminalMap<T>::Insert(
- const Word &key, const T &value)
+ const Word &key, const T &value)
{
std::pair<typename Map::iterator, bool> result =
- m_map.insert(typename Map::value_type(key, value));
+ m_map.insert(typename Map::value_type(key, value));
if (result.second) {
T *p = &(result.first->second);
std::size_t i = key[0]->GetId();
diff --git a/moses/Syntax/PHyperedge.h b/moses/Syntax/PHyperedge.h
index 8f236fcb8..f8ad63e73 100644
--- a/moses/Syntax/PHyperedge.h
+++ b/moses/Syntax/PHyperedge.h
@@ -2,7 +2,7 @@
#include <vector>
-#include "moses/TargetPhraseCollection.h"
+#include "PLabel.h"
namespace Moses
{
@@ -11,11 +11,10 @@ namespace Syntax
struct PVertex;
-struct PHyperedge
-{
+struct PHyperedge {
PVertex *head;
std::vector<PVertex*> tail;
- const TargetPhraseCollection *translations;
+ PLabel label;
};
} // Syntax
diff --git a/moses/Syntax/PLabel.h b/moses/Syntax/PLabel.h
new file mode 100644
index 000000000..a1c3dcce0
--- /dev/null
+++ b/moses/Syntax/PLabel.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include "moses/TargetPhraseCollection.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+
+struct PLabel {
+ const TargetPhraseCollection *translations;
+};
+
+} // Syntax
+} // Moses
diff --git a/moses/Syntax/PVertex.h b/moses/Syntax/PVertex.h
index d82309c82..8832128b4 100644
--- a/moses/Syntax/PVertex.h
+++ b/moses/Syntax/PVertex.h
@@ -8,14 +8,18 @@ namespace Moses
namespace Syntax
{
-struct PVertex
-{
- public:
+struct PVertex {
+public:
PVertex(const WordsRange &wr, const Word &w) : span(wr), symbol(w) {}
WordsRange span;
Word symbol;
};
+inline bool operator==(const PVertex &v, const PVertex &w)
+{
+ return v.span == w.span && v.symbol == w.symbol;
+}
+
} // Syntax
} // Moses
diff --git a/moses/Syntax/RuleTable.h b/moses/Syntax/RuleTable.h
index 90a25d63c..8e20817dc 100644
--- a/moses/Syntax/RuleTable.h
+++ b/moses/Syntax/RuleTable.h
@@ -11,12 +11,12 @@ class RuleTableFF;
// grammar, like a trie (for S2T) or a DFA (for T2S).
class RuleTable
{
- public:
+public:
RuleTable(const RuleTableFF *ff) : m_ff(ff) {}
virtual ~RuleTable() {}
- protected:
+protected:
const RuleTableFF *m_ff;
};
diff --git a/moses/Syntax/RuleTableFF.cpp b/moses/Syntax/RuleTableFF.cpp
index 771c3983c..f4e06f489 100644
--- a/moses/Syntax/RuleTableFF.cpp
+++ b/moses/Syntax/RuleTableFF.cpp
@@ -1,9 +1,13 @@
#include "RuleTableFF.h"
#include "moses/StaticData.h"
+#include "moses/Syntax/F2S/HyperTree.h"
+#include "moses/Syntax/F2S/HyperTreeLoader.h"
#include "moses/Syntax/S2T/RuleTrieCYKPlus.h"
#include "moses/Syntax/S2T/RuleTrieLoader.h"
#include "moses/Syntax/S2T/RuleTrieScope3.h"
+#include "moses/Syntax/T2S/RuleTrie.h"
+#include "moses/Syntax/T2S/RuleTrieLoader.h"
namespace Moses
{
@@ -27,9 +31,13 @@ void RuleTableFF::Load()
SetFeaturesToApply();
const StaticData &staticData = StaticData::Instance();
- if (!staticData.UseS2TDecoder()) {
- UTIL_THROW2("ERROR: RuleTableFF currently only supports S2T decoder");
- } else {
+ if (staticData.GetSearchAlgorithm() == SyntaxF2S ||
+ staticData.GetSearchAlgorithm() == SyntaxT2S) {
+ F2S::HyperTree *trie = new F2S::HyperTree(this);
+ F2S::HyperTreeLoader loader;
+ loader.Load(m_input, m_output, m_filePath, *this, *trie);
+ m_table = trie;
+ } else if (staticData.GetSearchAlgorithm() == SyntaxS2T) {
S2TParsingAlgorithm algorithm = staticData.GetS2TParsingAlgorithm();
if (algorithm == RecursiveCYKPlus) {
S2T::RuleTrieCYKPlus *trie = new S2T::RuleTrieCYKPlus(this);
@@ -44,6 +52,14 @@ void RuleTableFF::Load()
} else {
UTIL_THROW2("ERROR: unhandled S2T parsing algorithm");
}
+ } else if (staticData.GetSearchAlgorithm() == SyntaxT2S_SCFG) {
+ T2S::RuleTrie *trie = new T2S::RuleTrie(this);
+ T2S::RuleTrieLoader loader;
+ loader.Load(m_input, m_output, m_filePath, *this, *trie);
+ m_table = trie;
+ } else {
+ UTIL_THROW2(
+ "ERROR: RuleTableFF currently only supports the S2T, T2S, T2S_SCFG, and F2S search algorithms");
}
}
diff --git a/moses/Syntax/RuleTableFF.h b/moses/Syntax/RuleTableFF.h
index 0e6040612..4d6132e86 100644
--- a/moses/Syntax/RuleTableFF.h
+++ b/moses/Syntax/RuleTableFF.h
@@ -21,7 +21,7 @@ class RuleTable;
// anything except provide somewhere to store the weights and parameter values.
class RuleTableFF : public PhraseDictionary
{
- public:
+public:
RuleTableFF(const std::string &);
// FIXME Delete m_table?
@@ -29,18 +29,21 @@ class RuleTableFF : public PhraseDictionary
void Load();
- const RuleTable *GetTable() const { return m_table; }
+ const RuleTable *GetTable() const {
+ return m_table;
+ }
- static const std::vector<RuleTableFF*> &Instances() { return s_instances; }
+ static const std::vector<RuleTableFF*> &Instances() {
+ return s_instances;
+ }
ChartRuleLookupManager *CreateRuleLookupManager(
- const ChartParser &, const ChartCellCollectionBase &, std::size_t)
- {
+ const ChartParser &, const ChartCellCollectionBase &, std::size_t) {
assert(false);
return 0;
}
- private:
+private:
static std::vector<RuleTableFF*> s_instances;
const RuleTable *m_table;
diff --git a/moses/Syntax/S2T/DerivationWriter.cpp b/moses/Syntax/S2T/DerivationWriter.cpp
index dcb98b3c6..33fb8d70b 100644
--- a/moses/Syntax/S2T/DerivationWriter.cpp
+++ b/moses/Syntax/S2T/DerivationWriter.cpp
@@ -61,7 +61,7 @@ void DerivationWriter::WriteLine(const SHyperedge &shyperedge,
out << " ->";
// Target RHS symbols.
- const TargetPhrase &phrase = *(shyperedge.translation);
+ const TargetPhrase &phrase = *(shyperedge.label.translation);
for (std::size_t i = 0; i < phrase.GetSize(); ++i) {
out << " ";
WriteSymbol(phrase.GetWord(i), out);
diff --git a/moses/Syntax/S2T/DerivationWriter.h b/moses/Syntax/S2T/DerivationWriter.h
index 706490ce0..af0e5f521 100644
--- a/moses/Syntax/S2T/DerivationWriter.h
+++ b/moses/Syntax/S2T/DerivationWriter.h
@@ -21,14 +21,14 @@ namespace S2T
// TODO should be revisited when other the decoders are implemented.
class DerivationWriter
{
- public:
+public:
// 1-best version.
static void Write(const SHyperedge&, std::size_t, std::ostream &);
// k-best version.
static void Write(const KBestExtractor::Derivation &, std::size_t,
std::ostream &);
- private:
+private:
static void WriteLine(const SHyperedge &, std::size_t, std::ostream &);
static void WriteSymbol(const Word &, std::ostream &);
};
diff --git a/moses/Syntax/S2T/Manager-inl.h b/moses/Syntax/S2T/Manager-inl.h
index a08c320f0..15594d589 100644
--- a/moses/Syntax/S2T/Manager-inl.h
+++ b/moses/Syntax/S2T/Manager-inl.h
@@ -1,5 +1,8 @@
#pragma once
+#include <iostream>
+#include <sstream>
+
#include "moses/DecodeGraph.h"
#include "moses/StaticData.h"
#include "moses/Syntax/BoundedPriorityContainer.h"
@@ -13,6 +16,7 @@
#include "moses/Syntax/SymbolEqualityPred.h"
#include "moses/Syntax/SymbolHasher.h"
+#include "DerivationWriter.h"
#include "OovHandler.h"
#include "PChart.h"
#include "RuleTrie.h"
@@ -27,9 +31,9 @@ namespace S2T
template<typename Parser>
Manager<Parser>::Manager(const InputType &source)
- : m_source(source)
- , m_pchart(source.GetSize(), Parser::RequiresCompressedChart())
- , m_schart(source.GetSize())
+ : Syntax::Manager(source)
+ , m_pchart(source.GetSize(), Parser::RequiresCompressedChart())
+ , m_schart(source.GetSize())
{
}
@@ -41,7 +45,7 @@ void Manager<Parser>::InitializeCharts()
const Word &terminal = m_source.GetWord(i);
// PVertex
- PVertex tmp(WordsRange(i,i), m_source.GetWord(i));
+ PVertex tmp(WordsRange(i,i), terminal);
PVertex &pvertex = m_pchart.AddVertex(tmp);
// SVertex
@@ -96,7 +100,7 @@ void Manager<Parser>::InitializeParsers(PChart &pchart,
m_oovRuleTrie = oovHandler.SynthesizeRuleTrie(m_oovs.begin(), m_oovs.end());
// Create a parser for the OOV rule trie.
boost::shared_ptr<Parser> parser(
- new Parser(pchart, *m_oovRuleTrie, maxOovWidth));
+ new Parser(pchart, *m_oovRuleTrie, maxOovWidth));
m_parsers.push_back(parser);
}
}
@@ -193,14 +197,14 @@ void Manager<Parser>::Decode()
// Retrieve the (pruned) set of SHyperedgeBundles from the callback.
const BoundedPriorityContainer<SHyperedgeBundle> &bundles =
- callback.GetContainer();
+ callback.GetContainer();
// Use cube pruning to extract SHyperedges from SHyperedgeBundles.
// Collect the SHyperedges into buffers, one for each category.
CubeQueue cubeQueue(bundles.Begin(), bundles.End());
std::size_t count = 0;
typedef boost::unordered_map<Word, std::vector<SHyperedge*>,
- SymbolHasher, SymbolEqualityPred > BufferMap;
+ SymbolHasher, SymbolEqualityPred > BufferMap;
BufferMap buffers;
while (count < popLimit && !cubeQueue.IsEmpty()) {
SHyperedge *hyperedge = cubeQueue.Pop();
@@ -211,7 +215,7 @@ void Manager<Parser>::Decode()
// happens during cube pruning). The cube pruning code doesn't (and
// shouldn't) know about the contents of PChart and so creation of
// the PVertex is deferred until this point.
- const Word &lhs = hyperedge->translation->GetTargetLHS();
+ const Word &lhs = hyperedge->label.translation->GetTargetLHS();
hyperedge->head->pvertex = &m_pchart.AddVertex(PVertex(range, lhs));
// END{HACK}
buffers[lhs].push_back(hyperedge);
@@ -224,7 +228,7 @@ void Manager<Parser>::Decode()
const Word &category = p->first;
const std::vector<SHyperedge*> &buffer = p->second;
std::pair<SChart::Cell::NMap::Iterator, bool> ret =
- scell.nonTerminalStacks.Insert(category, SVertexStack());
+ scell.nonTerminalStacks.Insert(category, SVertexStack());
assert(ret.second);
SVertexStack &stack = ret.first->second;
RecombineAndSort(buffer, stack);
@@ -259,14 +263,15 @@ const SHyperedge *Manager<Parser>::GetBestSHyperedge() const
}
assert(stacks.Size() == 1);
const std::vector<boost::shared_ptr<SVertex> > &stack = stacks.Begin()->second;
+ // TODO Throw exception if stack is empty? Or return 0?
return stack[0]->best;
}
template<typename Parser>
void Manager<Parser>::ExtractKBest(
- std::size_t k,
- std::vector<boost::shared_ptr<KBestExtractor::Derivation> > &kBestList,
- bool onlyDistinct) const
+ std::size_t k,
+ std::vector<boost::shared_ptr<KBestExtractor::Derivation> > &kBestList,
+ bool onlyDistinct) const
{
kBestList.clear();
if (k == 0 || m_source.GetSize() == 0) {
@@ -281,6 +286,7 @@ void Manager<Parser>::ExtractKBest(
}
assert(stacks.Size() == 1);
const std::vector<boost::shared_ptr<SVertex> > &stack = stacks.Begin()->second;
+ // TODO Throw exception if stack is empty? Or return 0?
KBestExtractor extractor;
@@ -320,18 +326,18 @@ template<typename Parser>
void Manager<Parser>::PrunePChart(const SChart::Cell &scell,
PChart::Cell &pcell)
{
-/* FIXME
- PChart::Cell::VertexMap::iterator p = pcell.vertices.begin();
- while (p != pcell.vertices.end()) {
- const Word &category = p->first;
- if (scell.stacks.find(category) == scell.stacks.end()) {
- PChart::Cell::VertexMap::iterator q = p++;
- pcell.vertices.erase(q);
- } else {
- ++p;
+ /* FIXME
+ PChart::Cell::VertexMap::iterator p = pcell.vertices.begin();
+ while (p != pcell.vertices.end()) {
+ const Word &category = p->first;
+ if (scell.stacks.find(category) == scell.stacks.end()) {
+ PChart::Cell::VertexMap::iterator q = p++;
+ pcell.vertices.erase(q);
+ } else {
+ ++p;
+ }
}
- }
-*/
+ */
}
template<typename Parser>
@@ -359,7 +365,7 @@ void Manager<Parser>::RecombineAndSort(const std::vector<SHyperedge*> &buffer,
// Compare the score of h against the score of the best incoming hyperedge
// for the stored vertex.
SVertex *storedVertex = result.first->second;
- if (h->score > storedVertex->best->score) {
+ if (h->label.score > storedVertex->best->label.score) {
// h's score is better.
storedVertex->recombined.push_back(storedVertex->best);
storedVertex->best = h;
@@ -382,6 +388,20 @@ void Manager<Parser>::RecombineAndSort(const std::vector<SHyperedge*> &buffer,
std::sort(stack.begin(), stack.end(), SVertexStackContentOrderer());
}
+template<typename Parser>
+void Manager<Parser>::OutputDetailedTranslationReport(
+ OutputCollector *collector) const
+{
+ const SHyperedge *best = GetBestSHyperedge();
+ if (best == NULL || collector == NULL) {
+ return;
+ }
+ long translationId = m_source.GetTranslationId();
+ std::ostringstream out;
+ DerivationWriter::Write(*best, translationId, out);
+ collector->Write(translationId, out.str());
+}
+
} // S2T
} // Syntax
} // Moses
diff --git a/moses/Syntax/S2T/Manager.h b/moses/Syntax/S2T/Manager.h
index f6bea903d..0961c8e77 100644
--- a/moses/Syntax/S2T/Manager.h
+++ b/moses/Syntax/S2T/Manager.h
@@ -1,12 +1,15 @@
#pragma once
+#include <set>
#include <vector>
#include <boost/shared_ptr.hpp>
#include "moses/InputType.h"
#include "moses/Syntax/KBestExtractor.h"
+#include "moses/Syntax/Manager.h"
#include "moses/Syntax/SVertexStack.h"
+#include "moses/Word.h"
#include "OovHandler.h"
#include "ParserCallback.h"
@@ -18,16 +21,15 @@ namespace Moses
namespace Syntax
{
-class SDerivation;
struct SHyperedge;
namespace S2T
{
template<typename Parser>
-class Manager
+class Manager : public Syntax::Manager
{
- public:
+public:
Manager(const InputType &);
void Decode();
@@ -36,13 +38,13 @@ class Manager
const SHyperedge *GetBestSHyperedge() const;
void ExtractKBest(
- std::size_t k,
- std::vector<boost::shared_ptr<KBestExtractor::Derivation> > &kBestList,
- bool onlyDistinct=false) const;
+ std::size_t k,
+ std::vector<boost::shared_ptr<KBestExtractor::Derivation> > &kBestList,
+ bool onlyDistinct=false) const;
- const std::set<Word> &GetUnknownWords() const { return m_oovs; }
+ void OutputDetailedTranslationReport(OutputCollector *collector) const;
- private:
+private:
void FindOovs(const PChart &, std::set<Word> &, std::size_t);
void InitializeCharts();
@@ -53,10 +55,8 @@ class Manager
void PrunePChart(const SChart::Cell &, PChart::Cell &);
- const InputType &m_source;
PChart m_pchart;
SChart m_schart;
- std::set<Word> m_oovs;
boost::shared_ptr<typename Parser::RuleTrie> m_oovRuleTrie;
std::vector<boost::shared_ptr<Parser> > m_parsers;
};
diff --git a/moses/Syntax/S2T/OovHandler-inl.h b/moses/Syntax/S2T/OovHandler-inl.h
index e700f65c5..76eed861e 100644
--- a/moses/Syntax/S2T/OovHandler-inl.h
+++ b/moses/Syntax/S2T/OovHandler-inl.h
@@ -13,7 +13,7 @@ namespace S2T
template<typename RuleTrie>
template<typename InputIterator>
boost::shared_ptr<RuleTrie> OovHandler<RuleTrie>::SynthesizeRuleTrie(
- InputIterator first, InputIterator last)
+ InputIterator first, InputIterator last)
{
const UnknownLHSList &lhsList = StaticData::Instance().GetUnknownLHS();
@@ -33,7 +33,7 @@ boost::shared_ptr<RuleTrie> OovHandler<RuleTrie>::SynthesizeRuleTrie(
Word *tgtLHS = SynthesizeTargetLhs(targetLhsStr);
TargetPhrase *tp = SynthesizeTargetPhrase(oov, *srcPhrase, *tgtLHS, prob);
TargetPhraseCollection &tpc = GetOrCreateTargetPhraseCollection(
- *trie, *srcPhrase, *tp, NULL); // TODO Check NULL is valid argument
+ *trie, *srcPhrase, *tp, NULL); // TODO Check NULL is valid argument
tpc.Add(tp);
}
}
@@ -63,12 +63,12 @@ Word *OovHandler<RuleTrie>::SynthesizeTargetLhs(const std::string &lhsStr)
template<typename RuleTrie>
TargetPhrase *OovHandler<RuleTrie>::SynthesizeTargetPhrase(
- const Word &oov, const Phrase &srcPhrase, const Word &targetLhs, float prob)
+ const Word &oov, const Phrase &srcPhrase, const Word &targetLhs, float prob)
{
const StaticData &staticData = StaticData::Instance();
const UnknownWordPenaltyProducer &unknownWordPenaltyProducer =
- UnknownWordPenaltyProducer::Instance();
+ UnknownWordPenaltyProducer::Instance();
TargetPhrase *targetPhrase = new TargetPhrase();
Word &targetWord = targetPhrase->AddWord();
diff --git a/moses/Syntax/S2T/OovHandler.h b/moses/Syntax/S2T/OovHandler.h
index b74e697c5..5d484d2fd 100644
--- a/moses/Syntax/S2T/OovHandler.h
+++ b/moses/Syntax/S2T/OovHandler.h
@@ -4,6 +4,7 @@
#include <boost/shared_ptr.hpp>
+#include "moses/Phrase.h"
#include "moses/Syntax/RuleTableFF.h"
#include "moses/TargetPhrase.h"
#include "moses/Word.h"
@@ -20,7 +21,7 @@ namespace S2T
template<typename RuleTrie>
class OovHandler : public RuleTrieCreator
{
- public:
+public:
OovHandler(const RuleTableFF &ff) : m_ruleTableFF(ff) {}
// Synthesize a RuleTrie given a sequence of OOV words. The sequence is
@@ -29,7 +30,7 @@ class OovHandler : public RuleTrieCreator
template<typename InputIterator>
boost::shared_ptr<RuleTrie> SynthesizeRuleTrie(InputIterator, InputIterator);
- private:
+private:
const RuleTableFF &m_ruleTableFF;
bool ShouldDrop(const Word &);
diff --git a/moses/Syntax/S2T/PChart.h b/moses/Syntax/S2T/PChart.h
index 8f719eebb..0bd3148b3 100644
--- a/moses/Syntax/S2T/PChart.h
+++ b/moses/Syntax/S2T/PChart.h
@@ -19,11 +19,10 @@ namespace S2T
class PChart
{
- public:
- struct Cell
- {
+public:
+ struct Cell {
typedef boost::unordered_map<Word, PVertex, SymbolHasher,
- SymbolEqualityPred> TMap;
+ SymbolEqualityPred> TMap;
typedef NonTerminalMap<PVertex> NMap;
// Collection of terminal vertices (keyed by terminal symbol).
TMap terminalVertices;
@@ -42,7 +41,9 @@ class PChart
~PChart();
- std::size_t GetWidth() const { return m_cells.size(); }
+ std::size_t GetWidth() const {
+ return m_cells.size();
+ }
const Cell &GetCell(std::size_t start, std::size_t end) const {
return m_cells[start][end];
@@ -57,13 +58,13 @@ class PChart
if (!v.symbol.IsNonTerminal()) {
Cell::TMap::value_type x(v.symbol, v);
std::pair<Cell::TMap::iterator, bool> ret =
- cell.terminalVertices.insert(x);
+ cell.terminalVertices.insert(x);
return ret.first->second;
}
// If v is a non-terminal vertex add it to the cell's nonTerminalVertices
// map and update the compressed chart (if enabled).
std::pair<Cell::NMap::Iterator, bool> result =
- cell.nonTerminalVertices.Insert(v.symbol, v);
+ cell.nonTerminalVertices.Insert(v.symbol, v);
if (result.second && m_compressedChart) {
CompressedItem item;
item.end = end;
@@ -77,7 +78,7 @@ class PChart
return (*m_compressedChart)[start];
}
- private:
+private:
typedef std::vector<CompressedMatrix> CompressedChart;
std::vector<std::vector<Cell> > m_cells;
diff --git a/moses/Syntax/S2T/PHyperedgeToSHyperedgeBundle.h b/moses/Syntax/S2T/PHyperedgeToSHyperedgeBundle.h
index dd0be3ae9..e2802390e 100644
--- a/moses/Syntax/S2T/PHyperedgeToSHyperedgeBundle.h
+++ b/moses/Syntax/S2T/PHyperedgeToSHyperedgeBundle.h
@@ -15,9 +15,10 @@ namespace S2T
// Given a PHyperedge object and SChart produces a SHyperedgeBundle object.
inline void PHyperedgeToSHyperedgeBundle(const PHyperedge &hyperedge,
- const SChart &schart,
- SHyperedgeBundle &bundle) {
- bundle.translations = hyperedge.translations;
+ const SChart &schart,
+ SHyperedgeBundle &bundle)
+{
+ bundle.translations = hyperedge.label.translations;
bundle.stacks.clear();
for (std::vector<PVertex*>::const_iterator p = hyperedge.tail.begin();
p != hyperedge.tail.end(); ++p) {
@@ -31,7 +32,7 @@ inline void PHyperedgeToSHyperedgeBundle(const PHyperedge &hyperedge,
stack = cell.nonTerminalStacks.Find(symbol);
} else {
const SChart::Cell::TMap::const_iterator q =
- cell.terminalStacks.find(symbol);
+ cell.terminalStacks.find(symbol);
assert(q != cell.terminalStacks.end());
stack = &(q->second);
}
diff --git a/moses/Syntax/S2T/ParserCallback.h b/moses/Syntax/S2T/ParserCallback.h
index b18a85eae..f9db51601 100644
--- a/moses/Syntax/S2T/ParserCallback.h
+++ b/moses/Syntax/S2T/ParserCallback.h
@@ -15,14 +15,15 @@ namespace Syntax
namespace S2T
{
-class StandardParserCallback {
- private:
+class StandardParserCallback
+{
+private:
typedef BoundedPriorityContainer<SHyperedgeBundle> Container;
- public:
+public:
StandardParserCallback(const SChart &schart, std::size_t ruleLimit)
- : m_schart(schart)
- , m_container(ruleLimit) {}
+ : m_schart(schart)
+ , m_container(ruleLimit) {}
void operator()(const PHyperedge &hyperedge) {
PHyperedgeToSHyperedgeBundle(hyperedge, m_schart, m_tmpBundle);
@@ -30,25 +31,30 @@ class StandardParserCallback {
m_container.SwapIn(m_tmpBundle, score);
}
- void InitForRange(const WordsRange &range) { m_container.LazyClear(); }
+ void InitForRange(const WordsRange &range) {
+ m_container.LazyClear();
+ }
- const Container &GetContainer() { return m_container; }
+ const Container &GetContainer() {
+ return m_container;
+ }
- private:
+private:
const SChart &m_schart;
SHyperedgeBundle m_tmpBundle;
BoundedPriorityContainer<SHyperedgeBundle> m_container;
};
-class EagerParserCallback {
- private:
+class EagerParserCallback
+{
+private:
typedef BoundedPriorityContainer<SHyperedgeBundle> Container;
- public:
+public:
EagerParserCallback(const SChart &schart, std::size_t ruleLimit)
- : m_schart(schart)
- , m_containers(schart.GetWidth(), Container(ruleLimit))
- , m_prevStart(std::numeric_limits<std::size_t>::max()) {}
+ : m_schart(schart)
+ , m_containers(schart.GetWidth(), Container(ruleLimit))
+ , m_prevStart(std::numeric_limits<std::size_t>::max()) {}
void operator()(const PHyperedge &hyperedge, std::size_t end) {
PHyperedgeToSHyperedgeBundle(hyperedge, m_schart, m_tmpBundle);
@@ -68,9 +74,11 @@ class EagerParserCallback {
}
}
- const Container &GetContainer() { return m_containers[m_end]; }
+ const Container &GetContainer() {
+ return m_containers[m_end];
+ }
- private:
+private:
const SChart &m_schart;
SHyperedgeBundle m_tmpBundle;
std::vector<Container> m_containers;
diff --git a/moses/Syntax/S2T/Parsers/Parser.h b/moses/Syntax/S2T/Parsers/Parser.h
index b13a8d502..785fb66f9 100644
--- a/moses/Syntax/S2T/Parsers/Parser.h
+++ b/moses/Syntax/S2T/Parsers/Parser.h
@@ -13,7 +13,7 @@ class PChart;
template<typename Callback>
class Parser
{
- public:
+public:
typedef Callback CallbackType;
Parser(PChart &chart) : m_chart(chart) {}
@@ -21,7 +21,7 @@ class Parser
virtual ~Parser() {}
virtual void EnumerateHyperedges(const WordsRange &, Callback &) = 0;
- protected:
+protected:
PChart &m_chart;
};
diff --git a/moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser-inl.h b/moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser-inl.h
index b275a93ee..a84e16a54 100644
--- a/moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser-inl.h
+++ b/moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser-inl.h
@@ -11,21 +11,21 @@ namespace S2T
template<typename Callback>
RecursiveCYKPlusParser<Callback>::RecursiveCYKPlusParser(
- PChart &chart,
- const RuleTrie &trie,
- std::size_t maxChartSpan)
- : Parser<Callback>(chart)
- , m_ruleTable(trie)
- , m_maxChartSpan(maxChartSpan)
- , m_callback(NULL)
+ PChart &chart,
+ const RuleTrie &trie,
+ std::size_t maxChartSpan)
+ : Parser<Callback>(chart)
+ , m_ruleTable(trie)
+ , m_maxChartSpan(maxChartSpan)
+ , m_callback(NULL)
{
m_hyperedge.head = 0;
}
template<typename Callback>
void RecursiveCYKPlusParser<Callback>::EnumerateHyperedges(
- const WordsRange &range,
- Callback &callback)
+ const WordsRange &range,
+ Callback &callback)
{
const std::size_t start = range.GetStartPos();
const std::size_t end = range.GetEndPos();
@@ -49,16 +49,17 @@ void RecursiveCYKPlusParser<Callback>::EnumerateHyperedges(
// with a non-terminal over a span between [start,minEnd] and [start,maxEnd].
template<typename Callback>
void RecursiveCYKPlusParser<Callback>::GetNonTerminalExtensions(
- const RuleTrie::Node &node,
- std::size_t start,
- std::size_t minEnd,
- std::size_t maxEnd) {
+ const RuleTrie::Node &node,
+ std::size_t start,
+ std::size_t minEnd,
+ std::size_t maxEnd)
+{
// Non-terminal labels in node's outgoing edge set.
const RuleTrie::Node::SymbolMap &nonTermMap = node.GetNonTerminalMap();
// Compressed matrix from PChart.
const PChart::CompressedMatrix &matrix =
- Base::m_chart.GetCompressedMatrix(start);
+ Base::m_chart.GetCompressedMatrix(start);
// Loop over possible expansions of the rule.
RuleTrie::Node::SymbolMap::const_iterator p;
@@ -66,7 +67,7 @@ void RecursiveCYKPlusParser<Callback>::GetNonTerminalExtensions(
for (p = nonTermMap.begin(); p != p_end; ++p) {
const Word &nonTerm = p->first;
const std::vector<PChart::CompressedItem> &items =
- matrix[nonTerm[0]->GetId()];
+ matrix[nonTerm[0]->GetId()];
for (std::vector<PChart::CompressedItem>::const_iterator q = items.begin();
q != items.end(); ++q) {
if (q->end >= minEnd && q->end <= maxEnd) {
@@ -81,12 +82,13 @@ void RecursiveCYKPlusParser<Callback>::GetNonTerminalExtensions(
// with a terminal over span [start,end].
template<typename Callback>
void RecursiveCYKPlusParser<Callback>::GetTerminalExtension(
- const RuleTrie::Node &node,
- std::size_t start,
- std::size_t end) {
+ const RuleTrie::Node &node,
+ std::size_t start,
+ std::size_t end)
+{
const PChart::Cell::TMap &vertexMap =
- Base::m_chart.GetCell(start, end).terminalVertices;
+ Base::m_chart.GetCell(start, end).terminalVertices;
if (vertexMap.empty()) {
return;
}
@@ -122,16 +124,17 @@ void RecursiveCYKPlusParser<Callback>::GetTerminalExtension(
// non-empty), and try to find expansions that have this partial rule as prefix.
template<typename Callback>
void RecursiveCYKPlusParser<Callback>::AddAndExtend(
- const RuleTrie::Node &node,
- std::size_t end,
- const PVertex &vertex) {
+ const RuleTrie::Node &node,
+ std::size_t end,
+ const PVertex &vertex)
+{
// FIXME Sort out const-ness.
m_hyperedge.tail.push_back(const_cast<PVertex *>(&vertex));
// Add target phrase collection (except if rule is empty or unary).
const TargetPhraseCollection &tpc = node.GetTargetPhraseCollection();
if (!tpc.IsEmpty() && !IsNonLexicalUnary(m_hyperedge)) {
- m_hyperedge.translations = &tpc;
+ m_hyperedge.label.translations = &tpc;
(*m_callback)(m_hyperedge, end);
}
@@ -153,7 +156,7 @@ void RecursiveCYKPlusParser<Callback>::AddAndExtend(
template<typename Callback>
bool RecursiveCYKPlusParser<Callback>::IsNonLexicalUnary(
- const PHyperedge &hyperedge) const
+ const PHyperedge &hyperedge) const
{
return hyperedge.tail.size() == 1 &&
hyperedge.tail[0]->symbol.IsNonTerminal();
diff --git a/moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser.h b/moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser.h
index 264d43eea..2b8edbfd1 100644
--- a/moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser.h
+++ b/moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser.h
@@ -22,12 +22,14 @@ namespace S2T
template<typename Callback>
class RecursiveCYKPlusParser : public Parser<Callback>
{
- public:
+public:
typedef Parser<Callback> Base;
typedef RuleTrieCYKPlus RuleTrie;
// TODO Make this configurable?
- static bool RequiresCompressedChart() { return true; }
+ static bool RequiresCompressedChart() {
+ return true;
+ }
RecursiveCYKPlusParser(PChart &, const RuleTrie &, std::size_t);
@@ -35,7 +37,7 @@ class RecursiveCYKPlusParser : public Parser<Callback>
void EnumerateHyperedges(const WordsRange &, Callback &);
- private:
+private:
void GetTerminalExtension(const RuleTrie::Node &, std::size_t, std::size_t);
diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/Parser-inl.h b/moses/Syntax/S2T/Parsers/Scope3Parser/Parser-inl.h
index d55f7e842..f50cee3a0 100644
--- a/moses/Syntax/S2T/Parsers/Scope3Parser/Parser-inl.h
+++ b/moses/Syntax/S2T/Parsers/Scope3Parser/Parser-inl.h
@@ -23,10 +23,10 @@ namespace S2T
template<typename Callback>
Scope3Parser<Callback>::Scope3Parser(PChart &chart, const RuleTrie &trie,
std::size_t maxChartSpan)
- : Parser<Callback>(chart)
- , m_ruleTable(trie)
- , m_maxChartSpan(maxChartSpan)
- , m_latticeBuilder(chart)
+ : Parser<Callback>(chart)
+ , m_ruleTable(trie)
+ , m_maxChartSpan(maxChartSpan)
+ , m_latticeBuilder(chart)
{
Init();
}
@@ -39,7 +39,7 @@ Scope3Parser<Callback>::~Scope3Parser()
template<typename Callback>
void Scope3Parser<Callback>::EnumerateHyperedges(const WordsRange &range,
- Callback &callback)
+ Callback &callback)
{
const std::size_t start = range.GetStartPos();
const std::size_t end = range.GetEndPos();
@@ -65,7 +65,7 @@ void Scope3Parser<Callback>::EnumerateHyperedges(const WordsRange &range,
// Ask the grammar for the mapping from label sequences to target phrase
// collections for this pattern.
const RuleTrie::Node::LabelMap &labelMap =
- patNode->m_node->GetLabelMap();
+ patNode->m_node->GetLabelMap();
// For each label sequence, search the lattice for the set of PHyperedge
// tails.
@@ -146,7 +146,7 @@ void Scope3Parser<Callback>::FillSentenceMap(SentenceMap &sentMap)
template<typename Callback>
void Scope3Parser<Callback>::RecordPatternApplicationSpans(
- const PatternApplicationTrie &patNode)
+ const PatternApplicationTrie &patNode)
{
if (patNode.m_node->HasRules()) {
int s1 = -1;
@@ -175,7 +175,7 @@ void Scope3Parser<Callback>::RecordPatternApplicationSpans(
}
for (std::vector<PatternApplicationTrie*>::const_iterator p =
- patNode.m_children.begin(); p != patNode.m_children.end(); ++p) {
+ patNode.m_children.begin(); p != patNode.m_children.end(); ++p) {
RecordPatternApplicationSpans(**p);
}
}
diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/Parser.h b/moses/Syntax/S2T/Parsers/Scope3Parser/Parser.h
index d3104d9b1..df2989d62 100644
--- a/moses/Syntax/S2T/Parsers/Scope3Parser/Parser.h
+++ b/moses/Syntax/S2T/Parsers/Scope3Parser/Parser.h
@@ -33,7 +33,9 @@ public:
typedef RuleTrieScope3 RuleTrie;
// TODO Make this configurable?
- static bool RequiresCompressedChart() { return false; }
+ static bool RequiresCompressedChart() {
+ return false;
+ }
Scope3Parser(PChart &, const RuleTrie &, std::size_t);
@@ -60,7 +62,7 @@ private:
/* m_patSpans[i][j] records the set of all PAT nodes for span [i,i+j]
i.e. j is the width of the span */
std::vector<std::vector<
- std::vector<const PatternApplicationTrie *> > > m_patSpans;
+ std::vector<const PatternApplicationTrie *> > > m_patSpans;
};
} // namespace S2T
diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/PatternApplicationTrie.cpp b/moses/Syntax/S2T/Parsers/Scope3Parser/PatternApplicationTrie.cpp
index 218cd4017..f580c254a 100644
--- a/moses/Syntax/S2T/Parsers/Scope3Parser/PatternApplicationTrie.cpp
+++ b/moses/Syntax/S2T/Parsers/Scope3Parser/PatternApplicationTrie.cpp
@@ -9,7 +9,8 @@ namespace Syntax
namespace S2T
{
-int PatternApplicationTrie::Depth() const {
+int PatternApplicationTrie::Depth() const
+{
if (m_parent) {
return m_parent->Depth() + 1;
}
@@ -77,8 +78,8 @@ PatternApplicationTrie::GetLowestTerminalNode() const
// may be unknown). This function determines the range of possible start
// values for the partially-applied pattern.
void PatternApplicationTrie::DetermineStartRange(int sentenceLength,
- int &minStart,
- int &maxStart) const
+ int &minStart,
+ int &maxStart) const
{
// Find the leftmost terminal symbol, if any.
const PatternApplicationTrie *n = GetHighestTerminalNode();
@@ -108,8 +109,8 @@ void PatternApplicationTrie::DetermineStartRange(int sentenceLength,
// may be unknown). This function determines the range of possible end values
// for the partially-applied pattern.
void PatternApplicationTrie::DetermineEndRange(int sentenceLength,
- int &minEnd,
- int &maxEnd) const
+ int &minEnd,
+ int &maxEnd) const
{
// Find the rightmost terminal symbol, if any.
const PatternApplicationTrie *n = GetLowestTerminalNode();
@@ -154,7 +155,7 @@ void PatternApplicationTrie::Extend(const RuleTrieScope3::Node &node,
(followsGap && start > (std::size_t)minPos) ||
minPos == -1) {
PatternApplicationTrie *subTrie =
- new PatternApplicationTrie(start, end, child, v, this);
+ new PatternApplicationTrie(start, end, child, v, this);
subTrie->Extend(child, end+1, sentMap, false);
m_children.push_back(subTrie);
}
@@ -174,7 +175,8 @@ void PatternApplicationTrie::Extend(const RuleTrieScope3::Node &node,
}
void PatternApplicationTrie::ReadOffPatternApplicationKey(
- PatternApplicationKey &key) const {
+ PatternApplicationKey &key) const
+{
const int depth = Depth();
key.resize(depth);
const PatternApplicationTrie *p = this;
diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/PatternApplicationTrie.h b/moses/Syntax/S2T/Parsers/Scope3Parser/PatternApplicationTrie.h
index 0ad371367..1869c0bfd 100644
--- a/moses/Syntax/S2T/Parsers/Scope3Parser/PatternApplicationTrie.h
+++ b/moses/Syntax/S2T/Parsers/Scope3Parser/PatternApplicationTrie.h
@@ -19,7 +19,7 @@ struct PatternApplicationTrie;
typedef std::vector<const PatternApplicationTrie*> PatternApplicationKey;
struct PatternApplicationTrie {
- public:
+public:
PatternApplicationTrie(int start, int end, const RuleTrieScope3::Node &node,
const PVertex *pvertex, PatternApplicationTrie *parent)
: m_start(start)
@@ -36,8 +36,12 @@ struct PatternApplicationTrie {
int Depth() const;
- bool IsGapNode() const { return m_end == -1; }
- bool IsTerminalNode() const { return m_end != -1; }
+ bool IsGapNode() const {
+ return m_end == -1;
+ }
+ bool IsTerminalNode() const {
+ return m_end != -1;
+ }
const PatternApplicationTrie *GetHighestTerminalNode() const;
const PatternApplicationTrie *GetLowestTerminalNode() const;
diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/SentenceMap.h b/moses/Syntax/S2T/Parsers/Scope3Parser/SentenceMap.h
index 8e6aae9f1..522b56618 100644
--- a/moses/Syntax/S2T/Parsers/Scope3Parser/SentenceMap.h
+++ b/moses/Syntax/S2T/Parsers/Scope3Parser/SentenceMap.h
@@ -20,7 +20,7 @@ namespace S2T
// FIXME Check SymbolHasher does the right thing here
typedef boost::unordered_map<Word, std::vector<const PVertex *>, SymbolHasher,
- SymbolEqualityPred> SentenceMap;
+ SymbolEqualityPred> SentenceMap;
} // namespace S2T
} // namespace Syntax
diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/SymbolRangeCalculator.cpp b/moses/Syntax/S2T/Parsers/Scope3Parser/SymbolRangeCalculator.cpp
index 0eb615db8..8b1f203be 100644
--- a/moses/Syntax/S2T/Parsers/Scope3Parser/SymbolRangeCalculator.cpp
+++ b/moses/Syntax/S2T/Parsers/Scope3Parser/SymbolRangeCalculator.cpp
@@ -18,7 +18,7 @@ void SymbolRangeCalculator::Calc(const PatternApplicationKey &key,
// Fill in ranges for terminals and set ranges to -1 for non-terminals.
void SymbolRangeCalculator::FillInTerminalRanges(
- const PatternApplicationKey &key, std::vector<SymbolRange> &ranges)
+ const PatternApplicationKey &key, std::vector<SymbolRange> &ranges)
{
ranges.resize(key.size());
for (std::size_t i = 0; i < key.size(); ++i) {
@@ -34,7 +34,7 @@ void SymbolRangeCalculator::FillInTerminalRanges(
}
void SymbolRangeCalculator::FillInAuxSymbolInfo(
- const std::vector<SymbolRange> &ranges)
+ const std::vector<SymbolRange> &ranges)
{
m_auxSymbolInfo.resize(ranges.size());
@@ -81,8 +81,8 @@ void SymbolRangeCalculator::FillInAuxSymbolInfo(
}
void SymbolRangeCalculator::FillInGapRanges(const PatternApplicationKey &key,
- int spanStart, int spanEnd,
- std::vector<SymbolRange> &ranges)
+ int spanStart, int spanEnd,
+ std::vector<SymbolRange> &ranges)
{
for (std::size_t i = 0; i < key.size(); ++i) {
const PatternApplicationTrie *patNode = key[i];
diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/SymbolRangeCalculator.h b/moses/Syntax/S2T/Parsers/Scope3Parser/SymbolRangeCalculator.h
index 341fb9bb4..c9bbcb02d 100644
--- a/moses/Syntax/S2T/Parsers/Scope3Parser/SymbolRangeCalculator.h
+++ b/moses/Syntax/S2T/Parsers/Scope3Parser/SymbolRangeCalculator.h
@@ -14,11 +14,11 @@ namespace S2T
class SymbolRangeCalculator
{
- public:
+public:
void Calc(const PatternApplicationKey &, int, int,
std::vector<SymbolRange> &);
- private:
+private:
// Provides contextual information used in determining a symbol's range.
struct AuxSymbolInfo {
int distanceToNextTerminal;
diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/TailLattice.h b/moses/Syntax/S2T/Parsers/Scope3Parser/TailLattice.h
index 9ee16b186..88685e81e 100644
--- a/moses/Syntax/S2T/Parsers/Scope3Parser/TailLattice.h
+++ b/moses/Syntax/S2T/Parsers/Scope3Parser/TailLattice.h
@@ -11,7 +11,7 @@ namespace S2T
{
/* Lattice in which a full path corresponds to the tail of a PHyperedge.
- * For an entry x[i][j][k][l] in a TailLattice x:
+ * For an entry x[i][j][k][l] in a TailLattice x:
*
* i = offset from start of rule pattern
*
@@ -23,9 +23,9 @@ namespace S2T
* l = label index (zero for terminals, otherwise as in RuleTrieScope3::Node)
*/
typedef std::vector<
- std::vector<
- std::vector<
- std::vector<const PVertex *> > > > TailLattice;
+std::vector<
+std::vector<
+std::vector<const PVertex *> > > > TailLattice;
} // namespace S2T
} // namespace Syntax
diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeBuilder.cpp b/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeBuilder.cpp
index 6b31090fc..3921ecfa0 100644
--- a/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeBuilder.cpp
+++ b/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeBuilder.cpp
@@ -25,7 +25,7 @@ void TailLatticeBuilder::Build(
const RuleTrieScope3::Node *utrieNode = key.back()->m_node;
const RuleTrieScope3::Node::LabelTable &labelTable =
- utrieNode->GetLabelTable();
+ utrieNode->GetLabelTable();
std::size_t nonTermIndex = 0;
@@ -52,7 +52,7 @@ void TailLatticeBuilder::Build(
p != labelVec.end(); ++p, ++q) {
const Word &label = *p;
const PVertex *v =
- m_chart.GetCell(s, e).nonTerminalVertices.Find(label);
+ m_chart.GetCell(s, e).nonTerminalVertices.Find(label);
lattice[offset][nonTermIndex+1][width].push_back(v);
*q = (*q || static_cast<bool>(v));
}
@@ -81,7 +81,7 @@ void TailLatticeBuilder::ExtendAndClear(
const RuleTrieScope3::Node *utrieNode = key.back()->m_node;
const RuleTrieScope3::Node::LabelTable &labelTable =
- utrieNode->GetLabelTable();
+ utrieNode->GetLabelTable();
std::size_t nonTermIndex = 0;
diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeBuilder.h b/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeBuilder.h
index c61df8a40..9297e5eba 100644
--- a/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeBuilder.h
+++ b/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeBuilder.h
@@ -17,7 +17,7 @@ namespace S2T
class TailLatticeBuilder
{
- public:
+public:
TailLatticeBuilder(PChart &chart) : m_chart(chart) {}
// Given a key from a PatternApplicationTrie and the valid ranges of its
@@ -26,7 +26,7 @@ class TailLatticeBuilder
const std::vector<SymbolRange> &,
TailLattice &, std::vector<std::vector<bool> > &);
- private:
+private:
// Auxiliary function used by Build. Enlarges a TailLattice, if necessary,
// and clears the innermost vectors.
void ExtendAndClear(const std::vector<const PatternApplicationTrie *> &,
diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeSearcher.h b/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeSearcher.h
index a2897ce73..4e48429ab 100644
--- a/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeSearcher.h
+++ b/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeSearcher.h
@@ -17,7 +17,7 @@ namespace S2T
template<typename Callback>
class TailLatticeSearcher
{
- public:
+public:
TailLatticeSearcher(const TailLattice &lattice,
const PatternApplicationKey &key,
const std::vector<SymbolRange> &ranges)
@@ -31,11 +31,11 @@ class TailLatticeSearcher
m_matchCB = &callback;
m_hyperedge.head = 0;
m_hyperedge.tail.clear();
- m_hyperedge.translations = &tpc;
+ m_hyperedge.label.translations = &tpc;
SearchInner(0, 0, 0);
}
- private:
+private:
void SearchInner(int offset, std::size_t i, std::size_t nonTermIndex) {
assert(m_hyperedge.tail.size() == i);
@@ -61,7 +61,7 @@ class TailLatticeSearcher
const int maxWidth = range.maxEnd - absStart + 1;
const std::vector<std::vector<const PVertex *> > &innerVec =
- m_lattice[offset][nonTermIndex+1];
+ m_lattice[offset][nonTermIndex+1];
std::size_t labelIndex = (*m_labels)[nonTermIndex];
diff --git a/moses/Syntax/S2T/RuleTrie.h b/moses/Syntax/S2T/RuleTrie.h
index 8f6dcbb80..27b0bc838 100644
--- a/moses/Syntax/S2T/RuleTrie.h
+++ b/moses/Syntax/S2T/RuleTrie.h
@@ -20,12 +20,12 @@ namespace S2T
// Base class for parser-specific trie types.
class RuleTrie : public RuleTable
{
- public:
+public:
RuleTrie(const RuleTableFF *ff) : RuleTable(ff) {}
virtual bool HasPreterminalRule(const Word &) const = 0;
- private:
+private:
friend class RuleTrieCreator;
virtual TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
diff --git a/moses/Syntax/S2T/RuleTrieCYKPlus.cpp b/moses/Syntax/S2T/RuleTrieCYKPlus.cpp
index 9a300e9eb..05f8758e9 100644
--- a/moses/Syntax/S2T/RuleTrieCYKPlus.cpp
+++ b/moses/Syntax/S2T/RuleTrieCYKPlus.cpp
@@ -53,7 +53,7 @@ void RuleTrieCYKPlus::Node::Sort(std::size_t tableLimit)
}
RuleTrieCYKPlus::Node *RuleTrieCYKPlus::Node::GetOrCreateChild(
- const Word &sourceTerm)
+ const Word &sourceTerm)
{
return &m_sourceTermMap[sourceTerm];
}
@@ -61,40 +61,40 @@ RuleTrieCYKPlus::Node *RuleTrieCYKPlus::Node::GetOrCreateChild(
RuleTrieCYKPlus::Node *RuleTrieCYKPlus::Node::GetOrCreateNonTerminalChild(const Word &targetNonTerm)
{
UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
- "Not a non-terminal: " << targetNonTerm);
+ "Not a non-terminal: " << targetNonTerm);
return &m_nonTermMap[targetNonTerm];
}
const RuleTrieCYKPlus::Node *RuleTrieCYKPlus::Node::GetChild(
- const Word &sourceTerm) const
+ const Word &sourceTerm) const
{
UTIL_THROW_IF2(sourceTerm.IsNonTerminal(),
- "Not a terminal: " << sourceTerm);
+ "Not a terminal: " << sourceTerm);
SymbolMap::const_iterator p = m_sourceTermMap.find(sourceTerm);
return (p == m_sourceTermMap.end()) ? NULL : &p->second;
}
const RuleTrieCYKPlus::Node *RuleTrieCYKPlus::Node::GetNonTerminalChild(
- const Word &targetNonTerm) const
+ const Word &targetNonTerm) const
{
UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
- "Not a non-terminal: " << targetNonTerm);
+ "Not a non-terminal: " << targetNonTerm);
SymbolMap::const_iterator p = m_nonTermMap.find(targetNonTerm);
return (p == m_nonTermMap.end()) ? NULL : &p->second;
}
TargetPhraseCollection &RuleTrieCYKPlus::GetOrCreateTargetPhraseCollection(
- const Phrase &source, const TargetPhrase &target, const Word *sourceLHS)
+ const Phrase &source, const TargetPhrase &target, const Word *sourceLHS)
{
Node &currNode = GetOrCreateNode(source, target, sourceLHS);
return currNode.GetTargetPhraseCollection();
}
RuleTrieCYKPlus::Node &RuleTrieCYKPlus::GetOrCreateNode(
- const Phrase &source, const TargetPhrase &target, const Word *sourceLHS)
+ const Phrase &source, const TargetPhrase &target, const Word *sourceLHS)
{
const std::size_t size = source.GetSize();
@@ -106,14 +106,10 @@ RuleTrieCYKPlus::Node &RuleTrieCYKPlus::GetOrCreateNode(
const Word& word = source.GetWord(pos);
if (word.IsNonTerminal()) {
- // indexed by source label 1st
- const Word &sourceNonTerm = word;
-
UTIL_THROW_IF2(iterAlign == alignmentInfo.end(),
- "No alignment for non-term at position " << pos);
+ "No alignment for non-term at position " << pos);
UTIL_THROW_IF2(iterAlign->first != pos,
- "Alignment info incorrect at position " << pos);
-
+ "Alignment info incorrect at position " << pos);
std::size_t targetNonTermInd = iterAlign->second;
++iterAlign;
const Word &targetNonTerm = target.GetWord(targetNonTermInd);
@@ -122,13 +118,9 @@ RuleTrieCYKPlus::Node &RuleTrieCYKPlus::GetOrCreateNode(
currNode = currNode->GetOrCreateChild(word);
}
- UTIL_THROW_IF2(currNode == NULL,
- "Node not found at position " << pos);
+ UTIL_THROW_IF2(currNode == NULL, "Node not found at position " << pos);
}
- // finally, the source LHS
- //currNode = currNode->GetOrCreateChild(sourceLHS);
-
return *currNode;
}
diff --git a/moses/Syntax/S2T/RuleTrieCYKPlus.h b/moses/Syntax/S2T/RuleTrieCYKPlus.h
index 83ea55b87..11cf4c199 100644
--- a/moses/Syntax/S2T/RuleTrieCYKPlus.h
+++ b/moses/Syntax/S2T/RuleTrieCYKPlus.h
@@ -26,18 +26,20 @@ namespace S2T
class RuleTrieCYKPlus : public RuleTrie
{
- public:
+public:
class Node
{
- public:
+ public:
typedef boost::unordered_map<Word, Node, SymbolHasher,
- SymbolEqualityPred> SymbolMap;
+ SymbolEqualityPred> SymbolMap;
bool IsLeaf() const {
return m_sourceTermMap.empty() && m_nonTermMap.empty();
}
- bool HasRules() const { return !m_targetPhraseCollection.IsEmpty(); }
+ bool HasRules() const {
+ return !m_targetPhraseCollection.IsEmpty();
+ }
void Prune(std::size_t tableLimit);
void Sort(std::size_t tableLimit);
@@ -56,11 +58,15 @@ class RuleTrieCYKPlus : public RuleTrie
return m_targetPhraseCollection;
}
- const SymbolMap &GetTerminalMap() const { return m_sourceTermMap; }
+ const SymbolMap &GetTerminalMap() const {
+ return m_sourceTermMap;
+ }
- const SymbolMap &GetNonTerminalMap() const { return m_nonTermMap; }
+ const SymbolMap &GetNonTerminalMap() const {
+ return m_nonTermMap;
+ }
- private:
+ private:
SymbolMap m_sourceTermMap;
SymbolMap m_nonTermMap;
TargetPhraseCollection m_targetPhraseCollection;
@@ -68,11 +74,13 @@ class RuleTrieCYKPlus : public RuleTrie
RuleTrieCYKPlus(const RuleTableFF *ff) : RuleTrie(ff) {}
- const Node &GetRootNode() const { return m_root; }
+ const Node &GetRootNode() const {
+ return m_root;
+ }
bool HasPreterminalRule(const Word &) const;
- private:
+private:
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
diff --git a/moses/Syntax/S2T/RuleTrieCreator.h b/moses/Syntax/S2T/RuleTrieCreator.h
index 1fe99e609..e49a2cbde 100644
--- a/moses/Syntax/S2T/RuleTrieCreator.h
+++ b/moses/Syntax/S2T/RuleTrieCreator.h
@@ -13,7 +13,7 @@ namespace S2T
// OovHandler). RuleTrieCreator is a friend of RuleTrie.
class RuleTrieCreator
{
- protected:
+protected:
// Provide access to RuleTrie's private SortAndPrune function.
void SortAndPrune(RuleTrie &trie, std::size_t limit) {
trie.SortAndPrune(limit);
@@ -22,8 +22,8 @@ class RuleTrieCreator
// Provide access to RuleTrie's private GetOrCreateTargetPhraseCollection
// function.
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
- RuleTrie &trie, const Phrase &source, const TargetPhrase &target,
- const Word *sourceLHS) {
+ RuleTrie &trie, const Phrase &source, const TargetPhrase &target,
+ const Word *sourceLHS) {
return trie.GetOrCreateTargetPhraseCollection(source, target, sourceLHS);
}
};
diff --git a/moses/Syntax/S2T/RuleTrieLoader.cpp b/moses/Syntax/S2T/RuleTrieLoader.cpp
index 8efa4969b..b9f7484ad 100644
--- a/moses/Syntax/S2T/RuleTrieLoader.cpp
+++ b/moses/Syntax/S2T/RuleTrieLoader.cpp
@@ -16,7 +16,6 @@
#include "moses/InputFileStream.h"
#include "moses/StaticData.h"
#include "moses/WordsRange.h"
-#include "moses/UserMessage.h"
#include "moses/ChartTranslationOptionList.h"
#include "moses/FactorCollection.h"
#include "moses/Syntax/RuleTableFF.h"
@@ -44,7 +43,6 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
PrintUserTime(std::string("Start loading text phrase table. Moses format"));
const StaticData &staticData = StaticData::Instance();
- const std::string &factorDelimiter = staticData.GetFactorDelimiter();
std::size_t count = 0;
@@ -76,10 +74,6 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
alignString = temp;
}
- if (++pipes) {
- StringPiece str(*pipes); //counts
- }
-
bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == std::string::npos);
if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
TRACE_ERR( ff.GetFilePath() << ":" << count << ": pt entry contains empty target, skipping\n");
@@ -96,7 +90,7 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
const size_t numScoreComponents = ff.GetNumScoreComponents();
if (scoreVector.size() != numScoreComponents) {
UTIL_THROW2("Size of scoreVector != number (" << scoreVector.size() << "!="
- << numScoreComponents << ") of score components on line " << count);
+ << numScoreComponents << ") of score components on line " << count);
}
// parse source & find pt node
@@ -107,18 +101,16 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
// create target phrase obj
TargetPhrase *targetPhrase = new TargetPhrase(&ff);
- // targetPhrase->CreateFromString(Output, output, targetPhraseString, factorDelimiter, &targetLHS);
targetPhrase->CreateFromString(Output, output, targetPhraseString, &targetLHS);
// source
Phrase sourcePhrase;
- // sourcePhrase.CreateFromString(Input, input, sourcePhraseString, factorDelimiter, &sourceLHS);
sourcePhrase.CreateFromString(Input, input, sourcePhraseString, &sourceLHS);
// rest of target phrase
targetPhrase->SetAlignmentInfo(alignString);
targetPhrase->SetTargetLHS(targetLHS);
- //targetPhrase->SetDebugOutput(string("New Format pt ") + line);
+ ++pipes; // skip over counts field.
if (++pipes) {
StringPiece sparseString(*pipes);
@@ -134,7 +126,7 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
targetPhrase->EvaluateInIsolation(sourcePhrase, ff.GetFeaturesToApply());
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(
- trie, sourcePhrase, *targetPhrase, sourceLHS);
+ trie, sourcePhrase, *targetPhrase, sourceLHS);
phraseColl.Add(targetPhrase);
// not implemented correctly in memory pt. just delete it for now
diff --git a/moses/Syntax/S2T/RuleTrieLoader.h b/moses/Syntax/S2T/RuleTrieLoader.h
index c625f91d6..855f1d2a8 100644
--- a/moses/Syntax/S2T/RuleTrieLoader.h
+++ b/moses/Syntax/S2T/RuleTrieLoader.h
@@ -18,7 +18,7 @@ namespace S2T
class RuleTrieLoader : public RuleTrieCreator
{
- public:
+public:
bool Load(const std::vector<FactorType> &input,
const std::vector<FactorType> &output,
const std::string &inFile,
diff --git a/moses/Syntax/S2T/RuleTrieScope3.cpp b/moses/Syntax/S2T/RuleTrieScope3.cpp
index a16cbefdc..7318f09d6 100644
--- a/moses/Syntax/S2T/RuleTrieScope3.cpp
+++ b/moses/Syntax/S2T/RuleTrieScope3.cpp
@@ -55,7 +55,7 @@ void RuleTrieScope3::Node::Sort(std::size_t tableLimit)
}
RuleTrieScope3::Node *RuleTrieScope3::Node::GetOrCreateTerminalChild(
- const Word &sourceTerm)
+ const Word &sourceTerm)
{
assert(!sourceTerm.IsNonTerminal());
std::pair<TerminalMap::iterator, bool> result;
@@ -66,7 +66,7 @@ RuleTrieScope3::Node *RuleTrieScope3::Node::GetOrCreateTerminalChild(
}
RuleTrieScope3::Node *RuleTrieScope3::Node::GetOrCreateNonTerminalChild(
- const Word &targetNonTerm)
+ const Word &targetNonTerm)
{
assert(targetNonTerm.IsNonTerminal());
if (m_gapNode == NULL) {
@@ -77,7 +77,7 @@ RuleTrieScope3::Node *RuleTrieScope3::Node::GetOrCreateNonTerminalChild(
TargetPhraseCollection &
RuleTrieScope3::Node::GetOrCreateTargetPhraseCollection(
- const TargetPhrase &target)
+ const TargetPhrase &target)
{
const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm();
const std::size_t rank = alignmentInfo.GetSize();
@@ -99,14 +99,14 @@ RuleTrieScope3::Node::GetOrCreateTargetPhraseCollection(
}
TargetPhraseCollection &RuleTrieScope3::GetOrCreateTargetPhraseCollection(
- const Phrase &source, const TargetPhrase &target, const Word *sourceLHS)
+ const Phrase &source, const TargetPhrase &target, const Word *sourceLHS)
{
Node &currNode = GetOrCreateNode(source, target, sourceLHS);
return currNode.GetOrCreateTargetPhraseCollection(target);
}
RuleTrieScope3::Node &RuleTrieScope3::GetOrCreateNode(
- const Phrase &source, const TargetPhrase &target, const Word */*sourceLHS*/)
+ const Phrase &source, const TargetPhrase &target, const Word */*sourceLHS*/)
{
const std::size_t size = source.GetSize();
diff --git a/moses/Syntax/S2T/RuleTrieScope3.h b/moses/Syntax/S2T/RuleTrieScope3.h
index 6dd38a4f1..5909b6509 100644
--- a/moses/Syntax/S2T/RuleTrieScope3.h
+++ b/moses/Syntax/S2T/RuleTrieScope3.h
@@ -25,27 +25,37 @@ namespace S2T
class RuleTrieScope3 : public RuleTrie
{
- public:
+public:
class Node
{
- public:
+ public:
typedef std::vector<std::vector<Word> > LabelTable;
typedef boost::unordered_map<Word, Node, SymbolHasher,
- SymbolEqualityPred> TerminalMap;
+ SymbolEqualityPred> TerminalMap;
typedef boost::unordered_map<std::vector<int>,
TargetPhraseCollection> LabelMap;
- ~Node() { delete m_gapNode; }
+ ~Node() {
+ delete m_gapNode;
+ }
- const LabelTable &GetLabelTable() const { return m_labelTable; }
+ const LabelTable &GetLabelTable() const {
+ return m_labelTable;
+ }
- const LabelMap &GetLabelMap() const { return m_labelMap; }
+ const LabelMap &GetLabelMap() const {
+ return m_labelMap;
+ }
- const TerminalMap &GetTerminalMap() const { return m_terminalMap; }
+ const TerminalMap &GetTerminalMap() const {
+ return m_terminalMap;
+ }
- const Node *GetNonTerminalChild() const { return m_gapNode; }
+ const Node *GetNonTerminalChild() const {
+ return m_gapNode;
+ }
Node *GetOrCreateTerminalChild(const Word &sourceTerm);
@@ -54,14 +64,18 @@ class RuleTrieScope3 : public RuleTrie
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
const TargetPhrase &);
- bool IsLeaf() const { return m_terminalMap.empty() && m_gapNode == NULL; }
+ bool IsLeaf() const {
+ return m_terminalMap.empty() && m_gapNode == NULL;
+ }
- bool HasRules() const { return !m_labelMap.empty(); }
+ bool HasRules() const {
+ return !m_labelMap.empty();
+ }
void Prune(std::size_t tableLimit);
void Sort(std::size_t tableLimit);
- private:
+ private:
friend class RuleTrieScope3;
Node() : m_gapNode(NULL) {}
@@ -85,11 +99,13 @@ class RuleTrieScope3 : public RuleTrie
RuleTrieScope3(const RuleTableFF *ff) : RuleTrie(ff) {}
- const Node &GetRootNode() const { return m_root; }
+ const Node &GetRootNode() const {
+ return m_root;
+ }
bool HasPreterminalRule(const Word &) const;
- private:
+private:
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
diff --git a/moses/Syntax/S2T/SChart.h b/moses/Syntax/S2T/SChart.h
index 62b7d0c2b..ac6404831 100644
--- a/moses/Syntax/S2T/SChart.h
+++ b/moses/Syntax/S2T/SChart.h
@@ -19,11 +19,10 @@ namespace S2T
class SChart
{
- public:
- struct Cell
- {
+public:
+ struct Cell {
typedef boost::unordered_map<Word, SVertexStack, SymbolHasher,
- SymbolEqualityPred> TMap;
+ SymbolEqualityPred> TMap;
typedef NonTerminalMap<SVertexStack> NMap;
TMap terminalStacks;
NMap nonTerminalStacks;
@@ -31,7 +30,9 @@ class SChart
SChart(std::size_t width);
- std::size_t GetWidth() const { return m_cells.size(); }
+ std::size_t GetWidth() const {
+ return m_cells.size();
+ }
const Cell &GetCell(std::size_t start, std::size_t end) const {
return m_cells[start][end];
@@ -41,7 +42,7 @@ class SChart
return m_cells[start][end];
}
- private:
+private:
std::vector<std::vector<Cell> > m_cells;
};
diff --git a/moses/Syntax/SHyperedge.cpp b/moses/Syntax/SHyperedge.cpp
index 0f098c7a4..0260492c8 100644
--- a/moses/Syntax/SHyperedge.cpp
+++ b/moses/Syntax/SHyperedge.cpp
@@ -16,10 +16,10 @@ Phrase GetOneBestTargetYield(const SHyperedge &h)
Phrase ret(ARRAY_SIZE_INCR);
const AlignmentInfo::NonTermIndexMap &targetToSourceMap =
- h.translation->GetAlignNonTerm().GetNonTermIndexMap2();
+ h.label.translation->GetAlignNonTerm().GetNonTermIndexMap2();
- for (std::size_t pos = 0; pos < h.translation->GetSize(); ++pos) {
- const Word &word = h.translation->GetWord(pos);
+ for (std::size_t pos = 0; pos < h.label.translation->GetSize(); ++pos) {
+ const Word &word = h.label.translation->GetWord(pos);
if (word.IsNonTerminal()) {
std::size_t sourceIndex = targetToSourceMap[pos];
const SHyperedge &incoming = *h.tail[sourceIndex]->best;
@@ -32,24 +32,24 @@ Phrase GetOneBestTargetYield(const SHyperedge &h)
}
assert(false);
// FIXME Modify this chunk of code to work for SHyperedge.
-/*
- std::set<std::size_t> sourcePosSet =
- h.translation->GetAlignTerm().GetAlignmentsForTarget(pos);
- if (sourcePosSet.size() == 1) {
- const std::vector<const Word*> *ruleSourceFromInputPath =
- hypo.GetTranslationOption().GetSourceRuleFromInputPath();
- UTIL_THROW_IF2(ruleSourceFromInputPath == NULL,
- "Source Words in of the rules hasn't been filled out");
- std::size_t sourcePos = *sourcePosSet.begin();
- const Word *sourceWord = ruleSourceFromInputPath->at(sourcePos);
- UTIL_THROW_IF2(sourceWord == NULL,
- "Null source word at position " << sourcePos);
- const Factor *factor = sourceWord->GetFactor(placeholderFactor);
- if (factor) {
- ret.Back()[0] = factor;
- }
- }
-*/
+ /*
+ std::set<std::size_t> sourcePosSet =
+ h.translation->GetAlignTerm().GetAlignmentsForTarget(pos);
+ if (sourcePosSet.size() == 1) {
+ const std::vector<const Word*> *ruleSourceFromInputPath =
+ hypo.GetTranslationOption().GetSourceRuleFromInputPath();
+ UTIL_THROW_IF2(ruleSourceFromInputPath == NULL,
+ "Source Words in of the rules hasn't been filled out");
+ std::size_t sourcePos = *sourcePosSet.begin();
+ const Word *sourceWord = ruleSourceFromInputPath->at(sourcePos);
+ UTIL_THROW_IF2(sourceWord == NULL,
+ "Null source word at position " << sourcePos);
+ const Factor *factor = sourceWord->GetFactor(placeholderFactor);
+ if (factor) {
+ ret.Back()[0] = factor;
+ }
+ }
+ */
}
}
return ret;
diff --git a/moses/Syntax/SHyperedge.h b/moses/Syntax/SHyperedge.h
index 6d9128d49..dd19f27c8 100644
--- a/moses/Syntax/SHyperedge.h
+++ b/moses/Syntax/SHyperedge.h
@@ -3,8 +3,8 @@
#include <vector>
#include "moses/Phrase.h"
-#include "moses/ScoreComponentCollection.h"
-#include "moses/TargetPhrase.h"
+
+#include "SLabel.h"
namespace Moses
{
@@ -13,13 +13,10 @@ namespace Syntax
struct SVertex;
-struct SHyperedge
-{
+struct SHyperedge {
SVertex *head;
std::vector<SVertex*> tail;
- float score;
- ScoreComponentCollection scoreBreakdown;
- const TargetPhrase *translation;
+ SLabel label;
};
Phrase GetOneBestTargetYield(const SHyperedge &h);
diff --git a/moses/Syntax/SHyperedgeBundle.h b/moses/Syntax/SHyperedgeBundle.h
index 4a78c5458..f4a07a181 100644
--- a/moses/Syntax/SHyperedgeBundle.h
+++ b/moses/Syntax/SHyperedgeBundle.h
@@ -14,8 +14,7 @@ namespace Syntax
struct PVertex;
-struct SHyperedgeBundle
-{
+struct SHyperedgeBundle {
std::vector<const SVertexStack*> stacks;
const TargetPhraseCollection *translations;
diff --git a/moses/Syntax/SHyperedgeBundleScorer.h b/moses/Syntax/SHyperedgeBundleScorer.h
index 3bf547cfd..5f79c5915 100644
--- a/moses/Syntax/SHyperedgeBundleScorer.h
+++ b/moses/Syntax/SHyperedgeBundleScorer.h
@@ -7,17 +7,16 @@ namespace Moses
namespace Syntax
{
-struct SHyperedgeBundleScorer
-{
- public:
+struct SHyperedgeBundleScorer {
+public:
static float Score(const SHyperedgeBundle &bundle) {
const TargetPhrase &targetPhrase = **(bundle.translations->begin());
float score = targetPhrase.GetFutureScore();
for (std::vector<const SVertexStack*>::const_iterator p =
- bundle.stacks.begin(); p != bundle.stacks.end(); ++p) {
+ bundle.stacks.begin(); p != bundle.stacks.end(); ++p) {
const SVertexStack *stack = *p;
if (stack->front()->best) {
- score += stack->front()->best->score;
+ score += stack->front()->best->label.score;
}
}
return score;
diff --git a/moses/Syntax/SLabel.h b/moses/Syntax/SLabel.h
new file mode 100644
index 000000000..0b87cb76e
--- /dev/null
+++ b/moses/Syntax/SLabel.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include "moses/ScoreComponentCollection.h"
+#include "moses/TargetPhrase.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+
+struct SLabel {
+ float score;
+ ScoreComponentCollection scoreBreakdown;
+ const TargetPhrase *translation;
+};
+
+} // Syntax
+} // Moses
diff --git a/moses/Syntax/SVertex.h b/moses/Syntax/SVertex.h
index cde14c21a..e596cb442 100644
--- a/moses/Syntax/SVertex.h
+++ b/moses/Syntax/SVertex.h
@@ -17,8 +17,7 @@ struct SHyperedge;
//
// Important: a SVertex owns its incoming SHyperedge objects and its FFState
// objects and will delete them on destruction.
-struct SVertex
-{
+struct SVertex {
~SVertex();
SHyperedge *best;
diff --git a/moses/Syntax/SVertexRecombinationOrderer.h b/moses/Syntax/SVertexRecombinationOrderer.h
index 60686d989..fcabed04a 100644
--- a/moses/Syntax/SVertexRecombinationOrderer.h
+++ b/moses/Syntax/SVertexRecombinationOrderer.h
@@ -9,11 +9,9 @@ namespace Moses
namespace Syntax
{
-struct SVertexRecombinationOrderer
-{
- public:
- bool operator()(const SVertex &x, const SVertex &y) const
- {
+struct SVertexRecombinationOrderer {
+public:
+ bool operator()(const SVertex &x, const SVertex &y) const {
int comp = 0;
for (std::size_t i = 0; i < x.state.size(); ++i) {
if (x.state[i] == NULL || y.state[i] == NULL) {
@@ -28,8 +26,7 @@ struct SVertexRecombinationOrderer
return false;
}
- bool operator()(const SVertex *x, const SVertex *y) const
- {
+ bool operator()(const SVertex *x, const SVertex *y) const {
return operator()(*x, *y);
}
};
diff --git a/moses/Syntax/SVertexStack.h b/moses/Syntax/SVertexStack.h
index 57dc9f247..b88c055f4 100644
--- a/moses/Syntax/SVertexStack.h
+++ b/moses/Syntax/SVertexStack.h
@@ -14,13 +14,11 @@ namespace Syntax
typedef std::vector<boost::shared_ptr<SVertex> > SVertexStack;
-struct SVertexStackContentOrderer
-{
- public:
+struct SVertexStackContentOrderer {
+public:
bool operator()(const boost::shared_ptr<SVertex> &x,
- const boost::shared_ptr<SVertex> &y)
- {
- return x->best->score > y->best->score;
+ const boost::shared_ptr<SVertex> &y) {
+ return x->best->label.score > y->best->label.score;
}
};
diff --git a/moses/Syntax/SymbolEqualityPred.h b/moses/Syntax/SymbolEqualityPred.h
index e97c4f11b..684d70cee 100644
--- a/moses/Syntax/SymbolEqualityPred.h
+++ b/moses/Syntax/SymbolEqualityPred.h
@@ -12,7 +12,7 @@ namespace Syntax
// *not* work in moses_chart unless this is changed (among other things).
class SymbolEqualityPred
{
- public:
+public:
bool operator()(const Word &s1, const Word &s2) const {
const Factor *f1 = s1[0];
const Factor *f2 = s2[0];
diff --git a/moses/Syntax/SymbolHasher.h b/moses/Syntax/SymbolHasher.h
index b398fdd00..c758d7017 100644
--- a/moses/Syntax/SymbolHasher.h
+++ b/moses/Syntax/SymbolHasher.h
@@ -14,7 +14,7 @@ namespace Syntax
// *not* work in moses_chart unless this is changed (among other things).
class SymbolHasher
{
- public:
+public:
std::size_t operator()(const Word &s) const {
const Factor *f = s[0];
return hash_value(*f);
diff --git a/moses/Syntax/T2S/GlueRuleSynthesizer.cpp b/moses/Syntax/T2S/GlueRuleSynthesizer.cpp
new file mode 100644
index 000000000..ec60af5f0
--- /dev/null
+++ b/moses/Syntax/T2S/GlueRuleSynthesizer.cpp
@@ -0,0 +1,77 @@
+#include "GlueRuleSynthesizer.h"
+
+#include <sstream>
+
+#include "moses/FF/UnknownWordPenaltyProducer.h"
+#include "moses/StaticData.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace T2S
+{
+
+void GlueRuleSynthesizer::SynthesizeRule(const InputTree::Node &node)
+{
+ const Word &sourceLhs = node.pvertex.symbol;
+ boost::scoped_ptr<Phrase> sourceRhs(SynthesizeSourcePhrase(node));
+ TargetPhrase *tp = SynthesizeTargetPhrase(node, *sourceRhs);
+ TargetPhraseCollection &tpc = GetOrCreateTargetPhraseCollection(
+ m_ruleTrie, sourceLhs, *sourceRhs);
+ tpc.Add(tp);
+}
+
+Phrase *GlueRuleSynthesizer::SynthesizeSourcePhrase(const InputTree::Node &node)
+{
+ Phrase *phrase = new Phrase(node.children.size());
+ for (std::vector<InputTree::Node*>::const_iterator p = node.children.begin();
+ p != node.children.end(); ++p) {
+ phrase->AddWord((*p)->pvertex.symbol);
+ }
+/*
+TODO What counts as an OOV?
+ phrase->AddWord() = sourceWord;
+ phrase->GetWord(0).SetIsOOV(true);
+*/
+ return phrase;
+}
+
+TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase(
+ const InputTree::Node &node, const Phrase &sourceRhs)
+{
+ const StaticData &staticData = StaticData::Instance();
+
+ const UnknownWordPenaltyProducer &unknownWordPenaltyProducer =
+ UnknownWordPenaltyProducer::Instance();
+
+ TargetPhrase *targetPhrase = new TargetPhrase();
+
+ std::ostringstream alignmentSS;
+ for (std::size_t i = 0; i < node.children.size(); ++i) {
+ const Word &symbol = node.children[i]->pvertex.symbol;
+ if (symbol.IsNonTerminal()) {
+ targetPhrase->AddWord(staticData.GetOutputDefaultNonTerminal());
+ } else {
+ // TODO Check this
+ Word &targetWord = targetPhrase->AddWord();
+ targetWord.CreateUnknownWord(symbol);
+ }
+ alignmentSS << i << "-" << i << " ";
+ }
+
+ // Assign the lowest possible score so that glue rules are only used when
+ // absolutely required.
+ float score = LOWEST_SCORE;
+ targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, score);
+ targetPhrase->EvaluateInIsolation(sourceRhs);
+ Word *targetLhs = new Word(staticData.GetOutputDefaultNonTerminal());
+ targetPhrase->SetTargetLHS(targetLhs);
+ targetPhrase->SetAlignmentInfo(alignmentSS.str());
+
+ return targetPhrase;
+}
+
+} // T2S
+} // Syntax
+} // Moses
diff --git a/moses/Syntax/T2S/GlueRuleSynthesizer.h b/moses/Syntax/T2S/GlueRuleSynthesizer.h
new file mode 100644
index 000000000..95942004c
--- /dev/null
+++ b/moses/Syntax/T2S/GlueRuleSynthesizer.h
@@ -0,0 +1,35 @@
+#pragma once
+
+#include "moses/Phrase.h"
+#include "moses/TargetPhrase.h"
+
+#include "InputTree.h"
+#include "RuleTrie.h"
+#include "RuleTrieCreator.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace T2S
+{
+
+class GlueRuleSynthesizer : public RuleTrieCreator
+{
+ public:
+ GlueRuleSynthesizer(RuleTrie &trie) : m_ruleTrie(trie) {}
+
+  // Synthesize the minimal, monotone rule that can be applied to the given node
+ // and add it to the rule trie.
+ void SynthesizeRule(const InputTree::Node &);
+
+ private:
+ Phrase *SynthesizeSourcePhrase(const InputTree::Node &);
+ TargetPhrase *SynthesizeTargetPhrase(const InputTree::Node &, const Phrase &);
+
+ RuleTrie &m_ruleTrie;
+};
+
+} // T2S
+} // Syntax
+} // Moses
diff --git a/moses/Syntax/T2S/HyperTree.h b/moses/Syntax/T2S/HyperTree.h
new file mode 100644
index 000000000..745b2d26e
--- /dev/null
+++ b/moses/Syntax/T2S/HyperTree.h
@@ -0,0 +1,81 @@
+#pragma once
+
+#include <map>
+#include <vector>
+
+#include <boost/functional/hash.hpp>
+#include <boost/unordered_map.hpp>
+#include <boost/version.hpp>
+
+#include "moses/Syntax/RuleTable.h"
+#include "moses/Syntax/SymbolEqualityPred.h"
+#include "moses/Syntax/SymbolHasher.h"
+#include "moses/TargetPhrase.h"
+#include "moses/TargetPhraseCollection.h"
+#include "moses/Terminal.h"
+#include "moses/Util.h"
+#include "moses/Word.h"
+
+#include "RuleTrie.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace T2S
+{
+
+class HyperTree: public RuleTable
+{
+ public:
+ class Node
+ {
+ public:
+ typedef boost::unordered_map<std::vector<Factor*>, Node> Map;
+
+ bool IsLeaf() const { return m_map.empty(); }
+
+ bool HasRules() const { return !m_targetPhraseCollection.IsEmpty(); }
+
+ void Prune(std::size_t tableLimit);
+ void Sort(std::size_t tableLimit);
+
+ Node *GetOrCreateChild(const HyperPath::NodeSeq &);
+
+ const Node *GetChild(const HyperPath::NodeSeq &) const;
+
+    const TargetPhraseCollection &GetTargetPhraseCollection() const {
+      return m_targetPhraseCollection;
+    }
+
+    TargetPhraseCollection &GetTargetPhraseCollection() {
+      return m_targetPhraseCollection;
+    }
+
+ const Map &GetMap() const { return m_map; }
+
+ private:
+ Map m_map;
+ TargetPhraseCollection m_targetPhraseCollection;
+ };
+
+ HyperTree(const RuleTableFF *ff) : RuleTable(ff) {}
+
+ const Node &GetRootNode() const { return m_root; }
+
+ private:
+ friend class RuleTrieCreator;
+
+ TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
+ const Word &sourceLHS, const Phrase &sourceRHS);
+
+ Node &GetOrCreateNode(const Phrase &sourceRHS);
+
+ void SortAndPrune(std::size_t);
+
+ Node m_root;
+};
+
+} // namespace T2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/T2S/InputTree.h b/moses/Syntax/T2S/InputTree.h
new file mode 100644
index 000000000..93b7516e6
--- /dev/null
+++ b/moses/Syntax/T2S/InputTree.h
@@ -0,0 +1,38 @@
+#pragma once
+
+#include <vector>
+
+#include "moses/Syntax/PVertex.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace T2S
+{
+
+struct InputTree
+{
+ public:
+ struct Node {
+ Node(const PVertex &v, const std::vector<Node*> &c)
+ : pvertex(v)
+ , children(c) {}
+
+ Node(const PVertex &v) : pvertex(v) {}
+
+ PVertex pvertex;
+ std::vector<Node*> children;
+ };
+
+ // All tree nodes in post-order.
+ std::vector<Node> nodes;
+
+ // Tree nodes arranged by starting position (i.e. the vector nodes[i]
+ // contains the subset of tree nodes with span [i,j] (for any j).)
+ std::vector<std::vector<Node*> > nodesAtPos;
+};
+
+} // T2S
+} // Syntax
+} // Moses
diff --git a/moses/Syntax/T2S/InputTreeBuilder.cpp b/moses/Syntax/T2S/InputTreeBuilder.cpp
new file mode 100644
index 000000000..ecded8e91
--- /dev/null
+++ b/moses/Syntax/T2S/InputTreeBuilder.cpp
@@ -0,0 +1,171 @@
+#include "InputTreeBuilder.h"
+
+#include "moses/StaticData.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace T2S
+{
+
+InputTreeBuilder::InputTreeBuilder()
+ : m_outputFactorOrder(StaticData::Instance().GetOutputFactorOrder())
+{
+}
+
+void InputTreeBuilder::Build(const TreeInput &in,
+ const std::string &topLevelLabel,
+ InputTree &out)
+{
+ CreateNodes(in, topLevelLabel, out);
+ ConnectNodes(out);
+}
+
+// Create the InputTree::Node objects but do not connect them.
+void InputTreeBuilder::CreateNodes(const TreeInput &in,
+ const std::string &topLevelLabel,
+ InputTree &out)
+{
+ // Get the input sentence word count. This includes the <s> and </s> symbols.
+ const std::size_t numWords = in.GetSize();
+
+ // Get the parse tree non-terminal nodes. The parse tree covers the original
+ // sentence only, not the <s> and </s> symbols, so at this point there is
+ // no top-level node.
+ std::vector<XMLParseOutput> xmlNodes = in.GetLabelledSpans();
+
+ // Sort the XML nodes into post-order. Prior to sorting they will be in the
+ // order that TreeInput created them. Usually that will be post-order, but
+ // if, for example, the tree was binarized by relax-parse then it won't be.
+ // In all cases, we assume that if two nodes cover the same span then the
+ // first one is the lowest.
+ SortXmlNodesIntoPostOrder(xmlNodes);
+
+ // Copy the parse tree non-terminal nodes, but offset the ranges by 1 (to
+ // allow for the <s> symbol at position 0).
+ std::vector<XMLParseOutput> nonTerms;
+ nonTerms.reserve(xmlNodes.size()+1);
+ for (std::vector<XMLParseOutput>::const_iterator p = xmlNodes.begin();
+ p != xmlNodes.end(); ++p) {
+ std::size_t start = p->m_range.GetStartPos();
+ std::size_t end = p->m_range.GetEndPos();
+ nonTerms.push_back(XMLParseOutput(p->m_label, WordsRange(start+1, end+1)));
+ }
+ // Add a top-level node that also covers <s> and </s>.
+ nonTerms.push_back(XMLParseOutput(topLevelLabel, WordsRange(0, numWords-1)));
+
+ // Allocate space for the InputTree nodes. In the case of out.nodes, this
+ // step is essential because once created the PVertex objects must not be
+ // moved around (through vector resizing) because InputTree keeps pointers
+ // to them.
+ out.nodes.reserve(numWords + nonTerms.size());
+ out.nodesAtPos.resize(numWords);
+
+ // Create the InputTree::Node objects.
+ int prevStart = -1;
+ int prevEnd = -1;
+ for (std::vector<XMLParseOutput>::const_iterator p = nonTerms.begin();
+ p != nonTerms.end(); ++p) {
+ int start = static_cast<int>(p->m_range.GetStartPos());
+ int end = static_cast<int>(p->m_range.GetEndPos());
+
+ // Check if we've started ascending a new subtree.
+ if (start != prevStart && end != prevEnd) {
+ // Add a node for each terminal to the left of or below the first
+ // nonTerm child of the subtree.
+ for (int i = prevEnd+1; i <= end; ++i) {
+ PVertex v(WordsRange(i, i), in.GetWord(i));
+ out.nodes.push_back(InputTree::Node(v));
+ out.nodesAtPos[i].push_back(&out.nodes.back());
+ }
+ }
+ // Add a node for the non-terminal.
+ Word w(true);
+ w.CreateFromString(Moses::Output, m_outputFactorOrder, p->m_label, true);
+ PVertex v(WordsRange(start, end), w);
+ out.nodes.push_back(InputTree::Node(v));
+ out.nodesAtPos[start].push_back(&out.nodes.back());
+
+ prevStart = start;
+ prevEnd = end;
+ }
+}
+
+// Connect the nodes by filling in the node.children vectors.
+void InputTreeBuilder::ConnectNodes(InputTree &out)
+{
+ // Create a vector that records the parent of each node (except the root).
+ std::vector<InputTree::Node*> parents(out.nodes.size(), NULL);
+ for (std::size_t i = 0; i < out.nodes.size()-1; ++i) {
+ const InputTree::Node &node = out.nodes[i];
+ std::size_t start = node.pvertex.span.GetStartPos();
+ std::size_t end = node.pvertex.span.GetEndPos();
+ // Find the next node (in post-order) that completely covers node's span.
+ std::size_t j = i+1;
+ while (true) {
+ const InputTree::Node &succ = out.nodes[j];
+ std::size_t succStart = succ.pvertex.span.GetStartPos();
+ std::size_t succEnd = succ.pvertex.span.GetEndPos();
+ if (succStart <= start && succEnd >= end) {
+ break;
+ }
+ ++j;
+ }
+ parents[i] = &(out.nodes[j]);
+ }
+
+ // Add each node to its parent's list of children (except the root).
+ for (std::size_t i = 0; i < out.nodes.size()-1; ++i) {
+ InputTree::Node &child = out.nodes[i];
+ InputTree::Node &parent = *(parents[i]);
+ parent.children.push_back(&child);
+ }
+}
+
+void InputTreeBuilder::SortXmlNodesIntoPostOrder(
+ std::vector<XMLParseOutput> &nodes)
+{
+ // Sorting is based on both the value of a node and its original position,
+ // so for each node construct a pair containing both pieces of information.
+ std::vector<std::pair<XMLParseOutput *, int> > pairs;
+ pairs.reserve(nodes.size());
+ for (std::size_t i = 0; i < nodes.size(); ++i) {
+ pairs.push_back(std::make_pair(&(nodes[i]), i));
+ }
+
+ // Sort the pairs.
+ std::sort(pairs.begin(), pairs.end(), PostOrderComp);
+
+ // Replace the original node sequence with the correctly sorted sequence.
+ std::vector<XMLParseOutput> tmp;
+ tmp.reserve(nodes.size());
+ for (std::size_t i = 0; i < pairs.size(); ++i) {
+ tmp.push_back(nodes[pairs[i].second]);
+ }
+ nodes.swap(tmp);
+}
+
+// Comparison function used by SortXmlNodesIntoPostOrder.
+bool InputTreeBuilder::PostOrderComp(const std::pair<XMLParseOutput *, int> &x,
+ const std::pair<XMLParseOutput *, int> &y)
+{
+ std::size_t xStart = x.first->m_range.GetStartPos();
+ std::size_t xEnd = x.first->m_range.GetEndPos();
+ std::size_t yStart = y.first->m_range.GetStartPos();
+ std::size_t yEnd = y.first->m_range.GetEndPos();
+
+ if (xEnd == yEnd) {
+ if (xStart == yStart) {
+ return x.second < y.second;
+ } else {
+ return xStart > yStart;
+ }
+ } else {
+ return xEnd < yEnd;
+ }
+}
+
+} // T2S
+} // Syntax
+} // Moses
diff --git a/moses/Syntax/T2S/InputTreeBuilder.h b/moses/Syntax/T2S/InputTreeBuilder.h
new file mode 100644
index 000000000..24b107f81
--- /dev/null
+++ b/moses/Syntax/T2S/InputTreeBuilder.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include <vector>
+
+#include "moses/TreeInput.h"
+#include "moses/TypeDef.h"
+
+#include "InputTree.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace T2S
+{
+
+class InputTreeBuilder
+{
+ public:
+ InputTreeBuilder();
+
+ // Constructs a Moses::T2S::InputTree given a Moses::TreeInput and a label
+ // for the top-level node (which covers <s> and </s>).
+ void Build(const TreeInput &, const std::string &, InputTree &);
+
+ private:
+ static bool PostOrderComp(const std::pair<XMLParseOutput *, int> &,
+ const std::pair<XMLParseOutput *, int> &);
+
+ void CreateNodes(const TreeInput &, const std::string &, InputTree &);
+ void ConnectNodes(InputTree &);
+ void SortXmlNodesIntoPostOrder(std::vector<XMLParseOutput> &);
+
+ const std::vector<FactorType> &m_outputFactorOrder;
+};
+
+} // T2S
+} // Syntax
+} // Moses
diff --git a/moses/Syntax/T2S/InputTreeToForest.cpp b/moses/Syntax/T2S/InputTreeToForest.cpp
new file mode 100644
index 000000000..fda988d57
--- /dev/null
+++ b/moses/Syntax/T2S/InputTreeToForest.cpp
@@ -0,0 +1,52 @@
+#include "InputTreeToForest.h"
+
+#include <boost/unordered_map.hpp>
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace T2S
+{
+
+const F2S::Forest::Vertex *InputTreeToForest(const InputTree &tree,
+ F2S::Forest &forest)
+{
+ forest.Clear();
+
+ // Map from tree vertices to forest vertices.
+ boost::unordered_map<const InputTree::Node*, F2S::Forest::Vertex*> vertexMap;
+
+ // Create forest vertices (but not hyperedges) and fill in vertexMap.
+ for (std::vector<InputTree::Node>::const_iterator p = tree.nodes.begin();
+ p != tree.nodes.end(); ++p) {
+ F2S::Forest::Vertex *v = new F2S::Forest::Vertex(p->pvertex);
+ forest.vertices.push_back(v);
+ vertexMap[&*p] = v;
+ }
+
+ // Create the forest hyperedges.
+ for (std::vector<InputTree::Node>::const_iterator p = tree.nodes.begin();
+ p != tree.nodes.end(); ++p) {
+ const InputTree::Node &treeVertex = *p;
+ const std::vector<InputTree::Node*> &treeChildren = treeVertex.children;
+ if (treeChildren.empty()) {
+ continue;
+ }
+ F2S::Forest::Hyperedge *e = new F2S::Forest::Hyperedge();
+ e->head = vertexMap[&treeVertex];
+ e->tail.reserve(treeChildren.size());
+ for (std::vector<InputTree::Node*>::const_iterator q = treeChildren.begin();
+ q != treeChildren.end(); ++q) {
+ e->tail.push_back(vertexMap[*q]);
+ }
+ e->head->incoming.push_back(e);
+ }
+
+ // Return a pointer to the forest's root vertex.
+ return forest.vertices.back();
+}
+
+} // T2S
+} // Syntax
+} // Moses
diff --git a/moses/Syntax/T2S/InputTreeToForest.h b/moses/Syntax/T2S/InputTreeToForest.h
new file mode 100644
index 000000000..e8532c6f2
--- /dev/null
+++ b/moses/Syntax/T2S/InputTreeToForest.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include "moses/Syntax/F2S/Forest.h"
+
+#include "InputTree.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace T2S
+{
+
+// Constructs a F2S::Forest given a T2S::InputTree.
+const F2S::Forest::Vertex *InputTreeToForest(const InputTree &, F2S::Forest &);
+
+} // T2S
+} // Syntax
+} // Moses
diff --git a/moses/Syntax/T2S/Manager-inl.h b/moses/Syntax/T2S/Manager-inl.h
new file mode 100644
index 000000000..778d1048f
--- /dev/null
+++ b/moses/Syntax/T2S/Manager-inl.h
@@ -0,0 +1,301 @@
+#pragma once
+
+#include "moses/DecodeGraph.h"
+#include "moses/StaticData.h"
+#include "moses/Syntax/BoundedPriorityContainer.h"
+#include "moses/Syntax/CubeQueue.h"
+#include "moses/Syntax/F2S/DerivationWriter.h"
+#include "moses/Syntax/F2S/RuleMatcherCallback.h"
+#include "moses/Syntax/PHyperedge.h"
+#include "moses/Syntax/RuleTable.h"
+#include "moses/Syntax/RuleTableFF.h"
+#include "moses/Syntax/SHyperedgeBundle.h"
+#include "moses/Syntax/SVertex.h"
+#include "moses/Syntax/SVertexRecombinationOrderer.h"
+#include "moses/Syntax/SymbolEqualityPred.h"
+#include "moses/Syntax/SymbolHasher.h"
+
+#include "GlueRuleSynthesizer.h"
+#include "InputTreeBuilder.h"
+#include "RuleTrie.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace T2S
+{
+
+template<typename RuleMatcher>
+Manager<RuleMatcher>::Manager(const TreeInput &source)
+ : Syntax::Manager(source)
+ , m_treeSource(source)
+{
+}
+
+template<typename RuleMatcher>
+void Manager<RuleMatcher>::InitializeRuleMatchers()
+{
+ const std::vector<RuleTableFF*> &ffs = RuleTableFF::Instances();
+ for (std::size_t i = 0; i < ffs.size(); ++i) {
+ RuleTableFF *ff = ffs[i];
+ // This may change in the future, but currently we assume that every
+ // RuleTableFF is associated with a static, file-based rule table of
+ // some sort and that the table should have been loaded into a RuleTable
+ // by this point.
+ const RuleTable *table = ff->GetTable();
+ assert(table);
+ RuleTable *nonConstTable = const_cast<RuleTable*>(table);
+ RuleTrie *trie = dynamic_cast<RuleTrie*>(nonConstTable);
+ assert(trie);
+ boost::shared_ptr<RuleMatcher> p(new RuleMatcher(m_inputTree, *trie));
+ m_ruleMatchers.push_back(p);
+ }
+
+ // Create an additional rule trie + matcher for glue rules (which are
+ // synthesized on demand).
+ // FIXME Add a hidden RuleTableFF for the glue rule trie(?)
+ m_glueRuleTrie.reset(new RuleTrie(ffs[0]));
+ boost::shared_ptr<RuleMatcher> p(new RuleMatcher(m_inputTree, *m_glueRuleTrie));
+ m_ruleMatchers.push_back(p);
+ m_glueRuleMatcher = p.get();
+}
+
+template<typename RuleMatcher>
+void Manager<RuleMatcher>::InitializeStacks()
+{
+ // Check that m_inputTree has been initialized.
+ assert(!m_inputTree.nodes.empty());
+
+ for (std::vector<InputTree::Node>::const_iterator p =
+ m_inputTree.nodes.begin(); p != m_inputTree.nodes.end(); ++p) {
+ const InputTree::Node &node = *p;
+
+ // Create an empty stack.
+ SVertexStack &stack = m_stackMap[&(node.pvertex)];
+
+ // For terminals only, add a single SVertex.
+ if (node.children.empty()) {
+ boost::shared_ptr<SVertex> v(new SVertex());
+ v->best = 0;
+ v->pvertex = &(node.pvertex);
+ stack.push_back(v);
+ }
+ }
+}
+
+template<typename RuleMatcher>
+void Manager<RuleMatcher>::Decode()
+{
+ const StaticData &staticData = StaticData::Instance();
+
+ // Get various pruning-related constants.
+ const std::size_t popLimit = staticData.GetCubePruningPopLimit();
+ const std::size_t ruleLimit = staticData.GetRuleLimit();
+ const std::size_t stackLimit = staticData.GetMaxHypoStackSize();
+
+ // Construct the InputTree.
+ InputTreeBuilder builder;
+ builder.Build(m_treeSource, "Q", m_inputTree);
+
+ // Initialize the stacks.
+ InitializeStacks();
+
+ // Initialize the rule matchers.
+ InitializeRuleMatchers();
+
+ // Create a callback to process the PHyperedges produced by the rule matchers.
+ F2S::RuleMatcherCallback callback(m_stackMap, ruleLimit);
+
+ // Create a glue rule synthesizer.
+ GlueRuleSynthesizer glueRuleSynthesizer(*m_glueRuleTrie);
+
+ // Visit each node of the input tree in post-order.
+ for (std::vector<InputTree::Node>::const_iterator p =
+ m_inputTree.nodes.begin(); p != m_inputTree.nodes.end(); ++p) {
+
+ const InputTree::Node &node = *p;
+
+ // Skip terminal nodes.
+ if (node.children.empty()) {
+ continue;
+ }
+
+ // Call the rule matchers to generate PHyperedges for this node and
+ // convert each one to a SHyperedgeBundle (via the callback). The
+ // callback prunes the SHyperedgeBundles and keeps the best ones (up
+ // to ruleLimit).
+ callback.ClearContainer();
+ for (typename std::vector<boost::shared_ptr<RuleMatcher> >::iterator
+ q = m_ruleMatchers.begin(); q != m_ruleMatchers.end(); ++q) {
+ (*q)->EnumerateHyperedges(node, callback);
+ }
+
+ // Retrieve the (pruned) set of SHyperedgeBundles from the callback.
+ const BoundedPriorityContainer<SHyperedgeBundle> &bundles =
+ callback.GetContainer();
+
+ // Check if any rules were matched. If not then synthesize a glue rule
+ // that is guaranteed to match.
+ if (bundles.Size() == 0) {
+ glueRuleSynthesizer.SynthesizeRule(node);
+ m_glueRuleMatcher->EnumerateHyperedges(node, callback);
+ assert(bundles.Size() == 1);
+ }
+
+ // Use cube pruning to extract SHyperedges from SHyperedgeBundles and
+ // collect the SHyperedges in a buffer.
+ CubeQueue cubeQueue(bundles.Begin(), bundles.End());
+ std::size_t count = 0;
+ std::vector<SHyperedge*> buffer;
+ while (count < popLimit && !cubeQueue.IsEmpty()) {
+ SHyperedge *hyperedge = cubeQueue.Pop();
+ // FIXME See corresponding code in S2T::Manager
+ // BEGIN{HACK}
+ hyperedge->head->pvertex = &(node.pvertex);
+ // END{HACK}
+ buffer.push_back(hyperedge);
+ ++count;
+ }
+
+ // Recombine SVertices and sort into a stack.
+ SVertexStack &stack = m_stackMap[&(node.pvertex)];
+ RecombineAndSort(buffer, stack);
+
+ // Prune stack.
+ if (stackLimit > 0 && stack.size() > stackLimit) {
+ stack.resize(stackLimit);
+ }
+ }
+}
+
+template<typename RuleMatcher>
+const SHyperedge *Manager<RuleMatcher>::GetBestSHyperedge() const
+{
+ const InputTree::Node &rootNode = m_inputTree.nodes.back();
+ F2S::PVertexToStackMap::const_iterator p = m_stackMap.find(&rootNode.pvertex);
+ assert(p != m_stackMap.end());
+ const SVertexStack &stack = p->second;
+ assert(!stack.empty());
+ return stack[0]->best;
+}
+
+template<typename RuleMatcher>
+void Manager<RuleMatcher>::ExtractKBest(
+ std::size_t k,
+ std::vector<boost::shared_ptr<KBestExtractor::Derivation> > &kBestList,
+ bool onlyDistinct) const
+{
+ kBestList.clear();
+ if (k == 0 || m_source.GetSize() == 0) {
+ return;
+ }
+
+ // Get the top-level SVertex stack.
+ const InputTree::Node &rootNode = m_inputTree.nodes.back();
+ F2S::PVertexToStackMap::const_iterator p = m_stackMap.find(&rootNode.pvertex);
+ assert(p != m_stackMap.end());
+ const SVertexStack &stack = p->second;
+ assert(!stack.empty());
+
+ KBestExtractor extractor;
+
+ if (!onlyDistinct) {
+ // Return the k-best list as is, including duplicate translations.
+ extractor.Extract(stack, k, kBestList);
+ return;
+ }
+
+ // Determine how many derivations to extract. If the k-best list is
+ // restricted to distinct translations then this limit should be bigger
+ // than k. The k-best factor determines how much bigger the limit should be,
+ // with 0 being 'unlimited.' This actually sets a large-ish limit in case
+ // too many translations are identical.
+ const StaticData &staticData = StaticData::Instance();
+ const std::size_t nBestFactor = staticData.GetNBestFactor();
+ std::size_t numDerivations = (nBestFactor == 0) ? k*1000 : k*nBestFactor;
+
+ // Extract the derivations.
+ KBestExtractor::KBestVec bigList;
+ bigList.reserve(numDerivations);
+ extractor.Extract(stack, numDerivations, bigList);
+
+ // Copy derivations into kBestList, skipping ones with repeated translations.
+ std::set<Phrase> distinct;
+ for (KBestExtractor::KBestVec::const_iterator p = bigList.begin();
+ kBestList.size() < k && p != bigList.end(); ++p) {
+ boost::shared_ptr<KBestExtractor::Derivation> derivation = *p;
+ Phrase translation = KBestExtractor::GetOutputPhrase(*derivation);
+ if (distinct.insert(translation).second) {
+ kBestList.push_back(derivation);
+ }
+ }
+}
+
+// TODO Move this function into parent directory (Recombiner class?) and
+// TODO share with S2T
+template<typename RuleMatcher>
+void Manager<RuleMatcher>::RecombineAndSort(
+ const std::vector<SHyperedge*> &buffer, SVertexStack &stack)
+{
+ // Step 1: Create a map containing a single instance of each distinct vertex
+ // (where distinctness is defined by the state value). The hyperedges'
+ // head pointers are updated to point to the vertex instances in the map and
+ // any 'duplicate' vertices are deleted.
+// TODO Set?
+ typedef std::map<SVertex *, SVertex *, SVertexRecombinationOrderer> Map;
+ Map map;
+ for (std::vector<SHyperedge*>::const_iterator p = buffer.begin();
+ p != buffer.end(); ++p) {
+ SHyperedge *h = *p;
+ SVertex *v = h->head;
+ assert(v->best == h);
+ assert(v->recombined.empty());
+ std::pair<Map::iterator, bool> result = map.insert(Map::value_type(v, v));
+ if (result.second) {
+ continue; // v's recombination value hasn't been seen before.
+ }
+ // v is a duplicate (according to the recombination rules).
+ // Compare the score of h against the score of the best incoming hyperedge
+ // for the stored vertex.
+ SVertex *storedVertex = result.first->second;
+ if (h->label.score > storedVertex->best->label.score) {
+ // h's score is better.
+ storedVertex->recombined.push_back(storedVertex->best);
+ storedVertex->best = h;
+ } else {
+ storedVertex->recombined.push_back(h);
+ }
+ h->head->best = 0;
+ delete h->head;
+ h->head = storedVertex;
+ }
+
+ // Step 2: Copy the vertices from the map to the stack.
+ stack.clear();
+ stack.reserve(map.size());
+ for (Map::const_iterator p = map.begin(); p != map.end(); ++p) {
+ stack.push_back(boost::shared_ptr<SVertex>(p->first));
+ }
+
+ // Step 3: Sort the vertices in the stack.
+ std::sort(stack.begin(), stack.end(), SVertexStackContentOrderer());
+}
+
+template<typename RuleMatcher>
+void Manager<RuleMatcher>::OutputDetailedTranslationReport(
+ OutputCollector *collector) const
+{
+ const SHyperedge *best = GetBestSHyperedge();
+ if (best == NULL || collector == NULL) {
+ return;
+ }
+ long translationId = m_source.GetTranslationId();
+ std::ostringstream out;
+ F2S::DerivationWriter::Write(*best, translationId, out);
+ collector->Write(translationId, out.str());
+}
+
+} // T2S
+} // Syntax
+} // Moses
diff --git a/moses/Syntax/T2S/Manager.h b/moses/Syntax/T2S/Manager.h
new file mode 100644
index 000000000..0082e1038
--- /dev/null
+++ b/moses/Syntax/T2S/Manager.h
@@ -0,0 +1,67 @@
+#pragma once
+
+#include <set>
+#include <vector>
+
+#include <boost/shared_ptr.hpp>
+#include <boost/unordered_map.hpp>
+
+#include "moses/Syntax/F2S/PVertexToStackMap.h"
+#include "moses/Syntax/KBestExtractor.h"
+#include "moses/Syntax/Manager.h"
+#include "moses/Syntax/SVertexStack.h"
+#include "moses/TreeInput.h"
+#include "moses/Word.h"
+
+#include "InputTree.h"
+#include "RuleTrie.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+
+struct SHyperedge;
+
+namespace T2S
+{
+
+template<typename RuleMatcher>
+class Manager : public Syntax::Manager
+{
+ public:
+ Manager(const TreeInput &);
+
+ void Decode();
+
+ // Get the SHyperedge for the 1-best derivation.
+ const SHyperedge *GetBestSHyperedge() const;
+
+ void ExtractKBest(
+ std::size_t k,
+ std::vector<boost::shared_ptr<KBestExtractor::Derivation> > &kBestList,
+ bool onlyDistinct=false) const;
+
+ void OutputDetailedTranslationReport(OutputCollector *collector) const;
+
+ private:
+ void InitializeRuleMatchers();
+
+ void InitializeStacks();
+
+ void RecombineAndSort(const std::vector<SHyperedge*> &, SVertexStack &);
+
+ const TreeInput &m_treeSource;
+ InputTree m_inputTree;
+ F2S::PVertexToStackMap m_stackMap;
+ boost::shared_ptr<RuleTrie> m_glueRuleTrie;
+ std::vector<boost::shared_ptr<RuleMatcher> > m_ruleMatchers;
+ RuleMatcher *m_glueRuleMatcher;
+};
+
+} // T2S
+} // Syntax
+} // Moses
+
+// Implementation
+#include "Manager-inl.h"
diff --git a/moses/Syntax/T2S/RuleMatcher.h b/moses/Syntax/T2S/RuleMatcher.h
new file mode 100644
index 000000000..2f7d4c99a
--- /dev/null
+++ b/moses/Syntax/T2S/RuleMatcher.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include "InputTree.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace T2S
+{
+
+// Base class for rule matchers.
+template<typename Callback>
+class RuleMatcher
+{
+ public:
+ virtual ~RuleMatcher() {}
+
+ virtual void EnumerateHyperedges(const InputTree::Node &, Callback &) = 0;
+};
+
+} // T2S
+} // Syntax
+} // Moses
diff --git a/moses/Syntax/T2S/RuleMatcherSCFG-inl.h b/moses/Syntax/T2S/RuleMatcherSCFG-inl.h
new file mode 100644
index 000000000..c1d8db63b
--- /dev/null
+++ b/moses/Syntax/T2S/RuleMatcherSCFG-inl.h
@@ -0,0 +1,107 @@
+#pragma once
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace T2S
+{
+
+template<typename Callback>
+RuleMatcherSCFG<Callback>::RuleMatcherSCFG(const InputTree &inputTree,
+ const RuleTrie &ruleTrie)
+ : m_inputTree(inputTree)
+ , m_ruleTrie(ruleTrie)
+{
+}
+
+template<typename Callback>
+void RuleMatcherSCFG<Callback>::EnumerateHyperedges(const InputTree::Node &node,
+ Callback &callback)
+{
+ const int start = static_cast<int>(node.pvertex.span.GetStartPos());
+ m_hyperedge.head = const_cast<PVertex*>(&node.pvertex);
+ m_hyperedge.tail.clear();
+ Match(node, m_ruleTrie.GetRootNode(), start, callback);
+}
+
+template<typename Callback>
+void RuleMatcherSCFG<Callback>::Match(const InputTree::Node &inNode,
+ const RuleTrie::Node &trieNode,
+ int start, Callback &callback)
+{
+ // Try to extend the current hyperedge tail by adding a tree node that is a
+ // descendent of inNode and has a span starting at start.
+ const std::vector<InputTree::Node*> &nodes = m_inputTree.nodesAtPos[start];
+ for (std::vector<InputTree::Node*>::const_iterator p = nodes.begin();
+ p != nodes.end(); ++p) {
+ InputTree::Node &candidate = **p;
+ // Is candidate a descendent of inNode?
+ if (!IsDescendent(candidate, inNode)) {
+ continue;
+ }
+ // Get the appropriate SymbolMap (according to whether candidate is a
+ // terminal or non-terminal map) from the current rule trie node.
+ const RuleTrie::Node::SymbolMap *map = NULL;
+ if (candidate.children.empty()) {
+ map = &(trieNode.GetTerminalMap());
+ } else {
+ map = &(trieNode.GetNonTerminalMap());
+ }
+ // Test if the current rule prefix can be extended by candidate's symbol.
+ RuleTrie::Node::SymbolMap::const_iterator q =
+ map->find(candidate.pvertex.symbol);
+ if (q == map->end()) {
+ continue;
+ }
+ const RuleTrie::Node &newTrieNode = q->second;
+ // Add the candidate node to the tail.
+ m_hyperedge.tail.push_back(&candidate.pvertex);
+ // Have we now covered the full span of inNode?
+ if (candidate.pvertex.span.GetEndPos() == inNode.pvertex.span.GetEndPos()) {
+ // Check if the trie node has any rules with a LHS that match inNode.
+ const Word &lhs = inNode.pvertex.symbol;
+ const TargetPhraseCollection *tpc =
+ newTrieNode.GetTargetPhraseCollection(lhs);
+ if (tpc) {
+ m_hyperedge.label.translations = tpc;
+ callback(m_hyperedge);
+ }
+ } else {
+ // Recursive step.
+ int newStart = candidate.pvertex.span.GetEndPos()+1;
+ Match(inNode, newTrieNode, newStart, callback);
+ }
+ m_hyperedge.tail.pop_back();
+ }
+}
+
+// Return true iff x is a descendent of y; false otherwise.
+template<typename Callback>
+bool RuleMatcherSCFG<Callback>::IsDescendent(const InputTree::Node &x,
+ const InputTree::Node &y)
+{
+ const std::size_t xStart = x.pvertex.span.GetStartPos();
+ const std::size_t yStart = y.pvertex.span.GetStartPos();
+ const std::size_t xEnd = x.pvertex.span.GetEndPos();
+ const std::size_t yEnd = y.pvertex.span.GetEndPos();
+ if (xStart < yStart || xEnd > yEnd) {
+ return false;
+ }
+ if (xStart > yStart || xEnd < yEnd) {
+ return true;
+ }
+ // x and y both cover the same span.
+ const InputTree::Node *z = &y;
+ while (z->children.size() == 1) {
+ z = z->children[0];
+ if (z == &x) {
+ return true;
+ }
+ }
+ return false;
+}
+
+} // namespace T2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/T2S/RuleMatcherSCFG.h b/moses/Syntax/T2S/RuleMatcherSCFG.h
new file mode 100644
index 000000000..078388f5f
--- /dev/null
+++ b/moses/Syntax/T2S/RuleMatcherSCFG.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include "moses/Syntax/PHyperedge.h"
+
+#include "RuleMatcher.h"
+#include "RuleTrie.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace T2S
+{
+
+// TODO
+//
+template<typename Callback>
+class RuleMatcherSCFG : public RuleMatcher<Callback>
+{
+ public:
+ RuleMatcherSCFG(const InputTree &, const RuleTrie &);
+
+ ~RuleMatcherSCFG() {}
+
+ void EnumerateHyperedges(const InputTree::Node &, Callback &);
+
+ private:
+ bool IsDescendent(const InputTree::Node &, const InputTree::Node &);
+
+ void Match(const InputTree::Node &, const RuleTrie::Node &, int, Callback &);
+
+ const InputTree &m_inputTree;
+ const RuleTrie &m_ruleTrie;
+ PHyperedge m_hyperedge;
+};
+
+} // namespace T2S
+} // namespace Syntax
+} // namespace Moses
+
+// Implementation
+#include "RuleMatcherSCFG-inl.h"
diff --git a/moses/Syntax/T2S/RuleTrie.cpp b/moses/Syntax/T2S/RuleTrie.cpp
new file mode 100644
index 000000000..981543290
--- /dev/null
+++ b/moses/Syntax/T2S/RuleTrie.cpp
@@ -0,0 +1,139 @@
+#include "RuleTrie.h"
+
+#include <map>
+#include <vector>
+
+#include <boost/functional/hash.hpp>
+#include <boost/unordered_map.hpp>
+#include <boost/version.hpp>
+
+#include "moses/NonTerminal.h"
+#include "moses/TargetPhrase.h"
+#include "moses/TargetPhraseCollection.h"
+#include "moses/Util.h"
+#include "moses/Word.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace T2S
+{
+
+void RuleTrie::Node::Prune(std::size_t tableLimit)
+{
+  // Recursively prune child nodes.
+ for (SymbolMap::iterator p = m_sourceTermMap.begin();
+ p != m_sourceTermMap.end(); ++p) {
+ p->second.Prune(tableLimit);
+ }
+ for (SymbolMap::iterator p = m_nonTermMap.begin();
+ p != m_nonTermMap.end(); ++p) {
+ p->second.Prune(tableLimit);
+ }
+
+ // Prune TargetPhraseCollections at this node.
+ for (TPCMap::iterator p = m_targetPhraseCollections.begin();
+ p != m_targetPhraseCollections.end(); ++p) {
+ p->second.Prune(true, tableLimit);
+ }
+}
+
+void RuleTrie::Node::Sort(std::size_t tableLimit)
+{
+  // Recursively sort child nodes.
+ for (SymbolMap::iterator p = m_sourceTermMap.begin();
+ p != m_sourceTermMap.end(); ++p) {
+ p->second.Sort(tableLimit);
+ }
+ for (SymbolMap::iterator p = m_nonTermMap.begin();
+ p != m_nonTermMap.end(); ++p) {
+ p->second.Sort(tableLimit);
+ }
+
+ // Sort TargetPhraseCollections at this node.
+ for (TPCMap::iterator p = m_targetPhraseCollections.begin();
+ p != m_targetPhraseCollections.end(); ++p) {
+ p->second.Sort(true, tableLimit);
+ }
+}
+
+RuleTrie::Node *RuleTrie::Node::GetOrCreateChild(
+ const Word &sourceTerm)
+{
+ return &m_sourceTermMap[sourceTerm];
+}
+
+RuleTrie::Node *RuleTrie::Node::GetOrCreateNonTerminalChild(const Word &targetNonTerm)
+{
+ UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
+ "Not a non-terminal: " << targetNonTerm);
+
+ return &m_nonTermMap[targetNonTerm];
+}
+
+TargetPhraseCollection &RuleTrie::Node::GetOrCreateTargetPhraseCollection(
+ const Word &sourceLHS)
+{
+ UTIL_THROW_IF2(!sourceLHS.IsNonTerminal(),
+ "Not a non-terminal: " << sourceLHS);
+ return m_targetPhraseCollections[sourceLHS];
+}
+
+const RuleTrie::Node *RuleTrie::Node::GetChild(
+ const Word &sourceTerm) const
+{
+ UTIL_THROW_IF2(sourceTerm.IsNonTerminal(),
+ "Not a terminal: " << sourceTerm);
+
+ SymbolMap::const_iterator p = m_sourceTermMap.find(sourceTerm);
+ return (p == m_sourceTermMap.end()) ? NULL : &p->second;
+}
+
+const RuleTrie::Node *RuleTrie::Node::GetNonTerminalChild(
+ const Word &targetNonTerm) const
+{
+ UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
+ "Not a non-terminal: " << targetNonTerm);
+
+ SymbolMap::const_iterator p = m_nonTermMap.find(targetNonTerm);
+ return (p == m_nonTermMap.end()) ? NULL : &p->second;
+}
+
+TargetPhraseCollection &RuleTrie::GetOrCreateTargetPhraseCollection(
+ const Word &sourceLHS, const Phrase &sourceRHS)
+{
+ Node &currNode = GetOrCreateNode(sourceRHS);
+ return currNode.GetOrCreateTargetPhraseCollection(sourceLHS);
+}
+
+RuleTrie::Node &RuleTrie::GetOrCreateNode(const Phrase &sourceRHS)
+{
+ const std::size_t size = sourceRHS.GetSize();
+
+ Node *currNode = &m_root;
+ for (std::size_t pos = 0 ; pos < size ; ++pos) {
+ const Word& word = sourceRHS.GetWord(pos);
+
+ if (word.IsNonTerminal()) {
+ currNode = currNode->GetOrCreateNonTerminalChild(word);
+ } else {
+ currNode = currNode->GetOrCreateChild(word);
+ }
+
+ UTIL_THROW_IF2(currNode == NULL, "Node not found at position " << pos);
+ }
+
+ return *currNode;
+}
+
+void RuleTrie::SortAndPrune(std::size_t tableLimit)
+{
+ if (tableLimit) {
+ m_root.Sort(tableLimit);
+ }
+}
+
+} // namespace T2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/T2S/RuleTrie.h b/moses/Syntax/T2S/RuleTrie.h
new file mode 100644
index 000000000..564c0cc80
--- /dev/null
+++ b/moses/Syntax/T2S/RuleTrie.h
@@ -0,0 +1,90 @@
+#pragma once
+
+#include <map>
+#include <vector>
+
+#include <boost/functional/hash.hpp>
+#include <boost/unordered_map.hpp>
+#include <boost/version.hpp>
+
+#include "moses/Syntax/RuleTable.h"
+#include "moses/Syntax/SymbolEqualityPred.h"
+#include "moses/Syntax/SymbolHasher.h"
+#include "moses/TargetPhrase.h"
+#include "moses/TargetPhraseCollection.h"
+#include "moses/Terminal.h"
+#include "moses/Util.h"
+#include "moses/Word.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace T2S
+{
+
+class RuleTrie: public RuleTable
+{
+ public:
+ class Node
+ {
+ public:
+ typedef boost::unordered_map<Word, Node, SymbolHasher,
+ SymbolEqualityPred> SymbolMap;
+
+ typedef boost::unordered_map<Word, TargetPhraseCollection,
+ SymbolHasher, SymbolEqualityPred> TPCMap;
+
+ bool IsLeaf() const {
+ return m_sourceTermMap.empty() && m_nonTermMap.empty();
+ }
+
+ bool HasRules() const { return !m_targetPhraseCollections.empty(); }
+
+ void Prune(std::size_t tableLimit);
+ void Sort(std::size_t tableLimit);
+
+ Node *GetOrCreateChild(const Word &sourceTerm);
+ Node *GetOrCreateNonTerminalChild(const Word &targetNonTerm);
+ TargetPhraseCollection &GetOrCreateTargetPhraseCollection(const Word &);
+
+ const Node *GetChild(const Word &sourceTerm) const;
+ const Node *GetNonTerminalChild(const Word &targetNonTerm) const;
+
+ const TargetPhraseCollection *GetTargetPhraseCollection(
+ const Word &sourceLHS) const {
+ TPCMap::const_iterator p = m_targetPhraseCollections.find(sourceLHS);
+ return p == m_targetPhraseCollections.end() ? 0 : &(p->second);
+ }
+
+ // FIXME IS there any reason to distinguish these two for T2S?
+ const SymbolMap &GetTerminalMap() const { return m_sourceTermMap; }
+
+ const SymbolMap &GetNonTerminalMap() const { return m_nonTermMap; }
+
+ private:
+ SymbolMap m_sourceTermMap;
+ SymbolMap m_nonTermMap;
+ TPCMap m_targetPhraseCollections;
+ };
+
+ RuleTrie(const RuleTableFF *ff) : RuleTable(ff) {}
+
+ const Node &GetRootNode() const { return m_root; }
+
+ private:
+ friend class RuleTrieCreator;
+
+ TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
+ const Word &sourceLHS, const Phrase &sourceRHS);
+
+ Node &GetOrCreateNode(const Phrase &sourceRHS);
+
+ void SortAndPrune(std::size_t);
+
+ Node m_root;
+};
+
+} // namespace T2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/T2S/RuleTrieCreator.h b/moses/Syntax/T2S/RuleTrieCreator.h
new file mode 100644
index 000000000..b474a88e0
--- /dev/null
+++ b/moses/Syntax/T2S/RuleTrieCreator.h
@@ -0,0 +1,32 @@
+#pragma once
+
+#include "RuleTrie.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace T2S
+{
+
+// Base for classes that create a RuleTrie (currently RuleTrieLoader and
+// OovHandler). RuleTrieCreator is a friend of RuleTrie.
+class RuleTrieCreator
+{
+ protected:
+ // Provide access to RuleTrie's private SortAndPrune function.
+ void SortAndPrune(RuleTrie &trie, std::size_t limit) {
+ trie.SortAndPrune(limit);
+ }
+
+ // Provide access to RuleTrie's private
+ // GetOrCreateTargetPhraseCollection function.
+ TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
+ RuleTrie &trie, const Word &sourceLHS, const Phrase &sourceRHS) {
+ return trie.GetOrCreateTargetPhraseCollection(sourceLHS, sourceRHS);
+ }
+};
+
+} // namespace T2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/T2S/RuleTrieLoader.cpp b/moses/Syntax/T2S/RuleTrieLoader.cpp
new file mode 100644
index 000000000..9feaefc94
--- /dev/null
+++ b/moses/Syntax/T2S/RuleTrieLoader.cpp
@@ -0,0 +1,154 @@
+#include "RuleTrieLoader.h"
+
+#include <sys/stat.h>
+#include <stdlib.h>
+
+#include <fstream>
+#include <string>
+#include <iterator>
+#include <algorithm>
+#include <iostream>
+
+#include "moses/FactorCollection.h"
+#include "moses/Word.h"
+#include "moses/Util.h"
+#include "moses/InputFileStream.h"
+#include "moses/StaticData.h"
+#include "moses/WordsRange.h"
+#include "moses/ChartTranslationOptionList.h"
+#include "moses/FactorCollection.h"
+#include "moses/Syntax/RuleTableFF.h"
+#include "util/file_piece.hh"
+#include "util/string_piece.hh"
+#include "util/tokenize_piece.hh"
+#include "util/double-conversion/double-conversion.h"
+#include "util/exception.hh"
+
+#include "RuleTrie.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace T2S
+{
+
+bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
+ const std::vector<FactorType> &output,
+ const std::string &inFile,
+ const RuleTableFF &ff,
+ RuleTrie &trie)
+{
+ PrintUserTime(std::string("Start loading text phrase table. Moses format"));
+
+ const StaticData &staticData = StaticData::Instance();
+ const std::string &factorDelimiter = staticData.GetFactorDelimiter();
+
+ std::size_t count = 0;
+
+ std::ostream *progress = NULL;
+ IFVERBOSE(1) progress = &std::cerr;
+ util::FilePiece in(inFile.c_str(), progress);
+
+ // reused variables
+ std::vector<float> scoreVector;
+ StringPiece line;
+
+ double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan");
+
+ while(true) {
+ try {
+ line = in.ReadLine();
+ } catch (const util::EndOfFileException &e) {
+ break;
+ }
+
+ util::TokenIter<util::MultiCharacter> pipes(line, "|||");
+ StringPiece sourcePhraseString(*pipes);
+ StringPiece targetPhraseString(*++pipes);
+ StringPiece scoreString(*++pipes);
+
+ StringPiece alignString;
+ if (++pipes) {
+ StringPiece temp(*pipes);
+ alignString = temp;
+ }
+
+ if (++pipes) {
+ StringPiece str(*pipes); //counts
+ }
+
+ bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == std::string::npos);
+ if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
+ TRACE_ERR( ff.GetFilePath() << ":" << count << ": pt entry contains empty target, skipping\n");
+ continue;
+ }
+
+ scoreVector.clear();
+ for (util::TokenIter<util::AnyCharacter, true> s(scoreString, " \t"); s; ++s) {
+ int processed;
+ float score = converter.StringToFloat(s->data(), s->length(), &processed);
+ UTIL_THROW_IF2(isnan(score), "Bad score " << *s << " on line " << count);
+ scoreVector.push_back(FloorScore(TransformScore(score)));
+ }
+ const std::size_t numScoreComponents = ff.GetNumScoreComponents();
+ if (scoreVector.size() != numScoreComponents) {
+ UTIL_THROW2("Size of scoreVector != number (" << scoreVector.size() << "!="
+ << numScoreComponents << ") of score components on line " << count);
+ }
+
+ // parse source & find pt node
+
+ // constituent labels
+ Word *sourceLHS = NULL;
+ Word *targetLHS;
+
+ // create target phrase obj
+ TargetPhrase *targetPhrase = new TargetPhrase(&ff);
+ // targetPhrase->CreateFromString(Output, output, targetPhraseString, factorDelimiter, &targetLHS);
+ targetPhrase->CreateFromString(Output, output, targetPhraseString, &targetLHS);
+ // source
+ Phrase sourcePhrase;
+ // sourcePhrase.CreateFromString(Input, input, sourcePhraseString, factorDelimiter, &sourceLHS);
+ sourcePhrase.CreateFromString(Input, input, sourcePhraseString, &sourceLHS);
+
+ // rest of target phrase
+ targetPhrase->SetAlignmentInfo(alignString);
+ targetPhrase->SetTargetLHS(targetLHS);
+
+ //targetPhrase->SetDebugOutput(string("New Format pt ") + line);
+
+ if (++pipes) {
+ StringPiece sparseString(*pipes);
+ targetPhrase->SetSparseScore(&ff, sparseString);
+ }
+
+ if (++pipes) {
+ StringPiece propertiesString(*pipes);
+ targetPhrase->SetProperties(propertiesString);
+ }
+
+ targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector);
+ targetPhrase->EvaluateInIsolation(sourcePhrase, ff.GetFeaturesToApply());
+
+ TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(
+ trie, *sourceLHS, sourcePhrase);
+ phraseColl.Add(targetPhrase);
+
+ // not implemented correctly in memory pt. just delete it for now
+ delete sourceLHS;
+
+ count++;
+ }
+
+ // sort and prune each target phrase collection
+ if (ff.GetTableLimit()) {
+ SortAndPrune(trie, ff.GetTableLimit());
+ }
+
+ return true;
+}
+
+} // namespace T2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/Syntax/T2S/RuleTrieLoader.h b/moses/Syntax/T2S/RuleTrieLoader.h
new file mode 100644
index 000000000..d3fa4ec60
--- /dev/null
+++ b/moses/Syntax/T2S/RuleTrieLoader.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include <istream>
+#include <vector>
+
+#include "moses/TypeDef.h"
+#include "moses/Syntax/RuleTableFF.h"
+
+#include "RuleTrie.h"
+#include "RuleTrieCreator.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace T2S
+{
+
+class RuleTrieLoader : public RuleTrieCreator
+{
+ public:
+ bool Load(const std::vector<FactorType> &input,
+ const std::vector<FactorType> &output,
+ const std::string &inFile,
+ const RuleTableFF &,
+ RuleTrie &);
+};
+
+} // namespace T2S
+} // namespace Syntax
+} // namespace Moses
diff --git a/moses/TabbedSentence.cpp b/moses/TabbedSentence.cpp
new file mode 100644
index 000000000..ae0876595
--- /dev/null
+++ b/moses/TabbedSentence.cpp
@@ -0,0 +1,72 @@
+// $Id$
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <vector>
+#include <string>
+#include <sstream>
+#include <boost/algorithm/string.hpp>
+
+#include "TabbedSentence.h"
+
+namespace Moses
+{
+
+void TabbedSentence::CreateFromString(const std::vector<FactorType> &factorOrder
+ , const std::string &tabbedString)
+{
+ TabbedColumns allColumns;
+
+ boost::split(allColumns, tabbedString, boost::is_any_of("\t"));
+
+ if(allColumns.size() < 2) {
+ Sentence::CreateFromString(factorOrder, tabbedString);
+ } else {
+ m_columns.resize(allColumns.size() - 1);
+ std::copy(allColumns.begin() + 1, allColumns.end(), m_columns.begin());
+ Sentence::CreateFromString(factorOrder, allColumns[0]);
+ }
+}
+
+int TabbedSentence::Read(std::istream& in, const std::vector<FactorType>& factorOrder)
+{
+ TabbedColumns allColumns;
+
+ std::string line;
+ if (getline(in, line, '\n').eof())
+ return 0;
+
+ boost::split(allColumns, line, boost::is_any_of("\t"));
+
+ if(allColumns.size() < 2) {
+ std::stringstream dummyStream;
+ dummyStream << line << std::endl;
+ return Sentence::Read(dummyStream, factorOrder);
+ } else {
+ m_columns.resize(allColumns.size() - 1);
+ std::copy(allColumns.begin() + 1, allColumns.end(), m_columns.begin());
+
+ std::stringstream dummyStream;
+ dummyStream << allColumns[0] << std::endl;
+ return Sentence::Read(dummyStream, factorOrder);
+ }
+}
+
+}
diff --git a/moses/TabbedSentence.h b/moses/TabbedSentence.h
new file mode 100644
index 000000000..ffd28a877
--- /dev/null
+++ b/moses/TabbedSentence.h
@@ -0,0 +1,87 @@
+// $Id$
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#pragma once
+
+#include <vector>
+#include <string>
+#include <cstdlib>
+#include "Sentence.h"
+
+namespace Moses
+{
+
+/**
+ * Adds a vector of strings to Sentence that are filled from tab-separated input.
+ * The first column is just treated as the normal input sentence with all the XML
+ * processing and stuff. Then it contains a vector of strings that contains all
+ * other columns.
+ *
+ * Any feature function can do anything with any column. Ideally, feature
+ * functions should keep the parse results for the columns in thread-specific
+ * storage, e.g. boost::thread_specific_ptr<Something>.
+ *
+ * In theory a column can contain anything, even text-serialized parse trees or
+ * classifier features as long as it can be represented as text and does not contain
+ * tab characters.
+ *
+ */
+
+typedef std::vector<std::string> TabbedColumns;
+
+class TabbedSentence : public Sentence
+{
+
+public:
+ TabbedSentence() {}
+ ~TabbedSentence() {}
+
+ InputTypeEnum GetType() const {
+ return TabbedSentenceInput;
+ }
+
+ // Splits off the first tab-separated column and passes it to
+ // Sentence::CreateFromString(...), the remaining columns are stored in
+ // m_columns .
+
+ virtual void CreateFromString(const std::vector<FactorType> &factorOrder
+ , const std::string &tabbedString);
+
+ virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
+
+ const TabbedColumns& GetColumns() const {
+ return m_columns;
+ }
+
+ const std::string& GetColumn(size_t i) const {
+ UTIL_THROW_IF2(m_columns.size() <= i,
+ "There is no column with index " << i);
+ return m_columns[i];
+ }
+
+private:
+ TabbedColumns m_columns;
+
+};
+
+
+}
+
diff --git a/moses/TargetPhrase.cpp b/moses/TargetPhrase.cpp
index 9fb33d2a9..bf3920fa1 100644
--- a/moses/TargetPhrase.cpp
+++ b/moses/TargetPhrase.cpp
@@ -1,23 +1,23 @@
// $Id$
/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (C) 2006 University of Edinburgh
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
#include <algorithm>
#include <stdlib.h>
@@ -38,6 +38,25 @@ using namespace std;
namespace Moses
{
+TargetPhrase::TargetPhrase( std::string out_string, const PhraseDictionary *pt)
+ :Phrase(0)
+ , m_fullScore(0.0)
+ , m_futureScore(0.0)
+ , m_alignTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
+ , m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
+ , m_lhsTarget(NULL)
+ , m_ruleSource(NULL)
+ , m_container(pt)
+{
+
+ //ACAT
+ const StaticData &staticData = StaticData::Instance();
+ // XXX should this really be InputFactorOrder???
+ CreateFromString(Output, staticData.GetInputFactorOrder(), out_string,
+ // staticData.GetFactorDelimiter(), // eliminated [UG]
+ NULL);
+}
+
TargetPhrase::TargetPhrase(const PhraseDictionary *pt)
:Phrase()
, m_fullScore(0.0)
@@ -69,6 +88,7 @@ TargetPhrase::TargetPhrase(const TargetPhrase &copy)
, m_scoreBreakdown(copy.m_scoreBreakdown)
, m_alignTerm(copy.m_alignTerm)
, m_alignNonTerm(copy.m_alignNonTerm)
+ , m_properties(copy.m_properties)
, m_container(copy.m_container)
{
if (copy.m_lhsTarget) {
@@ -122,7 +142,6 @@ void TargetPhrase::EvaluateInIsolation(const Phrase &source, const std::vector<F
float weightedScore = m_scoreBreakdown.GetWeightedScore();
m_futureScore += futureScoreBreakdown.GetWeightedScore();
m_fullScore = weightedScore + m_futureScore;
-
}
}
@@ -142,6 +161,14 @@ void TargetPhrase::EvaluateWithSourceContext(const InputType &input, const Input
m_fullScore = weightedScore + m_futureScore;
}
+void TargetPhrase::UpdateScore(ScoreComponentCollection* futureScoreBreakdown)
+{
+ float weightedScore = m_scoreBreakdown.GetWeightedScore();
+ if(futureScoreBreakdown)
+ m_futureScore += futureScoreBreakdown->GetWeightedScore();
+ m_fullScore = weightedScore + m_futureScore;
+}
+
void TargetPhrase::SetXMLScore(float score)
{
const FeatureFunction* prod = PhraseDictionary::GetColl()[0];
@@ -165,7 +192,6 @@ void TargetPhrase::SetAlignmentInfo(const StringPiece &alignString)
UTIL_THROW_IF(endptr != dash->data() + dash->size(), util::ErrnoException, "Error parsing alignment" << *dash);
UTIL_THROW_IF2(++dash, "Extra gunk in alignment " << *token);
-
if (GetWord(targetPos).IsNonTerminal()) {
alignNonTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
} else {
@@ -174,7 +200,7 @@ void TargetPhrase::SetAlignmentInfo(const StringPiece &alignString)
}
SetAlignTerm(alignTerm);
SetAlignNonTerm(alignNonTerm);
-
+ // cerr << "TargetPhrase::SetAlignmentInfo(const StringPiece &alignString) this:|" << *this << "|\n";
}
// void TargetPhrase::SetAlignTerm(const AlignmentInfo::CollType &coll)
@@ -222,12 +248,12 @@ void TargetPhrase::SetProperties(const StringPiece &str)
vector<string> keyValue = TokenizeFirstOnly(tok, " ");
UTIL_THROW_IF2(keyValue.size() != 2,
- "Incorrect format of property: " << str);
+ "Incorrect format of property: " << str);
SetProperty(keyValue[0], keyValue[1]);
}
}
-void TargetPhrase::SetProperty(const std::string &key, const std::string &value)
+void TargetPhrase::SetProperty(const std::string &key, const std::string &value)
{
const StaticData &staticData = StaticData::Instance();
const PhrasePropertyFactory& phrasePropertyFactory = staticData.GetPhrasePropertyFactory();
@@ -270,27 +296,30 @@ std::ostream& operator<<(std::ostream& os, const TargetPhrase& tp)
if (tp.m_lhsTarget) {
os << *tp.m_lhsTarget<< " -> ";
}
+
os << static_cast<const Phrase&>(tp) << ":" << flush;
os << tp.GetAlignNonTerm() << flush;
+ os << ": term=" << tp.GetAlignTerm() << flush;
+ os << ": nonterm=" << tp.GetAlignNonTerm() << flush;
os << ": c=" << tp.m_fullScore << flush;
os << " " << tp.m_scoreBreakdown << flush;
-
+
const Phrase *sourcePhrase = tp.GetRuleSource();
if (sourcePhrase) {
os << " sourcePhrase=" << *sourcePhrase << flush;
}
if (tp.m_properties.size()) {
- os << " properties: " << flush;
+ os << " properties: " << flush;
- TargetPhrase::Properties::const_iterator iter;
- for (iter = tp.m_properties.begin(); iter != tp.m_properties.end(); ++iter) {
- const string &key = iter->first;
- const PhraseProperty *prop = iter->second.get();
- assert(prop);
+ TargetPhrase::Properties::const_iterator iter;
+ for (iter = tp.m_properties.begin(); iter != tp.m_properties.end(); ++iter) {
+ const string &key = iter->first;
+ const PhraseProperty *prop = iter->second.get();
+ assert(prop);
- os << key << "=" << *prop << " ";
- }
+ os << key << "=" << *prop << " ";
+ }
}
return os;
diff --git a/moses/TargetPhrase.h b/moses/TargetPhrase.h
index 35d27feea..db7da97c5 100644
--- a/moses/TargetPhrase.h
+++ b/moses/TargetPhrase.h
@@ -67,6 +67,7 @@ private:
public:
TargetPhrase(const PhraseDictionary *pt = NULL);
+ TargetPhrase(std::string out_string, const PhraseDictionary *pt = NULL);
TargetPhrase(const TargetPhrase &copy);
explicit TargetPhrase(const Phrase &targetPhrase, const PhraseDictionary *pt);
~TargetPhrase();
@@ -81,6 +82,8 @@ public:
// 'inputPath' is guaranteed to be the raw substring from the input. No factors were added or taken away
void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath);
+ void UpdateScore(ScoreComponentCollection *futureScoreBreakdown = NULL);
+
void SetSparseScore(const FeatureFunction* translationScoreProducer, const StringPiece &sparseString);
// used to set translation or gen score
@@ -107,6 +110,15 @@ public:
return m_scoreBreakdown;
}
+ /*
+ //TODO: Probably shouldn't copy this, but otherwise ownership is unclear
+ void SetSourcePhrase(const Phrase& p) {
+ m_sourcePhrase=p;
+ }
+ const Phrase& GetSourcePhrase() const {
+ return m_sourcePhrase;
+ }
+ */
void SetTargetLHS(const Word *lhs) {
m_lhsTarget = lhs;
}
@@ -122,21 +134,19 @@ public:
m_alignNonTerm = alignNonTerm;
}
- // ALNREP = alignment representation,
+ // ALNREP = alignment representation,
// see AlignmentInfo constructors for supported representations
template<typename ALNREP>
- void
- SetAlignTerm(const ALNREP &coll)
- {
+ void
+ SetAlignTerm(const ALNREP &coll) {
m_alignTerm = AlignmentInfoCollection::Instance().Add(coll);
}
- // ALNREP = alignment representation,
+ // ALNREP = alignment representation,
// see AlignmentInfo constructors for supported representations
- template<typename ALNREP>
- void
- SetAlignNonTerm(const ALNREP &coll)
- {
+ template<typename ALNREP>
+ void
+ SetAlignNonTerm(const ALNREP &coll) {
m_alignNonTerm = AlignmentInfoCollection::Instance().Add(coll);
}
@@ -152,8 +162,9 @@ public:
return m_ruleSource;
}
- const PhraseDictionary *GetContainer() const
- { return m_container; }
+ const PhraseDictionary *GetContainer() const {
+ return m_container;
+ }
// To be set by the FF that needs it, by default the rule source = NULL
// make a copy of the source side of the rule
diff --git a/moses/TargetPhraseCollection.h b/moses/TargetPhraseCollection.h
index 0c6a7a74c..d61ff2c4f 100644
--- a/moses/TargetPhraseCollection.h
+++ b/moses/TargetPhraseCollection.h
@@ -44,11 +44,10 @@ public:
typedef CollType::iterator iterator;
typedef CollType::const_iterator const_iterator;
- TargetPhrase const*
- operator[](size_t const i) const
- {
+ TargetPhrase const*
+ operator[](size_t const i) const {
return m_collection.at(i);
- }
+ }
iterator begin() {
return m_collection.begin();
@@ -63,8 +62,8 @@ public:
return m_collection.end();
}
- TargetPhraseCollection()
- {}
+ TargetPhraseCollection() {
+ }
TargetPhraseCollection(const TargetPhraseCollection &copy);
@@ -76,6 +75,18 @@ public:
return m_collection;
}
+ //! delete an entry from the collection
+ void Remove(const size_t pos) {
+ if (pos < m_collection.size()) {
+ m_collection.erase(begin() + pos);
+ }
+ }
+
+ //! return an entry of the collection
+ const TargetPhrase* GetTargetPhrase(const size_t pos) const {
+ return m_collection[pos];
+ }
+
//! divide collection into 2 buckets using std::nth_element, the top & bottom according to table limit
void NthElement(size_t tableLimit);
diff --git a/moses/Timer.cpp b/moses/Timer.cpp
index 75f4154ec..6128ab885 100644
--- a/moses/Timer.cpp
+++ b/moses/Timer.cpp
@@ -2,6 +2,7 @@
#include <iomanip>
#include "Util.h"
#include "Timer.h"
+#include "StaticData.h"
#include "util/usage.hh"
@@ -10,7 +11,7 @@ namespace Moses
/***
* Return the total wall time that the timer has been in the "running"
- * state since it was first "started".
+ * state since it was first "started".
*/
double Timer::get_elapsed_time() const
{
@@ -30,7 +31,7 @@ double Timer::get_elapsed_time() const
void Timer::start(const char* msg)
{
// Print an optional message, something like "Starting timer t";
- if (msg) TRACE_ERR( msg << std::endl);
+ if (msg) VERBOSE(1, msg << std::endl);
// Return immediately if the timer is already running
if (running && !stopped) return;
@@ -39,8 +40,7 @@ void Timer::start(const char* msg)
if (stopped) {
start_time = util::WallTime() - (stop_time - start_time);
stopped = false;
- }
- else {
+ } else {
start_time = util::WallTime();
running = true;
}
@@ -53,7 +53,7 @@ void Timer::start(const char* msg)
void Timer::stop(const char* msg)
{
// Print an optional message, something like "Stopping timer t";
- if (msg) TRACE_ERR( msg << std::endl);
+ if (msg) VERBOSE(1, msg << std::endl);
// Return immediately if the timer is not running
if (stopped || !running) return;
@@ -71,10 +71,10 @@ void Timer::stop(const char* msg)
void Timer::check(const char* msg)
{
// Print an optional message, something like "Checking timer t";
- if (msg) TRACE_ERR( msg << " : ");
+ if (msg) VERBOSE(1, msg << " : ");
-// TRACE_ERR( "[" << std::setiosflags(std::ios::fixed) << std::setprecision(2) << (running ? elapsed_time() : 0) << "] seconds\n");
- TRACE_ERR( "[" << (running ? get_elapsed_time() : 0) << "] seconds\n");
+// VERBOSE(1, "[" << std::setiosflags(std::ios::fixed) << std::setprecision(2) << (running ? elapsed_time() : 0) << "] seconds\n");
+ VERBOSE(1, "[" << (running ? get_elapsed_time() : 0) << "] seconds\n");
}
/***
diff --git a/moses/TrainingTask.h b/moses/TrainingTask.h
new file mode 100644
index 000000000..885e8fd16
--- /dev/null
+++ b/moses/TrainingTask.h
@@ -0,0 +1,47 @@
+#pragma once
+
+#include <boost/smart_ptr/shared_ptr.hpp>
+#include "moses/ThreadPool.h"
+#include "moses/TranslationOptionCollection.h"
+#include "moses/IOWrapper.h"
+
+namespace Moses
+{
+class InputType;
+class OutputCollector;
+
+
+class TrainingTask : public Moses::Task
+{
+
+public:
+
+ TrainingTask(Moses::InputType* source, Moses::IOWrapper &ioWrapper)
+ : m_source(source)
+ , m_ioWrapper(ioWrapper) {
+ }
+
+ ~TrainingTask() {
+ }
+
+ void Run() {
+ StaticData::Instance().InitializeForInput(*m_source);
+
+ std::cerr << *m_source << std::endl;
+
+ TranslationOptionCollection *transOptColl = m_source->CreateTranslationOptionCollection();
+ transOptColl->CreateTranslationOptions();
+ delete transOptColl;
+
+ StaticData::Instance().CleanUpAfterSentenceProcessing(*m_source);
+ }
+
+
+private:
+ Moses::InputType* m_source;
+ Moses::IOWrapper &m_ioWrapper;
+
+};
+
+
+} //namespace
diff --git a/moses/TranslationAnalysis.h b/moses/TranslationAnalysis.h
index ccb21f041..143f65967 100644
--- a/moses/TranslationAnalysis.h
+++ b/moses/TranslationAnalysis.h
@@ -7,9 +7,10 @@
#include <iostream>
-namespace Moses {
- class Hypothesis;
- class ChartHypothesis;
+namespace Moses
+{
+class Hypothesis;
+class ChartHypothesis;
}
namespace TranslationAnalysis
diff --git a/moses/TranslationModel/BilingualDynSuffixArray.cpp b/moses/TranslationModel/BilingualDynSuffixArray.cpp
index d93632b7e..b0607b770 100644
--- a/moses/TranslationModel/BilingualDynSuffixArray.cpp
+++ b/moses/TranslationModel/BilingualDynSuffixArray.cpp
@@ -62,7 +62,7 @@ Load(
LoadCorpus(Output, targetStrme, m_outputFactors,*m_trgCorpus, m_trgSntBreaks, m_trgVocab);
UTIL_THROW_IF2(m_srcSntBreaks.size() != m_trgSntBreaks.size(),
- "Source and target arrays aren't the same size");
+ "Source and target arrays aren't the same size");
// build suffix arrays and auxilliary arrays
cerr << "Building Source Suffix Array...\n";
@@ -130,7 +130,7 @@ LoadRawAlignments(string& align)
vector<int> vtmp;
Utils::splitToInt(align, vtmp, "- ");
UTIL_THROW_IF2(vtmp.size() % 2 != 0,
- "Alignment format is incorrect: " << align);
+ "Alignment format is incorrect: " << align);
vector<short> vAlgn; // store as short ints for memory
for (vector<int>::const_iterator itr = vtmp.begin();
itr != vtmp.end(); ++itr) {
@@ -380,7 +380,7 @@ GetMosesFactorIDs(const SAPhrase& phrase, const Phrase& sourcePhrase, const Phra
for(size_t i=0; i < phrase.words.size(); ++i) { // look up trg words
Word& word = m_trgVocab->GetWord( phrase.words[i]);
UTIL_THROW_IF2(word == m_trgVocab->GetkOOVWord(),
- "Unknown word at position " << i);
+ "Unknown word at position " << i);
targetPhrase->AddWord(word);
}
// scoring
diff --git a/moses/TranslationModel/BilingualDynSuffixArray.h b/moses/TranslationModel/BilingualDynSuffixArray.h
index 5b52b8814..1c4ceae34 100644
--- a/moses/TranslationModel/BilingualDynSuffixArray.h
+++ b/moses/TranslationModel/BilingualDynSuffixArray.h
@@ -78,8 +78,8 @@ public:
class ScoresComp
{
public:
- ScoresComp(const vector<float>& weights)
- {}
+ ScoresComp(const vector<float>& weights) {
+ }
bool operator()(const Scores& s1, const Scores& s2) const {
return s1[0] < s2[0]; // just p(e|f) as approximation
// float score1(0), score2(0);
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.h b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.h
index e2ba6779c..02bea7b43 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.h
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.h
@@ -51,16 +51,15 @@ protected:
};
// struct that caches cellLabel, its end position and score for quicker lookup
-struct ChartCellCache
-{
- ChartCellCache(size_t endPos, const ChartCellLabel* cellLabel, float score)
+struct ChartCellCache {
+ ChartCellCache(size_t endPos, const ChartCellLabel* cellLabel, float score)
: endPos(endPos)
, cellLabel(cellLabel)
, score(score) {}
- size_t endPos;
- const ChartCellLabel* cellLabel;
- float score;
+ size_t endPos;
+ const ChartCellLabel* cellLabel;
+ float score;
};
} // namespace Moses
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp
index ac6522ef5..54f172d1e 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp
@@ -52,10 +52,11 @@ ChartRuleLookupManagerMemory::ChartRuleLookupManagerMemory(
}
void ChartRuleLookupManagerMemory::GetChartRuleCollection(
- const WordsRange &range,
+ const InputPath &inputPath,
size_t lastPos,
ChartParserCallback &outColl)
{
+ const WordsRange &range = inputPath.GetWordsRange();
size_t startPos = range.GetStartPos();
size_t absEndPos = range.GetEndPos();
@@ -70,208 +71,200 @@ void ChartRuleLookupManagerMemory::GetChartRuleCollection(
const PhraseDictionaryNodeMemory &rootNode = m_ruleTable.GetRootNode();
- // size-1 terminal rules
+ // all rules starting with terminal
if (startPos == absEndPos) {
- const Word &sourceWord = GetSourceAt(absEndPos).GetLabel();
- const PhraseDictionaryNodeMemory *child = rootNode.GetChild(sourceWord);
-
- // if we found a new rule -> directly add it to the out collection
- if (child != NULL) {
- const TargetPhraseCollection &tpc = child->GetTargetPhraseCollection();
- outColl.Add(tpc, m_stackVec, range);
- }
+ GetTerminalExtension(&rootNode, startPos);
}
// all rules starting with nonterminal
else if (absEndPos > startPos) {
GetNonTerminalExtension(&rootNode, startPos);
- // all (non-unary) rules starting with terminal
- if (absEndPos == startPos+1) {
- GetTerminalExtension(&rootNode, absEndPos-1);
- }
}
// copy temporarily stored rules to out collection
- CompletedRuleCollection rules = m_completedRules[absEndPos];
+ CompletedRuleCollection & rules = m_completedRules[absEndPos];
for (vector<CompletedRule*>::const_iterator iter = rules.begin(); iter != rules.end(); ++iter) {
outColl.Add((*iter)->GetTPC(), (*iter)->GetStackVector(), range);
}
- m_completedRules[absEndPos].Clear();
+ rules.Clear();
}
// Create/update compressed matrix that stores all valid ChartCellLabels for a given start position and label.
void ChartRuleLookupManagerMemory::UpdateCompressedMatrix(size_t startPos,
size_t origEndPos,
- size_t lastPos) {
+ size_t lastPos)
+{
- std::vector<size_t> endPosVec;
- size_t numNonTerms = FactorCollection::Instance().GetNumNonTerminals();
- m_compressedMatrixVec.resize(lastPos+1);
+ std::vector<size_t> endPosVec;
+ size_t numNonTerms = FactorCollection::Instance().GetNumNonTerminals();
+ m_compressedMatrixVec.resize(lastPos+1);
- // we only need to update cell at [startPos, origEndPos-1] for initial lookup
- if (startPos < origEndPos) {
- endPosVec.push_back(origEndPos-1);
- }
+ // we only need to update cell at [startPos, origEndPos-1] for initial lookup
+ if (startPos < origEndPos) {
+ endPosVec.push_back(origEndPos-1);
+ }
- // update all cells starting from startPos+1 for lookup of rule extensions
- else if (startPos == origEndPos)
- {
- startPos++;
- for (size_t endPos = startPos; endPos <= lastPos; endPos++) {
- endPosVec.push_back(endPos);
- }
- //re-use data structure for cells with later start position, but remove chart cells that would break max-chart-span
- for (size_t pos = startPos+1; pos <= lastPos; pos++) {
- CompressedMatrix & cellMatrix = m_compressedMatrixVec[pos];
- cellMatrix.resize(numNonTerms);
- for (size_t i = 0; i < numNonTerms; i++) {
- if (!cellMatrix[i].empty() && cellMatrix[i].back().endPos > lastPos) {
- cellMatrix[i].pop_back();
- }
- }
+ // update all cells starting from startPos+1 for lookup of rule extensions
+ else if (startPos == origEndPos) {
+ startPos++;
+ for (size_t endPos = startPos; endPos <= lastPos; endPos++) {
+ endPosVec.push_back(endPos);
+ }
+ //re-use data structure for cells with later start position, but remove chart cells that would break max-chart-span
+ for (size_t pos = startPos+1; pos <= lastPos; pos++) {
+ CompressedMatrix & cellMatrix = m_compressedMatrixVec[pos];
+ cellMatrix.resize(numNonTerms);
+ for (size_t i = 0; i < numNonTerms; i++) {
+ if (!cellMatrix[i].empty() && cellMatrix[i].back().endPos > lastPos) {
+ cellMatrix[i].pop_back();
}
+ }
}
+ }
- if (startPos > lastPos) {
- return;
- }
+ if (startPos > lastPos) {
+ return;
+ }
- // populate compressed matrix with all chart cells that start at current start position
- CompressedMatrix & cellMatrix = m_compressedMatrixVec[startPos];
- cellMatrix.clear();
- cellMatrix.resize(numNonTerms);
- for (std::vector<size_t>::iterator p = endPosVec.begin(); p != endPosVec.end(); ++p) {
+ // populate compressed matrix with all chart cells that start at current start position
+ CompressedMatrix & cellMatrix = m_compressedMatrixVec[startPos];
+ cellMatrix.clear();
+ cellMatrix.resize(numNonTerms);
+ for (std::vector<size_t>::iterator p = endPosVec.begin(); p != endPosVec.end(); ++p) {
- size_t endPos = *p;
- // target non-terminal labels for the span
- const ChartCellLabelSet &targetNonTerms = GetTargetLabelSet(startPos, endPos);
+ size_t endPos = *p;
+ // target non-terminal labels for the span
+ const ChartCellLabelSet &targetNonTerms = GetTargetLabelSet(startPos, endPos);
- if (targetNonTerms.GetSize() == 0) {
- continue;
- }
+ if (targetNonTerms.GetSize() == 0) {
+ continue;
+ }
#if !defined(UNLABELLED_SOURCE)
- // source non-terminal labels for the span
- const InputPath &inputPath = GetParser().GetInputPath(startPos, endPos);
+ // source non-terminal labels for the span
+ const InputPath &inputPath = GetParser().GetInputPath(startPos, endPos);
- // can this ever be true? Moses seems to pad the non-terminal set of the input with [X]
- if (inputPath.GetNonTerminalSet().size() == 0) {
- continue;
- }
+ // can this ever be true? Moses seems to pad the non-terminal set of the input with [X]
+ if (inputPath.GetNonTerminalSet().size() == 0) {
+ continue;
+ }
#endif
- for (size_t i = 0; i < numNonTerms; i++) {
- const ChartCellLabel *cellLabel = targetNonTerms.Find(i);
- if (cellLabel != NULL) {
- float score = cellLabel->GetBestScore(m_outColl);
- cellMatrix[i].push_back(ChartCellCache(endPos, cellLabel, score));
- }
- }
+ for (size_t i = 0; i < numNonTerms; i++) {
+ const ChartCellLabel *cellLabel = targetNonTerms.Find(i);
+ if (cellLabel != NULL) {
+ float score = cellLabel->GetBestScore(m_outColl);
+ cellMatrix[i].push_back(ChartCellCache(endPos, cellLabel, score));
+ }
}
+ }
}
// if a (partial) rule matches, add it to list completed rules (if non-unary and non-empty), and try find expansions that have this partial rule as prefix.
void ChartRuleLookupManagerMemory::AddAndExtend(
- const PhraseDictionaryNodeMemory *node,
- size_t endPos) {
+ const PhraseDictionaryNodeMemory *node,
+ size_t endPos)
+{
- const TargetPhraseCollection &tpc = node->GetTargetPhraseCollection();
- // add target phrase collection (except if rule is empty or unary)
- if (!tpc.IsEmpty() && endPos != m_unaryPos) {
- m_completedRules[endPos].Add(tpc, m_stackVec, m_stackScores, *m_outColl);
- }
+ const TargetPhraseCollection &tpc = node->GetTargetPhraseCollection();
+ // add target phrase collection (except if rule is empty or a unary non-terminal rule)
+ if (!tpc.IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) {
+ m_completedRules[endPos].Add(tpc, m_stackVec, m_stackScores, *m_outColl);
+ }
- // get all further extensions of rule (until reaching end of sentence or max-chart-span)
- if (endPos < m_lastPos) {
- if (!node->GetTerminalMap().empty()) {
- GetTerminalExtension(node, endPos+1);
- }
- if (!node->GetNonTerminalMap().empty()) {
- GetNonTerminalExtension(node, endPos+1);
- }
+ // get all further extensions of rule (until reaching end of sentence or max-chart-span)
+ if (endPos < m_lastPos) {
+ if (!node->GetTerminalMap().empty()) {
+ GetTerminalExtension(node, endPos+1);
+ }
+ if (!node->GetNonTerminalMap().empty()) {
+ GetNonTerminalExtension(node, endPos+1);
}
+ }
}
// search all possible terminal extensions of a partial rule (pointed at by node) at a given position
// recursively try to expand partial rules into full rules up to m_lastPos.
void ChartRuleLookupManagerMemory::GetTerminalExtension(
- const PhraseDictionaryNodeMemory *node,
- size_t pos) {
-
- const Word &sourceWord = GetSourceAt(pos).GetLabel();
- const PhraseDictionaryNodeMemory::TerminalMap & terminals = node->GetTerminalMap();
-
- // if node has small number of terminal edges, test word equality for each.
- if (terminals.size() < 5) {
- for (PhraseDictionaryNodeMemory::TerminalMap::const_iterator iter = terminals.begin(); iter != terminals.end(); ++iter) {
- const Word & word = iter->first;
- if (TerminalEqualityPred()(word, sourceWord)) {
- const PhraseDictionaryNodeMemory *child = & iter->second;
- AddAndExtend(child, pos);
- break;
- }
- }
- }
- // else, do hash lookup
- else {
- const PhraseDictionaryNodeMemory *child = node->GetChild(sourceWord);
- if (child != NULL) {
+ const PhraseDictionaryNodeMemory *node,
+ size_t pos)
+{
+
+ const Word &sourceWord = GetSourceAt(pos).GetLabel();
+ const PhraseDictionaryNodeMemory::TerminalMap & terminals = node->GetTerminalMap();
+
+ // if node has small number of terminal edges, test word equality for each.
+ if (terminals.size() < 5) {
+ for (PhraseDictionaryNodeMemory::TerminalMap::const_iterator iter = terminals.begin(); iter != terminals.end(); ++iter) {
+ const Word & word = iter->first;
+ if (TerminalEqualityPred()(word, sourceWord)) {
+ const PhraseDictionaryNodeMemory *child = & iter->second;
AddAndExtend(child, pos);
+ break;
}
}
+ }
+ // else, do hash lookup
+ else {
+ const PhraseDictionaryNodeMemory *child = node->GetChild(sourceWord);
+ if (child != NULL) {
+ AddAndExtend(child, pos);
+ }
+ }
}
// search all nonterminal possible nonterminal extensions of a partial rule (pointed at by node) for a variable span (starting from startPos).
// recursively try to expand partial rules into full rules up to m_lastPos.
void ChartRuleLookupManagerMemory::GetNonTerminalExtension(
- const PhraseDictionaryNodeMemory *node,
- size_t startPos) {
+ const PhraseDictionaryNodeMemory *node,
+ size_t startPos)
+{
- const CompressedMatrix &compressedMatrix = m_compressedMatrixVec[startPos];
+ const CompressedMatrix &compressedMatrix = m_compressedMatrixVec[startPos];
- // non-terminal labels in phrase dictionary node
- const PhraseDictionaryNodeMemory::NonTerminalMap & nonTermMap = node->GetNonTerminalMap();
+ // non-terminal labels in phrase dictionary node
+ const PhraseDictionaryNodeMemory::NonTerminalMap & nonTermMap = node->GetNonTerminalMap();
- // make room for back pointer
- m_stackVec.push_back(NULL);
- m_stackScores.push_back(0);
+ // make room for back pointer
+ m_stackVec.push_back(NULL);
+ m_stackScores.push_back(0);
- // loop over possible expansions of the rule
- PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator p;
- PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator end = nonTermMap.end();
- for (p = nonTermMap.begin(); p != end; ++p) {
- // does it match possible source and target non-terminals?
+ // loop over possible expansions of the rule
+ PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator p;
+ PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator end = nonTermMap.end();
+ for (p = nonTermMap.begin(); p != end; ++p) {
+ // does it match possible source and target non-terminals?
#if defined(UNLABELLED_SOURCE)
- const Word &targetNonTerm = p->first;
+ const Word &targetNonTerm = p->first;
#else
- const Word &targetNonTerm = p->first.second;
+ const Word &targetNonTerm = p->first.second;
#endif
- const PhraseDictionaryNodeMemory *child = &p->second;
- //soft matching of NTs
- if (m_isSoftMatching && !m_softMatchingMap[targetNonTerm[0]->GetId()].empty()) {
- const std::vector<Word>& softMatches = m_softMatchingMap[targetNonTerm[0]->GetId()];
- for (std::vector<Word>::const_iterator softMatch = softMatches.begin(); softMatch != softMatches.end(); ++softMatch) {
- const CompressedColumn &matches = compressedMatrix[(*softMatch)[0]->GetId()];
- for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
- m_stackVec.back() = match->cellLabel;
- m_stackScores.back() = match->score;
- AddAndExtend(child, match->endPos);
- }
+ const PhraseDictionaryNodeMemory *child = &p->second;
+ //soft matching of NTs
+ if (m_isSoftMatching && !m_softMatchingMap[targetNonTerm[0]->GetId()].empty()) {
+ const std::vector<Word>& softMatches = m_softMatchingMap[targetNonTerm[0]->GetId()];
+ for (std::vector<Word>::const_iterator softMatch = softMatches.begin(); softMatch != softMatches.end(); ++softMatch) {
+ const CompressedColumn &matches = compressedMatrix[(*softMatch)[0]->GetId()];
+ for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
+ m_stackVec.back() = match->cellLabel;
+ m_stackScores.back() = match->score;
+ AddAndExtend(child, match->endPos);
}
- } // end of soft matches lookup
-
- const CompressedColumn &matches = compressedMatrix[targetNonTerm[0]->GetId()];
- for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
- m_stackVec.back() = match->cellLabel;
- m_stackScores.back() = match->score;
- AddAndExtend(child, match->endPos);
}
+ } // end of soft matches lookup
+
+ const CompressedColumn &matches = compressedMatrix[targetNonTerm[0]->GetId()];
+ for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
+ m_stackVec.back() = match->cellLabel;
+ m_stackScores.back() = match->score;
+ AddAndExtend(child, match->endPos);
}
- // remove last back pointer
- m_stackVec.pop_back();
- m_stackScores.pop_back();
+ }
+ // remove last back pointer
+ m_stackVec.pop_back();
+ m_stackScores.pop_back();
}
} // namespace Moses
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.h b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.h
index 80b6f7246..84e5f085d 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.h
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.h
@@ -51,7 +51,7 @@ public:
~ChartRuleLookupManagerMemory() {};
virtual void GetChartRuleCollection(
- const WordsRange &range,
+ const InputPath &inputPath,
size_t lastPos, // last position to consider if using lookahead
ChartParserCallback &outColl);
@@ -70,8 +70,8 @@ private:
size_t endPos);
void UpdateCompressedMatrix(size_t startPos,
- size_t endPos,
- size_t lastPos);
+ size_t endPos,
+ size_t lastPos);
const PhraseDictionaryMemory &m_ruleTable;
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp
index 784d31deb..e090ee1ae 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp
@@ -52,10 +52,11 @@ ChartRuleLookupManagerMemoryPerSentence::ChartRuleLookupManagerMemoryPerSentence
}
void ChartRuleLookupManagerMemoryPerSentence::GetChartRuleCollection(
- const WordsRange &range,
+ const InputPath &inputPath,
size_t lastPos,
ChartParserCallback &outColl)
{
+ const WordsRange &range = inputPath.GetWordsRange();
size_t startPos = range.GetStartPos();
size_t absEndPos = range.GetEndPos();
@@ -70,208 +71,200 @@ void ChartRuleLookupManagerMemoryPerSentence::GetChartRuleCollection(
const PhraseDictionaryNodeMemory &rootNode = m_ruleTable.GetRootNode(GetParser().GetTranslationId());
- // size-1 terminal rules
+ // all rules starting with terminal
if (startPos == absEndPos) {
- const Word &sourceWord = GetSourceAt(absEndPos).GetLabel();
- const PhraseDictionaryNodeMemory *child = rootNode.GetChild(sourceWord);
-
- // if we found a new rule -> directly add it to the out collection
- if (child != NULL) {
- const TargetPhraseCollection &tpc = child->GetTargetPhraseCollection();
- outColl.Add(tpc, m_stackVec, range);
- }
+ GetTerminalExtension(&rootNode, startPos);
}
// all rules starting with nonterminal
else if (absEndPos > startPos) {
GetNonTerminalExtension(&rootNode, startPos);
- // all (non-unary) rules starting with terminal
- if (absEndPos == startPos+1) {
- GetTerminalExtension(&rootNode, absEndPos-1);
- }
}
// copy temporarily stored rules to out collection
- CompletedRuleCollection rules = m_completedRules[absEndPos];
+ CompletedRuleCollection & rules = m_completedRules[absEndPos];
for (vector<CompletedRule*>::const_iterator iter = rules.begin(); iter != rules.end(); ++iter) {
outColl.Add((*iter)->GetTPC(), (*iter)->GetStackVector(), range);
}
- m_completedRules[absEndPos].Clear();
+ rules.Clear();
}
// Create/update compressed matrix that stores all valid ChartCellLabels for a given start position and label.
void ChartRuleLookupManagerMemoryPerSentence::UpdateCompressedMatrix(size_t startPos,
size_t origEndPos,
- size_t lastPos) {
+ size_t lastPos)
+{
- std::vector<size_t> endPosVec;
- size_t numNonTerms = FactorCollection::Instance().GetNumNonTerminals();
- m_compressedMatrixVec.resize(lastPos+1);
+ std::vector<size_t> endPosVec;
+ size_t numNonTerms = FactorCollection::Instance().GetNumNonTerminals();
+ m_compressedMatrixVec.resize(lastPos+1);
- // we only need to update cell at [startPos, origEndPos-1] for initial lookup
- if (startPos < origEndPos) {
- endPosVec.push_back(origEndPos-1);
- }
+ // we only need to update cell at [startPos, origEndPos-1] for initial lookup
+ if (startPos < origEndPos) {
+ endPosVec.push_back(origEndPos-1);
+ }
- // update all cells starting from startPos+1 for lookup of rule extensions
- else if (startPos == origEndPos)
- {
- startPos++;
- for (size_t endPos = startPos; endPos <= lastPos; endPos++) {
- endPosVec.push_back(endPos);
- }
- //re-use data structure for cells with later start position, but remove chart cells that would break max-chart-span
- for (size_t pos = startPos+1; pos <= lastPos; pos++) {
- CompressedMatrix & cellMatrix = m_compressedMatrixVec[pos];
- cellMatrix.resize(numNonTerms);
- for (size_t i = 0; i < numNonTerms; i++) {
- if (!cellMatrix[i].empty() && cellMatrix[i].back().endPos > lastPos) {
- cellMatrix[i].pop_back();
- }
- }
+ // update all cells starting from startPos+1 for lookup of rule extensions
+ else if (startPos == origEndPos) {
+ startPos++;
+ for (size_t endPos = startPos; endPos <= lastPos; endPos++) {
+ endPosVec.push_back(endPos);
+ }
+ //re-use data structure for cells with later start position, but remove chart cells that would break max-chart-span
+ for (size_t pos = startPos+1; pos <= lastPos; pos++) {
+ CompressedMatrix & cellMatrix = m_compressedMatrixVec[pos];
+ cellMatrix.resize(numNonTerms);
+ for (size_t i = 0; i < numNonTerms; i++) {
+ if (!cellMatrix[i].empty() && cellMatrix[i].back().endPos > lastPos) {
+ cellMatrix[i].pop_back();
}
+ }
}
+ }
- if (startPos > lastPos) {
- return;
- }
+ if (startPos > lastPos) {
+ return;
+ }
- // populate compressed matrix with all chart cells that start at current start position
- CompressedMatrix & cellMatrix = m_compressedMatrixVec[startPos];
- cellMatrix.clear();
- cellMatrix.resize(numNonTerms);
- for (std::vector<size_t>::iterator p = endPosVec.begin(); p != endPosVec.end(); ++p) {
+ // populate compressed matrix with all chart cells that start at current start position
+ CompressedMatrix & cellMatrix = m_compressedMatrixVec[startPos];
+ cellMatrix.clear();
+ cellMatrix.resize(numNonTerms);
+ for (std::vector<size_t>::iterator p = endPosVec.begin(); p != endPosVec.end(); ++p) {
- size_t endPos = *p;
- // target non-terminal labels for the span
- const ChartCellLabelSet &targetNonTerms = GetTargetLabelSet(startPos, endPos);
+ size_t endPos = *p;
+ // target non-terminal labels for the span
+ const ChartCellLabelSet &targetNonTerms = GetTargetLabelSet(startPos, endPos);
- if (targetNonTerms.GetSize() == 0) {
- continue;
- }
+ if (targetNonTerms.GetSize() == 0) {
+ continue;
+ }
#if !defined(UNLABELLED_SOURCE)
- // source non-terminal labels for the span
- const InputPath &inputPath = GetParser().GetInputPath(startPos, endPos);
+ // source non-terminal labels for the span
+ const InputPath &inputPath = GetParser().GetInputPath(startPos, endPos);
- // can this ever be true? Moses seems to pad the non-terminal set of the input with [X]
- if (inputPath.GetNonTerminalSet().size() == 0) {
- continue;
- }
+ // can this ever be true? Moses seems to pad the non-terminal set of the input with [X]
+ if (inputPath.GetNonTerminalSet().size() == 0) {
+ continue;
+ }
#endif
- for (size_t i = 0; i < numNonTerms; i++) {
- const ChartCellLabel *cellLabel = targetNonTerms.Find(i);
- if (cellLabel != NULL) {
- float score = cellLabel->GetBestScore(m_outColl);
- cellMatrix[i].push_back(ChartCellCache(endPos, cellLabel, score));
- }
- }
+ for (size_t i = 0; i < numNonTerms; i++) {
+ const ChartCellLabel *cellLabel = targetNonTerms.Find(i);
+ if (cellLabel != NULL) {
+ float score = cellLabel->GetBestScore(m_outColl);
+ cellMatrix[i].push_back(ChartCellCache(endPos, cellLabel, score));
+ }
}
+ }
}
// if a (partial) rule matches, add it to list completed rules (if non-unary and non-empty), and try find expansions that have this partial rule as prefix.
void ChartRuleLookupManagerMemoryPerSentence::AddAndExtend(
- const PhraseDictionaryNodeMemory *node,
- size_t endPos) {
+ const PhraseDictionaryNodeMemory *node,
+ size_t endPos)
+{
- const TargetPhraseCollection &tpc = node->GetTargetPhraseCollection();
- // add target phrase collection (except if rule is empty or unary)
- if (!tpc.IsEmpty() && endPos != m_unaryPos) {
- m_completedRules[endPos].Add(tpc, m_stackVec, m_stackScores, *m_outColl);
- }
+ const TargetPhraseCollection &tpc = node->GetTargetPhraseCollection();
+ // add target phrase collection (except if rule is empty or a unary non-terminal rule)
+ if (!tpc.IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) {
+ m_completedRules[endPos].Add(tpc, m_stackVec, m_stackScores, *m_outColl);
+ }
- // get all further extensions of rule (until reaching end of sentence or max-chart-span)
- if (endPos < m_lastPos) {
- if (!node->GetTerminalMap().empty()) {
- GetTerminalExtension(node, endPos+1);
- }
- if (!node->GetNonTerminalMap().empty()) {
- GetNonTerminalExtension(node, endPos+1);
- }
+ // get all further extensions of rule (until reaching end of sentence or max-chart-span)
+ if (endPos < m_lastPos) {
+ if (!node->GetTerminalMap().empty()) {
+ GetTerminalExtension(node, endPos+1);
+ }
+ if (!node->GetNonTerminalMap().empty()) {
+ GetNonTerminalExtension(node, endPos+1);
}
+ }
}
// search all possible terminal extensions of a partial rule (pointed at by node) at a given position
// recursively try to expand partial rules into full rules up to m_lastPos.
void ChartRuleLookupManagerMemoryPerSentence::GetTerminalExtension(
- const PhraseDictionaryNodeMemory *node,
- size_t pos) {
-
- const Word &sourceWord = GetSourceAt(pos).GetLabel();
- const PhraseDictionaryNodeMemory::TerminalMap & terminals = node->GetTerminalMap();
-
- // if node has small number of terminal edges, test word equality for each.
- if (terminals.size() < 5) {
- for (PhraseDictionaryNodeMemory::TerminalMap::const_iterator iter = terminals.begin(); iter != terminals.end(); ++iter) {
- const Word & word = iter->first;
- if (TerminalEqualityPred()(word, sourceWord)) {
- const PhraseDictionaryNodeMemory *child = & iter->second;
- AddAndExtend(child, pos);
- break;
- }
- }
- }
- // else, do hash lookup
- else {
- const PhraseDictionaryNodeMemory *child = node->GetChild(sourceWord);
- if (child != NULL) {
+ const PhraseDictionaryNodeMemory *node,
+ size_t pos)
+{
+
+ const Word &sourceWord = GetSourceAt(pos).GetLabel();
+ const PhraseDictionaryNodeMemory::TerminalMap & terminals = node->GetTerminalMap();
+
+ // if node has small number of terminal edges, test word equality for each.
+ if (terminals.size() < 5) {
+ for (PhraseDictionaryNodeMemory::TerminalMap::const_iterator iter = terminals.begin(); iter != terminals.end(); ++iter) {
+ const Word & word = iter->first;
+ if (TerminalEqualityPred()(word, sourceWord)) {
+ const PhraseDictionaryNodeMemory *child = & iter->second;
AddAndExtend(child, pos);
+ break;
}
}
+ }
+ // else, do hash lookup
+ else {
+ const PhraseDictionaryNodeMemory *child = node->GetChild(sourceWord);
+ if (child != NULL) {
+ AddAndExtend(child, pos);
+ }
+ }
}
// search all nonterminal possible nonterminal extensions of a partial rule (pointed at by node) for a variable span (starting from startPos).
// recursively try to expand partial rules into full rules up to m_lastPos.
void ChartRuleLookupManagerMemoryPerSentence::GetNonTerminalExtension(
- const PhraseDictionaryNodeMemory *node,
- size_t startPos) {
+ const PhraseDictionaryNodeMemory *node,
+ size_t startPos)
+{
- const CompressedMatrix &compressedMatrix = m_compressedMatrixVec[startPos];
+ const CompressedMatrix &compressedMatrix = m_compressedMatrixVec[startPos];
- // non-terminal labels in phrase dictionary node
- const PhraseDictionaryNodeMemory::NonTerminalMap & nonTermMap = node->GetNonTerminalMap();
+ // non-terminal labels in phrase dictionary node
+ const PhraseDictionaryNodeMemory::NonTerminalMap & nonTermMap = node->GetNonTerminalMap();
- // make room for back pointer
- m_stackVec.push_back(NULL);
- m_stackScores.push_back(0);
+ // make room for back pointer
+ m_stackVec.push_back(NULL);
+ m_stackScores.push_back(0);
- // loop over possible expansions of the rule
- PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator p;
- PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator end = nonTermMap.end();
- for (p = nonTermMap.begin(); p != end; ++p) {
- // does it match possible source and target non-terminals?
+ // loop over possible expansions of the rule
+ PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator p;
+ PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator end = nonTermMap.end();
+ for (p = nonTermMap.begin(); p != end; ++p) {
+ // does it match possible source and target non-terminals?
#if defined(UNLABELLED_SOURCE)
- const Word &targetNonTerm = p->first;
+ const Word &targetNonTerm = p->first;
#else
- const Word &targetNonTerm = p->first.second;
+ const Word &targetNonTerm = p->first.second;
#endif
- const PhraseDictionaryNodeMemory *child = &p->second;
- //soft matching of NTs
- if (m_isSoftMatching && !m_softMatchingMap[targetNonTerm[0]->GetId()].empty()) {
- const std::vector<Word>& softMatches = m_softMatchingMap[targetNonTerm[0]->GetId()];
- for (std::vector<Word>::const_iterator softMatch = softMatches.begin(); softMatch != softMatches.end(); ++softMatch) {
- const CompressedColumn &matches = compressedMatrix[(*softMatch)[0]->GetId()];
- for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
- m_stackVec.back() = match->cellLabel;
- m_stackScores.back() = match->score;
- AddAndExtend(child, match->endPos);
- }
+ const PhraseDictionaryNodeMemory *child = &p->second;
+ //soft matching of NTs
+ if (m_isSoftMatching && !m_softMatchingMap[targetNonTerm[0]->GetId()].empty()) {
+ const std::vector<Word>& softMatches = m_softMatchingMap[targetNonTerm[0]->GetId()];
+ for (std::vector<Word>::const_iterator softMatch = softMatches.begin(); softMatch != softMatches.end(); ++softMatch) {
+ const CompressedColumn &matches = compressedMatrix[(*softMatch)[0]->GetId()];
+ for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
+ m_stackVec.back() = match->cellLabel;
+ m_stackScores.back() = match->score;
+ AddAndExtend(child, match->endPos);
}
- } // end of soft matches lookup
-
- const CompressedColumn &matches = compressedMatrix[targetNonTerm[0]->GetId()];
- for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
- m_stackVec.back() = match->cellLabel;
- m_stackScores.back() = match->score;
- AddAndExtend(child, match->endPos);
}
+ } // end of soft matches lookup
+
+ const CompressedColumn &matches = compressedMatrix[targetNonTerm[0]->GetId()];
+ for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
+ m_stackVec.back() = match->cellLabel;
+ m_stackScores.back() = match->score;
+ AddAndExtend(child, match->endPos);
}
- // remove last back pointer
- m_stackVec.pop_back();
- m_stackScores.pop_back();
+ }
+ // remove last back pointer
+ m_stackVec.pop_back();
+ m_stackScores.pop_back();
}
} // namespace Moses
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h
index 6cdc73dd4..9db0d02f0 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h
@@ -44,13 +44,13 @@ public:
typedef std::vector<CompressedColumn> CompressedMatrix;
ChartRuleLookupManagerMemoryPerSentence(const ChartParser &parser,
- const ChartCellCollectionBase &cellColl,
- const PhraseDictionaryFuzzyMatch &ruleTable);
+ const ChartCellCollectionBase &cellColl,
+ const PhraseDictionaryFuzzyMatch &ruleTable);
~ChartRuleLookupManagerMemoryPerSentence() {};
virtual void GetChartRuleCollection(
- const WordsRange &range,
+ const InputPath &inputPath,
size_t lastPos, // last position to consider if using lookahead
ChartParserCallback &outColl);
@@ -69,8 +69,8 @@ private:
size_t endPos);
void UpdateCompressedMatrix(size_t startPos,
- size_t endPos,
- size_t lastPos);
+ size_t endPos,
+ size_t lastPos);
const PhraseDictionaryFuzzyMatch &m_ruleTable;
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp
index 01f90fd82..77678e523 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp
@@ -47,7 +47,7 @@ ChartRuleLookupManagerOnDisk::ChartRuleLookupManagerOnDisk(
, m_outputFactorsVec(outputFactorsVec)
{
UTIL_THROW_IF2(m_expandableDottedRuleListVec.size() != 0,
- "Dotted rule collection not correctly initialized");
+ "Dotted rule collection not correctly initialized");
size_t sourceSize = parser.GetSize();
m_expandableDottedRuleListVec.resize(sourceSize);
@@ -75,12 +75,13 @@ ChartRuleLookupManagerOnDisk::~ChartRuleLookupManagerOnDisk()
}
void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
- const WordsRange &range,
+ const InputPath &inputPath,
size_t lastPos,
ChartParserCallback &outColl)
{
const StaticData &staticData = StaticData::Instance();
const Word &defaultSourceNonTerm = staticData.GetInputDefaultNonTerminal();
+ const WordsRange &range = inputPath.GetWordsRange();
size_t relEndPos = range.GetEndPos() - range.GetStartPos();
size_t absEndPos = range.GetEndPos();
@@ -174,14 +175,14 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
bool doSearch = true;
if (m_dictionary.m_maxSpanDefault != NOT_FOUND) {
- // for Hieu's source syntax
+ // for Hieu's source syntax
- bool isSourceSyntaxNonTerm = sourceLHS != defaultSourceNonTerm;
- size_t nonTermNumWordsCovered = endPos - startPos + 1;
+ bool isSourceSyntaxNonTerm = sourceLHS != defaultSourceNonTerm;
+ size_t nonTermNumWordsCovered = endPos - startPos + 1;
- doSearch = isSourceSyntaxNonTerm ?
- nonTermNumWordsCovered <= m_dictionary.m_maxSpanLabelled :
- nonTermNumWordsCovered <= m_dictionary.m_maxSpanDefault;
+ doSearch = isSourceSyntaxNonTerm ?
+ nonTermNumWordsCovered <= m_dictionary.m_maxSpanLabelled :
+ nonTermNumWordsCovered <= m_dictionary.m_maxSpanDefault;
}
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.h b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.h
index 6f2f71cdd..0ff87b378 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.h
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.h
@@ -45,7 +45,7 @@ public:
~ChartRuleLookupManagerOnDisk();
- virtual void GetChartRuleCollection(const WordsRange &range,
+ virtual void GetChartRuleCollection(const InputPath &inputPath,
size_t last,
ChartParserCallback &outColl);
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp
index 93ddc82db..53011e5ac 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp
@@ -52,7 +52,7 @@ ChartRuleLookupManagerSkeleton::~ChartRuleLookupManagerSkeleton()
}
void ChartRuleLookupManagerSkeleton::GetChartRuleCollection(
- const WordsRange &range,
+ const InputPath &inputPath,
size_t last,
ChartParserCallback &outColl)
{
@@ -61,6 +61,8 @@ void ChartRuleLookupManagerSkeleton::GetChartRuleCollection(
TargetPhraseCollection *tpColl = new TargetPhraseCollection();
m_tpColl.push_back(tpColl);
+ const WordsRange &range = inputPath.GetWordsRange();
+
if (range.GetNumWordsCovered() == 1) {
const ChartCellLabel &sourceWordLabel = GetSourceAt(range.GetStartPos());
const Word &sourceWord = sourceWordLabel.GetLabel();
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h
index dd8b82278..0c141d2ef 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h
@@ -41,7 +41,7 @@ public:
~ChartRuleLookupManagerSkeleton();
virtual void GetChartRuleCollection(
- const WordsRange &range,
+ const InputPath &inputPath,
size_t last,
ChartParserCallback &outColl);
diff --git a/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.cpp b/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.cpp
index 325650c2e..3a8fc8662 100644
--- a/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.cpp
+++ b/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.cpp
@@ -32,10 +32,15 @@ CompletedRuleCollection::CompletedRuleCollection() : m_ruleLimit(StaticData::Ins
m_scoreThreshold = numeric_limits<float>::infinity();
}
+CompletedRuleCollection::~CompletedRuleCollection()
+{
+ Clear();
+}
+
// copies some functionality (pruning) from ChartTranslationOptionList::Add
void CompletedRuleCollection::Add(const TargetPhraseCollection &tpc,
- const StackVec &stackVec,
- const ChartParserCallback &outColl)
+ const StackVec &stackVec,
+ const ChartParserCallback &outColl)
{
if (tpc.IsEmpty()) {
return;
@@ -64,10 +69,10 @@ void CompletedRuleCollection::Add(const TargetPhraseCollection &tpc,
// Prune if bursting
if (m_ruleLimit && m_collection.size() == m_ruleLimit * 2) {
- NTH_ELEMENT4(m_collection.begin(),
- m_collection.begin() + m_ruleLimit - 1,
- m_collection.end(),
- CompletedRuleOrdered());
+ NTH_ELEMENT4(m_collection.begin(),
+ m_collection.begin() + m_ruleLimit - 1,
+ m_collection.end(),
+ CompletedRuleOrdered());
m_scoreThreshold = m_collection[m_ruleLimit-1]->GetScoreEstimate();
for (size_t i = 0 + m_ruleLimit; i < m_collection.size(); i++) {
delete m_collection[i];
@@ -80,25 +85,25 @@ void CompletedRuleCollection::Add(const TargetPhraseCollection &tpc,
// copies some functionality (pruning) from ChartTranslationOptionList::Add
void CompletedRuleCollection::Add(const TargetPhraseCollection &tpc,
- const StackVec &stackVec,
- const std::vector<float> &stackScores,
- const ChartParserCallback &outColl)
+ const StackVec &stackVec,
+ const std::vector<float> &stackScores,
+ const ChartParserCallback &outColl)
{
- if (tpc.IsEmpty()) {
- return;
- }
+ if (tpc.IsEmpty()) {
+ return;
+ }
- const TargetPhrase &targetPhrase = **(tpc.begin());
- float score = std::accumulate(stackScores.begin(), stackScores.end(), targetPhrase.GetFutureScore());
+ const TargetPhrase &targetPhrase = **(tpc.begin());
+ float score = std::accumulate(stackScores.begin(), stackScores.end(), targetPhrase.GetFutureScore());
- // If the rule limit has already been reached then don't add the option
- // unless it is better than at least one existing option.
- if (m_collection.size() > m_ruleLimit && score < m_scoreThreshold) {
- return;
- }
+ // If the rule limit has already been reached then don't add the option
+ // unless it is better than at least one existing option.
+ if (m_collection.size() > m_ruleLimit && score < m_scoreThreshold) {
+ return;
+ }
- CompletedRule *completedRule = new CompletedRule(tpc, stackVec, score);
- m_collection.push_back(completedRule);
+ CompletedRule *completedRule = new CompletedRule(tpc, stackVec, score);
+ m_collection.push_back(completedRule);
// If the rule limit hasn't been exceeded then update the threshold.
if (m_collection.size() <= m_ruleLimit) {
@@ -107,10 +112,10 @@ void CompletedRuleCollection::Add(const TargetPhraseCollection &tpc,
// Prune if bursting
if (m_collection.size() == m_ruleLimit * 2) {
- NTH_ELEMENT4(m_collection.begin(),
- m_collection.begin() + m_ruleLimit - 1,
- m_collection.end(),
- CompletedRuleOrdered());
+ NTH_ELEMENT4(m_collection.begin(),
+ m_collection.begin() + m_ruleLimit - 1,
+ m_collection.end(),
+ CompletedRuleOrdered());
m_scoreThreshold = m_collection[m_ruleLimit-1]->GetScoreEstimate();
for (size_t i = 0 + m_ruleLimit; i < m_collection.size(); i++) {
delete m_collection[i];
@@ -120,4 +125,4 @@ void CompletedRuleCollection::Add(const TargetPhraseCollection &tpc,
}
}
-} \ No newline at end of file
+}
diff --git a/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.h b/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.h
index 18373d743..84b583df6 100644
--- a/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.h
+++ b/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.h
@@ -34,16 +34,15 @@ namespace Moses
{
// temporary storage for a completed rule (because we use lookahead to find rules before ChartManager wants us to)
-struct CompletedRule
-{
+struct CompletedRule {
public:
CompletedRule(const TargetPhraseCollection &tpc,
- const StackVec &stackVec,
- const float score)
- : m_stackVec(stackVec)
- , m_tpc(tpc)
- , m_score(score) {}
+ const StackVec &stackVec,
+ const float score)
+ : m_stackVec(stackVec)
+ , m_tpc(tpc)
+ , m_score(score) {}
const TargetPhraseCollection & GetTPC() const {
return m_tpc;
@@ -72,23 +71,23 @@ public:
}
};
-struct CompletedRuleCollection
-{
+struct CompletedRuleCollection {
public:
CompletedRuleCollection();
+ ~CompletedRuleCollection();
CompletedRuleCollection(const CompletedRuleCollection &old)
- : m_collection(old.m_collection)
- , m_scoreThreshold(old.m_scoreThreshold)
- , m_ruleLimit(old.m_ruleLimit) {}
+ : m_collection(old.m_collection)
+ , m_scoreThreshold(old.m_scoreThreshold)
+ , m_ruleLimit(old.m_ruleLimit) {}
CompletedRuleCollection & operator=(const CompletedRuleCollection &old) {
- m_collection = old.m_collection;
- m_scoreThreshold = old.m_scoreThreshold;
- m_ruleLimit = old.m_ruleLimit;
- return *this;
+ m_collection = old.m_collection;
+ m_scoreThreshold = old.m_scoreThreshold;
+ m_ruleLimit = old.m_ruleLimit;
+ return *this;
}
std::vector<CompletedRule*>::const_iterator begin() const {
diff --git a/moses/TranslationModel/CYKPlusParser/DotChartInMemory.h b/moses/TranslationModel/CYKPlusParser/DotChartInMemory.h
index 390d19b5c..a95b5599e 100644
--- a/moses/TranslationModel/CYKPlusParser/DotChartInMemory.h
+++ b/moses/TranslationModel/CYKPlusParser/DotChartInMemory.h
@@ -103,8 +103,7 @@ public:
if (dottedRule->GetLastNode().GetNonTerminalMap().empty() && !dottedRule->IsRoot()) {
size_t startPos = dottedRule->GetWordsRange().GetEndPos() + 1;
m_expandableDottedRuleListTerminalsOnly[startPos].push_back(dottedRule);
- }
- else {
+ } else {
m_expandableDottedRuleList.push_back(dottedRule);
}
}
diff --git a/moses/TranslationModel/CompactPT/BlockHashIndex.cpp b/moses/TranslationModel/CompactPT/BlockHashIndex.cpp
index cd277ad0c..c90dcd6d9 100644
--- a/moses/TranslationModel/CompactPT/BlockHashIndex.cpp
+++ b/moses/TranslationModel/CompactPT/BlockHashIndex.cpp
@@ -366,10 +366,10 @@ void BlockHashIndex::CalcHash(size_t current, void* source_void)
if(lastKey > temp) {
if(source->nkeys != 2 || temp != "###DUMMY_KEY###") {
- std::stringstream strme;
- strme << "ERROR: Input file does not appear to be sorted with LC_ALL=C sort" << std::endl;
- strme << "1: " << lastKey << std::endl;
- strme << "2: " << temp << std::endl;
+ std::stringstream strme;
+ strme << "ERROR: Input file does not appear to be sorted with LC_ALL=C sort" << std::endl;
+ strme << "1: " << lastKey << std::endl;
+ strme << "2: " << temp << std::endl;
UTIL_THROW2(strme.str());
}
}
diff --git a/moses/TranslationModel/CompactPT/CanonicalHuffman.h b/moses/TranslationModel/CompactPT/CanonicalHuffman.h
index 8d6e1cbb1..9f6c14e56 100644
--- a/moses/TranslationModel/CompactPT/CanonicalHuffman.h
+++ b/moses/TranslationModel/CompactPT/CanonicalHuffman.h
@@ -157,12 +157,14 @@ private:
}
}
- boost::dynamic_bitset<>& Encode(Data data) {
- return m_encodeMap[data];
+ const boost::dynamic_bitset<>& Encode(Data data) const {
+ typename EncodeMap::const_iterator it = m_encodeMap.find(data);
+ UTIL_THROW_IF2(it == m_encodeMap.end(), "Cannot find symbol in encoding map");
+ return it->second;
}
template <class BitWrapper>
- void PutCode(BitWrapper& bitWrapper, boost::dynamic_bitset<>& code) {
+ void PutCode(BitWrapper& bitWrapper, const boost::dynamic_bitset<>& code) {
for(int j = code.size()-1; j >= 0; j--)
bitWrapper.Put(code[j]);
}
diff --git a/moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp b/moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp
index c4ba1084d..9fe9eec30 100644
--- a/moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp
+++ b/moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp
@@ -206,7 +206,7 @@ std::string LexicalReorderingTableCreator::EncodeLine(std::vector<std::string>&
if(m_numScoreComponent != scores.size()) {
std::stringstream strme;
strme << "Error: Wrong number of scores detected ("
- << scores.size() << " != " << m_numScoreComponent << ") :" << std::endl;
+ << scores.size() << " != " << m_numScoreComponent << ") :" << std::endl;
strme << "Line: " << tokens[0] << " ||| ... ||| " << scoresString << std::endl;
UTIL_THROW2(strme.str());
}
@@ -258,8 +258,10 @@ void LexicalReorderingTableCreator::FlushEncodedQueue(bool force)
if(force) {
m_lastFlushedLine = -1;
- m_hash.AddRange(m_lastRange);
- m_lastRange.clear();
+ if(!m_lastRange.empty()) {
+ m_hash.AddRange(m_lastRange);
+ m_lastRange.clear();
+ }
#ifdef WITH_THREADS
m_hash.WaitAll();
@@ -377,7 +379,6 @@ void EncodingTaskReordering::operator()()
encodedLine, i);
result.push_back(packedItem);
}
- lines.clear();
{
#ifdef WITH_THREADS
@@ -388,6 +389,7 @@ void EncodingTaskReordering::operator()()
m_creator.FlushEncodedQueue();
}
+ lines.clear();
result.clear();
lines.reserve(max_lines);
result.reserve(max_lines);
diff --git a/moses/TranslationModel/CompactPT/ListCoders.h b/moses/TranslationModel/CompactPT/ListCoders.h
index b41e183ce..b78dbdd8a 100644
--- a/moses/TranslationModel/CompactPT/ListCoders.h
+++ b/moses/TranslationModel/CompactPT/ListCoders.h
@@ -175,6 +175,9 @@ private:
uint i = 0;
while(it != end) {
+ UTIL_THROW_IF2(*it > 268435455, "You are trying to encode " << *it
+ << " with Simple9. Cannot encode numbers larger than 268435455 (2^28-1)");
+
uint l = bitlength * (length-i-1);
output |= *it << l;
it++;
diff --git a/moses/TranslationModel/CompactPT/PhraseDecoder.h b/moses/TranslationModel/CompactPT/PhraseDecoder.h
index 413918314..83f174cf6 100644
--- a/moses/TranslationModel/CompactPT/PhraseDecoder.h
+++ b/moses/TranslationModel/CompactPT/PhraseDecoder.h
@@ -38,7 +38,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/InputFileStream.h"
#include "moses/StaticData.h"
#include "moses/WordsRange.h"
-#include "moses/UserMessage.h"
#include "PhraseDictionaryCompact.h"
#include "StringVector.h"
diff --git a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
index bd212a19e..90d5575a1 100644
--- a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
+++ b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
@@ -33,7 +33,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/InputFileStream.h"
#include "moses/StaticData.h"
#include "moses/WordsRange.h"
-#include "moses/UserMessage.h"
#include "moses/ThreadPool.h"
#include "util/exception.hh"
@@ -101,7 +100,7 @@ void PhraseDictionaryCompact::Load()
phraseSize = m_targetPhrasesMapped.load(pFile, true);
UTIL_THROW_IF2(indexSize == 0 || coderSize == 0 || phraseSize == 0,
- "Not successfully loaded");
+ "Not successfully loaded");
}
// now properly declared in TargetPhraseCollection.h
diff --git a/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp b/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp
index 3bf0d2820..ba1dfc578 100644
--- a/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp
+++ b/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp
@@ -112,9 +112,9 @@ PhraseTableCreator::PhraseTableCreator(std::string inPath,
if(tempfilePath.size()) {
MmapAllocator<unsigned char> allocEncoded(util::FMakeTemp(tempfilePath));
- m_encodedTargetPhrases = new StringVector<unsigned char, unsigned long, MmapAllocator>(allocEncoded);
+ m_encodedTargetPhrases = new StringVectorTemp<unsigned char, unsigned long, MmapAllocator>(allocEncoded);
} else {
- m_encodedTargetPhrases = new StringVector<unsigned char, unsigned long, MmapAllocator>();
+ m_encodedTargetPhrases = new StringVectorTemp<unsigned char, unsigned long, MmapAllocator>();
}
EncodeTargetPhrases();
@@ -409,6 +409,10 @@ void PhraseTableCreator::CalcHuffmanCodes()
void PhraseTableCreator::AddSourceSymbolId(std::string& symbol)
{
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_mutex);
+#endif
+
if(m_sourceSymbolsMap.count(symbol) == 0) {
unsigned value = m_sourceSymbolsMap.size();
m_sourceSymbolsMap[symbol] = value;
@@ -417,6 +421,9 @@ void PhraseTableCreator::AddSourceSymbolId(std::string& symbol)
void PhraseTableCreator::AddTargetSymbolId(std::string& symbol)
{
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_mutex);
+#endif
if(m_targetSymbolsMap.count(symbol) == 0) {
unsigned value = m_targetSymbolsMap.size();
m_targetSymbolsMap[symbol] = value;
@@ -425,6 +432,9 @@ void PhraseTableCreator::AddTargetSymbolId(std::string& symbol)
unsigned PhraseTableCreator::GetSourceSymbolId(std::string& symbol)
{
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_mutex);
+#endif
boost::unordered_map<std::string, unsigned>::iterator it
= m_sourceSymbolsMap.find(symbol);
@@ -436,13 +446,14 @@ unsigned PhraseTableCreator::GetSourceSymbolId(std::string& symbol)
unsigned PhraseTableCreator::GetTargetSymbolId(std::string& symbol)
{
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_mutex);
+#endif
boost::unordered_map<std::string, unsigned>::iterator it
= m_targetSymbolsMap.find(symbol);
- if(it != m_targetSymbolsMap.end())
- return it->second;
- else
- return m_targetSymbolsMap.size();
+ UTIL_THROW_IF2(it == m_targetSymbolsMap.end(), "No id found for target symbol: " << symbol);
+ return it->second;
}
unsigned PhraseTableCreator::GetOrAddTargetSymbolId(std::string& symbol)
@@ -714,10 +725,10 @@ std::string PhraseTableCreator::EncodeLine(std::vector<std::string>& tokens, siz
std::vector<float> scores = Tokenize<float>(scoresStr);
if(scores.size() != m_numScoreComponent) {
- std::stringstream strme;
- strme << "Error: Wrong number of scores detected ("
- << scores.size() << " != " << m_numScoreComponent << ") :" << std::endl;
- strme << "Line: " << tokens[0] << " ||| " << tokens[1] << " ||| " << tokens[2] << " ..." << std::endl;
+ std::stringstream strme;
+ strme << "Error: Wrong number of scores detected ("
+ << scores.size() << " != " << m_numScoreComponent << ") :" << std::endl;
+ strme << "Line: " << tokens[0] << " ||| " << tokens[1] << " ||| " << tokens[2] << " ..." << std::endl;
UTIL_THROW2(strme.str());
}
@@ -867,8 +878,10 @@ void PhraseTableCreator::FlushRankedQueue(bool force)
}
if(force) {
- m_rnkHash.AddRange(m_lastSourceRange);
- m_lastSourceRange.clear();
+ if(!m_lastSourceRange.empty()) {
+ m_rnkHash.AddRange(m_lastSourceRange);
+ m_lastSourceRange.clear();
+ }
#ifdef WITH_THREADS
m_rnkHash.WaitAll();
@@ -952,8 +965,10 @@ void PhraseTableCreator::FlushEncodedQueue(bool force)
m_lastCollection.clear();
}
- m_srcHash.AddRange(m_lastSourceRange);
- m_lastSourceRange.clear();
+ if(!m_lastSourceRange.empty()) {
+ m_srcHash.AddRange(m_lastSourceRange);
+ m_lastSourceRange.clear();
+ }
#ifdef WITH_THREADS
m_srcHash.WaitAll();
@@ -1040,30 +1055,30 @@ void RankingTask::operator()()
*it = Moses::Trim(*it);
if(tokens.size() < 4) {
- std::stringstream strme;
- strme << "Error: It seems the following line has a wrong format:" << std::endl;
- strme << "Line " << i << ": " << lines[i] << std::endl;
+ std::stringstream strme;
+ strme << "Error: It seems the following line has a wrong format:" << std::endl;
+ strme << "Line " << i << ": " << lines[i] << std::endl;
UTIL_THROW2(strme.str());
}
if(tokens[3].size() <= 1 && m_creator.m_coding != PhraseTableCreator::None) {
- std::stringstream strme;
- strme << "Error: It seems the following line contains no alignment information, " << std::endl;
- strme << "but you are using ";
- strme << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
- strme << " encoding which makes use of alignment data. " << std::endl;
- strme << "Use -encoding None" << std::endl;
- strme << "Line " << i << ": " << lines[i] << std::endl;
+ std::stringstream strme;
+ strme << "Error: It seems the following line contains no alignment information, " << std::endl;
+ strme << "but you are using ";
+ strme << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
+ strme << " encoding which makes use of alignment data. " << std::endl;
+ strme << "Use -encoding None" << std::endl;
+ strme << "Line " << i << ": " << lines[i] << std::endl;
UTIL_THROW2(strme.str());
}
std::vector<float> scores = Tokenize<float>(tokens[2]);
if(scores.size() != m_creator.m_numScoreComponent) {
- std::stringstream strme;
- strme << "Error: It seems the following line has a wrong number of scores ("
- << scores.size() << " != " << m_creator.m_numScoreComponent << ") :" << std::endl;
- strme << "Line " << i << ": " << lines[i] << std::endl;
- UTIL_THROW2(strme.str());
+ std::stringstream strme;
+ strme << "Error: It seems the following line has a wrong number of scores ("
+ << scores.size() << " != " << m_creator.m_numScoreComponent << ") :" << std::endl;
+ strme << "Line " << i << ": " << lines[i] << std::endl;
+ UTIL_THROW2(strme.str());
}
float sortScore = scores[m_creator.m_sortScoreIndex];
@@ -1140,20 +1155,20 @@ void EncodingTask::operator()()
*it = Moses::Trim(*it);
if(tokens.size() < 3) {
- std::stringstream strme;
- strme << "Error: It seems the following line has a wrong format:" << std::endl;
- strme << "Line " << i << ": " << lines[i] << std::endl;
+ std::stringstream strme;
+ strme << "Error: It seems the following line has a wrong format:" << std::endl;
+ strme << "Line " << i << ": " << lines[i] << std::endl;
UTIL_THROW2(strme.str());
}
if(tokens.size() > 3 && tokens[3].size() <= 1 && m_creator.m_coding != PhraseTableCreator::None) {
- std::stringstream strme;
- strme << "Error: It seems the following line contains no alignment information, " << std::endl;
- strme << "but you are using ";
- strme << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
- strme << " encoding which makes use of alignment data. " << std::endl;
- strme << "Use -encoding None" << std::endl;
- strme << "Line " << i << ": " << lines[i] << std::endl;
+ std::stringstream strme;
+ strme << "Error: It seems the following line contains no alignment information, " << std::endl;
+ strme << "but you are using ";
+ strme << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
+ strme << " encoding which makes use of alignment data. " << std::endl;
+ strme << "Use -encoding None" << std::endl;
+ strme << "Line " << i << ": " << lines[i] << std::endl;
UTIL_THROW2(strme.str());
}
@@ -1199,7 +1214,7 @@ size_t CompressionTask::m_collectionNum = 0;
boost::mutex CompressionTask::m_mutex;
#endif
-CompressionTask::CompressionTask(StringVector<unsigned char, unsigned long,
+CompressionTask::CompressionTask(StringVectorTemp<unsigned char, unsigned long,
MmapAllocator>& encodedCollections,
PhraseTableCreator& creator)
: m_encodedCollections(encodedCollections), m_creator(creator) {}
diff --git a/moses/TranslationModel/CompactPT/PhraseTableCreator.h b/moses/TranslationModel/CompactPT/PhraseTableCreator.h
index fd5fc1581..f63a4f61f 100644
--- a/moses/TranslationModel/CompactPT/PhraseTableCreator.h
+++ b/moses/TranslationModel/CompactPT/PhraseTableCreator.h
@@ -31,11 +31,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/InputFileStream.h"
#include "moses/ThreadPool.h"
-#include "moses/UserMessage.h"
#include "moses/Util.h"
#include "BlockHashIndex.h"
#include "StringVector.h"
+#include "StringVectorTemp.h"
#include "CanonicalHuffman.h"
namespace Moses
@@ -238,7 +238,7 @@ private:
std::vector<size_t> m_lexicalTableIndex;
std::vector<SrcTrg> m_lexicalTable;
- StringVector<unsigned char, unsigned long, MmapAllocator>*
+ StringVectorTemp<unsigned char, unsigned long, MmapAllocator>*
m_encodedTargetPhrases;
StringVector<unsigned char, unsigned long, MmapAllocator>*
@@ -397,12 +397,12 @@ private:
static boost::mutex m_mutex;
#endif
static size_t m_collectionNum;
- StringVector<unsigned char, unsigned long, MmapAllocator>&
+ StringVectorTemp<unsigned char, unsigned long, MmapAllocator>&
m_encodedCollections;
PhraseTableCreator& m_creator;
public:
- CompressionTask(StringVector<unsigned char, unsigned long, MmapAllocator>&
+ CompressionTask(StringVectorTemp<unsigned char, unsigned long, MmapAllocator>&
encodedCollections, PhraseTableCreator& creator);
void operator()();
};
diff --git a/moses/TranslationModel/CompactPT/StringVectorTemp.h b/moses/TranslationModel/CompactPT/StringVectorTemp.h
new file mode 100644
index 000000000..ffac0b718
--- /dev/null
+++ b/moses/TranslationModel/CompactPT/StringVectorTemp.h
@@ -0,0 +1,430 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_StringVectorTemp_h
+#define moses_StringVectorTemp_h
+
+#include <vector>
+#include <algorithm>
+#include <string>
+#include <iterator>
+#include <cstdio>
+#include <cassert>
+
+#include <boost/iterator/iterator_facade.hpp>
+
+#include "ThrowingFwrite.h"
+#include "StringVector.h"
+
+#include "MmapAllocator.h"
+
+namespace Moses
+{
+
+
+// ********** StringVectorTemp **********
+
+template <typename ValueT = unsigned char, typename PosT = unsigned int,
+ template <typename> class Allocator = std::allocator>
+class StringVectorTemp
+{
+protected:
+ bool m_sorted;
+ bool m_memoryMapped;
+
+ std::vector<ValueT, Allocator<ValueT> >* m_charArray;
+ std::vector<PosT> m_positions;
+
+ virtual const ValueT* value_ptr(PosT i) const;
+
+public:
+ //typedef ValueIteratorRange<typename std::vector<ValueT, Allocator<ValueT> >::const_iterator> range;
+ typedef ValueIteratorRange<const ValueT *> range;
+
+ // ********** RangeIterator **********
+
+ class RangeIterator : public boost::iterator_facade<RangeIterator,
+ range, std::random_access_iterator_tag, range, PosT>
+ {
+
+ private:
+ PosT m_index;
+ StringVectorTemp<ValueT, PosT, Allocator>* m_container;
+
+ public:
+ RangeIterator();
+ RangeIterator(StringVectorTemp<ValueT, PosT, Allocator> &sv, PosT index=0);
+
+ PosT get_index();
+
+ private:
+ friend class boost::iterator_core_access;
+
+ range dereference() const;
+ bool equal(RangeIterator const& other) const;
+ void increment();
+ void decrement();
+ void advance(PosT n);
+
+ PosT distance_to(RangeIterator const& other) const;
+ };
+
+ // ********** StringIterator **********
+
+ class StringIterator : public boost::iterator_facade<StringIterator,
+ std::string, std::random_access_iterator_tag, const std::string, PosT>
+ {
+
+ private:
+ PosT m_index;
+ StringVectorTemp<ValueT, PosT, Allocator>* m_container;
+
+ public:
+ StringIterator();
+ StringIterator(StringVectorTemp<ValueT, PosT, Allocator> &sv, PosT index=0);
+
+ PosT get_index();
+
+ private:
+ friend class boost::iterator_core_access;
+
+ const std::string dereference() const;
+ bool equal(StringIterator const& other) const;
+ void increment();
+ void decrement();
+ void advance(PosT n);
+ PosT distance_to(StringIterator const& other) const;
+ };
+
+ typedef RangeIterator iterator;
+ typedef StringIterator string_iterator;
+
+ StringVectorTemp();
+ StringVectorTemp(Allocator<ValueT> alloc);
+
+ virtual ~StringVectorTemp() {
+ delete m_charArray;
+ }
+
+ void swap(StringVectorTemp<ValueT, PosT, Allocator> &c) {
+ m_positions.swap(c.m_positions);
+ m_charArray->swap(*c.m_charArray);
+
+ bool temp = m_sorted;
+ m_sorted = c.m_sorted;
+ c.m_sorted = temp;
+ }
+
+ bool is_sorted() const;
+ PosT size() const;
+ virtual PosT size2() const;
+
+ template<class Iterator> Iterator begin() const;
+ template<class Iterator> Iterator end() const;
+
+ iterator begin() const;
+ iterator end() const;
+
+ PosT length(PosT i) const;
+ //typename std::vector<ValueT, Allocator<ValueT> >::const_iterator begin(PosT i) const;
+ //typename std::vector<ValueT, Allocator<ValueT> >::const_iterator end(PosT i) const;
+ const ValueT* begin(PosT i) const;
+ const ValueT* end(PosT i) const;
+
+ void clear() {
+ m_charArray->clear();
+ m_sorted = true;
+ m_positions.clear();
+ }
+
+ range at(PosT i) const;
+ range operator[](PosT i) const;
+ range back() const;
+
+ template <typename StringT>
+ void push_back(StringT s);
+ void push_back(const char* c);
+
+ template <typename StringT>
+ PosT find(StringT &s) const;
+ PosT find(const char* c) const;
+};
+
+// ********** Implementation **********
+
+// StringVectorTemp
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+StringVectorTemp<ValueT, PosT, Allocator>::StringVectorTemp()
+ : m_sorted(true), m_memoryMapped(false), m_charArray(new std::vector<ValueT, Allocator<ValueT> >()) { }
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+StringVectorTemp<ValueT, PosT, Allocator>::StringVectorTemp(Allocator<ValueT> alloc)
+ : m_sorted(true), m_memoryMapped(false), m_charArray(new std::vector<ValueT, Allocator<ValueT> >(alloc)) { }
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+template <typename StringT>
+void StringVectorTemp<ValueT, PosT, Allocator>::push_back(StringT s)
+{
+ if(is_sorted() && size() && !(back() < s))
+ m_sorted = false;
+
+ m_positions.push_back(size2());
+ std::copy(s.begin(), s.end(), std::back_inserter(*m_charArray));
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVectorTemp<ValueT, PosT, Allocator>::push_back(const char* c)
+{
+ std::string dummy(c);
+ push_back(dummy);
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+template <typename Iterator>
+Iterator StringVectorTemp<ValueT, PosT, Allocator>::begin() const
+{
+ return Iterator(const_cast<StringVectorTemp<ValueT, PosT, Allocator>&>(*this), 0);
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+template <typename Iterator>
+Iterator StringVectorTemp<ValueT, PosT, Allocator>::end() const
+{
+ return Iterator(const_cast<StringVectorTemp<ValueT, PosT, Allocator>&>(*this), size());
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVectorTemp<ValueT, PosT, Allocator>::iterator StringVectorTemp<ValueT, PosT, Allocator>::begin() const
+{
+ return begin<iterator>();
+};
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVectorTemp<ValueT, PosT, Allocator>::iterator StringVectorTemp<ValueT, PosT, Allocator>::end() const
+{
+ return end<iterator>();
+};
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+bool StringVectorTemp<ValueT, PosT, Allocator>::is_sorted() const
+{
+ return m_sorted;
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVectorTemp<ValueT, PosT, Allocator>::size() const
+{
+ return m_positions.size();
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVectorTemp<ValueT, PosT, Allocator>::size2() const
+{
+ return m_charArray->size();
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVectorTemp<ValueT, PosT, Allocator>::range StringVectorTemp<ValueT, PosT, Allocator>::at(PosT i) const
+{
+ return range(begin(i), end(i));
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVectorTemp<ValueT, PosT, Allocator>::range StringVectorTemp<ValueT, PosT, Allocator>::operator[](PosT i) const
+{
+ return at(i);
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVectorTemp<ValueT, PosT, Allocator>::range StringVectorTemp<ValueT, PosT, Allocator>::back() const
+{
+ return at(size()-1);
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVectorTemp<ValueT, PosT, Allocator>::length(PosT i) const
+{
+ if(i+1 < size())
+ return m_positions[i+1] - m_positions[i];
+ else
+ return size2() - m_positions[i];
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+const ValueT* StringVectorTemp<ValueT, PosT, Allocator>::value_ptr(PosT i) const
+{
+ return &(*m_charArray)[m_positions[i]];
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+//typename std::vector<ValueT, Allocator<ValueT> >::const_iterator StringVectorTemp<ValueT, PosT, Allocator>::begin(PosT i) const
+const ValueT* StringVectorTemp<ValueT, PosT, Allocator>::begin(PosT i) const
+{
+ //return typename std::vector<ValueT, Allocator<ValueT> >::const_iterator(value_ptr(i));
+ return value_ptr(i);
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+//typename std::vector<ValueT, Allocator<ValueT> >::const_iterator StringVectorTemp<ValueT, PosT, Allocator>::end(PosT i) const
+const ValueT* StringVectorTemp<ValueT, PosT, Allocator>::end(PosT i) const
+{
+ //return typename std::vector<ValueT, Allocator<ValueT> >::const_iterator(value_ptr(i) + length(i));
+ return value_ptr(i) + length(i);
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+template <typename StringT>
+PosT StringVectorTemp<ValueT, PosT, Allocator>::find(StringT &s) const
+{
+ if(m_sorted)
+ return std::distance(begin(), std::lower_bound(begin(), end(), s));
+ return std::distance(begin(), std::find(begin(), end(), s));
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVectorTemp<ValueT, PosT, Allocator>::find(const char* c) const
+{
+ std::string s(c);
+ return find(s);
+}
+
+// RangeIterator
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::RangeIterator() : m_index(0), m_container(0) { }
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::RangeIterator(StringVectorTemp<ValueT, PosT, Allocator> &sv, PosT index)
+ : m_index(index), m_container(&sv) { }
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::get_index()
+{
+ return m_index;
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVectorTemp<ValueT, PosT, Allocator>::range
+StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::dereference() const
+{
+ return typename StringVectorTemp<ValueT, PosT, Allocator>::range(
+ m_container->begin(m_index),
+ m_container->end(m_index)
+ );
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+bool StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::equal(
+ StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator const& other) const
+{
+ return m_index == other.m_index && m_container == other.m_container;
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::increment()
+{
+ m_index++;
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::decrement()
+{
+ m_index--;
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::advance(PosT n)
+{
+ m_index += n;
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::distance_to(
+ StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator const& other) const
+{
+ return other.m_index - m_index;
+}
+
+// StringIterator
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::StringIterator()
+ : m_index(0), m_container(0) { }
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::StringIterator(
+ StringVectorTemp<ValueT, PosT, Allocator> &sv, PosT index) : m_index(index),
+ m_container(&sv) { }
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::get_index()
+{
+ return m_index;
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+const std::string StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::dereference() const
+{
+ return StringVectorTemp<ValueT, PosT, Allocator>::range(m_container->begin(m_index),
+ m_container->end(m_index)).str();
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+bool StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::equal(
+ StringVectorTemp<ValueT, PosT, Allocator>::StringIterator const& other) const
+{
+ return m_index == other.m_index && m_container == other.m_container;
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::increment()
+{
+ m_index++;
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::decrement()
+{
+ m_index--;
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::advance(PosT n)
+{
+ m_index += n;
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::distance_to(
+ StringVectorTemp<ValueT, PosT, Allocator>::StringIterator const& other) const
+{
+ return other.m_index - m_index;
+}
+
+// ********** Some typedefs **********
+
+typedef StringVectorTemp<unsigned char, unsigned int> MediumStringVectorTemp;
+typedef StringVectorTemp<unsigned char, unsigned long> LongStringVectorTemp;
+
+}
+
+#endif
diff --git a/moses/TranslationModel/DynSAInclude/FileHandler.cpp b/moses/TranslationModel/DynSAInclude/FileHandler.cpp
index ffde4a0f3..8645833fe 100644
--- a/moses/TranslationModel/DynSAInclude/FileHandler.cpp
+++ b/moses/TranslationModel/DynSAInclude/FileHandler.cpp
@@ -72,12 +72,12 @@ bool FileHandler::setStreamBuffer(bool checkExists)
// redirect stdin or stdout if necesary
if (path_ == FileHandler::kStdInDescriptor) {
UTIL_THROW_IF2((flags_ & std::ios::in) == 0,
- "Incorrect flags: " << flags_);
+ "Incorrect flags: " << flags_);
std::streambuf* sb = std::cin.rdbuf();
buffer_ = sb;
} else if (path_ == FileHandler::kStdOutDescriptor) {
UTIL_THROW_IF2((flags_ & std::ios::out) == 0,
- "Incorrect flags: " << flags_);
+ "Incorrect flags: " << flags_);
std::streambuf* sb = std::cout.rdbuf();
buffer_ = sb;
} else {
diff --git a/moses/TranslationModel/DynSAInclude/RandLMFilter.h b/moses/TranslationModel/DynSAInclude/RandLMFilter.h
index e8defb110..19566ff40 100644
--- a/moses/TranslationModel/DynSAInclude/RandLMFilter.h
+++ b/moses/TranslationModel/DynSAInclude/RandLMFilter.h
@@ -62,9 +62,9 @@ public:
address_mask_ = full_mask_ >> first_bit_;
}
Filter(FileHandler* fin, bool loaddata = true) : data_(NULL) {
- assert(loadHeader(fin));
+ assert(loadHeader(fin));
if (loaddata)
- assert(loadData(fin));
+ assert(loadData(fin));
}
virtual ~Filter() {
delete[] data_;
@@ -80,7 +80,7 @@ public:
}
// read / write functions
inline bool read(uint64_t address, T* value) {
- assert(address <= addresses_);
+ assert(address <= addresses_);
// copy address to 'value'
uint64_t data_bit = address * width_;
uint32_t data_cell = (data_bit >> log_cell_width_); // % cells_;
@@ -102,7 +102,7 @@ public:
return true;
}
inline T read(uint64_t address) {
- assert(address <= addresses_);
+ assert(address <= addresses_);
// return value at address
T value = 0;
uint64_t data_bit = address * width_;
@@ -124,7 +124,7 @@ public:
return value;
}
inline bool write(uint64_t address, T value) {
- assert(address <= addresses_);
+ assert(address <= addresses_);
assert(log2(value) <= width_);
// write 'value' to address
uint64_t data_bit = address * width_;
diff --git a/moses/TranslationModel/DynSAInclude/onlineRLM.h b/moses/TranslationModel/DynSAInclude/onlineRLM.h
index 1e7a9c2d6..1d3f66eac 100644
--- a/moses/TranslationModel/DynSAInclude/onlineRLM.h
+++ b/moses/TranslationModel/DynSAInclude/onlineRLM.h
@@ -148,8 +148,8 @@ int OnlineRLM<T>::query(const wordID_t* IDs, int len)
//markQueried(hpdItr); // mark this event as "hit"
value -= ((value & this->hitMask_) != 0) ? this->hitMask_ : 0; // check for previous hit marks
} else {
- UTIL_THROW_IF2(filterIdx >= this->cells_,
- "Out of bound: " << filterIdx);
+ UTIL_THROW_IF2(filterIdx >= this->cells_,
+ "Out of bound: " << filterIdx);
//markQueried(filterIdx);
}
}
@@ -341,7 +341,7 @@ const void* OnlineRLM<T>::getContext(const wordID_t* ngram, int len)
int dummy(0);
float**addresses = new float*[len]; // only interested in addresses of cache
UTIL_THROW_IF2(cache_->getCache2(ngram, len, &addresses[0], &dummy) != len,
- "Error");
+ "Error");
// return address of cache node
float *addr0 = addresses[0];
diff --git a/moses/TranslationModel/DynSAInclude/params.cpp b/moses/TranslationModel/DynSAInclude/params.cpp
index fa99c4838..03ad48446 100644
--- a/moses/TranslationModel/DynSAInclude/params.cpp
+++ b/moses/TranslationModel/DynSAInclude/params.cpp
@@ -65,10 +65,10 @@ bool Parameters::loadParams(int argc, char ** argv)
if( getValueType(param) == kBoolValue ) {
jumpBy = 1;
UTIL_THROW_IF2(!setParamValue(param, kTrueValue),
- "Couldn't set parameter " << param);
+ "Couldn't set parameter " << param);
} else { //not of type bool so must have corresponding value
UTIL_THROW_IF2(i+1 >= argc,
- "Out of bound error: " << i+1);
+ "Out of bound error: " << i+1);
jumpBy = 2;
std::string val = argv[i+1];
diff --git a/moses/TranslationModel/DynSAInclude/quantizer.h b/moses/TranslationModel/DynSAInclude/quantizer.h
index 002535dd0..6dbcc3cc4 100644
--- a/moses/TranslationModel/DynSAInclude/quantizer.h
+++ b/moses/TranslationModel/DynSAInclude/quantizer.h
@@ -17,7 +17,7 @@ class LogQtizer
{
public:
LogQtizer(float i): base_(pow(2, 1 / i)) {
- UTIL_THROW_IF2(base_ <= 1, "Can't calculate log base less than 1");
+ UTIL_THROW_IF2(base_ <= 1, "Can't calculate log base less than 1");
max_code_ = 0;
float value = 1; // code = 1 -> value = 1 for any base
std::vector<float> code_to_value_vec;
@@ -40,13 +40,13 @@ public:
std::cerr << "Initialized quantization (size = " << max_code_ + 1 << ")" << std::endl;
}
LogQtizer(FileHandler* fin) {
- UTIL_THROW_IF2(fin == NULL, "Null file handle");
+ UTIL_THROW_IF2(fin == NULL, "Null file handle");
load(fin);
}
int code(float value) {
// should just be: return log_b(value)
UTIL_THROW_IF2(value < min_value_ || value > max_value_,
- "Value " << value << " out of bound");
+ "Value " << value << " out of bound");
// but binary search removes errors due to floor operator above
int code = static_cast<int>(std::lower_bound(code_to_value_, code_to_value_+ max_code_,
diff --git a/moses/TranslationModel/DynSAInclude/vocab.cpp b/moses/TranslationModel/DynSAInclude/vocab.cpp
index 1e6c92203..b717f533c 100644
--- a/moses/TranslationModel/DynSAInclude/vocab.cpp
+++ b/moses/TranslationModel/DynSAInclude/vocab.cpp
@@ -134,7 +134,7 @@ bool Vocab::Load(FileHandler* vcbin, const FactorDirection& direction,
if (id == 0 && word != GetkOOVWord())
id = m_ids2words.size() + 1; // assign ids sequentially starting from 1
UTIL_THROW_IF2(m_ids2words.count(id) != 0 || m_words2ids.count(word) != 0,
- "Error");
+ "Error");
m_ids2words[id] = word;
m_words2ids[word] = id;
diff --git a/moses/TranslationModel/PhraseDictionary.cpp b/moses/TranslationModel/PhraseDictionary.cpp
index e0251b907..7fdd61f97 100644
--- a/moses/TranslationModel/PhraseDictionary.cpp
+++ b/moses/TranslationModel/PhraseDictionary.cpp
@@ -19,11 +19,11 @@ License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
+#include <queue>
#include "moses/TranslationModel/PhraseDictionary.h"
#include "moses/StaticData.h"
#include "moses/InputType.h"
#include "moses/TranslationOption.h"
-#include "moses/UserMessage.h"
#include "moses/DecodeStep.h"
#include "moses/DecodeGraph.h"
#include "moses/InputPath.h"
@@ -37,11 +37,11 @@ std::vector<PhraseDictionary*> PhraseDictionary::s_staticColl;
CacheColl::~CacheColl()
{
- for (iterator iter = begin(); iter != end(); ++iter) {
- std::pair<const TargetPhraseCollection*, clock_t> &key = iter->second;
- const TargetPhraseCollection *tps = key.first;
- delete tps;
- }
+ for (iterator iter = begin(); iter != end(); ++iter) {
+ std::pair<const TargetPhraseCollection*, clock_t> &key = iter->second;
+ const TargetPhraseCollection *tps = key.first;
+ delete tps;
+ }
}
PhraseDictionary::PhraseDictionary(const std::string &line)
@@ -49,8 +49,8 @@ PhraseDictionary::PhraseDictionary(const std::string &line)
,m_tableLimit(20) // default
,m_maxCacheSize(DEFAULT_MAX_TRANS_OPT_CACHE_SIZE)
{
- m_id = s_staticColl.size();
- s_staticColl.push_back(this);
+ m_id = s_staticColl.size();
+ s_staticColl.push_back(this);
}
bool
@@ -139,22 +139,22 @@ SetFeaturesToApply()
}
}
-
- // tell the Phrase Dictionary that the TargetPhraseCollection is not needed any more
- void
- PhraseDictionary::
- Release(TargetPhraseCollection const* tpc) const
- {
- // do nothing by default
- return;
- }
- bool
- PhraseDictionary::
- PrefixExists(Phrase const& phrase) const
- {
- return true;
- }
+// tell the Phrase Dictionary that the TargetPhraseCollection is not needed any more
+void
+PhraseDictionary::
+Release(TargetPhraseCollection const* tpc) const
+{
+ // do nothing by default
+ return;
+}
+
+bool
+PhraseDictionary::
+PrefixExists(Phrase const& phrase) const
+{
+ return true;
+}
void
PhraseDictionary::
@@ -166,7 +166,7 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
// backoff
if (!SatisfyBackoff(inputPath)) {
- continue;
+ continue;
}
const Phrase &phrase = inputPath.GetPhrase();
@@ -183,7 +183,7 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
// for( std::map<size_t, std::pair<const TargetPhraseCollection*,clock_t> >::iterator iter,
// iter != cache.end(),
// iter++ ) {
-//
+//
// }
//}
@@ -253,25 +253,25 @@ bool PhraseDictionary::SatisfyBackoff(const InputPath &inputPath) const
size_t backoff = decodeGraph.GetBackoff();
if (backoff == 0) {
- // ie. don't backoff. Collect ALL translations
- return true;
+ // ie. don't backoff. Collect ALL translations
+ return true;
}
if (sourcePhrase.GetSize() > backoff) {
- // source phrase too big
- return false;
+ // source phrase too big
+ return false;
}
// lookup translation only if no other translations
InputPath::TargetPhrases::const_iterator iter;
for (iter = inputPath.GetTargetPhrases().begin(); iter != inputPath.GetTargetPhrases().end(); ++iter) {
- const std::pair<const TargetPhraseCollection*, const void*> &temp = iter->second;
- const TargetPhraseCollection *tpCollPrev = temp.first;
+ const std::pair<const TargetPhraseCollection*, const void*> &temp = iter->second;
+ const TargetPhraseCollection *tpCollPrev = temp.first;
- if (tpCollPrev && tpCollPrev->GetSize()) {
- // already have translation from another pt. Don't create translations
- return false;
- }
+ if (tpCollPrev && tpCollPrev->GetSize()) {
+ // already have translation from another pt. Don't create translations
+ return false;
+ }
}
return true;
diff --git a/moses/TranslationModel/PhraseDictionary.h b/moses/TranslationModel/PhraseDictionary.h
index 441c94c0b..2d5ae32f6 100644
--- a/moses/TranslationModel/PhraseDictionary.h
+++ b/moses/TranslationModel/PhraseDictionary.h
@@ -62,7 +62,7 @@ class CacheColl : public boost::unordered_map<size_t, std::pair<const TargetPhra
// 3rd = time of last access
public:
- ~CacheColl();
+ ~CacheColl();
};
/**
@@ -74,7 +74,7 @@ public:
virtual bool ProvidesPrefixCheck() const;
static const std::vector<PhraseDictionary*>& GetColl() {
- return s_staticColl;
+ return s_staticColl;
}
PhraseDictionary(const std::string &line);
@@ -88,14 +88,15 @@ public:
}
//! continguous id for each pt, starting from 0
- size_t GetId() const
- { return m_id; }
+ size_t GetId() const {
+ return m_id;
+ }
virtual
void
Release(TargetPhraseCollection const* tpc) const;
- /// return true if phrase table entries starting with /phrase/
+ /// return true if phrase table entries starting with /phrase/
// exist in the table.
virtual
bool
diff --git a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp
new file mode 100644
index 000000000..e9c656937
--- /dev/null
+++ b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp
@@ -0,0 +1,759 @@
+// vim:tabstop=2
+
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+#include "util/exception.hh"
+
+#include "moses/TranslationModel/PhraseDictionary.h"
+#include "moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h"
+#include "moses/FactorCollection.h"
+#include "moses/InputFileStream.h"
+#include "moses/StaticData.h"
+#include "moses/TargetPhrase.h"
+
+using namespace std;
+
+namespace Moses
+{
+std::map< const std::string, PhraseDictionaryDynamicCacheBased * > PhraseDictionaryDynamicCacheBased::s_instance_map;
+PhraseDictionaryDynamicCacheBased *PhraseDictionaryDynamicCacheBased::s_instance = NULL;
+
+//! contructor
+PhraseDictionaryDynamicCacheBased::PhraseDictionaryDynamicCacheBased(const std::string &line)
+ : PhraseDictionary(line)
+{
+ std::cerr << "Initializing PhraseDictionaryDynamicCacheBased feature..." << std::endl;
+
+ //disabling internal cache (provided by PhraseDictionary) for translation options (third parameter set to 0)
+ m_maxCacheSize = 0;
+
+ m_score_type = CBTM_SCORE_TYPE_HYPERBOLA;
+ m_maxAge = 1000;
+ m_entries = 0;
+ m_name = "default";
+ m_constant = false;
+ ReadParameters();
+
+ UTIL_THROW_IF2(s_instance_map.find(m_name) != s_instance_map.end(), "Only 1 PhraseDictionaryDynamicCacheBased feature named " + m_name + " is allowed");
+ s_instance_map[m_name] = this;
+ s_instance = this; //for back compatibility
+}
+
+PhraseDictionaryDynamicCacheBased::~PhraseDictionaryDynamicCacheBased()
+{
+ Clear();
+}
+
+void PhraseDictionaryDynamicCacheBased::Load()
+{
+ VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Load()" << std::endl);
+ SetFeaturesToApply();
+
+ vector<float> weight = StaticData::Instance().GetWeights(this);
+ SetPreComputedScores(weight.size());
+
+ Load(m_initfiles);
+}
+
+void PhraseDictionaryDynamicCacheBased::Load(const std::string filestr)
+{
+ VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Load(const std::string filestr)" << std::endl);
+// std::vector<std::string> files = Tokenize(m_initfiles, "||");
+ std::vector<std::string> files = Tokenize(filestr, "||");
+ Load_Multiple_Files(files);
+}
+
+void PhraseDictionaryDynamicCacheBased::Load_Multiple_Files(std::vector<std::string> files)
+{
+ VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Load_Multiple_Files(std::vector<std::string> files)" << std::endl);
+ for(size_t j = 0; j < files.size(); ++j) {
+ Load_Single_File(files[j]);
+ }
+}
+
+void PhraseDictionaryDynamicCacheBased::Load_Single_File(const std::string file)
+{
+ VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Load_Single_File(const std::string file)" << std::endl);
+ //file format
+ //age |||| src_phr ||| trg_phr
+ //age |||| src_phr2 ||| trg_phr2 |||| src_phr3 ||| trg_phr3 |||| src_phr4 ||| trg_ph4
+ //....
+ //or
+ //age |||| src_phr ||| trg_phr ||| wa_align
+ //age |||| src_phr2 ||| trg_phr2 ||| wa_align2 |||| src_phr3 ||| trg_phr3 ||| wa_align3 |||| src_phr4 ||| trg_phr4 ||| wa_align4
+ //....
+ //each src_phr ad trg_phr are sequences of src and trg words, respectively, of any length
+ //if provided, wa_align is the alignment between src_phr and trg_phr
+ //
+ //there is no limit on the size of n
+ //
+ //entries can be repeated, but the last entry overwrites the previous
+
+
+ VERBOSE(2,"Loading data from the cache file " << file << std::endl);
+ InputFileStream cacheFile(file);
+
+ std::string line;
+ std::vector<std::string> words;
+
+ while (getline(cacheFile, line)) {
+ std::vector<std::string> vecStr = TokenizeMultiCharSeparator( line , "||||" );
+ if (vecStr.size() >= 2) {
+ std::string ageString = vecStr[0];
+ vecStr.erase(vecStr.begin());
+ Update(vecStr,ageString);
+ } else {
+ UTIL_THROW_IF2(false, "The format of the loaded file is wrong: " << line);
+ }
+ }
+ IFVERBOSE(2) Print();
+}
+
+
+void PhraseDictionaryDynamicCacheBased::SetParameter(const std::string& key, const std::string& value)
+{
+ VERBOSE(2, "PhraseDictionaryDynamicCacheBased::SetParameter key:|" << key << "| value:|" << value << "|" << std::endl);
+
+ if(key == "cbtm-score-type") {
+ SetScoreType(Scan<size_t>(value));
+ } else if (key == "cbtm-max-age") {
+ SetMaxAge(Scan<unsigned int>(value));
+ } else if (key == "cbtm-file") {
+ m_initfiles = Scan<std::string>(value);
+ } else if (key == "cbtm-name") {
+ m_name = Scan<std::string>(value);
+ } else if (key == "cbtm-constant") {
+ m_constant = Scan<bool>(value);
+ } else {
+ PhraseDictionary::SetParameter(key, value);
+ }
+}
+
+void PhraseDictionaryDynamicCacheBased::InitializeForInput(InputType const& source)
+{
+ ReduceCache();
+}
+
+const TargetPhraseCollection *PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollection(const Phrase &source) const
+{
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
+#endif
+ TargetPhraseCollection* tpc = NULL;
+ cacheMap::const_iterator it = m_cacheTM.find(source);
+ if(it != m_cacheTM.end()) {
+ tpc = new TargetPhraseCollection(*(it->second).first);
+
+ std::vector<const TargetPhrase*>::const_iterator it2 = tpc->begin();
+
+ while (it2 != tpc->end()) {
+ ((TargetPhrase*) *it2)->EvaluateInIsolation(source, GetFeaturesToApply());
+ it2++;
+ }
+ }
+ if (tpc) {
+ tpc->NthElement(m_tableLimit); // sort the phrases for the decoder
+ }
+
+ return tpc;
+}
+
+const TargetPhraseCollection* PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionLEGACY(Phrase const &src) const
+{
+ const TargetPhraseCollection *ret = GetTargetPhraseCollection(src);
+ return ret;
+}
+
+const TargetPhraseCollection* PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const
+{
+ const TargetPhraseCollection *ret = GetTargetPhraseCollection(src);
+ return ret;
+}
+
+ChartRuleLookupManager* PhraseDictionaryDynamicCacheBased::CreateRuleLookupManager(const ChartParser &parser, const ChartCellCollectionBase &cellCollection, std::size_t /*maxChartSpan*/)
+{
+ UTIL_THROW(util::Exception, "Phrase table used in chart decoder");
+}
+
+void PhraseDictionaryDynamicCacheBased::SetScoreType(size_t type)
+{
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
+#endif
+
+ m_score_type = type;
+ if ( m_score_type != CBTM_SCORE_TYPE_HYPERBOLA
+ && m_score_type != CBTM_SCORE_TYPE_POWER
+ && m_score_type != CBTM_SCORE_TYPE_EXPONENTIAL
+ && m_score_type != CBTM_SCORE_TYPE_COSINE
+ && m_score_type != CBTM_SCORE_TYPE_HYPERBOLA_REWARD
+ && m_score_type != CBTM_SCORE_TYPE_POWER_REWARD
+ && m_score_type != CBTM_SCORE_TYPE_EXPONENTIAL_REWARD ) {
+ VERBOSE(2, "This score type " << m_score_type << " is unknown. Instead used " << CBTM_SCORE_TYPE_HYPERBOLA << "." << std::endl);
+ m_score_type = CBTM_SCORE_TYPE_HYPERBOLA;
+ }
+
+ VERBOSE(2, "PhraseDictionaryDynamicCacheBased ScoreType: " << m_score_type << std::endl);
+}
+
+
+void PhraseDictionaryDynamicCacheBased::SetMaxAge(unsigned int age)
+{
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
+#endif
+ m_maxAge = age;
+ VERBOSE(2, "PhraseDictionaryCache MaxAge: " << m_maxAge << std::endl);
+}
+
+
+// friend
+ostream& operator<<(ostream& out, const PhraseDictionaryDynamicCacheBased& phraseDict)
+{
+ return out;
+}
+
+float PhraseDictionaryDynamicCacheBased::decaying_score(const int age)
+{
+ float sc;
+ switch(m_score_type) {
+ case CBTM_SCORE_TYPE_HYPERBOLA:
+ sc = (float) 1.0/age - 1.0;
+ break;
+ case CBTM_SCORE_TYPE_POWER:
+ sc = (float) pow(age, -0.25) - 1.0;
+ break;
+ case CBTM_SCORE_TYPE_EXPONENTIAL:
+ sc = (age == 1) ? 0.0 : (float) exp( 1.0/age ) / exp(1.0) - 1.0;
+ break;
+ case CBTM_SCORE_TYPE_COSINE:
+ sc = (float) cos( (age-1) * (PI/2) / m_maxAge ) - 1.0;
+ break;
+ case CBTM_SCORE_TYPE_HYPERBOLA_REWARD:
+ sc = (float) 1.0/age;
+ break;
+ case CBTM_SCORE_TYPE_POWER_REWARD:
+ sc = (float) pow(age, -0.25);
+ break;
+ case CBTM_SCORE_TYPE_EXPONENTIAL_REWARD:
+ sc = (age == 1) ? 1.0 : (float) exp( 1.0/age ) / exp(1.0);
+ break;
+ default:
+ sc = -1.0;
+ }
+ return sc;
+}
+
+void PhraseDictionaryDynamicCacheBased::SetPreComputedScores(const unsigned int numScoreComponent)
+{
+ VERBOSE(2, "PhraseDictionaryDynamicCacheBased SetPreComputedScores: " << m_maxAge << std::endl);
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
+#endif
+ float sc;
+ for (size_t i=0; i<=m_maxAge; i++) {
+ if (i==m_maxAge) {
+ if ( m_score_type == CBTM_SCORE_TYPE_HYPERBOLA
+ || m_score_type == CBTM_SCORE_TYPE_POWER
+ || m_score_type == CBTM_SCORE_TYPE_EXPONENTIAL
+ || m_score_type == CBTM_SCORE_TYPE_COSINE ) {
+ sc = decaying_score(m_maxAge)/numScoreComponent;
+ } else { // m_score_type = CBTM_SCORE_TYPE_XXXXXXXXX_REWARD
+ sc = 0.0;
+ }
+ } else {
+ sc = decaying_score(i)/numScoreComponent;
+ }
+ Scores sc_vec;
+ for (size_t j=0; j<numScoreComponent; j++) {
+ sc_vec.push_back(sc); //CHECK THIS SCORE
+ }
+ precomputedScores.push_back(sc_vec);
+ }
+ m_lower_score = precomputedScores[m_maxAge].at(0);
+ VERBOSE(3, "SetPreComputedScores(const unsigned int): lower_age:|" << m_maxAge << "| lower_score:|" << m_lower_score << "|" << std::endl);
+}
+
+Scores PhraseDictionaryDynamicCacheBased::GetPreComputedScores(const unsigned int age)
+{
+ if (age < m_maxAge) {
+ return precomputedScores.at(age);
+ } else {
+ return precomputedScores.at(m_maxAge);
+ }
+}
+
+void PhraseDictionaryDynamicCacheBased::ClearEntries(std::string &entries)
+{
+ if (entries != "") {
+ VERBOSE(3,"entries:|" << entries << "|" << std::endl);
+ std::vector<std::string> elements = TokenizeMultiCharSeparator(entries, "||||");
+ VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl);
+ ClearEntries(elements);
+ }
+}
+
+void PhraseDictionaryDynamicCacheBased::ClearEntries(std::vector<std::string> entries)
+{
+ VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(std::vector<std::string> entries)" << std::endl);
+ std::vector<std::string> pp;
+
+ std::vector<std::string>::iterator it;
+ for(it = entries.begin(); it!=entries.end(); it++) {
+ pp.clear();
+ pp = TokenizeMultiCharSeparator((*it), "|||");
+ VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl);
+ VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl);
+
+ ClearEntries(pp[0], pp[1]);
+ }
+}
+
+void PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseString, std::string targetPhraseString)
+{
+ VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseString, std::string targetPhraseString)" << std::endl);
+ const StaticData &staticData = StaticData::Instance();
+ const std::string& factorDelimiter = staticData.GetFactorDelimiter();
+ Phrase sourcePhrase(0);
+ Phrase targetPhrase(0);
+
+ //target
+ targetPhrase.Clear();
+ VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl);
+ targetPhrase.CreateFromString(Output, staticData.GetOutputFactorOrder(), targetPhraseString, /*factorDelimiter,*/ NULL);
+ VERBOSE(2, "targetPhrase:|" << targetPhrase << "|" << std::endl);
+
+ //TODO: Would be better to reuse source phrases, but ownership has to be
+ //consistent across phrase table implementations
+ sourcePhrase.Clear();
+ VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl);
+ sourcePhrase.CreateFromString(Input, staticData.GetInputFactorOrder(), sourcePhraseString, /*factorDelimiter,*/ NULL);
+ VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl);
+ ClearEntries(sourcePhrase, targetPhrase);
+
+}
+
+void PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp)
+{
+ VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp)" << std::endl);
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
+#endif
+ VERBOSE(3, "PhraseDictionaryCache deleting sp:|" << sp << "| tp:|" << tp << "|" << std::endl);
+
+ cacheMap::const_iterator it = m_cacheTM.find(sp);
+ VERBOSE(3,"sp:|" << sp << "|" << std::endl);
+ if(it!=m_cacheTM.end()) {
+ VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl);
+ // sp is found
+ // here we have to remove the target phrase from targetphrasecollection and from the TargetAgeMap
+ // and then add new entry
+
+ TargetCollectionAgePair TgtCollAgePair = it->second;
+ TargetPhraseCollection* tpc = TgtCollAgePair.first;
+ AgeCollection* ac = TgtCollAgePair.second;
+ const Phrase* p_ptr = NULL;
+ TargetPhrase* tp_ptr = NULL;
+ bool found = false;
+ size_t tp_pos=0;
+ while (!found && tp_pos < tpc->GetSize()) {
+ tp_ptr = (TargetPhrase*) tpc->GetTargetPhrase(tp_pos);
+ p_ptr = (const Phrase*) tp_ptr;
+ if (tp == *p_ptr) {
+ found = true;
+ continue;
+ }
+ tp_pos++;
+ }
+ if (!found) {
+ VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl);
+ //do nothing
+ } else {
+ VERBOSE(3,"tp:|" << tp << "| FOUND" << std::endl);
+
+ tpc->Remove(tp_pos); //delete entry in the Target Phrase Collection
+ ac->erase(ac->begin() + tp_pos); //delete entry in the Age Collection
+ m_entries--;
+ VERBOSE(3,"tpc size:|" << tpc->GetSize() << "|" << std::endl);
+ VERBOSE(3,"ac size:|" << ac->size() << "|" << std::endl);
+ VERBOSE(3,"tp:|" << tp << "| DELETED" << std::endl);
+ }
+ if (tpc->GetSize() == 0) {
+ // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection
+ ac->clear();
+ delete tpc;
+ delete ac;
+ m_cacheTM.erase(sp);
+ }
+
+ } else {
+ VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl);
+ //do nothing
+ }
+}
+
+
+
+
+void PhraseDictionaryDynamicCacheBased::ClearSource(std::string &entries)
+{
+ if (entries != "") {
+ VERBOSE(3,"entries:|" << entries << "|" << std::endl);
+ std::vector<std::string> elements = TokenizeMultiCharSeparator(entries, "||||");
+ VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl);
+ ClearEntries(elements);
+ }
+}
+
+void PhraseDictionaryDynamicCacheBased::ClearSource(std::vector<std::string> entries)
+{
+ VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl);
+ const StaticData &staticData = StaticData::Instance();
+ const std::string& factorDelimiter = staticData.GetFactorDelimiter();
+ Phrase sourcePhrase(0);
+
+ std::vector<std::string>::iterator it;
+ for(it = entries.begin(); it!=entries.end(); it++) {
+
+ sourcePhrase.Clear();
+ VERBOSE(3, "sourcePhraseString:|" << (*it) << "|" << std::endl);
+ sourcePhrase.CreateFromString(Input, staticData.GetInputFactorOrder(), *it, /*factorDelimiter,*/ NULL);
+ VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl);
+
+ ClearSource(sourcePhrase);
+ }
+
+ IFVERBOSE(2) Print();
+}
+
+void PhraseDictionaryDynamicCacheBased::ClearSource(Phrase sp)
+{
+ VERBOSE(3,"void PhraseDictionaryDynamicCacheBased::ClearSource(Phrase sp) sp:|" << sp << "|" << std::endl);
+ cacheMap::const_iterator it = m_cacheTM.find(sp);
+ if (it != m_cacheTM.end()) {
+ VERBOSE(3,"found:|" << sp << "|" << std::endl);
+ //sp is found
+
+ TargetCollectionAgePair TgtCollAgePair = it->second;
+ TargetPhraseCollection* tpc = TgtCollAgePair.first;
+ AgeCollection* ac = TgtCollAgePair.second;
+
+ m_entries-=tpc->GetSize(); //reduce the total amount of entries of the cache
+
+ // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection
+ ac->clear();
+ delete tpc;
+ delete ac;
+ m_cacheTM.erase(sp);
+ } else {
+ //do nothing
+ }
+}
+
+void PhraseDictionaryDynamicCacheBased::Insert(std::string &entries)
+{
+ if (entries != "") {
+ VERBOSE(3,"entries:|" << entries << "|" << std::endl);
+ std::vector<std::string> elements = TokenizeMultiCharSeparator(entries, "||||");
+ VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl);
+ Insert(elements);
+ }
+}
+
+void PhraseDictionaryDynamicCacheBased::Insert(std::vector<std::string> entries)
+{
+ VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl);
+ if (m_constant == false) {
+ Decay();
+ }
+ Update(entries, "1");
+ IFVERBOSE(3) Print();
+}
+
+
+void PhraseDictionaryDynamicCacheBased::Update(std::vector<std::string> entries, std::string ageString)
+{
+ VERBOSE(3,"PhraseDictionaryDynamicCacheBased::Update(std::vector<std::string> entries, std::string ageString)" << std::endl);
+ std::vector<std::string> pp;
+
+ VERBOSE(3,"ageString:|" << ageString << "|" << std::endl);
+ std::vector<std::string>::iterator it;
+ for(it = entries.begin(); it!=entries.end(); it++) {
+ pp.clear();
+ pp = TokenizeMultiCharSeparator((*it), "|||");
+ VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl);
+ VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl);
+
+ if (pp.size() > 2) {
+ VERBOSE(3,"pp[2]:|" << pp[2] << "|" << std::endl);
+ Update(pp[0], pp[1], ageString, pp[2]);
+ } else {
+ Update(pp[0], pp[1], ageString);
+ }
+ }
+}
+
+void PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, std::string targetPhraseString, std::string ageString, std::string waString)
+{
+ VERBOSE(3,"PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, std::string targetPhraseString, std::string ageString, std::string waString)" << std::endl);
+ const StaticData &staticData = StaticData::Instance();
+ const std::string& factorDelimiter = staticData.GetFactorDelimiter();
+ Phrase sourcePhrase(0);
+ TargetPhrase targetPhrase(0);
+
+ VERBOSE(3, "ageString:|" << ageString << "|" << std::endl);
+ char *err_ind_temp;
+ ageString = Trim(ageString);
+ int age = strtod(ageString.c_str(), &err_ind_temp);
+ VERBOSE(3, "age:|" << age << "|" << std::endl);
+
+ //target
+ targetPhrase.Clear();
+ VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl);
+ targetPhrase.CreateFromString(Output, staticData.GetOutputFactorOrder(), targetPhraseString, /*factorDelimiter,*/ NULL);
+ VERBOSE(3, "targetPhrase:|" << targetPhrase << "|" << std::endl);
+
+ //TODO: Would be better to reuse source phrases, but ownership has to be
+ //consistent across phrase table implementations
+ sourcePhrase.Clear();
+ VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl);
+ sourcePhrase.CreateFromString(Input, staticData.GetInputFactorOrder(), sourcePhraseString, /*factorDelimiter,*/ NULL);
+ VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl);
+
+ if (!waString.empty()) VERBOSE(3, "waString:|" << waString << "|" << std::endl);
+
+ Update(sourcePhrase, targetPhrase, age, waString);
+}
+
+void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int age, std::string waString)
+{
+ VERBOSE(3,"PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int age, std::string waString)" << std::endl);
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
+#endif
+ VERBOSE(3, "PhraseDictionaryCache inserting sp:|" << sp << "| tp:|" << tp << "| age:|" << age << "| word-alignment |" << waString << "|" << std::endl);
+
+ cacheMap::const_iterator it = m_cacheTM.find(sp);
+ VERBOSE(3,"sp:|" << sp << "|" << std::endl);
+ if(it!=m_cacheTM.end()) {
+ VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl);
+ // sp is found
+ // here we have to remove the target phrase from targetphrasecollection and from the TargetAgeMap
+ // and then add new entry
+
+ TargetCollectionAgePair TgtCollAgePair = it->second;
+ TargetPhraseCollection* tpc = TgtCollAgePair.first;
+ AgeCollection* ac = TgtCollAgePair.second;
+// const TargetPhrase* p_ptr = NULL;
+ const Phrase* p_ptr = NULL;
+ TargetPhrase* tp_ptr = NULL;
+ bool found = false;
+ size_t tp_pos=0;
+ while (!found && tp_pos < tpc->GetSize()) {
+ tp_ptr = (TargetPhrase*) tpc->GetTargetPhrase(tp_pos);
+ p_ptr = (const TargetPhrase*) tp_ptr;
+ if ((Phrase) tp == *p_ptr) {
+ found = true;
+ continue;
+ }
+ tp_pos++;
+ }
+ if (!found) {
+ VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl);
+ std::auto_ptr<TargetPhrase> targetPhrase(new TargetPhrase(tp));
+
+ targetPhrase->GetScoreBreakdown().Assign(this, GetPreComputedScores(age));
+ if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString);
+
+ tpc->Add(targetPhrase.release());
+
+ tp_pos = tpc->GetSize()-1;
+ ac->push_back(age);
+ m_entries++;
+ VERBOSE(3,"sp:|" << sp << "tp:|" << tp << "| INSERTED" << std::endl);
+ } else {
+ tp_ptr->GetScoreBreakdown().Assign(this, GetPreComputedScores(age));
+ if (!waString.empty()) tp_ptr->SetAlignmentInfo(waString);
+ ac->at(tp_pos) = age;
+ VERBOSE(3,"sp:|" << sp << "tp:|" << tp << "| UPDATED" << std::endl);
+ }
+ } else {
+ VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl);
+ // p is not found
+ // create target collection
+ // we have to create new target collection age pair and add new entry to target collection age pair
+
+ TargetPhraseCollection* tpc = new TargetPhraseCollection();
+ AgeCollection* ac = new AgeCollection();
+ m_cacheTM.insert(make_pair(sp,make_pair(tpc,ac)));
+
+ //tp is not found
+ std::auto_ptr<TargetPhrase> targetPhrase(new TargetPhrase(tp));
+ targetPhrase->GetScoreBreakdown().Assign(this, GetPreComputedScores(age));
+ if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString);
+
+ tpc->Add(targetPhrase.release());
+ ac->push_back(age);
+ m_entries++;
+ VERBOSE(3,"sp:|" << sp << "| tp:|" << tp << "| INSERTED" << std::endl);
+ }
+}
+
+void PhraseDictionaryDynamicCacheBased::Decay()
+{
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
+#endif
+ cacheMap::iterator it;
+ for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) {
+ Decay((*it).first);
+ }
+}
+
void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp)
{
  // Age every target phrase cached for source phrase sp by one step.
  // Entries whose new age exceeds m_maxAge are evicted; surviving entries
  // get their score re-assigned from the precomputed age->score table.
  // NOTE(review): no locking here — Decay() acquires m_cacheLock before
  // looping over the cache; confirm any other caller does the same.
  // NOTE(review): erasing sp from m_cacheTM below invalidates iterators
  // pointing at this entry, which matters to the loop in Decay().
  VERBOSE(3,"void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp) sp:|" << sp << "|" << std::endl);
  cacheMap::const_iterator it = m_cacheTM.find(sp);
  if (it != m_cacheTM.end()) {
    VERBOSE(3,"found:|" << sp << "|" << std::endl);
    //sp is found

    TargetCollectionAgePair TgtCollAgePair = it->second;
    TargetPhraseCollection* tpc = TgtCollAgePair.first;
    AgeCollection* ac = TgtCollAgePair.second;

    //loop in inverted order to allow a correct deletion of std::vectors tpc and ac
    for (int tp_pos = tpc->GetSize() - 1 ; tp_pos >= 0; tp_pos--) {
      unsigned int tp_age = ac->at(tp_pos); //current age of this entry
      tp_age++; //increase the age by 1
      VERBOSE(3,"sp:|" << sp << "| " << " new tp_age:|" << tp_age << "|" << std::endl);

      TargetPhrase* tp_ptr = (TargetPhrase*) tpc->GetTargetPhrase(tp_pos);

      if (tp_age > m_maxAge) {
        // too old: evict from both parallel collections
        VERBOSE(3,"tp_age:|" << tp_age << "| TOO BIG" << std::endl);
        tpc->Remove(tp_pos); //delete entry in the Target Phrase Collection
        ac->erase(ac->begin() + tp_pos); //delete entry in the Age Collection
        m_entries--;
      } else {
        // still alive: store the new age and the score derived from it
        VERBOSE(3,"tp_age:|" << tp_age << "| STILL GOOD" << std::endl);
        tp_ptr->GetScoreBreakdown().Assign(this, GetPreComputedScores(tp_age));
        ac->at(tp_pos) = tp_age;
      }
    }
    if (tpc->GetSize() == 0) {
      // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection
      (((*it).second).second)->clear();
      delete ((*it).second).second;
      delete ((*it).second).first;
      m_cacheTM.erase(sp);
    }
  } else {
    //do nothing
    VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl);
  }

  //put here the removal of entries with age greater than m_maxAge
}
+
+void PhraseDictionaryDynamicCacheBased::Execute(std::string command)
+{
+ VERBOSE(2,"command:|" << command << "|" << std::endl);
+ std::vector<std::string> commands = Tokenize(command, "||");
+ Execute(commands);
+}
+
+void PhraseDictionaryDynamicCacheBased::Execute(std::vector<std::string> commands)
+{
+ for (size_t j=0; j<commands.size(); j++) {
+ Execute_Single_Command(commands[j]);
+ }
+ IFVERBOSE(2) Print();
+}
+
+void PhraseDictionaryDynamicCacheBased::Execute_Single_Command(std::string command)
+{
+ if (command == "clear") {
+ VERBOSE(2,"PhraseDictionaryDynamicCacheBased Execute command:|"<< command << "|. Cache cleared." << std::endl);
+ Clear();
+ } else {
+ VERBOSE(2,"PhraseDictionaryDynamicCacheBased Execute command:|"<< command << "| is unknown. Skipped." << std::endl);
+ }
+}
+
+
+void PhraseDictionaryDynamicCacheBased::Clear()
+{
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
+#endif
+ cacheMap::const_iterator it;
+ for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) {
+ (((*it).second).second)->clear();
+ delete ((*it).second).second;
+ delete ((*it).second).first;
+ }
+ m_cacheTM.clear();
+ m_entries = 0;
+}
+
+
+void PhraseDictionaryDynamicCacheBased::ExecuteDlt(std::map<std::string, std::string> dlt_meta)
+{
+ if (dlt_meta.find("cbtm") != dlt_meta.end()) {
+ Insert(dlt_meta["cbtm"]);
+ }
+ if (dlt_meta.find("cbtm-command") != dlt_meta.end()) {
+ Execute(dlt_meta["cbtm-command"]);
+ }
+ if (dlt_meta.find("cbtm-file") != dlt_meta.end()) {
+ Load(dlt_meta["cbtm-file"]);
+ }
+ if (dlt_meta.find("cbtm-clear-source") != dlt_meta.end()) {
+ ClearSource(dlt_meta["cbtm-clear-source"]);
+ }
+ if (dlt_meta.find("cbtm-clear-entries") != dlt_meta.end()) {
+ ClearEntries(dlt_meta["cbtm-clear-entries"]);
+ }
+ if (dlt_meta.find("cbtm-clear-all") != dlt_meta.end()) {
+ Clear();
+ }
+
+}
+
+void PhraseDictionaryDynamicCacheBased::Print() const
+{
+ VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Print()" << std::endl);
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
+#endif
+ cacheMap::const_iterator it;
+ for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) {
+ std::string source = (it->first).ToString();
+ TargetPhraseCollection* tpc = (it->second).first;
+ TargetPhraseCollection::iterator itr;
+ for(itr = tpc->begin(); itr != tpc->end(); itr++) {
+ std::string target = (*itr)->ToString();
+ std::cout << source << " ||| " << target << std::endl;
+ }
+ source.clear();
+ }
+}
+
+}// end namespace
diff --git a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h
new file mode 100644
index 000000000..41037ba34
--- /dev/null
+++ b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h
@@ -0,0 +1,179 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2011 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+
+#pragma once
+
+#ifndef moses_PhraseDictionaryDynamicCacheBased_H
+#define moses_PhraseDictionaryDynamicCacheBased_H
+
+#include "moses/TypeDef.h"
+#include "moses/TranslationModel/PhraseDictionary.h"
+
+#ifdef WITH_THREADS
+#include <boost/thread/shared_mutex.hpp>
+#include <boost/thread/locks.hpp>
+#endif
+
+#define CBTM_SCORE_TYPE_UNDEFINED (-1)
+#define CBTM_SCORE_TYPE_HYPERBOLA 0
+#define CBTM_SCORE_TYPE_POWER 1
+#define CBTM_SCORE_TYPE_EXPONENTIAL 2
+#define CBTM_SCORE_TYPE_COSINE 3
+#define CBTM_SCORE_TYPE_HYPERBOLA_REWARD 10
+#define CBTM_SCORE_TYPE_POWER_REWARD 11
+#define CBTM_SCORE_TYPE_EXPONENTIAL_REWARD 12
+#define PI 3.14159265
+
+
+namespace Moses
+{
+class ChartParser;
+class ChartCellCollectionBase;
+class ChartRuleLookupManager;
+
/** Implementation of a Cache-based phrase table.
 *
 *  Translation options can be inserted, refreshed and removed at runtime
 *  (e.g. through dlt annotations or cache commands). Each cached target
 *  phrase carries an age; GetPreComputedScores maps an age to a score
 *  vector, and decaying evicts entries older than m_maxAge unless the
 *  cache is configured as constant (non-decaying).
 */
class PhraseDictionaryDynamicCacheBased : public PhraseDictionary
{

  typedef std::vector<unsigned int> AgeCollection;
  // A TargetPhraseCollection plus the parallel vector of its entries' ages.
  typedef std::pair<TargetPhraseCollection*, AgeCollection*> TargetCollectionAgePair;
  typedef std::map<Phrase, TargetCollectionAgePair> cacheMap;

  // data structure for the cache
  cacheMap m_cacheTM;
  std::vector<Scores> precomputedScores; // per-age score vectors (see GetPreComputedScores)
  unsigned int m_maxAge; // entries older than this are evicted when decaying
  size_t m_score_type; //scoring type of the match (one of the CBTM_SCORE_TYPE_* values above)
  size_t m_entries; //total number of entries in the cache
  float m_lower_score; //lower_bound_score for no match
  bool m_constant; //flag for setting a non-decaying cache
  std::string m_initfiles; // vector of files loaded in the initialization phase
  std::string m_name; // internal name to identify this instance of the Cache-based phrase table

#ifdef WITH_THREADS
  //multiple readers - single writer lock
  mutable boost::shared_mutex m_cacheLock;
#endif

  friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryDynamicCacheBased&);

public:
  PhraseDictionaryDynamicCacheBased(const std::string &line);
  ~PhraseDictionaryDynamicCacheBased();

  inline const std::string GetName() {
    return m_name;
  };
  inline void SetName(const std::string name) {
    m_name = name;
  }

  // Look up a named instance; returns NULL when no instance with that name
  // is present in s_instance_map.
  static const PhraseDictionaryDynamicCacheBased* Instance(const std::string& name) {
    if (s_instance_map.find(name) == s_instance_map.end()) {
      return NULL;
    }
    return s_instance_map[name];
  }

  // Same lookup as Instance(name) but returning a mutable pointer.
  static PhraseDictionaryDynamicCacheBased* InstanceNonConst(const std::string& name) {
    if (s_instance_map.find(name) == s_instance_map.end()) {
      return NULL;
    }
    return s_instance_map[name];
  }


  // Access to the default (last registered) instance.
  // NOTE(review): dereferences s_instance unconditionally — presumably an
  // instance always exists before these are called; confirm with callers.
  static const PhraseDictionaryDynamicCacheBased& Instance() {
    return *s_instance;
  }

  static PhraseDictionaryDynamicCacheBased& InstanceNonConst() {
    return *s_instance;
  }

  void Load();
  void Load(const std::string files);

  const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase &src) const;
  const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(Phrase const &src) const;
  const TargetPhraseCollection* GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const;

  // for phrase-based model
  // void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;

  // for syntax/hiero model (CKY+ decoding)
  ChartRuleLookupManager* CreateRuleLookupManager(const ChartParser&, const ChartCellCollectionBase&, std::size_t);

  void SetParameter(const std::string& key, const std::string& value);

  void InitializeForInput(InputType const& source);

  // virtual void InitializeForInput(InputType const&) {
  //   /* Don't do anything source specific here as this object is shared between threads.*/
  // }

  void Print() const; // prints the cache
  void Clear(); // clears the cache

  // Public string-based entry points; each tokenizes its argument and
  // forwards to the protected overloads below.
  void ClearEntries(std::string &entries);
  void ClearSource(std::string &entries);
  void Insert(std::string &entries);
  void Execute(std::string command);
  void ExecuteDlt(std::map<std::string, std::string> dlt_meta);

  void SetScoreType(size_t type);
  void SetMaxAge(unsigned int age);

protected:
  static PhraseDictionaryDynamicCacheBased *s_instance;
  static std::map< const std::string, PhraseDictionaryDynamicCacheBased * > s_instance_map;

  float decaying_score(const int age); // calculates the decay score given the age
  void Insert(std::vector<std::string> entries);

  void Decay(); // traverse through the cache and decay each entry
  void Decay(Phrase p); // traverse through the cache and decay each entry for a given Phrase
  void Update(std::vector<std::string> entries, std::string ageString);
  void Update(std::string sourceString, std::string targetString, std::string ageString, std::string waString="");
  void Update(Phrase p, TargetPhrase tp, int age, std::string waString="");

  void ClearEntries(std::vector<std::string> entries);
  void ClearEntries(std::string sourceString, std::string targetString);
  void ClearEntries(Phrase p, Phrase tp);

  void ClearSource(std::vector<std::string> entries);
  void ClearSource(Phrase sp);

  void Execute(std::vector<std::string> commands);
  void Execute_Single_Command(std::string command);


  void SetPreComputedScores(const unsigned int numScoreComponent);
  Scores GetPreComputedScores(const unsigned int age);

  void Load_Multiple_Files(std::vector<std::string> files);
  void Load_Single_File(const std::string file);

  TargetPhrase *CreateTargetPhrase(const Phrase &sourcePhrase) const;
};
+
+} // namespace Moses
+
+#endif /* moses_PhraseDictionaryDynamicCacheBased_H_ */
diff --git a/moses/TranslationModel/PhraseDictionaryMemory.cpp b/moses/TranslationModel/PhraseDictionaryMemory.cpp
index 28a7f81ec..1724748bd 100644
--- a/moses/TranslationModel/PhraseDictionaryMemory.cpp
+++ b/moses/TranslationModel/PhraseDictionaryMemory.cpp
@@ -30,7 +30,6 @@
#include "moses/InputFileStream.h"
#include "moses/StaticData.h"
#include "moses/WordsRange.h"
-#include "moses/UserMessage.h"
#include "moses/TranslationModel/RuleTable/LoaderFactory.h"
#include "moses/TranslationModel/RuleTable/Loader.h"
#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.h"
@@ -98,9 +97,9 @@ PhraseDictionaryNodeMemory &PhraseDictionaryMemory::GetOrCreateNode(const Phrase
const Word &sourceNonTerm = word;
UTIL_THROW_IF2(iterAlign == alignmentInfo.end(),
- "No alignment for non-term at position " << pos);
+ "No alignment for non-term at position " << pos);
UTIL_THROW_IF2(iterAlign->first != pos,
- "Alignment info incorrect at position " << pos);
+ "Alignment info incorrect at position " << pos);
size_t targetNonTermInd = iterAlign->second;
++iterAlign;
@@ -115,7 +114,7 @@ PhraseDictionaryNodeMemory &PhraseDictionaryMemory::GetOrCreateNode(const Phrase
}
UTIL_THROW_IF2(currNode == NULL,
- "Node not found at position " << pos);
+ "Node not found at position " << pos);
}
// finally, the source LHS
@@ -161,7 +160,7 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
// backoff
if (!SatisfyBackoff(inputPath)) {
- continue;
+ continue;
}
if (prevPtNode) {
@@ -173,7 +172,7 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
const TargetPhraseCollection &targetPhrases = ptNode->GetTargetPhraseCollection();
inputPath.SetTargetPhrases(*this, &targetPhrases, ptNode);
} else {
- inputPath.SetTargetPhrases(*this, NULL, NULL);
+ inputPath.SetTargetPhrases(*this, NULL, NULL);
}
}
}
diff --git a/moses/TranslationModel/PhraseDictionaryMultiModel.cpp b/moses/TranslationModel/PhraseDictionaryMultiModel.cpp
index f226b8ba4..f0b555e22 100644
--- a/moses/TranslationModel/PhraseDictionaryMultiModel.cpp
+++ b/moses/TranslationModel/PhraseDictionaryMultiModel.cpp
@@ -33,19 +33,19 @@ PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line)
if (m_mode == "interpolate") {
size_t numWeights = m_numScoreComponents;
UTIL_THROW_IF2(m_pdStr.size() != m_multimodelweights.size() &&
- m_pdStr.size()*numWeights != m_multimodelweights.size(),
- "Number of scores and weights are not equal");
+ m_pdStr.size()*numWeights != m_multimodelweights.size(),
+ "Number of scores and weights are not equal");
} else if (m_mode == "all" || m_mode == "all-restrict") {
size_t componentWeights = 0;
for(size_t i = 0; i < m_numModels; ++i) {
const string &ptName = m_pdStr[i];
PhraseDictionary *pt = FindPhraseDictionary(ptName);
UTIL_THROW_IF2(pt == NULL,
- "Could not find component phrase table " << ptName);
+ "Could not find component phrase table " << ptName);
componentWeights += pt->GetNumScoreComponents();
}
UTIL_THROW_IF2(componentWeights != m_numScoreComponents,
- "Total number of component model scores is unequal to specified number of scores");
+ "Total number of component model scores is unequal to specified number of scores");
} else {
ostringstream msg;
msg << "combination mode unknown: " << m_mode;
@@ -57,10 +57,10 @@ PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(int type, const std::stri
:PhraseDictionary(line)
{
if (type == 1) {
- // PhraseDictionaryMultiModelCounts
+ // PhraseDictionaryMultiModelCounts
UTIL_THROW_IF2(m_pdStr.size() != m_multimodelweights.size() &&
- m_pdStr.size()*4 != m_multimodelweights.size(),
- "Number of scores and weights are not equal");
+ m_pdStr.size()*4 != m_multimodelweights.size(),
+ "Number of scores and weights are not equal");
}
}
@@ -91,7 +91,7 @@ void PhraseDictionaryMultiModel::Load()
PhraseDictionary *pt = FindPhraseDictionary(ptName);
UTIL_THROW_IF2(pt == NULL,
- "Could not find component phrase table " << ptName);
+ "Could not find component phrase table " << ptName);
m_pd.push_back(pt);
}
}
@@ -122,7 +122,7 @@ const TargetPhraseCollection *PhraseDictionaryMultiModel::GetTargetPhraseCollect
ret->NthElement(m_tableLimit); // sort the phrases for pruning later
const_cast<PhraseDictionaryMultiModel*>(this)->CacheForCleanup(ret);
-
+
return ret;
}
@@ -303,9 +303,7 @@ std::vector<std::vector<float> > PhraseDictionaryMultiModel::getWeights(size_t n
weights_ptr = &m_multimodelweights; //fall back to weights defined in config
} else if(weights_ptr->size() != m_numModels && weights_ptr->size() != m_numModels * numWeights) {
//TODO: can we pass error message to client if weights are malformed?
- std::stringstream strme;
- strme << "Must have either one multimodel weight per model (" << m_numModels << "), or one per weighted feature and model (" << numWeights << "*" << m_numModels << "). You have " << weights_ptr->size() << ". Reverting to weights in config";
- UserMessage::Add(strme.str());
+ std::cerr << "Must have either one multimodel weight per model (" << m_numModels << "), or one per weighted feature and model (" << numWeights << "*" << m_numModels << "). You have " << weights_ptr->size() << ". Reverting to weights in config";
weights_ptr = &m_multimodelweights; //fall back to weights defined in config
}
diff --git a/moses/TranslationModel/PhraseDictionaryMultiModel.h b/moses/TranslationModel/PhraseDictionaryMultiModel.h
index 41df2e679..78016b676 100644
--- a/moses/TranslationModel/PhraseDictionaryMultiModel.h
+++ b/moses/TranslationModel/PhraseDictionaryMultiModel.h
@@ -28,7 +28,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/StaticData.h"
#include "moses/TargetPhrase.h"
#include "moses/Util.h"
-#include "moses/UserMessage.h"
#ifdef WITH_DLIB
#include <dlib/optimization.h>
diff --git a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp
index 9a9739de6..c632f9ff2 100644
--- a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp
+++ b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp
@@ -65,7 +65,7 @@ PhraseDictionaryMultiModelCounts::PhraseDictionaryMultiModelCounts(const std::st
ReadParameters();
UTIL_THROW_IF2(m_targetTable.size() != m_pdStr.size(),
- "List of phrase tables and target tables must be equal");
+ "List of phrase tables and target tables must be equal");
}
@@ -85,11 +85,11 @@ void PhraseDictionaryMultiModelCounts::SetParameter(const std::string& key, cons
} else if (key == "lex-e2f") {
m_lexE2FStr = Tokenize(value, ",");
UTIL_THROW_IF2(m_lexE2FStr.size() != m_pdStr.size(),
- "Number of scores for lexical probability p(f|e) incorrectly specified");
+ "Number of scores for lexical probability p(f|e) incorrectly specified");
} else if (key == "lex-f2e") {
m_lexF2EStr = Tokenize(value, ",");
UTIL_THROW_IF2(m_lexF2EStr.size() != m_pdStr.size(),
- "Number of scores for lexical probability p(e|f) incorrectly specified");
+ "Number of scores for lexical probability p(e|f) incorrectly specified");
} else if (key == "target-table") {
m_targetTable = Tokenize(value, ",");
} else {
@@ -115,14 +115,14 @@ void PhraseDictionaryMultiModelCounts::Load()
PhraseDictionary *pt;
pt = FindPhraseDictionary(ptName);
UTIL_THROW_IF2(pt == NULL,
- "Could not find component phrase table " << ptName);
+ "Could not find component phrase table " << ptName);
m_pd.push_back(pt);
// reverse
const string &target_table = m_targetTable[i];
pt = FindPhraseDictionary(target_table);
UTIL_THROW_IF2(pt == NULL,
- "Could not find component phrase table " << target_table);
+ "Could not find component phrase table " << target_table);
m_inverse_pd.push_back(pt);
// lex
diff --git a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.h b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.h
index c948b66b2..725c66eb9 100644
--- a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.h
+++ b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.h
@@ -27,7 +27,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/StaticData.h"
#include "moses/TargetPhrase.h"
#include "moses/Util.h"
-#include "moses/UserMessage.h"
#include <exception>
namespace Moses
diff --git a/moses/TranslationModel/PhraseDictionaryNodeMemory.cpp b/moses/TranslationModel/PhraseDictionaryNodeMemory.cpp
index 92b98b50d..84639a737 100644
--- a/moses/TranslationModel/PhraseDictionaryNodeMemory.cpp
+++ b/moses/TranslationModel/PhraseDictionaryNodeMemory.cpp
@@ -65,7 +65,7 @@ PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetOrCreateChild(const W
PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetOrCreateNonTerminalChild(const Word &targetNonTerm)
{
UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
- "Not a non-terminal: " << targetNonTerm);
+ "Not a non-terminal: " << targetNonTerm);
return &m_nonTermMap[targetNonTerm];
}
@@ -73,9 +73,9 @@ PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetOrCreateNonTerminalCh
PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetOrCreateChild(const Word &sourceNonTerm, const Word &targetNonTerm)
{
UTIL_THROW_IF2(!sourceNonTerm.IsNonTerminal(),
- "Not a non-terminal: " << sourceNonTerm);
+ "Not a non-terminal: " << sourceNonTerm);
UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
- "Not a non-terminal: " << targetNonTerm);
+ "Not a non-terminal: " << targetNonTerm);
NonTerminalMapKey key(sourceNonTerm, targetNonTerm);
return &m_nonTermMap[NonTerminalMapKey(sourceNonTerm, targetNonTerm)];
@@ -85,7 +85,7 @@ PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetOrCreateChild(const W
const PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetChild(const Word &sourceTerm) const
{
UTIL_THROW_IF2(sourceTerm.IsNonTerminal(),
- "Not a terminal: " << sourceTerm);
+ "Not a terminal: " << sourceTerm);
TerminalMap::const_iterator p = m_sourceTermMap.find(sourceTerm);
return (p == m_sourceTermMap.end()) ? NULL : &p->second;
@@ -95,7 +95,7 @@ const PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetChild(const Wor
const PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetNonTerminalChild(const Word &targetNonTerm) const
{
UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
- "Not a non-terminal: " << targetNonTerm);
+ "Not a non-terminal: " << targetNonTerm);
NonTerminalMap::const_iterator p = m_nonTermMap.find(targetNonTerm);
return (p == m_nonTermMap.end()) ? NULL : &p->second;
@@ -104,9 +104,9 @@ const PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetNonTerminalChil
const PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetChild(const Word &sourceNonTerm, const Word &targetNonTerm) const
{
UTIL_THROW_IF2(!sourceNonTerm.IsNonTerminal(),
- "Not a non-terminal: " << sourceNonTerm);
+ "Not a non-terminal: " << sourceNonTerm);
UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
- "Not a non-terminal: " << targetNonTerm);
+ "Not a non-terminal: " << targetNonTerm);
NonTerminalMapKey key(sourceNonTerm, targetNonTerm);
NonTerminalMap::const_iterator p = m_nonTermMap.find(key);
diff --git a/moses/TranslationModel/PhraseDictionaryScope3.cpp b/moses/TranslationModel/PhraseDictionaryScope3.cpp
index 9c56f9e1d..4bf636c3e 100644
--- a/moses/TranslationModel/PhraseDictionaryScope3.cpp
+++ b/moses/TranslationModel/PhraseDictionaryScope3.cpp
@@ -30,7 +30,6 @@
#include "moses/InputFileStream.h"
#include "moses/StaticData.h"
#include "moses/WordsRange.h"
-#include "moses/UserMessage.h"
#include "moses/TranslationModel/RuleTable/LoaderFactory.h"
#include "moses/TranslationModel/RuleTable/Loader.h"
#include "moses/TranslationModel/Scope3Parser/Parser.h"
diff --git a/moses/TranslationModel/PhraseDictionaryTransliteration.cpp b/moses/TranslationModel/PhraseDictionaryTransliteration.cpp
index c2ffd95da..a336da759 100644
--- a/moses/TranslationModel/PhraseDictionaryTransliteration.cpp
+++ b/moses/TranslationModel/PhraseDictionaryTransliteration.cpp
@@ -14,20 +14,20 @@ PhraseDictionaryTransliteration::PhraseDictionaryTransliteration(const std::stri
{
ReadParameters();
UTIL_THROW_IF2(m_mosesDir.empty() ||
- m_scriptDir.empty() ||
- m_externalDir.empty() ||
- m_inputLang.empty() ||
- m_outputLang.empty(), "Must specify all arguments");
+ m_scriptDir.empty() ||
+ m_externalDir.empty() ||
+ m_inputLang.empty() ||
+ m_outputLang.empty(), "Must specify all arguments");
}
void PhraseDictionaryTransliteration::Load()
{
- SetFeaturesToApply();
+ SetFeaturesToApply();
}
void PhraseDictionaryTransliteration::CleanUpAfterSentenceProcessing(const InputType& source)
{
- ReduceCache();
+ ReduceCache();
}
void PhraseDictionaryTransliteration::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
@@ -38,14 +38,14 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollectionBatch(const Input
InputPath &inputPath = **iter;
if (!SatisfyBackoff(inputPath)) {
- continue;
+ continue;
}
const Phrase &sourcePhrase = inputPath.GetPhrase();
if (sourcePhrase.GetSize() != 1) {
- // only translit single words. A limitation of the translit script
- continue;
+ // only translit single words. A limitation of the translit script
+ continue;
}
GetTargetPhraseCollection(inputPath);
@@ -54,90 +54,89 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollectionBatch(const Input
void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &inputPath) const
{
- const Phrase &sourcePhrase = inputPath.GetPhrase();
- size_t hash = hash_value(sourcePhrase);
+ const Phrase &sourcePhrase = inputPath.GetPhrase();
+ size_t hash = hash_value(sourcePhrase);
- CacheColl &cache = GetCache();
+ CacheColl &cache = GetCache();
- CacheColl::iterator iter;
- iter = cache.find(hash);
+ CacheColl::iterator iter;
+ iter = cache.find(hash);
- if (iter != cache.end()) {
- // already in cache
- const TargetPhraseCollection *tpColl = iter->second.first;
- inputPath.SetTargetPhrases(*this, tpColl, NULL);
- }
- else {
- // TRANSLITERATE
- char *ptr = tmpnam(NULL);
- string inFile(ptr);
- ptr = tmpnam(NULL);
- string outDir(ptr);
-
- ofstream inStream(inFile.c_str());
- inStream << sourcePhrase.ToString() << endl;
- inStream.close();
-
- string cmd = m_scriptDir + "/Transliteration/prepare-transliteration-phrase-table.pl" +
- " --transliteration-model-dir " + m_filePath +
- " --moses-src-dir " + m_mosesDir +
- " --external-bin-dir " + m_externalDir +
- " --input-extension " + m_inputLang +
- " --output-extension " + m_outputLang +
- " --oov-file " + inFile +
- " --out-dir " + outDir;
-
- int ret = system(cmd.c_str());
- UTIL_THROW_IF2(ret != 0, "Transliteration script error");
-
- TargetPhraseCollection *tpColl = new TargetPhraseCollection();
- vector<TargetPhrase*> targetPhrases = CreateTargetPhrases(sourcePhrase, outDir);
- vector<TargetPhrase*>::const_iterator iter;
- for (iter = targetPhrases.begin(); iter != targetPhrases.end(); ++iter) {
- TargetPhrase *tp = *iter;
- tpColl->Add(tp);
- }
-
- std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
- cache[hash] = value;
-
- inputPath.SetTargetPhrases(*this, tpColl, NULL);
-
- // clean up temporary files
- remove(inFile.c_str());
-
- cmd = "rm -rf " + outDir;
- system(cmd.c_str());
+ if (iter != cache.end()) {
+ // already in cache
+ const TargetPhraseCollection *tpColl = iter->second.first;
+ inputPath.SetTargetPhrases(*this, tpColl, NULL);
+ } else {
+ // TRANSLITERATE
+ char *ptr = tmpnam(NULL);
+ string inFile(ptr);
+ ptr = tmpnam(NULL);
+ string outDir(ptr);
+
+ ofstream inStream(inFile.c_str());
+ inStream << sourcePhrase.ToString() << endl;
+ inStream.close();
+
+ string cmd = m_scriptDir + "/Transliteration/prepare-transliteration-phrase-table.pl" +
+ " --transliteration-model-dir " + m_filePath +
+ " --moses-src-dir " + m_mosesDir +
+ " --external-bin-dir " + m_externalDir +
+ " --input-extension " + m_inputLang +
+ " --output-extension " + m_outputLang +
+ " --oov-file " + inFile +
+ " --out-dir " + outDir;
+
+ int ret = system(cmd.c_str());
+ UTIL_THROW_IF2(ret != 0, "Transliteration script error");
+
+ TargetPhraseCollection *tpColl = new TargetPhraseCollection();
+ vector<TargetPhrase*> targetPhrases = CreateTargetPhrases(sourcePhrase, outDir);
+ vector<TargetPhrase*>::const_iterator iter;
+ for (iter = targetPhrases.begin(); iter != targetPhrases.end(); ++iter) {
+ TargetPhrase *tp = *iter;
+ tpColl->Add(tp);
}
+
+ std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
+ cache[hash] = value;
+
+ inputPath.SetTargetPhrases(*this, tpColl, NULL);
+
+ // clean up temporary files
+ remove(inFile.c_str());
+
+ cmd = "rm -rf " + outDir;
+ system(cmd.c_str());
+ }
}
std::vector<TargetPhrase*> PhraseDictionaryTransliteration::CreateTargetPhrases(const Phrase &sourcePhrase, const string &outDir) const
{
- std::vector<TargetPhrase*> ret;
+ std::vector<TargetPhrase*> ret;
- string outPath = outDir + "/out.txt";
- ifstream outStream(outPath.c_str());
+ string outPath = outDir + "/out.txt";
+ ifstream outStream(outPath.c_str());
- string line;
- while (getline(outStream, line)) {
- vector<string> toks;
- Tokenize(toks, line, "\t");
- UTIL_THROW_IF2(toks.size() != 2, "Error in transliteration output file. Expecting word\tscore");
+ string line;
+ while (getline(outStream, line)) {
+ vector<string> toks;
+ Tokenize(toks, line, "\t");
+ UTIL_THROW_IF2(toks.size() != 2, "Error in transliteration output file. Expecting word\tscore");
- TargetPhrase *tp = new TargetPhrase(this);
- Word &word = tp->AddWord();
- word.CreateFromString(Output, m_output, toks[0], false);
+ TargetPhrase *tp = new TargetPhrase(this);
+ Word &word = tp->AddWord();
+ word.CreateFromString(Output, m_output, toks[0], false);
- float score = Scan<float>(toks[1]);
- tp->GetScoreBreakdown().PlusEquals(this, score);
+ float score = Scan<float>(toks[1]);
+ tp->GetScoreBreakdown().PlusEquals(this, score);
- // score of all other ff when this rule is being loaded
- tp->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply());
+ // score of all other ff when this rule is being loaded
+ tp->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply());
- ret.push_back(tp);
- }
+ ret.push_back(tp);
+ }
- outStream.close();
+ outStream.close();
return ret;
}
@@ -146,7 +145,7 @@ ChartRuleLookupManager* PhraseDictionaryTransliteration::CreateRuleLookupManager
const ChartCellCollectionBase &cellCollection,
std::size_t /*maxChartSpan*/)
{
- return NULL;
+ return NULL;
//return new ChartRuleLookupManagerSkeleton(parser, cellCollection, *this);
}
@@ -155,17 +154,17 @@ PhraseDictionaryTransliteration::
SetParameter(const std::string& key, const std::string& value)
{
if (key == "moses-dir") {
- m_mosesDir = value;
+ m_mosesDir = value;
} else if (key == "script-dir") {
- m_scriptDir = value;
+ m_scriptDir = value;
} else if (key == "external-dir") {
- m_externalDir = value;
+ m_externalDir = value;
} else if (key == "input-lang") {
- m_inputLang = value;
+ m_inputLang = value;
} else if (key == "output-lang") {
- m_outputLang = value;
+ m_outputLang = value;
} else {
- PhraseDictionary::SetParameter(key, value);
+ PhraseDictionary::SetParameter(key, value);
}
}
diff --git a/moses/TranslationModel/PhraseDictionaryTree.cpp b/moses/TranslationModel/PhraseDictionaryTree.cpp
index c8b7cb5d2..0e031b362 100644
--- a/moses/TranslationModel/PhraseDictionaryTree.cpp
+++ b/moses/TranslationModel/PhraseDictionaryTree.cpp
@@ -234,7 +234,7 @@ public:
typedef PhraseDictionaryTree::PrefixPtr PPtr;
void GetTargetCandidates(PPtr p,TgtCands& tgtCands) {
- UTIL_THROW_IF2(p == 0L, "Error");
+ UTIL_THROW_IF2(p == 0L, "Error");
// UTIL_THROW_IF2(p == NULL, "Error");
if(p.imp->isRoot()) return;
@@ -280,8 +280,8 @@ public:
}
PPtr Extend(PPtr p,const std::string& w) {
- UTIL_THROW_IF2(p == 0L, "Error");
- // UTIL_THROW_IF2(p == NULL, "Error");
+ UTIL_THROW_IF2(p == 0L, "Error");
+ // UTIL_THROW_IF2(p == NULL, "Error");
if(w.empty() || w==EPSILON) return p;
@@ -320,9 +320,7 @@ int PDTimp::Read(const std::string& fn)
if (NeedAlignmentInfo() && !HasAlignmentInfo()) {
// ERROR
- std::stringstream strme;
- strme << "You are asking for word alignment but the binary phrase table does not contain any alignment info. Please check if you had generated the correct phrase table with word alignment (.wa)\n";
- UserMessage::Add(strme.str());
+ std::cerr << "You are asking for word alignment but the binary phrase table does not contain any alignment info. Please check if you had generated the correct phrase table with word alignment (.wa)\n";
return false;
}
@@ -353,7 +351,7 @@ int PDTimp::Read(const std::string& fn)
tv.Read(iftv);
VERBOSE(1,"binary phrasefile loaded, default OFF_T: "
- <<PTF::getDefault() <<"\n");
+ <<PTF::getDefault() <<"\n");
return 1;
}
@@ -383,8 +381,8 @@ PhraseDictionaryTree::PhraseDictionaryTree()
: imp(new PDTimp)
{
if(sizeof(OFF_T)!=8) {
- UTIL_THROW2("ERROR: size of type 'OFF_T' has to be 64 bit!\n"
- "In gcc, use compiler settings '-D_FILE_OFFSET_BITS=64 -D_LARGE_FILES'\n");
+ UTIL_THROW2("ERROR: size of type 'OFF_T' has to be 64 bit!\n"
+ "In gcc, use compiler settings '-D_FILE_OFFSET_BITS=64 -D_LARGE_FILES'\n");
}
}
@@ -503,7 +501,7 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
// init numElement
numElement = tokens.size();
UTIL_THROW_IF2(numElement < (PrintWordAlignment()?4:3),
- "Format error");
+ "Format error");
}
if (tokens.size() != numElement) {
@@ -552,8 +550,8 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
PSA::Data& d=psa->insert(f);
if(d==InvalidOffT) d=fTell(ot);
else {
- UTIL_THROW2("ERROR: source phrase already inserted (A)!\nline(" << lnc << "): '"
- <<line);
+ UTIL_THROW2("ERROR: source phrase already inserted (A)!\nline(" << lnc << "): '"
+ <<line);
}
}
@@ -562,8 +560,8 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
if (!sparseFeatureString.empty()) {
std::vector<std::string> sparseTokens = Tokenize(sparseFeatureString);
if (sparseTokens.size() % 2 != 0) {
- UTIL_THROW2("ERROR: incorrectly formatted sparse feature string: " <<
- sparseFeatureString);
+ UTIL_THROW2("ERROR: incorrectly formatted sparse feature string: " <<
+ sparseFeatureString);
}
for (size_t i = 0; i < sparseTokens.size(); i+=2) {
fnames.push_back(imp->tv.add(sparseTokens[i]));
@@ -604,13 +602,13 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
PSA::Data& d=psa->insert(f);
if(d==InvalidOffT) d=fTell(ot);
else {
- UTIL_THROW2("ERROR: xsource phrase already inserted (B)!\nline(" << lnc << "): '"
- <<line);
+ UTIL_THROW2("ERROR: xsource phrase already inserted (B)!\nline(" << lnc << "): '"
+ <<line);
}
}
tgtCands.push_back(TgtCand(e,sc, alignmentString));
UTIL_THROW_IF2(currFirstWord == InvalidLabelId,
- "Uninitialize word");
+ "Uninitialize word");
tgtCands.back().SetFeatures(fnames, fvalues);
}
if (PrintWordAlignment())
diff --git a/moses/TranslationModel/PhraseDictionaryTreeAdaptor.cpp b/moses/TranslationModel/PhraseDictionaryTreeAdaptor.cpp
index d1190bd86..a1105d3a7 100644
--- a/moses/TranslationModel/PhraseDictionaryTreeAdaptor.cpp
+++ b/moses/TranslationModel/PhraseDictionaryTreeAdaptor.cpp
@@ -13,7 +13,6 @@
#include "moses/StaticData.h"
#include "moses/UniqueObject.h"
#include "moses/PDTAimp.h"
-#include "moses/UserMessage.h"
#include "util/exception.hh"
using namespace std;
@@ -53,7 +52,7 @@ void PhraseDictionaryTreeAdaptor::InitializeForInput(InputType const& source)
if(m_numScoreComponents!=weight.size()) {
std::stringstream strme;
UTIL_THROW2("ERROR: mismatch of number of scaling factors: " << weight.size()
- << " " << m_numScoreComponents);
+ << " " << m_numScoreComponents);
}
obj->Create(m_input, m_output, m_filePath, weight);
diff --git a/moses/TranslationModel/ProbingPT/ProbingPT.cpp b/moses/TranslationModel/ProbingPT/ProbingPT.cpp
index b854c8c02..6ccf7dcd8 100644
--- a/moses/TranslationModel/ProbingPT/ProbingPT.cpp
+++ b/moses/TranslationModel/ProbingPT/ProbingPT.cpp
@@ -10,8 +10,8 @@ using namespace std;
namespace Moses
{
ProbingPT::ProbingPT(const std::string &line)
-: PhraseDictionary(line)
-,m_engine(NULL)
+ : PhraseDictionary(line)
+ ,m_engine(NULL)
{
ReadParameters();
@@ -26,39 +26,39 @@ ProbingPT::~ProbingPT()
void ProbingPT::Load()
{
- SetFeaturesToApply();
+ SetFeaturesToApply();
- m_engine = new QueryEngine(m_filePath.c_str());
+ m_engine = new QueryEngine(m_filePath.c_str());
- m_unkId = 456456546456;
+ m_unkId = 456456546456;
- // source vocab
- const std::map<uint64_t, std::string> &sourceVocab = m_engine->getSourceVocab();
- std::map<uint64_t, std::string>::const_iterator iterSource;
- for (iterSource = sourceVocab.begin(); iterSource != sourceVocab.end(); ++iterSource) {
- const string &wordStr = iterSource->second;
- const Factor *factor = FactorCollection::Instance().AddFactor(wordStr);
+ // source vocab
+ const std::map<uint64_t, std::string> &sourceVocab = m_engine->getSourceVocab();
+ std::map<uint64_t, std::string>::const_iterator iterSource;
+ for (iterSource = sourceVocab.begin(); iterSource != sourceVocab.end(); ++iterSource) {
+ const string &wordStr = iterSource->second;
+ const Factor *factor = FactorCollection::Instance().AddFactor(wordStr);
- uint64_t probingId = iterSource->first;
+ uint64_t probingId = iterSource->first;
- SourceVocabMap::value_type entry(factor, probingId);
- m_sourceVocabMap.insert(entry);
+ SourceVocabMap::value_type entry(factor, probingId);
+ m_sourceVocabMap.insert(entry);
- }
+ }
- // target vocab
- const std::map<unsigned int, std::string> &probingVocab = m_engine->getVocab();
- std::map<unsigned int, std::string>::const_iterator iter;
- for (iter = probingVocab.begin(); iter != probingVocab.end(); ++iter) {
- const string &wordStr = iter->second;
- const Factor *factor = FactorCollection::Instance().AddFactor(wordStr);
+ // target vocab
+ const std::map<unsigned int, std::string> &probingVocab = m_engine->getVocab();
+ std::map<unsigned int, std::string>::const_iterator iter;
+ for (iter = probingVocab.begin(); iter != probingVocab.end(); ++iter) {
+ const string &wordStr = iter->second;
+ const Factor *factor = FactorCollection::Instance().AddFactor(wordStr);
- unsigned int probingId = iter->first;
+ unsigned int probingId = iter->first;
- TargetVocabMap::value_type entry(factor, probingId);
- m_vocabMap.insert(entry);
+ TargetVocabMap::value_type entry(factor, probingId);
+ m_vocabMap.insert(entry);
- }
+ }
}
void ProbingPT::InitializeForInput(InputType const& source)
@@ -76,15 +76,15 @@ void ProbingPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQue
const Phrase &sourcePhrase = inputPath.GetPhrase();
if (sourcePhrase.GetSize() > StaticData::Instance().GetMaxPhraseLength()) {
- continue;
+ continue;
}
TargetPhraseCollection *tpColl = CreateTargetPhrase(sourcePhrase);
// add target phrase to phrase-table cache
size_t hash = hash_value(sourcePhrase);
- std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
- cache[hash] = value;
+ std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
+ cache[hash] = value;
inputPath.SetTargetPhrases(*this, tpColl, NULL);
}
@@ -95,15 +95,14 @@ std::vector<uint64_t> ProbingPT::ConvertToProbingSourcePhrase(const Phrase &sour
size_t size = sourcePhrase.GetSize();
std::vector<uint64_t> ret(size);
for (size_t i = 0; i < size; ++i) {
- const Factor *factor = sourcePhrase.GetFactor(i, m_input[0]);
- uint64_t probingId = GetSourceProbingId(factor);
- if (probingId == m_unkId) {
- ok = false;
- return ret;
- }
- else {
- ret[i] = probingId;
- }
+ const Factor *factor = sourcePhrase.GetFactor(i, m_input[0]);
+ uint64_t probingId = GetSourceProbingId(factor);
+ if (probingId == m_unkId) {
+ ok = false;
+ return ret;
+ } else {
+ ret[i] = probingId;
+ }
}
ok = true;
@@ -118,9 +117,9 @@ TargetPhraseCollection *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase
bool ok;
vector<uint64_t> probingSource = ConvertToProbingSourcePhrase(sourcePhrase, ok);
if (!ok) {
- // source phrase contains a word unknown in the pt.
- // We know immediately there's no translation for it
- return NULL;
+ // source phrase contains a word unknown in the pt.
+ // We know immediately there's no translation for it
+ return NULL;
}
std::pair<bool, std::vector<target_text> > query_result;
@@ -131,18 +130,18 @@ TargetPhraseCollection *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase
query_result = m_engine->query(probingSource);
if (query_result.first) {
- //m_engine->printTargetInfo(query_result.second);
- tpColl = new TargetPhraseCollection();
+ //m_engine->printTargetInfo(query_result.second);
+ tpColl = new TargetPhraseCollection();
- const std::vector<target_text> &probingTargetPhrases = query_result.second;
- for (size_t i = 0; i < probingTargetPhrases.size(); ++i) {
- const target_text &probingTargetPhrase = probingTargetPhrases[i];
- TargetPhrase *tp = CreateTargetPhrase(sourcePhrase, probingTargetPhrase);
+ const std::vector<target_text> &probingTargetPhrases = query_result.second;
+ for (size_t i = 0; i < probingTargetPhrases.size(); ++i) {
+ const target_text &probingTargetPhrase = probingTargetPhrases[i];
+ TargetPhrase *tp = CreateTargetPhrase(sourcePhrase, probingTargetPhrase);
- tpColl->Add(tp);
- }
+ tpColl->Add(tp);
+ }
- tpColl->Prune(true, m_tableLimit);
+ tpColl->Prune(true, m_tableLimit);
}
return tpColl;
@@ -157,16 +156,16 @@ TargetPhrase *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase, const ta
// words
for (size_t i = 0; i < size; ++i) {
- uint64_t probingId = probingPhrase[i];
- const Factor *factor = GetTargetFactor(probingId);
- assert(factor);
+ uint64_t probingId = probingPhrase[i];
+ const Factor *factor = GetTargetFactor(probingId);
+ assert(factor);
- Word &word = tp->AddWord();
- word.SetFactor(m_output[0], factor);
+ Word &word = tp->AddWord();
+ word.SetFactor(m_output[0], factor);
}
// score for this phrase table
- vector<float> scores = probingTargetPhrase.prob;
+ vector<float> scores = probingTargetPhrase.prob;
std::transform(scores.begin(), scores.end(), scores.begin(),TransformScore);
tp->GetScoreBreakdown().PlusEquals(this, scores);
@@ -176,7 +175,7 @@ TargetPhrase *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase, const ta
AlignmentInfo &aligns = tp->GetAlignTerm();
for (size_t i = 0; i < alignS.size(); i += 2 ) {
- aligns.Add((size_t) alignments[i], (size_t) alignments[i+1]);
+ aligns.Add((size_t) alignments[i], (size_t) alignments[i+1]);
}
*/
@@ -187,28 +186,26 @@ TargetPhrase *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase, const ta
const Factor *ProbingPT::GetTargetFactor(uint64_t probingId) const
{
- TargetVocabMap::right_map::const_iterator iter;
- iter = m_vocabMap.right.find(probingId);
- if (iter != m_vocabMap.right.end()) {
- return iter->second;
- }
- else {
- // not in mapping. Must be UNK
- return NULL;
- }
+ TargetVocabMap::right_map::const_iterator iter;
+ iter = m_vocabMap.right.find(probingId);
+ if (iter != m_vocabMap.right.end()) {
+ return iter->second;
+ } else {
+ // not in mapping. Must be UNK
+ return NULL;
+ }
}
uint64_t ProbingPT::GetSourceProbingId(const Factor *factor) const
{
- SourceVocabMap::left_map::const_iterator iter;
- iter = m_sourceVocabMap.left.find(factor);
- if (iter != m_sourceVocabMap.left.end()) {
- return iter->second;
- }
- else {
- // not in mapping. Must be UNK
- return m_unkId;
- }
+ SourceVocabMap::left_map::const_iterator iter;
+ iter = m_sourceVocabMap.left.find(factor);
+ if (iter != m_sourceVocabMap.left.end()) {
+ return iter->second;
+ } else {
+ // not in mapping. Must be UNK
+ return m_unkId;
+ }
}
ChartRuleLookupManager *ProbingPT::CreateRuleLookupManager(
diff --git a/moses/TranslationModel/ProbingPT/hash.cpp b/moses/TranslationModel/ProbingPT/hash.cpp
index 1049292b1..8945649ef 100644
--- a/moses/TranslationModel/ProbingPT/hash.cpp
+++ b/moses/TranslationModel/ProbingPT/hash.cpp
@@ -1,27 +1,30 @@
-#include "hash.hh"
+#include "hash.hh"
-uint64_t getHash(StringPiece text) {
- std::size_t len = text.size();
- uint64_t key = util::MurmurHashNative(text.data(), len);
- return key;
+uint64_t getHash(StringPiece text)
+{
+ std::size_t len = text.size();
+ uint64_t key = util::MurmurHashNative(text.data(), len);
+ return key;
}
-std::vector<uint64_t> getVocabIDs(StringPiece textin){
- //Tokenize
- std::vector<uint64_t> output;
+std::vector<uint64_t> getVocabIDs(StringPiece textin)
+{
+ //Tokenize
+ std::vector<uint64_t> output;
- util::TokenIter<util::SingleCharacter> it(textin, util::SingleCharacter(' '));
+ util::TokenIter<util::SingleCharacter> it(textin, util::SingleCharacter(' '));
- while(it){
- output.push_back(getHash(*it));
- it++;
- }
+ while(it) {
+ output.push_back(getHash(*it));
+ it++;
+ }
- return output;
+ return output;
}
-uint64_t getVocabID(std::string candidate) {
- std::size_t len = candidate.length();
- uint64_t key = util::MurmurHashNative(candidate.c_str(), len);
- return key;
+uint64_t getVocabID(std::string candidate)
+{
+ std::size_t len = candidate.length();
+ uint64_t key = util::MurmurHashNative(candidate.c_str(), len);
+ return key;
} \ No newline at end of file
diff --git a/moses/TranslationModel/ProbingPT/huffmanish.cpp b/moses/TranslationModel/ProbingPT/huffmanish.cpp
index eea0a7c53..7bb6f1af3 100644
--- a/moses/TranslationModel/ProbingPT/huffmanish.cpp
+++ b/moses/TranslationModel/ProbingPT/huffmanish.cpp
@@ -1,414 +1,451 @@
#include "huffmanish.hh"
-Huffman::Huffman (const char * filepath) {
- //Read the file
- util::FilePiece filein(filepath);
+Huffman::Huffman (const char * filepath)
+{
+ //Read the file
+ util::FilePiece filein(filepath);
- //Init uniq_lines to zero;
- uniq_lines = 0;
+ //Init uniq_lines to zero;
+ uniq_lines = 0;
- line_text prev_line; //Check for unique lines.
- int num_lines = 0 ;
+ line_text prev_line; //Check for unique lines.
+ int num_lines = 0 ;
- while (true){
- line_text new_line;
+ while (true) {
+ line_text new_line;
- num_lines++;
+ num_lines++;
- try {
- //Process line read
- new_line = splitLine(filein.ReadLine());
- count_elements(new_line); //Counts the number of elements, adds new and increments counters.
+ try {
+ //Process line read
+ new_line = splitLine(filein.ReadLine());
+ count_elements(new_line); //Counts the number of elements, adds new and increments counters.
- } catch (util::EndOfFileException e){
- std::cerr << "Unique entries counted: ";
- break;
- }
-
- if (new_line.source_phrase == prev_line.source_phrase){
- continue;
- } else {
- uniq_lines++;
- prev_line = new_line;
- }
+ } catch (util::EndOfFileException e) {
+ std::cerr << "Unique entries counted: ";
+ break;
}
- std::cerr << uniq_lines << std::endl;
-}
-
-void Huffman::count_elements(line_text linein){
- //For target phrase:
- util::TokenIter<util::SingleCharacter> it(linein.target_phrase, util::SingleCharacter(' '));
- while (it) {
- //Check if we have that entry
- std::map<std::string, unsigned int>::iterator mapiter;
- mapiter = target_phrase_words.find(it->as_string());
-
- if (mapiter != target_phrase_words.end()){
- //If the element is found, increment the count.
- mapiter->second++;
- } else {
- //Else create a new entry;
- target_phrase_words.insert(std::pair<std::string, unsigned int>(it->as_string(), 1));
- }
- it++;
+ if (new_line.source_phrase == prev_line.source_phrase) {
+ continue;
+ } else {
+ uniq_lines++;
+ prev_line = new_line;
}
+ }
- //For word allignment 1
- std::map<std::vector<unsigned char>, unsigned int>::iterator mapiter3;
- std::vector<unsigned char> numbers = splitWordAll1(linein.word_all1);
- mapiter3 = word_all1.find(numbers);
+ std::cerr << uniq_lines << std::endl;
+}
- if (mapiter3 != word_all1.end()){
- //If the element is found, increment the count.
- mapiter3->second++;
+void Huffman::count_elements(line_text linein)
+{
+ //For target phrase:
+ util::TokenIter<util::SingleCharacter> it(linein.target_phrase, util::SingleCharacter(' '));
+ while (it) {
+ //Check if we have that entry
+ std::map<std::string, unsigned int>::iterator mapiter;
+ mapiter = target_phrase_words.find(it->as_string());
+
+ if (mapiter != target_phrase_words.end()) {
+ //If the element is found, increment the count.
+ mapiter->second++;
} else {
- //Else create a new entry;
- word_all1.insert(std::pair<std::vector<unsigned char>, unsigned int>(numbers, 1));
+ //Else create a new entry;
+ target_phrase_words.insert(std::pair<std::string, unsigned int>(it->as_string(), 1));
}
+ it++;
+ }
+
+ //For word allignment 1
+ std::map<std::vector<unsigned char>, unsigned int>::iterator mapiter3;
+ std::vector<unsigned char> numbers = splitWordAll1(linein.word_all1);
+ mapiter3 = word_all1.find(numbers);
+
+ if (mapiter3 != word_all1.end()) {
+ //If the element is found, increment the count.
+ mapiter3->second++;
+ } else {
+ //Else create a new entry;
+ word_all1.insert(std::pair<std::vector<unsigned char>, unsigned int>(numbers, 1));
+ }
}
//Assigns huffman values for each unique element
-void Huffman::assign_values() {
- //First create vectors for all maps so that we could sort them later.
-
- //Create a vector for target phrases
- for(std::map<std::string, unsigned int>::iterator it = target_phrase_words.begin(); it != target_phrase_words.end(); it++ ) {
- target_phrase_words_counts.push_back(*it);
- }
- //Sort it
- std::sort(target_phrase_words_counts.begin(), target_phrase_words_counts.end(), sort_pair());
-
- //Create a vector for word allignments 1
- for(std::map<std::vector<unsigned char>, unsigned int>::iterator it = word_all1.begin(); it != word_all1.end(); it++ ) {
- word_all1_counts.push_back(*it);
- }
- //Sort it
- std::sort(word_all1_counts.begin(), word_all1_counts.end(), sort_pair_vec());
-
-
- //Afterwards we assign a value for each phrase, starting from 1, as zero is reserved for delimiter
- unsigned int i = 1; //huffman code
- for(std::vector<std::pair<std::string, unsigned int> >::iterator it = target_phrase_words_counts.begin();
- it != target_phrase_words_counts.end(); it++){
- target_phrase_huffman.insert(std::pair<std::string, unsigned int>(it->first, i));
- i++; //Go to the next huffman code
- }
-
- i = 1; //Reset i for the next map
- for(std::vector<std::pair<std::vector<unsigned char>, unsigned int> >::iterator it = word_all1_counts.begin();
- it != word_all1_counts.end(); it++){
- word_all1_huffman.insert(std::pair<std::vector<unsigned char>, unsigned int>(it->first, i));
- i++; //Go to the next huffman code
- }
-
- //After lookups are produced, clear some memory usage of objects not needed anymore.
- target_phrase_words.clear();
- word_all1.clear();
-
- target_phrase_words_counts.clear();
- word_all1_counts.clear();
-
- std::cerr << "Finished generating huffman codes." << std::endl;
+void Huffman::assign_values()
+{
+ //First create vectors for all maps so that we could sort them later.
+
+ //Create a vector for target phrases
+ for(std::map<std::string, unsigned int>::iterator it = target_phrase_words.begin(); it != target_phrase_words.end(); it++ ) {
+ target_phrase_words_counts.push_back(*it);
+ }
+ //Sort it
+ std::sort(target_phrase_words_counts.begin(), target_phrase_words_counts.end(), sort_pair());
+
+ //Create a vector for word allignments 1
+ for(std::map<std::vector<unsigned char>, unsigned int>::iterator it = word_all1.begin(); it != word_all1.end(); it++ ) {
+ word_all1_counts.push_back(*it);
+ }
+ //Sort it
+ std::sort(word_all1_counts.begin(), word_all1_counts.end(), sort_pair_vec());
+
+
+ //Afterwards we assign a value for each phrase, starting from 1, as zero is reserved for delimiter
+ unsigned int i = 1; //huffman code
+ for(std::vector<std::pair<std::string, unsigned int> >::iterator it = target_phrase_words_counts.begin();
+ it != target_phrase_words_counts.end(); it++) {
+ target_phrase_huffman.insert(std::pair<std::string, unsigned int>(it->first, i));
+ i++; //Go to the next huffman code
+ }
+
+ i = 1; //Reset i for the next map
+ for(std::vector<std::pair<std::vector<unsigned char>, unsigned int> >::iterator it = word_all1_counts.begin();
+ it != word_all1_counts.end(); it++) {
+ word_all1_huffman.insert(std::pair<std::vector<unsigned char>, unsigned int>(it->first, i));
+ i++; //Go to the next huffman code
+ }
+
+ //After lookups are produced, clear some memory usage of objects not needed anymore.
+ target_phrase_words.clear();
+ word_all1.clear();
+
+ target_phrase_words_counts.clear();
+ word_all1_counts.clear();
+
+ std::cerr << "Finished generating huffman codes." << std::endl;
}
-void Huffman::serialize_maps(const char * dirname){
- //Note that directory name should exist.
- std::string basedir(dirname);
- std::string target_phrase_path(basedir + "/target_phrases");
- std::string probabilities_path(basedir + "/probs");
- std::string word_all1_path(basedir + "/Wall1");
-
- //Target phrase
- std::ofstream os (target_phrase_path.c_str(), std::ios::binary);
- boost::archive::text_oarchive oarch(os);
- oarch << lookup_target_phrase;
- os.close();
-
- //Word all1
- std::ofstream os2 (word_all1_path.c_str(), std::ios::binary);
- boost::archive::text_oarchive oarch2(os2);
- oarch2 << lookup_word_all1;
- os2.close();
+void Huffman::serialize_maps(const char * dirname)
+{
+ //Note that directory name should exist.
+ std::string basedir(dirname);
+ std::string target_phrase_path(basedir + "/target_phrases");
+ std::string probabilities_path(basedir + "/probs");
+ std::string word_all1_path(basedir + "/Wall1");
+
+ //Target phrase
+ std::ofstream os (target_phrase_path.c_str(), std::ios::binary);
+ boost::archive::text_oarchive oarch(os);
+ oarch << lookup_target_phrase;
+ os.close();
+
+ //Word all1
+ std::ofstream os2 (word_all1_path.c_str(), std::ios::binary);
+ boost::archive::text_oarchive oarch2(os2);
+ oarch2 << lookup_word_all1;
+ os2.close();
}
-std::vector<unsigned char> Huffman::full_encode_line(line_text line){
- return vbyte_encode_line((encode_line(line)));
+std::vector<unsigned char> Huffman::full_encode_line(line_text line)
+{
+ return vbyte_encode_line((encode_line(line)));
}
-std::vector<unsigned int> Huffman::encode_line(line_text line){
- std::vector<unsigned int> retvector;
-
- //Get target_phrase first.
- util::TokenIter<util::SingleCharacter> it(line.target_phrase, util::SingleCharacter(' '));
- while (it) {
- retvector.push_back(target_phrase_huffman.find(it->as_string())->second);
- it++;
- }
- //Add a zero;
- retvector.push_back(0);
-
- //Get probabilities. Reinterpreting the float bytes as unsgined int.
- util::TokenIter<util::SingleCharacter> probit(line.prob, util::SingleCharacter(' '));
- while (probit) {
- //Sometimes we have too big floats to handle, so first convert to double
- double tempnum = atof(probit->data());
- float num = (float)tempnum;
- retvector.push_back(reinterpret_float(&num));
- probit++;
- }
- //Add a zero;
- retvector.push_back(0);
-
-
- //Get Word allignments
- retvector.push_back(word_all1_huffman.find(splitWordAll1(line.word_all1))->second);
- retvector.push_back(0);
-
- return retvector;
+std::vector<unsigned int> Huffman::encode_line(line_text line)
+{
+ std::vector<unsigned int> retvector;
+
+ //Get target_phrase first.
+ util::TokenIter<util::SingleCharacter> it(line.target_phrase, util::SingleCharacter(' '));
+ while (it) {
+ retvector.push_back(target_phrase_huffman.find(it->as_string())->second);
+ it++;
+ }
+ //Add a zero;
+ retvector.push_back(0);
+
+ //Get probabilities. Reinterpreting the float bytes as unsgined int.
+ util::TokenIter<util::SingleCharacter> probit(line.prob, util::SingleCharacter(' '));
+ while (probit) {
+ //Sometimes we have too big floats to handle, so first convert to double
+ double tempnum = atof(probit->data());
+ float num = (float)tempnum;
+ retvector.push_back(reinterpret_float(&num));
+ probit++;
+ }
+ //Add a zero;
+ retvector.push_back(0);
+
+
+ //Get Word allignments
+ retvector.push_back(word_all1_huffman.find(splitWordAll1(line.word_all1))->second);
+ retvector.push_back(0);
+
+ return retvector;
}
-void Huffman::produce_lookups(){
- //basically invert every map that we have
- for(std::map<std::string, unsigned int>::iterator it = target_phrase_huffman.begin(); it != target_phrase_huffman.end(); it++ ) {
- lookup_target_phrase.insert(std::pair<unsigned int, std::string>(it->second, it->first));
- }
+void Huffman::produce_lookups()
+{
+ //basically invert every map that we have
+ for(std::map<std::string, unsigned int>::iterator it = target_phrase_huffman.begin(); it != target_phrase_huffman.end(); it++ ) {
+ lookup_target_phrase.insert(std::pair<unsigned int, std::string>(it->second, it->first));
+ }
- for(std::map<std::vector<unsigned char>, unsigned int>::iterator it = word_all1_huffman.begin(); it != word_all1_huffman.end(); it++ ) {
- lookup_word_all1.insert(std::pair<unsigned int, std::vector<unsigned char> >(it->second, it->first));
- }
+ for(std::map<std::vector<unsigned char>, unsigned int>::iterator it = word_all1_huffman.begin(); it != word_all1_huffman.end(); it++ ) {
+ lookup_word_all1.insert(std::pair<unsigned int, std::vector<unsigned char> >(it->second, it->first));
+ }
}
-HuffmanDecoder::HuffmanDecoder (const char * dirname){
- //Read the maps from disk
+HuffmanDecoder::HuffmanDecoder (const char * dirname)
+{
+ //Read the maps from disk
- //Note that directory name should exist.
- std::string basedir(dirname);
- std::string target_phrase_path(basedir + "/target_phrases");
- std::string word_all1_path(basedir + "/Wall1");
+ //Note that directory name should exist.
+ std::string basedir(dirname);
+ std::string target_phrase_path(basedir + "/target_phrases");
+ std::string word_all1_path(basedir + "/Wall1");
- //Target phrases
- std::ifstream is (target_phrase_path.c_str(), std::ios::binary);
- boost::archive::text_iarchive iarch(is);
- iarch >> lookup_target_phrase;
- is.close();
+ //Target phrases
+ std::ifstream is (target_phrase_path.c_str(), std::ios::binary);
+ boost::archive::text_iarchive iarch(is);
+ iarch >> lookup_target_phrase;
+ is.close();
- //Word allignment 1
- std::ifstream is2 (word_all1_path.c_str(), std::ios::binary);
- boost::archive::text_iarchive iarch2(is2);
- iarch2 >> lookup_word_all1;
- is2.close();
+ //Word allignment 1
+ std::ifstream is2 (word_all1_path.c_str(), std::ios::binary);
+ boost::archive::text_iarchive iarch2(is2);
+ iarch2 >> lookup_word_all1;
+ is2.close();
}
HuffmanDecoder::HuffmanDecoder (std::map<unsigned int, std::string> * lookup_target,
- std::map<unsigned int, std::vector<unsigned char> > * lookup_word1) {
- lookup_target_phrase = *lookup_target;
- lookup_word_all1 = *lookup_word1;
+ std::map<unsigned int, std::vector<unsigned char> > * lookup_word1)
+{
+ lookup_target_phrase = *lookup_target;
+ lookup_word_all1 = *lookup_word1;
}
-std::vector<target_text> HuffmanDecoder::full_decode_line (std::vector<unsigned char> lines){
- std::vector<target_text> retvector; //All target phrases
- std::vector<unsigned int> decoded_lines = vbyte_decode_line(lines); //All decoded lines
- std::vector<unsigned int>::iterator it = decoded_lines.begin(); //Iterator for them
- std::vector<unsigned int> current_target_phrase; //Current target phrase decoded
-
- short zero_count = 0; //Count home many zeroes we have met. so far. Every 3 zeroes mean a new target phrase.
- while(it != decoded_lines.end()){
- if (zero_count == 3) {
- //We have finished with this entry, decode it, and add it to the retvector.
- retvector.push_back(decode_line(current_target_phrase));
- current_target_phrase.clear(); //Clear the current target phrase and the zero_count
- zero_count = 0; //So that we can reuse them for the next target phrase
- }
- //Add to the next target_phrase, number by number.
+std::vector<target_text> HuffmanDecoder::full_decode_line (std::vector<unsigned char> lines, int num_scores)
+{
+ std::vector<target_text> retvector; //All target phrases
+ std::vector<unsigned int> decoded_lines = vbyte_decode_line(lines); //All decoded lines
+ std::vector<unsigned int>::iterator it = decoded_lines.begin(); //Iterator for them
+ std::vector<unsigned int> current_target_phrase; //Current target phrase decoded
+
+ short zero_count = 0; //Count home many zeroes we have met. so far. Every 3 zeroes mean a new target phrase.
+ while(it != decoded_lines.end()) {
+ if (zero_count == 1) {
+ //We are extracting scores. we know how many scores there are so we can push them
+ //to the vector. This is done in case any of the scores is 0, because it would mess
+ //up the state machine.
+ for (int i = 0; i < num_scores; i++) {
current_target_phrase.push_back(*it);
- if (*it == 0) {
- zero_count++;
- }
- it++; //Go to the next word/symbol
+ it++;
+ }
}
- //Don't forget the last remaining line!
+
if (zero_count == 3) {
- //We have finished with this entry, decode it, and add it to the retvector.
- retvector.push_back(decode_line(current_target_phrase));
- current_target_phrase.clear(); //Clear the current target phrase and the zero_count
- zero_count = 0; //So that we can reuse them for the next target phrase
+ //We have finished with this entry, decode it, and add it to the retvector.
+ retvector.push_back(decode_line(current_target_phrase, num_scores));
+ current_target_phrase.clear(); //Clear the current target phrase and the zero_count
+ zero_count = 0; //So that we can reuse them for the next target phrase
}
-
- return retvector;
+ //Add to the next target_phrase, number by number.
+ current_target_phrase.push_back(*it);
+ if (*it == 0) {
+ zero_count++;
+ }
+ it++; //Go to the next word/symbol
+ }
+ //Don't forget the last remaining line!
+ if (zero_count == 3) {
+ //We have finished with this entry, decode it, and add it to the retvector.
+ retvector.push_back(decode_line(current_target_phrase, num_scores));
+ current_target_phrase.clear(); //Clear the current target phrase and the zero_count
+ zero_count = 0; //So that we can reuse them for the next target phrase
+ }
+
+ return retvector;
}
-target_text HuffmanDecoder::decode_line (std::vector<unsigned int> input){
- //demo decoder
- target_text ret;
- //Split everything
- std::vector<unsigned int> target_phrase;
- std::vector<unsigned int> probs;
- unsigned int wAll;
-
- //Split the line into the proper arrays
- short num_zeroes = 0;
- int counter = 0;
- while (num_zeroes < 3){
- unsigned int num = input[counter];
- if (num == 0) {
- num_zeroes++;
- } else if (num_zeroes == 0){
- target_phrase.push_back(num);
- } else if (num_zeroes == 1){
- probs.push_back(num);
- } else if (num_zeroes == 2){
- wAll = num;
- }
+target_text HuffmanDecoder::decode_line (std::vector<unsigned int> input, int num_scores)
+{
+ //demo decoder
+ target_text ret;
+ //Split everything
+ std::vector<unsigned int> target_phrase;
+ std::vector<unsigned int> probs;
+ unsigned int wAll;
+
+ //Split the line into the proper arrays
+ short num_zeroes = 0;
+ int counter = 0;
+ while (num_zeroes < 3) {
+ unsigned int num = input[counter];
+ if (num == 0) {
+ num_zeroes++;
+ } else if (num_zeroes == 0) {
+ target_phrase.push_back(num);
+ } else if (num_zeroes == 1) {
+ //Push exactly num_scores scores
+ for (int i = 0; i < num_scores; i++) {
+ probs.push_back(num);
counter++;
+ num = input[counter];
+ }
+ continue;
+ } else if (num_zeroes == 2) {
+ wAll = num;
}
+ counter++;
+ }
- ret.target_phrase = target_phrase;
- ret.word_all1 = lookup_word_all1.find(wAll)->second;
+ ret.target_phrase = target_phrase;
+ ret.word_all1 = lookup_word_all1.find(wAll)->second;
- //Decode probabilities
- for (std::vector<unsigned int>::iterator it = probs.begin(); it != probs.end(); it++){
- ret.prob.push_back(reinterpret_uint(&(*it)));
- }
+ //Decode probabilities
+ for (std::vector<unsigned int>::iterator it = probs.begin(); it != probs.end(); it++) {
+ ret.prob.push_back(reinterpret_uint(&(*it)));
+ }
- return ret;
+ return ret;
}
-inline std::string HuffmanDecoder::getTargetWordFromID(unsigned int id) {
- return lookup_target_phrase.find(id)->second;
+inline std::string HuffmanDecoder::getTargetWordFromID(unsigned int id)
+{
+ return lookup_target_phrase.find(id)->second;
}
-std::string HuffmanDecoder::getTargetWordsFromIDs(std::vector<unsigned int> ids){
- std::string returnstring;
- for (std::vector<unsigned int>::iterator it = ids.begin(); it != ids.end(); it++){
- returnstring.append(getTargetWordFromID(*it) + " ");
- }
+std::string HuffmanDecoder::getTargetWordsFromIDs(std::vector<unsigned int> ids)
+{
+ std::string returnstring;
+ for (std::vector<unsigned int>::iterator it = ids.begin(); it != ids.end(); it++) {
+ returnstring.append(getTargetWordFromID(*it) + " ");
+ }
- return returnstring;
+ return returnstring;
}
-inline std::string getTargetWordFromID(unsigned int id, std::map<unsigned int, std::string> * lookup_target_phrase) {
- return lookup_target_phrase->find(id)->second;
+inline std::string getTargetWordFromID(unsigned int id, std::map<unsigned int, std::string> * lookup_target_phrase)
+{
+ return lookup_target_phrase->find(id)->second;
}
-std::string getTargetWordsFromIDs(std::vector<unsigned int> ids, std::map<unsigned int, std::string> * lookup_target_phrase) {
- std::string returnstring;
- for (std::vector<unsigned int>::iterator it = ids.begin(); it != ids.end(); it++){
- returnstring.append(getTargetWordFromID(*it, lookup_target_phrase) + " ");
- }
+std::string getTargetWordsFromIDs(std::vector<unsigned int> ids, std::map<unsigned int, std::string> * lookup_target_phrase)
+{
+ std::string returnstring;
+ for (std::vector<unsigned int>::iterator it = ids.begin(); it != ids.end(); it++) {
+ returnstring.append(getTargetWordFromID(*it, lookup_target_phrase) + " ");
+ }
- return returnstring;
+ return returnstring;
}
/*Those functions are used to more easily store the floats in the binary phrase table
We convert the float unsinged int so that it is the same as our other values and we can
apply variable byte encoding on top of it.*/
-inline unsigned int reinterpret_float(float * num){
- unsigned int * converted_num;
- converted_num = reinterpret_cast<unsigned int *>(num);
- return *converted_num;
+inline unsigned int reinterpret_float(float * num)
+{
+ unsigned int * converted_num;
+ converted_num = reinterpret_cast<unsigned int *>(num);
+ return *converted_num;
}
-inline float reinterpret_uint(unsigned int * num){
- float * converted_num;
- converted_num = reinterpret_cast<float *>(num);
- return *converted_num;
+inline float reinterpret_uint(unsigned int * num)
+{
+ float * converted_num;
+ converted_num = reinterpret_cast<float *>(num);
+ return *converted_num;
}
/*Mostly taken from stackoverflow, http://stackoverflow.com/questions/5858646/optimizing-variable-length-encoding
and modified in order to return a vector of chars. Implements ULEB128 or variable byte encoding.
This is highly optimized version with unrolled loop */
-inline std::vector<unsigned char> vbyte_encode(unsigned int num){
- //Determine how many bytes we are going to take.
- short size;
- std::vector<unsigned char> byte_vector;
-
- if (num < 0x00000080U) {
- size = 1;
- byte_vector.reserve(size);
- goto b1;
- }
- if (num < 0x00004000U) {
- size = 2;
- byte_vector.reserve(size);
- goto b2;
- }
- if (num < 0x00200000U) {
- size = 3;
- byte_vector.reserve(size);
- goto b3;
- }
- if (num < 0x10000000U) {
- size = 4;
- byte_vector.reserve(size);
- goto b4;
- }
- size = 5;
+inline std::vector<unsigned char> vbyte_encode(unsigned int num)
+{
+ //Determine how many bytes we are going to take.
+ short size;
+ std::vector<unsigned char> byte_vector;
+
+ if (num < 0x00000080U) {
+ size = 1;
+ byte_vector.reserve(size);
+ goto b1;
+ }
+ if (num < 0x00004000U) {
+ size = 2;
byte_vector.reserve(size);
+ goto b2;
+ }
+ if (num < 0x00200000U) {
+ size = 3;
+ byte_vector.reserve(size);
+ goto b3;
+ }
+ if (num < 0x10000000U) {
+ size = 4;
+ byte_vector.reserve(size);
+ goto b4;
+ }
+ size = 5;
+ byte_vector.reserve(size);
- //Now proceed with the encoding.
- byte_vector.push_back((num & 0x7f) | 0x80);
- num >>= 7;
+ //Now proceed with the encoding.
+ byte_vector.push_back((num & 0x7f) | 0x80);
+ num >>= 7;
b4:
- byte_vector.push_back((num & 0x7f) | 0x80);
- num >>= 7;
+ byte_vector.push_back((num & 0x7f) | 0x80);
+ num >>= 7;
b3:
- byte_vector.push_back((num & 0x7f) | 0x80);
- num >>= 7;
+ byte_vector.push_back((num & 0x7f) | 0x80);
+ num >>= 7;
b2:
- byte_vector.push_back((num & 0x7f) | 0x80);
- num >>= 7;
+ byte_vector.push_back((num & 0x7f) | 0x80);
+ num >>= 7;
b1:
- byte_vector.push_back(num);
+ byte_vector.push_back(num);
- return byte_vector;
+ return byte_vector;
}
-std::vector<unsigned int> vbyte_decode_line(std::vector<unsigned char> line){
- std::vector<unsigned int> huffman_line;
- std::vector<unsigned char> current_num;
-
- for (std::vector<unsigned char>::iterator it = line.begin(); it != line.end(); it++){
- current_num.push_back(*it);
- if ((*it >> 7) != 1) {
- //We don't have continuation in the next bit
- huffman_line.push_back(bytes_to_int(current_num));
- current_num.clear();
- }
+std::vector<unsigned int> vbyte_decode_line(std::vector<unsigned char> line)
+{
+ std::vector<unsigned int> huffman_line;
+ std::vector<unsigned char> current_num;
+
+ for (std::vector<unsigned char>::iterator it = line.begin(); it != line.end(); it++) {
+ current_num.push_back(*it);
+ if ((*it >> 7) != 1) {
+ //We don't have continuation in the next bit
+ huffman_line.push_back(bytes_to_int(current_num));
+ current_num.clear();
}
- return huffman_line;
+ }
+ return huffman_line;
}
-inline unsigned int bytes_to_int(std::vector<unsigned char> number){
- unsigned int retvalue = 0;
- std::vector<unsigned char>::iterator it = number.begin();
- unsigned char shift = 0; //By how many bits to shift
+inline unsigned int bytes_to_int(std::vector<unsigned char> number)
+{
+ unsigned int retvalue = 0;
+ std::vector<unsigned char>::iterator it = number.begin();
+ unsigned char shift = 0; //By how many bits to shift
- while (it != number.end()) {
- retvalue |= (*it & 0x7f) << shift;
- shift += 7;
- it++;
- }
+ while (it != number.end()) {
+ retvalue |= (*it & 0x7f) << shift;
+ shift += 7;
+ it++;
+ }
- return retvalue;
+ return retvalue;
}
-std::vector<unsigned char> vbyte_encode_line(std::vector<unsigned int> line) {
- std::vector<unsigned char> retvec;
+std::vector<unsigned char> vbyte_encode_line(std::vector<unsigned int> line)
+{
+ std::vector<unsigned char> retvec;
- //For each unsigned int in the line, vbyte encode it and add it to a vector of unsigned chars.
- for (std::vector<unsigned int>::iterator it = line.begin(); it != line.end(); it++){
- std::vector<unsigned char> vbyte_encoded = vbyte_encode(*it);
- retvec.insert(retvec.end(), vbyte_encoded.begin(), vbyte_encoded.end());
- }
+ //For each unsigned int in the line, vbyte encode it and add it to a vector of unsigned chars.
+ for (std::vector<unsigned int>::iterator it = line.begin(); it != line.end(); it++) {
+ std::vector<unsigned char> vbyte_encoded = vbyte_encode(*it);
+ retvec.insert(retvec.end(), vbyte_encoded.begin(), vbyte_encoded.end());
+ }
- return retvec;
+ return retvec;
}
diff --git a/moses/TranslationModel/ProbingPT/huffmanish.hh b/moses/TranslationModel/ProbingPT/huffmanish.hh
index 3116484e9..46b7dbeea 100644
--- a/moses/TranslationModel/ProbingPT/huffmanish.hh
+++ b/moses/TranslationModel/ProbingPT/huffmanish.hh
@@ -90,10 +90,10 @@ public:
std::string getTargetWordsFromIDs(std::vector<unsigned int> ids);
- target_text decode_line (std::vector<unsigned int> input);
+ target_text decode_line (std::vector<unsigned int> input, int num_scores);
//Variable byte decodes a all target phrases contained here and then passes them to decode_line
- std::vector<target_text> full_decode_line (std::vector<unsigned char> lines);
+ std::vector<target_text> full_decode_line (std::vector<unsigned char> lines, int num_scores);
};
std::string getTargetWordsFromIDs(std::vector<unsigned int> ids, std::map<unsigned int, std::string> * lookup_target_phrase);
diff --git a/moses/TranslationModel/ProbingPT/line_splitter.cpp b/moses/TranslationModel/ProbingPT/line_splitter.cpp
index f50090e4c..97ccfcae1 100644
--- a/moses/TranslationModel/ProbingPT/line_splitter.cpp
+++ b/moses/TranslationModel/ProbingPT/line_splitter.cpp
@@ -1,52 +1,54 @@
#include "line_splitter.hh"
-line_text splitLine(StringPiece textin) {
- const char delim[] = " ||| ";
- line_text output;
-
- //Tokenize
- util::TokenIter<util::MultiCharacter> it(textin, util::MultiCharacter(delim));
- //Get source phrase
- output.source_phrase = *it;
- it++;
- //Get target_phrase
- output.target_phrase = *it;
- it++;
- //Get probabilities
- output.prob = *it;
- it++;
- //Get WordAllignment 1
- output.word_all1 = *it;
- it++;
- //Get WordAllignment 2
- output.word_all2 = *it;
-
- return output;
+line_text splitLine(StringPiece textin)
+{
+ const char delim[] = " ||| ";
+ line_text output;
+
+ //Tokenize
+ util::TokenIter<util::MultiCharacter> it(textin, util::MultiCharacter(delim));
+ //Get source phrase
+ output.source_phrase = *it;
+ it++;
+ //Get target_phrase
+ output.target_phrase = *it;
+ it++;
+ //Get probabilities
+ output.prob = *it;
+ it++;
+ //Get WordAllignment 1
+ output.word_all1 = *it;
+ it++;
+ //Get WordAllignment 2
+ output.word_all2 = *it;
+
+ return output;
}
-std::vector<unsigned char> splitWordAll1(StringPiece textin){
- const char delim[] = " ";
- const char delim2[] = "-";
- std::vector<unsigned char> output;
-
- //Split on space
- util::TokenIter<util::MultiCharacter> it(textin, util::MultiCharacter(delim));
-
- //For each int
- while (it) {
- //Split on dash (-)
- util::TokenIter<util::MultiCharacter> itInner(*it, util::MultiCharacter(delim2));
-
- //Insert the two entries in the vector. User will read entry 0 and 1 to get the first,
- //2 and 3 for second etc. Use unsigned char instead of int to save space, as
- //word allignments are all very small numbers that fit in a single byte
- output.push_back((unsigned char)(atoi(itInner->data())));
- itInner++;
- output.push_back((unsigned char)(atoi(itInner->data())));
- it++;
- }
-
- return output;
+std::vector<unsigned char> splitWordAll1(StringPiece textin)
+{
+ const char delim[] = " ";
+ const char delim2[] = "-";
+ std::vector<unsigned char> output;
+
+ //Split on space
+ util::TokenIter<util::MultiCharacter> it(textin, util::MultiCharacter(delim));
+
+ //For each int
+ while (it) {
+ //Split on dash (-)
+ util::TokenIter<util::MultiCharacter> itInner(*it, util::MultiCharacter(delim2));
+
+ //Insert the two entries in the vector. User will read entry 0 and 1 to get the first,
+ //2 and 3 for second etc. Use unsigned char instead of int to save space, as
+ //word allignments are all very small numbers that fit in a single byte
+ output.push_back((unsigned char)(atoi(itInner->data())));
+ itInner++;
+ output.push_back((unsigned char)(atoi(itInner->data())));
+ it++;
+ }
+
+ return output;
}
diff --git a/moses/TranslationModel/ProbingPT/probing_hash_utils.cpp b/moses/TranslationModel/ProbingPT/probing_hash_utils.cpp
index 35cb9e538..ca3e8f69f 100644
--- a/moses/TranslationModel/ProbingPT/probing_hash_utils.cpp
+++ b/moses/TranslationModel/ProbingPT/probing_hash_utils.cpp
@@ -1,32 +1,34 @@
#include "probing_hash_utils.hh"
//Read table from disk, return memory map location
-char * readTable(const char * filename, size_t size) {
- //Initial position of the file is the end of the file, thus we know the size
- int fd;
- char * map;
-
- fd = open(filename, O_RDONLY);
- if (fd == -1) {
- perror("Error opening file for reading");
- exit(EXIT_FAILURE);
- }
-
- map = (char *)mmap(0, size, PROT_READ, MAP_SHARED, fd, 0);
-
- if (map == MAP_FAILED) {
- close(fd);
- perror("Error mmapping the file");
- exit(EXIT_FAILURE);
- }
-
- return map;
-}
-
-
-void serialize_table(char *mem, size_t size, const char * filename){
- std::ofstream os (filename, std::ios::binary);
- os.write((const char*)&mem[0], size);
- os.close();
+char * readTable(const char * filename, size_t size)
+{
+ //Initial position of the file is the end of the file, thus we know the size
+ int fd;
+ char * map;
+
+ fd = open(filename, O_RDONLY);
+ if (fd == -1) {
+ perror("Error opening file for reading");
+ exit(EXIT_FAILURE);
+ }
+
+ map = (char *)mmap(0, size, PROT_READ, MAP_SHARED, fd, 0);
+
+ if (map == MAP_FAILED) {
+ close(fd);
+ perror("Error mmapping the file");
+ exit(EXIT_FAILURE);
+ }
+
+ return map;
+}
+
+
+void serialize_table(char *mem, size_t size, const char * filename)
+{
+ std::ofstream os (filename, std::ios::binary);
+ os.write((const char*)&mem[0], size);
+ os.close();
} \ No newline at end of file
diff --git a/moses/TranslationModel/ProbingPT/quering.cpp b/moses/TranslationModel/ProbingPT/quering.cpp
index 18efed917..59a1229a8 100644
--- a/moses/TranslationModel/ProbingPT/quering.cpp
+++ b/moses/TranslationModel/ProbingPT/quering.cpp
@@ -1,174 +1,198 @@
#include "quering.hh"
-unsigned char * read_binary_file(const char * filename, size_t filesize){
- //Get filesize
- int fd;
- unsigned char * map;
-
- fd = open(filename, O_RDONLY);
-
- if (fd == -1) {
- perror("Error opening file for reading");
- exit(EXIT_FAILURE);
- }
-
- map = (unsigned char *)mmap(0, filesize, PROT_READ, MAP_SHARED, fd, 0);
- if (map == MAP_FAILED) {
- close(fd);
- perror("Error mmapping the file");
- exit(EXIT_FAILURE);
- }
-
- return map;
+unsigned char * read_binary_file(const char * filename, size_t filesize)
+{
+ //Get filesize
+ int fd;
+ unsigned char * map;
+
+ fd = open(filename, O_RDONLY);
+
+ if (fd == -1) {
+ perror("Error opening file for reading");
+ exit(EXIT_FAILURE);
+ }
+
+ map = (unsigned char *)mmap(0, filesize, PROT_READ, MAP_SHARED, fd, 0);
+ if (map == MAP_FAILED) {
+ close(fd);
+ perror("Error mmapping the file");
+ exit(EXIT_FAILURE);
+ }
+
+ return map;
}
-QueryEngine::QueryEngine(const char * filepath) : decoder(filepath){
-
- //Create filepaths
- std::string basepath(filepath);
- std::string path_to_hashtable = basepath + "/probing_hash.dat";
- std::string path_to_data_bin = basepath + "/binfile.dat";
- std::string path_to_source_vocabid = basepath + "/source_vocabids";
-
- ///Source phrase vocabids
- read_map(&source_vocabids, path_to_source_vocabid.c_str());
-
- //Target phrase vocabIDs
- vocabids = decoder.get_target_lookup_map();
-
- //Read config file
- std::string line;
- std::ifstream config ((basepath + "/config").c_str());
- getline(config, line);
- int tablesize = atoi(line.c_str()); //Get tablesize.
- config.close();
-
- //Mmap binary table
- struct stat filestatus;
- stat(path_to_data_bin.c_str(), &filestatus);
- binary_filesize = filestatus.st_size;
- binary_mmaped = read_binary_file(path_to_data_bin.c_str(), binary_filesize);
-
- //Read hashtable
- size_t table_filesize = Table::Size(tablesize, 1.2);
- mem = readTable(path_to_hashtable.c_str(), table_filesize);
- Table table_init(mem, table_filesize);
- table = table_init;
-
- std::cerr << "Initialized successfully! " << std::endl;
+QueryEngine::QueryEngine(const char * filepath) : decoder(filepath)
+{
+
+ //Create filepaths
+ std::string basepath(filepath);
+ std::string path_to_hashtable = basepath + "/probing_hash.dat";
+ std::string path_to_data_bin = basepath + "/binfile.dat";
+ std::string path_to_source_vocabid = basepath + "/source_vocabids";
+
+ ///Source phrase vocabids
+ read_map(&source_vocabids, path_to_source_vocabid.c_str());
+
+ //Target phrase vocabIDs
+ vocabids = decoder.get_target_lookup_map();
+
+ //Read config file
+ std::string line;
+ std::ifstream config ((basepath + "/config").c_str());
+ //Check API version:
+ getline(config, line);
+ if (atoi(line.c_str()) != API_VERSION) {
+ std::cerr << "The ProbingPT API has changed, please rebinarize your phrase tables." << std::endl;
+ exit(EXIT_FAILURE);
+ }
+ //Get tablesize.
+ getline(config, line);
+ int tablesize = atoi(line.c_str());
+ //Number of scores
+ getline(config, line);
+ num_scores = atoi(line.c_str());
+ //do we have a reordering table
+ getline(config, line);
+ std::transform(line.begin(), line.end(), line.begin(), ::tolower); //Get the boolean in lowercase
+ is_reordering = false;
+ if (line == "true") {
+ is_reordering = true;
+ std::cerr << "WARNING. REORDERING TABLES NOT SUPPORTED YET." << std::endl;
+ }
+ config.close();
+
+ //Mmap binary table
+ struct stat filestatus;
+ stat(path_to_data_bin.c_str(), &filestatus);
+ binary_filesize = filestatus.st_size;
+ binary_mmaped = read_binary_file(path_to_data_bin.c_str(), binary_filesize);
+
+ //Read hashtable
+ size_t table_filesize = Table::Size(tablesize, 1.2);
+ mem = readTable(path_to_hashtable.c_str(), table_filesize);
+ Table table_init(mem, table_filesize);
+ table = table_init;
+
+ std::cerr << "Initialized successfully! " << std::endl;
}
-QueryEngine::~QueryEngine(){
- //Clear mmap content from memory.
- munmap(binary_mmaped, binary_filesize);
- munmap(mem, table_filesize);
-
+QueryEngine::~QueryEngine()
+{
+ //Clear mmap content from memory.
+ munmap(binary_mmaped, binary_filesize);
+ munmap(mem, table_filesize);
+
}
-std::pair<bool, std::vector<target_text> > QueryEngine::query(std::vector<uint64_t> source_phrase){
- bool found;
- std::vector<target_text> translation_entries;
- const Entry * entry;
- //TOO SLOW
- //uint64_t key = util::MurmurHashNative(&source_phrase[0], source_phrase.size());
- uint64_t key = 0;
- for (int i = 0; i < source_phrase.size(); i++){
- key += source_phrase[i];
+std::pair<bool, std::vector<target_text> > QueryEngine::query(std::vector<uint64_t> source_phrase)
+{
+ bool found;
+ std::vector<target_text> translation_entries;
+ const Entry * entry;
+ //TOO SLOW
+ //uint64_t key = util::MurmurHashNative(&source_phrase[0], source_phrase.size());
+ uint64_t key = 0;
+ for (int i = 0; i < source_phrase.size(); i++) {
+ key += (source_phrase[i] << i);
+ }
+
+
+ found = table.Find(key, entry);
+
+ if (found) {
+ //The phrase that was searched for was found! We need to get the translation entries.
+ //We will read the largest entry in bytes and then filter the unnecesarry with functions
+ //from line_splitter
+ uint64_t initial_index = entry -> GetValue();
+ unsigned int bytes_toread = entry -> bytes_toread;
+
+ //ASK HIEU FOR MORE EFFICIENT WAY TO DO THIS!
+ std::vector<unsigned char> encoded_text; //Assign to the vector the relevant portion of the array.
+ encoded_text.reserve(bytes_toread);
+ for (int i = 0; i < bytes_toread; i++) {
+ encoded_text.push_back(binary_mmaped[i+initial_index]);
}
+ //Get only the translation entries necessary
+ translation_entries = decoder.full_decode_line(encoded_text, num_scores);
- found = table.Find(key, entry);
+ }
- if (found){
- //The phrase that was searched for was found! We need to get the translation entries.
- //We will read the largest entry in bytes and then filter the unnecesarry with functions
- //from line_splitter
- uint64_t initial_index = entry -> GetValue();
- unsigned int bytes_toread = entry -> bytes_toread;
-
- //ASK HIEU FOR MORE EFFICIENT WAY TO DO THIS!
- std::vector<unsigned char> encoded_text; //Assign to the vector the relevant portion of the array.
- encoded_text.reserve(bytes_toread);
- for (int i = 0; i < bytes_toread; i++){
- encoded_text.push_back(binary_mmaped[i+initial_index]);
- }
+ std::pair<bool, std::vector<target_text> > output (found, translation_entries);
- //Get only the translation entries necessary
- translation_entries = decoder.full_decode_line(encoded_text);
+ return output;
- }
-
- std::pair<bool, std::vector<target_text> > output (found, translation_entries);
-
- return output;
-
}
-std::pair<bool, std::vector<target_text> > QueryEngine::query(StringPiece source_phrase){
- bool found;
- std::vector<target_text> translation_entries;
- const Entry * entry;
- //Convert source frase to VID
- std::vector<uint64_t> source_phrase_vid = getVocabIDs(source_phrase);
- //TOO SLOW
- //uint64_t key = util::MurmurHashNative(&source_phrase_vid[0], source_phrase_vid.size());
- uint64_t key = 0;
- for (int i = 0; i < source_phrase_vid.size(); i++){
- key += source_phrase_vid[i];
+std::pair<bool, std::vector<target_text> > QueryEngine::query(StringPiece source_phrase)
+{
+ bool found;
+ std::vector<target_text> translation_entries;
+ const Entry * entry;
+ //Convert source frase to VID
+ std::vector<uint64_t> source_phrase_vid = getVocabIDs(source_phrase);
+ //TOO SLOW
+ //uint64_t key = util::MurmurHashNative(&source_phrase_vid[0], source_phrase_vid.size());
+ uint64_t key = 0;
+ for (int i = 0; i < source_phrase_vid.size(); i++) {
+ key += (source_phrase_vid[i] << i);
+ }
+
+ found = table.Find(key, entry);
+
+
+ if (found) {
+ //The phrase that was searched for was found! We need to get the translation entries.
+ //We will read the largest entry in bytes and then filter the unnecesarry with functions
+ //from line_splitter
+ uint64_t initial_index = entry -> GetValue();
+ unsigned int bytes_toread = entry -> bytes_toread;
+ //At the end of the file we can't readd + largest_entry cause we get a segfault.
+ std::cerr << "Entry size is bytes is: " << bytes_toread << std::endl;
+
+ //ASK HIEU FOR MORE EFFICIENT WAY TO DO THIS!
+ std::vector<unsigned char> encoded_text; //Assign to the vector the relevant portion of the array.
+ encoded_text.reserve(bytes_toread);
+ for (int i = 0; i < bytes_toread; i++) {
+ encoded_text.push_back(binary_mmaped[i+initial_index]);
}
- found = table.Find(key, entry);
+ //Get only the translation entries necessary
+ translation_entries = decoder.full_decode_line(encoded_text, num_scores);
+ }
- if (found){
- //The phrase that was searched for was found! We need to get the translation entries.
- //We will read the largest entry in bytes and then filter the unnecesarry with functions
- //from line_splitter
- uint64_t initial_index = entry -> GetValue();
- unsigned int bytes_toread = entry -> bytes_toread;
- //At the end of the file we can't readd + largest_entry cause we get a segfault.
- std::cerr << "Entry size is bytes is: " << bytes_toread << std::endl;
-
- //ASK HIEU FOR MORE EFFICIENT WAY TO DO THIS!
- std::vector<unsigned char> encoded_text; //Assign to the vector the relevant portion of the array.
- encoded_text.reserve(bytes_toread);
- for (int i = 0; i < bytes_toread; i++){
- encoded_text.push_back(binary_mmaped[i+initial_index]);
- }
+ std::pair<bool, std::vector<target_text> > output (found, translation_entries);
- //Get only the translation entries necessary
- translation_entries = decoder.full_decode_line(encoded_text);
-
- }
+ return output;
- std::pair<bool, std::vector<target_text> > output (found, translation_entries);
+}
- return output;
+void QueryEngine::printTargetInfo(std::vector<target_text> target_phrases)
+{
+ int entries = target_phrases.size();
-}
+ for (int i = 0; i<entries; i++) {
+ std::cout << "Entry " << i+1 << " of " << entries << ":" << std::endl;
+ //Print text
+ std::cout << getTargetWordsFromIDs(target_phrases[i].target_phrase, &vocabids) << "\t";
-void QueryEngine::printTargetInfo(std::vector<target_text> target_phrases){
- int entries = target_phrases.size();
-
- for (int i = 0; i<entries; i++){
- std::cout << "Entry " << i+1 << " of " << entries << ":" << std::endl;
- //Print text
- std::cout << getTargetWordsFromIDs(target_phrases[i].target_phrase, &vocabids) << "\t";
-
- //Print probabilities:
- for (int j = 0; j<target_phrases[i].prob.size(); j++){
- std::cout << target_phrases[i].prob[j] << " ";
- }
- std::cout << "\t";
-
- //Print word_all1
- for (int j = 0; j<target_phrases[i].word_all1.size(); j++){
- if (j%2 == 0){
- std::cout << (short)target_phrases[i].word_all1[j] << "-";
- }else{
- std::cout << (short)target_phrases[i].word_all1[j] << " ";
- }
- }
- std::cout << std::endl;
+ //Print probabilities:
+ for (int j = 0; j<target_phrases[i].prob.size(); j++) {
+ std::cout << target_phrases[i].prob[j] << " ";
+ }
+ std::cout << "\t";
+
+ //Print word_all1
+ for (int j = 0; j<target_phrases[i].word_all1.size(); j++) {
+ if (j%2 == 0) {
+ std::cout << (short)target_phrases[i].word_all1[j] << "-";
+ } else {
+ std::cout << (short)target_phrases[i].word_all1[j] << " ";
+ }
}
+ std::cout << std::endl;
+ }
}
diff --git a/moses/TranslationModel/ProbingPT/quering.hh b/moses/TranslationModel/ProbingPT/quering.hh
index 133f484ce..b6266f7c7 100644
--- a/moses/TranslationModel/ProbingPT/quering.hh
+++ b/moses/TranslationModel/ProbingPT/quering.hh
@@ -5,6 +5,8 @@
#include "hash.hh" //Includes line splitter
#include <sys/stat.h> //For finding size of file
#include "vocabid.hh"
+#include <algorithm> //toLower
+#define API_VERSION 3
char * read_binary_file(char * filename);
@@ -21,6 +23,8 @@ class QueryEngine {
size_t binary_filesize;
size_t table_filesize;
+ int num_scores;
+ bool is_reordering;
public:
QueryEngine (const char *);
~QueryEngine();
diff --git a/moses/TranslationModel/ProbingPT/storing.cpp b/moses/TranslationModel/ProbingPT/storing.cpp
index 5ea0df39c..01128c1e4 100644
--- a/moses/TranslationModel/ProbingPT/storing.cpp
+++ b/moses/TranslationModel/ProbingPT/storing.cpp
@@ -1,151 +1,161 @@
#include "storing.hh"
-BinaryFileWriter::BinaryFileWriter (std::string basepath) : os ((basepath + "/binfile.dat").c_str(), std::ios::binary) {
- binfile.reserve(10000); //Reserve part of the vector to avoid realocation
- it = binfile.begin();
- dist_from_start = 0; //Initialize variables
- extra_counter = 0;
+BinaryFileWriter::BinaryFileWriter (std::string basepath) : os ((basepath + "/binfile.dat").c_str(), std::ios::binary)
+{
+  binfile.reserve(10000); //Reserve part of the vector to avoid reallocation
+ it = binfile.begin();
+ dist_from_start = 0; //Initialize variables
+ extra_counter = 0;
}
-void BinaryFileWriter::write (std::vector<unsigned char> * bytes) {
- binfile.insert(it, bytes->begin(), bytes->end()); //Insert the bytes
- //Keep track of the offsets
- it += bytes->size();
- dist_from_start = distance(binfile.begin(),it);
- //Flush the vector to disk every once in a while so that we don't consume too much ram
- if (dist_from_start > 9000) {
- flush();
- }
+void BinaryFileWriter::write (std::vector<unsigned char> * bytes)
+{
+ binfile.insert(it, bytes->begin(), bytes->end()); //Insert the bytes
+ //Keep track of the offsets
+ it += bytes->size();
+ dist_from_start = distance(binfile.begin(),it);
+ //Flush the vector to disk every once in a while so that we don't consume too much ram
+ if (dist_from_start > 9000) {
+ flush();
+ }
}
-void BinaryFileWriter::flush () {
- //Cast unsigned char to char before writing...
- os.write((char *)&binfile[0], dist_from_start);
- //Clear the vector:
- binfile.clear();
- binfile.reserve(10000);
- extra_counter += dist_from_start; //Keep track of the total number of bytes.
- it = binfile.begin(); //Reset iterator
- dist_from_start = distance(binfile.begin(),it); //Reset dist from start
+void BinaryFileWriter::flush ()
+{
+ //Cast unsigned char to char before writing...
+ os.write((char *)&binfile[0], dist_from_start);
+ //Clear the vector:
+ binfile.clear();
+ binfile.reserve(10000);
+ extra_counter += dist_from_start; //Keep track of the total number of bytes.
+ it = binfile.begin(); //Reset iterator
+ dist_from_start = distance(binfile.begin(),it); //Reset dist from start
}
-BinaryFileWriter::~BinaryFileWriter (){
- os.close();
- binfile.clear();
+BinaryFileWriter::~BinaryFileWriter ()
+{
+ os.close();
+ binfile.clear();
}
-void createProbingPT(const char * phrasetable_path, const char * target_path){
- //Get basepath and create directory if missing
- std::string basepath(target_path);
- mkdir(basepath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
-
- //Set up huffman and serialize decoder maps.
- Huffman huffmanEncoder(phrasetable_path); //initialize
- huffmanEncoder.assign_values();
- huffmanEncoder.produce_lookups();
- huffmanEncoder.serialize_maps(target_path);
-
- //Get uniq lines:
- unsigned long uniq_entries = huffmanEncoder.getUniqLines();
-
- //Source phrase vocabids
- std::map<uint64_t, std::string> source_vocabids;
-
- //Read the file
- util::FilePiece filein(phrasetable_path);
-
- //Init the probing hash table
- size_t size = Table::Size(uniq_entries, 1.2);
- char * mem = new char[size];
- memset(mem, 0, size);
- Table table(mem, size);
-
- BinaryFileWriter binfile(basepath); //Init the binary file writer.
-
- line_text prev_line; //Check if the source phrase of the previous line is the same
-
- //Keep track of the size of each group of target phrases
- uint64_t entrystartidx = 0;
- //uint64_t line_num = 0;
-
-
- //Read everything and processs
- while(true){
- try {
- //Process line read
- line_text line;
- line = splitLine(filein.ReadLine());
- //Add source phrases to vocabularyIDs
- add_to_map(&source_vocabids, line.source_phrase);
-
- if ((binfile.dist_from_start + binfile.extra_counter) == 0) {
- prev_line = line; //For the first iteration assume the previous line is
- } //The same as this one.
-
- if (line.source_phrase != prev_line.source_phrase){
-
- //Create a new entry even
-
- //Create an entry for the previous source phrase:
- Entry pesho;
- pesho.value = entrystartidx;
- //The key is the sum of hashes of individual words. Probably not entirerly correct, but fast
- pesho.key = 0;
- std::vector<uint64_t> vocabid_source = getVocabIDs(prev_line.source_phrase);
- for (int i = 0; i < vocabid_source.size(); i++){
- pesho.key += vocabid_source[i];
- }
- pesho.bytes_toread = binfile.dist_from_start + binfile.extra_counter - entrystartidx;
-
- //Put into table
- table.Insert(pesho);
-
- entrystartidx = binfile.dist_from_start + binfile.extra_counter; //Designate start idx for new entry
-
- //Encode a line and write it to disk.
- std::vector<unsigned char> encoded_line = huffmanEncoder.full_encode_line(line);
- binfile.write(&encoded_line);
-
- //Set prevLine
- prev_line = line;
-
- } else{
- //If we still have the same line, just append to it:
- std::vector<unsigned char> encoded_line = huffmanEncoder.full_encode_line(line);
- binfile.write(&encoded_line);
- }
-
- } catch (util::EndOfFileException e){
- std::cerr << "Reading phrase table finished, writing remaining files to disk." << std::endl;
- binfile.flush();
-
- //After the final entry is constructed we need to add it to the phrase_table
- //Create an entry for the previous source phrase:
- Entry pesho;
- pesho.value = entrystartidx;
- //The key is the sum of hashes of individual words. Probably not entirerly correct, but fast
- pesho.key = 0;
- std::vector<uint64_t> vocabid_source = getVocabIDs(prev_line.source_phrase);
- for (int i = 0; i < vocabid_source.size(); i++){
- pesho.key += vocabid_source[i];
- }
- pesho.bytes_toread = binfile.dist_from_start + binfile.extra_counter - entrystartidx;
- //Put into table
- table.Insert(pesho);
-
- break;
+void createProbingPT(const char * phrasetable_path, const char * target_path,
+ const char * num_scores, const char * is_reordering)
+{
+ //Get basepath and create directory if missing
+ std::string basepath(target_path);
+ mkdir(basepath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
+
+ //Set up huffman and serialize decoder maps.
+ Huffman huffmanEncoder(phrasetable_path); //initialize
+ huffmanEncoder.assign_values();
+ huffmanEncoder.produce_lookups();
+ huffmanEncoder.serialize_maps(target_path);
+
+ //Get uniq lines:
+ unsigned long uniq_entries = huffmanEncoder.getUniqLines();
+
+ //Source phrase vocabids
+ std::map<uint64_t, std::string> source_vocabids;
+
+ //Read the file
+ util::FilePiece filein(phrasetable_path);
+
+ //Init the probing hash table
+ size_t size = Table::Size(uniq_entries, 1.2);
+ char * mem = new char[size];
+ memset(mem, 0, size);
+ Table table(mem, size);
+
+ BinaryFileWriter binfile(basepath); //Init the binary file writer.
+
+ line_text prev_line; //Check if the source phrase of the previous line is the same
+
+ //Keep track of the size of each group of target phrases
+ uint64_t entrystartidx = 0;
+ //uint64_t line_num = 0;
+
+
+  //Read everything and process
+ while(true) {
+ try {
+ //Process line read
+ line_text line;
+ line = splitLine(filein.ReadLine());
+ //Add source phrases to vocabularyIDs
+ add_to_map(&source_vocabids, line.source_phrase);
+
+ if ((binfile.dist_from_start + binfile.extra_counter) == 0) {
+ prev_line = line; //For the first iteration assume the previous line is
+ } //The same as this one.
+
+ if (line.source_phrase != prev_line.source_phrase) {
+
+ //Create a new entry even
+
+ //Create an entry for the previous source phrase:
+ Entry pesho;
+ pesho.value = entrystartidx;
+ //The key is the sum of hashes of individual words bitshifted by their position in the phrase.
+      //Probably not entirely correct, but fast and seems to work fine in practice.
+ pesho.key = 0;
+ std::vector<uint64_t> vocabid_source = getVocabIDs(prev_line.source_phrase);
+ for (int i = 0; i < vocabid_source.size(); i++) {
+ pesho.key += (vocabid_source[i] << i);
}
+ pesho.bytes_toread = binfile.dist_from_start + binfile.extra_counter - entrystartidx;
+
+ //Put into table
+ table.Insert(pesho);
+
+ entrystartidx = binfile.dist_from_start + binfile.extra_counter; //Designate start idx for new entry
+
+ //Encode a line and write it to disk.
+ std::vector<unsigned char> encoded_line = huffmanEncoder.full_encode_line(line);
+ binfile.write(&encoded_line);
+
+ //Set prevLine
+ prev_line = line;
+
+ } else {
+ //If we still have the same line, just append to it:
+ std::vector<unsigned char> encoded_line = huffmanEncoder.full_encode_line(line);
+ binfile.write(&encoded_line);
+ }
+
+ } catch (util::EndOfFileException e) {
+ std::cerr << "Reading phrase table finished, writing remaining files to disk." << std::endl;
+ binfile.flush();
+
+ //After the final entry is constructed we need to add it to the phrase_table
+ //Create an entry for the previous source phrase:
+ Entry pesho;
+ pesho.value = entrystartidx;
+      //The key is the sum of hashes of individual words bitshifted by their position. Probably not entirely correct, but fast
+ pesho.key = 0;
+ std::vector<uint64_t> vocabid_source = getVocabIDs(prev_line.source_phrase);
+ for (int i = 0; i < vocabid_source.size(); i++) {
+ pesho.key += (vocabid_source[i] << i);
+ }
+ pesho.bytes_toread = binfile.dist_from_start + binfile.extra_counter - entrystartidx;
+ //Put into table
+ table.Insert(pesho);
+
+ break;
}
+ }
+
+ serialize_table(mem, size, (basepath + "/probing_hash.dat").c_str());
- serialize_table(mem, size, (basepath + "/probing_hash.dat").c_str());
+ serialize_map(&source_vocabids, (basepath + "/source_vocabids").c_str());
- serialize_map(&source_vocabids, (basepath + "/source_vocabids").c_str());
-
- delete[] mem;
+ delete[] mem;
- //Write configfile
- std::ofstream configfile;
- configfile.open((basepath + "/config").c_str());
- configfile << uniq_entries << '\n';
- configfile.close();
+ //Write configfile
+ std::ofstream configfile;
+ configfile.open((basepath + "/config").c_str());
+ configfile << API_VERSION << '\n';
+ configfile << uniq_entries << '\n';
+ configfile << num_scores << '\n';
+ configfile << is_reordering << '\n';
+ configfile.close();
}
diff --git a/moses/TranslationModel/ProbingPT/storing.hh b/moses/TranslationModel/ProbingPT/storing.hh
index dfcdbcc41..821fd14ca 100644
--- a/moses/TranslationModel/ProbingPT/storing.hh
+++ b/moses/TranslationModel/ProbingPT/storing.hh
@@ -12,8 +12,10 @@
#include "util/file_piece.hh"
#include "util/file.hh"
#include "vocabid.hh"
+#define API_VERSION 3
-void createProbingPT(const char * phrasetable_path, const char * target_path);
+void createProbingPT(const char * phrasetable_path, const char * target_path,
+ const char * num_scores, const char * is_reordering);
class BinaryFileWriter {
std::vector<unsigned char> binfile;
diff --git a/moses/TranslationModel/ProbingPT/tests/tokenization_tests.cpp b/moses/TranslationModel/ProbingPT/tests/tokenization_tests.cpp
index 2a63242de..528c9c37c 100644
--- a/moses/TranslationModel/ProbingPT/tests/tokenization_tests.cpp
+++ b/moses/TranslationModel/ProbingPT/tests/tokenization_tests.cpp
@@ -1,198 +1,206 @@
-#include "line_splitter.hh"
-
-bool test_vectorinsert() {
- StringPiece line1 = StringPiece("! ! ! ! ||| ! ! ! ! ||| 0.0804289 0.141656 0.0804289 0.443409 2.718 ||| 0-0 1-1 2-2 3-3 ||| 1 1 1");
- StringPiece line2 = StringPiece("! ! ! ) , has ||| ! ! ! ) - , a ||| 0.0804289 0.0257627 0.0804289 0.00146736 2.718 ||| 0-0 1-1 2-2 3-3 4-4 4-5 5-6 ||| 1 1 1");
- line_text output = splitLine(line1);
- line_text output2 = splitLine(line2);
-
- //Init container vector and iterator.
- std::vector<char> container;
- container.reserve(10000); //Reserve vector
- std::vector<char>::iterator it = container.begin();
- std::pair<std::vector<char>::iterator, int> binary_append_ret; //Return values from vector_append
-
- //Put a value into the vector
- binary_append_ret = vector_append(&output, &container, it, false);
- it = binary_append_ret.first;
- binary_append_ret = vector_append(&output2, &container, it, false);
- it = binary_append_ret.first;
-
- std::string test(container.begin(), container.end());
- std::string should_be = "! ! ! ! 0.0804289 0.141656 0.0804289 0.443409 2.718 0-0 1-1 2-2 3-3 1 1 1! ! ! ) - , a 0.0804289 0.0257627 0.0804289 0.00146736 2.718 0-0 1-1 2-2 3-3 4-4 4-5 5-6 1 1 1";
- if (test == should_be) {
- return true;
- } else {
- return false;
- }
+#include "line_splitter.hh"
+
+bool test_vectorinsert()
+{
+ StringPiece line1 = StringPiece("! ! ! ! ||| ! ! ! ! ||| 0.0804289 0.141656 0.0804289 0.443409 2.718 ||| 0-0 1-1 2-2 3-3 ||| 1 1 1");
+ StringPiece line2 = StringPiece("! ! ! ) , has ||| ! ! ! ) - , a ||| 0.0804289 0.0257627 0.0804289 0.00146736 2.718 ||| 0-0 1-1 2-2 3-3 4-4 4-5 5-6 ||| 1 1 1");
+ line_text output = splitLine(line1);
+ line_text output2 = splitLine(line2);
+
+ //Init container vector and iterator.
+ std::vector<char> container;
+ container.reserve(10000); //Reserve vector
+ std::vector<char>::iterator it = container.begin();
+ std::pair<std::vector<char>::iterator, int> binary_append_ret; //Return values from vector_append
+
+ //Put a value into the vector
+ binary_append_ret = vector_append(&output, &container, it, false);
+ it = binary_append_ret.first;
+ binary_append_ret = vector_append(&output2, &container, it, false);
+ it = binary_append_ret.first;
+
+ std::string test(container.begin(), container.end());
+ std::string should_be = "! ! ! ! 0.0804289 0.141656 0.0804289 0.443409 2.718 0-0 1-1 2-2 3-3 1 1 1! ! ! ) - , a 0.0804289 0.0257627 0.0804289 0.00146736 2.718 0-0 1-1 2-2 3-3 4-4 4-5 5-6 1 1 1";
+ if (test == should_be) {
+ return true;
+ } else {
+ return false;
+ }
}
-bool probabilitiesTest(){
- StringPiece line1 = StringPiece("0.536553 0.75961 0.634108 0.532927 2.718");
- StringPiece line2 = StringPiece("1.42081e-05 3.91895e-09 0.0738539 0.749514 2.718");
-
- std::vector<double> pesho;
- bool peshobool = false;
- bool kirobool = false;
- std::vector<double> kiro;
-
- pesho = splitProbabilities(line1);
- kiro = splitProbabilities(line2);
-
- if (pesho[0] == 0.536553 && pesho[1] == 0.75961 && pesho[2] == 0.634108 && pesho[3] == 0.532927 && pesho[4] == 2.718 && pesho.size() == 5) {
- peshobool = true;
- } else {
- std::cout << "Processed: " << pesho[0] << " " << pesho[1] << " " << pesho[2] << " " << pesho[3] << " " << pesho[4] << std::endl;
- std::cout << "Size is: " << pesho.size() << " Expected 5." << std::endl;
- std::cout << "Expected: " << "0.536553 0.75961 0.634108 0.532927 2.718" << std::endl;
- }
-
- if (kiro[0] == 1.42081e-05 && kiro[1] == 3.91895e-09 && kiro[2] == 0.0738539 && kiro[3] == 0.749514 && kiro[4] == 2.718 && kiro.size() == 5) {
- kirobool = true;
- } else {
- std::cout << "Processed: " << kiro[0] << " " << kiro[1] << " " << kiro[2] << " " << kiro[3] << " " << kiro[4] << std::endl;
- std::cout << "Size is: " << kiro.size() << " Expected 5." << std::endl;
- std::cout << "Expected: " << "1.42081e-05 3.91895e-09 0.0738539 0.749514 2.718" << std::endl;
- }
-
- return (peshobool && kirobool);
+bool probabilitiesTest()
+{
+ StringPiece line1 = StringPiece("0.536553 0.75961 0.634108 0.532927 2.718");
+ StringPiece line2 = StringPiece("1.42081e-05 3.91895e-09 0.0738539 0.749514 2.718");
+
+ std::vector<double> pesho;
+ bool peshobool = false;
+ bool kirobool = false;
+ std::vector<double> kiro;
+
+ pesho = splitProbabilities(line1);
+ kiro = splitProbabilities(line2);
+
+ if (pesho[0] == 0.536553 && pesho[1] == 0.75961 && pesho[2] == 0.634108 && pesho[3] == 0.532927 && pesho[4] == 2.718 && pesho.size() == 5) {
+ peshobool = true;
+ } else {
+ std::cout << "Processed: " << pesho[0] << " " << pesho[1] << " " << pesho[2] << " " << pesho[3] << " " << pesho[4] << std::endl;
+ std::cout << "Size is: " << pesho.size() << " Expected 5." << std::endl;
+ std::cout << "Expected: " << "0.536553 0.75961 0.634108 0.532927 2.718" << std::endl;
+ }
+
+ if (kiro[0] == 1.42081e-05 && kiro[1] == 3.91895e-09 && kiro[2] == 0.0738539 && kiro[3] == 0.749514 && kiro[4] == 2.718 && kiro.size() == 5) {
+ kirobool = true;
+ } else {
+ std::cout << "Processed: " << kiro[0] << " " << kiro[1] << " " << kiro[2] << " " << kiro[3] << " " << kiro[4] << std::endl;
+ std::cout << "Size is: " << kiro.size() << " Expected 5." << std::endl;
+ std::cout << "Expected: " << "1.42081e-05 3.91895e-09 0.0738539 0.749514 2.718" << std::endl;
+ }
+
+ return (peshobool && kirobool);
}
-bool wordAll1test(){
- StringPiece line1 = StringPiece("2-0 3-1 4-2 5-2");
- StringPiece line2 = StringPiece("0-0 1-1 2-2 3-3 4-3 6-4 5-5");
-
- std::vector<int> pesho;
- bool peshobool = false;
- bool kirobool = false;
- std::vector<int> kiro;
-
- pesho = splitWordAll1(line1);
- kiro = splitWordAll1(line2);
-
- if (pesho[0] == 2 && pesho[1] == 0 && pesho[2] == 3 && pesho[3] == 1 && pesho[4] == 4
- && pesho[5] == 2 && pesho[6] == 5 && pesho[7] == 2 && pesho.size() == 8) {
- peshobool = true;
- } else {
- std::cout << "Processed: " << pesho[0] << "-" << pesho[1] << " " << pesho[2] << "-" << pesho[3] << " "
- << pesho[4] << "-" << pesho[5] << " " << pesho[6] << "-" << pesho[7] << std::endl;
- std::cout << "Size is: " << pesho.size() << " Expected: 8." << std::endl;
- std::cout << "Expected: " << "2-0 3-1 4-2 5-2" << std::endl;
- }
-
- if (kiro[0] == 0 && kiro[1] == 0 && kiro[2] == 1 && kiro[3] == 1 && kiro[4] == 2 && kiro[5] == 2
- && kiro[6] == 3 && kiro[7] == 3 && kiro[8] == 4 && kiro[9] == 3 && kiro[10] == 6 && kiro[11] == 4
- && kiro[12] == 5 && kiro[13] == 5 && kiro.size() == 14){
- kirobool = true;
- } else {
- std::cout << "Processed: " << kiro[0] << "-" << kiro[1] << " " << kiro[2] << "-" << kiro[3] << " "
- << kiro[4] << "-" << kiro[5] << " " << kiro[6] << "-" << kiro[7] << " " << kiro[8] << "-" << kiro[9]
- << " " << kiro[10] << "-" << kiro[11] << " " << kiro[12] << "-" << kiro[13] << std::endl;
- std::cout << "Size is: " << kiro.size() << " Expected: 14" << std::endl;
- std::cout << "Expected: " << "0-0 1-1 2-2 3-3 4-3 6-4 5-5" << std::endl;
- }
-
- return (peshobool && kirobool);
+bool wordAll1test()
+{
+ StringPiece line1 = StringPiece("2-0 3-1 4-2 5-2");
+ StringPiece line2 = StringPiece("0-0 1-1 2-2 3-3 4-3 6-4 5-5");
+
+ std::vector<int> pesho;
+ bool peshobool = false;
+ bool kirobool = false;
+ std::vector<int> kiro;
+
+ pesho = splitWordAll1(line1);
+ kiro = splitWordAll1(line2);
+
+ if (pesho[0] == 2 && pesho[1] == 0 && pesho[2] == 3 && pesho[3] == 1 && pesho[4] == 4
+ && pesho[5] == 2 && pesho[6] == 5 && pesho[7] == 2 && pesho.size() == 8) {
+ peshobool = true;
+ } else {
+ std::cout << "Processed: " << pesho[0] << "-" << pesho[1] << " " << pesho[2] << "-" << pesho[3] << " "
+ << pesho[4] << "-" << pesho[5] << " " << pesho[6] << "-" << pesho[7] << std::endl;
+ std::cout << "Size is: " << pesho.size() << " Expected: 8." << std::endl;
+ std::cout << "Expected: " << "2-0 3-1 4-2 5-2" << std::endl;
+ }
+
+ if (kiro[0] == 0 && kiro[1] == 0 && kiro[2] == 1 && kiro[3] == 1 && kiro[4] == 2 && kiro[5] == 2
+ && kiro[6] == 3 && kiro[7] == 3 && kiro[8] == 4 && kiro[9] == 3 && kiro[10] == 6 && kiro[11] == 4
+ && kiro[12] == 5 && kiro[13] == 5 && kiro.size() == 14) {
+ kirobool = true;
+ } else {
+ std::cout << "Processed: " << kiro[0] << "-" << kiro[1] << " " << kiro[2] << "-" << kiro[3] << " "
+ << kiro[4] << "-" << kiro[5] << " " << kiro[6] << "-" << kiro[7] << " " << kiro[8] << "-" << kiro[9]
+ << " " << kiro[10] << "-" << kiro[11] << " " << kiro[12] << "-" << kiro[13] << std::endl;
+ std::cout << "Size is: " << kiro.size() << " Expected: 14" << std::endl;
+ std::cout << "Expected: " << "0-0 1-1 2-2 3-3 4-3 6-4 5-5" << std::endl;
+ }
+
+ return (peshobool && kirobool);
}
-bool wordAll2test(){
- StringPiece line1 = StringPiece("4 9 1");
- StringPiece line2 = StringPiece("3255 9 1");
-
- std::vector<int> pesho;
- bool peshobool = false;
- bool kirobool = false;
- std::vector<int> kiro;
-
- pesho = splitWordAll2(line1);
- kiro = splitWordAll2(line2);
-
- if (pesho[0] == 4 && pesho[1] == 9 && pesho[2] == 1 && pesho.size() == 3){
- peshobool = true;
- } else {
- std::cout << "Processed: " << pesho[0] << " " << pesho[1] << " " << pesho[2] << std::endl;
- std::cout << "Size: " << pesho.size() << " Expected: 3" << std::endl;
- std::cout << "Expected: " << "4 9 1" << std::endl;
- }
-
- if (kiro[0] == 3255 && kiro[1] == 9 && kiro[2] == 1 && kiro.size() == 3){
- kirobool = true;
- } else {
- std::cout << "Processed: " << kiro[0] << " " << kiro[1] << " " << kiro[2] << std::endl;
- std::cout << "Size: " << kiro.size() << " Expected: 3" << std::endl;
- std::cout << "Expected: " << "3255 9 1" << std::endl;
- }
-
- return (peshobool && kirobool);
+bool wordAll2test()
+{
+ StringPiece line1 = StringPiece("4 9 1");
+ StringPiece line2 = StringPiece("3255 9 1");
+
+ std::vector<int> pesho;
+ bool peshobool = false;
+ bool kirobool = false;
+ std::vector<int> kiro;
+
+ pesho = splitWordAll2(line1);
+ kiro = splitWordAll2(line2);
+
+ if (pesho[0] == 4 && pesho[1] == 9 && pesho[2] == 1 && pesho.size() == 3) {
+ peshobool = true;
+ } else {
+ std::cout << "Processed: " << pesho[0] << " " << pesho[1] << " " << pesho[2] << std::endl;
+ std::cout << "Size: " << pesho.size() << " Expected: 3" << std::endl;
+ std::cout << "Expected: " << "4 9 1" << std::endl;
+ }
+
+ if (kiro[0] == 3255 && kiro[1] == 9 && kiro[2] == 1 && kiro.size() == 3) {
+ kirobool = true;
+ } else {
+ std::cout << "Processed: " << kiro[0] << " " << kiro[1] << " " << kiro[2] << std::endl;
+ std::cout << "Size: " << kiro.size() << " Expected: 3" << std::endl;
+ std::cout << "Expected: " << "3255 9 1" << std::endl;
+ }
+
+ return (peshobool && kirobool);
}
-bool test_tokenization(){
- StringPiece line1 = StringPiece("! ! ! ! ||| ! ! ! ! ||| 0.0804289 0.141656 0.0804289 0.443409 2.718 ||| 0-0 1-1 2-2 3-3 ||| 1 1 1");
- StringPiece line2 = StringPiece("! ! ! ) , has ||| ! ! ! ) - , a ||| 0.0804289 0.0257627 0.0804289 0.00146736 2.718 ||| 0-0 1-1 2-2 3-3 4-4 4-5 5-6 ||| 1 1 1");
- StringPiece line3 = StringPiece("! ! ! ) , ||| ! ! ! ) - , ||| 0.0804289 0.075225 0.0804289 0.00310345 2.718 ||| 0-0 1-1 2-2 3-3 4-4 4-5 ||| 1 1 1");
- StringPiece line4 = StringPiece("! ! ! ) ||| ! ! ! ) . ||| 0.0804289 0.177547 0.0268096 0.000872597 2.718 ||| 0-0 1-1 2-2 3-3 ||| 1 3 1");
+bool test_tokenization()
+{
+ StringPiece line1 = StringPiece("! ! ! ! ||| ! ! ! ! ||| 0.0804289 0.141656 0.0804289 0.443409 2.718 ||| 0-0 1-1 2-2 3-3 ||| 1 1 1");
+ StringPiece line2 = StringPiece("! ! ! ) , has ||| ! ! ! ) - , a ||| 0.0804289 0.0257627 0.0804289 0.00146736 2.718 ||| 0-0 1-1 2-2 3-3 4-4 4-5 5-6 ||| 1 1 1");
+ StringPiece line3 = StringPiece("! ! ! ) , ||| ! ! ! ) - , ||| 0.0804289 0.075225 0.0804289 0.00310345 2.718 ||| 0-0 1-1 2-2 3-3 4-4 4-5 ||| 1 1 1");
+ StringPiece line4 = StringPiece("! ! ! ) ||| ! ! ! ) . ||| 0.0804289 0.177547 0.0268096 0.000872597 2.718 ||| 0-0 1-1 2-2 3-3 ||| 1 3 1");
- line_text output1 = splitLine(line1);
- line_text output2 = splitLine(line2);
- line_text output3 = splitLine(line3);
- line_text output4 = splitLine(line4);
+ line_text output1 = splitLine(line1);
+ line_text output2 = splitLine(line2);
+ line_text output3 = splitLine(line3);
+ line_text output4 = splitLine(line4);
- bool test1 = output1.prob == StringPiece("0.0804289 0.141656 0.0804289 0.443409 2.718");
- bool test2 = output2.word_all1 == StringPiece("0-0 1-1 2-2 3-3 4-4 4-5 5-6");
- bool test3 = output2.target_phrase == StringPiece("! ! ! ) - , a");
- bool test4 = output3.source_phrase == StringPiece("! ! ! ) ,");
- bool test5 = output4.word_all2 == StringPiece("1 3 1");
+ bool test1 = output1.prob == StringPiece("0.0804289 0.141656 0.0804289 0.443409 2.718");
+ bool test2 = output2.word_all1 == StringPiece("0-0 1-1 2-2 3-3 4-4 4-5 5-6");
+ bool test3 = output2.target_phrase == StringPiece("! ! ! ) - , a");
+ bool test4 = output3.source_phrase == StringPiece("! ! ! ) ,");
+ bool test5 = output4.word_all2 == StringPiece("1 3 1");
- //std::cout << test1 << " " << test2 << " " << test3 << " " << test4 << std::endl;
+ //std::cout << test1 << " " << test2 << " " << test3 << " " << test4 << std::endl;
- return (test1 && test2 && test3 && test4 && test5);
+ return (test1 && test2 && test3 && test4 && test5);
}
-bool test_linesplitter(){
- StringPiece line1 = StringPiece("! &#93; 0.0738539 0.901133 0.0738539 0.65207 2.718 0-0 1-1 1 1 1");
- target_text ans1;
- ans1 = splitSingleTargetLine(line1);
+bool test_linesplitter()
+{
+ StringPiece line1 = StringPiece("! &#93; 0.0738539 0.901133 0.0738539 0.65207 2.718 0-0 1-1 1 1 1");
+ target_text ans1;
+ ans1 = splitSingleTargetLine(line1);
- /* For testing purposes
- std::cout << ans1.target_phrase[0] << " " <<ans1.target_phrase[1] << " Size: " << ans1.target_phrase.size() << std::endl;
- std::cout << ans1.word_all1[3] << " " << ans1.word_all2[2] << " " << ans1.prob[3] << std::endl; */
+ /* For testing purposes
+ std::cout << ans1.target_phrase[0] << " " <<ans1.target_phrase[1] << " Size: " << ans1.target_phrase.size() << std::endl;
+ std::cout << ans1.word_all1[3] << " " << ans1.word_all2[2] << " " << ans1.prob[3] << std::endl; */
- return (ans1.target_phrase.size() == 2 && ans1.prob.size() == 5 && ans1.word_all1.size() == 4 && ans1.word_all2.size() == 3);
+ return (ans1.target_phrase.size() == 2 && ans1.prob.size() == 5 && ans1.word_all1.size() == 4 && ans1.word_all2.size() == 3);
}
-bool test_linessplitter(){
- StringPiece line1 = StringPiece("! &#93; 0.0738539 0.901133 0.0738539 0.65207 2.718 0-0 1-1 1 1 1\n\n! ) . proto došlo 0.0738539 7.14446e-06");
- StringPiece line2 = StringPiece("! &quot; ) 0.536553 0.75961 0.634108 0.532927 2.718 0-0 1-1 2-2 13 11 8\n! ) . 0.0369269 0.00049839 0.00671399 0.00372884 2.718 0-0 1-1 2-1 2-2 2 11 1\n&quot; ! ) 0.0738539 0.75961 0.00671399 0.532927 2.718 1-0 0-1 2-2 1 11 1\nse ! &quot; ) 0.0738539 0.75961 0.00671399 0.0225211 2.718 0-1 1-2 2-3 1 11 1\n\n! &quot; , a to 0.0738539 0.0894238 0.0738539 0.048");
+bool test_linessplitter()
+{
+ StringPiece line1 = StringPiece("! &#93; 0.0738539 0.901133 0.0738539 0.65207 2.718 0-0 1-1 1 1 1\n\n! ) . proto došlo 0.0738539 7.14446e-06");
+ StringPiece line2 = StringPiece("! &quot; ) 0.536553 0.75961 0.634108 0.532927 2.718 0-0 1-1 2-2 13 11 8\n! ) . 0.0369269 0.00049839 0.00671399 0.00372884 2.718 0-0 1-1 2-1 2-2 2 11 1\n&quot; ! ) 0.0738539 0.75961 0.00671399 0.532927 2.718 1-0 0-1 2-2 1 11 1\nse ! &quot; ) 0.0738539 0.75961 0.00671399 0.0225211 2.718 0-1 1-2 2-3 1 11 1\n\n! &quot; , a to 0.0738539 0.0894238 0.0738539 0.048");
- std::vector<target_text> ans1;
- std::vector<target_text> ans2;
+ std::vector<target_text> ans1;
+ std::vector<target_text> ans2;
- ans1 = splitTargetLine(line1);
- ans2 = splitTargetLine(line2);
+ ans1 = splitTargetLine(line1);
+ ans2 = splitTargetLine(line2);
- bool sizes = ans1.size() == 1 && ans2.size() == 4;
- bool prob = ans1[0].prob[3] == 0.65207 && ans2[1].prob[1] == 0.00049839;
- bool word_alls = ans2[0].word_all2[1] == 11 && ans2[3].word_all1[5] == 3;
+ bool sizes = ans1.size() == 1 && ans2.size() == 4;
+ bool prob = ans1[0].prob[3] == 0.65207 && ans2[1].prob[1] == 0.00049839;
+ bool word_alls = ans2[0].word_all2[1] == 11 && ans2[3].word_all1[5] == 3;
- /* FOr testing
- std::cout << ans1.size() << std::endl;
- std::cout << ans2.size() << std::endl;
- std::cout << ans1[0].prob[3] << std::endl;
- std::cout << ans2[1].prob[1] << std::endl;
- std::cout << ans2[0].word_all2[1] << std::endl;
- std::cout << ans2[3].word_all1[5] << std::endl; */
+  /* For testing
+ std::cout << ans1.size() << std::endl;
+ std::cout << ans2.size() << std::endl;
+ std::cout << ans1[0].prob[3] << std::endl;
+ std::cout << ans2[1].prob[1] << std::endl;
+ std::cout << ans2[0].word_all2[1] << std::endl;
+ std::cout << ans2[3].word_all1[5] << std::endl; */
- return sizes && prob && word_alls;
+ return sizes && prob && word_alls;
}
-int main(){
- if (probabilitiesTest() && wordAll1test() && wordAll2test() && test_tokenization() && test_linesplitter() && test_linessplitter() && test_vectorinsert()){
- std::cout << "All tests pass!" << std::endl;
- } else {
- std::cout << "Failiure in some tests!" << std::endl;
- }
+int main()
+{
+ if (probabilitiesTest() && wordAll1test() && wordAll2test() && test_tokenization() && test_linesplitter() && test_linessplitter() && test_vectorinsert()) {
+ std::cout << "All tests pass!" << std::endl;
+ } else {
+ std::cout << "Failiure in some tests!" << std::endl;
+ }
- return 1;
+ return 1;
} \ No newline at end of file
diff --git a/moses/TranslationModel/ProbingPT/tests/vocabid_test.cpp b/moses/TranslationModel/ProbingPT/tests/vocabid_test.cpp
index bc82db74e..fac439eeb 100644
--- a/moses/TranslationModel/ProbingPT/tests/vocabid_test.cpp
+++ b/moses/TranslationModel/ProbingPT/tests/vocabid_test.cpp
@@ -3,43 +3,44 @@
#include "hash.hh"
#include "vocabid.hh"
-int main(int argc, char* argv[]){
-
- //Create a map and serialize it
- std::map<uint64_t, std::string> vocabids;
- StringPiece demotext = StringPiece("Demo text with 3 elements");
- add_to_map(&vocabids, demotext);
- //Serialize map
- serialize_map(&vocabids, "/tmp/testmap.bin");
-
- //Read the map and test if the values are the same
- std::map<uint64_t, std::string> newmap;
- read_map(&newmap, "/tmp/testmap.bin");
-
- //Used hashes
- uint64_t num1 = getHash(StringPiece("Demo"));
- uint64_t num2 = getVocabID("text");
- uint64_t num3 = getHash(StringPiece("with"));
- uint64_t num4 = getVocabID("3");
- uint64_t num5 = getHash(StringPiece("elements"));
- uint64_t num6 = 0;
-
- //Tests
- bool test1 = getStringFromID(&newmap, num1) == getStringFromID(&vocabids, num1);
- bool test2 = getStringFromID(&newmap, num2) == getStringFromID(&vocabids, num2);
- bool test3 = getStringFromID(&newmap, num3) == getStringFromID(&vocabids, num3);
- bool test4 = getStringFromID(&newmap, num4) == getStringFromID(&vocabids, num4);
- bool test5 = getStringFromID(&newmap, num5) == getStringFromID(&vocabids, num5);
- bool test6 = getStringFromID(&newmap, num6) == getStringFromID(&vocabids, num6);
-
-
- if (test1 && test2 && test3 && test4 && test5 && test6){
- std::cout << "Map was successfully written and read!" << std::endl;
- } else {
- std::cout << "Error! " << test1 << " " << test2 << " " << test3 << " " << test4 << " " << test5 << " " << test6 << std::endl;
- }
-
-
- return 1;
-
-}
+int main(int argc, char* argv[])
+{
+
+ //Create a map and serialize it
+ std::map<uint64_t, std::string> vocabids;
+ StringPiece demotext = StringPiece("Demo text with 3 elements");
+ add_to_map(&vocabids, demotext);
+ //Serialize map
+ serialize_map(&vocabids, "/tmp/testmap.bin");
+
+ //Read the map and test if the values are the same
+ std::map<uint64_t, std::string> newmap;
+ read_map(&newmap, "/tmp/testmap.bin");
+
+ //Used hashes
+ uint64_t num1 = getHash(StringPiece("Demo"));
+ uint64_t num2 = getVocabID("text");
+ uint64_t num3 = getHash(StringPiece("with"));
+ uint64_t num4 = getVocabID("3");
+ uint64_t num5 = getHash(StringPiece("elements"));
+ uint64_t num6 = 0;
+
+ //Tests
+ bool test1 = getStringFromID(&newmap, num1) == getStringFromID(&vocabids, num1);
+ bool test2 = getStringFromID(&newmap, num2) == getStringFromID(&vocabids, num2);
+ bool test3 = getStringFromID(&newmap, num3) == getStringFromID(&vocabids, num3);
+ bool test4 = getStringFromID(&newmap, num4) == getStringFromID(&vocabids, num4);
+ bool test5 = getStringFromID(&newmap, num5) == getStringFromID(&vocabids, num5);
+ bool test6 = getStringFromID(&newmap, num6) == getStringFromID(&vocabids, num6);
+
+
+ if (test1 && test2 && test3 && test4 && test5 && test6) {
+ std::cout << "Map was successfully written and read!" << std::endl;
+ } else {
+ std::cout << "Error! " << test1 << " " << test2 << " " << test3 << " " << test4 << " " << test5 << " " << test6 << std::endl;
+ }
+
+
+ return 1;
+
+}
diff --git a/moses/TranslationModel/ProbingPT/vocabid.cpp b/moses/TranslationModel/ProbingPT/vocabid.cpp
index bcdbe78d0..1452f299d 100644
--- a/moses/TranslationModel/ProbingPT/vocabid.cpp
+++ b/moses/TranslationModel/ProbingPT/vocabid.cpp
@@ -1,29 +1,32 @@
-#include "vocabid.hh"
+#include "vocabid.hh"
-void add_to_map(std::map<uint64_t, std::string> *karta, StringPiece textin){
- //Tokenize
- util::TokenIter<util::SingleCharacter> it(textin, util::SingleCharacter(' '));
+void add_to_map(std::map<uint64_t, std::string> *karta, StringPiece textin)
+{
+ //Tokenize
+ util::TokenIter<util::SingleCharacter> it(textin, util::SingleCharacter(' '));
- while(it){
- karta->insert(std::pair<uint64_t, std::string>(getHash(*it), it->as_string()));
- it++;
- }
+ while(it) {
+ karta->insert(std::pair<uint64_t, std::string>(getHash(*it), it->as_string()));
+ it++;
+ }
}
-void serialize_map(std::map<uint64_t, std::string> *karta, const char* filename){
- std::ofstream os (filename, std::ios::binary);
- boost::archive::text_oarchive oarch(os);
+void serialize_map(std::map<uint64_t, std::string> *karta, const char* filename)
+{
+ std::ofstream os (filename, std::ios::binary);
+ boost::archive::text_oarchive oarch(os);
- oarch << *karta; //Serialise map
- os.close();
+ oarch << *karta; //Serialise map
+ os.close();
}
-void read_map(std::map<uint64_t, std::string> *karta, const char* filename){
- std::ifstream is (filename, std::ios::binary);
- boost::archive::text_iarchive iarch(is);
+void read_map(std::map<uint64_t, std::string> *karta, const char* filename)
+{
+ std::ifstream is (filename, std::ios::binary);
+ boost::archive::text_iarchive iarch(is);
- iarch >> *karta;
+ iarch >> *karta;
- //Close the stream after we are done.
- is.close();
+ //Close the stream after we are done.
+ is.close();
}
diff --git a/moses/TranslationModel/RuleTable/LoaderCompact.cpp b/moses/TranslationModel/RuleTable/LoaderCompact.cpp
index 299cfe7ea..c947dfdc2 100644
--- a/moses/TranslationModel/RuleTable/LoaderCompact.cpp
+++ b/moses/TranslationModel/RuleTable/LoaderCompact.cpp
@@ -21,7 +21,6 @@
#include "moses/AlignmentInfoCollection.h"
#include "moses/InputFileStream.h"
-#include "moses/UserMessage.h"
#include "moses/Util.h"
#include "moses/Word.h"
#include "Trie.h"
@@ -46,9 +45,7 @@ bool RuleTableLoaderCompact::Load(const std::vector<FactorType> &input,
// Read and check version number.
reader.ReadLine();
if (reader.m_line != "1") {
- std::stringstream msg;
- msg << "Unexpected compact rule table format: " << reader.m_line;
- UserMessage::Add(msg.str());
+ std::cerr << "Unexpected compact rule table format: " << reader.m_line;
return false;
}
@@ -211,11 +208,9 @@ bool RuleTableLoaderCompact::LoadRuleSection(
scoreVector[j] = FloorScore(TransformScore(score));
}
if (reader.m_line[tokenPositions[3+numScoreComponents]] != ':') {
- std::stringstream msg;
- msg << "Size of scoreVector != number ("
- << scoreVector.size() << "!=" << numScoreComponents
- << ") of score components on line " << reader.m_lineNum;
- UserMessage::Add(msg.str());
+ std::cerr << "Size of scoreVector != number ("
+ << scoreVector.size() << "!=" << numScoreComponents
+ << ") of score components on line " << reader.m_lineNum;
return false;
}
diff --git a/moses/TranslationModel/RuleTable/LoaderFactory.cpp b/moses/TranslationModel/RuleTable/LoaderFactory.cpp
index cdbfc965a..66a39e3bd 100644
--- a/moses/TranslationModel/RuleTable/LoaderFactory.cpp
+++ b/moses/TranslationModel/RuleTable/LoaderFactory.cpp
@@ -19,7 +19,6 @@
#include "LoaderFactory.h"
-#include "moses/UserMessage.h"
#include "moses/Util.h"
#include "moses/InputFileStream.h"
#include "LoaderCompact.h"
@@ -50,9 +49,7 @@ std::auto_ptr<RuleTableLoader> RuleTableLoaderFactory::Create(
if (tokens[0] == "1") {
return std::auto_ptr<RuleTableLoader>(new RuleTableLoaderCompact());
}
- std::stringstream msg;
- msg << "Unsupported compact rule table format: " << tokens[0];
- UserMessage::Add(msg.str());
+ std::cerr << "Unsupported compact rule table format: " << tokens[0];
return std::auto_ptr<RuleTableLoader>();
} else if (tokens[0] == "[X]" && tokens[1] == "|||") {
return std::auto_ptr<RuleTableLoader>(new
diff --git a/moses/TranslationModel/RuleTable/LoaderStandard.cpp b/moses/TranslationModel/RuleTable/LoaderStandard.cpp
index fe87594fa..95463feea 100644
--- a/moses/TranslationModel/RuleTable/LoaderStandard.cpp
+++ b/moses/TranslationModel/RuleTable/LoaderStandard.cpp
@@ -26,6 +26,7 @@
#include <iostream>
#include <sys/stat.h>
#include <stdlib.h>
+#include <boost/algorithm/string/predicate.hpp>
#include "Trie.h"
#include "moses/FactorCollection.h"
#include "moses/Word.h"
@@ -33,7 +34,6 @@
#include "moses/InputFileStream.h"
#include "moses/StaticData.h"
#include "moses/WordsRange.h"
-#include "moses/UserMessage.h"
#include "moses/ChartTranslationOptionList.h"
#include "moses/FactorCollection.h"
#include "util/file_piece.hh"
@@ -43,6 +43,7 @@
#include "util/exception.hh"
using namespace std;
+using namespace boost::algorithm;
namespace Moses
{
@@ -68,12 +69,11 @@ void ReformatHieroRule(int sourceTarget, string &phrase, map<size_t, pair<size_t
for (size_t i = 0; i < toks.size(); ++i) {
string &tok = toks[i];
- size_t tokLen = tok.size();
- if (tok.substr(0, 1) == "[" && tok.substr(tokLen - 1, 1) == "]") {
+ if (starts_with(tok, "[") && ends_with(tok, "]")) {
// no-term
vector<string> split = Tokenize(tok, ",");
UTIL_THROW_IF2(split.size() != 2,
- "Incorrectly formmatted non-terminal: " << tok);
+ "Incorrectly formmatted non-terminal: " << tok);
tok = "[X]" + split[0] + "]";
size_t coIndex = Scan<size_t>(split[1]);
@@ -100,7 +100,7 @@ void ReformateHieroScore(string &scoreString)
string &tok = toks[i];
vector<string> nameValue = Tokenize(tok, "=");
UTIL_THROW_IF2(nameValue.size() != 2,
- "Incorrectly formatted score: " << tok);
+ "Incorrectly formatted score: " << tok);
float score = Scan<float>(nameValue[1]);
score = exp(-score);
@@ -149,7 +149,7 @@ bool RuleTableLoaderStandard::Load(FormatType format
, size_t /* tableLimit */
, RuleTableTrie &ruleTable)
{
- PrintUserTime(string("Start loading text phrase table. ") + (format==MosesFormat?"Moses ":"Hiero ") + " format");
+ PrintUserTime(string("Start loading text phrase table. ") + (format==MosesFormat?"Moses":"Hiero") + " format");
const StaticData &staticData = StaticData::Instance();
@@ -191,10 +191,6 @@ bool RuleTableLoaderStandard::Load(FormatType format
alignString = temp;
}
- if (++pipes) {
- StringPiece str(*pipes); //counts
- }
-
bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
TRACE_ERR( ruleTable.GetFilePath() << ":" << count << ": pt entry contains empty target, skipping\n");
@@ -211,7 +207,7 @@ bool RuleTableLoaderStandard::Load(FormatType format
const size_t numScoreComponents = ruleTable.GetNumScoreComponents();
if (scoreVector.size() != numScoreComponents) {
UTIL_THROW2("Size of scoreVector != number (" << scoreVector.size() << "!="
- << numScoreComponents << ") of score components on line " << count);
+ << numScoreComponents << ") of score components on line " << count);
}
// parse source & find pt node
@@ -231,7 +227,7 @@ bool RuleTableLoaderStandard::Load(FormatType format
targetPhrase->SetAlignmentInfo(alignString);
targetPhrase->SetTargetLHS(targetLHS);
- //targetPhrase->SetDebugOutput(string("New Format pt ") + line);
+ ++pipes; // skip over counts field
if (++pipes) {
StringPiece sparseString(*pipes);
diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp b/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp
index 821b81c51..63ec38599 100644
--- a/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp
+++ b/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp
@@ -12,7 +12,6 @@
#include "moses/InputFileStream.h"
#include "moses/TypeDef.h"
#include "moses/StaticData.h"
-#include "moses/UserMessage.h"
#include "Loader.h"
#include "LoaderFactory.h"
#include "util/exception.hh"
@@ -50,7 +49,7 @@ void PhraseDictionaryALSuffixArray::InitializeForInput(InputType const& source)
*this);
UTIL_THROW_IF2(!ret,
- "Rules not successfully loaded for sentence id " << translationId);
+ "Rules not successfully loaded for sentence id " << translationId);
}
void PhraseDictionaryALSuffixArray::CleanUpAfterSentenceProcessing(const InputType &source)
diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp b/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp
index ba1971e3a..0928cf1e4 100644
--- a/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp
+++ b/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp
@@ -40,7 +40,6 @@
#include "moses/InputFileStream.h"
#include "moses/StaticData.h"
#include "moses/WordsRange.h"
-#include "moses/UserMessage.h"
#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h"
#include "moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.h"
#include "moses/TranslationModel/fuzzy-match/SentenceAlignment.h"
@@ -52,7 +51,8 @@ using namespace std;
#if defined __MINGW32__ && !defined mkdtemp
#include <windows.h>
#include <errno.h>
-char *mkdtemp(char *tempbuf) {
+char *mkdtemp(char *tempbuf)
+{
int rand_value = 0;
char* tempbase = NULL;
char tempbasebuf[MAX_PATH] = "";
@@ -111,20 +111,20 @@ PhraseDictionaryFuzzyMatch::
SetParameter(const std::string& key, const std::string& value)
{
if (key == "source") {
- m_config[0] = value;
+ m_config[0] = value;
} else if (key == "target") {
- m_config[1] = value;
+ m_config[1] = value;
} else if (key == "alignment") {
- m_config[2] = value;
+ m_config[2] = value;
} else {
- PhraseDictionary::SetParameter(key, value);
+ PhraseDictionary::SetParameter(key, value);
}
}
int removedirectoryrecursively(const char *dirname)
{
#if defined __MINGW32__
- //TODO(jie): replace this function with boost implementation
+ //TODO(jie): replace this function with boost implementation
#else
DIR *dir;
struct dirent *entry;
@@ -184,7 +184,7 @@ void PhraseDictionaryFuzzyMatch::InitializeForInput(InputType const& inputSenten
#endif // defined
char *temp = mkdtemp(dirName);
UTIL_THROW_IF2(temp == NULL,
- "Couldn't create temporary directory " << dirName);
+ "Couldn't create temporary directory " << dirName);
string dirNameStr(dirName);
@@ -251,11 +251,11 @@ void PhraseDictionaryFuzzyMatch::InitializeForInput(InputType const& inputSenten
const size_t numScoreComponents = GetNumScoreComponents();
if (scoreVector.size() != numScoreComponents) {
UTIL_THROW2("Size of scoreVector != number (" << scoreVector.size() << "!="
- << numScoreComponents << ") of score components on line " << count);
+ << numScoreComponents << ") of score components on line " << count);
}
UTIL_THROW_IF2(scoreVector.size() != numScoreComponents,
- "Number of scores incorrectly specified");
+ "Number of scores incorrectly specified");
// parse source & find pt node
@@ -331,9 +331,9 @@ PhraseDictionaryNodeMemory &PhraseDictionaryFuzzyMatch::GetOrCreateNode(PhraseDi
const Word &sourceNonTerm = word;
UTIL_THROW_IF2(iterAlign == alignmentInfo.end(),
- "No alignment for non-term at position " << pos);
+ "No alignment for non-term at position " << pos);
UTIL_THROW_IF2(iterAlign->first != pos,
- "Alignment info incorrect at position " << pos);
+ "Alignment info incorrect at position " << pos);
size_t targetNonTermInd = iterAlign->second;
++iterAlign;
@@ -349,7 +349,7 @@ PhraseDictionaryNodeMemory &PhraseDictionaryFuzzyMatch::GetOrCreateNode(PhraseDi
}
UTIL_THROW_IF2(currNode == NULL,
- "Node not found at position " << pos);
+ "Node not found at position " << pos);
}
@@ -375,7 +375,7 @@ const PhraseDictionaryNodeMemory &PhraseDictionaryFuzzyMatch::GetRootNode(long t
{
std::map<long, PhraseDictionaryNodeMemory>::const_iterator iter = m_collection.find(translationId);
UTIL_THROW_IF2(iter == m_collection.end(),
- "Couldn't find root node for input: " << translationId);
+ "Couldn't find root node for input: " << translationId);
return iter->second;
}
PhraseDictionaryNodeMemory &PhraseDictionaryFuzzyMatch::GetRootNode(const InputType &source)
@@ -383,7 +383,7 @@ PhraseDictionaryNodeMemory &PhraseDictionaryFuzzyMatch::GetRootNode(const InputT
long transId = source.GetTranslationId();
std::map<long, PhraseDictionaryNodeMemory>::iterator iter = m_collection.find(transId);
UTIL_THROW_IF2(iter == m_collection.end(),
- "Couldn't find root node for input: " << transId);
+ "Couldn't find root node for input: " << transId);
return iter->second;
}
diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp
index 4d3f9fbd3..c4433bc5a 100644
--- a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp
+++ b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp
@@ -80,28 +80,26 @@ const OnDiskPt::OnDiskWrapper &PhraseDictionaryOnDisk::GetImplementation() const
void PhraseDictionaryOnDisk::InitializeForInput(InputType const& source)
{
- const StaticData &staticData = StaticData::Instance();
-
ReduceCache();
OnDiskPt::OnDiskWrapper *obj = new OnDiskPt::OnDiskWrapper();
obj->BeginLoad(m_filePath);
UTIL_THROW_IF2(obj->GetMisc("Version") != OnDiskPt::OnDiskWrapper::VERSION_NUM,
- "On-disk phrase table is version " << obj->GetMisc("Version")
- << ". It is not compatible with version " << OnDiskPt::OnDiskWrapper::VERSION_NUM);
+ "On-disk phrase table is version " << obj->GetMisc("Version")
+ << ". It is not compatible with version " << OnDiskPt::OnDiskWrapper::VERSION_NUM);
UTIL_THROW_IF2(obj->GetMisc("NumSourceFactors") != m_input.size(),
- "On-disk phrase table has " << obj->GetMisc("NumSourceFactors") << " source factors."
- << ". The ini file specified " << m_input.size() << " source factors");
+ "On-disk phrase table has " << obj->GetMisc("NumSourceFactors") << " source factors."
+ << ". The ini file specified " << m_input.size() << " source factors");
UTIL_THROW_IF2(obj->GetMisc("NumTargetFactors") != m_output.size(),
- "On-disk phrase table has " << obj->GetMisc("NumTargetFactors") << " target factors."
- << ". The ini file specified " << m_output.size() << " target factors");
+ "On-disk phrase table has " << obj->GetMisc("NumTargetFactors") << " target factors."
+ << ". The ini file specified " << m_output.size() << " target factors");
UTIL_THROW_IF2(obj->GetMisc("NumScores") != m_numScoreComponents,
- "On-disk phrase table has " << obj->GetMisc("NumScores") << " scores."
- << ". The ini file specified " << m_numScoreComponents << " scores");
+ "On-disk phrase table has " << obj->GetMisc("NumScores") << " scores."
+ << ". The ini file specified " << m_numScoreComponents << " scores");
m_implementation.reset(obj);
}
@@ -141,7 +139,7 @@ void PhraseDictionaryOnDisk::GetTargetPhraseCollectionBatch(InputPath &inputPath
// backoff
if (!SatisfyBackoff(inputPath)) {
- return;
+ return;
}
if (prevPtNode) {
@@ -214,11 +212,9 @@ void PhraseDictionaryOnDisk::SetParameter(const std::string& key, const std::str
{
if (key == "max-span-default") {
m_maxSpanDefault = Scan<size_t>(value);
- }
- else if (key == "max-span-labelled") {
+ } else if (key == "max-span-labelled") {
m_maxSpanLabelled = Scan<size_t>(value);
- }
- else {
+ } else {
PhraseDictionary::SetParameter(key, value);
}
}
diff --git a/moses/TranslationModel/Scope3Parser/Parser.cpp b/moses/TranslationModel/Scope3Parser/Parser.cpp
index 1491260a1..c8c8c3e49 100644
--- a/moses/TranslationModel/Scope3Parser/Parser.cpp
+++ b/moses/TranslationModel/Scope3Parser/Parser.cpp
@@ -39,10 +39,11 @@ namespace Moses
{
void Scope3Parser::GetChartRuleCollection(
- const WordsRange &range,
+ const InputPath &inputPath,
size_t last,
ChartParserCallback &outColl)
{
+ const WordsRange &range = inputPath.GetWordsRange();
const size_t start = range.GetStartPos();
const size_t end = range.GetEndPos();
diff --git a/moses/TranslationModel/Scope3Parser/Parser.h b/moses/TranslationModel/Scope3Parser/Parser.h
index c52f7b50e..70b26b50d 100644
--- a/moses/TranslationModel/Scope3Parser/Parser.h
+++ b/moses/TranslationModel/Scope3Parser/Parser.h
@@ -58,7 +58,7 @@ public:
}
void GetChartRuleCollection(
- const WordsRange &range,
+ const InputPath &inputPath,
size_t last,
ChartParserCallback &outColl);
diff --git a/moses/TranslationModel/SkeletonPT.cpp b/moses/TranslationModel/SkeletonPT.cpp
index 4fc3da96b..c1df952c1 100644
--- a/moses/TranslationModel/SkeletonPT.cpp
+++ b/moses/TranslationModel/SkeletonPT.cpp
@@ -14,7 +14,7 @@ SkeletonPT::SkeletonPT(const std::string &line)
void SkeletonPT::Load()
{
- SetFeaturesToApply();
+ SetFeaturesToApply();
}
void SkeletonPT::InitializeForInput(InputType const& source)
@@ -37,8 +37,8 @@ void SkeletonPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQu
// add target phrase to phrase-table cache
size_t hash = hash_value(sourcePhrase);
- std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
- cache[hash] = value;
+ std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
+ cache[hash] = value;
inputPath.SetTargetPhrases(*this, tpColl, NULL);
}
diff --git a/moses/TranslationModel/UG/Jamfile b/moses/TranslationModel/UG/Jamfile
index 032b729bd..3ac7910b2 100644
--- a/moses/TranslationModel/UG/Jamfile
+++ b/moses/TranslationModel/UG/Jamfile
@@ -3,6 +3,7 @@ try-align.cc
$(TOP)/moses//moses
$(TOP)/moses/TranslationModel/UG/generic//generic
$(TOP)//boost_iostreams
+$(TOP)//boost_filesystem
$(TOP)//boost_program_options
$(TOP)/moses/TranslationModel/UG/mm//mm
$(TOP)/moses/TranslationModel/UG//mmsapt
@@ -15,6 +16,7 @@ $(TOP)/moses//moses
$(TOP)/moses/TranslationModel/UG/generic//generic
$(TOP)/moses/TranslationModel/UG/generic//stringdist
$(TOP)//boost_iostreams
+$(TOP)//boost_filesystem
$(TOP)//boost_program_options
$(TOP)/moses/TranslationModel/UG/mm//mm
$(TOP)/moses/TranslationModel/UG//mmsapt
@@ -26,6 +28,7 @@ ptable-describe-features.cc
$(TOP)/moses//moses
$(TOP)/moses/TranslationModel/UG/generic//generic
$(TOP)//boost_iostreams
+$(TOP)//boost_filesystem
$(TOP)//boost_program_options
$(TOP)/moses/TranslationModel/UG/mm//mm
$(TOP)/moses/TranslationModel/UG//mmsapt
@@ -37,6 +40,7 @@ count-ptable-features.cc
$(TOP)/moses//moses
$(TOP)/moses/TranslationModel/UG/generic//generic
$(TOP)//boost_iostreams
+$(TOP)//boost_filesystem
$(TOP)//boost_program_options
$(TOP)/moses/TranslationModel/UG/mm//mm
$(TOP)/moses/TranslationModel/UG//mmsapt
@@ -48,6 +52,7 @@ ptable-lookup.cc
$(TOP)/moses//moses
$(TOP)/moses/TranslationModel/UG/generic//generic
$(TOP)//boost_iostreams
+$(TOP)//boost_filesystem
$(TOP)//boost_program_options
$(TOP)/moses/TranslationModel/UG/mm//mm
$(TOP)/moses/TranslationModel/UG//mmsapt
@@ -59,6 +64,7 @@ sim-pe.cc
$(TOP)/moses//moses
$(TOP)/moses/TranslationModel/UG/generic//generic
$(TOP)//boost_iostreams
+$(TOP)//boost_filesystem
$(TOP)//boost_program_options
$(TOP)/moses/TranslationModel/UG/mm//mm
$(TOP)/moses/TranslationModel/UG//mmsapt
@@ -70,6 +76,7 @@ spe-check-coverage.cc
$(TOP)/moses//moses
$(TOP)/moses/TranslationModel/UG/generic//generic
$(TOP)//boost_iostreams
+$(TOP)//boost_filesystem
$(TOP)//boost_program_options
$(TOP)/moses/TranslationModel/UG/mm//mm
$(TOP)/moses/TranslationModel/UG//mmsapt
@@ -81,6 +88,7 @@ spe-check-coverage2.cc
$(TOP)/moses//moses
$(TOP)/moses/TranslationModel/UG/generic//generic
$(TOP)//boost_iostreams
+$(TOP)//boost_filesystem
$(TOP)//boost_program_options
$(TOP)/moses/TranslationModel/UG/mm//mm
$(TOP)/moses/TranslationModel/UG//mmsapt
@@ -92,6 +100,7 @@ spe-check-coverage3.cc
$(TOP)/moses//moses
$(TOP)/moses/TranslationModel/UG/generic//generic
$(TOP)//boost_iostreams
+$(TOP)//boost_filesystem
$(TOP)//boost_program_options
$(TOP)/moses/TranslationModel/UG/mm//mm
$(TOP)/moses/TranslationModel/UG//mmsapt
diff --git a/moses/TranslationModel/UG/generic/file_io/ug_stream.cpp b/moses/TranslationModel/UG/generic/file_io/ug_stream.cpp
index 4e0c7cbc1..073b64dfc 100644
--- a/moses/TranslationModel/UG/generic/file_io/ug_stream.cpp
+++ b/moses/TranslationModel/UG/generic/file_io/ug_stream.cpp
@@ -2,11 +2,13 @@
// (c) 2006,2007,2008 Ulrich Germann
// makes opening files a little more convenient
+#include <boost/algorithm/string/predicate.hpp>
#include "ug_stream.h"
namespace ugdiss
{
using namespace std;
+ using namespace boost::algorithm;
using namespace boost::iostreams;
filtering_istream*
@@ -28,11 +30,11 @@ namespace ugdiss
void
open_input_stream(string fname, filtering_istream& in)
{
- if (fname.size()>3 && fname.substr(fname.size()-3,3)==".gz")
+ if (ends_with(fname, ".gz"))
{
in.push(gzip_decompressor());
}
- else if (fname.size() > 4 && fname.substr(fname.size()-4,4)==".bz2")
+ else if (ends_with(fname, "bz2"))
{
in.push(bzip2_decompressor());
}
@@ -42,13 +44,11 @@ namespace ugdiss
void
open_output_stream(string fname, filtering_ostream& out)
{
- if ((fname.size() > 3 && fname.substr(fname.size()-3,3)==".gz") ||
- (fname.size() > 4 && fname.substr(fname.size()-4,4)==".gz_"))
+ if (ends_with(fname, ".gz") || ends_with(fname, ".gz_"))
{
out.push(gzip_compressor());
}
- else if ((fname.size() > 4 && fname.substr(fname.size()-4,4)==".bz2") ||
- (fname.size() > 5 && fname.substr(fname.size()-5,5)==".bz2_"))
+ else if (ends_with(fname, ".bz2") || ends_with(fname, ".bz2_"))
{
out.push(bzip2_compressor());
}
diff --git a/moses/TranslationModel/UG/mm/mtt-build.cc b/moses/TranslationModel/UG/mm/mtt-build.cc
index 49fd7f6c2..f49895ebf 100644
--- a/moses/TranslationModel/UG/mm/mtt-build.cc
+++ b/moses/TranslationModel/UG/mm/mtt-build.cc
@@ -4,6 +4,7 @@
// recognized based on the number of fields per line) into memory-mapped
// format. (c) 2007-2013 Ulrich Germann
+#include <boost/algorithm/string/predicate.hpp>
#include <boost/program_options.hpp>
#include <boost/program_options/options_description.hpp>
#include <boost/program_options/parsers.hpp>
@@ -32,6 +33,7 @@ using namespace std;
using namespace ugdiss;
using namespace Moses;
using namespace boost;
+using namespace boost::algorithm;
namespace po=boost::program_options;
int with_pfas;
@@ -200,7 +202,7 @@ process_tagged_input(ostream& out,
vector<string> w; string f; istringstream buf(line);
while (buf>>f) w.push_back(f);
- if (w.size() == 0 || (w[0].size() >= 4 && w[0].substr(0,4) == "SID="))
+ if (w.size() == 0 || starts_with(w[0], "SID="))
new_sent = true;
else if (w.size() == 1 && w[0] == "<P>")
diff --git a/moses/TranslationModel/UG/spe-check-coverage.cc b/moses/TranslationModel/UG/spe-check-coverage.cc
index 039b4cd37..4ab2d749a 100644
--- a/moses/TranslationModel/UG/spe-check-coverage.cc
+++ b/moses/TranslationModel/UG/spe-check-coverage.cc
@@ -1,6 +1,7 @@
#include "mmsapt.h"
#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
#include "moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h"
+#include <boost/algorithm/string/predicate.hpp>
#include <boost/foreach.hpp>
#include <boost/format.hpp>
#include <boost/tokenizer.hpp>
@@ -12,6 +13,7 @@ using namespace Moses;
using namespace bitext;
using namespace std;
using namespace boost;
+using namespace boost::algorithm;
vector<FactorType> fo(1,FactorType(0));
@@ -111,7 +113,7 @@ int main(int argc, char* argv[])
int dynprovidx = -1;
for (size_t i = 0; i < fname.size(); ++i)
{
- if (fname[i].substr(0,7) == "prov-1.")
+ if (starts_with(fname[i], "prov-1."))
dynprovidx = i;
}
cout << endl;
@@ -189,8 +191,8 @@ int main(int argc, char* argv[])
size_t j = x-idx.first;
float f = (mmsapt && mmsapt->isLogVal(j)) ? exp(scores[x]) : scores[x];
string fmt = (mmsapt && mmsapt->isInteger(j)) ? "%10d" : "%10.8f";
- if (fname[j].substr(0,3) == "lex") fmt = "%10.3e";
- if (fname[j].substr(0,7) == "prov-1.")
+ if (starts_with(fname[j], "lex")) fmt = "%10.3e";
+ else if (starts_with(fname[j], "prov-1."))
{
f = round(f/(1-f));
fmt = "%10d";
diff --git a/moses/TranslationModel/fuzzy-match/create_xml.cpp b/moses/TranslationModel/fuzzy-match/create_xml.cpp
index a8b6a52cf..0a31b9b28 100644
--- a/moses/TranslationModel/fuzzy-match/create_xml.cpp
+++ b/moses/TranslationModel/fuzzy-match/create_xml.cpp
@@ -47,11 +47,13 @@ void create_xml(const string &inPath)
//cout << inLine << endl;
switch (step) {
case 0:
- /*setenceId = */ Scan<int>(inLine);
+ /*setenceId = */
+ Scan<int>(inLine);
++step;
break;
case 1:
- /*score = */ Scan<float>(inLine);
+ /*score = */
+ Scan<float>(inLine);
++step;
break;
case 2:
diff --git a/moses/TranslationOption.cpp b/moses/TranslationOption.cpp
index 5ef8293a2..ecd9e7f0e 100644
--- a/moses/TranslationOption.cpp
+++ b/moses/TranslationOption.cpp
@@ -80,14 +80,14 @@ void TranslationOption::EvaluateWithSourceContext(const InputType &input)
const InputPath &TranslationOption::GetInputPath() const
{
UTIL_THROW_IF2(m_inputPath == NULL,
- "No input path");
+ "No input path");
return *m_inputPath;
}
void TranslationOption::SetInputPath(const InputPath &inputPath)
{
UTIL_THROW_IF2(m_inputPath,
- "Input path already specified");
+ "Input path already specified");
m_inputPath = &inputPath;
}
diff --git a/moses/TranslationOption.h b/moses/TranslationOption.h
index 1f0a15a0f..a5effef88 100644
--- a/moses/TranslationOption.h
+++ b/moses/TranslationOption.h
@@ -148,6 +148,10 @@ public:
void EvaluateWithSourceContext(const InputType &input);
+ void UpdateScore(ScoreComponentCollection *futureScoreBreakdown = NULL) {
+ m_targetPhrase.UpdateScore(futureScoreBreakdown);
+ }
+
/** returns cached scores */
inline const Scores *GetLexReorderingScores(const LexicalReordering *scoreProducer) const {
_ScoreCacheMap::const_iterator it = m_lexReorderingScores.find(scoreProducer);
diff --git a/moses/TranslationOptionCollection.cpp b/moses/TranslationOptionCollection.cpp
index 31372cc14..2b1847eb0 100644
--- a/moses/TranslationOptionCollection.cpp
+++ b/moses/TranslationOptionCollection.cpp
@@ -93,7 +93,7 @@ Prune()
if (m_maxNoTransOptPerCoverage == 0 && m_translationOptionThreshold == no_th)
return;
-
+
// bookkeeping for how many options used, pruned
size_t total = 0;
size_t totalPruned = 0;
@@ -468,7 +468,6 @@ CreateTranslationOptionsForRange
TranslationOption *transOpt = *iterColl;
if (StaticData::Instance().GetXmlInputType() != XmlConstraint || !ViolatesXmlOptionsConstraint(sPos,ePos,transOpt)) {
Add(transOpt);
- }
}
lastPartialTranslOptColl.DetachAll();
diff --git a/moses/TranslationOptionCollection.h b/moses/TranslationOptionCollection.h
index a3258c2b9..562912b18 100644
--- a/moses/TranslationOptionCollection.h
+++ b/moses/TranslationOptionCollection.h
@@ -107,6 +107,8 @@ protected:
void EvaluateWithSourceContext();
+ void EvaluateTranslatonOptionListWithSourceContext(TranslationOptionList&);
+
void CacheLexReordering();
void GetTargetPhraseCollectionBatch();
@@ -170,8 +172,9 @@ public:
return GetTranslationOptionList(coverage.GetStartPos(), coverage.GetEndPos());
}
- const InputPathList &GetInputPaths() const
- { return m_inputPathQueue; }
+ const InputPathList &GetInputPaths() const {
+ return m_inputPathQueue;
+ }
TO_STRING();
diff --git a/moses/TranslationOptionCollectionConfusionNet.cpp b/moses/TranslationOptionCollectionConfusionNet.cpp
index 3fa20a7fb..698cf51c2 100644
--- a/moses/TranslationOptionCollectionConfusionNet.cpp
+++ b/moses/TranslationOptionCollectionConfusionNet.cpp
@@ -20,11 +20,11 @@ namespace Moses
/** constructor; just initialize the base class */
TranslationOptionCollectionConfusionNet::
-TranslationOptionCollectionConfusionNet(const ConfusionNet &input,
- size_t maxNoTransOptPerCoverage,
- float translationOptionThreshold)
- : TranslationOptionCollection(input, maxNoTransOptPerCoverage,
- translationOptionThreshold)
+TranslationOptionCollectionConfusionNet(const ConfusionNet &input,
+ size_t maxNoTransOptPerCoverage,
+ float translationOptionThreshold)
+ : TranslationOptionCollection(input, maxNoTransOptPerCoverage,
+ translationOptionThreshold)
{
// Prefix checkers are phrase dictionaries that provide a prefix check
// to indicate that a phrase table entry with a given prefix exists.
@@ -32,8 +32,8 @@ TranslationOptionCollectionConfusionNet(const ConfusionNet &input,
// expanding it further.
vector<PhraseDictionary*> prefixCheckers;
BOOST_FOREACH(PhraseDictionary* pd, PhraseDictionary::GetColl())
- if (pd->ProvidesPrefixCheck()) prefixCheckers.push_back(pd);
-
+ if (pd->ProvidesPrefixCheck()) prefixCheckers.push_back(pd);
+
const InputFeature &inputFeature = InputFeature::Instance();
UTIL_THROW_IF2(&inputFeature == NULL, "Input feature must be specified");
@@ -93,7 +93,7 @@ TranslationOptionCollectionConfusionNet(const ConfusionNet &input,
const Phrase &prevPhrase = prevPath.GetPhrase();
const ScorePair *prevInputScore = prevPath.GetInputScore();
UTIL_THROW_IF2(prevInputScore == NULL,
- "No input score for path: " << prevPath);
+ "No input score for path: " << prevPath);
// loop thru every word at this position
const ConfusionNet::Column &col = input.GetColumn(endPos);
@@ -103,10 +103,10 @@ TranslationOptionCollectionConfusionNet(const ConfusionNet &input,
Phrase subphrase(prevPhrase);
subphrase.AddWord(word);
- bool OK = prefixCheckers.size() == 0;
- for (size_t k = 0; !OK && k < prefixCheckers.size(); ++k)
- OK = prefixCheckers[k]->PrefixExists(subphrase);
- if (!OK) continue;
+ bool OK = prefixCheckers.size() == 0;
+ for (size_t k = 0; !OK && k < prefixCheckers.size(); ++k)
+ OK = prefixCheckers[k]->PrefixExists(subphrase);
+ if (!OK) continue;
const ScorePair &scores = col[i].second;
ScorePair *inputScore = new ScorePair(*prevInputScore);
@@ -122,8 +122,8 @@ TranslationOptionCollectionConfusionNet(const ConfusionNet &input,
} // for (iterPath = prevPaths.begin(); iterPath != prevPaths.end(); ++iterPath) {
}
}
- // cerr << "HAVE " << m_inputPathQueue.size()
- // << " input paths of max. length "
+ // cerr << "HAVE " << m_inputPathQueue.size()
+ // << " input paths of max. length "
// << maxSizePhrase << "." << endl;
}
@@ -247,9 +247,9 @@ CreateTranslationOptionsForRangeLEGACY(const DecodeGraph &decodeGraph, size_t st
// go thru each intermediate trans opt just created
const vector<TranslationOption*>& partTransOptList = oldPtoc->GetList();
vector<TranslationOption*>::const_iterator iterPartialTranslOpt;
- for (iterPartialTranslOpt = partTransOptList.begin();
- iterPartialTranslOpt != partTransOptList.end();
- ++iterPartialTranslOpt) {
+ for (iterPartialTranslOpt = partTransOptList.begin();
+ iterPartialTranslOpt != partTransOptList.end();
+ ++iterPartialTranslOpt) {
TranslationOption &inputPartialTranslOpt = **iterPartialTranslOpt;
if (transStep) {
diff --git a/moses/TranslationOptionCollectionLattice.cpp b/moses/TranslationOptionCollectionLattice.cpp
index 40b33ad9e..6f9de7836 100644
--- a/moses/TranslationOptionCollectionLattice.cpp
+++ b/moses/TranslationOptionCollectionLattice.cpp
@@ -24,7 +24,7 @@ TranslationOptionCollectionLattice::TranslationOptionCollectionLattice(
: TranslationOptionCollection(input, maxNoTransOptPerCoverage, translationOptionThreshold)
{
UTIL_THROW_IF2(StaticData::Instance().GetUseLegacyPT(),
- "Not for models using the legqacy binary phrase table");
+ "Not for models using the legqacy binary phrase table");
const InputFeature &inputFeature = InputFeature::Instance();
UTIL_THROW_IF2(&inputFeature == NULL, "Input feature must be specified");
@@ -48,7 +48,7 @@ TranslationOptionCollectionLattice::TranslationOptionCollectionLattice(
WordsRange range(startPos, endPos);
if (range.GetNumWordsCovered() > maxPhraseLength) {
- continue;
+ continue;
}
const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);
@@ -73,53 +73,53 @@ TranslationOptionCollectionLattice::TranslationOptionCollectionLattice(
void TranslationOptionCollectionLattice::Extend(const InputPath &prevPath, const WordLattice &input)
{
- size_t nextPos = prevPath.GetWordsRange().GetEndPos() + 1;
- if (nextPos >= input.GetSize()) {
- return;
- }
+ size_t nextPos = prevPath.GetWordsRange().GetEndPos() + 1;
+ if (nextPos >= input.GetSize()) {
+ return;
+ }
- size_t startPos = prevPath.GetWordsRange().GetStartPos();
- const Phrase &prevPhrase = prevPath.GetPhrase();
- const ScorePair *prevInputScore = prevPath.GetInputScore();
- UTIL_THROW_IF2(prevInputScore == NULL,
- "Null previous score");
+ size_t startPos = prevPath.GetWordsRange().GetStartPos();
+ const Phrase &prevPhrase = prevPath.GetPhrase();
+ const ScorePair *prevInputScore = prevPath.GetInputScore();
+ UTIL_THROW_IF2(prevInputScore == NULL,
+ "Null previous score");
- const std::vector<size_t> &nextNodes = input.GetNextNodes(nextPos);
+ const std::vector<size_t> &nextNodes = input.GetNextNodes(nextPos);
- const ConfusionNet::Column &col = input.GetColumn(nextPos);
- for (size_t i = 0; i < col.size(); ++i) {
- const Word &word = col[i].first;
- UTIL_THROW_IF2(word.IsEpsilon(), "Epsilon not supported");
+ const ConfusionNet::Column &col = input.GetColumn(nextPos);
+ for (size_t i = 0; i < col.size(); ++i) {
+ const Word &word = col[i].first;
+ UTIL_THROW_IF2(word.IsEpsilon(), "Epsilon not supported");
- size_t nextNode = nextNodes[i];
- size_t endPos = nextPos + nextNode - 1;
+ size_t nextNode = nextNodes[i];
+ size_t endPos = nextPos + nextNode - 1;
- WordsRange range(startPos, endPos);
+ WordsRange range(startPos, endPos);
- size_t maxPhraseLength = StaticData::Instance().GetMaxPhraseLength();
- if (range.GetNumWordsCovered() > maxPhraseLength) {
- continue;
- }
+ size_t maxPhraseLength = StaticData::Instance().GetMaxPhraseLength();
+ if (range.GetNumWordsCovered() > maxPhraseLength) {
+ continue;
+ }
- const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);
+ const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);
- Phrase subphrase(prevPhrase);
- subphrase.AddWord(word);
+ Phrase subphrase(prevPhrase);
+ subphrase.AddWord(word);
- const ScorePair &scores = col[i].second;
- ScorePair *inputScore = new ScorePair(*prevInputScore);
- inputScore->PlusEquals(scores);
+ const ScorePair &scores = col[i].second;
+ ScorePair *inputScore = new ScorePair(*prevInputScore);
+ inputScore->PlusEquals(scores);
- InputPath *path = new InputPath(subphrase, labels, range, &prevPath, inputScore);
+ InputPath *path = new InputPath(subphrase, labels, range, &prevPath, inputScore);
- path->SetNextNode(nextNode);
- m_inputPathQueue.push_back(path);
+ path->SetNextNode(nextNode);
+ m_inputPathQueue.push_back(path);
- // recursive
- Extend(*path, input);
+ // recursive
+ Extend(*path, input);
- }
+ }
}
void TranslationOptionCollectionLattice::CreateTranslationOptions()
@@ -142,19 +142,18 @@ void TranslationOptionCollectionLattice::CreateTranslationOptions()
const WordsRange &range = path.GetWordsRange();
if (tpColl && tpColl->GetSize()) {
- TargetPhraseCollection::const_iterator iter;
- for (iter = tpColl->begin(); iter != tpColl->end(); ++iter) {
- const TargetPhrase &tp = **iter;
- TranslationOption *transOpt = new TranslationOption(range, tp);
- transOpt->SetInputPath(path);
- transOpt->EvaluateWithSourceContext(m_source);
-
- Add(transOpt);
- }
- }
- else if (path.GetPhrase().GetSize() == 1) {
- // unknown word processing
- ProcessOneUnknownWord(path, path.GetWordsRange().GetStartPos(), path.GetWordsRange().GetNumWordsCovered() , path.GetInputScore());
+ TargetPhraseCollection::const_iterator iter;
+ for (iter = tpColl->begin(); iter != tpColl->end(); ++iter) {
+ const TargetPhrase &tp = **iter;
+ TranslationOption *transOpt = new TranslationOption(range, tp);
+ transOpt->SetInputPath(path);
+ transOpt->EvaluateWithSourceContext(m_source);
+
+ Add(transOpt);
+ }
+ } else if (path.GetPhrase().GetSize() == 1) {
+ // unknown word processing
+ ProcessOneUnknownWord(path, path.GetWordsRange().GetStartPos(), path.GetWordsRange().GetNumWordsCovered() , path.GetInputScore());
}
}
@@ -188,7 +187,7 @@ CreateTranslationOptionsForRange
UTIL_THROW(util::Exception,
"CreateTranslationOptionsForRange() not implemented for lattice");
}
-
+
} // namespace
diff --git a/moses/TranslationOptionCollectionLattice.h b/moses/TranslationOptionCollectionLattice.h
index f2f02df32..cea90f11e 100644
--- a/moses/TranslationOptionCollectionLattice.h
+++ b/moses/TranslationOptionCollectionLattice.h
@@ -15,10 +15,10 @@ class WordLattice;
class TranslationOptionCollectionLattice : public TranslationOptionCollection
{
protected:
- /* forcibly create translation option for a 1 word.
- * call the base class' ProcessOneUnknownWord() for each possible word in the confusion network
- * at a particular source position
- */
+ /* forcibly create translation option for a 1 word.
+ * call the base class' ProcessOneUnknownWord() for each possible word in the confusion network
+ * at a particular source position
+ */
void ProcessUnknownWord(size_t sourcePos); // do not implement
public:
diff --git a/moses/TranslationOptionList.h b/moses/TranslationOptionList.h
index fa205c26f..8bac573d6 100644
--- a/moses/TranslationOptionList.h
+++ b/moses/TranslationOptionList.h
@@ -64,8 +64,4 @@ namespace Moses
size_t SelectNBest(size_t const N);
size_t PruneByThreshold(float const th);
- TO_STRING();
-
- };
-
}
diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp
index 6accf8f2e..7c629db7f 100644
--- a/moses/TranslationTask.cpp
+++ b/moses/TranslationTask.cpp
@@ -10,8 +10,11 @@
#include "moses/Incremental.h"
#include "mbr.h"
+#include "moses/Syntax/F2S/RuleMatcherCallback.h"
+#include "moses/Syntax/F2S/RuleMatcherHyperTree.h"
#include "moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser.h"
#include "moses/Syntax/S2T/Parsers/Scope3Parser/Parser.h"
+#include "moses/Syntax/T2S/RuleMatcherSCFG.h"
#include "util/exception.hh"
@@ -20,48 +23,21 @@ using namespace std;
namespace Moses
{
-TranslationTask::TranslationTask(InputType* source, Moses::IOWrapper &ioWrapper,
- bool outputSearchGraphSLF,
- boost::shared_ptr<HypergraphOutput<Manager> > hypergraphOutput)
-: m_source(source)
-, m_ioWrapper(ioWrapper)
-, m_outputSearchGraphSLF(outputSearchGraphSLF)
-, m_hypergraphOutput(hypergraphOutput)
-, m_pbOrChart(1)
+TranslationTask::TranslationTask(InputType* source, Moses::IOWrapper &ioWrapper)
+ : m_source(source)
+ , m_ioWrapper(ioWrapper)
{}
-TranslationTask::TranslationTask(InputType *source, IOWrapper &ioWrapper,
-boost::shared_ptr<HypergraphOutput<ChartManager> > hypergraphOutputChart)
-: m_source(source)
-, m_ioWrapper(ioWrapper)
-, m_hypergraphOutputChart(hypergraphOutputChart)
-, m_pbOrChart(2)
-{}
-
-TranslationTask::~TranslationTask() {
+TranslationTask::~TranslationTask()
+{
delete m_source;
}
void TranslationTask::Run()
{
- switch (m_pbOrChart)
- {
- case 1:
- RunPb();
- break;
- case 2:
- RunChart();
- break;
- default:
- UTIL_THROW(util::Exception, "Unknown value: " << m_pbOrChart);
- }
-}
-
-
-void TranslationTask::RunPb()
-{
// shorthand for "global data"
const StaticData &staticData = StaticData::Instance();
+ const size_t translationId = m_source->GetTranslationId();
// input sentence
Sentence sentence;
@@ -72,7 +48,7 @@ void TranslationTask::RunPb()
// report thread number
#if defined(WITH_THREADS) && defined(BOOST_HAS_PTHREADS)
- TRACE_ERR("Translating line " << m_source->GetTranslationId() << " in thread id " << pthread_self() << endl);
+ VERBOSE(2, "Translating line " << translationId << " in thread id " << pthread_self() << endl);
#endif
@@ -81,354 +57,96 @@ void TranslationTask::RunPb()
// we still need to apply the decision rule (MAP, MBR, ...)
Timer initTime;
initTime.start();
- Manager manager(*m_source,staticData.GetSearchAlgorithm());
- VERBOSE(1, "Line " << m_source->GetTranslationId() << ": Initialize search took " << initTime << " seconds total" << endl);
- manager.ProcessSentence();
+
+ // which manager
+ BaseManager *manager;
+
+ if (!staticData.IsChart()) {
+ // phrase-based
+ manager = new Manager(*m_source);
+ } else if (staticData.GetSearchAlgorithm() == SyntaxF2S ||
+ staticData.GetSearchAlgorithm() == SyntaxT2S) {
+ // STSG-based tree-to-string / forest-to-string decoding (ask Phil Williams)
+ typedef Syntax::F2S::RuleMatcherCallback Callback;
+ typedef Syntax::F2S::RuleMatcherHyperTree<Callback> RuleMatcher;
+ manager = new Syntax::F2S::Manager<RuleMatcher>(*m_source);
+ } else if (staticData.GetSearchAlgorithm() == SyntaxS2T) {
+ // new-style string-to-tree decoding (ask Phil Williams)
+ S2TParsingAlgorithm algorithm = staticData.GetS2TParsingAlgorithm();
+ if (algorithm == RecursiveCYKPlus) {
+ typedef Syntax::S2T::EagerParserCallback Callback;
+ typedef Syntax::S2T::RecursiveCYKPlusParser<Callback> Parser;
+ manager = new Syntax::S2T::Manager<Parser>(*m_source);
+ } else if (algorithm == Scope3) {
+ typedef Syntax::S2T::StandardParserCallback Callback;
+ typedef Syntax::S2T::Scope3Parser<Callback> Parser;
+ manager = new Syntax::S2T::Manager<Parser>(*m_source);
+ } else {
+ UTIL_THROW2("ERROR: unhandled S2T parsing algorithm");
+ }
+ } else if (staticData.GetSearchAlgorithm() == SyntaxT2S_SCFG) {
+ // SCFG-based tree-to-string decoding (ask Phil Williams)
+ typedef Syntax::F2S::RuleMatcherCallback Callback;
+ typedef Syntax::T2S::RuleMatcherSCFG<Callback> RuleMatcher;
+ const TreeInput *tree = NULL;
+ manager = new Syntax::T2S::Manager<RuleMatcher>(*tree);
+ } else if (staticData.GetSearchAlgorithm() == ChartIncremental) {
+ // Ken's incremental decoding
+ manager = new Incremental::Manager(*m_source);
+ } else {
+ // original SCFG manager
+ manager = new ChartManager(*m_source);
+ }
+
+ VERBOSE(1, "Line " << translationId << ": Initialize search took " << initTime << " seconds total" << endl);
+ manager->Decode();
// we are done with search, let's look what we got
Timer additionalReportingTime;
additionalReportingTime.start();
+ manager->OutputBest(m_ioWrapper.GetSingleBestOutputCollector());
+
// output word graph
- if (m_ioWrapper.GetWordGraphCollector()) {
- ostringstream out;
- fix(out,PRECISION);
- manager.GetWordGraph(m_source->GetTranslationId(), out);
- m_ioWrapper.GetWordGraphCollector()->Write(m_source->GetTranslationId(), out.str());
- }
+ manager->OutputWordGraph(m_ioWrapper.GetWordGraphCollector());
// output search graph
- if (m_ioWrapper.GetSearchGraphOutputCollector()) {
- ostringstream out;
- fix(out,PRECISION);
- manager.OutputSearchGraph(m_source->GetTranslationId(), out);
- m_ioWrapper.GetSearchGraphOutputCollector()->Write(m_source->GetTranslationId(), out.str());
-
-#ifdef HAVE_PROTOBUF
- if (staticData.GetOutputSearchGraphPB()) {
- ostringstream sfn;
- sfn << staticData.GetParam("output-search-graph-pb")[0] << '/' << m_source->GetTranslationId() << ".pb" << ends;
- string fn = sfn.str();
- VERBOSE(2, "Writing search graph to " << fn << endl);
- fstream output(fn.c_str(), ios::trunc | ios::binary | ios::out);
- manager.SerializeSearchGraphPB(m_source->GetTranslationId(), output);
- }
-#endif
- }
+ manager->OutputSearchGraph(m_ioWrapper.GetSearchGraphOutputCollector());
- // Output search graph in HTK standard lattice format (SLF)
- if (m_outputSearchGraphSLF) {
- stringstream fileName;
- fileName << staticData.GetParam("output-search-graph-slf")[0] << "/" << m_source->GetTranslationId() << ".slf";
- ofstream *file = new ofstream;
- file->open(fileName.str().c_str());
- if (file->is_open() && file->good()) {
- ostringstream out;
- fix(out,PRECISION);
- manager.OutputSearchGraphAsSLF(m_source->GetTranslationId(), out);
- *file << out.str();
- file -> flush();
- } else {
- TRACE_ERR("Cannot output HTK standard lattice for line " << m_source->GetTranslationId() << " because the output file is not open or not ready for writing" << endl);
- }
- delete file;
- }
+ manager->OutputSearchGraphSLF();
// Output search graph in hypergraph format for Kenneth Heafield's lazy hypergraph decoder
- if (m_hypergraphOutput.get()) {
- m_hypergraphOutput->Write(manager);
- }
+ manager->OutputSearchGraphHypergraph();
additionalReportingTime.stop();
- // apply decision rule and output best translation(s)
- if (m_ioWrapper.GetSingleBestOutputCollector()) {
- ostringstream out;
- ostringstream debug;
- fix(debug,PRECISION);
-
- // all derivations - send them to debug stream
- if (staticData.PrintAllDerivations()) {
- additionalReportingTime.start();
- manager.PrintAllDerivations(m_source->GetTranslationId(), debug);
- additionalReportingTime.stop();
- }
-
- Timer decisionRuleTime;
- decisionRuleTime.start();
-
- // MAP decoding: best hypothesis
- const Hypothesis* bestHypo = NULL;
- if (!staticData.UseMBR()) {
- bestHypo = manager.GetBestHypothesis();
- if (bestHypo) {
- if (StaticData::Instance().GetOutputHypoScore()) {
- out << bestHypo->GetTotalScore() << ' ';
- }
- if (staticData.IsPathRecoveryEnabled()) {
- m_ioWrapper.OutputInput(out, bestHypo);
- out << "||| ";
- }
- if (staticData.GetParam("print-id").size() && Scan<bool>(staticData.GetParam("print-id")[0]) ) {
- out << m_source->GetTranslationId() << " ";
- }
-
- if (staticData.GetReportSegmentation() == 2) {
- manager.GetOutputLanguageModelOrder(out, bestHypo);
- }
- m_ioWrapper.OutputBestSurface(
- out,
- bestHypo,
- staticData.GetOutputFactorOrder(),
- staticData.GetReportSegmentation(),
- staticData.GetReportAllFactors());
- if (staticData.PrintAlignmentInfo()) {
- out << "||| ";
- m_ioWrapper.OutputAlignment(out, bestHypo);
- }
-
- m_ioWrapper.OutputAlignment(m_ioWrapper.GetAlignmentInfoCollector(), m_source->GetTranslationId(), bestHypo);
- IFVERBOSE(1) {
- debug << "BEST TRANSLATION: " << *bestHypo << endl;
- }
- } else {
- VERBOSE(1, "NO BEST TRANSLATION" << endl);
- }
-
- out << endl;
- }
-
- // MBR decoding (n-best MBR, lattice MBR, consensus)
- else {
- // we first need the n-best translations
- size_t nBestSize = staticData.GetMBRSize();
- if (nBestSize <= 0) {
- cerr << "ERROR: negative size for number of MBR candidate translations not allowed (option mbr-size)" << endl;
- exit(1);
- }
- TrellisPathList nBestList;
- manager.CalcNBest(nBestSize, nBestList,true);
- VERBOSE(2,"size of n-best: " << nBestList.GetSize() << " (" << nBestSize << ")" << endl);
- IFVERBOSE(2) {
- PrintUserTime("calculated n-best list for (L)MBR decoding");
- }
-
- // lattice MBR
- if (staticData.UseLatticeMBR()) {
- if (m_ioWrapper.GetNBestOutputCollector()) {
- //lattice mbr nbest
- vector<LatticeMBRSolution> solutions;
- size_t n = min(nBestSize, staticData.GetNBestSize());
- getLatticeMBRNBest(manager,nBestList,solutions,n);
- ostringstream out;
- m_ioWrapper.OutputLatticeMBRNBest(out, solutions,m_source->GetTranslationId());
- m_ioWrapper.GetNBestOutputCollector()->Write(m_source->GetTranslationId(), out.str());
- } else {
- //Lattice MBR decoding
- vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
- m_ioWrapper.OutputBestHypo(mbrBestHypo, m_source->GetTranslationId(), staticData.GetReportSegmentation(),
- staticData.GetReportAllFactors(),out);
- IFVERBOSE(2) {
- PrintUserTime("finished Lattice MBR decoding");
- }
- }
- }
-
- // consensus decoding
- else if (staticData.UseConsensusDecoding()) {
- const TrellisPath &conBestHypo = doConsensusDecoding(manager,nBestList);
- m_ioWrapper.OutputBestHypo(conBestHypo, m_source->GetTranslationId(),
- staticData.GetReportSegmentation(),
- staticData.GetReportAllFactors(),out);
- m_ioWrapper.OutputAlignment(m_ioWrapper.GetAlignmentInfoCollector(), m_source->GetTranslationId(), conBestHypo);
- IFVERBOSE(2) {
- PrintUserTime("finished Consensus decoding");
- }
- }
-
- // n-best MBR decoding
- else {
- const TrellisPath &mbrBestHypo = doMBR(nBestList);
- m_ioWrapper.OutputBestHypo(mbrBestHypo, m_source->GetTranslationId(),
- staticData.GetReportSegmentation(),
- staticData.GetReportAllFactors(),out);
- m_ioWrapper.OutputAlignment(m_ioWrapper.GetAlignmentInfoCollector(), m_source->GetTranslationId(), mbrBestHypo);
- IFVERBOSE(2) {
- PrintUserTime("finished MBR decoding");
- }
- }
- }
-
- // report best translation to output collector
- m_ioWrapper.GetSingleBestOutputCollector()->Write(m_source->GetTranslationId(),out.str(),debug.str());
-
- decisionRuleTime.stop();
- VERBOSE(1, "Line " << m_source->GetTranslationId() << ": Decision rule took " << decisionRuleTime << " seconds total" << endl);
- }
-
additionalReportingTime.start();
// output n-best list
- if (m_ioWrapper.GetNBestOutputCollector() && !staticData.UseLatticeMBR()) {
- TrellisPathList nBestList;
- ostringstream out;
- manager.CalcNBest(staticData.GetNBestSize(), nBestList,staticData.GetDistinctNBest());
- m_ioWrapper.OutputNBest(out, nBestList, staticData.GetOutputFactorOrder(), m_source->GetTranslationId(),
- staticData.GetReportSegmentation());
- m_ioWrapper.GetNBestOutputCollector()->Write(m_source->GetTranslationId(), out.str());
- }
+ manager->OutputNBest(m_ioWrapper.GetNBestOutputCollector());
//lattice samples
- if (m_ioWrapper.GetLatticeSamplesCollector()) {
- TrellisPathList latticeSamples;
- ostringstream out;
- manager.CalcLatticeSamples(staticData.GetLatticeSamplesSize(), latticeSamples);
- m_ioWrapper.OutputNBest(out,latticeSamples, staticData.GetOutputFactorOrder(), m_source->GetTranslationId(),
- staticData.GetReportSegmentation());
- m_ioWrapper.GetLatticeSamplesCollector()->Write(m_source->GetTranslationId(), out.str());
- }
+ manager->OutputLatticeSamples(m_ioWrapper.GetLatticeSamplesCollector());
// detailed translation reporting
- if (m_ioWrapper.GetDetailedTranslationCollector()) {
- ostringstream out;
- fix(out,PRECISION);
- TranslationAnalysis::PrintTranslationAnalysis(out, manager.GetBestHypothesis());
- m_ioWrapper.GetDetailedTranslationCollector()->Write(m_source->GetTranslationId(),out.str());
- }
+ manager->OutputDetailedTranslationReport(m_ioWrapper.GetDetailedTranslationCollector());
+
+ manager->OutputDetailedTreeFragmentsTranslationReport(m_ioWrapper.GetDetailTreeFragmentsOutputCollector());
//list of unknown words
- if (m_ioWrapper.GetUnknownsCollector()) {
- const vector<const Phrase*>& unknowns = manager.getSntTranslationOptions()->GetUnknownSources();
- ostringstream out;
- for (size_t i = 0; i < unknowns.size(); ++i) {
- out << *(unknowns[i]);
- }
- out << endl;
- m_ioWrapper.GetUnknownsCollector()->Write(m_source->GetTranslationId(), out.str());
- }
+ manager->OutputUnknowns(m_ioWrapper.GetUnknownsCollector());
+
+ manager->OutputAlignment(m_ioWrapper.GetAlignmentInfoCollector());
// report additional statistics
- manager.CalcDecoderStatistics();
- VERBOSE(1, "Line " << m_source->GetTranslationId() << ": Additional reporting took " << additionalReportingTime << " seconds total" << endl);
- VERBOSE(1, "Line " << m_source->GetTranslationId() << ": Translation took " << translationTime << " seconds total" << endl);
+ manager->CalcDecoderStatistics();
+ VERBOSE(1, "Line " << translationId << ": Additional reporting took " << additionalReportingTime << " seconds total" << endl);
+ VERBOSE(1, "Line " << translationId << ": Translation took " << translationTime << " seconds total" << endl);
IFVERBOSE(2) {
PrintUserTime("Sentence Decoding Time:");
}
-}
-
-
-void TranslationTask::RunChart()
-{
- const StaticData &staticData = StaticData::Instance();
- const size_t translationId = m_source->GetTranslationId();
-
- VERBOSE(2,"\nTRANSLATING(" << translationId << "): " << *m_source);
-
- if (staticData.UseS2TDecoder()) {
- S2TParsingAlgorithm algorithm = staticData.GetS2TParsingAlgorithm();
- if (algorithm == RecursiveCYKPlus) {
- typedef Syntax::S2T::EagerParserCallback Callback;
- typedef Syntax::S2T::RecursiveCYKPlusParser<Callback> Parser;
- DecodeS2T<Parser>();
- } else if (algorithm == Scope3) {
- typedef Syntax::S2T::StandardParserCallback Callback;
- typedef Syntax::S2T::Scope3Parser<Callback> Parser;
- DecodeS2T<Parser>();
- } else {
- UTIL_THROW2("ERROR: unhandled S2T parsing algorithm");
- }
- return;
- }
- if (staticData.GetSearchAlgorithm() == ChartIncremental) {
- Incremental::Manager manager(*m_source);
- const std::vector<search::Applied> &nbest = manager.ProcessSentence();
- if (!nbest.empty()) {
- m_ioWrapper.OutputBestHypo(nbest[0], translationId);
- if (staticData.IsDetailedTranslationReportingEnabled()) {
- const Sentence &sentence = dynamic_cast<const Sentence &>(*m_source);
- m_ioWrapper.OutputDetailedTranslationReport(&nbest[0], sentence, translationId);
- }
- if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled()) {
- const Sentence &sentence = dynamic_cast<const Sentence &>(*m_source);
- m_ioWrapper.OutputDetailedTreeFragmentsTranslationReport(&nbest[0], sentence, translationId);
- }
- } else {
- m_ioWrapper.OutputBestNone(translationId);
- }
- if (staticData.GetNBestSize() > 0)
- m_ioWrapper.OutputNBestList(nbest, translationId);
- return;
- }
-
- ChartManager manager(*m_source);
- manager.ProcessSentence();
-
- UTIL_THROW_IF2(staticData.UseMBR(), "Cannot use MBR");
-
- // Output search graph in hypergraph format for Kenneth Heafield's lazy hypergraph decoder
- if (m_hypergraphOutputChart.get()) {
- m_hypergraphOutputChart->Write(manager);
- }
-
-
- // 1-best
- const ChartHypothesis *bestHypo = manager.GetBestHypothesis();
- m_ioWrapper.OutputBestHypo(bestHypo, translationId);
- IFVERBOSE(2) {
- PrintUserTime("Best Hypothesis Generation Time:");
- }
-
- if (!staticData.GetAlignmentOutputFile().empty()) {
- m_ioWrapper.OutputAlignment(translationId, bestHypo);
- }
-
- if (staticData.IsDetailedTranslationReportingEnabled()) {
- const Sentence &sentence = dynamic_cast<const Sentence &>(*m_source);
- m_ioWrapper.OutputDetailedTranslationReport(bestHypo, sentence, translationId);
- }
- if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled()) {
- const Sentence &sentence = dynamic_cast<const Sentence &>(*m_source);
- m_ioWrapper.OutputDetailedTreeFragmentsTranslationReport(bestHypo, sentence, translationId);
- }
- if (!staticData.GetOutputUnknownsFile().empty()) {
- m_ioWrapper.OutputUnknowns(manager.GetParser().GetUnknownSources(),
- translationId);
- }
-
- //DIMw
- if (staticData.IsDetailedAllTranslationReportingEnabled()) {
- const Sentence &sentence = dynamic_cast<const Sentence &>(*m_source);
- size_t nBestSize = staticData.GetNBestSize();
- std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
- manager.CalcNBest(nBestSize, nBestList, staticData.GetDistinctNBest());
- m_ioWrapper.OutputDetailedAllTranslationReport(nBestList, manager, sentence, translationId);
- }
-
- // n-best
- size_t nBestSize = staticData.GetNBestSize();
- if (nBestSize > 0) {
- VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
- std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
- manager.CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
- m_ioWrapper.OutputNBestList(nBestList, translationId);
- IFVERBOSE(2) {
- PrintUserTime("N-Best Hypotheses Generation Time:");
- }
- }
-
- if (staticData.GetOutputSearchGraph()) {
- std::ostringstream out;
- manager.OutputSearchGraphMoses( out);
- OutputCollector *oc = m_ioWrapper.GetSearchGraphOutputCollector();
- UTIL_THROW_IF2(oc == NULL, "File for search graph output not specified");
- oc->Write(translationId, out.str());
- }
-
- IFVERBOSE(2) {
- PrintUserTime("Sentence Decoding Time:");
- }
- manager.CalcDecoderStatistics();
+ delete manager;
}
}
diff --git a/moses/TranslationTask.h b/moses/TranslationTask.h
index 3b5fe26d1..b2a36840a 100644
--- a/moses/TranslationTask.h
+++ b/moses/TranslationTask.h
@@ -8,7 +8,9 @@
#include "moses/Manager.h"
#include "moses/ChartManager.h"
+#include "moses/Syntax/F2S/Manager.h"
#include "moses/Syntax/S2T/Manager.h"
+#include "moses/Syntax/T2S/Manager.h"
namespace Moses
{
@@ -26,12 +28,7 @@ class TranslationTask : public Moses::Task
public:
- TranslationTask(Moses::InputType* source, Moses::IOWrapper &ioWrapper,
- bool outputSearchGraphSLF,
- boost::shared_ptr<Moses::HypergraphOutput<Moses::Manager> > hypergraphOutput);
-
- TranslationTask(Moses::InputType *source, IOWrapper &ioWrapper,
- boost::shared_ptr<Moses::HypergraphOutput<Moses::ChartManager> > hypergraphOutputChart);
+ TranslationTask(Moses::InputType* source, Moses::IOWrapper &ioWrapper);
~TranslationTask();
@@ -41,44 +38,9 @@ public:
private:
- int m_pbOrChart; // 1=pb. 2=chart
Moses::InputType* m_source;
Moses::IOWrapper &m_ioWrapper;
- bool m_outputSearchGraphSLF;
- boost::shared_ptr<Moses::HypergraphOutput<Moses::Manager> > m_hypergraphOutput;
- boost::shared_ptr<Moses::HypergraphOutput<Moses::ChartManager> > m_hypergraphOutputChart;
-
- void RunPb();
- void RunChart();
-
-
- template<typename Parser>
- void DecodeS2T() {
- const StaticData &staticData = StaticData::Instance();
- const std::size_t translationId = m_source->GetTranslationId();
- Syntax::S2T::Manager<Parser> manager(*m_source);
- manager.Decode();
- // 1-best
- const Syntax::SHyperedge *best = manager.GetBestSHyperedge();
- m_ioWrapper.OutputBestHypo(best, translationId);
- // n-best
- if (staticData.GetNBestSize() > 0) {
- Syntax::KBestExtractor::KBestVec nBestList;
- manager.ExtractKBest(staticData.GetNBestSize(), nBestList,
- staticData.GetDistinctNBest());
- m_ioWrapper.OutputNBestList(nBestList, translationId);
- }
- // Write 1-best derivation (-translation-details / -T option).
- if (staticData.IsDetailedTranslationReportingEnabled()) {
- m_ioWrapper.OutputDetailedTranslationReport(best, translationId);
- }
- // Write unknown words file (-output-unknowns option)
- if (!staticData.GetOutputUnknownsFile().empty()) {
- m_ioWrapper.OutputUnknowns(manager.GetUnknownWords(), translationId);
- }
- }
-
};
diff --git a/moses/TreeInput.cpp b/moses/TreeInput.cpp
index 6b36826b6..81a76507e 100644
--- a/moses/TreeInput.cpp
+++ b/moses/TreeInput.cpp
@@ -155,8 +155,7 @@ bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector<XMLParseOutput>
if (startPos == endPos) {
TRACE_ERR("WARNING: tag " << tagName << " span is empty. Ignoring: " << line << endl);
continue;
- }
- else if (startPos > endPos) {
+ } else if (startPos > endPos) {
TRACE_ERR("ERROR: tag " << tagName << " startPos > endPos: " << line << endl);
return false;
}
@@ -196,7 +195,7 @@ bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector<XMLParseOutput>
Word *targetLHS = new Word(true);
targetLHS->CreateFromString(Output, outputFactorOrder, targetLHSstr, true);
UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL,
- "Null factor left-hand-side");
+ "Null factor left-hand-side");
targetPhrase.SetTargetLHS(targetLHS);
// not tested
@@ -248,8 +247,8 @@ int TreeInput::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
// remove extra spaces
//line = Trim(line);
- std::vector<XMLParseOutput> sourceLabels;
- ProcessAndStripXMLTags(line, sourceLabels, m_xmlOptions);
+ m_labelledSpans.clear();
+ ProcessAndStripXMLTags(line, m_labelledSpans, m_xmlOptions);
// do words 1st - hack
stringstream strme;
@@ -267,7 +266,7 @@ int TreeInput::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
// do source labels
vector<XMLParseOutput>::const_iterator iterLabel;
- for (iterLabel = sourceLabels.begin(); iterLabel != sourceLabels.end(); ++iterLabel) {
+ for (iterLabel = m_labelledSpans.begin(); iterLabel != m_labelledSpans.end(); ++iterLabel) {
const XMLParseOutput &labelItem = *iterLabel;
const WordsRange &range = labelItem.m_range;
const string &label = labelItem.m_label;
@@ -304,7 +303,7 @@ void TreeInput::AddChartLabel(size_t startPos, size_t endPos, const Word &label
, const std::vector<FactorType>& /* factorOrder */)
{
UTIL_THROW_IF2(!label.IsNonTerminal(),
- "Label must be a non-terminal");
+ "Label must be a non-terminal");
SourceLabelOverlap overlapType = StaticData::Instance().GetSourceLabelOverlap();
NonTerminalSet &list = GetLabelSet(startPos, endPos);
@@ -346,7 +345,7 @@ std::ostream& operator<<(std::ostream &out, const TreeInput &input)
for (iter = labelSet.begin(); iter != labelSet.end(); ++iter) {
const Word &word = *iter;
UTIL_THROW_IF2(!word.IsNonTerminal(),
- "Word must be a non-terminal");
+ "Word must be a non-terminal");
out << "[" << startPos <<"," << endPos << "]="
<< word << "(" << word.IsNonTerminal() << ") ";
}
diff --git a/moses/TreeInput.h b/moses/TreeInput.h
index 467623646..391fe1453 100644
--- a/moses/TreeInput.h
+++ b/moses/TreeInput.h
@@ -31,6 +31,7 @@ class TreeInput : public Sentence
protected:
std::vector<std::vector<NonTerminalSet> > m_sourceChart;
+ std::vector<XMLParseOutput> m_labelledSpans;
void AddChartLabel(size_t startPos, size_t endPos, const std::string &label
,const std::vector<FactorType>& factorOrder);
@@ -62,6 +63,12 @@ public:
virtual const NonTerminalSet &GetLabelSet(size_t startPos, size_t endPos) const {
return m_sourceChart[startPos][endPos - startPos];
}
+
+ //! Get the XMLParseOutput objects in the order they were created.
+ const std::vector<XMLParseOutput> &GetLabelledSpans() const
+ {
+ return m_labelledSpans;
+ }
};
}
diff --git a/moses/TrellisPath.cpp b/moses/TrellisPath.cpp
index ae3d3092c..e76adc2db 100644
--- a/moses/TrellisPath.cpp
+++ b/moses/TrellisPath.cpp
@@ -200,7 +200,7 @@ Phrase TrellisPath::GetSurfacePhrase() const
FactorType factorType = outputFactor[i];
const Factor *factor = targetPhrase.GetFactor(pos, factorType);
UTIL_THROW_IF2(factor == NULL,
- "No factor " << factorType << " at position " << pos);
+ "No factor " << factorType << " at position " << pos);
newWord[factorType] = factor;
}
}
diff --git a/moses/TrellisPath.h b/moses/TrellisPath.h
index 26e722696..def86549b 100644
--- a/moses/TrellisPath.h
+++ b/moses/TrellisPath.h
@@ -54,7 +54,7 @@ protected:
float m_totalScore;
//Used by Manager::LatticeSample()
- TrellisPath(const std::vector<const Hypothesis*> edges);
+ explicit TrellisPath(const std::vector<const Hypothesis*> edges);
void InitScore();
diff --git a/moses/TypeDef.h b/moses/TypeDef.h
index d7cf3b367..0a1e1ad9b 100644
--- a/moses/TypeDef.h
+++ b/moses/TypeDef.h
@@ -61,7 +61,7 @@ const size_t DEFAULT_MAX_TRANS_OPT_CACHE_SIZE = 10000;
const size_t DEFAULT_MAX_TRANS_OPT_SIZE = 5000;
const size_t DEFAULT_MAX_PART_TRANS_OPT_SIZE = 10000;
//#ifdef PT_UG
-// setting to std::numeric_limits<size_t>::max() makes the regression test for (deprecated) PhraseDictionaryDynamicSuffixArray fail.
+// setting to std::numeric_limits<size_t>::max() makes the regression test for (deprecated) PhraseDictionaryDynamicSuffixArray fail.
// const size_t DEFAULT_MAX_PHRASE_LENGTH = 100000;
//#else
const size_t DEFAULT_MAX_PHRASE_LENGTH = 20;
@@ -77,6 +77,9 @@ const size_t DEFAULT_VERBOSE_LEVEL = 1;
// output floats with five significant digits
static const size_t PRECISION = 3;
+// tolerance for equality in floating point comparisons
+const float FLOAT_EPSILON = 0.0001;
+
// enums.
// must be 0, 1, 2, ..., unless otherwise stated
@@ -93,7 +96,6 @@ enum FactorDirection {
enum DecodeType {
Translate
,Generate
- ,InsertNullFertilityWord //! an optional step that attempts to insert a few closed-class words to improve LM scores
};
namespace LexReorderType
@@ -115,14 +117,14 @@ enum DistortionOrientationOptions {
};
}
-
enum InputTypeEnum {
SentenceInput = 0
,ConfusionNetworkInput = 1
,WordLatticeInput = 2
,TreeInputType = 3
,WordLatticeInput2 = 4
-
+ , TabbedSentenceInput = 5
+ ,ForestInputType = 6
};
enum XmlInputType {
@@ -141,10 +143,14 @@ enum DictionaryFind {
enum SearchAlgorithm {
Normal = 0
,CubePruning = 1
- ,CubeGrowing = 2
- ,ChartDecoding= 3
+ //,CubeGrowing = 2
+ ,CYKPlus = 3
,NormalBatch = 4
,ChartIncremental = 5
+ ,SyntaxS2T = 6
+ ,SyntaxT2S = 7
+ ,SyntaxT2S_SCFG = 8
+ ,SyntaxF2S = 9
};
enum SourceLabelOverlap {
diff --git a/moses/UserMessage.cpp b/moses/UserMessage.cpp
deleted file mode 100644
index ad89d80a7..000000000
--- a/moses/UserMessage.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
-// $Id$
-
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (C) 2006 University of Edinburgh
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-
-#include <sstream>
-#include <iostream>
-#include "UserMessage.h"
-
-using namespace std;
-
-namespace Moses
-{
-
-const size_t MAX_MSG_QUEUE = 5;
-
-bool UserMessage::m_toStderr = true;
-bool UserMessage::m_toQueue = false;
-queue<string> UserMessage::m_msgQueue;
-
-void UserMessage::Add(const string &msg)
-{
- if (m_toStderr) {
- cerr << "ERROR:" << msg << endl;
- }
- if (m_toQueue) {
- if (m_msgQueue.size() >= MAX_MSG_QUEUE)
- m_msgQueue.pop();
- m_msgQueue.push(msg);
- }
-}
-
-string UserMessage::GetQueue()
-{
- stringstream strme("");
- while (!m_msgQueue.empty()) {
- strme << m_msgQueue.front() << endl;
- m_msgQueue.pop();
- }
- return strme.str();
-}
-
-}
-
-
-
diff --git a/moses/UserMessage.h b/moses/UserMessage.h
deleted file mode 100644
index ddb21e830..000000000
--- a/moses/UserMessage.h
+++ /dev/null
@@ -1,54 +0,0 @@
-// $Id$
-
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (C) 2006 University of Edinburgh
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-
-#ifndef moses_UserMessage_h
-#define moses_UserMessage_h
-
-#include <string>
-#include <queue>
-
-namespace Moses
-{
-
-/** User warnings/error messages.
- * Not the same as tracing messages, this should be usable even if Moses front-end if GUI
- */
-class UserMessage
-{
-protected:
- static bool m_toStderr, m_toQueue;
- static std::queue<std::string> m_msgQueue;
-
-public:
- //! whether messages to go to stderr, a queue to later display, or both
- static void SetOutput(bool toStderr, bool toQueue) {
- m_toStderr = toStderr;
- m_toQueue = toQueue;
- }
- //! add a message to be displayed
- static void Add(const std::string &msg);
- //! get all messages in queue. Each is on a separate line. Clear queue afterwards
- static std::string GetQueue();
-};
-
-}
-
-#endif
diff --git a/moses/Util.cpp b/moses/Util.cpp
index 9664c811e..5b6f16e2b 100644
--- a/moses/Util.cpp
+++ b/moses/Util.cpp
@@ -1,23 +1,23 @@
// $Id$
/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (C) 2006 University of Edinburgh
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
#ifdef WIN32
#include <windows.h>
@@ -32,6 +32,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <stdio.h>
#include <iostream>
#include <iomanip>
+#include <boost/algorithm/string/predicate.hpp>
#include "TypeDef.h"
#include "Util.h"
#include "Timer.h"
@@ -42,6 +43,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/StaticData.h"
using namespace std;
+using namespace boost::algorithm;
namespace Moses
{
@@ -54,7 +56,7 @@ string GetTempFolder()
#ifdef _WIN32
char *tmpPath = getenv("TMP");
string str(tmpPath);
- if (str.substr(str.size() - 1, 1) != "\\")
+ if (!ends_with(str, "\\"))
str += "\\";
return str;
#else
@@ -110,6 +112,140 @@ double GetUserTime()
return g_timer.get_elapsed_time();
}
+std::vector< std::map<std::string, std::string> > ProcessAndStripDLT(std::string &line)
+{
+ std::vector< std::map<std::string, std::string> > meta;
+ std::string lline = ToLower(line);
+ bool check_dlt = true;
+
+ //allowed format of dlt tag
+ //<dlt type="name" id="name" attr="value"/>
+ //the type attribute is mandatory; the name should not contain any double quotation mark
+ //the id attribute is optional; if present, the name should not contain any double quotation mark
+ //only one additional attribute is possible; value can contain double quotation marks
+ //both name and value must be surrounded by double quotation mark
+
+// std::cerr << "GLOBAL START" << endl;
+ while (check_dlt) {
+ size_t start = lline.find("<dlt");
+ if (start == std::string::npos) {
+ //no more dlt tags
+ check_dlt = false;
+ continue;
+ }
+ size_t close = lline.find("/>");
+ if (close == std::string::npos) {
+ // error: dlt tag is not ended
+ check_dlt = false;
+ continue;
+ }
+ //std::string dlt = Trim(lline.substr(start+4, close-start-4));
+ std::string dlt = Trim(line.substr(start+4, close-start-4));
+
+ line.erase(start,close-start+2);
+ lline.erase(start,close-start+2);
+
+ if (dlt != "") {
+ std::map<std::string, std::string> tmp_meta;
+
+ //check if type is present and store it
+ size_t start_type = dlt.find("type=");
+ size_t len_type=4;
+ if (start_type != std::string::npos) {
+ //type is present
+ //required format type="value"
+ //double quotation mark is required
+
+ std::string val_type;
+ std::string label_type = dlt.substr(start_type, len_type);
+ if (dlt[start_type+len_type+1] == '"') {
+ val_type = dlt.substr(start_type+len_type+2);
+ size_t close_type = val_type.find('"');
+ val_type = val_type.substr(0, close_type);
+ dlt.erase(start_type,start_type+len_type+2+close_type+1);
+ } else {
+ TRACE_ERR("DLT parse error: missing character \" for type \n");
+ }
+ label_type = Trim(label_type);
+ dlt = Trim(dlt);
+
+ tmp_meta[label_type] = val_type;
+ } else {
+ //type is not present
+ UTIL_THROW(util::Exception, "ProcessAndStripDLT(std::string &line): Attribute type for dlt tag is mandatory.");
+ }
+
+ //check if id is present and store it
+ size_t start_id = dlt.find("id=");
+ size_t len_id=2;
+ if (start_id != std::string::npos) {
+ //id is present
+ //required format id="name"
+ //double quotation mark is required
+
+ std::string val_id;
+ std::string label_id = dlt.substr(start_id, len_id);
+ if (dlt[start_id+len_id+1] == '"') {
+ val_id = dlt.substr(start_id+len_id+2);
+ size_t close_id = val_id.find('"');
+ val_id = val_id.substr(0, close_id);
+ dlt.erase(start_id,start_id+len_id+2+close_id+1);
+ } else {
+ TRACE_ERR("DLT parse error: missing character \" for id \n");
+ }
+ label_id = Trim(label_id);
+ dlt = Trim(dlt);
+
+ tmp_meta[label_id] = val_id;
+ } else {
+ //id is not present
+ //do nothing
+ }
+
+ for (size_t i = 1; i < dlt.size(); i++) {
+ if (dlt[i] == '=') {
+ std::string label = dlt.substr(0, i);
+ std::string val = dlt.substr(i+1);
+ if (val[0] == '"') {
+
+ val = val.substr(1);
+ // it admits any double quotation mark (but is attribute) in the value of the attribute
+ // it assumes that just one attribute (besides id attribute) is present in the tag,
+ // it assumes that the value starts and ends with double quotation mark
+ size_t close = val.rfind('"');
+ if (close == std::string::npos) {
+ TRACE_ERR("SGML parse error: missing \"\n");
+ dlt = "";
+ i = 0;
+ } else {
+ dlt = val.substr(close+1);
+ val = val.substr(0, close);
+ i = 0;
+ }
+ } else {
+ size_t close = val.find(' ');
+ if (close == std::string::npos) {
+ dlt = "";
+ i = 0;
+ } else {
+ dlt = val.substr(close+1);
+ val = val.substr(0, close);
+ }
+ }
+ label = Trim(label);
+ dlt = Trim(dlt);
+
+ tmp_meta[label] = val;
+ }
+ }
+
+ meta.push_back(tmp_meta);
+ }
+ }
+// std::cerr << "GLOBAL END" << endl;
+ return meta;
+}
+
std::map<std::string, std::string> ProcessAndStripSGML(std::string &line)
{
std::map<std::string, std::string> meta;
@@ -220,7 +356,7 @@ void PrintFeatureWeight(const FeatureFunction* ff)
void ShowWeights()
{
- fix(cout,6);
+ FixPrecision(cout,6);
const vector<const StatelessFeatureFunction*>& slf = StatelessFeatureFunction::GetStatelessFeatureFunctions();
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
@@ -228,8 +364,7 @@ void ShowWeights()
const StatefulFeatureFunction *ff = sff[i];
if (ff->IsTuneable()) {
PrintFeatureWeight(ff);
- }
- else {
+ } else {
cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
}
}
@@ -237,8 +372,7 @@ void ShowWeights()
const StatelessFeatureFunction *ff = slf[i];
if (ff->IsTuneable()) {
PrintFeatureWeight(ff);
- }
- else {
+ } else {
cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
}
}
diff --git a/moses/Util.h b/moses/Util.h
index acaa4b53c..beefa53da 100644
--- a/moses/Util.h
+++ b/moses/Util.h
@@ -57,10 +57,13 @@ namespace Moses
/** verbose macros
* */
-#define VERBOSE(level,str) { if (StaticData::Instance().GetVerboseLevel() >= level) { TRACE_ERR(str); } }
+#define VERBOSE(level,str) { IFVERBOSE(level) { TRACE_ERR(str); } }
#define IFVERBOSE(level) if (StaticData::Instance().GetVerboseLevel() >= level)
-#define XVERBOSE(level,str) { if (StaticData::Instance().GetVerboseLevel() >= level) { TRACE_ERR("[" << __FILE__ << ":" << __LINE__ << "] ");TRACE_ERR(str); } }
+#define XVERBOSE(level,str) VERBOSE(level, "[" << HERE << "] " << str)
#define HERE __FILE__ << ":" << __LINE__
+#define FEATUREVERBOSE(level,str) FEATUREVERBOSE2(level, "[" << GetScoreProducerDescription() << "] " << str)
+#define FEATUREVERBOSE2(level,str) { IFFEATUREVERBOSE(level) { TRACE_ERR(str); } }
+#define IFFEATUREVERBOSE(level) if ((m_verbosity == std::numeric_limits<std::size_t>::max() && StaticData::Instance().GetVerboseLevel() >= level) || (m_verbosity != std::numeric_limits<std::size_t>::max() && m_verbosity >= level))
#if __GNUC__ == 4 && __GNUC_MINOR__ == 8 && (__GNUC_PATCHLEVEL__ == 1 || __GNUC_PATCHLEVEL__ == 2)
@@ -102,6 +105,52 @@ inline std::string Scan<std::string>(const std::string &input)
return input;
}
+template<>
+inline WordAlignmentSort Scan<WordAlignmentSort>(const std::string &input)
+{
+ return (WordAlignmentSort) Scan<size_t>(input);
+}
+
+template<>
+inline InputTypeEnum Scan<InputTypeEnum>(const std::string &input)
+{
+ return (InputTypeEnum) Scan<size_t>(input);
+}
+
+template<>
+inline SearchAlgorithm Scan<SearchAlgorithm>(const std::string &input)
+{
+ return (SearchAlgorithm) Scan<size_t>(input);
+}
+
+template<>
+inline S2TParsingAlgorithm Scan<S2TParsingAlgorithm>(const std::string &input)
+{
+ return (S2TParsingAlgorithm) Scan<size_t>(input);
+}
+
+template<>
+inline SourceLabelOverlap Scan<SourceLabelOverlap>(const std::string &input)
+{
+ return (SourceLabelOverlap) Scan<size_t>(input);
+}
+
+template<>
+inline XmlInputType Scan<XmlInputType>(const std::string &input)
+{
+ XmlInputType ret;
+ if (input=="exclusive") ret = XmlExclusive;
+ else if (input=="inclusive") ret = XmlInclusive;
+ else if (input=="constraint") ret = XmlConstraint;
+ else if (input=="ignore") ret = XmlIgnore;
+ else if (input=="pass-through") ret = XmlPassThrough;
+ else {
+ UTIL_THROW2("Unknown XML input type");
+ }
+
+ return ret;
+}
+
//! Specialisation to understand yes/no y/n true/false 0/1
template<>
bool Scan<bool>(const std::string &input);
@@ -282,6 +331,20 @@ std::string Join(const std::string& delimiter, const std::vector<T>& items)
return outstr.str();
}
+/*
+ * Convert any container to string
+ */
+template<typename It>
+std::string Join(const std::string &delim, It begin, It end)
+{
+ std::ostringstream outstr;
+ if (begin != end)
+ outstr << *begin++;
+ for ( ; begin != end; ++begin)
+ outstr << delim << *begin;
+ return outstr.str();
+}
+
//! transform prob to natural log score
inline float TransformScore(float prob)
{
@@ -320,7 +383,7 @@ inline float CalcTranslationScore(const std::vector<float> &probVector,
const std::vector<float> &weightT)
{
UTIL_THROW_IF2(weightT.size() != probVector.size(),
- "Weight and score vector sizes not the same");
+ "Weight and score vector sizes not the same");
float rv=0.0;
for(float const *sb=&probVector[0],*se=sb+probVector.size(),*wb=&weightT[0];
sb!=se; ++sb, ++wb)
@@ -387,6 +450,9 @@ void ResetUserTime();
void PrintUserTime(const std::string &message);
double GetUserTime();
+// dump SGML parser for <dlt> tags
+std::vector< std::map<std::string, std::string> > ProcessAndStripDLT(std::string &line);
+
// dump SGML parser for <seg> tags
std::map<std::string, std::string> ProcessAndStripSGML(std::string &line);
@@ -431,8 +497,17 @@ T log_sum (T log_a, T log_b)
return ( v );
}
+/**
+ * Compare floats for equality with some tolerance.
+ */
+inline bool Equals(float a, float b)
+{
+ return fabs(a - b) < FLOAT_EPSILON;
+}
+
+
/** Enforce rounding */
-inline void fix(std::ostream& stream, size_t size)
+inline void FixPrecision(std::ostream& stream, size_t size = 3)
{
stream.setf(std::ios::fixed);
stream.precision(size);
diff --git a/moses/Word.cpp b/moses/Word.cpp
index b1ea77059..f55be5ee8 100644
--- a/moses/Word.cpp
+++ b/moses/Word.cpp
@@ -34,15 +34,15 @@ using namespace std;
namespace Moses
{
-
- // utility function for factorless decoding
- size_t
- max_fax()
- {
- if (StaticData::Instance().GetFactorDelimiter().size())
- return MAX_NUM_FACTORS;
- return 1;
- }
+
+// utility function for factorless decoding
+size_t
+max_fax()
+{
+ if (StaticData::Instance().GetFactorDelimiter().size())
+ return MAX_NUM_FACTORS;
+ return 1;
+}
// static
int Word::Compare(const Word &targetWord, const Word &sourceWord)
@@ -85,8 +85,8 @@ std::string Word::GetString(const vector<FactorType> factorType,bool endWithBlan
unsigned int stop = min(max_fax(),factorType.size());
for (unsigned int i = 0 ; i < stop ; i++) {
UTIL_THROW_IF2(factorType[i] >= MAX_NUM_FACTORS,
- "Trying to reference factor " << factorType[i]
- << ". Max factor is " << MAX_NUM_FACTORS);
+ "Trying to reference factor " << factorType[i]
+ << ". Max factor is " << MAX_NUM_FACTORS);
const Factor *factor = m_factorArray[factorType[i]];
if (factor != NULL) {
@@ -109,49 +109,45 @@ StringPiece Word::GetString(FactorType factorType) const
class StrayFactorException : public util::Exception {};
-void
+void
Word::
CreateFromString(FactorDirection direction
- , const std::vector<FactorType> &factorOrder
- , const StringPiece &str
- , bool isNonTerminal
- , bool strict)
+ , const std::vector<FactorType> &factorOrder
+ , const StringPiece &str
+ , bool isNonTerminal
+ , bool strict)
{
FactorCollection &factorCollection = FactorCollection::Instance();
vector<StringPiece> bits(MAX_NUM_FACTORS);
string factorDelimiter = StaticData::Instance().GetFactorDelimiter();
- if (factorDelimiter.size())
- {
- util::TokenIter<util::MultiCharacter> fit(str, factorDelimiter);
- size_t i = 0;
- for (; i < MAX_NUM_FACTORS && fit; ++i,++fit)
- bits[i] = *fit;
- if (i == MAX_NUM_FACTORS)
- UTIL_THROW_IF(fit, StrayFactorException,
- "The hard limit for factors is " << MAX_NUM_FACTORS
- << ". The word " << str << " contains factor delimiter "
- << StaticData::Instance().GetFactorDelimiter()
- << " too many times.");
- if (strict)
- UTIL_THROW_IF(fit, StrayFactorException,
- "You have configured " << factorOrder.size()
- << " factors but the word " << str
- << " contains factor delimiter "
- << StaticData::Instance().GetFactorDelimiter()
- << " too many times.");
- UTIL_THROW_IF(!isNonTerminal && i < factorOrder.size(),util::Exception,
- "Too few factors in string '" << str << "'.");
- }
- else
- {
- bits[0] = str;
- }
- for (size_t k = 0; k < factorOrder.size(); ++k)
- {
- UTIL_THROW_IF(factorOrder[k] >= MAX_NUM_FACTORS, util::Exception,
- "Factor order out of bounds.");
- m_factorArray[factorOrder[k]] = factorCollection.AddFactor(bits[k], isNonTerminal);
- }
+ if (factorDelimiter.size()) {
+ util::TokenIter<util::MultiCharacter> fit(str, factorDelimiter);
+ size_t i = 0;
+ for (; i < MAX_NUM_FACTORS && fit; ++i,++fit)
+ bits[i] = *fit;
+ if (i == MAX_NUM_FACTORS)
+ UTIL_THROW_IF(fit, StrayFactorException,
+ "The hard limit for factors is " << MAX_NUM_FACTORS
+ << ". The word " << str << " contains factor delimiter "
+ << StaticData::Instance().GetFactorDelimiter()
+ << " too many times.");
+ if (strict)
+ UTIL_THROW_IF(fit, StrayFactorException,
+ "You have configured " << factorOrder.size()
+ << " factors but the word " << str
+ << " contains factor delimiter "
+ << StaticData::Instance().GetFactorDelimiter()
+ << " too many times.");
+ UTIL_THROW_IF(!isNonTerminal && i < factorOrder.size(),util::Exception,
+ "Too few factors in string '" << str << "'.");
+ } else {
+ bits[0] = str;
+ }
+ for (size_t k = 0; k < factorOrder.size(); ++k) {
+ UTIL_THROW_IF(factorOrder[k] >= MAX_NUM_FACTORS, util::Exception,
+ "Factor order out of bounds.");
+ m_factorArray[factorOrder[k]] = factorCollection.AddFactor(bits[k], isNonTerminal);
+ }
// assume term/non-term same for all factors
m_isNonTerminal = isNonTerminal;
}
@@ -188,10 +184,10 @@ void Word::OnlyTheseFactors(const FactorMask &factors)
bool Word::IsEpsilon() const
{
- const Factor *factor = m_factorArray[0];
- int compare = factor->GetString().compare(EPSILON);
+ const Factor *factor = m_factorArray[0];
+ int compare = factor->GetString().compare(EPSILON);
- return compare == 0;
+ return compare == 0;
}
TO_STRING_BODY(Word);
diff --git a/moses/Word.h b/moses/Word.h
index 47df6e141..efdebddfa 100644
--- a/moses/Word.h
+++ b/moses/Word.h
@@ -152,7 +152,7 @@ public:
, const std::vector<FactorType> &factorOrder
, const StringPiece &str
, bool isNonTerminal
- , bool strict = true);
+ , bool strict = true);
void CreateUnknownWord(const Word &sourceWord);
diff --git a/moses/WordLattice.cpp b/moses/WordLattice.cpp
index 6be229491..01b89bfb8 100644
--- a/moses/WordLattice.cpp
+++ b/moses/WordLattice.cpp
@@ -13,7 +13,7 @@ namespace Moses
WordLattice::WordLattice()
{
UTIL_THROW_IF2(&InputFeature::Instance() == NULL,
- "Input feature must be specified");
+ "Input feature must be specified");
}
size_t WordLattice::GetColumnIncrement(size_t i, size_t j) const
@@ -49,11 +49,11 @@ void WordLattice::Print(std::ostream& out) const
out<<"\n\n";
}
-int
+int
WordLattice::
InitializeFromPCNDataType
-(const PCN::CN& cn,
- const std::vector<FactorType>& factorOrder,
+(const PCN::CN& cn,
+ const std::vector<FactorType>& factorOrder,
const std::string& debug_line)
{
// const StaticData &staticData = StaticData::Instance();
@@ -78,20 +78,20 @@ InitializeFromPCNDataType
//check for correct number of link parameters
if (alt.m_denseFeatures.size() != numInputScores) {
- TRACE_ERR("ERROR: need " << numInputScores
- << " link parameters, found "
- << alt.m_denseFeatures.size()
- << " while reading column " << i
- << " from " << debug_line << "\n");
+ TRACE_ERR("ERROR: need " << numInputScores
+ << " link parameters, found "
+ << alt.m_denseFeatures.size()
+ << " while reading column " << i
+ << " from " << debug_line << "\n");
return false;
}
//check each element for bounds
std::vector<float>::const_iterator probsIterator;
data[i][j].second = std::vector<float>(0);
- for(probsIterator = alt.m_denseFeatures.begin();
- probsIterator < alt.m_denseFeatures.end();
- probsIterator++) {
+ for(probsIterator = alt.m_denseFeatures.begin();
+ probsIterator < alt.m_denseFeatures.end();
+ probsIterator++) {
IFVERBOSE(1) {
if (*probsIterator < 0.0f) {
TRACE_ERR("WARN: neg probability: " << *probsIterator << "\n");
@@ -229,9 +229,8 @@ WordLattice::CreateTranslationOptionCollection() const
if (StaticData::Instance().GetUseLegacyPT()) {
rv = new TranslationOptionCollectionConfusionNet(*this, maxNoTransOptPerCoverage, translationOptionThreshold);
- }
- else {
- rv = new TranslationOptionCollectionLattice(*this, maxNoTransOptPerCoverage, translationOptionThreshold);
+ } else {
+ rv = new TranslationOptionCollectionLattice(*this, maxNoTransOptPerCoverage, translationOptionThreshold);
}
assert(rv);
diff --git a/moses/WordsBitmap.cpp b/moses/WordsBitmap.cpp
index 0866846ed..53c263cb5 100644
--- a/moses/WordsBitmap.cpp
+++ b/moses/WordsBitmap.cpp
@@ -66,7 +66,7 @@ int WordsBitmap::GetFutureCosts(int lastPos) const
bool WordsBitmap::IsAdjacent(size_t startPos, size_t endPos) const
{
if (GetNumWordsCovered() == 0) {
- return true;
+ return true;
}
size_t first = GetFirstGapPos();
diff --git a/moses/WordsBitmap.h b/moses/WordsBitmap.h
index 2deb7b661..ab81471b9 100644
--- a/moses/WordsBitmap.h
+++ b/moses/WordsBitmap.h
@@ -224,7 +224,7 @@ public:
//! converts bitmap into an integer ID, with an additional span covered
WordsBitmapID GetIDPlus( size_t startPos, size_t endPos ) const {
- assert(m_size < (1<<16));
+ assert(m_size < (1<<16));
size_t start = GetFirstGapPos();
if (start == NOT_FOUND) start = m_size; // nothing left
diff --git a/moses/WordsRange.h b/moses/WordsRange.h
index a4b41de83..4a38ecde7 100644
--- a/moses/WordsRange.h
+++ b/moses/WordsRange.h
@@ -41,6 +41,7 @@ class WordsRange
{
friend std::ostream& operator << (std::ostream& out, const WordsRange& range);
+ // m_endPos is inclusive
size_t m_startPos, m_endPos;
public:
inline WordsRange(size_t startPos, size_t endPos) : m_startPos(startPos), m_endPos(endPos) {}
diff --git a/moses/XmlOption.cpp b/moses/XmlOption.cpp
index 2f66d647e..38b767d8a 100644
--- a/moses/XmlOption.cpp
+++ b/moses/XmlOption.cpp
@@ -24,6 +24,7 @@
#include <vector>
#include <string>
#include <iostream>
+#include <boost/algorithm/string/predicate.hpp>
#include <boost/foreach.hpp>
#include <boost/unordered_map.hpp>
#include "Util.h"
@@ -40,6 +41,7 @@
namespace Moses
{
using namespace std;
+using namespace boost::algorithm;
string ParseXmlTagAttribute(const string& tag,const string& attributeName)
{
@@ -73,7 +75,7 @@ string TrimXml(const string& str, const std::string& lbrackStr, const std::strin
if (str.size() < lbrackStr.length()+rbrackStr.length() ) return str;
// strip first and last character
- if (str.substr(0,lbrackStr.length()) == lbrackStr && str.substr(str.size()-rbrackStr.length()) == rbrackStr) {
+ if (starts_with(str, lbrackStr) && ends_with(str, rbrackStr)) {
return str.substr(lbrackStr.length(), str.size()-lbrackStr.length()-rbrackStr.length());
}
// not an xml token -> do nothing
@@ -321,32 +323,32 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
// update: add new aligned sentence pair to Mmsapt identified by name
else if (tagName == "update") {
#if PT_UG
- // get model name and aligned sentence pair
- string pdName = ParseXmlTagAttribute(tagContent,"name");
- string source = ParseXmlTagAttribute(tagContent,"source");
- string target = ParseXmlTagAttribute(tagContent,"target");
- string alignment = ParseXmlTagAttribute(tagContent,"alignment");
- // find PhraseDictionary by name
- const vector<PhraseDictionary*> &pds = PhraseDictionary::GetColl();
- PhraseDictionary* pd = NULL;
- for (vector<PhraseDictionary*>::const_iterator i = pds.begin(); i != pds.end(); ++i) {
- PhraseDictionary* curPd = *i;
- if (curPd->GetScoreProducerDescription() == pdName) {
- pd = curPd;
- break;
- }
- }
- if (pd == NULL) {
- TRACE_ERR("ERROR: No PhraseDictionary with name " << pdName << ", no update" << endl);
- return false;
+ // get model name and aligned sentence pair
+ string pdName = ParseXmlTagAttribute(tagContent,"name");
+ string source = ParseXmlTagAttribute(tagContent,"source");
+ string target = ParseXmlTagAttribute(tagContent,"target");
+ string alignment = ParseXmlTagAttribute(tagContent,"alignment");
+ // find PhraseDictionary by name
+ const vector<PhraseDictionary*> &pds = PhraseDictionary::GetColl();
+ PhraseDictionary* pd = NULL;
+ for (vector<PhraseDictionary*>::const_iterator i = pds.begin(); i != pds.end(); ++i) {
+ PhraseDictionary* curPd = *i;
+ if (curPd->GetScoreProducerDescription() == pdName) {
+ pd = curPd;
+ break;
}
- // update model
- VERBOSE(3,"Updating " << pdName << " ||| " << source << " ||| " << target << " ||| " << alignment << endl);
- Mmsapt* pdsa = reinterpret_cast<Mmsapt*>(pd);
- pdsa->add(source, target, alignment);
-#else
- TRACE_ERR("ERROR: recompile with --with-mm to update PhraseDictionary at runtime" << endl);
+ }
+ if (pd == NULL) {
+ TRACE_ERR("ERROR: No PhraseDictionary with name " << pdName << ", no update" << endl);
return false;
+ }
+ // update model
+ VERBOSE(3,"Updating " << pdName << " ||| " << source << " ||| " << target << " ||| " << alignment << endl);
+ Mmsapt* pdsa = reinterpret_cast<Mmsapt*>(pd);
+ pdsa->add(source, target, alignment);
+#else
+ TRACE_ERR("ERROR: recompile with --with-mm to update PhraseDictionary at runtime" << endl);
+ return false;
#endif
}
@@ -356,44 +358,44 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
// for PhraseDictionaryBitextSampling (Mmsapt) models:
// <update name="TranslationModelName" source=" " target=" " alignment=" " />
else if (tagName == "weight-overwrite") {
-
- // is a name->ff map stored anywhere so we don't have to build it every time?
- const vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
- boost::unordered_map<string, FeatureFunction*> map;
- BOOST_FOREACH(FeatureFunction* const& ff, ffs) {
- map[ff->GetScoreProducerDescription()] = ff;
- }
- // update each weight listed
- ScoreComponentCollection allWeights = StaticData::Instance().GetAllWeights();
- boost::unordered_map<string, FeatureFunction*>::iterator ffi;
- string ffName("");
- vector<float> ffWeights;
- vector<string> toks = Tokenize(ParseXmlTagAttribute(tagContent,"weights"));
- BOOST_FOREACH(string const& tok, toks) {
- if (tok.substr(tok.size() - 1, 1) == "=") {
- // start new feature
- if (ffName != "") {
- // set previous feature weights
- if (ffi != map.end()) {
- allWeights.Assign(ffi->second, ffWeights);
- }
- ffWeights.clear();
- }
- ffName = tok.substr(0, tok.size() - 1);
- ffi = map.find(ffName);
- if (ffi == map.end()) {
- TRACE_ERR("ERROR: No FeatureFunction with name " << ffName << ", no weight update" << endl);
- }
- } else {
- // weight for current feature
- ffWeights.push_back(Scan<float>(tok));
+ // is a name->ff map stored anywhere so we don't have to build it every time?
+ const vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
+ boost::unordered_map<string, FeatureFunction*> map;
+ BOOST_FOREACH(FeatureFunction* const& ff, ffs) {
+ map[ff->GetScoreProducerDescription()] = ff;
+ }
+
+ // update each weight listed
+ ScoreComponentCollection allWeights = StaticData::Instance().GetAllWeights();
+ boost::unordered_map<string, FeatureFunction*>::iterator ffi;
+ string ffName("");
+ vector<float> ffWeights;
+ vector<string> toks = Tokenize(ParseXmlTagAttribute(tagContent,"weights"));
+ BOOST_FOREACH(string const& tok, toks) {
+ if (ends_with(tok, "=")) {
+ // start new feature
+ if (ffName != "") {
+ // set previous feature weights
+ if (ffi != map.end()) {
+ allWeights.Assign(ffi->second, ffWeights);
}
+ ffWeights.clear();
+ }
+ ffName = tok.substr(0, tok.size() - 1);
+ ffi = map.find(ffName);
+ if (ffi == map.end()) {
+ TRACE_ERR("ERROR: No FeatureFunction with name " << ffName << ", no weight update" << endl);
+ }
+ } else {
+ // weight for current feature
+ ffWeights.push_back(Scan<float>(tok));
}
- if (ffi != map.end()) {
- allWeights.Assign(ffi->second, ffWeights);
- }
- StaticData::InstanceNonConst().SetAllWeights(allWeights);
+ }
+ if (ffi != map.end()) {
+ allWeights.Assign(ffi->second, ffWeights);
+ }
+ StaticData::InstanceNonConst().SetAllWeights(allWeights);
}
// default: opening tag that specifies translation options
@@ -401,8 +403,7 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
if (startPos > endPos) {
TRACE_ERR("ERROR: tag " << tagName << " startPos > endPos: " << line << endl);
return false;
- }
- else if (startPos == endPos) {
+ } else if (startPos == endPos) {
TRACE_ERR("WARNING: tag " << tagName << " 0 span: " << line << endl);
continue;
}