Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--OnDiskPt/Main.cpp16
-rw-r--r--OnDiskPt/TargetPhrase.cpp20
-rw-r--r--OnDiskPt/TargetPhrase.h10
-rw-r--r--OnDiskPt/Word.cpp23
-rw-r--r--biconcor/phrase-lookup.cpp9
-rw-r--r--mert/FeatureStats.cpp5
-rw-r--r--mert/ForestRescore.cpp68
-rw-r--r--mert/ForestRescore.h53
-rw-r--r--mert/ForestRescoreTest.cpp4
-rw-r--r--mert/HopeFearDecoder.cpp112
-rw-r--r--mert/HopeFearDecoder.h70
-rw-r--r--mert/HwcmScorer.cpp12
-rw-r--r--mert/Hypergraph.cpp57
-rw-r--r--mert/Hypergraph.h355
-rw-r--r--mert/HypergraphTest.cpp30
-rw-r--r--mert/InterpolatedScorer.cpp24
-rw-r--r--mert/MiraFeatureVector.cpp9
-rw-r--r--mert/MiraWeightVector.cpp6
-rw-r--r--mert/StatisticsBasedScorer.h2
-rw-r--r--mert/TER/alignmentStruct.cpp16
-rw-r--r--mert/TER/alignmentStruct.h26
-rw-r--r--mert/TER/bestShiftStruct.h22
-rw-r--r--mert/TER/hashMap.cpp232
-rw-r--r--mert/TER/hashMap.h40
-rw-r--r--mert/TER/hashMapInfos.cpp239
-rw-r--r--mert/TER/hashMapInfos.h42
-rw-r--r--mert/TER/hashMapStringInfos.cpp313
-rw-r--r--mert/TER/hashMapStringInfos.h42
-rw-r--r--mert/TER/infosHasher.cpp58
-rw-r--r--mert/TER/infosHasher.h36
-rw-r--r--mert/TER/stringHasher.cpp46
-rw-r--r--mert/TER/stringHasher.h28
-rw-r--r--mert/TER/stringInfosHasher.cpp58
-rw-r--r--mert/TER/stringInfosHasher.h36
-rw-r--r--mert/TER/terAlignment.cpp286
-rw-r--r--mert/TER/terAlignment.h72
-rw-r--r--mert/TER/terShift.cpp116
-rw-r--r--mert/TER/terShift.h46
-rw-r--r--mert/TER/tercalc.cpp1512
-rw-r--r--mert/TER/tercalc.h90
-rw-r--r--mert/TER/tools.cpp1167
-rw-r--r--mert/TER/tools.h107
-rw-r--r--mert/evaluator.cpp9
-rw-r--r--mert/kbmira.cpp12
-rw-r--r--misc/CreateProbingPT.cpp33
-rw-r--r--misc/QueryProbingPT.cpp59
-rw-r--r--misc/prunePhraseTable.cpp16
-rw-r--r--moses-cmd/LatticeMBRGrid.cpp2
-rw-r--r--moses-cmd/Main.cpp30
-rw-r--r--moses-cmd/MainVW.cpp4
-rw-r--r--moses/AlignmentInfoCollection.h9
-rw-r--r--moses/BaseManager.cpp12
-rw-r--r--moses/BaseManager.h19
-rw-r--r--moses/BitmapContainer.cpp18
-rw-r--r--moses/ChartCell.cpp2
-rw-r--r--moses/ChartCell.h2
-rw-r--r--moses/ChartCellLabelSet.h3
-rw-r--r--moses/ChartHypothesis.cpp114
-rw-r--r--moses/ChartHypothesis.h70
-rw-r--r--moses/ChartKBestExtractor.cpp5
-rw-r--r--moses/ChartManager.cpp204
-rw-r--r--moses/ChartManager.h80
-rw-r--r--moses/ChartRuleLookupManager.h2
-rw-r--r--moses/ChartTranslationOption.cpp4
-rw-r--r--moses/ChartTranslationOption.h4
-rw-r--r--moses/ChartTranslationOptions.cpp17
-rw-r--r--moses/ConfusionNet.cpp504
-rw-r--r--moses/DecodeGraph.h4
-rw-r--r--moses/DecodeStepTranslation.cpp12
-rw-r--r--moses/FF/BleuScoreFeature.cpp4
-rw-r--r--moses/FF/BleuScoreFeature.h32
-rw-r--r--moses/FF/ConstrainedDecoding.h34
-rw-r--r--moses/FF/ControlRecombination.h32
-rw-r--r--moses/FF/CountNonTerms.cpp60
-rw-r--r--moses/FF/CountNonTerms.h32
-rw-r--r--moses/FF/CoveredReferenceFeature.cpp41
-rw-r--r--moses/FF/CoveredReferenceFeature.h24
-rw-r--r--moses/FF/DecodeFeature.h38
-rw-r--r--moses/FF/DistortionScoreProducer.h28
-rw-r--r--moses/FF/DynamicCacheBasedLanguageModel.h40
-rw-r--r--moses/FF/ExternalFeature.h32
-rw-r--r--moses/FF/Factory.cpp34
-rw-r--r--moses/FF/FeatureFunction.cpp4
-rw-r--r--moses/FF/FeatureFunction.h24
-rw-r--r--moses/FF/GlobalLexicalModel.cpp8
-rw-r--r--moses/FF/GlobalLexicalModel.h32
-rw-r--r--moses/FF/GlobalLexicalModelUnlimited.cpp14
-rw-r--r--moses/FF/GlobalLexicalModelUnlimited.h34
-rw-r--r--moses/FF/HyperParameterAsWeight.h34
-rw-r--r--moses/FF/InputFeature.cpp10
-rw-r--r--moses/FF/InputFeature.h32
-rw-r--r--moses/FF/InternalTree.cpp369
-rw-r--r--moses/FF/InternalTree.h228
-rw-r--r--moses/FF/LexicalReordering/LexicalReordering.cpp8
-rw-r--r--moses/FF/LexicalReordering/LexicalReordering.h44
-rw-r--r--moses/FF/LexicalReordering/LexicalReorderingState.cpp12
-rw-r--r--moses/FF/LexicalReordering/LexicalReorderingState.h2
-rw-r--r--moses/FF/LexicalReordering/SparseReordering.cpp63
-rw-r--r--moses/FF/LexicalReordering/SparseReordering.h30
-rw-r--r--moses/FF/MaxSpanFreeNonTermSource.cpp50
-rw-r--r--moses/FF/MaxSpanFreeNonTermSource.h63
-rw-r--r--moses/FF/NieceTerminal.cpp78
-rw-r--r--moses/FF/NieceTerminal.h28
-rw-r--r--moses/FF/OSM-Feature/KenOSM.cpp38
-rw-r--r--moses/FF/OSM-Feature/KenOSM.h60
-rw-r--r--moses/FF/OSM-Feature/OpSequenceModel.cpp15
-rw-r--r--moses/FF/OSM-Feature/OpSequenceModel.h26
-rw-r--r--moses/FF/PhraseBoundaryFeature.h32
-rw-r--r--moses/FF/PhraseLengthFeature.cpp6
-rw-r--r--moses/FF/PhraseLengthFeature.h32
-rw-r--r--moses/FF/PhraseOrientationFeature.cpp768
-rw-r--r--moses/FF/PhraseOrientationFeature.h200
-rw-r--r--moses/FF/PhrasePairFeature.cpp10
-rw-r--r--moses/FF/PhrasePairFeature.h30
-rw-r--r--moses/FF/PhrasePenalty.cpp36
-rw-r--r--moses/FF/PhrasePenalty.h34
-rw-r--r--moses/FF/ReferenceComparison.h68
-rw-r--r--moses/FF/RuleScope.cpp35
-rw-r--r--moses/FF/RuleScope.h65
-rw-r--r--moses/FF/SetSourcePhrase.cpp10
-rw-r--r--moses/FF/SetSourcePhrase.h44
-rw-r--r--moses/FF/SkeletonChangeInput.cpp48
-rw-r--r--moses/FF/SkeletonChangeInput.h26
-rw-r--r--moses/FF/SkeletonStatefulFF.cpp18
-rw-r--r--moses/FF/SkeletonStatefulFF.h26
-rw-r--r--moses/FF/SkeletonStatelessFF.cpp32
-rw-r--r--moses/FF/SkeletonStatelessFF.h22
-rw-r--r--moses/FF/SkeletonTranslationOptionListFeature.h48
-rw-r--r--moses/FF/SoftMatchingFeature.cpp67
-rw-r--r--moses/FF/SoftMatchingFeature.h24
-rw-r--r--moses/FF/SoftSourceSyntacticConstraintsFeature.cpp104
-rw-r--r--moses/FF/SoftSourceSyntacticConstraintsFeature.h35
-rw-r--r--moses/FF/SourceGHKMTreeInputMatchFeature.cpp10
-rw-r--r--moses/FF/SourceGHKMTreeInputMatchFeature.h26
-rw-r--r--moses/FF/SourceWordDeletionFeature.h32
-rw-r--r--moses/FF/SpanLength.cpp60
-rw-r--r--moses/FF/SpanLength.h60
-rw-r--r--moses/FF/SparseHieroReorderingFeature.cpp59
-rw-r--r--moses/FF/SparseHieroReorderingFeature.h43
-rw-r--r--moses/FF/StatefulFeatureFunction.h5
-rw-r--r--moses/FF/StatelessFeatureFunction.h8
-rw-r--r--moses/FF/SyntaxRHS.cpp36
-rw-r--r--moses/FF/SyntaxRHS.h30
-rw-r--r--moses/FF/TargetBigramFeature.cpp4
-rw-r--r--moses/FF/TargetBigramFeature.h32
-rw-r--r--moses/FF/TargetNgramFeature.cpp4
-rw-r--r--moses/FF/TargetNgramFeature.h30
-rw-r--r--moses/FF/TargetWordInsertionFeature.h32
-rw-r--r--moses/FF/TreeStructureFeature.cpp39
-rw-r--r--moses/FF/TreeStructureFeature.h45
-rw-r--r--moses/FF/UnknownWordPenaltyProducer.h36
-rw-r--r--moses/FF/VW/ThreadLocalByFeatureStorage.h91
-rw-r--r--moses/FF/VW/VW.h103
-rw-r--r--moses/FF/VW/VWFeatureBase.cpp6
-rw-r--r--moses/FF/VW/VWFeatureBase.h190
-rw-r--r--moses/FF/VW/VWFeatureSource.h46
-rw-r--r--moses/FF/VW/VWFeatureSourceBagOfWords.h38
-rw-r--r--moses/FF/VW/VWFeatureSourceExternalFeatures.h82
-rw-r--r--moses/FF/VW/VWFeatureSourceIndicator.h54
-rw-r--r--moses/FF/VW/VWFeatureSourcePhraseInternal.h44
-rw-r--r--moses/FF/VW/VWFeatureSourceWindow.h66
-rw-r--r--moses/FF/VW/VWFeatureTarget.h46
-rw-r--r--moses/FF/VW/VWFeatureTargetIndicator.h36
-rw-r--r--moses/FF/VW/VWFeatureTargetPhraseInternal.h38
-rw-r--r--moses/FF/WordPenaltyProducer.cpp6
-rw-r--r--moses/FF/WordPenaltyProducer.h36
-rw-r--r--moses/FF/WordTranslationFeature.cpp10
-rw-r--r--moses/FF/WordTranslationFeature.h32
-rw-r--r--moses/FactorCollection.cpp3
-rw-r--r--moses/HypergraphOutput.cpp40
-rw-r--r--moses/HypergraphOutput.h43
-rw-r--r--moses/Hypothesis.cpp26
-rw-r--r--moses/Hypothesis.h2
-rw-r--r--moses/IOWrapper.cpp24
-rw-r--r--moses/Incremental.cpp59
-rw-r--r--moses/Incremental.h76
-rw-r--r--moses/InputPath.cpp2
-rw-r--r--moses/InputPath.h4
-rw-r--r--moses/LM/Base.cpp6
-rw-r--r--moses/LM/Base.h24
-rw-r--r--moses/LM/BilingualLM.cpp158
-rw-r--r--moses/LM/BilingualLM.h65
-rw-r--r--moses/LM/DALMWrapper.cpp745
-rw-r--r--moses/LM/Implementation.cpp3
-rw-r--r--moses/LM/Ken.cpp3
-rw-r--r--moses/LM/LDHT.cpp4
-rw-r--r--moses/LM/NeuralLMWrapper.h5
-rw-r--r--moses/LM/SingleFactor.cpp12
-rw-r--r--moses/LM/bilingual-lm/BiLM_NPLM.cpp58
-rw-r--r--moses/LM/bilingual-lm/BiLM_NPLM.h15
-rw-r--r--moses/LM/oxlm/OxLM.cpp38
-rw-r--r--moses/LM/oxlm/OxLM.h18
-rw-r--r--moses/LM/oxlm/OxLMMapper.cpp30
-rw-r--r--moses/LM/oxlm/OxLMMapper.h22
-rw-r--r--moses/LM/oxlm/OxLMParallelMapper.cpp17
-rw-r--r--moses/LM/oxlm/OxLMParallelMapper.h16
-rw-r--r--moses/LM/oxlm/SourceOxLM.cpp46
-rw-r--r--moses/LM/oxlm/SourceOxLM.h20
-rw-r--r--moses/Manager.cpp452
-rw-r--r--moses/Manager.h4
-rw-r--r--moses/PCNTools.h8
-rw-r--r--moses/PDTAimp.cpp50
-rw-r--r--moses/PP/CountsPhraseProperty.cpp10
-rw-r--r--moses/PP/CountsPhraseProperty.h4
-rw-r--r--moses/PP/NonTermContextProperty.cpp128
-rw-r--r--moses/PP/NonTermContextProperty.h46
-rw-r--r--moses/PP/OrientationPhraseProperty.cpp4
-rw-r--r--moses/PP/OrientationPhraseProperty.h4
-rw-r--r--moses/PP/PhraseProperty.cpp4
-rw-r--r--moses/PP/PhraseProperty.h14
-rw-r--r--moses/PP/SourceLabelsPhraseProperty.cpp60
-rw-r--r--moses/PP/SourceLabelsPhraseProperty.h17
-rw-r--r--moses/PP/SpanLengthPhraseProperty.cpp119
-rw-r--r--moses/PP/SpanLengthPhraseProperty.h22
-rw-r--r--moses/Parameter.cpp65
-rw-r--r--moses/Parameter.h16
-rw-r--r--moses/ScoreComponentCollection.cpp8
-rw-r--r--moses/ScoreComponentCollection.h8
-rw-r--r--moses/ScoreComponentCollectionTest.cpp26
-rw-r--r--moses/SearchNormal.cpp14
-rw-r--r--moses/Sentence.h2
-rw-r--r--moses/StaticData.cpp258
-rw-r--r--moses/StaticData.h7
-rw-r--r--moses/SyntacticLanguageModel.h4
-rw-r--r--moses/Syntax/BoundedPriorityContainer.h40
-rw-r--r--moses/Syntax/Cube.cpp4
-rw-r--r--moses/Syntax/Cube.h16
-rw-r--r--moses/Syntax/CubeQueue.h10
-rw-r--r--moses/Syntax/KBestExtractor.cpp51
-rw-r--r--moses/Syntax/KBestExtractor.h12
-rw-r--r--moses/Syntax/Manager.cpp26
-rw-r--r--moses/Syntax/Manager.h14
-rw-r--r--moses/Syntax/NonTerminalMap.h40
-rw-r--r--moses/Syntax/PHyperedge.h3
-rw-r--r--moses/Syntax/PVertex.h5
-rw-r--r--moses/Syntax/RuleTable.h4
-rw-r--r--moses/Syntax/RuleTableFF.h15
-rw-r--r--moses/Syntax/S2T/DerivationWriter.h4
-rw-r--r--moses/Syntax/S2T/Manager-inl.h44
-rw-r--r--moses/Syntax/S2T/Manager.h10
-rw-r--r--moses/Syntax/S2T/OovHandler-inl.h8
-rw-r--r--moses/Syntax/S2T/OovHandler.h4
-rw-r--r--moses/Syntax/S2T/PChart.h17
-rw-r--r--moses/Syntax/S2T/PHyperedgeToSHyperedgeBundle.h7
-rw-r--r--moses/Syntax/S2T/ParserCallback.h40
-rw-r--r--moses/Syntax/S2T/Parsers/Parser.h4
-rw-r--r--moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser-inl.h49
-rw-r--r--moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser.h8
-rw-r--r--moses/Syntax/S2T/Parsers/Scope3Parser/Parser-inl.h16
-rw-r--r--moses/Syntax/S2T/Parsers/Scope3Parser/Parser.h6
-rw-r--r--moses/Syntax/S2T/Parsers/Scope3Parser/PatternApplicationTrie.cpp16
-rw-r--r--moses/Syntax/S2T/Parsers/Scope3Parser/PatternApplicationTrie.h10
-rw-r--r--moses/Syntax/S2T/Parsers/Scope3Parser/SentenceMap.h2
-rw-r--r--moses/Syntax/S2T/Parsers/Scope3Parser/SymbolRangeCalculator.cpp8
-rw-r--r--moses/Syntax/S2T/Parsers/Scope3Parser/SymbolRangeCalculator.h4
-rw-r--r--moses/Syntax/S2T/Parsers/Scope3Parser/TailLattice.h8
-rw-r--r--moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeBuilder.cpp6
-rw-r--r--moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeBuilder.h4
-rw-r--r--moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeSearcher.h6
-rw-r--r--moses/Syntax/S2T/RuleTrie.h4
-rw-r--r--moses/Syntax/S2T/RuleTrieCYKPlus.cpp20
-rw-r--r--moses/Syntax/S2T/RuleTrieCYKPlus.h26
-rw-r--r--moses/Syntax/S2T/RuleTrieCreator.h6
-rw-r--r--moses/Syntax/S2T/RuleTrieLoader.cpp4
-rw-r--r--moses/Syntax/S2T/RuleTrieLoader.h2
-rw-r--r--moses/Syntax/S2T/RuleTrieScope3.cpp10
-rw-r--r--moses/Syntax/S2T/RuleTrieScope3.h42
-rw-r--r--moses/Syntax/S2T/SChart.h13
-rw-r--r--moses/Syntax/SHyperedge.cpp36
-rw-r--r--moses/Syntax/SHyperedge.h3
-rw-r--r--moses/Syntax/SHyperedgeBundle.h3
-rw-r--r--moses/Syntax/SHyperedgeBundleScorer.h7
-rw-r--r--moses/Syntax/SVertex.h3
-rw-r--r--moses/Syntax/SVertexRecombinationOrderer.h11
-rw-r--r--moses/Syntax/SVertexStack.h8
-rw-r--r--moses/Syntax/SymbolEqualityPred.h2
-rw-r--r--moses/Syntax/SymbolHasher.h2
-rw-r--r--moses/TabbedSentence.cpp74
-rw-r--r--moses/TabbedSentence.h18
-rw-r--r--moses/TargetPhrase.cpp23
-rw-r--r--moses/TargetPhrase.h23
-rw-r--r--moses/TargetPhraseCollection.h11
-rw-r--r--moses/TrainingTask.h19
-rw-r--r--moses/TranslationAnalysis.h7
-rw-r--r--moses/TranslationModel/BilingualDynSuffixArray.h4
-rw-r--r--moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.h11
-rw-r--r--moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp249
-rw-r--r--moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.h4
-rw-r--r--moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp249
-rw-r--r--moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h6
-rw-r--r--moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp14
-rw-r--r--moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.cpp38
-rw-r--r--moses/TranslationModel/CompactPT/PhraseTableCreator.cpp66
-rw-r--r--moses/TranslationModel/DynSAInclude/FileHandler.cpp4
-rw-r--r--moses/TranslationModel/PhraseDictionary.cpp68
-rw-r--r--moses/TranslationModel/PhraseDictionary.h5
-rw-r--r--moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h8
-rw-r--r--moses/TranslationModel/PhraseDictionaryMultiModel.cpp20
-rw-r--r--moses/TranslationModel/PhraseDictionaryTransliteration.cpp169
-rw-r--r--moses/TranslationModel/PhraseDictionaryTree.cpp30
-rw-r--r--moses/TranslationModel/ProbingPT/ProbingPT.cpp145
-rw-r--r--moses/TranslationModel/ProbingPT/hash.cpp39
-rw-r--r--moses/TranslationModel/ProbingPT/huffmanish.cpp687
-rw-r--r--moses/TranslationModel/ProbingPT/line_splitter.cpp92
-rw-r--r--moses/TranslationModel/ProbingPT/probing_hash_utils.cpp56
-rw-r--r--moses/TranslationModel/ProbingPT/quering.cpp336
-rw-r--r--moses/TranslationModel/ProbingPT/storing.cpp283
-rw-r--r--moses/TranslationModel/ProbingPT/tests/tokenization_tests.cpp346
-rw-r--r--moses/TranslationModel/ProbingPT/tests/vocabid_test.cpp81
-rw-r--r--moses/TranslationModel/ProbingPT/vocabid.cpp41
-rw-r--r--moses/TranslationModel/RuleTable/LoaderCompact.cpp8
-rw-r--r--moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp2
-rw-r--r--moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp6
-rw-r--r--moses/TranslationModel/Scope3Parser/Parser.cpp2
-rw-r--r--moses/TranslationModel/Scope3Parser/Parser.h2
-rw-r--r--moses/TranslationModel/fuzzy-match/create_xml.cpp6
-rw-r--r--moses/TranslationOption.h2
-rw-r--r--moses/TranslationOptionCollection.cpp11
-rw-r--r--moses/TranslationOptionCollection.h4
-rw-r--r--moses/TranslationOptionCollectionLattice.cpp107
-rw-r--r--moses/TranslationTask.cpp7
-rw-r--r--moses/TreeInput.cpp3
-rw-r--r--moses/TypeDef.h14
-rw-r--r--moses/Util.cpp6
-rw-r--r--moses/Util.h2
-rw-r--r--moses/Word.cpp98
-rw-r--r--moses/XmlOption.cpp121
-rw-r--r--phrase-extract/DomainFeature.cpp2
-rw-r--r--phrase-extract/ExtractionPhrasePair.cpp62
-rw-r--r--phrase-extract/ExtractionPhrasePair.h18
-rw-r--r--phrase-extract/PhraseExtractionOptions.h4
-rw-r--r--phrase-extract/PropertiesConsolidator.cpp8
-rw-r--r--phrase-extract/ScoreFeatureTest.cpp16
-rw-r--r--phrase-extract/SentenceAlignment.cpp8
-rw-r--r--phrase-extract/SentenceAlignment.h8
-rw-r--r--phrase-extract/SyntaxTree.h4
-rw-r--r--phrase-extract/XmlTree.cpp3
-rw-r--r--phrase-extract/consolidate-direct-main.cpp5
-rw-r--r--phrase-extract/consolidate-main.cpp17
-rw-r--r--phrase-extract/consolidate-reverse-main.cpp5
-rw-r--r--phrase-extract/extract-ghkm/ExtractGHKM.cpp20
-rw-r--r--phrase-extract/extract-ghkm/PhraseOrientation.cpp94
-rw-r--r--phrase-extract/extract-ghkm/PhraseOrientation.h18
-rw-r--r--phrase-extract/extract-ghkm/Rule.h3
-rw-r--r--phrase-extract/extract-ghkm/ScfgRule.cpp28
-rw-r--r--phrase-extract/extract-ghkm/ScfgRule.h4
-rw-r--r--phrase-extract/extract-ghkm/ScfgRuleWriter.cpp2
-rw-r--r--phrase-extract/extract-ghkm/ScfgRuleWriter.h2
-rw-r--r--phrase-extract/extract-ghkm/StsgRule.cpp4
-rw-r--r--phrase-extract/extract-ghkm/Subgraph.h12
-rw-r--r--phrase-extract/extract-ghkm/XmlTreeParser.h6
-rw-r--r--phrase-extract/extract-main.cpp8
-rw-r--r--phrase-extract/extract-mixed-syntax/AlignedSentence.cpp271
-rw-r--r--phrase-extract/extract-mixed-syntax/AlignedSentence.h45
-rw-r--r--phrase-extract/extract-mixed-syntax/AlignedSentenceSyntax.cpp225
-rw-r--r--phrase-extract/extract-mixed-syntax/AlignedSentenceSyntax.h48
-rw-r--r--phrase-extract/extract-mixed-syntax/ConsistentPhrase.cpp39
-rw-r--r--phrase-extract/extract-mixed-syntax/ConsistentPhrase.h35
-rw-r--r--phrase-extract/extract-mixed-syntax/ConsistentPhrases.cpp105
-rw-r--r--phrase-extract/extract-mixed-syntax/ConsistentPhrases.h27
-rw-r--r--phrase-extract/extract-mixed-syntax/InputFileStream.cpp67
-rw-r--r--phrase-extract/extract-mixed-syntax/InputFileStream.h36
-rw-r--r--phrase-extract/extract-mixed-syntax/Main.cpp183
-rw-r--r--phrase-extract/extract-mixed-syntax/NonTerm.cpp29
-rw-r--r--phrase-extract/extract-mixed-syntax/NonTerm.h33
-rw-r--r--phrase-extract/extract-mixed-syntax/Parameter.cpp89
-rw-r--r--phrase-extract/extract-mixed-syntax/Phrase.cpp12
-rw-r--r--phrase-extract/extract-mixed-syntax/Phrase.h12
-rw-r--r--phrase-extract/extract-mixed-syntax/Rule.cpp794
-rw-r--r--phrase-extract/extract-mixed-syntax/Rule.h98
-rw-r--r--phrase-extract/extract-mixed-syntax/RulePhrase.cpp26
-rw-r--r--phrase-extract/extract-mixed-syntax/RulePhrase.h10
-rw-r--r--phrase-extract/extract-mixed-syntax/RuleSymbol.cpp35
-rw-r--r--phrase-extract/extract-mixed-syntax/RuleSymbol.h17
-rw-r--r--phrase-extract/extract-mixed-syntax/Rules.cpp265
-rw-r--r--phrase-extract/extract-mixed-syntax/Rules.h76
-rw-r--r--phrase-extract/extract-mixed-syntax/SyntaxTree.cpp51
-rw-r--r--phrase-extract/extract-mixed-syntax/SyntaxTree.h4
-rw-r--r--phrase-extract/extract-mixed-syntax/Word.cpp33
-rw-r--r--phrase-extract/extract-mixed-syntax/Word.h44
-rw-r--r--phrase-extract/extract-mixed-syntax/gzfilebuf.h94
-rw-r--r--phrase-extract/extract-mixed-syntax/pugixml.cpp18338
-rw-r--r--phrase-extract/filter-rule-table/StringBasedFilter.cpp2
-rw-r--r--phrase-extract/filter-rule-table/StringBasedFilter.h5
-rw-r--r--phrase-extract/filter-rule-table/TreeBasedFilter.cpp20
-rw-r--r--phrase-extract/filter-rule-table/TreeBasedFilter.h7
-rw-r--r--phrase-extract/pcfg-extract/options.h9
-rw-r--r--phrase-extract/pcfg-extract/pcfg_extract.h12
-rw-r--r--phrase-extract/pcfg-extract/rule_collection.h32
-rw-r--r--phrase-extract/pcfg-extract/rule_extractor.h16
-rw-r--r--phrase-extract/pcfg-score/options.h9
-rw-r--r--phrase-extract/pcfg-score/pcfg_score.h16
-rw-r--r--phrase-extract/pcfg-score/tree_scorer.h18
-rw-r--r--phrase-extract/score-main.cpp183
-rw-r--r--phrase-extract/score-stsg/LexicalTable.cpp4
-rw-r--r--phrase-extract/score-stsg/RuleGroup.h20
-rw-r--r--phrase-extract/score-stsg/RuleSymbol.h3
-rw-r--r--phrase-extract/score-stsg/RuleTableWriter.h4
-rw-r--r--phrase-extract/score-stsg/ScoreStsg.cpp22
-rw-r--r--phrase-extract/score-stsg/TokenizedRuleHalf.h3
-rw-r--r--phrase-extract/score-stsg/Vocabulary.h9
-rw-r--r--symal/cmd.h10
-rw-r--r--vw/Classifier.h21
-rw-r--r--vw/ClassifierFactory.cpp16
-rw-r--r--vw/Normalizer.h15
-rw-r--r--vw/VWPredictor.cpp3
406 files changed, 20048 insertions, 21038 deletions
diff --git a/OnDiskPt/Main.cpp b/OnDiskPt/Main.cpp
index 7feff85e2..c95310766 100644
--- a/OnDiskPt/Main.cpp
+++ b/OnDiskPt/Main.cpp
@@ -153,19 +153,19 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr
break;
}
case 4: {
- // store only the 3rd one (rule count)
- float val = Moses::Scan<float>(tok);
- misc[0] = val;
- break;
+ // store only the 3rd one (rule count)
+ float val = Moses::Scan<float>(tok);
+ misc[0] = val;
+ break;
}
case 5: {
- // sparse features
- sparseFeatures << tok << " ";
+ // sparse features
+ sparseFeatures << tok << " ";
break;
}
case 6: {
- property << tok << " ";
- break;
+ property << tok << " ";
+ break;
}
default:
cerr << "ERROR in line " << line << endl;
diff --git a/OnDiskPt/TargetPhrase.cpp b/OnDiskPt/TargetPhrase.cpp
index cb6135d45..cb644075a 100644
--- a/OnDiskPt/TargetPhrase.cpp
+++ b/OnDiskPt/TargetPhrase.cpp
@@ -166,10 +166,10 @@ char *TargetPhrase::WriteOtherInfoToMemory(OnDiskWrapper &onDiskWrapper, size_t
size_t propSize = m_property.size();
size_t memNeeded = sizeof(UINT64) // file pos (phrase id)
- + sizeof(UINT64) + 2 * sizeof(UINT64) * numAlign // align
- + sizeof(float) * numScores // scores
- + sizeof(UINT64) + sparseFeatureSize // sparse features string
- + sizeof(UINT64) + propSize; // property string
+ + sizeof(UINT64) + 2 * sizeof(UINT64) * numAlign // align
+ + sizeof(float) * numScores // scores
+ + sizeof(UINT64) + sparseFeatureSize // sparse features string
+ + sizeof(UINT64) + propSize; // property string
char *mem = (char*) malloc(memNeeded);
//memset(mem, 0, memNeeded);
@@ -350,13 +350,13 @@ UINT64 TargetPhrase::ReadStringFromFile(std::fstream &fileTPColl, std::string &o
bytesRead += sizeof(UINT64);
if (strSize) {
- char *mem = (char*) malloc(strSize + 1);
- mem[strSize] = '\0';
- fileTPColl.read(mem, strSize);
- outStr = string(mem);
- free(mem);
+ char *mem = (char*) malloc(strSize + 1);
+ mem[strSize] = '\0';
+ fileTPColl.read(mem, strSize);
+ outStr = string(mem);
+ free(mem);
- bytesRead += strSize;
+ bytesRead += strSize;
}
return bytesRead;
diff --git a/OnDiskPt/TargetPhrase.h b/OnDiskPt/TargetPhrase.h
index 89b7f967e..efd91a4ae 100644
--- a/OnDiskPt/TargetPhrase.h
+++ b/OnDiskPt/TargetPhrase.h
@@ -113,14 +113,12 @@ public:
virtual void DebugPrint(std::ostream &out, const Vocab &vocab) const;
- void SetProperty(const std::string &value)
- {
- m_property = value;
+ void SetProperty(const std::string &value) {
+ m_property = value;
}
- void SetSparseFeatures(const std::string &value)
- {
- m_sparseFeatures = value;
+ void SetSparseFeatures(const std::string &value) {
+ m_sparseFeatures = value;
}
};
diff --git a/OnDiskPt/Word.cpp b/OnDiskPt/Word.cpp
index 33bdb6cc5..9e6fb6502 100644
--- a/OnDiskPt/Word.cpp
+++ b/OnDiskPt/Word.cpp
@@ -105,18 +105,17 @@ void Word::ConvertToMoses(
overwrite = Moses::Word(m_isNonTerminal);
if (m_isNonTerminal) {
- const std::string &tok = vocab.GetString(m_vocabId);
- overwrite.SetFactor(0, factorColl.AddFactor(tok, m_isNonTerminal));
- }
- else {
- // TODO: this conversion should have been done at load time.
- util::TokenIter<util::SingleCharacter> tok(vocab.GetString(m_vocabId), '|');
-
- for (std::vector<Moses::FactorType>::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) {
- UTIL_THROW_IF2(!tok, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
- overwrite.SetFactor(*t, factorColl.AddFactor(*tok, m_isNonTerminal));
- }
- UTIL_THROW_IF2(tok, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
+ const std::string &tok = vocab.GetString(m_vocabId);
+ overwrite.SetFactor(0, factorColl.AddFactor(tok, m_isNonTerminal));
+ } else {
+ // TODO: this conversion should have been done at load time.
+ util::TokenIter<util::SingleCharacter> tok(vocab.GetString(m_vocabId), '|');
+
+ for (std::vector<Moses::FactorType>::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) {
+ UTIL_THROW_IF2(!tok, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
+ overwrite.SetFactor(*t, factorColl.AddFactor(*tok, m_isNonTerminal));
+ }
+ UTIL_THROW_IF2(tok, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
}
}
diff --git a/biconcor/phrase-lookup.cpp b/biconcor/phrase-lookup.cpp
index c6d1b9cdf..3ef82e73a 100644
--- a/biconcor/phrase-lookup.cpp
+++ b/biconcor/phrase-lookup.cpp
@@ -7,7 +7,8 @@ size_t lookup( string );
vector<string> tokenize( const char input[] );
SuffixArray suffixArray;
-int main(int argc, char* argv[]) {
+int main(int argc, char* argv[])
+{
// handle parameters
string query;
string fileNameSuffix;
@@ -95,14 +96,14 @@ int main(int argc, char* argv[]) {
}
cout << lookup( query ) << endl;
}
- }
- else if (queryFlag) {
+ } else if (queryFlag) {
cout << lookup( query ) << endl;
}
return 0;
}
-size_t lookup( string query ) {
+size_t lookup( string query )
+{
cerr << "query is " << query << endl;
vector< string > queryString = tokenize( query.c_str() );
return suffixArray.Count( queryString );
diff --git a/mert/FeatureStats.cpp b/mert/FeatureStats.cpp
index a0c6a6ebc..a3ed2cc9b 100644
--- a/mert/FeatureStats.cpp
+++ b/mert/FeatureStats.cpp
@@ -61,7 +61,8 @@ void SparseVector::set(const string& name, FeatureStatsType value)
m_fvector[id] = value;
}
-void SparseVector::set(size_t id, FeatureStatsType value) {
+void SparseVector::set(size_t id, FeatureStatsType value)
+{
assert(m_id_to_name.size() > id);
m_fvector[id] = value;
}
@@ -204,7 +205,7 @@ FeatureStats::FeatureStats(const size_t size)
FeatureStats::~FeatureStats()
{
- delete [] m_array;
+ delete [] m_array;
}
void FeatureStats::Copy(const FeatureStats &stats)
diff --git a/mert/ForestRescore.cpp b/mert/ForestRescore.cpp
index d17306d88..009152e35 100644
--- a/mert/ForestRescore.cpp
+++ b/mert/ForestRescore.cpp
@@ -31,9 +31,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
-namespace MosesTuning {
+namespace MosesTuning
+{
-std::ostream& operator<<(std::ostream& out, const WordVec& wordVec) {
+std::ostream& operator<<(std::ostream& out, const WordVec& wordVec)
+{
out << "[";
for (size_t i = 0; i < wordVec.size(); ++i) {
out << wordVec[i]->first;
@@ -44,7 +46,8 @@ std::ostream& operator<<(std::ostream& out, const WordVec& wordVec) {
}
-void ReferenceSet::Load(const vector<string>& files, Vocab& vocab) {
+void ReferenceSet::Load(const vector<string>& files, Vocab& vocab)
+{
for (size_t i = 0; i < files.size(); ++i) {
util::FilePiece fh(files[i].c_str());
size_t sentenceId = 0;
@@ -55,14 +58,15 @@ void ReferenceSet::Load(const vector<string>& files, Vocab& vocab) {
} catch (util::EndOfFileException &e) {
break;
}
- AddLine(sentenceId, line, vocab);
- ++sentenceId;
+ AddLine(sentenceId, line, vocab);
+ ++sentenceId;
}
}
}
-void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab) {
+void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab)
+{
//cerr << line << endl;
NgramCounter ngramCounts;
list<WordVec> openNgrams;
@@ -74,14 +78,14 @@ void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vo
openNgrams.push_front(WordVec());
for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end(); ++k) {
k->push_back(nextTok);
- ++ngramCounts[*k];
+ ++ngramCounts[*k];
}
if (openNgrams.size() >= kBleuNgramOrder) openNgrams.pop_back();
}
//merge into overall ngram map
for (NgramCounter::const_iterator ni = ngramCounts.begin();
- ni != ngramCounts.end(); ++ni) {
+ ni != ngramCounts.end(); ++ni) {
size_t count = ni->second;
//cerr << *ni << " " << count << endl;
if (ngramCounts_.size() <= sentenceId) ngramCounts_.resize(sentenceId+1);
@@ -104,8 +108,9 @@ void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vo
//cerr << endl;
}
-
-size_t ReferenceSet::NgramMatches(size_t sentenceId, const WordVec& ngram, bool clip) const {
+
+size_t ReferenceSet::NgramMatches(size_t sentenceId, const WordVec& ngram, bool clip) const
+{
const NgramMap& ngramCounts = ngramCounts_.at(sentenceId);
NgramMap::const_iterator ngi = ngramCounts.find(ngram);
if (ngi == ngramCounts.end()) return 0;
@@ -114,7 +119,8 @@ size_t ReferenceSet::NgramMatches(size_t sentenceId, const WordVec& ngram, bool
VertexState::VertexState(): bleuStats(kBleuNgramOrder), targetLength(0) {}
-void HgBleuScorer::UpdateMatches(const NgramCounter& counts, vector<FeatureStatsType>& bleuStats ) const {
+void HgBleuScorer::UpdateMatches(const NgramCounter& counts, vector<FeatureStatsType>& bleuStats ) const
+{
for (NgramCounter::const_iterator ngi = counts.begin(); ngi != counts.end(); ++ngi) {
//cerr << "Checking: " << *ngi << " matches " << references_.NgramMatches(sentenceId_,*ngi,false) << endl;
size_t order = ngi->first.size();
@@ -124,7 +130,8 @@ void HgBleuScorer::UpdateMatches(const NgramCounter& counts, vector<FeatureStats
}
}
-size_t HgBleuScorer::GetTargetLength(const Edge& edge) const {
+size_t HgBleuScorer::GetTargetLength(const Edge& edge) const
+{
size_t targetLength = 0;
for (size_t i = 0; i < edge.Words().size(); ++i) {
const Vocab::Entry* word = edge.Words()[i];
@@ -137,7 +144,8 @@ size_t HgBleuScorer::GetTargetLength(const Edge& edge) const {
return targetLength;
}
-FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vector<FeatureStatsType>& bleuStats) {
+FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vector<FeatureStatsType>& bleuStats)
+{
NgramCounter ngramCounts;
size_t childId = 0;
size_t wordId = 0;
@@ -147,7 +155,7 @@ FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vecto
bool inRightContext = false;
list<WordVec> openNgrams;
const Vocab::Entry* currentWord = NULL;
- while (wordId < edge.Words().size()) {
+ while (wordId < edge.Words().size()) {
currentWord = edge.Words()[wordId];
if (currentWord != NULL) {
++wordId;
@@ -214,7 +222,7 @@ FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vecto
}
if (openNgrams.size() >= kBleuNgramOrder) openNgrams.pop_back();
}
-
+
//Collect matches
//This edge
//cerr << "edge ngrams" << endl;
@@ -227,26 +235,27 @@ FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vecto
bleuStats[j] += vertexStates_[edge.Children()[i]].bleuStats[j];
}
}
-
+
FeatureStatsType sourceLength = head.SourceCovered();
size_t referenceLength = references_.Length(sentenceId_);
- FeatureStatsType effectiveReferenceLength =
+ FeatureStatsType effectiveReferenceLength =
sourceLength / totalSourceLength_ * referenceLength;
bleuStats[bleuStats.size()-1] = effectiveReferenceLength;
- //backgroundBleu_[backgroundBleu_.size()-1] =
+ //backgroundBleu_[backgroundBleu_.size()-1] =
// backgroundRefLength_ * sourceLength / totalSourceLength_;
FeatureStatsType bleu = sentenceLevelBackgroundBleu(bleuStats, backgroundBleu_);
return bleu;
}
-void HgBleuScorer::UpdateState(const Edge& winnerEdge, size_t vertexId, const vector<FeatureStatsType>& bleuStats) {
+void HgBleuScorer::UpdateState(const Edge& winnerEdge, size_t vertexId, const vector<FeatureStatsType>& bleuStats)
+{
//TODO: Maybe more efficient to absorb into the Score() method
VertexState& vertexState = vertexStates_[vertexId];
//cerr << "Updating state for " << vertexId << endl;
-
+
//leftContext
int wi = 0;
const VertexState* childState = NULL;
@@ -263,9 +272,9 @@ void HgBleuScorer::UpdateState(const Edge& winnerEdge, size_t vertexId, const ve
//start of child state
childState = &(vertexStates_[winnerEdge.Children()[childi++]]);
contexti = 0;
- }
+ }
if ((size_t)contexti < childState->leftContext.size()) {
- vertexState.leftContext.push_back(childState->leftContext[contexti++]);
+ vertexState.leftContext.push_back(childState->leftContext[contexti++]);
} else {
//end of child context
childState = NULL;
@@ -314,7 +323,8 @@ typedef pair<const Edge*,FeatureStatsType> BackPointer;
* Recurse through back pointers
**/
static void GetBestHypothesis(size_t vertexId, const Graph& graph, const vector<BackPointer>& bps,
- HgHypothesis* bestHypo) {
+ HgHypothesis* bestHypo)
+{
//cerr << "Expanding " << vertexId << " Score: " << bps[vertexId].second << endl;
//UTIL_THROW_IF(bps[vertexId].second == kMinScore+1, HypergraphException, "Landed at vertex " << vertexId << " which is a dead end");
if (!bps[vertexId].first) return;
@@ -334,7 +344,7 @@ static void GetBestHypothesis(size_t vertexId, const Graph& graph, const vector<
}
}
-void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references , size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu, HgHypothesis* bestHypo)
+void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references , size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu, HgHypothesis* bestHypo)
{
BackPointer init(NULL,kMinScore);
vector<BackPointer> backPointers(graph.VertexSize(),init);
@@ -349,7 +359,7 @@ void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight,
//UTIL_THROW(HypergraphException, "Vertex " << vi << " has no incoming edges");
//If no incoming edges, vertex is a dead end
backPointers[vi].first = NULL;
- backPointers[vi].second = kMinScore;
+ backPointers[vi].second = kMinScore;
} else {
//cerr << "\nVertex: " << vi << endl;
for (size_t ei = 0; ei < incoming.size(); ++ei) {
@@ -362,10 +372,10 @@ void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight,
incomingScore = max(incomingScore + backPointers[childId].second, kMinScore);
}
vector<FeatureStatsType> bleuStats(kBleuNgramOrder*2+1);
- // cerr << "Score: " << incomingScore << " Bleu: ";
- // if (incomingScore > nonbleuscore) {nonbleuscore = incomingScore; nonbleuid = ei;}
+ // cerr << "Score: " << incomingScore << " Bleu: ";
+ // if (incomingScore > nonbleuscore) {nonbleuscore = incomingScore; nonbleuid = ei;}
FeatureStatsType totalScore = incomingScore;
- if (bleuWeight) {
+ if (bleuWeight) {
FeatureStatsType bleuScore = bleuScorer.Score(*(incoming[ei]), vertex, bleuStats);
if (isnan(bleuScore)) {
cerr << "WARN: bleu score undefined" << endl;
@@ -379,7 +389,7 @@ void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight,
}
//UTIL_THROW_IF(isnan(bleuScore), util::Exception, "Bleu score undefined, smoothing problem?");
totalScore += bleuWeight * bleuScore;
- // cerr << bleuScore << " Total: " << incomingScore << endl << endl;
+ // cerr << bleuScore << " Total: " << incomingScore << endl << endl;
//cerr << "is " << incomingScore << " bs " << bleuScore << endl;
}
if (totalScore >= winnerScore) {
diff --git a/mert/ForestRescore.h b/mert/ForestRescore.h
index 900275b74..2101a9248 100644
--- a/mert/ForestRescore.h
+++ b/mert/ForestRescore.h
@@ -27,7 +27,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "BleuScorer.h"
#include "Hypergraph.h"
-namespace MosesTuning {
+namespace MosesTuning
+{
std::ostream& operator<<(std::ostream& out, const WordVec& wordVec);
@@ -47,18 +48,21 @@ struct NgramEquals : public std::binary_function<const WordVec&, const WordVec&,
typedef boost::unordered_map<WordVec, size_t, NgramHash, NgramEquals> NgramCounter;
-class ReferenceSet {
+class ReferenceSet
+{
public:
-
+
void AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab);
void Load(const std::vector<std::string>& files, Vocab& vocab);
size_t NgramMatches(size_t sentenceId, const WordVec&, bool clip) const;
- size_t Length(size_t sentenceId) const {return lengths_[sentenceId];}
+ size_t Length(size_t sentenceId) const {
+ return lengths_[sentenceId];
+ }
private:
//ngrams to (clipped,unclipped) counts
@@ -80,31 +84,32 @@ struct VertexState {
/**
* Used to score an rule (ie edge) when we are applying it.
**/
-class HgBleuScorer {
- public:
- HgBleuScorer(const ReferenceSet& references, const Graph& graph, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu):
+class HgBleuScorer
+{
+public:
+ HgBleuScorer(const ReferenceSet& references, const Graph& graph, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu):
references_(references), sentenceId_(sentenceId), graph_(graph), backgroundBleu_(backgroundBleu),
- backgroundRefLength_(backgroundBleu[kBleuNgramOrder*2]) {
- vertexStates_.resize(graph.VertexSize());
- totalSourceLength_ = graph.GetVertex(graph.VertexSize()-1).SourceCovered();
- }
-
- FeatureStatsType Score(const Edge& edge, const Vertex& head, std::vector<FeatureStatsType>& bleuStats) ;
+ backgroundRefLength_(backgroundBleu[kBleuNgramOrder*2]) {
+ vertexStates_.resize(graph.VertexSize());
+ totalSourceLength_ = graph.GetVertex(graph.VertexSize()-1).SourceCovered();
+ }
- void UpdateState(const Edge& winnerEdge, size_t vertexId, const std::vector<FeatureStatsType>& bleuStats);
+ FeatureStatsType Score(const Edge& edge, const Vertex& head, std::vector<FeatureStatsType>& bleuStats) ;
+ void UpdateState(const Edge& winnerEdge, size_t vertexId, const std::vector<FeatureStatsType>& bleuStats);
- private:
- const ReferenceSet& references_;
- std::vector<VertexState> vertexStates_;
- size_t sentenceId_;
- size_t totalSourceLength_;
- const Graph& graph_;
- std::vector<FeatureStatsType> backgroundBleu_;
- FeatureStatsType backgroundRefLength_;
- void UpdateMatches(const NgramCounter& counter, std::vector<FeatureStatsType>& bleuStats) const;
- size_t GetTargetLength(const Edge& edge) const;
+private:
+ const ReferenceSet& references_;
+ std::vector<VertexState> vertexStates_;
+ size_t sentenceId_;
+ size_t totalSourceLength_;
+ const Graph& graph_;
+ std::vector<FeatureStatsType> backgroundBleu_;
+ FeatureStatsType backgroundRefLength_;
+
+ void UpdateMatches(const NgramCounter& counter, std::vector<FeatureStatsType>& bleuStats) const;
+ size_t GetTargetLength(const Edge& edge) const;
};
struct HgHypothesis {
diff --git a/mert/ForestRescoreTest.cpp b/mert/ForestRescoreTest.cpp
index 86975d3a5..4b62e8317 100644
--- a/mert/ForestRescoreTest.cpp
+++ b/mert/ForestRescoreTest.cpp
@@ -15,7 +15,7 @@ BOOST_AUTO_TEST_CASE(viterbi_simple_lattice)
Vocab vocab;
WordVec words;
string wordStrings[] =
- {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g"};
+ {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g"};
for (size_t i = 0; i < 9; ++i) {
words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
}
@@ -102,7 +102,7 @@ BOOST_AUTO_TEST_CASE(viterbi_3branch_lattice)
Vocab vocab;
WordVec words;
string wordStrings[] =
- {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
+ {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
for (size_t i = 0; i < 13; ++i) {
words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
}
diff --git a/mert/HopeFearDecoder.cpp b/mert/HopeFearDecoder.cpp
index 1f498a202..e907d3ea0 100644
--- a/mert/HopeFearDecoder.cpp
+++ b/mert/HopeFearDecoder.cpp
@@ -34,11 +34,13 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
namespace fs = boost::filesystem;
-namespace MosesTuning {
+namespace MosesTuning
+{
static const ValType BLEU_RATIO = 5;
-ValType HopeFearDecoder::Evaluate(const AvgWeightVector& wv) {
+ValType HopeFearDecoder::Evaluate(const AvgWeightVector& wv)
+{
vector<ValType> stats(scorer_->NumberOfScores(),0);
for(reset(); !finished(); next()) {
vector<ValType> sent;
@@ -51,13 +53,14 @@ ValType HopeFearDecoder::Evaluate(const AvgWeightVector& wv) {
}
NbestHopeFearDecoder::NbestHopeFearDecoder(
- const vector<string>& featureFiles,
- const vector<string>& scoreFiles,
- bool streaming,
- bool no_shuffle,
- bool safe_hope,
- Scorer* scorer
- ) : safe_hope_(safe_hope) {
+ const vector<string>& featureFiles,
+ const vector<string>& scoreFiles,
+ bool streaming,
+ bool no_shuffle,
+ bool safe_hope,
+ Scorer* scorer
+) : safe_hope_(safe_hope)
+{
scorer_ = scorer;
if (streaming) {
train_.reset(new StreamingHypPackEnumerator(featureFiles, scoreFiles));
@@ -67,25 +70,29 @@ NbestHopeFearDecoder::NbestHopeFearDecoder(
}
-void NbestHopeFearDecoder::next() {
+void NbestHopeFearDecoder::next()
+{
train_->next();
}
-bool NbestHopeFearDecoder::finished() {
+bool NbestHopeFearDecoder::finished()
+{
return train_->finished();
}
-void NbestHopeFearDecoder::reset() {
+void NbestHopeFearDecoder::reset()
+{
train_->reset();
}
void NbestHopeFearDecoder::HopeFear(
- const std::vector<ValType>& backgroundBleu,
- const MiraWeightVector& wv,
- HopeFearData* hopeFear
- ) {
+ const std::vector<ValType>& backgroundBleu,
+ const MiraWeightVector& wv,
+ HopeFearData* hopeFear
+)
+{
+
-
// Hope / fear decode
ValType hope_scale = 1.0;
size_t hope_index=0, fear_index=0, model_index=0;
@@ -134,7 +141,8 @@ void NbestHopeFearDecoder::HopeFear(
hopeFear->hopeFearEqual = (hope_index == fear_index);
}
-void NbestHopeFearDecoder::MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats) {
+void NbestHopeFearDecoder::MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats)
+{
// Find max model
size_t max_index=0;
ValType max_score=0;
@@ -152,18 +160,19 @@ void NbestHopeFearDecoder::MaxModel(const AvgWeightVector& wv, std::vector<ValTy
HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
- (
- const string& hypergraphDir,
- const vector<string>& referenceFiles,
- size_t num_dense,
- bool streaming,
- bool no_shuffle,
- bool safe_hope,
- size_t hg_pruning,
- const MiraWeightVector& wv,
- Scorer* scorer
- ) :
- num_dense_(num_dense) {
+(
+ const string& hypergraphDir,
+ const vector<string>& referenceFiles,
+ size_t num_dense,
+ bool streaming,
+ bool no_shuffle,
+ bool safe_hope,
+ size_t hg_pruning,
+ const MiraWeightVector& wv,
+ Scorer* scorer
+) :
+ num_dense_(num_dense)
+{
UTIL_THROW_IF(streaming, util::Exception, "Streaming not currently supported for hypergraphs");
UTIL_THROW_IF(!fs::exists(hypergraphDir), HypergraphException, "Directory '" << hypergraphDir << "' does not exist");
@@ -177,17 +186,17 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
static const string kWeights = "weights";
fs::directory_iterator dend;
size_t fileCount = 0;
-
+
cerr << "Reading hypergraphs" << endl;
for (fs::directory_iterator di(hypergraphDir); di != dend; ++di) {
const fs::path& hgpath = di->path();
if (hgpath.filename() == kWeights) continue;
- // cerr << "Reading " << hgpath.filename() << endl;
+ // cerr << "Reading " << hgpath.filename() << endl;
Graph graph(vocab_);
size_t id = boost::lexical_cast<size_t>(hgpath.stem().string());
util::scoped_fd fd(util::OpenReadOrThrow(hgpath.string().c_str()));
//util::FilePiece file(di->path().string().c_str());
- util::FilePiece file(fd.release());
+ util::FilePiece file(fd.release());
ReadGraph(file,graph);
//cerr << "ref length " << references_.Length(id) << endl;
@@ -196,7 +205,7 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
prunedGraph.reset(new Graph(vocab_));
graph.Prune(prunedGraph.get(), weights, edgeCount);
graphs_[id] = prunedGraph;
- // cerr << "Pruning to v=" << graphs_[id]->VertexSize() << " e=" << graphs_[id]->EdgeSize() << endl;
+ // cerr << "Pruning to v=" << graphs_[id]->VertexSize() << " e=" << graphs_[id]->EdgeSize() << endl;
++fileCount;
if (fileCount % 10 == 0) cerr << ".";
if (fileCount % 400 == 0) cerr << " [count=" << fileCount << "]\n";
@@ -211,23 +220,27 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
}
-void HypergraphHopeFearDecoder::reset() {
+void HypergraphHopeFearDecoder::reset()
+{
sentenceIdIter_ = sentenceIds_.begin();
}
-void HypergraphHopeFearDecoder::next() {
+void HypergraphHopeFearDecoder::next()
+{
sentenceIdIter_++;
}
-bool HypergraphHopeFearDecoder::finished() {
+bool HypergraphHopeFearDecoder::finished()
+{
return sentenceIdIter_ == sentenceIds_.end();
}
void HypergraphHopeFearDecoder::HopeFear(
- const vector<ValType>& backgroundBleu,
- const MiraWeightVector& wv,
- HopeFearData* hopeFear
- ) {
+ const vector<ValType>& backgroundBleu,
+ const MiraWeightVector& wv,
+ HopeFearData* hopeFear
+)
+{
size_t sentenceId = *sentenceIdIter_;
SparseVector weights;
wv.ToSparse(&weights);
@@ -247,12 +260,12 @@ void HypergraphHopeFearDecoder::HopeFear(
Viterbi(graph, weights, 0, references_, sentenceId, backgroundBleu, &modelHypo);
- // Outer loop rescales the contribution of model score to 'hope' in antagonistic cases
+ // Outer loop rescales the contribution of model score to 'hope' in antagonistic cases
// where model score is having far more influence than BLEU
- // hope_bleu *= BLEU_RATIO; // We only care about cases where model has MUCH more influence than BLEU
- // if(safe_hope_ && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
- // hope_scale = abs(hope_bleu) / abs(hope_model);
- // else break;
+ // hope_bleu *= BLEU_RATIO; // We only care about cases where model has MUCH more influence than BLEU
+ // if(safe_hope_ && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
+ // hope_scale = abs(hope_bleu) / abs(hope_model);
+ // else break;
//TODO: Don't currently get model and bleu so commented this out for now.
break;
}
@@ -311,15 +324,16 @@ void HypergraphHopeFearDecoder::HopeFear(
if (hopeFear->hopeFearEqual) {
for (size_t i = 0; i < fearStats.size(); ++i) {
if (fearStats[i] != hopeFear->hopeStats[i]) {
- hopeFear->hopeFearEqual = false;
- break;
+ hopeFear->hopeFearEqual = false;
+ break;
}
}
}
hopeFear->hopeFearEqual = hopeFear->hopeFearEqual && (hopeFear->fearFeatures == hopeFear->hopeFeatures);
}
-void HypergraphHopeFearDecoder::MaxModel(const AvgWeightVector& wv, vector<ValType>* stats) {
+void HypergraphHopeFearDecoder::MaxModel(const AvgWeightVector& wv, vector<ValType>* stats)
+{
assert(!finished());
HgHypothesis bestHypo;
size_t sentenceId = *sentenceIdIter_;
diff --git a/mert/HopeFearDecoder.h b/mert/HopeFearDecoder.h
index d1881eeb2..e0579032c 100644
--- a/mert/HopeFearDecoder.h
+++ b/mert/HopeFearDecoder.h
@@ -35,7 +35,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
// the n-best list and lattice/hypergraph implementations
//
-namespace MosesTuning {
+namespace MosesTuning
+{
class Scorer;
@@ -44,7 +45,7 @@ struct HopeFearData {
MiraFeatureVector modelFeatures;
MiraFeatureVector hopeFeatures;
MiraFeatureVector fearFeatures;
-
+
std::vector<float> modelStats;
std::vector<float> hopeStats;
@@ -55,7 +56,8 @@ struct HopeFearData {
};
//Abstract base class
-class HopeFearDecoder {
+class HopeFearDecoder
+{
public:
//iterator methods
virtual void reset() = 0;
@@ -68,10 +70,10 @@ public:
* Calculate hope, fear and model hypotheses
**/
virtual void HopeFear(
- const std::vector<ValType>& backgroundBleu,
- const MiraWeightVector& wv,
- HopeFearData* hopeFear
- ) = 0;
+ const std::vector<ValType>& backgroundBleu,
+ const MiraWeightVector& wv,
+ HopeFearData* hopeFear
+ ) = 0;
/** Max score decoding */
virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats)
@@ -86,25 +88,26 @@ protected:
/** Gets hope-fear from nbest lists */
-class NbestHopeFearDecoder : public virtual HopeFearDecoder {
+class NbestHopeFearDecoder : public virtual HopeFearDecoder
+{
public:
NbestHopeFearDecoder(const std::vector<std::string>& featureFiles,
- const std::vector<std::string>& scoreFiles,
- bool streaming,
- bool no_shuffle,
- bool safe_hope,
- Scorer* scorer
- );
+ const std::vector<std::string>& scoreFiles,
+ bool streaming,
+ bool no_shuffle,
+ bool safe_hope,
+ Scorer* scorer
+ );
virtual void reset();
virtual void next();
virtual bool finished();
virtual void HopeFear(
- const std::vector<ValType>& backgroundBleu,
- const MiraWeightVector& wv,
- HopeFearData* hopeFear
- );
+ const std::vector<ValType>& backgroundBleu,
+ const MiraWeightVector& wv,
+ HopeFearData* hopeFear
+ );
virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats);
@@ -117,29 +120,30 @@ private:
/** Gets hope-fear from hypergraphs */
-class HypergraphHopeFearDecoder : public virtual HopeFearDecoder {
+class HypergraphHopeFearDecoder : public virtual HopeFearDecoder
+{
public:
HypergraphHopeFearDecoder(
- const std::string& hypergraphDir,
- const std::vector<std::string>& referenceFiles,
- size_t num_dense,
- bool streaming,
- bool no_shuffle,
- bool safe_hope,
- size_t hg_pruning,
- const MiraWeightVector& wv,
- Scorer* scorer_
- );
+ const std::string& hypergraphDir,
+ const std::vector<std::string>& referenceFiles,
+ size_t num_dense,
+ bool streaming,
+ bool no_shuffle,
+ bool safe_hope,
+ size_t hg_pruning,
+ const MiraWeightVector& wv,
+ Scorer* scorer_
+ );
virtual void reset();
virtual void next();
virtual bool finished();
virtual void HopeFear(
- const std::vector<ValType>& backgroundBleu,
- const MiraWeightVector& wv,
- HopeFearData* hopeFear
- );
+ const std::vector<ValType>& backgroundBleu,
+ const MiraWeightVector& wv,
+ HopeFearData* hopeFear
+ );
virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats);
diff --git a/mert/HwcmScorer.cpp b/mert/HwcmScorer.cpp
index 6aff77def..bb3cd4382 100644
--- a/mert/HwcmScorer.cpp
+++ b/mert/HwcmScorer.cpp
@@ -55,7 +55,8 @@ void HwcmScorer::setReferenceFiles(const vector<string>& referenceFiles)
}
-void HwcmScorer::extractHeadWordChain(TreePointer tree, vector<string> & history, vector<map<string, int> > & hwc) {
+void HwcmScorer::extractHeadWordChain(TreePointer tree, vector<string> & history, vector<map<string, int> > & hwc)
+{
if (tree->GetLength() > 0) {
string head = getHead(tree);
@@ -64,8 +65,7 @@ void HwcmScorer::extractHeadWordChain(TreePointer tree, vector<string> & history
for (std::vector<TreePointer>::const_iterator it = tree->GetChildren().begin(); it != tree->GetChildren().end(); ++it) {
extractHeadWordChain(*it, history, hwc);
}
- }
- else {
+ } else {
vector<string> new_history(kHwcmOrder);
new_history[0] = head;
hwc[0][head]++;
@@ -85,11 +85,11 @@ void HwcmScorer::extractHeadWordChain(TreePointer tree, vector<string> & history
}
}
-string HwcmScorer::getHead(TreePointer tree) {
+string HwcmScorer::getHead(TreePointer tree)
+{
// assumption (only true for dependency parse: each constituent has a preterminal label, and corresponding terminal is head)
// if constituent has multiple preterminals, first one is picked; if it has no preterminals, empty string is returned
- for (std::vector<TreePointer>::const_iterator it = tree->GetChildren().begin(); it != tree->GetChildren().end(); ++it)
- {
+ for (std::vector<TreePointer>::const_iterator it = tree->GetChildren().begin(); it != tree->GetChildren().end(); ++it) {
TreePointer child = *it;
if (child->GetLength() == 1 && child->GetChildren()[0]->IsTerminal()) {
diff --git a/mert/Hypergraph.cpp b/mert/Hypergraph.cpp
index f88e808f1..a8087acb5 100644
--- a/mert/Hypergraph.cpp
+++ b/mert/Hypergraph.cpp
@@ -31,18 +31,22 @@ using namespace std;
static const string kBOS = "<s>";
static const string kEOS = "</s>";
-namespace MosesTuning {
+namespace MosesTuning
+{
-StringPiece NextLine(util::FilePiece& from) {
+StringPiece NextLine(util::FilePiece& from)
+{
StringPiece line;
while ((line = from.ReadLine()).starts_with("#"));
return line;
}
-Vocab::Vocab() : eos_( FindOrAdd(kEOS)), bos_(FindOrAdd(kBOS)){
+Vocab::Vocab() : eos_( FindOrAdd(kEOS)), bos_(FindOrAdd(kBOS))
+{
}
-const Vocab::Entry &Vocab::FindOrAdd(const StringPiece &str) {
+const Vocab::Entry &Vocab::FindOrAdd(const StringPiece &str)
+{
#if BOOST_VERSION >= 104200
Map::const_iterator i= map_.find(str, Hash(), Equals());
#else
@@ -62,7 +66,8 @@ double_conversion::StringToDoubleConverter converter(double_conversion::StringTo
/**
* Reads an incoming edge. Returns edge and source words covered.
**/
-static pair<Edge*,size_t> ReadEdge(util::FilePiece &from, Graph &graph) {
+static pair<Edge*,size_t> ReadEdge(util::FilePiece &from, Graph &graph)
+{
Edge* edge = graph.NewEdge();
StringPiece line = from.ReadLine(); //Don't allow comments within edge lists
util::TokenIter<util::MultiCharacter> pipes(line, util::MultiCharacter(" ||| "));
@@ -82,7 +87,7 @@ static pair<Edge*,size_t> ReadEdge(util::FilePiece &from, Graph &graph) {
edge->AddWord(&found);
}
}
-
+
//Features
++pipes;
for (util::TokenIter<util::SingleCharacter, true> i(*pipes, util::SingleCharacter(' ')); i; ++i) {
@@ -100,17 +105,18 @@ static pair<Edge*,size_t> ReadEdge(util::FilePiece &from, Graph &graph) {
//Covered words
++pipes;
size_t sourceCovered = boost::lexical_cast<size_t>(*pipes);
- return pair<Edge*,size_t>(edge,sourceCovered);
+ return pair<Edge*,size_t>(edge,sourceCovered);
}
-void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeCount) const {
+void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeCount) const
+{
Graph& newGraph = *pNewGraph;
//TODO: Optimise case where no pruning required
//For debug
-
-
+
+
/*
map<const Edge*, string> edgeIds;
for (size_t i = 0; i < edges_.Size(); ++i) {
@@ -136,7 +142,7 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
//Compute backward scores
for (size_t vi = 0; vi < vertices_.Size(); ++vi) {
- // cerr << "Vertex " << vi << endl;
+ // cerr << "Vertex " << vi << endl;
const Vertex& vertex = vertices_[vi];
const vector<const Edge*>& incoming = vertex.GetIncoming();
if (!incoming.size()) {
@@ -150,7 +156,7 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
//cerr << "\tChild " << incoming[ei]->Children()[i] << endl;
size_t childId = incoming[ei]->Children()[i];
UTIL_THROW_IF(vertexBackwardScores[childId] == kMinScore,
- HypergraphException, "Graph was not topologically sorted. curr=" << vi << " prev=" << childId);
+ HypergraphException, "Graph was not topologically sorted. curr=" << vi << " prev=" << childId);
outgoing[childId].push_back(incoming[ei]);
incomingScore += vertexBackwardScores[childId];
}
@@ -172,7 +178,7 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
} else {
for (size_t ei = 0; ei < outgoing[vi].size(); ++ei) {
//cerr << "Edge " << edgeIds[outgoing[vi][ei]] << endl;
- FeatureStatsType outgoingScore = 0;
+ FeatureStatsType outgoingScore = 0;
//add score of head
outgoingScore += vertexForwardScores[edgeHeads[outgoing[vi][ei]]];
//cerr << "Forward score " << outgoingScore << endl;
@@ -204,11 +210,11 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
}
FeatureStatsType score = edgeForwardScores[edge] + edgeBackwardScores[edge];
edgeScores.insert(pair<FeatureStatsType, const Edge*>(score,edge));
- // cerr << edgeIds[edge] << " " << score << endl;
+ // cerr << edgeIds[edge] << " " << score << endl;
}
-
+
multimap<FeatureStatsType, const Edge*>::const_reverse_iterator ei = edgeScores.rbegin();
size_t edgeCount = 1;
while(edgeCount < minEdgeCount && ei != edgeScores.rend()) {
@@ -235,10 +241,10 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
map<size_t,size_t> oldIdToNew;
size_t vi = 0;
for (set<size_t>::const_iterator i = retainedVertices.begin(); i != retainedVertices.end(); ++i, ++vi) {
- // cerr << *i << " New: " << vi << endl;
+// cerr << *i << " New: " << vi << endl;
oldIdToNew[*i] = vi;
Vertex* vertex = newGraph.NewVertex();
- vertex->SetSourceCovered(vertices_[*i].SourceCovered());
+ vertex->SetSourceCovered(vertices_[*i].SourceCovered());
}
for (set<const Edge*>::const_iterator i = retainedEdges.begin(); i != retainedEdges.end(); ++i) {
@@ -255,7 +261,7 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
newHead.AddEdge(newEdge);
}
-
+
/*
cerr << "New graph" << endl;
for (size_t vi = 0; vi < newGraph.VertexSize(); ++vi) {
@@ -275,21 +281,22 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
}
cerr << endl;
}
-
-*/
+
+ */
}
/**
* Read from "Kenneth's hypergraph" aka cdec target_graph format (with comments)
**/
-void ReadGraph(util::FilePiece &from, Graph &graph) {
+void ReadGraph(util::FilePiece &from, Graph &graph)
+{
//First line should contain field names
StringPiece line = from.ReadLine();
UTIL_THROW_IF(line.compare("# target ||| features ||| source-covered") != 0, HypergraphException, "Incorrect format spec on first line: '" << line << "'");
line = NextLine(from);
-
+
//Then expect numbers of vertices
util::TokenIter<util::SingleCharacter, false> i(line, util::SingleCharacter(' '));
unsigned long int vertices = boost::lexical_cast<unsigned long int>(*i);
@@ -304,9 +311,11 @@ void ReadGraph(util::FilePiece &from, Graph &graph) {
for (unsigned long int e = 0; e < edge_count; ++e) {
pair<Edge*,size_t> edge = ReadEdge(from, graph);
vertex->AddEdge(edge.first);
- //Note: the file format attaches this to the edge, but it's really a property
+ //Note: the file format attaches this to the edge, but it's really a property
//of the vertex.
- if (!e) {vertex->SetSourceCovered(edge.second);}
+ if (!e) {
+ vertex->SetSourceCovered(edge.second);
+ }
}
}
}
diff --git a/mert/Hypergraph.h b/mert/Hypergraph.h
index 1f6475f02..14226fb48 100644
--- a/mert/Hypergraph.h
+++ b/mert/Hypergraph.h
@@ -37,81 +37,88 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "FeatureStats.h"
-namespace MosesTuning {
+namespace MosesTuning
+{
typedef unsigned int WordIndex;
const WordIndex kMaxWordIndex = UINT_MAX;
const FeatureStatsType kMinScore = -1e10;
-template <class T> class FixedAllocator : boost::noncopyable {
- public:
- FixedAllocator() : current_(NULL), end_(NULL) {}
-
- void Init(std::size_t count) {
- assert(!current_);
- array_.reset(new T[count]);
- current_ = array_.get();
- end_ = current_ + count;
- }
-
- T &operator[](std::size_t idx) {
- return array_.get()[idx];
- }
- const T &operator[](std::size_t idx) const {
- return array_.get()[idx];
- }
-
- T *New() {
- T *ret = current_++;
- UTIL_THROW_IF(ret >= end_, util::Exception, "Allocating past end");
- return ret;
- }
-
- std::size_t Capacity() const {
- return end_ - array_.get();
- }
-
- std::size_t Size() const {
- return current_ - array_.get();
- }
-
- private:
- boost::scoped_array<T> array_;
- T *current_, *end_;
+template <class T> class FixedAllocator : boost::noncopyable
+{
+public:
+ FixedAllocator() : current_(NULL), end_(NULL) {}
+
+ void Init(std::size_t count) {
+ assert(!current_);
+ array_.reset(new T[count]);
+ current_ = array_.get();
+ end_ = current_ + count;
+ }
+
+ T &operator[](std::size_t idx) {
+ return array_.get()[idx];
+ }
+ const T &operator[](std::size_t idx) const {
+ return array_.get()[idx];
+ }
+
+ T *New() {
+ T *ret = current_++;
+ UTIL_THROW_IF(ret >= end_, util::Exception, "Allocating past end");
+ return ret;
+ }
+
+ std::size_t Capacity() const {
+ return end_ - array_.get();
+ }
+
+ std::size_t Size() const {
+ return current_ - array_.get();
+ }
+
+private:
+ boost::scoped_array<T> array_;
+ T *current_, *end_;
};
-class Vocab {
- public:
- Vocab();
+class Vocab
+{
+public:
+ Vocab();
- typedef std::pair<const char *const, WordIndex> Entry;
+ typedef std::pair<const char *const, WordIndex> Entry;
- const Entry &FindOrAdd(const StringPiece &str);
+ const Entry &FindOrAdd(const StringPiece &str);
- const Entry& Bos() const {return bos_;}
+ const Entry& Bos() const {
+ return bos_;
+ }
- const Entry& Eos() const {return eos_;}
+ const Entry& Eos() const {
+ return eos_;
+ }
- private:
- util::Pool piece_backing_;
+private:
+ util::Pool piece_backing_;
- struct Hash : public std::unary_function<const char *, std::size_t> {
- std::size_t operator()(StringPiece str) const {
- return util::MurmurHashNative(str.data(), str.size());
- }
- };
+ struct Hash : public std::unary_function<const char *, std::size_t> {
+ std::size_t operator()(StringPiece str) const {
+ return util::MurmurHashNative(str.data(), str.size());
+ }
+ };
- struct Equals : public std::binary_function<const char *, const char *, bool> {
- bool operator()(StringPiece first, StringPiece second) const {
- return first == second;
- }
- };
+ struct Equals : public std::binary_function<const char *, const char *, bool> {
+ bool operator()(StringPiece first, StringPiece second) const {
+ return first == second;
+ }
+ };
- typedef boost::unordered_map<const char *, WordIndex, Hash, Equals> Map;
- Map map_;
- Entry eos_;
- Entry bos_;
+ typedef boost::unordered_map<const char *, WordIndex, Hash, Equals> Map;
+ Map map_;
+ Entry eos_;
+ Entry bos_;
};
@@ -125,121 +132,141 @@ typedef boost::shared_ptr<SparseVector> FeaturePtr;
/**
* An edge has 1 head vertex, 0..n child (tail) vertices, a list of words and a feature vector.
**/
-class Edge {
- public:
- Edge() {features_.reset(new SparseVector());}
-
- void AddWord(const Vocab::Entry *word) {
- words_.push_back(word);
- }
-
- void AddChild(size_t child) {
- children_.push_back(child);
- }
-
- void AddFeature(const StringPiece& name, FeatureStatsType value) {
- //TODO StringPiece interface
- features_->set(name.as_string(),value);
- }
-
-
- const WordVec &Words() const {
- return words_;
- }
-
- const FeaturePtr& Features() const {
- return features_;
- }
-
- void SetFeatures(const FeaturePtr& features) {
- features_ = features;
- }
-
- const std::vector<size_t>& Children() const {
- return children_;
- }
-
- FeatureStatsType GetScore(const SparseVector& weights) const {
- return inner_product(*(features_.get()), weights);
- }
-
- private:
- // NULL for non-terminals.
- std::vector<const Vocab::Entry*> words_;
- std::vector<size_t> children_;
- boost::shared_ptr<SparseVector> features_;
+class Edge
+{
+public:
+ Edge() {
+ features_.reset(new SparseVector());
+ }
+
+ void AddWord(const Vocab::Entry *word) {
+ words_.push_back(word);
+ }
+
+ void AddChild(size_t child) {
+ children_.push_back(child);
+ }
+
+ void AddFeature(const StringPiece& name, FeatureStatsType value) {
+ //TODO StringPiece interface
+ features_->set(name.as_string(),value);
+ }
+
+
+ const WordVec &Words() const {
+ return words_;
+ }
+
+ const FeaturePtr& Features() const {
+ return features_;
+ }
+
+ void SetFeatures(const FeaturePtr& features) {
+ features_ = features;
+ }
+
+ const std::vector<size_t>& Children() const {
+ return children_;
+ }
+
+ FeatureStatsType GetScore(const SparseVector& weights) const {
+ return inner_product(*(features_.get()), weights);
+ }
+
+private:
+ // NULL for non-terminals.
+ std::vector<const Vocab::Entry*> words_;
+ std::vector<size_t> children_;
+ boost::shared_ptr<SparseVector> features_;
};
/*
* A vertex has 0..n incoming edges
**/
-class Vertex {
- public:
- Vertex() : sourceCovered_(0) {}
-
- void AddEdge(const Edge* edge) {incoming_.push_back(edge);}
-
- void SetSourceCovered(size_t sourceCovered) {sourceCovered_ = sourceCovered;}
-
- const std::vector<const Edge*>& GetIncoming() const {return incoming_;}
-
- size_t SourceCovered() const {return sourceCovered_;}
-
- private:
- std::vector<const Edge*> incoming_;
- size_t sourceCovered_;
+class Vertex
+{
+public:
+ Vertex() : sourceCovered_(0) {}
+
+ void AddEdge(const Edge* edge) {
+ incoming_.push_back(edge);
+ }
+
+ void SetSourceCovered(size_t sourceCovered) {
+ sourceCovered_ = sourceCovered;
+ }
+
+ const std::vector<const Edge*>& GetIncoming() const {
+ return incoming_;
+ }
+
+ size_t SourceCovered() const {
+ return sourceCovered_;
+ }
+
+private:
+ std::vector<const Edge*> incoming_;
+ size_t sourceCovered_;
};
-class Graph : boost::noncopyable {
- public:
- Graph(Vocab& vocab) : vocab_(vocab) {}
-
- void SetCounts(std::size_t vertices, std::size_t edges) {
- vertices_.Init(vertices);
- edges_.Init(edges);
- }
-
- Vocab &MutableVocab() { return vocab_; }
-
- Edge *NewEdge() {
- return edges_.New();
- }
-
- Vertex *NewVertex() {
- return vertices_.New();
- }
-
- const Vertex &GetVertex(std::size_t index) const {
- return vertices_[index];
- }
-
- Edge &GetEdge(std::size_t index) {
- return edges_[index];
- }
-
- /* Created a pruned copy of this graph with minEdgeCount edges. Uses
- the scores in the max-product semiring to rank edges, as suggested by
- Colin Cherry */
- void Prune(Graph* newGraph, const SparseVector& weights, size_t minEdgeCount) const;
-
- std::size_t VertexSize() const { return vertices_.Size(); }
- std::size_t EdgeSize() const { return edges_.Size(); }
-
- bool IsBoundary(const Vocab::Entry* word) const {
- return word->second == vocab_.Bos().second || word->second == vocab_.Eos().second;
- }
-
- private:
- FixedAllocator<Edge> edges_;
- FixedAllocator<Vertex> vertices_;
- Vocab& vocab_;
+class Graph : boost::noncopyable
+{
+public:
+ Graph(Vocab& vocab) : vocab_(vocab) {}
+
+ void SetCounts(std::size_t vertices, std::size_t edges) {
+ vertices_.Init(vertices);
+ edges_.Init(edges);
+ }
+
+ Vocab &MutableVocab() {
+ return vocab_;
+ }
+
+ Edge *NewEdge() {
+ return edges_.New();
+ }
+
+ Vertex *NewVertex() {
+ return vertices_.New();
+ }
+
+ const Vertex &GetVertex(std::size_t index) const {
+ return vertices_[index];
+ }
+
+ Edge &GetEdge(std::size_t index) {
+ return edges_[index];
+ }
+
+ /* Created a pruned copy of this graph with minEdgeCount edges. Uses
+ the scores in the max-product semiring to rank edges, as suggested by
+ Colin Cherry */
+ void Prune(Graph* newGraph, const SparseVector& weights, size_t minEdgeCount) const;
+
+ std::size_t VertexSize() const {
+ return vertices_.Size();
+ }
+ std::size_t EdgeSize() const {
+ return edges_.Size();
+ }
+
+ bool IsBoundary(const Vocab::Entry* word) const {
+ return word->second == vocab_.Bos().second || word->second == vocab_.Eos().second;
+ }
+
+private:
+ FixedAllocator<Edge> edges_;
+ FixedAllocator<Vertex> vertices_;
+ Vocab& vocab_;
};
-class HypergraphException : public util::Exception {
- public:
- HypergraphException() {}
- ~HypergraphException() throw() {}
+class HypergraphException : public util::Exception
+{
+public:
+ HypergraphException() {}
+ ~HypergraphException() throw() {}
};
diff --git a/mert/HypergraphTest.cpp b/mert/HypergraphTest.cpp
index 345a445f0..0dc1c04c6 100644
--- a/mert/HypergraphTest.cpp
+++ b/mert/HypergraphTest.cpp
@@ -8,12 +8,12 @@
using namespace std;
using namespace MosesTuning;
-BOOST_AUTO_TEST_CASE(prune)
+BOOST_AUTO_TEST_CASE(prune)
{
Vocab vocab;
WordVec words;
string wordStrings[] =
- {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
+ {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
for (size_t i = 0; i < 13; ++i) {
words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
}
@@ -105,7 +105,7 @@ BOOST_AUTO_TEST_CASE(prune)
BOOST_CHECK_EQUAL(5, pruned.EdgeSize());
BOOST_CHECK_EQUAL(4, pruned.VertexSize());
-
+
//edges retained should be best path (<s> ab jk </s>) and hi
BOOST_CHECK_EQUAL(1, pruned.GetVertex(0).GetIncoming().size());
BOOST_CHECK_EQUAL(2, pruned.GetVertex(1).GetIncoming().size());
@@ -115,37 +115,37 @@ BOOST_AUTO_TEST_CASE(prune)
const Edge* edge;
edge = pruned.GetVertex(0).GetIncoming()[0];
- BOOST_CHECK_EQUAL(1, edge->Words().size());
- BOOST_CHECK_EQUAL(words[0], edge->Words()[0]);
+ BOOST_CHECK_EQUAL(1, edge->Words().size());
+ BOOST_CHECK_EQUAL(words[0], edge->Words()[0]);
edge = pruned.GetVertex(1).GetIncoming()[0];
- BOOST_CHECK_EQUAL(3, edge->Words().size());
- BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
- BOOST_CHECK_EQUAL(words[2]->first, edge->Words()[1]->first);
- BOOST_CHECK_EQUAL(words[3]->first, edge->Words()[2]->first);
+ BOOST_CHECK_EQUAL(3, edge->Words().size());
+ BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
+ BOOST_CHECK_EQUAL(words[2]->first, edge->Words()[1]->first);
+ BOOST_CHECK_EQUAL(words[3]->first, edge->Words()[2]->first);
edge = pruned.GetVertex(1).GetIncoming()[1];
BOOST_CHECK_EQUAL(3, edge->Words().size());
BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
- BOOST_CHECK_EQUAL(words[9]->first, edge->Words()[1]->first);
+ BOOST_CHECK_EQUAL(words[9]->first, edge->Words()[1]->first);
BOOST_CHECK_EQUAL(words[10]->first, edge->Words()[2]->first);
edge = pruned.GetVertex(2).GetIncoming()[0];
BOOST_CHECK_EQUAL(3, edge->Words().size());
BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
- BOOST_CHECK_EQUAL(words[11]->first, edge->Words()[1]->first);
+ BOOST_CHECK_EQUAL(words[11]->first, edge->Words()[1]->first);
BOOST_CHECK_EQUAL(words[12]->first, edge->Words()[2]->first);
edge = pruned.GetVertex(3).GetIncoming()[0];
BOOST_CHECK_EQUAL(2, edge->Words().size());
BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
- BOOST_CHECK_EQUAL(words[1]->first, edge->Words()[1]->first);
+ BOOST_CHECK_EQUAL(words[1]->first, edge->Words()[1]->first);
+
+
-
+// BOOST_CHECK_EQUAL(words[0], pruned.GetVertex(0).GetIncoming()[0].Words()[0]);
-// BOOST_CHECK_EQUAL(words[0], pruned.GetVertex(0).GetIncoming()[0].Words()[0]);
-
}
diff --git a/mert/InterpolatedScorer.cpp b/mert/InterpolatedScorer.cpp
index ea4240472..b8ec3a855 100644
--- a/mert/InterpolatedScorer.cpp
+++ b/mert/InterpolatedScorer.cpp
@@ -174,19 +174,19 @@ float InterpolatedScorer::calculateScore(const std::vector<ScoreStatsType>& tota
float InterpolatedScorer::getReferenceLength(const std::vector<ScoreStatsType>& totals) const
{
- size_t scorerNum = 0;
- size_t last = 0;
- float refLen = 0;
- for (ScopedVector<Scorer>::const_iterator itsc = m_scorers.begin();
- itsc != m_scorers.end(); ++itsc) {
- int numScoresScorer = (*itsc)->NumberOfScores();
- std::vector<ScoreStatsType> totals_scorer(totals.begin()+last, totals.begin()+last+numScoresScorer);
- refLen += (*itsc)->getReferenceLength(totals_scorer) * m_scorer_weights[scorerNum];
- last += numScoresScorer;
- scorerNum++;
- }
- return refLen;
+ size_t scorerNum = 0;
+ size_t last = 0;
+ float refLen = 0;
+ for (ScopedVector<Scorer>::const_iterator itsc = m_scorers.begin();
+ itsc != m_scorers.end(); ++itsc) {
+ int numScoresScorer = (*itsc)->NumberOfScores();
+ std::vector<ScoreStatsType> totals_scorer(totals.begin()+last, totals.begin()+last+numScoresScorer);
+ refLen += (*itsc)->getReferenceLength(totals_scorer) * m_scorer_weights[scorerNum];
+ last += numScoresScorer;
+ scorerNum++;
}
+ return refLen;
+}
void InterpolatedScorer::setReferenceFiles(const vector<string>& referenceFiles)
{
diff --git a/mert/MiraFeatureVector.cpp b/mert/MiraFeatureVector.cpp
index 347ad488e..ad3588339 100644
--- a/mert/MiraFeatureVector.cpp
+++ b/mert/MiraFeatureVector.cpp
@@ -9,7 +9,8 @@ namespace MosesTuning
{
-void MiraFeatureVector::InitSparse(const SparseVector& sparse, size_t ignoreLimit) {
+void MiraFeatureVector::InitSparse(const SparseVector& sparse, size_t ignoreLimit)
+{
vector<size_t> sparseFeats = sparse.feats();
bool bFirst = true;
size_t lastFeat = 0;
@@ -40,7 +41,8 @@ MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
InitSparse(vec.sparse);
}
-MiraFeatureVector::MiraFeatureVector(const SparseVector& sparse, size_t num_dense) {
+MiraFeatureVector::MiraFeatureVector(const SparseVector& sparse, size_t num_dense)
+{
m_dense.resize(num_dense);
//Assume that features with id [0,num_dense) are the dense features
for (size_t id = 0; id < num_dense; ++id) {
@@ -162,7 +164,8 @@ MiraFeatureVector operator-(const MiraFeatureVector& a, const MiraFeatureVector&
return MiraFeatureVector(dense,sparseFeats,sparseVals);
}
-bool operator==(const MiraFeatureVector& a,const MiraFeatureVector& b) {
+bool operator==(const MiraFeatureVector& a,const MiraFeatureVector& b)
+{
ValType eps = 1e-8;
//dense features
if (a.m_dense.size() != b.m_dense.size()) return false;
diff --git a/mert/MiraWeightVector.cpp b/mert/MiraWeightVector.cpp
index c8a1ca774..eba9617c8 100644
--- a/mert/MiraWeightVector.cpp
+++ b/mert/MiraWeightVector.cpp
@@ -93,7 +93,8 @@ void MiraWeightVector::update(size_t index, ValType delta)
m_lastUpdated[index] = m_numUpdates;
}
-void MiraWeightVector::ToSparse(SparseVector* sparse) const {
+void MiraWeightVector::ToSparse(SparseVector* sparse) const
+{
for (size_t i = 0; i < m_weights.size(); ++i) {
if(abs(m_weights[i])>1e-8) {
sparse->set(i,m_weights[i]);
@@ -171,7 +172,8 @@ size_t AvgWeightVector::size() const
return m_wv.m_weights.size();
}
-void AvgWeightVector::ToSparse(SparseVector* sparse) const {
+void AvgWeightVector::ToSparse(SparseVector* sparse) const
+{
for (size_t i = 0; i < size(); ++i) {
ValType w = weight(i);
if(abs(w)>1e-8) {
diff --git a/mert/StatisticsBasedScorer.h b/mert/StatisticsBasedScorer.h
index f1c77e0ba..ba45634cc 100644
--- a/mert/StatisticsBasedScorer.h
+++ b/mert/StatisticsBasedScorer.h
@@ -23,7 +23,7 @@ namespace MosesTuning
*/
class StatisticsBasedScorer : public Scorer
{
-friend class HopeFearDecoder;
+ friend class HopeFearDecoder;
public:
StatisticsBasedScorer(const std::string& name, const std::string& config);
diff --git a/mert/TER/alignmentStruct.cpp b/mert/TER/alignmentStruct.cpp
index 544ee61ac..e42ec4a14 100644
--- a/mert/TER/alignmentStruct.cpp
+++ b/mert/TER/alignmentStruct.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -23,15 +23,15 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
using namespace std;
namespace TERCpp
{
- string alignmentStruct::toString()
- {
- stringstream s;
+string alignmentStruct::toString()
+{
+ stringstream s;
// s << "nword : " << vectorToString(nwords)<<endl;
// s << "alignment" << vectorToString(alignment)<<endl;
// s << "afterShift" << vectorToString(alignment)<<endl;
- s << "Nothing to be printed" <<endl;
- return s.str();
- }
+ s << "Nothing to be printed" <<endl;
+ return s.str();
+}
// alignmentStruct::alignmentStruct()
// {
@@ -99,7 +99,7 @@ namespace TERCpp
// return s.str();
// }
- /* The distance of the shift. */
+/* The distance of the shift. */
// int alignmentStruct::distance()
// {
// if (moveto < start)
diff --git a/mert/TER/alignmentStruct.h b/mert/TER/alignmentStruct.h
index adda2c345..c1459960b 100644
--- a/mert/TER/alignmentStruct.h
+++ b/mert/TER/alignmentStruct.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -34,10 +34,10 @@ using namespace Tools;
namespace TERCpp
{
- class alignmentStruct
- {
- private:
- public:
+class alignmentStruct
+{
+private:
+public:
// alignmentStruct();
// alignmentStruct (int _start, int _end, int _moveto, int _newloc);
@@ -53,14 +53,14 @@ namespace TERCpp
// int end;
// int moveto;
// int newloc;
- vector<string> nwords; // The words we shifted
- vector<char> alignment ; // for pra_more output
- vector<vecInt> aftershift; // for pra_more output
- // This is used to store the cost of a shift, so we don't have to
- // calculate it multiple times.
- double cost;
- string toString();
- };
+ vector<string> nwords; // The words we shifted
+ vector<char> alignment ; // for pra_more output
+ vector<vecInt> aftershift; // for pra_more output
+ // This is used to store the cost of a shift, so we don't have to
+ // calculate it multiple times.
+ double cost;
+ string toString();
+};
}
#endif \ No newline at end of file
diff --git a/mert/TER/bestShiftStruct.h b/mert/TER/bestShiftStruct.h
index 9457fd1d8..d68f2319f 100644
--- a/mert/TER/bestShiftStruct.h
+++ b/mert/TER/bestShiftStruct.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -36,10 +36,10 @@ using namespace Tools;
namespace TERCpp
{
- class bestShiftStruct
- {
- private:
- public:
+class bestShiftStruct
+{
+private:
+public:
// alignmentStruct();
// alignmentStruct (int _start, int _end, int _moveto, int _newloc);
@@ -55,16 +55,16 @@ namespace TERCpp
// int end;
// int moveto;
// int newloc;
- terShift m_best_shift;
- terAlignment m_best_align;
- bool m_empty;
+ terShift m_best_shift;
+ terAlignment m_best_align;
+ bool m_empty;
// vector<string> nwords; // The words we shifted
// char* alignment ; // for pra_more output
// vector<vecInt> aftershift; // for pra_more output
- // This is used to store the cost of a shift, so we don't have to
- // calculate it multiple times.
+ // This is used to store the cost of a shift, so we don't have to
+ // calculate it multiple times.
// double cost;
- };
+};
}
#endif \ No newline at end of file
diff --git a/mert/TER/hashMap.cpp b/mert/TER/hashMap.cpp
index de84ff796..253fda715 100644
--- a/mert/TER/hashMap.cpp
+++ b/mert/TER/hashMap.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -28,156 +28,142 @@ using namespace std;
namespace HashMapSpace
{
// hashMap::hashMap();
- /* hashMap::~hashMap()
- {
- // vector<stringHasher>::const_iterator del = m_hasher.begin();
- for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
- {
- delete(*del);
- }
- }*/
- /**
- * int hashMap::trouve ( long searchKey )
- * @param searchKey
- * @return
- */
- int hashMap::trouve ( long searchKey )
+/* hashMap::~hashMap()
{
- long foundKey;
+// vector<stringHasher>::const_iterator del = m_hasher.begin();
+ for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
+ {
+ delete(*del);
+ }
+ }*/
+/**
+ * int hashMap::trouve ( long searchKey )
+ * @param searchKey
+ * @return
+ */
+int hashMap::trouve ( long searchKey )
+{
+ long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return 1;
- }
- }
- return 0;
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return 1;
}
- int hashMap::trouve ( string key )
- {
- long searchKey=hashValue ( key );
- long foundKey;;
+ }
+ return 0;
+}
+int hashMap::trouve ( string key )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return 1;
- }
- }
- return 0;
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return 1;
}
- /**
- * long hashMap::hashValue ( string key )
- * @param key
- * @return
- */
- long hashMap::hashValue ( string key )
- {
- locale loc; // the "C" locale
- const collate<char>& coll = use_facet<collate<char> >(loc);
- return coll.hash(key.data(),key.data()+key.length());
+ }
+ return 0;
+}
+/**
+ * long hashMap::hashValue ( string key )
+ * @param key
+ * @return
+ */
+long hashMap::hashValue ( string key )
+{
+ locale loc; // the "C" locale
+ const collate<char>& coll = use_facet<collate<char> >(loc);
+ return coll.hash(key.data(),key.data()+key.length());
// boost::hash<string> hasher;
// return hasher ( key );
- }
- /**
- * void hashMap::addHasher ( string key, string value )
- * @param key
- * @param value
- */
- void hashMap::addHasher ( string key, string value )
- {
- if ( trouve ( hashValue ( key ) ) ==0 )
- {
+}
+/**
+ * void hashMap::addHasher ( string key, string value )
+ * @param key
+ * @param value
+ */
+void hashMap::addHasher ( string key, string value )
+{
+ if ( trouve ( hashValue ( key ) ) ==0 ) {
// cerr << "ICI1" <<endl;
- stringHasher H ( hashValue ( key ),key,value );
+ stringHasher H ( hashValue ( key ),key,value );
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
// cerr << "ICI2" <<endl;
- m_hasher.push_back ( H );
- }
- }
- stringHasher hashMap::getHasher ( string key )
- {
- long searchKey=hashValue ( key );
- long foundKey;
- stringHasher defaut(0,"","");
+ m_hasher.push_back ( H );
+ }
+}
+stringHasher hashMap::getHasher ( string key )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;
+ stringHasher defaut(0,"","");
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return ( *l_hasher );
- }
- }
- return defaut;
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return ( *l_hasher );
}
- string hashMap::getValue ( string key )
- {
- long searchKey=hashValue ( key );
- long foundKey;
+ }
+ return defaut;
+}
+string hashMap::getValue ( string key )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
- return ( *l_hasher ).getValue();
- }
- }
- return "";
+ return ( *l_hasher ).getValue();
}
- string hashMap::searchValue ( string value )
- {
+ }
+ return "";
+}
+string hashMap::searchValue ( string value )
+{
// long searchKey=hashValue ( key );
// long foundKey;
- string foundValue;
+ string foundValue;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundValue= ( *l_hasher ).getValue();
- if ( foundValue.compare ( value ) == 0 )
- {
- return ( *l_hasher ).getKey();
- }
- }
- return "";
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundValue= ( *l_hasher ).getValue();
+ if ( foundValue.compare ( value ) == 0 ) {
+ return ( *l_hasher ).getKey();
}
+ }
+ return "";
+}
- void hashMap::setValue ( string key , string value )
- {
- long searchKey=hashValue ( key );
- long foundKey;
+void hashMap::setValue ( string key , string value )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- ( *l_hasher ).setValue ( value );
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ ( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue();
- }
- }
}
+ }
+}
- /**
- *
- */
- void hashMap::printHash()
- {
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
- }
+/**
+ *
+ */
+void hashMap::printHash()
+{
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+}
diff --git a/mert/TER/hashMap.h b/mert/TER/hashMap.h
index 6cb721573..c2708b360 100644
--- a/mert/TER/hashMap.h
+++ b/mert/TER/hashMap.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -35,27 +35,27 @@ using namespace std;
namespace HashMapSpace
{
- class hashMap
- {
- private:
- vector<stringHasher> m_hasher;
+class hashMap
+{
+private:
+ vector<stringHasher> m_hasher;
- public:
+public:
// ~hashMap();
- long hashValue ( string key );
- int trouve ( long searchKey );
- int trouve ( string key );
- void addHasher ( string key, string value );
- stringHasher getHasher ( string key );
- string getValue ( string key );
- string searchValue ( string key );
- void setValue ( string key , string value );
- void printHash();
- vector<stringHasher> getHashMap();
- string printStringHash();
- string printStringHash2();
- string printStringHashForLexicon();
- };
+ long hashValue ( string key );
+ int trouve ( long searchKey );
+ int trouve ( string key );
+ void addHasher ( string key, string value );
+ stringHasher getHasher ( string key );
+ string getValue ( string key );
+ string searchValue ( string key );
+ void setValue ( string key , string value );
+ void printHash();
+ vector<stringHasher> getHashMap();
+ string printStringHash();
+ string printStringHash2();
+ string printStringHashForLexicon();
+};
}
diff --git a/mert/TER/hashMapInfos.cpp b/mert/TER/hashMapInfos.cpp
index 23f57d808..0ab6d21b2 100644
--- a/mert/TER/hashMapInfos.cpp
+++ b/mert/TER/hashMapInfos.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -28,117 +28,108 @@ using namespace std;
namespace HashMapSpace
{
// hashMapInfos::hashMap();
- /* hashMapInfos::~hashMap()
- {
- // vector<infosHasher>::const_iterator del = m_hasher.begin();
- for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
- {
- delete(*del);
- }
- }*/
- /**
- * int hashMapInfos::trouve ( long searchKey )
- * @param searchKey
- * @return
- */
- int hashMapInfos::trouve ( long searchKey )
+/* hashMapInfos::~hashMap()
{
- long foundKey;
+// vector<infosHasher>::const_iterator del = m_hasher.begin();
+ for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
+ {
+ delete(*del);
+ }
+ }*/
+/**
+ * int hashMapInfos::trouve ( long searchKey )
+ * @param searchKey
+ * @return
+ */
+int hashMapInfos::trouve ( long searchKey )
+{
+ long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return 1;
- }
- }
- return 0;
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return 1;
}
- int hashMapInfos::trouve ( string key )
- {
- long searchKey=hashValue ( key );
- long foundKey;;
+ }
+ return 0;
+}
+int hashMapInfos::trouve ( string key )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return 1;
- }
- }
- return 0;
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return 1;
}
+ }
+ return 0;
+}
- /**
- * long hashMapInfos::hashValue ( string key )
- * @param key
- * @return
- */
- long hashMapInfos::hashValue ( string key )
- {
- locale loc; // the "C" locale
- const collate<char>& coll = use_facet<collate<char> >(loc);
- return coll.hash(key.data(),key.data()+key.length());
+/**
+ * long hashMapInfos::hashValue ( string key )
+ * @param key
+ * @return
+ */
+long hashMapInfos::hashValue ( string key )
+{
+ locale loc; // the "C" locale
+ const collate<char>& coll = use_facet<collate<char> >(loc);
+ return coll.hash(key.data(),key.data()+key.length());
// boost::hash<string> hasher;
// return hasher ( key );
- }
- /**
- * void hashMapInfos::addHasher ( string key, string value )
- * @param key
- * @param value
- */
- void hashMapInfos::addHasher ( string key, vector<int> value )
- {
- if ( trouve ( hashValue ( key ) ) ==0 )
- {
+}
+/**
+ * void hashMapInfos::addHasher ( string key, string value )
+ * @param key
+ * @param value
+ */
+void hashMapInfos::addHasher ( string key, vector<int> value )
+{
+ if ( trouve ( hashValue ( key ) ) ==0 ) {
// cerr << "ICI1" <<endl;
- infosHasher H ( hashValue ( key ),key,value );
+ infosHasher H ( hashValue ( key ),key,value );
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
// cerr << "ICI2" <<endl;
- m_hasher.push_back ( H );
- }
- }
- void hashMapInfos::addValue ( string key, vector<int> value )
- {
- addHasher ( key, value );
- }
- infosHasher hashMapInfos::getHasher ( string key )
- {
- long searchKey=hashValue ( key );
- long foundKey;
+ m_hasher.push_back ( H );
+ }
+}
+void hashMapInfos::addValue ( string key, vector<int> value )
+{
+ addHasher ( key, value );
+}
+infosHasher hashMapInfos::getHasher ( string key )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return ( *l_hasher );
- }
- }
- vector<int> temp;
- infosHasher defaut(0,"",temp);
- return defaut;
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return ( *l_hasher );
}
- vector<int> hashMapInfos::getValue ( string key )
- {
- long searchKey=hashValue ( key );
- long foundKey;
- vector<int> retour;
+ }
+ vector<int> temp;
+ infosHasher defaut(0,"",temp);
+ return defaut;
+}
+vector<int> hashMapInfos::getValue ( string key )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;
+ vector<int> retour;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
- return ( *l_hasher ).getValue();
- }
- }
- return retour;
+ return ( *l_hasher ).getValue();
}
+ }
+ return retour;
+}
// string hashMapInfos::searchValue ( string value )
// {
// // long searchKey=hashValue ( key );
@@ -158,42 +149,38 @@ namespace HashMapSpace
// }
//
- void hashMapInfos::setValue ( string key , vector<int> value )
- {
- long searchKey=hashValue ( key );
- long foundKey;
+void hashMapInfos::setValue ( string key , vector<int> value )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- ( *l_hasher ).setValue ( value );
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ ( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue();
- }
- }
- }
- string hashMapInfos::toString ()
- {
- stringstream to_return;
- for ( vector<infosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- to_return << (*l_hasher).toString();
- // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
- return to_return.str();
}
+ }
+}
+string hashMapInfos::toString ()
+{
+ stringstream to_return;
+ for ( vector<infosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ to_return << (*l_hasher).toString();
+ // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+ return to_return.str();
+}
- /**
- *
- */
- void hashMapInfos::printHash()
- {
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
+/**
+ *
+ */
+void hashMapInfos::printHash()
+{
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
- }
+ }
+}
diff --git a/mert/TER/hashMapInfos.h b/mert/TER/hashMapInfos.h
index 5e7dbb6e7..e975aa738 100644
--- a/mert/TER/hashMapInfos.h
+++ b/mert/TER/hashMapInfos.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -34,29 +34,29 @@ using namespace std;
namespace HashMapSpace
{
- class hashMapInfos
- {
- private:
- vector<infosHasher> m_hasher;
+class hashMapInfos
+{
+private:
+ vector<infosHasher> m_hasher;
- public:
+public:
// ~hashMap();
- long hashValue ( string key );
- int trouve ( long searchKey );
- int trouve ( string key );
- void addHasher ( string key, vector<int> value );
- void addValue ( string key, vector<int> value );
- infosHasher getHasher ( string key );
- vector<int> getValue ( string key );
+ long hashValue ( string key );
+ int trouve ( long searchKey );
+ int trouve ( string key );
+ void addHasher ( string key, vector<int> value );
+ void addValue ( string key, vector<int> value );
+ infosHasher getHasher ( string key );
+ vector<int> getValue ( string key );
// string searchValue ( string key );
- void setValue ( string key , vector<int> value );
- void printHash();
- string toString();
- vector<infosHasher> getHashMap();
- string printStringHash();
- string printStringHash2();
- string printStringHashForLexicon();
- };
+ void setValue ( string key , vector<int> value );
+ void printHash();
+ string toString();
+ vector<infosHasher> getHashMap();
+ string printStringHash();
+ string printStringHash2();
+ string printStringHashForLexicon();
+};
}
diff --git a/mert/TER/hashMapStringInfos.cpp b/mert/TER/hashMapStringInfos.cpp
index 773c148d4..d984bdadc 100644
--- a/mert/TER/hashMapStringInfos.cpp
+++ b/mert/TER/hashMapStringInfos.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -27,179 +27,166 @@ using namespace std;
namespace HashMapSpace
{
- // hashMapStringInfos::hashMap();
- /* hashMapStringInfos::~hashMap()
- {
- // vector<stringInfosHasher>::const_iterator del = m_hasher.begin();
- for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
- {
- delete(*del);
- }
- }*/
- /**
- * int hashMapStringInfos::trouve ( long searchKey )
- * @param searchKey
- * @return
- */
- int hashMapStringInfos::trouve ( long searchKey )
- {
- long foundKey;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return 1;
- }
- }
- return 0;
+// hashMapStringInfos::hashMap();
+/* hashMapStringInfos::~hashMap()
+{
+// vector<stringInfosHasher>::const_iterator del = m_hasher.begin();
+ for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
+ {
+ delete(*del);
+ }
+}*/
+/**
+* int hashMapStringInfos::trouve ( long searchKey )
+* @param searchKey
+* @return
+*/
+int hashMapStringInfos::trouve ( long searchKey )
+{
+ long foundKey;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return 1;
}
+ }
+ return 0;
+}
- int hashMapStringInfos::trouve ( string key )
- {
- long searchKey = hashValue ( key );
- long foundKey;;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return 1;
- }
- }
- return 0;
+int hashMapStringInfos::trouve ( string key )
+{
+ long searchKey = hashValue ( key );
+ long foundKey;;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return 1;
}
+ }
+ return 0;
+}
- /**
- * long hashMapStringInfos::hashValue ( string key )
- * @param key
- * @return
- */
- long hashMapStringInfos::hashValue ( string key )
- {
- locale loc; // the "C" locale
- const collate<char>& coll = use_facet<collate<char> > ( loc );
- return coll.hash ( key.data(), key.data() + key.length() );
+/**
+* long hashMapStringInfos::hashValue ( string key )
+* @param key
+* @return
+*/
+long hashMapStringInfos::hashValue ( string key )
+{
+ locale loc; // the "C" locale
+ const collate<char>& coll = use_facet<collate<char> > ( loc );
+ return coll.hash ( key.data(), key.data() + key.length() );
// boost::hash<string> hasher;
// return hasher ( key );
+}
+/**
+* void hashMapStringInfos::addHasher ( string key, string value )
+* @param key
+* @param value
+*/
+void hashMapStringInfos::addHasher ( string key, vector<string> value )
+{
+ if ( trouve ( hashValue ( key ) ) == 0 ) {
+ // cerr << "ICI1" <<endl;
+ stringInfosHasher H ( hashValue ( key ), key, value );
+ // cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
+ // cerr << "ICI2" <<endl;
+
+ m_hasher.push_back ( H );
+ }
+}
+void hashMapStringInfos::addValue ( string key, vector<string> value )
+{
+ addHasher ( key, value );
+}
+stringInfosHasher hashMapStringInfos::getHasher ( string key )
+{
+ long searchKey = hashValue ( key );
+ long foundKey;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return ( *l_hasher );
}
- /**
- * void hashMapStringInfos::addHasher ( string key, string value )
- * @param key
- * @param value
- */
- void hashMapStringInfos::addHasher ( string key, vector<string> value )
- {
- if ( trouve ( hashValue ( key ) ) == 0 )
- {
- // cerr << "ICI1" <<endl;
- stringInfosHasher H ( hashValue ( key ), key, value );
- // cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
- // cerr << "ICI2" <<endl;
-
- m_hasher.push_back ( H );
- }
- }
- void hashMapStringInfos::addValue ( string key, vector<string> value )
- {
- addHasher ( key, value );
- }
- stringInfosHasher hashMapStringInfos::getHasher ( string key )
- {
- long searchKey = hashValue ( key );
- long foundKey;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return ( *l_hasher );
- }
- }
- vector<string> tmp;
- stringInfosHasher defaut ( 0, "", tmp );
- return defaut;
- }
- vector<string> hashMapStringInfos::getValue ( string key )
- {
- long searchKey = hashValue ( key );
- long foundKey;
- vector<string> retour;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- // cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
- return ( *l_hasher ).getValue();
- }
- }
- return retour;
+ }
+ vector<string> tmp;
+ stringInfosHasher defaut ( 0, "", tmp );
+ return defaut;
+}
+vector<string> hashMapStringInfos::getValue ( string key )
+{
+ long searchKey = hashValue ( key );
+ long foundKey;
+ vector<string> retour;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ // cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
+ return ( *l_hasher ).getValue();
}
- // string hashMapStringInfos::searchValue ( string value )
- // {
- // // long searchKey=hashValue ( key );
- // // long foundKey;
- // vector<int> foundValue;
- //
- // // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- // for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- // {
- // foundValue= ( *l_hasher ).getValue();
- // /* if ( foundValue.compare ( value ) == 0 )
- // {
- // return ( *l_hasher ).getKey();
- // }*/
- // }
- // return "";
- // }
- //
-
- void hashMapStringInfos::setValue ( string key , vector<string> value )
- {
- long searchKey = hashValue ( key );
- long foundKey;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- ( *l_hasher ).setValue ( value );
- // return ( *l_hasher ).getValue();
- }
- }
+ }
+ return retour;
+}
+// string hashMapStringInfos::searchValue ( string value )
+// {
+// // long searchKey=hashValue ( key );
+// // long foundKey;
+// vector<int> foundValue;
+//
+// // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+// for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+// {
+// foundValue= ( *l_hasher ).getValue();
+// /* if ( foundValue.compare ( value ) == 0 )
+// {
+// return ( *l_hasher ).getKey();
+// }*/
+// }
+// return "";
+// }
+//
+
+void hashMapStringInfos::setValue ( string key , vector<string> value )
+{
+ long searchKey = hashValue ( key );
+ long foundKey;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ ( *l_hasher ).setValue ( value );
+ // return ( *l_hasher ).getValue();
}
+ }
+}
- string hashMapStringInfos::toString ()
- {
- stringstream to_return;
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- to_return << (*l_hasher).toString();
- // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
- return to_return.str();
- }
+string hashMapStringInfos::toString ()
+{
+ stringstream to_return;
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ to_return << (*l_hasher).toString();
+ // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+ return to_return.str();
+}
- /**
- *
- */
- void hashMapStringInfos::printHash()
- {
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
- }
- vector< stringInfosHasher > hashMapStringInfos::getHashMap()
- {
- return m_hasher;
- }
+/**
+*
+*/
+void hashMapStringInfos::printHash()
+{
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+}
+vector< stringInfosHasher > hashMapStringInfos::getHashMap()
+{
+ return m_hasher;
+}
diff --git a/mert/TER/hashMapStringInfos.h b/mert/TER/hashMapStringInfos.h
index 5337d50f2..a0eae951d 100644
--- a/mert/TER/hashMapStringInfos.h
+++ b/mert/TER/hashMapStringInfos.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -34,29 +34,29 @@ using namespace std;
namespace HashMapSpace
{
- class hashMapStringInfos
- {
- private:
- vector<stringInfosHasher> m_hasher;
+class hashMapStringInfos
+{
+private:
+ vector<stringInfosHasher> m_hasher;
- public:
+public:
// ~hashMap();
- long hashValue ( string key );
- int trouve ( long searchKey );
- int trouve ( string key );
- void addHasher ( string key, vector<string> value );
- void addValue ( string key, vector<string> value );
- stringInfosHasher getHasher ( string key );
- vector<string> getValue ( string key );
+ long hashValue ( string key );
+ int trouve ( long searchKey );
+ int trouve ( string key );
+ void addHasher ( string key, vector<string> value );
+ void addValue ( string key, vector<string> value );
+ stringInfosHasher getHasher ( string key );
+ vector<string> getValue ( string key );
// string searchValue ( string key );
- void setValue ( string key , vector<string> value );
- void printHash();
- string toString();
- vector<stringInfosHasher> getHashMap();
- string printStringHash();
- string printStringHash2();
- string printStringHashForLexicon();
- };
+ void setValue ( string key , vector<string> value );
+ void printHash();
+ string toString();
+ vector<stringInfosHasher> getHashMap();
+ string printStringHash();
+ string printStringHash2();
+ string printStringHashForLexicon();
+};
}
diff --git a/mert/TER/infosHasher.cpp b/mert/TER/infosHasher.cpp
index 8ce23ae44..450b70d94 100644
--- a/mert/TER/infosHasher.cpp
+++ b/mert/TER/infosHasher.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -27,35 +27,35 @@ using namespace Tools;
namespace HashMapSpace
{
- infosHasher::infosHasher (long cle,string cleTxt, vector<int> valueVecInt )
- {
- m_hashKey=cle;
- m_key=cleTxt;
- m_value=valueVecInt;
- }
+infosHasher::infosHasher (long cle,string cleTxt, vector<int> valueVecInt )
+{
+ m_hashKey=cle;
+ m_key=cleTxt;
+ m_value=valueVecInt;
+}
// infosHasher::~infosHasher(){};*/
- long infosHasher::getHashKey()
- {
- return m_hashKey;
- }
- string infosHasher::getKey()
- {
- return m_key;
- }
- vector<int> infosHasher::getValue()
- {
- return m_value;
- }
- void infosHasher::setValue ( vector<int> value )
- {
- m_value=value;
- }
- string infosHasher::toString()
- {
- stringstream to_return;
- to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
- return to_return.str();
- }
+long infosHasher::getHashKey()
+{
+ return m_hashKey;
+}
+string infosHasher::getKey()
+{
+ return m_key;
+}
+vector<int> infosHasher::getValue()
+{
+ return m_value;
+}
+void infosHasher::setValue ( vector<int> value )
+{
+ m_value=value;
+}
+string infosHasher::toString()
+{
+ stringstream to_return;
+ to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
+ return to_return.str();
+}
// typedef stdext::hash_map<std::string,string, stringhasher> HASH_S_S;
diff --git a/mert/TER/infosHasher.h b/mert/TER/infosHasher.h
index d3d56317a..ab9c7b5ed 100644
--- a/mert/TER/infosHasher.h
+++ b/mert/TER/infosHasher.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -31,23 +31,23 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
using namespace std;
namespace HashMapSpace
{
- class infosHasher
- {
- private:
- long m_hashKey;
- string m_key;
- vector<int> m_value;
-
- public:
- infosHasher ( long cle, string cleTxt, vector<int> valueVecInt );
- long getHashKey();
- string getKey();
- vector<int> getValue();
- void setValue ( vector<int> value );
- string toString();
-
-
- };
+class infosHasher
+{
+private:
+ long m_hashKey;
+ string m_key;
+ vector<int> m_value;
+
+public:
+ infosHasher ( long cle, string cleTxt, vector<int> valueVecInt );
+ long getHashKey();
+ string getKey();
+ vector<int> getValue();
+ void setValue ( vector<int> value );
+ string toString();
+
+
+};
}
diff --git a/mert/TER/stringHasher.cpp b/mert/TER/stringHasher.cpp
index f4d1526e8..729310352 100644
--- a/mert/TER/stringHasher.cpp
+++ b/mert/TER/stringHasher.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -26,29 +26,29 @@ using namespace std;
namespace HashMapSpace
{
- stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt )
- {
- m_hashKey=cle;
- m_key=cleTxt;
- m_value=valueTxt;
- }
+stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt )
+{
+ m_hashKey=cle;
+ m_key=cleTxt;
+ m_value=valueTxt;
+}
// stringHasher::~stringHasher(){};*/
- long stringHasher::getHashKey()
- {
- return m_hashKey;
- }
- string stringHasher::getKey()
- {
- return m_key;
- }
- string stringHasher::getValue()
- {
- return m_value;
- }
- void stringHasher::setValue ( string value )
- {
- m_value=value;
- }
+long stringHasher::getHashKey()
+{
+ return m_hashKey;
+}
+string stringHasher::getKey()
+{
+ return m_key;
+}
+string stringHasher::getValue()
+{
+ return m_value;
+}
+void stringHasher::setValue ( string value )
+{
+ m_value=value;
+}
// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;
diff --git a/mert/TER/stringHasher.h b/mert/TER/stringHasher.h
index d831f642c..5b0ccfc94 100644
--- a/mert/TER/stringHasher.h
+++ b/mert/TER/stringHasher.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -28,22 +28,22 @@ using namespace std;
namespace HashMapSpace
{
- class stringHasher
- {
- private:
- long m_hashKey;
- string m_key;
- string m_value;
+class stringHasher
+{
+private:
+ long m_hashKey;
+ string m_key;
+ string m_value;
- public:
- stringHasher ( long cle, string cleTxt, string valueTxt );
- long getHashKey();
- string getKey();
- string getValue();
- void setValue ( string value );
+public:
+ stringHasher ( long cle, string cleTxt, string valueTxt );
+ long getHashKey();
+ string getKey();
+ string getValue();
+ void setValue ( string value );
- };
+};
}
diff --git a/mert/TER/stringInfosHasher.cpp b/mert/TER/stringInfosHasher.cpp
index 007fd720f..ecbc10fa5 100644
--- a/mert/TER/stringInfosHasher.cpp
+++ b/mert/TER/stringInfosHasher.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -27,35 +27,35 @@ using namespace Tools;
namespace HashMapSpace
{
- stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt )
- {
- m_hashKey=cle;
- m_key=cleTxt;
- m_value=valueVecInt;
- }
+stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt )
+{
+ m_hashKey=cle;
+ m_key=cleTxt;
+ m_value=valueVecInt;
+}
// stringInfosHasher::~stringInfosHasher(){};*/
- long stringInfosHasher::getHashKey()
- {
- return m_hashKey;
- }
- string stringInfosHasher::getKey()
- {
- return m_key;
- }
- vector<string> stringInfosHasher::getValue()
- {
- return m_value;
- }
- void stringInfosHasher::setValue ( vector<string> value )
- {
- m_value=value;
- }
- string stringInfosHasher::toString()
- {
- stringstream to_return;
- to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
- return to_return.str();
- }
+long stringInfosHasher::getHashKey()
+{
+ return m_hashKey;
+}
+string stringInfosHasher::getKey()
+{
+ return m_key;
+}
+vector<string> stringInfosHasher::getValue()
+{
+ return m_value;
+}
+void stringInfosHasher::setValue ( vector<string> value )
+{
+ m_value=value;
+}
+string stringInfosHasher::toString()
+{
+ stringstream to_return;
+ to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
+ return to_return.str();
+}
// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;
diff --git a/mert/TER/stringInfosHasher.h b/mert/TER/stringInfosHasher.h
index 307b48da7..e4369f27a 100644
--- a/mert/TER/stringInfosHasher.h
+++ b/mert/TER/stringInfosHasher.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -29,23 +29,23 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
using namespace std;
namespace HashMapSpace
{
- class stringInfosHasher
- {
- private:
- long m_hashKey;
- string m_key;
- vector<string> m_value;
-
- public:
- stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt );
- long getHashKey();
- string getKey();
- vector<string> getValue();
- void setValue ( vector<string> value );
- string toString();
-
-
- };
+class stringInfosHasher
+{
+private:
+ long m_hashKey;
+ string m_key;
+ vector<string> m_value;
+
+public:
+ stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt );
+ long getHashKey();
+ string getKey();
+ vector<string> getValue();
+ void setValue ( vector<string> value );
+ string toString();
+
+
+};
}
diff --git a/mert/TER/terAlignment.cpp b/mert/TER/terAlignment.cpp
index 6c5d35cc5..ec7bcafb7 100644
--- a/mert/TER/terAlignment.cpp
+++ b/mert/TER/terAlignment.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -24,191 +24,163 @@ using namespace std;
namespace TERCpp
{
- terAlignment::terAlignment()
- {
+terAlignment::terAlignment()
+{
// vector<string> ref;
// vector<string> hyp;
// vector<string> aftershift;
- // TERshift[] allshifts = null;
+ // TERshift[] allshifts = null;
- numEdits=0;
- numWords=0;
- bestRef="";
+ numEdits=0;
+ numWords=0;
+ bestRef="";
- numIns=0;
- numDel=0;
- numSub=0;
- numSft=0;
- numWsf=0;
- }
- string terAlignment::toString()
- {
- stringstream s;
- s.str ( "" );
- s << "Original Ref: \t" << join ( " ", ref ) << endl;
- s << "Original Hyp: \t" << join ( " ", hyp ) <<endl;
- s << "Hyp After Shift:\t" << join ( " ", aftershift );
+ numIns=0;
+ numDel=0;
+ numSub=0;
+ numSft=0;
+ numWsf=0;
+}
+string terAlignment::toString()
+{
+ stringstream s;
+ s.str ( "" );
+ s << "Original Ref: \t" << join ( " ", ref ) << endl;
+ s << "Original Hyp: \t" << join ( " ", hyp ) <<endl;
+ s << "Hyp After Shift:\t" << join ( " ", aftershift );
// s << "Hyp After Shift: " << join ( " ", aftershift );
- s << endl;
+ s << endl;
// string s = "Original Ref: " + join(" ", ref) + "\nOriginal Hyp: " + join(" ", hyp) + "\nHyp After Shift: " + join(" ", aftershift);
- if ( ( int ) sizeof ( alignment ) >0 )
- {
- s << "Alignment: (";
+ if ( ( int ) sizeof ( alignment ) >0 ) {
+ s << "Alignment: (";
// s += "\nAlignment: (";
- for ( int i = 0; i < ( int ) ( alignment.size() ); i++ )
- {
- s << alignment[i];
+ for ( int i = 0; i < ( int ) ( alignment.size() ); i++ ) {
+ s << alignment[i];
// s+=alignment[i];
- }
+ }
// s += ")";
- s << ")";
- }
- s << endl;
- if ( ( int ) allshifts.size() == 0 )
- {
+ s << ")";
+ }
+ s << endl;
+ if ( ( int ) allshifts.size() == 0 ) {
// s += "\nNumShifts: 0";
- s << "NumShifts: 0";
- }
- else
- {
+ s << "NumShifts: 0";
+ } else {
// s += "\nNumShifts: " + (int)allshifts.size();
- s << "NumShifts: "<< ( int ) allshifts.size();
- for ( int i = 0; i < ( int ) allshifts.size(); i++ )
- {
- s << endl << " " ;
- s << ( ( terShift ) allshifts[i] ).toString();
+ s << "NumShifts: "<< ( int ) allshifts.size();
+ for ( int i = 0; i < ( int ) allshifts.size(); i++ ) {
+ s << endl << " " ;
+ s << ( ( terShift ) allshifts[i] ).toString();
// s += "\n " + allshifts[i];
- }
- }
- s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")";
+ }
+ }
+ s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")";
// s += "\nScore: " + score() + " (" + numEdits + "/" + numWords + ")";
- return s.str();
+ return s.str();
- }
- string terAlignment::join ( string delim, vector<string> arr )
- {
- if ( ( int ) arr.size() == 0 ) return "";
+}
+string terAlignment::join ( string delim, vector<string> arr )
+{
+ if ( ( int ) arr.size() == 0 ) return "";
// if ((int)delim.compare("") == 0) delim = new String("");
// String s = new String("");
- stringstream s;
- s.str ( "" );
- for ( int i = 0; i < ( int ) arr.size(); i++ )
- {
- if ( i == 0 )
- {
- s << arr.at ( i );
- }
- else
- {
- s << delim << arr.at ( i );
- }
- }
- return s.str();
+ stringstream s;
+ s.str ( "" );
+ for ( int i = 0; i < ( int ) arr.size(); i++ ) {
+ if ( i == 0 ) {
+ s << arr.at ( i );
+ } else {
+ s << delim << arr.at ( i );
+ }
+ }
+ return s.str();
// return "";
+}
+double terAlignment::score()
+{
+ if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) ) {
+ return 1.0;
+ }
+ if ( numWords <= 0.0 ) {
+ return 0.0;
+ }
+ return ( double ) numEdits / numWords;
+}
+double terAlignment::scoreAv()
+{
+ if ( ( averageWords <= 0.0 ) && ( numEdits > 0.0 ) ) {
+ return 1.0;
+ }
+ if ( averageWords <= 0.0 ) {
+ return 0.0;
+ }
+ return ( double ) numEdits / averageWords;
+}
+
+void terAlignment::scoreDetails()
+{
+ numIns = numDel = numSub = numWsf = numSft = 0;
+ if((int)allshifts.size()>0) {
+ for(int i = 0; i < (int)allshifts.size(); ++i) {
+ numWsf += allshifts[i].size();
}
- double terAlignment::score()
- {
- if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) )
- {
- return 1.0;
- }
- if ( numWords <= 0.0 )
- {
- return 0.0;
- }
- return ( double ) numEdits / numWords;
+ numSft = allshifts.size();
+ }
+
+ if((int)alignment.size()>0 ) {
+ for(int i = 0; i < (int)alignment.size(); ++i) {
+ switch (alignment[i]) {
+ case 'S':
+ case 'T':
+ numSub++;
+ break;
+ case 'D':
+ numDel++;
+ break;
+ case 'I':
+ numIns++;
+ break;
+ }
}
- double terAlignment::scoreAv()
- {
- if ( ( averageWords <= 0.0 ) && ( numEdits > 0.0 ) )
- {
- return 1.0;
- }
- if ( averageWords <= 0.0 )
- {
- return 0.0;
- }
- return ( double ) numEdits / averageWords;
+ }
+ // if(numEdits != numSft + numDel + numIns + numSub)
+ // System.out.println("** Error, unmatch edit erros " + numEdits +
+ // " vs " + (numSft + numDel + numIns + numSub));
+}
+string terAlignment::printAlignments()
+{
+ stringstream to_return;
+ for(int i = 0; i < (int)alignment.size(); ++i) {
+ char alignInfo=alignment.at(i);
+ if (alignInfo == 'A' ) {
+ alignInfo='A';
}
- void terAlignment::scoreDetails()
- {
- numIns = numDel = numSub = numWsf = numSft = 0;
- if((int)allshifts.size()>0)
- {
- for(int i = 0; i < (int)allshifts.size(); ++i)
- {
- numWsf += allshifts[i].size();
- }
- numSft = allshifts.size();
- }
-
- if((int)alignment.size()>0 )
- {
- for(int i = 0; i < (int)alignment.size(); ++i)
- {
- switch (alignment[i])
- {
- case 'S':
- case 'T':
- numSub++;
- break;
- case 'D':
- numDel++;
- break;
- case 'I':
- numIns++;
- break;
- }
- }
- }
- // if(numEdits != numSft + numDel + numIns + numSub)
- // System.out.println("** Error, unmatch edit erros " + numEdits +
- // " vs " + (numSft + numDel + numIns + numSub));
- }
- string terAlignment::printAlignments()
- {
- stringstream to_return;
- for(int i = 0; i < (int)alignment.size(); ++i)
- {
- char alignInfo=alignment.at(i);
- if (alignInfo == 'A' )
- {
- alignInfo='A';
- }
-
- if (i==0)
- {
- to_return << alignInfo;
- }
- else
- {
- to_return << " " << alignInfo;
- }
- }
- return to_return.str();
+ if (i==0) {
+ to_return << alignInfo;
+ } else {
+ to_return << " " << alignInfo;
+ }
}
+ return to_return.str();
+}
string terAlignment::printAllShifts()
{
- stringstream to_return;
- if ( ( int ) allshifts.size() == 0 )
- {
+ stringstream to_return;
+ if ( ( int ) allshifts.size() == 0 ) {
// s += "\nNumShifts: 0";
- to_return << "NbrShifts: 0";
- }
- else
- {
+ to_return << "NbrShifts: 0";
+ } else {
// s += "\nNumShifts: " + (int)allshifts.size();
- to_return << "NbrShifts: "<< ( int ) allshifts.size();
- for ( int i = 0; i < ( int ) allshifts.size(); i++ )
- {
- to_return << "\t" ;
- to_return << ( ( terShift ) allshifts[i] ).toString();
+ to_return << "NbrShifts: "<< ( int ) allshifts.size();
+ for ( int i = 0; i < ( int ) allshifts.size(); i++ ) {
+ to_return << "\t" ;
+ to_return << ( ( terShift ) allshifts[i] ).toString();
// s += "\n " + allshifts[i];
- }
- }
- return to_return.str();
+ }
+ }
+ return to_return.str();
}
} \ No newline at end of file
diff --git a/mert/TER/terAlignment.h b/mert/TER/terAlignment.h
index 0af86f663..2af0b7490 100644
--- a/mert/TER/terAlignment.h
+++ b/mert/TER/terAlignment.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -34,41 +34,41 @@ using namespace std;
namespace TERCpp
{
- class terAlignment
- {
- private:
- public:
-
- terAlignment();
- string toString();
- void scoreDetails();
-
- vector<string> ref;
- vector<string> hyp;
- vector<string> aftershift;
- vector<terShift> allshifts;
- vector<int> hyp_int;
- vector<int> aftershift_int;
-
- double numEdits;
- double numWords;
- double averageWords;
- vector<char> alignment;
- string bestRef;
-
- int numIns;
- int numDel;
- int numSub;
- int numSft;
- int numWsf;
-
-
- string join ( string delim, vector<string> arr );
- double score();
- double scoreAv();
- string printAlignments();
- string printAllShifts();
- };
+class terAlignment
+{
+private:
+public:
+
+ terAlignment();
+ string toString();
+ void scoreDetails();
+
+ vector<string> ref;
+ vector<string> hyp;
+ vector<string> aftershift;
+ vector<terShift> allshifts;
+ vector<int> hyp_int;
+ vector<int> aftershift_int;
+
+ double numEdits;
+ double numWords;
+ double averageWords;
+ vector<char> alignment;
+ string bestRef;
+
+ int numIns;
+ int numDel;
+ int numSub;
+ int numSft;
+ int numWsf;
+
+
+ string join ( string delim, vector<string> arr );
+ double score();
+ double scoreAv();
+ string printAlignments();
+ string printAllShifts();
+};
}
#endif \ No newline at end of file
diff --git a/mert/TER/terShift.cpp b/mert/TER/terShift.cpp
index c1106db76..440b4d2ce 100644
--- a/mert/TER/terShift.cpp
+++ b/mert/TER/terShift.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -42,32 +42,32 @@ namespace TERCpp
// numSft=0;
// numWsf=0;
// }
- terShift::terShift ()
- {
- start = 0;
- end = 0;
- moveto = 0;
- newloc = 0;
- cost=1.0;
- }
- terShift::terShift ( int _start, int _end, int _moveto, int _newloc )
- {
- start = _start;
- end = _end;
- moveto = _moveto;
- newloc = _newloc;
- cost=1.0;
- }
+terShift::terShift ()
+{
+ start = 0;
+ end = 0;
+ moveto = 0;
+ newloc = 0;
+ cost=1.0;
+}
+terShift::terShift ( int _start, int _end, int _moveto, int _newloc )
+{
+ start = _start;
+ end = _end;
+ moveto = _moveto;
+ newloc = _newloc;
+ cost=1.0;
+}
- terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted )
- {
- start = _start;
- end = _end;
- moveto = _moveto;
- newloc = _newloc;
- shifted = _shifted;
- cost=1.0;
- }
+terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted )
+{
+ start = _start;
+ end = _end;
+ moveto = _moveto;
+ newloc = _newloc;
+ shifted = _shifted;
+ cost=1.0;
+}
// string terShift::vectorToString(vector<string> vec)
// {
// string retour("");
@@ -78,44 +78,38 @@ namespace TERCpp
// return retour;
// }
- string terShift::toString()
- {
- stringstream s;
- s.str ( "" );
- s << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]";
- if ( ( int ) shifted.size() > 0 )
- {
- s << " (" << vectorToString ( shifted ) << ")";
- }
- return s.str();
- }
+string terShift::toString()
+{
+ stringstream s;
+ s.str ( "" );
+ s << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]";
+ if ( ( int ) shifted.size() > 0 ) {
+ s << " (" << vectorToString ( shifted ) << ")";
+ }
+ return s.str();
+}
- /* The distance of the shift. */
- int terShift::distance()
- {
- if ( moveto < start )
- {
- return start - moveto;
- }
- else if ( moveto > end )
- {
- return moveto - end;
- }
- else
- {
- return moveto - start;
- }
- }
+/* The distance of the shift. */
+int terShift::distance()
+{
+ if ( moveto < start ) {
+ return start - moveto;
+ } else if ( moveto > end ) {
+ return moveto - end;
+ } else {
+ return moveto - start;
+ }
+}
- bool terShift::leftShift()
- {
- return ( moveto < start );
- }
+bool terShift::leftShift()
+{
+ return ( moveto < start );
+}
- int terShift::size()
- {
- return ( end - start ) + 1;
- }
+int terShift::size()
+{
+ return ( end - start ) + 1;
+}
// terShift terShift::operator=(terShift t)
// {
//
diff --git a/mert/TER/terShift.h b/mert/TER/terShift.h
index ba84a5947..74545e0de 100644
--- a/mert/TER/terShift.h
+++ b/mert/TER/terShift.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -34,32 +34,32 @@ using namespace Tools;
namespace TERCpp
{
- class terShift
- {
- private:
- public:
+class terShift
+{
+private:
+public:
- terShift();
- terShift ( int _start, int _end, int _moveto, int _newloc );
- terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted );
- string toString();
- int distance() ;
- bool leftShift();
- int size();
+ terShift();
+ terShift ( int _start, int _end, int _moveto, int _newloc );
+ terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted );
+ string toString();
+ int distance() ;
+ bool leftShift();
+ int size();
// terShift operator=(terShift t);
// string vectorToString(vector<string> vec);
- int start;
- int end;
- int moveto;
- int newloc;
- vector<string> shifted; // The words we shifted
- vector<char> alignment ; // for pra_more output
- vector<string> aftershift; // for pra_more output
- // This is used to store the cost of a shift, so we don't have to
- // calculate it multiple times.
- double cost;
- };
+ int start;
+ int end;
+ int moveto;
+ int newloc;
+ vector<string> shifted; // The words we shifted
+ vector<char> alignment ; // for pra_more output
+ vector<string> aftershift; // for pra_more output
+ // This is used to store the cost of a shift, so we don't have to
+ // calculate it multiple times.
+ double cost;
+};
}
#endif \ No newline at end of file
diff --git a/mert/TER/tercalc.cpp b/mert/TER/tercalc.cpp
index b7f63772c..c4629c639 100644
--- a/mert/TER/tercalc.cpp
+++ b/mert/TER/tercalc.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -35,902 +35,724 @@ using namespace Tools;
namespace TERCpp
{
- terCalc::terCalc()
- {
- TAILLE_PERMUT_MAX = 50;
- infinite = 999999.0;
- shift_cost = 1.0;
- insert_cost = 1.0;
- delete_cost = 1.0;
- substitute_cost = 1.0;
- match_cost = 0.0;
- NBR_SEGS_EVALUATED = 0;
- NBR_PERMUTS_CONSID = 0;
- NBR_BS_APPELS = 0;
- TAILLE_BEAM = 20;
- DIST_MAX_PERMUT = 50;
- PRINT_DEBUG = false;
- hypSpans.clear();
- refSpans.clear();
- }
+terCalc::terCalc()
+{
+ TAILLE_PERMUT_MAX = 50;
+ infinite = 999999.0;
+ shift_cost = 1.0;
+ insert_cost = 1.0;
+ delete_cost = 1.0;
+ substitute_cost = 1.0;
+ match_cost = 0.0;
+ NBR_SEGS_EVALUATED = 0;
+ NBR_PERMUTS_CONSID = 0;
+ NBR_BS_APPELS = 0;
+ TAILLE_BEAM = 20;
+ DIST_MAX_PERMUT = 50;
+ PRINT_DEBUG = false;
+ hypSpans.clear();
+ refSpans.clear();
+}
- terAlignment terCalc::WERCalculation ( vector< string > hyp , vector< string > ref )
- {
-
- return minimizeDistanceEdition ( hyp, ref, hypSpans );
-
- }
+terAlignment terCalc::WERCalculation ( vector< string > hyp , vector< string > ref )
+{
- terAlignment terCalc::TER ( std::vector< int > hyp, std::vector< int > ref )
- {
- stringstream s;
- s.str ( "" );
- string stringRef ( "" );
- string stringHyp ( "" );
- for ( vector<int>::iterator l_it = ref.begin(); l_it != ref.end(); l_it++ )
- {
- if ( l_it == ref.begin() )
- {
- s << ( *l_it );
- }
- else
- {
- s << " " << ( *l_it );
- }
- }
- stringRef = s.str();
- s.str ( "" );
- for ( vector<int>::iterator l_itHyp = hyp.begin(); l_itHyp != hyp.end(); l_itHyp++ )
- {
- if ( l_itHyp == hyp.begin() )
- {
- s << ( *l_itHyp );
- }
- else
- {
- s << " " << ( *l_itHyp );
- }
- }
- stringHyp = s.str();
- s.str ( "" );
- return TER ( stringToVector ( stringRef , " " ), stringToVector ( stringHyp , " " ) );
- }
+ return minimizeDistanceEdition ( hyp, ref, hypSpans );
+}
- hashMapInfos terCalc::createConcordMots ( vector<string> hyp, vector<string> ref )
- {
- hashMap tempHash;
- hashMapInfos retour;
- for ( int i = 0; i < ( int ) hyp.size(); i++ )
- {
- tempHash.addHasher ( hyp.at ( i ), "" );
- }
- bool cor[ref.size() ];
- for ( int i = 0; i < ( int ) ref.size(); i++ )
- {
- if ( tempHash.trouve ( ( string ) ref.at ( i ) ) )
- {
- cor[i] = true;
- }
- else
- {
- cor[i] = false;
- }
- }
- for ( int start = 0; start < ( int ) ref.size(); start++ )
- {
- if ( cor[start] )
- {
- for ( int end = start; ( ( end < ( int ) ref.size() ) && ( end - start <= TAILLE_PERMUT_MAX ) && ( cor[end] ) );end++ )
- {
- vector<string> ajouter = subVector ( ref, start, end + 1 );
- string ajouterString = vectorToString ( ajouter );
- vector<int> values = retour.getValue ( ajouterString );
- values.push_back ( start );
- if ( values.size() > 1 )
- {
- retour.setValue ( ajouterString, values );
- }
- else
- {
- retour.addValue ( ajouterString, values );
- }
- }
- }
- }
- return retour;
+terAlignment terCalc::TER ( std::vector< int > hyp, std::vector< int > ref )
+{
+ stringstream s;
+ s.str ( "" );
+ string stringRef ( "" );
+ string stringHyp ( "" );
+ for ( vector<int>::iterator l_it = ref.begin(); l_it != ref.end(); l_it++ ) {
+ if ( l_it == ref.begin() ) {
+ s << ( *l_it );
+ } else {
+ s << " " << ( *l_it );
+ }
+ }
+ stringRef = s.str();
+ s.str ( "" );
+ for ( vector<int>::iterator l_itHyp = hyp.begin(); l_itHyp != hyp.end(); l_itHyp++ ) {
+ if ( l_itHyp == hyp.begin() ) {
+ s << ( *l_itHyp );
+ } else {
+ s << " " << ( *l_itHyp );
}
+ }
+ stringHyp = s.str();
+ s.str ( "" );
+ return TER ( stringToVector ( stringRef , " " ), stringToVector ( stringHyp , " " ) );
+}
+
- bool terCalc::trouverIntersection ( vecInt refSpan, vecInt hypSpan )
- {
- if ( ( refSpan.at ( 1 ) >= hypSpan.at ( 0 ) ) && ( refSpan.at ( 0 ) <= hypSpan.at ( 1 ) ) )
- {
- return true;
+hashMapInfos terCalc::createConcordMots ( vector<string> hyp, vector<string> ref )
+{
+ hashMap tempHash;
+ hashMapInfos retour;
+ for ( int i = 0; i < ( int ) hyp.size(); i++ ) {
+ tempHash.addHasher ( hyp.at ( i ), "" );
+ }
+ bool cor[ref.size() ];
+ for ( int i = 0; i < ( int ) ref.size(); i++ ) {
+ if ( tempHash.trouve ( ( string ) ref.at ( i ) ) ) {
+ cor[i] = true;
+ } else {
+ cor[i] = false;
+ }
+ }
+ for ( int start = 0; start < ( int ) ref.size(); start++ ) {
+ if ( cor[start] ) {
+ for ( int end = start; ( ( end < ( int ) ref.size() ) && ( end - start <= TAILLE_PERMUT_MAX ) && ( cor[end] ) ); end++ ) {
+ vector<string> ajouter = subVector ( ref, start, end + 1 );
+ string ajouterString = vectorToString ( ajouter );
+ vector<int> values = retour.getValue ( ajouterString );
+ values.push_back ( start );
+ if ( values.size() > 1 ) {
+ retour.setValue ( ajouterString, values );
+ } else {
+ retour.addValue ( ajouterString, values );
}
- return false;
+ }
}
+ }
+ return retour;
+}
+bool terCalc::trouverIntersection ( vecInt refSpan, vecInt hypSpan )
+{
+ if ( ( refSpan.at ( 1 ) >= hypSpan.at ( 0 ) ) && ( refSpan.at ( 0 ) <= hypSpan.at ( 1 ) ) ) {
+ return true;
+ }
+ return false;
+}
- terAlignment terCalc::minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans )
- {
- double current_best = infinite;
- double last_best = infinite;
- int first_good = 0;
- int current_first_good = 0;
- int last_good = -1;
- int cur_last_good = 0;
- int last_peak = 0;
- int cur_last_peak = 0;
- int i, j;
- double cost, icost, dcost;
- double score;
-
-
-
- NBR_BS_APPELS++;
-
-
- for ( i = 0; i <= ( int ) ref.size(); i++ )
- {
- for ( j = 0; j <= ( int ) hyp.size(); j++ )
- {
- S[i][j] = -1.0;
- P[i][j] = '0';
- }
- }
- S[0][0] = 0.0;
- for ( j = 0; j <= ( int ) hyp.size(); j++ )
- {
- last_best = current_best;
- current_best = infinite;
- first_good = current_first_good;
- current_first_good = -1;
- last_good = cur_last_good;
- cur_last_good = -1;
- last_peak = cur_last_peak;
- cur_last_peak = 0;
- for ( i = first_good; i <= ( int ) ref.size(); i++ )
- {
- if ( i > last_good )
- {
- break;
- }
- if ( S[i][j] < 0 )
- {
- continue;
- }
- score = S[i][j];
- if ( ( j < ( int ) hyp.size() ) && ( score > last_best + TAILLE_BEAM ) )
- {
- continue;
- }
- if ( current_first_good == -1 )
- {
- current_first_good = i ;
- }
- if ( ( i < ( int ) ref.size() ) && ( j < ( int ) hyp.size() ) )
- {
- if ( ( int ) refSpans.size() == 0 || ( int ) hypSpans.size() == 0 || trouverIntersection ( refSpans.at ( i ), curHypSpans.at ( j ) ) )
- {
- if ( ( int ) ( ref.at ( i ).compare ( hyp.at ( j ) ) ) == 0 )
- {
- cost = match_cost + score;
- if ( ( S[i+1][j+1] == -1 ) || ( cost < S[i+1][j+1] ) )
- {
- S[i+1][j+1] = cost;
- P[i+1][j+1] = 'A';
- }
- if ( cost < current_best )
- {
- current_best = cost;
- }
- if ( current_best == cost )
- {
- cur_last_peak = i + 1;
- }
- }
- else
- {
- cost = substitute_cost + score;
- if ( ( S[i+1][j+1] < 0 ) || ( cost < S[i+1][j+1] ) )
- {
- S[i+1][j+1] = cost;
- P[i+1][j+1] = 'S';
- if ( cost < current_best )
- {
- current_best = cost;
- }
- if ( current_best == cost )
- {
- cur_last_peak = i + 1 ;
- }
- }
- }
- }
- }
- cur_last_good = i + 1;
- if ( j < ( int ) hyp.size() )
- {
- icost = score + insert_cost;
- if ( ( S[i][j+1] < 0 ) || ( S[i][j+1] > icost ) )
- {
- S[i][j+1] = icost;
- P[i][j+1] = 'I';
- if ( ( cur_last_peak < i ) && ( current_best == icost ) )
- {
- cur_last_peak = i;
- }
- }
- }
- if ( i < ( int ) ref.size() )
- {
- dcost = score + delete_cost;
- if ( ( S[ i+1][ j] < 0.0 ) || ( S[i+1][j] > dcost ) )
- {
- S[i+1][j] = dcost;
- P[i+1][j] = 'D';
- if ( i >= last_good )
- {
- last_good = i + 1 ;
- }
- }
- }
- }
- }
+
+terAlignment terCalc::minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans )
+{
+ double current_best = infinite;
+ double last_best = infinite;
+ int first_good = 0;
+ int current_first_good = 0;
+ int last_good = -1;
+ int cur_last_good = 0;
+ int last_peak = 0;
+ int cur_last_peak = 0;
+ int i, j;
+ double cost, icost, dcost;
+ double score;
- int tracelength = 0;
- i = ref.size();
- j = hyp.size();
- while ( ( i > 0 ) || ( j > 0 ) )
- {
- tracelength++;
- if ( P[i][j] == 'A' )
- {
- i--;
- j--;
+
+ NBR_BS_APPELS++;
+
+
+ for ( i = 0; i <= ( int ) ref.size(); i++ ) {
+ for ( j = 0; j <= ( int ) hyp.size(); j++ ) {
+ S[i][j] = -1.0;
+ P[i][j] = '0';
+ }
+ }
+ S[0][0] = 0.0;
+ for ( j = 0; j <= ( int ) hyp.size(); j++ ) {
+ last_best = current_best;
+ current_best = infinite;
+ first_good = current_first_good;
+ current_first_good = -1;
+ last_good = cur_last_good;
+ cur_last_good = -1;
+ last_peak = cur_last_peak;
+ cur_last_peak = 0;
+ for ( i = first_good; i <= ( int ) ref.size(); i++ ) {
+ if ( i > last_good ) {
+ break;
+ }
+ if ( S[i][j] < 0 ) {
+ continue;
+ }
+ score = S[i][j];
+ if ( ( j < ( int ) hyp.size() ) && ( score > last_best + TAILLE_BEAM ) ) {
+ continue;
+ }
+ if ( current_first_good == -1 ) {
+ current_first_good = i ;
+ }
+ if ( ( i < ( int ) ref.size() ) && ( j < ( int ) hyp.size() ) ) {
+ if ( ( int ) refSpans.size() == 0 || ( int ) hypSpans.size() == 0 || trouverIntersection ( refSpans.at ( i ), curHypSpans.at ( j ) ) ) {
+ if ( ( int ) ( ref.at ( i ).compare ( hyp.at ( j ) ) ) == 0 ) {
+ cost = match_cost + score;
+ if ( ( S[i+1][j+1] == -1 ) || ( cost < S[i+1][j+1] ) ) {
+ S[i+1][j+1] = cost;
+ P[i+1][j+1] = 'A';
}
- else
- if ( P[i][j] == 'S' )
- {
- i--;
- j--;
- }
- else
- if ( P[i][j] == 'D' )
- {
- i--;
- }
- else
- if ( P[i][j] == 'I' )
- {
- j--;
- }
- else
- {
- cerr << "ERROR : terCalc::minimizeDistanceEdition : Invalid path : " << P[i][j] << endl;
- exit ( -1 );
- }
- }
- vector<char> path ( tracelength );
- i = ref.size();
- j = hyp.size();
- while ( ( i > 0 ) || ( j > 0 ) )
- {
- path[--tracelength] = P[i][j];
- if ( P[i][j] == 'A' )
- {
- i--;
- j--;
+ if ( cost < current_best ) {
+ current_best = cost;
+ }
+ if ( current_best == cost ) {
+ cur_last_peak = i + 1;
}
- else
- if ( P[i][j] == 'S' )
- {
- i--;
- j--;
- }
- else
- if ( P[i][j] == 'D' )
- {
- i--;
- }
- else
- if ( P[i][j] == 'I' )
- {
- j--;
- }
+ } else {
+ cost = substitute_cost + score;
+ if ( ( S[i+1][j+1] < 0 ) || ( cost < S[i+1][j+1] ) ) {
+ S[i+1][j+1] = cost;
+ P[i+1][j+1] = 'S';
+ if ( cost < current_best ) {
+ current_best = cost;
+ }
+ if ( current_best == cost ) {
+ cur_last_peak = i + 1 ;
+ }
+ }
+ }
}
- terAlignment to_return;
- to_return.numWords = ref.size();
- to_return.alignment = path;
- to_return.numEdits = S[ref.size() ][hyp.size() ];
- to_return.hyp = hyp;
- to_return.ref = ref;
- to_return.averageWords = (int)ref.size();
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::minimizeDistanceEdition : to_return :" << endl << to_return.toString() << endl << "END DEBUG" << endl;
+ }
+ cur_last_good = i + 1;
+ if ( j < ( int ) hyp.size() ) {
+ icost = score + insert_cost;
+ if ( ( S[i][j+1] < 0 ) || ( S[i][j+1] > icost ) ) {
+ S[i][j+1] = icost;
+ P[i][j+1] = 'I';
+ if ( ( cur_last_peak < i ) && ( current_best == icost ) ) {
+ cur_last_peak = i;
+ }
}
- return to_return;
-
+ }
+ if ( i < ( int ) ref.size() ) {
+ dcost = score + delete_cost;
+ if ( ( S[ i+1][ j] < 0.0 ) || ( S[i+1][j] > dcost ) ) {
+ S[i+1][j] = dcost;
+ P[i+1][j] = 'D';
+ if ( i >= last_good ) {
+ last_good = i + 1 ;
+ }
+ }
+ }
+ }
+ }
+
+
+ int tracelength = 0;
+ i = ref.size();
+ j = hyp.size();
+ while ( ( i > 0 ) || ( j > 0 ) ) {
+ tracelength++;
+ if ( P[i][j] == 'A' ) {
+ i--;
+ j--;
+ } else if ( P[i][j] == 'S' ) {
+ i--;
+ j--;
+ } else if ( P[i][j] == 'D' ) {
+ i--;
+ } else if ( P[i][j] == 'I' ) {
+ j--;
+ } else {
+ cerr << "ERROR : terCalc::minimizeDistanceEdition : Invalid path : " << P[i][j] << endl;
+ exit ( -1 );
+ }
+ }
+ vector<char> path ( tracelength );
+ i = ref.size();
+ j = hyp.size();
+ while ( ( i > 0 ) || ( j > 0 ) ) {
+ path[--tracelength] = P[i][j];
+ if ( P[i][j] == 'A' ) {
+ i--;
+ j--;
+ } else if ( P[i][j] == 'S' ) {
+ i--;
+ j--;
+ } else if ( P[i][j] == 'D' ) {
+ i--;
+ } else if ( P[i][j] == 'I' ) {
+ j--;
}
- terAlignment terCalc::TER ( vector<string> hyp, vector<string> ref )
- {
- hashMapInfos rloc = createConcordMots ( hyp, ref );
- terAlignment cur_align = minimizeDistanceEdition ( hyp, ref, hypSpans );
- vector<string> cur = hyp;
- cur_align.hyp = hyp;
- cur_align.ref = ref;
- cur_align.aftershift = hyp;
- double edits = 0;
+ }
+ terAlignment to_return;
+ to_return.numWords = ref.size();
+ to_return.alignment = path;
+ to_return.numEdits = S[ref.size() ][hyp.size() ];
+ to_return.hyp = hyp;
+ to_return.ref = ref;
+ to_return.averageWords = (int)ref.size();
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::minimizeDistanceEdition : to_return :" << endl << to_return.toString() << endl << "END DEBUG" << endl;
+ }
+ return to_return;
+
+}
+terAlignment terCalc::TER ( vector<string> hyp, vector<string> ref )
+{
+ hashMapInfos rloc = createConcordMots ( hyp, ref );
+ terAlignment cur_align = minimizeDistanceEdition ( hyp, ref, hypSpans );
+ vector<string> cur = hyp;
+ cur_align.hyp = hyp;
+ cur_align.ref = ref;
+ cur_align.aftershift = hyp;
+ double edits = 0;
// int numshifts = 0;
- vector<terShift> allshifts;
+ vector<terShift> allshifts;
// cerr << "Initial Alignment:" << endl << cur_align.toString() <<endl;
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::TER : cur_align :" << endl << cur_align.toString() << endl << "END DEBUG" << endl;
- }
- while ( true )
- {
- bestShiftStruct returns;
- returns = findBestShift ( cur, hyp, ref, rloc, cur_align );
- if ( returns.m_empty )
- {
- break;
- }
- terShift bestShift = returns.m_best_shift;
- cur_align = returns.m_best_align;
- edits += bestShift.cost;
- bestShift.alignment = cur_align.alignment;
- bestShift.aftershift = cur_align.aftershift;
- allshifts.push_back ( bestShift );
- cur = cur_align.aftershift;
- }
- terAlignment to_return;
- to_return = cur_align;
- to_return.allshifts = allshifts;
- to_return.numEdits += edits;
- NBR_SEGS_EVALUATED++;
- return to_return;
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::TER : cur_align :" << endl << cur_align.toString() << endl << "END DEBUG" << endl;
+ }
+ while ( true ) {
+ bestShiftStruct returns;
+ returns = findBestShift ( cur, hyp, ref, rloc, cur_align );
+ if ( returns.m_empty ) {
+ break;
}
- bestShiftStruct terCalc::findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment med_align )
- {
- bestShiftStruct to_return;
- bool anygain = false;
- bool herr[ ( int ) hyp.size() ];
- bool rerr[ ( int ) ref.size() ];
- int ralign[ ( int ) ref.size() ];
- calculateTerAlignment ( med_align, herr, rerr, ralign );
- vector<vecTerShift> poss_shifts;
-
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::findBestShift (after the calculateTerAlignment call) :" << endl;
- cerr << "indices: ";
- for (int l_i=0; l_i < ( int ) ref.size() ; l_i++)
- {
- cerr << l_i << "\t";
- }
- cerr << endl;
- cerr << "hyp : \t"<<vectorToString(hyp ,"\t") << endl;
- cerr << "cur : \t"<<vectorToString(cur ,"\t") << endl;
- cerr << "ref : \t"<<vectorToString(ref ,"\t") << endl;
- cerr << "herr : "<<vectorToString(herr,"\t",( int ) hyp.size()) << " | " << ( int ) hyp.size() <<endl;
- cerr << "rerr : "<<vectorToString(rerr,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() <<endl;
- cerr << "ralign : "<< vectorToString(ralign,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() << endl;
- cerr << "END DEBUG " << endl;
- }
- poss_shifts = calculerPermutations ( cur, ref, rloc, med_align, herr, rerr, ralign );
- double curerr = med_align.numEdits;
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
- cerr << "Possible Shifts:" << endl;
- for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- )
- {
- for ( int j = 0; j < ( int ) ( poss_shifts.at ( i ) ).size(); j++ )
- {
- cerr << " [" << i << "] " << ( ( poss_shifts.at ( i ) ).at ( j ) ).toString() << endl;
- }
- }
- cerr << endl;
- cerr << "END DEBUG " << endl;
- }
+ terShift bestShift = returns.m_best_shift;
+ cur_align = returns.m_best_align;
+ edits += bestShift.cost;
+ bestShift.alignment = cur_align.alignment;
+ bestShift.aftershift = cur_align.aftershift;
+ allshifts.push_back ( bestShift );
+ cur = cur_align.aftershift;
+ }
+ terAlignment to_return;
+ to_return = cur_align;
+ to_return.allshifts = allshifts;
+ to_return.numEdits += edits;
+ NBR_SEGS_EVALUATED++;
+ return to_return;
+}
+bestShiftStruct terCalc::findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment med_align )
+{
+ bestShiftStruct to_return;
+ bool anygain = false;
+ bool herr[ ( int ) hyp.size() ];
+ bool rerr[ ( int ) ref.size() ];
+ int ralign[ ( int ) ref.size() ];
+ calculateTerAlignment ( med_align, herr, rerr, ralign );
+ vector<vecTerShift> poss_shifts;
+
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift (after the calculateTerAlignment call) :" << endl;
+ cerr << "indices: ";
+ for (int l_i=0; l_i < ( int ) ref.size() ; l_i++) {
+ cerr << l_i << "\t";
+ }
+ cerr << endl;
+ cerr << "hyp : \t"<<vectorToString(hyp ,"\t") << endl;
+ cerr << "cur : \t"<<vectorToString(cur ,"\t") << endl;
+ cerr << "ref : \t"<<vectorToString(ref ,"\t") << endl;
+ cerr << "herr : "<<vectorToString(herr,"\t",( int ) hyp.size()) << " | " << ( int ) hyp.size() <<endl;
+ cerr << "rerr : "<<vectorToString(rerr,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() <<endl;
+ cerr << "ralign : "<< vectorToString(ralign,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ poss_shifts = calculerPermutations ( cur, ref, rloc, med_align, herr, rerr, ralign );
+ double curerr = med_align.numEdits;
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Possible Shifts:" << endl;
+ for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- ) {
+ for ( int j = 0; j < ( int ) ( poss_shifts.at ( i ) ).size(); j++ ) {
+ cerr << " [" << i << "] " << ( ( poss_shifts.at ( i ) ).at ( j ) ).toString() << endl;
+ }
+ }
+ cerr << endl;
+ cerr << "END DEBUG " << endl;
+ }
// exit(0);
- double cur_best_shift_cost = 0.0;
- terAlignment cur_best_align = med_align;
- terShift cur_best_shift;
+ double cur_best_shift_cost = 0.0;
+ terAlignment cur_best_align = med_align;
+ terShift cur_best_shift;
- for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- )
- {
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
- cerr << "Considering shift of length " << i << " (" << ( poss_shifts.at ( i ) ).size() << ")" << endl;
- cerr << "END DEBUG " << endl;
- }
- /* Consider shifts of length i+1 */
- double curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
- double maxfix = ( 2 * ( 1 + i ) );
- if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) )
- {
- break;
- }
+ for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- ) {
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Considering shift of length " << i << " (" << ( poss_shifts.at ( i ) ).size() << ")" << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ /* Consider shifts of length i+1 */
+ double curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
+ double maxfix = ( 2 * ( 1 + i ) );
+ if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) {
+ break;
+ }
- for ( int s = 0; s < ( int ) ( poss_shifts.at ( i ) ).size(); s++ )
- {
- curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
- if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) )
- {
- break;
- }
- terShift curshift = ( poss_shifts.at ( i ) ).at ( s );
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
- cerr << "cur : "<< join(" ",cur) << endl;
- cerr << "curshift : "<< curshift.toString() << endl;
-
- }
- alignmentStruct shiftReturns = permuter ( cur, curshift );
- vector<string> shiftarr = shiftReturns.nwords;
- vector<vecInt> curHypSpans = shiftReturns.aftershift;
-
- if ( PRINT_DEBUG )
- {
- cerr << "shiftarr : "<< join(" ",shiftarr) << endl;
+ for ( int s = 0; s < ( int ) ( poss_shifts.at ( i ) ).size(); s++ ) {
+ curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
+ if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) {
+ break;
+ }
+ terShift curshift = ( poss_shifts.at ( i ) ).at ( s );
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "cur : "<< join(" ",cur) << endl;
+ cerr << "curshift : "<< curshift.toString() << endl;
+
+ }
+ alignmentStruct shiftReturns = permuter ( cur, curshift );
+ vector<string> shiftarr = shiftReturns.nwords;
+ vector<vecInt> curHypSpans = shiftReturns.aftershift;
+
+ if ( PRINT_DEBUG ) {
+ cerr << "shiftarr : "<< join(" ",shiftarr) << endl;
// cerr << "curHypSpans : "<< curHypSpans.toString() << endl;
- cerr << "END DEBUG " << endl;
- }
- terAlignment curalign = minimizeDistanceEdition ( shiftarr, ref, curHypSpans );
-
- curalign.hyp = hyp;
- curalign.ref = ref;
- curalign.aftershift = shiftarr;
-
-
- double gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost );
-
- // if (DEBUG) {
- // string testeuh=terAlignment join(" ", shiftarr);
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
- cerr << "Gain for " << curshift.toString() << " is " << gain << ". (result: [" << curalign.join ( " ", shiftarr ) << "]" << endl;
- cerr << "Details of gains : gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost )"<<endl;
- cerr << "Details of gains : gain = ("<<cur_best_align.numEdits << "+" << cur_best_shift_cost << ") - (" << curalign.numEdits << "+" << curshift.cost << ")"<<endl;
- cerr << "" << curalign.toString() << "\n" << endl;
- cerr << "END DEBUG " << endl;
- }
- // }
- //
- if ( ( gain > 0 ) || ( ( cur_best_shift_cost == 0 ) && ( gain == 0 ) ) )
- {
- anygain = true;
- cur_best_shift = curshift;
- cur_best_shift_cost = curshift.cost;
- cur_best_align = curalign;
- // if (DEBUG)
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
- cerr << "Tmp Choosing shift: " << cur_best_shift.toString() << " gives:\n" << cur_best_align.toString() << "\n" << endl;
- cerr << "END DEBUG " << endl;
- }
- }
- }
- }
- if ( anygain )
- {
- to_return.m_best_shift = cur_best_shift;
- to_return.m_best_align = cur_best_align;
- to_return.m_empty = false;
+ cerr << "END DEBUG " << endl;
+ }
+ terAlignment curalign = minimizeDistanceEdition ( shiftarr, ref, curHypSpans );
+
+ curalign.hyp = hyp;
+ curalign.ref = ref;
+ curalign.aftershift = shiftarr;
+
+
+ double gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost );
+
+ // if (DEBUG) {
+ // string testeuh=terAlignment join(" ", shiftarr);
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Gain for " << curshift.toString() << " is " << gain << ". (result: [" << curalign.join ( " ", shiftarr ) << "]" << endl;
+ cerr << "Details of gains : gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost )"<<endl;
+ cerr << "Details of gains : gain = ("<<cur_best_align.numEdits << "+" << cur_best_shift_cost << ") - (" << curalign.numEdits << "+" << curshift.cost << ")"<<endl;
+ cerr << "" << curalign.toString() << "\n" << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ // }
+ //
+ if ( ( gain > 0 ) || ( ( cur_best_shift_cost == 0 ) && ( gain == 0 ) ) ) {
+ anygain = true;
+ cur_best_shift = curshift;
+ cur_best_shift_cost = curshift.cost;
+ cur_best_align = curalign;
+ // if (DEBUG)
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Tmp Choosing shift: " << cur_best_shift.toString() << " gives:\n" << cur_best_align.toString() << "\n" << endl;
+ cerr << "END DEBUG " << endl;
}
- else
- {
- to_return.m_empty = true;
- }
- return to_return;
+ }
}
+ }
+ if ( anygain ) {
+ to_return.m_best_shift = cur_best_shift;
+ to_return.m_best_align = cur_best_align;
+ to_return.m_empty = false;
+ } else {
+ to_return.m_empty = true;
+ }
+ return to_return;
+}
- void terCalc::calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign )
- {
- int hpos = -1;
- int rpos = -1;
- if ( PRINT_DEBUG )
- {
+void terCalc::calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign )
+{
+ int hpos = -1;
+ int rpos = -1;
+ if ( PRINT_DEBUG ) {
+
+ cerr << "BEGIN DEBUG : terCalc::calculateTerAlignment : " << endl << align.toString() << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ for ( int i = 0; i < ( int ) align.alignment.size(); i++ ) {
+ herr[i] = false;
+ rerr[i] = false;
+ ralign[i] = -1;
+ }
+ for ( int i = 0; i < ( int ) align.alignment.size(); i++ ) {
+ char sym = align.alignment[i];
+ if ( sym == 'A' ) {
+ hpos++;
+ rpos++;
+ herr[hpos] = false;
+ rerr[rpos] = false;
+ ralign[rpos] = hpos;
+ } else if ( sym == 'S' ) {
+ hpos++;
+ rpos++;
+ herr[hpos] = true;
+ rerr[rpos] = true;
+ ralign[rpos] = hpos;
+ } else if ( sym == 'I' ) {
+ hpos++;
+ herr[hpos] = true;
+ } else if ( sym == 'D' ) {
+ rpos++;
+ rerr[rpos] = true;
+ ralign[rpos] = hpos+1;
+ } else {
+ cerr << "ERROR : terCalc::calculateTerAlignment : Invalid mini align sequence " << sym << " at pos " << i << endl;
+ exit ( -1 );
+ }
+ }
+}
- cerr << "BEGIN DEBUG : terCalc::calculateTerAlignment : " << endl << align.toString() << endl;
- cerr << "END DEBUG " << endl;
- }
- for ( int i = 0; i < ( int ) align.alignment.size(); i++ )
- {
- herr[i] = false;
- rerr[i] = false;
- ralign[i] = -1;
- }
- for ( int i = 0; i < ( int ) align.alignment.size(); i++ )
- {
- char sym = align.alignment[i];
- if ( sym == 'A' )
- {
- hpos++;
- rpos++;
- herr[hpos] = false;
- rerr[rpos] = false;
- ralign[rpos] = hpos;
- }
- else
- if ( sym == 'S' )
- {
- hpos++;
- rpos++;
- herr[hpos] = true;
- rerr[rpos] = true;
- ralign[rpos] = hpos;
- }
- else
- if ( sym == 'I' )
- {
- hpos++;
- herr[hpos] = true;
- }
- else
- if ( sym == 'D' )
- {
- rpos++;
- rerr[rpos] = true;
- ralign[rpos] = hpos+1;
- }
- else
- {
- cerr << "ERROR : terCalc::calculateTerAlignment : Invalid mini align sequence " << sym << " at pos " << i << endl;
- exit ( -1 );
- }
- }
+vector<vecTerShift> terCalc::calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign )
+{
+ vector<vecTerShift> to_return;
+ if ( ( TAILLE_PERMUT_MAX <= 0 ) || ( DIST_MAX_PERMUT <= 0 ) ) {
+ return to_return;
+ }
+
+ vector<vecTerShift> allshifts ( TAILLE_PERMUT_MAX + 1 );
+ for ( int start = 0; start < ( int ) hyp.size(); start++ ) {
+ string subVectorHypString = vectorToString ( subVector ( hyp, start, start + 1 ) );
+ if ( ! rloc.trouve ( subVectorHypString ) ) {
+ continue;
}
- vector<vecTerShift> terCalc::calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign )
- {
- vector<vecTerShift> to_return;
- if ( ( TAILLE_PERMUT_MAX <= 0 ) || ( DIST_MAX_PERMUT <= 0 ) )
- {
- return to_return;
+ bool ok = false;
+ vector<int> mtiVec = rloc.getValue ( subVectorHypString );
+ vector<int>::iterator mti = mtiVec.begin();
+ while ( mti != mtiVec.end() && ( ! ok ) ) {
+ int moveto = ( *mti );
+ mti++;
+ if ( ( start != ralign[moveto] ) && ( ( ralign[moveto] - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign[moveto] - 1 ) <= DIST_MAX_PERMUT ) ) {
+ ok = true;
+ }
+ }
+ if ( ! ok ) {
+ continue;
+ }
+ ok = true;
+ for ( int end = start; ( ok && ( end < ( int ) hyp.size() ) && ( end < start + TAILLE_PERMUT_MAX ) ); end++ ) {
+ /* check if cand is good if so, add it */
+ vector<string> cand = subVector ( hyp, start, end + 1 );
+ ok = false;
+ if ( ! ( rloc.trouve ( vectorToString ( cand ) ) ) ) {
+ continue;
+ }
+
+ bool any_herr = false;
+
+ for ( int i = 0; ( ( i <= ( end - start ) ) && ( ! any_herr ) ); i++ ) {
+ if ( herr[start+i] ) {
+ any_herr = true;
}
+ }
+ if ( any_herr == false ) {
+ ok = true;
+ continue;
+ }
+
+ vector<int> movetoitVec;
+ movetoitVec = rloc.getValue ( ( string ) vectorToString ( cand ) );
+// cerr << "CANDIDATE " << ( string ) vectorToString ( cand ) <<" PLACED : " << ( string ) vectorToString ( movetoitVec," ") << endl;
+ vector<int>::iterator movetoit = movetoitVec.begin();
+ while ( movetoit != movetoitVec.end() ) {
+ int moveto = ( *movetoit );
+ movetoit++;
+ if ( ! ( ( ralign[moveto] != start ) && ( ( ralign[moveto] < start ) || ( ralign[moveto] > end ) ) && ( ( ralign[moveto] - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign[moveto] ) <= DIST_MAX_PERMUT ) ) ) {
+ continue;
+ }
+ ok = true;
- vector<vecTerShift> allshifts ( TAILLE_PERMUT_MAX + 1 );
- for ( int start = 0; start < ( int ) hyp.size(); start++ )
- {
- string subVectorHypString = vectorToString ( subVector ( hyp, start, start + 1 ) );
- if ( ! rloc.trouve ( subVectorHypString ) )
- {
- continue;
- }
+ /* check to see if there are any errors in either string
+ (only move if this is the case!)
+ */
+
+ bool any_rerr = false;
+ for ( int i = 0; ( i <= end - start ) && ( ! any_rerr ); i++ ) {
+ if ( rerr[moveto+i] ) {
+ any_rerr = true;
+ }
+ }
+ if ( ! any_rerr ) {
+ continue;
+ }
+ for ( int roff = -1; roff <= ( end - start ); roff++ ) {
+ terShift topush;
+ bool topushNull = true;
+ if ( ( roff == -1 ) && ( moveto == 0 ) ) {
+ if ( PRINT_DEBUG ) {
- bool ok = false;
- vector<int> mtiVec = rloc.getValue ( subVectorHypString );
- vector<int>::iterator mti = mtiVec.begin();
- while ( mti != mtiVec.end() && ( ! ok ) )
- {
- int moveto = ( *mti );
- mti++;
- if ( ( start != ralign[moveto] ) && ( ( ralign[moveto] - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign[moveto] - 1 ) <= DIST_MAX_PERMUT ) )
- {
- ok = true;
- }
+ cerr << "BEGIN DEBUG : terCalc::calculerPermutations 01 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: -1" << endl << "END DEBUG" << endl;
}
- if ( ! ok )
- {
- continue;
+ terShift t01 ( start, end, -1, -1 );
+ topush = t01;
+ topushNull = false;
+ } else if ( ( start != ralign[moveto+roff] ) && ( ( roff == 0 ) || ( ralign[moveto+roff] != ralign[moveto] ) ) ) {
+ int newloc = ralign[moveto+roff];
+ if ( PRINT_DEBUG ) {
+
+ cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: " << newloc << endl << "END DEBUG" << endl;
}
- ok = true;
- for ( int end = start; ( ok && ( end < ( int ) hyp.size() ) && ( end < start + TAILLE_PERMUT_MAX ) ); end++ )
- {
- /* check if cand is good if so, add it */
- vector<string> cand = subVector ( hyp, start, end + 1 );
- ok = false;
- if ( ! ( rloc.trouve ( vectorToString ( cand ) ) ) )
- {
- continue;
- }
-
- bool any_herr = false;
-
- for ( int i = 0; ( ( i <= ( end - start ) ) && ( ! any_herr ) ); i++ )
- {
- if ( herr[start+i] )
- {
- any_herr = true;
- }
- }
- if ( any_herr == false )
- {
- ok = true;
- continue;
- }
-
- vector<int> movetoitVec;
- movetoitVec = rloc.getValue ( ( string ) vectorToString ( cand ) );
-// cerr << "CANDIDATE " << ( string ) vectorToString ( cand ) <<" PLACED : " << ( string ) vectorToString ( movetoitVec," ") << endl;
- vector<int>::iterator movetoit = movetoitVec.begin();
- while ( movetoit != movetoitVec.end() )
- {
- int moveto = ( *movetoit );
- movetoit++;
- if ( ! ( ( ralign[moveto] != start ) && ( ( ralign[moveto] < start ) || ( ralign[moveto] > end ) ) && ( ( ralign[moveto] - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign[moveto] ) <= DIST_MAX_PERMUT ) ) )
- {
- continue;
- }
- ok = true;
-
- /* check to see if there are any errors in either string
- (only move if this is the case!)
- */
-
- bool any_rerr = false;
- for ( int i = 0; ( i <= end - start ) && ( ! any_rerr ); i++ )
- {
- if ( rerr[moveto+i] )
- {
- any_rerr = true;
- }
- }
- if ( ! any_rerr )
- {
- continue;
- }
- for ( int roff = -1; roff <= ( end - start ); roff++ )
- {
- terShift topush;
- bool topushNull = true;
- if ( ( roff == -1 ) && ( moveto == 0 ) )
- {
- if ( PRINT_DEBUG )
- {
-
- cerr << "BEGIN DEBUG : terCalc::calculerPermutations 01 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: -1" << endl << "END DEBUG" << endl;
- }
- terShift t01 ( start, end, -1, -1 );
- topush = t01;
- topushNull = false;
- }
- else
- if ( ( start != ralign[moveto+roff] ) && ( ( roff == 0 ) || ( ralign[moveto+roff] != ralign[moveto] ) ) )
- {
- int newloc = ralign[moveto+roff];
- if ( PRINT_DEBUG )
- {
-
- cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: " << newloc << endl << "END DEBUG" << endl;
- }
- terShift t02 ( start, end, moveto + roff, newloc );
- topush = t02;
- topushNull = false;
- }
- if ( !topushNull )
- {
- topush.shifted = cand;
- topush.cost = shift_cost;
- if ( PRINT_DEBUG )
- {
-
- cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl;
- cerr << "start : " << start << endl;
- cerr << "end : " << end << endl;
- cerr << "end - start : " << end - start << endl;
- cerr << "END DEBUG " << endl;
- }
- ( allshifts.at ( end - start ) ).push_back ( topush );
- }
- }
- }
+ terShift t02 ( start, end, moveto + roff, newloc );
+ topush = t02;
+ topushNull = false;
+ }
+ if ( !topushNull ) {
+ topush.shifted = cand;
+ topush.cost = shift_cost;
+ if ( PRINT_DEBUG ) {
+
+ cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl;
+ cerr << "start : " << start << endl;
+ cerr << "end : " << end << endl;
+ cerr << "end - start : " << end - start << endl;
+ cerr << "END DEBUG " << endl;
}
+ ( allshifts.at ( end - start ) ).push_back ( topush );
+ }
}
- to_return.clear();
- for ( int i = 0; i < TAILLE_PERMUT_MAX + 1; i++ )
- {
- to_return.push_back ( ( vecTerShift ) allshifts.at ( i ) );
- }
- return to_return;
+ }
}
+ }
+ to_return.clear();
+ for ( int i = 0; i < TAILLE_PERMUT_MAX + 1; i++ ) {
+ to_return.push_back ( ( vecTerShift ) allshifts.at ( i ) );
+ }
+ return to_return;
+}
- alignmentStruct terCalc::permuter ( vector<string> words, terShift s )
- {
- return permuter ( words, s.start, s.end, s.newloc );
- }
+alignmentStruct terCalc::permuter ( vector<string> words, terShift s )
+{
+ return permuter ( words, s.start, s.end, s.newloc );
+}
- alignmentStruct terCalc::permuter ( vector<string> words, int start, int end, int newloc )
- {
- int c = 0;
- vector<string> nwords ( words );
- vector<vecInt> spans ( ( int ) hypSpans.size() );
- alignmentStruct to_return;
- if ( PRINT_DEBUG )
- {
+alignmentStruct terCalc::permuter ( vector<string> words, int start, int end, int newloc )
+{
+ int c = 0;
+ vector<string> nwords ( words );
+ vector<vecInt> spans ( ( int ) hypSpans.size() );
+ alignmentStruct to_return;
+ if ( PRINT_DEBUG ) {
+
+ if ( ( int ) hypSpans.size() > 0 ) {
+ cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: " << ( int ) hypSpans.size() << endl ;
+ } else {
+ cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: null" << endl ;
+ }
+ cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << join(" ",words) << " start: " << start << " end: " << end << " newloc "<< newloc << endl << "END DEBUG " << endl;
+ }
+ if (newloc >= ( int ) words.size()) {
+ if ( PRINT_DEBUG ) {
+ cerr << "WARNING: Relocation over the size of the hypothesis, replacing at the end of it."<<endl;
+ }
+ newloc = ( int ) words.size()-1;
+ }
- if ( ( int ) hypSpans.size() > 0 )
- {
- cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: " << ( int ) hypSpans.size() << endl ;
- }
- else
- {
- cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: null" << endl ;
- }
- cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << join(" ",words) << " start: " << start << " end: " << end << " newloc "<< newloc << endl << "END DEBUG " << endl;
- }
- if (newloc >= ( int ) words.size())
- {
- if ( PRINT_DEBUG )
- {
- cerr << "WARNING: Relocation over the size of the hypothesis, replacing at the end of it."<<endl;
- }
- newloc = ( int ) words.size()-1;
- }
-
// }
- if ( newloc == -1 )
- {
- for ( int i = start; i <= end;i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = 0; i <= start - 1;i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = end + 1; i < ( int ) words.size();i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
+ if ( newloc == -1 ) {
+ for ( int i = start; i <= end; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = 0; i <= start - 1; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; i < ( int ) words.size(); i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ } else {
+ if ( newloc < start ) {
+
+ for ( int i = 0; i < newloc; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
}
- else
- {
- if ( newloc < start )
- {
-
- for ( int i = 0; i < newloc; i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = start; i <= end;i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = newloc ; i < start ;i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = end + 1; i < ( int ) words.size();i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- }
- else
- {
- if ( newloc > end )
- {
- for ( int i = 0; i <= start - 1; i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = end + 1; i <= newloc;i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = start; i <= end;i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = newloc + 1; i < ( int ) words.size();i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- }
- else
- {
- // we are moving inside of ourselves
- for ( int i = 0; i <= start - 1; i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = end + 1; ( i < ( int ) words.size() ) && ( i <= ( end + ( newloc - start ) ) ); i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = start; i <= end;i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = ( end + ( newloc - start ) + 1 ); i < ( int ) words.size();i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- }
- }
+ }
+ for ( int i = start; i <= end; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
}
- NBR_PERMUTS_CONSID++;
-
- if ( PRINT_DEBUG )
- {
- cerr << "nwords" << join(" ",nwords) << endl;
-// cerr << "spans" << spans. << endl;
- }
-
- to_return.nwords = nwords;
- to_return.aftershift = spans;
- return to_return;
- }
- void terCalc::setDebugMode ( bool b )
- {
- PRINT_DEBUG = b;
+ }
+ for ( int i = newloc ; i < start ; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; i < ( int ) words.size(); i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ } else {
+ if ( newloc > end ) {
+ for ( int i = 0; i <= start - 1; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; i <= newloc; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = start; i <= end; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = newloc + 1; i < ( int ) words.size(); i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ } else {
+ // we are moving inside of ourselves
+ for ( int i = 0; i <= start - 1; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; ( i < ( int ) words.size() ) && ( i <= ( end + ( newloc - start ) ) ); i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = start; i <= end; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = ( end + ( newloc - start ) + 1 ); i < ( int ) words.size(); i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ }
}
+ }
+ NBR_PERMUTS_CONSID++;
+
+ if ( PRINT_DEBUG ) {
+ cerr << "nwords" << join(" ",nwords) << endl;
+// cerr << "spans" << spans. << endl;
+ }
+
+ to_return.nwords = nwords;
+ to_return.aftershift = spans;
+ return to_return;
+}
+void terCalc::setDebugMode ( bool b )
+{
+ PRINT_DEBUG = b;
+}
}
diff --git a/mert/TER/tercalc.h b/mert/TER/tercalc.h
index 92d9caf2b..778d83395 100644
--- a/mert/TER/tercalc.h
+++ b/mert/TER/tercalc.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -41,62 +41,62 @@ namespace TERCpp
{
// typedef size_t WERelement[2];
// Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del)
- typedef vector<terShift> vecTerShift;
- /**
- @author
- */
- class terCalc
- {
- private :
+typedef vector<terShift> vecTerShift;
+/**
+ @author
+*/
+class terCalc
+{
+private :
// Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del)
- WERalignment l_WERalignment;
+ WERalignment l_WERalignment;
// HashMap contenant les valeurs de hash de chaque mot
- hashMap bagOfWords;
- int TAILLE_PERMUT_MAX;
- // Increments internes
- int NBR_SEGS_EVALUATED;
- int NBR_PERMUTS_CONSID;
- int NBR_BS_APPELS;
- int DIST_MAX_PERMUT;
- bool PRINT_DEBUG;
+ hashMap bagOfWords;
+ int TAILLE_PERMUT_MAX;
+ // Increments internes
+ int NBR_SEGS_EVALUATED;
+ int NBR_PERMUTS_CONSID;
+ int NBR_BS_APPELS;
+ int DIST_MAX_PERMUT;
+ bool PRINT_DEBUG;
- // Utilisés dans minDistEdit et ils ne sont pas réajustés
- double S[1000][1000];
- char P[1000][1000];
- vector<vecInt> refSpans;
- vector<vecInt> hypSpans;
- int TAILLE_BEAM;
+ // Utilisés dans minDistEdit et ils ne sont pas réajustés
+ double S[1000][1000];
+ char P[1000][1000];
+ vector<vecInt> refSpans;
+ vector<vecInt> hypSpans;
+ int TAILLE_BEAM;
- public:
- int shift_cost;
- int insert_cost;
- int delete_cost;
- int substitute_cost;
- int match_cost;
- double infinite;
- terCalc();
+public:
+ int shift_cost;
+ int insert_cost;
+ int delete_cost;
+ int substitute_cost;
+ int match_cost;
+ double infinite;
+ terCalc();
// ~terCalc();
// size_t* hashVec ( vector<string> s );
- void setDebugMode ( bool b );
+ void setDebugMode ( bool b );
// int WERCalculation ( size_t * ref, size_t * hyp );
// int WERCalculation ( vector<string> ref, vector<string> hyp );
// int WERCalculation ( vector<int> ref, vector<int> hyp );
- terAlignment WERCalculation ( vector<string> hyp, vector<string> ref );
+ terAlignment WERCalculation ( vector<string> hyp, vector<string> ref );
// string vectorToString(vector<string> vec);
// vector<string> subVector(vector<string> vec, int start, int end);
- hashMapInfos createConcordMots ( vector<string> hyp, vector<string> ref );
- terAlignment minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans );
- bool trouverIntersection ( vecInt refSpan, vecInt hypSpan );
- terAlignment TER ( vector<string> hyp, vector<string> ref , float avRefLength );
- terAlignment TER ( vector<string> hyp, vector<string> ref );
- terAlignment TER ( vector<int> hyp, vector<int> ref );
- bestShiftStruct findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment cur_align );
- void calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign );
- vector<vecTerShift> calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign );
- alignmentStruct permuter ( vector<string> words, terShift s );
- alignmentStruct permuter ( vector<string> words, int start, int end, int newloc );
- };
+ hashMapInfos createConcordMots ( vector<string> hyp, vector<string> ref );
+ terAlignment minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans );
+ bool trouverIntersection ( vecInt refSpan, vecInt hypSpan );
+ terAlignment TER ( vector<string> hyp, vector<string> ref , float avRefLength );
+ terAlignment TER ( vector<string> hyp, vector<string> ref );
+ terAlignment TER ( vector<int> hyp, vector<int> ref );
+ bestShiftStruct findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment cur_align );
+ void calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign );
+ vector<vecTerShift> calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign );
+ alignmentStruct permuter ( vector<string> words, terShift s );
+ alignmentStruct permuter ( vector<string> words, int start, int end, int newloc );
+};
}
diff --git a/mert/TER/tools.cpp b/mert/TER/tools.cpp
index 64e1483b6..8858a7119 100644
--- a/mert/TER/tools.cpp
+++ b/mert/TER/tools.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -25,748 +25,677 @@ using namespace boost::xpressive;
namespace Tools
{
- string vectorToString ( vector<string> vec )
- {
- string retour ( "" );
- for ( vector<string>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
- {
- if ( vecIter == vec.begin() )
- {
- retour += ( *vecIter );
- }
- else
- {
- retour += "\t" + ( *vecIter );
- }
- }
- return retour;
+string vectorToString ( vector<string> vec )
+{
+ string retour ( "" );
+ for ( vector<string>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
+ if ( vecIter == vec.begin() ) {
+ retour += ( *vecIter );
+ } else {
+ retour += "\t" + ( *vecIter );
}
- string vectorToString ( vector<char> vec )
- {
- stringstream retour;
- retour.str("");
- for ( vector<char>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
- {
- if ( vecIter == vec.begin() )
- {
- retour << ( *vecIter );
- }
- else
- {
- retour << "\t" << ( *vecIter );
- }
- }
- return retour.str();
+ }
+ return retour;
+}
+string vectorToString ( vector<char> vec )
+{
+ stringstream retour;
+ retour.str("");
+ for ( vector<char>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
+ if ( vecIter == vec.begin() ) {
+ retour << ( *vecIter );
+ } else {
+ retour << "\t" << ( *vecIter );
}
- string vectorToString ( vector<int> vec )
- {
- stringstream retour;
- retour.str("");
- for ( vector<int>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
- {
- if ( vecIter == vec.begin() )
- {
- retour << ( *vecIter );
- }
- else
- {
- retour << "\t" << ( *vecIter );
- }
- }
- return retour.str();
+ }
+ return retour.str();
+}
+string vectorToString ( vector<int> vec )
+{
+ stringstream retour;
+ retour.str("");
+ for ( vector<int>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
+ if ( vecIter == vec.begin() ) {
+ retour << ( *vecIter );
+ } else {
+ retour << "\t" << ( *vecIter );
}
+ }
+ return retour.str();
+}
- string vectorToString ( vector< string > vec, string s )
- {
- string retour ( "" );
- for ( vector<string>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
- {
- if ( vecIter == vec.begin() )
- {
- retour += ( *vecIter );
- }
- else
- {
- retour += s + ( *vecIter );
- }
- }
- return retour;
-
+string vectorToString ( vector< string > vec, string s )
+{
+ string retour ( "" );
+ for ( vector<string>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
+ if ( vecIter == vec.begin() ) {
+ retour += ( *vecIter );
+ } else {
+ retour += s + ( *vecIter );
}
+ }
+ return retour;
- string vectorToString ( vector< char > vec, string s )
- {
- stringstream retour;
- retour.str("");
- for ( vector<char>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
- {
- if ( vecIter == vec.begin() )
- {
- retour << ( *vecIter );
- }
- else
- {
- retour << s << ( *vecIter );
- }
- }
- return retour.str();
+}
+string vectorToString ( vector< char > vec, string s )
+{
+ stringstream retour;
+ retour.str("");
+ for ( vector<char>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
+ if ( vecIter == vec.begin() ) {
+ retour << ( *vecIter );
+ } else {
+ retour << s << ( *vecIter );
}
+ }
+ return retour.str();
- string vectorToString ( vector< int > vec, string s )
- {
- stringstream retour;
- retour.str("");
- for ( vector<int>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
- {
- if ( vecIter == vec.begin() )
- {
- retour << ( *vecIter );
- }
- else
- {
- retour << s << ( *vecIter );
- }
- }
- return retour.str();
+}
+string vectorToString ( vector< int > vec, string s )
+{
+ stringstream retour;
+ retour.str("");
+ for ( vector<int>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
+ if ( vecIter == vec.begin() ) {
+ retour << ( *vecIter );
+ } else {
+ retour << s << ( *vecIter );
}
+ }
+ return retour.str();
- string vectorToString ( vector< bool > vec, string s )
- {
- stringstream retour;
- retour.str("");
- for ( vector<bool>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
- {
- if ( vecIter == vec.begin() )
- {
- retour << ( *vecIter );
- }
- else
- {
- retour << s << ( *vecIter );
- }
- }
- return retour.str();
+}
+string vectorToString ( vector< bool > vec, string s )
+{
+ stringstream retour;
+ retour.str("");
+ for ( vector<bool>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
+ if ( vecIter == vec.begin() ) {
+ retour << ( *vecIter );
+ } else {
+ retour << s << ( *vecIter );
}
- string vectorToString ( char* vec, string s , int taille)
- {
- stringstream retour;
- retour.str("");
- int l_i;
- for ( l_i=0; l_i < taille ; l_i++)
- {
- if ( l_i == 0 )
- {
- retour << vec[l_i];
- }
- else
- {
- retour << s << vec[l_i];
- }
- }
- return retour.str();
+ }
+ return retour.str();
+}
+string vectorToString ( char* vec, string s , int taille)
+{
+ stringstream retour;
+ retour.str("");
+ int l_i;
+ for ( l_i=0; l_i < taille ; l_i++) {
+ if ( l_i == 0 ) {
+ retour << vec[l_i];
+ } else {
+ retour << s << vec[l_i];
}
+ }
+ return retour.str();
- string vectorToString ( int* vec, string s , int taille)
- {
- stringstream retour;
- retour.str("");
- int l_i;
- for ( l_i=0; l_i < taille ; l_i++)
- {
- if ( l_i == 0 )
- {
- retour << vec[l_i];
- }
- else
- {
- retour << s << vec[l_i];
- }
- }
- return retour.str();
+}
+string vectorToString ( int* vec, string s , int taille)
+{
+ stringstream retour;
+ retour.str("");
+ int l_i;
+ for ( l_i=0; l_i < taille ; l_i++) {
+ if ( l_i == 0 ) {
+ retour << vec[l_i];
+ } else {
+ retour << s << vec[l_i];
}
+ }
+ return retour.str();
- string vectorToString ( bool* vec, string s , int taille)
- {
- stringstream retour;
- retour.str("");
- int l_i;
- for ( l_i=0; l_i < taille ; l_i++)
- {
- if ( l_i == 0 )
- {
- retour << vec[l_i];
- }
- else
- {
- retour << s << vec[l_i];
- }
- }
- return retour.str();
+}
+string vectorToString ( bool* vec, string s , int taille)
+{
+ stringstream retour;
+ retour.str("");
+ int l_i;
+ for ( l_i=0; l_i < taille ; l_i++) {
+ if ( l_i == 0 ) {
+ retour << vec[l_i];
+ } else {
+ retour << s << vec[l_i];
}
-
- vector<string> subVector ( vector<string> vec, int start, int end )
- {
- vector<string> retour;
- if ( start > end )
- {
- cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
- exit ( 0 );
- }
- for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ )
- {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
- }
-
- vector<int> subVector ( vector<int> vec, int start, int end )
- {
- vector<int> retour;
- if ( start > end )
- {
- cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
- exit ( 0 );
- }
- for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ )
- {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
- }
-
- vector<float> subVector ( vector<float> vec, int start, int end )
- {
- vector<float> retour;
- if ( start > end )
- {
- cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
- exit ( 0 );
- }
- for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ )
- {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
- }
-
- vector<string> copyVector ( vector<string> vec )
- {
- vector<string> retour;
- for ( int i = 0; i < ( int ) vec.size(); i++ )
- {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
+ }
+ return retour.str();
+
+}
+
+vector<string> subVector ( vector<string> vec, int start, int end )
+{
+ vector<string> retour;
+ if ( start > end ) {
+ cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
+ exit ( 0 );
+ }
+ for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+}
+
+vector<int> subVector ( vector<int> vec, int start, int end )
+{
+ vector<int> retour;
+ if ( start > end ) {
+ cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
+ exit ( 0 );
+ }
+ for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+}
+
+vector<float> subVector ( vector<float> vec, int start, int end )
+{
+ vector<float> retour;
+ if ( start > end ) {
+ cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
+ exit ( 0 );
+ }
+ for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+}
+
+vector<string> copyVector ( vector<string> vec )
+{
+ vector<string> retour;
+ for ( int i = 0; i < ( int ) vec.size(); i++ ) {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+}
+vector<int> copyVector ( vector<int> vec )
+{
+ vector<int> retour;
+ for ( int i = 0; i < ( int ) vec.size(); i++ ) {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+}
+vector<float> copyVector ( vector<float> vec )
+{
+ vector<float> retour;
+ for ( int i = 0; i < ( int ) vec.size(); i++ ) {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+}
+vector<string> stringToVector ( string s, string tok )
+{
+ vector<string> to_return;
+ string to_push ( "" );
+ bool pushed = false;
+ string::iterator sIt;
+ for ( sIt = s.begin(); sIt < s.end(); sIt++ ) {
+ pushed = false;
+ for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) {
+ if ( ( *sIt ) == ( *sTok ) ) {
+ to_return.push_back ( to_push );
+ to_push = "";
+ pushed = true;
+ }
}
- vector<int> copyVector ( vector<int> vec )
- {
- vector<int> retour;
- for ( int i = 0; i < ( int ) vec.size(); i++ )
- {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
+ if ( !pushed ) {
+ to_push.push_back ( ( *sIt ) );
}
- vector<float> copyVector ( vector<float> vec )
- {
- vector<float> retour;
- for ( int i = 0; i < ( int ) vec.size(); i++ )
- {
- retour.push_back ( vec.at ( i ) );
+ }
+ to_return.push_back ( to_push );
+ return to_return;
+}
+vector<int> stringToVectorInt ( string s, string tok )
+{
+ vector<int> to_return;
+ string to_push ( "" );
+ bool pushed = false;
+ string::iterator sIt;
+ for ( sIt = s.begin(); sIt < s.end(); sIt++ ) {
+ pushed = false;
+ for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) {
+ if ( ( *sIt ) == ( *sTok ) ) {
+ if ( ( int ) to_push.length() > 0 ) {
+ to_return.push_back ( atoi ( to_push.c_str() ) );
}
- return retour;
+ to_push = "";
+ pushed = true;
+ }
}
- vector<string> stringToVector ( string s, string tok )
- {
- vector<string> to_return;
- string to_push ( "" );
- bool pushed = false;
- string::iterator sIt;
- for ( sIt = s.begin(); sIt < s.end(); sIt++ )
- {
- pushed = false;
- for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ )
- {
- if ( ( *sIt ) == ( *sTok ) )
- {
- to_return.push_back ( to_push );
- to_push = "";
- pushed = true;
- }
- }
- if ( !pushed )
- {
- to_push.push_back ( ( *sIt ) );
- }
- }
- to_return.push_back ( to_push );
- return to_return;
+ if ( !pushed ) {
+ to_push.push_back ( ( *sIt ) );
}
- vector<int> stringToVectorInt ( string s, string tok )
- {
- vector<int> to_return;
- string to_push ( "" );
- bool pushed = false;
- string::iterator sIt;
- for ( sIt = s.begin(); sIt < s.end(); sIt++ )
- {
- pushed = false;
- for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ )
- {
- if ( ( *sIt ) == ( *sTok ) )
- {
- if ( ( int ) to_push.length() > 0 )
- {
- to_return.push_back ( atoi ( to_push.c_str() ) );
- }
- to_push = "";
- pushed = true;
- }
- }
- if ( !pushed )
- {
- to_push.push_back ( ( *sIt ) );
- }
- }
- if ( ( int ) to_push.length() > 0 )
- {
- to_return.push_back ( atoi ( to_push.c_str() ) );
+ }
+ if ( ( int ) to_push.length() > 0 ) {
+ to_return.push_back ( atoi ( to_push.c_str() ) );
+ }
+ return to_return;
+}
+vector<float> stringToVectorFloat ( string s, string tok )
+{
+ vector<float> to_return;
+ string to_push ( "" );
+ bool pushed = false;
+ string::iterator sIt;
+ for ( sIt = s.begin(); sIt < s.end(); sIt++ ) {
+ pushed = false;
+ for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) {
+ if ( ( *sIt ) == ( *sTok ) ) {
+ if ( ( int ) to_push.length() > 0 ) {
+ to_return.push_back ( atof ( to_push.c_str() ) );
}
- return to_return;
+ to_push = "";
+ pushed = true;
+ }
}
- vector<float> stringToVectorFloat ( string s, string tok )
- {
- vector<float> to_return;
- string to_push ( "" );
- bool pushed = false;
- string::iterator sIt;
- for ( sIt = s.begin(); sIt < s.end(); sIt++ )
- {
- pushed = false;
- for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ )
- {
- if ( ( *sIt ) == ( *sTok ) )
- {
- if ( ( int ) to_push.length() > 0 )
- {
- to_return.push_back ( atof ( to_push.c_str() ) );
- }
- to_push = "";
- pushed = true;
- }
- }
- if ( !pushed )
- {
- to_push.push_back ( ( *sIt ) );
- }
- }
- if ( ( int ) to_push.length() > 0 )
- {
- to_return.push_back ( atoi ( to_push.c_str() ) );
- }
- return to_return;
+ if ( !pushed ) {
+ to_push.push_back ( ( *sIt ) );
}
+ }
+ if ( ( int ) to_push.length() > 0 ) {
+ to_return.push_back ( atoi ( to_push.c_str() ) );
+ }
+ return to_return;
+}
- string lowerCase ( string str )
- {
- for ( int i = 0;i < ( int ) str.size();i++ )
- {
- if ( ( str[i] >= 0x41 ) && ( str[i] <= 0x5A ) )
- {
- str[i] = str[i] + 0x20;
- }
- }
- return str;
+string lowerCase ( string str )
+{
+ for ( int i = 0; i < ( int ) str.size(); i++ ) {
+ if ( ( str[i] >= 0x41 ) && ( str[i] <= 0x5A ) ) {
+ str[i] = str[i] + 0x20;
}
- string removePunctTercom ( string str )
- {
- string str_mod = str;
- sregex rex;
- string replace;
+ }
+ return str;
+}
+string removePunctTercom ( string str )
+{
+ string str_mod = str;
+ sregex rex;
+ string replace;
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\"]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\"]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[,]" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[,]" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([\\.]$)" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([\\.]$)" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\?]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\?]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\;]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\;]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\:]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\:]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\!]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\!]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\(]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\(]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\)]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\)]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+$" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+$" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
- }
- string removePunct ( string str )
- {
- string str_mod = str;
- sregex rex;
- string replace;
+ return str_mod;
+}
+string removePunct ( string str )
+{
+ string str_mod = str;
+ sregex rex;
+ string replace;
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\"]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\"]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[,]" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[,]" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([\\.]$)" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([\\.]$)" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\?]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\?]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\;]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\;]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\:]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\:]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\!]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\!]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\(]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\(]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\)]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\)]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+$" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+$" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
- }
- string tokenizePunct ( string str )
- {
- string str_mod = str;
- sregex rex = sregex::compile ( "(([^0-9])([\\,])([^0-9]))" );
- string replace ( "$2 $3 $4" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ return str_mod;
+}
+string tokenizePunct ( string str )
+{
+ string str_mod = str;
+ sregex rex = sregex::compile ( "(([^0-9])([\\,])([^0-9]))" );
+ string replace ( "$2 $3 $4" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(([^0-9])([\\.])([^0-9]))" );
- replace = ( "$2 $3 $4" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(([^0-9])([\\.])([^0-9]))" );
+ replace = ( "$2 $3 $4" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.]) )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.]) )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.])$)" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.])$)" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([A-Z]|[a-z]) ([\\.]) )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([A-Z]|[a-z]) ([\\.]) )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(([A-Z]|[a-z])([\\.]) ([A-Z]|[a-z])([\\.]) )" );
- replace = ( "$2.$4. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(([A-Z]|[a-z])([\\.]) ([A-Z]|[a-z])([\\.]) )" );
+ replace = ( "$2.$4. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\?]" );
- replace = ( " ? " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\?]" );
+ replace = ( " ? " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\;]" );
- replace = ( " ; " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\;]" );
+ replace = ( " ; " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(([^0-9])([\\:])([^0-9]))" );
- replace = ( "$2 $3 $4" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(([^0-9])([\\:])([^0-9]))" );
+ replace = ( "$2 $3 $4" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\!]" );
- replace = ( " ! " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\!]" );
+ replace = ( " ! " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\(]" );
- replace = ( " ( " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\(]" );
+ replace = ( " ( " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\)]" );
- replace = ( " ) " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\)]" );
+ replace = ( " ) " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\"]" );
- replace = ( " \" " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\"]" );
+ replace = ( " \" " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(num_ \\( ([^\\)]+) \\))" );
- replace = ( "num_($2)" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(num_ \\( ([^\\)]+) \\))" );
+ replace = ( "num_($2)" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(ordinal_ \\( ([^\\)]*) \\))" );
- replace = ( "ordinal_($2)" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(ordinal_ \\( ([^\\)]*) \\))" );
+ replace = ( "ordinal_($2)" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Mm]) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Mm]) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Mm]) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Mm]) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Dd]r) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Dd]r) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Dd]r) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Dd]r) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Mm]r) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Mm]r) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Mm]r) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Mm]r) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Mm]rs) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Mm]rs) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Mm]rs) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Mm]rs) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Nn]o) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Nn]o) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Nn]o) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Nn]o) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
// rex = sregex::compile ( "(^(([Jj]an)|([Ff]ev)|([Mm]ar)|([Aa]pr)|([Jj]un)|([Jj]ul)|([Aa]ug)|([Ss]ept)|([Oo]ct)|([Nn]ov)|([Dd]ec)) \\.)" );
// replace = ( "$2." );
// str_mod = regex_replace ( str_mod, rex, replace );
-//
+//
// rex = sregex::compile ( "( (([Jj]an)|([Ff]ev)|([Mm]ar)|([Aa]pr)|([Jj]un)|([Jj]ul)|([Aa]ug)|([Ss]ept)|([Oo]ct)|([Nn]ov)|([Dd]ec)) \\.)" );
// replace = ( " $2." );
// str_mod = regex_replace ( str_mod, rex, replace );
-//
+//
// rex = sregex::compile ( "(^(([Gg]en)|([Cc]ol)) \\.)" );
// replace = ( "$2." );
// str_mod = regex_replace ( str_mod, rex, replace );
-//
+//
// rex = sregex::compile ( "( (([Gg]en)|([Cc]ol)) \\.)" );
// replace = ( " $2." );
// str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^(([A-Z][a-z])) \\. )" );
- replace = ( "$2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^(([A-Z][a-z])) \\. )" );
+ replace = ( "$2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( (([A-Z][a-z])) \\. )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( (([A-Z][a-z])) \\. )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^(([A-Z][a-z][a-z])) \\. )" );
- replace = ( "$2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^(([A-Z][a-z][a-z])) \\. )" );
+ replace = ( "$2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( (([A-Z][a-z][a-z])) \\. )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( (([A-Z][a-z][a-z])) \\. )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+$" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+$" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
- }
+ return str_mod;
+}
- string normalizeStd ( string str )
- {
- string str_mod = str;
- sregex rex = sregex::compile ( "(<skipped>)" );
- string replace ( "" );
- str_mod = regex_replace ( str_mod, rex, replace );
+string normalizeStd ( string str )
+{
+ string str_mod = str;
+ sregex rex = sregex::compile ( "(<skipped>)" );
+ string replace ( "" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "-\n" );
- replace = ( "" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "-\n" );
+ replace = ( "" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "\n" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "\n" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&quot;" );
- replace = ( "\"" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "&quot;" );
+ replace = ( "\"" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&amp;" );
- replace = ( "& " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "&amp;" );
+ replace = ( "& " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&lt;" );
- replace = ( "<" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "&lt;" );
+ replace = ( "<" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&gt;" );
- replace = ( ">" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "&gt;" );
+ replace = ( ">" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
- }
+ return str_mod;
+}
- param copyParam ( param p )
- {
- param to_return;
- to_return.caseOn = p.caseOn;
- to_return.noPunct = p.noPunct;
- to_return.debugMode = p.debugMode;
- to_return.debugLevel = p.debugLevel;
- to_return.hypothesisFile = p.hypothesisFile;
- to_return.referenceFile = p.referenceFile;
- to_return.normalize = p.normalize;
- to_return.noTxtIds = p.noTxtIds;
- to_return.outputFileExtension = p.outputFileExtension;
- to_return.outputFileName = p.outputFileName;
- to_return.sgmlInputs = p.sgmlInputs;
- to_return.tercomLike = p.tercomLike;
- to_return.printAlignments = p.printAlignments;
- to_return.WER=p.WER;
- return to_return;
- }
- string printParams ( param p )
- {
- stringstream s;
- s << "caseOn = " << p.caseOn << endl;
- s << "noPunct = " << p.noPunct << endl;
- s << "debugMode = " << p.debugMode << endl;
- s << "debugLevel = " << p.debugLevel << endl;
- s << "hypothesisFile = " << p.hypothesisFile << endl;
- s << "referenceFile = " << p.referenceFile << endl;
- s << "normalize = " << p.normalize << endl;
- s << "noTxtIds = " << p.noTxtIds << endl;
- s << "outputFileExtension = " << p.outputFileExtension << endl;
- s << "outputFileName = " << p.outputFileName << endl;
- s << "sgmlInputs = " << p.sgmlInputs << endl;
- s << "tercomLike = " << p.tercomLike << endl;
- return s.str();
+param copyParam ( param p )
+{
+ param to_return;
+ to_return.caseOn = p.caseOn;
+ to_return.noPunct = p.noPunct;
+ to_return.debugMode = p.debugMode;
+ to_return.debugLevel = p.debugLevel;
+ to_return.hypothesisFile = p.hypothesisFile;
+ to_return.referenceFile = p.referenceFile;
+ to_return.normalize = p.normalize;
+ to_return.noTxtIds = p.noTxtIds;
+ to_return.outputFileExtension = p.outputFileExtension;
+ to_return.outputFileName = p.outputFileName;
+ to_return.sgmlInputs = p.sgmlInputs;
+ to_return.tercomLike = p.tercomLike;
+ to_return.printAlignments = p.printAlignments;
+ to_return.WER=p.WER;
+ return to_return;
+}
+string printParams ( param p )
+{
+ stringstream s;
+ s << "caseOn = " << p.caseOn << endl;
+ s << "noPunct = " << p.noPunct << endl;
+ s << "debugMode = " << p.debugMode << endl;
+ s << "debugLevel = " << p.debugLevel << endl;
+ s << "hypothesisFile = " << p.hypothesisFile << endl;
+ s << "referenceFile = " << p.referenceFile << endl;
+ s << "normalize = " << p.normalize << endl;
+ s << "noTxtIds = " << p.noTxtIds << endl;
+ s << "outputFileExtension = " << p.outputFileExtension << endl;
+ s << "outputFileName = " << p.outputFileName << endl;
+ s << "sgmlInputs = " << p.sgmlInputs << endl;
+ s << "tercomLike = " << p.tercomLike << endl;
+ return s.str();
- }
- string join ( string delim, vector<string> arr )
- {
- if ( ( int ) arr.size() == 0 ) return "";
+}
+string join ( string delim, vector<string> arr )
+{
+ if ( ( int ) arr.size() == 0 ) return "";
// if ((int)delim.compare("") == 0) delim = new String("");
// String s = new String("");
- stringstream s;
- s.str ( "" );
- for ( int i = 0; i < ( int ) arr.size(); i++ )
- {
- if ( i == 0 )
- {
- s << arr.at ( i );
- }
- else
- {
- s << delim << arr.at ( i );
- }
- }
- return s.str();
-// return "";
+ stringstream s;
+ s.str ( "" );
+ for ( int i = 0; i < ( int ) arr.size(); i++ ) {
+ if ( i == 0 ) {
+ s << arr.at ( i );
+ } else {
+ s << delim << arr.at ( i );
}
+ }
+ return s.str();
+// return "";
+}
}
diff --git a/mert/TER/tools.h b/mert/TER/tools.h
index 0a85e7b4b..157b739a5 100644
--- a/mert/TER/tools.h
+++ b/mert/TER/tools.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -35,32 +35,31 @@ using namespace std;
namespace Tools
{
- typedef vector<double> vecDouble;
- typedef vector<char> vecChar;
- typedef vector<int> vecInt;
- typedef vector<float> vecFloat;
- typedef vector<size_t> vecSize_t;
- typedef vector<string> vecString;
- typedef vector<string> alignmentElement;
- typedef vector<alignmentElement> WERalignment;
+typedef vector<double> vecDouble;
+typedef vector<char> vecChar;
+typedef vector<int> vecInt;
+typedef vector<float> vecFloat;
+typedef vector<size_t> vecSize_t;
+typedef vector<string> vecString;
+typedef vector<string> alignmentElement;
+typedef vector<alignmentElement> WERalignment;
-struct param
-{
- bool debugMode;
- string referenceFile; // path to the resources
- string hypothesisFile; // path to the configuration files
- string outputFileExtension;
- string outputFileName;
- bool noPunct;
- bool caseOn;
- bool normalize;
- bool tercomLike;
- bool sgmlInputs;
- bool noTxtIds;
- bool printAlignments;
- bool WER;
- int debugLevel;
+struct param {
+ bool debugMode;
+ string referenceFile; // path to the resources
+ string hypothesisFile; // path to the configuration files
+ string outputFileExtension;
+ string outputFileName;
+ bool noPunct;
+ bool caseOn;
+ bool normalize;
+ bool tercomLike;
+ bool sgmlInputs;
+ bool noTxtIds;
+ bool printAlignments;
+ bool WER;
+ int debugLevel;
};
// param = { false, "","","","" };
@@ -68,35 +67,35 @@ struct param
// private:
// public:
- string vectorToString ( vector<string> vec );
- string vectorToString ( vector<char> vec );
- string vectorToString ( vector<int> vec );
- string vectorToString ( vector<string> vec, string s );
- string vectorToString ( vector<char> vec, string s );
- string vectorToString ( vector<int> vec, string s );
- string vectorToString ( vector<bool> vec, string s );
- string vectorToString ( char* vec, string s, int taille );
- string vectorToString ( int* vec, string s , int taille );
- string vectorToString ( bool* vec, string s , int taille );
- vector<string> subVector ( vector<string> vec, int start, int end );
- vector<int> subVector ( vector<int> vec, int start, int end );
- vector<float> subVector ( vector<float> vec, int start, int end );
- vector<string> copyVector ( vector<string> vec );
- vector<int> copyVector ( vector<int> vec );
- vector<float> copyVector ( vector<float> vec );
- vector<string> stringToVector ( string s, string tok );
- vector<string> stringToVector ( char s, string tok );
- vector<string> stringToVector ( int s, string tok );
- vector<int> stringToVectorInt ( string s, string tok );
- vector<float> stringToVectorFloat ( string s, string tok );
- string lowerCase(string str);
- string removePunct(string str);
- string tokenizePunct(string str);
- string removePunctTercom(string str);
- string normalizeStd(string str);
- string printParams(param p);
- string join ( string delim, vector<string> arr );
+string vectorToString ( vector<string> vec );
+string vectorToString ( vector<char> vec );
+string vectorToString ( vector<int> vec );
+string vectorToString ( vector<string> vec, string s );
+string vectorToString ( vector<char> vec, string s );
+string vectorToString ( vector<int> vec, string s );
+string vectorToString ( vector<bool> vec, string s );
+string vectorToString ( char* vec, string s, int taille );
+string vectorToString ( int* vec, string s , int taille );
+string vectorToString ( bool* vec, string s , int taille );
+vector<string> subVector ( vector<string> vec, int start, int end );
+vector<int> subVector ( vector<int> vec, int start, int end );
+vector<float> subVector ( vector<float> vec, int start, int end );
+vector<string> copyVector ( vector<string> vec );
+vector<int> copyVector ( vector<int> vec );
+vector<float> copyVector ( vector<float> vec );
+vector<string> stringToVector ( string s, string tok );
+vector<string> stringToVector ( char s, string tok );
+vector<string> stringToVector ( int s, string tok );
+vector<int> stringToVectorInt ( string s, string tok );
+vector<float> stringToVectorFloat ( string s, string tok );
+string lowerCase(string str);
+string removePunct(string str);
+string tokenizePunct(string str);
+string removePunctTercom(string str);
+string normalizeStd(string str);
+string printParams(param p);
+string join ( string delim, vector<string> arr );
// };
- param copyParam(param p);
+param copyParam(param p);
}
#endif
diff --git a/mert/evaluator.cpp b/mert/evaluator.cpp
index caae07684..7ab03c7eb 100644
--- a/mert/evaluator.cpp
+++ b/mert/evaluator.cpp
@@ -43,7 +43,8 @@ private:
};
// load hypothesis from candidate output
-vector<ScoreStats> EvaluatorUtil::loadCand(const string& candFile) {
+vector<ScoreStats> EvaluatorUtil::loadCand(const string& candFile)
+{
ifstream cand(candFile.c_str());
if (!cand.good()) throw runtime_error("Error opening candidate file");
@@ -61,7 +62,8 @@ vector<ScoreStats> EvaluatorUtil::loadCand(const string& candFile) {
}
// load 1-best hypothesis from n-best file (useful if relying on alignment/tree information)
-vector<ScoreStats> EvaluatorUtil::loadNBest(const string& nBestFile) {
+vector<ScoreStats> EvaluatorUtil::loadNBest(const string& nBestFile)
+{
vector<ScoreStats> entries;
Data data(g_scorer);
@@ -81,8 +83,7 @@ void EvaluatorUtil::evaluate(const string& candFile, int bootstrap, bool nbest_i
if (nbest_input) {
entries = loadNBest(candFile);
- }
- else {
+ } else {
entries = loadCand(candFile);
}
diff --git a/mert/kbmira.cpp b/mert/kbmira.cpp
index 0abce8af4..5a119e875 100644
--- a/mert/kbmira.cpp
+++ b/mert/kbmira.cpp
@@ -77,7 +77,7 @@ int main(int argc, char** argv)
bool model_bg = false; // Use model for background corpus
bool verbose = false; // Verbose updates
bool safe_hope = false; // Model score cannot have more than BLEU_RATIO times more influence than BLEU
- size_t hgPruning = 50; //prune hypergraphs to have this many edges per reference word
+ size_t hgPruning = 50; //prune hypergraphs to have this many edges per reference word
// Command-line processing follows pro.cpp
po::options_description desc("Allowed options");
@@ -157,7 +157,7 @@ int main(int argc, char** argv)
do {
size_t equals = buffer.find_last_of("=");
UTIL_THROW_IF(equals == buffer.npos, util::Exception, "Incorrect format in dense feature file: '"
- << buffer << "'");
+ << buffer << "'");
string name = buffer.substr(0,equals);
names.push_back(name);
initParams.push_back(boost::lexical_cast<ValType>(buffer.substr(equals+2)));
@@ -183,7 +183,7 @@ int main(int argc, char** argv)
//Make sure that SparseVector encodes dense feature names as 0..n-1.
for (size_t i = 0; i < names.size(); ++i) {
size_t id = SparseVector::encode(names[i]);
- assert(id == i);
+ assert(id == i);
if (verbose) cerr << names[i] << " " << initParams[i] << endl;
}
@@ -246,12 +246,12 @@ int main(int argc, char** argv)
int iNumUpdates = 0;
ValType totalLoss = 0.0;
size_t sentenceIndex = 0;
- for(decoder->reset();!decoder->finished(); decoder->next()) {
+ for(decoder->reset(); !decoder->finished(); decoder->next()) {
HopeFearData hfd;
decoder->HopeFear(bg,wv,&hfd);
-
+
// Update weights
- if (!hfd.hopeFearEqual && hfd.hopeBleu > hfd.fearBleu) {
+ if (!hfd.hopeFearEqual && hfd.hopeBleu > hfd.fearBleu) {
// Vector difference
MiraFeatureVector diff = hfd.hopeFeatures - hfd.fearFeatures;
// Bleu difference
diff --git a/misc/CreateProbingPT.cpp b/misc/CreateProbingPT.cpp
index 2b0e8cd8a..b23427f30 100644
--- a/misc/CreateProbingPT.cpp
+++ b/misc/CreateProbingPT.cpp
@@ -3,26 +3,27 @@
-int main(int argc, char* argv[]){
+int main(int argc, char* argv[])
+{
- const char * is_reordering = "false";
+ const char * is_reordering = "false";
- if (!(argc == 5 || argc == 4)) {
- // Tell the user how to run the program
- std::cerr << "Provided " << argc << " arguments, needed 4 or 5." << std::endl;
- std::cerr << "Usage: " << argv[0] << " path_to_phrasetable output_dir num_scores is_reordering" << std::endl;
- std::cerr << "is_reordering should be either true or false, but it is currently a stub feature." << std::endl;
- //std::cerr << "Usage: " << argv[0] << " path_to_phrasetable number_of_uniq_lines output_bin_file output_hash_table output_vocab_id" << std::endl;
- return 1;
- }
+ if (!(argc == 5 || argc == 4)) {
+ // Tell the user how to run the program
+ std::cerr << "Provided " << argc << " arguments, needed 4 or 5." << std::endl;
+ std::cerr << "Usage: " << argv[0] << " path_to_phrasetable output_dir num_scores is_reordering" << std::endl;
+ std::cerr << "is_reordering should be either true or false, but it is currently a stub feature." << std::endl;
+ //std::cerr << "Usage: " << argv[0] << " path_to_phrasetable number_of_uniq_lines output_bin_file output_hash_table output_vocab_id" << std::endl;
+ return 1;
+ }
- if (argc == 5) {
- is_reordering = argv[4];
- }
+ if (argc == 5) {
+ is_reordering = argv[4];
+ }
- createProbingPT(argv[1], argv[2], argv[3], is_reordering);
+ createProbingPT(argv[1], argv[2], argv[3], is_reordering);
- util::PrintUsage(std::cout);
- return 0;
+ util::PrintUsage(std::cout);
+ return 0;
}
diff --git a/misc/QueryProbingPT.cpp b/misc/QueryProbingPT.cpp
index 8a3441a0d..b7226e5ac 100644
--- a/misc/QueryProbingPT.cpp
+++ b/misc/QueryProbingPT.cpp
@@ -26,36 +26,37 @@
#include <unistd.h>
#include <fcntl.h>
-int main(int argc, char* argv[]) {
- if (argc != 2) {
- // Tell the user how to run the program
- std::cerr << "Usage: " << argv[0] << " path_to_directory" << std::endl;
- return 1;
+int main(int argc, char* argv[])
+{
+ if (argc != 2) {
+ // Tell the user how to run the program
+ std::cerr << "Usage: " << argv[0] << " path_to_directory" << std::endl;
+ return 1;
+ }
+
+ QueryEngine queries(argv[1]);
+
+ //Interactive search
+ std::cout << "Please enter a string to be searched, or exit to exit." << std::endl;
+ while (true) {
+ std::string cinstr = "";
+ getline(std::cin, cinstr);
+ if (cinstr == "exit") {
+ break;
+ } else {
+ //Actual lookup
+ std::pair<bool, std::vector<target_text> > query_result;
+ query_result = queries.query(StringPiece(cinstr));
+
+ if (query_result.first) {
+ queries.printTargetInfo(query_result.second);
+ } else {
+ std::cout << "Key not found!" << std::endl;
+ }
}
+ }
- QueryEngine queries(argv[1]);
-
- //Interactive search
- std::cout << "Please enter a string to be searched, or exit to exit." << std::endl;
- while (true){
- std::string cinstr = "";
- getline(std::cin, cinstr);
- if (cinstr == "exit"){
- break;
- }else{
- //Actual lookup
- std::pair<bool, std::vector<target_text> > query_result;
- query_result = queries.query(StringPiece(cinstr));
-
- if (query_result.first) {
- queries.printTargetInfo(query_result.second);
- } else {
- std::cout << "Key not found!" << std::endl;
- }
- }
- }
-
- util::PrintUsage(std::cout);
+ util::PrintUsage(std::cout);
- return 0;
+ return 0;
}
diff --git a/misc/prunePhraseTable.cpp b/misc/prunePhraseTable.cpp
index dcf8d73da..f6d608bc6 100644
--- a/misc/prunePhraseTable.cpp
+++ b/misc/prunePhraseTable.cpp
@@ -53,13 +53,15 @@ using namespace std;
namespace po = boost::program_options;
typedef multimap<float,string> Lines;
-static void usage(const po::options_description& desc, char** argv) {
- cerr << "Usage: " + string(argv[0]) + " [options] input-file output-file" << endl;
- cerr << desc << endl;
+static void usage(const po::options_description& desc, char** argv)
+{
+ cerr << "Usage: " + string(argv[0]) + " [options] input-file output-file" << endl;
+ cerr << desc << endl;
}
//Find top n translations of source, and send them to output
-static void outputTopN(Lines lines, size_t maxPhrases, ostream& out) {
+static void outputTopN(Lines lines, size_t maxPhrases, ostream& out)
+{
size_t count = 0;
for (Lines::const_reverse_iterator i = lines.rbegin(); i != lines.rend(); ++i) {
out << i->second << endl;
@@ -92,7 +94,7 @@ static void outputTopN(const Phrase& sourcePhrase, const multimap<float,const Ta
out << endl;
}
}*/
-int main(int argc, char** argv)
+int main(int argc, char** argv)
{
bool help;
string input_file;
@@ -112,7 +114,7 @@ int main(int argc, char** argv)
cmdline_options.add(desc);
po::variables_map vm;
po::parsed_options parsed = po::command_line_parser(argc,argv).
- options(cmdline_options).run();
+ options(cmdline_options).run();
po::store(parsed, vm);
po::notify(vm);
if (help) {
@@ -135,7 +137,7 @@ int main(int argc, char** argv)
mosesargs.push_back("-f");
mosesargs.push_back(config_file);
- boost::scoped_ptr<Parameter> params(new Parameter());
+ boost::scoped_ptr<Parameter> params(new Parameter());
char** mosesargv = new char*[mosesargs.size()];
for (size_t i = 0; i < mosesargs.size(); ++i) {
mosesargv[i] = new char[mosesargs[i].length() + 1];
diff --git a/moses-cmd/LatticeMBRGrid.cpp b/moses-cmd/LatticeMBRGrid.cpp
index d35b921e2..631c717f4 100644
--- a/moses-cmd/LatticeMBRGrid.cpp
+++ b/moses-cmd/LatticeMBRGrid.cpp
@@ -201,7 +201,7 @@ int main(int argc, char* argv[])
cout << lineCount << " ||| " << p << " " << r << " " << prune << " " << scale << " ||| ";
vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
manager.OutputBestHypo(mbrBestHypo, lineCount, staticData.GetReportSegmentation(),
- staticData.GetReportAllFactors(),cout);
+ staticData.GetReportAllFactors(),cout);
}
}
diff --git a/moses-cmd/Main.cpp b/moses-cmd/Main.cpp
index b22a4f695..f88f186b5 100644
--- a/moses-cmd/Main.cpp
+++ b/moses-cmd/Main.cpp
@@ -80,7 +80,7 @@ int main(int argc, char** argv)
#ifdef HAVE_PROTOBUF
GOOGLE_PROTOBUF_VERIFY_VERSION;
#endif
-
+
// echo command line, if verbose
IFVERBOSE(1) {
TRACE_ERR("command: ");
@@ -121,7 +121,7 @@ int main(int argc, char** argv)
// set up read/writing class
IFVERBOSE(1) {
- PrintUserTime("Created input-output object");
+ PrintUserTime("Created input-output object");
}
IOWrapper* ioWrapper = new IOWrapper();
@@ -161,28 +161,26 @@ int main(int argc, char** argv)
#ifdef PT_UG
bool spe = params.isParamSpecified("spe-src");
if (spe) {
- // simulated post-editing: always run single-threaded!
+ // simulated post-editing: always run single-threaded!
task->Run();
delete task;
string src,trg,aln;
UTIL_THROW_IF2(!getline(*ioWrapper->spe_src,src), "[" << HERE << "] "
<< "missing update data for simulated post-editing.");
UTIL_THROW_IF2(!getline(*ioWrapper->spe_trg,trg), "[" << HERE << "] "
- << "missing update data for simulated post-editing.");
+ << "missing update data for simulated post-editing.");
UTIL_THROW_IF2(!getline(*ioWrapper->spe_aln,aln), "[" << HERE << "] "
- << "missing update data for simulated post-editing.");
- BOOST_FOREACH (PhraseDictionary* pd, PhraseDictionary::GetColl())
- {
- Mmsapt* sapt = dynamic_cast<Mmsapt*>(pd);
- if (sapt) sapt->add(src,trg,aln);
- VERBOSE(1,"[" << HERE << " added src] " << src << endl);
- VERBOSE(1,"[" << HERE << " added trg] " << trg << endl);
- VERBOSE(1,"[" << HERE << " added aln] " << aln << endl);
- }
- }
- else
+ << "missing update data for simulated post-editing.");
+ BOOST_FOREACH (PhraseDictionary* pd, PhraseDictionary::GetColl()) {
+ Mmsapt* sapt = dynamic_cast<Mmsapt*>(pd);
+ if (sapt) sapt->add(src,trg,aln);
+ VERBOSE(1,"[" << HERE << " added src] " << src << endl);
+ VERBOSE(1,"[" << HERE << " added trg] " << trg << endl);
+ VERBOSE(1,"[" << HERE << " added aln] " << aln << endl);
+ }
+ } else
#endif
- pool.Submit(task);
+ pool.Submit(task);
#else
task->Run();
delete task;
diff --git a/moses-cmd/MainVW.cpp b/moses-cmd/MainVW.cpp
index 9663badf5..a93ba8b18 100644
--- a/moses-cmd/MainVW.cpp
+++ b/moses-cmd/MainVW.cpp
@@ -80,7 +80,7 @@ int main(int argc, char** argv)
#ifdef HAVE_PROTOBUF
GOOGLE_PROTOBUF_VERIFY_VERSION;
#endif
-
+
// echo command line, if verbose
IFVERBOSE(1) {
TRACE_ERR("command: ");
@@ -121,7 +121,7 @@ int main(int argc, char** argv)
// set up read/writing class
IFVERBOSE(1) {
- PrintUserTime("Created input-output object");
+ PrintUserTime("Created input-output object");
}
IOWrapper* ioWrapper = new IOWrapper();
diff --git a/moses/AlignmentInfoCollection.h b/moses/AlignmentInfoCollection.h
index 1db0a2268..92462d3b8 100644
--- a/moses/AlignmentInfoCollection.h
+++ b/moses/AlignmentInfoCollection.h
@@ -46,14 +46,13 @@ public:
* contains such an object then returns a pointer to it; otherwise a new
* one is inserted.
*/
- private:
+private:
const AlignmentInfo* Add(AlignmentInfo const& ainfo);
- public:
+public:
template<typename ALNREP>
- AlignmentInfo const *
- Add(ALNREP const & aln)
- {
+ AlignmentInfo const *
+ Add(ALNREP const & aln) {
return this->Add(AlignmentInfo(aln));
}
diff --git a/moses/BaseManager.cpp b/moses/BaseManager.cpp
index f79842311..2c57e8336 100644
--- a/moses/BaseManager.cpp
+++ b/moses/BaseManager.cpp
@@ -13,11 +13,11 @@ namespace Moses
* print surface factor only for the given phrase
*/
void BaseManager::OutputSurface(std::ostream &out, const Phrase &phrase,
- const std::vector<FactorType> &outputFactorOrder,
- bool reportAllFactors) const
+ const std::vector<FactorType> &outputFactorOrder,
+ bool reportAllFactors) const
{
UTIL_THROW_IF2(outputFactorOrder.size() == 0,
- "Cannot be empty phrase");
+ "Cannot be empty phrase");
if (reportAllFactors == true) {
out << phrase;
} else {
@@ -26,12 +26,12 @@ void BaseManager::OutputSurface(std::ostream &out, const Phrase &phrase,
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
out << *factor;
UTIL_THROW_IF2(factor == NULL,
- "Empty factor 0 at position " << pos);
+ "Empty factor 0 at position " << pos);
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
UTIL_THROW_IF2(factor == NULL,
- "Empty factor " << i << " at position " << pos);
+ "Empty factor " << i << " at position " << pos);
out << "|" << *factor;
}
@@ -45,7 +45,7 @@ void BaseManager::OutputSurface(std::ostream &out, const Phrase &phrase,
// but there are scripts and tools that expect the output of -T to look like
// that.
void BaseManager::WriteApplicationContext(std::ostream &out,
- const ApplicationContext &context) const
+ const ApplicationContext &context) const
{
assert(!context.empty());
ApplicationContext::const_reverse_iterator p = context.rbegin();
diff --git a/moses/BaseManager.h b/moses/BaseManager.h
index fe88d6412..c0b6d22c1 100644
--- a/moses/BaseManager.h
+++ b/moses/BaseManager.h
@@ -17,23 +17,22 @@ protected:
const InputType &m_source; /**< source sentence to be translated */
BaseManager(const InputType &source)
- :m_source(source)
- {}
+ :m_source(source) {
+ }
// output
typedef std::vector<std::pair<Moses::Word, Moses::WordsRange> > ApplicationContext;
typedef std::set< std::pair<size_t, size_t> > Alignments;
void OutputSurface(std::ostream &out,
- const Phrase &phrase,
- const std::vector<FactorType> &outputFactorOrder,
- bool reportAllFactors) const;
+ const Phrase &phrase,
+ const std::vector<FactorType> &outputFactorOrder,
+ bool reportAllFactors) const;
void WriteApplicationContext(std::ostream &out,
- const ApplicationContext &context) const;
+ const ApplicationContext &context) const;
template <class T>
- void ShiftOffsets(std::vector<T> &offsets, T shift) const
- {
+ void ShiftOffsets(std::vector<T> &offsets, T shift) const {
T currPos = shift;
for (size_t i = 0; i < offsets.size(); ++i) {
if (offsets[i] == 0) {
@@ -46,8 +45,8 @@ protected:
}
public:
- virtual ~BaseManager()
- {}
+ virtual ~BaseManager() {
+ }
//! the input sentence being decoded
const InputType& GetSource() const {
diff --git a/moses/BitmapContainer.cpp b/moses/BitmapContainer.cpp
index d7e27c298..40ec74153 100644
--- a/moses/BitmapContainer.cpp
+++ b/moses/BitmapContainer.cpp
@@ -162,16 +162,16 @@ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
if (m_translations.size() > 1) {
UTIL_THROW_IF2(m_translations.Get(0)->GetFutureScore() < m_translations.Get(1)->GetFutureScore(),
- "Non-monotonic future score: "
- << m_translations.Get(0)->GetFutureScore() << " vs. "
- << m_translations.Get(1)->GetFutureScore());
+ "Non-monotonic future score: "
+ << m_translations.Get(0)->GetFutureScore() << " vs. "
+ << m_translations.Get(1)->GetFutureScore());
}
if (m_hypotheses.size() > 1) {
UTIL_THROW_IF2(m_hypotheses[0]->GetTotalScore() < m_hypotheses[1]->GetTotalScore(),
- "Non-monotonic total score"
- << m_hypotheses[0]->GetTotalScore() << " vs. "
- << m_hypotheses[1]->GetTotalScore());
+ "Non-monotonic total score"
+ << m_hypotheses[0]->GetTotalScore() << " vs. "
+ << m_hypotheses[1]->GetTotalScore());
}
HypothesisScoreOrdererWithDistortion orderer (&transOptRange);
@@ -446,9 +446,9 @@ BitmapContainer::ProcessBestHypothesis()
if (!Empty()) {
HypothesisQueueItem *check = Dequeue(true);
UTIL_THROW_IF2(item->GetHypothesis()->GetTotalScore() < check->GetHypothesis()->GetTotalScore(),
- "Non-monotonic total score: "
- << item->GetHypothesis()->GetTotalScore() << " vs. "
- << check->GetHypothesis()->GetTotalScore());
+ "Non-monotonic total score: "
+ << item->GetHypothesis()->GetTotalScore() << " vs. "
+ << check->GetHypothesis()->GetTotalScore());
}
// Logging for the criminally insane
diff --git a/moses/ChartCell.cpp b/moses/ChartCell.cpp
index b6241218b..c942375e2 100644
--- a/moses/ChartCell.cpp
+++ b/moses/ChartCell.cpp
@@ -85,7 +85,7 @@ void ChartCell::PruneToSize()
* \param allChartCells entire chart - needed to look up underlying hypotheses
*/
void ChartCell::Decode(const ChartTranslationOptionList &transOptList
- , const ChartCellCollection &allChartCells)
+ , const ChartCellCollection &allChartCells)
{
const StaticData &staticData = StaticData::Instance();
diff --git a/moses/ChartCell.h b/moses/ChartCell.h
index fcb060489..d9213d5e1 100644
--- a/moses/ChartCell.h
+++ b/moses/ChartCell.h
@@ -97,7 +97,7 @@ public:
~ChartCell();
void Decode(const ChartTranslationOptionList &transOptList
- ,const ChartCellCollection &allChartCells);
+ ,const ChartCellCollection &allChartCells);
//! Get all hypotheses in the cell that have the specified constituent label
const HypoList *GetSortedHypotheses(const Word &constituentLabel) const {
diff --git a/moses/ChartCellLabelSet.h b/moses/ChartCellLabelSet.h
index 591aa17a6..4977c941f 100644
--- a/moses/ChartCellLabelSet.h
+++ b/moses/ChartCellLabelSet.h
@@ -124,8 +124,7 @@ public:
const ChartCellLabel *Find(size_t idx) const {
try {
return m_map.at(idx);
- }
- catch (const std::out_of_range& oor) {
+ } catch (const std::out_of_range& oor) {
return NULL;
}
}
diff --git a/moses/ChartHypothesis.cpp b/moses/ChartHypothesis.cpp
index 6415ec5bb..0d62e33bf 100644
--- a/moses/ChartHypothesis.cpp
+++ b/moses/ChartHypothesis.cpp
@@ -61,8 +61,7 @@ ChartHypothesis::ChartHypothesis(const ChartTranslationOptions &transOpt,
const std::vector<HypothesisDimension> &childEntries = item.GetHypothesisDimensions();
m_prevHypos.reserve(childEntries.size());
std::vector<HypothesisDimension>::const_iterator iter;
- for (iter = childEntries.begin(); iter != childEntries.end(); ++iter)
- {
+ for (iter = childEntries.begin(); iter != childEntries.end(); ++iter) {
m_prevHypos.push_back(iter->GetHypothesis());
}
}
@@ -85,17 +84,14 @@ ChartHypothesis::ChartHypothesis(const ChartHypothesis &pred,
ChartHypothesis::~ChartHypothesis()
{
// delete feature function states
- for (unsigned i = 0; i < m_ffStates.size(); ++i)
- {
+ for (unsigned i = 0; i < m_ffStates.size(); ++i) {
delete m_ffStates[i];
}
// delete hypotheses that are not in the chart (recombined away)
- if (m_arcList)
- {
+ if (m_arcList) {
ChartArcList::iterator iter;
- for (iter = m_arcList->begin() ; iter != m_arcList->end() ; ++iter)
- {
+ for (iter = m_arcList->begin() ; iter != m_arcList->end() ; ++iter) {
ChartHypothesis *hypo = *iter;
Delete(hypo);
}
@@ -112,25 +108,19 @@ void ChartHypothesis::GetOutputPhrase(Phrase &outPhrase) const
{
FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor();
- for (size_t pos = 0; pos < GetCurrTargetPhrase().GetSize(); ++pos)
- {
+ for (size_t pos = 0; pos < GetCurrTargetPhrase().GetSize(); ++pos) {
const Word &word = GetCurrTargetPhrase().GetWord(pos);
- if (word.IsNonTerminal())
- {
+ if (word.IsNonTerminal()) {
// non-term. fill out with prev hypo
size_t nonTermInd = GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap()[pos];
const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd];
prevHypo->GetOutputPhrase(outPhrase);
- }
- else
- {
+ } else {
outPhrase.AddWord(word);
- if (placeholderFactor != NOT_FOUND)
- {
+ if (placeholderFactor != NOT_FOUND) {
std::set<size_t> sourcePosSet = GetCurrTargetPhrase().GetAlignTerm().GetAlignmentsForTarget(pos);
- if (sourcePosSet.size() == 1)
- {
+ if (sourcePosSet.size() == 1) {
const std::vector<const Word*> *ruleSourceFromInputPath = GetTranslationOption().GetSourceRuleFromInputPath();
UTIL_THROW_IF2(ruleSourceFromInputPath == NULL,
"No source rule");
@@ -140,8 +130,7 @@ void ChartHypothesis::GetOutputPhrase(Phrase &outPhrase) const
UTIL_THROW_IF2(sourceWord == NULL,
"No source word");
const Factor *factor = sourceWord->GetFactor(placeholderFactor);
- if (factor)
- {
+ if (factor) {
outPhrase.Back()[0] = factor;
}
}
@@ -165,33 +154,24 @@ void ChartHypothesis::GetOutputPhrase(size_t leftRightMost, size_t numWords, Phr
const TargetPhrase &tp = GetCurrTargetPhrase();
size_t targetSize = tp.GetSize();
- for (size_t i = 0; i < targetSize; ++i)
- {
+ for (size_t i = 0; i < targetSize; ++i) {
size_t pos;
- if (leftRightMost == 1)
- {
+ if (leftRightMost == 1) {
pos = i;
- }
- else if (leftRightMost == 2)
- {
+ } else if (leftRightMost == 2) {
pos = targetSize - i - 1;
- }
- else
- {
+ } else {
abort();
}
const Word &word = tp.GetWord(pos);
- if (word.IsNonTerminal())
- {
+ if (word.IsNonTerminal()) {
// non-term. fill out with prev hypo
size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[pos];
const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd];
prevHypo->GetOutputPhrase(outPhrase);
- }
- else
- {
+ } else {
outPhrase.AddWord(word);
}
@@ -236,20 +216,16 @@ void ChartHypothesis::EvaluateWhenApplied()
// cached in the translation option-- there is no principled distinction
const std::vector<const StatelessFeatureFunction*>& sfs =
StatelessFeatureFunction::GetStatelessFeatureFunctions();
- for (unsigned i = 0; i < sfs.size(); ++i)
- {
- if (! staticData.IsFeatureFunctionIgnored( *sfs[i] ))
- {
+ for (unsigned i = 0; i < sfs.size(); ++i) {
+ if (! staticData.IsFeatureFunctionIgnored( *sfs[i] )) {
sfs[i]->EvaluateWhenApplied(*this,&m_currScoreBreakdown);
}
}
const std::vector<const StatefulFeatureFunction*>& ffs =
StatefulFeatureFunction::GetStatefulFeatureFunctions();
- for (unsigned i = 0; i < ffs.size(); ++i)
- {
- if (! staticData.IsFeatureFunctionIgnored( *ffs[i] ))
- {
+ for (unsigned i = 0; i < ffs.size(); ++i) {
+ if (! staticData.IsFeatureFunctionIgnored( *ffs[i] )) {
m_ffStates[i] = ffs[i]->EvaluateWhenApplied(*this,i,&m_currScoreBreakdown);
}
}
@@ -257,7 +233,7 @@ void ChartHypothesis::EvaluateWhenApplied()
// total score from current translation rule
m_totalScore = GetTranslationOption().GetScores().GetWeightedScore();
m_totalScore += m_currScoreBreakdown.GetWeightedScore();
-
+
// total scores from prev hypos
for (std::vector<const ChartHypothesis*>::const_iterator iter = m_prevHypos.begin(); iter != m_prevHypos.end(); ++iter) {
const ChartHypothesis &prevHypo = **iter;
@@ -267,31 +243,25 @@ void ChartHypothesis::EvaluateWhenApplied()
void ChartHypothesis::AddArc(ChartHypothesis *loserHypo)
{
- if (!m_arcList)
- {
- if (loserHypo->m_arcList)
- { // we don't have an arcList, but loser does
+ if (!m_arcList) {
+ if (loserHypo->m_arcList) {
+ // we don't have an arcList, but loser does
this->m_arcList = loserHypo->m_arcList; // take ownership, we'll delete
loserHypo->m_arcList = 0; // prevent a double deletion
- }
- else
- {
+ } else {
this->m_arcList = new ChartArcList();
}
- }
- else
- {
- if (loserHypo->m_arcList)
- { // both have an arc list: merge. delete loser
+ } else {
+ if (loserHypo->m_arcList) {
+ // both have an arc list: merge. delete loser
size_t my_size = m_arcList->size();
size_t add_size = loserHypo->m_arcList->size();
this->m_arcList->resize(my_size + add_size, 0);
std::memcpy(&(*m_arcList)[0] + my_size, &(*loserHypo->m_arcList)[0], add_size * sizeof(ChartHypothesis *));
delete loserHypo->m_arcList;
loserHypo->m_arcList = 0;
- }
- else
- { // loserHypo doesn't have any arcs
+ } else {
+ // loserHypo doesn't have any arcs
// DO NOTHING
}
}
@@ -299,10 +269,8 @@ void ChartHypothesis::AddArc(ChartHypothesis *loserHypo)
}
// sorting helper
-struct CompareChartHypothesisTotalScore
-{
- bool operator()(const ChartHypothesis* hypo1, const ChartHypothesis* hypo2) const
- {
+struct CompareChartHypothesisTotalScore {
+ bool operator()(const ChartHypothesis* hypo1, const ChartHypothesis* hypo2) const {
return hypo1->GetTotalScore() > hypo2->GetTotalScore();
}
};
@@ -322,8 +290,7 @@ void ChartHypothesis::CleanupArcList()
size_t nBestSize = staticData.GetNBestSize();
bool distinctNBest = staticData.GetDistinctNBest() || staticData.UseMBR() || staticData.GetOutputSearchGraph() || staticData.GetOutputSearchGraphHypergraph();
- if (!distinctNBest && m_arcList->size() > nBestSize)
- {
+ if (!distinctNBest && m_arcList->size() > nBestSize) {
// prune arc list only if there too many arcs
NTH_ELEMENT4(m_arcList->begin()
, m_arcList->begin() + nBestSize - 1
@@ -332,8 +299,7 @@ void ChartHypothesis::CleanupArcList()
// delete bad ones
ChartArcList::iterator iter;
- for (iter = m_arcList->begin() + nBestSize ; iter != m_arcList->end() ; ++iter)
- {
+ for (iter = m_arcList->begin() + nBestSize ; iter != m_arcList->end() ; ++iter) {
ChartHypothesis *arc = *iter;
ChartHypothesis::Delete(arc);
}
@@ -343,8 +309,7 @@ void ChartHypothesis::CleanupArcList()
// set all arc's main hypo variable to this hypo
ChartArcList::iterator iter = m_arcList->begin();
- for (; iter != m_arcList->end() ; ++iter)
- {
+ for (; iter != m_arcList->end() ; ++iter) {
ChartHypothesis *arc = *iter;
arc->SetWinningHypo(this);
}
@@ -367,13 +332,11 @@ std::ostream& operator<<(std::ostream& out, const ChartHypothesis& hypo)
// recombination
if (hypo.GetWinningHypothesis() != NULL &&
- hypo.GetWinningHypothesis() != &hypo)
- {
+ hypo.GetWinningHypothesis() != &hypo) {
out << "->" << hypo.GetWinningHypothesis()->GetId();
}
- if (StaticData::Instance().GetIncludeLHSInSearchGraph())
- {
+ if (StaticData::Instance().GetIncludeLHSInSearchGraph()) {
out << " " << hypo.GetTargetLHS() << "=>";
}
out << " " << hypo.GetCurrTargetPhrase()
@@ -381,8 +344,7 @@ std::ostream& operator<<(std::ostream& out, const ChartHypothesis& hypo)
<< " " << hypo.GetCurrSourceRange();
HypoList::const_iterator iter;
- for (iter = hypo.GetPrevHypos().begin(); iter != hypo.GetPrevHypos().end(); ++iter)
- {
+ for (iter = hypo.GetPrevHypos().begin(); iter != hypo.GetPrevHypos().end(); ++iter) {
const ChartHypothesis &prevHypo = **iter;
out << " " << prevHypo.GetId();
}
diff --git a/moses/ChartHypothesis.h b/moses/ChartHypothesis.h
index ecd35b867..25216c04c 100644
--- a/moses/ChartHypothesis.h
+++ b/moses/ChartHypothesis.h
@@ -58,8 +58,8 @@ protected:
WordsRange m_currSourceWordsRange;
std::vector<const FFState*> m_ffStates; /*! stateful feature function states */
/*! sum of scores of this hypothesis, and previous hypotheses. Lazily initialised. */
- mutable boost::scoped_ptr<ScoreComponentCollection> m_scoreBreakdown;
- mutable boost::scoped_ptr<ScoreComponentCollection> m_deltaScoreBreakdown;
+ mutable boost::scoped_ptr<ScoreComponentCollection> m_scoreBreakdown;
+ mutable boost::scoped_ptr<ScoreComponentCollection> m_deltaScoreBreakdown;
ScoreComponentCollection m_currScoreBreakdown /*! scores for this hypothesis only */
,m_lmNGram
,m_lmPrefix;
@@ -82,21 +82,18 @@ protected:
public:
#ifdef USE_HYPO_POOL
- void *operator new(size_t /* num_bytes */)
- {
+ void *operator new(size_t /* num_bytes */) {
void *ptr = s_objectPool.getPtr();
return ptr;
}
//! delete \param hypo. Works with object pool too
- static void Delete(ChartHypothesis *hypo)
- {
+ static void Delete(ChartHypothesis *hypo) {
s_objectPool.freeObject(hypo);
}
#else
//! delete \param hypo. Works with object pool too
- static void Delete(ChartHypothesis *hypo)
- {
+ static void Delete(ChartHypothesis *hypo) {
delete hypo;
}
#endif
@@ -109,43 +106,36 @@ public:
~ChartHypothesis();
- unsigned GetId() const
- {
+ unsigned GetId() const {
return m_id;
}
- const ChartTranslationOption &GetTranslationOption() const
- {
+ const ChartTranslationOption &GetTranslationOption() const {
return *m_transOpt;
}
//! Get the rule that created this hypothesis
- const TargetPhrase &GetCurrTargetPhrase() const
- {
+ const TargetPhrase &GetCurrTargetPhrase() const {
return m_transOpt->GetPhrase();
}
//! the source range that this hypothesis spans
- const WordsRange &GetCurrSourceRange() const
- {
+ const WordsRange &GetCurrSourceRange() const {
return m_currSourceWordsRange;
}
//! the arc list when creating n-best lists
- inline const ChartArcList* GetArcList() const
- {
+ inline const ChartArcList* GetArcList() const {
return m_arcList;
}
//! the feature function states for a particular feature \param featureID
- inline const FFState* GetFFState( size_t featureID ) const
- {
+ inline const FFState* GetFFState( size_t featureID ) const {
return m_ffStates[ featureID ];
}
//! reference back to the manager
- inline const ChartManager& GetManager() const
- {
+ inline const ChartManager& GetManager() const {
return m_manager;
}
@@ -165,21 +155,17 @@ public:
void SetWinningHypo(const ChartHypothesis *hypo);
//! get the unweighted score for each feature function
- const ScoreComponentCollection &GetScoreBreakdown() const
- {
+ const ScoreComponentCollection &GetScoreBreakdown() const {
// Note: never call this method before m_currScoreBreakdown is fully computed
- if (!m_scoreBreakdown.get())
- {
+ if (!m_scoreBreakdown.get()) {
m_scoreBreakdown.reset(new ScoreComponentCollection());
// score breakdown from current translation rule
- if (m_transOpt)
- {
+ if (m_transOpt) {
m_scoreBreakdown->PlusEquals(GetTranslationOption().GetScores());
}
m_scoreBreakdown->PlusEquals(m_currScoreBreakdown);
// score breakdowns from prev hypos
- for (std::vector<const ChartHypothesis*>::const_iterator iter = m_prevHypos.begin(); iter != m_prevHypos.end(); ++iter)
- {
+ for (std::vector<const ChartHypothesis*>::const_iterator iter = m_prevHypos.begin(); iter != m_prevHypos.end(); ++iter) {
const ChartHypothesis &prevHypo = **iter;
m_scoreBreakdown->PlusEquals(prevHypo.GetScoreBreakdown());
}
@@ -188,15 +174,12 @@ public:
}
//! get the unweighted score delta for each feature function
- const ScoreComponentCollection &GetDeltaScoreBreakdown() const
- {
+ const ScoreComponentCollection &GetDeltaScoreBreakdown() const {
// Note: never call this method before m_currScoreBreakdown is fully computed
- if (!m_deltaScoreBreakdown.get())
- {
+ if (!m_deltaScoreBreakdown.get()) {
m_deltaScoreBreakdown.reset(new ScoreComponentCollection());
// score breakdown from current translation rule
- if (m_transOpt)
- {
+ if (m_transOpt) {
m_deltaScoreBreakdown->PlusEquals(GetTranslationOption().GetScores());
}
m_deltaScoreBreakdown->PlusEquals(m_currScoreBreakdown);
@@ -206,33 +189,28 @@ public:
}
//! Get the weighted total score
- float GetTotalScore() const
- {
+ float GetTotalScore() const {
// scores from current translation rule. eg. translation models & word penalty
return m_totalScore;
}
//! vector of previous hypotheses this hypo is built on
- const std::vector<const ChartHypothesis*> &GetPrevHypos() const
- {
+ const std::vector<const ChartHypothesis*> &GetPrevHypos() const {
return m_prevHypos;
}
//! get a particular previous hypos
- const ChartHypothesis* GetPrevHypo(size_t pos) const
- {
+ const ChartHypothesis* GetPrevHypo(size_t pos) const {
return m_prevHypos[pos];
}
//! get the constituency label that covers this hypo
- const Word &GetTargetLHS() const
- {
+ const Word &GetTargetLHS() const {
return GetCurrTargetPhrase().GetTargetLHS();
}
//! get the best hypo in the arc list when doing n-best list creation. It's either this hypothesis, or the best hypo is this hypo is in the arc list
- const ChartHypothesis* GetWinningHypothesis() const
- {
+ const ChartHypothesis* GetWinningHypothesis() const {
return m_winningHypo;
}
diff --git a/moses/ChartKBestExtractor.cpp b/moses/ChartKBestExtractor.cpp
index 13e9418b0..60e4e7f2b 100644
--- a/moses/ChartKBestExtractor.cpp
+++ b/moses/ChartKBestExtractor.cpp
@@ -125,7 +125,7 @@ Phrase ChartKBestExtractor::GetOutputPhrase(const Derivation &d)
}
// Generate the score breakdown of the derivation d.
-boost::shared_ptr<ScoreComponentCollection>
+boost::shared_ptr<ScoreComponentCollection>
ChartKBestExtractor::GetOutputScoreBreakdown(const Derivation &d)
{
const ChartHypothesis &hypo = d.edge.head->hypothesis;
@@ -169,8 +169,7 @@ TreePointer ChartKBestExtractor::GetOutputTree(const Derivation &d)
mytree->Combine(previous_trees);
return mytree;
- }
- else {
+ } else {
UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found");
}
}
diff --git a/moses/ChartManager.cpp b/moses/ChartManager.cpp
index 0249b6536..d183c97e6 100644
--- a/moses/ChartManager.cpp
+++ b/moses/ChartManager.cpp
@@ -290,12 +290,14 @@ void ChartManager::FindReachableHypotheses(
}
}
-void ChartManager::OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const {
+void ChartManager::OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const
+{
ChartSearchGraphWriterHypergraph writer(&outputSearchGraphStream);
WriteSearchGraph(writer);
}
-void ChartManager::OutputSearchGraphMoses(std::ostream &outputSearchGraphStream) const {
+void ChartManager::OutputSearchGraphMoses(std::ostream &outputSearchGraphStream) const
+{
ChartSearchGraphWriterMoses writer(&outputSearchGraphStream, m_source.GetTranslationId());
WriteSearchGraph(writer);
}
@@ -304,33 +306,33 @@ void ChartManager::OutputBest(OutputCollector *collector) const
{
const ChartHypothesis *bestHypo = GetBestHypothesis();
if (collector && bestHypo) {
- const size_t translationId = m_source.GetTranslationId();
- const ChartHypothesis *bestHypo = GetBestHypothesis();
- OutputBestHypo(collector, bestHypo, translationId);
+ const size_t translationId = m_source.GetTranslationId();
+ const ChartHypothesis *bestHypo = GetBestHypothesis();
+ OutputBestHypo(collector, bestHypo, translationId);
}
}
void ChartManager::OutputNBest(OutputCollector *collector) const
{
- const StaticData &staticData = StaticData::Instance();
- size_t nBestSize = staticData.GetNBestSize();
- if (nBestSize > 0) {
- const size_t translationId = m_source.GetTranslationId();
-
- VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
- std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
- CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
- OutputNBestList(collector, nBestList, translationId);
- IFVERBOSE(2) {
- PrintUserTime("N-Best Hypotheses Generation Time:");
- }
- }
+ const StaticData &staticData = StaticData::Instance();
+ size_t nBestSize = staticData.GetNBestSize();
+ if (nBestSize > 0) {
+ const size_t translationId = m_source.GetTranslationId();
+
+ VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
+ std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
+ CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
+ OutputNBestList(collector, nBestList, translationId);
+ IFVERBOSE(2) {
+ PrintUserTime("N-Best Hypotheses Generation Time:");
+ }
+ }
}
void ChartManager::OutputNBestList(OutputCollector *collector,
- const ChartKBestExtractor::KBestVec &nBestList,
- long translationId) const
+ const ChartKBestExtractor::KBestVec &nBestList,
+ long translationId) const
{
const StaticData &staticData = StaticData::Instance();
const std::vector<Moses::FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
@@ -344,7 +346,7 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
}
bool includeWordAlignment =
- StaticData::Instance().PrintAlignmentInfoInNbest();
+ StaticData::Instance().PrintAlignmentInfoInNbest();
bool PrintNBestTrees = StaticData::Instance().PrintNBestTrees();
@@ -357,7 +359,7 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
// delete <s> and </s>
UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
- "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
+ "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outputPhrase.RemoveWord(0);
outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
@@ -405,9 +407,9 @@ size_t ChartManager::CalcSourceSize(const Moses::ChartHypothesis *hypo) const
}
size_t ChartManager::OutputAlignmentNBest(
- Alignments &retAlign,
- const Moses::ChartKBestExtractor::Derivation &derivation,
- size_t startTarget) const
+ Alignments &retAlign,
+ const Moses::ChartKBestExtractor::Derivation &derivation,
+ size_t startTarget) const
{
const ChartHypothesis &hypo = derivation.edge.head->hypothesis;
@@ -448,7 +450,7 @@ size_t ChartManager::OutputAlignmentNBest(
// Recursively look thru child hypos
size_t currStartTarget = startTarget + totalTargetSize;
size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
- currStartTarget);
+ currStartTarget);
targetOffsets[targetPos] = targetSize;
totalTargetSize += targetSize;
@@ -486,22 +488,22 @@ size_t ChartManager::OutputAlignmentNBest(
void ChartManager::OutputAlignment(OutputCollector *collector) const
{
if (collector == NULL) {
- return;
+ return;
}
ostringstream out;
const ChartHypothesis *hypo = GetBestHypothesis();
if (hypo) {
- Alignments retAlign;
- OutputAlignment(retAlign, hypo, 0);
-
- // output alignments
- Alignments::const_iterator iter;
- for (iter = retAlign.begin(); iter != retAlign.end(); ++iter) {
- const pair<size_t, size_t> &alignPoint = *iter;
- out << alignPoint.first << "-" << alignPoint.second << " ";
- }
+ Alignments retAlign;
+ OutputAlignment(retAlign, hypo, 0);
+
+ // output alignments
+ Alignments::const_iterator iter;
+ for (iter = retAlign.begin(); iter != retAlign.end(); ++iter) {
+ const pair<size_t, size_t> &alignPoint = *iter;
+ out << alignPoint.first << "-" << alignPoint.second << " ";
+ }
}
out << endl;
@@ -510,8 +512,8 @@ void ChartManager::OutputAlignment(OutputCollector *collector) const
}
size_t ChartManager::OutputAlignment(Alignments &retAlign,
- const Moses::ChartHypothesis *hypo,
- size_t startTarget) const
+ const Moses::ChartHypothesis *hypo,
+ size_t startTarget) const
{
size_t totalTargetSize = 0;
size_t startSource = hypo->GetCurrSourceRange().GetStartPos();
@@ -536,7 +538,7 @@ size_t ChartManager::OutputAlignment(Alignments &retAlign,
size_t targetInd = 0;
for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
if (tp.GetWord(targetPos).IsNonTerminal()) {
- UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
+ UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
size_t sourceInd = targetPos2SourceInd[targetPos];
size_t sourcePos = sourceInd2pos[sourceInd];
@@ -587,19 +589,19 @@ size_t ChartManager::OutputAlignment(Alignments &retAlign,
void ChartManager::OutputDetailedTranslationReport(OutputCollector *collector) const
{
- if (collector) {
- OutputDetailedTranslationReport(collector,
- GetBestHypothesis(),
- static_cast<const Sentence&>(m_source),
- m_source.GetTranslationId());
- }
+ if (collector) {
+ OutputDetailedTranslationReport(collector,
+ GetBestHypothesis(),
+ static_cast<const Sentence&>(m_source),
+ m_source.GetTranslationId());
+ }
}
void ChartManager::OutputDetailedTranslationReport(
- OutputCollector *collector,
- const ChartHypothesis *hypo,
- const Sentence &sentence,
- long translationId) const
+ OutputCollector *collector,
+ const ChartHypothesis *hypo,
+ const Sentence &sentence,
+ long translationId) const
{
if (hypo == NULL) {
return;
@@ -610,24 +612,24 @@ void ChartManager::OutputDetailedTranslationReport(
OutputTranslationOptions(out, applicationContext, hypo, sentence, translationId);
collector->Write(translationId, out.str());
- //DIMw
- const StaticData &staticData = StaticData::Instance();
+ //DIMw
+ const StaticData &staticData = StaticData::Instance();
- if (staticData.IsDetailedAllTranslationReportingEnabled()) {
- const Sentence &sentence = dynamic_cast<const Sentence &>(m_source);
- size_t nBestSize = staticData.GetNBestSize();
- std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
- CalcNBest(nBestSize, nBestList, staticData.GetDistinctNBest());
- OutputDetailedAllTranslationReport(collector, nBestList, sentence, translationId);
- }
+ if (staticData.IsDetailedAllTranslationReportingEnabled()) {
+ const Sentence &sentence = dynamic_cast<const Sentence &>(m_source);
+ size_t nBestSize = staticData.GetNBestSize();
+ std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
+ CalcNBest(nBestSize, nBestList, staticData.GetDistinctNBest());
+ OutputDetailedAllTranslationReport(collector, nBestList, sentence, translationId);
+ }
}
void ChartManager::OutputTranslationOptions(std::ostream &out,
- ApplicationContext &applicationContext,
- const ChartHypothesis *hypo,
- const Sentence &sentence,
- long translationId) const
+ ApplicationContext &applicationContext,
+ const ChartHypothesis *hypo,
+ const Sentence &sentence,
+ long translationId) const
{
if (hypo != NULL) {
OutputTranslationOption(out, applicationContext, hypo, sentence, translationId);
@@ -644,10 +646,10 @@ void ChartManager::OutputTranslationOptions(std::ostream &out,
}
void ChartManager::OutputTranslationOption(std::ostream &out,
- ApplicationContext &applicationContext,
- const ChartHypothesis *hypo,
- const Sentence &sentence,
- long translationId) const
+ ApplicationContext &applicationContext,
+ const ChartHypothesis *hypo,
+ const Sentence &sentence,
+ long translationId) const
{
ReconstructApplicationContext(*hypo, sentence, applicationContext);
out << "Trans Opt " << translationId
@@ -691,16 +693,16 @@ void ChartManager::ReconstructApplicationContext(const ChartHypothesis &hypo,
void ChartManager::OutputUnknowns(OutputCollector *collector) const
{
if (collector) {
- long translationId = m_source.GetTranslationId();
- const std::vector<Phrase*> &oovs = GetParser().GetUnknownSources();
+ long translationId = m_source.GetTranslationId();
+ const std::vector<Phrase*> &oovs = GetParser().GetUnknownSources();
- std::ostringstream out;
- for (std::vector<Phrase*>::const_iterator p = oovs.begin();
- p != oovs.end(); ++p) {
- out << *p;
- }
- out << std::endl;
- collector->Write(translationId, out.str());
+ std::ostringstream out;
+ for (std::vector<Phrase*>::const_iterator p = oovs.begin();
+ p != oovs.end(); ++p) {
+ out << *p;
+ }
+ out << std::endl;
+ collector->Write(translationId, out.str());
}
}
@@ -709,7 +711,7 @@ void ChartManager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector
{
const ChartHypothesis *hypo = GetBestHypothesis();
if (collector == NULL || hypo == NULL) {
- return;
+ return;
}
std::ostringstream out;
@@ -723,14 +725,14 @@ void ChartManager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector
//Tree of full sentence
const StatefulFeatureFunction* treeStructure = StaticData::Instance().GetTreeStructure();
if (treeStructure != NULL) {
- const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
- for( size_t i=0; i<sff.size(); i++ ) {
- if (sff[i] == treeStructure) {
- const TreeState* tree = dynamic_cast<const TreeState*>(hypo->GetFFState(i));
- out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n";
- break;
- }
- }
+ const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
+ for( size_t i=0; i<sff.size(); i++ ) {
+ if (sff[i] == treeStructure) {
+ const TreeState* tree = dynamic_cast<const TreeState*>(hypo->GetFFState(i));
+ out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n";
+ break;
+ }
+ }
}
collector->Write(translationId, out.str());
@@ -738,10 +740,10 @@ void ChartManager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector
}
void ChartManager::OutputTreeFragmentsTranslationOptions(std::ostream &out,
- ApplicationContext &applicationContext,
- const ChartHypothesis *hypo,
- const Sentence &sentence,
- long translationId) const
+ ApplicationContext &applicationContext,
+ const ChartHypothesis *hypo,
+ const Sentence &sentence,
+ long translationId) const
{
if (hypo != NULL) {
@@ -769,20 +771,20 @@ void ChartManager::OutputTreeFragmentsTranslationOptions(std::ostream &out,
void ChartManager::OutputSearchGraph(OutputCollector *collector) const
{
- if (collector) {
- long translationId = m_source.GetTranslationId();
- std::ostringstream out;
- OutputSearchGraphMoses( out);
- collector->Write(translationId, out.str());
- }
+ if (collector) {
+ long translationId = m_source.GetTranslationId();
+ std::ostringstream out;
+ OutputSearchGraphMoses( out);
+ collector->Write(translationId, out.str());
+ }
}
//DIMw
void ChartManager::OutputDetailedAllTranslationReport(
- OutputCollector *collector,
- const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
- const Sentence &sentence,
- long translationId) const
+ OutputCollector *collector,
+ const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
+ const Sentence &sentence,
+ long translationId) const
{
std::ostringstream out;
ApplicationContext applicationContext;
@@ -813,8 +815,8 @@ void ChartManager::OutputSearchGraphHypergraph() const
{
const StaticData &staticData = StaticData::Instance();
if (staticData.GetOutputSearchGraphHypergraph()) {
- HypergraphOutput<ChartManager> hypergraphOutputChart(PRECISION);
- hypergraphOutputChart.Write(*this);
+ HypergraphOutput<ChartManager> hypergraphOutputChart(PRECISION);
+ hypergraphOutputChart.Write(*this);
}
}
@@ -842,7 +844,7 @@ void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothe
// delete 1st & last
UTIL_THROW_IF2(outPhrase.GetSize() < 2,
- "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
+ "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outPhrase.RemoveWord(0);
outPhrase.RemoveWord(outPhrase.GetSize() - 1);
diff --git a/moses/ChartManager.h b/moses/ChartManager.h
index 7c493d840..745a792cb 100644
--- a/moses/ChartManager.h
+++ b/moses/ChartManager.h
@@ -56,49 +56,49 @@ private:
ChartTranslationOptionList m_translationOptionList; /**< pre-computed list of translation options for the phrases in this sentence */
/* auxilliary functions for SearchGraphs */
- void FindReachableHypotheses(
- const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable , size_t* winners, size_t* losers) const;
+ void FindReachableHypotheses(
+ const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable , size_t* winners, size_t* losers) const;
void WriteSearchGraph(const ChartSearchGraphWriter& writer) const;
// output
void OutputNBestList(OutputCollector *collector,
- const ChartKBestExtractor::KBestVec &nBestList,
- long translationId) const;
+ const ChartKBestExtractor::KBestVec &nBestList,
+ long translationId) const;
size_t CalcSourceSize(const Moses::ChartHypothesis *hypo) const;
size_t OutputAlignmentNBest(Alignments &retAlign,
- const Moses::ChartKBestExtractor::Derivation &derivation,
- size_t startTarget) const;
+ const Moses::ChartKBestExtractor::Derivation &derivation,
+ size_t startTarget) const;
size_t OutputAlignment(Alignments &retAlign,
- const Moses::ChartHypothesis *hypo,
- size_t startTarget) const;
+ const Moses::ChartHypothesis *hypo,
+ size_t startTarget) const;
void OutputDetailedTranslationReport(
- OutputCollector *collector,
- const ChartHypothesis *hypo,
- const Sentence &sentence,
- long translationId) const;
+ OutputCollector *collector,
+ const ChartHypothesis *hypo,
+ const Sentence &sentence,
+ long translationId) const;
void OutputTranslationOptions(std::ostream &out,
- ApplicationContext &applicationContext,
- const ChartHypothesis *hypo,
- const Sentence &sentence,
- long translationId) const;
+ ApplicationContext &applicationContext,
+ const ChartHypothesis *hypo,
+ const Sentence &sentence,
+ long translationId) const;
void OutputTranslationOption(std::ostream &out,
- ApplicationContext &applicationContext,
- const ChartHypothesis *hypo,
- const Sentence &sentence,
- long translationId) const;
+ ApplicationContext &applicationContext,
+ const ChartHypothesis *hypo,
+ const Sentence &sentence,
+ long translationId) const;
void ReconstructApplicationContext(const ChartHypothesis &hypo,
- const Sentence &sentence,
- ApplicationContext &context) const;
+ const Sentence &sentence,
+ ApplicationContext &context) const;
void OutputTreeFragmentsTranslationOptions(std::ostream &out,
- ApplicationContext &applicationContext,
- const ChartHypothesis *hypo,
- const Sentence &sentence,
- long translationId) const;
+ ApplicationContext &applicationContext,
+ const ChartHypothesis *hypo,
+ const Sentence &sentence,
+ long translationId) const;
void OutputDetailedAllTranslationReport(
- OutputCollector *collector,
- const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
- const Sentence &sentence,
- long translationId) const;
+ OutputCollector *collector,
+ const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
+ const Sentence &sentence,
+ long translationId) const;
void OutputBestHypo(OutputCollector *collector, const ChartHypothesis *hypo, long translationId) const;
void Backtrack(const ChartHypothesis *hypo) const;
@@ -126,8 +126,8 @@ public:
return m_hypoStackColl;
}
- void CalcDecoderStatistics() const
- {}
+ void CalcDecoderStatistics() const {
+ }
void ResetSentenceStats(const InputType& source) {
m_sentenceStats = std::auto_ptr<SentenceStats>(new SentenceStats(source));
@@ -138,22 +138,24 @@ public:
return m_hypothesisId++;
}
- const ChartParser &GetParser() const { return m_parser; }
+ const ChartParser &GetParser() const {
+ return m_parser;
+ }
// outputs
void OutputBest(OutputCollector *collector) const;
void OutputNBest(OutputCollector *collector) const;
- void OutputLatticeSamples(OutputCollector *collector) const
- {}
+ void OutputLatticeSamples(OutputCollector *collector) const {
+ }
void OutputAlignment(OutputCollector *collector) const;
void OutputDetailedTranslationReport(OutputCollector *collector) const;
void OutputUnknowns(OutputCollector *collector) const;
void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const;
- void OutputWordGraph(OutputCollector *collector) const
- {}
+ void OutputWordGraph(OutputCollector *collector) const {
+ }
void OutputSearchGraph(OutputCollector *collector) const;
- void OutputSearchGraphSLF() const
- {}
+ void OutputSearchGraphSLF() const {
+ }
void OutputSearchGraphHypergraph() const;
};
diff --git a/moses/ChartRuleLookupManager.h b/moses/ChartRuleLookupManager.h
index be1a8c7d1..b62ec157d 100644
--- a/moses/ChartRuleLookupManager.h
+++ b/moses/ChartRuleLookupManager.h
@@ -65,7 +65,7 @@ public:
* \param outColl return argument
*/
virtual void GetChartRuleCollection(
- const InputPath &inputPath,
+ const InputPath &inputPath,
size_t lastPos, // last position to consider if using lookahead
ChartParserCallback &outColl) = 0;
diff --git a/moses/ChartTranslationOption.cpp b/moses/ChartTranslationOption.cpp
index 332b26a15..65cb2afbd 100644
--- a/moses/ChartTranslationOption.cpp
+++ b/moses/ChartTranslationOption.cpp
@@ -11,8 +11,8 @@ ChartTranslationOption::ChartTranslationOption(const TargetPhrase &targetPhrase)
}
void ChartTranslationOption::EvaluateWithSourceContext(const InputType &input,
- const InputPath &inputPath,
- const StackVec &stackVec)
+ const InputPath &inputPath,
+ const StackVec &stackVec)
{
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
diff --git a/moses/ChartTranslationOption.h b/moses/ChartTranslationOption.h
index b6d32dfc9..0b902f811 100644
--- a/moses/ChartTranslationOption.h
+++ b/moses/ChartTranslationOption.h
@@ -46,8 +46,8 @@ public:
}
void EvaluateWithSourceContext(const InputType &input,
- const InputPath &inputPath,
- const StackVec &stackVec);
+ const InputPath &inputPath,
+ const StackVec &stackVec);
};
}
diff --git a/moses/ChartTranslationOptions.cpp b/moses/ChartTranslationOptions.cpp
index 44aa67619..6d4e1a816 100644
--- a/moses/ChartTranslationOptions.cpp
+++ b/moses/ChartTranslationOptions.cpp
@@ -71,10 +71,9 @@ void ChartTranslationOptions::EvaluateWithSourceContext(const InputType &input,
ChartTranslationOption *transOpt = m_collection[i].get();
if (transOpt->GetScores().GetWeightedScore() == - std::numeric_limits<float>::infinity()) {
- ++numDiscard;
- }
- else if (numDiscard) {
- m_collection[i - numDiscard] = m_collection[i];
+ ++numDiscard;
+ } else if (numDiscard) {
+ m_collection[i - numDiscard] = m_collection[i];
}
}
@@ -135,12 +134,12 @@ void ChartTranslationOptions::CreateSourceRuleFromInputPath()
std::ostream& operator<<(std::ostream &out, const ChartTranslationOptions &obj)
{
- for (size_t i = 0; i < obj.m_collection.size(); ++i) {
- const ChartTranslationOption &transOpt = *obj.m_collection[i];
- out << transOpt << endl;
- }
+ for (size_t i = 0; i < obj.m_collection.size(); ++i) {
+ const ChartTranslationOption &transOpt = *obj.m_collection[i];
+ out << transOpt << endl;
+ }
- return out;
+ return out;
}
}
diff --git a/moses/ConfusionNet.cpp b/moses/ConfusionNet.cpp
index 6591209d8..53e9e23b5 100644
--- a/moses/ConfusionNet.cpp
+++ b/moses/ConfusionNet.cpp
@@ -13,297 +13,297 @@
namespace Moses
{
- struct CNStats {
- size_t created,destr,read,colls,words;
-
- CNStats() : created(0),destr(0),read(0),colls(0),words(0) {}
- ~CNStats() {
- print(std::cerr);
- }
+struct CNStats {
+ size_t created,destr,read,colls,words;
- void createOne() {
- ++created;
- }
- void destroyOne() {
- ++destr;
- }
-
- void collect(const ConfusionNet& cn) {
- ++read;
- colls+=cn.GetSize();
- for(size_t i=0; i<cn.GetSize(); ++i)
- words+=cn[i].size();
- }
- void print(std::ostream& out) const {
- if(created>0) {
- out<<"confusion net statistics:\n"
- " created:\t"<<created<<"\n"
- " destroyed:\t"<<destr<<"\n"
- " succ. read:\t"<<read<<"\n"
- " columns:\t"<<colls<<"\n"
- " words:\t"<<words<<"\n"
- " avg. word/column:\t"<<words/(1.0*colls)<<"\n"
- " avg. cols/sent:\t"<<colls/(1.0*read)<<"\n"
- "\n\n";
- }
- }
- };
-
- CNStats stats;
-
- size_t
- ConfusionNet::
- GetColumnIncrement(size_t i, size_t j) const
- {
- (void) i;
- (void) j;
- return 1;
+ CNStats() : created(0),destr(0),read(0),colls(0),words(0) {}
+ ~CNStats() {
+ print(std::cerr);
}
- ConfusionNet::
- ConfusionNet()
- : InputType()
- {
- stats.createOne();
+ void createOne() {
+ ++created;
+ }
+ void destroyOne() {
+ ++destr;
+ }
- const StaticData& staticData = StaticData::Instance();
- if (staticData.IsChart()) {
- m_defaultLabelSet.insert(StaticData::Instance().GetInputDefaultNonTerminal());
+ void collect(const ConfusionNet& cn) {
+ ++read;
+ colls+=cn.GetSize();
+ for(size_t i=0; i<cn.GetSize(); ++i)
+ words+=cn[i].size();
+ }
+ void print(std::ostream& out) const {
+ if(created>0) {
+ out<<"confusion net statistics:\n"
+ " created:\t"<<created<<"\n"
+ " destroyed:\t"<<destr<<"\n"
+ " succ. read:\t"<<read<<"\n"
+ " columns:\t"<<colls<<"\n"
+ " words:\t"<<words<<"\n"
+ " avg. word/column:\t"<<words/(1.0*colls)<<"\n"
+ " avg. cols/sent:\t"<<colls/(1.0*read)<<"\n"
+ "\n\n";
}
- UTIL_THROW_IF2(&InputFeature::Instance() == NULL, "Input feature must be specified");
}
+};
- ConfusionNet::
- ~ConfusionNet()
- {
- stats.destroyOne();
- }
+CNStats stats;
- ConfusionNet::
- ConfusionNet(Sentence const& s)
- {
- data.resize(s.GetSize());
- for(size_t i=0; i<s.GetSize(); ++i) {
- ScorePair scorePair;
- std::pair<Word, ScorePair > temp = std::make_pair(s.GetWord(i), scorePair);
- data[i].push_back(temp);
- }
+size_t
+ConfusionNet::
+GetColumnIncrement(size_t i, size_t j) const
+{
+ (void) i;
+ (void) j;
+ return 1;
+}
+
+ConfusionNet::
+ConfusionNet()
+ : InputType()
+{
+ stats.createOne();
+
+ const StaticData& staticData = StaticData::Instance();
+ if (staticData.IsChart()) {
+ m_defaultLabelSet.insert(StaticData::Instance().GetInputDefaultNonTerminal());
}
+ UTIL_THROW_IF2(&InputFeature::Instance() == NULL, "Input feature must be specified");
+}
- bool
- ConfusionNet::
- ReadF(std::istream& in, const std::vector<FactorType>& factorOrder, int format)
- {
- VERBOSE(2, "read confusion net with format "<<format<<"\n");
- switch(format) {
- case 0:
- return ReadFormat0(in,factorOrder);
- case 1:
- return ReadFormat1(in,factorOrder);
- default:
- std::cerr << "ERROR: unknown format '"<<format
- <<"' in ConfusionNet::Read";
- }
- return false;
+ConfusionNet::
+~ConfusionNet()
+{
+ stats.destroyOne();
+}
+
+ConfusionNet::
+ConfusionNet(Sentence const& s)
+{
+ data.resize(s.GetSize());
+ for(size_t i=0; i<s.GetSize(); ++i) {
+ ScorePair scorePair;
+ std::pair<Word, ScorePair > temp = std::make_pair(s.GetWord(i), scorePair);
+ data[i].push_back(temp);
}
+}
- int
- ConfusionNet::
- Read(std::istream& in,
- const std::vector<FactorType>& factorOrder)
- {
- int rv=ReadF(in,factorOrder,0);
- if(rv) stats.collect(*this);
- return rv;
+bool
+ConfusionNet::
+ReadF(std::istream& in, const std::vector<FactorType>& factorOrder, int format)
+{
+ VERBOSE(2, "read confusion net with format "<<format<<"\n");
+ switch(format) {
+ case 0:
+ return ReadFormat0(in,factorOrder);
+ case 1:
+ return ReadFormat1(in,factorOrder);
+ default:
+ std::cerr << "ERROR: unknown format '"<<format
+ <<"' in ConfusionNet::Read";
}
+ return false;
+}
+
+int
+ConfusionNet::
+Read(std::istream& in,
+ const std::vector<FactorType>& factorOrder)
+{
+ int rv=ReadF(in,factorOrder,0);
+ if(rv) stats.collect(*this);
+ return rv;
+}
#if 0
- // Deprecated due to code duplication;
- // use Word::CreateFromString() instead
- void
- ConfusionNet::
- String2Word(const std::string& s,Word& w,
- const std::vector<FactorType>& factorOrder)
- {
- std::vector<std::string> factorStrVector = Tokenize(s, "|");
- for(size_t i=0; i<factorOrder.size(); ++i)
- w.SetFactor(factorOrder[i],
- FactorCollection::Instance().AddFactor
- (Input,factorOrder[i], factorStrVector[i]));
- }
+// Deprecated due to code duplication;
+// use Word::CreateFromString() instead
+void
+ConfusionNet::
+String2Word(const std::string& s,Word& w,
+ const std::vector<FactorType>& factorOrder)
+{
+ std::vector<std::string> factorStrVector = Tokenize(s, "|");
+ for(size_t i=0; i<factorOrder.size(); ++i)
+ w.SetFactor(factorOrder[i],
+ FactorCollection::Instance().AddFactor
+ (Input,factorOrder[i], factorStrVector[i]));
+}
#endif
- bool
- ConfusionNet::
- ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
- {
- Clear();
-
- // const StaticData &staticData = StaticData::Instance();
- const InputFeature &inputFeature = InputFeature::Instance();
- size_t numInputScores = inputFeature.GetNumInputScores();
- size_t numRealWordCount = inputFeature.GetNumRealWordsInInput();
-
- size_t totalCount = numInputScores + numRealWordCount;
- bool addRealWordCount = (numRealWordCount > 0);
-
- std::string line;
- while(getline(in,line)) {
- std::istringstream is(line);
- std::string word;
-
- Column col;
- while(is>>word) {
- Word w;
- // String2Word(word,w,factorOrder);
- w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
- std::vector<float> probs(totalCount, 0.0);
- for(size_t i=0; i < numInputScores; i++) {
- double prob;
- if (!(is>>prob)) {
- TRACE_ERR("ERROR: unable to parse CN input - bad link probability, or wrong number of scores\n");
- return false;
- }
- if(prob<0.0) {
- VERBOSE(1, "WARN: negative prob: "<<prob<<" ->set to 0.0\n");
- prob=0.0;
- } else if (prob>1.0) {
- VERBOSE(1, "WARN: prob > 1.0 : "<<prob<<" -> set to 1.0\n");
- prob=1.0;
- }
- probs[i] = (std::max(static_cast<float>(log(prob)),LOWEST_SCORE));
-
- }
- //store 'real' word count in last feature if we have one more weight than we do arc scores and not epsilon
- if (addRealWordCount && word!=EPSILON && word!="")
- probs.back() = -1.0;
-
- ScorePair scorePair(probs);
-
- col.push_back(std::make_pair(w,scorePair));
+bool
+ConfusionNet::
+ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
+{
+ Clear();
+
+ // const StaticData &staticData = StaticData::Instance();
+ const InputFeature &inputFeature = InputFeature::Instance();
+ size_t numInputScores = inputFeature.GetNumInputScores();
+ size_t numRealWordCount = inputFeature.GetNumRealWordsInInput();
+
+ size_t totalCount = numInputScores + numRealWordCount;
+ bool addRealWordCount = (numRealWordCount > 0);
+
+ std::string line;
+ while(getline(in,line)) {
+ std::istringstream is(line);
+ std::string word;
+
+ Column col;
+ while(is>>word) {
+ Word w;
+ // String2Word(word,w,factorOrder);
+ w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
+ std::vector<float> probs(totalCount, 0.0);
+ for(size_t i=0; i < numInputScores; i++) {
+ double prob;
+ if (!(is>>prob)) {
+ TRACE_ERR("ERROR: unable to parse CN input - bad link probability, or wrong number of scores\n");
+ return false;
+ }
+ if(prob<0.0) {
+ VERBOSE(1, "WARN: negative prob: "<<prob<<" ->set to 0.0\n");
+ prob=0.0;
+ } else if (prob>1.0) {
+ VERBOSE(1, "WARN: prob > 1.0 : "<<prob<<" -> set to 1.0\n");
+ prob=1.0;
+ }
+ probs[i] = (std::max(static_cast<float>(log(prob)),LOWEST_SCORE));
+
}
- if(col.size()) {
- data.push_back(col);
- ShrinkToFit(data.back());
- } else break;
+ //store 'real' word count in last feature if we have one more weight than we do arc scores and not epsilon
+ if (addRealWordCount && word!=EPSILON && word!="")
+ probs.back() = -1.0;
+
+ ScorePair scorePair(probs);
+
+ col.push_back(std::make_pair(w,scorePair));
}
- return !data.empty();
+ if(col.size()) {
+ data.push_back(col);
+ ShrinkToFit(data.back());
+ } else break;
}
+ return !data.empty();
+}
- bool
- ConfusionNet::
- ReadFormat1(std::istream& in, const std::vector<FactorType>& factorOrder)
- {
- Clear();
- std::string line;
+bool
+ConfusionNet::
+ReadFormat1(std::istream& in, const std::vector<FactorType>& factorOrder)
+{
+ Clear();
+ std::string line;
+ if(!getline(in,line)) return 0;
+ size_t s;
+ if(getline(in,line)) s=atoi(line.c_str());
+ else return 0;
+ data.resize(s);
+ for(size_t i=0; i<data.size(); ++i) {
if(!getline(in,line)) return 0;
- size_t s;
- if(getline(in,line)) s=atoi(line.c_str());
- else return 0;
- data.resize(s);
- for(size_t i=0; i<data.size(); ++i) {
- if(!getline(in,line)) return 0;
- std::istringstream is(line);
- if(!(is>>s)) return 0;
- std::string word;
- double prob;
- data[i].resize(s);
- for(size_t j=0; j<s; ++j)
- if(is>>word>>prob) {
- //TODO: we are only reading one prob from this input format, should read many... but this function is unused anyway. -JS
- data[i][j].second.denseScores = std::vector<float> (1);
- data[i][j].second.denseScores.push_back((float) log(prob));
- if(data[i][j].second.denseScores[0]<0) {
- VERBOSE(1, "WARN: neg costs: "<<data[i][j].second.denseScores[0]<<" -> set to 0\n");
- data[i][j].second.denseScores[0]=0.0;
- }
- // String2Word(word,data[i][j].first,factorOrder);
- Word& w = data[i][j].first;
- w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
- } else return 0;
- }
- return !data.empty();
+ std::istringstream is(line);
+ if(!(is>>s)) return 0;
+ std::string word;
+ double prob;
+ data[i].resize(s);
+ for(size_t j=0; j<s; ++j)
+ if(is>>word>>prob) {
+ //TODO: we are only reading one prob from this input format, should read many... but this function is unused anyway. -JS
+ data[i][j].second.denseScores = std::vector<float> (1);
+ data[i][j].second.denseScores.push_back((float) log(prob));
+ if(data[i][j].second.denseScores[0]<0) {
+ VERBOSE(1, "WARN: neg costs: "<<data[i][j].second.denseScores[0]<<" -> set to 0\n");
+ data[i][j].second.denseScores[0]=0.0;
+ }
+ // String2Word(word,data[i][j].first,factorOrder);
+ Word& w = data[i][j].first;
+ w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
+ } else return 0;
}
+ return !data.empty();
+}
+
+void ConfusionNet::Print(std::ostream& out) const
+{
+ out<<"conf net: "<<data.size()<<"\n";
+ for(size_t i=0; i<data.size(); ++i) {
+ out<<i<<" -- ";
+ for(size_t j=0; j<data[i].size(); ++j) {
+ out<<"("<<data[i][j].first.ToString()<<", ";
+
+ // dense
+ std::vector<float>::const_iterator iterDense;
+ for(iterDense = data[i][j].second.denseScores.begin();
+ iterDense < data[i][j].second.denseScores.end();
+ ++iterDense) {
+ out<<", "<<*iterDense;
+ }
- void ConfusionNet::Print(std::ostream& out) const
- {
- out<<"conf net: "<<data.size()<<"\n";
- for(size_t i=0; i<data.size(); ++i) {
- out<<i<<" -- ";
- for(size_t j=0; j<data[i].size(); ++j) {
- out<<"("<<data[i][j].first.ToString()<<", ";
-
- // dense
- std::vector<float>::const_iterator iterDense;
- for(iterDense = data[i][j].second.denseScores.begin();
- iterDense < data[i][j].second.denseScores.end();
- ++iterDense) {
- out<<", "<<*iterDense;
- }
-
- // sparse
- std::map<StringPiece, float>::const_iterator iterSparse;
- for(iterSparse = data[i][j].second.sparseScores.begin();
- iterSparse != data[i][j].second.sparseScores.end();
- ++iterSparse) {
- out << ", " << iterSparse->first << "=" << iterSparse->second;
- }
-
- out<<") ";
+ // sparse
+ std::map<StringPiece, float>::const_iterator iterSparse;
+ for(iterSparse = data[i][j].second.sparseScores.begin();
+ iterSparse != data[i][j].second.sparseScores.end();
+ ++iterSparse) {
+ out << ", " << iterSparse->first << "=" << iterSparse->second;
}
- out<<"\n";
+
+ out<<") ";
}
- out<<"\n\n";
+ out<<"\n";
}
+ out<<"\n\n";
+}
#ifdef _WIN32
#pragma warning(disable:4716)
#endif
- Phrase
- ConfusionNet::
- GetSubString(const WordsRange&) const
- {
- UTIL_THROW2("ERROR: call to ConfusionNet::GetSubString\n");
- //return Phrase(Input);
- }
+Phrase
+ConfusionNet::
+GetSubString(const WordsRange&) const
+{
+ UTIL_THROW2("ERROR: call to ConfusionNet::GetSubString\n");
+ //return Phrase(Input);
+}
- std::string
- ConfusionNet::
- GetStringRep(const std::vector<FactorType> /* factorsToPrint */) const //not well defined yet
- {
- TRACE_ERR("ERROR: call to ConfusionNet::GeStringRep\n");
- return "";
- }
+std::string
+ConfusionNet::
+GetStringRep(const std::vector<FactorType> /* factorsToPrint */) const //not well defined yet
+{
+ TRACE_ERR("ERROR: call to ConfusionNet::GeStringRep\n");
+ return "";
+}
#ifdef _WIN32
#pragma warning(disable:4716)
#endif
- const Word& ConfusionNet::GetWord(size_t) const
- {
- UTIL_THROW2("ERROR: call to ConfusionNet::GetFactorArray\n");
- }
+const Word& ConfusionNet::GetWord(size_t) const
+{
+ UTIL_THROW2("ERROR: call to ConfusionNet::GetFactorArray\n");
+}
#ifdef _WIN32
#pragma warning(default:4716)
#endif
- std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn)
- {
- cn.Print(out);
- return out;
- }
+std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn)
+{
+ cn.Print(out);
+ return out;
+}
- TranslationOptionCollection*
- ConfusionNet::
- CreateTranslationOptionCollection() const
- {
- size_t maxNoTransOptPerCoverage
- = StaticData::Instance().GetMaxNoTransOptPerCoverage();
- float translationOptionThreshold
- = StaticData::Instance().GetTranslationOptionThreshold();
- TranslationOptionCollection *rv
- = new TranslationOptionCollectionConfusionNet
- (*this, maxNoTransOptPerCoverage, translationOptionThreshold);
- assert(rv);
- return rv;
- }
+TranslationOptionCollection*
+ConfusionNet::
+CreateTranslationOptionCollection() const
+{
+ size_t maxNoTransOptPerCoverage
+ = StaticData::Instance().GetMaxNoTransOptPerCoverage();
+ float translationOptionThreshold
+ = StaticData::Instance().GetTranslationOptionThreshold();
+ TranslationOptionCollection *rv
+ = new TranslationOptionCollectionConfusionNet
+ (*this, maxNoTransOptPerCoverage, translationOptionThreshold);
+ assert(rv);
+ return rv;
+}
}
diff --git a/moses/DecodeGraph.h b/moses/DecodeGraph.h
index ebb7ef9e1..1be823dc3 100644
--- a/moses/DecodeGraph.h
+++ b/moses/DecodeGraph.h
@@ -49,8 +49,8 @@ public:
DecodeGraph(size_t id)
: m_id(id)
, m_maxChartSpan(NOT_FOUND)
- , m_backoff(0)
- {}
+ , m_backoff(0) {
+ }
// for chart decoding
DecodeGraph(size_t id, size_t maxChartSpan)
diff --git a/moses/DecodeStepTranslation.cpp b/moses/DecodeStepTranslation.cpp
index e7dbba4f3..ea5c289db 100644
--- a/moses/DecodeStepTranslation.cpp
+++ b/moses/DecodeStepTranslation.cpp
@@ -198,11 +198,11 @@ const InputPath &DecodeStepTranslation::GetInputPathLEGACY(
const Word *wordIP = NULL;
for (size_t i = 0; i < phraseFromIP.GetSize(); ++i) {
- const Word &tempWord = phraseFromIP.GetWord(i);
- if (!tempWord.IsEpsilon()) {
- wordIP = &tempWord;
- break;
- }
+ const Word &tempWord = phraseFromIP.GetWord(i);
+ if (!tempWord.IsEpsilon()) {
+ wordIP = &tempWord;
+ break;
+ }
}
// const WordsRange &range = inputPath.GetWordsRange();
@@ -237,7 +237,7 @@ void DecodeStepTranslation::ProcessLEGACY(const TranslationOption &inputPartialT
const size_t tableLimit = phraseDictionary->GetTableLimit();
const TargetPhraseCollectionWithSourcePhrase *phraseColl
- = phraseDictionary->GetTargetPhraseCollectionLEGACY(toc->GetSource(),sourceWordsRange);
+ = phraseDictionary->GetTargetPhraseCollectionLEGACY(toc->GetSource(),sourceWordsRange);
if (phraseColl != NULL) {
diff --git a/moses/FF/BleuScoreFeature.cpp b/moses/FF/BleuScoreFeature.cpp
index 3ba0dda7c..5be3b0b6b 100644
--- a/moses/FF/BleuScoreFeature.cpp
+++ b/moses/FF/BleuScoreFeature.cpp
@@ -502,8 +502,8 @@ void BleuScoreFeature::GetClippedNgramMatchesAndCounts(Phrase& phrase,
* phrase translated.
*/
FFState* BleuScoreFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo,
- const FFState* prev_state,
- ScoreComponentCollection* accumulator) const
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const
{
if (!m_enabled) return new BleuScoreState();
diff --git a/moses/FF/BleuScoreFeature.h b/moses/FF/BleuScoreFeature.h
index 423e7284d..e1a7f09c7 100644
--- a/moses/FF/BleuScoreFeature.h
+++ b/moses/FF/BleuScoreFeature.h
@@ -116,27 +116,27 @@ public:
size_t skip = 0) const;
FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo,
- const FFState* prev_state,
- ScoreComponentCollection* accumulator) const;
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const;
FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo,
- int featureID,
- ScoreComponentCollection* accumulator) const;
+ int featureID,
+ ScoreComponentCollection* accumulator) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const TranslationOptionList &translationOptionList) const {
+ }
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
bool Enabled() const {
return m_enabled;
diff --git a/moses/FF/ConstrainedDecoding.h b/moses/FF/ConstrainedDecoding.h
index fe9e2c182..67833a1b4 100644
--- a/moses/FF/ConstrainedDecoding.h
+++ b/moses/FF/ConstrainedDecoding.h
@@ -11,8 +11,8 @@ namespace Moses
class ConstrainedDecodingState : public FFState
{
public:
- ConstrainedDecodingState()
- {}
+ ConstrainedDecodingState() {
+ }
ConstrainedDecodingState(const Hypothesis &hypo);
ConstrainedDecodingState(const ChartHypothesis &hypo);
@@ -42,23 +42,23 @@ public:
}
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
-
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
+
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
-
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
-
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
diff --git a/moses/FF/ControlRecombination.h b/moses/FF/ControlRecombination.h
index 32cf566da..f221f772f 100644
--- a/moses/FF/ControlRecombination.h
+++ b/moses/FF/ControlRecombination.h
@@ -20,8 +20,8 @@ class ControlRecombinationState : public FFState
{
public:
ControlRecombinationState(const ControlRecombination &ff)
- :m_ff(ff)
- {}
+ :m_ff(ff) {
+ }
ControlRecombinationState(const Hypothesis &hypo, const ControlRecombination &ff);
ControlRecombinationState(const ChartHypothesis &hypo, const ControlRecombination &ff);
@@ -58,22 +58,22 @@ public:
}
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
-
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
-
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
diff --git a/moses/FF/CountNonTerms.cpp b/moses/FF/CountNonTerms.cpp
index 03c7b7315..17d1c9c20 100644
--- a/moses/FF/CountNonTerms.cpp
+++ b/moses/FF/CountNonTerms.cpp
@@ -8,18 +8,18 @@ using namespace std;
namespace Moses
{
CountNonTerms::CountNonTerms(const std::string &line)
-:StatelessFeatureFunction(line)
-,m_all(true)
-,m_sourceSyntax(false)
-,m_targetSyntax(false)
+ :StatelessFeatureFunction(line)
+ ,m_all(true)
+ ,m_sourceSyntax(false)
+ ,m_targetSyntax(false)
{
ReadParameters();
}
void CountNonTerms::EvaluateInIsolation(const Phrase &sourcePhrase
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
const StaticData &staticData = StaticData::Instance();
@@ -27,33 +27,33 @@ void CountNonTerms::EvaluateInIsolation(const Phrase &sourcePhrase
size_t indScore = 0;
if (m_all) {
- for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
- const Word &word = targetPhrase.GetWord(i);
- if (word.IsNonTerminal()) {
- ++scores[indScore];
- }
- }
- ++indScore;
+ for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
+ const Word &word = targetPhrase.GetWord(i);
+ if (word.IsNonTerminal()) {
+ ++scores[indScore];
+ }
+ }
+ ++indScore;
}
if (m_targetSyntax) {
- for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
- const Word &word = targetPhrase.GetWord(i);
- if (word.IsNonTerminal() && word != staticData.GetOutputDefaultNonTerminal()) {
- ++scores[indScore];
- }
- }
- ++indScore;
+ for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
+ const Word &word = targetPhrase.GetWord(i);
+ if (word.IsNonTerminal() && word != staticData.GetOutputDefaultNonTerminal()) {
+ ++scores[indScore];
+ }
+ }
+ ++indScore;
}
if (m_sourceSyntax) {
- for (size_t i = 0; i < sourcePhrase.GetSize(); ++i) {
- const Word &word = sourcePhrase.GetWord(i);
- if (word.IsNonTerminal() && word != staticData.GetInputDefaultNonTerminal()) {
- ++scores[indScore];
- }
- }
- ++indScore;
+ for (size_t i = 0; i < sourcePhrase.GetSize(); ++i) {
+ const Word &word = sourcePhrase.GetWord(i);
+ if (word.IsNonTerminal() && word != staticData.GetInputDefaultNonTerminal()) {
+ ++scores[indScore];
+ }
+ }
+ ++indScore;
}
scoreBreakdown.PlusEquals(this, scores);
@@ -64,9 +64,9 @@ void CountNonTerms::SetParameter(const std::string& key, const std::string& valu
if (key == "all") {
m_all = Scan<bool>(value);
} else if (key == "source-syntax") {
- m_sourceSyntax = Scan<bool>(value);
+ m_sourceSyntax = Scan<bool>(value);
} else if (key == "target-syntax") {
- m_targetSyntax = Scan<bool>(value);
+ m_targetSyntax = Scan<bool>(value);
} else {
StatelessFeatureFunction::SetParameter(key, value);
}
diff --git a/moses/FF/CountNonTerms.h b/moses/FF/CountNonTerms.h
index 3977f2f37..2e29f2aaa 100644
--- a/moses/FF/CountNonTerms.h
+++ b/moses/FF/CountNonTerms.h
@@ -14,30 +14,30 @@ public:
}
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
-
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const TranslationOptionList &translationOptionList) const {
+ }
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(
const ChartHypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void SetParameter(const std::string& key, const std::string& value);
diff --git a/moses/FF/CoveredReferenceFeature.cpp b/moses/FF/CoveredReferenceFeature.cpp
index 3a2482d0d..dd2c890d7 100644
--- a/moses/FF/CoveredReferenceFeature.cpp
+++ b/moses/FF/CoveredReferenceFeature.cpp
@@ -22,44 +22,44 @@ int CoveredReferenceState::Compare(const FFState& other) const
const CoveredReferenceState &otherState = static_cast<const CoveredReferenceState&>(other);
if (m_coveredRef.size() != otherState.m_coveredRef.size()) {
- return (m_coveredRef.size() < otherState.m_coveredRef.size()) ? -1 : +1;
+ return (m_coveredRef.size() < otherState.m_coveredRef.size()) ? -1 : +1;
} else {
multiset<string>::const_iterator thisIt, otherIt;
for (thisIt = m_coveredRef.begin(), otherIt = otherState.m_coveredRef.begin();
- thisIt != m_coveredRef.end();
- thisIt++, otherIt++) {
+ thisIt != m_coveredRef.end();
+ thisIt++, otherIt++) {
if (*thisIt != *otherIt) return thisIt->compare(*otherIt);
}
}
return 0;
// return m_coveredRef == otherState.m_coveredRef;
-
+
// if (m_coveredRef == otherState.m_coveredRef)
// return 0;
// return (m_coveredRef.size() < otherState.m_coveredRef.size()) ? -1 : +1;
}
void CoveredReferenceFeature::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{}
void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
long id = input.GetTranslationId();
boost::unordered_map<long, std::multiset<string> >::const_iterator refIt = m_refs.find(id);
multiset<string> wordsInPhrase = GetWordsInPhrase(targetPhrase);
multiset<string> covered;
set_intersection(wordsInPhrase.begin(), wordsInPhrase.end(),
- refIt->second.begin(), refIt->second.end(),
- inserter(covered, covered.begin()));
+ refIt->second.begin(), refIt->second.end(),
+ inserter(covered, covered.begin()));
vector<float> scores;
scores.push_back(covered.size());
@@ -67,7 +67,8 @@ void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input
estimatedFutureScore->Assign(this, scores);
}
-void CoveredReferenceFeature::Load() {
+void CoveredReferenceFeature::Load()
+{
InputFileStream refFile(m_path);
std::string line;
const StaticData &staticData = StaticData::Instance();
@@ -76,7 +77,7 @@ void CoveredReferenceFeature::Load() {
vector<string> words = Tokenize(line, " ");
multiset<string> wordSet;
// TODO make Tokenize work with other containers than vector
- copy(words.begin(), words.end(), inserter(wordSet, wordSet.begin()));
+ copy(words.begin(), words.end(), inserter(wordSet, wordSet.begin()));
m_refs.insert(make_pair(sentenceID++, wordSet));
}
}
@@ -107,15 +108,15 @@ FFState* CoveredReferenceFeature::EvaluateWhenApplied(
boost::unordered_map<long, std::multiset<string> >::const_iterator refIt = m_refs.find(id);
if (refIt == m_refs.end()) UTIL_THROW(util::Exception, "Sentence id out of range: " + SPrint<long>(id));
set_difference(refIt->second.begin(), refIt->second.end(),
- ret->m_coveredRef.begin(), ret->m_coveredRef.end(),
- inserter(remaining, remaining.begin()));
+ ret->m_coveredRef.begin(), ret->m_coveredRef.end(),
+ inserter(remaining, remaining.begin()));
// which of the remaining words are present in the current phrase
multiset<string> wordsInPhrase = GetWordsInPhrase(cur_hypo.GetCurrTargetPhrase());
multiset<string> newCovered;
set_intersection(wordsInPhrase.begin(), wordsInPhrase.end(),
- remaining.begin(), remaining.end(),
- inserter(newCovered, newCovered.begin()));
+ remaining.begin(), remaining.end(),
+ inserter(newCovered, newCovered.begin()));
vector<float> estimateScore =
cur_hypo.GetCurrTargetPhrase().GetScoreBreakdown().GetScoresForProducer(this);
diff --git a/moses/FF/CoveredReferenceFeature.h b/moses/FF/CoveredReferenceFeature.h
index b3b90f680..d5873f33e 100644
--- a/moses/FF/CoveredReferenceFeature.h
+++ b/moses/FF/CoveredReferenceFeature.h
@@ -52,20 +52,20 @@ public:
}
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
-
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
-
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
diff --git a/moses/FF/DecodeFeature.h b/moses/FF/DecodeFeature.h
index a8caa26f0..19c9b3161 100644
--- a/moses/FF/DecodeFeature.h
+++ b/moses/FF/DecodeFeature.h
@@ -63,30 +63,30 @@ public:
void SetParameter(const std::string& key, const std::string& value);
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const Syntax::SHyperedge &hyperedge,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
-
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
void SetContainer(const DecodeStep *container) {
m_container = container;
diff --git a/moses/FF/DistortionScoreProducer.h b/moses/FF/DistortionScoreProducer.h
index bf8243e89..218fb9b40 100644
--- a/moses/FF/DistortionScoreProducer.h
+++ b/moses/FF/DistortionScoreProducer.h
@@ -48,22 +48,22 @@ public:
}
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
-
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
-
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
};
}
diff --git a/moses/FF/DynamicCacheBasedLanguageModel.h b/moses/FF/DynamicCacheBasedLanguageModel.h
index e1363143c..5d9d17517 100644
--- a/moses/FF/DynamicCacheBasedLanguageModel.h
+++ b/moses/FF/DynamicCacheBasedLanguageModel.h
@@ -93,12 +93,16 @@ public:
}
static const DynamicCacheBasedLanguageModel* Instance(const std::string& name) {
- if (s_instance_map.find(name) == s_instance_map.end()){ return NULL; }
+ if (s_instance_map.find(name) == s_instance_map.end()) {
+ return NULL;
+ }
return s_instance_map[name];
}
static DynamicCacheBasedLanguageModel* InstanceNonConst(const std::string& name) {
- if (s_instance_map.find(name) == s_instance_map.end()){ return NULL; }
+ if (s_instance_map.find(name) == s_instance_map.end()) {
+ return NULL;
+ }
return s_instance_map[name];
}
@@ -126,29 +130,29 @@ public:
void Clear();
virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
-
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const TranslationOptionList &translationOptionList) const {
+ }
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void SetQueryType(size_t type);
void SetScoreType(size_t type);
diff --git a/moses/FF/ExternalFeature.h b/moses/FF/ExternalFeature.h
index 222ace544..6c0fb829e 100644
--- a/moses/FF/ExternalFeature.h
+++ b/moses/FF/ExternalFeature.h
@@ -18,8 +18,8 @@ protected:
public:
ExternalFeatureState(int stateSize)
:m_stateSize(stateSize)
- ,m_data(NULL)
- {}
+ ,m_data(NULL) {
+ }
ExternalFeatureState(int stateSize, void *data);
~ExternalFeatureState() {
@@ -52,22 +52,22 @@ public:
void SetParameter(const std::string& key, const std::string& value);
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
-
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
-
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index 9dec0fba6..448a1d61f 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -242,7 +242,7 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(SkeletonChangeInput);
MOSES_FNAME(SkeletonTranslationOptionListFeature);
MOSES_FNAME(SkeletonPT);
-
+
#ifdef HAVE_VW
MOSES_FNAME(VW);
MOSES_FNAME(VWFeatureSourceBagOfWords);
@@ -322,22 +322,22 @@ void FeatureRegistry::Construct(const std::string &name, const std::string &line
void FeatureRegistry::PrintFF() const
{
- vector<string> ffs;
- std::cerr << "Available feature functions:" << std::endl;
- Map::const_iterator iter;
- for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
- const string &ffName = iter->first;
- ffs.push_back(ffName);
- }
-
- vector<string>::const_iterator iterVec;
- std::sort(ffs.begin(), ffs.end());
- for (iterVec = ffs.begin(); iterVec != ffs.end(); ++iterVec) {
- const string &ffName = *iterVec;
- std::cerr << ffName << " ";
- }
-
- std::cerr << std::endl;
+ vector<string> ffs;
+ std::cerr << "Available feature functions:" << std::endl;
+ Map::const_iterator iter;
+ for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
+ const string &ffName = iter->first;
+ ffs.push_back(ffName);
+ }
+
+ vector<string>::const_iterator iterVec;
+ std::sort(ffs.begin(), ffs.end());
+ for (iterVec = ffs.begin(); iterVec != ffs.end(); ++iterVec) {
+ const string &ffName = *iterVec;
+ std::cerr << ffName << " ";
+ }
+
+ std::cerr << std::endl;
}
} // namespace Moses
diff --git a/moses/FF/FeatureFunction.cpp b/moses/FF/FeatureFunction.cpp
index 89e7a212c..df1a90460 100644
--- a/moses/FF/FeatureFunction.cpp
+++ b/moses/FF/FeatureFunction.cpp
@@ -38,8 +38,8 @@ void FeatureFunction::Destroy()
void FeatureFunction::CallChangeSource(InputType *&input)
{
for (size_t i = 0; i < s_staticColl.size(); ++i) {
- const FeatureFunction &ff = *s_staticColl[i];
- ff.ChangeSource(input);
+ const FeatureFunction &ff = *s_staticColl[i];
+ ff.ChangeSource(input);
}
}
diff --git a/moses/FF/FeatureFunction.h b/moses/FF/FeatureFunction.h
index fb7bc8aef..72fb3ab21 100644
--- a/moses/FF/FeatureFunction.h
+++ b/moses/FF/FeatureFunction.h
@@ -111,13 +111,13 @@ public:
// may have more factors than actually need, but not guaranteed.
// For SCFG decoding, the source contains non-terminals, NOT the raw source from the input sentence
virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const = 0;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const = 0;
// override this method if you want to change the input before decoding
- virtual void ChangeSource(InputType *&input) const
- {}
+ virtual void ChangeSource(InputType *&input) const {
+ }
// This method is called once all the translation options are retrieved from the phrase table, and
// just before search.
@@ -127,12 +127,12 @@ public:
// For pb models, stackvec is NULL.
// No FF should set estimatedFutureScore in both overloads!
virtual void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const = 0;
-
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const = 0;
+
// This method is called once all the translation options are retrieved from the phrase table, and
// just before search.
// 'inputPath' is guaranteed to be the raw substring from the input. No factors were added or taken away
@@ -141,7 +141,7 @@ public:
// For pb models, stackvec is NULL.
// No FF should set estimatedFutureScore in both overloads!
virtual void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const = 0;
+ , const TranslationOptionList &translationOptionList) const = 0;
virtual void SetParameter(const std::string& key, const std::string& value);
virtual void ReadParameters();
diff --git a/moses/FF/GlobalLexicalModel.cpp b/moses/FF/GlobalLexicalModel.cpp
index 205c7937f..d52d62a49 100644
--- a/moses/FF/GlobalLexicalModel.cpp
+++ b/moses/FF/GlobalLexicalModel.cpp
@@ -165,11 +165,11 @@ float GlobalLexicalModel::GetFromCacheOrScorePhrase( const TargetPhrase& targetP
}
void GlobalLexicalModel::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
- scoreBreakdown.PlusEquals( this, GetFromCacheOrScorePhrase(targetPhrase) );
+ scoreBreakdown.PlusEquals( this, GetFromCacheOrScorePhrase(targetPhrase) );
}
bool GlobalLexicalModel::IsUseable(const FactorMask &mask) const
diff --git a/moses/FF/GlobalLexicalModel.h b/moses/FF/GlobalLexicalModel.h
index 3ea94d412..a936c2e92 100644
--- a/moses/FF/GlobalLexicalModel.h
+++ b/moses/FF/GlobalLexicalModel.h
@@ -71,29 +71,29 @@ public:
bool IsUseable(const FactorMask &mask) const;
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
-
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
};
}
diff --git a/moses/FF/GlobalLexicalModelUnlimited.cpp b/moses/FF/GlobalLexicalModelUnlimited.cpp
index ab322944b..a757c1c4e 100644
--- a/moses/FF/GlobalLexicalModelUnlimited.cpp
+++ b/moses/FF/GlobalLexicalModelUnlimited.cpp
@@ -27,8 +27,8 @@ GlobalLexicalModelUnlimited::GlobalLexicalModelUnlimited(const std::string &line
// read optional punctuation and bias specifications
if (spec.size() > 0) {
if (spec.size() != 2 && spec.size() != 3 && spec.size() != 4 && spec.size() != 6) {
- std::cerr << "Format of glm feature is <factor-src>-<factor-tgt> [ignore-punct] [use-bias] "
- << "[context-type] [filename-src filename-tgt]";
+ std::cerr << "Format of glm feature is <factor-src>-<factor-tgt> [ignore-punct] [use-bias] "
+ << "[context-type] [filename-src filename-tgt]";
//return false;
}
@@ -48,7 +48,7 @@ GlobalLexicalModelUnlimited::GlobalLexicalModelUnlimited(const std::string &line
factors = Tokenize(modelSpec[i],"-");
if ( factors.size() != 2 ) {
- std::cerr << "Wrong factor definition for global lexical model unlimited: " << modelSpec[i];
+ std::cerr << "Wrong factor definition for global lexical model unlimited: " << modelSpec[i];
//return false;
}
@@ -60,10 +60,10 @@ GlobalLexicalModelUnlimited::GlobalLexicalModelUnlimited(const std::string &line
if (restricted) {
cerr << "loading word translation word lists from " << filenameSource << " and " << filenameTarget << endl;
if (!glmu->Load(filenameSource, filenameTarget)) {
- std::cerr << "Unable to load word lists for word translation feature from files "
- << filenameSource
- << " and "
- << filenameTarget;
+ std::cerr << "Unable to load word lists for word translation feature from files "
+ << filenameSource
+ << " and "
+ << filenameTarget;
//return false;
}
}
diff --git a/moses/FF/GlobalLexicalModelUnlimited.h b/moses/FF/GlobalLexicalModelUnlimited.h
index c4f8f6f9a..33c0d0010 100644
--- a/moses/FF/GlobalLexicalModelUnlimited.h
+++ b/moses/FF/GlobalLexicalModelUnlimited.h
@@ -82,31 +82,31 @@ public:
//TODO: This implements the old interface, but cannot be updated because
//it appears to be stateful
void EvaluateWhenApplied(const Hypothesis& cur_hypo,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
void EvaluateWhenApplied(const ChartHypothesis& /* cur_hypo */,
- int /* featureID */,
- ScoreComponentCollection* ) const {
+ int /* featureID */,
+ ScoreComponentCollection* ) const {
throw std::logic_error("GlobalLexicalModelUnlimited not supported in chart decoder, yet");
}
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
-
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
-
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
void AddFeature(ScoreComponentCollection* accumulator,
StringPiece sourceTrigger, StringPiece sourceWord, StringPiece targetTrigger,
diff --git a/moses/FF/HyperParameterAsWeight.h b/moses/FF/HyperParameterAsWeight.h
index a7c196da8..cd8d66821 100644
--- a/moses/FF/HyperParameterAsWeight.h
+++ b/moses/FF/HyperParameterAsWeight.h
@@ -19,33 +19,33 @@ public:
}
virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
virtual void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
virtual void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
-
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
virtual void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
/**
* Same for chart-based features.
**/
virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
};
diff --git a/moses/FF/InputFeature.cpp b/moses/FF/InputFeature.cpp
index 3ce6a9190..39535f58f 100644
--- a/moses/FF/InputFeature.cpp
+++ b/moses/FF/InputFeature.cpp
@@ -45,11 +45,11 @@ void InputFeature::SetParameter(const std::string& key, const std::string& value
}
void InputFeature::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
if (m_legacy) {
//binary phrase-table does input feature itself
diff --git a/moses/FF/InputFeature.h b/moses/FF/InputFeature.h
index 98e758ff8..c7b7237aa 100644
--- a/moses/FF/InputFeature.h
+++ b/moses/FF/InputFeature.h
@@ -42,28 +42,28 @@ public:
}
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
-
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
};
diff --git a/moses/FF/InternalTree.cpp b/moses/FF/InternalTree.cpp
index 45a46ae64..9e974d0cd 100644
--- a/moses/FF/InternalTree.cpp
+++ b/moses/FF/InternalTree.cpp
@@ -4,236 +4,241 @@ namespace Moses
{
InternalTree::InternalTree(const std::string & line, size_t start, size_t len, const bool terminal):
- m_value_nt(0),
- m_isTerminal(terminal)
- {
+ m_value_nt(0),
+ m_isTerminal(terminal)
+{
- if (len > 0) {
- m_value.assign(line, start, len);
- }
+ if (len > 0) {
+ m_value.assign(line, start, len);
+ }
}
InternalTree::InternalTree(const std::string & line, const bool terminal):
- m_value_nt(0),
- m_isTerminal(terminal)
- {
+ m_value_nt(0),
+ m_isTerminal(terminal)
+{
- size_t found = line.find_first_of("[] ");
+ size_t found = line.find_first_of("[] ");
- if (found == line.npos) {
- m_value = line;
- }
- else {
- AddSubTree(line, 0);
- }
+ if (found == line.npos) {
+ m_value = line;
+ } else {
+ AddSubTree(line, 0);
+ }
}
-size_t InternalTree::AddSubTree(const std::string & line, size_t pos) {
-
- char token = 0;
- size_t len = 0;
-
- while (token != ']' && pos != std::string::npos)
- {
- size_t oldpos = pos;
- pos = line.find_first_of("[] ", pos);
- if (pos == std::string::npos) break;
- token = line[pos];
- len = pos-oldpos;
-
- if (token == '[') {
- if (!m_value.empty()) {
- m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, false));
- pos = m_children.back()->AddSubTree(line, pos+1);
- }
- else {
- if (len > 0) {
- m_value.assign(line, oldpos, len);
- }
- pos = AddSubTree(line, pos+1);
- }
- }
- else if (token == ' ' || token == ']') {
- if (len > 0 && m_value.empty()) {
- m_value.assign(line, oldpos, len);
- }
- else if (len > 0) {
- m_isTerminal = false;
- m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, true));
- }
- if (token == ' ') {
- pos++;
- }
- }
-
- if (!m_children.empty()) {
- m_isTerminal = false;
- }
- }
+size_t InternalTree::AddSubTree(const std::string & line, size_t pos)
+{
- if (pos == std::string::npos) {
- return line.size();
- }
- return std::min(line.size(),pos+1);
+ char token = 0;
+ size_t len = 0;
+
+ while (token != ']' && pos != std::string::npos) {
+ size_t oldpos = pos;
+ pos = line.find_first_of("[] ", pos);
+ if (pos == std::string::npos) break;
+ token = line[pos];
+ len = pos-oldpos;
+
+ if (token == '[') {
+ if (!m_value.empty()) {
+ m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, false));
+ pos = m_children.back()->AddSubTree(line, pos+1);
+ } else {
+ if (len > 0) {
+ m_value.assign(line, oldpos, len);
+ }
+ pos = AddSubTree(line, pos+1);
+ }
+ } else if (token == ' ' || token == ']') {
+ if (len > 0 && m_value.empty()) {
+ m_value.assign(line, oldpos, len);
+ } else if (len > 0) {
+ m_isTerminal = false;
+ m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, true));
+ }
+ if (token == ' ') {
+ pos++;
+ }
+ }
+
+ if (!m_children.empty()) {
+ m_isTerminal = false;
+ }
+ }
+
+ if (pos == std::string::npos) {
+ return line.size();
+ }
+ return std::min(line.size(),pos+1);
}
-std::string InternalTree::GetString(bool start) const {
+std::string InternalTree::GetString(bool start) const
+{
- std::string ret = "";
- if (!start) {
- ret += " ";
- }
+ std::string ret = "";
+ if (!start) {
+ ret += " ";
+ }
- if (!m_isTerminal) {
- ret += "[";
- }
+ if (!m_isTerminal) {
+ ret += "[";
+ }
- ret += m_value;
- for (std::vector<TreePointer>::const_iterator it = m_children.begin(); it != m_children.end(); ++it)
- {
- ret += (*it)->GetString(false);
- }
+ ret += m_value;
+ for (std::vector<TreePointer>::const_iterator it = m_children.begin(); it != m_children.end(); ++it) {
+ ret += (*it)->GetString(false);
+ }
- if (!m_isTerminal) {
- ret += "]";
- }
- return ret;
+ if (!m_isTerminal) {
+ ret += "]";
+ }
+ return ret;
}
-void InternalTree::Combine(const std::vector<TreePointer> &previous) {
+void InternalTree::Combine(const std::vector<TreePointer> &previous)
+{
- std::vector<TreePointer>::iterator it;
- bool found = false;
- leafNT next_leafNT(this);
- for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) {
- found = next_leafNT(it);
- if (found) {
- *it = *it_prev;
- }
- else {
- std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
- }
- }
+ std::vector<TreePointer>::iterator it;
+ bool found = false;
+ leafNT next_leafNT(this);
+ for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) {
+ found = next_leafNT(it);
+ if (found) {
+ *it = *it_prev;
+ } else {
+ std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
+ }
+ }
}
-bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const {
- for (it = m_children.begin(); it != m_children.end(); ++it) {
- if ((*it)->GetLabel() == label) {
- return true;
- }
+bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
+{
+ for (it = m_children.begin(); it != m_children.end(); ++it) {
+ if ((*it)->GetLabel() == label) {
+ return true;
}
- return false;
+ }
+ return false;
}
-bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const {
- for (it = m_children.begin(); it != m_children.end(); ++it) {
- if ((*it)->GetLabel() == label) {
- return true;
- }
- std::vector<TreePointer>::const_iterator it2;
- if ((*it)->RecursiveSearch(label, it2)) {
- it = it2;
- return true;
- }
+bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
+{
+ for (it = m_children.begin(); it != m_children.end(); ++it) {
+ if ((*it)->GetLabel() == label) {
+ return true;
}
- return false;
+ std::vector<TreePointer>::const_iterator it2;
+ if ((*it)->RecursiveSearch(label, it2)) {
+ it = it2;
+ return true;
+ }
+ }
+ return false;
}
-bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const {
- for (it = m_children.begin(); it != m_children.end(); ++it) {
- if ((*it)->GetLabel() == label) {
- parent = this;
- return true;
- }
- std::vector<TreePointer>::const_iterator it2;
- if ((*it)->RecursiveSearch(label, it2, parent)) {
- it = it2;
- return true;
- }
- }
- return false;
+bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
+{
+ for (it = m_children.begin(); it != m_children.end(); ++it) {
+ if ((*it)->GetLabel() == label) {
+ parent = this;
+ return true;
+ }
+ std::vector<TreePointer>::const_iterator it2;
+ if ((*it)->RecursiveSearch(label, it2, parent)) {
+ it = it2;
+ return true;
+ }
+ }
+ return false;
}
-bool InternalTree::FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const {
- for (it = m_children.begin(); it != m_children.end(); ++it) {
- if ((*it)->GetNTLabel() == label) {
- return true;
- }
+bool InternalTree::FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const
+{
+ for (it = m_children.begin(); it != m_children.end(); ++it) {
+ if ((*it)->GetNTLabel() == label) {
+ return true;
}
- return false;
+ }
+ return false;
}
-bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const {
- for (it = m_children.begin(); it != m_children.end(); ++it) {
- if ((*it)->GetNTLabel() == label) {
- return true;
- }
- std::vector<TreePointer>::const_iterator it2;
- if ((*it)->RecursiveSearch(label, it2)) {
- it = it2;
- return true;
- }
+bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const
+{
+ for (it = m_children.begin(); it != m_children.end(); ++it) {
+ if ((*it)->GetNTLabel() == label) {
+ return true;
}
- return false;
+ std::vector<TreePointer>::const_iterator it2;
+ if ((*it)->RecursiveSearch(label, it2)) {
+ it = it2;
+ return true;
+ }
+ }
+ return false;
}
-bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const {
- for (it = m_children.begin(); it != m_children.end(); ++it) {
- if ((*it)->GetNTLabel() == label) {
- parent = this;
- return true;
- }
- std::vector<TreePointer>::const_iterator it2;
- if ((*it)->RecursiveSearch(label, it2, parent)) {
- it = it2;
- return true;
- }
- }
- return false;
+bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
+{
+ for (it = m_children.begin(); it != m_children.end(); ++it) {
+ if ((*it)->GetNTLabel() == label) {
+ parent = this;
+ return true;
+ }
+ std::vector<TreePointer>::const_iterator it2;
+ if ((*it)->RecursiveSearch(label, it2, parent)) {
+ it = it2;
+ return true;
+ }
+ }
+ return false;
}
-bool InternalTree::FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const {
- for (it = m_children.begin(); it != m_children.end(); ++it) {
- if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
- return true;
- }
+bool InternalTree::FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const
+{
+ for (it = m_children.begin(); it != m_children.end(); ++it) {
+ if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
+ return true;
}
- return false;
+ }
+ return false;
}
-bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const {
- for (it = m_children.begin(); it != m_children.end(); ++it) {
- if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
- return true;
- }
- std::vector<TreePointer>::const_iterator it2;
- if ((*it)->RecursiveSearch(labels, it2)) {
- it = it2;
- return true;
- }
+bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const
+{
+ for (it = m_children.begin(); it != m_children.end(); ++it) {
+ if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
+ return true;
+ }
+ std::vector<TreePointer>::const_iterator it2;
+ if ((*it)->RecursiveSearch(labels, it2)) {
+ it = it2;
+ return true;
}
- return false;
+ }
+ return false;
}
-bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const {
- for (it = m_children.begin(); it != m_children.end(); ++it) {
- if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
- parent = this;
- return true;
- }
- std::vector<TreePointer>::const_iterator it2;
- if ((*it)->RecursiveSearch(labels, it2, parent)) {
- it = it2;
- return true;
- }
- }
- return false;
+bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
+{
+ for (it = m_children.begin(); it != m_children.end(); ++it) {
+ if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
+ parent = this;
+ return true;
+ }
+ std::vector<TreePointer>::const_iterator it2;
+ if ((*it)->RecursiveSearch(labels, it2, parent)) {
+ it = it2;
+ return true;
+ }
+ }
+ return false;
}
} \ No newline at end of file
diff --git a/moses/FF/InternalTree.h b/moses/FF/InternalTree.h
index 002313030..722c5832f 100644
--- a/moses/FF/InternalTree.h
+++ b/moses/FF/InternalTree.h
@@ -19,79 +19,79 @@ typedef int NTLabel;
class InternalTree
{
-std::string m_value;
-NTLabel m_value_nt;
-std::vector<TreePointer> m_children;
-bool m_isTerminal;
+ std::string m_value;
+ NTLabel m_value_nt;
+ std::vector<TreePointer> m_children;
+ bool m_isTerminal;
public:
- InternalTree(const std::string & line, size_t start, size_t len, const bool terminal);
- InternalTree(const std::string & line, const bool terminal = false);
- InternalTree(const InternalTree & tree):
- m_value(tree.m_value),
- m_isTerminal(tree.m_isTerminal) {
- const std::vector<TreePointer> & children = tree.m_children;
- for (std::vector<TreePointer>::const_iterator it = children.begin(); it != children.end(); it++) {
- m_children.push_back(boost::make_shared<InternalTree>(**it));
- }
- }
- size_t AddSubTree(const std::string & line, size_t start);
-
- std::string GetString(bool start = true) const;
- void Combine(const std::vector<TreePointer> &previous);
- const std::string & GetLabel() const {
- return m_value;
+ InternalTree(const std::string & line, size_t start, size_t len, const bool terminal);
+ InternalTree(const std::string & line, const bool terminal = false);
+ InternalTree(const InternalTree & tree):
+ m_value(tree.m_value),
+ m_isTerminal(tree.m_isTerminal) {
+ const std::vector<TreePointer> & children = tree.m_children;
+ for (std::vector<TreePointer>::const_iterator it = children.begin(); it != children.end(); it++) {
+ m_children.push_back(boost::make_shared<InternalTree>(**it));
}
+ }
+ size_t AddSubTree(const std::string & line, size_t start);
- // optionally identify label by int instead of string;
- // allows abstraction if multiple nonterminal strings should map to same label.
- const NTLabel & GetNTLabel() const {
- return m_value_nt;
- }
+ std::string GetString(bool start = true) const;
+ void Combine(const std::vector<TreePointer> &previous);
+ const std::string & GetLabel() const {
+ return m_value;
+ }
- void SetNTLabel(NTLabel value) {
- m_value_nt = value;
- }
+ // optionally identify label by int instead of string;
+ // allows abstraction if multiple nonterminal strings should map to same label.
+ const NTLabel & GetNTLabel() const {
+ return m_value_nt;
+ }
- size_t GetLength() const {
- return m_children.size();
- }
- std::vector<TreePointer> & GetChildren() {
- return m_children;
- }
+ void SetNTLabel(NTLabel value) {
+ m_value_nt = value;
+ }
- bool IsTerminal() const {
- return m_isTerminal;
- }
+ size_t GetLength() const {
+ return m_children.size();
+ }
+ std::vector<TreePointer> & GetChildren() {
+ return m_children;
+ }
- bool IsLeafNT() const {
- return (!m_isTerminal && m_children.size() == 0);
- }
+ bool IsTerminal() const {
+ return m_isTerminal;
+ }
+
+ bool IsLeafNT() const {
+ return (!m_isTerminal && m_children.size() == 0);
+ }
- // different methods to search a tree (either just direct children (FlatSearch) or all children (RecursiveSearch)) for constituents.
- // can be used for formulating syntax constraints.
+ // different methods to search a tree (either just direct children (FlatSearch) or all children (RecursiveSearch)) for constituents.
+ // can be used for formulating syntax constraints.
- // if found, 'it' is iterator to first tree node that matches search string
- bool FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
- bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
+ // if found, 'it' is iterator to first tree node that matches search string
+ bool FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
+ bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
- // if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
- bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
+ // if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
+ bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
- // use NTLabel for search to reduce number of string comparisons / deal with synonymous labels
- // if found, 'it' is iterator to first tree node that matches search string
- bool FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
- bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
+ // use NTLabel for search to reduce number of string comparisons / deal with synonymous labels
+ // if found, 'it' is iterator to first tree node that matches search string
+ bool FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
+ bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
- // if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
- bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
+ // if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
+ bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
- // pass vector of possible labels to search
- // if found, 'it' is iterator to first tree node that matches search string
- bool FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
- bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
+ // pass vector of possible labels to search
+ // if found, 'it' is iterator to first tree node that matches search string
+ bool FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
+ bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
- // if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
- bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
+ // if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
+ bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
};
@@ -101,77 +101,79 @@ class TreeState : public FFState
TreePointer m_tree;
public:
TreeState(TreePointer tree)
- :m_tree(tree)
- {}
+ :m_tree(tree) {
+ }
TreePointer GetTree() const {
- return m_tree;
+ return m_tree;
}
- int Compare(const FFState& other) const {return 0;};
+ int Compare(const FFState& other) const {
+ return 0;
+ };
};
// Python-like generator that yields next nonterminal leaf on every call
-$generator(leafNT) {
- std::vector<TreePointer>::iterator it;
- InternalTree* tree;
- leafNT(InternalTree* root = 0): tree(root) {}
- $emit(std::vector<TreePointer>::iterator)
- for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
- if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
- $yield(it);
- }
- else if ((*it)->GetLength() > 0) {
- if ((*it).get()) { // normal pointer to same object that TreePointer points to
- $restart(tree = (*it).get());
- }
- }
+$generator(leafNT)
+{
+ std::vector<TreePointer>::iterator it;
+ InternalTree* tree;
+ leafNT(InternalTree* root = 0): tree(root) {}
+ $emit(std::vector<TreePointer>::iterator)
+ for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
+ if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
+ $yield(it);
+ } else if ((*it)->GetLength() > 0) {
+ if ((*it).get()) { // normal pointer to same object that TreePointer points to
+ $restart(tree = (*it).get());
+ }
}
- $stop;
+ }
+ $stop;
};
// Python-like generator that yields the parent of the next nonterminal leaf on every call
-$generator(leafNTParent) {
- std::vector<TreePointer>::iterator it;
- InternalTree* tree;
- leafNTParent(InternalTree* root = 0): tree(root) {}
- $emit(InternalTree*)
- for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
- if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
- $yield(tree);
- }
- else if ((*it)->GetLength() > 0) {
- if ((*it).get()) {
- $restart(tree = (*it).get());
- }
- }
+$generator(leafNTParent)
+{
+ std::vector<TreePointer>::iterator it;
+ InternalTree* tree;
+ leafNTParent(InternalTree* root = 0): tree(root) {}
+ $emit(InternalTree*)
+ for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
+ if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
+ $yield(tree);
+ } else if ((*it)->GetLength() > 0) {
+ if ((*it).get()) {
+ $restart(tree = (*it).get());
+ }
}
- $stop;
+ }
+ $stop;
};
// Python-like generator that yields the next nonterminal leaf on every call, and also stores the path from the root of the tree to the nonterminal
-$generator(leafNTPath) {
- std::vector<TreePointer>::iterator it;
- InternalTree* tree;
- std::vector<InternalTree*> * path;
- leafNTPath(InternalTree* root = NULL, std::vector<InternalTree*> * orig = NULL): tree(root), path(orig) {}
- $emit(std::vector<TreePointer>::iterator)
- path->push_back(tree);
- for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
- if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
- path->push_back((*it).get());
- $yield(it);
- path->pop_back();
- }
- else if ((*it)->GetLength() > 0) {
- if ((*it).get()) {
- $restart(tree = (*it).get());
- }
- }
+$generator(leafNTPath)
+{
+ std::vector<TreePointer>::iterator it;
+ InternalTree* tree;
+ std::vector<InternalTree*> * path;
+ leafNTPath(InternalTree* root = NULL, std::vector<InternalTree*> * orig = NULL): tree(root), path(orig) {}
+ $emit(std::vector<TreePointer>::iterator)
+ path->push_back(tree);
+ for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
+ if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
+ path->push_back((*it).get());
+ $yield(it);
+ path->pop_back();
+ } else if ((*it)->GetLength() > 0) {
+ if ((*it).get()) {
+ $restart(tree = (*it).get());
+ }
}
- path->pop_back();
- $stop;
+ }
+ path->pop_back();
+ $stop;
};
diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp
index 2f870f957..7be2f1d9e 100644
--- a/moses/FF/LexicalReordering/LexicalReordering.cpp
+++ b/moses/FF/LexicalReordering/LexicalReordering.cpp
@@ -15,7 +15,7 @@ LexicalReordering::LexicalReordering(const std::string &line)
std::cerr << "Initializing LexicalReordering.." << std::endl;
map<string,string> sparseArgs;
- m_haveDefaultScores = false;
+ m_haveDefaultScores = false;
for (size_t i = 0; i < m_args.size(); ++i) {
const vector<string> &args = m_args[i];
@@ -36,7 +36,7 @@ LexicalReordering::LexicalReordering(const std::string &line)
for(size_t i=0; i<tokens.size(); i++) {
m_defaultScores.push_back( TransformScore( Scan<float>(tokens[i]) ) );
}
- m_haveDefaultScores = true;
+ m_haveDefaultScores = true;
} else {
UTIL_THROW(util::Exception,"Unknown argument " + args[0]);
}
@@ -84,8 +84,8 @@ Scores LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const
}
FFState* LexicalReordering::EvaluateWhenApplied(const Hypothesis& hypo,
- const FFState* prev_state,
- ScoreComponentCollection* out) const
+ const FFState* prev_state,
+ ScoreComponentCollection* out) const
{
VERBOSE(3,"LexicalReordering::Evaluate(const Hypothesis& hypo,...) START" << std::endl);
Scores score(GetNumScoreComponents(), 0);
diff --git a/moses/FF/LexicalReordering/LexicalReordering.h b/moses/FF/LexicalReordering/LexicalReordering.h
index a8479ccf5..444a5a68c 100644
--- a/moses/FF/LexicalReordering/LexicalReordering.h
+++ b/moses/FF/LexicalReordering/LexicalReordering.h
@@ -46,33 +46,37 @@ public:
Scores GetProb(const Phrase& f, const Phrase& e) const;
virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo,
- const FFState* prev_state,
- ScoreComponentCollection* accumulator) const;
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const;
virtual FFState* EvaluateWhenApplied(const ChartHypothesis&,
- int /* featureID */,
- ScoreComponentCollection*) const {
+ int /* featureID */,
+ ScoreComponentCollection*) const {
UTIL_THROW(util::Exception, "LexicalReordering is not valid for chart decoder");
}
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
-
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
-
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
- bool GetHaveDefaultScores() { return m_haveDefaultScores; }
- float GetDefaultScore( size_t i ) { return m_defaultScores[i]; }
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
+ bool GetHaveDefaultScores() {
+ return m_haveDefaultScores;
+ }
+ float GetDefaultScore( size_t i ) {
+ return m_defaultScores[i];
+ }
private:
bool DecodeCondition(std::string s);
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
index aa2aa7f0d..567d1b713 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
@@ -39,7 +39,7 @@ size_t LexicalReorderingConfiguration::GetNumScoreComponents() const
}
void LexicalReorderingConfiguration::ConfigureSparse
- (const std::map<std::string,std::string>& sparseArgs, const LexicalReordering* producer)
+(const std::map<std::string,std::string>& sparseArgs, const LexicalReordering* producer)
{
if (sparseArgs.size()) {
m_sparse.reset(new SparseReordering(sparseArgs, producer));
@@ -95,7 +95,7 @@ LexicalReorderingConfiguration::LexicalReorderingConfiguration(const std::string
}
if (m_modelType == None) {
- std::cerr << "You need to specify the type of the reordering model (msd, monotonicity,...)" << std::endl;
+ std::cerr << "You need to specify the type of the reordering model (msd, monotonicity,...)" << std::endl;
exit(1);
}
}
@@ -134,7 +134,7 @@ void LexicalReorderingState::CopyScores(ScoreComponentCollection* accum, const
{
// don't call this on a bidirectional object
UTIL_THROW_IF2(m_direction != LexicalReorderingConfiguration::Backward && m_direction != LexicalReorderingConfiguration::Forward,
- "Unknown direction: " << m_direction);
+ "Unknown direction: " << m_direction);
const TranslationOption* relevantOpt = &topt;
if (m_direction != LexicalReorderingConfiguration::Backward) relevantOpt = m_prevOption;
const Scores *cachedScores = relevantOpt->GetLexReorderingScores(m_configuration.GetScoreProducer());
@@ -146,8 +146,7 @@ void LexicalReorderingState::CopyScores(ScoreComponentCollection* accum, const
const Scores &scoreSet = *cachedScores;
if(m_configuration.CollapseScores()) {
scores[m_offset] = scoreSet[m_offset + reoType];
- }
- else {
+ } else {
std::fill(scores.begin() + m_offset, scores.begin() + m_offset + m_configuration.GetNumberOfTypes(), 0);
scores[m_offset + reoType] = scoreSet[m_offset + reoType];
}
@@ -158,8 +157,7 @@ void LexicalReorderingState::CopyScores(ScoreComponentCollection* accum, const
Scores scores(m_configuration.GetScoreProducer()->GetNumScoreComponents(),0);
if(m_configuration.CollapseScores()) {
scores[m_offset] = m_configuration.GetScoreProducer()->GetDefaultScore(m_offset + reoType);
- }
- else {
+ } else {
scores[m_offset + reoType] = m_configuration.GetScoreProducer()->GetDefaultScore(m_offset + reoType);
}
accum->PlusEquals(m_configuration.GetScoreProducer(), scores);
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h
index e309ed7f1..79537f119 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.h
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.h
@@ -124,7 +124,7 @@ protected:
int ComparePrevScores(const TranslationOption *other) const;
//constants for the different type of reorderings (corresponding to indexes in the table file)
- public:
+public:
static const ReorderingType M = 0; // monotonic
static const ReorderingType NM = 1; // non-monotonic
static const ReorderingType S = 1; // swap
diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp
index f62dcde8b..c9fe59423 100644
--- a/moses/FF/LexicalReordering/SparseReordering.cpp
+++ b/moses/FF/LexicalReordering/SparseReordering.cpp
@@ -16,10 +16,11 @@
using namespace std;
-namespace Moses
+namespace Moses
{
-const std::string& SparseReorderingFeatureKey::Name (const string& wordListId) {
+const std::string& SparseReorderingFeatureKey::Name (const string& wordListId)
+{
static string kSep = "-";
static string name;
ostringstream buf;
@@ -55,7 +56,7 @@ const std::string& SparseReorderingFeatureKey::Name (const string& wordListId) {
}
SparseReordering::SparseReordering(const map<string,string>& config, const LexicalReordering* producer)
- : m_producer(producer)
+ : m_producer(producer)
{
static const string kSource= "source";
static const string kTarget = "target";
@@ -93,22 +94,24 @@ SparseReordering::SparseReordering(const map<string,string>& config, const Lexic
}
-void SparseReordering::PreCalculateFeatureNames(size_t index, const string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster) {
+void SparseReordering::PreCalculateFeatureNames(size_t index, const string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster)
+{
for (size_t type = SparseReorderingFeatureKey::Stack;
- type <= SparseReorderingFeatureKey::Between; ++type) {
+ type <= SparseReorderingFeatureKey::Between; ++type) {
for (size_t position = SparseReorderingFeatureKey::First;
- position <= SparseReorderingFeatureKey::Last; ++position) {
+ position <= SparseReorderingFeatureKey::Last; ++position) {
for (int reoType = 0; reoType <= LexicalReorderingState::MAX; ++reoType) {
SparseReorderingFeatureKey key(
index, static_cast<SparseReorderingFeatureKey::Type>(type), factor, isCluster,
- static_cast<SparseReorderingFeatureKey::Position>(position), side, reoType);
+ static_cast<SparseReorderingFeatureKey::Position>(position), side, reoType);
m_featureMap.insert(pair<SparseReorderingFeatureKey, FName>(key,m_producer->GetFeatureName(key.Name(id))));
}
}
}
}
-void SparseReordering::ReadWordList(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<WordList>* pWordLists) {
+void SparseReordering::ReadWordList(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<WordList>* pWordLists)
+{
ifstream fh(filename.c_str());
UTIL_THROW_IF(!fh, util::Exception, "Unable to open: " << filename);
string line;
@@ -118,12 +121,13 @@ void SparseReordering::ReadWordList(const string& filename, const string& id, Sp
//TODO: StringPiece
const Factor* factor = FactorCollection::Instance().AddFactor(line);
pWordLists->back().second.insert(factor);
- PreCalculateFeatureNames(pWordLists->size()-1, id, side, factor, false);
+ PreCalculateFeatureNames(pWordLists->size()-1, id, side, factor, false);
}
}
-void SparseReordering::ReadClusterMap(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<ClusterMap>* pClusterMaps) {
+void SparseReordering::ReadClusterMap(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<ClusterMap>* pClusterMaps)
+{
pClusterMaps->push_back(ClusterMap());
pClusterMaps->back().first = id;
util::FilePiece file(filename.c_str());
@@ -141,15 +145,16 @@ void SparseReordering::ReadClusterMap(const string& filename, const string& id,
if (!lineIter) UTIL_THROW(util::Exception, "Malformed cluster line (missing cluster id): '" << line << "'");
const Factor* idFactor = FactorCollection::Instance().AddFactor(*lineIter);
pClusterMaps->back().second[wordFactor] = idFactor;
- PreCalculateFeatureNames(pClusterMaps->size()-1, id, side, idFactor, true);
+ PreCalculateFeatureNames(pClusterMaps->size()-1, id, side, idFactor, true);
}
}
void SparseReordering::AddFeatures(
- SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,
- const Word& word, SparseReorderingFeatureKey::Position position,
- LexicalReorderingState::ReorderingType reoType,
- ScoreComponentCollection* scores) const {
+ SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,
+ const Word& word, SparseReorderingFeatureKey::Position position,
+ LexicalReorderingState::ReorderingType reoType,
+ ScoreComponentCollection* scores) const
+{
const Factor* wordFactor = word.GetFactor(0);
@@ -186,18 +191,18 @@ void SparseReordering::AddFeatures(
}
void SparseReordering::CopyScores(
- const TranslationOption& currentOpt,
- const TranslationOption* previousOpt,
- const InputType& input,
- LexicalReorderingState::ReorderingType reoType,
- LexicalReorderingConfiguration::Direction direction,
- ScoreComponentCollection* scores) const
+ const TranslationOption& currentOpt,
+ const TranslationOption* previousOpt,
+ const InputType& input,
+ LexicalReorderingState::ReorderingType reoType,
+ LexicalReorderingConfiguration::Direction direction,
+ ScoreComponentCollection* scores) const
{
if (m_useBetween && direction == LexicalReorderingConfiguration::Backward &&
(reoType == LexicalReorderingState::D || reoType == LexicalReorderingState::DL ||
- reoType == LexicalReorderingState::DR)) {
+ reoType == LexicalReorderingState::DR)) {
size_t gapStart, gapEnd;
- //NB: Using a static cast for speed, but could be nasty if
+ //NB: Using a static cast for speed, but could be nasty if
//using non-sentence input
const Sentence& sentence = static_cast<const Sentence&>(input);
const WordsRange& currentRange = currentOpt.GetSourceWordsRange();
@@ -217,9 +222,9 @@ void SparseReordering::CopyScores(
}
assert(gapStart < gapEnd);
for (size_t i = gapStart; i < gapEnd; ++i) {
- AddFeatures(SparseReorderingFeatureKey::Between,
- SparseReorderingFeatureKey::Source, sentence.GetWord(i),
- SparseReorderingFeatureKey::First, reoType, scores);
+ AddFeatures(SparseReorderingFeatureKey::Between,
+ SparseReorderingFeatureKey::Source, sentence.GetWord(i),
+ SparseReorderingFeatureKey::First, reoType, scores);
}
}
//std::cerr << "SR " << topt << " " << reoType << " " << direction << std::endl;
@@ -240,11 +245,11 @@ void SparseReordering::CopyScores(
}
const Phrase& sourcePhrase = currentOpt.GetInputPath().GetPhrase();
AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(0),
- SparseReorderingFeatureKey::First, reoType, scores);
+ SparseReorderingFeatureKey::First, reoType, scores);
AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(sourcePhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores);
- const Phrase& targetPhrase = currentOpt.GetTargetPhrase();
+ const Phrase& targetPhrase = currentOpt.GetTargetPhrase();
AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(0),
- SparseReorderingFeatureKey::First, reoType, scores);
+ SparseReorderingFeatureKey::First, reoType, scores);
AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(targetPhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores);
diff --git a/moses/FF/LexicalReordering/SparseReordering.h b/moses/FF/LexicalReordering/SparseReordering.h
index 663785a88..50ec96a0d 100644
--- a/moses/FF/LexicalReordering/SparseReordering.h
+++ b/moses/FF/LexicalReordering/SparseReordering.h
@@ -23,7 +23,7 @@
/**
Configuration of sparse reordering:
-
+
The sparse reordering feature is configured using sparse-* configs in the lexical reordering line.
sparse-words-(source|target)-<id>=<filename> -- Features which fire for the words in the list
sparse-clusters-(source|target)-<id>=<filename> -- Features which fire for clusters in the list. Format
@@ -38,7 +38,7 @@
namespace Moses
{
-/**
+/**
* Used to store pre-calculated feature names.
**/
struct SparseReorderingFeatureKey {
@@ -51,17 +51,17 @@ struct SparseReorderingFeatureKey {
LexicalReorderingState::ReorderingType reoType;
SparseReorderingFeatureKey(size_t id_, Type type_, const Factor* word_, bool isCluster_,
- Position position_, Side side_, LexicalReorderingState::ReorderingType reoType_)
+ Position position_, Side side_, LexicalReorderingState::ReorderingType reoType_)
: id(id_), type(type_), word(word_), isCluster(isCluster_),
- position(position_), side(side_), reoType(reoType_)
- {}
+ position(position_), side(side_), reoType(reoType_) {
+ }
- const std::string& Name(const std::string& wordListId) ;
+ const std::string& Name(const std::string& wordListId) ;
};
struct HashSparseReorderingFeatureKey : public std::unary_function<SparseReorderingFeatureKey, std::size_t> {
std::size_t operator()(const SparseReorderingFeatureKey& key) const {
- //TODO: can we just hash the memory?
+ //TODO: can we just hash the memory?
//not sure, there could be random padding
std::size_t seed = 0;
seed = util::MurmurHashNative(&key.id, sizeof(key.id), seed);
@@ -76,7 +76,7 @@ struct HashSparseReorderingFeatureKey : public std::unary_function<SparseReorder
};
struct EqualsSparseReorderingFeatureKey :
- public std::binary_function<SparseReorderingFeatureKey, SparseReorderingFeatureKey, bool> {
+ public std::binary_function<SparseReorderingFeatureKey, SparseReorderingFeatureKey, bool> {
bool operator()(const SparseReorderingFeatureKey& left, const SparseReorderingFeatureKey& right) const {
//TODO: Can we just compare the memory?
return left.id == right.id && left.type == right.type && left.word == right.word &&
@@ -89,14 +89,14 @@ class SparseReordering
{
public:
SparseReordering(const std::map<std::string,std::string>& config, const LexicalReordering* producer);
-
+
//If direction is backward the options will be different, for forward they will be the same
void CopyScores(const TranslationOption& currentOpt,
const TranslationOption* previousOpt,
const InputType& input,
- LexicalReorderingState::ReorderingType reoType,
- LexicalReorderingConfiguration::Direction direction,
- ScoreComponentCollection* scores) const ;
+ LexicalReorderingState::ReorderingType reoType,
+ LexicalReorderingConfiguration::Direction direction,
+ ScoreComponentCollection* scores) const ;
private:
const LexicalReordering* m_producer;
@@ -113,14 +113,14 @@ private:
FeatureMap m_featureMap;
void ReadWordList(const std::string& filename, const std::string& id,
- SparseReorderingFeatureKey::Side side, std::vector<WordList>* pWordLists);
+ SparseReorderingFeatureKey::Side side, std::vector<WordList>* pWordLists);
void ReadClusterMap(const std::string& filename, const std::string& id, SparseReorderingFeatureKey::Side side, std::vector<ClusterMap>* pClusterMaps);
void PreCalculateFeatureNames(size_t index, const std::string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster);
void AddFeatures(
SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,
- const Word& word, SparseReorderingFeatureKey::Position position,
- LexicalReorderingState::ReorderingType reoType,
+ const Word& word, SparseReorderingFeatureKey::Position position,
+ LexicalReorderingState::ReorderingType reoType,
ScoreComponentCollection* scores) const;
};
diff --git a/moses/FF/MaxSpanFreeNonTermSource.cpp b/moses/FF/MaxSpanFreeNonTermSource.cpp
index 9de582635..7799c0b2a 100644
--- a/moses/FF/MaxSpanFreeNonTermSource.cpp
+++ b/moses/FF/MaxSpanFreeNonTermSource.cpp
@@ -14,10 +14,10 @@ using namespace std;
namespace Moses
{
MaxSpanFreeNonTermSource::MaxSpanFreeNonTermSource(const std::string &line)
-:StatelessFeatureFunction(1, line)
-,m_maxSpan(2)
-,m_glueTargetLHSStr("S")
-,m_glueTargetLHS(true)
+ :StatelessFeatureFunction(1, line)
+ ,m_maxSpan(2)
+ ,m_glueTargetLHSStr("S")
+ ,m_glueTargetLHS(true)
{
m_tuneable = false;
ReadParameters();
@@ -28,25 +28,25 @@ MaxSpanFreeNonTermSource::MaxSpanFreeNonTermSource(const std::string &line)
}
void MaxSpanFreeNonTermSource::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
targetPhrase.SetRuleSource(source);
}
void MaxSpanFreeNonTermSource::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
const Word &targetLHS = targetPhrase.GetTargetLHS();
if (targetLHS == m_glueTargetLHS) {
- // don't delete glue rules
- return;
+ // don't delete glue rules
+ return;
}
const Phrase *source = targetPhrase.GetRuleSource();
@@ -54,17 +54,17 @@ void MaxSpanFreeNonTermSource::EvaluateWithSourceContext(const InputType &input
float score = 0;
if (source->Front().IsNonTerminal()) {
- const ChartCellLabel &cell = *stackVec->front();
- if (cell.GetCoverage().GetNumWordsCovered() > m_maxSpan) {
- score = - std::numeric_limits<float>::infinity();
- }
+ const ChartCellLabel &cell = *stackVec->front();
+ if (cell.GetCoverage().GetNumWordsCovered() > m_maxSpan) {
+ score = - std::numeric_limits<float>::infinity();
+ }
}
if (source->Back().IsNonTerminal()) {
- const ChartCellLabel &cell = *stackVec->back();
- if (cell.GetCoverage().GetNumWordsCovered() > m_maxSpan) {
- score = - std::numeric_limits<float>::infinity();
- }
+ const ChartCellLabel &cell = *stackVec->back();
+ if (cell.GetCoverage().GetNumWordsCovered() > m_maxSpan) {
+ score = - std::numeric_limits<float>::infinity();
+ }
}
@@ -76,7 +76,7 @@ void MaxSpanFreeNonTermSource::EvaluateWithSourceContext(const InputType &input
void MaxSpanFreeNonTermSource::SetParameter(const std::string& key, const std::string& value)
{
if (key == "max-span") {
- m_maxSpan = Scan<int>(value);
+ m_maxSpan = Scan<int>(value);
} else {
StatelessFeatureFunction::SetParameter(key, value);
}
@@ -84,8 +84,8 @@ void MaxSpanFreeNonTermSource::SetParameter(const std::string& key, const std::s
std::vector<float> MaxSpanFreeNonTermSource::DefaultWeights() const
{
- std::vector<float> ret(1, 1);
- return ret;
+ std::vector<float> ret(1, 1);
+ return ret;
}
}
diff --git a/moses/FF/MaxSpanFreeNonTermSource.h b/moses/FF/MaxSpanFreeNonTermSource.h
index c28ed4dfc..411b2d51d 100644
--- a/moses/FF/MaxSpanFreeNonTermSource.h
+++ b/moses/FF/MaxSpanFreeNonTermSource.h
@@ -10,37 +10,38 @@ namespace Moses
class MaxSpanFreeNonTermSource : public StatelessFeatureFunction
{
public:
- MaxSpanFreeNonTermSource(const std::string &line);
-
- virtual bool IsUseable(const FactorMask &mask) const
- { return true; }
-
- virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
-
- virtual void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
-
- void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
-
- virtual void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
-
- virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
-
- void SetParameter(const std::string& key, const std::string& value);
- std::vector<float> DefaultWeights() const;
+ MaxSpanFreeNonTermSource(const std::string &line);
+
+ virtual bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
+
+ virtual void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
+
+ virtual void EvaluateWithSourceContext(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
+ virtual void EvaluateWhenApplied(const Hypothesis& hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
+
+ virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
+
+ void SetParameter(const std::string& key, const std::string& value);
+ std::vector<float> DefaultWeights() const;
protected:
int m_maxSpan;
diff --git a/moses/FF/NieceTerminal.cpp b/moses/FF/NieceTerminal.cpp
index b3a5f8f92..3473790c1 100644
--- a/moses/FF/NieceTerminal.cpp
+++ b/moses/FF/NieceTerminal.cpp
@@ -20,25 +20,25 @@ NieceTerminal::NieceTerminal(const std::string &line)
std::vector<float> NieceTerminal::DefaultWeights() const
{
UTIL_THROW_IF2(m_numScoreComponents != 1,
- "NieceTerminal must only have 1 score");
+ "NieceTerminal must only have 1 score");
vector<float> ret(1, 1);
return ret;
}
void NieceTerminal::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
targetPhrase.SetRuleSource(source);
}
void NieceTerminal::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
assert(stackVec);
@@ -47,32 +47,32 @@ void NieceTerminal::EvaluateWithSourceContext(const InputType &input
std::set<Word> terms;
for (size_t i = 0; i < ruleSource->GetSize(); ++i) {
- const Word &word = ruleSource->GetWord(i);
- if (!word.IsNonTerminal()) {
- terms.insert(word);
- }
+ const Word &word = ruleSource->GetWord(i);
+ if (!word.IsNonTerminal()) {
+ terms.insert(word);
+ }
}
for (size_t i = 0; i < stackVec->size(); ++i) {
- const ChartCellLabel &cell = *stackVec->at(i);
- const WordsRange &ntRange = cell.GetCoverage();
- bool containTerm = ContainTerm(input, ntRange, terms);
-
- if (containTerm) {
- //cerr << "ruleSource=" << *ruleSource << " ";
- //cerr << "ntRange=" << ntRange << endl;
-
- // non-term contains 1 of the terms in the rule.
- float score = m_hardConstraint ? - std::numeric_limits<float>::infinity() : 1;
- scoreBreakdown.PlusEquals(this, score);
- return;
- }
+ const ChartCellLabel &cell = *stackVec->at(i);
+ const WordsRange &ntRange = cell.GetCoverage();
+ bool containTerm = ContainTerm(input, ntRange, terms);
+
+ if (containTerm) {
+ //cerr << "ruleSource=" << *ruleSource << " ";
+ //cerr << "ntRange=" << ntRange << endl;
+
+ // non-term contains 1 of the terms in the rule.
+ float score = m_hardConstraint ? - std::numeric_limits<float>::infinity() : 1;
+ scoreBreakdown.PlusEquals(this, score);
+ return;
+ }
}
}
void NieceTerminal::EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
+ ScoreComponentCollection* accumulator) const
{}
void NieceTerminal::EvaluateWhenApplied(const ChartHypothesis &hypo,
@@ -80,26 +80,26 @@ void NieceTerminal::EvaluateWhenApplied(const ChartHypothesis &hypo,
{}
bool NieceTerminal::ContainTerm(const InputType &input,
- const WordsRange &ntRange,
- const std::set<Word> &terms) const
+ const WordsRange &ntRange,
+ const std::set<Word> &terms) const
{
- std::set<Word>::const_iterator iter;
+ std::set<Word>::const_iterator iter;
- for (size_t pos = ntRange.GetStartPos(); pos <= ntRange.GetEndPos(); ++pos) {
- const Word &word = input.GetWord(pos);
- iter = terms.find(word);
+ for (size_t pos = ntRange.GetStartPos(); pos <= ntRange.GetEndPos(); ++pos) {
+ const Word &word = input.GetWord(pos);
+ iter = terms.find(word);
- if (iter != terms.end()) {
- return true;
- }
- }
- return false;
+ if (iter != terms.end()) {
+ return true;
+ }
+ }
+ return false;
}
void NieceTerminal::SetParameter(const std::string& key, const std::string& value)
{
if (key == "hard-constraint") {
- m_hardConstraint = Scan<bool>(value);
+ m_hardConstraint = Scan<bool>(value);
} else {
StatelessFeatureFunction::SetParameter(key, value);
}
diff --git a/moses/FF/NieceTerminal.h b/moses/FF/NieceTerminal.h
index cd648ed8f..2ee019443 100644
--- a/moses/FF/NieceTerminal.h
+++ b/moses/FF/NieceTerminal.h
@@ -20,24 +20,24 @@ public:
}
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
-
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
-
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
void SetParameter(const std::string& key, const std::string& value);
std::vector<float> DefaultWeights() const;
diff --git a/moses/FF/OSM-Feature/KenOSM.cpp b/moses/FF/OSM-Feature/KenOSM.cpp
index e517200c3..4047406e5 100644
--- a/moses/FF/OSM-Feature/KenOSM.cpp
+++ b/moses/FF/OSM-Feature/KenOSM.cpp
@@ -5,28 +5,28 @@ namespace Moses
OSMLM* ConstructOSMLM(const std::string &file)
{
- lm::ngram::ModelType model_type;
- if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
+ lm::ngram::ModelType model_type;
+ if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
- switch(model_type) {
- case lm::ngram::PROBING:
- return new KenOSM<lm::ngram::ProbingModel>(file);
- case lm::ngram::REST_PROBING:
- return new KenOSM<lm::ngram::RestProbingModel>(file);
- case lm::ngram::TRIE:
- return new KenOSM<lm::ngram::TrieModel>(file);
- case lm::ngram::QUANT_TRIE:
- return new KenOSM<lm::ngram::QuantTrieModel>(file);
- case lm::ngram::ARRAY_TRIE:
- return new KenOSM<lm::ngram::ArrayTrieModel>(file);
- case lm::ngram::QUANT_ARRAY_TRIE:
- return new KenOSM<lm::ngram::QuantArrayTrieModel>(file);
- default:
- UTIL_THROW2("Unrecognized kenlm model type " << model_type);
- }
- } else {
+ switch(model_type) {
+ case lm::ngram::PROBING:
return new KenOSM<lm::ngram::ProbingModel>(file);
+ case lm::ngram::REST_PROBING:
+ return new KenOSM<lm::ngram::RestProbingModel>(file);
+ case lm::ngram::TRIE:
+ return new KenOSM<lm::ngram::TrieModel>(file);
+ case lm::ngram::QUANT_TRIE:
+ return new KenOSM<lm::ngram::QuantTrieModel>(file);
+ case lm::ngram::ARRAY_TRIE:
+ return new KenOSM<lm::ngram::ArrayTrieModel>(file);
+ case lm::ngram::QUANT_ARRAY_TRIE:
+ return new KenOSM<lm::ngram::QuantArrayTrieModel>(file);
+ default:
+ UTIL_THROW2("Unrecognized kenlm model type " << model_type);
}
+ } else {
+ return new KenOSM<lm::ngram::ProbingModel>(file);
+ }
}
} // namespace
diff --git a/moses/FF/OSM-Feature/KenOSM.h b/moses/FF/OSM-Feature/KenOSM.h
index d3d8672d3..a50589edc 100644
--- a/moses/FF/OSM-Feature/KenOSM.h
+++ b/moses/FF/OSM-Feature/KenOSM.h
@@ -7,39 +7,41 @@
namespace Moses
{
-class KenOSMBase {
- public:
- virtual float Score(const lm::ngram::State&, const std::string&,
- lm::ngram::State&) const = 0;
-
- virtual const lm::ngram::State &BeginSentenceState() const = 0;
-
- virtual const lm::ngram::State &NullContextState() const = 0;
+class KenOSMBase
+{
+public:
+ virtual float Score(const lm::ngram::State&, const std::string&,
+ lm::ngram::State&) const = 0;
+
+ virtual const lm::ngram::State &BeginSentenceState() const = 0;
+
+ virtual const lm::ngram::State &NullContextState() const = 0;
};
template <class KenModel>
-class KenOSM : public KenOSMBase {
- public:
- KenOSM(const std::string& file)
+class KenOSM : public KenOSMBase
+{
+public:
+ KenOSM(const std::string& file)
: m_kenlm(new KenModel(file.c_str())) {}
-
- virtual float Score(const lm::ngram::State &in_state,
- const std::string& word,
- lm::ngram::State &out_state) const {
- return m_kenlm->Score(in_state, m_kenlm->GetVocabulary().Index(word),
- out_state);
- }
-
- virtual const lm::ngram::State &BeginSentenceState() const {
- return m_kenlm->BeginSentenceState();
- }
-
- virtual const lm::ngram::State &NullContextState() const {
- return m_kenlm->NullContextState();
- }
-
- private:
- boost::shared_ptr<KenModel> m_kenlm;
+
+ virtual float Score(const lm::ngram::State &in_state,
+ const std::string& word,
+ lm::ngram::State &out_state) const {
+ return m_kenlm->Score(in_state, m_kenlm->GetVocabulary().Index(word),
+ out_state);
+ }
+
+ virtual const lm::ngram::State &BeginSentenceState() const {
+ return m_kenlm->BeginSentenceState();
+ }
+
+ virtual const lm::ngram::State &NullContextState() const {
+ return m_kenlm->NullContextState();
+ }
+
+private:
+ boost::shared_ptr<KenModel> m_kenlm;
};
typedef KenOSMBase OSMLM;
diff --git a/moses/FF/OSM-Feature/OpSequenceModel.cpp b/moses/FF/OSM-Feature/OpSequenceModel.cpp
index 6d839f0cc..43ed5f346 100644
--- a/moses/FF/OSM-Feature/OpSequenceModel.cpp
+++ b/moses/FF/OSM-Feature/OpSequenceModel.cpp
@@ -19,15 +19,16 @@ OpSequenceModel::OpSequenceModel(const std::string &line)
ReadParameters();
}
-OpSequenceModel::~OpSequenceModel() {
- delete OSM;
+OpSequenceModel::~OpSequenceModel()
+{
+ delete OSM;
}
void OpSequenceModel :: readLanguageModel(const char *lmFile)
{
string unkOp = "_TRANS_SLF_";
OSM = ConstructOSMLM(m_lmPath);
-
+
State startState = OSM->NullContextState();
State endState;
unkOpProb = OSM->Score(startState,unkOp,endState);
@@ -42,9 +43,9 @@ void OpSequenceModel::Load()
void OpSequenceModel:: EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
osmHypothesis obj;
@@ -198,7 +199,7 @@ FFState* OpSequenceModel::EvaluateWhenApplied(
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const
{
- UTIL_THROW2("Chart decoding not support by UTIL_THROW2");
+ UTIL_THROW2("Chart decoding not support by UTIL_THROW2");
}
diff --git a/moses/FF/OSM-Feature/OpSequenceModel.h b/moses/FF/OSM-Feature/OpSequenceModel.h
index 343d7650d..b59eb681a 100644
--- a/moses/FF/OSM-Feature/OpSequenceModel.h
+++ b/moses/FF/OSM-Feature/OpSequenceModel.h
@@ -38,21 +38,21 @@ public:
ScoreComponentCollection* accumulator) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
-
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
-
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
virtual const FFState* EmptyHypothesisState(const InputType &input) const;
diff --git a/moses/FF/PhraseBoundaryFeature.h b/moses/FF/PhraseBoundaryFeature.h
index f23b4cb3d..a5b55e1ef 100644
--- a/moses/FF/PhraseBoundaryFeature.h
+++ b/moses/FF/PhraseBoundaryFeature.h
@@ -45,30 +45,30 @@ public:
virtual const FFState* EmptyHypothesisState(const InputType &) const;
virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
virtual FFState* EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */,
- int /* featureID */,
- ScoreComponentCollection* ) const {
+ int /* featureID */,
+ ScoreComponentCollection* ) const {
throw std::logic_error("PhraseBoundaryState not supported in chart decoder, yet");
}
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
-
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const TranslationOptionList &translationOptionList) const {
+ }
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
void SetParameter(const std::string& key, const std::string& value);
diff --git a/moses/FF/PhraseLengthFeature.cpp b/moses/FF/PhraseLengthFeature.cpp
index 7850c374a..0eb0740b8 100644
--- a/moses/FF/PhraseLengthFeature.cpp
+++ b/moses/FF/PhraseLengthFeature.cpp
@@ -16,9 +16,9 @@ PhraseLengthFeature::PhraseLengthFeature(const std::string &line)
}
void PhraseLengthFeature::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
// get length of source and target phrase
size_t targetLength = targetPhrase.GetSize();
diff --git a/moses/FF/PhraseLengthFeature.h b/moses/FF/PhraseLengthFeature.h
index ce2fbb6f0..9e576946f 100644
--- a/moses/FF/PhraseLengthFeature.h
+++ b/moses/FF/PhraseLengthFeature.h
@@ -25,28 +25,28 @@ public:
}
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis& hypo,
- ScoreComponentCollection*) const
- {}
+ ScoreComponentCollection*) const {
+ }
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
-
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const TranslationOptionList &translationOptionList) const {
+ }
virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
};
diff --git a/moses/FF/PhraseOrientationFeature.cpp b/moses/FF/PhraseOrientationFeature.cpp
index 86a6d09ce..78ee2cd0a 100644
--- a/moses/FF/PhraseOrientationFeature.cpp
+++ b/moses/FF/PhraseOrientationFeature.cpp
@@ -1,11 +1,11 @@
//
// REFERENCE
// ---------
-// When using this feature, please cite:
-//
+// When using this feature, please cite:
+//
// Matthias Huck, Joern Wuebker, Felix Rietig, and Hermann Ney.
-// A Phrase Orientation Model for Hierarchical Machine Translation.
-// In ACL 2013 Eighth Workshop on Statistical Machine Translation (WMT 2013), pages 452-463, Sofia, Bulgaria, August 2013.
+// A Phrase Orientation Model for Hierarchical Machine Translation.
+// In ACL 2013 Eighth Workshop on Statistical Machine Translation (WMT 2013), pages 452-463, Sofia, Bulgaria, August 2013.
//
#include "PhraseOrientationFeature.h"
@@ -40,12 +40,9 @@ PhraseOrientationFeature::PhraseOrientationFeature(const std::string &line)
void PhraseOrientationFeature::SetParameter(const std::string& key, const std::string& value)
{
- if (key == "glueTargetLHS")
- {
+ if (key == "glueTargetLHS") {
m_glueTargetLHSStr = value;
- }
- else
- {
+ } else {
StatefulFeatureFunction::SetParameter(key, value);
}
}
@@ -69,408 +66,371 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
// const Factor* targetLHS = currTarPhr.GetTargetLHS()[0];
// bool isGlueGrammarRule = false;
- IFFEATUREVERBOSE(2)
- {
- FEATUREVERBOSE(2, *currSrcPhr << std::endl);
- FEATUREVERBOSE(2, currTarPhr << std::endl);
+ IFFEATUREVERBOSE(2) {
+ FEATUREVERBOSE(2, *currSrcPhr << std::endl);
+ FEATUREVERBOSE(2, currTarPhr << std::endl);
for (AlignmentInfo::const_iterator it=currTarPhr.GetAlignTerm().begin();
- it!=currTarPhr.GetAlignTerm().end(); ++it)
- {
+ it!=currTarPhr.GetAlignTerm().end(); ++it) {
FEATUREVERBOSE(2, "alignTerm " << it->first << " " << it->second << std::endl);
}
for (AlignmentInfo::const_iterator it=currTarPhr.GetAlignNonTerm().begin();
- it!=currTarPhr.GetAlignNonTerm().end(); ++it)
- {
+ it!=currTarPhr.GetAlignNonTerm().end(); ++it) {
FEATUREVERBOSE(2, "alignNonTerm " << it->first << " " << it->second << std::endl);
}
}
// Initialize phrase orientation scoring object
- Moses::GHKM::PhraseOrientation phraseOrientation(currSrcPhr->GetSize(), currTarPhr.GetSize(),
- currTarPhr.GetAlignTerm(), currTarPhr.GetAlignNonTerm());
-
+ Moses::GHKM::PhraseOrientation phraseOrientation(currSrcPhr->GetSize(), currTarPhr.GetSize(),
+ currTarPhr.GetAlignTerm(), currTarPhr.GetAlignNonTerm());
+
// Get index map for underlying hypotheses
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
- currTarPhr.GetAlignNonTerm().GetNonTermIndexMap();
+ currTarPhr.GetAlignNonTerm().GetNonTermIndexMap();
// Determine & score orientations
for (AlignmentInfo::const_iterator it=currTarPhr.GetAlignNonTerm().begin();
- it!=currTarPhr.GetAlignNonTerm().end(); ++it)
- {
- size_t sourceIndex = it->first;
- size_t targetIndex = it->second;
- size_t nonTermIndex = nonTermIndexMap[targetIndex];
-
- FEATUREVERBOSE(2, "Scoring nonTermIndex= " << nonTermIndex << " targetIndex= " << targetIndex << " sourceIndex= " << sourceIndex << std::endl);
-
- // consult subderivation
- const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndex);
- const TargetPhrase &prevTarPhr = prevHypo->GetCurrTargetPhrase();
-
- if (const PhraseProperty *property = prevTarPhr.GetProperty("Orientation"))
- {
- const OrientationPhraseProperty *orientationPhraseProperty = static_cast<const OrientationPhraseProperty*>(property);
-
- FEATUREVERBOSE(5, "orientationPhraseProperty: "
- << "L2R_Mono " << orientationPhraseProperty->GetLeftToRightProbabilityMono()
- << " L2R_Swap " << orientationPhraseProperty->GetLeftToRightProbabilitySwap()
- << " L2R_Dright " << orientationPhraseProperty->GetLeftToRightProbabilityDright()
- << " L2R_Dleft " << orientationPhraseProperty->GetLeftToRightProbabilityDleft()
- << " R2L_Mono " << orientationPhraseProperty->GetRightToLeftProbabilityMono()
- << " R2L_Swap " << orientationPhraseProperty->GetRightToLeftProbabilitySwap()
- << " R2L_Dright " << orientationPhraseProperty->GetRightToLeftProbabilityDright()
- << " R2L_Dleft " << orientationPhraseProperty->GetRightToLeftProbabilityDleft()
- << std::endl);
-
- const PhraseOrientationFeatureState* prevState =
- static_cast<const PhraseOrientationFeatureState*>(prevHypo->GetFFState(featureID));
-
-
- // LEFT-TO-RIGHT DIRECTION
-
- Moses::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::PhraseOrientation::REO_DIR_L2R);
-
- IFFEATUREVERBOSE(2)
- {
- FEATUREVERBOSE(2, "l2rOrientation ");
- switch (l2rOrientation)
- {
- case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
- FEATUREVERBOSE2(2, "mono" << std::endl);
- break;
- case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
- FEATUREVERBOSE2(2, "swap" << std::endl);
- break;
- case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
- FEATUREVERBOSE2(2, "dleft" << std::endl);
- break;
- case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
- FEATUREVERBOSE2(2, "dright" << std::endl);
- break;
- case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
- // modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
- FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
- break;
- default:
- UTIL_THROW2(GetScoreProducerDescription()
- << ": Unsupported orientation type.");
- break;
- }
+ it!=currTarPhr.GetAlignNonTerm().end(); ++it) {
+ size_t sourceIndex = it->first;
+ size_t targetIndex = it->second;
+ size_t nonTermIndex = nonTermIndexMap[targetIndex];
+
+ FEATUREVERBOSE(2, "Scoring nonTermIndex= " << nonTermIndex << " targetIndex= " << targetIndex << " sourceIndex= " << sourceIndex << std::endl);
+
+ // consult subderivation
+ const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndex);
+ const TargetPhrase &prevTarPhr = prevHypo->GetCurrTargetPhrase();
+
+ if (const PhraseProperty *property = prevTarPhr.GetProperty("Orientation")) {
+ const OrientationPhraseProperty *orientationPhraseProperty = static_cast<const OrientationPhraseProperty*>(property);
+
+ FEATUREVERBOSE(5, "orientationPhraseProperty: "
+ << "L2R_Mono " << orientationPhraseProperty->GetLeftToRightProbabilityMono()
+ << " L2R_Swap " << orientationPhraseProperty->GetLeftToRightProbabilitySwap()
+ << " L2R_Dright " << orientationPhraseProperty->GetLeftToRightProbabilityDright()
+ << " L2R_Dleft " << orientationPhraseProperty->GetLeftToRightProbabilityDleft()
+ << " R2L_Mono " << orientationPhraseProperty->GetRightToLeftProbabilityMono()
+ << " R2L_Swap " << orientationPhraseProperty->GetRightToLeftProbabilitySwap()
+ << " R2L_Dright " << orientationPhraseProperty->GetRightToLeftProbabilityDright()
+ << " R2L_Dleft " << orientationPhraseProperty->GetRightToLeftProbabilityDleft()
+ << std::endl);
+
+ const PhraseOrientationFeatureState* prevState =
+ static_cast<const PhraseOrientationFeatureState*>(prevHypo->GetFFState(featureID));
+
+
+ // LEFT-TO-RIGHT DIRECTION
+
+ Moses::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::PhraseOrientation::REO_DIR_L2R);
+
+ IFFEATUREVERBOSE(2) {
+ FEATUREVERBOSE(2, "l2rOrientation ");
+ switch (l2rOrientation) {
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
+ FEATUREVERBOSE2(2, "mono" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
+ FEATUREVERBOSE2(2, "swap" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
+ FEATUREVERBOSE2(2, "dleft" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
+ FEATUREVERBOSE2(2, "dright" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
+ // modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
+ FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
+ break;
+ default:
+ UTIL_THROW2(GetScoreProducerDescription()
+ << ": Unsupported orientation type.");
+ break;
}
+ }
- bool delayedScoringL2R = false;
-
- if ( ((targetIndex == 0) || !phraseOrientation.TargetSpanIsAligned(0,targetIndex)) // boundary non-terminal in rule-initial position (left boundary)
- && (currTarPhrLHS != m_glueTargetLHS) ) // and not glue rule
- {
- // delay left-to-right scoring
-
- FEATUREVERBOSE(3, "Left boundary");
- if (targetIndex != 0) {
- FEATUREVERBOSE2(3, " (with targetIndex!=0)");
- }
- FEATUREVERBOSE2(3, std::endl);
-
- bool previousSourceSpanIsAligned = ( (sourceIndex > 0) && phraseOrientation.SourceSpanIsAligned(0,sourceIndex-1) );
- bool followingSourceSpanIsAligned = ( (sourceIndex < currSrcPhr->GetSize()-1) && phraseOrientation.SourceSpanIsAligned(sourceIndex,currSrcPhr->GetSize()-1) );
+ bool delayedScoringL2R = false;
- FEATUREVERBOSE(4, "previousSourceSpanIsAligned = " << previousSourceSpanIsAligned << std::endl);
- FEATUREVERBOSE(4, "followingSourceSpanIsAligned = " << followingSourceSpanIsAligned << std::endl;);
+ if ( ((targetIndex == 0) || !phraseOrientation.TargetSpanIsAligned(0,targetIndex)) // boundary non-terminal in rule-initial position (left boundary)
+ && (currTarPhrLHS != m_glueTargetLHS) ) { // and not glue rule
+ // delay left-to-right scoring
- if (previousSourceSpanIsAligned && followingSourceSpanIsAligned)
- {
- // discontinuous
- l2rOrientation = Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
+ FEATUREVERBOSE(3, "Left boundary");
+ if (targetIndex != 0) {
+ FEATUREVERBOSE2(3, " (with targetIndex!=0)");
+ }
+ FEATUREVERBOSE2(3, std::endl);
+
+ bool previousSourceSpanIsAligned = ( (sourceIndex > 0) && phraseOrientation.SourceSpanIsAligned(0,sourceIndex-1) );
+ bool followingSourceSpanIsAligned = ( (sourceIndex < currSrcPhr->GetSize()-1) && phraseOrientation.SourceSpanIsAligned(sourceIndex,currSrcPhr->GetSize()-1) );
+
+ FEATUREVERBOSE(4, "previousSourceSpanIsAligned = " << previousSourceSpanIsAligned << std::endl);
+ FEATUREVERBOSE(4, "followingSourceSpanIsAligned = " << followingSourceSpanIsAligned << std::endl;);
+
+ if (previousSourceSpanIsAligned && followingSourceSpanIsAligned) {
+ // discontinuous
+ l2rOrientation = Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
+ } else {
+ FEATUREVERBOSE(3, "Delaying left-to-right scoring" << std::endl);
+
+ delayedScoringL2R = true;
+ std::bitset<3> possibleFutureOrientationsL2R(0x7);
+ possibleFutureOrientationsL2R[0] = !previousSourceSpanIsAligned;
+ possibleFutureOrientationsL2R[1] = !followingSourceSpanIsAligned;
+
+ // add heuristic scores
+
+ std::vector<float> weightsVector = StaticData::Instance().GetAllWeights().GetScoresForProducer(this);
+ std::vector<float> scoresL2R;
+ scoresL2R.push_back( std::log(orientationPhraseProperty->GetLeftToRightProbabilityMono()) );
+ scoresL2R.push_back( std::log(orientationPhraseProperty->GetLeftToRightProbabilitySwap()) );
+ scoresL2R.push_back( std::log(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous()) );
+ std::vector<float> weightedScoresL2R;
+ for ( size_t i=0; i<3; ++i ) {
+ weightedScoresL2R.push_back( weightsVector[i] * scoresL2R[i] );
}
- else
- {
- FEATUREVERBOSE(3, "Delaying left-to-right scoring" << std::endl);
-
- delayedScoringL2R = true;
- std::bitset<3> possibleFutureOrientationsL2R(0x7);
- possibleFutureOrientationsL2R[0] = !previousSourceSpanIsAligned;
- possibleFutureOrientationsL2R[1] = !followingSourceSpanIsAligned;
-
- // add heuristic scores
-
- std::vector<float> weightsVector = StaticData::Instance().GetAllWeights().GetScoresForProducer(this);
- std::vector<float> scoresL2R;
- scoresL2R.push_back( std::log(orientationPhraseProperty->GetLeftToRightProbabilityMono()) );
- scoresL2R.push_back( std::log(orientationPhraseProperty->GetLeftToRightProbabilitySwap()) );
- scoresL2R.push_back( std::log(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous()) );
- std::vector<float> weightedScoresL2R;
- for ( size_t i=0; i<3;++i )
- {
- weightedScoresL2R.push_back( weightsVector[i] * scoresL2R[i] );
- }
- size_t heuristicScoreIndex = 0;
- for (size_t i=1; i<3; ++i)
- {
- if (possibleFutureOrientationsL2R[i])
- {
- if (weightedScoresL2R[i] > weightedScoresL2R[heuristicScoreIndex])
- {
- heuristicScoreIndex = i;
- }
+ size_t heuristicScoreIndex = 0;
+ for (size_t i=1; i<3; ++i) {
+ if (possibleFutureOrientationsL2R[i]) {
+ if (weightedScoresL2R[i] > weightedScoresL2R[heuristicScoreIndex]) {
+ heuristicScoreIndex = i;
}
}
+ }
- IFFEATUREVERBOSE(5)
- {
- FEATUREVERBOSE(5, "Heuristic score computation (L2R): "
- << "heuristicScoreIndex= " << heuristicScoreIndex);
- for (size_t i=0; i<3; ++i)
- FEATUREVERBOSE2(5, " weightsVector[" << i << "]= " << weightsVector[i]);
- for (size_t i=0; i<3; ++i)
- FEATUREVERBOSE2(5, " scoresL2R[" << i << "]= " << scoresL2R[i]);
- for (size_t i=0; i<3; ++i)
- FEATUREVERBOSE2(5, " weightedScoresL2R[" << i << "]= " << weightedScoresL2R[i]);
- for (size_t i=0; i<3; ++i)
- FEATUREVERBOSE2(5, " possibleFutureOrientationsL2R[" << i << "]= " << possibleFutureOrientationsL2R[i]);
- if ( possibleFutureOrientationsL2R == 0x7 )
- {
- FEATUREVERBOSE2(5, " (all orientations possible)");
- }
- FEATUREVERBOSE2(5, std::endl);
- }
-
- newScores[heuristicScoreIndex] += scoresL2R[heuristicScoreIndex];
- state->SetLeftBoundaryL2R(scoresL2R, heuristicScoreIndex, possibleFutureOrientationsL2R, prevState);
-
- if ( (possibleFutureOrientationsL2R & prevState->m_leftBoundaryNonTerminalL2RPossibleFutureOrientations) == 0x4 )
- {
- // recursive: discontinuous orientation
- FEATUREVERBOSE(5, "previous state: L2R discontinuous orientation "
- << possibleFutureOrientationsL2R << " & " << prevState->m_leftBoundaryNonTerminalL2RPossibleFutureOrientations
- << " = " << (possibleFutureOrientationsL2R & prevState->m_leftBoundaryNonTerminalL2RPossibleFutureOrientations)
- << std::endl);
- LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x4, newScores);
- state->m_leftBoundaryRecursionGuard = true; // prevent subderivation from being scored recursively multiple times
+ IFFEATUREVERBOSE(5) {
+ FEATUREVERBOSE(5, "Heuristic score computation (L2R): "
+ << "heuristicScoreIndex= " << heuristicScoreIndex);
+ for (size_t i=0; i<3; ++i)
+ FEATUREVERBOSE2(5, " weightsVector[" << i << "]= " << weightsVector[i]);
+ for (size_t i=0; i<3; ++i)
+ FEATUREVERBOSE2(5, " scoresL2R[" << i << "]= " << scoresL2R[i]);
+ for (size_t i=0; i<3; ++i)
+ FEATUREVERBOSE2(5, " weightedScoresL2R[" << i << "]= " << weightedScoresL2R[i]);
+ for (size_t i=0; i<3; ++i)
+ FEATUREVERBOSE2(5, " possibleFutureOrientationsL2R[" << i << "]= " << possibleFutureOrientationsL2R[i]);
+ if ( possibleFutureOrientationsL2R == 0x7 ) {
+ FEATUREVERBOSE2(5, " (all orientations possible)");
}
+ FEATUREVERBOSE2(5, std::endl);
}
- }
- if (!delayedScoringL2R)
- {
- switch (l2rOrientation)
- {
- case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
- newScores[0] += std::log(orientationPhraseProperty->GetLeftToRightProbabilityMono());
- // if sub-derivation has left-boundary non-terminal:
- // add recursive actual score of boundary non-terminal from subderivation
- LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x1, newScores);
- break;
- case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
- newScores[1] += std::log(orientationPhraseProperty->GetLeftToRightProbabilitySwap());
- // if sub-derivation has left-boundary non-terminal:
- // add recursive actual score of boundary non-terminal from subderivation
- LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x2, newScores);
- break;
- case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
- newScores[2] += std::log(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous());
- // if sub-derivation has left-boundary non-terminal:
- // add recursive actual score of boundary non-terminal from subderivation
- LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x4, newScores);
- break;
- case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
- newScores[2] += std::log(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous());
- // if sub-derivation has left-boundary non-terminal:
- // add recursive actual score of boundary non-terminal from subderivation
- LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x4, newScores);
- break;
- case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
- // modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
- newScores[2] += std::log(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous());
- // if sub-derivation has left-boundary non-terminal:
- // add recursive actual score of boundary non-terminal from subderivation
- LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x4, newScores);
- break;
- default:
- UTIL_THROW2(GetScoreProducerDescription()
- << ": Unsupported orientation type.");
- break;
+ newScores[heuristicScoreIndex] += scoresL2R[heuristicScoreIndex];
+ state->SetLeftBoundaryL2R(scoresL2R, heuristicScoreIndex, possibleFutureOrientationsL2R, prevState);
+
+ if ( (possibleFutureOrientationsL2R & prevState->m_leftBoundaryNonTerminalL2RPossibleFutureOrientations) == 0x4 ) {
+ // recursive: discontinuous orientation
+ FEATUREVERBOSE(5, "previous state: L2R discontinuous orientation "
+ << possibleFutureOrientationsL2R << " & " << prevState->m_leftBoundaryNonTerminalL2RPossibleFutureOrientations
+ << " = " << (possibleFutureOrientationsL2R & prevState->m_leftBoundaryNonTerminalL2RPossibleFutureOrientations)
+ << std::endl);
+ LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x4, newScores);
+ state->m_leftBoundaryRecursionGuard = true; // prevent subderivation from being scored recursively multiple times
}
}
+ }
-
- // RIGHT-TO-LEFT DIRECTION
-
- Moses::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::PhraseOrientation::REO_DIR_R2L);
-
- IFFEATUREVERBOSE(2)
- {
- FEATUREVERBOSE(2, "r2lOrientation ");
- switch (r2lOrientation)
- {
- case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
- FEATUREVERBOSE2(2, "mono" << std::endl);
- break;
- case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
- FEATUREVERBOSE2(2, "swap" << std::endl);
- break;
- case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
- FEATUREVERBOSE2(2, "dleft" << std::endl);
- break;
- case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
- FEATUREVERBOSE2(2, "dright" << std::endl);
- break;
- case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
- // modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
- FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
- break;
- default:
- UTIL_THROW2(GetScoreProducerDescription()
- << ": Unsupported orientation type.");
- break;
- }
+ if (!delayedScoringL2R) {
+ switch (l2rOrientation) {
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
+ newScores[0] += std::log(orientationPhraseProperty->GetLeftToRightProbabilityMono());
+ // if sub-derivation has left-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x1, newScores);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
+ newScores[1] += std::log(orientationPhraseProperty->GetLeftToRightProbabilitySwap());
+ // if sub-derivation has left-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x2, newScores);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
+ newScores[2] += std::log(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous());
+ // if sub-derivation has left-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x4, newScores);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
+ newScores[2] += std::log(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous());
+ // if sub-derivation has left-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x4, newScores);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
+ // modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
+ newScores[2] += std::log(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous());
+ // if sub-derivation has left-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x4, newScores);
+ break;
+ default:
+ UTIL_THROW2(GetScoreProducerDescription()
+ << ": Unsupported orientation type.");
+ break;
}
+ }
- bool delayedScoringR2L = false;
-
- if ( ((targetIndex == currTarPhr.GetSize()-1) || !phraseOrientation.TargetSpanIsAligned(targetIndex,currTarPhr.GetSize()-1)) // boundary non-terminal in rule-final position (right boundary)
- && (currTarPhrLHS != m_glueTargetLHS) ) // and not glue rule
- {
- // delay right-to-left scoring
- FEATUREVERBOSE(3, "Right boundary");
- if (targetIndex != currTarPhr.GetSize()-1) {
- FEATUREVERBOSE2(3, " (with targetIndex!=currTarPhr.GetSize()-1)");
- }
- FEATUREVERBOSE2(3, std::endl);
+ // RIGHT-TO-LEFT DIRECTION
+
+ Moses::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::PhraseOrientation::REO_DIR_R2L);
+
+ IFFEATUREVERBOSE(2) {
+ FEATUREVERBOSE(2, "r2lOrientation ");
+ switch (r2lOrientation) {
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
+ FEATUREVERBOSE2(2, "mono" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
+ FEATUREVERBOSE2(2, "swap" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
+ FEATUREVERBOSE2(2, "dleft" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
+ FEATUREVERBOSE2(2, "dright" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
+ // modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
+ FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
+ break;
+ default:
+ UTIL_THROW2(GetScoreProducerDescription()
+ << ": Unsupported orientation type.");
+ break;
+ }
+ }
- bool previousSourceSpanIsAligned = ( (sourceIndex > 0) && phraseOrientation.SourceSpanIsAligned(0,sourceIndex-1) );
- bool followingSourceSpanIsAligned = ( (sourceIndex < currSrcPhr->GetSize()-1) && phraseOrientation.SourceSpanIsAligned(sourceIndex,currSrcPhr->GetSize()-1) );
+ bool delayedScoringR2L = false;
- FEATUREVERBOSE(4, "previousSourceSpanIsAligned = " << previousSourceSpanIsAligned << std::endl);
- FEATUREVERBOSE(4, "followingSourceSpanIsAligned = " << followingSourceSpanIsAligned << std::endl;);
+ if ( ((targetIndex == currTarPhr.GetSize()-1) || !phraseOrientation.TargetSpanIsAligned(targetIndex,currTarPhr.GetSize()-1)) // boundary non-terminal in rule-final position (right boundary)
+ && (currTarPhrLHS != m_glueTargetLHS) ) { // and not glue rule
+ // delay right-to-left scoring
- if (previousSourceSpanIsAligned && followingSourceSpanIsAligned)
- {
- // discontinuous
- r2lOrientation = Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
+ FEATUREVERBOSE(3, "Right boundary");
+ if (targetIndex != currTarPhr.GetSize()-1) {
+ FEATUREVERBOSE2(3, " (with targetIndex!=currTarPhr.GetSize()-1)");
+ }
+ FEATUREVERBOSE2(3, std::endl);
+
+ bool previousSourceSpanIsAligned = ( (sourceIndex > 0) && phraseOrientation.SourceSpanIsAligned(0,sourceIndex-1) );
+ bool followingSourceSpanIsAligned = ( (sourceIndex < currSrcPhr->GetSize()-1) && phraseOrientation.SourceSpanIsAligned(sourceIndex,currSrcPhr->GetSize()-1) );
+
+ FEATUREVERBOSE(4, "previousSourceSpanIsAligned = " << previousSourceSpanIsAligned << std::endl);
+ FEATUREVERBOSE(4, "followingSourceSpanIsAligned = " << followingSourceSpanIsAligned << std::endl;);
+
+ if (previousSourceSpanIsAligned && followingSourceSpanIsAligned) {
+ // discontinuous
+ r2lOrientation = Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
+ } else {
+ FEATUREVERBOSE(3, "Delaying right-to-left scoring" << std::endl);
+
+ delayedScoringR2L = true;
+ std::bitset<3> possibleFutureOrientationsR2L(0x7);
+ possibleFutureOrientationsR2L[0] = !followingSourceSpanIsAligned;
+ possibleFutureOrientationsR2L[1] = !previousSourceSpanIsAligned;
+
+ // add heuristic scores
+
+ std::vector<float> weightsVector = StaticData::Instance().GetAllWeights().GetScoresForProducer(this);
+ std::vector<float> scoresR2L;
+ scoresR2L.push_back( std::log(orientationPhraseProperty->GetRightToLeftProbabilityMono()) );
+ scoresR2L.push_back( std::log(orientationPhraseProperty->GetRightToLeftProbabilitySwap()) );
+ scoresR2L.push_back( std::log(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous()) );
+ std::vector<float> weightedScoresR2L;
+ for ( size_t i=0; i<3; ++i ) {
+ weightedScoresR2L.push_back( weightsVector[m_offsetR2LScores+i] * scoresR2L[i] );
}
- else
- {
- FEATUREVERBOSE(3, "Delaying right-to-left scoring" << std::endl);
-
- delayedScoringR2L = true;
- std::bitset<3> possibleFutureOrientationsR2L(0x7);
- possibleFutureOrientationsR2L[0] = !followingSourceSpanIsAligned;
- possibleFutureOrientationsR2L[1] = !previousSourceSpanIsAligned;
-
- // add heuristic scores
-
- std::vector<float> weightsVector = StaticData::Instance().GetAllWeights().GetScoresForProducer(this);
- std::vector<float> scoresR2L;
- scoresR2L.push_back( std::log(orientationPhraseProperty->GetRightToLeftProbabilityMono()) );
- scoresR2L.push_back( std::log(orientationPhraseProperty->GetRightToLeftProbabilitySwap()) );
- scoresR2L.push_back( std::log(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous()) );
- std::vector<float> weightedScoresR2L;
- for ( size_t i=0; i<3;++i )
- {
- weightedScoresR2L.push_back( weightsVector[m_offsetR2LScores+i] * scoresR2L[i] );
- }
- size_t heuristicScoreIndex = 0;
- for (size_t i=1; i<3; ++i)
- {
- if (possibleFutureOrientationsR2L[i])
- {
- if (weightedScoresR2L[i] > weightedScoresR2L[heuristicScoreIndex])
- {
- heuristicScoreIndex = i;
- }
+ size_t heuristicScoreIndex = 0;
+ for (size_t i=1; i<3; ++i) {
+ if (possibleFutureOrientationsR2L[i]) {
+ if (weightedScoresR2L[i] > weightedScoresR2L[heuristicScoreIndex]) {
+ heuristicScoreIndex = i;
}
}
+ }
- IFFEATUREVERBOSE(5)
- {
- FEATUREVERBOSE(5, "Heuristic score computation (R2L): "
- << "heuristicScoreIndex= " << heuristicScoreIndex);
- for (size_t i=0; i<3; ++i)
- FEATUREVERBOSE2(5, " weightsVector[" << m_offsetR2LScores+i << "]= " << weightsVector[m_offsetR2LScores+i]);
- for (size_t i=0; i<3; ++i)
- FEATUREVERBOSE2(5, " scoresR2L[" << i << "]= " << scoresR2L[i]);
- for (size_t i=0; i<3; ++i)
- FEATUREVERBOSE2(5, " weightedScoresR2L[" << i << "]= " << weightedScoresR2L[i]);
- for (size_t i=0; i<3; ++i)
- FEATUREVERBOSE2(5, " possibleFutureOrientationsR2L[" << i << "]= " << possibleFutureOrientationsR2L[i]);
- if ( possibleFutureOrientationsR2L == 0x7 )
- {
- FEATUREVERBOSE2(5, " (all orientations possible)");
- }
- FEATUREVERBOSE2(5, std::endl);
+ IFFEATUREVERBOSE(5) {
+ FEATUREVERBOSE(5, "Heuristic score computation (R2L): "
+ << "heuristicScoreIndex= " << heuristicScoreIndex);
+ for (size_t i=0; i<3; ++i)
+ FEATUREVERBOSE2(5, " weightsVector[" << m_offsetR2LScores+i << "]= " << weightsVector[m_offsetR2LScores+i]);
+ for (size_t i=0; i<3; ++i)
+ FEATUREVERBOSE2(5, " scoresR2L[" << i << "]= " << scoresR2L[i]);
+ for (size_t i=0; i<3; ++i)
+ FEATUREVERBOSE2(5, " weightedScoresR2L[" << i << "]= " << weightedScoresR2L[i]);
+ for (size_t i=0; i<3; ++i)
+ FEATUREVERBOSE2(5, " possibleFutureOrientationsR2L[" << i << "]= " << possibleFutureOrientationsR2L[i]);
+ if ( possibleFutureOrientationsR2L == 0x7 ) {
+ FEATUREVERBOSE2(5, " (all orientations possible)");
}
+ FEATUREVERBOSE2(5, std::endl);
+ }
- newScores[m_offsetR2LScores+heuristicScoreIndex] += scoresR2L[heuristicScoreIndex];
- state->SetRightBoundaryR2L(scoresR2L, heuristicScoreIndex, possibleFutureOrientationsR2L, prevState);
-
- if ( (possibleFutureOrientationsR2L & prevState->m_rightBoundaryNonTerminalR2LPossibleFutureOrientations) == 0x4 )
- {
- // recursive: discontinuous orientation
- FEATUREVERBOSE(5, "previous state: R2L discontinuous orientation "
- << possibleFutureOrientationsR2L << " & " << prevState->m_rightBoundaryNonTerminalR2LPossibleFutureOrientations
- << " = " << (possibleFutureOrientationsR2L & prevState->m_rightBoundaryNonTerminalR2LPossibleFutureOrientations)
- << std::endl);
- RightBoundaryR2LScoreRecursive(featureID, prevState, 0x4, newScores);
- state->m_rightBoundaryRecursionGuard = true; // prevent subderivation from being scored recursively multiple times
- }
+ newScores[m_offsetR2LScores+heuristicScoreIndex] += scoresR2L[heuristicScoreIndex];
+ state->SetRightBoundaryR2L(scoresR2L, heuristicScoreIndex, possibleFutureOrientationsR2L, prevState);
+
+ if ( (possibleFutureOrientationsR2L & prevState->m_rightBoundaryNonTerminalR2LPossibleFutureOrientations) == 0x4 ) {
+ // recursive: discontinuous orientation
+ FEATUREVERBOSE(5, "previous state: R2L discontinuous orientation "
+ << possibleFutureOrientationsR2L << " & " << prevState->m_rightBoundaryNonTerminalR2LPossibleFutureOrientations
+ << " = " << (possibleFutureOrientationsR2L & prevState->m_rightBoundaryNonTerminalR2LPossibleFutureOrientations)
+ << std::endl);
+ RightBoundaryR2LScoreRecursive(featureID, prevState, 0x4, newScores);
+ state->m_rightBoundaryRecursionGuard = true; // prevent subderivation from being scored recursively multiple times
}
}
+ }
- if (!delayedScoringR2L)
- {
- switch (r2lOrientation)
- {
- case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
- newScores[m_offsetR2LScores+0] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityMono());
- // if sub-derivation has right-boundary non-terminal:
- // add recursive actual score of boundary non-terminal from subderivation
- RightBoundaryR2LScoreRecursive(featureID, prevState, 0x1, newScores);
- break;
- case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
- newScores[m_offsetR2LScores+1] += std::log(orientationPhraseProperty->GetRightToLeftProbabilitySwap());
- // if sub-derivation has right-boundary non-terminal:
- // add recursive actual score of boundary non-terminal from subderivation
- RightBoundaryR2LScoreRecursive(featureID, prevState, 0x2, newScores);
- break;
- case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
- newScores[m_offsetR2LScores+2] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous());
- // if sub-derivation has right-boundary non-terminal:
- // add recursive actual score of boundary non-terminal from subderivation
- RightBoundaryR2LScoreRecursive(featureID, prevState, 0x4, newScores);
- break;
- case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
- newScores[m_offsetR2LScores+2] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous());
- // if sub-derivation has right-boundary non-terminal:
- // add recursive actual score of boundary non-terminal from subderivation
- RightBoundaryR2LScoreRecursive(featureID, prevState, 0x4, newScores);
- break;
- case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
- // modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
- newScores[m_offsetR2LScores+2] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous());
- // if sub-derivation has right-boundary non-terminal:
- // add recursive actual score of boundary non-terminal from subderivation
- RightBoundaryR2LScoreRecursive(featureID, prevState, 0x4, newScores);
- break;
- default:
- UTIL_THROW2(GetScoreProducerDescription()
- << ": Unsupported orientation type.");
- break;
- }
+ if (!delayedScoringR2L) {
+ switch (r2lOrientation) {
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
+ newScores[m_offsetR2LScores+0] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityMono());
+ // if sub-derivation has right-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ RightBoundaryR2LScoreRecursive(featureID, prevState, 0x1, newScores);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
+ newScores[m_offsetR2LScores+1] += std::log(orientationPhraseProperty->GetRightToLeftProbabilitySwap());
+ // if sub-derivation has right-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ RightBoundaryR2LScoreRecursive(featureID, prevState, 0x2, newScores);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
+ newScores[m_offsetR2LScores+2] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous());
+ // if sub-derivation has right-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ RightBoundaryR2LScoreRecursive(featureID, prevState, 0x4, newScores);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
+ newScores[m_offsetR2LScores+2] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous());
+ // if sub-derivation has right-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ RightBoundaryR2LScoreRecursive(featureID, prevState, 0x4, newScores);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
+ // modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
+ newScores[m_offsetR2LScores+2] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous());
+ // if sub-derivation has right-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ RightBoundaryR2LScoreRecursive(featureID, prevState, 0x4, newScores);
+ break;
+ default:
+ UTIL_THROW2(GetScoreProducerDescription()
+ << ": Unsupported orientation type.");
+ break;
}
- }
- else
- {
- // abort with error message if the phrase does not translate an unknown word
- UTIL_THROW_IF2(!prevTarPhr.GetWord(0).IsOOV(), GetScoreProducerDescription()
- << ": Missing Orientation property. "
- << "Please check phrase table and glue rules.");
}
+ } else {
+ // abort with error message if the phrase does not translate an unknown word
+ UTIL_THROW_IF2(!prevTarPhr.GetWord(0).IsOOV(), GetScoreProducerDescription()
+ << ": Missing Orientation property. "
+ << "Please check phrase table and glue rules.");
+ }
}
accumulator->PlusEquals(this, newScores);
@@ -479,47 +439,34 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
}
void PhraseOrientationFeature::LeftBoundaryL2RScoreRecursive(int featureID,
- const PhraseOrientationFeatureState *state,
- const std::bitset<3> orientation,
- std::vector<float>& newScores) const
+ const PhraseOrientationFeatureState *state,
+ const std::bitset<3> orientation,
+ std::vector<float>& newScores) const
{
- if (state->m_leftBoundaryIsSet)
- {
+ if (state->m_leftBoundaryIsSet) {
// subtract heuristic score from subderivation
newScores[state->m_leftBoundaryNonTerminalL2RHeuristicScoreIndex] -= state->m_leftBoundaryNonTerminalL2RScores[state->m_leftBoundaryNonTerminalL2RHeuristicScoreIndex];
// add actual score
std::bitset<3> recursiveOrientation = orientation;
- if ( (orientation == 0x4) || (orientation == 0x0) )
- {
+ if ( (orientation == 0x4) || (orientation == 0x0) ) {
// discontinuous
newScores[2] += state->GetLeftBoundaryL2RScoreDiscontinuous();
- }
- else
- {
+ } else {
recursiveOrientation &= state->m_leftBoundaryNonTerminalL2RPossibleFutureOrientations;
- if ( recursiveOrientation == 0x1 )
- {
+ if ( recursiveOrientation == 0x1 ) {
// monotone
newScores[0] += state->GetLeftBoundaryL2RScoreMono();
- }
- else if ( recursiveOrientation == 0x2 )
- {
+ } else if ( recursiveOrientation == 0x2 ) {
// swap
newScores[1] += state->GetLeftBoundaryL2RScoreSwap();
- }
- else if ( recursiveOrientation == 0x4 )
- {
+ } else if ( recursiveOrientation == 0x4 ) {
// discontinuous
newScores[2] += state->GetLeftBoundaryL2RScoreDiscontinuous();
- }
- else if ( recursiveOrientation == 0x0 )
- {
+ } else if ( recursiveOrientation == 0x0 ) {
// discontinuous
newScores[2] += state->GetLeftBoundaryL2RScoreDiscontinuous();
- }
- else
- {
+ } else {
UTIL_THROW2(GetScoreProducerDescription()
<< ": Error in recursive scoring.");
}
@@ -528,61 +475,45 @@ void PhraseOrientationFeature::LeftBoundaryL2RScoreRecursive(int featureID,
FEATUREVERBOSE(6, "Left boundary recursion: " << orientation << " & " << state->m_leftBoundaryNonTerminalL2RPossibleFutureOrientations << " = " << recursiveOrientation
<< " --- Subtracted heuristic score: " << state->m_leftBoundaryNonTerminalL2RScores[state->m_leftBoundaryNonTerminalL2RHeuristicScoreIndex] << std::endl);
- if (!state->m_leftBoundaryRecursionGuard)
- {
+ if (!state->m_leftBoundaryRecursionGuard) {
// recursive call
const PhraseOrientationFeatureState* prevState = state->m_leftBoundaryPrevState;
LeftBoundaryL2RScoreRecursive(featureID, prevState, recursiveOrientation, newScores);
- }
- else
- {
+ } else {
FEATUREVERBOSE(6, "m_leftBoundaryRecursionGuard" << std::endl);
}
}
}
void PhraseOrientationFeature::RightBoundaryR2LScoreRecursive(int featureID,
- const PhraseOrientationFeatureState *state,
- const std::bitset<3> orientation,
- std::vector<float>& newScores) const
+ const PhraseOrientationFeatureState *state,
+ const std::bitset<3> orientation,
+ std::vector<float>& newScores) const
{
- if (state->m_rightBoundaryIsSet)
- {
+ if (state->m_rightBoundaryIsSet) {
// subtract heuristic score from subderivation
newScores[m_offsetR2LScores+state->m_rightBoundaryNonTerminalR2LHeuristicScoreIndex] -= state->m_rightBoundaryNonTerminalR2LScores[state->m_rightBoundaryNonTerminalR2LHeuristicScoreIndex];
// add actual score
std::bitset<3> recursiveOrientation = orientation;
- if ( (orientation == 0x4) || (orientation == 0x0) )
- {
+ if ( (orientation == 0x4) || (orientation == 0x0) ) {
// discontinuous
newScores[m_offsetR2LScores+2] += state->GetRightBoundaryR2LScoreDiscontinuous();
- }
- else
- {
+ } else {
recursiveOrientation &= state->m_rightBoundaryNonTerminalR2LPossibleFutureOrientations;
- if ( recursiveOrientation == 0x1 )
- {
+ if ( recursiveOrientation == 0x1 ) {
// monotone
newScores[m_offsetR2LScores+0] += state->GetRightBoundaryR2LScoreMono();
- }
- else if ( recursiveOrientation == 0x2 )
- {
+ } else if ( recursiveOrientation == 0x2 ) {
// swap
newScores[m_offsetR2LScores+1] += state->GetRightBoundaryR2LScoreSwap();
- }
- else if ( recursiveOrientation == 0x4 )
- {
+ } else if ( recursiveOrientation == 0x4 ) {
// discontinuous
newScores[m_offsetR2LScores+2] += state->GetRightBoundaryR2LScoreDiscontinuous();
- }
- else if ( recursiveOrientation == 0x0 )
- {
+ } else if ( recursiveOrientation == 0x0 ) {
// discontinuous
newScores[m_offsetR2LScores+2] += state->GetRightBoundaryR2LScoreDiscontinuous();
- }
- else
- {
+ } else {
UTIL_THROW2(GetScoreProducerDescription()
<< ": Error in recursive scoring.");
}
@@ -591,19 +522,16 @@ void PhraseOrientationFeature::RightBoundaryR2LScoreRecursive(int featureID,
FEATUREVERBOSE(6, "Right boundary recursion: " << orientation << " & " << state->m_rightBoundaryNonTerminalR2LPossibleFutureOrientations << " = " << recursiveOrientation
<< " --- Subtracted heuristic score: " << state->m_rightBoundaryNonTerminalR2LScores[state->m_rightBoundaryNonTerminalR2LHeuristicScoreIndex] << std::endl);
- if (!state->m_rightBoundaryRecursionGuard)
- {
+ if (!state->m_rightBoundaryRecursionGuard) {
// recursive call
const PhraseOrientationFeatureState* prevState = state->m_rightBoundaryPrevState;
RightBoundaryR2LScoreRecursive(featureID, prevState, recursiveOrientation, newScores);
- }
- else
- {
+ } else {
FEATUREVERBOSE(6, "m_rightBoundaryRecursionGuard" << std::endl);
}
}
}
-
+
}
diff --git a/moses/FF/PhraseOrientationFeature.h b/moses/FF/PhraseOrientationFeature.h
index 28f09a099..b8903aa8b 100644
--- a/moses/FF/PhraseOrientationFeature.h
+++ b/moses/FF/PhraseOrientationFeature.h
@@ -1,11 +1,11 @@
//
// REFERENCE
// ---------
-// When using this feature, please cite:
-//
+// When using this feature, please cite:
+//
// Matthias Huck, Joern Wuebker, Felix Rietig, and Hermann Ney.
-// A Phrase Orientation Model for Hierarchical Machine Translation.
-// In ACL 2013 Eighth Workshop on Statistical Machine Translation (WMT 2013), pages 452-463, Sofia, Bulgaria, August 2013.
+// A Phrase Orientation Model for Hierarchical Machine Translation.
+// In ACL 2013 Eighth Workshop on Statistical Machine Translation (WMT 2013), pages 452-463, Sofia, Bulgaria, August 2013.
//
#pragma once
@@ -28,7 +28,7 @@ public:
friend class PhraseOrientationFeature;
- PhraseOrientationFeatureState()
+ PhraseOrientationFeatureState()
: m_leftBoundaryNonTerminalL2RScores(3,0)
, m_rightBoundaryNonTerminalR2LScores(3,0)
, m_leftBoundaryNonTerminalL2RPossibleFutureOrientations(0x7)
@@ -36,16 +36,14 @@ public:
, m_leftBoundaryRecursionGuard(false)
, m_rightBoundaryRecursionGuard(false)
, m_leftBoundaryIsSet(false)
- , m_rightBoundaryIsSet(false)
- {}
+ , m_rightBoundaryIsSet(false) {
+ }
void SetLeftBoundaryL2R(const std::vector<float> &scores,
size_t heuristicScoreIndex,
std::bitset<3> &possibleFutureOrientations,
- const PhraseOrientationFeatureState* prevState)
- {
- for (size_t i=0; i<3; ++i)
- {
+ const PhraseOrientationFeatureState* prevState) {
+ for (size_t i=0; i<3; ++i) {
m_leftBoundaryNonTerminalL2RScores[i] = scores[i];
m_leftBoundaryNonTerminalL2RPossibleFutureOrientations[i] = possibleFutureOrientations[i];
}
@@ -57,10 +55,8 @@ public:
void SetRightBoundaryR2L(const std::vector<float> &scores,
size_t heuristicScoreIndex,
std::bitset<3> &possibleFutureOrientations,
- const PhraseOrientationFeatureState* prevState)
- {
- for (size_t i=0; i<3; ++i)
- {
+ const PhraseOrientationFeatureState* prevState) {
+ for (size_t i=0; i<3; ++i) {
m_rightBoundaryNonTerminalR2LScores[i] = scores[i];
m_rightBoundaryNonTerminalR2LPossibleFutureOrientations[i] = possibleFutureOrientations[i];
}
@@ -70,139 +66,109 @@ public:
}
- float GetLeftBoundaryL2RScoreMono() const
- {
+ float GetLeftBoundaryL2RScoreMono() const {
return m_leftBoundaryNonTerminalL2RScores[0];
}
- float GetLeftBoundaryL2RScoreSwap() const
- {
+ float GetLeftBoundaryL2RScoreSwap() const {
return m_leftBoundaryNonTerminalL2RScores[1];
}
- float GetLeftBoundaryL2RScoreDiscontinuous() const
- {
+ float GetLeftBoundaryL2RScoreDiscontinuous() const {
return m_leftBoundaryNonTerminalL2RScores[2];
}
- float GetRightBoundaryR2LScoreMono() const
- {
+ float GetRightBoundaryR2LScoreMono() const {
return m_rightBoundaryNonTerminalR2LScores[0];
}
- float GetRightBoundaryR2LScoreSwap() const
- {
+ float GetRightBoundaryR2LScoreSwap() const {
return m_rightBoundaryNonTerminalR2LScores[1];
}
- float GetRightBoundaryR2LScoreDiscontinuous() const
- {
+ float GetRightBoundaryR2LScoreDiscontinuous() const {
return m_rightBoundaryNonTerminalR2LScores[2];
}
- int Compare(const FFState& other) const
- {
+ int Compare(const FFState& other) const {
const PhraseOrientationFeatureState &otherState = static_cast<const PhraseOrientationFeatureState&>(other);
if (!m_leftBoundaryIsSet && !otherState.m_leftBoundaryIsSet &&
- !m_rightBoundaryIsSet && !otherState.m_rightBoundaryIsSet)
- {
+ !m_rightBoundaryIsSet && !otherState.m_rightBoundaryIsSet) {
return 0;
}
- if (m_leftBoundaryIsSet && !otherState.m_leftBoundaryIsSet)
- {
+ if (m_leftBoundaryIsSet && !otherState.m_leftBoundaryIsSet) {
return 1;
}
- if (!m_leftBoundaryIsSet && otherState.m_leftBoundaryIsSet)
- {
+ if (!m_leftBoundaryIsSet && otherState.m_leftBoundaryIsSet) {
return -1;
}
- if (m_rightBoundaryIsSet && !otherState.m_rightBoundaryIsSet)
- {
+ if (m_rightBoundaryIsSet && !otherState.m_rightBoundaryIsSet) {
return 1;
}
- if (!m_rightBoundaryIsSet && otherState.m_rightBoundaryIsSet)
- {
+ if (!m_rightBoundaryIsSet && otherState.m_rightBoundaryIsSet) {
return -1;
}
- if (m_leftBoundaryIsSet)
- {
+ if (m_leftBoundaryIsSet) {
int compareLeft = CompareLeftBoundaryRecursive(*this, otherState);
- if (compareLeft != 0)
- {
+ if (compareLeft != 0) {
return compareLeft;
}
}
- if (m_rightBoundaryIsSet)
- {
+ if (m_rightBoundaryIsSet) {
int compareRight = CompareRightBoundaryRecursive(*this, otherState);
- if (compareRight != 0)
- {
+ if (compareRight != 0) {
return compareRight;
}
}
- return 0;
+ return 0;
};
private:
- static int CompareLeftBoundaryRecursive(const PhraseOrientationFeatureState& state, const PhraseOrientationFeatureState& otherState)
- {
- if (!state.m_leftBoundaryIsSet && !otherState.m_leftBoundaryIsSet)
- {
+ static int CompareLeftBoundaryRecursive(const PhraseOrientationFeatureState& state, const PhraseOrientationFeatureState& otherState) {
+ if (!state.m_leftBoundaryIsSet && !otherState.m_leftBoundaryIsSet) {
return 0;
}
- if (state.m_leftBoundaryIsSet && !otherState.m_leftBoundaryIsSet)
- {
+ if (state.m_leftBoundaryIsSet && !otherState.m_leftBoundaryIsSet) {
return 1;
}
- if (!state.m_leftBoundaryIsSet && otherState.m_leftBoundaryIsSet)
- {
+ if (!state.m_leftBoundaryIsSet && otherState.m_leftBoundaryIsSet) {
return -1;
}
- if ( otherState.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex < state.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex )
- {
+ if ( otherState.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex < state.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex ) {
return 1;
}
- if ( state.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex < otherState.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex )
- {
+ if ( state.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex < otherState.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex ) {
return -1;
}
- if ( Smaller(otherState.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations, state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations) )
- {
+ if ( Smaller(otherState.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations, state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations) ) {
return 1;
}
- if ( Smaller(state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations, otherState.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations) )
- {
+ if ( Smaller(state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations, otherState.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations) ) {
return -1;
}
- for (size_t i=0; i<state.m_leftBoundaryNonTerminalL2RScores.size(); ++i)
- {
- if (state.m_leftBoundaryNonTerminalL2RScores[i] > otherState.m_leftBoundaryNonTerminalL2RScores[i])
- {
+ for (size_t i=0; i<state.m_leftBoundaryNonTerminalL2RScores.size(); ++i) {
+ if (state.m_leftBoundaryNonTerminalL2RScores[i] > otherState.m_leftBoundaryNonTerminalL2RScores[i]) {
return 1;
}
- if (state.m_leftBoundaryNonTerminalL2RScores[i] < otherState.m_leftBoundaryNonTerminalL2RScores[i])
- {
+ if (state.m_leftBoundaryNonTerminalL2RScores[i] < otherState.m_leftBoundaryNonTerminalL2RScores[i]) {
return -1;
}
}
- if (state.m_leftBoundaryRecursionGuard && otherState.m_leftBoundaryRecursionGuard)
- {
+ if (state.m_leftBoundaryRecursionGuard && otherState.m_leftBoundaryRecursionGuard) {
return 0;
}
- if (state.m_leftBoundaryRecursionGuard && !otherState.m_leftBoundaryRecursionGuard)
- {
+ if (state.m_leftBoundaryRecursionGuard && !otherState.m_leftBoundaryRecursionGuard) {
return 1;
}
- if (!state.m_leftBoundaryRecursionGuard && otherState.m_leftBoundaryRecursionGuard)
- {
+ if (!state.m_leftBoundaryRecursionGuard && otherState.m_leftBoundaryRecursionGuard) {
return -1;
}
@@ -212,59 +178,45 @@ private:
return CompareLeftBoundaryRecursive(*prevState, *otherPrevState);
};
- static int CompareRightBoundaryRecursive(const PhraseOrientationFeatureState& state, const PhraseOrientationFeatureState& otherState)
- {
- if (!state.m_rightBoundaryIsSet && !otherState.m_rightBoundaryIsSet)
- {
+ static int CompareRightBoundaryRecursive(const PhraseOrientationFeatureState& state, const PhraseOrientationFeatureState& otherState) {
+ if (!state.m_rightBoundaryIsSet && !otherState.m_rightBoundaryIsSet) {
return 0;
}
- if (state.m_rightBoundaryIsSet && !otherState.m_rightBoundaryIsSet)
- {
+ if (state.m_rightBoundaryIsSet && !otherState.m_rightBoundaryIsSet) {
return 1;
}
- if (!state.m_rightBoundaryIsSet && otherState.m_rightBoundaryIsSet)
- {
+ if (!state.m_rightBoundaryIsSet && otherState.m_rightBoundaryIsSet) {
return -1;
}
- if ( otherState.m_rightBoundaryNonTerminalR2LHeuristicScoreIndex < state.m_rightBoundaryNonTerminalR2LHeuristicScoreIndex )
- {
+ if ( otherState.m_rightBoundaryNonTerminalR2LHeuristicScoreIndex < state.m_rightBoundaryNonTerminalR2LHeuristicScoreIndex ) {
return 1;
}
- if ( state.m_rightBoundaryNonTerminalR2LHeuristicScoreIndex < otherState.m_rightBoundaryNonTerminalR2LHeuristicScoreIndex )
- {
+ if ( state.m_rightBoundaryNonTerminalR2LHeuristicScoreIndex < otherState.m_rightBoundaryNonTerminalR2LHeuristicScoreIndex ) {
return -1;
}
- if ( Smaller(otherState.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations, state.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations) )
- {
+ if ( Smaller(otherState.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations, state.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations) ) {
return 1;
}
- if ( Smaller(state.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations, otherState.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations) )
- {
+ if ( Smaller(state.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations, otherState.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations) ) {
return -1;
}
- for (size_t i=0; i<state.m_rightBoundaryNonTerminalR2LScores.size(); ++i)
- {
- if (state.m_rightBoundaryNonTerminalR2LScores[i] > otherState.m_rightBoundaryNonTerminalR2LScores[i])
- {
+ for (size_t i=0; i<state.m_rightBoundaryNonTerminalR2LScores.size(); ++i) {
+ if (state.m_rightBoundaryNonTerminalR2LScores[i] > otherState.m_rightBoundaryNonTerminalR2LScores[i]) {
return 1;
}
- if (state.m_rightBoundaryNonTerminalR2LScores[i] < otherState.m_rightBoundaryNonTerminalR2LScores[i])
- {
+ if (state.m_rightBoundaryNonTerminalR2LScores[i] < otherState.m_rightBoundaryNonTerminalR2LScores[i]) {
return -1;
}
}
- if (state.m_rightBoundaryRecursionGuard && otherState.m_rightBoundaryRecursionGuard)
- {
+ if (state.m_rightBoundaryRecursionGuard && otherState.m_rightBoundaryRecursionGuard) {
return 0;
}
- if (state.m_rightBoundaryRecursionGuard && !otherState.m_rightBoundaryRecursionGuard)
- {
+ if (state.m_rightBoundaryRecursionGuard && !otherState.m_rightBoundaryRecursionGuard) {
return 1;
}
- if (!state.m_rightBoundaryRecursionGuard && otherState.m_rightBoundaryRecursionGuard)
- {
+ if (!state.m_rightBoundaryRecursionGuard && otherState.m_rightBoundaryRecursionGuard) {
return -1;
}
@@ -274,11 +226,9 @@ private:
return CompareRightBoundaryRecursive(*prevState, *otherPrevState);
};
- template<std::size_t N> static bool Smaller(const std::bitset<N>& x, const std::bitset<N>& y)
- {
- for (size_t i=0; i<N; ++i)
- {
- if (x[i] ^ y[i])
+ template<std::size_t N> static bool Smaller(const std::bitset<N>& x, const std::bitset<N>& y) {
+ for (size_t i=0; i<N; ++i) {
+ if (x[i] ^ y[i])
return y[i];
}
return false;
@@ -323,29 +273,27 @@ public:
void SetParameter(const std::string& key, const std::string& value);
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
targetPhrase.SetRuleSource(source);
};
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {};
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ };
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const TranslationOptionList &translationOptionList) const {
+ }
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
- ScoreComponentCollection* accumulator) const
- {
+ ScoreComponentCollection* accumulator) const {
return new PhraseOrientationFeatureState();
};
@@ -357,13 +305,13 @@ public:
protected:
void LeftBoundaryL2RScoreRecursive(int featureID,
- const PhraseOrientationFeatureState *state,
+ const PhraseOrientationFeatureState *state,
const std::bitset<3> orientation,
std::vector<float>& newScores) const;
void RightBoundaryR2LScoreRecursive(int featureID,
- const PhraseOrientationFeatureState *state,
- const std::bitset<3> orientation,
+ const PhraseOrientationFeatureState *state,
+ const std::bitset<3> orientation,
std::vector<float>& newScores) const;
std::string m_glueTargetLHSStr;
diff --git a/moses/FF/PhrasePairFeature.cpp b/moses/FF/PhrasePairFeature.cpp
index 6daab7e25..0bf5f71f9 100644
--- a/moses/FF/PhrasePairFeature.cpp
+++ b/moses/FF/PhrasePairFeature.cpp
@@ -107,11 +107,11 @@ void PhrasePairFeature::Load()
}
void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
const Phrase& source = inputPath.GetPhrase();
if (m_simple) {
diff --git a/moses/FF/PhrasePairFeature.h b/moses/FF/PhrasePairFeature.h
index a27e641bb..ff22340e9 100644
--- a/moses/FF/PhrasePairFeature.h
+++ b/moses/FF/PhrasePairFeature.h
@@ -41,28 +41,28 @@ public:
bool IsUseable(const FactorMask &mask) const;
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const TranslationOptionList &translationOptionList) const {
+ }
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis& hypo,
- ScoreComponentCollection*) const
- {}
+ ScoreComponentCollection*) const {
+ }
};
diff --git a/moses/FF/PhrasePenalty.cpp b/moses/FF/PhrasePenalty.cpp
index cd1b735df..e4ee294fa 100644
--- a/moses/FF/PhrasePenalty.cpp
+++ b/moses/FF/PhrasePenalty.cpp
@@ -9,41 +9,39 @@ using namespace std;
namespace Moses
{
PhrasePenalty::PhrasePenalty(const std::string &line)
-: StatelessFeatureFunction(1, line)
-, m_perPhraseTable(false)
+ : StatelessFeatureFunction(1, line)
+ , m_perPhraseTable(false)
{
ReadParameters();
}
void PhrasePenalty::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
if (m_perPhraseTable) {
- const PhraseDictionary *pt = targetPhrase.GetContainer();
- if (pt) {
- size_t ptId = pt->GetId();
- UTIL_THROW_IF2(ptId >= m_numScoreComponents, "Wrong number of scores");
+ const PhraseDictionary *pt = targetPhrase.GetContainer();
+ if (pt) {
+ size_t ptId = pt->GetId();
+ UTIL_THROW_IF2(ptId >= m_numScoreComponents, "Wrong number of scores");
- vector<float> scores(m_numScoreComponents, 0);
- scores[ptId] = 1.0f;
+ vector<float> scores(m_numScoreComponents, 0);
+ scores[ptId] = 1.0f;
- scoreBreakdown.Assign(this, scores);
- }
+ scoreBreakdown.Assign(this, scores);
+ }
- }
- else {
- scoreBreakdown.Assign(this, 1.0f);
+ } else {
+ scoreBreakdown.Assign(this, 1.0f);
}
}
void PhrasePenalty::SetParameter(const std::string& key, const std::string& value)
{
if (key == "per-phrase-table") {
- m_perPhraseTable =Scan<bool>(value);
- }
- else {
+ m_perPhraseTable =Scan<bool>(value);
+ } else {
StatelessFeatureFunction::SetParameter(key, value);
}
}
diff --git a/moses/FF/PhrasePenalty.h b/moses/FF/PhrasePenalty.h
index ea0049f52..044184755 100644
--- a/moses/FF/PhrasePenalty.h
+++ b/moses/FF/PhrasePenalty.h
@@ -15,31 +15,31 @@ public:
}
virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const Syntax::SHyperedge &hyperedge,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const TranslationOptionList &translationOptionList) const {
+ }
void SetParameter(const std::string& key, const std::string& value);
protected:
diff --git a/moses/FF/ReferenceComparison.h b/moses/FF/ReferenceComparison.h
index 58cc0221a..c28cdc5d7 100644
--- a/moses/FF/ReferenceComparison.h
+++ b/moses/FF/ReferenceComparison.h
@@ -10,39 +10,41 @@ namespace Moses
class ReferenceComparison : public StatelessFeatureFunction
{
public:
- ReferenceComparison(const std::string &line);
-
- virtual bool IsUseable(const FactorMask &mask) const
- { return true; }
-
- virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
-
- virtual void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
-
- void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
-
- virtual void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
-
- virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
-
- std::vector<float> DefaultWeights() const
- { return std::vector<float>(); }
+ ReferenceComparison(const std::string &line);
+
+ virtual bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
+
+ virtual void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
+
+ virtual void EvaluateWithSourceContext(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
+ virtual void EvaluateWhenApplied(const Hypothesis& hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
+
+ virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
+
+ std::vector<float> DefaultWeights() const {
+ return std::vector<float>();
+ }
protected:
diff --git a/moses/FF/RuleScope.cpp b/moses/FF/RuleScope.cpp
index ed329c4ca..5f38a59ae 100644
--- a/moses/FF/RuleScope.cpp
+++ b/moses/FF/RuleScope.cpp
@@ -5,8 +5,8 @@
namespace Moses
{
RuleScope::RuleScope(const std::string &line)
-:StatelessFeatureFunction(1, line)
-,m_sourceSyntax(true)
+ :StatelessFeatureFunction(1, line)
+ ,m_sourceSyntax(true)
{
}
@@ -17,9 +17,9 @@ bool IsAmbiguous(const Word &word, bool sourceSyntax)
}
void RuleScope::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
// adjacent non-term count as 1 ammbiguity, rather than 2 as in rule scope
// source can't be empty, right?
@@ -27,23 +27,22 @@ void RuleScope::EvaluateInIsolation(const Phrase &source
int count = 0;
for (size_t i = 0; i < source.GetSize() - 0; ++i) {
- const Word &word = source.GetWord(i);
- bool ambiguous = IsAmbiguous(word, m_sourceSyntax);
- if (ambiguous) {
- ++count;
- }
- else {
- if (count > 0) {
- score += count;
- }
- count = -1;
- }
+ const Word &word = source.GetWord(i);
+ bool ambiguous = IsAmbiguous(word, m_sourceSyntax);
+ if (ambiguous) {
+ ++count;
+ } else {
+ if (count > 0) {
+ score += count;
+ }
+ count = -1;
+ }
}
// 1st & last always adjacent to ambiguity
++count;
if (count > 0) {
- score += count;
+ score += count;
}
scoreBreakdown.PlusEquals(this, score);
@@ -52,7 +51,7 @@ void RuleScope::EvaluateInIsolation(const Phrase &source
void RuleScope::SetParameter(const std::string& key, const std::string& value)
{
if (key == "source-syntax") {
- m_sourceSyntax = Scan<bool>(value);
+ m_sourceSyntax = Scan<bool>(value);
} else {
StatelessFeatureFunction::SetParameter(key, value);
}
diff --git a/moses/FF/RuleScope.h b/moses/FF/RuleScope.h
index 6490bffa3..8bf7b7670 100644
--- a/moses/FF/RuleScope.h
+++ b/moses/FF/RuleScope.h
@@ -9,38 +9,39 @@ namespace Moses
class RuleScope : public StatelessFeatureFunction
{
public:
- RuleScope(const std::string &line);
-
- virtual bool IsUseable(const FactorMask &mask) const
- { return true; }
-
- virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
-
- virtual void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
-
- void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
-
-
- virtual void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
-
- virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
-
- void SetParameter(const std::string& key, const std::string& value);
+ RuleScope(const std::string &line);
+
+ virtual bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
+
+ virtual void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
+
+ virtual void EvaluateWithSourceContext(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
+
+ virtual void EvaluateWhenApplied(const Hypothesis& hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
+
+ virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
+
+ void SetParameter(const std::string& key, const std::string& value);
protected:
bool m_sourceSyntax;
diff --git a/moses/FF/SetSourcePhrase.cpp b/moses/FF/SetSourcePhrase.cpp
index f89683f28..115affa52 100644
--- a/moses/FF/SetSourcePhrase.cpp
+++ b/moses/FF/SetSourcePhrase.cpp
@@ -4,18 +4,18 @@
namespace Moses
{
SetSourcePhrase::SetSourcePhrase(const std::string &line)
-:StatelessFeatureFunction(0, line)
+ :StatelessFeatureFunction(0, line)
{
m_tuneable = false;
ReadParameters();
}
void SetSourcePhrase::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
- targetPhrase.SetRuleSource(source);
+ targetPhrase.SetRuleSource(source);
}
}
diff --git a/moses/FF/SetSourcePhrase.h b/moses/FF/SetSourcePhrase.h
index ef27448c5..e34e618f2 100644
--- a/moses/FF/SetSourcePhrase.h
+++ b/moses/FF/SetSourcePhrase.h
@@ -11,36 +11,38 @@ class SetSourcePhrase : public StatelessFeatureFunction
public:
SetSourcePhrase(const std::string &line);
- virtual bool IsUseable(const FactorMask &mask) const
- { return true; }
+ virtual bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
virtual void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
-
- void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ }
virtual void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
- std::vector<float> DefaultWeights() const
- { return std::vector<float>(); }
+ std::vector<float> DefaultWeights() const {
+ return std::vector<float>();
+ }
};
diff --git a/moses/FF/SkeletonChangeInput.cpp b/moses/FF/SkeletonChangeInput.cpp
index a2097958d..7ab267d96 100644
--- a/moses/FF/SkeletonChangeInput.cpp
+++ b/moses/FF/SkeletonChangeInput.cpp
@@ -17,9 +17,9 @@ SkeletonChangeInput::SkeletonChangeInput(const std::string &line)
}
void SkeletonChangeInput::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
// dense scores
vector<float> newScores(m_numScoreComponents);
@@ -33,30 +33,30 @@ void SkeletonChangeInput::EvaluateInIsolation(const Phrase &source
}
void SkeletonChangeInput::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
- if (targetPhrase.GetNumNonTerminals()) {
- vector<float> newScores(m_numScoreComponents);
- newScores[0] = - std::numeric_limits<float>::infinity();
- scoreBreakdown.PlusEquals(this, newScores);
- }
+ if (targetPhrase.GetNumNonTerminals()) {
+ vector<float> newScores(m_numScoreComponents);
+ newScores[0] = - std::numeric_limits<float>::infinity();
+ scoreBreakdown.PlusEquals(this, newScores);
+ }
}
void SkeletonChangeInput::EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
+ , const TranslationOptionList &translationOptionList) const
{}
void SkeletonChangeInput::EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
+ ScoreComponentCollection* accumulator) const
{}
void SkeletonChangeInput::EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
+ ScoreComponentCollection* accumulator) const
{}
void SkeletonChangeInput::ChangeSource(InputType *&input) const
@@ -70,16 +70,16 @@ void SkeletonChangeInput::ChangeSource(InputType *&input) const
size_t size = sentence->GetSize();
for (size_t i = 0; i < size; ++i) {
- Word &word = sentence->Phrase::GetWord(i);
- const Factor *factor0 = word[0];
+ Word &word = sentence->Phrase::GetWord(i);
+ const Factor *factor0 = word[0];
- std::string str = factor0->GetString().as_string();
- if (str.length() > 4) {
- str = str.substr(0, 4);
- }
+ std::string str = factor0->GetString().as_string();
+ if (str.length() > 4) {
+ str = str.substr(0, 4);
+ }
- const Factor *factor1 = fc.AddFactor(str);
- word.SetFactor(1, factor1);
+ const Factor *factor1 = fc.AddFactor(str);
+ word.SetFactor(1, factor1);
}
}
diff --git a/moses/FF/SkeletonChangeInput.h b/moses/FF/SkeletonChangeInput.h
index f53f64fc8..23ede5c97 100644
--- a/moses/FF/SkeletonChangeInput.h
+++ b/moses/FF/SkeletonChangeInput.h
@@ -16,26 +16,26 @@ public:
}
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void ChangeSource(InputType *&input) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
-
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const;
-
+ , const TranslationOptionList &translationOptionList) const;
+
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
void SetParameter(const std::string& key, const std::string& value);
diff --git a/moses/FF/SkeletonStatefulFF.cpp b/moses/FF/SkeletonStatefulFF.cpp
index ffec7575b..931556007 100644
--- a/moses/FF/SkeletonStatefulFF.cpp
+++ b/moses/FF/SkeletonStatefulFF.cpp
@@ -24,21 +24,21 @@ SkeletonStatefulFF::SkeletonStatefulFF(const std::string &line)
}
void SkeletonStatefulFF::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{}
void SkeletonStatefulFF::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{}
void SkeletonStatefulFF::EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
+ , const TranslationOptionList &translationOptionList) const
{}
FFState* SkeletonStatefulFF::EvaluateWhenApplied(
diff --git a/moses/FF/SkeletonStatefulFF.h b/moses/FF/SkeletonStatefulFF.h
index c6aea52c8..cc0bc07a0 100644
--- a/moses/FF/SkeletonStatefulFF.h
+++ b/moses/FF/SkeletonStatefulFF.h
@@ -12,8 +12,8 @@ class SkeletonState : public FFState
int m_targetLen;
public:
SkeletonState(int targetLen)
- :m_targetLen(targetLen)
- {}
+ :m_targetLen(targetLen) {
+ }
int Compare(const FFState& other) const;
};
@@ -31,19 +31,19 @@ public:
}
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
-
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const;
-
+ , const TranslationOptionList &translationOptionList) const;
+
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
diff --git a/moses/FF/SkeletonStatelessFF.cpp b/moses/FF/SkeletonStatelessFF.cpp
index 8619c958d..209409a8e 100644
--- a/moses/FF/SkeletonStatelessFF.cpp
+++ b/moses/FF/SkeletonStatelessFF.cpp
@@ -14,9 +14,9 @@ SkeletonStatelessFF::SkeletonStatelessFF(const std::string &line)
}
void SkeletonStatelessFF::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
// dense scores
vector<float> newScores(m_numScoreComponents);
@@ -30,30 +30,30 @@ void SkeletonStatelessFF::EvaluateInIsolation(const Phrase &source
}
void SkeletonStatelessFF::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
- if (targetPhrase.GetNumNonTerminals()) {
- vector<float> newScores(m_numScoreComponents);
- newScores[0] = - std::numeric_limits<float>::infinity();
- scoreBreakdown.PlusEquals(this, newScores);
- }
+ if (targetPhrase.GetNumNonTerminals()) {
+ vector<float> newScores(m_numScoreComponents);
+ newScores[0] = - std::numeric_limits<float>::infinity();
+ scoreBreakdown.PlusEquals(this, newScores);
+ }
}
void SkeletonStatelessFF::EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
+ , const TranslationOptionList &translationOptionList) const
{}
void SkeletonStatelessFF::EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
+ ScoreComponentCollection* accumulator) const
{}
void SkeletonStatelessFF::EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
+ ScoreComponentCollection* accumulator) const
{}
void SkeletonStatelessFF::SetParameter(const std::string& key, const std::string& value)
diff --git a/moses/FF/SkeletonStatelessFF.h b/moses/FF/SkeletonStatelessFF.h
index b77427727..7fb6634c2 100644
--- a/moses/FF/SkeletonStatelessFF.h
+++ b/moses/FF/SkeletonStatelessFF.h
@@ -16,23 +16,23 @@ public:
}
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const;
+ , const TranslationOptionList &translationOptionList) const;
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
void SetParameter(const std::string& key, const std::string& value);
diff --git a/moses/FF/SkeletonTranslationOptionListFeature.h b/moses/FF/SkeletonTranslationOptionListFeature.h
index cc7eae160..1d88717e1 100644
--- a/moses/FF/SkeletonTranslationOptionListFeature.h
+++ b/moses/FF/SkeletonTranslationOptionListFeature.h
@@ -10,8 +10,7 @@ class SkeletonTranslationOptionListFeature : public StatelessFeatureFunction
{
public:
SkeletonTranslationOptionListFeature(const std::string &line)
- :StatelessFeatureFunction(1, line)
- {
+ :StatelessFeatureFunction(1, line) {
ReadParameters();
}
@@ -20,48 +19,47 @@ public:
}
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
-
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
+
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {
+ , const TranslationOptionList &translationOptionList) const {
vector<float> newScores(m_numScoreComponents);
newScores[0] = translationOptionList.size();
-
+
TranslationOptionList::const_iterator iterTransOpt;
for(iterTransOpt = translationOptionList.begin() ;
iterTransOpt != translationOptionList.end() ; ++iterTransOpt) {
TranslationOption &transOpt = **iterTransOpt;
-
+
ScoreComponentCollection &scoreBreakDown = transOpt.GetScoreBreakdown();
scoreBreakDown.PlusEquals(this, newScores);
-
+
transOpt.UpdateScore();
}
}
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
-
+ ScoreComponentCollection* accumulator) const {
+ }
+
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
- void SetParameter(const std::string& key, const std::string& value)
- {}
+ void SetParameter(const std::string& key, const std::string& value) {
+ }
};
diff --git a/moses/FF/SoftMatchingFeature.cpp b/moses/FF/SoftMatchingFeature.cpp
index 0475547da..b2d8e7ea5 100644
--- a/moses/FF/SoftMatchingFeature.cpp
+++ b/moses/FF/SoftMatchingFeature.cpp
@@ -24,8 +24,8 @@ void SoftMatchingFeature::SetParameter(const std::string& key, const std::string
m_tuneable = Scan<bool>(value);
} else if (key == "filterable") { //ignore
} else if (key == "path") {
- const std::string filePath = value;
- Load(filePath);
+ const std::string filePath = value;
+ Load(filePath);
} else {
UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value);
}
@@ -35,34 +35,34 @@ void SoftMatchingFeature::SetParameter(const std::string& key, const std::string
bool SoftMatchingFeature::Load(const std::string& filePath)
{
- StaticData &staticData = StaticData::InstanceNonConst();
+ StaticData &staticData = StaticData::InstanceNonConst();
- InputFileStream inStream(filePath);
- std::string line;
- while(getline(inStream, line)) {
- std::vector<std::string> tokens = Tokenize(line);
- UTIL_THROW_IF2(tokens.size() != 2, "Error: wrong format of SoftMatching file: must have two nonterminals per line");
+ InputFileStream inStream(filePath);
+ std::string line;
+ while(getline(inStream, line)) {
+ std::vector<std::string> tokens = Tokenize(line);
+ UTIL_THROW_IF2(tokens.size() != 2, "Error: wrong format of SoftMatching file: must have two nonterminals per line");
- // no soft matching necessary if LHS and RHS are the same
- if (tokens[0] == tokens[1]) {
- continue;
- }
+ // no soft matching necessary if LHS and RHS are the same
+ if (tokens[0] == tokens[1]) {
+ continue;
+ }
- Word LHS, RHS;
- LHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), tokens[0], true);
- RHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), tokens[1], true);
+ Word LHS, RHS;
+ LHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), tokens[0], true);
+ RHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), tokens[1], true);
- m_softMatches[RHS[0]->GetId()].push_back(LHS);
- GetOrSetFeatureName(RHS, LHS);
- }
+ m_softMatches[RHS[0]->GetId()].push_back(LHS);
+ GetOrSetFeatureName(RHS, LHS);
+ }
- staticData.SetSoftMatches(m_softMatches);
+ staticData.SetSoftMatches(m_softMatches);
- return true;
+ return true;
}
void SoftMatchingFeature::EvaluateWhenApplied(const ChartHypothesis& hypo,
- ScoreComponentCollection* accumulator) const
+ ScoreComponentCollection* accumulator) const
{
const TargetPhrase& target = hypo.GetCurrTargetPhrase();
@@ -87,7 +87,8 @@ void SoftMatchingFeature::EvaluateWhenApplied(const ChartHypothesis& hypo,
}
// when loading, or when we notice that non-terminals have been added after loading, we resize vectors
-void SoftMatchingFeature::ResizeCache() const {
+void SoftMatchingFeature::ResizeCache() const
+{
FactorCollection& fc = FactorCollection::Instance();
size_t numNonTerminals = fc.GetNumNonTerminals();
@@ -98,7 +99,8 @@ void SoftMatchingFeature::ResizeCache() const {
}
-const std::string& SoftMatchingFeature::GetOrSetFeatureName(const Word& RHS, const Word& LHS) const {
+const std::string& SoftMatchingFeature::GetOrSetFeatureName(const Word& RHS, const Word& LHS) const
+{
try {
#ifdef WITH_THREADS //try read-only lock
boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
@@ -107,23 +109,22 @@ const std::string& SoftMatchingFeature::GetOrSetFeatureName(const Word& RHS, con
if (!name.empty()) {
return name;
}
- }
- catch (const std::out_of_range& oor) {
+ } catch (const std::out_of_range& oor) {
#ifdef WITH_THREADS //need to resize cache; write lock
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif
ResizeCache();
}
#ifdef WITH_THREADS //need to update cache; write lock
- boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
+ boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif
- std::string &name = m_nameCache[RHS[0]->GetId()][LHS[0]->GetId()];
- const std::vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
- std::string LHS_string = LHS.GetString(outputFactorOrder, false);
- std::string RHS_string = RHS.GetString(outputFactorOrder, false);
- name = LHS_string + "->" + RHS_string;
- return name;
- }
+ std::string &name = m_nameCache[RHS[0]->GetId()][LHS[0]->GetId()];
+ const std::vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
+ std::string LHS_string = LHS.GetString(outputFactorOrder, false);
+ std::string RHS_string = RHS.GetString(outputFactorOrder, false);
+ name = LHS_string + "->" + RHS_string;
+ return name;
+}
}
diff --git a/moses/FF/SoftMatchingFeature.h b/moses/FF/SoftMatchingFeature.h
index 0b4f07c5f..d524a1d07 100644
--- a/moses/FF/SoftMatchingFeature.h
+++ b/moses/FF/SoftMatchingFeature.h
@@ -20,25 +20,25 @@ public:
}
virtual void EvaluateWhenApplied(const ChartHypothesis& hypo,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const {};
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {};
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const {};
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {};
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const TranslationOptionList &translationOptionList) const {
+ }
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const {};
+ ScoreComponentCollection* accumulator) const {};
bool Load(const std::string &filePath);
diff --git a/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp b/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp
index e5f79d906..deec05bca 100644
--- a/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp
+++ b/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp
@@ -78,14 +78,14 @@ void SoftSourceSyntacticConstraintsFeature::LoadSourceLabelSet()
try {
tokenizer >> label >> index;
} catch (const std::exception &e) {
- UTIL_THROW2(GetScoreProducerDescription()
+ UTIL_THROW2(GetScoreProducerDescription()
<< ": Error reading source label set file " << m_sourceLabelSetFile << " .");
}
std::pair< boost::unordered_map<std::string,size_t>::iterator, bool > inserted = m_sourceLabels.insert( std::pair<std::string,size_t>(label,index) );
UTIL_THROW_IF2(!inserted.second, GetScoreProducerDescription()
<< ": Source label set file " << m_sourceLabelSetFile << " should contain each syntactic label only once.");
-
- if (index >= m_sourceLabelsByIndex.size()) {
+
+ if (index >= m_sourceLabelsByIndex.size()) {
m_sourceLabelsByIndex.resize(index+1);
m_sourceLabelsByIndex_RHS_1.resize(index+1);
m_sourceLabelsByIndex_RHS_0.resize(index+1);
@@ -184,11 +184,11 @@ void SoftSourceSyntacticConstraintsFeature::LoadTargetSourceLeftHandSideJointCou
const Factor* targetLabelFactor = factorCollection.AddFactor(targetLabel,true);
sourceLHSCounts[foundSourceLabelIndex->second] += count;
- std::pair< boost::unordered_map<const Factor*,float >::iterator, bool > insertedTargetLHSCount =
+ std::pair< boost::unordered_map<const Factor*,float >::iterator, bool > insertedTargetLHSCount =
targetLHSCounts.insert( std::pair<const Factor*,float>(targetLabelFactor,count) );
if (!insertedTargetLHSCount.second) {
(insertedTargetLHSCount.first)->second += count;
- boost::unordered_map<const Factor*, std::vector< std::pair<float,float> >* >::iterator jointCountIt =
+ boost::unordered_map<const Factor*, std::vector< std::pair<float,float> >* >::iterator jointCountIt =
m_labelPairProbabilities.find( targetLabelFactor );
assert(jointCountIt != m_labelPairProbabilities.end());
(jointCountIt->second)->at(foundSourceLabelIndex->second).first += count;
@@ -197,7 +197,7 @@ void SoftSourceSyntacticConstraintsFeature::LoadTargetSourceLeftHandSideJointCou
std::pair<float,float> init(0.0,0.0);
std::vector< std::pair<float,float> >* sourceVector = new std::vector< std::pair<float,float> >(m_sourceLabels.size(),init);
sourceVector->at(foundSourceLabelIndex->second) = std::pair<float,float>(count,count);
- std::pair< boost::unordered_map<const Factor*, std::vector< std::pair<float,float> >* >::iterator, bool > insertedJointCount =
+ std::pair< boost::unordered_map<const Factor*, std::vector< std::pair<float,float> >* >::iterator, bool > insertedJointCount =
m_labelPairProbabilities.insert( std::pair<const Factor*, std::vector< std::pair<float,float> >* >(targetLabelFactor,sourceVector) );
assert(insertedJointCount.second);
}
@@ -230,28 +230,25 @@ void SoftSourceSyntacticConstraintsFeature::LoadTargetSourceLeftHandSideJointCou
void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
assert(stackVec);
- IFFEATUREVERBOSE(2)
- {
- FEATUREVERBOSE(2, targetPhrase << std::endl);
- FEATUREVERBOSE(2, inputPath << std::endl);
- for (size_t i = 0; i < stackVec->size(); ++i)
- {
+ IFFEATUREVERBOSE(2) {
+ FEATUREVERBOSE(2, targetPhrase << std::endl);
+ FEATUREVERBOSE(2, inputPath << std::endl);
+ for (size_t i = 0; i < stackVec->size(); ++i) {
const ChartCellLabel &cell = *stackVec->at(i);
const WordsRange &ntRange = cell.GetCoverage();
FEATUREVERBOSE(2, "stackVec[ " << i << " ] : " << ntRange.GetStartPos() << " - " << ntRange.GetEndPos() << std::endl);
}
for (AlignmentInfo::const_iterator it=targetPhrase.GetAlignNonTerm().begin();
- it!=targetPhrase.GetAlignNonTerm().end(); ++it)
- {
+ it!=targetPhrase.GetAlignNonTerm().end(); ++it) {
FEATUREVERBOSE(2, "alignNonTerm " << it->first << " " << it->second << std::endl);
}
}
@@ -278,11 +275,11 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu
if (const PhraseProperty *property = targetPhrase.GetProperty("SourceLabels")) {
- const SourceLabelsPhraseProperty *sourceLabelsPhraseProperty = static_cast<const SourceLabelsPhraseProperty*>(property);
+ const SourceLabelsPhraseProperty *sourceLabelsPhraseProperty = static_cast<const SourceLabelsPhraseProperty*>(property);
- nNTs = sourceLabelsPhraseProperty->GetNumberOfNonTerminals();
+ nNTs = sourceLabelsPhraseProperty->GetNumberOfNonTerminals();
float totalCount = sourceLabelsPhraseProperty->GetTotalCount();
-
+
// prepare for input tree label matching
std::vector< boost::unordered_set<size_t> > treeInputLabelsRHS(nNTs-1);
boost::unordered_set<size_t> treeInputLabelsLHS;
@@ -294,7 +291,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu
std::vector<const Factor*> targetLabelsRHS;
if (nNTs > 1) { // rule has right-hand side non-terminals, i.e. it's a hierarchical rule
size_t nonTerminalNumber = 0;
-
+
for (size_t phrasePos=0; phrasePos<targetPhrase.GetSize(); ++phrasePos) {
// consult rule for either word or non-terminal
const Word &word = targetPhrase.GetWord(phrasePos);
@@ -311,20 +308,17 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu
const NonTerminalSet& prevTreeInputLabels = treeInput.GetLabelSet(prevStartPos,prevEndPos);
for (NonTerminalSet::const_iterator prevTreeInputLabelsIt = prevTreeInputLabels.begin();
- prevTreeInputLabelsIt != prevTreeInputLabels.end(); ++prevTreeInputLabelsIt)
- {
- if (*prevTreeInputLabelsIt != outputDefaultNonTerminal)
- {
- boost::unordered_map<const Factor*,size_t>::const_iterator foundPrevTreeInputLabel
+ prevTreeInputLabelsIt != prevTreeInputLabels.end(); ++prevTreeInputLabelsIt) {
+ if (*prevTreeInputLabelsIt != outputDefaultNonTerminal) {
+ boost::unordered_map<const Factor*,size_t>::const_iterator foundPrevTreeInputLabel
= m_sourceLabelIndexesByFactor.find((*prevTreeInputLabelsIt)[0]);
- if (foundPrevTreeInputLabel != m_sourceLabelIndexesByFactor.end())
- {
+ if (foundPrevTreeInputLabel != m_sourceLabelIndexesByFactor.end()) {
size_t prevTreeInputLabelIndex = foundPrevTreeInputLabel->second;
treeInputLabelsRHS[nonTerminalNumber].insert(prevTreeInputLabelIndex);
}
}
}
-
+
++nonTerminalNumber;
}
}
@@ -339,7 +333,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu
for (NonTerminalSet::const_iterator treeInputLabelsIt = treeInputLabels.begin();
treeInputLabelsIt != treeInputLabels.end(); ++treeInputLabelsIt) {
if (*treeInputLabelsIt != outputDefaultNonTerminal) {
- boost::unordered_map<const Factor*,size_t>::const_iterator foundTreeInputLabel
+ boost::unordered_map<const Factor*,size_t>::const_iterator foundTreeInputLabel
= m_sourceLabelIndexesByFactor.find((*treeInputLabelsIt)[0]);
if (foundTreeInputLabel != m_sourceLabelIndexesByFactor.end()) {
size_t treeInputLabelIndex = foundTreeInputLabel->second;
@@ -348,7 +342,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu
}
}
-
+
// inspect source-labelled rule items
std::vector< boost::unordered_set<size_t> > sparseScoredTreeInputLabelsRHS(nNTs-1);
@@ -378,15 +372,15 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu
treeInputMatchRHSCountByNonTerminal[nonTerminalNumber] = true;
- if ( m_featureVariant == 2 ||
+ if ( m_featureVariant == 2 ||
(m_featureVariant == 3 && m_coreSourceLabels.find(*sourceLabelsRHSIt) != m_coreSourceLabels.end()) ) {
// score sparse features: RHS match
if (sparseScoredTreeInputLabelsRHS[nonTerminalNumber].find(*sourceLabelsRHSIt) == sparseScoredTreeInputLabelsRHS[nonTerminalNumber].end()) {
- // (only if no match has been scored for this tree input label and rule non-terminal with a previous sourceLabelItem)
+ // (only if no match has been scored for this tree input label and rule non-terminal with a previous sourceLabelItem)
float score_RHS_1 = (float)1/treeInputLabelsRHS[nonTerminalNumber].size();
scoreBreakdown.PlusEquals(this,
m_sourceLabelsByIndex_RHS_1[*sourceLabelsRHSIt],
- score_RHS_1);
+ score_RHS_1);
sparseScoredTreeInputLabelsRHS[nonTerminalNumber].insert(*sourceLabelsRHSIt);
}
}
@@ -413,15 +407,15 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu
currentSourceLabelItemHasLHSTreeInputMatch = true;
- if ( m_featureVariant == 2 ||
+ if ( m_featureVariant == 2 ||
(m_featureVariant == 3 && m_coreSourceLabels.find(sourceLabelsLHSIt->first) != m_coreSourceLabels.end()) ) {
// score sparse features: LHS match
if (sparseScoredTreeInputLabelsLHS.find(sourceLabelsLHSIt->first) == sparseScoredTreeInputLabelsLHS.end()) {
- // (only if no match has been scored for this tree input label and rule non-terminal with a previous sourceLabelItem)
+ // (only if no match has been scored for this tree input label and rule non-terminal with a previous sourceLabelItem)
float score_LHS_1 = (float)1/treeInputLabelsLHS.size();
scoreBreakdown.PlusEquals(this,
m_sourceLabelsByIndex_LHS_1[sourceLabelsLHSIt->first],
- score_LHS_1);
+ score_LHS_1);
sparseScoredTreeInputLabelsLHS.insert(sourceLabelsLHSIt->first);
}
}
@@ -476,13 +470,13 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu
// RHS
for (size_t nonTerminalNumber = 0; nonTerminalNumber < nNTs-1; ++nonTerminalNumber) {
- // nNTs-1 because nNTs also counts the left-hand side non-terminal
+ // nNTs-1 because nNTs also counts the left-hand side non-terminal
float score_RHS_0 = (float)1/treeInputLabelsRHS[nonTerminalNumber].size();
for (boost::unordered_set<size_t>::const_iterator treeInputLabelsRHSIt = treeInputLabelsRHS[nonTerminalNumber].begin();
treeInputLabelsRHSIt != treeInputLabelsRHS[nonTerminalNumber].end(); ++treeInputLabelsRHSIt) {
- if ( m_featureVariant == 2 ||
+ if ( m_featureVariant == 2 ||
(m_featureVariant == 3 && m_coreSourceLabels.find(*treeInputLabelsRHSIt) != m_coreSourceLabels.end()) ) {
if (sparseScoredTreeInputLabelsRHS[nonTerminalNumber].find(*treeInputLabelsRHSIt) == sparseScoredTreeInputLabelsRHS[nonTerminalNumber].end()) {
@@ -501,7 +495,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu
for (boost::unordered_set<size_t>::const_iterator treeInputLabelsLHSIt = treeInputLabelsLHS.begin();
treeInputLabelsLHSIt != treeInputLabelsLHS.end(); ++treeInputLabelsLHSIt) {
- if ( m_featureVariant == 2 ||
+ if ( m_featureVariant == 2 ||
(m_featureVariant == 3 && m_coreSourceLabels.find(*treeInputLabelsLHSIt) != m_coreSourceLabels.end()) ) {
if (sparseScoredTreeInputLabelsLHS.find(*treeInputLabelsLHSIt) == sparseScoredTreeInputLabelsLHS.end()) {
@@ -514,7 +508,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu
}
}
-
+
} else {
// abort with error message if the phrase does not translate an unknown word
@@ -532,16 +526,16 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu
// input tree matching
switch (m_featureVariant) {
- case 0:
- newScores[0] = hasCompleteTreeInputMatch;
- break;
+ case 0:
+ newScores[0] = hasCompleteTreeInputMatch;
+ break;
- case 1:
- newScores[0] = ( (hasCompleteTreeInputMatch || isGlueGrammarRule || isUnkRule) ? 0 : std::numeric_limits<float>::min() );
- break;
+ case 1:
+ newScores[0] = ( (hasCompleteTreeInputMatch || isGlueGrammarRule || isUnkRule) ? 0 : std::numeric_limits<float>::min() );
+ break;
- default:
- newScores[0] = hasCompleteTreeInputMatch;
+ default:
+ newScores[0] = hasCompleteTreeInputMatch;
}
newScores[1] = treeInputMismatchLHSBinary;
newScores[2] = treeInputMismatchRHSCount;
@@ -552,12 +546,12 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu
scoreBreakdown.PlusEquals(this, newScores);
}
-
+
std::pair<float,float> SoftSourceSyntacticConstraintsFeature::GetLabelPairProbabilities(
- const Factor* target,
+ const Factor* target,
const size_t source) const
{
- boost::unordered_map<const Factor*, std::vector< std::pair<float,float> >* >::const_iterator found =
+ boost::unordered_map<const Factor*, std::vector< std::pair<float,float> >* >::const_iterator found =
m_labelPairProbabilities.find(target);
if ( found == m_labelPairProbabilities.end() ) {
return std::pair<float,float>(0,0);
@@ -565,6 +559,6 @@ std::pair<float,float> SoftSourceSyntacticConstraintsFeature::GetLabelPairProbab
return found->second->at(source);
}
-
+
}
diff --git a/moses/FF/SoftSourceSyntacticConstraintsFeature.h b/moses/FF/SoftSourceSyntacticConstraintsFeature.h
index c7d081970..e2d670656 100644
--- a/moses/FF/SoftSourceSyntacticConstraintsFeature.h
+++ b/moses/FF/SoftSourceSyntacticConstraintsFeature.h
@@ -30,34 +30,33 @@ public:
void SetParameter(const std::string& key, const std::string& value);
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
targetPhrase.SetRuleSource(source);
};
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
-
void EvaluateWhenApplied(
const Hypothesis& cur_hypo,
- ScoreComponentCollection* accumulator) const
- {};
+ ScoreComponentCollection* accumulator) const {
+ };
void EvaluateWhenApplied(
const ChartHypothesis& cur_hypo,
- ScoreComponentCollection* accumulator) const
- {};
+ ScoreComponentCollection* accumulator) const {
+ };
private:
std::string m_sourceLabelSetFile;
@@ -88,8 +87,8 @@ private:
void LoadCoreSourceLabelSet();
void LoadTargetSourceLeftHandSideJointCountFile();
- std::pair<float,float> GetLabelPairProbabilities(const Factor* target,
- const size_t source) const;
+ std::pair<float,float> GetLabelPairProbabilities(const Factor* target,
+ const size_t source) const;
};
diff --git a/moses/FF/SourceGHKMTreeInputMatchFeature.cpp b/moses/FF/SourceGHKMTreeInputMatchFeature.cpp
index 38238b10c..15509f1e4 100644
--- a/moses/FF/SourceGHKMTreeInputMatchFeature.cpp
+++ b/moses/FF/SourceGHKMTreeInputMatchFeature.cpp
@@ -33,11 +33,11 @@ void SourceGHKMTreeInputMatchFeature::SetParameter(const std::string& key, const
// assumes that source-side syntax labels are stored in the target non-terminal field of the rules
void SourceGHKMTreeInputMatchFeature::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
const WordsRange& wordsRange = inputPath.GetWordsRange();
size_t startPos = wordsRange.GetStartPos();
diff --git a/moses/FF/SourceGHKMTreeInputMatchFeature.h b/moses/FF/SourceGHKMTreeInputMatchFeature.h
index 8143a3dc5..15f1877de 100644
--- a/moses/FF/SourceGHKMTreeInputMatchFeature.h
+++ b/moses/FF/SourceGHKMTreeInputMatchFeature.h
@@ -18,27 +18,27 @@ public:
void SetParameter(const std::string& key, const std::string& value);
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const {};
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {};
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
-
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const {};
+ ScoreComponentCollection* accumulator) const {};
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const {};
+ ScoreComponentCollection* accumulator) const {};
};
diff --git a/moses/FF/SourceWordDeletionFeature.h b/moses/FF/SourceWordDeletionFeature.h
index 954a36f0d..fe0043553 100644
--- a/moses/FF/SourceWordDeletionFeature.h
+++ b/moses/FF/SourceWordDeletionFeature.h
@@ -29,28 +29,28 @@ public:
bool IsUseable(const FactorMask &mask) const;
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
-
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void ComputeFeatures(const Phrase &source,
const TargetPhrase& targetPhrase,
diff --git a/moses/FF/SpanLength.cpp b/moses/FF/SpanLength.cpp
index 7a7c87be8..0e14069ee 100644
--- a/moses/FF/SpanLength.cpp
+++ b/moses/FF/SpanLength.cpp
@@ -14,33 +14,33 @@ using namespace std;
namespace Moses
{
SpanLength::SpanLength(const std::string &line)
-:StatelessFeatureFunction(1, line)
-,m_smoothingMethod(None)
-,m_const(0)
+ :StatelessFeatureFunction(1, line)
+ ,m_smoothingMethod(None)
+ ,m_const(0)
{
ReadParameters();
}
void SpanLength::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
targetPhrase.SetRuleSource(source);
}
void SpanLength::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
assert(stackVec);
const PhraseProperty *property = targetPhrase.GetProperty("SpanLength");
if (property == NULL) {
- return;
+ return;
}
const SpanLengthPhraseProperty *slProp = static_cast<const SpanLengthPhraseProperty*>(property);
@@ -50,17 +50,17 @@ void SpanLength::EvaluateWithSourceContext(const InputType &input
float score = 0;
for (size_t i = 0; i < stackVec->size(); ++i) {
- const ChartCellLabel &cell = *stackVec->at(i);
- const WordsRange &ntRange = cell.GetCoverage();
- size_t sourceWidth = ntRange.GetNumWordsCovered();
- float prob = slProp->GetProb(i, sourceWidth, m_const);
- score += TransformScore(prob);
+ const ChartCellLabel &cell = *stackVec->at(i);
+ const WordsRange &ntRange = cell.GetCoverage();
+ size_t sourceWidth = ntRange.GetNumWordsCovered();
+ float prob = slProp->GetProb(i, sourceWidth, m_const);
+ score += TransformScore(prob);
}
if (score < -100.0f) {
float weight = StaticData::Instance().GetWeight(this);
if (weight < 0) {
- score = -100;
+ score = -100;
}
}
@@ -71,20 +71,16 @@ void SpanLength::EvaluateWithSourceContext(const InputType &input
void SpanLength::SetParameter(const std::string& key, const std::string& value)
{
if (key == "smoothing") {
- if (value == "plus-constant") {
- m_smoothingMethod = PlusConst;
- }
- else if (value == "none") {
- m_smoothingMethod = None;
- }
- else {
- UTIL_THROW(util::Exception, "Unknown smoothing type " << value);
- }
- }
- else if (key == "constant") {
- m_const = Scan<float>(value);
- }
- else {
+ if (value == "plus-constant") {
+ m_smoothingMethod = PlusConst;
+ } else if (value == "none") {
+ m_smoothingMethod = None;
+ } else {
+ UTIL_THROW(util::Exception, "Unknown smoothing type " << value);
+ }
+ } else if (key == "constant") {
+ m_const = Scan<float>(value);
+ } else {
StatelessFeatureFunction::SetParameter(key, value);
}
}
diff --git a/moses/FF/SpanLength.h b/moses/FF/SpanLength.h
index 30ebead5a..b3998e462 100644
--- a/moses/FF/SpanLength.h
+++ b/moses/FF/SpanLength.h
@@ -9,47 +9,47 @@ namespace Moses
class SpanLength : public StatelessFeatureFunction
{
public:
- SpanLength(const std::string &line);
+ SpanLength(const std::string &line);
- virtual bool IsUseable(const FactorMask &mask) const
- { return true; }
+ virtual bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
- virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ virtual void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
- virtual void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+ virtual void EvaluateWithSourceContext(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const TranslationOptionList &translationOptionList) const {
+ }
- virtual void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ virtual void EvaluateWhenApplied(const Hypothesis& hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
- virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
+ ScoreComponentCollection* accumulator) const {
+ }
- void SetParameter(const std::string& key, const std::string& value);
+ void SetParameter(const std::string& key, const std::string& value);
protected:
- enum SmoothingMethod
- {
- None,
- PlusConst,
- };
- SmoothingMethod m_smoothingMethod;
-
- float m_const;
+ enum SmoothingMethod {
+ None,
+ PlusConst,
+ };
+ SmoothingMethod m_smoothingMethod;
+
+ float m_const;
};
}
diff --git a/moses/FF/SparseHieroReorderingFeature.cpp b/moses/FF/SparseHieroReorderingFeature.cpp
index 0c6ac4767..ee9d4b719 100644
--- a/moses/FF/SparseHieroReorderingFeature.cpp
+++ b/moses/FF/SparseHieroReorderingFeature.cpp
@@ -16,11 +16,11 @@ namespace Moses
SparseHieroReorderingFeature::SparseHieroReorderingFeature(const std::string &line)
:StatelessFeatureFunction(0, line),
- m_type(SourceCombined),
- m_sourceFactor(0),
- m_targetFactor(0),
- m_sourceVocabFile(""),
- m_targetVocabFile("")
+ m_type(SourceCombined),
+ m_sourceFactor(0),
+ m_targetFactor(0),
+ m_sourceVocabFile(""),
+ m_targetVocabFile("")
{
/*
@@ -39,7 +39,8 @@ SparseHieroReorderingFeature::SparseHieroReorderingFeature(const std::string &li
LoadVocabulary(m_targetVocabFile, m_targetVocab);
}
-void SparseHieroReorderingFeature::SetParameter(const std::string& key, const std::string& value) {
+void SparseHieroReorderingFeature::SetParameter(const std::string& key, const std::string& value)
+{
if (key == "input-factor") {
m_sourceFactor = Scan<FactorType>(value);
} else if (key == "output-factor") {
@@ -70,12 +71,13 @@ void SparseHieroReorderingFeature::LoadVocabulary(const std::string& filename, V
UTIL_THROW_IF(!in, util::Exception, "Unable to open vocab file: " << filename);
string line;
while(getline(in,line)) {
- vocab.insert(FactorCollection::Instance().AddFactor(line));
+ vocab.insert(FactorCollection::Instance().AddFactor(line));
}
in.close();
}
-const Factor* SparseHieroReorderingFeature::GetFactor(const Word& word, const Vocab& vocab, FactorType factorType) const {
+const Factor* SparseHieroReorderingFeature::GetFactor(const Word& word, const Vocab& vocab, FactorType factorType) const
+{
const Factor* factor = word.GetFactor(factorType);
if (vocab.size() && vocab.find(factor) == vocab.end()) return m_otherFactor;
return factor;
@@ -88,21 +90,21 @@ void SparseHieroReorderingFeature::EvaluateWhenApplied(
// get index map for underlying hypotheses
//const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
// cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap();
-
+
//The Huck features. For a rule with source side:
// abXcdXef
//We first have to split into blocks:
// ab X cd X ef
//Then we extract features based in the boundary words of the neighbouring blocks
- //For the block pair, we use the right word of the left block, and the left
+ //For the block pair, we use the right word of the left block, and the left
//word of the right block.
- //Need to get blocks, and their alignment. Each block has a word range (on the
+ //Need to get blocks, and their alignment. Each block has a word range (on the
// on the source), a non-terminal flag, and a set of alignment points in the target phrase
//We need to be able to map source word position to target word position, as
//much as possible (don't need interior of non-terminals). The alignment info
- //objects just give us the mappings between *rule* positions. So if we can
+ //objects just give us the mappings between *rule* positions. So if we can
//map source word position to source rule position, and target rule position
//to target word position, then we can map right through.
@@ -114,16 +116,16 @@ void SparseHieroReorderingFeature::EvaluateWhenApplied(
sourceNTSpans.push_back(cur_hypo.GetPrevHypo(prevHypoId)->GetCurrSourceRange());
}
//put in source order. Is this necessary?
- sort(sourceNTSpans.begin(), sourceNTSpans.end());
+ sort(sourceNTSpans.begin(), sourceNTSpans.end());
//cerr << "Source NTs: ";
//for (size_t i = 0; i < sourceNTSpans.size(); ++i) cerr << sourceNTSpans[i] << " ";
//cerr << endl;
typedef pair<WordsRange,bool> Block;//flag indicates NT
- vector<Block> sourceBlocks;
+ vector<Block> sourceBlocks;
sourceBlocks.push_back(Block(cur_hypo.GetCurrSourceRange(),false));
- for (vector<WordsRange>::const_iterator i = sourceNTSpans.begin();
- i != sourceNTSpans.end(); ++i) {
+ for (vector<WordsRange>::const_iterator i = sourceNTSpans.begin();
+ i != sourceNTSpans.end(); ++i) {
const WordsRange& prevHypoRange = *i;
Block lastBlock = sourceBlocks.back();
sourceBlocks.pop_back();
@@ -155,12 +157,12 @@ void SparseHieroReorderingFeature::EvaluateWhenApplied(
//vector<size_t> alignMapTerm = cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm()
size_t sourceRulePos = 0;
//cerr << "SW->RP ";
- for (vector<Block>::const_iterator sourceBlockIt = sourceBlocks.begin();
- sourceBlockIt != sourceBlocks.end(); ++sourceBlockIt) {
+ for (vector<Block>::const_iterator sourceBlockIt = sourceBlocks.begin();
+ sourceBlockIt != sourceBlocks.end(); ++sourceBlockIt) {
for (size_t sourceWordPos = sourceBlockIt->first.GetStartPos();
- sourceWordPos <= sourceBlockIt->first.GetEndPos(); ++sourceWordPos) {
+ sourceWordPos <= sourceBlockIt->first.GetEndPos(); ++sourceWordPos) {
sourceWordToTargetRulePos[sourceWordPos - sourceStart] = alignMap[sourceRulePos];
- // cerr << sourceWordPos - sourceStart << "-" << alignMap[sourceRulePos] << " ";
+ // cerr << sourceWordPos - sourceStart << "-" << alignMap[sourceRulePos] << " ";
if (! sourceBlockIt->second) {
//T
++sourceRulePos;
@@ -174,7 +176,7 @@ void SparseHieroReorderingFeature::EvaluateWhenApplied(
//cerr << endl;
//Iterate through block pairs
- const Sentence& sentence =
+ const Sentence& sentence =
dynamic_cast<const Sentence&>(cur_hypo.GetManager().GetSource());
//const TargetPhrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();
for (size_t i = 0; i < sourceBlocks.size()-1; ++i) {
@@ -186,19 +188,18 @@ void SparseHieroReorderingFeature::EvaluateWhenApplied(
const Word& sourceRightBoundaryWord = sentence.GetWord(sourceRightBoundaryPos);
sourceLeftBoundaryPos -= sourceStart;
sourceRightBoundaryPos -= sourceStart;
-
+
// Need to figure out where these map to on the target.
- size_t targetLeftRulePos =
+ size_t targetLeftRulePos =
sourceWordToTargetRulePos[sourceLeftBoundaryPos];
- size_t targetRightRulePos =
+ size_t targetRightRulePos =
sourceWordToTargetRulePos[sourceRightBoundaryPos];
bool isMonotone = true;
if ((sourceLeftBoundaryPos < sourceRightBoundaryPos &&
- targetLeftRulePos > targetRightRulePos) ||
- ((sourceLeftBoundaryPos > sourceRightBoundaryPos &&
- targetLeftRulePos < targetRightRulePos)))
- {
+ targetLeftRulePos > targetRightRulePos) ||
+ ((sourceLeftBoundaryPos > sourceRightBoundaryPos &&
+ targetLeftRulePos < targetRightRulePos))) {
isMonotone = false;
}
stringstream buf;
@@ -208,7 +209,7 @@ void SparseHieroReorderingFeature::EvaluateWhenApplied(
buf << "_";
}
if (m_type == SourceRight || m_type == SourceCombined) {
- buf << GetFactor(sourceRightBoundaryWord,m_sourceVocab,m_sourceFactor)->GetString();
+ buf << GetFactor(sourceRightBoundaryWord,m_sourceVocab,m_sourceFactor)->GetString();
buf << "_";
}
buf << (isMonotone ? "M" : "S");
diff --git a/moses/FF/SparseHieroReorderingFeature.h b/moses/FF/SparseHieroReorderingFeature.h
index 4d694b96d..945402412 100644
--- a/moses/FF/SparseHieroReorderingFeature.h
+++ b/moses/FF/SparseHieroReorderingFeature.h
@@ -24,35 +24,36 @@ public:
SourceRight
};
- SparseHieroReorderingFeature(const std::string &line);
+ SparseHieroReorderingFeature(const std::string &line);
- bool IsUseable(const FactorMask &mask) const
- { return true; }
+ bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
void SetParameter(const std::string& key, const std::string& value);
- void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
virtual void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const TranslationOptionList &translationOptionList) const {
+ }
virtual void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
private:
@@ -61,7 +62,7 @@ private:
void AddNonTerminalPairFeatures(
const Sentence& sentence, const WordsRange& nt1, const WordsRange& nt2,
- bool isMonotone, ScoreComponentCollection* accumulator) const;
+ bool isMonotone, ScoreComponentCollection* accumulator) const;
void LoadVocabulary(const std::string& filename, Vocab& vocab);
const Factor* GetFactor(const Word& word, const Vocab& vocab, FactorType factor) const;
@@ -73,7 +74,7 @@ private:
std::string m_targetVocabFile;
const Factor* m_otherFactor;
-
+
Vocab m_sourceVocab;
Vocab m_targetVocab;
diff --git a/moses/FF/StatefulFeatureFunction.h b/moses/FF/StatefulFeatureFunction.h
index 950b122e9..08b7c607d 100644
--- a/moses/FF/StatefulFeatureFunction.h
+++ b/moses/FF/StatefulFeatureFunction.h
@@ -44,7 +44,10 @@ public:
virtual FFState* EvaluateWhenApplied(
const Syntax::SHyperedge& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
- ScoreComponentCollection* accumulator) const { assert(false); return 0; /* FIXME */ }
+ ScoreComponentCollection* accumulator) const {
+ assert(false);
+ return 0; /* FIXME */
+ }
//! return the state associated with the empty hypothesis for a given sentence
virtual const FFState* EmptyHypothesisState(const InputType &input) const = 0;
diff --git a/moses/FF/StatelessFeatureFunction.h b/moses/FF/StatelessFeatureFunction.h
index 9ef5d269a..e5d3f3812 100644
--- a/moses/FF/StatelessFeatureFunction.h
+++ b/moses/FF/StatelessFeatureFunction.h
@@ -27,16 +27,18 @@ public:
* This should be implemented for features that apply to phrase-based models.
**/
virtual void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const = 0;
+ ScoreComponentCollection* accumulator) const = 0;
/**
* Same for chart-based features.
**/
virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const = 0;
+ ScoreComponentCollection* accumulator) const = 0;
virtual void EvaluateWhenApplied(const Syntax::SHyperedge &,
- ScoreComponentCollection*) const { assert(false); }
+ ScoreComponentCollection*) const {
+ assert(false);
+ }
virtual bool IsStateless() const {
return true;
diff --git a/moses/FF/SyntaxRHS.cpp b/moses/FF/SyntaxRHS.cpp
index 24b3bf062..80f9b21bc 100644
--- a/moses/FF/SyntaxRHS.cpp
+++ b/moses/FF/SyntaxRHS.cpp
@@ -9,36 +9,36 @@ using namespace std;
namespace Moses
{
SyntaxRHS::SyntaxRHS(const std::string &line)
-:StatelessFeatureFunction(1, line)
+ :StatelessFeatureFunction(1, line)
{
ReadParameters();
}
void SyntaxRHS::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
}
void SyntaxRHS::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
- assert(stackVec);
- for (size_t i = 0; i < stackVec->size(); ++i) {
- const ChartCellLabel &cell = *stackVec->at(i);
+ assert(stackVec);
+ for (size_t i = 0; i < stackVec->size(); ++i) {
+ const ChartCellLabel &cell = *stackVec->at(i);
- }
+ }
- if (targetPhrase.GetNumNonTerminals()) {
- vector<float> newScores(m_numScoreComponents);
- newScores[0] = - std::numeric_limits<float>::infinity();
- scoreBreakdown.PlusEquals(this, newScores);
- }
+ if (targetPhrase.GetNumNonTerminals()) {
+ vector<float> newScores(m_numScoreComponents);
+ newScores[0] = - std::numeric_limits<float>::infinity();
+ scoreBreakdown.PlusEquals(this, newScores);
+ }
}
diff --git a/moses/FF/SyntaxRHS.h b/moses/FF/SyntaxRHS.h
index 29259360e..46911ccd9 100644
--- a/moses/FF/SyntaxRHS.h
+++ b/moses/FF/SyntaxRHS.h
@@ -16,27 +16,27 @@ public:
}
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
-
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const TranslationOptionList &translationOptionList) const {
+ }
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
};
diff --git a/moses/FF/TargetBigramFeature.cpp b/moses/FF/TargetBigramFeature.cpp
index f1da62b7d..6816410f8 100644
--- a/moses/FF/TargetBigramFeature.cpp
+++ b/moses/FF/TargetBigramFeature.cpp
@@ -65,8 +65,8 @@ const FFState* TargetBigramFeature::EmptyHypothesisState(const InputType &/*inpu
}
FFState* TargetBigramFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo,
- const FFState* prev_state,
- ScoreComponentCollection* accumulator) const
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const
{
const TargetBigramState* tbState = dynamic_cast<const TargetBigramState*>(prev_state);
assert(tbState);
diff --git a/moses/FF/TargetBigramFeature.h b/moses/FF/TargetBigramFeature.h
index 6e0a5bd05..f6e965808 100644
--- a/moses/FF/TargetBigramFeature.h
+++ b/moses/FF/TargetBigramFeature.h
@@ -40,30 +40,30 @@ public:
virtual const FFState* EmptyHypothesisState(const InputType &input) const;
virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
virtual FFState* EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */,
- int /* featureID */,
- ScoreComponentCollection* ) const {
+ int /* featureID */,
+ ScoreComponentCollection* ) const {
throw std::logic_error("TargetBigramFeature not valid in chart decoder");
}
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
-
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
void SetParameter(const std::string& key, const std::string& value);
private:
diff --git a/moses/FF/TargetNgramFeature.cpp b/moses/FF/TargetNgramFeature.cpp
index a810a742c..8414e1bc2 100644
--- a/moses/FF/TargetNgramFeature.cpp
+++ b/moses/FF/TargetNgramFeature.cpp
@@ -96,8 +96,8 @@ const FFState* TargetNgramFeature::EmptyHypothesisState(const InputType &/*input
}
FFState* TargetNgramFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo,
- const FFState* prev_state,
- ScoreComponentCollection* accumulator) const
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const
{
const TargetNgramState* tnState = static_cast<const TargetNgramState*>(prev_state);
assert(tnState);
diff --git a/moses/FF/TargetNgramFeature.h b/moses/FF/TargetNgramFeature.h
index 8063deca5..2e9e71db0 100644
--- a/moses/FF/TargetNgramFeature.h
+++ b/moses/FF/TargetNgramFeature.h
@@ -187,28 +187,28 @@ public:
virtual const FFState* EmptyHypothesisState(const InputType &input) const;
virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
virtual FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureId,
- ScoreComponentCollection* accumulator) const;
+ ScoreComponentCollection* accumulator) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
-
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const TranslationOptionList &translationOptionList) const {
+ }
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
void SetParameter(const std::string& key, const std::string& value);
diff --git a/moses/FF/TargetWordInsertionFeature.h b/moses/FF/TargetWordInsertionFeature.h
index ff1c4f5fe..a9dd3d97a 100644
--- a/moses/FF/TargetWordInsertionFeature.h
+++ b/moses/FF/TargetWordInsertionFeature.h
@@ -29,26 +29,26 @@ public:
void Load();
virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
-
+ ScoreComponentCollection* accumulator) const {
+ }
+
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const TranslationOptionList &translationOptionList) const {
+ }
void ComputeFeatures(const Phrase &source,
const TargetPhrase& targetPhrase,
diff --git a/moses/FF/TreeStructureFeature.cpp b/moses/FF/TreeStructureFeature.cpp
index 38c21109d..e558b06bc 100644
--- a/moses/FF/TreeStructureFeature.cpp
+++ b/moses/FF/TreeStructureFeature.cpp
@@ -8,7 +8,8 @@
namespace Moses
{
-void TreeStructureFeature::Load() {
+void TreeStructureFeature::Load()
+{
// syntactic constraints can be hooked in here.
m_constraints = NULL;
@@ -20,34 +21,35 @@ void TreeStructureFeature::Load() {
// define NT labels (ints) that are mapped from strings for quicker comparison.
-void TreeStructureFeature::AddNTLabels(TreePointer root) const {
- std::string label = root->GetLabel();
+void TreeStructureFeature::AddNTLabels(TreePointer root) const
+{
+ std::string label = root->GetLabel();
- if (root->IsTerminal()) {
- return;
- }
+ if (root->IsTerminal()) {
+ return;
+ }
- std::map<std::string, NTLabel>::const_iterator it = m_labelset->string_to_label.find(label);
- if (it != m_labelset->string_to_label.end()) {
- root->SetNTLabel(it->second);
- }
+ std::map<std::string, NTLabel>::const_iterator it = m_labelset->string_to_label.find(label);
+ if (it != m_labelset->string_to_label.end()) {
+ root->SetNTLabel(it->second);
+ }
- std::vector<TreePointer> children = root->GetChildren();
- for (std::vector<TreePointer>::const_iterator it2 = children.begin(); it2 != children.end(); ++it2) {
- AddNTLabels(*it2);
- }
+ std::vector<TreePointer> children = root->GetChildren();
+ for (std::vector<TreePointer>::const_iterator it2 = children.begin(); it2 != children.end(); ++it2) {
+ AddNTLabels(*it2);
+ }
}
FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
- , int featureID /* used to index the state in the previous hypotheses */
- , ScoreComponentCollection* accumulator) const
+ , int featureID /* used to index the state in the previous hypotheses */
+ , ScoreComponentCollection* accumulator) const
{
if (const PhraseProperty *property = cur_hypo.GetCurrTargetPhrase().GetProperty("Tree")) {
const std::string *tree = property->GetValueString();
TreePointer mytree (boost::make_shared<InternalTree>(*tree));
if (m_labelset) {
- AddNTLabels(mytree);
+ AddNTLabels(mytree);
}
//get subtrees (in target order)
@@ -69,8 +71,7 @@ FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hy
mytree->Combine(previous_trees);
return new TreeState(mytree);
- }
- else {
+ } else {
UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found");
}
diff --git a/moses/FF/TreeStructureFeature.h b/moses/FF/TreeStructureFeature.h
index 3e05a9234..d5ec4edda 100644
--- a/moses/FF/TreeStructureFeature.h
+++ b/moses/FF/TreeStructureFeature.h
@@ -14,10 +14,9 @@ typedef int NTLabel;
// mapping from string nonterminal label to int representation.
// allows abstraction if multiple nonterminal strings should map to same label.
-struct LabelSet
-{
+struct LabelSet {
public:
- std::map<std::string, NTLabel> string_to_label;
+ std::map<std::string, NTLabel> string_to_label;
};
@@ -26,8 +25,8 @@ public:
class SyntaxConstraints
{
public:
- virtual void SyntacticRules(TreePointer root, const std::vector<TreePointer> &previous, const FeatureFunction* sp, ScoreComponentCollection* accumulator) = 0;
- virtual ~SyntaxConstraints() {};
+ virtual void SyntacticRules(TreePointer root, const std::vector<TreePointer> &previous, const FeatureFunction* sp, ScoreComponentCollection* accumulator) = 0;
+ virtual ~SyntaxConstraints() {};
};
@@ -38,9 +37,11 @@ class TreeStructureFeature : public StatefulFeatureFunction
public:
TreeStructureFeature(const std::string &line)
:StatefulFeatureFunction(0, line) {
- ReadParameters();
- }
- ~TreeStructureFeature() {delete m_constraints;};
+ ReadParameters();
+ }
+ ~TreeStructureFeature() {
+ delete m_constraints;
+ };
virtual const FFState* EmptyHypothesisState(const InputType &input) const {
return new TreeState(TreePointer());
@@ -53,25 +54,27 @@ public:
}
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const {};
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {};
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const {};
-
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {};
+
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const TranslationOptionList &translationOptionList) const {
+ }
+
-
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
- ScoreComponentCollection* accumulator) const {UTIL_THROW(util::Exception, "Not implemented");};
+ ScoreComponentCollection* accumulator) const {
+ UTIL_THROW(util::Exception, "Not implemented");
+ };
FFState* EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
diff --git a/moses/FF/UnknownWordPenaltyProducer.h b/moses/FF/UnknownWordPenaltyProducer.h
index 1f7f5c7ff..bef6bd58c 100644
--- a/moses/FF/UnknownWordPenaltyProducer.h
+++ b/moses/FF/UnknownWordPenaltyProducer.h
@@ -32,31 +32,31 @@ public:
std::vector<float> DefaultWeights() const;
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const Syntax::SHyperedge &hyperedge,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const TranslationOptionList &translationOptionList) const {
+ }
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
};
diff --git a/moses/FF/VW/ThreadLocalByFeatureStorage.h b/moses/FF/VW/ThreadLocalByFeatureStorage.h
index b37aa47b0..790054d1f 100644
--- a/moses/FF/VW/ThreadLocalByFeatureStorage.h
+++ b/moses/FF/VW/ThreadLocalByFeatureStorage.h
@@ -16,7 +16,7 @@ namespace Moses
template <class Value>
struct DefaultFactory {
typedef boost::shared_ptr<Value> ValuePtr;
-
+
ValuePtr operator()() {
return ValuePtr(new Value());
}
@@ -25,55 +25,54 @@ struct DefaultFactory {
template<class Value, class Factory = DefaultFactory<Value> >
class ThreadLocalByFeatureStorage
{
- public:
- typedef boost::shared_ptr<Value> ValuePtr;
- typedef std::map<std::string, ValuePtr> NameValueMap;
- typedef boost::thread_specific_ptr<NameValueMap> TSNameValueMap;
-
- ThreadLocalByFeatureStorage(FeatureFunction* ff,
- Factory factory = Factory())
+public:
+ typedef boost::shared_ptr<Value> ValuePtr;
+ typedef std::map<std::string, ValuePtr> NameValueMap;
+ typedef boost::thread_specific_ptr<NameValueMap> TSNameValueMap;
+
+ ThreadLocalByFeatureStorage(FeatureFunction* ff,
+ Factory factory = Factory())
: m_ff(ff), m_factory(factory) {}
- virtual ~ThreadLocalByFeatureStorage() {} // provide empty virtual dtor
-
- virtual ValuePtr GetStored() {
- if(!m_nameMap.get())
- m_nameMap.reset(new NameValueMap());
-
- typename NameValueMap::iterator it
- = m_nameMap->find(m_ff->GetScoreProducerDescription());
-
- if(it == m_nameMap->end()) {
- std::pair<typename NameValueMap::iterator, bool> ret;
- ret = m_nameMap->insert(
- std::make_pair(m_ff->GetScoreProducerDescription(), m_factory()));
-
- return ret.first->second;
- }
- else {
- return it->second;
- }
- }
-
- virtual const ValuePtr GetStored() const {
- UTIL_THROW_IF2(!m_nameMap.get(),
- "No thread local storage has been created for: "
- << m_ff->GetScoreProducerDescription());
-
- typename NameValueMap::const_iterator it
- = m_nameMap->find(m_ff->GetScoreProducerDescription());
-
- UTIL_THROW_IF2(it == m_nameMap->end(),
- "No features stored for: "
- << m_ff->GetScoreProducerDescription());
-
+ virtual ~ThreadLocalByFeatureStorage() {} // provide empty virtual dtor
+
+ virtual ValuePtr GetStored() {
+ if(!m_nameMap.get())
+ m_nameMap.reset(new NameValueMap());
+
+ typename NameValueMap::iterator it
+ = m_nameMap->find(m_ff->GetScoreProducerDescription());
+
+ if(it == m_nameMap->end()) {
+ std::pair<typename NameValueMap::iterator, bool> ret;
+ ret = m_nameMap->insert(
+ std::make_pair(m_ff->GetScoreProducerDescription(), m_factory()));
+
+ return ret.first->second;
+ } else {
return it->second;
}
-
- private:
- FeatureFunction* m_ff;
- Factory m_factory;
- static TSNameValueMap m_nameMap;
+ }
+
+ virtual const ValuePtr GetStored() const {
+ UTIL_THROW_IF2(!m_nameMap.get(),
+ "No thread local storage has been created for: "
+ << m_ff->GetScoreProducerDescription());
+
+ typename NameValueMap::const_iterator it
+ = m_nameMap->find(m_ff->GetScoreProducerDescription());
+
+ UTIL_THROW_IF2(it == m_nameMap->end(),
+ "No features stored for: "
+ << m_ff->GetScoreProducerDescription());
+
+ return it->second;
+ }
+
+private:
+ FeatureFunction* m_ff;
+ Factory m_factory;
+ static TSNameValueMap m_nameMap;
};
template <class Value, class Factory>
diff --git a/moses/FF/VW/VW.h b/moses/FF/VW/VW.h
index 2f3e31559..135d7481d 100644
--- a/moses/FF/VW/VW.h
+++ b/moses/FF/VW/VW.h
@@ -23,20 +23,18 @@ namespace Moses
const std::string VW_DUMMY_LABEL = "1111"; // VW does not use the actual label, other classifiers might
-/**
+/**
* VW thread-specific data about target sentence.
*/
struct VWTargetSentence {
VWTargetSentence() : m_sentence(NULL), m_alignment(NULL) {}
- void Clear()
- {
+ void Clear() {
if (m_sentence) delete m_sentence;
if (m_alignment) delete m_alignment;
}
- ~VWTargetSentence()
- {
+ ~VWTargetSentence() {
Clear();
}
@@ -52,13 +50,12 @@ class VW : public StatelessFeatureFunction, public TLSTargetSentence
public:
VW(const std::string &line)
: StatelessFeatureFunction(1, line)
- , TLSTargetSentence(this)
- , m_train(false)
- {
+ , TLSTargetSentence(this)
+ , m_train(false) {
ReadParameters();
- Discriminative::ClassifierFactory *classifierFactory = m_train
- ? new Discriminative::ClassifierFactory(m_modelPath)
- : new Discriminative::ClassifierFactory(m_modelPath, m_vwOptions);
+ Discriminative::ClassifierFactory *classifierFactory = m_train
+ ? new Discriminative::ClassifierFactory(m_modelPath)
+ : new Discriminative::ClassifierFactory(m_modelPath, m_vwOptions);
m_tlsClassifier = new TLSClassifier(this, *classifierFactory);
@@ -68,9 +65,8 @@ public:
}
}
- virtual ~VW()
- {
- delete m_tlsClassifier;
+ virtual ~VW() {
+ delete m_tlsClassifier;
delete m_normalizer;
}
@@ -79,36 +75,35 @@ public:
}
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
-
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
+
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {
+ , const TranslationOptionList &translationOptionList) const {
Discriminative::Classifier &classifier = *m_tlsClassifier->GetStored();
-
+
if (translationOptionList.size() == 0)
return; // nothing to do
VERBOSE(2, "VW :: Evaluating translation options\n");
-
+
const std::vector<VWFeatureBase*>& sourceFeatures = VWFeatureBase::GetSourceFeatures(GetScoreProducerDescription());
-
+
const WordsRange &sourceRange = translationOptionList.Get(0)->GetSourceWordsRange();
const InputPath &inputPath = translationOptionList.Get(0)->GetInputPath();
-
- for(size_t i = 0; i < sourceFeatures.size(); ++i)
- (*sourceFeatures[i])(input, inputPath, sourceRange, classifier);
+
+ for(size_t i = 0; i < sourceFeatures.size(); ++i)
+ (*sourceFeatures[i])(input, inputPath, sourceRange, classifier);
const std::vector<VWFeatureBase*>& targetFeatures = VWFeatureBase::GetTargetFeatures(GetScoreProducerDescription());
@@ -118,7 +113,7 @@ public:
TranslationOptionList::const_iterator iterTransOpt;
for(iterTransOpt = translationOptionList.begin(), iterLoss = losses.begin() ;
iterTransOpt != translationOptionList.end() ; ++iterTransOpt, ++iterLoss) {
-
+
const TargetPhrase &targetPhrase = (*iterTransOpt)->GetTargetPhrase();
for(size_t i = 0; i < targetFeatures.size(); ++i)
(*targetFeatures[i])(input, inputPath, targetPhrase, classifier);
@@ -136,28 +131,27 @@ public:
for(iterTransOpt = translationOptionList.begin(), iterLoss = losses.begin() ;
iterTransOpt != translationOptionList.end() ; ++iterTransOpt, ++iterLoss) {
TranslationOption &transOpt = **iterTransOpt;
-
+
std::vector<float> newScores(m_numScoreComponents);
newScores[0] = FloorScore(TransformScore(*iterLoss));
-
+
ScoreComponentCollection &scoreBreakDown = transOpt.GetScoreBreakdown();
scoreBreakDown.PlusEquals(this, newScores);
-
+
transOpt.UpdateScore();
}
}
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
-
+ ScoreComponentCollection* accumulator) const {
+ }
+
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
- void SetParameter(const std::string& key, const std::string& value)
- {
+ void SetParameter(const std::string& key, const std::string& value) {
if (key == "train") {
m_train = Scan<bool>(value);
} else if (key == "path") {
@@ -166,8 +160,8 @@ public:
m_vwOptions = value;
} else if (key == "loss") {
m_normalizer = value == "logistic"
- ? (Discriminative::Normalizer *) new Discriminative::LogisticLossNormalizer()
- : (Discriminative::Normalizer *) new Discriminative::SquaredLossNormalizer();
+ ? (Discriminative::Normalizer *) new Discriminative::LogisticLossNormalizer()
+ : (Discriminative::Normalizer *) new Discriminative::SquaredLossNormalizer();
} else {
StatelessFeatureFunction::SetParameter(key, value);
}
@@ -186,13 +180,13 @@ public:
// target sentence represented as a phrase
Phrase *target = new Phrase();
target->CreateFromString(
- Output
- , StaticData::Instance().GetOutputFactorOrder()
- , tabbedSentence.GetColumns()[0]
- , NULL);
+ Output
+ , StaticData::Instance().GetOutputFactorOrder()
+ , tabbedSentence.GetColumns()[0]
+ , NULL);
// word alignment between source and target sentence
- // we don't store alignment info in AlignmentInfoCollection because we keep alignments of whole
+ // we don't store alignment info in AlignmentInfoCollection because we keep alignments of whole
// sentences, not phrases
AlignmentInfo *alignment = new AlignmentInfo(tabbedSentence.GetColumns()[1]);
@@ -204,8 +198,7 @@ public:
private:
- std::string MakeTargetLabel(const TargetPhrase &targetPhrase) const
- {
+ std::string MakeTargetLabel(const TargetPhrase &targetPhrase) const {
return VW_DUMMY_LABEL;
}
@@ -214,12 +207,12 @@ private:
size_t sourceEnd = topt.GetSourceWordsRange().GetEndPos() + 1;
const VWTargetSentence &targetSentence = *GetStored();
-
+
// get the left-most alignment point withitn sourceRange
std::set<size_t> aligned;
while ((aligned = targetSentence.m_alignment->GetAlignmentsForSource(sourceStart)).empty()) {
sourceStart++;
-
+
if (sourceStart >= sourceEnd) {
// no alignment point between source and target sentence within current source span;
// return immediately
diff --git a/moses/FF/VW/VWFeatureBase.cpp b/moses/FF/VW/VWFeatureBase.cpp
index bb31f58ca..874544203 100644
--- a/moses/FF/VW/VWFeatureBase.cpp
+++ b/moses/FF/VW/VWFeatureBase.cpp
@@ -5,8 +5,8 @@
namespace Moses
{
- std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_features;
- std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_sourceFeatures;
- std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_targetFeatures;
+std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_features;
+std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_sourceFeatures;
+std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_targetFeatures;
}
diff --git a/moses/FF/VW/VWFeatureBase.h b/moses/FF/VW/VWFeatureBase.h
index 2c7d7edad..04eb6974a 100644
--- a/moses/FF/VW/VWFeatureBase.h
+++ b/moses/FF/VW/VWFeatureBase.h
@@ -13,111 +13,109 @@ namespace Moses
class VWFeatureBase : public StatelessFeatureFunction
{
- public:
- VWFeatureBase(const std::string &line, bool isSource = true)
- : StatelessFeatureFunction(0, line), m_usedBy(1, "VW0"), m_isSource(isSource)
- {
- // defaults
- m_sourceFactors.push_back(0);
- m_targetFactors.push_back(0);
- }
+public:
+ VWFeatureBase(const std::string &line, bool isSource = true)
+ : StatelessFeatureFunction(0, line), m_usedBy(1, "VW0"), m_isSource(isSource) {
+ // defaults
+ m_sourceFactors.push_back(0);
+ m_targetFactors.push_back(0);
+ }
- bool IsUseable(const FactorMask &mask) const {
- return true;
- }
+ bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
- // Official hooks should do nothing. This is a hack to be able to define
- // classifier features in the moses.ini configuration file.
- void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const {}
- void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const {}
- void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const {}
- void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const {}
- void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const {}
-
-
- // Common parameters for classifier features, both source and target features
- virtual void SetParameter(const std::string& key, const std::string& value)
- {
- if (key == "used-by") {
- ParseUsedBy(value);
- } else if (key == "source-factors") {
- Tokenize<FactorType>(m_sourceFactors, value, ",");
- } else if (key == "target-factors") {
- Tokenize<FactorType>(m_targetFactors, value, ",");
- } else {
- StatelessFeatureFunction::SetParameter(key, value);
- }
- }
+ // Official hooks should do nothing. This is a hack to be able to define
+ // classifier features in the moses.ini configuration file.
+ void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {}
+ void EvaluateWithSourceContext(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {}
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {}
+ void EvaluateWhenApplied(const Hypothesis& hypo,
+ ScoreComponentCollection* accumulator) const {}
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
+ ScoreComponentCollection* accumulator) const {}
- // Return all classifier features, regardless of type
- static const std::vector<VWFeatureBase*>& GetFeatures(std::string name = "VW0") {
- UTIL_THROW_IF2(s_features.count(name) == 0, "No features registered for parent classifier: " + name);
- return s_features[name];
- }
- // Return only source-dependent classifier features
- static const std::vector<VWFeatureBase*>& GetSourceFeatures(std::string name = "VW0") {
- UTIL_THROW_IF2(s_sourceFeatures.count(name) == 0, "No source features registered for parent classifier: " + name);
- return s_sourceFeatures[name];
+ // Common parameters for classifier features, both source and target features
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ if (key == "used-by") {
+ ParseUsedBy(value);
+ } else if (key == "source-factors") {
+ Tokenize<FactorType>(m_sourceFactors, value, ",");
+ } else if (key == "target-factors") {
+ Tokenize<FactorType>(m_targetFactors, value, ",");
+ } else {
+ StatelessFeatureFunction::SetParameter(key, value);
}
+ }
- // Return only target-dependent classifier features
- static const std::vector<VWFeatureBase*>& GetTargetFeatures(std::string name = "VW0") {
- UTIL_THROW_IF2(s_targetFeatures.count(name) == 0, "No target features registered for parent classifier: " + name);
- return s_targetFeatures[name];
- }
+ // Return all classifier features, regardless of type
+ static const std::vector<VWFeatureBase*>& GetFeatures(std::string name = "VW0") {
+ UTIL_THROW_IF2(s_features.count(name) == 0, "No features registered for parent classifier: " + name);
+ return s_features[name];
+ }
- // Overload to process source-dependent data, create features once for every
- // source sentence word range.
- virtual void operator()(const InputType &input
- , const InputPath &inputPath
- , const WordsRange &sourceRange
- , Discriminative::Classifier &classifier) const = 0;
-
- // Overload to process target-dependent features, create features once for
- // every target phrase. One source word range will have at leat one target
- // phrase, but may have more.
- virtual void operator()(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , Discriminative::Classifier &classifier) const = 0;
-
- protected:
- std::vector<FactorType> m_sourceFactors, m_targetFactors;
-
- void UpdateRegister() {
- for(std::vector<std::string>::const_iterator it = m_usedBy.begin();
- it != m_usedBy.end(); it++) {
- s_features[*it].push_back(this);
- if(m_isSource)
- s_sourceFeatures[*it].push_back(this);
- else
- s_targetFeatures[*it].push_back(this);
- }
- }
+ // Return only source-dependent classifier features
+ static const std::vector<VWFeatureBase*>& GetSourceFeatures(std::string name = "VW0") {
+ UTIL_THROW_IF2(s_sourceFeatures.count(name) == 0, "No source features registered for parent classifier: " + name);
+ return s_sourceFeatures[name];
+ }
- private:
- void ParseUsedBy(const std::string &usedBy) {
- m_usedBy.clear();
- Tokenize(m_usedBy, usedBy, ",");
+ // Return only target-dependent classifier features
+ static const std::vector<VWFeatureBase*>& GetTargetFeatures(std::string name = "VW0") {
+ UTIL_THROW_IF2(s_targetFeatures.count(name) == 0, "No target features registered for parent classifier: " + name);
+ return s_targetFeatures[name];
+ }
+
+ // Overload to process source-dependent data, create features once for every
+ // source sentence word range.
+ virtual void operator()(const InputType &input
+ , const InputPath &inputPath
+ , const WordsRange &sourceRange
+ , Discriminative::Classifier &classifier) const = 0;
+
+ // Overload to process target-dependent features, create features once for
+ // every target phrase. One source word range will have at leat one target
+ // phrase, but may have more.
+ virtual void operator()(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , Discriminative::Classifier &classifier) const = 0;
+
+protected:
+ std::vector<FactorType> m_sourceFactors, m_targetFactors;
+
+ void UpdateRegister() {
+ for(std::vector<std::string>::const_iterator it = m_usedBy.begin();
+ it != m_usedBy.end(); it++) {
+ s_features[*it].push_back(this);
+ if(m_isSource)
+ s_sourceFeatures[*it].push_back(this);
+ else
+ s_targetFeatures[*it].push_back(this);
}
-
- std::vector<std::string> m_usedBy;
- bool m_isSource;
- static std::map<std::string, std::vector<VWFeatureBase*> > s_features;
- static std::map<std::string, std::vector<VWFeatureBase*> > s_sourceFeatures;
- static std::map<std::string, std::vector<VWFeatureBase*> > s_targetFeatures;
+ }
+
+private:
+ void ParseUsedBy(const std::string &usedBy) {
+ m_usedBy.clear();
+ Tokenize(m_usedBy, usedBy, ",");
+ }
+
+ std::vector<std::string> m_usedBy;
+ bool m_isSource;
+ static std::map<std::string, std::vector<VWFeatureBase*> > s_features;
+ static std::map<std::string, std::vector<VWFeatureBase*> > s_sourceFeatures;
+ static std::map<std::string, std::vector<VWFeatureBase*> > s_targetFeatures;
};
}
diff --git a/moses/FF/VW/VWFeatureSource.h b/moses/FF/VW/VWFeatureSource.h
index 74e1d809d..564f4a3b6 100644
--- a/moses/FF/VW/VWFeatureSource.h
+++ b/moses/FF/VW/VWFeatureSource.h
@@ -6,7 +6,7 @@
namespace Moses
{
-
+
// Inherit from this for source-dependent classifier features. They will
// automatically register with the classifier class named VW0 or one or more
// names specified by the used-by=name1,name2,... parameter.
@@ -14,30 +14,30 @@ namespace Moses
// The classifier gets a full list by calling
// VWFeatureBase::GetSourceFeatures(GetScoreProducerDescription())
-
+
class VWFeatureSource : public VWFeatureBase
{
- public:
- VWFeatureSource(const std::string &line)
- : VWFeatureBase(line, true)
- {}
-
- // Gets its pure virtual functions from VWFeatureBase
-
- virtual void operator()(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , Discriminative::Classifier &classifier) const
- {}
-
- virtual void SetParameter(const std::string& key, const std::string& value) {
- VWFeatureBase::SetParameter(key, value);
- }
-
- protected:
- inline std::string GetWord(const InputType &input, size_t pos) const {
- return input.GetWord(pos).GetString(m_sourceFactors, false);
- }
+public:
+ VWFeatureSource(const std::string &line)
+ : VWFeatureBase(line, true) {
+ }
+
+ // Gets its pure virtual functions from VWFeatureBase
+
+ virtual void operator()(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , Discriminative::Classifier &classifier) const {
+ }
+
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ VWFeatureBase::SetParameter(key, value);
+ }
+
+protected:
+ inline std::string GetWord(const InputType &input, size_t pos) const {
+ return input.GetWord(pos).GetString(m_sourceFactors, false);
+ }
};
}
diff --git a/moses/FF/VW/VWFeatureSourceBagOfWords.h b/moses/FF/VW/VWFeatureSourceBagOfWords.h
index 600db186f..12bcaecb8 100644
--- a/moses/FF/VW/VWFeatureSourceBagOfWords.h
+++ b/moses/FF/VW/VWFeatureSourceBagOfWords.h
@@ -5,32 +5,30 @@
namespace Moses
{
-
+
class VWFeatureSourceBagOfWords : public VWFeatureSource
{
- public:
- VWFeatureSourceBagOfWords(const std::string &line)
- : VWFeatureSource(line)
- {
- ReadParameters();
-
- // Call this last
- VWFeatureBase::UpdateRegister();
- }
+public:
+ VWFeatureSourceBagOfWords(const std::string &line)
+ : VWFeatureSource(line) {
+ ReadParameters();
+
+ // Call this last
+ VWFeatureBase::UpdateRegister();
+ }
- void operator()(const InputType &input
+ void operator()(const InputType &input
, const InputPath &inputPath
, const WordsRange &sourceRange
- , Discriminative::Classifier &classifier) const
- {
- for (size_t i = 0; i < input.GetSize(); i++) {
- classifier.AddLabelIndependentFeature("bow^" + GetWord(input, i));
- }
- }
-
- virtual void SetParameter(const std::string& key, const std::string& value) {
- VWFeatureSource::SetParameter(key, value);
+ , Discriminative::Classifier &classifier) const {
+ for (size_t i = 0; i < input.GetSize(); i++) {
+ classifier.AddLabelIndependentFeature("bow^" + GetWord(input, i));
}
+ }
+
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ VWFeatureSource::SetParameter(key, value);
+ }
};
}
diff --git a/moses/FF/VW/VWFeatureSourceExternalFeatures.h b/moses/FF/VW/VWFeatureSourceExternalFeatures.h
index 88f1a940c..09abe517b 100644
--- a/moses/FF/VW/VWFeatureSourceExternalFeatures.h
+++ b/moses/FF/VW/VWFeatureSourceExternalFeatures.h
@@ -13,53 +13,51 @@ namespace Moses
// Assuming a given column of TabbedSentence contains space separated source features
class VWFeatureSourceExternalFeatures : public VWFeatureSource
{
- public:
- VWFeatureSourceExternalFeatures(const std::string &line)
- : VWFeatureSource(line), m_tls(this), m_column(0)
- {
- ReadParameters();
-
- // Call this last
- VWFeatureBase::UpdateRegister();
- }
+public:
+ VWFeatureSourceExternalFeatures(const std::string &line)
+ : VWFeatureSource(line), m_tls(this), m_column(0) {
+ ReadParameters();
+
+ // Call this last
+ VWFeatureBase::UpdateRegister();
+ }
- void operator()(const InputType &input
+ void operator()(const InputType &input
, const InputPath &inputPath
, const WordsRange &sourceRange
- , Discriminative::Classifier &classifier) const
- {
- const Features& features = *m_tls.GetStored();
- for (size_t i = 0; i < features.size(); i++) {
- classifier.AddLabelIndependentFeature("srcext^" + features[i]);
- }
- }
-
- virtual void SetParameter(const std::string& key, const std::string& value) {
- if(key == "column")
- m_column = Scan<size_t>(value);
- else
- VWFeatureSource::SetParameter(key, value);
+ , Discriminative::Classifier &classifier) const {
+ const Features& features = *m_tls.GetStored();
+ for (size_t i = 0; i < features.size(); i++) {
+ classifier.AddLabelIndependentFeature("srcext^" + features[i]);
}
-
- virtual void InitializeForInput(InputType const& source) {
- UTIL_THROW_IF2(source.GetType() != TabbedSentenceInput,
- "This feature function requires the TabbedSentence input type");
-
- const TabbedSentence& tabbedSentence = static_cast<const TabbedSentence&>(source);
- const std::string &column = tabbedSentence.GetColumn(m_column);
-
- Features& features = *m_tls.GetStored();
- features.clear();
-
- Tokenize(features, column, " ");
- }
-
- private:
- typedef std::vector<std::string> Features;
- typedef ThreadLocalByFeatureStorage<Features> TLSFeatures;
+ }
+
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ if(key == "column")
+ m_column = Scan<size_t>(value);
+ else
+ VWFeatureSource::SetParameter(key, value);
+ }
+
+ virtual void InitializeForInput(InputType const& source) {
+ UTIL_THROW_IF2(source.GetType() != TabbedSentenceInput,
+ "This feature function requires the TabbedSentence input type");
+
+ const TabbedSentence& tabbedSentence = static_cast<const TabbedSentence&>(source);
+ const std::string &column = tabbedSentence.GetColumn(m_column);
+
+ Features& features = *m_tls.GetStored();
+ features.clear();
+
+ Tokenize(features, column, " ");
+ }
+
+private:
+ typedef std::vector<std::string> Features;
+ typedef ThreadLocalByFeatureStorage<Features> TLSFeatures;
- TLSFeatures m_tls;
- size_t m_column;
+ TLSFeatures m_tls;
+ size_t m_column;
};
}
diff --git a/moses/FF/VW/VWFeatureSourceIndicator.h b/moses/FF/VW/VWFeatureSourceIndicator.h
index 9b4b56809..784f2657e 100644
--- a/moses/FF/VW/VWFeatureSourceIndicator.h
+++ b/moses/FF/VW/VWFeatureSourceIndicator.h
@@ -7,38 +7,36 @@
namespace Moses
{
-
+
class VWFeatureSourceIndicator : public VWFeatureSource
{
- public:
- VWFeatureSourceIndicator(const std::string &line)
- : VWFeatureSource(line)
- {
- ReadParameters();
-
- // Call this last
- VWFeatureBase::UpdateRegister();
- }
-
- void operator()(const InputType &input
+public:
+ VWFeatureSourceIndicator(const std::string &line)
+ : VWFeatureSource(line) {
+ ReadParameters();
+
+ // Call this last
+ VWFeatureBase::UpdateRegister();
+ }
+
+ void operator()(const InputType &input
, const InputPath &inputPath
, const WordsRange &sourceRange
- , Discriminative::Classifier &classifier) const
- {
- size_t begin = sourceRange.GetStartPos();
- size_t end = sourceRange.GetEndPos() + 1;
-
- std::vector<std::string> words(end - begin);
-
- for (size_t i = 0; i < end - begin; i++)
- words[i] = GetWord(input, begin + i);
-
- classifier.AddLabelIndependentFeature("sind^" + Join(" ", words));
- }
-
- virtual void SetParameter(const std::string& key, const std::string& value) {
- VWFeatureSource::SetParameter(key, value);
- }
+ , Discriminative::Classifier &classifier) const {
+ size_t begin = sourceRange.GetStartPos();
+ size_t end = sourceRange.GetEndPos() + 1;
+
+ std::vector<std::string> words(end - begin);
+
+ for (size_t i = 0; i < end - begin; i++)
+ words[i] = GetWord(input, begin + i);
+
+ classifier.AddLabelIndependentFeature("sind^" + Join(" ", words));
+ }
+
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ VWFeatureSource::SetParameter(key, value);
+ }
};
}
diff --git a/moses/FF/VW/VWFeatureSourcePhraseInternal.h b/moses/FF/VW/VWFeatureSourcePhraseInternal.h
index fa260c902..6b6f6f933 100644
--- a/moses/FF/VW/VWFeatureSourcePhraseInternal.h
+++ b/moses/FF/VW/VWFeatureSourcePhraseInternal.h
@@ -7,35 +7,33 @@
namespace Moses
{
-
+
class VWFeatureSourcePhraseInternal : public VWFeatureSource
{
- public:
- VWFeatureSourcePhraseInternal(const std::string &line)
- : VWFeatureSource(line)
- {
- ReadParameters();
-
- // Call this last
- VWFeatureBase::UpdateRegister();
- }
+public:
+ VWFeatureSourcePhraseInternal(const std::string &line)
+ : VWFeatureSource(line) {
+ ReadParameters();
+
+ // Call this last
+ VWFeatureBase::UpdateRegister();
+ }
- void operator()(const InputType &input
+ void operator()(const InputType &input
, const InputPath &inputPath
, const WordsRange &sourceRange
- , Discriminative::Classifier &classifier) const
- {
- size_t begin = sourceRange.GetStartPos();
- size_t end = sourceRange.GetEndPos() + 1;
-
- while (begin < end) {
- classifier.AddLabelIndependentFeature("sin^" + GetWord(input, begin++));
- }
- }
-
- virtual void SetParameter(const std::string& key, const std::string& value) {
- VWFeatureSource::SetParameter(key, value);
+ , Discriminative::Classifier &classifier) const {
+ size_t begin = sourceRange.GetStartPos();
+ size_t end = sourceRange.GetEndPos() + 1;
+
+ while (begin < end) {
+ classifier.AddLabelIndependentFeature("sin^" + GetWord(input, begin++));
}
+ }
+
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ VWFeatureSource::SetParameter(key, value);
+ }
};
}
diff --git a/moses/FF/VW/VWFeatureSourceWindow.h b/moses/FF/VW/VWFeatureSourceWindow.h
index cf484f8e5..844b7efb1 100644
--- a/moses/FF/VW/VWFeatureSourceWindow.h
+++ b/moses/FF/VW/VWFeatureSourceWindow.h
@@ -7,49 +7,47 @@
namespace Moses
{
-
+
class VWFeatureSourceWindow : public VWFeatureSource
{
- public:
- VWFeatureSourceWindow(const std::string &line)
- : VWFeatureSource(line), m_size(DEFAULT_WINDOW_SIZE)
- {
- ReadParameters();
-
- // Call this last
- VWFeatureBase::UpdateRegister();
- }
+public:
+ VWFeatureSourceWindow(const std::string &line)
+ : VWFeatureSource(line), m_size(DEFAULT_WINDOW_SIZE) {
+ ReadParameters();
- void operator()(const InputType &input
+ // Call this last
+ VWFeatureBase::UpdateRegister();
+ }
+
+ void operator()(const InputType &input
, const InputPath &inputPath
, const WordsRange &sourceRange
- , Discriminative::Classifier &classifier) const
- {
- int begin = sourceRange.GetStartPos();
- int end = sourceRange.GetEndPos() + 1;
- int inputLen = input.GetSize();
-
- for (int i = std::max(0, begin - m_size); i < begin; i++) {
- classifier.AddLabelIndependentFeature("c^" + SPrint(i - begin) + "^" + GetWord(input, i));
- }
-
- for (int i = end; i < std::min(end + m_size, inputLen); i++) {
- classifier.AddLabelIndependentFeature("c^" + SPrint(i - end + 1) + "^" + GetWord(input, i));
- }
+ , Discriminative::Classifier &classifier) const {
+ int begin = sourceRange.GetStartPos();
+ int end = sourceRange.GetEndPos() + 1;
+ int inputLen = input.GetSize();
+
+ for (int i = std::max(0, begin - m_size); i < begin; i++) {
+ classifier.AddLabelIndependentFeature("c^" + SPrint(i - begin) + "^" + GetWord(input, i));
}
-
- virtual void SetParameter(const std::string& key, const std::string& value) {
- if (key == "size") {
- m_size = Scan<size_t>(value);
- } else {
- VWFeatureSource::SetParameter(key, value);
- }
+
+ for (int i = end; i < std::min(end + m_size, inputLen); i++) {
+ classifier.AddLabelIndependentFeature("c^" + SPrint(i - end + 1) + "^" + GetWord(input, i));
+ }
+ }
+
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ if (key == "size") {
+ m_size = Scan<size_t>(value);
+ } else {
+ VWFeatureSource::SetParameter(key, value);
}
+ }
- private:
- static const int DEFAULT_WINDOW_SIZE = 3;
+private:
+ static const int DEFAULT_WINDOW_SIZE = 3;
- int m_size;
+ int m_size;
};
}
diff --git a/moses/FF/VW/VWFeatureTarget.h b/moses/FF/VW/VWFeatureTarget.h
index a4acec9bb..d56306aa8 100644
--- a/moses/FF/VW/VWFeatureTarget.h
+++ b/moses/FF/VW/VWFeatureTarget.h
@@ -5,37 +5,37 @@
namespace Moses
{
-
+
// Inherit from this for target-dependent classifier features. They will
// automatically register with the classifier class named VW0 or one or more
// names specified by the used-by=name1,name2,... parameter.
//
// The classifier gets a full list by calling
// VWFeatureBase::GetTargetFeatures(GetScoreProducerDescription())
-
+
class VWFeatureTarget : public VWFeatureBase
{
- public:
- VWFeatureTarget(const std::string &line)
- : VWFeatureBase(line, false)
- {}
-
- // Gets its pure virtual functions from VWFeatureBase
-
- virtual void operator()(const InputType &input
- , const InputPath &inputPath
- , const WordsRange &sourceRange
- , Discriminative::Classifier &classifier) const
- {}
-
- virtual void SetParameter(const std::string& key, const std::string& value) {
- VWFeatureBase::SetParameter(key, value);
- }
-
- protected:
- inline std::string GetWord(const TargetPhrase &phrase, size_t pos) const {
- return phrase.GetWord(pos).GetString(m_targetFactors, false);
- }
+public:
+ VWFeatureTarget(const std::string &line)
+ : VWFeatureBase(line, false) {
+ }
+
+ // Gets its pure virtual functions from VWFeatureBase
+
+ virtual void operator()(const InputType &input
+ , const InputPath &inputPath
+ , const WordsRange &sourceRange
+ , Discriminative::Classifier &classifier) const {
+ }
+
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ VWFeatureBase::SetParameter(key, value);
+ }
+
+protected:
+ inline std::string GetWord(const TargetPhrase &phrase, size_t pos) const {
+ return phrase.GetWord(pos).GetString(m_targetFactors, false);
+ }
};
}
diff --git a/moses/FF/VW/VWFeatureTargetIndicator.h b/moses/FF/VW/VWFeatureTargetIndicator.h
index 1f729f89d..39d8a37a0 100644
--- a/moses/FF/VW/VWFeatureTargetIndicator.h
+++ b/moses/FF/VW/VWFeatureTargetIndicator.h
@@ -5,29 +5,27 @@
namespace Moses
{
-
+
class VWFeatureTargetIndicator : public VWFeatureTarget
{
- public:
- VWFeatureTargetIndicator(const std::string &line)
- : VWFeatureTarget(line)
- {
- ReadParameters();
-
- VWFeatureBase::UpdateRegister();
- }
+public:
+ VWFeatureTargetIndicator(const std::string &line)
+ : VWFeatureTarget(line) {
+ ReadParameters();
+
+ VWFeatureBase::UpdateRegister();
+ }
- void operator()(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , Discriminative::Classifier &classifier) const
- {
- classifier.AddLabelDependentFeature("tind^" + targetPhrase.GetStringRep(m_targetFactors));
- }
+ void operator()(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , Discriminative::Classifier &classifier) const {
+ classifier.AddLabelDependentFeature("tind^" + targetPhrase.GetStringRep(m_targetFactors));
+ }
- virtual void SetParameter(const std::string& key, const std::string& value) {
- VWFeatureTarget::SetParameter(key, value);
- }
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ VWFeatureTarget::SetParameter(key, value);
+ }
};
}
diff --git a/moses/FF/VW/VWFeatureTargetPhraseInternal.h b/moses/FF/VW/VWFeatureTargetPhraseInternal.h
index 926dc078e..e376a1ed3 100644
--- a/moses/FF/VW/VWFeatureTargetPhraseInternal.h
+++ b/moses/FF/VW/VWFeatureTargetPhraseInternal.h
@@ -5,31 +5,29 @@
namespace Moses
{
-
+
class VWFeatureTargetPhraseInternal : public VWFeatureTarget
{
- public:
- VWFeatureTargetPhraseInternal(const std::string &line)
- : VWFeatureTarget(line)
- {
- ReadParameters();
-
- VWFeatureBase::UpdateRegister();
- }
+public:
+ VWFeatureTargetPhraseInternal(const std::string &line)
+ : VWFeatureTarget(line) {
+ ReadParameters();
- void operator()(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , Discriminative::Classifier &classifier) const
- {
- for (size_t i = 0; i < targetPhrase.GetSize(); i++) {
- classifier.AddLabelDependentFeature("tin^" + GetWord(targetPhrase, i));
- }
- }
+ VWFeatureBase::UpdateRegister();
+ }
- virtual void SetParameter(const std::string& key, const std::string& value) {
- VWFeatureTarget::SetParameter(key, value);
+ void operator()(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , Discriminative::Classifier &classifier) const {
+ for (size_t i = 0; i < targetPhrase.GetSize(); i++) {
+ classifier.AddLabelDependentFeature("tin^" + GetWord(targetPhrase, i));
}
+ }
+
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ VWFeatureTarget::SetParameter(key, value);
+ }
};
}
diff --git a/moses/FF/WordPenaltyProducer.cpp b/moses/FF/WordPenaltyProducer.cpp
index 1e191d040..835ee8e96 100644
--- a/moses/FF/WordPenaltyProducer.cpp
+++ b/moses/FF/WordPenaltyProducer.cpp
@@ -18,9 +18,9 @@ WordPenaltyProducer::WordPenaltyProducer(const std::string &line)
}
void WordPenaltyProducer::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
float score = - (float) targetPhrase.GetNumTerminals();
scoreBreakdown.Assign(this, score);
diff --git a/moses/FF/WordPenaltyProducer.h b/moses/FF/WordPenaltyProducer.h
index 2aa56af97..5d4005533 100644
--- a/moses/FF/WordPenaltyProducer.h
+++ b/moses/FF/WordPenaltyProducer.h
@@ -28,29 +28,29 @@ public:
}
virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const Syntax::SHyperedge &hyperedge,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
-
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
+
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const TranslationOptionList &translationOptionList) const {
+ }
diff --git a/moses/FF/WordTranslationFeature.cpp b/moses/FF/WordTranslationFeature.cpp
index ac906a5eb..2eb980563 100644
--- a/moses/FF/WordTranslationFeature.cpp
+++ b/moses/FF/WordTranslationFeature.cpp
@@ -137,11 +137,11 @@ void WordTranslationFeature::Load()
}
void WordTranslationFeature::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
const Sentence& sentence = static_cast<const Sentence&>(input);
const AlignmentInfo &alignment = targetPhrase.GetAlignTerm();
diff --git a/moses/FF/WordTranslationFeature.h b/moses/FF/WordTranslationFeature.h
index 902ba96eb..c10b4b771 100644
--- a/moses/FF/WordTranslationFeature.h
+++ b/moses/FF/WordTranslationFeature.h
@@ -49,29 +49,29 @@ public:
}
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
- {}
+ ScoreComponentCollection* accumulator) const {
+ }
void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
- {}
-
+ ScoreComponentCollection* accumulator) const {
+ }
+
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const TranslationOptionList &translationOptionList) const {
+ }
};
diff --git a/moses/FactorCollection.cpp b/moses/FactorCollection.cpp
index a29778310..7b370ff36 100644
--- a/moses/FactorCollection.cpp
+++ b/moses/FactorCollection.cpp
@@ -72,7 +72,8 @@ const Factor *FactorCollection::GetFactor(const StringPiece &factorString, bool
to_find.in.m_string = factorString;
to_find.in.m_id = (isNonTerminal) ? m_factorIdNonTerminal : m_factorId;
Set & set = (isNonTerminal) ? m_set : m_setNonTerminal;
- { // read=lock scope
+ {
+ // read=lock scope
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
#endif // WITH_THREADS
diff --git a/moses/HypergraphOutput.cpp b/moses/HypergraphOutput.cpp
index 481bca07e..47c564882 100644
--- a/moses/HypergraphOutput.cpp
+++ b/moses/HypergraphOutput.cpp
@@ -41,16 +41,18 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
-namespace Moses {
+namespace Moses
+{
template<class M>
HypergraphOutput<M>::HypergraphOutput(size_t precision) :
- m_precision(precision) {
+ m_precision(precision)
+{
const StaticData& staticData = StaticData::Instance();
vector<string> hypergraphParameters;
const PARAM_VEC *params = staticData.GetParameter().GetParam("output-search-graph-hypergraph");
if (params) {
- hypergraphParameters = *params;
+ hypergraphParameters = *params;
}
if (hypergraphParameters.size() > 0 && hypergraphParameters[0] == "true") {
@@ -66,7 +68,7 @@ HypergraphOutput<M>::HypergraphOutput(size_t precision) :
m_compression = "txt";
}
UTIL_THROW_IF(m_compression != "txt" && m_compression != "gz" && m_compression != "bz2",
- util::Exception, "Unknown compression type: " << m_compression);
+ util::Exception, "Unknown compression type: " << m_compression);
if ( hypergraphParameters.size() > 2 ) {
m_hypergraphDir = hypergraphParameters[2];
@@ -109,7 +111,7 @@ HypergraphOutput<M>::HypergraphOutput(size_t precision) :
}
UTIL_THROW_IF(!boost::filesystem::is_directory(m_hypergraphDir),
- util::Exception, "Cannot output hypergraphs to " << m_hypergraphDir << " because that path exists, but is not a directory");
+ util::Exception, "Cannot output hypergraphs to " << m_hypergraphDir << " because that path exists, but is not a directory");
ofstream weightsOut;
@@ -125,7 +127,8 @@ HypergraphOutput<M>::HypergraphOutput(size_t precision) :
}
template<class M>
-void HypergraphOutput<M>::Write(const M& manager) const {
+void HypergraphOutput<M>::Write(const M& manager) const
+{
stringstream fileName;
fileName << m_hypergraphDir << "/" << manager.GetSource().GetTranslationId();
@@ -138,7 +141,7 @@ void HypergraphOutput<M>::Write(const M& manager) const {
file.push( boost::iostreams::gzip_compressor() );
} else if ( m_compression == "bz2" ) {
file.push( boost::iostreams::bzip2_compressor() );
- }
+ }
file.push( boost::iostreams::file_sink(fileName.str(), ios_base::out) );
@@ -149,9 +152,9 @@ void HypergraphOutput<M>::Write(const M& manager) const {
file.flush();
} else {
TRACE_ERR("Cannot output hypergraph for line " << manager.GetSource().GetTranslationId()
- << " because the output file " << fileName.str()
- << " is not open or not ready for writing"
- << std::endl);
+ << " because the output file " << fileName.str()
+ << " is not open or not ready for writing"
+ << std::endl);
}
file.pop();
}
@@ -161,7 +164,8 @@ template class HypergraphOutput<ChartManager>;
void ChartSearchGraphWriterMoses::WriteHypos
- (const ChartHypothesisCollection& hypos, const map<unsigned, bool> &reachable) const {
+(const ChartHypothesisCollection& hypos, const map<unsigned, bool> &reachable) const
+{
ChartHypothesisCollection::const_iterator iter;
for (iter = hypos.begin() ; iter != hypos.end() ; ++iter) {
@@ -184,7 +188,8 @@ void ChartSearchGraphWriterMoses::WriteHypos
}
}
-void ChartSearchGraphWriterHypergraph::WriteHeader(size_t winners, size_t losers) const {
+void ChartSearchGraphWriterHypergraph::WriteHeader(size_t winners, size_t losers) const
+{
(*m_out) << "# target ||| features ||| source-covered" << endl;
(*m_out) << winners << " " << (winners+losers) << endl;
@@ -192,13 +197,14 @@ void ChartSearchGraphWriterHypergraph::WriteHeader(size_t winners, size_t losers
}
void ChartSearchGraphWriterHypergraph::WriteHypos(const ChartHypothesisCollection& hypos,
- const map<unsigned, bool> &reachable) const {
-
+ const map<unsigned, bool> &reachable) const
+{
+
ChartHypothesisCollection::const_iterator iter;
for (iter = hypos.begin() ; iter != hypos.end() ; ++iter) {
const ChartHypothesis* mainHypo = *iter;
- if (!StaticData::Instance().GetUnprunedSearchGraph() &&
- reachable.find(mainHypo->GetId()) == reachable.end()) {
+ if (!StaticData::Instance().GetUnprunedSearchGraph() &&
+ reachable.find(mainHypo->GetId()) == reachable.end()) {
//Ignore non reachable nodes
continue;
}
@@ -246,7 +252,7 @@ void ChartSearchGraphWriterHypergraph::WriteHypos(const ChartHypothesisCollectio
}
}
}
-
+
} //namespace Moses
diff --git a/moses/HypergraphOutput.h b/moses/HypergraphOutput.h
index 4ec8e2665..6503a9a28 100644
--- a/moses/HypergraphOutput.h
+++ b/moses/HypergraphOutput.h
@@ -29,12 +29,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
* Manage the output of hypergraphs.
**/
-namespace Moses {
+namespace Moses
+{
class ChartHypothesisCollection;
template<class M>
-class HypergraphOutput {
+class HypergraphOutput
+{
public:
/** Initialise output directory and create weights file */
@@ -51,42 +53,47 @@ private:
};
-/**
+/**
* ABC for different types of search graph output for chart Moses.
**/
-class ChartSearchGraphWriter {
+class ChartSearchGraphWriter
+{
public:
virtual void WriteHeader(size_t winners, size_t losers) const = 0;
virtual void WriteHypos(const ChartHypothesisCollection& hypos,
- const std::map<unsigned, bool> &reachable) const = 0;
+ const std::map<unsigned, bool> &reachable) const = 0;
};
/** "Moses" format (osg style) */
-class ChartSearchGraphWriterMoses : public virtual ChartSearchGraphWriter {
+class ChartSearchGraphWriterMoses : public virtual ChartSearchGraphWriter
+{
public:
- ChartSearchGraphWriterMoses(std::ostream* out, size_t lineNumber) :
- m_out(out), m_lineNumber(lineNumber) {}
- virtual void WriteHeader(size_t, size_t) const {/* do nothing */}
+ ChartSearchGraphWriterMoses(std::ostream* out, size_t lineNumber) :
+ m_out(out), m_lineNumber(lineNumber) {}
+ virtual void WriteHeader(size_t, size_t) const {
+ /* do nothing */
+ }
virtual void WriteHypos(const ChartHypothesisCollection& hypos,
- const std::map<unsigned, bool> &reachable) const;
-
+ const std::map<unsigned, bool> &reachable) const;
+
private:
- std::ostream* m_out;
+ std::ostream* m_out;
size_t m_lineNumber;
};
/** Modified version of Kenneth's lazy hypergraph format */
-class ChartSearchGraphWriterHypergraph : public virtual ChartSearchGraphWriter {
+class ChartSearchGraphWriterHypergraph : public virtual ChartSearchGraphWriter
+{
public:
- ChartSearchGraphWriterHypergraph(std::ostream* out) :
- m_out(out), m_nodeId(0) {}
+ ChartSearchGraphWriterHypergraph(std::ostream* out) :
+ m_out(out), m_nodeId(0) {}
virtual void WriteHeader(size_t winners, size_t losers) const;
virtual void WriteHypos(const ChartHypothesisCollection& hypos,
- const std::map<unsigned, bool> &reachable) const;
-
+ const std::map<unsigned, bool> &reachable) const;
+
private:
- std::ostream* m_out;
+ std::ostream* m_out;
mutable size_t m_nodeId;
mutable std::map<size_t,size_t> m_hypoIdToNodeId;
};
diff --git a/moses/Hypothesis.cpp b/moses/Hypothesis.cpp
index 086da8323..a8777ed9f 100644
--- a/moses/Hypothesis.cpp
+++ b/moses/Hypothesis.cpp
@@ -207,7 +207,7 @@ int Hypothesis::RecombineCompare(const Hypothesis &compare) const
}
void Hypothesis::EvaluateWhenApplied(const StatefulFeatureFunction &sfff,
- int state_idx)
+ int state_idx)
{
const StaticData &staticData = StaticData::Instance();
if (! staticData.IsFeatureFunctionIgnored( sfff )) {
@@ -255,8 +255,8 @@ void Hypothesis::EvaluateWhenApplied(const SquareMatrix &futureScore)
const StaticData &staticData = StaticData::Instance();
if (! staticData.IsFeatureFunctionIgnored(ff)) {
m_ffStates[i] = ff.EvaluateWhenApplied(*this,
- m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL,
- &m_currScoreBreakdown);
+ m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL,
+ &m_currScoreBreakdown);
}
}
@@ -337,10 +337,10 @@ void Hypothesis::CleanupArcList()
if (!distinctNBest && m_arcList->size() > nBestSize * 5) {
// prune arc list only if there too many arcs
- NTH_ELEMENT4(m_arcList->begin()
- , m_arcList->begin() + nBestSize - 1
- , m_arcList->end()
- , CompareHypothesisTotalScore());
+ NTH_ELEMENT4(m_arcList->begin()
+ , m_arcList->begin() + nBestSize - 1
+ , m_arcList->end()
+ , CompareHypothesisTotalScore());
// delete bad ones
ArcList::iterator iter;
@@ -491,7 +491,7 @@ void Hypothesis::OutputInput(std::ostream& os) const
}
void Hypothesis::OutputBestSurface(std::ostream &out, const std::vector<FactorType> &outputFactorOrder,
- char reportSegmentation, bool reportAllFactors) const
+ char reportSegmentation, bool reportAllFactors) const
{
if (m_prevHypo) {
// recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence
@@ -505,10 +505,10 @@ void Hypothesis::OutputBestSurface(std::ostream &out, const std::vector<FactorTy
* print surface factor only for the given phrase
*/
void Hypothesis::OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
- char reportSegmentation, bool reportAllFactors) const
+ char reportSegmentation, bool reportAllFactors) const
{
UTIL_THROW_IF2(outputFactorOrder.size() == 0,
- "Must specific at least 1 output factor");
+ "Must specific at least 1 output factor");
const TargetPhrase& phrase = edge.GetCurrTargetPhrase();
bool markUnknown = StaticData::Instance().GetMarkUnknown();
if (reportAllFactors == true) {
@@ -535,7 +535,7 @@ void Hypothesis::OutputSurface(std::ostream &out, const Hypothesis &edge, const
}
UTIL_THROW_IF2(factor == NULL,
- "No factor 0 at position " << pos);
+ "No factor 0 at position " << pos);
//preface surface form with UNK if marking unknowns
const Word &word = phrase.GetWord(pos);
@@ -548,7 +548,7 @@ void Hypothesis::OutputSurface(std::ostream &out, const Hypothesis &edge, const
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
UTIL_THROW_IF2(factor == NULL,
- "No factor " << i << " at position " << pos);
+ "No factor " << i << " at position " << pos);
out << "|" << *factor;
}
@@ -589,7 +589,7 @@ std::map<size_t, const Factor*> Hypothesis::GetPlaceholders(const Hypothesis &hy
if (factor) {
std::set<size_t> targetPos = hypo.GetTranslationOption().GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(sourcePos);
UTIL_THROW_IF2(targetPos.size() != 1,
- "Placeholder should be aligned to 1, and only 1, word");
+ "Placeholder should be aligned to 1, and only 1, word");
ret[*targetPos.begin()] = factor;
}
}
diff --git a/moses/Hypothesis.h b/moses/Hypothesis.h
index 72fb08bbd..481329338 100644
--- a/moses/Hypothesis.h
+++ b/moses/Hypothesis.h
@@ -81,7 +81,7 @@ protected:
float m_totalScore; /*! score so far */
float m_futureScore; /*! estimated future cost to translate rest of sentence */
/*! sum of scores of this hypothesis, and previous hypotheses. Lazily initialised. */
- mutable boost::scoped_ptr<ScoreComponentCollection> m_scoreBreakdown;
+ mutable boost::scoped_ptr<ScoreComponentCollection> m_scoreBreakdown;
ScoreComponentCollection m_currScoreBreakdown; /*! scores for this hypothesis only */
std::vector<const FFState*> m_ffStates;
const Hypothesis *m_winningHypo;
diff --git a/moses/IOWrapper.cpp b/moses/IOWrapper.cpp
index 2df57a674..f187d9ea6 100644
--- a/moses/IOWrapper.cpp
+++ b/moses/IOWrapper.cpp
@@ -103,10 +103,9 @@ IOWrapper::IOWrapper()
staticData.GetParameter().SetParameter<string>(m_inputFilePath, "input-file", "");
if (m_inputFilePath.empty()) {
- m_inputFile = NULL;
- m_inputStream = &cin;
- }
- else {
+ m_inputFile = NULL;
+ m_inputStream = &cin;
+ } else {
VERBOSE(2,"IO from File" << endl);
m_inputFile = new InputFileStream(m_inputFilePath);
m_inputStream = m_inputFile;
@@ -131,10 +130,9 @@ IOWrapper::IOWrapper()
if (staticData.GetOutputSearchGraph()) {
string fileName;
if (staticData.GetOutputSearchGraphExtended()) {
- staticData.GetParameter().SetParameter<string>(fileName, "output-search-graph-extended", "");
- }
- else {
- staticData.GetParameter().SetParameter<string>(fileName, "output-search-graph", "");
+ staticData.GetParameter().SetParameter<string>(fileName, "output-search-graph-extended", "");
+ } else {
+ staticData.GetParameter().SetParameter<string>(fileName, "output-search-graph", "");
}
std::ofstream *file = new std::ofstream;
m_outputSearchGraphStream = file;
@@ -146,19 +144,19 @@ IOWrapper::IOWrapper()
m_unknownsCollector = new Moses::OutputCollector(m_unknownsStream);
UTIL_THROW_IF2(!m_unknownsStream->good(),
"File for unknowns words could not be opened: " <<
- staticData.GetOutputUnknownsFile());
+ staticData.GetOutputUnknownsFile());
}
if (!staticData.GetAlignmentOutputFile().empty()) {
m_alignmentInfoStream = new std::ofstream(staticData.GetAlignmentOutputFile().c_str());
m_alignmentInfoCollector = new Moses::OutputCollector(m_alignmentInfoStream);
UTIL_THROW_IF2(!m_alignmentInfoStream->good(),
- "File for alignment output could not be opened: " << staticData.GetAlignmentOutputFile());
+ "File for alignment output could not be opened: " << staticData.GetAlignmentOutputFile());
}
if (staticData.GetOutputSearchGraph()) {
string fileName;
- staticData.GetParameter().SetParameter<string>(fileName, "output-search-graph", "");
+ staticData.GetParameter().SetParameter<string>(fileName, "output-search-graph", "");
std::ofstream *file = new std::ofstream;
m_outputSearchGraphStream = file;
@@ -182,7 +180,7 @@ IOWrapper::IOWrapper()
// wordgraph output
if (staticData.GetOutputWordGraph()) {
string fileName;
- staticData.GetParameter().SetParameter<string>(fileName, "output-word-graph", "");
+ staticData.GetParameter().SetParameter<string>(fileName, "output-word-graph", "");
std::ofstream *file = new std::ofstream;
m_outputWordGraphStream = file;
@@ -211,7 +209,7 @@ IOWrapper::IOWrapper()
}
if (staticData.GetParameter().GetParam("spe-src")) {
- spe_src = new ifstream(staticData.GetParameter().GetParam("spe-src")->at(0).c_str());
+ spe_src = new ifstream(staticData.GetParameter().GetParam("spe-src")->at(0).c_str());
spe_trg = new ifstream(staticData.GetParameter().GetParam("spe-trg")->at(0).c_str());
spe_aln = new ifstream(staticData.GetParameter().GetParam("spe-aln")->at(0).c_str());
}
diff --git a/moses/Incremental.cpp b/moses/Incremental.cpp
index dc410d8a9..49573664e 100644
--- a/moses/Incremental.cpp
+++ b/moses/Incremental.cpp
@@ -288,10 +288,9 @@ void Manager::OutputBest(OutputCollector *collector) const
const long translationId = m_source.GetTranslationId();
const std::vector<search::Applied> &nbest = GetNBest();
if (!nbest.empty()) {
- OutputBestHypo(collector, nbest[0], translationId);
- }
- else {
- OutputBestNone(collector, translationId);
+ OutputBestHypo(collector, nbest[0], translationId);
+ } else {
+ OutputBestNone(collector, translationId);
}
}
@@ -300,7 +299,7 @@ void Manager::OutputBest(OutputCollector *collector) const
void Manager::OutputNBest(OutputCollector *collector) const
{
if (collector == NULL) {
- return;
+ return;
}
OutputNBestList(collector, *completed_nbest_, m_source.GetTranslationId());
@@ -322,7 +321,7 @@ void Manager::OutputNBestList(OutputCollector *collector, const std::vector<sear
Incremental::PhraseAndFeatures(*i, outputPhrase, features);
// <s> and </s>
UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
- "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
+ "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outputPhrase.RemoveWord(0);
outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
@@ -339,13 +338,13 @@ void Manager::OutputNBestList(OutputCollector *collector, const std::vector<sear
void Manager::OutputDetailedTranslationReport(OutputCollector *collector) const
{
- if (collector && !completed_nbest_->empty()) {
- const search::Applied &applied = completed_nbest_->at(0);
- OutputDetailedTranslationReport(collector,
- &applied,
- static_cast<const Sentence&>(m_source),
- m_source.GetTranslationId());
- }
+ if (collector && !completed_nbest_->empty()) {
+ const search::Applied &applied = completed_nbest_->at(0);
+ OutputDetailedTranslationReport(collector,
+ &applied,
+ static_cast<const Sentence&>(m_source),
+ m_source.GetTranslationId());
+ }
}
@@ -366,9 +365,9 @@ void Manager::OutputDetailedTranslationReport(
}
void Manager::OutputTranslationOptions(std::ostream &out,
- ApplicationContext &applicationContext,
- const search::Applied *applied,
- const Sentence &sentence, long translationId) const
+ ApplicationContext &applicationContext,
+ const search::Applied *applied,
+ const Sentence &sentence, long translationId) const
{
if (applied != NULL) {
OutputTranslationOption(out, applicationContext, applied, sentence, translationId);
@@ -378,15 +377,15 @@ void Manager::OutputTranslationOptions(std::ostream &out,
// recursive
const search::Applied *child = applied->Children();
for (size_t i = 0; i < applied->GetArity(); i++) {
- OutputTranslationOptions(out, applicationContext, child++, sentence, translationId);
+ OutputTranslationOptions(out, applicationContext, child++, sentence, translationId);
}
}
void Manager::OutputTranslationOption(std::ostream &out,
- ApplicationContext &applicationContext,
- const search::Applied *applied,
- const Sentence &sentence,
- long translationId) const
+ ApplicationContext &applicationContext,
+ const search::Applied *applied,
+ const Sentence &sentence,
+ long translationId) const
{
ReconstructApplicationContext(applied, sentence, applicationContext);
const TargetPhrase &phrase = *static_cast<const TargetPhrase*>(applied->GetNote().vp);
@@ -432,7 +431,7 @@ void Manager::ReconstructApplicationContext(const search::Applied *applied,
void Manager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const
{
if (collector == NULL || Completed().empty()) {
- return;
+ return;
}
const search::Applied *applied = &Completed()[0];
@@ -452,10 +451,10 @@ void Manager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector *coll
}
void Manager::OutputTreeFragmentsTranslationOptions(std::ostream &out,
- ApplicationContext &applicationContext,
- const search::Applied *applied,
- const Sentence &sentence,
- long translationId) const
+ ApplicationContext &applicationContext,
+ const search::Applied *applied,
+ const Sentence &sentence,
+ long translationId) const
{
if (applied != NULL) {
@@ -475,7 +474,7 @@ void Manager::OutputTreeFragmentsTranslationOptions(std::ostream &out,
// recursive
const search::Applied *child = applied->Children();
for (size_t i = 0; i < applied->GetArity(); i++) {
- OutputTreeFragmentsTranslationOptions(out, applicationContext, child++, sentence, translationId);
+ OutputTreeFragmentsTranslationOptions(out, applicationContext, child++, sentence, translationId);
}
}
@@ -491,7 +490,7 @@ void Manager::OutputBestHypo(OutputCollector *collector, search::Applied applied
Incremental::ToPhrase(applied, outPhrase);
// delete 1st & last
UTIL_THROW_IF2(outPhrase.GetSize() < 2,
- "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
+ "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outPhrase.RemoveWord(0);
outPhrase.RemoveWord(outPhrase.GetSize() - 1);
out << outPhrase.GetStringRep(StaticData::Instance().GetOutputFactorOrder());
@@ -505,9 +504,9 @@ void Manager::OutputBestNone(OutputCollector *collector, long translationId) con
{
if (collector == NULL) return;
if (StaticData::Instance().GetOutputHypoScore()) {
- collector->Write(translationId, "0 \n");
+ collector->Write(translationId, "0 \n");
} else {
- collector->Write(translationId, "\n");
+ collector->Write(translationId, "\n");
}
}
diff --git a/moses/Incremental.h b/moses/Incremental.h
index 9e4d52396..c1f5e40b3 100644
--- a/moses/Incremental.h
+++ b/moses/Incremental.h
@@ -44,19 +44,19 @@ public:
void OutputNBest(OutputCollector *collector) const;
void OutputDetailedTranslationReport(OutputCollector *collector) const;
void OutputNBestList(OutputCollector *collector, const std::vector<search::Applied> &nbest, long translationId) const;
- void OutputLatticeSamples(OutputCollector *collector) const
- {}
- void OutputAlignment(OutputCollector *collector) const
- {}
+ void OutputLatticeSamples(OutputCollector *collector) const {
+ }
+ void OutputAlignment(OutputCollector *collector) const {
+ }
void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const;
- void OutputWordGraph(OutputCollector *collector) const
- {}
- void OutputSearchGraph(OutputCollector *collector) const
- {}
- void OutputSearchGraphSLF() const
- {}
- void OutputSearchGraphHypergraph() const
- {}
+ void OutputWordGraph(OutputCollector *collector) const {
+ }
+ void OutputSearchGraph(OutputCollector *collector) const {
+ }
+ void OutputSearchGraphSLF() const {
+ }
+ void OutputSearchGraphHypergraph() const {
+ }
private:
@@ -77,35 +77,35 @@ private:
// outputs
void OutputDetailedTranslationReport(
- OutputCollector *collector,
+ OutputCollector *collector,
+ const search::Applied *applied,
+ const Sentence &sentence,
+ long translationId) const;
+ void OutputTranslationOptions(std::ostream &out,
+ ApplicationContext &applicationContext,
+ const search::Applied *applied,
+ const Sentence &sentence,
+ long translationId) const;
+ void OutputTranslationOption(std::ostream &out,
+ ApplicationContext &applicationContext,
+ const search::Applied *applied,
+ const Sentence &sentence,
+ long translationId) const;
+ void ReconstructApplicationContext(const search::Applied *applied,
+ const Sentence &sentence,
+ ApplicationContext &context) const;
+ void OutputTreeFragmentsTranslationOptions(std::ostream &out,
+ ApplicationContext &applicationContext,
const search::Applied *applied,
const Sentence &sentence,
long translationId) const;
- void OutputTranslationOptions(std::ostream &out,
- ApplicationContext &applicationContext,
- const search::Applied *applied,
- const Sentence &sentence,
- long translationId) const;
- void OutputTranslationOption(std::ostream &out,
- ApplicationContext &applicationContext,
- const search::Applied *applied,
- const Sentence &sentence,
- long translationId) const;
- void ReconstructApplicationContext(const search::Applied *applied,
- const Sentence &sentence,
- ApplicationContext &context) const;
- void OutputTreeFragmentsTranslationOptions(std::ostream &out,
- ApplicationContext &applicationContext,
- const search::Applied *applied,
- const Sentence &sentence,
- long translationId) const;
- void OutputBestHypo(OutputCollector *collector, search::Applied applied, long translationId) const;
- void OutputBestNone(OutputCollector *collector, long translationId) const;
-
- void OutputUnknowns(OutputCollector *collector) const
- {}
- void CalcDecoderStatistics() const
- {}
+ void OutputBestHypo(OutputCollector *collector, search::Applied applied, long translationId) const;
+ void OutputBestNone(OutputCollector *collector, long translationId) const;
+
+ void OutputUnknowns(OutputCollector *collector) const {
+ }
+ void CalcDecoderStatistics() const {
+ }
};
diff --git a/moses/InputPath.cpp b/moses/InputPath.cpp
index 3800bcb1b..ab7c9c782 100644
--- a/moses/InputPath.cpp
+++ b/moses/InputPath.cpp
@@ -86,7 +86,7 @@ size_t InputPath::GetTotalRuleSize() const
std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> >::const_iterator iter;
for (iter = m_targetPhrases.begin(); iter != m_targetPhrases.end(); ++iter) {
// const PhraseDictionary *pt = iter->first;
- const TargetPhraseCollection *tpColl = iter->second.first;
+ const TargetPhraseCollection *tpColl = iter->second.first;
if (tpColl) {
ret += tpColl->GetSize();
diff --git a/moses/InputPath.h b/moses/InputPath.h
index 0d9579730..c67d88795 100644
--- a/moses/InputPath.h
+++ b/moses/InputPath.h
@@ -54,8 +54,8 @@ public:
: m_prevPath(NULL)
, m_range(NOT_FOUND, NOT_FOUND)
, m_inputScore(NULL)
- , m_nextNode(NOT_FOUND)
- {}
+ , m_nextNode(NOT_FOUND) {
+ }
InputPath(const Phrase &phrase, const NonTerminalSet &sourceNonTerms, const WordsRange &range, const InputPath *prevNode
,const ScorePair *inputScore);
diff --git a/moses/LM/Base.cpp b/moses/LM/Base.cpp
index db71119d5..76a6336c3 100644
--- a/moses/LM/Base.cpp
+++ b/moses/LM/Base.cpp
@@ -70,9 +70,9 @@ void LanguageModel::ReportHistoryOrder(std::ostream &out,const Phrase &phrase) c
}
void LanguageModel::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
// contains factors used by this LM
float fullScore, nGramScore;
diff --git a/moses/LM/Base.h b/moses/LM/Base.h
index 016975d06..eb0a98ca1 100644
--- a/moses/LM/Base.h
+++ b/moses/LM/Base.h
@@ -88,21 +88,21 @@ public:
virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const;
virtual void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
-
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
+
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const
- {}
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {
+ }
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const TranslationOptionList &translationOptionList) const {
+ }
};
diff --git a/moses/LM/BilingualLM.cpp b/moses/LM/BilingualLM.cpp
index 118b24aab..97f3d55e9 100644
--- a/moses/LM/BilingualLM.cpp
+++ b/moses/LM/BilingualLM.cpp
@@ -4,7 +4,8 @@
using namespace std;
-namespace Moses {
+namespace Moses
+{
int BilingualLMState::Compare(const FFState& other) const
{
@@ -17,17 +18,19 @@ int BilingualLMState::Compare(const FFState& other) const
////////////////////////////////////////////////////////////////
BilingualLM::BilingualLM(const std::string &line)
- : StatefulFeatureFunction(1, line),
- word_factortype(0) {
+ : StatefulFeatureFunction(1, line),
+ word_factortype(0)
+{
FactorCollection& factorFactory = FactorCollection::Instance(); //Factor Factory to use for BOS_ and EOS_
BOS_factor = factorFactory.AddFactor(BOS_);
BOS_word.SetFactor(0, BOS_factor);
EOS_factor = factorFactory.AddFactor(EOS_);
EOS_word.SetFactor(0, EOS_factor);
-
+
}
-void BilingualLM::Load(){
+void BilingualLM::Load()
+{
ReadParameters();
loadModel();
}
@@ -35,14 +38,15 @@ void BilingualLM::Load(){
//Populates words with amount words from the targetPhrase from the previous hypothesis where
//words[0] is the last word of the previous hypothesis, words[1] is the second last etc...
void BilingualLM::requestPrevTargetNgrams(
- const Hypothesis &cur_hypo, int amount, std::vector<int> &words) const {
+ const Hypothesis &cur_hypo, int amount, std::vector<int> &words) const
+{
const Hypothesis * prev_hyp = cur_hypo.GetPrevHypo();
int found = 0;
while (prev_hyp && found != amount) {
const TargetPhrase& currTargetPhrase = prev_hyp->GetCurrTargetPhrase();
- for (int i = currTargetPhrase.GetSize() - 1; i> -1; i--){
- if (found != amount){
+ for (int i = currTargetPhrase.GetSize() - 1; i> -1; i--) {
+ if (found != amount) {
const Word& word = currTargetPhrase.GetWord(i);
words[found] = getNeuralLMId(word, false);
found++;
@@ -55,18 +59,19 @@ void BilingualLM::requestPrevTargetNgrams(
}
int neuralLM_wordID = getNeuralLMId(BOS_word, false);
- for (int i = found; i < amount; i++){
+ for (int i = found; i < amount; i++) {
words[i] = neuralLM_wordID;
}
}
-//Populates the words vector with target_ngrams sized that also contains the current word we are looking at.
+//Populates the words vector with target_ngrams sized that also contains the current word we are looking at.
//(in effect target_ngrams + 1)
void BilingualLM::getTargetWords(
- const Hypothesis &cur_hypo,
- const TargetPhrase &targetPhrase,
- int current_word_index,
- std::vector<int> &words) const {
+ const Hypothesis &cur_hypo,
+ const TargetPhrase &targetPhrase,
+ int current_word_index,
+ std::vector<int> &words) const
+{
//Check if we need to look at previous target phrases
int additional_needed = current_word_index - target_ngrams;
if (additional_needed < 0) {
@@ -87,7 +92,7 @@ void BilingualLM::getTargetWords(
}
} else {
//We haven't added any words, proceed as before
- for (int i = current_word_index - target_ngrams; i <= current_word_index; i++){
+ for (int i = current_word_index - target_ngrams; i <= current_word_index; i++) {
const Word& word = targetPhrase.GetWord(i);
words.push_back(getNeuralLMId(word, false));
}
@@ -97,7 +102,8 @@ void BilingualLM::getTargetWords(
//Returns source words in the way NeuralLM expects them.
size_t BilingualLM::selectMiddleAlignment(
- const set<size_t>& alignment_links) const {
+ const set<size_t>& alignment_links) const
+{
set<size_t>::iterator it = alignment_links.begin();
for (int i = 0; i < (alignment_links.size() - 1) / 2; ++i) {
@@ -108,11 +114,12 @@ size_t BilingualLM::selectMiddleAlignment(
}
void BilingualLM::getSourceWords(
- const TargetPhrase &targetPhrase,
- int targetWordIdx,
- const Sentence &source_sent,
- const WordsRange &sourceWordRange,
- std::vector<int> &words) const {
+ const TargetPhrase &targetPhrase,
+ int targetWordIdx,
+ const Sentence &source_sent,
+ const WordsRange &sourceWordRange,
+ std::vector<int> &words) const
+{
//Get source context
//Get alignment for the word we require
@@ -123,7 +130,7 @@ void BilingualLM::getSourceWords(
std::set<size_t> last_word_al;
for (int j = 0; j < targetPhrase.GetSize(); j++) {
// Find the nearest aligned word with preference for right.
- if ((targetWordIdx + j) < targetPhrase.GetSize()){
+ if ((targetWordIdx + j) < targetPhrase.GetSize()) {
last_word_al = alignments.GetAlignmentsForTarget(targetWordIdx + j);
if (!last_word_al.empty()) {
break;
@@ -146,7 +153,7 @@ void BilingualLM::getSourceWords(
//It should never be the case the the word_al size would be zero, but several times this has happened because
//of a corrupt phrase table. It is best to have this check here, as it makes debugging the problem a lot easier.
UTIL_THROW_IF2(last_word_al.size() == 0,
- "A target phrase with no alignments detected! " << targetPhrase << "Check if there is something wrong with your phrase table.");
+ "A target phrase with no alignments detected! " << targetPhrase << "Check if there is something wrong with your phrase table.");
size_t source_center_index = selectMiddleAlignment(last_word_al);
// We have found the alignment. Now determine how much to shift by to get the actual source word index.
size_t phrase_start_pos = sourceWordRange.GetStartPos();
@@ -156,7 +163,8 @@ void BilingualLM::getSourceWords(
appendSourceWordsToVector(source_sent, words, source_word_mid_idx);
}
-size_t BilingualLM::getState(const Hypothesis& cur_hypo) const {
+size_t BilingualLM::getState(const Hypothesis& cur_hypo) const
+{
const TargetPhrase &targetPhrase = cur_hypo.GetCurrTargetPhrase();
size_t hashCode = 0;
@@ -190,25 +198,26 @@ size_t BilingualLM::getState(const Hypothesis& cur_hypo) const {
}
void BilingualLM::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {}
void BilingualLM::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
}
FFState* BilingualLM::EvaluateWhenApplied(
- const Hypothesis& cur_hypo,
- const FFState* prev_state,
- ScoreComponentCollection* accumulator) const {
+ const Hypothesis& cur_hypo,
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const
+{
Manager& manager = cur_hypo.GetManager();
const Sentence& source_sent = static_cast<const Sentence&>(manager.GetSource());
@@ -223,9 +232,9 @@ FFState* BilingualLM::EvaluateWhenApplied(
const WordsRange& sourceWordRange = cur_hypo.GetCurrSourceWordsRange(); //Source words range to calculate offsets
// For each word in the current target phrase get its LM score.
- for (int i = 0; i < currTargetPhrase.GetSize(); i++){
+ for (int i = 0; i < currTargetPhrase.GetSize(); i++) {
getSourceWords(
- currTargetPhrase, i, source_sent, sourceWordRange, source_words);
+ currTargetPhrase, i, source_sent, sourceWordRange, source_words);
getTargetWords(cur_hypo, currTargetPhrase, i, target_words);
value += Score(source_words, target_words);
@@ -234,30 +243,32 @@ FFState* BilingualLM::EvaluateWhenApplied(
target_words.clear();
}
- size_t new_state = getState(cur_hypo);
+ size_t new_state = getState(cur_hypo);
accumulator->PlusEquals(this, value);
return new BilingualLMState(new_state);
}
-void BilingualLM::getAllTargetIdsChart(const ChartHypothesis& cur_hypo, size_t featureID, std::vector<int>& wordIds) const {
+void BilingualLM::getAllTargetIdsChart(const ChartHypothesis& cur_hypo, size_t featureID, std::vector<int>& wordIds) const
+{
const TargetPhrase targetPhrase = cur_hypo.GetCurrTargetPhrase();
- for (int i = 0; i < targetPhrase.GetSize(); i++){
- if (targetPhrase.GetWord(i).IsNonTerminal()){ //Nonterminal get from prev state
+ for (int i = 0; i < targetPhrase.GetSize(); i++) {
+ if (targetPhrase.GetWord(i).IsNonTerminal()) { //Nonterminal get from prev state
const ChartHypothesis * prev_hypo = cur_hypo.GetPrevHypo(targetPhrase.GetAlignNonTerm().GetNonTermIndexMap()[i]);
const BilingualLMState * prev_state = static_cast<const BilingualLMState *>(prev_hypo->GetFFState(featureID));
const std::vector<int> prevWordIDs = prev_state->GetWordIdsVector();
- for (std::vector<int>::const_iterator it = prevWordIDs.begin(); it!= prevWordIDs.end(); it++){
+ for (std::vector<int>::const_iterator it = prevWordIDs.begin(); it!= prevWordIDs.end(); it++) {
wordIds.push_back(*it);
}
} else {
wordIds.push_back(getNeuralLMId(targetPhrase.GetWord(i), false));
}
}
-}
+}
-void BilingualLM::getAllAlignments(const ChartHypothesis& cur_hypo, size_t featureID, std::vector<int>& word_alignments) const {
+void BilingualLM::getAllAlignments(const ChartHypothesis& cur_hypo, size_t featureID, std::vector<int>& word_alignments) const
+{
const TargetPhrase targetPhrase = cur_hypo.GetCurrTargetPhrase();
int source_word_mid_idx; //The word alignment
@@ -272,35 +283,35 @@ void BilingualLM::getAllAlignments(const ChartHypothesis& cur_hypo, size_t featu
absolute_source_position[0] = cur_hypo.GetCurrSourceRange().GetStartPos();
// get last absolute position of each source nonterminal symbol
for (int i = 0; i < targetPhrase.GetSize(); i++) {
- if (targetPhrase.GetWord(i).IsNonTerminal()) {
- const ChartHypothesis * prev_hypo = cur_hypo.GetPrevHypo(targetPhrase.GetAlignNonTerm().GetNonTermIndexMap()[i]);
- absolute_source_position[targetPhrase.GetAlignNonTerm().GetNonTermIndexMap2()[i]] = prev_hypo->GetCurrSourceRange().GetEndPos();
- }
+ if (targetPhrase.GetWord(i).IsNonTerminal()) {
+ const ChartHypothesis * prev_hypo = cur_hypo.GetPrevHypo(targetPhrase.GetAlignNonTerm().GetNonTermIndexMap()[i]);
+ absolute_source_position[targetPhrase.GetAlignNonTerm().GetNonTermIndexMap2()[i]] = prev_hypo->GetCurrSourceRange().GetEndPos();
+ }
}
// set absolute position of all source terminal symbols based on absolute position of previous symbol
for (int i = 0; i != absolute_source_position.size(); i++) {
- if (i && absolute_source_position[i] == 0) {
- absolute_source_position[i] = absolute_source_position[i-1] + 1;
- }
+ if (i && absolute_source_position[i] == 0) {
+ absolute_source_position[i] = absolute_source_position[i-1] + 1;
+ }
}
- for (int i = 0; i < targetPhrase.GetSize(); i++){
+ for (int i = 0; i < targetPhrase.GetSize(); i++) {
//Sometimes we have to traverse more than one target words because of
//unaligned words. This is O(n^2) in worst case, but usually closer to O(n)
- if (targetPhrase.GetWord(i).IsNonTerminal()){
+ if (targetPhrase.GetWord(i).IsNonTerminal()) {
//If we have a non terminal we can get the alignments from the previous state
const ChartHypothesis * prev_hypo = cur_hypo.GetPrevHypo(targetPhrase.GetAlignNonTerm().GetNonTermIndexMap()[i]);
const BilingualLMState * prev_state = static_cast<const BilingualLMState *>(prev_hypo->GetFFState(featureID));
const std::vector<int> prevWordAls = prev_state->GetWordAlignmentVector();
- for (std::vector<int>::const_iterator it = prevWordAls.begin(); it!= prevWordAls.end(); it++){
+ for (std::vector<int>::const_iterator it = prevWordAls.begin(); it!= prevWordAls.end(); it++) {
word_alignments.push_back(*it);
}
} else {
bool resolvedIndexis = false; //If we are aligning to an existing nonterm we don't need to calculate offsets
std::set<size_t> word_al = alignments.GetAlignmentsForTarget(i);
if (word_al.empty()) {
- for (int j = 1; j < targetPhrase.GetSize(); j++){
+ for (int j = 1; j < targetPhrase.GetSize(); j++) {
//Try to get alignment from the current word and if it is unaligned,
//try from the first word to the right and then to the left
if ((i+j) < targetPhrase.GetSize()) {
@@ -336,11 +347,11 @@ void BilingualLM::getAllAlignments(const ChartHypothesis& cur_hypo, size_t featu
}
}
- if (!resolvedIndexis){
+ if (!resolvedIndexis) {
//It should never be the case the the word_al size would be zero, but several times this has happened because
//of a corrupt phrase table. It is best to have this check here, as it makes debugging the problem a lot easier.
UTIL_THROW_IF2(word_al.size() == 0,
- "A target phrase with no alignments detected! " << targetPhrase << "Check if there is something wrong with your phrase table.");
+ "A target phrase with no alignments detected! " << targetPhrase << "Check if there is something wrong with your phrase table.");
size_t source_center_index = selectMiddleAlignment(word_al);
// We have found the alignment. Now determine how much to shift by to get the actual source word index.
source_word_mid_idx = absolute_source_position[source_center_index];
@@ -351,9 +362,10 @@ void BilingualLM::getAllAlignments(const ChartHypothesis& cur_hypo, size_t featu
}
-size_t BilingualLM::getStateChart(std::vector<int>& neuralLMids) const {
+size_t BilingualLM::getStateChart(std::vector<int>& neuralLMids) const
+{
size_t hashCode = 0;
- for (int i = neuralLMids.size() - target_ngrams; i < neuralLMids.size(); i++){
+ for (int i = neuralLMids.size() - target_ngrams; i < neuralLMids.size(); i++) {
int neuralLM_wordID;
if (i < 0) {
neuralLM_wordID = getNeuralLMId(BOS_word, false);
@@ -366,10 +378,11 @@ size_t BilingualLM::getStateChart(std::vector<int>& neuralLMids) const {
}
void BilingualLM::getTargetWordsChart(
- std::vector<int>& neuralLMids,
- int current_word_index,
- std::vector<int>& words,
- bool sentence_begin) const {
+ std::vector<int>& neuralLMids,
+ int current_word_index,
+ std::vector<int>& words,
+ bool sentence_begin) const
+{
for (int i = current_word_index - target_ngrams; i <= current_word_index; i++) {
if (i < 0) {
@@ -384,7 +397,8 @@ void BilingualLM::getTargetWordsChart(
}
}
-void BilingualLM::appendSourceWordsToVector(const Sentence &source_sent, std::vector<int> &words, int source_word_mid_idx) const {
+void BilingualLM::appendSourceWordsToVector(const Sentence &source_sent, std::vector<int> &words, int source_word_mid_idx) const
+{
//Define begin and end indexes of the lookup. Cases for even and odd ngrams
//This can result in indexes which span larger than the length of the source phrase.
//In this case we just
@@ -415,9 +429,10 @@ void BilingualLM::appendSourceWordsToVector(const Sentence &source_sent, std::ve
}
FFState* BilingualLM::EvaluateWhenApplied(
- const ChartHypothesis& cur_hypo,
- int featureID, /* - used to index the state in the previous hypotheses */
- ScoreComponentCollection* accumulator) const {
+ const ChartHypothesis& cur_hypo,
+ int featureID, /* - used to index the state in the previous hypotheses */
+ ScoreComponentCollection* accumulator) const
+{
//Init vectors
std::vector<int> source_words;
source_words.reserve(source_ngrams);
@@ -431,7 +446,7 @@ FFState* BilingualLM::EvaluateWhenApplied(
std::vector<int> alignments;
//Estimate size and reserve vectors to avoid reallocation
int future_size = currTargetPhrase.GetNumTerminals();
- for (int i =0; i<currTargetPhrase.GetNumNonTerminals(); i++){
+ for (int i =0; i<currTargetPhrase.GetNumNonTerminals(); i++) {
const ChartHypothesis * prev_hypo = cur_hypo.GetPrevHypo(i); //We need to look at the nonterm on the left.
future_size += prev_hypo->GetCurrTargetPhrase().GetSize();
}
@@ -442,10 +457,10 @@ FFState* BilingualLM::EvaluateWhenApplied(
getAllAlignments(cur_hypo, featureID, alignments);
bool sentence_begin = false; //Check if this hypothesis' target words are located in the beginning of the sentence
- if (neuralLMids[0] == getNeuralLMId(BOS_word, false)){
+ if (neuralLMids[0] == getNeuralLMId(BOS_word, false)) {
sentence_begin = true;
}
-
+
//Get source sentence
const ChartManager& manager = cur_hypo.GetManager();
const Sentence& source_sent = static_cast<const Sentence&>(manager.GetSource());
@@ -471,7 +486,8 @@ FFState* BilingualLM::EvaluateWhenApplied(
return new BilingualLMState(new_state, alignments, neuralLMids);
}
-void BilingualLM::SetParameter(const std::string& key, const std::string& value) {
+void BilingualLM::SetParameter(const std::string& key, const std::string& value)
+{
if (key == "path") {
m_filePath = value;
} else {
diff --git a/moses/LM/BilingualLM.h b/moses/LM/BilingualLM.h
index 2508dfa6f..67a6c2ea1 100644
--- a/moses/LM/BilingualLM.h
+++ b/moses/LM/BilingualLM.h
@@ -21,13 +21,13 @@ class BilingualLMState : public FFState
std::vector<int> neuralLM_ids; //Carry the neuralLMids of the previous target phrase to avoid calling GetWholePhrase. Hiero only.
public:
BilingualLMState(size_t hash)
- :m_hash(hash)
- {}
+ :m_hash(hash) {
+ }
BilingualLMState(size_t hash, std::vector<int>& word_alignments_vec, std::vector<int>& neural_ids)
:m_hash(hash)
, word_alignments(word_alignments_vec)
- , neuralLM_ids(neural_ids)
- {}
+ , neuralLM_ids(neural_ids) {
+ }
const std::vector<int>& GetWordAlignmentVector() const {
return word_alignments;
@@ -40,8 +40,9 @@ public:
int Compare(const FFState& other) const;
};
-class BilingualLM : public StatefulFeatureFunction {
- private:
+class BilingualLM : public StatefulFeatureFunction
+{
+private:
virtual float Score(std::vector<int>& source_words, std::vector<int>& target_words) const = 0;
virtual int getNeuralLMId(const Word& word, bool is_source_word) const = 0;
@@ -53,19 +54,19 @@ class BilingualLM : public StatefulFeatureFunction {
size_t selectMiddleAlignment(const std::set<size_t>& alignment_links) const;
void getSourceWords(
- const TargetPhrase &targetPhrase,
- int targetWordIdx,
- const Sentence &source_sent,
- const WordsRange &sourceWordRange,
- std::vector<int> &words) const;
+ const TargetPhrase &targetPhrase,
+ int targetWordIdx,
+ const Sentence &source_sent,
+ const WordsRange &sourceWordRange,
+ std::vector<int> &words) const;
void appendSourceWordsToVector(const Sentence &source_sent, std::vector<int> &words, int source_word_mid_idx) const;
void getTargetWords(
- const Hypothesis &cur_hypo,
- const TargetPhrase &targetPhrase,
- int current_word_index,
- std::vector<int> &words) const;
+ const Hypothesis &cur_hypo,
+ const TargetPhrase &targetPhrase,
+ int current_word_index,
+ std::vector<int> &words) const;
size_t getState(const Hypothesis &cur_hypo) const;
@@ -112,31 +113,31 @@ public:
void Load();
void EvaluateInIsolation(
- const Phrase &source,
- const TargetPhrase &targetPhrase,
- ScoreComponentCollection &scoreBreakdown,
- ScoreComponentCollection &estimatedFutureScore) const;
+ const Phrase &source,
+ const TargetPhrase &targetPhrase,
+ ScoreComponentCollection &scoreBreakdown,
+ ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(
- const InputType &input,
- const InputPath &inputPath,
- const TargetPhrase &targetPhrase,
- const StackVec *stackVec,
- ScoreComponentCollection &scoreBreakdown,
- ScoreComponentCollection *estimatedFutureScore = NULL) const;
+ const InputType &input,
+ const InputPath &inputPath,
+ const TargetPhrase &targetPhrase,
+ const StackVec *stackVec,
+ ScoreComponentCollection &scoreBreakdown,
+ ScoreComponentCollection *estimatedFutureScore = NULL) const;
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const {};
+ , const TranslationOptionList &translationOptionList) const {};
FFState* EvaluateWhenApplied(
- const Hypothesis& cur_hypo,
- const FFState* prev_state,
- ScoreComponentCollection* accumulator) const;
+ const Hypothesis& cur_hypo,
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const;
FFState* EvaluateWhenApplied(
- const ChartHypothesis& cur_hypo ,
- int featureID, /* - used to index the state in the previous hypotheses */
- ScoreComponentCollection* accumulator) const;
+ const ChartHypothesis& cur_hypo ,
+ int featureID, /* - used to index the state in the previous hypotheses */
+ ScoreComponentCollection* accumulator) const;
void SetParameter(const std::string& key, const std::string& value);
};
diff --git a/moses/LM/DALMWrapper.cpp b/moses/LM/DALMWrapper.cpp
index 74f9376b8..638060b0e 100644
--- a/moses/LM/DALMWrapper.cpp
+++ b/moses/LM/DALMWrapper.cpp
@@ -16,24 +16,25 @@
using namespace std;
/////////////////////////
-void read_ini(const char *inifile, string &model, string &words, string &wordstxt){
- ifstream ifs(inifile);
- string line;
-
- getline(ifs, line);
- while(ifs){
- unsigned int pos = line.find("=");
- string key = line.substr(0, pos);
- string value = line.substr(pos+1, line.size()-pos);
- if(key=="MODEL"){
- model = value;
- }else if(key=="WORDS"){
- words = value;
- }else if(key=="WORDSTXT"){
- wordstxt = value;
- }
- getline(ifs, line);
- }
+void read_ini(const char *inifile, string &model, string &words, string &wordstxt)
+{
+ ifstream ifs(inifile);
+ string line;
+
+ getline(ifs, line);
+ while(ifs) {
+ unsigned int pos = line.find("=");
+ string key = line.substr(0, pos);
+ string value = line.substr(pos+1, line.size()-pos);
+ if(key=="MODEL") {
+ model = value;
+ } else if(key=="WORDS") {
+ words = value;
+ } else if(key=="WORDSTXT") {
+ wordstxt = value;
+ }
+ getline(ifs, line);
+ }
}
/////////////////////////
@@ -43,140 +44,140 @@ namespace Moses
class DALMState : public FFState
{
private:
- DALM::State state;
+ DALM::State state;
public:
- DALMState(){
- }
-
- DALMState(const DALMState &from){
- state = from.state;
- }
-
- virtual ~DALMState(){
- }
-
- void reset(const DALMState &from){
- state = from.state;
- }
-
- virtual int Compare(const FFState& other) const{
- const DALMState &o = static_cast<const DALMState &>(other);
- if(state.get_count() < o.state.get_count()) return -1;
- else if(state.get_count() > o.state.get_count()) return 1;
- else return state.compare(o.state);
- }
-
- DALM::State &get_state(){
- return state;
- }
-
- void refresh(){
- state.refresh();
- }
+ DALMState() {
+ }
+
+ DALMState(const DALMState &from) {
+ state = from.state;
+ }
+
+ virtual ~DALMState() {
+ }
+
+ void reset(const DALMState &from) {
+ state = from.state;
+ }
+
+ virtual int Compare(const FFState& other) const {
+ const DALMState &o = static_cast<const DALMState &>(other);
+ if(state.get_count() < o.state.get_count()) return -1;
+ else if(state.get_count() > o.state.get_count()) return 1;
+ else return state.compare(o.state);
+ }
+
+ DALM::State &get_state() {
+ return state;
+ }
+
+ void refresh() {
+ state.refresh();
+ }
};
class DALMChartState : public FFState
{
private:
- DALM::Fragment prefixFragments[DALM_MAX_ORDER-1];
- unsigned char prefixLength;
- DALM::State rightContext;
- bool isLarge;
- size_t hypoSize;
+ DALM::Fragment prefixFragments[DALM_MAX_ORDER-1];
+ unsigned char prefixLength;
+ DALM::State rightContext;
+ bool isLarge;
+ size_t hypoSize;
public:
- DALMChartState()
- : prefixLength(0),
- isLarge(false)
- {}
-
- /*
- DALMChartState(const DALMChartState &other)
- : prefixLength(other.prefixLength),
- rightContext(other.rightContext),
- isLarge(other.isLarge)
- {
- std::copy(
- other.prefixFragments,
- other.prefixFragments+other.prefixLength,
- prefixFragments
- );
- }
- */
-
- virtual ~DALMChartState(){
- }
-
- /*
- DALMChartState &operator=(const DALMChartState &other){
- prefixLength = other.prefixLength;
- std::copy(
- other.prefixFragments,
- other.prefixFragments+other.prefixLength,
- prefixFragments
- );
- rightContext = other.rightContext;
- isLarge=other.isLarge;
-
- return *this;
- }
- */
-
- inline unsigned char GetPrefixLength() const{
- return prefixLength;
- }
-
- inline unsigned char &GetPrefixLength(){
- return prefixLength;
- }
-
- inline const DALM::Fragment *GetPrefixFragments() const{
- return prefixFragments;
- }
-
- inline DALM::Fragment *GetPrefixFragments(){
- return prefixFragments;
- }
-
- inline const DALM::State &GetRightContext() const{
- return rightContext;
- }
-
- inline DALM::State &GetRightContext() {
- return rightContext;
- }
-
- inline bool LargeEnough() const{
- return isLarge;
- }
-
- inline void SetAsLarge() {
- isLarge=true;
- }
-
- inline size_t &GetHypoSize() {
- return hypoSize;
- }
- inline size_t GetHypoSize() const {
- return hypoSize;
- }
-
- virtual int Compare(const FFState& other) const{
- const DALMChartState &o = static_cast<const DALMChartState &>(other);
- if(prefixLength < o.prefixLength) return -1;
- if(prefixLength > o.prefixLength) return 1;
- if(prefixLength!=0){
- const DALM::Fragment &f = prefixFragments[prefixLength-1];
- const DALM::Fragment &of = o.prefixFragments[prefixLength-1];
- int ret = DALM::compare_fragments(f,of);
- if(ret != 0) return ret;
- }
- if(isLarge != o.isLarge) return (int)isLarge - (int)o.isLarge;
- if(rightContext.get_count() < o.rightContext.get_count()) return -1;
- if(rightContext.get_count() > o.rightContext.get_count()) return 1;
- return rightContext.compare(o.rightContext);
- }
+ DALMChartState()
+ : prefixLength(0),
+ isLarge(false) {
+ }
+
+ /*
+ DALMChartState(const DALMChartState &other)
+ : prefixLength(other.prefixLength),
+ rightContext(other.rightContext),
+ isLarge(other.isLarge)
+ {
+ std::copy(
+ other.prefixFragments,
+ other.prefixFragments+other.prefixLength,
+ prefixFragments
+ );
+ }
+ */
+
+ virtual ~DALMChartState() {
+ }
+
+ /*
+ DALMChartState &operator=(const DALMChartState &other){
+ prefixLength = other.prefixLength;
+ std::copy(
+ other.prefixFragments,
+ other.prefixFragments+other.prefixLength,
+ prefixFragments
+ );
+ rightContext = other.rightContext;
+ isLarge=other.isLarge;
+
+ return *this;
+ }
+ */
+
+ inline unsigned char GetPrefixLength() const {
+ return prefixLength;
+ }
+
+ inline unsigned char &GetPrefixLength() {
+ return prefixLength;
+ }
+
+ inline const DALM::Fragment *GetPrefixFragments() const {
+ return prefixFragments;
+ }
+
+ inline DALM::Fragment *GetPrefixFragments() {
+ return prefixFragments;
+ }
+
+ inline const DALM::State &GetRightContext() const {
+ return rightContext;
+ }
+
+ inline DALM::State &GetRightContext() {
+ return rightContext;
+ }
+
+ inline bool LargeEnough() const {
+ return isLarge;
+ }
+
+ inline void SetAsLarge() {
+ isLarge=true;
+ }
+
+ inline size_t &GetHypoSize() {
+ return hypoSize;
+ }
+ inline size_t GetHypoSize() const {
+ return hypoSize;
+ }
+
+ virtual int Compare(const FFState& other) const {
+ const DALMChartState &o = static_cast<const DALMChartState &>(other);
+ if(prefixLength < o.prefixLength) return -1;
+ if(prefixLength > o.prefixLength) return 1;
+ if(prefixLength!=0) {
+ const DALM::Fragment &f = prefixFragments[prefixLength-1];
+ const DALM::Fragment &of = o.prefixFragments[prefixLength-1];
+ int ret = DALM::compare_fragments(f,of);
+ if(ret != 0) return ret;
+ }
+ if(isLarge != o.isLarge) return (int)isLarge - (int)o.isLarge;
+ if(rightContext.get_count() < o.rightContext.get_count()) return -1;
+ if(rightContext.get_count() > o.rightContext.get_count()) return 1;
+ return rightContext.compare(o.rightContext);
+ }
};
LanguageModelDALM::LanguageModelDALM(const std::string &line)
@@ -191,62 +192,64 @@ LanguageModelDALM::LanguageModelDALM(const std::string &line)
LanguageModelDALM::~LanguageModelDALM()
{
- delete m_logger;
- delete m_vocab;
- delete m_lm;
+ delete m_logger;
+ delete m_vocab;
+ delete m_lm;
}
void LanguageModelDALM::Load()
{
- /////////////////////
- // READING INIFILE //
- /////////////////////
- string inifile= m_filePath + "/dalm.ini";
-
- string model; // Path to the double-array file.
- string words; // Path to the vocabulary file.
- string wordstxt; //Path to the vocabulary file in text format.
- read_ini(inifile.c_str(), model, words, wordstxt);
-
- model = m_filePath + "/" + model;
- words = m_filePath + "/" + words;
- wordstxt = m_filePath + "/" + wordstxt;
-
- UTIL_THROW_IF(model.empty() || words.empty() || wordstxt.empty(),
- util::FileOpenException,
- "Failed to read DALM ini file " << m_filePath << ". Probably doesn't exist");
-
- ////////////////
- // LOADING LM //
- ////////////////
-
- // Preparing a logger object.
- m_logger = new DALM::Logger(stderr);
- m_logger->setLevel(DALM::LOGGER_INFO);
-
- // Load the vocabulary file.
- m_vocab = new DALM::Vocabulary(words, *m_logger);
-
- // Load the language model.
- m_lm = new DALM::LM(model, *m_vocab, m_nGramOrder, *m_logger);
-
- wid_start = m_vocab->lookup(BOS_);
- wid_end = m_vocab->lookup(EOS_);
-
- // vocab mapping
- CreateVocabMapping(wordstxt);
-
- FactorCollection &collection = FactorCollection::Instance();
- m_beginSentenceFactor = collection.AddFactor(BOS_);
+ /////////////////////
+ // READING INIFILE //
+ /////////////////////
+ string inifile= m_filePath + "/dalm.ini";
+
+ string model; // Path to the double-array file.
+ string words; // Path to the vocabulary file.
+ string wordstxt; //Path to the vocabulary file in text format.
+ read_ini(inifile.c_str(), model, words, wordstxt);
+
+ model = m_filePath + "/" + model;
+ words = m_filePath + "/" + words;
+ wordstxt = m_filePath + "/" + wordstxt;
+
+ UTIL_THROW_IF(model.empty() || words.empty() || wordstxt.empty(),
+ util::FileOpenException,
+ "Failed to read DALM ini file " << m_filePath << ". Probably doesn't exist");
+
+ ////////////////
+ // LOADING LM //
+ ////////////////
+
+ // Preparing a logger object.
+ m_logger = new DALM::Logger(stderr);
+ m_logger->setLevel(DALM::LOGGER_INFO);
+
+ // Load the vocabulary file.
+ m_vocab = new DALM::Vocabulary(words, *m_logger);
+
+ // Load the language model.
+ m_lm = new DALM::LM(model, *m_vocab, m_nGramOrder, *m_logger);
+
+ wid_start = m_vocab->lookup(BOS_);
+ wid_end = m_vocab->lookup(EOS_);
+
+ // vocab mapping
+ CreateVocabMapping(wordstxt);
+
+ FactorCollection &collection = FactorCollection::Instance();
+ m_beginSentenceFactor = collection.AddFactor(BOS_);
}
-const FFState *LanguageModelDALM::EmptyHypothesisState(const InputType &/*input*/) const{
- DALMState *s = new DALMState();
- m_lm->init_state(s->get_state());
- return s;
+const FFState *LanguageModelDALM::EmptyHypothesisState(const InputType &/*input*/) const
+{
+ DALMState *s = new DALMState();
+ m_lm->init_state(s->get_state());
+ return s;
}
-void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const{
+void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
+{
fullScore = 0;
ngramScore = 0;
@@ -254,18 +257,18 @@ void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float
size_t phraseSize = phrase.GetSize();
if (!phraseSize) return;
-
+
size_t currPos = 0;
size_t hist_count = 0;
- DALM::State state;
-
- if(phrase.GetWord(0).GetFactor(m_factorType) == m_beginSentenceFactor){
- m_lm->init_state(state);
- currPos++;
- hist_count++;
- }
-
- float score;
+ DALM::State state;
+
+ if(phrase.GetWord(0).GetFactor(m_factorType) == m_beginSentenceFactor) {
+ m_lm->init_state(state);
+ currPos++;
+ hist_count++;
+ }
+
+ float score;
while (currPos < phraseSize) {
const Word &word = phrase.GetWord(currPos);
hist_count++;
@@ -274,9 +277,9 @@ void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float
state.refresh();
hist_count = 0;
} else {
- DALM::VocabId wid = GetVocabId(word.GetFactor(m_factorType));
- score = m_lm->query(wid, state);
- fullScore += score;
+ DALM::VocabId wid = GetVocabId(word.GetFactor(m_factorType));
+ score = m_lm->query(wid, state);
+ fullScore += score;
if (hist_count >= m_nGramOrder) ngramScore += score;
if (wid==m_vocab->unk()) ++oovCount;
}
@@ -284,41 +287,42 @@ void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float
currPos++;
}
- fullScore = TransformLMScore(fullScore);
- ngramScore = TransformLMScore(ngramScore);
+ fullScore = TransformLMScore(fullScore);
+ ngramScore = TransformLMScore(ngramScore);
}
-FFState *LanguageModelDALM::EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const{
+FFState *LanguageModelDALM::EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const
+{
// In this function, we only compute the LM scores of n-grams that overlap a
// phrase boundary. Phrase-internal scores are taken directly from the
// translation option.
- const DALMState *dalm_ps = static_cast<const DALMState *>(ps);
-
+ const DALMState *dalm_ps = static_cast<const DALMState *>(ps);
+
// Empty phrase added? nothing to be done
- if (hypo.GetCurrTargetLength() == 0){
+ if (hypo.GetCurrTargetLength() == 0) {
return dalm_ps ? new DALMState(*dalm_ps) : NULL;
}
-
+
const std::size_t begin = hypo.GetCurrTargetWordsRange().GetStartPos();
//[begin, end) in STL-like fashion.
const std::size_t end = hypo.GetCurrTargetWordsRange().GetEndPos() + 1;
const std::size_t adjust_end = std::min(end, begin + m_nGramOrder - 1);
-
+
DALMState *dalm_state = new DALMState(*dalm_ps);
- DALM::State &state = dalm_state->get_state();
+ DALM::State &state = dalm_state->get_state();
float score = 0.0;
- for(std::size_t position=begin; position < adjust_end; position++){
- score += m_lm->query(GetVocabId(hypo.GetWord(position).GetFactor(m_factorType)), state);
+ for(std::size_t position=begin; position < adjust_end; position++) {
+ score += m_lm->query(GetVocabId(hypo.GetWord(position).GetFactor(m_factorType)), state);
}
-
+
if (hypo.IsSourceCompleted()) {
// Score end of sentence.
std::vector<DALM::VocabId> indices(m_nGramOrder-1);
const DALM::VocabId *last = LastIDs(hypo, &indices.front());
m_lm->set_state(&indices.front(), (last-&indices.front()), state);
-
- score += m_lm->query(wid_end, state);
+
+ score += m_lm->query(wid_end, state);
} else if (adjust_end < end) {
// Get state after adding a long phrase.
std::vector<DALM::VocabId> indices(m_nGramOrder-1);
@@ -326,7 +330,7 @@ FFState *LanguageModelDALM::EvaluateWhenApplied(const Hypothesis &hypo, const FF
m_lm->set_state(&indices.front(), (last-&indices.front()), state);
}
- score = TransformLMScore(score);
+ score = TransformLMScore(score);
if (OOVFeatureEnabled()) {
std::vector<float> scores(2);
scores[0] = score;
@@ -335,53 +339,54 @@ FFState *LanguageModelDALM::EvaluateWhenApplied(const Hypothesis &hypo, const FF
} else {
out->PlusEquals(this, score);
}
-
+
return dalm_state;
}
-FFState *LanguageModelDALM::EvaluateWhenApplied(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const{
+FFState *LanguageModelDALM::EvaluateWhenApplied(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const
+{
// initialize language model context state
- DALMChartState *newState = new DALMChartState();
- DALM::State &state = newState->GetRightContext();
+ DALMChartState *newState = new DALMChartState();
+ DALM::State &state = newState->GetRightContext();
- DALM::Fragment *prefixFragments = newState->GetPrefixFragments();
- unsigned char &prefixLength = newState->GetPrefixLength();
- size_t &hypoSizeAll = newState->GetHypoSize();
+ DALM::Fragment *prefixFragments = newState->GetPrefixFragments();
+ unsigned char &prefixLength = newState->GetPrefixLength();
+ size_t &hypoSizeAll = newState->GetHypoSize();
// initial language model scores
float hypoScore = 0.0; // diffs of scores.
- const TargetPhrase &targetPhrase = hypo.GetCurrTargetPhrase();
- size_t hypoSize = targetPhrase.GetSize();
- hypoSizeAll = hypoSize;
+ const TargetPhrase &targetPhrase = hypo.GetCurrTargetPhrase();
+ size_t hypoSize = targetPhrase.GetSize();
+ hypoSizeAll = hypoSize;
// get index map for underlying hypotheses
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
targetPhrase.GetAlignNonTerm().GetNonTermIndexMap();
- size_t phrasePos = 0;
-
- // begginig of sentence.
- if(hypoSize > 0){
- const Word &word = targetPhrase.GetWord(0);
- if(word.GetFactor(m_factorType) == m_beginSentenceFactor){
- m_lm->init_state(state);
- // state is finalized.
- newState->SetAsLarge();
- phrasePos++;
- }else if(word.IsNonTerminal()){
+ size_t phrasePos = 0;
+
+ // begginig of sentence.
+ if(hypoSize > 0) {
+ const Word &word = targetPhrase.GetWord(0);
+ if(word.GetFactor(m_factorType) == m_beginSentenceFactor) {
+ m_lm->init_state(state);
+ // state is finalized.
+ newState->SetAsLarge();
+ phrasePos++;
+ } else if(word.IsNonTerminal()) {
// special case: rule starts with non-terminal -> copy everything
const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndexMap[0]);
const DALMChartState* prevState =
static_cast<const DALMChartState*>(prevHypo->GetFFState(featureID));
- // copy chart state
- (*newState) = (*prevState);
- hypoSizeAll = hypoSize+prevState->GetHypoSize()-1;
+ // copy chart state
+ (*newState) = (*prevState);
+ hypoSizeAll = hypoSize+prevState->GetHypoSize()-1;
- phrasePos++;
- }
+ phrasePos++;
+ }
}
// loop over rule
@@ -392,29 +397,29 @@ FFState *LanguageModelDALM::EvaluateWhenApplied(const ChartHypothesis& hypo, int
// regular word
if (!word.IsNonTerminal()) {
- EvaluateTerminal(
- word, hypoScore,
- newState, state,
- prefixFragments, prefixLength
- );
+ EvaluateTerminal(
+ word, hypoScore,
+ newState, state,
+ prefixFragments, prefixLength
+ );
}
// non-terminal, add phrase from underlying hypothesis
// internal non-terminal
else {
// look up underlying hypothesis
- const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndexMap[phrasePos]);
- const DALMChartState* prevState =
- static_cast<const DALMChartState*>(prevHypo->GetFFState(featureID));
- size_t prevTargetPhraseLength = prevHypo->GetCurrTargetPhrase().GetSize();
- hypoSizeAll += prevState->GetHypoSize()-1;
-
- EvaluateNonTerminal(
- word, hypoScore,
- newState, state,
- prefixFragments, prefixLength,
- prevState, prevTargetPhraseLength
- );
+ const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndexMap[phrasePos]);
+ const DALMChartState* prevState =
+ static_cast<const DALMChartState*>(prevHypo->GetFFState(featureID));
+ size_t prevTargetPhraseLength = prevHypo->GetCurrTargetPhrase().GetSize();
+ hypoSizeAll += prevState->GetHypoSize()-1;
+
+ EvaluateNonTerminal(
+ word, hypoScore,
+ newState, state,
+ prefixFragments, prefixLength,
+ prevState, prevTargetPhraseLength
+ );
}
}
@@ -433,80 +438,81 @@ void LanguageModelDALM::CreateVocabMapping(const std::string &wordstxt)
{
InputFileStream vocabStrm(wordstxt);
- std::vector< std::pair<std::size_t, DALM::VocabId> > vlist;
+ std::vector< std::pair<std::size_t, DALM::VocabId> > vlist;
string line;
- std::size_t max_fid = 0;
+ std::size_t max_fid = 0;
while(getline(vocabStrm, line)) {
- const Factor *factor = FactorCollection::Instance().AddFactor(line);
- std::size_t fid = factor->GetId();
- DALM::VocabId wid = m_vocab->lookup(line.c_str());
+ const Factor *factor = FactorCollection::Instance().AddFactor(line);
+ std::size_t fid = factor->GetId();
+ DALM::VocabId wid = m_vocab->lookup(line.c_str());
- vlist.push_back(std::pair<std::size_t, DALM::VocabId>(fid, wid));
- if(max_fid < fid) max_fid = fid;
+ vlist.push_back(std::pair<std::size_t, DALM::VocabId>(fid, wid));
+ if(max_fid < fid) max_fid = fid;
}
- for(std::size_t i = 0; i < m_vocabMap.size(); i++){
- m_vocabMap[i] = m_vocab->unk();
- }
+ for(std::size_t i = 0; i < m_vocabMap.size(); i++) {
+ m_vocabMap[i] = m_vocab->unk();
+ }
- m_vocabMap.resize(max_fid+1, m_vocab->unk());
- std::vector< std::pair<std::size_t, DALM::VocabId> >::iterator it = vlist.begin();
- while(it != vlist.end()){
- std::pair<std::size_t, DALM::VocabId> &entry = *it;
- m_vocabMap[entry.first] = entry.second;
+ m_vocabMap.resize(max_fid+1, m_vocab->unk());
+ std::vector< std::pair<std::size_t, DALM::VocabId> >::iterator it = vlist.begin();
+ while(it != vlist.end()) {
+ std::pair<std::size_t, DALM::VocabId> &entry = *it;
+ m_vocabMap[entry.first] = entry.second;
- ++it;
- }
+ ++it;
+ }
}
DALM::VocabId LanguageModelDALM::GetVocabId(const Factor *factor) const
{
- std::size_t fid = factor->GetId();
- return (m_vocabMap.size() > fid)? m_vocabMap[fid] : m_vocab->unk();
+ std::size_t fid = factor->GetId();
+ return (m_vocabMap.size() > fid)? m_vocabMap[fid] : m_vocab->unk();
}
void LanguageModelDALM::SetParameter(const std::string& key, const std::string& value)
{
if (key == "factor") {
m_factorType = Scan<FactorType>(value);
- } else if (key == "order") {
- m_nGramOrder = Scan<size_t>(value);
- } else if (key == "path") {
- m_filePath = value;
+ } else if (key == "order") {
+ m_nGramOrder = Scan<size_t>(value);
+ } else if (key == "path") {
+ m_filePath = value;
} else {
LanguageModel::SetParameter(key, value);
}
- m_ContextSize = m_nGramOrder-1;
+ m_ContextSize = m_nGramOrder-1;
}
void LanguageModelDALM::EvaluateTerminal(
- const Word &word,
- float &hypoScore,
- DALMChartState *newState,
- DALM::State &state,
- DALM::Fragment *prefixFragments,
- unsigned char &prefixLength) const{
-
- DALM::VocabId wid = GetVocabId(word.GetFactor(m_factorType));
- if (newState->LargeEnough()) {
- float score = m_lm->query(wid, state);
- hypoScore += score;
- }else{
- float score = m_lm->query(wid, state, prefixFragments[prefixLength]);
-
- if(score > 0){
- hypoScore -= score;
- newState->SetAsLarge();
- }else if(state.get_count()<=prefixLength){
- hypoScore += score;
- prefixLength++;
- newState->SetAsLarge();
- }else{
- hypoScore += score;
- prefixLength++;
- if(prefixLength >= m_ContextSize) newState->SetAsLarge();
- }
- }
+ const Word &word,
+ float &hypoScore,
+ DALMChartState *newState,
+ DALM::State &state,
+ DALM::Fragment *prefixFragments,
+ unsigned char &prefixLength) const
+{
+
+ DALM::VocabId wid = GetVocabId(word.GetFactor(m_factorType));
+ if (newState->LargeEnough()) {
+ float score = m_lm->query(wid, state);
+ hypoScore += score;
+ } else {
+ float score = m_lm->query(wid, state, prefixFragments[prefixLength]);
+
+ if(score > 0) {
+ hypoScore -= score;
+ newState->SetAsLarge();
+ } else if(state.get_count()<=prefixLength) {
+ hypoScore += score;
+ prefixLength++;
+ newState->SetAsLarge();
+ } else {
+ hypoScore += score;
+ prefixLength++;
+ if(prefixLength >= m_ContextSize) newState->SetAsLarge();
+ }
+ }
}
void LanguageModelDALM::EvaluateNonTerminal(
@@ -516,72 +522,73 @@ void LanguageModelDALM::EvaluateNonTerminal(
DALM::State &state,
DALM::Fragment *prefixFragments,
unsigned char &prefixLength,
- const DALMChartState *prevState,
- size_t prevTargetPhraseLength
- ) const{
+ const DALMChartState *prevState,
+ size_t prevTargetPhraseLength
+) const
+{
const unsigned char prevPrefixLength = prevState->GetPrefixLength();
- const DALM::Fragment *prevPrefixFragments = prevState->GetPrefixFragments();
-
- if(prevPrefixLength == 0){
- newState->SetAsLarge();
- hypoScore += state.sum_bows(0, state.get_count());
- state = prevState->GetRightContext();
- return;
- }
- if(!state.has_context()){
- newState->SetAsLarge();
- state = prevState->GetRightContext();
- return;
- }
- DALM::Gap gap(state);
+ const DALM::Fragment *prevPrefixFragments = prevState->GetPrefixFragments();
+
+ if(prevPrefixLength == 0) {
+ newState->SetAsLarge();
+ hypoScore += state.sum_bows(0, state.get_count());
+ state = prevState->GetRightContext();
+ return;
+ }
+ if(!state.has_context()) {
+ newState->SetAsLarge();
+ state = prevState->GetRightContext();
+ return;
+ }
+ DALM::Gap gap(state);
// score its prefix
for(size_t prefixPos = 0; prefixPos < prevPrefixLength; prefixPos++) {
- const DALM::Fragment &f = prevPrefixFragments[prefixPos];
- if (newState->LargeEnough()) {
- float score = m_lm->query(f, state, gap);
- hypoScore += score;
-
- if(!gap.is_extended()){
- state = prevState->GetRightContext();
- return;
- }else if(state.get_count() <= prefixPos+1){
- state = prevState->GetRightContext();
- return;
- }
- } else {
- DALM::Fragment &fnew = prefixFragments[prefixLength];
- float score = m_lm->query(f, state, gap, fnew);
- hypoScore += score;
-
- if(!gap.is_extended()){
- newState->SetAsLarge();
- state = prevState->GetRightContext();
- return;
- }else if(state.get_count() <= prefixPos+1){
- if(!gap.is_finalized()) prefixLength++;
- newState->SetAsLarge();
- state = prevState->GetRightContext();
- return;
- }else if(gap.is_finalized()){
- newState->SetAsLarge();
- }else{
- prefixLength++;
- if(prefixLength >= m_ContextSize) newState->SetAsLarge();
- }
- }
- gap.succ();
+ const DALM::Fragment &f = prevPrefixFragments[prefixPos];
+ if (newState->LargeEnough()) {
+ float score = m_lm->query(f, state, gap);
+ hypoScore += score;
+
+ if(!gap.is_extended()) {
+ state = prevState->GetRightContext();
+ return;
+ } else if(state.get_count() <= prefixPos+1) {
+ state = prevState->GetRightContext();
+ return;
+ }
+ } else {
+ DALM::Fragment &fnew = prefixFragments[prefixLength];
+ float score = m_lm->query(f, state, gap, fnew);
+ hypoScore += score;
+
+ if(!gap.is_extended()) {
+ newState->SetAsLarge();
+ state = prevState->GetRightContext();
+ return;
+ } else if(state.get_count() <= prefixPos+1) {
+ if(!gap.is_finalized()) prefixLength++;
+ newState->SetAsLarge();
+ state = prevState->GetRightContext();
+ return;
+ } else if(gap.is_finalized()) {
+ newState->SetAsLarge();
+ } else {
+ prefixLength++;
+ if(prefixLength >= m_ContextSize) newState->SetAsLarge();
+ }
+ }
+ gap.succ();
}
// check if we are dealing with a large sub-phrase
if (prevState->LargeEnough()) {
newState->SetAsLarge();
- if(prevPrefixLength < prevState->GetHypoSize()){
- hypoScore += state.sum_bows(prevPrefixLength, state.get_count());
- }
- // copy language model state
- state = prevState->GetRightContext();
+ if(prevPrefixLength < prevState->GetHypoSize()) {
+ hypoScore += state.sum_bows(prevPrefixLength, state.get_count());
+ }
+ // copy language model state
+ state = prevState->GetRightContext();
} else {
m_lm->set_state(state, prevState->GetRightContext(), prevPrefixFragments, gap);
}
diff --git a/moses/LM/Implementation.cpp b/moses/LM/Implementation.cpp
index 7beb3c280..62c9616c0 100644
--- a/moses/LM/Implementation.cpp
+++ b/moses/LM/Implementation.cpp
@@ -341,8 +341,7 @@ FFState* LanguageModelImplementation::EvaluateWhenApplied(const ChartHypothesis&
// scores[1] = out->GetScoresForProducer(this)[1];
scores[1] = 0;
out->PlusEquals(this, scores);
- }
- else {
+ } else {
out->PlusEquals(this, prefixScore + finalizedScore - hypo.GetTranslationOption().GetScores().GetScoresForProducer(this)[0]);
}
diff --git a/moses/LM/Ken.cpp b/moses/LM/Ken.cpp
index 76bf12593..df2cfbbfe 100644
--- a/moses/LM/Ken.cpp
+++ b/moses/LM/Ken.cpp
@@ -353,8 +353,7 @@ template <class Model> FFState *LanguageModelKen<Model>::EvaluateWhenApplied(con
scores[0] = score;
scores[1] = 0.0;
accumulator->PlusEquals(this, scores);
- }
- else {
+ } else {
accumulator->PlusEquals(this, score);
}
return newState;
diff --git a/moses/LM/LDHT.cpp b/moses/LM/LDHT.cpp
index 1d0331df5..7a3b0ebd5 100644
--- a/moses/LM/LDHT.cpp
+++ b/moses/LM/LDHT.cpp
@@ -98,8 +98,8 @@ public:
const FFState* input_state,
ScoreComponentCollection* score_output) const;
FFState* EvaluateWhenApplied(const ChartHypothesis& hypo,
- int featureID,
- ScoreComponentCollection* accumulator) const;
+ int featureID,
+ ScoreComponentCollection* accumulator) const;
virtual void IssueRequestsFor(Hypothesis& hypo,
const FFState* input_state);
diff --git a/moses/LM/NeuralLMWrapper.h b/moses/LM/NeuralLMWrapper.h
index 2b80fb303..bd6635a7c 100644
--- a/moses/LM/NeuralLMWrapper.h
+++ b/moses/LM/NeuralLMWrapper.h
@@ -4,8 +4,9 @@
#include <boost/thread/tss.hpp>
-namespace nplm {
- class neuralLM;
+namespace nplm
+{
+class neuralLM;
}
namespace Moses
diff --git a/moses/LM/SingleFactor.cpp b/moses/LM/SingleFactor.cpp
index cf080f04a..8f9e952d3 100644
--- a/moses/LM/SingleFactor.cpp
+++ b/moses/LM/SingleFactor.cpp
@@ -89,13 +89,13 @@ void LanguageModelSingleFactor::SetParameter(const std::string& key, const std::
std::string LanguageModelSingleFactor::DebugContextFactor(const std::vector<const Word*> &contextFactor) const
{
- std::string ret;
- for (size_t i = 0; i < contextFactor.size(); ++i) {
- const Word &word = *contextFactor[i];
- ret += word.ToString();
- }
+ std::string ret;
+ for (size_t i = 0; i < contextFactor.size(); ++i) {
+ const Word &word = *contextFactor[i];
+ ret += word.ToString();
+ }
- return ret;
+ return ret;
}
}
diff --git a/moses/LM/bilingual-lm/BiLM_NPLM.cpp b/moses/LM/bilingual-lm/BiLM_NPLM.cpp
index 451aa5ae1..eb47d7360 100644
--- a/moses/LM/bilingual-lm/BiLM_NPLM.cpp
+++ b/moses/LM/bilingual-lm/BiLM_NPLM.cpp
@@ -2,31 +2,36 @@
#include "neuralLM.h"
#include "vocabulary.h"
-namespace Moses {
+namespace Moses
+{
BilingualLM_NPLM::BilingualLM_NPLM(const std::string &line)
- : BilingualLM(line),
- premultiply(true),
- factored(false),
- neuralLM_cache(1000000) {
-
- NULL_string = "<null>"; //Default null value for nplm
- FactorCollection& factorFactory = FactorCollection::Instance(); // To add null word.
- const Factor* NULL_factor = factorFactory.AddFactor(NULL_string);
- NULL_word.SetFactor(0, NULL_factor);
- }
-
-float BilingualLM_NPLM::Score(std::vector<int>& source_words, std::vector<int>& target_words) const {
+ : BilingualLM(line),
+ premultiply(true),
+ factored(false),
+ neuralLM_cache(1000000)
+{
+
+ NULL_string = "<null>"; //Default null value for nplm
+ FactorCollection& factorFactory = FactorCollection::Instance(); // To add null word.
+ const Factor* NULL_factor = factorFactory.AddFactor(NULL_string);
+ NULL_word.SetFactor(0, NULL_factor);
+}
+
+float BilingualLM_NPLM::Score(std::vector<int>& source_words, std::vector<int>& target_words) const
+{
source_words.reserve(source_ngrams+target_ngrams+1);
source_words.insert( source_words.end(), target_words.begin(), target_words.end() );
return FloorScore(m_neuralLM->lookup_ngram(source_words));
}
-const Word& BilingualLM_NPLM::getNullWord() const {
+const Word& BilingualLM_NPLM::getNullWord() const
+{
return NULL_word;
}
-int BilingualLM_NPLM::getNeuralLMId(const Word& word, bool is_source_word) const {
+int BilingualLM_NPLM::getNeuralLMId(const Word& word, bool is_source_word) const
+{
initSharedPointer();
//Decide if we are doing source or target side first.
@@ -45,30 +50,32 @@ int BilingualLM_NPLM::getNeuralLMId(const Word& word, bool is_source_word) const
it = neuralLMids->find(factor);
//If we know the word return immediately
- if (it != neuralLMids->end()){
+ if (it != neuralLMids->end()) {
return it->second;
}
//If we don't know the word and we aren't factored, return the word.
if (!factored) {
- return unknown_word_id;
- }
+ return unknown_word_id;
+ }
//Else try to get a pos_factor
const Factor* pos_factor = word.GetFactor(pos_factortype);
it = neuralLMids->find(pos_factor);
- if (it != neuralLMids->end()){
+ if (it != neuralLMids->end()) {
return it->second;
} else {
return unknown_word_id;
}
}
-void BilingualLM_NPLM::initSharedPointer() const {
+void BilingualLM_NPLM::initSharedPointer() const
+{
if (!m_neuralLM.get()) {
m_neuralLM.reset(new nplm::neuralLM(*m_neuralLM_shared));
}
}
-void BilingualLM_NPLM::SetParameter(const std::string& key, const std::string& value) {
+void BilingualLM_NPLM::SetParameter(const std::string& key, const std::string& value)
+{
if (key == "order") {
target_ngrams = Scan<int>(value)-1;
} else if (key == "source_window") {
@@ -94,7 +101,8 @@ void BilingualLM_NPLM::SetParameter(const std::string& key, const std::string& v
}
}
-void BilingualLM_NPLM::loadModel() {
+void BilingualLM_NPLM::loadModel()
+{
m_neuralLM_shared = new nplm::neuralLM();
m_neuralLM_shared->read(m_filePath);
if (premultiply) {
@@ -103,9 +111,9 @@ void BilingualLM_NPLM::loadModel() {
int ngram_order = target_ngrams + source_ngrams + 1;
UTIL_THROW_IF2(
- ngram_order != m_neuralLM_shared->get_order(),
- "Wrong order of neuralLM: LM has " << m_neuralLM_shared->get_order() <<
- ", but Moses expects " << ngram_order);
+ ngram_order != m_neuralLM_shared->get_order(),
+ "Wrong order of neuralLM: LM has " << m_neuralLM_shared->get_order() <<
+ ", but Moses expects " << ngram_order);
m_neuralLM_shared->set_cache(neuralLM_cache); //Default 1000000
diff --git a/moses/LM/bilingual-lm/BiLM_NPLM.h b/moses/LM/bilingual-lm/BiLM_NPLM.h
index 9a3167455..e291e4d22 100644
--- a/moses/LM/bilingual-lm/BiLM_NPLM.h
+++ b/moses/LM/bilingual-lm/BiLM_NPLM.h
@@ -3,17 +3,20 @@
#include <utility> //make_pair
#include <fstream> //Read vocabulary files
-namespace nplm {
- class neuralLM;
+namespace nplm
+{
+class neuralLM;
}
-namespace Moses {
+namespace Moses
+{
-class BilingualLM_NPLM : public BilingualLM {
- public:
+class BilingualLM_NPLM : public BilingualLM
+{
+public:
BilingualLM_NPLM(const std::string &line);
- private:
+private:
float Score(std::vector<int>& source_words, std::vector<int>& target_words) const;
int getNeuralLMId(const Word& word, bool is_source_word) const;
diff --git a/moses/LM/oxlm/OxLM.cpp b/moses/LM/oxlm/OxLM.cpp
index 5047a0344..7700a9237 100644
--- a/moses/LM/oxlm/OxLM.cpp
+++ b/moses/LM/oxlm/OxLM.cpp
@@ -16,9 +16,10 @@ namespace Moses
template<class Model>
OxLM<Model>::OxLM(const string &line)
- : LanguageModelSingleFactor(line), normalized(true),
- posBackOff(false), posFactorType(1),
- persistentCache(false) {
+ : LanguageModelSingleFactor(line), normalized(true),
+ posBackOff(false), posFactorType(1),
+ persistentCache(false)
+{
ReadParameters();
FactorCollection &factorCollection = FactorCollection::Instance();
@@ -35,7 +36,8 @@ OxLM<Model>::OxLM(const string &line)
template<class Model>
-OxLM<Model>::~OxLM() {
+OxLM<Model>::~OxLM()
+{
if (persistentCache) {
if (cache.get()) {
string cache_file = m_filePath + ".phrases.cache.bin";
@@ -49,7 +51,8 @@ OxLM<Model>::~OxLM() {
template<class Model>
-void OxLM<Model>::SetParameter(const string& key, const string& value) {
+void OxLM<Model>::SetParameter(const string& key, const string& value)
+{
if (key == "normalized") {
normalized = Scan<bool>(value);
} else if (key == "persistent-cache") {
@@ -66,7 +69,8 @@ void OxLM<Model>::SetParameter(const string& key, const string& value) {
}
template<class Model>
-void OxLM<Model>::Load() {
+void OxLM<Model>::Load()
+{
model.load(m_filePath);
boost::shared_ptr<Vocabulary> vocab = model.getVocab();
@@ -78,12 +82,13 @@ void OxLM<Model>::Load() {
size_t ngram_order = model.getConfig()->ngram_order;
UTIL_THROW_IF2(
- m_nGramOrder != ngram_order,
- "Wrong order for OxLM: LM has " << ngram_order << ", but Moses expects " << m_nGramOrder);
+ m_nGramOrder != ngram_order,
+ "Wrong order for OxLM: LM has " << ngram_order << ", but Moses expects " << m_nGramOrder);
}
template<class Model>
-double OxLM<Model>::GetScore(int word, const vector<int>& context) const {
+double OxLM<Model>::GetScore(int word, const vector<int>& context) const
+{
if (normalized) {
return model.getLogProb(word, context);
} else {
@@ -93,7 +98,8 @@ double OxLM<Model>::GetScore(int word, const vector<int>& context) const {
template<class Model>
LMResult OxLM<Model>::GetValue(
- const vector<const Word*> &contextFactor, State* finalState) const {
+ const vector<const Word*> &contextFactor, State* finalState) const
+{
if (!cache.get()) {
cache.reset(new QueryCache());
string cache_file = m_filePath + ".phrases.cache.bin";
@@ -144,7 +150,8 @@ LMResult OxLM<Model>::GetValue(
}
template<class Model>
-void OxLM<Model>::loadPersistentCache(const string& cache_file) const {
+void OxLM<Model>::loadPersistentCache(const string& cache_file) const
+{
if (boost::filesystem::exists(cache_file)) {
ifstream f(cache_file);
boost::archive::binary_iarchive iar(f);
@@ -158,7 +165,8 @@ void OxLM<Model>::loadPersistentCache(const string& cache_file) const {
}
template<class Model>
-void OxLM<Model>::savePersistentCache(const string& cache_file) const {
+void OxLM<Model>::savePersistentCache(const string& cache_file) const
+{
ofstream f(cache_file);
boost::archive::binary_oarchive oar(f);
cerr << "Saving persistent cache to " << cache_file << endl;
@@ -168,7 +176,8 @@ void OxLM<Model>::savePersistentCache(const string& cache_file) const {
}
template<class Model>
-void OxLM<Model>::InitializeForInput(const InputType& source) {
+void OxLM<Model>::InitializeForInput(const InputType& source)
+{
LanguageModelSingleFactor::InitializeForInput(source);
if (persistentCache) {
@@ -183,7 +192,8 @@ void OxLM<Model>::InitializeForInput(const InputType& source) {
}
template<class Model>
-void OxLM<Model>::CleanUpAfterSentenceProcessing(const InputType& source) {
+void OxLM<Model>::CleanUpAfterSentenceProcessing(const InputType& source)
+{
// Thread safe: the model cache is thread specific.
model.clearCache();
diff --git a/moses/LM/oxlm/OxLM.h b/moses/LM/oxlm/OxLM.h
index a528d0882..446758b2a 100644
--- a/moses/LM/oxlm/OxLM.h
+++ b/moses/LM/oxlm/OxLM.h
@@ -11,12 +11,14 @@
#include "OxLMMapper.h"
-namespace Moses {
+namespace Moses
+{
template<class Model>
-class OxLM : public LanguageModelSingleFactor {
- public:
- OxLM(const std::string &line);
+class OxLM : public LanguageModelSingleFactor
+{
+public:
+ OxLM(const std::string &line);
~OxLM();
@@ -25,21 +27,21 @@ class OxLM : public LanguageModelSingleFactor {
void Load();
virtual LMResult GetValue(
- const std::vector<const Word*> &contextFactor,
- State* finalState = 0) const;
+ const std::vector<const Word*> &contextFactor,
+ State* finalState = 0) const;
virtual void InitializeForInput(const InputType& source);
virtual void CleanUpAfterSentenceProcessing(const InputType& source);
- private:
+private:
double GetScore(int word, const vector<int>& context) const;
void loadPersistentCache(const string& cache_file) const;
void savePersistentCache(const string& cache_file) const;
- protected:
+protected:
Model model;
boost::shared_ptr<OxLMMapper> mapper;
diff --git a/moses/LM/oxlm/OxLMMapper.cpp b/moses/LM/oxlm/OxLMMapper.cpp
index f2953b4e9..0c0ca8062 100644
--- a/moses/LM/oxlm/OxLMMapper.cpp
+++ b/moses/LM/oxlm/OxLMMapper.cpp
@@ -4,13 +4,15 @@
using namespace std;
-namespace Moses {
+namespace Moses
+{
OxLMMapper::OxLMMapper(
- const boost::shared_ptr<oxlm::Vocabulary>& vocab,
- bool pos_back_off,
- const FactorType& pos_factor_type)
- : posBackOff(pos_back_off), posFactorType(pos_factor_type) {
+ const boost::shared_ptr<oxlm::Vocabulary>& vocab,
+ bool pos_back_off,
+ const FactorType& pos_factor_type)
+ : posBackOff(pos_back_off), posFactorType(pos_factor_type)
+{
for (int i = 0; i < vocab->size(); ++i) {
const string &str = vocab->convert(i);
FactorCollection &fc = FactorCollection::Instance();
@@ -21,9 +23,10 @@ OxLMMapper::OxLMMapper(
kUNKNOWN = vocab->convert("<unk>");
}
-int OxLMMapper::convert(const Word& word) const {
+int OxLMMapper::convert(const Word& word) const
+{
const Moses::Factor* word_factor = word.GetFactor(0);
- Coll::const_iterator iter = moses2Oxlm.find(word_factor);
+ Coll::const_iterator iter = moses2Oxlm.find(word_factor);
if (posBackOff && iter == moses2Oxlm.end()) {
const Moses::Factor* pos_factor = word.GetFactor(posFactorType);
iter = moses2Oxlm.find(pos_factor);
@@ -33,15 +36,16 @@ int OxLMMapper::convert(const Word& word) const {
}
void OxLMMapper::convert(
- const vector<const Word*>& contextFactor,
- vector<int> &ids, int &word) const {
+ const vector<const Word*>& contextFactor,
+ vector<int> &ids, int &word) const
+{
ids.clear();
- for (size_t i = 0; i < contextFactor.size() - 1; ++i) {
+ for (size_t i = 0; i < contextFactor.size() - 1; ++i) {
ids.push_back(convert(*contextFactor[i]));
- }
- std::reverse(ids.begin(), ids.end());
+ }
+ std::reverse(ids.begin(), ids.end());
- word = convert(*contextFactor.back());
+ word = convert(*contextFactor.back());
}
} // namespace Moses
diff --git a/moses/LM/oxlm/OxLMMapper.h b/moses/LM/oxlm/OxLMMapper.h
index 1aef7af88..07184ed72 100644
--- a/moses/LM/oxlm/OxLMMapper.h
+++ b/moses/LM/oxlm/OxLMMapper.h
@@ -7,23 +7,25 @@
#include "moses/Factor.h"
#include "moses/Phrase.h"
-namespace Moses {
+namespace Moses
+{
-class OxLMMapper {
- public:
+class OxLMMapper
+{
+public:
OxLMMapper(
- const boost::shared_ptr<oxlm::Vocabulary>& vocab,
- bool pos_back_off,
- const FactorType& pos_factor_type);
+ const boost::shared_ptr<oxlm::Vocabulary>& vocab,
+ bool pos_back_off,
+ const FactorType& pos_factor_type);
int convert(const Word& word) const;
void convert(
- const std::vector<const Word*> &contextFactor,
- std::vector<int> &ids,
- int &word) const;
+ const std::vector<const Word*> &contextFactor,
+ std::vector<int> &ids,
+ int &word) const;
- protected:
+protected:
bool posBackOff;
FactorType posFactorType;
diff --git a/moses/LM/oxlm/OxLMParallelMapper.cpp b/moses/LM/oxlm/OxLMParallelMapper.cpp
index 3bfd4be04..c8fe692cc 100644
--- a/moses/LM/oxlm/OxLMParallelMapper.cpp
+++ b/moses/LM/oxlm/OxLMParallelMapper.cpp
@@ -6,15 +6,17 @@
using namespace std;
-namespace Moses {
+namespace Moses
+{
OxLMParallelMapper::OxLMParallelMapper(
- const boost::shared_ptr<oxlm::Vocabulary>& vocab,
- bool pos_back_off,
- const FactorType& pos_factor_type)
- : OxLMMapper(vocab, pos_back_off, pos_factor_type) {
+ const boost::shared_ptr<oxlm::Vocabulary>& vocab,
+ bool pos_back_off,
+ const FactorType& pos_factor_type)
+ : OxLMMapper(vocab, pos_back_off, pos_factor_type)
+{
boost::shared_ptr<oxlm::ParallelVocabulary> parallel_vocab =
- dynamic_pointer_cast<oxlm::ParallelVocabulary>(vocab);
+ dynamic_pointer_cast<oxlm::ParallelVocabulary>(vocab);
assert(parallel_vocab != nullptr);
for (int i = 0; i < parallel_vocab->sourceSize(); ++i) {
@@ -27,7 +29,8 @@ OxLMParallelMapper::OxLMParallelMapper(
kSOURCE_UNKNOWN = parallel_vocab->convertSource("<unk>");
}
-int OxLMParallelMapper::convertSource(const Word& word) const {
+int OxLMParallelMapper::convertSource(const Word& word) const
+{
const Moses::Factor* word_factor = word.GetFactor(0);
Coll::const_iterator iter = moses2SourceOxlm.find(word_factor);
if (posBackOff && iter == moses2SourceOxlm.end()) {
diff --git a/moses/LM/oxlm/OxLMParallelMapper.h b/moses/LM/oxlm/OxLMParallelMapper.h
index 9fbcfa2a3..79cf72e8f 100644
--- a/moses/LM/oxlm/OxLMParallelMapper.h
+++ b/moses/LM/oxlm/OxLMParallelMapper.h
@@ -2,18 +2,20 @@
#include "moses/LM/oxlm/OxLMMapper.h"
-namespace Moses {
+namespace Moses
+{
-class OxLMParallelMapper : public OxLMMapper {
- public:
+class OxLMParallelMapper : public OxLMMapper
+{
+public:
OxLMParallelMapper(
- const boost::shared_ptr<oxlm::Vocabulary>& vocab,
- bool pos_back_off,
- const FactorType& pos_factor_type);
+ const boost::shared_ptr<oxlm::Vocabulary>& vocab,
+ bool pos_back_off,
+ const FactorType& pos_factor_type);
int convertSource(const Word& word) const;
- private:
+private:
Coll moses2SourceOxlm;
int kSOURCE_UNKNOWN;
};
diff --git a/moses/LM/oxlm/SourceOxLM.cpp b/moses/LM/oxlm/SourceOxLM.cpp
index 4a6991eb2..408208e3b 100644
--- a/moses/LM/oxlm/SourceOxLM.cpp
+++ b/moses/LM/oxlm/SourceOxLM.cpp
@@ -7,17 +7,20 @@
using namespace std;
using namespace oxlm;
-namespace Moses {
+namespace Moses
+{
SourceOxLM::SourceOxLM(const string &line)
- : BilingualLM(line), posBackOff(false), posFactorType(1),
- persistentCache(false), cacheHits(0), totalHits(0) {
- FactorCollection& factorFactory = FactorCollection::Instance(); // To add null word.
- const Factor* NULL_factor = factorFactory.AddFactor("<unk>");
- NULL_word.SetFactor(0, NULL_factor);
- }
-
-SourceOxLM::~SourceOxLM() {
+ : BilingualLM(line), posBackOff(false), posFactorType(1),
+ persistentCache(false), cacheHits(0), totalHits(0)
+{
+ FactorCollection& factorFactory = FactorCollection::Instance(); // To add null word.
+ const Factor* NULL_factor = factorFactory.AddFactor("<unk>");
+ NULL_word.SetFactor(0, NULL_factor);
+}
+
+SourceOxLM::~SourceOxLM()
+{
if (persistentCache) {
double cache_hit_ratio = 100.0 * cacheHits / totalHits;
cerr << "Cache hit ratio: " << cache_hit_ratio << endl;
@@ -25,8 +28,9 @@ SourceOxLM::~SourceOxLM() {
}
float SourceOxLM::Score(
- vector<int>& source_words,
- vector<int>& target_words) const {
+ vector<int>& source_words,
+ vector<int>& target_words) const
+{
// OxLM expects the context in the following format:
// [t_{n-1}, t_{n-2}, ..., t_{n-m}, s_{a_n-sm}, s_{a_n-sm+1}, ..., s_{a_n+sm}]
// where n is the index for the current target word, m is the target order,
@@ -61,15 +65,18 @@ float SourceOxLM::Score(
return score;
}
-int SourceOxLM::getNeuralLMId(const Word& word, bool is_source_word) const {
+int SourceOxLM::getNeuralLMId(const Word& word, bool is_source_word) const
+{
return is_source_word ? mapper->convertSource(word) : mapper->convert(word);
}
-const Word& SourceOxLM::getNullWord() const {
+const Word& SourceOxLM::getNullWord() const
+{
return NULL_word;
}
-void SourceOxLM::loadModel() {
+void SourceOxLM::loadModel()
+{
model.load(m_filePath);
boost::shared_ptr<ModelData> config = model.getConfig();
@@ -78,10 +85,11 @@ void SourceOxLM::loadModel() {
boost::shared_ptr<Vocabulary> vocab = model.getVocab();
mapper = boost::make_shared<OxLMParallelMapper>(
- vocab, posBackOff, posFactorType);
+ vocab, posBackOff, posFactorType);
}
-void SourceOxLM::SetParameter(const string& key, const string& value) {
+void SourceOxLM::SetParameter(const string& key, const string& value)
+{
if (key == "persistent-cache") {
persistentCache = Scan<bool>(value);
} else if (key == "pos-back-off") {
@@ -93,7 +101,8 @@ void SourceOxLM::SetParameter(const string& key, const string& value) {
}
}
-void SourceOxLM::InitializeForInput(const InputType& source) {
+void SourceOxLM::InitializeForInput(const InputType& source)
+{
BilingualLM::InitializeForInput(source);
if (persistentCache) {
@@ -116,7 +125,8 @@ void SourceOxLM::InitializeForInput(const InputType& source) {
}
}
-void SourceOxLM::CleanUpAfterSentenceProcessing(const InputType& source) {
+void SourceOxLM::CleanUpAfterSentenceProcessing(const InputType& source)
+{
// Thread safe: the model cache is thread specific.
model.clearCache();
diff --git a/moses/LM/oxlm/SourceOxLM.h b/moses/LM/oxlm/SourceOxLM.h
index 3af48489f..945bbda6e 100644
--- a/moses/LM/oxlm/SourceOxLM.h
+++ b/moses/LM/oxlm/SourceOxLM.h
@@ -8,23 +8,25 @@
#include "moses/LM/BilingualLM.h"
#include "moses/LM/oxlm/OxLMParallelMapper.h"
-namespace Moses {
+namespace Moses
+{
-class SourceOxLM : public BilingualLM {
- public:
- SourceOxLM(const std::string &line);
+class SourceOxLM : public BilingualLM
+{
+public:
+ SourceOxLM(const std::string &line);
~SourceOxLM();
- private:
+private:
virtual float Score(
- std::vector<int>& source_words,
- std::vector<int>& target_words) const;
+ std::vector<int>& source_words,
+ std::vector<int>& target_words) const;
virtual int getNeuralLMId(const Word& word, bool is_source_word) const;
virtual void loadModel();
-
+
const Word& getNullWord() const;
void SetParameter(const std::string& key, const std::string& value);
@@ -33,7 +35,7 @@ class SourceOxLM : public BilingualLM {
void CleanUpAfterSentenceProcessing(const InputType& source);
- protected:
+protected:
oxlm::SourceFactoredLM model;
boost::shared_ptr<OxLMParallelMapper> mapper;
diff --git a/moses/Manager.cpp b/moses/Manager.cpp
index 90d120920..854b9cf1d 100644
--- a/moses/Manager.cpp
+++ b/moses/Manager.cpp
@@ -113,8 +113,8 @@ void Manager::Decode()
IFVERBOSE(1) {
GetSentenceStats().StopTimeCollectOpts();
TRACE_ERR("Line "<< m_source.GetTranslationId() << ": Collecting options took "
- << GetSentenceStats().GetTimeCollectOpts() << " seconds at "
- << __FILE__ << ":" << __LINE__ << endl);
+ << GetSentenceStats().GetTimeCollectOpts() << " seconds at "
+ << __FILE__ << ":" << __LINE__ << endl);
}
// search for best translation with the specified algorithm
@@ -122,7 +122,7 @@ void Manager::Decode()
searchTime.start();
m_search->Decode();
VERBOSE(1, "Line " << m_source.GetTranslationId() << ": Search took " << searchTime << " seconds" << endl);
- IFVERBOSE(2) {
+ IFVERBOSE(2) {
GetSentenceStats().StopTimeTotal();
TRACE_ERR(GetSentenceStats());
}
@@ -191,11 +191,11 @@ void Manager::printDivergentHypothesis(long translationId, const Hypothesis* hyp
}
-void
+void
Manager::
-printThisHypothesis(long translationId, const Hypothesis* hypo,
- const vector <const TargetPhrase*> & remainingPhrases,
- float remainingScore, ostream& outputStream) const
+printThisHypothesis(long translationId, const Hypothesis* hypo,
+ const vector <const TargetPhrase*> & remainingPhrases,
+ float remainingScore, ostream& outputStream) const
{
outputStream << translationId << " ||| ";
@@ -334,12 +334,12 @@ void Manager::CalcLatticeSamples(size_t count, TrellisPathList &ret) const
if (i->forward >= 0) {
map<int,const Hypothesis*>::const_iterator idToHypIter = idToHyp.find(i->forward);
UTIL_THROW_IF2(idToHypIter == idToHyp.end(),
- "Couldn't find hypothesis " << i->forward);
+ "Couldn't find hypothesis " << i->forward);
const Hypothesis* nextHypo = idToHypIter->second;
outgoingHyps[hypo].insert(nextHypo);
map<int,float>::const_iterator fscoreIter = fscores.find(nextHypo->GetId());
UTIL_THROW_IF2(fscoreIter == fscores.end(),
- "Couldn't find scores for hypothsis " << nextHypo->GetId());
+ "Couldn't find scores for hypothsis " << nextHypo->GetId());
edgeScores[Edge(hypo->GetId(),nextHypo->GetId())] =
i->fscore - fscoreIter->second;
}
@@ -357,17 +357,17 @@ void Manager::CalcLatticeSamples(size_t count, TrellisPathList &ret) const
outgoingHyps.find(i->hypo);
UTIL_THROW_IF2(outIter == outgoingHyps.end(),
- "Couldn't find hypothesis " << i->hypo->GetId());
+ "Couldn't find hypothesis " << i->hypo->GetId());
float sigma = 0;
for (set<const Hypothesis*>::const_iterator j = outIter->second.begin();
j != outIter->second.end(); ++j) {
map<const Hypothesis*, float>::const_iterator succIter = sigmas.find(*j);
UTIL_THROW_IF2(succIter == sigmas.end(),
- "Couldn't find hypothesis " << (*j)->GetId());
+ "Couldn't find hypothesis " << (*j)->GetId());
map<Edge,float>::const_iterator edgeScoreIter =
edgeScores.find(Edge(i->hypo->GetId(),(*j)->GetId()));
UTIL_THROW_IF2(edgeScoreIter == edgeScores.end(),
- "Couldn't find edge for hypothesis " << (*j)->GetId());
+ "Couldn't find edge for hypothesis " << (*j)->GetId());
float term = edgeScoreIter->second + succIter->second; // Add sigma(*j)
if (sigma == 0) {
sigma = term;
@@ -400,10 +400,10 @@ void Manager::CalcLatticeSamples(size_t count, TrellisPathList &ret) const
j != outIter->second.end(); ++j) {
candidates.push_back(*j);
UTIL_THROW_IF2(sigmas.find(*j) == sigmas.end(),
- "Hypothesis " << (*j)->GetId() << " not found");
+ "Hypothesis " << (*j)->GetId() << " not found");
Edge edge(path.back()->GetId(),(*j)->GetId());
UTIL_THROW_IF2(edgeScores.find(edge) == edgeScores.end(),
- "Edge not found");
+ "Edge not found");
candidateScores.push_back(sigmas[*j] + edgeScores[edge]);
if (scoreTotal == 0) {
scoreTotal = candidateScores.back();
@@ -565,7 +565,7 @@ void Manager::GetOutputLanguageModelOrder( std::ostream &out, const Hypothesis *
const std::vector<const StatefulFeatureFunction*> &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions();
for (size_t i = 0; i < statefulFFs.size(); ++i) {
const StatefulFeatureFunction *ff = statefulFFs[i];
- if (const LanguageModel *lm = dynamic_cast<const LanguageModel*>(ff)) {
+ if (const LanguageModel *lm = dynamic_cast<const LanguageModel*>(ff)) {
lm->ReportHistoryOrder(out, translation);
}
}
@@ -580,11 +580,11 @@ void Manager::GetWordGraph(long translationId, std::ostream &outputWordGraphStre
bool outputNBest = false;
params = staticData.GetParameter().GetParam("output-word-graph");
if (params && params->size()) {
- fileName = params->at(0);
+ fileName = params->at(0);
- if (params->size() == 2) {
- outputNBest = Scan<bool>(params->at(1));
- }
+ if (params->size() == 2) {
+ outputNBest = Scan<bool>(params->at(1));
+ }
}
const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
@@ -1334,7 +1334,7 @@ void Manager::SerializeSearchGraphPB(
for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
const Hypothesis *loserHypo = *iterArcList;
UTIL_THROW_IF2(!connected[loserHypo->GetId()],
- "Hypothesis " << loserHypo->GetId() << " is not connected");
+ "Hypothesis " << loserHypo->GetId() << " is not connected");
Hypergraph_Edge* edge = hg.add_edges();
SerializeEdgeInfo(loserHypo, edge);
edge->set_head_node(headNodeIdx);
@@ -1465,125 +1465,125 @@ void Manager::OutputBest(OutputCollector *collector) const
// apply decision rule and output best translation(s)
if (collector) {
- ostringstream out;
- ostringstream debug;
- FixPrecision(debug,PRECISION);
-
- // all derivations - send them to debug stream
- if (staticData.PrintAllDerivations()) {
- additionalReportingTime.start();
- PrintAllDerivations(translationId, debug);
- additionalReportingTime.stop();
- }
-
- Timer decisionRuleTime;
- decisionRuleTime.start();
-
- // MAP decoding: best hypothesis
- const Hypothesis* bestHypo = NULL;
- if (!staticData.UseMBR()) {
- bestHypo = GetBestHypothesis();
- if (bestHypo) {
- if (StaticData::Instance().GetOutputHypoScore()) {
- out << bestHypo->GetTotalScore() << ' ';
- }
- if (staticData.IsPathRecoveryEnabled()) {
- bestHypo->OutputInput(out);
- out << "||| ";
- }
-
- const PARAM_VEC *params = staticData.GetParameter().GetParam("print-id");
- if (params && params->size() && Scan<bool>(params->at(0)) ) {
- out << translationId << " ";
- }
-
- if (staticData.GetReportSegmentation() == 2) {
- GetOutputLanguageModelOrder(out, bestHypo);
- }
- bestHypo->OutputBestSurface(
- out,
- staticData.GetOutputFactorOrder(),
- staticData.GetReportSegmentation(),
- staticData.GetReportAllFactors());
- if (staticData.PrintAlignmentInfo()) {
- out << "||| ";
- bestHypo->OutputAlignment(out);
- }
-
- IFVERBOSE(1) {
- debug << "BEST TRANSLATION: " << *bestHypo << endl;
- }
- } else {
- VERBOSE(1, "NO BEST TRANSLATION" << endl);
- }
-
- out << endl;
- } // if (!staticData.UseMBR())
-
- // MBR decoding (n-best MBR, lattice MBR, consensus)
- else {
- // we first need the n-best translations
- size_t nBestSize = staticData.GetMBRSize();
- if (nBestSize <= 0) {
- cerr << "ERROR: negative size for number of MBR candidate translations not allowed (option mbr-size)" << endl;
- exit(1);
- }
- TrellisPathList nBestList;
- CalcNBest(nBestSize, nBestList,true);
- VERBOSE(2,"size of n-best: " << nBestList.GetSize() << " (" << nBestSize << ")" << endl);
- IFVERBOSE(2) {
- PrintUserTime("calculated n-best list for (L)MBR decoding");
- }
-
- // lattice MBR
- if (staticData.UseLatticeMBR()) {
- if (staticData.IsNBestEnabled()) {
- //lattice mbr nbest
- vector<LatticeMBRSolution> solutions;
- size_t n = min(nBestSize, staticData.GetNBestSize());
- getLatticeMBRNBest(*this,nBestList,solutions,n);
- OutputLatticeMBRNBest(m_latticeNBestOut, solutions, translationId);
- } else {
- //Lattice MBR decoding
- vector<Word> mbrBestHypo = doLatticeMBR(*this,nBestList);
- OutputBestHypo(mbrBestHypo, translationId, staticData.GetReportSegmentation(),
- staticData.GetReportAllFactors(),out);
- IFVERBOSE(2) {
- PrintUserTime("finished Lattice MBR decoding");
- }
- }
- }
-
- // consensus decoding
- else if (staticData.UseConsensusDecoding()) {
- const TrellisPath &conBestHypo = doConsensusDecoding(*this,nBestList);
- OutputBestHypo(conBestHypo, translationId,
- staticData.GetReportSegmentation(),
- staticData.GetReportAllFactors(),out);
- OutputAlignment(m_alignmentOut, conBestHypo);
- IFVERBOSE(2) {
- PrintUserTime("finished Consensus decoding");
- }
- }
-
- // n-best MBR decoding
- else {
- const TrellisPath &mbrBestHypo = doMBR(nBestList);
- OutputBestHypo(mbrBestHypo, translationId,
- staticData.GetReportSegmentation(),
- staticData.GetReportAllFactors(),out);
- OutputAlignment(m_alignmentOut, mbrBestHypo);
- IFVERBOSE(2) {
- PrintUserTime("finished MBR decoding");
- }
- }
- }
-
- // report best translation to output collector
- collector->Write(translationId,out.str(),debug.str());
-
- decisionRuleTime.stop();
- VERBOSE(1, "Line " << translationId << ": Decision rule took " << decisionRuleTime << " seconds total" << endl);
+ ostringstream out;
+ ostringstream debug;
+ FixPrecision(debug,PRECISION);
+
+ // all derivations - send them to debug stream
+ if (staticData.PrintAllDerivations()) {
+ additionalReportingTime.start();
+ PrintAllDerivations(translationId, debug);
+ additionalReportingTime.stop();
+ }
+
+ Timer decisionRuleTime;
+ decisionRuleTime.start();
+
+ // MAP decoding: best hypothesis
+ const Hypothesis* bestHypo = NULL;
+ if (!staticData.UseMBR()) {
+ bestHypo = GetBestHypothesis();
+ if (bestHypo) {
+ if (StaticData::Instance().GetOutputHypoScore()) {
+ out << bestHypo->GetTotalScore() << ' ';
+ }
+ if (staticData.IsPathRecoveryEnabled()) {
+ bestHypo->OutputInput(out);
+ out << "||| ";
+ }
+
+ const PARAM_VEC *params = staticData.GetParameter().GetParam("print-id");
+ if (params && params->size() && Scan<bool>(params->at(0)) ) {
+ out << translationId << " ";
+ }
+
+ if (staticData.GetReportSegmentation() == 2) {
+ GetOutputLanguageModelOrder(out, bestHypo);
+ }
+ bestHypo->OutputBestSurface(
+ out,
+ staticData.GetOutputFactorOrder(),
+ staticData.GetReportSegmentation(),
+ staticData.GetReportAllFactors());
+ if (staticData.PrintAlignmentInfo()) {
+ out << "||| ";
+ bestHypo->OutputAlignment(out);
+ }
+
+ IFVERBOSE(1) {
+ debug << "BEST TRANSLATION: " << *bestHypo << endl;
+ }
+ } else {
+ VERBOSE(1, "NO BEST TRANSLATION" << endl);
+ }
+
+ out << endl;
+ } // if (!staticData.UseMBR())
+
+ // MBR decoding (n-best MBR, lattice MBR, consensus)
+ else {
+ // we first need the n-best translations
+ size_t nBestSize = staticData.GetMBRSize();
+ if (nBestSize <= 0) {
+ cerr << "ERROR: negative size for number of MBR candidate translations not allowed (option mbr-size)" << endl;
+ exit(1);
+ }
+ TrellisPathList nBestList;
+ CalcNBest(nBestSize, nBestList,true);
+ VERBOSE(2,"size of n-best: " << nBestList.GetSize() << " (" << nBestSize << ")" << endl);
+ IFVERBOSE(2) {
+ PrintUserTime("calculated n-best list for (L)MBR decoding");
+ }
+
+ // lattice MBR
+ if (staticData.UseLatticeMBR()) {
+ if (staticData.IsNBestEnabled()) {
+ //lattice mbr nbest
+ vector<LatticeMBRSolution> solutions;
+ size_t n = min(nBestSize, staticData.GetNBestSize());
+ getLatticeMBRNBest(*this,nBestList,solutions,n);
+ OutputLatticeMBRNBest(m_latticeNBestOut, solutions, translationId);
+ } else {
+ //Lattice MBR decoding
+ vector<Word> mbrBestHypo = doLatticeMBR(*this,nBestList);
+ OutputBestHypo(mbrBestHypo, translationId, staticData.GetReportSegmentation(),
+ staticData.GetReportAllFactors(),out);
+ IFVERBOSE(2) {
+ PrintUserTime("finished Lattice MBR decoding");
+ }
+ }
+ }
+
+ // consensus decoding
+ else if (staticData.UseConsensusDecoding()) {
+ const TrellisPath &conBestHypo = doConsensusDecoding(*this,nBestList);
+ OutputBestHypo(conBestHypo, translationId,
+ staticData.GetReportSegmentation(),
+ staticData.GetReportAllFactors(),out);
+ OutputAlignment(m_alignmentOut, conBestHypo);
+ IFVERBOSE(2) {
+ PrintUserTime("finished Consensus decoding");
+ }
+ }
+
+ // n-best MBR decoding
+ else {
+ const TrellisPath &mbrBestHypo = doMBR(nBestList);
+ OutputBestHypo(mbrBestHypo, translationId,
+ staticData.GetReportSegmentation(),
+ staticData.GetReportAllFactors(),out);
+ OutputAlignment(m_alignmentOut, mbrBestHypo);
+ IFVERBOSE(2) {
+ PrintUserTime("finished MBR decoding");
+ }
+ }
+ }
+
+ // report best translation to output collector
+ collector->Write(translationId,out.str(),debug.str());
+
+ decisionRuleTime.stop();
+ VERBOSE(1, "Line " << translationId << ": Decision rule took " << decisionRuleTime << " seconds total" << endl);
} // if (m_ioWrapper.GetSingleBestOutputCollector())
}
@@ -1591,33 +1591,32 @@ void Manager::OutputBest(OutputCollector *collector) const
void Manager::OutputNBest(OutputCollector *collector) const
{
if (collector == NULL) {
- return;
+ return;
}
const StaticData &staticData = StaticData::Instance();
long translationId = m_source.GetTranslationId();
if (staticData.UseLatticeMBR()) {
- if (staticData.IsNBestEnabled()) {
- collector->Write(translationId, m_latticeNBestOut.str());
- }
- }
- else {
- TrellisPathList nBestList;
- ostringstream out;
- CalcNBest(staticData.GetNBestSize(), nBestList,staticData.GetDistinctNBest());
- OutputNBest(out, nBestList, staticData.GetOutputFactorOrder(), m_source.GetTranslationId(),
- staticData.GetReportSegmentation());
- collector->Write(m_source.GetTranslationId(), out.str());
+ if (staticData.IsNBestEnabled()) {
+ collector->Write(translationId, m_latticeNBestOut.str());
+ }
+ } else {
+ TrellisPathList nBestList;
+ ostringstream out;
+ CalcNBest(staticData.GetNBestSize(), nBestList,staticData.GetDistinctNBest());
+ OutputNBest(out, nBestList, staticData.GetOutputFactorOrder(), m_source.GetTranslationId(),
+ staticData.GetReportSegmentation());
+ collector->Write(m_source.GetTranslationId(), out.str());
}
}
void Manager::OutputNBest(std::ostream& out
- , const Moses::TrellisPathList &nBestList
- , const std::vector<Moses::FactorType>& outputFactorOrder
- , long translationId
- , char reportSegmentation) const
+ , const Moses::TrellisPathList &nBestList
+ , const std::vector<Moses::FactorType>& outputFactorOrder
+ , long translationId
+ , char reportSegmentation) const
{
const StaticData &staticData = StaticData::Instance();
bool reportAllFactors = staticData.GetReportAllFactorsNBest();
@@ -1692,10 +1691,10 @@ void Manager::OutputNBest(std::ostream& out
* print surface factor only for the given phrase
*/
void Manager::OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
- char reportSegmentation, bool reportAllFactors) const
+ char reportSegmentation, bool reportAllFactors) const
{
UTIL_THROW_IF2(outputFactorOrder.size() == 0,
- "Must specific at least 1 output factor");
+ "Must specific at least 1 output factor");
const TargetPhrase& phrase = edge.GetCurrTargetPhrase();
bool markUnknown = StaticData::Instance().GetMarkUnknown();
if (reportAllFactors == true) {
@@ -1722,7 +1721,7 @@ void Manager::OutputSurface(std::ostream &out, const Hypothesis &edge, const std
}
UTIL_THROW_IF2(factor == NULL,
- "No factor 0 at position " << pos);
+ "No factor 0 at position " << pos);
//preface surface form with UNK if marking unknowns
const Word &word = phrase.GetWord(pos);
@@ -1735,7 +1734,7 @@ void Manager::OutputSurface(std::ostream &out, const Hypothesis &edge, const std
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
UTIL_THROW_IF2(factor == NULL,
- "No factor " << i << " at position " << pos);
+ "No factor " << i << " at position " << pos);
out << "|" << *factor;
}
@@ -1806,7 +1805,7 @@ std::map<size_t, const Factor*> Manager::GetPlaceholders(const Hypothesis &hypo,
if (factor) {
std::set<size_t> targetPos = hypo.GetTranslationOption().GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(sourcePos);
UTIL_THROW_IF2(targetPos.size() != 1,
- "Placeholder should be aligned to 1, and only 1, word");
+ "Placeholder should be aligned to 1, and only 1, word");
ret[*targetPos.begin()] = factor;
}
}
@@ -1818,12 +1817,12 @@ void Manager::OutputLatticeSamples(OutputCollector *collector) const
{
const StaticData &staticData = StaticData::Instance();
if (collector) {
- TrellisPathList latticeSamples;
- ostringstream out;
- CalcLatticeSamples(staticData.GetLatticeSamplesSize(), latticeSamples);
- OutputNBest(out,latticeSamples, staticData.GetOutputFactorOrder(), m_source.GetTranslationId(),
- staticData.GetReportSegmentation());
- collector->Write(m_source.GetTranslationId(), out.str());
+ TrellisPathList latticeSamples;
+ ostringstream out;
+ CalcLatticeSamples(staticData.GetLatticeSamplesSize(), latticeSamples);
+ OutputNBest(out,latticeSamples, staticData.GetOutputFactorOrder(), m_source.GetTranslationId(),
+ staticData.GetReportSegmentation());
+ collector->Write(m_source.GetTranslationId(), out.str());
}
}
@@ -1831,21 +1830,20 @@ void Manager::OutputLatticeSamples(OutputCollector *collector) const
void Manager::OutputAlignment(OutputCollector *collector) const
{
if (collector == NULL) {
- return;
+ return;
}
if (!m_alignmentOut.str().empty()) {
collector->Write(m_source.GetTranslationId(), m_alignmentOut.str());
- }
- else {
- std::vector<const Hypothesis *> edges;
- const Hypothesis *currentHypo = GetBestHypothesis();
- while (currentHypo) {
- edges.push_back(currentHypo);
- currentHypo = currentHypo->GetPrevHypo();
- }
+ } else {
+ std::vector<const Hypothesis *> edges;
+ const Hypothesis *currentHypo = GetBestHypothesis();
+ while (currentHypo) {
+ edges.push_back(currentHypo);
+ currentHypo = currentHypo->GetPrevHypo();
+ }
- OutputAlignment(collector,m_source.GetTranslationId(), edges);
+ OutputAlignment(collector,m_source.GetTranslationId(), edges);
}
}
@@ -1878,10 +1876,10 @@ void Manager::OutputAlignment(ostream &out, const vector<const Hypothesis *> &ed
void Manager::OutputDetailedTranslationReport(OutputCollector *collector) const
{
if (collector) {
- ostringstream out;
- FixPrecision(out,PRECISION);
- TranslationAnalysis::PrintTranslationAnalysis(out, GetBestHypothesis());
- collector->Write(m_source.GetTranslationId(),out.str());
+ ostringstream out;
+ FixPrecision(out,PRECISION);
+ TranslationAnalysis::PrintTranslationAnalysis(out, GetBestHypothesis());
+ collector->Write(m_source.GetTranslationId(),out.str());
}
}
@@ -1889,14 +1887,14 @@ void Manager::OutputDetailedTranslationReport(OutputCollector *collector) const
void Manager::OutputUnknowns(OutputCollector *collector) const
{
if (collector) {
- long translationId = m_source.GetTranslationId();
- const vector<const Phrase*>& unknowns = m_transOptColl->GetUnknownSources();
- ostringstream out;
- for (size_t i = 0; i < unknowns.size(); ++i) {
- out << *(unknowns[i]);
- }
- out << endl;
- collector->Write(translationId, out.str());
+ long translationId = m_source.GetTranslationId();
+ const vector<const Phrase*>& unknowns = m_transOptColl->GetUnknownSources();
+ ostringstream out;
+ for (size_t i = 0; i < unknowns.size(); ++i) {
+ out << *(unknowns[i]);
+ }
+ out << endl;
+ collector->Write(translationId, out.str());
}
}
@@ -1905,32 +1903,32 @@ void Manager::OutputWordGraph(OutputCollector *collector) const
{
if (collector) {
long translationId = m_source.GetTranslationId();
- ostringstream out;
- FixPrecision(out,PRECISION);
- GetWordGraph(translationId, out);
- collector->Write(translationId, out.str());
+ ostringstream out;
+ FixPrecision(out,PRECISION);
+ GetWordGraph(translationId, out);
+ collector->Write(translationId, out.str());
}
}
void Manager::OutputSearchGraph(OutputCollector *collector) const
{
if (collector) {
- long translationId = m_source.GetTranslationId();
- ostringstream out;
- FixPrecision(out,PRECISION);
- OutputSearchGraph(translationId, out);
- collector->Write(translationId, out.str());
+ long translationId = m_source.GetTranslationId();
+ ostringstream out;
+ FixPrecision(out,PRECISION);
+ OutputSearchGraph(translationId, out);
+ collector->Write(translationId, out.str());
#ifdef HAVE_PROTOBUF
const StaticData &staticData = StaticData::Instance();
- if (staticData.GetOutputSearchGraphPB()) {
- ostringstream sfn;
- sfn << staticData.GetParam("output-search-graph-pb")[0] << '/' << translationId << ".pb" << ends;
- string fn = sfn.str();
- VERBOSE(2, "Writing search graph to " << fn << endl);
- fstream output(fn.c_str(), ios::trunc | ios::binary | ios::out);
- SerializeSearchGraphPB(translationId, output);
- }
+ if (staticData.GetOutputSearchGraphPB()) {
+ ostringstream sfn;
+ sfn << staticData.GetParam("output-search-graph-pb")[0] << '/' << translationId << ".pb" << ends;
+ string fn = sfn.str();
+ VERBOSE(2, "Writing search graph to " << fn << endl);
+ fstream output(fn.c_str(), ios::trunc | ios::binary | ios::out);
+ SerializeSearchGraphPB(translationId, output);
+ }
#endif
}
@@ -1944,24 +1942,24 @@ void Manager::OutputSearchGraphSLF() const
// Output search graph in HTK standard lattice format (SLF)
bool slf = staticData.GetOutputSearchGraphSLF();
if (slf) {
- stringstream fileName;
-
- string dir;
- staticData.GetParameter().SetParameter<string>(dir, "output-search-graph-slf", "");
-
- fileName << dir << "/" << translationId << ".slf";
- ofstream *file = new ofstream;
- file->open(fileName.str().c_str());
- if (file->is_open() && file->good()) {
- ostringstream out;
- FixPrecision(out,PRECISION);
- OutputSearchGraphAsSLF(translationId, out);
- *file << out.str();
- file -> flush();
- } else {
- TRACE_ERR("Cannot output HTK standard lattice for line " << translationId << " because the output file is not open or not ready for writing" << endl);
- }
- delete file;
+ stringstream fileName;
+
+ string dir;
+ staticData.GetParameter().SetParameter<string>(dir, "output-search-graph-slf", "");
+
+ fileName << dir << "/" << translationId << ".slf";
+ ofstream *file = new ofstream;
+ file->open(fileName.str().c_str());
+ if (file->is_open() && file->good()) {
+ ostringstream out;
+ FixPrecision(out,PRECISION);
+ OutputSearchGraphAsSLF(translationId, out);
+ *file << out.str();
+ file -> flush();
+ } else {
+ TRACE_ERR("Cannot output HTK standard lattice for line " << translationId << " because the output file is not open or not ready for writing" << endl);
+ }
+ delete file;
}
}
@@ -1970,8 +1968,8 @@ void Manager::OutputSearchGraphHypergraph() const
{
const StaticData &staticData = StaticData::Instance();
if (staticData.GetOutputSearchGraphHypergraph()) {
- HypergraphOutput<Manager> hypergraphOutput(PRECISION);
- hypergraphOutput.Write(*this);
+ HypergraphOutput<Manager> hypergraphOutput(PRECISION);
+ hypergraphOutput.Write(*this);
}
}
@@ -2005,7 +2003,7 @@ void Manager::OutputBestHypo(const std::vector<Word>& mbrBestHypo, long /*trans
for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) {
const Factor *factor = mbrBestHypo[i].GetFactor(StaticData::Instance().GetOutputFactorOrder()[0]);
UTIL_THROW_IF2(factor == NULL,
- "No factor 0 at position " << i);
+ "No factor 0 at position " << i);
if (i>0) out << " " << *factor;
else out << *factor;
}
@@ -2025,7 +2023,7 @@ void Manager::OutputBestHypo(const Moses::TrellisPath &path, long /*translationI
void Manager::OutputAlignment(std::ostringstream &out, const TrellisPath &path) const
{
- Hypothesis::OutputAlignment(out, path.GetEdges());
+ Hypothesis::OutputAlignment(out, path.GetEdges());
}
} // namespace
diff --git a/moses/Manager.h b/moses/Manager.h
index 8c4c1e6f4..4de0f5f95 100644
--- a/moses/Manager.h
+++ b/moses/Manager.h
@@ -204,8 +204,8 @@ public:
void OutputLatticeSamples(OutputCollector *collector) const;
void OutputDetailedTranslationReport(OutputCollector *collector) const;
void OutputUnknowns(OutputCollector *collector) const;
- void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const
- {}
+ void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const {
+ }
void OutputWordGraph(OutputCollector *collector) const;
void OutputSearchGraph(OutputCollector *collector) const;
void OutputSearchGraphSLF() const;
diff --git a/moses/PCNTools.h b/moses/PCNTools.h
index c36ed67e3..69f9a5488 100644
--- a/moses/PCNTools.h
+++ b/moses/PCNTools.h
@@ -35,8 +35,8 @@ namespace PCN
{
struct CNAlt {
- CNAlt()
- {}
+ CNAlt() {
+ }
CNAlt(const std::string &word,
const std::vector<float> &denseFeatures,
const std::map<std::string, float> &sparseFeatures,
@@ -44,8 +44,8 @@ struct CNAlt {
:m_word(word)
,m_denseFeatures(denseFeatures)
,m_sparseFeatures(sparseFeatures)
- ,m_next(next)
- {}
+ ,m_next(next) {
+ }
std::string m_word;
std::vector<float> m_denseFeatures;
diff --git a/moses/PDTAimp.cpp b/moses/PDTAimp.cpp
index 8726183c3..1b9e789ce 100644
--- a/moses/PDTAimp.cpp
+++ b/moses/PDTAimp.cpp
@@ -8,7 +8,8 @@ PDTAimp::PDTAimp(PhraseDictionaryTreeAdaptor *p)
m_obj(p),
useCache(1),
totalE(0),
- distinctE(0) {
+ distinctE(0)
+{
m_numInputScores = 0;
m_inputFeature = &InputFeature::Instance();
@@ -20,7 +21,8 @@ PDTAimp::PDTAimp(PhraseDictionaryTreeAdaptor *p)
}
}
-PDTAimp::~PDTAimp() {
+PDTAimp::~PDTAimp()
+{
CleanUp();
delete m_dict;
@@ -57,7 +59,8 @@ PDTAimp::~PDTAimp() {
}
-void PDTAimp::CleanUp() {
+void PDTAimp::CleanUp()
+{
assert(m_dict);
m_dict->FreeMemory();
for(size_t i=0; i<m_tgtColls.size(); ++i) delete m_tgtColls[i];
@@ -68,9 +71,10 @@ void PDTAimp::CleanUp() {
}
TargetPhraseCollectionWithSourcePhrase const*
-PDTAimp::GetTargetPhraseCollection(Phrase const &src) const {
+PDTAimp::GetTargetPhraseCollection(Phrase const &src) const
+{
- assert(m_dict);
+ assert(m_dict);
if(src.GetSize()==0) return 0;
std::pair<MapSrc2Tgt::iterator,bool> piter;
@@ -150,10 +154,11 @@ PDTAimp::GetTargetPhraseCollection(Phrase const &src) const {
}
void PDTAimp::Create(const std::vector<FactorType> &input
- , const std::vector<FactorType> &output
- , const std::string &filePath
- , const std::vector<float> &weight
- ) {
+ , const std::vector<FactorType> &output
+ , const std::string &filePath
+ , const std::vector<float> &weight
+ )
+{
// set my members
m_dict=new PhraseDictionaryTree();
@@ -174,14 +179,15 @@ void PDTAimp::Create(const std::vector<FactorType> &input
// m_dict->Read(filePath);
bool res=m_dict->Read(filePath);
if (!res) {
- std::cerr << "bin ttable was read in a wrong way\n";
+ std::cerr << "bin ttable was read in a wrong way\n";
exit(1);
}
}
-void PDTAimp::CacheSource(ConfusionNet const& src) {
- assert(m_dict);
+void PDTAimp::CacheSource(ConfusionNet const& src)
+{
+ assert(m_dict);
const size_t srcSize=src.GetSize();
std::vector<size_t> exploredPaths(srcSize+1,0);
@@ -243,7 +249,7 @@ void PDTAimp::CacheSource(ConfusionNet const& src) {
//assert that we have the right number of link params in this CN option
UTIL_THROW_IF2(currCol[colidx].second.denseScores.size() < m_numInputScores,
- "Incorrect number of input scores");
+ "Incorrect number of input scores");
// do not start with epsilon (except at first position)
if(isEpsilon && curr.begin()==curr.end() && curr.begin()>0) continue;
@@ -298,7 +304,7 @@ void PDTAimp::CacheSource(ConfusionNet const& src) {
//put input scores in first - already logged, just drop in directly
std::vector<float> transcores(m_obj->GetNumScoreComponents());
UTIL_THROW_IF2(transcores.size() != weightTrans.size(),
- "Incorrect number of translation scores");
+ "Incorrect number of translation scores");
//put in phrase table scores, logging as we insert
std::transform(tcands[i].scores.begin()
@@ -395,11 +401,12 @@ void PDTAimp::CacheSource(ConfusionNet const& src) {
}
void PDTAimp::CreateTargetPhrase(TargetPhrase& targetPhrase,
- StringTgtCand::Tokens const& factorStrings,
- Scores const& transVector,
- Scores const& inputVector,
- const std::string *alignmentString,
- Phrase const* srcPtr) const {
+ StringTgtCand::Tokens const& factorStrings,
+ Scores const& transVector,
+ Scores const& inputVector,
+ const std::string *alignmentString,
+ Phrase const* srcPtr) const
+{
FactorCollection &factorCollection = FactorCollection::Instance();
for(size_t k=0; k<factorStrings.size(); ++k) {
@@ -425,10 +432,11 @@ void PDTAimp::CreateTargetPhrase(TargetPhrase& targetPhrase,
TargetPhraseCollectionWithSourcePhrase* PDTAimp::PruneTargetCandidates
(const std::vector<TargetPhrase> & tCands,
std::vector<std::pair<float,size_t> >& costs,
- const std::vector<Phrase> &sourcePhrases) const {
+ const std::vector<Phrase> &sourcePhrases) const
+{
// convert into TargetPhraseCollection
UTIL_THROW_IF2(tCands.size() != sourcePhrases.size(),
- "Number of target phrases must equal number of source phrases");
+ "Number of target phrases must equal number of source phrases");
TargetPhraseCollectionWithSourcePhrase *rv=new TargetPhraseCollectionWithSourcePhrase;
diff --git a/moses/PP/CountsPhraseProperty.cpp b/moses/PP/CountsPhraseProperty.cpp
index b64366733..00bc08011 100644
--- a/moses/PP/CountsPhraseProperty.cpp
+++ b/moses/PP/CountsPhraseProperty.cpp
@@ -27,11 +27,11 @@ void CountsPhraseProperty::ProcessValue(const std::string &value)
std::ostream& operator<<(std::ostream &out, const CountsPhraseProperty &obj)
{
- out << "Count property="
- << obj.GetTargetMarginal() << " "
- << obj.GetSourceMarginal() << " "
- << obj.GetJointCount();
- return out;
+ out << "Count property="
+ << obj.GetTargetMarginal() << " "
+ << obj.GetSourceMarginal() << " "
+ << obj.GetJointCount();
+ return out;
}
} // namespace Moses
diff --git a/moses/PP/CountsPhraseProperty.h b/moses/PP/CountsPhraseProperty.h
index 4f6fbcfa8..7f3137085 100644
--- a/moses/PP/CountsPhraseProperty.h
+++ b/moses/PP/CountsPhraseProperty.h
@@ -47,9 +47,9 @@ public:
return m_jointCount;
}
- virtual const std::string *GetValueString() const {
+ virtual const std::string *GetValueString() const {
UTIL_THROW2("CountsPhraseProperty: value string not available in this phrase property");
- return NULL;
+ return NULL;
};
protected:
diff --git a/moses/PP/NonTermContextProperty.cpp b/moses/PP/NonTermContextProperty.cpp
index df5e88d8e..d1ea6a554 100644
--- a/moses/PP/NonTermContextProperty.cpp
+++ b/moses/PP/NonTermContextProperty.cpp
@@ -14,7 +14,7 @@ NonTermContextProperty::NonTermContextProperty()
NonTermContextProperty::~NonTermContextProperty()
{
- //RemoveAllInColl(m_probStores);
+ //RemoveAllInColl(m_probStores);
}
void NonTermContextProperty::ProcessValue(const std::string &value)
@@ -29,39 +29,39 @@ void NonTermContextProperty::ProcessValue(const std::string &value)
size_t ind = 1;
while (ind < toks.size()) {
- vector<const Factor *> factors;
-
- for (size_t nt = 0; nt < numNT; ++nt) {
- size_t ntInd = Scan<size_t>(toks[ind]);
- assert(nt == ntInd);
- ++ind;
-
- for (size_t contextInd = 0; contextInd < 4; ++contextInd) {
- //cerr << "toks[" << ind << "]=" << toks[ind] << endl;
- const Factor *factor = fc.AddFactor(toks[ind], false);
- factors.push_back(factor);
- ++ind;
- }
- }
-
- // done with the context. Just get the count and put it all into data structures
- // cerr << "count=" << toks[ind] << endl;
- float count = Scan<float>(toks[ind]);
- ++ind;
-
- for (size_t i = 0; i < factors.size(); ++i) {
- size_t ntInd = i / 4;
- size_t contextInd = i % 4;
- const Factor *factor = factors[i];
- AddToMap(ntInd, contextInd, factor, count);
- }
+ vector<const Factor *> factors;
+
+ for (size_t nt = 0; nt < numNT; ++nt) {
+ size_t ntInd = Scan<size_t>(toks[ind]);
+ assert(nt == ntInd);
+ ++ind;
+
+ for (size_t contextInd = 0; contextInd < 4; ++contextInd) {
+ //cerr << "toks[" << ind << "]=" << toks[ind] << endl;
+ const Factor *factor = fc.AddFactor(toks[ind], false);
+ factors.push_back(factor);
+ ++ind;
+ }
+ }
+
+ // done with the context. Just get the count and put it all into data structures
+ // cerr << "count=" << toks[ind] << endl;
+ float count = Scan<float>(toks[ind]);
+ ++ind;
+
+ for (size_t i = 0; i < factors.size(); ++i) {
+ size_t ntInd = i / 4;
+ size_t contextInd = i % 4;
+ const Factor *factor = factors[i];
+ AddToMap(ntInd, contextInd, factor, count);
+ }
}
}
void NonTermContextProperty::AddToMap(size_t ntIndex, size_t index, const Factor *factor, float count)
{
if (ntIndex <= m_probStores.size()) {
- m_probStores.resize(ntIndex + 1);
+ m_probStores.resize(ntIndex + 1);
}
ProbStore &probStore = m_probStores[ntIndex];
@@ -69,38 +69,37 @@ void NonTermContextProperty::AddToMap(size_t ntIndex, size_t index, const Factor
}
float NonTermContextProperty::GetProb(size_t ntInd,
- size_t contextInd,
- const Factor *factor,
- float smoothConstant) const
+ size_t contextInd,
+ const Factor *factor,
+ float smoothConstant) const
{
- UTIL_THROW_IF2(ntInd >= m_probStores.size(), "Invalid nt index=" << ntInd);
- const ProbStore &probStore = m_probStores[ntInd];
- float ret = probStore.GetProb(contextInd, factor, smoothConstant);
- return ret;
+ UTIL_THROW_IF2(ntInd >= m_probStores.size(), "Invalid nt index=" << ntInd);
+ const ProbStore &probStore = m_probStores[ntInd];
+ float ret = probStore.GetProb(contextInd, factor, smoothConstant);
+ return ret;
}
//////////////////////////////////////////
void NonTermContextProperty::ProbStore::AddToMap(size_t index, const Factor *factor, float count)
{
- Map &map = m_vec[index];
-
- Map::iterator iter = map.find(factor);
- if (iter == map.end()) {
- map[factor] = count;
- }
- else {
- float &currCount = iter->second;
- currCount += count;
- }
-
- m_totalCount += count;
+ Map &map = m_vec[index];
+
+ Map::iterator iter = map.find(factor);
+ if (iter == map.end()) {
+ map[factor] = count;
+ } else {
+ float &currCount = iter->second;
+ currCount += count;
+ }
+
+ m_totalCount += count;
}
float NonTermContextProperty::ProbStore::GetProb(size_t contextInd,
- const Factor *factor,
- float smoothConstant) const
+ const Factor *factor,
+ float smoothConstant) const
{
float count = GetCount(contextInd, factor, smoothConstant);
float total = GetTotalCount(contextInd, smoothConstant);
@@ -109,27 +108,26 @@ float NonTermContextProperty::ProbStore::GetProb(size_t contextInd,
}
float NonTermContextProperty::ProbStore::GetCount(size_t contextInd,
- const Factor *factor,
- float smoothConstant) const
+ const Factor *factor,
+ float smoothConstant) const
{
- const Map &map = m_vec[contextInd];
-
- float count = smoothConstant;
- Map::const_iterator iter = map.find(factor);
- if (iter == map.end()) {
- // nothing
- }
- else {
- count += iter->second;
- }
-
- return count;
+ const Map &map = m_vec[contextInd];
+
+ float count = smoothConstant;
+ Map::const_iterator iter = map.find(factor);
+ if (iter == map.end()) {
+ // nothing
+ } else {
+ count += iter->second;
+ }
+
+ return count;
}
float NonTermContextProperty::ProbStore::GetTotalCount(size_t contextInd, float smoothConstant) const
{
- const Map &map = m_vec[contextInd];
- return m_totalCount + smoothConstant * map.size();
+ const Map &map = m_vec[contextInd];
+ return m_totalCount + smoothConstant * map.size();
}
diff --git a/moses/PP/NonTermContextProperty.h b/moses/PP/NonTermContextProperty.h
index 56db9cb32..efe007099 100644
--- a/moses/PP/NonTermContextProperty.h
+++ b/moses/PP/NonTermContextProperty.h
@@ -27,38 +27,40 @@ public:
};
float GetProb(size_t ntInd,
- size_t contextInd,
- const Factor *factor,
- float smoothConstant) const;
+ size_t contextInd,
+ const Factor *factor,
+ float smoothConstant) const;
protected:
- class ProbStore {
- typedef std::map<const Factor*, float> Map; // map word -> prob
- typedef std::vector<Map> Vec; // left outside, left inside, right inside, right outside
- Vec m_vec;
- float m_totalCount;
+ class ProbStore
+ {
+ typedef std::map<const Factor*, float> Map; // map word -> prob
+ typedef std::vector<Map> Vec; // left outside, left inside, right inside, right outside
+ Vec m_vec;
+ float m_totalCount;
- float GetCount(size_t contextInd,
- const Factor *factor,
- float smoothConstant) const;
- float GetTotalCount(size_t contextInd, float smoothConstant) const;
+ float GetCount(size_t contextInd,
+ const Factor *factor,
+ float smoothConstant) const;
+ float GetTotalCount(size_t contextInd, float smoothConstant) const;
public:
- ProbStore()
- :m_vec(4)
- ,m_totalCount(0)
- {}
+ ProbStore()
+ :m_vec(4)
+ ,m_totalCount(0) {
+ }
- float GetProb(size_t contextInd,
- const Factor *factor,
- float smoothConstant) const;
+ float GetProb(size_t contextInd,
+ const Factor *factor,
+ float smoothConstant) const;
- float GetSize(size_t index) const
- { return m_vec[index].size(); }
+ float GetSize(size_t index) const {
+ return m_vec[index].size();
+ }
- void AddToMap(size_t index, const Factor *factor, float count);
+ void AddToMap(size_t index, const Factor *factor, float count);
};
diff --git a/moses/PP/OrientationPhraseProperty.cpp b/moses/PP/OrientationPhraseProperty.cpp
index 1722a5383..4088a0d8b 100644
--- a/moses/PP/OrientationPhraseProperty.cpp
+++ b/moses/PP/OrientationPhraseProperty.cpp
@@ -7,14 +7,14 @@ namespace Moses
void OrientationPhraseProperty::ProcessValue(const std::string &value)
{
- // bidirectional MSLR phrase orientation with 2x4 orientation classes:
+ // bidirectional MSLR phrase orientation with 2x4 orientation classes:
// mono swap dleft dright
std::istringstream tokenizer(value);
try {
if (! (tokenizer >> m_l2rMonoProbability >> m_l2rSwapProbability >> m_l2rDleftProbability >> m_l2rDrightProbability
- >> m_r2lMonoProbability >> m_r2lSwapProbability >> m_r2lDleftProbability >> m_r2lDrightProbability)) {
+ >> m_r2lMonoProbability >> m_r2lSwapProbability >> m_r2lDleftProbability >> m_r2lDrightProbability)) {
UTIL_THROW2("OrientationPhraseProperty: Not able to read value. Flawed property?");
}
} catch (const std::exception &e) {
diff --git a/moses/PP/OrientationPhraseProperty.h b/moses/PP/OrientationPhraseProperty.h
index f6344062c..d682e1c59 100644
--- a/moses/PP/OrientationPhraseProperty.h
+++ b/moses/PP/OrientationPhraseProperty.h
@@ -58,9 +58,9 @@ public:
};
- virtual const std::string *GetValueString() const {
+ virtual const std::string *GetValueString() const {
UTIL_THROW2("OrientationPhraseProperty: value string not available in this phrase property");
- return NULL;
+ return NULL;
};
protected:
diff --git a/moses/PP/PhraseProperty.cpp b/moses/PP/PhraseProperty.cpp
index 614b39c60..69e3c3374 100644
--- a/moses/PP/PhraseProperty.cpp
+++ b/moses/PP/PhraseProperty.cpp
@@ -5,8 +5,8 @@ namespace Moses
std::ostream& operator<<(std::ostream &out, const PhraseProperty &obj)
{
- out << "Base phrase property";
- return out;
+ out << "Base phrase property";
+ return out;
}
}
diff --git a/moses/PP/PhraseProperty.h b/moses/PP/PhraseProperty.h
index e8127e5bb..76c294481 100644
--- a/moses/PP/PhraseProperty.h
+++ b/moses/PP/PhraseProperty.h
@@ -14,15 +14,21 @@ class PhraseProperty
public:
PhraseProperty() : m_value(NULL) {};
- virtual ~PhraseProperty() { if ( m_value != NULL ) delete m_value; };
+ virtual ~PhraseProperty() {
+ if ( m_value != NULL ) delete m_value;
+ };
- virtual void ProcessValue(const std::string &value) { m_value = new std::string(value); };
+ virtual void ProcessValue(const std::string &value) {
+ m_value = new std::string(value);
+ };
- virtual const std::string *GetValueString() const { return m_value; };
+ virtual const std::string *GetValueString() const {
+ return m_value;
+ };
protected:
- std::string *m_value;
+ std::string *m_value;
};
diff --git a/moses/PP/SourceLabelsPhraseProperty.cpp b/moses/PP/SourceLabelsPhraseProperty.cpp
index 8e6a5dd6d..efe5ae741 100644
--- a/moses/PP/SourceLabelsPhraseProperty.cpp
+++ b/moses/PP/SourceLabelsPhraseProperty.cpp
@@ -27,53 +27,53 @@ void SourceLabelsPhraseProperty::ProcessValue(const std::string &value)
- // read source-labelled rule items
+ // read source-labelled rule items
std::priority_queue<float> ruleLabelledCountsPQ;
while (tokenizer.peek() != EOF) {
// try {
- SourceLabelsPhrasePropertyItem item;
- size_t numberOfLHSsGivenRHS = std::numeric_limits<std::size_t>::max();
+ SourceLabelsPhrasePropertyItem item;
+ size_t numberOfLHSsGivenRHS = std::numeric_limits<std::size_t>::max();
- if (m_nNTs == 1) {
+ if (m_nNTs == 1) {
- item.m_sourceLabelsRHSCount = m_totalCount;
+ item.m_sourceLabelsRHSCount = m_totalCount;
- } else { // rule has right-hand side non-terminals, i.e. it's a hierarchical rule
+ } else { // rule has right-hand side non-terminals, i.e. it's a hierarchical rule
- for (size_t i=0; i<m_nNTs-1; ++i) { // RHS source non-terminal labels
- size_t sourceLabelRHS;
- if (! (tokenizer >> sourceLabelRHS) ) { // RHS source non-terminal label
- UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read right-hand side label index. Flawed property? " << value);
- }
- item.m_sourceLabelsRHS.push_back(sourceLabelRHS);
+ for (size_t i=0; i<m_nNTs-1; ++i) { // RHS source non-terminal labels
+ size_t sourceLabelRHS;
+ if (! (tokenizer >> sourceLabelRHS) ) { // RHS source non-terminal label
+ UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read right-hand side label index. Flawed property? " << value);
}
+ item.m_sourceLabelsRHS.push_back(sourceLabelRHS);
+ }
- if (! (tokenizer >> item.m_sourceLabelsRHSCount)) {
- UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read right-hand side count. Flawed property? " << value);
- }
+ if (! (tokenizer >> item.m_sourceLabelsRHSCount)) {
+ UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read right-hand side count. Flawed property? " << value);
+ }
- if (! (tokenizer >> numberOfLHSsGivenRHS)) {
- UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read number of left-hand sides. Flawed property? " << value);
- }
+ if (! (tokenizer >> numberOfLHSsGivenRHS)) {
+ UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read number of left-hand sides. Flawed property? " << value);
}
+ }
- for (size_t i=0; i<numberOfLHSsGivenRHS && tokenizer.peek()!=EOF; ++i) { // LHS source non-terminal labels seen with this RHS
- size_t sourceLabelLHS;
- if (! (tokenizer >> sourceLabelLHS)) { // LHS source non-terminal label
- UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read left-hand side label index. Flawed property? " << value);
- }
- float ruleSourceLabelledCount;
- if (! (tokenizer >> ruleSourceLabelledCount)) {
- UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read count. Flawed property? " << value);
- }
- item.m_sourceLabelsLHSList.push_back( std::make_pair(sourceLabelLHS,ruleSourceLabelledCount) );
- ruleLabelledCountsPQ.push(ruleSourceLabelledCount);
+ for (size_t i=0; i<numberOfLHSsGivenRHS && tokenizer.peek()!=EOF; ++i) { // LHS source non-terminal labels seen with this RHS
+ size_t sourceLabelLHS;
+ if (! (tokenizer >> sourceLabelLHS)) { // LHS source non-terminal label
+ UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read left-hand side label index. Flawed property? " << value);
}
+ float ruleSourceLabelledCount;
+ if (! (tokenizer >> ruleSourceLabelledCount)) {
+ UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read count. Flawed property? " << value);
+ }
+ item.m_sourceLabelsLHSList.push_back( std::make_pair(sourceLabelLHS,ruleSourceLabelledCount) );
+ ruleLabelledCountsPQ.push(ruleSourceLabelledCount);
+ }
- m_sourceLabelItems.push_back(item);
+ m_sourceLabelItems.push_back(item);
// } catch (const std::exception &e) {
// UTIL_THROW2("SourceLabelsPhraseProperty: Read error. Flawed property?");
diff --git a/moses/PP/SourceLabelsPhraseProperty.h b/moses/PP/SourceLabelsPhraseProperty.h
index 39b43ad3e..d9ec82776 100644
--- a/moses/PP/SourceLabelsPhraseProperty.h
+++ b/moses/PP/SourceLabelsPhraseProperty.h
@@ -9,28 +9,25 @@
namespace Moses
{
-// Note that we require label tokens (strings) in the corresponding property values of phrase table entries
+// Note that we require label tokens (strings) in the corresponding property values of phrase table entries
// to be replaced beforehand by indices (size_t) of a label vocabulary. (TODO: change that?)
class SourceLabelsPhrasePropertyItem
{
-friend class SourceLabelsPhraseProperty;
+ friend class SourceLabelsPhraseProperty;
public:
SourceLabelsPhrasePropertyItem() {};
- float GetSourceLabelsRHSCount() const
- {
+ float GetSourceLabelsRHSCount() const {
return m_sourceLabelsRHSCount;
};
- const std::list<size_t> &GetSourceLabelsRHS() const
- {
+ const std::list<size_t> &GetSourceLabelsRHS() const {
return m_sourceLabelsRHS;
};
- const std::list< std::pair<size_t,float> > &GetSourceLabelsLHSList() const
- {
+ const std::list< std::pair<size_t,float> > &GetSourceLabelsLHSList() const {
return m_sourceLabelsLHSList;
};
@@ -60,9 +57,9 @@ public:
return m_sourceLabelItems;
};
- virtual const std::string *GetValueString() const {
+ virtual const std::string *GetValueString() const {
UTIL_THROW2("SourceLabelsPhraseProperty: value string not available in this phrase property");
- return NULL;
+ return NULL;
};
protected:
diff --git a/moses/PP/SpanLengthPhraseProperty.cpp b/moses/PP/SpanLengthPhraseProperty.cpp
index d45c7b919..3a3fb3586 100644
--- a/moses/PP/SpanLengthPhraseProperty.cpp
+++ b/moses/PP/SpanLengthPhraseProperty.cpp
@@ -18,22 +18,21 @@ void SpanLengthPhraseProperty::ProcessValue(const std::string &value)
set< vector<string> > indices;
for (size_t i = 0; i < toks.size(); ++i) {
- const string &span = toks[i];
-
- // is it a ntIndex,sourceSpan,targetSpan or count ?
- vector<string> toks;
- Tokenize<string>(toks, span, ",");
- UTIL_THROW_IF2(toks.size() != 1 && toks.size() != 3, "Incorrect format for SpanLength: " << span);
-
- if (toks.size() == 1) {
- float count = Scan<float>(toks[0]);
- Populate(indices, count);
-
- indices.clear();
- }
- else {
- indices.insert(toks);
- }
+ const string &span = toks[i];
+
+ // is it a ntIndex,sourceSpan,targetSpan or count ?
+ vector<string> toks;
+ Tokenize<string>(toks, span, ",");
+ UTIL_THROW_IF2(toks.size() != 1 && toks.size() != 3, "Incorrect format for SpanLength: " << span);
+
+ if (toks.size() == 1) {
+ float count = Scan<float>(toks[0]);
+ Populate(indices, count);
+
+ indices.clear();
+ } else {
+ indices.insert(toks);
+ }
}
// totals
@@ -45,11 +44,11 @@ void SpanLengthPhraseProperty::Populate(const set< vector<string> > &indices, fl
{
set< vector<string> >::const_iterator iter;
for (iter = indices.begin(); iter != indices.end(); ++iter) {
- const vector<string> &toksStr = *iter;
- vector<size_t> toks = Scan<size_t>(toksStr);
- UTIL_THROW_IF2(toks.size() != 3, "Incorrect format for SpanLength. Size is " << toks.size());
+ const vector<string> &toksStr = *iter;
+ vector<size_t> toks = Scan<size_t>(toksStr);
+ UTIL_THROW_IF2(toks.size() != 3, "Incorrect format for SpanLength. Size is " << toks.size());
- Populate(toks, count);
+ Populate(toks, count);
}
}
@@ -59,8 +58,8 @@ void SpanLengthPhraseProperty::Populate(const std::vector<size_t> &toks, float c
size_t sourceLength = toks[1];
size_t targetLength = toks[2];
if (ntInd >= m_source.size() ) {
- m_source.resize(ntInd + 1);
- m_target.resize(ntInd + 1);
+ m_source.resize(ntInd + 1);
+ m_target.resize(ntInd + 1);
}
Map &sourceMap = m_source[ntInd].first;
@@ -74,54 +73,52 @@ void SpanLengthPhraseProperty::Populate(Map &map, size_t span, float count)
Map::iterator iter;
iter = map.find(span);
if (iter != map.end()) {
- float &value = iter->second;
- value += count;
- }
- else {
- map[span] = count;
+ float &value = iter->second;
+ value += count;
+ } else {
+ map[span] = count;
}
}
void SpanLengthPhraseProperty::CalcTotals(Vec &vec)
{
- for (size_t i = 0; i < vec.size(); ++i) {
- float total = 0;
-
- const Map &map = vec[i].first;
- Map::const_iterator iter;
- for (iter = map.begin(); iter != map.end(); ++iter) {
- float count = iter->second;
- total += count;
- }
-
- vec[i].second = total;
- }
+ for (size_t i = 0; i < vec.size(); ++i) {
+ float total = 0;
+
+ const Map &map = vec[i].first;
+ Map::const_iterator iter;
+ for (iter = map.begin(); iter != map.end(); ++iter) {
+ float count = iter->second;
+ total += count;
+ }
+
+ vec[i].second = total;
+ }
}
float SpanLengthPhraseProperty::GetProb(size_t ntInd, size_t sourceWidth, float smoothing) const
{
- float count;
-
- const std::pair<Map, float> &data = m_source[ntInd];
- const Map &map = data.first;
-
- if (map.size() == 0) {
- // should this ever be reached? there shouldn't be any span length proprty so FF shouldn't call this
- return 1.0f;
- }
-
- Map::const_iterator iter = map.find(sourceWidth);
- if (iter == map.end()) {
- count = 0;
- }
- else {
- count = iter->second;
- }
- count += smoothing;
-
- float total = data.second + smoothing * (float) map.size();
- float ret = count / total;
- return ret;
+ float count;
+
+ const std::pair<Map, float> &data = m_source[ntInd];
+ const Map &map = data.first;
+
+ if (map.size() == 0) {
+ // should this ever be reached? there shouldn't be any span length proprty so FF shouldn't call this
+ return 1.0f;
+ }
+
+ Map::const_iterator iter = map.find(sourceWidth);
+ if (iter == map.end()) {
+ count = 0;
+ } else {
+ count = iter->second;
+ }
+ count += smoothing;
+
+ float total = data.second + smoothing * (float) map.size();
+ float ret = count / total;
+ return ret;
}
}
diff --git a/moses/PP/SpanLengthPhraseProperty.h b/moses/PP/SpanLengthPhraseProperty.h
index 982c3ca0d..490b7db72 100644
--- a/moses/PP/SpanLengthPhraseProperty.h
+++ b/moses/PP/SpanLengthPhraseProperty.h
@@ -13,22 +13,22 @@ namespace Moses
class SpanLengthPhraseProperty : public PhraseProperty
{
public:
- SpanLengthPhraseProperty();
+ SpanLengthPhraseProperty();
- void ProcessValue(const std::string &value);
+ void ProcessValue(const std::string &value);
- float GetProb(size_t ntInd, size_t sourceWidth, float smoothing) const;
+ float GetProb(size_t ntInd, size_t sourceWidth, float smoothing) const;
protected:
- // fractional counts
- typedef std::map<size_t, float> Map;
- typedef std::vector<std::pair<Map, float> > Vec;
- Vec m_source, m_target;
+ // fractional counts
+ typedef std::map<size_t, float> Map;
+ typedef std::vector<std::pair<Map, float> > Vec;
+ Vec m_source, m_target;
- void Populate(const std::set< std::vector<std::string> > &indices, float count);
- void Populate(const std::vector<size_t> &toks, float count);
- void Populate(Map &map, size_t span, float count);
+ void Populate(const std::set< std::vector<std::string> > &indices, float count);
+ void Populate(const std::vector<size_t> &toks, float count);
+ void Populate(Map &map, size_t span, float count);
- void CalcTotals(Vec &vec);
+ void CalcTotals(Vec &vec);
};
} // namespace Moses
diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp
index a2cb293df..39fa6a975 100644
--- a/moses/Parameter.cpp
+++ b/moses/Parameter.cpp
@@ -223,13 +223,12 @@ Parameter::~Parameter()
const PARAM_VEC *Parameter::GetParam(const std::string &paramName) const
{
- PARAM_MAP::const_iterator iter = m_setting.find( paramName );
- if (iter == m_setting.end()) {
- return NULL;
- }
- else {
- return &iter->second;
- }
+ PARAM_MAP::const_iterator iter = m_setting.find( paramName );
+ if (iter == m_setting.end()) {
+ return NULL;
+ } else {
+ return &iter->second;
+ }
}
@@ -343,8 +342,8 @@ bool Parameter::LoadParam(int argc, char* argv[])
// don't mix old and new format
if ((GetParam("feature") || GetParam("weight"))
&& (GetParam("weight-slm") || GetParam("weight-bl") || GetParam("weight-d") ||
- GetParam("weight-dlm") || GetParam("weight-lrl") || GetParam("weight-generation") ||
- GetParam("weight-i") || GetParam("weight-l") || GetParam("weight-lex") ||
+ GetParam("weight-dlm") || GetParam("weight-lrl") || GetParam("weight-generation") ||
+ GetParam("weight-i") || GetParam("weight-l") || GetParam("weight-lex") ||
GetParam("weight-glm") || GetParam("weight-wt") || GetParam("weight-pp") ||
GetParam("weight-pb") || GetParam("weight-t") || GetParam("weight-w") ||
GetParam("weight-p") ||
@@ -374,7 +373,7 @@ bool Parameter::LoadParam(int argc, char* argv[])
string paramSwitch = (string) argv[i];
string paramName = paramSwitch.substr(1);
if (m_valid.find(paramName) == m_valid.end()) {
- std::cerr << "illegal switch: " << paramSwitch;
+ std::cerr << "illegal switch: " << paramSwitch;
noErrorFlag = false;
}
}
@@ -390,13 +389,13 @@ void Parameter::AddFeaturesCmd()
{
const PARAM_VEC *params = GetParam("feature-add");
if (params) {
- PARAM_VEC::const_iterator iter;
- for (iter = params->begin(); iter != params->end(); ++iter) {
- const string &line = *iter;
- AddFeature(line);
- }
+ PARAM_VEC::const_iterator iter;
+ for (iter = params->begin(); iter != params->end(); ++iter) {
+ const string &line = *iter;
+ AddFeature(line);
+ }
- m_setting.erase("feature-add");
+ m_setting.erase("feature-add");
}
}
@@ -516,7 +515,7 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
vector<size_t> maxTargetPhrase;
params = GetParam("ttable-limit");
if (params) {
- maxTargetPhrase = Scan<size_t>(*params);
+ maxTargetPhrase = Scan<size_t>(*params);
}
if(maxTargetPhrase.size() == 1 && translationVector.size() > 1) {
@@ -538,7 +537,7 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
vector<string> token = Tokenize(translationVector[currDict]);
if(currDict == 0 && token.size() == 4) {
- std::cerr << "Phrase table specification in old 4-field format. No longer supported";
+ std::cerr << "Phrase table specification in old 4-field format. No longer supported";
return;
}
UTIL_THROW_IF2(token.size() < 5, "Phrase table must have at least 5 scores");
@@ -655,7 +654,7 @@ void Parameter::ConvertWeightArgsDistortion()
const PARAM_VEC *oldWeights = GetParam(oldWeightName);
if (oldWeights) {
- const PARAM_VEC *searchAlgo = GetParam("search-algorithm");
+ const PARAM_VEC *searchAlgo = GetParam("search-algorithm");
if (searchAlgo == NULL ||
(searchAlgo->size() > 0
&& (Trim(searchAlgo->at(0)) == "0" || Trim(searchAlgo->at(0)) == "1")
@@ -679,8 +678,8 @@ void Parameter::ConvertWeightArgsDistortion()
vector<float> weights(numFF);
for (size_t currFF = 0; currFF < numFF; ++currFF) {
- UTIL_THROW_IF2(oldWeights && currOldInd >= oldWeights->size(),
- "Errors converting old distortion weights to new weights");
+ UTIL_THROW_IF2(oldWeights && currOldInd >= oldWeights->size(),
+ "Errors converting old distortion weights to new weights");
float weight = Scan<float>(oldWeights->at(currOldInd));
weights[currFF] = weight;
@@ -883,8 +882,8 @@ void Parameter::ConvertPhrasePenalty()
string oldWeightName = "weight-p";
const PARAM_VEC *params = GetParam(oldWeightName);
if (params) {
- UTIL_THROW_IF2(params->size() != 1,
- "There should be only 1 phrase-penalty weight");
+ UTIL_THROW_IF2(params->size() != 1,
+ "There should be only 1 phrase-penalty weight");
float weight = Scan<float>(params->at(0));
AddFeature("PhrasePenalty");
SetWeight("PhrasePenalty", 0, weight);
@@ -1017,11 +1016,11 @@ bool Parameter::Validate()
if (m_setting["lmodel-dub"].size() > 0) {
if (m_setting["lmodel-file"].size() != m_setting["lmodel-dub"].size()) {
std::cerr << "Config and parameters specify "
- << static_cast<int>(m_setting["lmodel-file"].size())
- << " language model files (lmodel-file), but "
- << static_cast<int>(m_setting["lmodel-dub"].size())
- << " LM upperbounds (lmodel-dub)"
- << endl;
+ << static_cast<int>(m_setting["lmodel-file"].size())
+ << " language model files (lmodel-file), but "
+ << static_cast<int>(m_setting["lmodel-dub"].size())
+ << " LM upperbounds (lmodel-dub)"
+ << endl;
noErrorFlag = false;
}
}
@@ -1032,7 +1031,7 @@ bool Parameter::Validate()
if (noErrorFlag && m_setting["input-file"].size() == 1) {
noErrorFlag = FileExists(m_setting["input-file"][0]);
if (!noErrorFlag) {
- std::cerr << endl << "Input file " << m_setting["input-file"][0] << " does not exist";
+ std::cerr << endl << "Input file " << m_setting["input-file"][0] << " does not exist";
}
}
// generation tables
@@ -1080,9 +1079,9 @@ bool Parameter::FilesExist(const string &paramName, int fieldNo, std::vector<std
tokenizeIndex = static_cast<size_t>(fieldNo);
if (tokenizeIndex >= vec.size()) {
- std::cerr << "Expected at least " << (tokenizeIndex+1) << " tokens per entry in '"
- << paramName << "', but only found "
- << vec.size();
+ std::cerr << "Expected at least " << (tokenizeIndex+1) << " tokens per entry in '"
+ << paramName << "', but only found "
+ << vec.size();
return false;
}
const string &pathStr = vec[tokenizeIndex];
@@ -1109,7 +1108,7 @@ string Parameter::FindParam(const string &paramSwitch, int argc, char* argv[])
if (i+1 < argc) {
return argv[i+1];
} else {
- std::cerr << "Option " << paramSwitch << " requires a parameter!";
+ std::cerr << "Option " << paramSwitch << " requires a parameter!";
// TODO return some sort of error, not the empty string
}
}
diff --git a/moses/Parameter.h b/moses/Parameter.h
index 9a290f7c4..c6b08dd85 100644
--- a/moses/Parameter.h
+++ b/moses/Parameter.h
@@ -114,15 +114,13 @@ public:
void Save(const std::string path);
template<typename T>
- void SetParameter(T &var, const std::string &name, const T &defaultValue) const
- {
- const PARAM_VEC *params = GetParam(name);
- if (params && params->size()) {
- var = Scan<T>( params->at(0));
- }
- else {
- var = defaultValue;
- }
+ void SetParameter(T &var, const std::string &name, const T &defaultValue) const {
+ const PARAM_VEC *params = GetParam(name);
+ if (params && params->size()) {
+ var = Scan<T>( params->at(0));
+ } else {
+ var = defaultValue;
+ }
}
};
diff --git a/moses/ScoreComponentCollection.cpp b/moses/ScoreComponentCollection.cpp
index 19c7475b6..b8c93c193 100644
--- a/moses/ScoreComponentCollection.cpp
+++ b/moses/ScoreComponentCollection.cpp
@@ -186,8 +186,8 @@ void ScoreComponentCollection::Save(ostream& out, bool multiline) const
string sep = " ";
string linesep = "\n";
if (!multiline) {
- sep = "=";
- linesep = " ";
+ sep = "=";
+ linesep = " ";
}
ScoreIndexMap::const_iterator iter = s_scoreIndexes.begin();
for (; iter != s_scoreIndexes.end(); ++iter ) {
@@ -323,8 +323,8 @@ void ScoreComponentCollection::OutputAllFeatureScores(std::ostream &out) const
}
void ScoreComponentCollection::OutputFeatureScores( std::ostream& out
- , const FeatureFunction *ff
- , std::string &lastName ) const
+ , const FeatureFunction *ff
+ , std::string &lastName ) const
{
const StaticData &staticData = StaticData::Instance();
bool labeledOutput = staticData.IsLabeledNBestList();
diff --git a/moses/ScoreComponentCollection.h b/moses/ScoreComponentCollection.h
index 84b05b413..ce285b59e 100644
--- a/moses/ScoreComponentCollection.h
+++ b/moses/ScoreComponentCollection.h
@@ -49,11 +49,11 @@ struct ScorePair {
std::vector<float> denseScores;
std::map<StringPiece, float> sparseScores;
- ScorePair()
- {}
+ ScorePair() {
+ }
ScorePair(const std::vector<float> &other)
- :denseScores(other)
- {}
+ :denseScores(other) {
+ }
void PlusEquals(const ScorePair &other);
void PlusEquals(const StringPiece &key, float value);
diff --git a/moses/ScoreComponentCollectionTest.cpp b/moses/ScoreComponentCollectionTest.cpp
index 9b79f18f5..87c4f03b7 100644
--- a/moses/ScoreComponentCollectionTest.cpp
+++ b/moses/ScoreComponentCollectionTest.cpp
@@ -37,21 +37,21 @@ public:
void EvaluateWhenApplied(const Hypothesis&, ScoreComponentCollection*) const {}
void EvaluateWhenApplied(const ChartHypothesis&, ScoreComponentCollection*) const {}
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
- {}
-
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const {
+ }
+
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const
- {}
+ , const TranslationOptionList &translationOptionList) const {
+ }
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
- {}
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {
+ }
};
diff --git a/moses/SearchNormal.cpp b/moses/SearchNormal.cpp
index e33c77676..b3e647299 100644
--- a/moses/SearchNormal.cpp
+++ b/moses/SearchNormal.cpp
@@ -385,13 +385,13 @@ void SearchNormal::OutputHypoStackSize()
void SearchNormal::OutputHypoStack()
{
- // all stacks
- int i = 0;
- vector < HypothesisStack* >::iterator iterStack;
- for (iterStack = m_hypoStackColl.begin() ; iterStack != m_hypoStackColl.end() ; ++iterStack) {
- HypothesisStackNormal &hypoColl = *static_cast<HypothesisStackNormal*>(*iterStack);
- TRACE_ERR( "Stack " << i++ << ": " << endl << hypoColl << endl);
- }
+ // all stacks
+ int i = 0;
+ vector < HypothesisStack* >::iterator iterStack;
+ for (iterStack = m_hypoStackColl.begin() ; iterStack != m_hypoStackColl.end() ; ++iterStack) {
+ HypothesisStackNormal &hypoColl = *static_cast<HypothesisStackNormal*>(*iterStack);
+ TRACE_ERR( "Stack " << i++ << ": " << endl << hypoColl << endl);
+ }
}
}
diff --git a/moses/Sentence.h b/moses/Sentence.h
index 998123d19..4f206c0d4 100644
--- a/moses/Sentence.h
+++ b/moses/Sentence.h
@@ -97,7 +97,7 @@ public:
TranslationOptionCollection* CreateTranslationOptionCollection() const;
virtual void CreateFromString(const std::vector<FactorType> &factorOrder
- , const std::string &phraseString); // , const std::string &factorDelimiter);
+ , const std::string &phraseString); // , const std::string &factorDelimiter);
const NonTerminalSet &GetLabelSet(size_t /*startPos*/, size_t /*endPos*/) const {
return m_defaultLabelSet;
diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp
index 8f399ca02..2bb14a983 100644
--- a/moses/StaticData.cpp
+++ b/moses/StaticData.cpp
@@ -1,5 +1,5 @@
// $Id$
-// vim:tabstop=2
+// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
@@ -130,14 +130,14 @@ bool StaticData::LoadData(Parameter *parameter)
m_parameter->SetParameter(m_recoverPath, "recover-input-path", false);
if (m_recoverPath && m_inputType == SentenceInput) {
- TRACE_ERR("--recover-input-path should only be used with confusion net or word lattice input!\n");
- m_recoverPath = false;
- }
+ TRACE_ERR("--recover-input-path should only be used with confusion net or word lattice input!\n");
+ m_recoverPath = false;
+ }
// factor delimiter
m_parameter->SetParameter<string>(m_factorDelimiter, "factor-delimiter", "|");
if (m_factorDelimiter == "none") {
- m_factorDelimiter = "";
+ m_factorDelimiter = "";
}
m_parameter->SetParameter( m_continuePartialTranslation, "continue-partial-translation", false );
@@ -170,15 +170,14 @@ bool StaticData::LoadData(Parameter *parameter)
// n-best
params = m_parameter->GetParam("n-best-list");
if (params) {
- if (params->size() >= 2) {
- m_nBestFilePath = params->at(0);
- m_nBestSize = Scan<size_t>( params->at(1) );
- m_onlyDistinctNBest=(params->size()>2 && params->at(2)=="distinct");
- }
- else {
- std::cerr << "wrong format for switch -n-best-list file size [disinct]";
- return false;
- }
+ if (params->size() >= 2) {
+ m_nBestFilePath = params->at(0);
+ m_nBestSize = Scan<size_t>( params->at(1) );
+ m_onlyDistinctNBest=(params->size()>2 && params->at(2)=="distinct");
+ } else {
+ std::cerr << "wrong format for switch -n-best-list file size [disinct]";
+ return false;
+ }
} else {
m_nBestSize = 0;
}
@@ -188,40 +187,38 @@ bool StaticData::LoadData(Parameter *parameter)
//lattice samples
params = m_parameter->GetParam("lattice-samples");
if (params) {
- if (params->size() ==2 ) {
- m_latticeSamplesFilePath = params->at(0);
- m_latticeSamplesSize = Scan<size_t>(params->at(1));
- }
- else {
- std::cerr <<"wrong format for switch -lattice-samples file size";
- return false;
- }
- }
- else {
+ if (params->size() ==2 ) {
+ m_latticeSamplesFilePath = params->at(0);
+ m_latticeSamplesSize = Scan<size_t>(params->at(1));
+ } else {
+ std::cerr <<"wrong format for switch -lattice-samples file size";
+ return false;
+ }
+ } else {
m_latticeSamplesSize = 0;
}
// word graph
params = m_parameter->GetParam("output-word-graph");
if (params && params->size() == 2)
- m_outputWordGraph = true;
+ m_outputWordGraph = true;
else
- m_outputWordGraph = false;
+ m_outputWordGraph = false;
// search graph
params = m_parameter->GetParam("output-search-graph");
if (params && params->size()) {
if (params->size() != 1) {
- std::cerr << "ERROR: wrong format for switch -output-search-graph file";
+ std::cerr << "ERROR: wrong format for switch -output-search-graph file";
return false;
}
m_outputSearchGraph = true;
}
// ... in extended format
else if (m_parameter->GetParam("output-search-graph-extended") &&
- m_parameter->GetParam("output-search-graph-extended")->size()) {
+ m_parameter->GetParam("output-search-graph-extended")->size()) {
if (m_parameter->GetParam("output-search-graph-extended")->size() != 1) {
- std::cerr << "ERROR: wrong format for switch -output-search-graph-extended file";
+ std::cerr << "ERROR: wrong format for switch -output-search-graph-extended file";
return false;
}
m_outputSearchGraph = true;
@@ -280,16 +277,16 @@ bool StaticData::LoadData(Parameter *parameter)
//input factors
params = m_parameter->GetParam("input-factors");
if (params) {
- m_inputFactorOrder = Scan<FactorType>(*params);
+ m_inputFactorOrder = Scan<FactorType>(*params);
}
if(m_inputFactorOrder.empty()) {
- m_inputFactorOrder.push_back(0);
+ m_inputFactorOrder.push_back(0);
}
//output factors
params = m_parameter->GetParam("output-factors");
if (params) {
- m_outputFactorOrder = Scan<FactorType>(*params);
+ m_outputFactorOrder = Scan<FactorType>(*params);
}
if(m_outputFactorOrder.empty()) {
// default. output factor 0
@@ -327,11 +324,11 @@ bool StaticData::LoadData(Parameter *parameter)
params = m_parameter->GetParam("stack-diversity");
if (params && params->size()) {
if (m_maxDistortion > 15) {
- std::cerr << "stack diversity > 0 is not allowed for distortion limits larger than 15";
+ std::cerr << "stack diversity > 0 is not allowed for distortion limits larger than 15";
return false;
}
if (m_inputType == WordLatticeInput) {
- std::cerr << "stack diversity > 0 is not allowed for lattice input";
+ std::cerr << "stack diversity > 0 is not allowed for lattice input";
return false;
}
m_minHypoStackDiversity = Scan<size_t>(params->at(0));
@@ -389,7 +386,7 @@ bool StaticData::LoadData(Parameter *parameter)
params = m_parameter->GetParam("lmbr-thetas");
if (params) {
- m_lmbrThetas = Scan<float>(*params);
+ m_lmbrThetas = Scan<float>(*params);
}
//consensus decoding
@@ -423,7 +420,7 @@ bool StaticData::LoadData(Parameter *parameter)
#ifdef WITH_THREADS
m_threadCount = boost::thread::hardware_concurrency();
if (!m_threadCount) {
- std::cerr << "-threads all specified but Boost doesn't know how many cores there are";
+ std::cerr << "-threads all specified but Boost doesn't know how many cores there are";
return false;
}
#else
@@ -433,12 +430,12 @@ bool StaticData::LoadData(Parameter *parameter)
} else {
m_threadCount = Scan<int>(params->at(0));
if (m_threadCount < 1) {
- std::cerr << "Specify at least one thread.";
+ std::cerr << "Specify at least one thread.";
return false;
}
#ifndef WITH_THREADS
if (m_threadCount > 1) {
- std::cerr << "Error: Thread count of " << params->at(0) << " but moses not built with thread support";
+ std::cerr << "Error: Thread count of " << params->at(0) << " but moses not built with thread support";
return false;
}
#endif
@@ -460,8 +457,8 @@ bool StaticData::LoadData(Parameter *parameter)
}
m_xmlBrackets.first= brackets[0];
m_xmlBrackets.second=brackets[1];
- VERBOSE(1,"XML tags opening and closing brackets for XML input are: "
- << m_xmlBrackets.first << " and " << m_xmlBrackets.second << endl);
+ VERBOSE(1,"XML tags opening and closing brackets for XML input are: "
+ << m_xmlBrackets.first << " and " << m_xmlBrackets.second << endl);
}
m_parameter->SetParameter(m_placeHolderFactor, "placeholder-factor", NOT_FOUND);
@@ -516,7 +513,7 @@ bool StaticData::LoadData(Parameter *parameter)
if (!weightFile.empty()) {
ScoreComponentCollection extraWeights;
if (!extraWeights.Load(weightFile)) {
- std::cerr << "Unable to load weights from " << weightFile;
+ std::cerr << "Unable to load weights from " << weightFile;
return false;
}
m_allWeights.PlusEquals(extraWeights);
@@ -578,7 +575,7 @@ void StaticData::LoadNonTerminals()
"Incorrect unknown LHS format: " << line);
UnknownLHSEntry entry(tokens[0], Scan<float>(tokens[1]));
m_unknownLHS.push_back(entry);
- // const Factor *targetFactor =
+ // const Factor *targetFactor =
factorCollection.AddFactor(Output, 0, tokens[0], true);
}
@@ -605,31 +602,28 @@ void StaticData::LoadDecodeGraphs()
params = m_parameter->GetParam("mapping");
if (params && params->size()) {
- mappingVector = *params;
+ mappingVector = *params;
}
params = m_parameter->GetParam("max-chart-span");
if (params && params->size()) {
- maxChartSpans = Scan<size_t>(*params);
+ maxChartSpans = Scan<size_t>(*params);
}
vector<string> toks = Tokenize(mappingVector[0]);
if (toks.size() == 3) {
- // eg 0 T 0
- LoadDecodeGraphsOld(mappingVector, maxChartSpans);
- }
- else if (toks.size() == 2) {
- if (toks[0] == "T" || toks[0] == "G") {
- // eg. T 0
- LoadDecodeGraphsOld(mappingVector, maxChartSpans);
- }
- else {
- // eg. 0 TM1
- LoadDecodeGraphsNew(mappingVector, maxChartSpans);
- }
- }
- else {
- UTIL_THROW(util::Exception, "Malformed mapping");
+ // eg 0 T 0
+ LoadDecodeGraphsOld(mappingVector, maxChartSpans);
+ } else if (toks.size() == 2) {
+ if (toks[0] == "T" || toks[0] == "G") {
+ // eg. T 0
+ LoadDecodeGraphsOld(mappingVector, maxChartSpans);
+ } else {
+ // eg. 0 TM1
+ LoadDecodeGraphsNew(mappingVector, maxChartSpans);
+ }
+ } else {
+ UTIL_THROW(util::Exception, "Malformed mapping");
}
}
@@ -653,7 +647,7 @@ void StaticData::LoadDecodeGraphsOld(const vector<string> &mappingVector, const
decodeType = token[0] == "T" ? Translate : Generate;
index = Scan<size_t>(token[1]);
} else if (token.size() == 3) {
- // eg. 0 T 0
+ // eg. 0 T 0
// For specifying multiple translation model
decodeGraphInd = Scan<size_t>(token[0]);
//the vectorList index can only increment by one
@@ -724,81 +718,79 @@ void StaticData::LoadDecodeGraphsOld(const vector<string> &mappingVector, const
// if specified, record maxmimum unseen n-gram size
const vector<string> *backoffVector = m_parameter->GetParam("decoding-graph-backoff");
for(size_t i=0; i<m_decodeGraphs.size() && backoffVector && i<backoffVector->size(); i++) {
- DecodeGraph &decodeGraph = *m_decodeGraphs[i];
+ DecodeGraph &decodeGraph = *m_decodeGraphs[i];
- if (i < backoffVector->size()) {
- decodeGraph.SetBackoff(Scan<size_t>(backoffVector->at(i)));
- }
+ if (i < backoffVector->size()) {
+ decodeGraph.SetBackoff(Scan<size_t>(backoffVector->at(i)));
+ }
}
}
void StaticData::LoadDecodeGraphsNew(const std::vector<std::string> &mappingVector, const std::vector<size_t> &maxChartSpans)
{
- const std::vector<FeatureFunction*> *featuresRemaining = &FeatureFunction::GetFeatureFunctions();
- DecodeStep *prev = 0;
- size_t prevDecodeGraphInd = 0;
-
- for(size_t i=0; i<mappingVector.size(); i++) {
- vector<string> token = Tokenize(mappingVector[i]);
- size_t decodeGraphInd;
-
- decodeGraphInd = Scan<size_t>(token[0]);
- //the vectorList index can only increment by one
- UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd && decodeGraphInd != prevDecodeGraphInd + 1,
- "Malformed mapping");
- if (decodeGraphInd > prevDecodeGraphInd) {
- prev = NULL;
- }
-
- if (prevDecodeGraphInd < decodeGraphInd) {
- featuresRemaining = &FeatureFunction::GetFeatureFunctions();
- }
-
- FeatureFunction &ff = FeatureFunction::FindFeatureFunction(token[1]);
-
- DecodeStep* decodeStep = NULL;
- if (typeid(ff) == typeid(PhraseDictionary)) {
- decodeStep = new DecodeStepTranslation(&static_cast<PhraseDictionary&>(ff), prev, *featuresRemaining);
- }
- else if (typeid(ff) == typeid(GenerationDictionary)) {
- decodeStep = new DecodeStepGeneration(&static_cast<GenerationDictionary&>(ff), prev, *featuresRemaining);
- }
- else {
- UTIL_THROW(util::Exception, "Unknown decode step");
- }
-
- featuresRemaining = &decodeStep->GetFeaturesRemaining();
-
- UTIL_THROW_IF2(decodeStep == NULL, "Null decode step");
- if (m_decodeGraphs.size() < decodeGraphInd + 1) {
- DecodeGraph *decodeGraph;
- if (IsChart()) {
- size_t maxChartSpan = (decodeGraphInd < maxChartSpans.size()) ? maxChartSpans[decodeGraphInd] : DEFAULT_MAX_CHART_SPAN;
- VERBOSE(1,"max-chart-span: " << maxChartSpans[decodeGraphInd] << endl);
- decodeGraph = new DecodeGraph(m_decodeGraphs.size(), maxChartSpan);
- } else {
- decodeGraph = new DecodeGraph(m_decodeGraphs.size());
- }
-
- m_decodeGraphs.push_back(decodeGraph); // TODO max chart span
- }
-
- m_decodeGraphs[decodeGraphInd]->Add(decodeStep);
- prev = decodeStep;
- prevDecodeGraphInd = decodeGraphInd;
- }
-
- // set maximum n-gram size for backoff approach to decoding paths
- // default is always use subsequent paths (value = 0)
- // if specified, record maxmimum unseen n-gram size
- const vector<string> *backoffVector = m_parameter->GetParam("decoding-graph-backoff");
- for(size_t i=0; i<m_decodeGraphs.size() && backoffVector && i<backoffVector->size(); i++) {
- DecodeGraph &decodeGraph = *m_decodeGraphs[i];
-
- if (i < backoffVector->size()) {
- decodeGraph.SetBackoff(Scan<size_t>(backoffVector->at(i)));
- }
- }
+ const std::vector<FeatureFunction*> *featuresRemaining = &FeatureFunction::GetFeatureFunctions();
+ DecodeStep *prev = 0;
+ size_t prevDecodeGraphInd = 0;
+
+ for(size_t i=0; i<mappingVector.size(); i++) {
+ vector<string> token = Tokenize(mappingVector[i]);
+ size_t decodeGraphInd;
+
+ decodeGraphInd = Scan<size_t>(token[0]);
+ //the vectorList index can only increment by one
+ UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd && decodeGraphInd != prevDecodeGraphInd + 1,
+ "Malformed mapping");
+ if (decodeGraphInd > prevDecodeGraphInd) {
+ prev = NULL;
+ }
+
+ if (prevDecodeGraphInd < decodeGraphInd) {
+ featuresRemaining = &FeatureFunction::GetFeatureFunctions();
+ }
+
+ FeatureFunction &ff = FeatureFunction::FindFeatureFunction(token[1]);
+
+ DecodeStep* decodeStep = NULL;
+ if (typeid(ff) == typeid(PhraseDictionary)) {
+ decodeStep = new DecodeStepTranslation(&static_cast<PhraseDictionary&>(ff), prev, *featuresRemaining);
+ } else if (typeid(ff) == typeid(GenerationDictionary)) {
+ decodeStep = new DecodeStepGeneration(&static_cast<GenerationDictionary&>(ff), prev, *featuresRemaining);
+ } else {
+ UTIL_THROW(util::Exception, "Unknown decode step");
+ }
+
+ featuresRemaining = &decodeStep->GetFeaturesRemaining();
+
+ UTIL_THROW_IF2(decodeStep == NULL, "Null decode step");
+ if (m_decodeGraphs.size() < decodeGraphInd + 1) {
+ DecodeGraph *decodeGraph;
+ if (IsChart()) {
+ size_t maxChartSpan = (decodeGraphInd < maxChartSpans.size()) ? maxChartSpans[decodeGraphInd] : DEFAULT_MAX_CHART_SPAN;
+ VERBOSE(1,"max-chart-span: " << maxChartSpans[decodeGraphInd] << endl);
+ decodeGraph = new DecodeGraph(m_decodeGraphs.size(), maxChartSpan);
+ } else {
+ decodeGraph = new DecodeGraph(m_decodeGraphs.size());
+ }
+
+ m_decodeGraphs.push_back(decodeGraph); // TODO max chart span
+ }
+
+ m_decodeGraphs[decodeGraphInd]->Add(decodeStep);
+ prev = decodeStep;
+ prevDecodeGraphInd = decodeGraphInd;
+ }
+
+ // set maximum n-gram size for backoff approach to decoding paths
+ // default is always use subsequent paths (value = 0)
+ // if specified, record maxmimum unseen n-gram size
+ const vector<string> *backoffVector = m_parameter->GetParam("decoding-graph-backoff");
+ for(size_t i=0; i<m_decodeGraphs.size() && backoffVector && i<backoffVector->size(); i++) {
+ DecodeGraph &decodeGraph = *m_decodeGraphs[i];
+
+ if (i < backoffVector->size()) {
+ decodeGraph.SetBackoff(Scan<size_t>(backoffVector->at(i)));
+ }
+ }
}
@@ -807,7 +799,7 @@ void StaticData::ReLoadBleuScoreFeatureParameter(float weight)
//loop over ScoreProducers to update weights of BleuScoreFeature
const std::vector<FeatureFunction*> &producers = FeatureFunction::GetFeatureFunctions();
for(size_t i=0; i<producers.size(); ++i) {
- FeatureFunction *ff = producers[i];
+ FeatureFunction *ff = producers[i];
std::string ffName = ff->GetScoreProducerDescription();
if (ffName == "BleuScoreFeature") {
@@ -987,7 +979,7 @@ bool StaticData::LoadAlternateWeightSettings()
vector<string> weightSpecification;
const PARAM_VEC *params = m_parameter->GetParam("alternate-weight-setting");
if (params && params->size()) {
- weightSpecification = *params;
+ weightSpecification = *params;
}
// get mapping from feature names to feature functions
@@ -1021,12 +1013,12 @@ bool StaticData::LoadAlternateWeightSettings()
// sparse weights
if (args[0] == "weight-file") {
if (args.size() != 2) {
- std::cerr << "One argument should be supplied for weight-file";
+ std::cerr << "One argument should be supplied for weight-file";
return false;
}
ScoreComponentCollection extraWeights;
if (!extraWeights.Load(args[1])) {
- std::cerr << "Unable to load weights from " << args[1];
+ std::cerr << "Unable to load weights from " << args[1];
return false;
}
m_weightSetting[ currentId ]->PlusEquals(extraWeights);
diff --git a/moses/StaticData.h b/moses/StaticData.h
index 5d13e07a7..91d89aca9 100644
--- a/moses/StaticData.h
+++ b/moses/StaticData.h
@@ -711,7 +711,7 @@ public:
// if not found, resort to default
if (i == m_weightSetting.end()) {
std::cerr << "Warning: Specified weight setting " << settingName
- << " does not exist in model, using default weight setting instead";
+ << " does not exist in model, using default weight setting instead";
i = m_weightSetting.find( "default" );
m_currentWeightSetting = "default";
}
@@ -778,8 +778,9 @@ public:
m_treeStructure = treeStructure;
}
- bool GetDefaultNonTermOnlyForEmptyRange() const
- { return m_defaultNonTermOnlyForEmptyRange; }
+ bool GetDefaultNonTermOnlyForEmptyRange() const {
+ return m_defaultNonTermOnlyForEmptyRange;
+ }
bool UseS2TDecoder() const {
return m_useS2TDecoder;
diff --git a/moses/SyntacticLanguageModel.h b/moses/SyntacticLanguageModel.h
index 76882a4d1..ad11d29bf 100644
--- a/moses/SyntacticLanguageModel.h
+++ b/moses/SyntacticLanguageModel.h
@@ -31,8 +31,8 @@ public:
ScoreComponentCollection* accumulator) const;
FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo,
- int featureID,
- ScoreComponentCollection* accumulator) const {
+ int featureID,
+ ScoreComponentCollection* accumulator) const {
throw std::runtime_error("Syntactic LM can only be used with phrase-based decoder.");
}
diff --git a/moses/Syntax/BoundedPriorityContainer.h b/moses/Syntax/BoundedPriorityContainer.h
index 9afc1b75d..192f9ff2e 100644
--- a/moses/Syntax/BoundedPriorityContainer.h
+++ b/moses/Syntax/BoundedPriorityContainer.h
@@ -24,20 +24,30 @@ namespace Syntax
template<typename T>
class BoundedPriorityContainer
{
- public:
+public:
typedef typename std::vector<T>::iterator Iterator;
typedef typename std::vector<T>::const_iterator ConstIterator;
BoundedPriorityContainer(std::size_t);
- Iterator Begin() { return m_elements.begin(); }
- Iterator End() { return m_elements.begin()+m_size; }
+ Iterator Begin() {
+ return m_elements.begin();
+ }
+ Iterator End() {
+ return m_elements.begin()+m_size;
+ }
- ConstIterator Begin() const { return m_elements.begin(); }
- ConstIterator End() const { return m_elements.begin()+m_size; }
+ ConstIterator Begin() const {
+ return m_elements.begin();
+ }
+ ConstIterator End() const {
+ return m_elements.begin()+m_size;
+ }
// Return the number of elements currently held.
- std::size_t Size() const { return m_size; }
+ std::size_t Size() const {
+ return m_size;
+ }
// 'Lazily' clear the container by setting the size to 0 (allowing elements
// to be overwritten).
@@ -46,7 +56,12 @@ class BoundedPriorityContainer
// TODO Alternative, is to clear m_queue by assigning an empty queue value
// TODO but that might incur an alloc-related overhead when the new underlying
// TODO has to be regrown.
- void LazyClear() { m_size = 0; while (!m_queue.empty()) { m_queue.pop(); } }
+ void LazyClear() {
+ m_size = 0;
+ while (!m_queue.empty()) {
+ m_queue.pop();
+ }
+ }
// Insert the given object iff
// i) the container is not full yet, or
@@ -67,17 +82,16 @@ class BoundedPriorityContainer
// Determine if an object with the given priority would be accepted for
// insertion based on the current contents of the container.
- bool WouldAccept(float priority)
- {
+ bool WouldAccept(float priority) {
return m_size < m_limit || priority > m_queue.top().first;
}
- private:
+private:
typedef std::pair<float, int> PriorityIndexPair;
class PriorityIndexPairOrderer
{
- public:
+ public:
bool operator()(const PriorityIndexPair &p,
const PriorityIndexPair &q) const {
return p.first > q.first;
@@ -87,8 +101,8 @@ class BoundedPriorityContainer
// Min-priority queue. The queue stores the indices of the elements, not
// the elements themselves to keep down the costs of heap maintenance.
typedef std::priority_queue<PriorityIndexPair,
- std::vector<PriorityIndexPair>,
- PriorityIndexPairOrderer> Queue;
+ std::vector<PriorityIndexPair>,
+ PriorityIndexPairOrderer> Queue;
// The elements are stored in a vector. Note that the size of this vector
// can be greater than m_size (after a call to LazyClear).
diff --git a/moses/Syntax/Cube.cpp b/moses/Syntax/Cube.cpp
index 4fcf50829..7b7f4cb91 100644
--- a/moses/Syntax/Cube.cpp
+++ b/moses/Syntax/Cube.cpp
@@ -14,7 +14,7 @@ namespace Syntax
{
Cube::Cube(const SHyperedgeBundle &bundle)
- : m_bundle(bundle)
+ : m_bundle(bundle)
{
// Create the SHyperedge for the 'corner' of the cube.
std::vector<int> coordinates(bundle.stacks.size()+1, 0);
@@ -94,7 +94,7 @@ SHyperedge *Cube::CreateHyperedge(const std::vector<int> &coordinates)
head->best = hyperedge;
head->pvertex = 0; // FIXME???
head->state.resize(
- StatefulFeatureFunction::GetStatefulFeatureFunctions().size());
+ StatefulFeatureFunction::GetStatefulFeatureFunctions().size());
hyperedge->head = head;
hyperedge->tail.resize(coordinates.size()-1);
diff --git a/moses/Syntax/Cube.h b/moses/Syntax/Cube.h
index a28440834..a887b2fbb 100644
--- a/moses/Syntax/Cube.h
+++ b/moses/Syntax/Cube.h
@@ -19,31 +19,35 @@ namespace Syntax
// best-first order.
class Cube
{
- public:
+public:
Cube(const SHyperedgeBundle &);
~Cube();
SHyperedge *Pop();
- SHyperedge *Top() const { return m_queue.top().first; }
+ SHyperedge *Top() const {
+ return m_queue.top().first;
+ }
- bool IsEmpty() const { return m_queue.empty(); }
+ bool IsEmpty() const {
+ return m_queue.empty();
+ }
- private:
+private:
typedef boost::unordered_set<std::vector<int> > CoordinateSet;
typedef std::pair<SHyperedge *, const std::vector<int> *> QueueItem;
class QueueItemOrderer
{
- public:
+ public:
bool operator()(const QueueItem &p, const QueueItem &q) const {
return p.first->score < q.first->score;
}
};
typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
- QueueItemOrderer> Queue;
+ QueueItemOrderer> Queue;
SHyperedge *CreateHyperedge(const std::vector<int> &);
void CreateNeighbour(const std::vector<int> &);
diff --git a/moses/Syntax/CubeQueue.h b/moses/Syntax/CubeQueue.h
index 304e59409..e168d0465 100644
--- a/moses/Syntax/CubeQueue.h
+++ b/moses/Syntax/CubeQueue.h
@@ -14,7 +14,7 @@ namespace Syntax
class CubeQueue
{
- public:
+public:
template<typename InputIterator>
CubeQueue(InputIterator, InputIterator);
@@ -22,12 +22,14 @@ class CubeQueue
SHyperedge *Pop();
- bool IsEmpty() const { return m_queue.empty(); }
+ bool IsEmpty() const {
+ return m_queue.empty();
+ }
- private:
+private:
class CubeOrderer
{
- public:
+ public:
bool operator()(const Cube *p, const Cube *q) const {
return p->Top()->score < q->Top()->score;
}
diff --git a/moses/Syntax/KBestExtractor.cpp b/moses/Syntax/KBestExtractor.cpp
index 335d80409..66939ca17 100644
--- a/moses/Syntax/KBestExtractor.cpp
+++ b/moses/Syntax/KBestExtractor.cpp
@@ -14,8 +14,8 @@ namespace Syntax
// Extract the k-best list from the search graph.
void KBestExtractor::Extract(
- const std::vector<boost::shared_ptr<SVertex> > &topLevelVertices,
- std::size_t k, KBestVec &kBestList)
+ const std::vector<boost::shared_ptr<SVertex> > &topLevelVertices,
+ std::size_t k, KBestVec &kBestList)
{
kBestList.clear();
if (topLevelVertices.empty()) {
@@ -25,7 +25,7 @@ void KBestExtractor::Extract(
// Create a new SVertex, supremeVertex, that has the best top-level SVertex as
// its predecessor and has the same score.
std::vector<boost::shared_ptr<SVertex> >::const_iterator p =
- topLevelVertices.begin();
+ topLevelVertices.begin();
SVertex &bestTopLevelVertex = **p;
boost::scoped_ptr<SVertex> supremeVertex(new SVertex());
supremeVertex->pvertex = 0;
@@ -61,8 +61,8 @@ void KBestExtractor::Extract(
// each derivation.
kBestList.reserve(targetVertex->kBestList.size());
for (std::vector<boost::weak_ptr<Derivation> >::const_iterator
- q = targetVertex->kBestList.begin();
- q != targetVertex->kBestList.end(); ++q) {
+ q = targetVertex->kBestList.begin();
+ q != targetVertex->kBestList.end(); ++q) {
const boost::shared_ptr<Derivation> d(*q);
assert(d);
assert(d->subderivations.size() == 1);
@@ -94,24 +94,24 @@ Phrase KBestExtractor::GetOutputPhrase(const Derivation &d)
}
// FIXME
UTIL_THROW2("placeholders are not currently supported by the S2T decoder");
-/*
- std::set<std::size_t> sourcePosSet =
- phrase.GetAlignTerm().GetAlignmentsForTarget(pos);
- if (sourcePosSet.size() == 1) {
- const std::vector<const Word*> *ruleSourceFromInputPath =
- hypo.GetTranslationOption().GetSourceRuleFromInputPath();
- UTIL_THROW_IF2(ruleSourceFromInputPath == NULL,
- "Source Words in of the rules hasn't been filled out");
- std::size_t sourcePos = *sourcePosSet.begin();
- const Word *sourceWord = ruleSourceFromInputPath->at(sourcePos);
- UTIL_THROW_IF2(sourceWord == NULL,
- "Null source word at position " << sourcePos);
- const Factor *factor = sourceWord->GetFactor(placeholderFactor);
- if (factor) {
- ret.Back()[0] = factor;
- }
- }
-*/
+ /*
+ std::set<std::size_t> sourcePosSet =
+ phrase.GetAlignTerm().GetAlignmentsForTarget(pos);
+ if (sourcePosSet.size() == 1) {
+ const std::vector<const Word*> *ruleSourceFromInputPath =
+ hypo.GetTranslationOption().GetSourceRuleFromInputPath();
+ UTIL_THROW_IF2(ruleSourceFromInputPath == NULL,
+ "Source Words in of the rules hasn't been filled out");
+ std::size_t sourcePos = *sourcePosSet.begin();
+ const Word *sourceWord = ruleSourceFromInputPath->at(sourcePos);
+ UTIL_THROW_IF2(sourceWord == NULL,
+ "Null source word at position " << sourcePos);
+ const Factor *factor = sourceWord->GetFactor(placeholderFactor);
+ if (factor) {
+ ret.Back()[0] = factor;
+ }
+ }
+ */
}
}
@@ -140,8 +140,7 @@ TreePointer KBestExtractor::GetOutputTree(const Derivation &d)
mytree->Combine(previous_trees);
return mytree;
- }
- else {
+ } else {
UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found");
}
}
@@ -180,7 +179,7 @@ KBestExtractor::FindOrCreateVertex(const SVertex &v)
}
boost::shared_ptr<Derivation> bestDerivation(new Derivation(bestEdge));
#ifndef NDEBUG
- std::pair<DerivationSet::iterator, bool> q =
+ std::pair<DerivationSet::iterator, bool> q =
#endif
m_derivations.insert(bestDerivation);
assert(q.second);
diff --git a/moses/Syntax/KBestExtractor.h b/moses/Syntax/KBestExtractor.h
index 248d26c01..15cf0e3c8 100644
--- a/moses/Syntax/KBestExtractor.h
+++ b/moses/Syntax/KBestExtractor.h
@@ -28,7 +28,7 @@ namespace Syntax
//
class KBestExtractor
{
- public:
+public:
struct KVertex;
struct KHyperedge {
@@ -61,8 +61,8 @@ class KBestExtractor
struct KVertex {
typedef std::priority_queue<boost::weak_ptr<Derivation>,
- std::vector<boost::weak_ptr<Derivation> >,
- DerivationOrderer> DerivationQueue;
+ std::vector<boost::weak_ptr<Derivation> >,
+ DerivationOrderer> DerivationQueue;
KVertex(const SVertex &v) : svertex(v), visited(false) {}
@@ -82,9 +82,9 @@ class KBestExtractor
static Phrase GetOutputPhrase(const Derivation &);
static TreePointer GetOutputTree(const Derivation &);
- private:
+private:
typedef boost::unordered_map<const SVertex *,
- boost::shared_ptr<KVertex> > VertexMap;
+ boost::shared_ptr<KVertex> > VertexMap;
struct DerivationHasher {
std::size_t operator()(const boost::shared_ptr<Derivation> &d) const {
@@ -104,7 +104,7 @@ class KBestExtractor
};
typedef boost::unordered_set<boost::shared_ptr<Derivation>, DerivationHasher,
- DerivationEqualityPred> DerivationSet;
+ DerivationEqualityPred> DerivationSet;
boost::shared_ptr<KVertex> FindOrCreateVertex(const SVertex &);
void GetCandidates(boost::shared_ptr<KVertex>, std::size_t);
diff --git a/moses/Syntax/Manager.cpp b/moses/Syntax/Manager.cpp
index 5db9e66fb..10ee4217e 100644
--- a/moses/Syntax/Manager.cpp
+++ b/moses/Syntax/Manager.cpp
@@ -13,7 +13,7 @@ namespace Syntax
{
Manager::Manager(const InputType &source)
- : Moses::BaseManager(source)
+ : Moses::BaseManager(source)
{
}
@@ -37,7 +37,7 @@ void Manager::OutputBest(OutputCollector *collector) const
Phrase yield = GetOneBestTargetYield(*best);
// delete 1st & last
UTIL_THROW_IF2(yield.GetSize() < 2,
- "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
+ "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
yield.RemoveWord(0);
yield.RemoveWord(yield.GetSize()-1);
out << yield.GetStringRep(StaticData::Instance().GetOutputFactorOrder());
@@ -66,7 +66,7 @@ void Manager::OutputUnknowns(OutputCollector *collector) const
std::ostringstream out;
for (std::set<Moses::Word>::const_iterator p = m_oovs.begin();
- p != m_oovs.end(); ++p) {
+ p != m_oovs.end(); ++p) {
out << *p;
}
out << std::endl;
@@ -95,7 +95,7 @@ void Manager::OutputNBestList(OutputCollector *collector,
bool PrintNBestTrees = staticData.PrintNBestTrees();
for (KBestExtractor::KBestVec::const_iterator p = nBestList.begin();
- p != nBestList.end(); ++p) {
+ p != nBestList.end(); ++p) {
const KBestExtractor::Derivation &derivation = **p;
// get the derivation's target-side yield
@@ -103,7 +103,7 @@ void Manager::OutputNBestList(OutputCollector *collector,
// delete <s> and </s>
UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
- "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
+ "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outputPhrase.RemoveWord(0);
outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
@@ -120,7 +120,7 @@ void Manager::OutputNBestList(OutputCollector *collector,
Alignments align;
OutputAlignmentNBest(align, derivation, 0);
for (Alignments::const_iterator q = align.begin(); q != align.end();
- ++q) {
+ ++q) {
out << q->first << "-" << q->second << " ";
}
}
@@ -139,9 +139,9 @@ void Manager::OutputNBestList(OutputCollector *collector,
}
std::size_t Manager::OutputAlignmentNBest(
- Alignments &retAlign,
- const KBestExtractor::Derivation &derivation,
- std::size_t startTarget) const
+ Alignments &retAlign,
+ const KBestExtractor::Derivation &derivation,
+ std::size_t startTarget) const
{
const SHyperedge &shyperedge = derivation.edge->shyperedge;
@@ -160,10 +160,10 @@ std::size_t Manager::OutputAlignmentNBest(
const AlignmentInfo &aiNonTerm = shyperedge.translation->GetAlignNonTerm();
std::vector<std::size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd =
- aiNonTerm.GetNonTermIndexMap();
+ aiNonTerm.GetNonTermIndexMap();
UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
- "Error");
+ "Error");
std::size_t targetInd = 0;
for (std::size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
@@ -184,7 +184,7 @@ std::size_t Manager::OutputAlignmentNBest(
// Recursively look thru child hypos
std::size_t currStartTarget = startTarget + totalTargetSize;
std::size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
- currStartTarget);
+ currStartTarget);
targetOffsets[targetPos] = targetSize;
totalTargetSize += targetSize;
@@ -225,7 +225,7 @@ std::size_t Manager::CalcSourceSize(const KBestExtractor::Derivation &d) const
std::size_t ret = shyperedge.head->pvertex->span.GetNumWordsCovered();
for (std::size_t i = 0; i < shyperedge.tail.size(); ++i) {
std::size_t childSize =
- shyperedge.tail[i]->pvertex->span.GetNumWordsCovered();
+ shyperedge.tail[i]->pvertex->span.GetNumWordsCovered();
ret -= (childSize - 1);
}
return ret;
diff --git a/moses/Syntax/Manager.h b/moses/Syntax/Manager.h
index 108791b19..8d814f604 100644
--- a/moses/Syntax/Manager.h
+++ b/moses/Syntax/Manager.h
@@ -13,7 +13,7 @@ namespace Syntax
// Common base class for Moses::Syntax managers.
class Manager : public BaseManager
{
- public:
+public:
Manager(const InputType &);
// Virtual functions from Moses::BaseManager that are implemented the same
@@ -26,7 +26,7 @@ class Manager : public BaseManager
// managers.
void OutputAlignment(OutputCollector *collector) const {}
void OutputDetailedTreeFragmentsTranslationReport(
- OutputCollector *collector) const {}
+ OutputCollector *collector) const {}
void OutputLatticeSamples(OutputCollector *collector) const {}
void OutputSearchGraph(OutputCollector *collector) const {}
void OutputSearchGraphHypergraph() const {}
@@ -38,15 +38,15 @@ class Manager : public BaseManager
// Syntax-specific virtual functions that derived classes must implement.
virtual void ExtractKBest(
- std::size_t k,
- std::vector<boost::shared_ptr<KBestExtractor::Derivation> > &kBestList,
- bool onlyDistinct=false) const = 0;
+ std::size_t k,
+ std::vector<boost::shared_ptr<KBestExtractor::Derivation> > &kBestList,
+ bool onlyDistinct=false) const = 0;
virtual const SHyperedge *GetBestSHyperedge() const = 0;
- protected:
+protected:
std::set<Word> m_oovs;
- private:
+private:
// Syntax-specific helper functions used to implement OutputNBest.
void OutputNBestList(OutputCollector *collector,
const KBestExtractor::KBestVec &nBestList,
diff --git a/moses/Syntax/NonTerminalMap.h b/moses/Syntax/NonTerminalMap.h
index ff7ce2508..b645212c5 100644
--- a/moses/Syntax/NonTerminalMap.h
+++ b/moses/Syntax/NonTerminalMap.h
@@ -23,42 +23,56 @@ namespace Syntax
template<typename T>
class NonTerminalMap
{
- private:
+private:
typedef boost::unordered_map<Word, T, SymbolHasher, SymbolEqualityPred> Map;
typedef std::vector<T*> Vec;
- public:
+public:
typedef typename Map::iterator Iterator;
typedef typename Map::const_iterator ConstIterator;
NonTerminalMap()
- : m_vec(FactorCollection::Instance().GetNumNonTerminals(), NULL) {}
+ : m_vec(FactorCollection::Instance().GetNumNonTerminals(), NULL) {}
- Iterator Begin() { return m_map.begin(); }
- Iterator End() { return m_map.end(); }
+ Iterator Begin() {
+ return m_map.begin();
+ }
+ Iterator End() {
+ return m_map.end();
+ }
- ConstIterator Begin() const { return m_map.begin(); }
- ConstIterator End() const { return m_map.end(); }
+ ConstIterator Begin() const {
+ return m_map.begin();
+ }
+ ConstIterator End() const {
+ return m_map.end();
+ }
- std::size_t Size() const { return m_map.size(); }
+ std::size_t Size() const {
+ return m_map.size();
+ }
- bool IsEmpty() const { return m_map.empty(); }
+ bool IsEmpty() const {
+ return m_map.empty();
+ }
std::pair<Iterator, bool> Insert(const Word &, const T &);
- T *Find(const Word &w) const { return m_vec[w[0]->GetId()]; }
+ T *Find(const Word &w) const {
+ return m_vec[w[0]->GetId()];
+ }
- private:
+private:
Map m_map;
Vec m_vec;
};
template<typename T>
std::pair<typename NonTerminalMap<T>::Iterator, bool> NonTerminalMap<T>::Insert(
- const Word &key, const T &value)
+ const Word &key, const T &value)
{
std::pair<typename Map::iterator, bool> result =
- m_map.insert(typename Map::value_type(key, value));
+ m_map.insert(typename Map::value_type(key, value));
if (result.second) {
T *p = &(result.first->second);
std::size_t i = key[0]->GetId();
diff --git a/moses/Syntax/PHyperedge.h b/moses/Syntax/PHyperedge.h
index 8f236fcb8..ff24e4fc7 100644
--- a/moses/Syntax/PHyperedge.h
+++ b/moses/Syntax/PHyperedge.h
@@ -11,8 +11,7 @@ namespace Syntax
struct PVertex;
-struct PHyperedge
-{
+struct PHyperedge {
PVertex *head;
std::vector<PVertex*> tail;
const TargetPhraseCollection *translations;
diff --git a/moses/Syntax/PVertex.h b/moses/Syntax/PVertex.h
index d82309c82..3a7a960c7 100644
--- a/moses/Syntax/PVertex.h
+++ b/moses/Syntax/PVertex.h
@@ -8,9 +8,8 @@ namespace Moses
namespace Syntax
{
-struct PVertex
-{
- public:
+struct PVertex {
+public:
PVertex(const WordsRange &wr, const Word &w) : span(wr), symbol(w) {}
WordsRange span;
diff --git a/moses/Syntax/RuleTable.h b/moses/Syntax/RuleTable.h
index 90a25d63c..8e20817dc 100644
--- a/moses/Syntax/RuleTable.h
+++ b/moses/Syntax/RuleTable.h
@@ -11,12 +11,12 @@ class RuleTableFF;
// grammar, like a trie (for S2T) or a DFA (for T2S).
class RuleTable
{
- public:
+public:
RuleTable(const RuleTableFF *ff) : m_ff(ff) {}
virtual ~RuleTable() {}
- protected:
+protected:
const RuleTableFF *m_ff;
};
diff --git a/moses/Syntax/RuleTableFF.h b/moses/Syntax/RuleTableFF.h
index 0e6040612..4d6132e86 100644
--- a/moses/Syntax/RuleTableFF.h
+++ b/moses/Syntax/RuleTableFF.h
@@ -21,7 +21,7 @@ class RuleTable;
// anything except provide somewhere to store the weights and parameter values.
class RuleTableFF : public PhraseDictionary
{
- public:
+public:
RuleTableFF(const std::string &);
// FIXME Delete m_table?
@@ -29,18 +29,21 @@ class RuleTableFF : public PhraseDictionary
void Load();
- const RuleTable *GetTable() const { return m_table; }
+ const RuleTable *GetTable() const {
+ return m_table;
+ }
- static const std::vector<RuleTableFF*> &Instances() { return s_instances; }
+ static const std::vector<RuleTableFF*> &Instances() {
+ return s_instances;
+ }
ChartRuleLookupManager *CreateRuleLookupManager(
- const ChartParser &, const ChartCellCollectionBase &, std::size_t)
- {
+ const ChartParser &, const ChartCellCollectionBase &, std::size_t) {
assert(false);
return 0;
}
- private:
+private:
static std::vector<RuleTableFF*> s_instances;
const RuleTable *m_table;
diff --git a/moses/Syntax/S2T/DerivationWriter.h b/moses/Syntax/S2T/DerivationWriter.h
index 706490ce0..af0e5f521 100644
--- a/moses/Syntax/S2T/DerivationWriter.h
+++ b/moses/Syntax/S2T/DerivationWriter.h
@@ -21,14 +21,14 @@ namespace S2T
// TODO should be revisited when other the decoders are implemented.
class DerivationWriter
{
- public:
+public:
// 1-best version.
static void Write(const SHyperedge&, std::size_t, std::ostream &);
// k-best version.
static void Write(const KBestExtractor::Derivation &, std::size_t,
std::ostream &);
- private:
+private:
static void WriteLine(const SHyperedge &, std::size_t, std::ostream &);
static void WriteSymbol(const Word &, std::ostream &);
};
diff --git a/moses/Syntax/S2T/Manager-inl.h b/moses/Syntax/S2T/Manager-inl.h
index 4d0136fe1..5bdb33f1e 100644
--- a/moses/Syntax/S2T/Manager-inl.h
+++ b/moses/Syntax/S2T/Manager-inl.h
@@ -31,9 +31,9 @@ namespace S2T
template<typename Parser>
Manager<Parser>::Manager(const InputType &source)
- : Syntax::Manager(source)
- , m_pchart(source.GetSize(), Parser::RequiresCompressedChart())
- , m_schart(source.GetSize())
+ : Syntax::Manager(source)
+ , m_pchart(source.GetSize(), Parser::RequiresCompressedChart())
+ , m_schart(source.GetSize())
{
}
@@ -100,7 +100,7 @@ void Manager<Parser>::InitializeParsers(PChart &pchart,
m_oovRuleTrie = oovHandler.SynthesizeRuleTrie(m_oovs.begin(), m_oovs.end());
// Create a parser for the OOV rule trie.
boost::shared_ptr<Parser> parser(
- new Parser(pchart, *m_oovRuleTrie, maxOovWidth));
+ new Parser(pchart, *m_oovRuleTrie, maxOovWidth));
m_parsers.push_back(parser);
}
}
@@ -197,14 +197,14 @@ void Manager<Parser>::Decode()
// Retrieve the (pruned) set of SHyperedgeBundles from the callback.
const BoundedPriorityContainer<SHyperedgeBundle> &bundles =
- callback.GetContainer();
+ callback.GetContainer();
// Use cube pruning to extract SHyperedges from SHyperedgeBundles.
// Collect the SHyperedges into buffers, one for each category.
CubeQueue cubeQueue(bundles.Begin(), bundles.End());
std::size_t count = 0;
typedef boost::unordered_map<Word, std::vector<SHyperedge*>,
- SymbolHasher, SymbolEqualityPred > BufferMap;
+ SymbolHasher, SymbolEqualityPred > BufferMap;
BufferMap buffers;
while (count < popLimit && !cubeQueue.IsEmpty()) {
SHyperedge *hyperedge = cubeQueue.Pop();
@@ -228,7 +228,7 @@ void Manager<Parser>::Decode()
const Word &category = p->first;
const std::vector<SHyperedge*> &buffer = p->second;
std::pair<SChart::Cell::NMap::Iterator, bool> ret =
- scell.nonTerminalStacks.Insert(category, SVertexStack());
+ scell.nonTerminalStacks.Insert(category, SVertexStack());
assert(ret.second);
SVertexStack &stack = ret.first->second;
RecombineAndSort(buffer, stack);
@@ -269,9 +269,9 @@ const SHyperedge *Manager<Parser>::GetBestSHyperedge() const
template<typename Parser>
void Manager<Parser>::ExtractKBest(
- std::size_t k,
- std::vector<boost::shared_ptr<KBestExtractor::Derivation> > &kBestList,
- bool onlyDistinct) const
+ std::size_t k,
+ std::vector<boost::shared_ptr<KBestExtractor::Derivation> > &kBestList,
+ bool onlyDistinct) const
{
kBestList.clear();
if (k == 0 || m_source.GetSize() == 0) {
@@ -326,18 +326,18 @@ template<typename Parser>
void Manager<Parser>::PrunePChart(const SChart::Cell &scell,
PChart::Cell &pcell)
{
-/* FIXME
- PChart::Cell::VertexMap::iterator p = pcell.vertices.begin();
- while (p != pcell.vertices.end()) {
- const Word &category = p->first;
- if (scell.stacks.find(category) == scell.stacks.end()) {
- PChart::Cell::VertexMap::iterator q = p++;
- pcell.vertices.erase(q);
- } else {
- ++p;
+ /* FIXME
+ PChart::Cell::VertexMap::iterator p = pcell.vertices.begin();
+ while (p != pcell.vertices.end()) {
+ const Word &category = p->first;
+ if (scell.stacks.find(category) == scell.stacks.end()) {
+ PChart::Cell::VertexMap::iterator q = p++;
+ pcell.vertices.erase(q);
+ } else {
+ ++p;
+ }
}
- }
-*/
+ */
}
template<typename Parser>
@@ -390,7 +390,7 @@ void Manager<Parser>::RecombineAndSort(const std::vector<SHyperedge*> &buffer,
template<typename Parser>
void Manager<Parser>::OutputDetailedTranslationReport(
- OutputCollector *collector) const
+ OutputCollector *collector) const
{
const SHyperedge *best = GetBestSHyperedge();
if (best == NULL || collector == NULL) {
diff --git a/moses/Syntax/S2T/Manager.h b/moses/Syntax/S2T/Manager.h
index 47cca43d6..0961c8e77 100644
--- a/moses/Syntax/S2T/Manager.h
+++ b/moses/Syntax/S2T/Manager.h
@@ -29,7 +29,7 @@ namespace S2T
template<typename Parser>
class Manager : public Syntax::Manager
{
- public:
+public:
Manager(const InputType &);
void Decode();
@@ -38,13 +38,13 @@ class Manager : public Syntax::Manager
const SHyperedge *GetBestSHyperedge() const;
void ExtractKBest(
- std::size_t k,
- std::vector<boost::shared_ptr<KBestExtractor::Derivation> > &kBestList,
- bool onlyDistinct=false) const;
+ std::size_t k,
+ std::vector<boost::shared_ptr<KBestExtractor::Derivation> > &kBestList,
+ bool onlyDistinct=false) const;
void OutputDetailedTranslationReport(OutputCollector *collector) const;
- private:
+private:
void FindOovs(const PChart &, std::set<Word> &, std::size_t);
void InitializeCharts();
diff --git a/moses/Syntax/S2T/OovHandler-inl.h b/moses/Syntax/S2T/OovHandler-inl.h
index e700f65c5..76eed861e 100644
--- a/moses/Syntax/S2T/OovHandler-inl.h
+++ b/moses/Syntax/S2T/OovHandler-inl.h
@@ -13,7 +13,7 @@ namespace S2T
template<typename RuleTrie>
template<typename InputIterator>
boost::shared_ptr<RuleTrie> OovHandler<RuleTrie>::SynthesizeRuleTrie(
- InputIterator first, InputIterator last)
+ InputIterator first, InputIterator last)
{
const UnknownLHSList &lhsList = StaticData::Instance().GetUnknownLHS();
@@ -33,7 +33,7 @@ boost::shared_ptr<RuleTrie> OovHandler<RuleTrie>::SynthesizeRuleTrie(
Word *tgtLHS = SynthesizeTargetLhs(targetLhsStr);
TargetPhrase *tp = SynthesizeTargetPhrase(oov, *srcPhrase, *tgtLHS, prob);
TargetPhraseCollection &tpc = GetOrCreateTargetPhraseCollection(
- *trie, *srcPhrase, *tp, NULL); // TODO Check NULL is valid argument
+ *trie, *srcPhrase, *tp, NULL); // TODO Check NULL is valid argument
tpc.Add(tp);
}
}
@@ -63,12 +63,12 @@ Word *OovHandler<RuleTrie>::SynthesizeTargetLhs(const std::string &lhsStr)
template<typename RuleTrie>
TargetPhrase *OovHandler<RuleTrie>::SynthesizeTargetPhrase(
- const Word &oov, const Phrase &srcPhrase, const Word &targetLhs, float prob)
+ const Word &oov, const Phrase &srcPhrase, const Word &targetLhs, float prob)
{
const StaticData &staticData = StaticData::Instance();
const UnknownWordPenaltyProducer &unknownWordPenaltyProducer =
- UnknownWordPenaltyProducer::Instance();
+ UnknownWordPenaltyProducer::Instance();
TargetPhrase *targetPhrase = new TargetPhrase();
Word &targetWord = targetPhrase->AddWord();
diff --git a/moses/Syntax/S2T/OovHandler.h b/moses/Syntax/S2T/OovHandler.h
index 4b0133413..5d484d2fd 100644
--- a/moses/Syntax/S2T/OovHandler.h
+++ b/moses/Syntax/S2T/OovHandler.h
@@ -21,7 +21,7 @@ namespace S2T
template<typename RuleTrie>
class OovHandler : public RuleTrieCreator
{
- public:
+public:
OovHandler(const RuleTableFF &ff) : m_ruleTableFF(ff) {}
// Synthesize a RuleTrie given a sequence of OOV words. The sequence is
@@ -30,7 +30,7 @@ class OovHandler : public RuleTrieCreator
template<typename InputIterator>
boost::shared_ptr<RuleTrie> SynthesizeRuleTrie(InputIterator, InputIterator);
- private:
+private:
const RuleTableFF &m_ruleTableFF;
bool ShouldDrop(const Word &);
diff --git a/moses/Syntax/S2T/PChart.h b/moses/Syntax/S2T/PChart.h
index 8f719eebb..0bd3148b3 100644
--- a/moses/Syntax/S2T/PChart.h
+++ b/moses/Syntax/S2T/PChart.h
@@ -19,11 +19,10 @@ namespace S2T
class PChart
{
- public:
- struct Cell
- {
+public:
+ struct Cell {
typedef boost::unordered_map<Word, PVertex, SymbolHasher,
- SymbolEqualityPred> TMap;
+ SymbolEqualityPred> TMap;
typedef NonTerminalMap<PVertex> NMap;
// Collection of terminal vertices (keyed by terminal symbol).
TMap terminalVertices;
@@ -42,7 +41,9 @@ class PChart
~PChart();
- std::size_t GetWidth() const { return m_cells.size(); }
+ std::size_t GetWidth() const {
+ return m_cells.size();
+ }
const Cell &GetCell(std::size_t start, std::size_t end) const {
return m_cells[start][end];
@@ -57,13 +58,13 @@ class PChart
if (!v.symbol.IsNonTerminal()) {
Cell::TMap::value_type x(v.symbol, v);
std::pair<Cell::TMap::iterator, bool> ret =
- cell.terminalVertices.insert(x);
+ cell.terminalVertices.insert(x);
return ret.first->second;
}
// If v is a non-terminal vertex add it to the cell's nonTerminalVertices
// map and update the compressed chart (if enabled).
std::pair<Cell::NMap::Iterator, bool> result =
- cell.nonTerminalVertices.Insert(v.symbol, v);
+ cell.nonTerminalVertices.Insert(v.symbol, v);
if (result.second && m_compressedChart) {
CompressedItem item;
item.end = end;
@@ -77,7 +78,7 @@ class PChart
return (*m_compressedChart)[start];
}
- private:
+private:
typedef std::vector<CompressedMatrix> CompressedChart;
std::vector<std::vector<Cell> > m_cells;
diff --git a/moses/Syntax/S2T/PHyperedgeToSHyperedgeBundle.h b/moses/Syntax/S2T/PHyperedgeToSHyperedgeBundle.h
index dd0be3ae9..924d9c976 100644
--- a/moses/Syntax/S2T/PHyperedgeToSHyperedgeBundle.h
+++ b/moses/Syntax/S2T/PHyperedgeToSHyperedgeBundle.h
@@ -15,8 +15,9 @@ namespace S2T
// Given a PHyperedge object and SChart produces a SHyperedgeBundle object.
inline void PHyperedgeToSHyperedgeBundle(const PHyperedge &hyperedge,
- const SChart &schart,
- SHyperedgeBundle &bundle) {
+ const SChart &schart,
+ SHyperedgeBundle &bundle)
+{
bundle.translations = hyperedge.translations;
bundle.stacks.clear();
for (std::vector<PVertex*>::const_iterator p = hyperedge.tail.begin();
@@ -31,7 +32,7 @@ inline void PHyperedgeToSHyperedgeBundle(const PHyperedge &hyperedge,
stack = cell.nonTerminalStacks.Find(symbol);
} else {
const SChart::Cell::TMap::const_iterator q =
- cell.terminalStacks.find(symbol);
+ cell.terminalStacks.find(symbol);
assert(q != cell.terminalStacks.end());
stack = &(q->second);
}
diff --git a/moses/Syntax/S2T/ParserCallback.h b/moses/Syntax/S2T/ParserCallback.h
index b18a85eae..f9db51601 100644
--- a/moses/Syntax/S2T/ParserCallback.h
+++ b/moses/Syntax/S2T/ParserCallback.h
@@ -15,14 +15,15 @@ namespace Syntax
namespace S2T
{
-class StandardParserCallback {
- private:
+class StandardParserCallback
+{
+private:
typedef BoundedPriorityContainer<SHyperedgeBundle> Container;
- public:
+public:
StandardParserCallback(const SChart &schart, std::size_t ruleLimit)
- : m_schart(schart)
- , m_container(ruleLimit) {}
+ : m_schart(schart)
+ , m_container(ruleLimit) {}
void operator()(const PHyperedge &hyperedge) {
PHyperedgeToSHyperedgeBundle(hyperedge, m_schart, m_tmpBundle);
@@ -30,25 +31,30 @@ class StandardParserCallback {
m_container.SwapIn(m_tmpBundle, score);
}
- void InitForRange(const WordsRange &range) { m_container.LazyClear(); }
+ void InitForRange(const WordsRange &range) {
+ m_container.LazyClear();
+ }
- const Container &GetContainer() { return m_container; }
+ const Container &GetContainer() {
+ return m_container;
+ }
- private:
+private:
const SChart &m_schart;
SHyperedgeBundle m_tmpBundle;
BoundedPriorityContainer<SHyperedgeBundle> m_container;
};
-class EagerParserCallback {
- private:
+class EagerParserCallback
+{
+private:
typedef BoundedPriorityContainer<SHyperedgeBundle> Container;
- public:
+public:
EagerParserCallback(const SChart &schart, std::size_t ruleLimit)
- : m_schart(schart)
- , m_containers(schart.GetWidth(), Container(ruleLimit))
- , m_prevStart(std::numeric_limits<std::size_t>::max()) {}
+ : m_schart(schart)
+ , m_containers(schart.GetWidth(), Container(ruleLimit))
+ , m_prevStart(std::numeric_limits<std::size_t>::max()) {}
void operator()(const PHyperedge &hyperedge, std::size_t end) {
PHyperedgeToSHyperedgeBundle(hyperedge, m_schart, m_tmpBundle);
@@ -68,9 +74,11 @@ class EagerParserCallback {
}
}
- const Container &GetContainer() { return m_containers[m_end]; }
+ const Container &GetContainer() {
+ return m_containers[m_end];
+ }
- private:
+private:
const SChart &m_schart;
SHyperedgeBundle m_tmpBundle;
std::vector<Container> m_containers;
diff --git a/moses/Syntax/S2T/Parsers/Parser.h b/moses/Syntax/S2T/Parsers/Parser.h
index b13a8d502..785fb66f9 100644
--- a/moses/Syntax/S2T/Parsers/Parser.h
+++ b/moses/Syntax/S2T/Parsers/Parser.h
@@ -13,7 +13,7 @@ class PChart;
template<typename Callback>
class Parser
{
- public:
+public:
typedef Callback CallbackType;
Parser(PChart &chart) : m_chart(chart) {}
@@ -21,7 +21,7 @@ class Parser
virtual ~Parser() {}
virtual void EnumerateHyperedges(const WordsRange &, Callback &) = 0;
- protected:
+protected:
PChart &m_chart;
};
diff --git a/moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser-inl.h b/moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser-inl.h
index b275a93ee..ad3972404 100644
--- a/moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser-inl.h
+++ b/moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser-inl.h
@@ -11,21 +11,21 @@ namespace S2T
template<typename Callback>
RecursiveCYKPlusParser<Callback>::RecursiveCYKPlusParser(
- PChart &chart,
- const RuleTrie &trie,
- std::size_t maxChartSpan)
- : Parser<Callback>(chart)
- , m_ruleTable(trie)
- , m_maxChartSpan(maxChartSpan)
- , m_callback(NULL)
+ PChart &chart,
+ const RuleTrie &trie,
+ std::size_t maxChartSpan)
+ : Parser<Callback>(chart)
+ , m_ruleTable(trie)
+ , m_maxChartSpan(maxChartSpan)
+ , m_callback(NULL)
{
m_hyperedge.head = 0;
}
template<typename Callback>
void RecursiveCYKPlusParser<Callback>::EnumerateHyperedges(
- const WordsRange &range,
- Callback &callback)
+ const WordsRange &range,
+ Callback &callback)
{
const std::size_t start = range.GetStartPos();
const std::size_t end = range.GetEndPos();
@@ -49,16 +49,17 @@ void RecursiveCYKPlusParser<Callback>::EnumerateHyperedges(
// with a non-terminal over a span between [start,minEnd] and [start,maxEnd].
template<typename Callback>
void RecursiveCYKPlusParser<Callback>::GetNonTerminalExtensions(
- const RuleTrie::Node &node,
- std::size_t start,
- std::size_t minEnd,
- std::size_t maxEnd) {
+ const RuleTrie::Node &node,
+ std::size_t start,
+ std::size_t minEnd,
+ std::size_t maxEnd)
+{
// Non-terminal labels in node's outgoing edge set.
const RuleTrie::Node::SymbolMap &nonTermMap = node.GetNonTerminalMap();
// Compressed matrix from PChart.
const PChart::CompressedMatrix &matrix =
- Base::m_chart.GetCompressedMatrix(start);
+ Base::m_chart.GetCompressedMatrix(start);
// Loop over possible expansions of the rule.
RuleTrie::Node::SymbolMap::const_iterator p;
@@ -66,7 +67,7 @@ void RecursiveCYKPlusParser<Callback>::GetNonTerminalExtensions(
for (p = nonTermMap.begin(); p != p_end; ++p) {
const Word &nonTerm = p->first;
const std::vector<PChart::CompressedItem> &items =
- matrix[nonTerm[0]->GetId()];
+ matrix[nonTerm[0]->GetId()];
for (std::vector<PChart::CompressedItem>::const_iterator q = items.begin();
q != items.end(); ++q) {
if (q->end >= minEnd && q->end <= maxEnd) {
@@ -81,12 +82,13 @@ void RecursiveCYKPlusParser<Callback>::GetNonTerminalExtensions(
// with a terminal over span [start,end].
template<typename Callback>
void RecursiveCYKPlusParser<Callback>::GetTerminalExtension(
- const RuleTrie::Node &node,
- std::size_t start,
- std::size_t end) {
+ const RuleTrie::Node &node,
+ std::size_t start,
+ std::size_t end)
+{
const PChart::Cell::TMap &vertexMap =
- Base::m_chart.GetCell(start, end).terminalVertices;
+ Base::m_chart.GetCell(start, end).terminalVertices;
if (vertexMap.empty()) {
return;
}
@@ -122,9 +124,10 @@ void RecursiveCYKPlusParser<Callback>::GetTerminalExtension(
// non-empty), and try to find expansions that have this partial rule as prefix.
template<typename Callback>
void RecursiveCYKPlusParser<Callback>::AddAndExtend(
- const RuleTrie::Node &node,
- std::size_t end,
- const PVertex &vertex) {
+ const RuleTrie::Node &node,
+ std::size_t end,
+ const PVertex &vertex)
+{
// FIXME Sort out const-ness.
m_hyperedge.tail.push_back(const_cast<PVertex *>(&vertex));
@@ -153,7 +156,7 @@ void RecursiveCYKPlusParser<Callback>::AddAndExtend(
template<typename Callback>
bool RecursiveCYKPlusParser<Callback>::IsNonLexicalUnary(
- const PHyperedge &hyperedge) const
+ const PHyperedge &hyperedge) const
{
return hyperedge.tail.size() == 1 &&
hyperedge.tail[0]->symbol.IsNonTerminal();
diff --git a/moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser.h b/moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser.h
index 264d43eea..2b8edbfd1 100644
--- a/moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser.h
+++ b/moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser.h
@@ -22,12 +22,14 @@ namespace S2T
template<typename Callback>
class RecursiveCYKPlusParser : public Parser<Callback>
{
- public:
+public:
typedef Parser<Callback> Base;
typedef RuleTrieCYKPlus RuleTrie;
// TODO Make this configurable?
- static bool RequiresCompressedChart() { return true; }
+ static bool RequiresCompressedChart() {
+ return true;
+ }
RecursiveCYKPlusParser(PChart &, const RuleTrie &, std::size_t);
@@ -35,7 +37,7 @@ class RecursiveCYKPlusParser : public Parser<Callback>
void EnumerateHyperedges(const WordsRange &, Callback &);
- private:
+private:
void GetTerminalExtension(const RuleTrie::Node &, std::size_t, std::size_t);
diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/Parser-inl.h b/moses/Syntax/S2T/Parsers/Scope3Parser/Parser-inl.h
index d55f7e842..f50cee3a0 100644
--- a/moses/Syntax/S2T/Parsers/Scope3Parser/Parser-inl.h
+++ b/moses/Syntax/S2T/Parsers/Scope3Parser/Parser-inl.h
@@ -23,10 +23,10 @@ namespace S2T
template<typename Callback>
Scope3Parser<Callback>::Scope3Parser(PChart &chart, const RuleTrie &trie,
std::size_t maxChartSpan)
- : Parser<Callback>(chart)
- , m_ruleTable(trie)
- , m_maxChartSpan(maxChartSpan)
- , m_latticeBuilder(chart)
+ : Parser<Callback>(chart)
+ , m_ruleTable(trie)
+ , m_maxChartSpan(maxChartSpan)
+ , m_latticeBuilder(chart)
{
Init();
}
@@ -39,7 +39,7 @@ Scope3Parser<Callback>::~Scope3Parser()
template<typename Callback>
void Scope3Parser<Callback>::EnumerateHyperedges(const WordsRange &range,
- Callback &callback)
+ Callback &callback)
{
const std::size_t start = range.GetStartPos();
const std::size_t end = range.GetEndPos();
@@ -65,7 +65,7 @@ void Scope3Parser<Callback>::EnumerateHyperedges(const WordsRange &range,
// Ask the grammar for the mapping from label sequences to target phrase
// collections for this pattern.
const RuleTrie::Node::LabelMap &labelMap =
- patNode->m_node->GetLabelMap();
+ patNode->m_node->GetLabelMap();
// For each label sequence, search the lattice for the set of PHyperedge
// tails.
@@ -146,7 +146,7 @@ void Scope3Parser<Callback>::FillSentenceMap(SentenceMap &sentMap)
template<typename Callback>
void Scope3Parser<Callback>::RecordPatternApplicationSpans(
- const PatternApplicationTrie &patNode)
+ const PatternApplicationTrie &patNode)
{
if (patNode.m_node->HasRules()) {
int s1 = -1;
@@ -175,7 +175,7 @@ void Scope3Parser<Callback>::RecordPatternApplicationSpans(
}
for (std::vector<PatternApplicationTrie*>::const_iterator p =
- patNode.m_children.begin(); p != patNode.m_children.end(); ++p) {
+ patNode.m_children.begin(); p != patNode.m_children.end(); ++p) {
RecordPatternApplicationSpans(**p);
}
}
diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/Parser.h b/moses/Syntax/S2T/Parsers/Scope3Parser/Parser.h
index d3104d9b1..df2989d62 100644
--- a/moses/Syntax/S2T/Parsers/Scope3Parser/Parser.h
+++ b/moses/Syntax/S2T/Parsers/Scope3Parser/Parser.h
@@ -33,7 +33,9 @@ public:
typedef RuleTrieScope3 RuleTrie;
// TODO Make this configurable?
- static bool RequiresCompressedChart() { return false; }
+ static bool RequiresCompressedChart() {
+ return false;
+ }
Scope3Parser(PChart &, const RuleTrie &, std::size_t);
@@ -60,7 +62,7 @@ private:
/* m_patSpans[i][j] records the set of all PAT nodes for span [i,i+j]
i.e. j is the width of the span */
std::vector<std::vector<
- std::vector<const PatternApplicationTrie *> > > m_patSpans;
+ std::vector<const PatternApplicationTrie *> > > m_patSpans;
};
} // namespace S2T
diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/PatternApplicationTrie.cpp b/moses/Syntax/S2T/Parsers/Scope3Parser/PatternApplicationTrie.cpp
index 218cd4017..f580c254a 100644
--- a/moses/Syntax/S2T/Parsers/Scope3Parser/PatternApplicationTrie.cpp
+++ b/moses/Syntax/S2T/Parsers/Scope3Parser/PatternApplicationTrie.cpp
@@ -9,7 +9,8 @@ namespace Syntax
namespace S2T
{
-int PatternApplicationTrie::Depth() const {
+int PatternApplicationTrie::Depth() const
+{
if (m_parent) {
return m_parent->Depth() + 1;
}
@@ -77,8 +78,8 @@ PatternApplicationTrie::GetLowestTerminalNode() const
// may be unknown). This function determines the range of possible start
// values for the partially-applied pattern.
void PatternApplicationTrie::DetermineStartRange(int sentenceLength,
- int &minStart,
- int &maxStart) const
+ int &minStart,
+ int &maxStart) const
{
// Find the leftmost terminal symbol, if any.
const PatternApplicationTrie *n = GetHighestTerminalNode();
@@ -108,8 +109,8 @@ void PatternApplicationTrie::DetermineStartRange(int sentenceLength,
// may be unknown). This function determines the range of possible end values
// for the partially-applied pattern.
void PatternApplicationTrie::DetermineEndRange(int sentenceLength,
- int &minEnd,
- int &maxEnd) const
+ int &minEnd,
+ int &maxEnd) const
{
// Find the rightmost terminal symbol, if any.
const PatternApplicationTrie *n = GetLowestTerminalNode();
@@ -154,7 +155,7 @@ void PatternApplicationTrie::Extend(const RuleTrieScope3::Node &node,
(followsGap && start > (std::size_t)minPos) ||
minPos == -1) {
PatternApplicationTrie *subTrie =
- new PatternApplicationTrie(start, end, child, v, this);
+ new PatternApplicationTrie(start, end, child, v, this);
subTrie->Extend(child, end+1, sentMap, false);
m_children.push_back(subTrie);
}
@@ -174,7 +175,8 @@ void PatternApplicationTrie::Extend(const RuleTrieScope3::Node &node,
}
void PatternApplicationTrie::ReadOffPatternApplicationKey(
- PatternApplicationKey &key) const {
+ PatternApplicationKey &key) const
+{
const int depth = Depth();
key.resize(depth);
const PatternApplicationTrie *p = this;
diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/PatternApplicationTrie.h b/moses/Syntax/S2T/Parsers/Scope3Parser/PatternApplicationTrie.h
index 0ad371367..1869c0bfd 100644
--- a/moses/Syntax/S2T/Parsers/Scope3Parser/PatternApplicationTrie.h
+++ b/moses/Syntax/S2T/Parsers/Scope3Parser/PatternApplicationTrie.h
@@ -19,7 +19,7 @@ struct PatternApplicationTrie;
typedef std::vector<const PatternApplicationTrie*> PatternApplicationKey;
struct PatternApplicationTrie {
- public:
+public:
PatternApplicationTrie(int start, int end, const RuleTrieScope3::Node &node,
const PVertex *pvertex, PatternApplicationTrie *parent)
: m_start(start)
@@ -36,8 +36,12 @@ struct PatternApplicationTrie {
int Depth() const;
- bool IsGapNode() const { return m_end == -1; }
- bool IsTerminalNode() const { return m_end != -1; }
+ bool IsGapNode() const {
+ return m_end == -1;
+ }
+ bool IsTerminalNode() const {
+ return m_end != -1;
+ }
const PatternApplicationTrie *GetHighestTerminalNode() const;
const PatternApplicationTrie *GetLowestTerminalNode() const;
diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/SentenceMap.h b/moses/Syntax/S2T/Parsers/Scope3Parser/SentenceMap.h
index 8e6aae9f1..522b56618 100644
--- a/moses/Syntax/S2T/Parsers/Scope3Parser/SentenceMap.h
+++ b/moses/Syntax/S2T/Parsers/Scope3Parser/SentenceMap.h
@@ -20,7 +20,7 @@ namespace S2T
// FIXME Check SymbolHasher does the right thing here
typedef boost::unordered_map<Word, std::vector<const PVertex *>, SymbolHasher,
- SymbolEqualityPred> SentenceMap;
+ SymbolEqualityPred> SentenceMap;
} // namespace S2T
} // namespace Syntax
diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/SymbolRangeCalculator.cpp b/moses/Syntax/S2T/Parsers/Scope3Parser/SymbolRangeCalculator.cpp
index 0eb615db8..8b1f203be 100644
--- a/moses/Syntax/S2T/Parsers/Scope3Parser/SymbolRangeCalculator.cpp
+++ b/moses/Syntax/S2T/Parsers/Scope3Parser/SymbolRangeCalculator.cpp
@@ -18,7 +18,7 @@ void SymbolRangeCalculator::Calc(const PatternApplicationKey &key,
// Fill in ranges for terminals and set ranges to -1 for non-terminals.
void SymbolRangeCalculator::FillInTerminalRanges(
- const PatternApplicationKey &key, std::vector<SymbolRange> &ranges)
+ const PatternApplicationKey &key, std::vector<SymbolRange> &ranges)
{
ranges.resize(key.size());
for (std::size_t i = 0; i < key.size(); ++i) {
@@ -34,7 +34,7 @@ void SymbolRangeCalculator::FillInTerminalRanges(
}
void SymbolRangeCalculator::FillInAuxSymbolInfo(
- const std::vector<SymbolRange> &ranges)
+ const std::vector<SymbolRange> &ranges)
{
m_auxSymbolInfo.resize(ranges.size());
@@ -81,8 +81,8 @@ void SymbolRangeCalculator::FillInAuxSymbolInfo(
}
void SymbolRangeCalculator::FillInGapRanges(const PatternApplicationKey &key,
- int spanStart, int spanEnd,
- std::vector<SymbolRange> &ranges)
+ int spanStart, int spanEnd,
+ std::vector<SymbolRange> &ranges)
{
for (std::size_t i = 0; i < key.size(); ++i) {
const PatternApplicationTrie *patNode = key[i];
diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/SymbolRangeCalculator.h b/moses/Syntax/S2T/Parsers/Scope3Parser/SymbolRangeCalculator.h
index 341fb9bb4..c9bbcb02d 100644
--- a/moses/Syntax/S2T/Parsers/Scope3Parser/SymbolRangeCalculator.h
+++ b/moses/Syntax/S2T/Parsers/Scope3Parser/SymbolRangeCalculator.h
@@ -14,11 +14,11 @@ namespace S2T
class SymbolRangeCalculator
{
- public:
+public:
void Calc(const PatternApplicationKey &, int, int,
std::vector<SymbolRange> &);
- private:
+private:
// Provides contextual information used in determining a symbol's range.
struct AuxSymbolInfo {
int distanceToNextTerminal;
diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/TailLattice.h b/moses/Syntax/S2T/Parsers/Scope3Parser/TailLattice.h
index 9ee16b186..88685e81e 100644
--- a/moses/Syntax/S2T/Parsers/Scope3Parser/TailLattice.h
+++ b/moses/Syntax/S2T/Parsers/Scope3Parser/TailLattice.h
@@ -11,7 +11,7 @@ namespace S2T
{
/* Lattice in which a full path corresponds to the tail of a PHyperedge.
- * For an entry x[i][j][k][l] in a TailLattice x:
+ * For an entry x[i][j][k][l] in a TailLattice x:
*
* i = offset from start of rule pattern
*
@@ -23,9 +23,9 @@ namespace S2T
* l = label index (zero for terminals, otherwise as in RuleTrieScope3::Node)
*/
typedef std::vector<
- std::vector<
- std::vector<
- std::vector<const PVertex *> > > > TailLattice;
+std::vector<
+std::vector<
+std::vector<const PVertex *> > > > TailLattice;
} // namespace S2T
} // namespace Syntax
diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeBuilder.cpp b/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeBuilder.cpp
index 6b31090fc..3921ecfa0 100644
--- a/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeBuilder.cpp
+++ b/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeBuilder.cpp
@@ -25,7 +25,7 @@ void TailLatticeBuilder::Build(
const RuleTrieScope3::Node *utrieNode = key.back()->m_node;
const RuleTrieScope3::Node::LabelTable &labelTable =
- utrieNode->GetLabelTable();
+ utrieNode->GetLabelTable();
std::size_t nonTermIndex = 0;
@@ -52,7 +52,7 @@ void TailLatticeBuilder::Build(
p != labelVec.end(); ++p, ++q) {
const Word &label = *p;
const PVertex *v =
- m_chart.GetCell(s, e).nonTerminalVertices.Find(label);
+ m_chart.GetCell(s, e).nonTerminalVertices.Find(label);
lattice[offset][nonTermIndex+1][width].push_back(v);
*q = (*q || static_cast<bool>(v));
}
@@ -81,7 +81,7 @@ void TailLatticeBuilder::ExtendAndClear(
const RuleTrieScope3::Node *utrieNode = key.back()->m_node;
const RuleTrieScope3::Node::LabelTable &labelTable =
- utrieNode->GetLabelTable();
+ utrieNode->GetLabelTable();
std::size_t nonTermIndex = 0;
diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeBuilder.h b/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeBuilder.h
index c61df8a40..9297e5eba 100644
--- a/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeBuilder.h
+++ b/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeBuilder.h
@@ -17,7 +17,7 @@ namespace S2T
class TailLatticeBuilder
{
- public:
+public:
TailLatticeBuilder(PChart &chart) : m_chart(chart) {}
// Given a key from a PatternApplicationTrie and the valid ranges of its
@@ -26,7 +26,7 @@ class TailLatticeBuilder
const std::vector<SymbolRange> &,
TailLattice &, std::vector<std::vector<bool> > &);
- private:
+private:
// Auxiliary function used by Build. Enlarges a TailLattice, if necessary,
// and clears the innermost vectors.
void ExtendAndClear(const std::vector<const PatternApplicationTrie *> &,
diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeSearcher.h b/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeSearcher.h
index a2897ce73..79bd91148 100644
--- a/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeSearcher.h
+++ b/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeSearcher.h
@@ -17,7 +17,7 @@ namespace S2T
template<typename Callback>
class TailLatticeSearcher
{
- public:
+public:
TailLatticeSearcher(const TailLattice &lattice,
const PatternApplicationKey &key,
const std::vector<SymbolRange> &ranges)
@@ -35,7 +35,7 @@ class TailLatticeSearcher
SearchInner(0, 0, 0);
}
- private:
+private:
void SearchInner(int offset, std::size_t i, std::size_t nonTermIndex) {
assert(m_hyperedge.tail.size() == i);
@@ -61,7 +61,7 @@ class TailLatticeSearcher
const int maxWidth = range.maxEnd - absStart + 1;
const std::vector<std::vector<const PVertex *> > &innerVec =
- m_lattice[offset][nonTermIndex+1];
+ m_lattice[offset][nonTermIndex+1];
std::size_t labelIndex = (*m_labels)[nonTermIndex];
diff --git a/moses/Syntax/S2T/RuleTrie.h b/moses/Syntax/S2T/RuleTrie.h
index 8f6dcbb80..27b0bc838 100644
--- a/moses/Syntax/S2T/RuleTrie.h
+++ b/moses/Syntax/S2T/RuleTrie.h
@@ -20,12 +20,12 @@ namespace S2T
// Base class for parser-specific trie types.
class RuleTrie : public RuleTable
{
- public:
+public:
RuleTrie(const RuleTableFF *ff) : RuleTable(ff) {}
virtual bool HasPreterminalRule(const Word &) const = 0;
- private:
+private:
friend class RuleTrieCreator;
virtual TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
diff --git a/moses/Syntax/S2T/RuleTrieCYKPlus.cpp b/moses/Syntax/S2T/RuleTrieCYKPlus.cpp
index cda2bf6e7..05f8758e9 100644
--- a/moses/Syntax/S2T/RuleTrieCYKPlus.cpp
+++ b/moses/Syntax/S2T/RuleTrieCYKPlus.cpp
@@ -53,7 +53,7 @@ void RuleTrieCYKPlus::Node::Sort(std::size_t tableLimit)
}
RuleTrieCYKPlus::Node *RuleTrieCYKPlus::Node::GetOrCreateChild(
- const Word &sourceTerm)
+ const Word &sourceTerm)
{
return &m_sourceTermMap[sourceTerm];
}
@@ -61,40 +61,40 @@ RuleTrieCYKPlus::Node *RuleTrieCYKPlus::Node::GetOrCreateChild(
RuleTrieCYKPlus::Node *RuleTrieCYKPlus::Node::GetOrCreateNonTerminalChild(const Word &targetNonTerm)
{
UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
- "Not a non-terminal: " << targetNonTerm);
+ "Not a non-terminal: " << targetNonTerm);
return &m_nonTermMap[targetNonTerm];
}
const RuleTrieCYKPlus::Node *RuleTrieCYKPlus::Node::GetChild(
- const Word &sourceTerm) const
+ const Word &sourceTerm) const
{
UTIL_THROW_IF2(sourceTerm.IsNonTerminal(),
- "Not a terminal: " << sourceTerm);
+ "Not a terminal: " << sourceTerm);
SymbolMap::const_iterator p = m_sourceTermMap.find(sourceTerm);
return (p == m_sourceTermMap.end()) ? NULL : &p->second;
}
const RuleTrieCYKPlus::Node *RuleTrieCYKPlus::Node::GetNonTerminalChild(
- const Word &targetNonTerm) const
+ const Word &targetNonTerm) const
{
UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
- "Not a non-terminal: " << targetNonTerm);
+ "Not a non-terminal: " << targetNonTerm);
SymbolMap::const_iterator p = m_nonTermMap.find(targetNonTerm);
return (p == m_nonTermMap.end()) ? NULL : &p->second;
}
TargetPhraseCollection &RuleTrieCYKPlus::GetOrCreateTargetPhraseCollection(
- const Phrase &source, const TargetPhrase &target, const Word *sourceLHS)
+ const Phrase &source, const TargetPhrase &target, const Word *sourceLHS)
{
Node &currNode = GetOrCreateNode(source, target, sourceLHS);
return currNode.GetTargetPhraseCollection();
}
RuleTrieCYKPlus::Node &RuleTrieCYKPlus::GetOrCreateNode(
- const Phrase &source, const TargetPhrase &target, const Word *sourceLHS)
+ const Phrase &source, const TargetPhrase &target, const Word *sourceLHS)
{
const std::size_t size = source.GetSize();
@@ -107,9 +107,9 @@ RuleTrieCYKPlus::Node &RuleTrieCYKPlus::GetOrCreateNode(
if (word.IsNonTerminal()) {
UTIL_THROW_IF2(iterAlign == alignmentInfo.end(),
- "No alignment for non-term at position " << pos);
+ "No alignment for non-term at position " << pos);
UTIL_THROW_IF2(iterAlign->first != pos,
- "Alignment info incorrect at position " << pos);
+ "Alignment info incorrect at position " << pos);
std::size_t targetNonTermInd = iterAlign->second;
++iterAlign;
const Word &targetNonTerm = target.GetWord(targetNonTermInd);
diff --git a/moses/Syntax/S2T/RuleTrieCYKPlus.h b/moses/Syntax/S2T/RuleTrieCYKPlus.h
index 83ea55b87..11cf4c199 100644
--- a/moses/Syntax/S2T/RuleTrieCYKPlus.h
+++ b/moses/Syntax/S2T/RuleTrieCYKPlus.h
@@ -26,18 +26,20 @@ namespace S2T
class RuleTrieCYKPlus : public RuleTrie
{
- public:
+public:
class Node
{
- public:
+ public:
typedef boost::unordered_map<Word, Node, SymbolHasher,
- SymbolEqualityPred> SymbolMap;
+ SymbolEqualityPred> SymbolMap;
bool IsLeaf() const {
return m_sourceTermMap.empty() && m_nonTermMap.empty();
}
- bool HasRules() const { return !m_targetPhraseCollection.IsEmpty(); }
+ bool HasRules() const {
+ return !m_targetPhraseCollection.IsEmpty();
+ }
void Prune(std::size_t tableLimit);
void Sort(std::size_t tableLimit);
@@ -56,11 +58,15 @@ class RuleTrieCYKPlus : public RuleTrie
return m_targetPhraseCollection;
}
- const SymbolMap &GetTerminalMap() const { return m_sourceTermMap; }
+ const SymbolMap &GetTerminalMap() const {
+ return m_sourceTermMap;
+ }
- const SymbolMap &GetNonTerminalMap() const { return m_nonTermMap; }
+ const SymbolMap &GetNonTerminalMap() const {
+ return m_nonTermMap;
+ }
- private:
+ private:
SymbolMap m_sourceTermMap;
SymbolMap m_nonTermMap;
TargetPhraseCollection m_targetPhraseCollection;
@@ -68,11 +74,13 @@ class RuleTrieCYKPlus : public RuleTrie
RuleTrieCYKPlus(const RuleTableFF *ff) : RuleTrie(ff) {}
- const Node &GetRootNode() const { return m_root; }
+ const Node &GetRootNode() const {
+ return m_root;
+ }
bool HasPreterminalRule(const Word &) const;
- private:
+private:
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
diff --git a/moses/Syntax/S2T/RuleTrieCreator.h b/moses/Syntax/S2T/RuleTrieCreator.h
index 1fe99e609..e49a2cbde 100644
--- a/moses/Syntax/S2T/RuleTrieCreator.h
+++ b/moses/Syntax/S2T/RuleTrieCreator.h
@@ -13,7 +13,7 @@ namespace S2T
// OovHandler). RuleTrieCreator is a friend of RuleTrie.
class RuleTrieCreator
{
- protected:
+protected:
// Provide access to RuleTrie's private SortAndPrune function.
void SortAndPrune(RuleTrie &trie, std::size_t limit) {
trie.SortAndPrune(limit);
@@ -22,8 +22,8 @@ class RuleTrieCreator
// Provide access to RuleTrie's private GetOrCreateTargetPhraseCollection
// function.
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
- RuleTrie &trie, const Phrase &source, const TargetPhrase &target,
- const Word *sourceLHS) {
+ RuleTrie &trie, const Phrase &source, const TargetPhrase &target,
+ const Word *sourceLHS) {
return trie.GetOrCreateTargetPhraseCollection(source, target, sourceLHS);
}
};
diff --git a/moses/Syntax/S2T/RuleTrieLoader.cpp b/moses/Syntax/S2T/RuleTrieLoader.cpp
index 3abe667ef..b9f7484ad 100644
--- a/moses/Syntax/S2T/RuleTrieLoader.cpp
+++ b/moses/Syntax/S2T/RuleTrieLoader.cpp
@@ -90,7 +90,7 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
const size_t numScoreComponents = ff.GetNumScoreComponents();
if (scoreVector.size() != numScoreComponents) {
UTIL_THROW2("Size of scoreVector != number (" << scoreVector.size() << "!="
- << numScoreComponents << ") of score components on line " << count);
+ << numScoreComponents << ") of score components on line " << count);
}
// parse source & find pt node
@@ -126,7 +126,7 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
targetPhrase->EvaluateInIsolation(sourcePhrase, ff.GetFeaturesToApply());
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(
- trie, sourcePhrase, *targetPhrase, sourceLHS);
+ trie, sourcePhrase, *targetPhrase, sourceLHS);
phraseColl.Add(targetPhrase);
// not implemented correctly in memory pt. just delete it for now
diff --git a/moses/Syntax/S2T/RuleTrieLoader.h b/moses/Syntax/S2T/RuleTrieLoader.h
index c625f91d6..855f1d2a8 100644
--- a/moses/Syntax/S2T/RuleTrieLoader.h
+++ b/moses/Syntax/S2T/RuleTrieLoader.h
@@ -18,7 +18,7 @@ namespace S2T
class RuleTrieLoader : public RuleTrieCreator
{
- public:
+public:
bool Load(const std::vector<FactorType> &input,
const std::vector<FactorType> &output,
const std::string &inFile,
diff --git a/moses/Syntax/S2T/RuleTrieScope3.cpp b/moses/Syntax/S2T/RuleTrieScope3.cpp
index a16cbefdc..7318f09d6 100644
--- a/moses/Syntax/S2T/RuleTrieScope3.cpp
+++ b/moses/Syntax/S2T/RuleTrieScope3.cpp
@@ -55,7 +55,7 @@ void RuleTrieScope3::Node::Sort(std::size_t tableLimit)
}
RuleTrieScope3::Node *RuleTrieScope3::Node::GetOrCreateTerminalChild(
- const Word &sourceTerm)
+ const Word &sourceTerm)
{
assert(!sourceTerm.IsNonTerminal());
std::pair<TerminalMap::iterator, bool> result;
@@ -66,7 +66,7 @@ RuleTrieScope3::Node *RuleTrieScope3::Node::GetOrCreateTerminalChild(
}
RuleTrieScope3::Node *RuleTrieScope3::Node::GetOrCreateNonTerminalChild(
- const Word &targetNonTerm)
+ const Word &targetNonTerm)
{
assert(targetNonTerm.IsNonTerminal());
if (m_gapNode == NULL) {
@@ -77,7 +77,7 @@ RuleTrieScope3::Node *RuleTrieScope3::Node::GetOrCreateNonTerminalChild(
TargetPhraseCollection &
RuleTrieScope3::Node::GetOrCreateTargetPhraseCollection(
- const TargetPhrase &target)
+ const TargetPhrase &target)
{
const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm();
const std::size_t rank = alignmentInfo.GetSize();
@@ -99,14 +99,14 @@ RuleTrieScope3::Node::GetOrCreateTargetPhraseCollection(
}
TargetPhraseCollection &RuleTrieScope3::GetOrCreateTargetPhraseCollection(
- const Phrase &source, const TargetPhrase &target, const Word *sourceLHS)
+ const Phrase &source, const TargetPhrase &target, const Word *sourceLHS)
{
Node &currNode = GetOrCreateNode(source, target, sourceLHS);
return currNode.GetOrCreateTargetPhraseCollection(target);
}
RuleTrieScope3::Node &RuleTrieScope3::GetOrCreateNode(
- const Phrase &source, const TargetPhrase &target, const Word */*sourceLHS*/)
+ const Phrase &source, const TargetPhrase &target, const Word */*sourceLHS*/)
{
const std::size_t size = source.GetSize();
diff --git a/moses/Syntax/S2T/RuleTrieScope3.h b/moses/Syntax/S2T/RuleTrieScope3.h
index 6dd38a4f1..5909b6509 100644
--- a/moses/Syntax/S2T/RuleTrieScope3.h
+++ b/moses/Syntax/S2T/RuleTrieScope3.h
@@ -25,27 +25,37 @@ namespace S2T
class RuleTrieScope3 : public RuleTrie
{
- public:
+public:
class Node
{
- public:
+ public:
typedef std::vector<std::vector<Word> > LabelTable;
typedef boost::unordered_map<Word, Node, SymbolHasher,
- SymbolEqualityPred> TerminalMap;
+ SymbolEqualityPred> TerminalMap;
typedef boost::unordered_map<std::vector<int>,
TargetPhraseCollection> LabelMap;
- ~Node() { delete m_gapNode; }
+ ~Node() {
+ delete m_gapNode;
+ }
- const LabelTable &GetLabelTable() const { return m_labelTable; }
+ const LabelTable &GetLabelTable() const {
+ return m_labelTable;
+ }
- const LabelMap &GetLabelMap() const { return m_labelMap; }
+ const LabelMap &GetLabelMap() const {
+ return m_labelMap;
+ }
- const TerminalMap &GetTerminalMap() const { return m_terminalMap; }
+ const TerminalMap &GetTerminalMap() const {
+ return m_terminalMap;
+ }
- const Node *GetNonTerminalChild() const { return m_gapNode; }
+ const Node *GetNonTerminalChild() const {
+ return m_gapNode;
+ }
Node *GetOrCreateTerminalChild(const Word &sourceTerm);
@@ -54,14 +64,18 @@ class RuleTrieScope3 : public RuleTrie
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
const TargetPhrase &);
- bool IsLeaf() const { return m_terminalMap.empty() && m_gapNode == NULL; }
+ bool IsLeaf() const {
+ return m_terminalMap.empty() && m_gapNode == NULL;
+ }
- bool HasRules() const { return !m_labelMap.empty(); }
+ bool HasRules() const {
+ return !m_labelMap.empty();
+ }
void Prune(std::size_t tableLimit);
void Sort(std::size_t tableLimit);
- private:
+ private:
friend class RuleTrieScope3;
Node() : m_gapNode(NULL) {}
@@ -85,11 +99,13 @@ class RuleTrieScope3 : public RuleTrie
RuleTrieScope3(const RuleTableFF *ff) : RuleTrie(ff) {}
- const Node &GetRootNode() const { return m_root; }
+ const Node &GetRootNode() const {
+ return m_root;
+ }
bool HasPreterminalRule(const Word &) const;
- private:
+private:
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
diff --git a/moses/Syntax/S2T/SChart.h b/moses/Syntax/S2T/SChart.h
index 62b7d0c2b..ac6404831 100644
--- a/moses/Syntax/S2T/SChart.h
+++ b/moses/Syntax/S2T/SChart.h
@@ -19,11 +19,10 @@ namespace S2T
class SChart
{
- public:
- struct Cell
- {
+public:
+ struct Cell {
typedef boost::unordered_map<Word, SVertexStack, SymbolHasher,
- SymbolEqualityPred> TMap;
+ SymbolEqualityPred> TMap;
typedef NonTerminalMap<SVertexStack> NMap;
TMap terminalStacks;
NMap nonTerminalStacks;
@@ -31,7 +30,9 @@ class SChart
SChart(std::size_t width);
- std::size_t GetWidth() const { return m_cells.size(); }
+ std::size_t GetWidth() const {
+ return m_cells.size();
+ }
const Cell &GetCell(std::size_t start, std::size_t end) const {
return m_cells[start][end];
@@ -41,7 +42,7 @@ class SChart
return m_cells[start][end];
}
- private:
+private:
std::vector<std::vector<Cell> > m_cells;
};
diff --git a/moses/Syntax/SHyperedge.cpp b/moses/Syntax/SHyperedge.cpp
index 0f098c7a4..d91d5e896 100644
--- a/moses/Syntax/SHyperedge.cpp
+++ b/moses/Syntax/SHyperedge.cpp
@@ -32,24 +32,24 @@ Phrase GetOneBestTargetYield(const SHyperedge &h)
}
assert(false);
// FIXME Modify this chunk of code to work for SHyperedge.
-/*
- std::set<std::size_t> sourcePosSet =
- h.translation->GetAlignTerm().GetAlignmentsForTarget(pos);
- if (sourcePosSet.size() == 1) {
- const std::vector<const Word*> *ruleSourceFromInputPath =
- hypo.GetTranslationOption().GetSourceRuleFromInputPath();
- UTIL_THROW_IF2(ruleSourceFromInputPath == NULL,
- "Source Words in of the rules hasn't been filled out");
- std::size_t sourcePos = *sourcePosSet.begin();
- const Word *sourceWord = ruleSourceFromInputPath->at(sourcePos);
- UTIL_THROW_IF2(sourceWord == NULL,
- "Null source word at position " << sourcePos);
- const Factor *factor = sourceWord->GetFactor(placeholderFactor);
- if (factor) {
- ret.Back()[0] = factor;
- }
- }
-*/
+ /*
+ std::set<std::size_t> sourcePosSet =
+ h.translation->GetAlignTerm().GetAlignmentsForTarget(pos);
+ if (sourcePosSet.size() == 1) {
+ const std::vector<const Word*> *ruleSourceFromInputPath =
+ hypo.GetTranslationOption().GetSourceRuleFromInputPath();
+ UTIL_THROW_IF2(ruleSourceFromInputPath == NULL,
+ "Source Words in of the rules hasn't been filled out");
+ std::size_t sourcePos = *sourcePosSet.begin();
+ const Word *sourceWord = ruleSourceFromInputPath->at(sourcePos);
+ UTIL_THROW_IF2(sourceWord == NULL,
+ "Null source word at position " << sourcePos);
+ const Factor *factor = sourceWord->GetFactor(placeholderFactor);
+ if (factor) {
+ ret.Back()[0] = factor;
+ }
+ }
+ */
}
}
return ret;
diff --git a/moses/Syntax/SHyperedge.h b/moses/Syntax/SHyperedge.h
index 6d9128d49..696dbfa20 100644
--- a/moses/Syntax/SHyperedge.h
+++ b/moses/Syntax/SHyperedge.h
@@ -13,8 +13,7 @@ namespace Syntax
struct SVertex;
-struct SHyperedge
-{
+struct SHyperedge {
SVertex *head;
std::vector<SVertex*> tail;
float score;
diff --git a/moses/Syntax/SHyperedgeBundle.h b/moses/Syntax/SHyperedgeBundle.h
index 4a78c5458..f4a07a181 100644
--- a/moses/Syntax/SHyperedgeBundle.h
+++ b/moses/Syntax/SHyperedgeBundle.h
@@ -14,8 +14,7 @@ namespace Syntax
struct PVertex;
-struct SHyperedgeBundle
-{
+struct SHyperedgeBundle {
std::vector<const SVertexStack*> stacks;
const TargetPhraseCollection *translations;
diff --git a/moses/Syntax/SHyperedgeBundleScorer.h b/moses/Syntax/SHyperedgeBundleScorer.h
index 3bf547cfd..cc81812cf 100644
--- a/moses/Syntax/SHyperedgeBundleScorer.h
+++ b/moses/Syntax/SHyperedgeBundleScorer.h
@@ -7,14 +7,13 @@ namespace Moses
namespace Syntax
{
-struct SHyperedgeBundleScorer
-{
- public:
+struct SHyperedgeBundleScorer {
+public:
static float Score(const SHyperedgeBundle &bundle) {
const TargetPhrase &targetPhrase = **(bundle.translations->begin());
float score = targetPhrase.GetFutureScore();
for (std::vector<const SVertexStack*>::const_iterator p =
- bundle.stacks.begin(); p != bundle.stacks.end(); ++p) {
+ bundle.stacks.begin(); p != bundle.stacks.end(); ++p) {
const SVertexStack *stack = *p;
if (stack->front()->best) {
score += stack->front()->best->score;
diff --git a/moses/Syntax/SVertex.h b/moses/Syntax/SVertex.h
index cde14c21a..e596cb442 100644
--- a/moses/Syntax/SVertex.h
+++ b/moses/Syntax/SVertex.h
@@ -17,8 +17,7 @@ struct SHyperedge;
//
// Important: a SVertex owns its incoming SHyperedge objects and its FFState
// objects and will delete them on destruction.
-struct SVertex
-{
+struct SVertex {
~SVertex();
SHyperedge *best;
diff --git a/moses/Syntax/SVertexRecombinationOrderer.h b/moses/Syntax/SVertexRecombinationOrderer.h
index 60686d989..fcabed04a 100644
--- a/moses/Syntax/SVertexRecombinationOrderer.h
+++ b/moses/Syntax/SVertexRecombinationOrderer.h
@@ -9,11 +9,9 @@ namespace Moses
namespace Syntax
{
-struct SVertexRecombinationOrderer
-{
- public:
- bool operator()(const SVertex &x, const SVertex &y) const
- {
+struct SVertexRecombinationOrderer {
+public:
+ bool operator()(const SVertex &x, const SVertex &y) const {
int comp = 0;
for (std::size_t i = 0; i < x.state.size(); ++i) {
if (x.state[i] == NULL || y.state[i] == NULL) {
@@ -28,8 +26,7 @@ struct SVertexRecombinationOrderer
return false;
}
- bool operator()(const SVertex *x, const SVertex *y) const
- {
+ bool operator()(const SVertex *x, const SVertex *y) const {
return operator()(*x, *y);
}
};
diff --git a/moses/Syntax/SVertexStack.h b/moses/Syntax/SVertexStack.h
index 57dc9f247..0feae5cf1 100644
--- a/moses/Syntax/SVertexStack.h
+++ b/moses/Syntax/SVertexStack.h
@@ -14,12 +14,10 @@ namespace Syntax
typedef std::vector<boost::shared_ptr<SVertex> > SVertexStack;
-struct SVertexStackContentOrderer
-{
- public:
+struct SVertexStackContentOrderer {
+public:
bool operator()(const boost::shared_ptr<SVertex> &x,
- const boost::shared_ptr<SVertex> &y)
- {
+ const boost::shared_ptr<SVertex> &y) {
return x->best->score > y->best->score;
}
};
diff --git a/moses/Syntax/SymbolEqualityPred.h b/moses/Syntax/SymbolEqualityPred.h
index e97c4f11b..684d70cee 100644
--- a/moses/Syntax/SymbolEqualityPred.h
+++ b/moses/Syntax/SymbolEqualityPred.h
@@ -12,7 +12,7 @@ namespace Syntax
// *not* work in moses_chart unless this is changed (among other things).
class SymbolEqualityPred
{
- public:
+public:
bool operator()(const Word &s1, const Word &s2) const {
const Factor *f1 = s1[0];
const Factor *f2 = s2[0];
diff --git a/moses/Syntax/SymbolHasher.h b/moses/Syntax/SymbolHasher.h
index b398fdd00..c758d7017 100644
--- a/moses/Syntax/SymbolHasher.h
+++ b/moses/Syntax/SymbolHasher.h
@@ -14,7 +14,7 @@ namespace Syntax
// *not* work in moses_chart unless this is changed (among other things).
class SymbolHasher
{
- public:
+public:
std::size_t operator()(const Word &s) const {
const Factor *f = s[0];
return hash_value(*f);
diff --git a/moses/TabbedSentence.cpp b/moses/TabbedSentence.cpp
index 471a8cc7d..ae0876595 100644
--- a/moses/TabbedSentence.cpp
+++ b/moses/TabbedSentence.cpp
@@ -29,44 +29,44 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses
{
- void TabbedSentence::CreateFromString(const std::vector<FactorType> &factorOrder
- , const std::string &tabbedString) {
- TabbedColumns allColumns;
-
- boost::split(allColumns, tabbedString, boost::is_any_of("\t"));
-
- if(allColumns.size() < 2) {
- Sentence::CreateFromString(factorOrder, tabbedString);
- }
- else {
- m_columns.resize(allColumns.size() - 1);
- std::copy(allColumns.begin() + 1, allColumns.end(), m_columns.begin());
- Sentence::CreateFromString(factorOrder, allColumns[0]);
- }
+void TabbedSentence::CreateFromString(const std::vector<FactorType> &factorOrder
+ , const std::string &tabbedString)
+{
+ TabbedColumns allColumns;
+
+ boost::split(allColumns, tabbedString, boost::is_any_of("\t"));
+
+ if(allColumns.size() < 2) {
+ Sentence::CreateFromString(factorOrder, tabbedString);
+ } else {
+ m_columns.resize(allColumns.size() - 1);
+ std::copy(allColumns.begin() + 1, allColumns.end(), m_columns.begin());
+ Sentence::CreateFromString(factorOrder, allColumns[0]);
}
-
- int TabbedSentence::Read(std::istream& in, const std::vector<FactorType>& factorOrder) {
- TabbedColumns allColumns;
-
- std::string line;
- if (getline(in, line, '\n').eof())
- return 0;
-
- boost::split(allColumns, line, boost::is_any_of("\t"));
-
- if(allColumns.size() < 2) {
- std::stringstream dummyStream;
- dummyStream << line << std::endl;
- return Sentence::Read(dummyStream, factorOrder);
- }
- else {
- m_columns.resize(allColumns.size() - 1);
- std::copy(allColumns.begin() + 1, allColumns.end(), m_columns.begin());
-
- std::stringstream dummyStream;
- dummyStream << allColumns[0] << std::endl;
- return Sentence::Read(dummyStream, factorOrder);
- }
+}
+
+int TabbedSentence::Read(std::istream& in, const std::vector<FactorType>& factorOrder)
+{
+ TabbedColumns allColumns;
+
+ std::string line;
+ if (getline(in, line, '\n').eof())
+ return 0;
+
+ boost::split(allColumns, line, boost::is_any_of("\t"));
+
+ if(allColumns.size() < 2) {
+ std::stringstream dummyStream;
+ dummyStream << line << std::endl;
+ return Sentence::Read(dummyStream, factorOrder);
+ } else {
+ m_columns.resize(allColumns.size() - 1);
+ std::copy(allColumns.begin() + 1, allColumns.end(), m_columns.begin());
+
+ std::stringstream dummyStream;
+ dummyStream << allColumns[0] << std::endl;
+ return Sentence::Read(dummyStream, factorOrder);
}
+}
}
diff --git a/moses/TabbedSentence.h b/moses/TabbedSentence.h
index 90c11cc39..ffd28a877 100644
--- a/moses/TabbedSentence.h
+++ b/moses/TabbedSentence.h
@@ -41,12 +41,12 @@ namespace Moses
*
* In theory a column can contain anything, even text-serialized parse trees or
* classifier features as long it can be represented as text and does not contain
- * tab characters.
- *
+ * tab characters.
+ *
*/
typedef std::vector<std::string> TabbedColumns;
-
+
class TabbedSentence : public Sentence
{
@@ -61,25 +61,25 @@ public:
// Splits off the first tab-separated column and passes it to
// Sentence::CreateFromString(...), the remaining columns are stored in
// m_columns .
-
+
virtual void CreateFromString(const std::vector<FactorType> &factorOrder
- , const std::string &tabbedString);
-
+ , const std::string &tabbedString);
+
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
const TabbedColumns& GetColumns() const {
return m_columns;
}
-
+
const std::string& GetColumn(size_t i) const {
UTIL_THROW_IF2(m_columns.size() <= i,
- "There is no column with index " << i);
+ "There is no column with index " << i);
return m_columns[i];
}
private:
TabbedColumns m_columns;
-
+
};
diff --git a/moses/TargetPhrase.cpp b/moses/TargetPhrase.cpp
index 945c3f3d4..2309b1415 100644
--- a/moses/TargetPhrase.cpp
+++ b/moses/TargetPhrase.cpp
@@ -160,11 +160,12 @@ void TargetPhrase::EvaluateWithSourceContext(const InputType &input, const Input
m_fullScore = weightedScore + m_futureScore;
}
-void TargetPhrase::UpdateScore(ScoreComponentCollection* futureScoreBreakdown) {
+void TargetPhrase::UpdateScore(ScoreComponentCollection* futureScoreBreakdown)
+{
float weightedScore = m_scoreBreakdown.GetWeightedScore();
if(futureScoreBreakdown)
m_futureScore += futureScoreBreakdown->GetWeightedScore();
- m_fullScore = weightedScore + m_futureScore;
+ m_fullScore = weightedScore + m_futureScore;
}
void TargetPhrase::SetXMLScore(float score)
@@ -301,23 +302,23 @@ std::ostream& operator<<(std::ostream& os, const TargetPhrase& tp)
os << ": nonterm=" << tp.GetAlignNonTerm() << flush;
os << ": c=" << tp.m_fullScore << flush;
os << " " << tp.m_scoreBreakdown << flush;
-
+
const Phrase *sourcePhrase = tp.GetRuleSource();
if (sourcePhrase) {
os << " sourcePhrase=" << *sourcePhrase << flush;
}
if (tp.m_properties.size()) {
- os << " properties: " << flush;
+ os << " properties: " << flush;
- TargetPhrase::Properties::const_iterator iter;
- for (iter = tp.m_properties.begin(); iter != tp.m_properties.end(); ++iter) {
- const string &key = iter->first;
- const PhraseProperty *prop = iter->second.get();
- assert(prop);
+ TargetPhrase::Properties::const_iterator iter;
+ for (iter = tp.m_properties.begin(); iter != tp.m_properties.end(); ++iter) {
+ const string &key = iter->first;
+ const PhraseProperty *prop = iter->second.get();
+ assert(prop);
- os << key << "=" << *prop << " ";
- }
+ os << key << "=" << *prop << " ";
+ }
}
return os;
diff --git a/moses/TargetPhrase.h b/moses/TargetPhrase.h
index 419b50a6b..db7da97c5 100644
--- a/moses/TargetPhrase.h
+++ b/moses/TargetPhrase.h
@@ -83,7 +83,7 @@ public:
void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath);
void UpdateScore(ScoreComponentCollection *futureScoreBreakdown = NULL);
-
+
void SetSparseScore(const FeatureFunction* translationScoreProducer, const StringPiece &sparseString);
// used to set translation or gen score
@@ -134,21 +134,19 @@ public:
m_alignNonTerm = alignNonTerm;
}
- // ALNREP = alignment representation,
+ // ALNREP = alignment representation,
// see AlignmentInfo constructors for supported representations
template<typename ALNREP>
- void
- SetAlignTerm(const ALNREP &coll)
- {
+ void
+ SetAlignTerm(const ALNREP &coll) {
m_alignTerm = AlignmentInfoCollection::Instance().Add(coll);
}
- // ALNREP = alignment representation,
+ // ALNREP = alignment representation,
// see AlignmentInfo constructors for supported representations
- template<typename ALNREP>
- void
- SetAlignNonTerm(const ALNREP &coll)
- {
+ template<typename ALNREP>
+ void
+ SetAlignNonTerm(const ALNREP &coll) {
m_alignNonTerm = AlignmentInfoCollection::Instance().Add(coll);
}
@@ -164,8 +162,9 @@ public:
return m_ruleSource;
}
- const PhraseDictionary *GetContainer() const
- { return m_container; }
+ const PhraseDictionary *GetContainer() const {
+ return m_container;
+ }
// To be set by the FF that needs it, by default the rule source = NULL
// make a copy of the source side of the rule
diff --git a/moses/TargetPhraseCollection.h b/moses/TargetPhraseCollection.h
index 0af89e833..d61ff2c4f 100644
--- a/moses/TargetPhraseCollection.h
+++ b/moses/TargetPhraseCollection.h
@@ -44,11 +44,10 @@ public:
typedef CollType::iterator iterator;
typedef CollType::const_iterator const_iterator;
- TargetPhrase const*
- operator[](size_t const i) const
- {
+ TargetPhrase const*
+ operator[](size_t const i) const {
return m_collection.at(i);
- }
+ }
iterator begin() {
return m_collection.begin();
@@ -63,8 +62,8 @@ public:
return m_collection.end();
}
- TargetPhraseCollection()
- {}
+ TargetPhraseCollection() {
+ }
TargetPhraseCollection(const TargetPhraseCollection &copy);
diff --git a/moses/TrainingTask.h b/moses/TrainingTask.h
index b67bbcd52..885e8fd16 100644
--- a/moses/TrainingTask.h
+++ b/moses/TrainingTask.h
@@ -17,23 +17,22 @@ class TrainingTask : public Moses::Task
public:
TrainingTask(Moses::InputType* source, Moses::IOWrapper &ioWrapper)
- : m_source(source)
- , m_ioWrapper(ioWrapper)
- {}
+ : m_source(source)
+ , m_ioWrapper(ioWrapper) {
+ }
- ~TrainingTask()
- {}
+ ~TrainingTask() {
+ }
- void Run()
- {
+ void Run() {
StaticData::Instance().InitializeForInput(*m_source);
-
+
std::cerr << *m_source << std::endl;
-
+
TranslationOptionCollection *transOptColl = m_source->CreateTranslationOptionCollection();
transOptColl->CreateTranslationOptions();
delete transOptColl;
-
+
StaticData::Instance().CleanUpAfterSentenceProcessing(*m_source);
}
diff --git a/moses/TranslationAnalysis.h b/moses/TranslationAnalysis.h
index ccb21f041..143f65967 100644
--- a/moses/TranslationAnalysis.h
+++ b/moses/TranslationAnalysis.h
@@ -7,9 +7,10 @@
#include <iostream>
-namespace Moses {
- class Hypothesis;
- class ChartHypothesis;
+namespace Moses
+{
+class Hypothesis;
+class ChartHypothesis;
}
namespace TranslationAnalysis
diff --git a/moses/TranslationModel/BilingualDynSuffixArray.h b/moses/TranslationModel/BilingualDynSuffixArray.h
index 5b52b8814..1c4ceae34 100644
--- a/moses/TranslationModel/BilingualDynSuffixArray.h
+++ b/moses/TranslationModel/BilingualDynSuffixArray.h
@@ -78,8 +78,8 @@ public:
class ScoresComp
{
public:
- ScoresComp(const vector<float>& weights)
- {}
+ ScoresComp(const vector<float>& weights) {
+ }
bool operator()(const Scores& s1, const Scores& s2) const {
return s1[0] < s2[0]; // just p(e|f) as approximation
// float score1(0), score2(0);
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.h b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.h
index e2ba6779c..02bea7b43 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.h
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.h
@@ -51,16 +51,15 @@ protected:
};
// struct that caches cellLabel, its end position and score for quicker lookup
-struct ChartCellCache
-{
- ChartCellCache(size_t endPos, const ChartCellLabel* cellLabel, float score)
+struct ChartCellCache {
+ ChartCellCache(size_t endPos, const ChartCellLabel* cellLabel, float score)
: endPos(endPos)
, cellLabel(cellLabel)
, score(score) {}
- size_t endPos;
- const ChartCellLabel* cellLabel;
- float score;
+ size_t endPos;
+ const ChartCellLabel* cellLabel;
+ float score;
};
} // namespace Moses
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp
index 64f9582d3..54f172d1e 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp
@@ -93,175 +93,178 @@ void ChartRuleLookupManagerMemory::GetChartRuleCollection(
// Create/update compressed matrix that stores all valid ChartCellLabels for a given start position and label.
void ChartRuleLookupManagerMemory::UpdateCompressedMatrix(size_t startPos,
size_t origEndPos,
- size_t lastPos) {
+ size_t lastPos)
+{
- std::vector<size_t> endPosVec;
- size_t numNonTerms = FactorCollection::Instance().GetNumNonTerminals();
- m_compressedMatrixVec.resize(lastPos+1);
+ std::vector<size_t> endPosVec;
+ size_t numNonTerms = FactorCollection::Instance().GetNumNonTerminals();
+ m_compressedMatrixVec.resize(lastPos+1);
- // we only need to update cell at [startPos, origEndPos-1] for initial lookup
- if (startPos < origEndPos) {
- endPosVec.push_back(origEndPos-1);
- }
+ // we only need to update cell at [startPos, origEndPos-1] for initial lookup
+ if (startPos < origEndPos) {
+ endPosVec.push_back(origEndPos-1);
+ }
- // update all cells starting from startPos+1 for lookup of rule extensions
- else if (startPos == origEndPos)
- {
- startPos++;
- for (size_t endPos = startPos; endPos <= lastPos; endPos++) {
- endPosVec.push_back(endPos);
- }
- //re-use data structure for cells with later start position, but remove chart cells that would break max-chart-span
- for (size_t pos = startPos+1; pos <= lastPos; pos++) {
- CompressedMatrix & cellMatrix = m_compressedMatrixVec[pos];
- cellMatrix.resize(numNonTerms);
- for (size_t i = 0; i < numNonTerms; i++) {
- if (!cellMatrix[i].empty() && cellMatrix[i].back().endPos > lastPos) {
- cellMatrix[i].pop_back();
- }
- }
+ // update all cells starting from startPos+1 for lookup of rule extensions
+ else if (startPos == origEndPos) {
+ startPos++;
+ for (size_t endPos = startPos; endPos <= lastPos; endPos++) {
+ endPosVec.push_back(endPos);
+ }
+ //re-use data structure for cells with later start position, but remove chart cells that would break max-chart-span
+ for (size_t pos = startPos+1; pos <= lastPos; pos++) {
+ CompressedMatrix & cellMatrix = m_compressedMatrixVec[pos];
+ cellMatrix.resize(numNonTerms);
+ for (size_t i = 0; i < numNonTerms; i++) {
+ if (!cellMatrix[i].empty() && cellMatrix[i].back().endPos > lastPos) {
+ cellMatrix[i].pop_back();
}
+ }
}
+ }
- if (startPos > lastPos) {
- return;
- }
+ if (startPos > lastPos) {
+ return;
+ }
- // populate compressed matrix with all chart cells that start at current start position
- CompressedMatrix & cellMatrix = m_compressedMatrixVec[startPos];
- cellMatrix.clear();
- cellMatrix.resize(numNonTerms);
- for (std::vector<size_t>::iterator p = endPosVec.begin(); p != endPosVec.end(); ++p) {
+ // populate compressed matrix with all chart cells that start at current start position
+ CompressedMatrix & cellMatrix = m_compressedMatrixVec[startPos];
+ cellMatrix.clear();
+ cellMatrix.resize(numNonTerms);
+ for (std::vector<size_t>::iterator p = endPosVec.begin(); p != endPosVec.end(); ++p) {
- size_t endPos = *p;
- // target non-terminal labels for the span
- const ChartCellLabelSet &targetNonTerms = GetTargetLabelSet(startPos, endPos);
+ size_t endPos = *p;
+ // target non-terminal labels for the span
+ const ChartCellLabelSet &targetNonTerms = GetTargetLabelSet(startPos, endPos);
- if (targetNonTerms.GetSize() == 0) {
- continue;
- }
+ if (targetNonTerms.GetSize() == 0) {
+ continue;
+ }
#if !defined(UNLABELLED_SOURCE)
- // source non-terminal labels for the span
- const InputPath &inputPath = GetParser().GetInputPath(startPos, endPos);
+ // source non-terminal labels for the span
+ const InputPath &inputPath = GetParser().GetInputPath(startPos, endPos);
- // can this ever be true? Moses seems to pad the non-terminal set of the input with [X]
- if (inputPath.GetNonTerminalSet().size() == 0) {
- continue;
- }
+ // can this ever be true? Moses seems to pad the non-terminal set of the input with [X]
+ if (inputPath.GetNonTerminalSet().size() == 0) {
+ continue;
+ }
#endif
- for (size_t i = 0; i < numNonTerms; i++) {
- const ChartCellLabel *cellLabel = targetNonTerms.Find(i);
- if (cellLabel != NULL) {
- float score = cellLabel->GetBestScore(m_outColl);
- cellMatrix[i].push_back(ChartCellCache(endPos, cellLabel, score));
- }
- }
+ for (size_t i = 0; i < numNonTerms; i++) {
+ const ChartCellLabel *cellLabel = targetNonTerms.Find(i);
+ if (cellLabel != NULL) {
+ float score = cellLabel->GetBestScore(m_outColl);
+ cellMatrix[i].push_back(ChartCellCache(endPos, cellLabel, score));
+ }
}
+ }
}
// if a (partial) rule matches, add it to list completed rules (if non-unary and non-empty), and try find expansions that have this partial rule as prefix.
void ChartRuleLookupManagerMemory::AddAndExtend(
- const PhraseDictionaryNodeMemory *node,
- size_t endPos) {
+ const PhraseDictionaryNodeMemory *node,
+ size_t endPos)
+{
- const TargetPhraseCollection &tpc = node->GetTargetPhraseCollection();
- // add target phrase collection (except if rule is empty or a unary non-terminal rule)
- if (!tpc.IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) {
- m_completedRules[endPos].Add(tpc, m_stackVec, m_stackScores, *m_outColl);
- }
+ const TargetPhraseCollection &tpc = node->GetTargetPhraseCollection();
+ // add target phrase collection (except if rule is empty or a unary non-terminal rule)
+ if (!tpc.IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) {
+ m_completedRules[endPos].Add(tpc, m_stackVec, m_stackScores, *m_outColl);
+ }
- // get all further extensions of rule (until reaching end of sentence or max-chart-span)
- if (endPos < m_lastPos) {
- if (!node->GetTerminalMap().empty()) {
- GetTerminalExtension(node, endPos+1);
- }
- if (!node->GetNonTerminalMap().empty()) {
- GetNonTerminalExtension(node, endPos+1);
- }
+ // get all further extensions of rule (until reaching end of sentence or max-chart-span)
+ if (endPos < m_lastPos) {
+ if (!node->GetTerminalMap().empty()) {
+ GetTerminalExtension(node, endPos+1);
}
+ if (!node->GetNonTerminalMap().empty()) {
+ GetNonTerminalExtension(node, endPos+1);
+ }
+ }
}
// search all possible terminal extensions of a partial rule (pointed at by node) at a given position
// recursively try to expand partial rules into full rules up to m_lastPos.
void ChartRuleLookupManagerMemory::GetTerminalExtension(
- const PhraseDictionaryNodeMemory *node,
- size_t pos) {
-
- const Word &sourceWord = GetSourceAt(pos).GetLabel();
- const PhraseDictionaryNodeMemory::TerminalMap & terminals = node->GetTerminalMap();
-
- // if node has small number of terminal edges, test word equality for each.
- if (terminals.size() < 5) {
- for (PhraseDictionaryNodeMemory::TerminalMap::const_iterator iter = terminals.begin(); iter != terminals.end(); ++iter) {
- const Word & word = iter->first;
- if (TerminalEqualityPred()(word, sourceWord)) {
- const PhraseDictionaryNodeMemory *child = & iter->second;
- AddAndExtend(child, pos);
- break;
- }
- }
- }
- // else, do hash lookup
- else {
- const PhraseDictionaryNodeMemory *child = node->GetChild(sourceWord);
- if (child != NULL) {
+ const PhraseDictionaryNodeMemory *node,
+ size_t pos)
+{
+
+ const Word &sourceWord = GetSourceAt(pos).GetLabel();
+ const PhraseDictionaryNodeMemory::TerminalMap & terminals = node->GetTerminalMap();
+
+ // if node has small number of terminal edges, test word equality for each.
+ if (terminals.size() < 5) {
+ for (PhraseDictionaryNodeMemory::TerminalMap::const_iterator iter = terminals.begin(); iter != terminals.end(); ++iter) {
+ const Word & word = iter->first;
+ if (TerminalEqualityPred()(word, sourceWord)) {
+ const PhraseDictionaryNodeMemory *child = & iter->second;
AddAndExtend(child, pos);
+ break;
}
}
+ }
+ // else, do hash lookup
+ else {
+ const PhraseDictionaryNodeMemory *child = node->GetChild(sourceWord);
+ if (child != NULL) {
+ AddAndExtend(child, pos);
+ }
+ }
}
// search all nonterminal possible nonterminal extensions of a partial rule (pointed at by node) for a variable span (starting from startPos).
// recursively try to expand partial rules into full rules up to m_lastPos.
void ChartRuleLookupManagerMemory::GetNonTerminalExtension(
- const PhraseDictionaryNodeMemory *node,
- size_t startPos) {
+ const PhraseDictionaryNodeMemory *node,
+ size_t startPos)
+{
- const CompressedMatrix &compressedMatrix = m_compressedMatrixVec[startPos];
+ const CompressedMatrix &compressedMatrix = m_compressedMatrixVec[startPos];
- // non-terminal labels in phrase dictionary node
- const PhraseDictionaryNodeMemory::NonTerminalMap & nonTermMap = node->GetNonTerminalMap();
+ // non-terminal labels in phrase dictionary node
+ const PhraseDictionaryNodeMemory::NonTerminalMap & nonTermMap = node->GetNonTerminalMap();
- // make room for back pointer
- m_stackVec.push_back(NULL);
- m_stackScores.push_back(0);
+ // make room for back pointer
+ m_stackVec.push_back(NULL);
+ m_stackScores.push_back(0);
- // loop over possible expansions of the rule
- PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator p;
- PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator end = nonTermMap.end();
- for (p = nonTermMap.begin(); p != end; ++p) {
- // does it match possible source and target non-terminals?
+ // loop over possible expansions of the rule
+ PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator p;
+ PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator end = nonTermMap.end();
+ for (p = nonTermMap.begin(); p != end; ++p) {
+ // does it match possible source and target non-terminals?
#if defined(UNLABELLED_SOURCE)
- const Word &targetNonTerm = p->first;
+ const Word &targetNonTerm = p->first;
#else
- const Word &targetNonTerm = p->first.second;
+ const Word &targetNonTerm = p->first.second;
#endif
- const PhraseDictionaryNodeMemory *child = &p->second;
- //soft matching of NTs
- if (m_isSoftMatching && !m_softMatchingMap[targetNonTerm[0]->GetId()].empty()) {
- const std::vector<Word>& softMatches = m_softMatchingMap[targetNonTerm[0]->GetId()];
- for (std::vector<Word>::const_iterator softMatch = softMatches.begin(); softMatch != softMatches.end(); ++softMatch) {
- const CompressedColumn &matches = compressedMatrix[(*softMatch)[0]->GetId()];
- for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
- m_stackVec.back() = match->cellLabel;
- m_stackScores.back() = match->score;
- AddAndExtend(child, match->endPos);
- }
+ const PhraseDictionaryNodeMemory *child = &p->second;
+ //soft matching of NTs
+ if (m_isSoftMatching && !m_softMatchingMap[targetNonTerm[0]->GetId()].empty()) {
+ const std::vector<Word>& softMatches = m_softMatchingMap[targetNonTerm[0]->GetId()];
+ for (std::vector<Word>::const_iterator softMatch = softMatches.begin(); softMatch != softMatches.end(); ++softMatch) {
+ const CompressedColumn &matches = compressedMatrix[(*softMatch)[0]->GetId()];
+ for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
+ m_stackVec.back() = match->cellLabel;
+ m_stackScores.back() = match->score;
+ AddAndExtend(child, match->endPos);
}
- } // end of soft matches lookup
-
- const CompressedColumn &matches = compressedMatrix[targetNonTerm[0]->GetId()];
- for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
- m_stackVec.back() = match->cellLabel;
- m_stackScores.back() = match->score;
- AddAndExtend(child, match->endPos);
}
+ } // end of soft matches lookup
+
+ const CompressedColumn &matches = compressedMatrix[targetNonTerm[0]->GetId()];
+ for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
+ m_stackVec.back() = match->cellLabel;
+ m_stackScores.back() = match->score;
+ AddAndExtend(child, match->endPos);
}
- // remove last back pointer
- m_stackVec.pop_back();
- m_stackScores.pop_back();
+ }
+ // remove last back pointer
+ m_stackVec.pop_back();
+ m_stackScores.pop_back();
}
} // namespace Moses
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.h b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.h
index 1abbc25ca..84e5f085d 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.h
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.h
@@ -70,8 +70,8 @@ private:
size_t endPos);
void UpdateCompressedMatrix(size_t startPos,
- size_t endPos,
- size_t lastPos);
+ size_t endPos,
+ size_t lastPos);
const PhraseDictionaryMemory &m_ruleTable;
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp
index b883f17cc..e090ee1ae 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp
@@ -93,175 +93,178 @@ void ChartRuleLookupManagerMemoryPerSentence::GetChartRuleCollection(
// Create/update compressed matrix that stores all valid ChartCellLabels for a given start position and label.
void ChartRuleLookupManagerMemoryPerSentence::UpdateCompressedMatrix(size_t startPos,
size_t origEndPos,
- size_t lastPos) {
+ size_t lastPos)
+{
- std::vector<size_t> endPosVec;
- size_t numNonTerms = FactorCollection::Instance().GetNumNonTerminals();
- m_compressedMatrixVec.resize(lastPos+1);
+ std::vector<size_t> endPosVec;
+ size_t numNonTerms = FactorCollection::Instance().GetNumNonTerminals();
+ m_compressedMatrixVec.resize(lastPos+1);
- // we only need to update cell at [startPos, origEndPos-1] for initial lookup
- if (startPos < origEndPos) {
- endPosVec.push_back(origEndPos-1);
- }
+ // we only need to update cell at [startPos, origEndPos-1] for initial lookup
+ if (startPos < origEndPos) {
+ endPosVec.push_back(origEndPos-1);
+ }
- // update all cells starting from startPos+1 for lookup of rule extensions
- else if (startPos == origEndPos)
- {
- startPos++;
- for (size_t endPos = startPos; endPos <= lastPos; endPos++) {
- endPosVec.push_back(endPos);
- }
- //re-use data structure for cells with later start position, but remove chart cells that would break max-chart-span
- for (size_t pos = startPos+1; pos <= lastPos; pos++) {
- CompressedMatrix & cellMatrix = m_compressedMatrixVec[pos];
- cellMatrix.resize(numNonTerms);
- for (size_t i = 0; i < numNonTerms; i++) {
- if (!cellMatrix[i].empty() && cellMatrix[i].back().endPos > lastPos) {
- cellMatrix[i].pop_back();
- }
- }
+ // update all cells starting from startPos+1 for lookup of rule extensions
+ else if (startPos == origEndPos) {
+ startPos++;
+ for (size_t endPos = startPos; endPos <= lastPos; endPos++) {
+ endPosVec.push_back(endPos);
+ }
+ //re-use data structure for cells with later start position, but remove chart cells that would break max-chart-span
+ for (size_t pos = startPos+1; pos <= lastPos; pos++) {
+ CompressedMatrix & cellMatrix = m_compressedMatrixVec[pos];
+ cellMatrix.resize(numNonTerms);
+ for (size_t i = 0; i < numNonTerms; i++) {
+ if (!cellMatrix[i].empty() && cellMatrix[i].back().endPos > lastPos) {
+ cellMatrix[i].pop_back();
}
+ }
}
+ }
- if (startPos > lastPos) {
- return;
- }
+ if (startPos > lastPos) {
+ return;
+ }
- // populate compressed matrix with all chart cells that start at current start position
- CompressedMatrix & cellMatrix = m_compressedMatrixVec[startPos];
- cellMatrix.clear();
- cellMatrix.resize(numNonTerms);
- for (std::vector<size_t>::iterator p = endPosVec.begin(); p != endPosVec.end(); ++p) {
+ // populate compressed matrix with all chart cells that start at current start position
+ CompressedMatrix & cellMatrix = m_compressedMatrixVec[startPos];
+ cellMatrix.clear();
+ cellMatrix.resize(numNonTerms);
+ for (std::vector<size_t>::iterator p = endPosVec.begin(); p != endPosVec.end(); ++p) {
- size_t endPos = *p;
- // target non-terminal labels for the span
- const ChartCellLabelSet &targetNonTerms = GetTargetLabelSet(startPos, endPos);
+ size_t endPos = *p;
+ // target non-terminal labels for the span
+ const ChartCellLabelSet &targetNonTerms = GetTargetLabelSet(startPos, endPos);
- if (targetNonTerms.GetSize() == 0) {
- continue;
- }
+ if (targetNonTerms.GetSize() == 0) {
+ continue;
+ }
#if !defined(UNLABELLED_SOURCE)
- // source non-terminal labels for the span
- const InputPath &inputPath = GetParser().GetInputPath(startPos, endPos);
+ // source non-terminal labels for the span
+ const InputPath &inputPath = GetParser().GetInputPath(startPos, endPos);
- // can this ever be true? Moses seems to pad the non-terminal set of the input with [X]
- if (inputPath.GetNonTerminalSet().size() == 0) {
- continue;
- }
+ // can this ever be true? Moses seems to pad the non-terminal set of the input with [X]
+ if (inputPath.GetNonTerminalSet().size() == 0) {
+ continue;
+ }
#endif
- for (size_t i = 0; i < numNonTerms; i++) {
- const ChartCellLabel *cellLabel = targetNonTerms.Find(i);
- if (cellLabel != NULL) {
- float score = cellLabel->GetBestScore(m_outColl);
- cellMatrix[i].push_back(ChartCellCache(endPos, cellLabel, score));
- }
- }
+ for (size_t i = 0; i < numNonTerms; i++) {
+ const ChartCellLabel *cellLabel = targetNonTerms.Find(i);
+ if (cellLabel != NULL) {
+ float score = cellLabel->GetBestScore(m_outColl);
+ cellMatrix[i].push_back(ChartCellCache(endPos, cellLabel, score));
+ }
}
+ }
}
// if a (partial) rule matches, add it to list completed rules (if non-unary and non-empty), and try find expansions that have this partial rule as prefix.
void ChartRuleLookupManagerMemoryPerSentence::AddAndExtend(
- const PhraseDictionaryNodeMemory *node,
- size_t endPos) {
+ const PhraseDictionaryNodeMemory *node,
+ size_t endPos)
+{
- const TargetPhraseCollection &tpc = node->GetTargetPhraseCollection();
- // add target phrase collection (except if rule is empty or a unary non-terminal rule)
- if (!tpc.IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) {
- m_completedRules[endPos].Add(tpc, m_stackVec, m_stackScores, *m_outColl);
- }
+ const TargetPhraseCollection &tpc = node->GetTargetPhraseCollection();
+ // add target phrase collection (except if rule is empty or a unary non-terminal rule)
+ if (!tpc.IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) {
+ m_completedRules[endPos].Add(tpc, m_stackVec, m_stackScores, *m_outColl);
+ }
- // get all further extensions of rule (until reaching end of sentence or max-chart-span)
- if (endPos < m_lastPos) {
- if (!node->GetTerminalMap().empty()) {
- GetTerminalExtension(node, endPos+1);
- }
- if (!node->GetNonTerminalMap().empty()) {
- GetNonTerminalExtension(node, endPos+1);
- }
+ // get all further extensions of rule (until reaching end of sentence or max-chart-span)
+ if (endPos < m_lastPos) {
+ if (!node->GetTerminalMap().empty()) {
+ GetTerminalExtension(node, endPos+1);
}
+ if (!node->GetNonTerminalMap().empty()) {
+ GetNonTerminalExtension(node, endPos+1);
+ }
+ }
}
// search all possible terminal extensions of a partial rule (pointed at by node) at a given position
// recursively try to expand partial rules into full rules up to m_lastPos.
void ChartRuleLookupManagerMemoryPerSentence::GetTerminalExtension(
- const PhraseDictionaryNodeMemory *node,
- size_t pos) {
-
- const Word &sourceWord = GetSourceAt(pos).GetLabel();
- const PhraseDictionaryNodeMemory::TerminalMap & terminals = node->GetTerminalMap();
-
- // if node has small number of terminal edges, test word equality for each.
- if (terminals.size() < 5) {
- for (PhraseDictionaryNodeMemory::TerminalMap::const_iterator iter = terminals.begin(); iter != terminals.end(); ++iter) {
- const Word & word = iter->first;
- if (TerminalEqualityPred()(word, sourceWord)) {
- const PhraseDictionaryNodeMemory *child = & iter->second;
- AddAndExtend(child, pos);
- break;
- }
- }
- }
- // else, do hash lookup
- else {
- const PhraseDictionaryNodeMemory *child = node->GetChild(sourceWord);
- if (child != NULL) {
+ const PhraseDictionaryNodeMemory *node,
+ size_t pos)
+{
+
+ const Word &sourceWord = GetSourceAt(pos).GetLabel();
+ const PhraseDictionaryNodeMemory::TerminalMap & terminals = node->GetTerminalMap();
+
+ // if node has small number of terminal edges, test word equality for each.
+ if (terminals.size() < 5) {
+ for (PhraseDictionaryNodeMemory::TerminalMap::const_iterator iter = terminals.begin(); iter != terminals.end(); ++iter) {
+ const Word & word = iter->first;
+ if (TerminalEqualityPred()(word, sourceWord)) {
+ const PhraseDictionaryNodeMemory *child = & iter->second;
AddAndExtend(child, pos);
+ break;
}
}
+ }
+ // else, do hash lookup
+ else {
+ const PhraseDictionaryNodeMemory *child = node->GetChild(sourceWord);
+ if (child != NULL) {
+ AddAndExtend(child, pos);
+ }
+ }
}
// search all nonterminal possible nonterminal extensions of a partial rule (pointed at by node) for a variable span (starting from startPos).
// recursively try to expand partial rules into full rules up to m_lastPos.
void ChartRuleLookupManagerMemoryPerSentence::GetNonTerminalExtension(
- const PhraseDictionaryNodeMemory *node,
- size_t startPos) {
+ const PhraseDictionaryNodeMemory *node,
+ size_t startPos)
+{
- const CompressedMatrix &compressedMatrix = m_compressedMatrixVec[startPos];
+ const CompressedMatrix &compressedMatrix = m_compressedMatrixVec[startPos];
- // non-terminal labels in phrase dictionary node
- const PhraseDictionaryNodeMemory::NonTerminalMap & nonTermMap = node->GetNonTerminalMap();
+ // non-terminal labels in phrase dictionary node
+ const PhraseDictionaryNodeMemory::NonTerminalMap & nonTermMap = node->GetNonTerminalMap();
- // make room for back pointer
- m_stackVec.push_back(NULL);
- m_stackScores.push_back(0);
+ // make room for back pointer
+ m_stackVec.push_back(NULL);
+ m_stackScores.push_back(0);
- // loop over possible expansions of the rule
- PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator p;
- PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator end = nonTermMap.end();
- for (p = nonTermMap.begin(); p != end; ++p) {
- // does it match possible source and target non-terminals?
+ // loop over possible expansions of the rule
+ PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator p;
+ PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator end = nonTermMap.end();
+ for (p = nonTermMap.begin(); p != end; ++p) {
+ // does it match possible source and target non-terminals?
#if defined(UNLABELLED_SOURCE)
- const Word &targetNonTerm = p->first;
+ const Word &targetNonTerm = p->first;
#else
- const Word &targetNonTerm = p->first.second;
+ const Word &targetNonTerm = p->first.second;
#endif
- const PhraseDictionaryNodeMemory *child = &p->second;
- //soft matching of NTs
- if (m_isSoftMatching && !m_softMatchingMap[targetNonTerm[0]->GetId()].empty()) {
- const std::vector<Word>& softMatches = m_softMatchingMap[targetNonTerm[0]->GetId()];
- for (std::vector<Word>::const_iterator softMatch = softMatches.begin(); softMatch != softMatches.end(); ++softMatch) {
- const CompressedColumn &matches = compressedMatrix[(*softMatch)[0]->GetId()];
- for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
- m_stackVec.back() = match->cellLabel;
- m_stackScores.back() = match->score;
- AddAndExtend(child, match->endPos);
- }
+ const PhraseDictionaryNodeMemory *child = &p->second;
+ //soft matching of NTs
+ if (m_isSoftMatching && !m_softMatchingMap[targetNonTerm[0]->GetId()].empty()) {
+ const std::vector<Word>& softMatches = m_softMatchingMap[targetNonTerm[0]->GetId()];
+ for (std::vector<Word>::const_iterator softMatch = softMatches.begin(); softMatch != softMatches.end(); ++softMatch) {
+ const CompressedColumn &matches = compressedMatrix[(*softMatch)[0]->GetId()];
+ for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
+ m_stackVec.back() = match->cellLabel;
+ m_stackScores.back() = match->score;
+ AddAndExtend(child, match->endPos);
}
- } // end of soft matches lookup
-
- const CompressedColumn &matches = compressedMatrix[targetNonTerm[0]->GetId()];
- for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
- m_stackVec.back() = match->cellLabel;
- m_stackScores.back() = match->score;
- AddAndExtend(child, match->endPos);
}
+ } // end of soft matches lookup
+
+ const CompressedColumn &matches = compressedMatrix[targetNonTerm[0]->GetId()];
+ for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
+ m_stackVec.back() = match->cellLabel;
+ m_stackScores.back() = match->score;
+ AddAndExtend(child, match->endPos);
}
- // remove last back pointer
- m_stackVec.pop_back();
- m_stackScores.pop_back();
+ }
+ // remove last back pointer
+ m_stackVec.pop_back();
+ m_stackScores.pop_back();
}
} // namespace Moses
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h
index 742e25355..9db0d02f0 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h
@@ -50,7 +50,7 @@ public:
~ChartRuleLookupManagerMemoryPerSentence() {};
virtual void GetChartRuleCollection(
- const InputPath &inputPath,
+ const InputPath &inputPath,
size_t lastPos, // last position to consider if using lookahead
ChartParserCallback &outColl);
@@ -69,8 +69,8 @@ private:
size_t endPos);
void UpdateCompressedMatrix(size_t startPos,
- size_t endPos,
- size_t lastPos);
+ size_t endPos,
+ size_t lastPos);
const PhraseDictionaryFuzzyMatch &m_ruleTable;
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp
index 6f7fafcc2..6f3376979 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp
@@ -75,7 +75,7 @@ ChartRuleLookupManagerOnDisk::~ChartRuleLookupManagerOnDisk()
}
void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
- const InputPath &inputPath,
+ const InputPath &inputPath,
size_t lastPos,
ChartParserCallback &outColl)
{
@@ -175,14 +175,14 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
bool doSearch = true;
if (m_dictionary.m_maxSpanDefault != NOT_FOUND) {
- // for Hieu's source syntax
+ // for Hieu's source syntax
- bool isSourceSyntaxNonTerm = sourceLHS != defaultSourceNonTerm;
- size_t nonTermNumWordsCovered = endPos - startPos + 1;
+ bool isSourceSyntaxNonTerm = sourceLHS != defaultSourceNonTerm;
+ size_t nonTermNumWordsCovered = endPos - startPos + 1;
- doSearch = isSourceSyntaxNonTerm ?
- nonTermNumWordsCovered <= m_dictionary.m_maxSpanLabelled :
- nonTermNumWordsCovered <= m_dictionary.m_maxSpanDefault;
+ doSearch = isSourceSyntaxNonTerm ?
+ nonTermNumWordsCovered <= m_dictionary.m_maxSpanLabelled :
+ nonTermNumWordsCovered <= m_dictionary.m_maxSpanDefault;
}
diff --git a/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.cpp b/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.cpp
index fb9107d9e..3a8fc8662 100644
--- a/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.cpp
+++ b/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.cpp
@@ -85,25 +85,25 @@ void CompletedRuleCollection::Add(const TargetPhraseCollection &tpc,
// copies some functionality (pruning) from ChartTranslationOptionList::Add
void CompletedRuleCollection::Add(const TargetPhraseCollection &tpc,
- const StackVec &stackVec,
- const std::vector<float> &stackScores,
- const ChartParserCallback &outColl)
+ const StackVec &stackVec,
+ const std::vector<float> &stackScores,
+ const ChartParserCallback &outColl)
{
- if (tpc.IsEmpty()) {
- return;
- }
+ if (tpc.IsEmpty()) {
+ return;
+ }
- const TargetPhrase &targetPhrase = **(tpc.begin());
- float score = std::accumulate(stackScores.begin(), stackScores.end(), targetPhrase.GetFutureScore());
+ const TargetPhrase &targetPhrase = **(tpc.begin());
+ float score = std::accumulate(stackScores.begin(), stackScores.end(), targetPhrase.GetFutureScore());
- // If the rule limit has already been reached then don't add the option
- // unless it is better than at least one existing option.
- if (m_collection.size() > m_ruleLimit && score < m_scoreThreshold) {
- return;
- }
+ // If the rule limit has already been reached then don't add the option
+ // unless it is better than at least one existing option.
+ if (m_collection.size() > m_ruleLimit && score < m_scoreThreshold) {
+ return;
+ }
- CompletedRule *completedRule = new CompletedRule(tpc, stackVec, score);
- m_collection.push_back(completedRule);
+ CompletedRule *completedRule = new CompletedRule(tpc, stackVec, score);
+ m_collection.push_back(completedRule);
// If the rule limit hasn't been exceeded then update the threshold.
if (m_collection.size() <= m_ruleLimit) {
@@ -112,10 +112,10 @@ void CompletedRuleCollection::Add(const TargetPhraseCollection &tpc,
// Prune if bursting
if (m_collection.size() == m_ruleLimit * 2) {
- NTH_ELEMENT4(m_collection.begin(),
- m_collection.begin() + m_ruleLimit - 1,
- m_collection.end(),
- CompletedRuleOrdered());
+ NTH_ELEMENT4(m_collection.begin(),
+ m_collection.begin() + m_ruleLimit - 1,
+ m_collection.end(),
+ CompletedRuleOrdered());
m_scoreThreshold = m_collection[m_ruleLimit-1]->GetScoreEstimate();
for (size_t i = 0 + m_ruleLimit; i < m_collection.size(); i++) {
delete m_collection[i];
diff --git a/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp b/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp
index 3bf0d2820..832a444f2 100644
--- a/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp
+++ b/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp
@@ -426,7 +426,7 @@ void PhraseTableCreator::AddTargetSymbolId(std::string& symbol)
unsigned PhraseTableCreator::GetSourceSymbolId(std::string& symbol)
{
boost::unordered_map<std::string, unsigned>::iterator it
- = m_sourceSymbolsMap.find(symbol);
+ = m_sourceSymbolsMap.find(symbol);
if(it != m_sourceSymbolsMap.end())
return it->second;
@@ -437,7 +437,7 @@ unsigned PhraseTableCreator::GetSourceSymbolId(std::string& symbol)
unsigned PhraseTableCreator::GetTargetSymbolId(std::string& symbol)
{
boost::unordered_map<std::string, unsigned>::iterator it
- = m_targetSymbolsMap.find(symbol);
+ = m_targetSymbolsMap.find(symbol);
if(it != m_targetSymbolsMap.end())
return it->second;
@@ -451,7 +451,7 @@ unsigned PhraseTableCreator::GetOrAddTargetSymbolId(std::string& symbol)
boost::mutex::scoped_lock lock(m_mutex);
#endif
boost::unordered_map<std::string, unsigned>::iterator it
- = m_targetSymbolsMap.find(symbol);
+ = m_targetSymbolsMap.find(symbol);
if(it != m_targetSymbolsMap.end())
return it->second;
@@ -714,10 +714,10 @@ std::string PhraseTableCreator::EncodeLine(std::vector<std::string>& tokens, siz
std::vector<float> scores = Tokenize<float>(scoresStr);
if(scores.size() != m_numScoreComponent) {
- std::stringstream strme;
- strme << "Error: Wrong number of scores detected ("
- << scores.size() << " != " << m_numScoreComponent << ") :" << std::endl;
- strme << "Line: " << tokens[0] << " ||| " << tokens[1] << " ||| " << tokens[2] << " ..." << std::endl;
+ std::stringstream strme;
+ strme << "Error: Wrong number of scores detected ("
+ << scores.size() << " != " << m_numScoreComponent << ") :" << std::endl;
+ strme << "Line: " << tokens[0] << " ||| " << tokens[1] << " ||| " << tokens[2] << " ..." << std::endl;
UTIL_THROW2(strme.str());
}
@@ -1040,30 +1040,30 @@ void RankingTask::operator()()
*it = Moses::Trim(*it);
if(tokens.size() < 4) {
- std::stringstream strme;
- strme << "Error: It seems the following line has a wrong format:" << std::endl;
- strme << "Line " << i << ": " << lines[i] << std::endl;
+ std::stringstream strme;
+ strme << "Error: It seems the following line has a wrong format:" << std::endl;
+ strme << "Line " << i << ": " << lines[i] << std::endl;
UTIL_THROW2(strme.str());
}
if(tokens[3].size() <= 1 && m_creator.m_coding != PhraseTableCreator::None) {
- std::stringstream strme;
- strme << "Error: It seems the following line contains no alignment information, " << std::endl;
- strme << "but you are using ";
- strme << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
- strme << " encoding which makes use of alignment data. " << std::endl;
- strme << "Use -encoding None" << std::endl;
- strme << "Line " << i << ": " << lines[i] << std::endl;
+ std::stringstream strme;
+ strme << "Error: It seems the following line contains no alignment information, " << std::endl;
+ strme << "but you are using ";
+ strme << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
+ strme << " encoding which makes use of alignment data. " << std::endl;
+ strme << "Use -encoding None" << std::endl;
+ strme << "Line " << i << ": " << lines[i] << std::endl;
UTIL_THROW2(strme.str());
}
std::vector<float> scores = Tokenize<float>(tokens[2]);
if(scores.size() != m_creator.m_numScoreComponent) {
- std::stringstream strme;
- strme << "Error: It seems the following line has a wrong number of scores ("
- << scores.size() << " != " << m_creator.m_numScoreComponent << ") :" << std::endl;
- strme << "Line " << i << ": " << lines[i] << std::endl;
- UTIL_THROW2(strme.str());
+ std::stringstream strme;
+ strme << "Error: It seems the following line has a wrong number of scores ("
+ << scores.size() << " != " << m_creator.m_numScoreComponent << ") :" << std::endl;
+ strme << "Line " << i << ": " << lines[i] << std::endl;
+ UTIL_THROW2(strme.str());
}
float sortScore = scores[m_creator.m_sortScoreIndex];
@@ -1140,20 +1140,20 @@ void EncodingTask::operator()()
*it = Moses::Trim(*it);
if(tokens.size() < 3) {
- std::stringstream strme;
- strme << "Error: It seems the following line has a wrong format:" << std::endl;
- strme << "Line " << i << ": " << lines[i] << std::endl;
+ std::stringstream strme;
+ strme << "Error: It seems the following line has a wrong format:" << std::endl;
+ strme << "Line " << i << ": " << lines[i] << std::endl;
UTIL_THROW2(strme.str());
}
if(tokens.size() > 3 && tokens[3].size() <= 1 && m_creator.m_coding != PhraseTableCreator::None) {
- std::stringstream strme;
- strme << "Error: It seems the following line contains no alignment information, " << std::endl;
- strme << "but you are using ";
- strme << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
- strme << " encoding which makes use of alignment data. " << std::endl;
- strme << "Use -encoding None" << std::endl;
- strme << "Line " << i << ": " << lines[i] << std::endl;
+ std::stringstream strme;
+ strme << "Error: It seems the following line contains no alignment information, " << std::endl;
+ strme << "but you are using ";
+ strme << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
+ strme << " encoding which makes use of alignment data. " << std::endl;
+ strme << "Use -encoding None" << std::endl;
+ strme << "Line " << i << ": " << lines[i] << std::endl;
UTIL_THROW2(strme.str());
}
@@ -1218,7 +1218,7 @@ void CompressionTask::operator()()
while(collectionNum < m_encodedCollections.size()) {
std::string collection = m_encodedCollections[collectionNum];
std::string compressedCollection
- = m_creator.CompressEncodedCollection(collection);
+ = m_creator.CompressEncodedCollection(collection);
std::string dummy;
PackedItem packedItem(collectionNum, dummy, compressedCollection, 0);
diff --git a/moses/TranslationModel/DynSAInclude/FileHandler.cpp b/moses/TranslationModel/DynSAInclude/FileHandler.cpp
index ffde4a0f3..8645833fe 100644
--- a/moses/TranslationModel/DynSAInclude/FileHandler.cpp
+++ b/moses/TranslationModel/DynSAInclude/FileHandler.cpp
@@ -72,12 +72,12 @@ bool FileHandler::setStreamBuffer(bool checkExists)
// redirect stdin or stdout if necesary
if (path_ == FileHandler::kStdInDescriptor) {
UTIL_THROW_IF2((flags_ & std::ios::in) == 0,
- "Incorrect flags: " << flags_);
+ "Incorrect flags: " << flags_);
std::streambuf* sb = std::cin.rdbuf();
buffer_ = sb;
} else if (path_ == FileHandler::kStdOutDescriptor) {
UTIL_THROW_IF2((flags_ & std::ios::out) == 0,
- "Incorrect flags: " << flags_);
+ "Incorrect flags: " << flags_);
std::streambuf* sb = std::cout.rdbuf();
buffer_ = sb;
} else {
diff --git a/moses/TranslationModel/PhraseDictionary.cpp b/moses/TranslationModel/PhraseDictionary.cpp
index f1acb9688..7fdd61f97 100644
--- a/moses/TranslationModel/PhraseDictionary.cpp
+++ b/moses/TranslationModel/PhraseDictionary.cpp
@@ -37,11 +37,11 @@ std::vector<PhraseDictionary*> PhraseDictionary::s_staticColl;
CacheColl::~CacheColl()
{
- for (iterator iter = begin(); iter != end(); ++iter) {
- std::pair<const TargetPhraseCollection*, clock_t> &key = iter->second;
- const TargetPhraseCollection *tps = key.first;
- delete tps;
- }
+ for (iterator iter = begin(); iter != end(); ++iter) {
+ std::pair<const TargetPhraseCollection*, clock_t> &key = iter->second;
+ const TargetPhraseCollection *tps = key.first;
+ delete tps;
+ }
}
PhraseDictionary::PhraseDictionary(const std::string &line)
@@ -49,8 +49,8 @@ PhraseDictionary::PhraseDictionary(const std::string &line)
,m_tableLimit(20) // default
,m_maxCacheSize(DEFAULT_MAX_TRANS_OPT_CACHE_SIZE)
{
- m_id = s_staticColl.size();
- s_staticColl.push_back(this);
+ m_id = s_staticColl.size();
+ s_staticColl.push_back(this);
}
bool
@@ -139,22 +139,22 @@ SetFeaturesToApply()
}
}
-
- // tell the Phrase Dictionary that the TargetPhraseCollection is not needed any more
- void
- PhraseDictionary::
- Release(TargetPhraseCollection const* tpc) const
- {
- // do nothing by default
- return;
- }
- bool
- PhraseDictionary::
- PrefixExists(Phrase const& phrase) const
- {
- return true;
- }
+// tell the Phrase Dictionary that the TargetPhraseCollection is not needed any more
+void
+PhraseDictionary::
+Release(TargetPhraseCollection const* tpc) const
+{
+ // do nothing by default
+ return;
+}
+
+bool
+PhraseDictionary::
+PrefixExists(Phrase const& phrase) const
+{
+ return true;
+}
void
PhraseDictionary::
@@ -166,7 +166,7 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
// backoff
if (!SatisfyBackoff(inputPath)) {
- continue;
+ continue;
}
const Phrase &phrase = inputPath.GetPhrase();
@@ -183,7 +183,7 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
// for( std::map<size_t, std::pair<const TargetPhraseCollection*,clock_t> >::iterator iter,
// iter != cache.end(),
// iter++ ) {
-//
+//
// }
//}
@@ -253,25 +253,25 @@ bool PhraseDictionary::SatisfyBackoff(const InputPath &inputPath) const
size_t backoff = decodeGraph.GetBackoff();
if (backoff == 0) {
- // ie. don't backoff. Collect ALL translations
- return true;
+ // ie. don't backoff. Collect ALL translations
+ return true;
}
if (sourcePhrase.GetSize() > backoff) {
- // source phrase too big
- return false;
+ // source phrase too big
+ return false;
}
// lookup translation only if no other translations
InputPath::TargetPhrases::const_iterator iter;
for (iter = inputPath.GetTargetPhrases().begin(); iter != inputPath.GetTargetPhrases().end(); ++iter) {
- const std::pair<const TargetPhraseCollection*, const void*> &temp = iter->second;
- const TargetPhraseCollection *tpCollPrev = temp.first;
+ const std::pair<const TargetPhraseCollection*, const void*> &temp = iter->second;
+ const TargetPhraseCollection *tpCollPrev = temp.first;
- if (tpCollPrev && tpCollPrev->GetSize()) {
- // already have translation from another pt. Don't create translations
- return false;
- }
+ if (tpCollPrev && tpCollPrev->GetSize()) {
+ // already have translation from another pt. Don't create translations
+ return false;
+ }
}
return true;
diff --git a/moses/TranslationModel/PhraseDictionary.h b/moses/TranslationModel/PhraseDictionary.h
index f1b938db0..2d5ae32f6 100644
--- a/moses/TranslationModel/PhraseDictionary.h
+++ b/moses/TranslationModel/PhraseDictionary.h
@@ -88,8 +88,9 @@ public:
}
//! continguous id for each pt, starting from 0
- size_t GetId() const
- { return m_id; }
+ size_t GetId() const {
+ return m_id;
+ }
virtual
void
diff --git a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h
index bf15720e2..41037ba34 100644
--- a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h
+++ b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h
@@ -86,12 +86,16 @@ public:
}
static const PhraseDictionaryDynamicCacheBased* Instance(const std::string& name) {
- if (s_instance_map.find(name) == s_instance_map.end()){ return NULL; }
+ if (s_instance_map.find(name) == s_instance_map.end()) {
+ return NULL;
+ }
return s_instance_map[name];
}
static PhraseDictionaryDynamicCacheBased* InstanceNonConst(const std::string& name) {
- if (s_instance_map.find(name) == s_instance_map.end()){ return NULL; }
+ if (s_instance_map.find(name) == s_instance_map.end()) {
+ return NULL;
+ }
return s_instance_map[name];
}
diff --git a/moses/TranslationModel/PhraseDictionaryMultiModel.cpp b/moses/TranslationModel/PhraseDictionaryMultiModel.cpp
index 5fc7df712..f0b555e22 100644
--- a/moses/TranslationModel/PhraseDictionaryMultiModel.cpp
+++ b/moses/TranslationModel/PhraseDictionaryMultiModel.cpp
@@ -33,19 +33,19 @@ PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line)
if (m_mode == "interpolate") {
size_t numWeights = m_numScoreComponents;
UTIL_THROW_IF2(m_pdStr.size() != m_multimodelweights.size() &&
- m_pdStr.size()*numWeights != m_multimodelweights.size(),
- "Number of scores and weights are not equal");
+ m_pdStr.size()*numWeights != m_multimodelweights.size(),
+ "Number of scores and weights are not equal");
} else if (m_mode == "all" || m_mode == "all-restrict") {
size_t componentWeights = 0;
for(size_t i = 0; i < m_numModels; ++i) {
const string &ptName = m_pdStr[i];
PhraseDictionary *pt = FindPhraseDictionary(ptName);
UTIL_THROW_IF2(pt == NULL,
- "Could not find component phrase table " << ptName);
+ "Could not find component phrase table " << ptName);
componentWeights += pt->GetNumScoreComponents();
}
UTIL_THROW_IF2(componentWeights != m_numScoreComponents,
- "Total number of component model scores is unequal to specified number of scores");
+ "Total number of component model scores is unequal to specified number of scores");
} else {
ostringstream msg;
msg << "combination mode unknown: " << m_mode;
@@ -57,10 +57,10 @@ PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(int type, const std::stri
:PhraseDictionary(line)
{
if (type == 1) {
- // PhraseDictionaryMultiModelCounts
+ // PhraseDictionaryMultiModelCounts
UTIL_THROW_IF2(m_pdStr.size() != m_multimodelweights.size() &&
- m_pdStr.size()*4 != m_multimodelweights.size(),
- "Number of scores and weights are not equal");
+ m_pdStr.size()*4 != m_multimodelweights.size(),
+ "Number of scores and weights are not equal");
}
}
@@ -91,7 +91,7 @@ void PhraseDictionaryMultiModel::Load()
PhraseDictionary *pt = FindPhraseDictionary(ptName);
UTIL_THROW_IF2(pt == NULL,
- "Could not find component phrase table " << ptName);
+ "Could not find component phrase table " << ptName);
m_pd.push_back(pt);
}
}
@@ -122,7 +122,7 @@ const TargetPhraseCollection *PhraseDictionaryMultiModel::GetTargetPhraseCollect
ret->NthElement(m_tableLimit); // sort the phrases for pruning later
const_cast<PhraseDictionaryMultiModel*>(this)->CacheForCleanup(ret);
-
+
return ret;
}
@@ -303,7 +303,7 @@ std::vector<std::vector<float> > PhraseDictionaryMultiModel::getWeights(size_t n
weights_ptr = &m_multimodelweights; //fall back to weights defined in config
} else if(weights_ptr->size() != m_numModels && weights_ptr->size() != m_numModels * numWeights) {
//TODO: can we pass error message to client if weights are malformed?
- std::cerr << "Must have either one multimodel weight per model (" << m_numModels << "), or one per weighted feature and model (" << numWeights << "*" << m_numModels << "). You have " << weights_ptr->size() << ". Reverting to weights in config";
+ std::cerr << "Must have either one multimodel weight per model (" << m_numModels << "), or one per weighted feature and model (" << numWeights << "*" << m_numModels << "). You have " << weights_ptr->size() << ". Reverting to weights in config";
weights_ptr = &m_multimodelweights; //fall back to weights defined in config
}
diff --git a/moses/TranslationModel/PhraseDictionaryTransliteration.cpp b/moses/TranslationModel/PhraseDictionaryTransliteration.cpp
index c2ffd95da..a336da759 100644
--- a/moses/TranslationModel/PhraseDictionaryTransliteration.cpp
+++ b/moses/TranslationModel/PhraseDictionaryTransliteration.cpp
@@ -14,20 +14,20 @@ PhraseDictionaryTransliteration::PhraseDictionaryTransliteration(const std::stri
{
ReadParameters();
UTIL_THROW_IF2(m_mosesDir.empty() ||
- m_scriptDir.empty() ||
- m_externalDir.empty() ||
- m_inputLang.empty() ||
- m_outputLang.empty(), "Must specify all arguments");
+ m_scriptDir.empty() ||
+ m_externalDir.empty() ||
+ m_inputLang.empty() ||
+ m_outputLang.empty(), "Must specify all arguments");
}
void PhraseDictionaryTransliteration::Load()
{
- SetFeaturesToApply();
+ SetFeaturesToApply();
}
void PhraseDictionaryTransliteration::CleanUpAfterSentenceProcessing(const InputType& source)
{
- ReduceCache();
+ ReduceCache();
}
void PhraseDictionaryTransliteration::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
@@ -38,14 +38,14 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollectionBatch(const Input
InputPath &inputPath = **iter;
if (!SatisfyBackoff(inputPath)) {
- continue;
+ continue;
}
const Phrase &sourcePhrase = inputPath.GetPhrase();
if (sourcePhrase.GetSize() != 1) {
- // only translit single words. A limitation of the translit script
- continue;
+ // only translit single words. A limitation of the translit script
+ continue;
}
GetTargetPhraseCollection(inputPath);
@@ -54,90 +54,89 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollectionBatch(const Input
void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &inputPath) const
{
- const Phrase &sourcePhrase = inputPath.GetPhrase();
- size_t hash = hash_value(sourcePhrase);
+ const Phrase &sourcePhrase = inputPath.GetPhrase();
+ size_t hash = hash_value(sourcePhrase);
- CacheColl &cache = GetCache();
+ CacheColl &cache = GetCache();
- CacheColl::iterator iter;
- iter = cache.find(hash);
+ CacheColl::iterator iter;
+ iter = cache.find(hash);
- if (iter != cache.end()) {
- // already in cache
- const TargetPhraseCollection *tpColl = iter->second.first;
- inputPath.SetTargetPhrases(*this, tpColl, NULL);
- }
- else {
- // TRANSLITERATE
- char *ptr = tmpnam(NULL);
- string inFile(ptr);
- ptr = tmpnam(NULL);
- string outDir(ptr);
-
- ofstream inStream(inFile.c_str());
- inStream << sourcePhrase.ToString() << endl;
- inStream.close();
-
- string cmd = m_scriptDir + "/Transliteration/prepare-transliteration-phrase-table.pl" +
- " --transliteration-model-dir " + m_filePath +
- " --moses-src-dir " + m_mosesDir +
- " --external-bin-dir " + m_externalDir +
- " --input-extension " + m_inputLang +
- " --output-extension " + m_outputLang +
- " --oov-file " + inFile +
- " --out-dir " + outDir;
-
- int ret = system(cmd.c_str());
- UTIL_THROW_IF2(ret != 0, "Transliteration script error");
-
- TargetPhraseCollection *tpColl = new TargetPhraseCollection();
- vector<TargetPhrase*> targetPhrases = CreateTargetPhrases(sourcePhrase, outDir);
- vector<TargetPhrase*>::const_iterator iter;
- for (iter = targetPhrases.begin(); iter != targetPhrases.end(); ++iter) {
- TargetPhrase *tp = *iter;
- tpColl->Add(tp);
- }
-
- std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
- cache[hash] = value;
-
- inputPath.SetTargetPhrases(*this, tpColl, NULL);
-
- // clean up temporary files
- remove(inFile.c_str());
-
- cmd = "rm -rf " + outDir;
- system(cmd.c_str());
+ if (iter != cache.end()) {
+ // already in cache
+ const TargetPhraseCollection *tpColl = iter->second.first;
+ inputPath.SetTargetPhrases(*this, tpColl, NULL);
+ } else {
+ // TRANSLITERATE
+ char *ptr = tmpnam(NULL);
+ string inFile(ptr);
+ ptr = tmpnam(NULL);
+ string outDir(ptr);
+
+ ofstream inStream(inFile.c_str());
+ inStream << sourcePhrase.ToString() << endl;
+ inStream.close();
+
+ string cmd = m_scriptDir + "/Transliteration/prepare-transliteration-phrase-table.pl" +
+ " --transliteration-model-dir " + m_filePath +
+ " --moses-src-dir " + m_mosesDir +
+ " --external-bin-dir " + m_externalDir +
+ " --input-extension " + m_inputLang +
+ " --output-extension " + m_outputLang +
+ " --oov-file " + inFile +
+ " --out-dir " + outDir;
+
+ int ret = system(cmd.c_str());
+ UTIL_THROW_IF2(ret != 0, "Transliteration script error");
+
+ TargetPhraseCollection *tpColl = new TargetPhraseCollection();
+ vector<TargetPhrase*> targetPhrases = CreateTargetPhrases(sourcePhrase, outDir);
+ vector<TargetPhrase*>::const_iterator iter;
+ for (iter = targetPhrases.begin(); iter != targetPhrases.end(); ++iter) {
+ TargetPhrase *tp = *iter;
+ tpColl->Add(tp);
}
+
+ std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
+ cache[hash] = value;
+
+ inputPath.SetTargetPhrases(*this, tpColl, NULL);
+
+ // clean up temporary files
+ remove(inFile.c_str());
+
+ cmd = "rm -rf " + outDir;
+ system(cmd.c_str());
+ }
}
std::vector<TargetPhrase*> PhraseDictionaryTransliteration::CreateTargetPhrases(const Phrase &sourcePhrase, const string &outDir) const
{
- std::vector<TargetPhrase*> ret;
+ std::vector<TargetPhrase*> ret;
- string outPath = outDir + "/out.txt";
- ifstream outStream(outPath.c_str());
+ string outPath = outDir + "/out.txt";
+ ifstream outStream(outPath.c_str());
- string line;
- while (getline(outStream, line)) {
- vector<string> toks;
- Tokenize(toks, line, "\t");
- UTIL_THROW_IF2(toks.size() != 2, "Error in transliteration output file. Expecting word\tscore");
+ string line;
+ while (getline(outStream, line)) {
+ vector<string> toks;
+ Tokenize(toks, line, "\t");
+ UTIL_THROW_IF2(toks.size() != 2, "Error in transliteration output file. Expecting word\tscore");
- TargetPhrase *tp = new TargetPhrase(this);
- Word &word = tp->AddWord();
- word.CreateFromString(Output, m_output, toks[0], false);
+ TargetPhrase *tp = new TargetPhrase(this);
+ Word &word = tp->AddWord();
+ word.CreateFromString(Output, m_output, toks[0], false);
- float score = Scan<float>(toks[1]);
- tp->GetScoreBreakdown().PlusEquals(this, score);
+ float score = Scan<float>(toks[1]);
+ tp->GetScoreBreakdown().PlusEquals(this, score);
- // score of all other ff when this rule is being loaded
- tp->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply());
+ // score of all other ff when this rule is being loaded
+ tp->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply());
- ret.push_back(tp);
- }
+ ret.push_back(tp);
+ }
- outStream.close();
+ outStream.close();
return ret;
}
@@ -146,7 +145,7 @@ ChartRuleLookupManager* PhraseDictionaryTransliteration::CreateRuleLookupManager
const ChartCellCollectionBase &cellCollection,
std::size_t /*maxChartSpan*/)
{
- return NULL;
+ return NULL;
//return new ChartRuleLookupManagerSkeleton(parser, cellCollection, *this);
}
@@ -155,17 +154,17 @@ PhraseDictionaryTransliteration::
SetParameter(const std::string& key, const std::string& value)
{
if (key == "moses-dir") {
- m_mosesDir = value;
+ m_mosesDir = value;
} else if (key == "script-dir") {
- m_scriptDir = value;
+ m_scriptDir = value;
} else if (key == "external-dir") {
- m_externalDir = value;
+ m_externalDir = value;
} else if (key == "input-lang") {
- m_inputLang = value;
+ m_inputLang = value;
} else if (key == "output-lang") {
- m_outputLang = value;
+ m_outputLang = value;
} else {
- PhraseDictionary::SetParameter(key, value);
+ PhraseDictionary::SetParameter(key, value);
}
}
diff --git a/moses/TranslationModel/PhraseDictionaryTree.cpp b/moses/TranslationModel/PhraseDictionaryTree.cpp
index dcd63c520..0e031b362 100644
--- a/moses/TranslationModel/PhraseDictionaryTree.cpp
+++ b/moses/TranslationModel/PhraseDictionaryTree.cpp
@@ -234,7 +234,7 @@ public:
typedef PhraseDictionaryTree::PrefixPtr PPtr;
void GetTargetCandidates(PPtr p,TgtCands& tgtCands) {
- UTIL_THROW_IF2(p == 0L, "Error");
+ UTIL_THROW_IF2(p == 0L, "Error");
// UTIL_THROW_IF2(p == NULL, "Error");
if(p.imp->isRoot()) return;
@@ -280,8 +280,8 @@ public:
}
PPtr Extend(PPtr p,const std::string& w) {
- UTIL_THROW_IF2(p == 0L, "Error");
- // UTIL_THROW_IF2(p == NULL, "Error");
+ UTIL_THROW_IF2(p == 0L, "Error");
+ // UTIL_THROW_IF2(p == NULL, "Error");
if(w.empty() || w==EPSILON) return p;
@@ -320,7 +320,7 @@ int PDTimp::Read(const std::string& fn)
if (NeedAlignmentInfo() && !HasAlignmentInfo()) {
// ERROR
- std::cerr << "You are asking for word alignment but the binary phrase table does not contain any alignment info. Please check if you had generated the correct phrase table with word alignment (.wa)\n";
+ std::cerr << "You are asking for word alignment but the binary phrase table does not contain any alignment info. Please check if you had generated the correct phrase table with word alignment (.wa)\n";
return false;
}
@@ -351,7 +351,7 @@ int PDTimp::Read(const std::string& fn)
tv.Read(iftv);
VERBOSE(1,"binary phrasefile loaded, default OFF_T: "
- <<PTF::getDefault() <<"\n");
+ <<PTF::getDefault() <<"\n");
return 1;
}
@@ -381,8 +381,8 @@ PhraseDictionaryTree::PhraseDictionaryTree()
: imp(new PDTimp)
{
if(sizeof(OFF_T)!=8) {
- UTIL_THROW2("ERROR: size of type 'OFF_T' has to be 64 bit!\n"
- "In gcc, use compiler settings '-D_FILE_OFFSET_BITS=64 -D_LARGE_FILES'\n");
+ UTIL_THROW2("ERROR: size of type 'OFF_T' has to be 64 bit!\n"
+ "In gcc, use compiler settings '-D_FILE_OFFSET_BITS=64 -D_LARGE_FILES'\n");
}
}
@@ -501,7 +501,7 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
// init numElement
numElement = tokens.size();
UTIL_THROW_IF2(numElement < (PrintWordAlignment()?4:3),
- "Format error");
+ "Format error");
}
if (tokens.size() != numElement) {
@@ -550,8 +550,8 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
PSA::Data& d=psa->insert(f);
if(d==InvalidOffT) d=fTell(ot);
else {
- UTIL_THROW2("ERROR: source phrase already inserted (A)!\nline(" << lnc << "): '"
- <<line);
+ UTIL_THROW2("ERROR: source phrase already inserted (A)!\nline(" << lnc << "): '"
+ <<line);
}
}
@@ -560,8 +560,8 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
if (!sparseFeatureString.empty()) {
std::vector<std::string> sparseTokens = Tokenize(sparseFeatureString);
if (sparseTokens.size() % 2 != 0) {
- UTIL_THROW2("ERROR: incorrectly formatted sparse feature string: " <<
- sparseFeatureString);
+ UTIL_THROW2("ERROR: incorrectly formatted sparse feature string: " <<
+ sparseFeatureString);
}
for (size_t i = 0; i < sparseTokens.size(); i+=2) {
fnames.push_back(imp->tv.add(sparseTokens[i]));
@@ -602,13 +602,13 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
PSA::Data& d=psa->insert(f);
if(d==InvalidOffT) d=fTell(ot);
else {
- UTIL_THROW2("ERROR: xsource phrase already inserted (B)!\nline(" << lnc << "): '"
- <<line);
+ UTIL_THROW2("ERROR: xsource phrase already inserted (B)!\nline(" << lnc << "): '"
+ <<line);
}
}
tgtCands.push_back(TgtCand(e,sc, alignmentString));
UTIL_THROW_IF2(currFirstWord == InvalidLabelId,
- "Uninitialize word");
+ "Uninitialize word");
tgtCands.back().SetFeatures(fnames, fvalues);
}
if (PrintWordAlignment())
diff --git a/moses/TranslationModel/ProbingPT/ProbingPT.cpp b/moses/TranslationModel/ProbingPT/ProbingPT.cpp
index b854c8c02..6ccf7dcd8 100644
--- a/moses/TranslationModel/ProbingPT/ProbingPT.cpp
+++ b/moses/TranslationModel/ProbingPT/ProbingPT.cpp
@@ -10,8 +10,8 @@ using namespace std;
namespace Moses
{
ProbingPT::ProbingPT(const std::string &line)
-: PhraseDictionary(line)
-,m_engine(NULL)
+ : PhraseDictionary(line)
+ ,m_engine(NULL)
{
ReadParameters();
@@ -26,39 +26,39 @@ ProbingPT::~ProbingPT()
void ProbingPT::Load()
{
- SetFeaturesToApply();
+ SetFeaturesToApply();
- m_engine = new QueryEngine(m_filePath.c_str());
+ m_engine = new QueryEngine(m_filePath.c_str());
- m_unkId = 456456546456;
+ m_unkId = 456456546456;
- // source vocab
- const std::map<uint64_t, std::string> &sourceVocab = m_engine->getSourceVocab();
- std::map<uint64_t, std::string>::const_iterator iterSource;
- for (iterSource = sourceVocab.begin(); iterSource != sourceVocab.end(); ++iterSource) {
- const string &wordStr = iterSource->second;
- const Factor *factor = FactorCollection::Instance().AddFactor(wordStr);
+ // source vocab
+ const std::map<uint64_t, std::string> &sourceVocab = m_engine->getSourceVocab();
+ std::map<uint64_t, std::string>::const_iterator iterSource;
+ for (iterSource = sourceVocab.begin(); iterSource != sourceVocab.end(); ++iterSource) {
+ const string &wordStr = iterSource->second;
+ const Factor *factor = FactorCollection::Instance().AddFactor(wordStr);
- uint64_t probingId = iterSource->first;
+ uint64_t probingId = iterSource->first;
- SourceVocabMap::value_type entry(factor, probingId);
- m_sourceVocabMap.insert(entry);
+ SourceVocabMap::value_type entry(factor, probingId);
+ m_sourceVocabMap.insert(entry);
- }
+ }
- // target vocab
- const std::map<unsigned int, std::string> &probingVocab = m_engine->getVocab();
- std::map<unsigned int, std::string>::const_iterator iter;
- for (iter = probingVocab.begin(); iter != probingVocab.end(); ++iter) {
- const string &wordStr = iter->second;
- const Factor *factor = FactorCollection::Instance().AddFactor(wordStr);
+ // target vocab
+ const std::map<unsigned int, std::string> &probingVocab = m_engine->getVocab();
+ std::map<unsigned int, std::string>::const_iterator iter;
+ for (iter = probingVocab.begin(); iter != probingVocab.end(); ++iter) {
+ const string &wordStr = iter->second;
+ const Factor *factor = FactorCollection::Instance().AddFactor(wordStr);
- unsigned int probingId = iter->first;
+ unsigned int probingId = iter->first;
- TargetVocabMap::value_type entry(factor, probingId);
- m_vocabMap.insert(entry);
+ TargetVocabMap::value_type entry(factor, probingId);
+ m_vocabMap.insert(entry);
- }
+ }
}
void ProbingPT::InitializeForInput(InputType const& source)
@@ -76,15 +76,15 @@ void ProbingPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQue
const Phrase &sourcePhrase = inputPath.GetPhrase();
if (sourcePhrase.GetSize() > StaticData::Instance().GetMaxPhraseLength()) {
- continue;
+ continue;
}
TargetPhraseCollection *tpColl = CreateTargetPhrase(sourcePhrase);
// add target phrase to phrase-table cache
size_t hash = hash_value(sourcePhrase);
- std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
- cache[hash] = value;
+ std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
+ cache[hash] = value;
inputPath.SetTargetPhrases(*this, tpColl, NULL);
}
@@ -95,15 +95,14 @@ std::vector<uint64_t> ProbingPT::ConvertToProbingSourcePhrase(const Phrase &sour
size_t size = sourcePhrase.GetSize();
std::vector<uint64_t> ret(size);
for (size_t i = 0; i < size; ++i) {
- const Factor *factor = sourcePhrase.GetFactor(i, m_input[0]);
- uint64_t probingId = GetSourceProbingId(factor);
- if (probingId == m_unkId) {
- ok = false;
- return ret;
- }
- else {
- ret[i] = probingId;
- }
+ const Factor *factor = sourcePhrase.GetFactor(i, m_input[0]);
+ uint64_t probingId = GetSourceProbingId(factor);
+ if (probingId == m_unkId) {
+ ok = false;
+ return ret;
+ } else {
+ ret[i] = probingId;
+ }
}
ok = true;
@@ -118,9 +117,9 @@ TargetPhraseCollection *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase
bool ok;
vector<uint64_t> probingSource = ConvertToProbingSourcePhrase(sourcePhrase, ok);
if (!ok) {
- // source phrase contains a word unknown in the pt.
- // We know immediately there's no translation for it
- return NULL;
+ // source phrase contains a word unknown in the pt.
+ // We know immediately there's no translation for it
+ return NULL;
}
std::pair<bool, std::vector<target_text> > query_result;
@@ -131,18 +130,18 @@ TargetPhraseCollection *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase
query_result = m_engine->query(probingSource);
if (query_result.first) {
- //m_engine->printTargetInfo(query_result.second);
- tpColl = new TargetPhraseCollection();
+ //m_engine->printTargetInfo(query_result.second);
+ tpColl = new TargetPhraseCollection();
- const std::vector<target_text> &probingTargetPhrases = query_result.second;
- for (size_t i = 0; i < probingTargetPhrases.size(); ++i) {
- const target_text &probingTargetPhrase = probingTargetPhrases[i];
- TargetPhrase *tp = CreateTargetPhrase(sourcePhrase, probingTargetPhrase);
+ const std::vector<target_text> &probingTargetPhrases = query_result.second;
+ for (size_t i = 0; i < probingTargetPhrases.size(); ++i) {
+ const target_text &probingTargetPhrase = probingTargetPhrases[i];
+ TargetPhrase *tp = CreateTargetPhrase(sourcePhrase, probingTargetPhrase);
- tpColl->Add(tp);
- }
+ tpColl->Add(tp);
+ }
- tpColl->Prune(true, m_tableLimit);
+ tpColl->Prune(true, m_tableLimit);
}
return tpColl;
@@ -157,16 +156,16 @@ TargetPhrase *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase, const ta
// words
for (size_t i = 0; i < size; ++i) {
- uint64_t probingId = probingPhrase[i];
- const Factor *factor = GetTargetFactor(probingId);
- assert(factor);
+ uint64_t probingId = probingPhrase[i];
+ const Factor *factor = GetTargetFactor(probingId);
+ assert(factor);
- Word &word = tp->AddWord();
- word.SetFactor(m_output[0], factor);
+ Word &word = tp->AddWord();
+ word.SetFactor(m_output[0], factor);
}
// score for this phrase table
- vector<float> scores = probingTargetPhrase.prob;
+ vector<float> scores = probingTargetPhrase.prob;
std::transform(scores.begin(), scores.end(), scores.begin(),TransformScore);
tp->GetScoreBreakdown().PlusEquals(this, scores);
@@ -176,7 +175,7 @@ TargetPhrase *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase, const ta
AlignmentInfo &aligns = tp->GetAlignTerm();
for (size_t i = 0; i < alignS.size(); i += 2 ) {
- aligns.Add((size_t) alignments[i], (size_t) alignments[i+1]);
+ aligns.Add((size_t) alignments[i], (size_t) alignments[i+1]);
}
*/
@@ -187,28 +186,26 @@ TargetPhrase *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase, const ta
const Factor *ProbingPT::GetTargetFactor(uint64_t probingId) const
{
- TargetVocabMap::right_map::const_iterator iter;
- iter = m_vocabMap.right.find(probingId);
- if (iter != m_vocabMap.right.end()) {
- return iter->second;
- }
- else {
- // not in mapping. Must be UNK
- return NULL;
- }
+ TargetVocabMap::right_map::const_iterator iter;
+ iter = m_vocabMap.right.find(probingId);
+ if (iter != m_vocabMap.right.end()) {
+ return iter->second;
+ } else {
+ // not in mapping. Must be UNK
+ return NULL;
+ }
}
uint64_t ProbingPT::GetSourceProbingId(const Factor *factor) const
{
- SourceVocabMap::left_map::const_iterator iter;
- iter = m_sourceVocabMap.left.find(factor);
- if (iter != m_sourceVocabMap.left.end()) {
- return iter->second;
- }
- else {
- // not in mapping. Must be UNK
- return m_unkId;
- }
+ SourceVocabMap::left_map::const_iterator iter;
+ iter = m_sourceVocabMap.left.find(factor);
+ if (iter != m_sourceVocabMap.left.end()) {
+ return iter->second;
+ } else {
+ // not in mapping. Must be UNK
+ return m_unkId;
+ }
}
ChartRuleLookupManager *ProbingPT::CreateRuleLookupManager(
diff --git a/moses/TranslationModel/ProbingPT/hash.cpp b/moses/TranslationModel/ProbingPT/hash.cpp
index 1049292b1..8945649ef 100644
--- a/moses/TranslationModel/ProbingPT/hash.cpp
+++ b/moses/TranslationModel/ProbingPT/hash.cpp
@@ -1,27 +1,30 @@
-#include "hash.hh"
+#include "hash.hh"
-uint64_t getHash(StringPiece text) {
- std::size_t len = text.size();
- uint64_t key = util::MurmurHashNative(text.data(), len);
- return key;
+uint64_t getHash(StringPiece text)
+{
+ std::size_t len = text.size();
+ uint64_t key = util::MurmurHashNative(text.data(), len);
+ return key;
}
-std::vector<uint64_t> getVocabIDs(StringPiece textin){
- //Tokenize
- std::vector<uint64_t> output;
+std::vector<uint64_t> getVocabIDs(StringPiece textin)
+{
+ //Tokenize
+ std::vector<uint64_t> output;
- util::TokenIter<util::SingleCharacter> it(textin, util::SingleCharacter(' '));
+ util::TokenIter<util::SingleCharacter> it(textin, util::SingleCharacter(' '));
- while(it){
- output.push_back(getHash(*it));
- it++;
- }
+ while(it) {
+ output.push_back(getHash(*it));
+ it++;
+ }
- return output;
+ return output;
}
-uint64_t getVocabID(std::string candidate) {
- std::size_t len = candidate.length();
- uint64_t key = util::MurmurHashNative(candidate.c_str(), len);
- return key;
+uint64_t getVocabID(std::string candidate)
+{
+ std::size_t len = candidate.length();
+ uint64_t key = util::MurmurHashNative(candidate.c_str(), len);
+ return key;
} \ No newline at end of file
diff --git a/moses/TranslationModel/ProbingPT/huffmanish.cpp b/moses/TranslationModel/ProbingPT/huffmanish.cpp
index bf3989c0e..7bb6f1af3 100644
--- a/moses/TranslationModel/ProbingPT/huffmanish.cpp
+++ b/moses/TranslationModel/ProbingPT/huffmanish.cpp
@@ -1,430 +1,451 @@
#include "huffmanish.hh"
-Huffman::Huffman (const char * filepath) {
- //Read the file
- util::FilePiece filein(filepath);
+Huffman::Huffman (const char * filepath)
+{
+ //Read the file
+ util::FilePiece filein(filepath);
- //Init uniq_lines to zero;
- uniq_lines = 0;
+ //Init uniq_lines to zero;
+ uniq_lines = 0;
- line_text prev_line; //Check for unique lines.
- int num_lines = 0 ;
+ line_text prev_line; //Check for unique lines.
+ int num_lines = 0 ;
- while (true){
- line_text new_line;
+ while (true) {
+ line_text new_line;
- num_lines++;
+ num_lines++;
- try {
- //Process line read
- new_line = splitLine(filein.ReadLine());
- count_elements(new_line); //Counts the number of elements, adds new and increments counters.
+ try {
+ //Process line read
+ new_line = splitLine(filein.ReadLine());
+ count_elements(new_line); //Counts the number of elements, adds new and increments counters.
- } catch (util::EndOfFileException e){
- std::cerr << "Unique entries counted: ";
- break;
- }
-
- if (new_line.source_phrase == prev_line.source_phrase){
- continue;
- } else {
- uniq_lines++;
- prev_line = new_line;
- }
+ } catch (util::EndOfFileException e) {
+ std::cerr << "Unique entries counted: ";
+ break;
}
- std::cerr << uniq_lines << std::endl;
-}
-
-void Huffman::count_elements(line_text linein){
- //For target phrase:
- util::TokenIter<util::SingleCharacter> it(linein.target_phrase, util::SingleCharacter(' '));
- while (it) {
- //Check if we have that entry
- std::map<std::string, unsigned int>::iterator mapiter;
- mapiter = target_phrase_words.find(it->as_string());
-
- if (mapiter != target_phrase_words.end()){
- //If the element is found, increment the count.
- mapiter->second++;
- } else {
- //Else create a new entry;
- target_phrase_words.insert(std::pair<std::string, unsigned int>(it->as_string(), 1));
- }
- it++;
+ if (new_line.source_phrase == prev_line.source_phrase) {
+ continue;
+ } else {
+ uniq_lines++;
+ prev_line = new_line;
}
+ }
- //For word allignment 1
- std::map<std::vector<unsigned char>, unsigned int>::iterator mapiter3;
- std::vector<unsigned char> numbers = splitWordAll1(linein.word_all1);
- mapiter3 = word_all1.find(numbers);
+ std::cerr << uniq_lines << std::endl;
+}
- if (mapiter3 != word_all1.end()){
- //If the element is found, increment the count.
- mapiter3->second++;
+void Huffman::count_elements(line_text linein)
+{
+ //For target phrase:
+ util::TokenIter<util::SingleCharacter> it(linein.target_phrase, util::SingleCharacter(' '));
+ while (it) {
+ //Check if we have that entry
+ std::map<std::string, unsigned int>::iterator mapiter;
+ mapiter = target_phrase_words.find(it->as_string());
+
+ if (mapiter != target_phrase_words.end()) {
+ //If the element is found, increment the count.
+ mapiter->second++;
} else {
- //Else create a new entry;
- word_all1.insert(std::pair<std::vector<unsigned char>, unsigned int>(numbers, 1));
+ //Else create a new entry;
+ target_phrase_words.insert(std::pair<std::string, unsigned int>(it->as_string(), 1));
}
+ it++;
+ }
+
+ //For word allignment 1
+ std::map<std::vector<unsigned char>, unsigned int>::iterator mapiter3;
+ std::vector<unsigned char> numbers = splitWordAll1(linein.word_all1);
+ mapiter3 = word_all1.find(numbers);
+
+ if (mapiter3 != word_all1.end()) {
+ //If the element is found, increment the count.
+ mapiter3->second++;
+ } else {
+ //Else create a new entry;
+ word_all1.insert(std::pair<std::vector<unsigned char>, unsigned int>(numbers, 1));
+ }
}
//Assigns huffman values for each unique element
-void Huffman::assign_values() {
- //First create vectors for all maps so that we could sort them later.
-
- //Create a vector for target phrases
- for(std::map<std::string, unsigned int>::iterator it = target_phrase_words.begin(); it != target_phrase_words.end(); it++ ) {
- target_phrase_words_counts.push_back(*it);
- }
- //Sort it
- std::sort(target_phrase_words_counts.begin(), target_phrase_words_counts.end(), sort_pair());
-
- //Create a vector for word allignments 1
- for(std::map<std::vector<unsigned char>, unsigned int>::iterator it = word_all1.begin(); it != word_all1.end(); it++ ) {
- word_all1_counts.push_back(*it);
- }
- //Sort it
- std::sort(word_all1_counts.begin(), word_all1_counts.end(), sort_pair_vec());
-
-
- //Afterwards we assign a value for each phrase, starting from 1, as zero is reserved for delimiter
- unsigned int i = 1; //huffman code
- for(std::vector<std::pair<std::string, unsigned int> >::iterator it = target_phrase_words_counts.begin();
- it != target_phrase_words_counts.end(); it++){
- target_phrase_huffman.insert(std::pair<std::string, unsigned int>(it->first, i));
- i++; //Go to the next huffman code
- }
-
- i = 1; //Reset i for the next map
- for(std::vector<std::pair<std::vector<unsigned char>, unsigned int> >::iterator it = word_all1_counts.begin();
- it != word_all1_counts.end(); it++){
- word_all1_huffman.insert(std::pair<std::vector<unsigned char>, unsigned int>(it->first, i));
- i++; //Go to the next huffman code
- }
-
- //After lookups are produced, clear some memory usage of objects not needed anymore.
- target_phrase_words.clear();
- word_all1.clear();
-
- target_phrase_words_counts.clear();
- word_all1_counts.clear();
-
- std::cerr << "Finished generating huffman codes." << std::endl;
+void Huffman::assign_values()
+{
+ //First create vectors for all maps so that we could sort them later.
+
+ //Create a vector for target phrases
+ for(std::map<std::string, unsigned int>::iterator it = target_phrase_words.begin(); it != target_phrase_words.end(); it++ ) {
+ target_phrase_words_counts.push_back(*it);
+ }
+ //Sort it
+ std::sort(target_phrase_words_counts.begin(), target_phrase_words_counts.end(), sort_pair());
+
+ //Create a vector for word allignments 1
+ for(std::map<std::vector<unsigned char>, unsigned int>::iterator it = word_all1.begin(); it != word_all1.end(); it++ ) {
+ word_all1_counts.push_back(*it);
+ }
+ //Sort it
+ std::sort(word_all1_counts.begin(), word_all1_counts.end(), sort_pair_vec());
+
+
+ //Afterwards we assign a value for each phrase, starting from 1, as zero is reserved for delimiter
+ unsigned int i = 1; //huffman code
+ for(std::vector<std::pair<std::string, unsigned int> >::iterator it = target_phrase_words_counts.begin();
+ it != target_phrase_words_counts.end(); it++) {
+ target_phrase_huffman.insert(std::pair<std::string, unsigned int>(it->first, i));
+ i++; //Go to the next huffman code
+ }
+
+ i = 1; //Reset i for the next map
+ for(std::vector<std::pair<std::vector<unsigned char>, unsigned int> >::iterator it = word_all1_counts.begin();
+ it != word_all1_counts.end(); it++) {
+ word_all1_huffman.insert(std::pair<std::vector<unsigned char>, unsigned int>(it->first, i));
+ i++; //Go to the next huffman code
+ }
+
+ //After lookups are produced, clear some memory usage of objects not needed anymore.
+ target_phrase_words.clear();
+ word_all1.clear();
+
+ target_phrase_words_counts.clear();
+ word_all1_counts.clear();
+
+ std::cerr << "Finished generating huffman codes." << std::endl;
}
-void Huffman::serialize_maps(const char * dirname){
- //Note that directory name should exist.
- std::string basedir(dirname);
- std::string target_phrase_path(basedir + "/target_phrases");
- std::string probabilities_path(basedir + "/probs");
- std::string word_all1_path(basedir + "/Wall1");
-
- //Target phrase
- std::ofstream os (target_phrase_path.c_str(), std::ios::binary);
- boost::archive::text_oarchive oarch(os);
- oarch << lookup_target_phrase;
- os.close();
-
- //Word all1
- std::ofstream os2 (word_all1_path.c_str(), std::ios::binary);
- boost::archive::text_oarchive oarch2(os2);
- oarch2 << lookup_word_all1;
- os2.close();
+void Huffman::serialize_maps(const char * dirname)
+{
+ //Note that directory name should exist.
+ std::string basedir(dirname);
+ std::string target_phrase_path(basedir + "/target_phrases");
+ std::string probabilities_path(basedir + "/probs");
+ std::string word_all1_path(basedir + "/Wall1");
+
+ //Target phrase
+ std::ofstream os (target_phrase_path.c_str(), std::ios::binary);
+ boost::archive::text_oarchive oarch(os);
+ oarch << lookup_target_phrase;
+ os.close();
+
+ //Word all1
+ std::ofstream os2 (word_all1_path.c_str(), std::ios::binary);
+ boost::archive::text_oarchive oarch2(os2);
+ oarch2 << lookup_word_all1;
+ os2.close();
}
-std::vector<unsigned char> Huffman::full_encode_line(line_text line){
- return vbyte_encode_line((encode_line(line)));
+std::vector<unsigned char> Huffman::full_encode_line(line_text line)
+{
+ return vbyte_encode_line((encode_line(line)));
}
-std::vector<unsigned int> Huffman::encode_line(line_text line){
- std::vector<unsigned int> retvector;
-
- //Get target_phrase first.
- util::TokenIter<util::SingleCharacter> it(line.target_phrase, util::SingleCharacter(' '));
- while (it) {
- retvector.push_back(target_phrase_huffman.find(it->as_string())->second);
- it++;
- }
- //Add a zero;
- retvector.push_back(0);
-
- //Get probabilities. Reinterpreting the float bytes as unsgined int.
- util::TokenIter<util::SingleCharacter> probit(line.prob, util::SingleCharacter(' '));
- while (probit) {
- //Sometimes we have too big floats to handle, so first convert to double
- double tempnum = atof(probit->data());
- float num = (float)tempnum;
- retvector.push_back(reinterpret_float(&num));
- probit++;
- }
- //Add a zero;
- retvector.push_back(0);
-
-
- //Get Word allignments
- retvector.push_back(word_all1_huffman.find(splitWordAll1(line.word_all1))->second);
- retvector.push_back(0);
-
- return retvector;
+std::vector<unsigned int> Huffman::encode_line(line_text line)
+{
+ std::vector<unsigned int> retvector;
+
+ //Get target_phrase first.
+ util::TokenIter<util::SingleCharacter> it(line.target_phrase, util::SingleCharacter(' '));
+ while (it) {
+ retvector.push_back(target_phrase_huffman.find(it->as_string())->second);
+ it++;
+ }
+ //Add a zero;
+ retvector.push_back(0);
+
+  //Get probabilities. Reinterpreting the float bytes as unsigned int.
+ util::TokenIter<util::SingleCharacter> probit(line.prob, util::SingleCharacter(' '));
+ while (probit) {
+ //Sometimes we have too big floats to handle, so first convert to double
+ double tempnum = atof(probit->data());
+ float num = (float)tempnum;
+ retvector.push_back(reinterpret_float(&num));
+ probit++;
+ }
+ //Add a zero;
+ retvector.push_back(0);
+
+
+  //Get word alignments
+ retvector.push_back(word_all1_huffman.find(splitWordAll1(line.word_all1))->second);
+ retvector.push_back(0);
+
+ return retvector;
}
-void Huffman::produce_lookups(){
- //basically invert every map that we have
- for(std::map<std::string, unsigned int>::iterator it = target_phrase_huffman.begin(); it != target_phrase_huffman.end(); it++ ) {
- lookup_target_phrase.insert(std::pair<unsigned int, std::string>(it->second, it->first));
- }
+void Huffman::produce_lookups()
+{
+ //basically invert every map that we have
+ for(std::map<std::string, unsigned int>::iterator it = target_phrase_huffman.begin(); it != target_phrase_huffman.end(); it++ ) {
+ lookup_target_phrase.insert(std::pair<unsigned int, std::string>(it->second, it->first));
+ }
- for(std::map<std::vector<unsigned char>, unsigned int>::iterator it = word_all1_huffman.begin(); it != word_all1_huffman.end(); it++ ) {
- lookup_word_all1.insert(std::pair<unsigned int, std::vector<unsigned char> >(it->second, it->first));
- }
+ for(std::map<std::vector<unsigned char>, unsigned int>::iterator it = word_all1_huffman.begin(); it != word_all1_huffman.end(); it++ ) {
+ lookup_word_all1.insert(std::pair<unsigned int, std::vector<unsigned char> >(it->second, it->first));
+ }
}
-HuffmanDecoder::HuffmanDecoder (const char * dirname){
- //Read the maps from disk
+HuffmanDecoder::HuffmanDecoder (const char * dirname)
+{
+ //Read the maps from disk
- //Note that directory name should exist.
- std::string basedir(dirname);
- std::string target_phrase_path(basedir + "/target_phrases");
- std::string word_all1_path(basedir + "/Wall1");
+ //Note that directory name should exist.
+ std::string basedir(dirname);
+ std::string target_phrase_path(basedir + "/target_phrases");
+ std::string word_all1_path(basedir + "/Wall1");
- //Target phrases
- std::ifstream is (target_phrase_path.c_str(), std::ios::binary);
- boost::archive::text_iarchive iarch(is);
- iarch >> lookup_target_phrase;
- is.close();
+ //Target phrases
+ std::ifstream is (target_phrase_path.c_str(), std::ios::binary);
+ boost::archive::text_iarchive iarch(is);
+ iarch >> lookup_target_phrase;
+ is.close();
- //Word allignment 1
- std::ifstream is2 (word_all1_path.c_str(), std::ios::binary);
- boost::archive::text_iarchive iarch2(is2);
- iarch2 >> lookup_word_all1;
- is2.close();
+  //Word alignment 1
+ std::ifstream is2 (word_all1_path.c_str(), std::ios::binary);
+ boost::archive::text_iarchive iarch2(is2);
+ iarch2 >> lookup_word_all1;
+ is2.close();
}
HuffmanDecoder::HuffmanDecoder (std::map<unsigned int, std::string> * lookup_target,
- std::map<unsigned int, std::vector<unsigned char> > * lookup_word1) {
- lookup_target_phrase = *lookup_target;
- lookup_word_all1 = *lookup_word1;
+ std::map<unsigned int, std::vector<unsigned char> > * lookup_word1)
+{
+ lookup_target_phrase = *lookup_target;
+ lookup_word_all1 = *lookup_word1;
}
-std::vector<target_text> HuffmanDecoder::full_decode_line (std::vector<unsigned char> lines, int num_scores){
- std::vector<target_text> retvector; //All target phrases
- std::vector<unsigned int> decoded_lines = vbyte_decode_line(lines); //All decoded lines
- std::vector<unsigned int>::iterator it = decoded_lines.begin(); //Iterator for them
- std::vector<unsigned int> current_target_phrase; //Current target phrase decoded
-
- short zero_count = 0; //Count home many zeroes we have met. so far. Every 3 zeroes mean a new target phrase.
- while(it != decoded_lines.end()){
- if (zero_count == 1) {
- //We are extracting scores. we know how many scores there are so we can push them
- //to the vector. This is done in case any of the scores is 0, because it would mess
- //up the state machine.
- for (int i = 0; i < num_scores; i++){
- current_target_phrase.push_back(*it);
- it++;
- }
- }
-
- if (zero_count == 3) {
- //We have finished with this entry, decode it, and add it to the retvector.
- retvector.push_back(decode_line(current_target_phrase, num_scores));
- current_target_phrase.clear(); //Clear the current target phrase and the zero_count
- zero_count = 0; //So that we can reuse them for the next target phrase
- }
- //Add to the next target_phrase, number by number.
+std::vector<target_text> HuffmanDecoder::full_decode_line (std::vector<unsigned char> lines, int num_scores)
+{
+ std::vector<target_text> retvector; //All target phrases
+ std::vector<unsigned int> decoded_lines = vbyte_decode_line(lines); //All decoded lines
+ std::vector<unsigned int>::iterator it = decoded_lines.begin(); //Iterator for them
+ std::vector<unsigned int> current_target_phrase; //Current target phrase decoded
+
+  short zero_count = 0; //Count how many zeroes we have met so far. Every 3 zeroes mean a new target phrase.
+ while(it != decoded_lines.end()) {
+ if (zero_count == 1) {
+ //We are extracting scores. we know how many scores there are so we can push them
+ //to the vector. This is done in case any of the scores is 0, because it would mess
+ //up the state machine.
+ for (int i = 0; i < num_scores; i++) {
current_target_phrase.push_back(*it);
- if (*it == 0) {
- zero_count++;
- }
- it++; //Go to the next word/symbol
+ it++;
+ }
}
- //Don't forget the last remaining line!
+
if (zero_count == 3) {
- //We have finished with this entry, decode it, and add it to the retvector.
- retvector.push_back(decode_line(current_target_phrase, num_scores));
- current_target_phrase.clear(); //Clear the current target phrase and the zero_count
- zero_count = 0; //So that we can reuse them for the next target phrase
+ //We have finished with this entry, decode it, and add it to the retvector.
+ retvector.push_back(decode_line(current_target_phrase, num_scores));
+ current_target_phrase.clear(); //Clear the current target phrase and the zero_count
+ zero_count = 0; //So that we can reuse them for the next target phrase
}
-
- return retvector;
+ //Add to the next target_phrase, number by number.
+ current_target_phrase.push_back(*it);
+ if (*it == 0) {
+ zero_count++;
+ }
+ it++; //Go to the next word/symbol
+ }
+ //Don't forget the last remaining line!
+ if (zero_count == 3) {
+ //We have finished with this entry, decode it, and add it to the retvector.
+ retvector.push_back(decode_line(current_target_phrase, num_scores));
+ current_target_phrase.clear(); //Clear the current target phrase and the zero_count
+ zero_count = 0; //So that we can reuse them for the next target phrase
+ }
+
+ return retvector;
}
-target_text HuffmanDecoder::decode_line (std::vector<unsigned int> input, int num_scores){
- //demo decoder
- target_text ret;
- //Split everything
- std::vector<unsigned int> target_phrase;
- std::vector<unsigned int> probs;
- unsigned int wAll;
-
- //Split the line into the proper arrays
- short num_zeroes = 0;
- int counter = 0;
- while (num_zeroes < 3){
- unsigned int num = input[counter];
- if (num == 0) {
- num_zeroes++;
- } else if (num_zeroes == 0){
- target_phrase.push_back(num);
- } else if (num_zeroes == 1){
- //Push exactly num_scores scores
- for (int i = 0; i < num_scores; i++){
- probs.push_back(num);
- counter++;
- num = input[counter];
- }
- continue;
- } else if (num_zeroes == 2){
- wAll = num;
- }
+target_text HuffmanDecoder::decode_line (std::vector<unsigned int> input, int num_scores)
+{
+ //demo decoder
+ target_text ret;
+ //Split everything
+ std::vector<unsigned int> target_phrase;
+ std::vector<unsigned int> probs;
+ unsigned int wAll;
+
+ //Split the line into the proper arrays
+ short num_zeroes = 0;
+ int counter = 0;
+ while (num_zeroes < 3) {
+ unsigned int num = input[counter];
+ if (num == 0) {
+ num_zeroes++;
+ } else if (num_zeroes == 0) {
+ target_phrase.push_back(num);
+ } else if (num_zeroes == 1) {
+ //Push exactly num_scores scores
+ for (int i = 0; i < num_scores; i++) {
+ probs.push_back(num);
counter++;
+ num = input[counter];
+ }
+ continue;
+ } else if (num_zeroes == 2) {
+ wAll = num;
}
+ counter++;
+ }
- ret.target_phrase = target_phrase;
- ret.word_all1 = lookup_word_all1.find(wAll)->second;
+ ret.target_phrase = target_phrase;
+ ret.word_all1 = lookup_word_all1.find(wAll)->second;
- //Decode probabilities
- for (std::vector<unsigned int>::iterator it = probs.begin(); it != probs.end(); it++){
- ret.prob.push_back(reinterpret_uint(&(*it)));
- }
+ //Decode probabilities
+ for (std::vector<unsigned int>::iterator it = probs.begin(); it != probs.end(); it++) {
+ ret.prob.push_back(reinterpret_uint(&(*it)));
+ }
- return ret;
+ return ret;
}
-inline std::string HuffmanDecoder::getTargetWordFromID(unsigned int id) {
- return lookup_target_phrase.find(id)->second;
+inline std::string HuffmanDecoder::getTargetWordFromID(unsigned int id)
+{
+ return lookup_target_phrase.find(id)->second;
}
-std::string HuffmanDecoder::getTargetWordsFromIDs(std::vector<unsigned int> ids){
- std::string returnstring;
- for (std::vector<unsigned int>::iterator it = ids.begin(); it != ids.end(); it++){
- returnstring.append(getTargetWordFromID(*it) + " ");
- }
+std::string HuffmanDecoder::getTargetWordsFromIDs(std::vector<unsigned int> ids)
+{
+ std::string returnstring;
+ for (std::vector<unsigned int>::iterator it = ids.begin(); it != ids.end(); it++) {
+ returnstring.append(getTargetWordFromID(*it) + " ");
+ }
- return returnstring;
+ return returnstring;
}
-inline std::string getTargetWordFromID(unsigned int id, std::map<unsigned int, std::string> * lookup_target_phrase) {
- return lookup_target_phrase->find(id)->second;
+inline std::string getTargetWordFromID(unsigned int id, std::map<unsigned int, std::string> * lookup_target_phrase)
+{
+ return lookup_target_phrase->find(id)->second;
}
-std::string getTargetWordsFromIDs(std::vector<unsigned int> ids, std::map<unsigned int, std::string> * lookup_target_phrase) {
- std::string returnstring;
- for (std::vector<unsigned int>::iterator it = ids.begin(); it != ids.end(); it++){
- returnstring.append(getTargetWordFromID(*it, lookup_target_phrase) + " ");
- }
+std::string getTargetWordsFromIDs(std::vector<unsigned int> ids, std::map<unsigned int, std::string> * lookup_target_phrase)
+{
+ std::string returnstring;
+ for (std::vector<unsigned int>::iterator it = ids.begin(); it != ids.end(); it++) {
+ returnstring.append(getTargetWordFromID(*it, lookup_target_phrase) + " ");
+ }
- return returnstring;
+ return returnstring;
}
/*Those functions are used to more easily store the floats in the binary phrase table
We convert the float unsinged int so that it is the same as our other values and we can
apply variable byte encoding on top of it.*/
-inline unsigned int reinterpret_float(float * num){
- unsigned int * converted_num;
- converted_num = reinterpret_cast<unsigned int *>(num);
- return *converted_num;
+inline unsigned int reinterpret_float(float * num)
+{
+ unsigned int * converted_num;
+ converted_num = reinterpret_cast<unsigned int *>(num);
+ return *converted_num;
}
-inline float reinterpret_uint(unsigned int * num){
- float * converted_num;
- converted_num = reinterpret_cast<float *>(num);
- return *converted_num;
+inline float reinterpret_uint(unsigned int * num)
+{
+ float * converted_num;
+ converted_num = reinterpret_cast<float *>(num);
+ return *converted_num;
}
/*Mostly taken from stackoverflow, http://stackoverflow.com/questions/5858646/optimizing-variable-length-encoding
and modified in order to return a vector of chars. Implements ULEB128 or variable byte encoding.
This is highly optimized version with unrolled loop */
-inline std::vector<unsigned char> vbyte_encode(unsigned int num){
- //Determine how many bytes we are going to take.
- short size;
- std::vector<unsigned char> byte_vector;
-
- if (num < 0x00000080U) {
- size = 1;
- byte_vector.reserve(size);
- goto b1;
- }
- if (num < 0x00004000U) {
- size = 2;
- byte_vector.reserve(size);
- goto b2;
- }
- if (num < 0x00200000U) {
- size = 3;
- byte_vector.reserve(size);
- goto b3;
- }
- if (num < 0x10000000U) {
- size = 4;
- byte_vector.reserve(size);
- goto b4;
- }
- size = 5;
+inline std::vector<unsigned char> vbyte_encode(unsigned int num)
+{
+ //Determine how many bytes we are going to take.
+ short size;
+ std::vector<unsigned char> byte_vector;
+
+ if (num < 0x00000080U) {
+ size = 1;
+ byte_vector.reserve(size);
+ goto b1;
+ }
+ if (num < 0x00004000U) {
+ size = 2;
byte_vector.reserve(size);
+ goto b2;
+ }
+ if (num < 0x00200000U) {
+ size = 3;
+ byte_vector.reserve(size);
+ goto b3;
+ }
+ if (num < 0x10000000U) {
+ size = 4;
+ byte_vector.reserve(size);
+ goto b4;
+ }
+ size = 5;
+ byte_vector.reserve(size);
- //Now proceed with the encoding.
- byte_vector.push_back((num & 0x7f) | 0x80);
- num >>= 7;
+ //Now proceed with the encoding.
+ byte_vector.push_back((num & 0x7f) | 0x80);
+ num >>= 7;
b4:
- byte_vector.push_back((num & 0x7f) | 0x80);
- num >>= 7;
+ byte_vector.push_back((num & 0x7f) | 0x80);
+ num >>= 7;
b3:
- byte_vector.push_back((num & 0x7f) | 0x80);
- num >>= 7;
+ byte_vector.push_back((num & 0x7f) | 0x80);
+ num >>= 7;
b2:
- byte_vector.push_back((num & 0x7f) | 0x80);
- num >>= 7;
+ byte_vector.push_back((num & 0x7f) | 0x80);
+ num >>= 7;
b1:
- byte_vector.push_back(num);
+ byte_vector.push_back(num);
- return byte_vector;
+ return byte_vector;
}
-std::vector<unsigned int> vbyte_decode_line(std::vector<unsigned char> line){
- std::vector<unsigned int> huffman_line;
- std::vector<unsigned char> current_num;
-
- for (std::vector<unsigned char>::iterator it = line.begin(); it != line.end(); it++){
- current_num.push_back(*it);
- if ((*it >> 7) != 1) {
- //We don't have continuation in the next bit
- huffman_line.push_back(bytes_to_int(current_num));
- current_num.clear();
- }
+std::vector<unsigned int> vbyte_decode_line(std::vector<unsigned char> line)
+{
+ std::vector<unsigned int> huffman_line;
+ std::vector<unsigned char> current_num;
+
+ for (std::vector<unsigned char>::iterator it = line.begin(); it != line.end(); it++) {
+ current_num.push_back(*it);
+ if ((*it >> 7) != 1) {
+ //We don't have continuation in the next bit
+ huffman_line.push_back(bytes_to_int(current_num));
+ current_num.clear();
}
- return huffman_line;
+ }
+ return huffman_line;
}
-inline unsigned int bytes_to_int(std::vector<unsigned char> number){
- unsigned int retvalue = 0;
- std::vector<unsigned char>::iterator it = number.begin();
- unsigned char shift = 0; //By how many bits to shift
+inline unsigned int bytes_to_int(std::vector<unsigned char> number)
+{
+ unsigned int retvalue = 0;
+ std::vector<unsigned char>::iterator it = number.begin();
+ unsigned char shift = 0; //By how many bits to shift
- while (it != number.end()) {
- retvalue |= (*it & 0x7f) << shift;
- shift += 7;
- it++;
- }
+ while (it != number.end()) {
+ retvalue |= (*it & 0x7f) << shift;
+ shift += 7;
+ it++;
+ }
- return retvalue;
+ return retvalue;
}
-std::vector<unsigned char> vbyte_encode_line(std::vector<unsigned int> line) {
- std::vector<unsigned char> retvec;
+std::vector<unsigned char> vbyte_encode_line(std::vector<unsigned int> line)
+{
+ std::vector<unsigned char> retvec;
- //For each unsigned int in the line, vbyte encode it and add it to a vector of unsigned chars.
- for (std::vector<unsigned int>::iterator it = line.begin(); it != line.end(); it++){
- std::vector<unsigned char> vbyte_encoded = vbyte_encode(*it);
- retvec.insert(retvec.end(), vbyte_encoded.begin(), vbyte_encoded.end());
- }
+ //For each unsigned int in the line, vbyte encode it and add it to a vector of unsigned chars.
+ for (std::vector<unsigned int>::iterator it = line.begin(); it != line.end(); it++) {
+ std::vector<unsigned char> vbyte_encoded = vbyte_encode(*it);
+ retvec.insert(retvec.end(), vbyte_encoded.begin(), vbyte_encoded.end());
+ }
- return retvec;
+ return retvec;
}
diff --git a/moses/TranslationModel/ProbingPT/line_splitter.cpp b/moses/TranslationModel/ProbingPT/line_splitter.cpp
index f50090e4c..97ccfcae1 100644
--- a/moses/TranslationModel/ProbingPT/line_splitter.cpp
+++ b/moses/TranslationModel/ProbingPT/line_splitter.cpp
@@ -1,52 +1,54 @@
#include "line_splitter.hh"
-line_text splitLine(StringPiece textin) {
- const char delim[] = " ||| ";
- line_text output;
-
- //Tokenize
- util::TokenIter<util::MultiCharacter> it(textin, util::MultiCharacter(delim));
- //Get source phrase
- output.source_phrase = *it;
- it++;
- //Get target_phrase
- output.target_phrase = *it;
- it++;
- //Get probabilities
- output.prob = *it;
- it++;
- //Get WordAllignment 1
- output.word_all1 = *it;
- it++;
- //Get WordAllignment 2
- output.word_all2 = *it;
-
- return output;
+line_text splitLine(StringPiece textin)
+{
+ const char delim[] = " ||| ";
+ line_text output;
+
+ //Tokenize
+ util::TokenIter<util::MultiCharacter> it(textin, util::MultiCharacter(delim));
+ //Get source phrase
+ output.source_phrase = *it;
+ it++;
+ //Get target_phrase
+ output.target_phrase = *it;
+ it++;
+ //Get probabilities
+ output.prob = *it;
+ it++;
+  //Get WordAlignment 1
+ output.word_all1 = *it;
+ it++;
+  //Get WordAlignment 2
+ output.word_all2 = *it;
+
+ return output;
}
-std::vector<unsigned char> splitWordAll1(StringPiece textin){
- const char delim[] = " ";
- const char delim2[] = "-";
- std::vector<unsigned char> output;
-
- //Split on space
- util::TokenIter<util::MultiCharacter> it(textin, util::MultiCharacter(delim));
-
- //For each int
- while (it) {
- //Split on dash (-)
- util::TokenIter<util::MultiCharacter> itInner(*it, util::MultiCharacter(delim2));
-
- //Insert the two entries in the vector. User will read entry 0 and 1 to get the first,
- //2 and 3 for second etc. Use unsigned char instead of int to save space, as
- //word allignments are all very small numbers that fit in a single byte
- output.push_back((unsigned char)(atoi(itInner->data())));
- itInner++;
- output.push_back((unsigned char)(atoi(itInner->data())));
- it++;
- }
-
- return output;
+std::vector<unsigned char> splitWordAll1(StringPiece textin)
+{
+ const char delim[] = " ";
+ const char delim2[] = "-";
+ std::vector<unsigned char> output;
+
+ //Split on space
+ util::TokenIter<util::MultiCharacter> it(textin, util::MultiCharacter(delim));
+
+ //For each int
+ while (it) {
+ //Split on dash (-)
+ util::TokenIter<util::MultiCharacter> itInner(*it, util::MultiCharacter(delim2));
+
+ //Insert the two entries in the vector. User will read entry 0 and 1 to get the first,
+ //2 and 3 for second etc. Use unsigned char instead of int to save space, as
+    //word alignments are all very small numbers that fit in a single byte
+ output.push_back((unsigned char)(atoi(itInner->data())));
+ itInner++;
+ output.push_back((unsigned char)(atoi(itInner->data())));
+ it++;
+ }
+
+ return output;
}
diff --git a/moses/TranslationModel/ProbingPT/probing_hash_utils.cpp b/moses/TranslationModel/ProbingPT/probing_hash_utils.cpp
index 35cb9e538..ca3e8f69f 100644
--- a/moses/TranslationModel/ProbingPT/probing_hash_utils.cpp
+++ b/moses/TranslationModel/ProbingPT/probing_hash_utils.cpp
@@ -1,32 +1,34 @@
#include "probing_hash_utils.hh"
//Read table from disk, return memory map location
-char * readTable(const char * filename, size_t size) {
- //Initial position of the file is the end of the file, thus we know the size
- int fd;
- char * map;
-
- fd = open(filename, O_RDONLY);
- if (fd == -1) {
- perror("Error opening file for reading");
- exit(EXIT_FAILURE);
- }
-
- map = (char *)mmap(0, size, PROT_READ, MAP_SHARED, fd, 0);
-
- if (map == MAP_FAILED) {
- close(fd);
- perror("Error mmapping the file");
- exit(EXIT_FAILURE);
- }
-
- return map;
-}
-
-
-void serialize_table(char *mem, size_t size, const char * filename){
- std::ofstream os (filename, std::ios::binary);
- os.write((const char*)&mem[0], size);
- os.close();
+char * readTable(const char * filename, size_t size)
+{
+ //Initial position of the file is the end of the file, thus we know the size
+ int fd;
+ char * map;
+
+ fd = open(filename, O_RDONLY);
+ if (fd == -1) {
+ perror("Error opening file for reading");
+ exit(EXIT_FAILURE);
+ }
+
+ map = (char *)mmap(0, size, PROT_READ, MAP_SHARED, fd, 0);
+
+ if (map == MAP_FAILED) {
+ close(fd);
+ perror("Error mmapping the file");
+ exit(EXIT_FAILURE);
+ }
+
+ return map;
+}
+
+
+void serialize_table(char *mem, size_t size, const char * filename)
+{
+ std::ofstream os (filename, std::ios::binary);
+ os.write((const char*)&mem[0], size);
+ os.close();
} \ No newline at end of file
diff --git a/moses/TranslationModel/ProbingPT/quering.cpp b/moses/TranslationModel/ProbingPT/quering.cpp
index f8cdc7763..59a1229a8 100644
--- a/moses/TranslationModel/ProbingPT/quering.cpp
+++ b/moses/TranslationModel/ProbingPT/quering.cpp
@@ -1,192 +1,198 @@
#include "quering.hh"
-unsigned char * read_binary_file(const char * filename, size_t filesize){
- //Get filesize
- int fd;
- unsigned char * map;
-
- fd = open(filename, O_RDONLY);
-
- if (fd == -1) {
- perror("Error opening file for reading");
- exit(EXIT_FAILURE);
- }
-
- map = (unsigned char *)mmap(0, filesize, PROT_READ, MAP_SHARED, fd, 0);
- if (map == MAP_FAILED) {
- close(fd);
- perror("Error mmapping the file");
- exit(EXIT_FAILURE);
- }
-
- return map;
+unsigned char * read_binary_file(const char * filename, size_t filesize)
+{
+ //Get filesize
+ int fd;
+ unsigned char * map;
+
+ fd = open(filename, O_RDONLY);
+
+ if (fd == -1) {
+ perror("Error opening file for reading");
+ exit(EXIT_FAILURE);
+ }
+
+ map = (unsigned char *)mmap(0, filesize, PROT_READ, MAP_SHARED, fd, 0);
+ if (map == MAP_FAILED) {
+ close(fd);
+ perror("Error mmapping the file");
+ exit(EXIT_FAILURE);
+ }
+
+ return map;
}
-QueryEngine::QueryEngine(const char * filepath) : decoder(filepath){
-
- //Create filepaths
- std::string basepath(filepath);
- std::string path_to_hashtable = basepath + "/probing_hash.dat";
- std::string path_to_data_bin = basepath + "/binfile.dat";
- std::string path_to_source_vocabid = basepath + "/source_vocabids";
-
- ///Source phrase vocabids
- read_map(&source_vocabids, path_to_source_vocabid.c_str());
-
- //Target phrase vocabIDs
- vocabids = decoder.get_target_lookup_map();
-
- //Read config file
- std::string line;
- std::ifstream config ((basepath + "/config").c_str());
- //Check API version:
- getline(config, line);
- if (atoi(line.c_str()) != API_VERSION) {
- std::cerr << "The ProbingPT API has changed, please rebinarize your phrase tables." << std::endl;
- exit(EXIT_FAILURE);
- }
- //Get tablesize.
- getline(config, line);
- int tablesize = atoi(line.c_str());
- //Number of scores
- getline(config, line);
- num_scores = atoi(line.c_str());
- //do we have a reordering table
- getline(config, line);
- std::transform(line.begin(), line.end(), line.begin(), ::tolower); //Get the boolean in lowercase
- is_reordering = false;
- if (line == "true") {
- is_reordering = true;
- std::cerr << "WARNING. REORDERING TABLES NOT SUPPORTED YET." << std::endl;
- }
- config.close();
-
- //Mmap binary table
- struct stat filestatus;
- stat(path_to_data_bin.c_str(), &filestatus);
- binary_filesize = filestatus.st_size;
- binary_mmaped = read_binary_file(path_to_data_bin.c_str(), binary_filesize);
-
- //Read hashtable
- size_t table_filesize = Table::Size(tablesize, 1.2);
- mem = readTable(path_to_hashtable.c_str(), table_filesize);
- Table table_init(mem, table_filesize);
- table = table_init;
-
- std::cerr << "Initialized successfully! " << std::endl;
+QueryEngine::QueryEngine(const char * filepath) : decoder(filepath)
+{
+
+ //Create filepaths
+ std::string basepath(filepath);
+ std::string path_to_hashtable = basepath + "/probing_hash.dat";
+ std::string path_to_data_bin = basepath + "/binfile.dat";
+ std::string path_to_source_vocabid = basepath + "/source_vocabids";
+
+ ///Source phrase vocabids
+ read_map(&source_vocabids, path_to_source_vocabid.c_str());
+
+ //Target phrase vocabIDs
+ vocabids = decoder.get_target_lookup_map();
+
+ //Read config file
+ std::string line;
+ std::ifstream config ((basepath + "/config").c_str());
+ //Check API version:
+ getline(config, line);
+ if (atoi(line.c_str()) != API_VERSION) {
+ std::cerr << "The ProbingPT API has changed, please rebinarize your phrase tables." << std::endl;
+ exit(EXIT_FAILURE);
+ }
+ //Get tablesize.
+ getline(config, line);
+ int tablesize = atoi(line.c_str());
+ //Number of scores
+ getline(config, line);
+ num_scores = atoi(line.c_str());
+ //do we have a reordering table
+ getline(config, line);
+ std::transform(line.begin(), line.end(), line.begin(), ::tolower); //Get the boolean in lowercase
+ is_reordering = false;
+ if (line == "true") {
+ is_reordering = true;
+ std::cerr << "WARNING. REORDERING TABLES NOT SUPPORTED YET." << std::endl;
+ }
+ config.close();
+
+ //Mmap binary table
+ struct stat filestatus;
+ stat(path_to_data_bin.c_str(), &filestatus);
+ binary_filesize = filestatus.st_size;
+ binary_mmaped = read_binary_file(path_to_data_bin.c_str(), binary_filesize);
+
+ //Read hashtable
+ size_t table_filesize = Table::Size(tablesize, 1.2);
+ mem = readTable(path_to_hashtable.c_str(), table_filesize);
+ Table table_init(mem, table_filesize);
+ table = table_init;
+
+ std::cerr << "Initialized successfully! " << std::endl;
}
-QueryEngine::~QueryEngine(){
- //Clear mmap content from memory.
- munmap(binary_mmaped, binary_filesize);
- munmap(mem, table_filesize);
-
+QueryEngine::~QueryEngine()
+{
+ //Clear mmap content from memory.
+ munmap(binary_mmaped, binary_filesize);
+ munmap(mem, table_filesize);
+
}
-std::pair<bool, std::vector<target_text> > QueryEngine::query(std::vector<uint64_t> source_phrase){
- bool found;
- std::vector<target_text> translation_entries;
- const Entry * entry;
- //TOO SLOW
- //uint64_t key = util::MurmurHashNative(&source_phrase[0], source_phrase.size());
- uint64_t key = 0;
- for (int i = 0; i < source_phrase.size(); i++){
- key += (source_phrase[i] << i);
+std::pair<bool, std::vector<target_text> > QueryEngine::query(std::vector<uint64_t> source_phrase)
+{
+ bool found;
+ std::vector<target_text> translation_entries;
+ const Entry * entry;
+ //TOO SLOW
+ //uint64_t key = util::MurmurHashNative(&source_phrase[0], source_phrase.size());
+ uint64_t key = 0;
+ for (int i = 0; i < source_phrase.size(); i++) {
+ key += (source_phrase[i] << i);
+ }
+
+
+ found = table.Find(key, entry);
+
+ if (found) {
+ //The phrase that was searched for was found! We need to get the translation entries.
+ //We will read the largest entry in bytes and then filter the unnecesarry with functions
+ //from line_splitter
+ uint64_t initial_index = entry -> GetValue();
+ unsigned int bytes_toread = entry -> bytes_toread;
+
+ //ASK HIEU FOR MORE EFFICIENT WAY TO DO THIS!
+ std::vector<unsigned char> encoded_text; //Assign to the vector the relevant portion of the array.
+ encoded_text.reserve(bytes_toread);
+ for (int i = 0; i < bytes_toread; i++) {
+ encoded_text.push_back(binary_mmaped[i+initial_index]);
}
+ //Get only the translation entries necessary
+ translation_entries = decoder.full_decode_line(encoded_text, num_scores);
- found = table.Find(key, entry);
-
- if (found){
- //The phrase that was searched for was found! We need to get the translation entries.
- //We will read the largest entry in bytes and then filter the unnecesarry with functions
- //from line_splitter
- uint64_t initial_index = entry -> GetValue();
- unsigned int bytes_toread = entry -> bytes_toread;
-
- //ASK HIEU FOR MORE EFFICIENT WAY TO DO THIS!
- std::vector<unsigned char> encoded_text; //Assign to the vector the relevant portion of the array.
- encoded_text.reserve(bytes_toread);
- for (int i = 0; i < bytes_toread; i++){
- encoded_text.push_back(binary_mmaped[i+initial_index]);
- }
+ }
- //Get only the translation entries necessary
- translation_entries = decoder.full_decode_line(encoded_text, num_scores);
-
- }
+ std::pair<bool, std::vector<target_text> > output (found, translation_entries);
- std::pair<bool, std::vector<target_text> > output (found, translation_entries);
+ return output;
- return output;
-
}
-std::pair<bool, std::vector<target_text> > QueryEngine::query(StringPiece source_phrase){
- bool found;
- std::vector<target_text> translation_entries;
- const Entry * entry;
- //Convert source frase to VID
- std::vector<uint64_t> source_phrase_vid = getVocabIDs(source_phrase);
- //TOO SLOW
- //uint64_t key = util::MurmurHashNative(&source_phrase_vid[0], source_phrase_vid.size());
- uint64_t key = 0;
- for (int i = 0; i < source_phrase_vid.size(); i++){
- key += (source_phrase_vid[i] << i);
+std::pair<bool, std::vector<target_text> > QueryEngine::query(StringPiece source_phrase)
+{
+ bool found;
+ std::vector<target_text> translation_entries;
+ const Entry * entry;
+ //Convert source frase to VID
+ std::vector<uint64_t> source_phrase_vid = getVocabIDs(source_phrase);
+ //TOO SLOW
+ //uint64_t key = util::MurmurHashNative(&source_phrase_vid[0], source_phrase_vid.size());
+ uint64_t key = 0;
+ for (int i = 0; i < source_phrase_vid.size(); i++) {
+ key += (source_phrase_vid[i] << i);
+ }
+
+ found = table.Find(key, entry);
+
+
+ if (found) {
+ //The phrase that was searched for was found! We need to get the translation entries.
+ //We will read the largest entry in bytes and then filter the unnecesarry with functions
+ //from line_splitter
+ uint64_t initial_index = entry -> GetValue();
+ unsigned int bytes_toread = entry -> bytes_toread;
+ //At the end of the file we can't readd + largest_entry cause we get a segfault.
+ std::cerr << "Entry size is bytes is: " << bytes_toread << std::endl;
+
+ //ASK HIEU FOR MORE EFFICIENT WAY TO DO THIS!
+ std::vector<unsigned char> encoded_text; //Assign to the vector the relevant portion of the array.
+ encoded_text.reserve(bytes_toread);
+ for (int i = 0; i < bytes_toread; i++) {
+ encoded_text.push_back(binary_mmaped[i+initial_index]);
}
- found = table.Find(key, entry);
+ //Get only the translation entries necessary
+ translation_entries = decoder.full_decode_line(encoded_text, num_scores);
+ }
- if (found){
- //The phrase that was searched for was found! We need to get the translation entries.
- //We will read the largest entry in bytes and then filter the unnecesarry with functions
- //from line_splitter
- uint64_t initial_index = entry -> GetValue();
- unsigned int bytes_toread = entry -> bytes_toread;
- //At the end of the file we can't readd + largest_entry cause we get a segfault.
- std::cerr << "Entry size is bytes is: " << bytes_toread << std::endl;
-
- //ASK HIEU FOR MORE EFFICIENT WAY TO DO THIS!
- std::vector<unsigned char> encoded_text; //Assign to the vector the relevant portion of the array.
- encoded_text.reserve(bytes_toread);
- for (int i = 0; i < bytes_toread; i++){
- encoded_text.push_back(binary_mmaped[i+initial_index]);
- }
+ std::pair<bool, std::vector<target_text> > output (found, translation_entries);
- //Get only the translation entries necessary
- translation_entries = decoder.full_decode_line(encoded_text, num_scores);
+ return output;
- }
+}
- std::pair<bool, std::vector<target_text> > output (found, translation_entries);
+void QueryEngine::printTargetInfo(std::vector<target_text> target_phrases)
+{
+ int entries = target_phrases.size();
- return output;
+ for (int i = 0; i<entries; i++) {
+ std::cout << "Entry " << i+1 << " of " << entries << ":" << std::endl;
+ //Print text
+ std::cout << getTargetWordsFromIDs(target_phrases[i].target_phrase, &vocabids) << "\t";
-}
-
-void QueryEngine::printTargetInfo(std::vector<target_text> target_phrases){
- int entries = target_phrases.size();
-
- for (int i = 0; i<entries; i++){
- std::cout << "Entry " << i+1 << " of " << entries << ":" << std::endl;
- //Print text
- std::cout << getTargetWordsFromIDs(target_phrases[i].target_phrase, &vocabids) << "\t";
-
- //Print probabilities:
- for (int j = 0; j<target_phrases[i].prob.size(); j++){
- std::cout << target_phrases[i].prob[j] << " ";
- }
- std::cout << "\t";
-
- //Print word_all1
- for (int j = 0; j<target_phrases[i].word_all1.size(); j++){
- if (j%2 == 0){
- std::cout << (short)target_phrases[i].word_all1[j] << "-";
- }else{
- std::cout << (short)target_phrases[i].word_all1[j] << " ";
- }
- }
- std::cout << std::endl;
+ //Print probabilities:
+ for (int j = 0; j<target_phrases[i].prob.size(); j++) {
+ std::cout << target_phrases[i].prob[j] << " ";
+ }
+ std::cout << "\t";
+
+ //Print word_all1
+ for (int j = 0; j<target_phrases[i].word_all1.size(); j++) {
+ if (j%2 == 0) {
+ std::cout << (short)target_phrases[i].word_all1[j] << "-";
+ } else {
+ std::cout << (short)target_phrases[i].word_all1[j] << " ";
+ }
}
+ std::cout << std::endl;
+ }
}
diff --git a/moses/TranslationModel/ProbingPT/storing.cpp b/moses/TranslationModel/ProbingPT/storing.cpp
index 6315b7b3d..01128c1e4 100644
--- a/moses/TranslationModel/ProbingPT/storing.cpp
+++ b/moses/TranslationModel/ProbingPT/storing.cpp
@@ -1,156 +1,161 @@
#include "storing.hh"
-BinaryFileWriter::BinaryFileWriter (std::string basepath) : os ((basepath + "/binfile.dat").c_str(), std::ios::binary) {
- binfile.reserve(10000); //Reserve part of the vector to avoid realocation
- it = binfile.begin();
- dist_from_start = 0; //Initialize variables
- extra_counter = 0;
+BinaryFileWriter::BinaryFileWriter (std::string basepath) : os ((basepath + "/binfile.dat").c_str(), std::ios::binary)
+{
+ binfile.reserve(10000); //Reserve part of the vector to avoid realocation
+ it = binfile.begin();
+ dist_from_start = 0; //Initialize variables
+ extra_counter = 0;
}
-void BinaryFileWriter::write (std::vector<unsigned char> * bytes) {
- binfile.insert(it, bytes->begin(), bytes->end()); //Insert the bytes
- //Keep track of the offsets
- it += bytes->size();
- dist_from_start = distance(binfile.begin(),it);
- //Flush the vector to disk every once in a while so that we don't consume too much ram
- if (dist_from_start > 9000) {
- flush();
- }
+void BinaryFileWriter::write (std::vector<unsigned char> * bytes)
+{
+ binfile.insert(it, bytes->begin(), bytes->end()); //Insert the bytes
+ //Keep track of the offsets
+ it += bytes->size();
+ dist_from_start = distance(binfile.begin(),it);
+ //Flush the vector to disk every once in a while so that we don't consume too much ram
+ if (dist_from_start > 9000) {
+ flush();
+ }
}
-void BinaryFileWriter::flush () {
- //Cast unsigned char to char before writing...
- os.write((char *)&binfile[0], dist_from_start);
- //Clear the vector:
- binfile.clear();
- binfile.reserve(10000);
- extra_counter += dist_from_start; //Keep track of the total number of bytes.
- it = binfile.begin(); //Reset iterator
- dist_from_start = distance(binfile.begin(),it); //Reset dist from start
+void BinaryFileWriter::flush ()
+{
+ //Cast unsigned char to char before writing...
+ os.write((char *)&binfile[0], dist_from_start);
+ //Clear the vector:
+ binfile.clear();
+ binfile.reserve(10000);
+ extra_counter += dist_from_start; //Keep track of the total number of bytes.
+ it = binfile.begin(); //Reset iterator
+ dist_from_start = distance(binfile.begin(),it); //Reset dist from start
}
-BinaryFileWriter::~BinaryFileWriter (){
- os.close();
- binfile.clear();
+BinaryFileWriter::~BinaryFileWriter ()
+{
+ os.close();
+ binfile.clear();
}
void createProbingPT(const char * phrasetable_path, const char * target_path,
- const char * num_scores, const char * is_reordering){
- //Get basepath and create directory if missing
- std::string basepath(target_path);
- mkdir(basepath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
-
- //Set up huffman and serialize decoder maps.
- Huffman huffmanEncoder(phrasetable_path); //initialize
- huffmanEncoder.assign_values();
- huffmanEncoder.produce_lookups();
- huffmanEncoder.serialize_maps(target_path);
-
- //Get uniq lines:
- unsigned long uniq_entries = huffmanEncoder.getUniqLines();
-
- //Source phrase vocabids
- std::map<uint64_t, std::string> source_vocabids;
-
- //Read the file
- util::FilePiece filein(phrasetable_path);
-
- //Init the probing hash table
- size_t size = Table::Size(uniq_entries, 1.2);
- char * mem = new char[size];
- memset(mem, 0, size);
- Table table(mem, size);
-
- BinaryFileWriter binfile(basepath); //Init the binary file writer.
-
- line_text prev_line; //Check if the source phrase of the previous line is the same
-
- //Keep track of the size of each group of target phrases
- uint64_t entrystartidx = 0;
- //uint64_t line_num = 0;
-
-
- //Read everything and processs
- while(true){
- try {
- //Process line read
- line_text line;
- line = splitLine(filein.ReadLine());
- //Add source phrases to vocabularyIDs
- add_to_map(&source_vocabids, line.source_phrase);
-
- if ((binfile.dist_from_start + binfile.extra_counter) == 0) {
- prev_line = line; //For the first iteration assume the previous line is
- } //The same as this one.
-
- if (line.source_phrase != prev_line.source_phrase){
-
- //Create a new entry even
-
- //Create an entry for the previous source phrase:
- Entry pesho;
- pesho.value = entrystartidx;
- //The key is the sum of hashes of individual words bitshifted by their position in the phrase.
- //Probably not entirerly correct, but fast and seems to work fine in practise.
- pesho.key = 0;
- std::vector<uint64_t> vocabid_source = getVocabIDs(prev_line.source_phrase);
- for (int i = 0; i < vocabid_source.size(); i++){
- pesho.key += (vocabid_source[i] << i);
- }
- pesho.bytes_toread = binfile.dist_from_start + binfile.extra_counter - entrystartidx;
-
- //Put into table
- table.Insert(pesho);
-
- entrystartidx = binfile.dist_from_start + binfile.extra_counter; //Designate start idx for new entry
-
- //Encode a line and write it to disk.
- std::vector<unsigned char> encoded_line = huffmanEncoder.full_encode_line(line);
- binfile.write(&encoded_line);
-
- //Set prevLine
- prev_line = line;
-
- } else{
- //If we still have the same line, just append to it:
- std::vector<unsigned char> encoded_line = huffmanEncoder.full_encode_line(line);
- binfile.write(&encoded_line);
- }
-
- } catch (util::EndOfFileException e){
- std::cerr << "Reading phrase table finished, writing remaining files to disk." << std::endl;
- binfile.flush();
-
- //After the final entry is constructed we need to add it to the phrase_table
- //Create an entry for the previous source phrase:
- Entry pesho;
- pesho.value = entrystartidx;
- //The key is the sum of hashes of individual words. Probably not entirerly correct, but fast
- pesho.key = 0;
- std::vector<uint64_t> vocabid_source = getVocabIDs(prev_line.source_phrase);
- for (int i = 0; i < vocabid_source.size(); i++){
- pesho.key += (vocabid_source[i] << i);
- }
- pesho.bytes_toread = binfile.dist_from_start + binfile.extra_counter - entrystartidx;
- //Put into table
- table.Insert(pesho);
-
- break;
+ const char * num_scores, const char * is_reordering)
+{
+ //Get basepath and create directory if missing
+ std::string basepath(target_path);
+ mkdir(basepath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
+
+ //Set up huffman and serialize decoder maps.
+ Huffman huffmanEncoder(phrasetable_path); //initialize
+ huffmanEncoder.assign_values();
+ huffmanEncoder.produce_lookups();
+ huffmanEncoder.serialize_maps(target_path);
+
+ //Get uniq lines:
+ unsigned long uniq_entries = huffmanEncoder.getUniqLines();
+
+ //Source phrase vocabids
+ std::map<uint64_t, std::string> source_vocabids;
+
+ //Read the file
+ util::FilePiece filein(phrasetable_path);
+
+ //Init the probing hash table
+ size_t size = Table::Size(uniq_entries, 1.2);
+ char * mem = new char[size];
+ memset(mem, 0, size);
+ Table table(mem, size);
+
+ BinaryFileWriter binfile(basepath); //Init the binary file writer.
+
+ line_text prev_line; //Check if the source phrase of the previous line is the same
+
+ //Keep track of the size of each group of target phrases
+ uint64_t entrystartidx = 0;
+ //uint64_t line_num = 0;
+
+
+ //Read everything and processs
+ while(true) {
+ try {
+ //Process line read
+ line_text line;
+ line = splitLine(filein.ReadLine());
+ //Add source phrases to vocabularyIDs
+ add_to_map(&source_vocabids, line.source_phrase);
+
+ if ((binfile.dist_from_start + binfile.extra_counter) == 0) {
+ prev_line = line; //For the first iteration assume the previous line is
+ } //The same as this one.
+
+ if (line.source_phrase != prev_line.source_phrase) {
+
+ //Create a new entry even
+
+ //Create an entry for the previous source phrase:
+ Entry pesho;
+ pesho.value = entrystartidx;
+ //The key is the sum of hashes of individual words bitshifted by their position in the phrase.
+ //Probably not entirerly correct, but fast and seems to work fine in practise.
+ pesho.key = 0;
+ std::vector<uint64_t> vocabid_source = getVocabIDs(prev_line.source_phrase);
+ for (int i = 0; i < vocabid_source.size(); i++) {
+ pesho.key += (vocabid_source[i] << i);
}
+ pesho.bytes_toread = binfile.dist_from_start + binfile.extra_counter - entrystartidx;
+
+ //Put into table
+ table.Insert(pesho);
+
+ entrystartidx = binfile.dist_from_start + binfile.extra_counter; //Designate start idx for new entry
+
+ //Encode a line and write it to disk.
+ std::vector<unsigned char> encoded_line = huffmanEncoder.full_encode_line(line);
+ binfile.write(&encoded_line);
+
+ //Set prevLine
+ prev_line = line;
+
+ } else {
+ //If we still have the same line, just append to it:
+ std::vector<unsigned char> encoded_line = huffmanEncoder.full_encode_line(line);
+ binfile.write(&encoded_line);
+ }
+
+ } catch (util::EndOfFileException e) {
+ std::cerr << "Reading phrase table finished, writing remaining files to disk." << std::endl;
+ binfile.flush();
+
+ //After the final entry is constructed we need to add it to the phrase_table
+ //Create an entry for the previous source phrase:
+ Entry pesho;
+ pesho.value = entrystartidx;
+ //The key is the sum of hashes of individual words. Probably not entirerly correct, but fast
+ pesho.key = 0;
+ std::vector<uint64_t> vocabid_source = getVocabIDs(prev_line.source_phrase);
+ for (int i = 0; i < vocabid_source.size(); i++) {
+ pesho.key += (vocabid_source[i] << i);
+ }
+ pesho.bytes_toread = binfile.dist_from_start + binfile.extra_counter - entrystartidx;
+ //Put into table
+ table.Insert(pesho);
+
+ break;
}
+ }
+
+ serialize_table(mem, size, (basepath + "/probing_hash.dat").c_str());
- serialize_table(mem, size, (basepath + "/probing_hash.dat").c_str());
+ serialize_map(&source_vocabids, (basepath + "/source_vocabids").c_str());
- serialize_map(&source_vocabids, (basepath + "/source_vocabids").c_str());
-
- delete[] mem;
+ delete[] mem;
- //Write configfile
- std::ofstream configfile;
- configfile.open((basepath + "/config").c_str());
- configfile << API_VERSION << '\n';
- configfile << uniq_entries << '\n';
- configfile << num_scores << '\n';
- configfile << is_reordering << '\n';
- configfile.close();
+ //Write configfile
+ std::ofstream configfile;
+ configfile.open((basepath + "/config").c_str());
+ configfile << API_VERSION << '\n';
+ configfile << uniq_entries << '\n';
+ configfile << num_scores << '\n';
+ configfile << is_reordering << '\n';
+ configfile.close();
}
diff --git a/moses/TranslationModel/ProbingPT/tests/tokenization_tests.cpp b/moses/TranslationModel/ProbingPT/tests/tokenization_tests.cpp
index 2a63242de..528c9c37c 100644
--- a/moses/TranslationModel/ProbingPT/tests/tokenization_tests.cpp
+++ b/moses/TranslationModel/ProbingPT/tests/tokenization_tests.cpp
@@ -1,198 +1,206 @@
-#include "line_splitter.hh"
-
-bool test_vectorinsert() {
- StringPiece line1 = StringPiece("! ! ! ! ||| ! ! ! ! ||| 0.0804289 0.141656 0.0804289 0.443409 2.718 ||| 0-0 1-1 2-2 3-3 ||| 1 1 1");
- StringPiece line2 = StringPiece("! ! ! ) , has ||| ! ! ! ) - , a ||| 0.0804289 0.0257627 0.0804289 0.00146736 2.718 ||| 0-0 1-1 2-2 3-3 4-4 4-5 5-6 ||| 1 1 1");
- line_text output = splitLine(line1);
- line_text output2 = splitLine(line2);
-
- //Init container vector and iterator.
- std::vector<char> container;
- container.reserve(10000); //Reserve vector
- std::vector<char>::iterator it = container.begin();
- std::pair<std::vector<char>::iterator, int> binary_append_ret; //Return values from vector_append
-
- //Put a value into the vector
- binary_append_ret = vector_append(&output, &container, it, false);
- it = binary_append_ret.first;
- binary_append_ret = vector_append(&output2, &container, it, false);
- it = binary_append_ret.first;
-
- std::string test(container.begin(), container.end());
- std::string should_be = "! ! ! ! 0.0804289 0.141656 0.0804289 0.443409 2.718 0-0 1-1 2-2 3-3 1 1 1! ! ! ) - , a 0.0804289 0.0257627 0.0804289 0.00146736 2.718 0-0 1-1 2-2 3-3 4-4 4-5 5-6 1 1 1";
- if (test == should_be) {
- return true;
- } else {
- return false;
- }
+#include "line_splitter.hh"
+
+bool test_vectorinsert()
+{
+ StringPiece line1 = StringPiece("! ! ! ! ||| ! ! ! ! ||| 0.0804289 0.141656 0.0804289 0.443409 2.718 ||| 0-0 1-1 2-2 3-3 ||| 1 1 1");
+ StringPiece line2 = StringPiece("! ! ! ) , has ||| ! ! ! ) - , a ||| 0.0804289 0.0257627 0.0804289 0.00146736 2.718 ||| 0-0 1-1 2-2 3-3 4-4 4-5 5-6 ||| 1 1 1");
+ line_text output = splitLine(line1);
+ line_text output2 = splitLine(line2);
+
+ //Init container vector and iterator.
+ std::vector<char> container;
+ container.reserve(10000); //Reserve vector
+ std::vector<char>::iterator it = container.begin();
+ std::pair<std::vector<char>::iterator, int> binary_append_ret; //Return values from vector_append
+
+ //Put a value into the vector
+ binary_append_ret = vector_append(&output, &container, it, false);
+ it = binary_append_ret.first;
+ binary_append_ret = vector_append(&output2, &container, it, false);
+ it = binary_append_ret.first;
+
+ std::string test(container.begin(), container.end());
+ std::string should_be = "! ! ! ! 0.0804289 0.141656 0.0804289 0.443409 2.718 0-0 1-1 2-2 3-3 1 1 1! ! ! ) - , a 0.0804289 0.0257627 0.0804289 0.00146736 2.718 0-0 1-1 2-2 3-3 4-4 4-5 5-6 1 1 1";
+ if (test == should_be) {
+ return true;
+ } else {
+ return false;
+ }
}
-bool probabilitiesTest(){
- StringPiece line1 = StringPiece("0.536553 0.75961 0.634108 0.532927 2.718");
- StringPiece line2 = StringPiece("1.42081e-05 3.91895e-09 0.0738539 0.749514 2.718");
-
- std::vector<double> pesho;
- bool peshobool = false;
- bool kirobool = false;
- std::vector<double> kiro;
-
- pesho = splitProbabilities(line1);
- kiro = splitProbabilities(line2);
-
- if (pesho[0] == 0.536553 && pesho[1] == 0.75961 && pesho[2] == 0.634108 && pesho[3] == 0.532927 && pesho[4] == 2.718 && pesho.size() == 5) {
- peshobool = true;
- } else {
- std::cout << "Processed: " << pesho[0] << " " << pesho[1] << " " << pesho[2] << " " << pesho[3] << " " << pesho[4] << std::endl;
- std::cout << "Size is: " << pesho.size() << " Expected 5." << std::endl;
- std::cout << "Expected: " << "0.536553 0.75961 0.634108 0.532927 2.718" << std::endl;
- }
-
- if (kiro[0] == 1.42081e-05 && kiro[1] == 3.91895e-09 && kiro[2] == 0.0738539 && kiro[3] == 0.749514 && kiro[4] == 2.718 && kiro.size() == 5) {
- kirobool = true;
- } else {
- std::cout << "Processed: " << kiro[0] << " " << kiro[1] << " " << kiro[2] << " " << kiro[3] << " " << kiro[4] << std::endl;
- std::cout << "Size is: " << kiro.size() << " Expected 5." << std::endl;
- std::cout << "Expected: " << "1.42081e-05 3.91895e-09 0.0738539 0.749514 2.718" << std::endl;
- }
-
- return (peshobool && kirobool);
+bool probabilitiesTest()
+{
+ StringPiece line1 = StringPiece("0.536553 0.75961 0.634108 0.532927 2.718");
+ StringPiece line2 = StringPiece("1.42081e-05 3.91895e-09 0.0738539 0.749514 2.718");
+
+ std::vector<double> pesho;
+ bool peshobool = false;
+ bool kirobool = false;
+ std::vector<double> kiro;
+
+ pesho = splitProbabilities(line1);
+ kiro = splitProbabilities(line2);
+
+ if (pesho[0] == 0.536553 && pesho[1] == 0.75961 && pesho[2] == 0.634108 && pesho[3] == 0.532927 && pesho[4] == 2.718 && pesho.size() == 5) {
+ peshobool = true;
+ } else {
+ std::cout << "Processed: " << pesho[0] << " " << pesho[1] << " " << pesho[2] << " " << pesho[3] << " " << pesho[4] << std::endl;
+ std::cout << "Size is: " << pesho.size() << " Expected 5." << std::endl;
+ std::cout << "Expected: " << "0.536553 0.75961 0.634108 0.532927 2.718" << std::endl;
+ }
+
+ if (kiro[0] == 1.42081e-05 && kiro[1] == 3.91895e-09 && kiro[2] == 0.0738539 && kiro[3] == 0.749514 && kiro[4] == 2.718 && kiro.size() == 5) {
+ kirobool = true;
+ } else {
+ std::cout << "Processed: " << kiro[0] << " " << kiro[1] << " " << kiro[2] << " " << kiro[3] << " " << kiro[4] << std::endl;
+ std::cout << "Size is: " << kiro.size() << " Expected 5." << std::endl;
+ std::cout << "Expected: " << "1.42081e-05 3.91895e-09 0.0738539 0.749514 2.718" << std::endl;
+ }
+
+ return (peshobool && kirobool);
}
-bool wordAll1test(){
- StringPiece line1 = StringPiece("2-0 3-1 4-2 5-2");
- StringPiece line2 = StringPiece("0-0 1-1 2-2 3-3 4-3 6-4 5-5");
-
- std::vector<int> pesho;
- bool peshobool = false;
- bool kirobool = false;
- std::vector<int> kiro;
-
- pesho = splitWordAll1(line1);
- kiro = splitWordAll1(line2);
-
- if (pesho[0] == 2 && pesho[1] == 0 && pesho[2] == 3 && pesho[3] == 1 && pesho[4] == 4
- && pesho[5] == 2 && pesho[6] == 5 && pesho[7] == 2 && pesho.size() == 8) {
- peshobool = true;
- } else {
- std::cout << "Processed: " << pesho[0] << "-" << pesho[1] << " " << pesho[2] << "-" << pesho[3] << " "
- << pesho[4] << "-" << pesho[5] << " " << pesho[6] << "-" << pesho[7] << std::endl;
- std::cout << "Size is: " << pesho.size() << " Expected: 8." << std::endl;
- std::cout << "Expected: " << "2-0 3-1 4-2 5-2" << std::endl;
- }
-
- if (kiro[0] == 0 && kiro[1] == 0 && kiro[2] == 1 && kiro[3] == 1 && kiro[4] == 2 && kiro[5] == 2
- && kiro[6] == 3 && kiro[7] == 3 && kiro[8] == 4 && kiro[9] == 3 && kiro[10] == 6 && kiro[11] == 4
- && kiro[12] == 5 && kiro[13] == 5 && kiro.size() == 14){
- kirobool = true;
- } else {
- std::cout << "Processed: " << kiro[0] << "-" << kiro[1] << " " << kiro[2] << "-" << kiro[3] << " "
- << kiro[4] << "-" << kiro[5] << " " << kiro[6] << "-" << kiro[7] << " " << kiro[8] << "-" << kiro[9]
- << " " << kiro[10] << "-" << kiro[11] << " " << kiro[12] << "-" << kiro[13] << std::endl;
- std::cout << "Size is: " << kiro.size() << " Expected: 14" << std::endl;
- std::cout << "Expected: " << "0-0 1-1 2-2 3-3 4-3 6-4 5-5" << std::endl;
- }
-
- return (peshobool && kirobool);
+bool wordAll1test()
+{
+ StringPiece line1 = StringPiece("2-0 3-1 4-2 5-2");
+ StringPiece line2 = StringPiece("0-0 1-1 2-2 3-3 4-3 6-4 5-5");
+
+ std::vector<int> pesho;
+ bool peshobool = false;
+ bool kirobool = false;
+ std::vector<int> kiro;
+
+ pesho = splitWordAll1(line1);
+ kiro = splitWordAll1(line2);
+
+ if (pesho[0] == 2 && pesho[1] == 0 && pesho[2] == 3 && pesho[3] == 1 && pesho[4] == 4
+ && pesho[5] == 2 && pesho[6] == 5 && pesho[7] == 2 && pesho.size() == 8) {
+ peshobool = true;
+ } else {
+ std::cout << "Processed: " << pesho[0] << "-" << pesho[1] << " " << pesho[2] << "-" << pesho[3] << " "
+ << pesho[4] << "-" << pesho[5] << " " << pesho[6] << "-" << pesho[7] << std::endl;
+ std::cout << "Size is: " << pesho.size() << " Expected: 8." << std::endl;
+ std::cout << "Expected: " << "2-0 3-1 4-2 5-2" << std::endl;
+ }
+
+ if (kiro[0] == 0 && kiro[1] == 0 && kiro[2] == 1 && kiro[3] == 1 && kiro[4] == 2 && kiro[5] == 2
+ && kiro[6] == 3 && kiro[7] == 3 && kiro[8] == 4 && kiro[9] == 3 && kiro[10] == 6 && kiro[11] == 4
+ && kiro[12] == 5 && kiro[13] == 5 && kiro.size() == 14) {
+ kirobool = true;
+ } else {
+ std::cout << "Processed: " << kiro[0] << "-" << kiro[1] << " " << kiro[2] << "-" << kiro[3] << " "
+ << kiro[4] << "-" << kiro[5] << " " << kiro[6] << "-" << kiro[7] << " " << kiro[8] << "-" << kiro[9]
+ << " " << kiro[10] << "-" << kiro[11] << " " << kiro[12] << "-" << kiro[13] << std::endl;
+ std::cout << "Size is: " << kiro.size() << " Expected: 14" << std::endl;
+ std::cout << "Expected: " << "0-0 1-1 2-2 3-3 4-3 6-4 5-5" << std::endl;
+ }
+
+ return (peshobool && kirobool);
}
-bool wordAll2test(){
- StringPiece line1 = StringPiece("4 9 1");
- StringPiece line2 = StringPiece("3255 9 1");
-
- std::vector<int> pesho;
- bool peshobool = false;
- bool kirobool = false;
- std::vector<int> kiro;
-
- pesho = splitWordAll2(line1);
- kiro = splitWordAll2(line2);
-
- if (pesho[0] == 4 && pesho[1] == 9 && pesho[2] == 1 && pesho.size() == 3){
- peshobool = true;
- } else {
- std::cout << "Processed: " << pesho[0] << " " << pesho[1] << " " << pesho[2] << std::endl;
- std::cout << "Size: " << pesho.size() << " Expected: 3" << std::endl;
- std::cout << "Expected: " << "4 9 1" << std::endl;
- }
-
- if (kiro[0] == 3255 && kiro[1] == 9 && kiro[2] == 1 && kiro.size() == 3){
- kirobool = true;
- } else {
- std::cout << "Processed: " << kiro[0] << " " << kiro[1] << " " << kiro[2] << std::endl;
- std::cout << "Size: " << kiro.size() << " Expected: 3" << std::endl;
- std::cout << "Expected: " << "3255 9 1" << std::endl;
- }
-
- return (peshobool && kirobool);
+bool wordAll2test()
+{
+ StringPiece line1 = StringPiece("4 9 1");
+ StringPiece line2 = StringPiece("3255 9 1");
+
+ std::vector<int> pesho;
+ bool peshobool = false;
+ bool kirobool = false;
+ std::vector<int> kiro;
+
+ pesho = splitWordAll2(line1);
+ kiro = splitWordAll2(line2);
+
+ if (pesho[0] == 4 && pesho[1] == 9 && pesho[2] == 1 && pesho.size() == 3) {
+ peshobool = true;
+ } else {
+ std::cout << "Processed: " << pesho[0] << " " << pesho[1] << " " << pesho[2] << std::endl;
+ std::cout << "Size: " << pesho.size() << " Expected: 3" << std::endl;
+ std::cout << "Expected: " << "4 9 1" << std::endl;
+ }
+
+ if (kiro[0] == 3255 && kiro[1] == 9 && kiro[2] == 1 && kiro.size() == 3) {
+ kirobool = true;
+ } else {
+ std::cout << "Processed: " << kiro[0] << " " << kiro[1] << " " << kiro[2] << std::endl;
+ std::cout << "Size: " << kiro.size() << " Expected: 3" << std::endl;
+ std::cout << "Expected: " << "3255 9 1" << std::endl;
+ }
+
+ return (peshobool && kirobool);
}
-bool test_tokenization(){
- StringPiece line1 = StringPiece("! ! ! ! ||| ! ! ! ! ||| 0.0804289 0.141656 0.0804289 0.443409 2.718 ||| 0-0 1-1 2-2 3-3 ||| 1 1 1");
- StringPiece line2 = StringPiece("! ! ! ) , has ||| ! ! ! ) - , a ||| 0.0804289 0.0257627 0.0804289 0.00146736 2.718 ||| 0-0 1-1 2-2 3-3 4-4 4-5 5-6 ||| 1 1 1");
- StringPiece line3 = StringPiece("! ! ! ) , ||| ! ! ! ) - , ||| 0.0804289 0.075225 0.0804289 0.00310345 2.718 ||| 0-0 1-1 2-2 3-3 4-4 4-5 ||| 1 1 1");
- StringPiece line4 = StringPiece("! ! ! ) ||| ! ! ! ) . ||| 0.0804289 0.177547 0.0268096 0.000872597 2.718 ||| 0-0 1-1 2-2 3-3 ||| 1 3 1");
+bool test_tokenization()
+{
+ StringPiece line1 = StringPiece("! ! ! ! ||| ! ! ! ! ||| 0.0804289 0.141656 0.0804289 0.443409 2.718 ||| 0-0 1-1 2-2 3-3 ||| 1 1 1");
+ StringPiece line2 = StringPiece("! ! ! ) , has ||| ! ! ! ) - , a ||| 0.0804289 0.0257627 0.0804289 0.00146736 2.718 ||| 0-0 1-1 2-2 3-3 4-4 4-5 5-6 ||| 1 1 1");
+ StringPiece line3 = StringPiece("! ! ! ) , ||| ! ! ! ) - , ||| 0.0804289 0.075225 0.0804289 0.00310345 2.718 ||| 0-0 1-1 2-2 3-3 4-4 4-5 ||| 1 1 1");
+ StringPiece line4 = StringPiece("! ! ! ) ||| ! ! ! ) . ||| 0.0804289 0.177547 0.0268096 0.000872597 2.718 ||| 0-0 1-1 2-2 3-3 ||| 1 3 1");
- line_text output1 = splitLine(line1);
- line_text output2 = splitLine(line2);
- line_text output3 = splitLine(line3);
- line_text output4 = splitLine(line4);
+ line_text output1 = splitLine(line1);
+ line_text output2 = splitLine(line2);
+ line_text output3 = splitLine(line3);
+ line_text output4 = splitLine(line4);
- bool test1 = output1.prob == StringPiece("0.0804289 0.141656 0.0804289 0.443409 2.718");
- bool test2 = output2.word_all1 == StringPiece("0-0 1-1 2-2 3-3 4-4 4-5 5-6");
- bool test3 = output2.target_phrase == StringPiece("! ! ! ) - , a");
- bool test4 = output3.source_phrase == StringPiece("! ! ! ) ,");
- bool test5 = output4.word_all2 == StringPiece("1 3 1");
+ bool test1 = output1.prob == StringPiece("0.0804289 0.141656 0.0804289 0.443409 2.718");
+ bool test2 = output2.word_all1 == StringPiece("0-0 1-1 2-2 3-3 4-4 4-5 5-6");
+ bool test3 = output2.target_phrase == StringPiece("! ! ! ) - , a");
+ bool test4 = output3.source_phrase == StringPiece("! ! ! ) ,");
+ bool test5 = output4.word_all2 == StringPiece("1 3 1");
- //std::cout << test1 << " " << test2 << " " << test3 << " " << test4 << std::endl;
+ //std::cout << test1 << " " << test2 << " " << test3 << " " << test4 << std::endl;
- return (test1 && test2 && test3 && test4 && test5);
+ return (test1 && test2 && test3 && test4 && test5);
}
-bool test_linesplitter(){
- StringPiece line1 = StringPiece("! &#93; 0.0738539 0.901133 0.0738539 0.65207 2.718 0-0 1-1 1 1 1");
- target_text ans1;
- ans1 = splitSingleTargetLine(line1);
+bool test_linesplitter()
+{
+ StringPiece line1 = StringPiece("! &#93; 0.0738539 0.901133 0.0738539 0.65207 2.718 0-0 1-1 1 1 1");
+ target_text ans1;
+ ans1 = splitSingleTargetLine(line1);
- /* For testing purposes
- std::cout << ans1.target_phrase[0] << " " <<ans1.target_phrase[1] << " Size: " << ans1.target_phrase.size() << std::endl;
- std::cout << ans1.word_all1[3] << " " << ans1.word_all2[2] << " " << ans1.prob[3] << std::endl; */
+ /* For testing purposes
+ std::cout << ans1.target_phrase[0] << " " <<ans1.target_phrase[1] << " Size: " << ans1.target_phrase.size() << std::endl;
+ std::cout << ans1.word_all1[3] << " " << ans1.word_all2[2] << " " << ans1.prob[3] << std::endl; */
- return (ans1.target_phrase.size() == 2 && ans1.prob.size() == 5 && ans1.word_all1.size() == 4 && ans1.word_all2.size() == 3);
+ return (ans1.target_phrase.size() == 2 && ans1.prob.size() == 5 && ans1.word_all1.size() == 4 && ans1.word_all2.size() == 3);
}
-bool test_linessplitter(){
- StringPiece line1 = StringPiece("! &#93; 0.0738539 0.901133 0.0738539 0.65207 2.718 0-0 1-1 1 1 1\n\n! ) . proto došlo 0.0738539 7.14446e-06");
- StringPiece line2 = StringPiece("! &quot; ) 0.536553 0.75961 0.634108 0.532927 2.718 0-0 1-1 2-2 13 11 8\n! ) . 0.0369269 0.00049839 0.00671399 0.00372884 2.718 0-0 1-1 2-1 2-2 2 11 1\n&quot; ! ) 0.0738539 0.75961 0.00671399 0.532927 2.718 1-0 0-1 2-2 1 11 1\nse ! &quot; ) 0.0738539 0.75961 0.00671399 0.0225211 2.718 0-1 1-2 2-3 1 11 1\n\n! &quot; , a to 0.0738539 0.0894238 0.0738539 0.048");
+bool test_linessplitter()
+{
+ StringPiece line1 = StringPiece("! &#93; 0.0738539 0.901133 0.0738539 0.65207 2.718 0-0 1-1 1 1 1\n\n! ) . proto došlo 0.0738539 7.14446e-06");
+ StringPiece line2 = StringPiece("! &quot; ) 0.536553 0.75961 0.634108 0.532927 2.718 0-0 1-1 2-2 13 11 8\n! ) . 0.0369269 0.00049839 0.00671399 0.00372884 2.718 0-0 1-1 2-1 2-2 2 11 1\n&quot; ! ) 0.0738539 0.75961 0.00671399 0.532927 2.718 1-0 0-1 2-2 1 11 1\nse ! &quot; ) 0.0738539 0.75961 0.00671399 0.0225211 2.718 0-1 1-2 2-3 1 11 1\n\n! &quot; , a to 0.0738539 0.0894238 0.0738539 0.048");
- std::vector<target_text> ans1;
- std::vector<target_text> ans2;
+ std::vector<target_text> ans1;
+ std::vector<target_text> ans2;
- ans1 = splitTargetLine(line1);
- ans2 = splitTargetLine(line2);
+ ans1 = splitTargetLine(line1);
+ ans2 = splitTargetLine(line2);
- bool sizes = ans1.size() == 1 && ans2.size() == 4;
- bool prob = ans1[0].prob[3] == 0.65207 && ans2[1].prob[1] == 0.00049839;
- bool word_alls = ans2[0].word_all2[1] == 11 && ans2[3].word_all1[5] == 3;
+ bool sizes = ans1.size() == 1 && ans2.size() == 4;
+ bool prob = ans1[0].prob[3] == 0.65207 && ans2[1].prob[1] == 0.00049839;
+ bool word_alls = ans2[0].word_all2[1] == 11 && ans2[3].word_all1[5] == 3;
- /* FOr testing
- std::cout << ans1.size() << std::endl;
- std::cout << ans2.size() << std::endl;
- std::cout << ans1[0].prob[3] << std::endl;
- std::cout << ans2[1].prob[1] << std::endl;
- std::cout << ans2[0].word_all2[1] << std::endl;
- std::cout << ans2[3].word_all1[5] << std::endl; */
+ /* FOr testing
+ std::cout << ans1.size() << std::endl;
+ std::cout << ans2.size() << std::endl;
+ std::cout << ans1[0].prob[3] << std::endl;
+ std::cout << ans2[1].prob[1] << std::endl;
+ std::cout << ans2[0].word_all2[1] << std::endl;
+ std::cout << ans2[3].word_all1[5] << std::endl; */
- return sizes && prob && word_alls;
+ return sizes && prob && word_alls;
}
-int main(){
- if (probabilitiesTest() && wordAll1test() && wordAll2test() && test_tokenization() && test_linesplitter() && test_linessplitter() && test_vectorinsert()){
- std::cout << "All tests pass!" << std::endl;
- } else {
- std::cout << "Failiure in some tests!" << std::endl;
- }
+int main()
+{
+ if (probabilitiesTest() && wordAll1test() && wordAll2test() && test_tokenization() && test_linesplitter() && test_linessplitter() && test_vectorinsert()) {
+ std::cout << "All tests pass!" << std::endl;
+ } else {
+ std::cout << "Failiure in some tests!" << std::endl;
+ }
- return 1;
+ return 1;
} \ No newline at end of file
diff --git a/moses/TranslationModel/ProbingPT/tests/vocabid_test.cpp b/moses/TranslationModel/ProbingPT/tests/vocabid_test.cpp
index bc82db74e..fac439eeb 100644
--- a/moses/TranslationModel/ProbingPT/tests/vocabid_test.cpp
+++ b/moses/TranslationModel/ProbingPT/tests/vocabid_test.cpp
@@ -3,43 +3,44 @@
#include "hash.hh"
#include "vocabid.hh"
-int main(int argc, char* argv[]){
-
- //Create a map and serialize it
- std::map<uint64_t, std::string> vocabids;
- StringPiece demotext = StringPiece("Demo text with 3 elements");
- add_to_map(&vocabids, demotext);
- //Serialize map
- serialize_map(&vocabids, "/tmp/testmap.bin");
-
- //Read the map and test if the values are the same
- std::map<uint64_t, std::string> newmap;
- read_map(&newmap, "/tmp/testmap.bin");
-
- //Used hashes
- uint64_t num1 = getHash(StringPiece("Demo"));
- uint64_t num2 = getVocabID("text");
- uint64_t num3 = getHash(StringPiece("with"));
- uint64_t num4 = getVocabID("3");
- uint64_t num5 = getHash(StringPiece("elements"));
- uint64_t num6 = 0;
-
- //Tests
- bool test1 = getStringFromID(&newmap, num1) == getStringFromID(&vocabids, num1);
- bool test2 = getStringFromID(&newmap, num2) == getStringFromID(&vocabids, num2);
- bool test3 = getStringFromID(&newmap, num3) == getStringFromID(&vocabids, num3);
- bool test4 = getStringFromID(&newmap, num4) == getStringFromID(&vocabids, num4);
- bool test5 = getStringFromID(&newmap, num5) == getStringFromID(&vocabids, num5);
- bool test6 = getStringFromID(&newmap, num6) == getStringFromID(&vocabids, num6);
-
-
- if (test1 && test2 && test3 && test4 && test5 && test6){
- std::cout << "Map was successfully written and read!" << std::endl;
- } else {
- std::cout << "Error! " << test1 << " " << test2 << " " << test3 << " " << test4 << " " << test5 << " " << test6 << std::endl;
- }
-
-
- return 1;
-
-}
+int main(int argc, char* argv[])
+{
+
+ //Create a map and serialize it
+ std::map<uint64_t, std::string> vocabids;
+ StringPiece demotext = StringPiece("Demo text with 3 elements");
+ add_to_map(&vocabids, demotext);
+ //Serialize map
+ serialize_map(&vocabids, "/tmp/testmap.bin");
+
+ //Read the map and test if the values are the same
+ std::map<uint64_t, std::string> newmap;
+ read_map(&newmap, "/tmp/testmap.bin");
+
+ //Used hashes
+ uint64_t num1 = getHash(StringPiece("Demo"));
+ uint64_t num2 = getVocabID("text");
+ uint64_t num3 = getHash(StringPiece("with"));
+ uint64_t num4 = getVocabID("3");
+ uint64_t num5 = getHash(StringPiece("elements"));
+ uint64_t num6 = 0;
+
+ //Tests
+ bool test1 = getStringFromID(&newmap, num1) == getStringFromID(&vocabids, num1);
+ bool test2 = getStringFromID(&newmap, num2) == getStringFromID(&vocabids, num2);
+ bool test3 = getStringFromID(&newmap, num3) == getStringFromID(&vocabids, num3);
+ bool test4 = getStringFromID(&newmap, num4) == getStringFromID(&vocabids, num4);
+ bool test5 = getStringFromID(&newmap, num5) == getStringFromID(&vocabids, num5);
+ bool test6 = getStringFromID(&newmap, num6) == getStringFromID(&vocabids, num6);
+
+
+ if (test1 && test2 && test3 && test4 && test5 && test6) {
+ std::cout << "Map was successfully written and read!" << std::endl;
+ } else {
+ std::cout << "Error! " << test1 << " " << test2 << " " << test3 << " " << test4 << " " << test5 << " " << test6 << std::endl;
+ }
+
+
+ return 1;
+
+}
diff --git a/moses/TranslationModel/ProbingPT/vocabid.cpp b/moses/TranslationModel/ProbingPT/vocabid.cpp
index bcdbe78d0..1452f299d 100644
--- a/moses/TranslationModel/ProbingPT/vocabid.cpp
+++ b/moses/TranslationModel/ProbingPT/vocabid.cpp
@@ -1,29 +1,32 @@
-#include "vocabid.hh"
+#include "vocabid.hh"
-void add_to_map(std::map<uint64_t, std::string> *karta, StringPiece textin){
- //Tokenize
- util::TokenIter<util::SingleCharacter> it(textin, util::SingleCharacter(' '));
+void add_to_map(std::map<uint64_t, std::string> *karta, StringPiece textin)
+{
+ //Tokenize
+ util::TokenIter<util::SingleCharacter> it(textin, util::SingleCharacter(' '));
- while(it){
- karta->insert(std::pair<uint64_t, std::string>(getHash(*it), it->as_string()));
- it++;
- }
+ while(it) {
+ karta->insert(std::pair<uint64_t, std::string>(getHash(*it), it->as_string()));
+ it++;
+ }
}
-void serialize_map(std::map<uint64_t, std::string> *karta, const char* filename){
- std::ofstream os (filename, std::ios::binary);
- boost::archive::text_oarchive oarch(os);
+void serialize_map(std::map<uint64_t, std::string> *karta, const char* filename)
+{
+ std::ofstream os (filename, std::ios::binary);
+ boost::archive::text_oarchive oarch(os);
- oarch << *karta; //Serialise map
- os.close();
+ oarch << *karta; //Serialise map
+ os.close();
}
-void read_map(std::map<uint64_t, std::string> *karta, const char* filename){
- std::ifstream is (filename, std::ios::binary);
- boost::archive::text_iarchive iarch(is);
+void read_map(std::map<uint64_t, std::string> *karta, const char* filename)
+{
+ std::ifstream is (filename, std::ios::binary);
+ boost::archive::text_iarchive iarch(is);
- iarch >> *karta;
+ iarch >> *karta;
- //Close the stream after we are done.
- is.close();
+ //Close the stream after we are done.
+ is.close();
}
diff --git a/moses/TranslationModel/RuleTable/LoaderCompact.cpp b/moses/TranslationModel/RuleTable/LoaderCompact.cpp
index ed98f39bc..c947dfdc2 100644
--- a/moses/TranslationModel/RuleTable/LoaderCompact.cpp
+++ b/moses/TranslationModel/RuleTable/LoaderCompact.cpp
@@ -45,7 +45,7 @@ bool RuleTableLoaderCompact::Load(const std::vector<FactorType> &input,
// Read and check version number.
reader.ReadLine();
if (reader.m_line != "1") {
- std::cerr << "Unexpected compact rule table format: " << reader.m_line;
+ std::cerr << "Unexpected compact rule table format: " << reader.m_line;
return false;
}
@@ -208,9 +208,9 @@ bool RuleTableLoaderCompact::LoadRuleSection(
scoreVector[j] = FloorScore(TransformScore(score));
}
if (reader.m_line[tokenPositions[3+numScoreComponents]] != ':') {
- std::cerr << "Size of scoreVector != number ("
- << scoreVector.size() << "!=" << numScoreComponents
- << ") of score components on line " << reader.m_lineNum;
+ std::cerr << "Size of scoreVector != number ("
+ << scoreVector.size() << "!=" << numScoreComponents
+ << ") of score components on line " << reader.m_lineNum;
return false;
}
diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp b/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp
index 3bcd57168..63ec38599 100644
--- a/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp
+++ b/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp
@@ -49,7 +49,7 @@ void PhraseDictionaryALSuffixArray::InitializeForInput(InputType const& source)
*this);
UTIL_THROW_IF2(!ret,
- "Rules not successfully loaded for sentence id " << translationId);
+ "Rules not successfully loaded for sentence id " << translationId);
}
void PhraseDictionaryALSuffixArray::CleanUpAfterSentenceProcessing(const InputType &source)
diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp
index cc554c602..ce1290f92 100644
--- a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp
+++ b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp
@@ -212,11 +212,9 @@ void PhraseDictionaryOnDisk::SetParameter(const std::string& key, const std::str
{
if (key == "max-span-default") {
m_maxSpanDefault = Scan<size_t>(value);
- }
- else if (key == "max-span-labelled") {
+ } else if (key == "max-span-labelled") {
m_maxSpanLabelled = Scan<size_t>(value);
- }
- else {
+ } else {
PhraseDictionary::SetParameter(key, value);
}
}
diff --git a/moses/TranslationModel/Scope3Parser/Parser.cpp b/moses/TranslationModel/Scope3Parser/Parser.cpp
index 1b08ec94e..c8c8c3e49 100644
--- a/moses/TranslationModel/Scope3Parser/Parser.cpp
+++ b/moses/TranslationModel/Scope3Parser/Parser.cpp
@@ -39,7 +39,7 @@ namespace Moses
{
void Scope3Parser::GetChartRuleCollection(
- const InputPath &inputPath,
+ const InputPath &inputPath,
size_t last,
ChartParserCallback &outColl)
{
diff --git a/moses/TranslationModel/Scope3Parser/Parser.h b/moses/TranslationModel/Scope3Parser/Parser.h
index c07e2143d..70b26b50d 100644
--- a/moses/TranslationModel/Scope3Parser/Parser.h
+++ b/moses/TranslationModel/Scope3Parser/Parser.h
@@ -58,7 +58,7 @@ public:
}
void GetChartRuleCollection(
- const InputPath &inputPath,
+ const InputPath &inputPath,
size_t last,
ChartParserCallback &outColl);
diff --git a/moses/TranslationModel/fuzzy-match/create_xml.cpp b/moses/TranslationModel/fuzzy-match/create_xml.cpp
index a8b6a52cf..0a31b9b28 100644
--- a/moses/TranslationModel/fuzzy-match/create_xml.cpp
+++ b/moses/TranslationModel/fuzzy-match/create_xml.cpp
@@ -47,11 +47,13 @@ void create_xml(const string &inPath)
//cout << inLine << endl;
switch (step) {
case 0:
- /*setenceId = */ Scan<int>(inLine);
+ /*setenceId = */
+ Scan<int>(inLine);
++step;
break;
case 1:
- /*score = */ Scan<float>(inLine);
+ /*score = */
+ Scan<float>(inLine);
++step;
break;
case 2:
diff --git a/moses/TranslationOption.h b/moses/TranslationOption.h
index ff2a6295c..b5a50fc32 100644
--- a/moses/TranslationOption.h
+++ b/moses/TranslationOption.h
@@ -140,7 +140,7 @@ public:
void UpdateScore(ScoreComponentCollection *futureScoreBreakdown = NULL) {
m_targetPhrase.UpdateScore(futureScoreBreakdown);
}
-
+
/** returns cached scores */
inline const Scores *GetLexReorderingScores(const LexicalReordering *scoreProducer) const {
_ScoreCacheMap::const_iterator it = m_lexReorderingScores.find(scoreProducer);
diff --git a/moses/TranslationOptionCollection.cpp b/moses/TranslationOptionCollection.cpp
index 5f3a1a453..212b346d0 100644
--- a/moses/TranslationOptionCollection.cpp
+++ b/moses/TranslationOptionCollection.cpp
@@ -555,7 +555,7 @@ void TranslationOptionCollection::SetInputScore(const InputPath &inputPath, Part
void TranslationOptionCollection::EvaluateWithSourceContext()
{
const size_t size = m_source.GetSize();
-
+
for (size_t startPos = 0 ; startPos < size ; ++startPos) {
size_t maxSize = m_source.GetSize() - startPos;
size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
@@ -569,15 +569,16 @@ void TranslationOptionCollection::EvaluateWithSourceContext()
TranslationOption &transOpt = **iterTransOpt;
transOpt.EvaluateWithSourceContext(m_source);
}
-
+
EvaluateTranslatonOptionListWithSourceContext(transOptList);
}
}
}
void TranslationOptionCollection::EvaluateTranslatonOptionListWithSourceContext(
- TranslationOptionList &translationOptionList) {
-
+ TranslationOptionList &translationOptionList)
+{
+
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
const StaticData &staticData = StaticData::Instance();
for (size_t i = 0; i < ffs.size(); ++i) {
@@ -586,7 +587,7 @@ void TranslationOptionCollection::EvaluateTranslatonOptionListWithSourceContext(
ff.EvaluateTranslationOptionListWithSourceContext(m_source, translationOptionList);
}
}
-
+
}
void TranslationOptionCollection::Sort()
diff --git a/moses/TranslationOptionCollection.h b/moses/TranslationOptionCollection.h
index 0598df46f..2db0df34a 100644
--- a/moses/TranslationOptionCollection.h
+++ b/moses/TranslationOptionCollection.h
@@ -81,7 +81,7 @@ protected:
void ProcessUnknownWord();
//! special handling of ONE unknown words.
virtual void ProcessOneUnknownWord(const InputPath &inputPath, size_t sourcePos, size_t length = 1, const ScorePair *inputScores = NULL);
-
+
//! pruning: only keep the top n (m_maxNoTransOptPerCoverage) elements */
void Prune();
@@ -99,7 +99,7 @@ protected:
void EvaluateWithSourceContext();
void EvaluateTranslatonOptionListWithSourceContext(TranslationOptionList&);
-
+
void CacheLexReordering();
void GetTargetPhraseCollectionBatch();
diff --git a/moses/TranslationOptionCollectionLattice.cpp b/moses/TranslationOptionCollectionLattice.cpp
index 349aa385c..d20e07fbf 100644
--- a/moses/TranslationOptionCollectionLattice.cpp
+++ b/moses/TranslationOptionCollectionLattice.cpp
@@ -24,7 +24,7 @@ TranslationOptionCollectionLattice::TranslationOptionCollectionLattice(
: TranslationOptionCollection(input, maxNoTransOptPerCoverage, translationOptionThreshold)
{
UTIL_THROW_IF2(StaticData::Instance().GetUseLegacyPT(),
- "Not for models using the legqacy binary phrase table");
+ "Not for models using the legqacy binary phrase table");
const InputFeature &inputFeature = InputFeature::Instance();
UTIL_THROW_IF2(&inputFeature == NULL, "Input feature must be specified");
@@ -48,7 +48,7 @@ TranslationOptionCollectionLattice::TranslationOptionCollectionLattice(
WordsRange range(startPos, endPos);
if (range.GetNumWordsCovered() > maxPhraseLength) {
- continue;
+ continue;
}
const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);
@@ -73,53 +73,53 @@ TranslationOptionCollectionLattice::TranslationOptionCollectionLattice(
void TranslationOptionCollectionLattice::Extend(const InputPath &prevPath, const WordLattice &input)
{
- size_t nextPos = prevPath.GetWordsRange().GetEndPos() + 1;
- if (nextPos >= input.GetSize()) {
- return;
- }
+ size_t nextPos = prevPath.GetWordsRange().GetEndPos() + 1;
+ if (nextPos >= input.GetSize()) {
+ return;
+ }
- size_t startPos = prevPath.GetWordsRange().GetStartPos();
- const Phrase &prevPhrase = prevPath.GetPhrase();
- const ScorePair *prevInputScore = prevPath.GetInputScore();
- UTIL_THROW_IF2(prevInputScore == NULL,
- "Null previous score");
+ size_t startPos = prevPath.GetWordsRange().GetStartPos();
+ const Phrase &prevPhrase = prevPath.GetPhrase();
+ const ScorePair *prevInputScore = prevPath.GetInputScore();
+ UTIL_THROW_IF2(prevInputScore == NULL,
+ "Null previous score");
- const std::vector<size_t> &nextNodes = input.GetNextNodes(nextPos);
+ const std::vector<size_t> &nextNodes = input.GetNextNodes(nextPos);
- const ConfusionNet::Column &col = input.GetColumn(nextPos);
- for (size_t i = 0; i < col.size(); ++i) {
- const Word &word = col[i].first;
- UTIL_THROW_IF2(word.IsEpsilon(), "Epsilon not supported");
+ const ConfusionNet::Column &col = input.GetColumn(nextPos);
+ for (size_t i = 0; i < col.size(); ++i) {
+ const Word &word = col[i].first;
+ UTIL_THROW_IF2(word.IsEpsilon(), "Epsilon not supported");
- size_t nextNode = nextNodes[i];
- size_t endPos = nextPos + nextNode - 1;
+ size_t nextNode = nextNodes[i];
+ size_t endPos = nextPos + nextNode - 1;
- WordsRange range(startPos, endPos);
+ WordsRange range(startPos, endPos);
- size_t maxPhraseLength = StaticData::Instance().GetMaxPhraseLength();
- if (range.GetNumWordsCovered() > maxPhraseLength) {
- continue;
- }
+ size_t maxPhraseLength = StaticData::Instance().GetMaxPhraseLength();
+ if (range.GetNumWordsCovered() > maxPhraseLength) {
+ continue;
+ }
- const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);
+ const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);
- Phrase subphrase(prevPhrase);
- subphrase.AddWord(word);
+ Phrase subphrase(prevPhrase);
+ subphrase.AddWord(word);
- const ScorePair &scores = col[i].second;
- ScorePair *inputScore = new ScorePair(*prevInputScore);
- inputScore->PlusEquals(scores);
+ const ScorePair &scores = col[i].second;
+ ScorePair *inputScore = new ScorePair(*prevInputScore);
+ inputScore->PlusEquals(scores);
- InputPath *path = new InputPath(subphrase, labels, range, &prevPath, inputScore);
+ InputPath *path = new InputPath(subphrase, labels, range, &prevPath, inputScore);
- path->SetNextNode(nextNode);
- m_inputPathQueue.push_back(path);
+ path->SetNextNode(nextNode);
+ m_inputPathQueue.push_back(path);
- // recursive
- Extend(*path, input);
+ // recursive
+ Extend(*path, input);
- }
+ }
}
void TranslationOptionCollectionLattice::CreateTranslationOptions()
@@ -142,19 +142,18 @@ void TranslationOptionCollectionLattice::CreateTranslationOptions()
const WordsRange &range = path.GetWordsRange();
if (tpColl && tpColl->GetSize()) {
- TargetPhraseCollection::const_iterator iter;
- for (iter = tpColl->begin(); iter != tpColl->end(); ++iter) {
- const TargetPhrase &tp = **iter;
- TranslationOption *transOpt = new TranslationOption(range, tp);
- transOpt->SetInputPath(path);
- transOpt->EvaluateWithSourceContext(m_source);
-
- Add(transOpt);
- }
- }
- else if (path.GetPhrase().GetSize() == 1) {
- // unknown word processing
- ProcessOneUnknownWord(path, path.GetWordsRange().GetStartPos(), path.GetWordsRange().GetNumWordsCovered() , path.GetInputScore());
+ TargetPhraseCollection::const_iterator iter;
+ for (iter = tpColl->begin(); iter != tpColl->end(); ++iter) {
+ const TargetPhrase &tp = **iter;
+ TranslationOption *transOpt = new TranslationOption(range, tp);
+ transOpt->SetInputPath(path);
+ transOpt->EvaluateWithSourceContext(m_source);
+
+ Add(transOpt);
+ }
+ } else if (path.GetPhrase().GetSize() == 1) {
+ // unknown word processing
+ ProcessOneUnknownWord(path, path.GetWordsRange().GetStartPos(), path.GetWordsRange().GetNumWordsCovered() , path.GetInputScore());
}
}
@@ -173,16 +172,16 @@ void TranslationOptionCollectionLattice::CreateTranslationOptions()
void TranslationOptionCollectionLattice::ProcessUnknownWord(size_t sourcePos)
{
- UTIL_THROW(util::Exception, "ProcessUnknownWord() not implemented for lattice");
+ UTIL_THROW(util::Exception, "ProcessUnknownWord() not implemented for lattice");
}
void TranslationOptionCollectionLattice::CreateTranslationOptionsForRange(const DecodeGraph &decodeStepList
- , size_t startPosition
- , size_t endPosition
- , bool adhereTableLimit
- , size_t graphInd)
+ , size_t startPosition
+ , size_t endPosition
+ , bool adhereTableLimit
+ , size_t graphInd)
{
- UTIL_THROW(util::Exception, "CreateTranslationOptionsForRange() not implemented for lattice");
+ UTIL_THROW(util::Exception, "CreateTranslationOptionsForRange() not implemented for lattice");
}
} // namespace
diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp
index f9a559a83..6e95a1368 100644
--- a/moses/TranslationTask.cpp
+++ b/moses/TranslationTask.cpp
@@ -21,11 +21,12 @@ namespace Moses
{
TranslationTask::TranslationTask(InputType* source, Moses::IOWrapper &ioWrapper)
-: m_source(source)
-, m_ioWrapper(ioWrapper)
+ : m_source(source)
+ , m_ioWrapper(ioWrapper)
{}
-TranslationTask::~TranslationTask() {
+TranslationTask::~TranslationTask()
+{
delete m_source;
}
diff --git a/moses/TreeInput.cpp b/moses/TreeInput.cpp
index 792522540..3fef029ad 100644
--- a/moses/TreeInput.cpp
+++ b/moses/TreeInput.cpp
@@ -155,8 +155,7 @@ bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector<XMLParseOutput>
if (startPos == endPos) {
TRACE_ERR("WARNING: tag " << tagName << " span is empty. Ignoring: " << line << endl);
continue;
- }
- else if (startPos > endPos) {
+ } else if (startPos > endPos) {
TRACE_ERR("ERROR: tag " << tagName << " startPos > endPos: " << line << endl);
return false;
}
diff --git a/moses/TypeDef.h b/moses/TypeDef.h
index d0bd636b3..79f24d505 100644
--- a/moses/TypeDef.h
+++ b/moses/TypeDef.h
@@ -61,7 +61,7 @@ const size_t DEFAULT_MAX_TRANS_OPT_CACHE_SIZE = 10000;
const size_t DEFAULT_MAX_TRANS_OPT_SIZE = 5000;
const size_t DEFAULT_MAX_PART_TRANS_OPT_SIZE = 10000;
//#ifdef PT_UG
-// setting to std::numeric_limits<size_t>::max() makes the regression test for (deprecated) PhraseDictionaryDynamicSuffixArray fail.
+// setting to std::numeric_limits<size_t>::max() makes the regression test for (deprecated) PhraseDictionaryDynamicSuffixArray fail.
// const size_t DEFAULT_MAX_PHRASE_LENGTH = 100000;
//#else
const size_t DEFAULT_MAX_PHRASE_LENGTH = 20;
@@ -123,7 +123,7 @@ enum InputTypeEnum {
,WordLatticeInput = 2
,TreeInputType = 3
,WordLatticeInput2 = 4
- , TabbedSentenceInput = 5
+ , TabbedSentenceInput = 5
};
@@ -142,11 +142,11 @@ enum DictionaryFind {
enum SearchAlgorithm {
Normal = 0
- ,CubePruning = 1
- //,CubeGrowing = 2
- ,CYKPlus = 3
- ,NormalBatch = 4
- ,ChartIncremental = 5
+ ,CubePruning = 1
+ //,CubeGrowing = 2
+ ,CYKPlus = 3
+ ,NormalBatch = 4
+ ,ChartIncremental = 5
};
enum SourceLabelOverlap {
diff --git a/moses/Util.cpp b/moses/Util.cpp
index 34d03cab8..24323f61d 100644
--- a/moses/Util.cpp
+++ b/moses/Util.cpp
@@ -362,8 +362,7 @@ void ShowWeights()
const StatefulFeatureFunction *ff = sff[i];
if (ff->IsTuneable()) {
PrintFeatureWeight(ff);
- }
- else {
+ } else {
cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
}
}
@@ -371,8 +370,7 @@ void ShowWeights()
const StatelessFeatureFunction *ff = slf[i];
if (ff->IsTuneable()) {
PrintFeatureWeight(ff);
- }
- else {
+ } else {
cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
}
}
diff --git a/moses/Util.h b/moses/Util.h
index 12038468e..beefa53da 100644
--- a/moses/Util.h
+++ b/moses/Util.h
@@ -145,7 +145,7 @@ inline XmlInputType Scan<XmlInputType>(const std::string &input)
else if (input=="ignore") ret = XmlIgnore;
else if (input=="pass-through") ret = XmlPassThrough;
else {
- UTIL_THROW2("Unknown XML input type");
+ UTIL_THROW2("Unknown XML input type");
}
return ret;
diff --git a/moses/Word.cpp b/moses/Word.cpp
index b1ea77059..f55be5ee8 100644
--- a/moses/Word.cpp
+++ b/moses/Word.cpp
@@ -34,15 +34,15 @@ using namespace std;
namespace Moses
{
-
- // utility function for factorless decoding
- size_t
- max_fax()
- {
- if (StaticData::Instance().GetFactorDelimiter().size())
- return MAX_NUM_FACTORS;
- return 1;
- }
+
+// utility function for factorless decoding
+size_t
+max_fax()
+{
+ if (StaticData::Instance().GetFactorDelimiter().size())
+ return MAX_NUM_FACTORS;
+ return 1;
+}
// static
int Word::Compare(const Word &targetWord, const Word &sourceWord)
@@ -85,8 +85,8 @@ std::string Word::GetString(const vector<FactorType> factorType,bool endWithBlan
unsigned int stop = min(max_fax(),factorType.size());
for (unsigned int i = 0 ; i < stop ; i++) {
UTIL_THROW_IF2(factorType[i] >= MAX_NUM_FACTORS,
- "Trying to reference factor " << factorType[i]
- << ". Max factor is " << MAX_NUM_FACTORS);
+ "Trying to reference factor " << factorType[i]
+ << ". Max factor is " << MAX_NUM_FACTORS);
const Factor *factor = m_factorArray[factorType[i]];
if (factor != NULL) {
@@ -109,49 +109,45 @@ StringPiece Word::GetString(FactorType factorType) const
class StrayFactorException : public util::Exception {};
-void
+void
Word::
CreateFromString(FactorDirection direction
- , const std::vector<FactorType> &factorOrder
- , const StringPiece &str
- , bool isNonTerminal
- , bool strict)
+ , const std::vector<FactorType> &factorOrder
+ , const StringPiece &str
+ , bool isNonTerminal
+ , bool strict)
{
FactorCollection &factorCollection = FactorCollection::Instance();
vector<StringPiece> bits(MAX_NUM_FACTORS);
string factorDelimiter = StaticData::Instance().GetFactorDelimiter();
- if (factorDelimiter.size())
- {
- util::TokenIter<util::MultiCharacter> fit(str, factorDelimiter);
- size_t i = 0;
- for (; i < MAX_NUM_FACTORS && fit; ++i,++fit)
- bits[i] = *fit;
- if (i == MAX_NUM_FACTORS)
- UTIL_THROW_IF(fit, StrayFactorException,
- "The hard limit for factors is " << MAX_NUM_FACTORS
- << ". The word " << str << " contains factor delimiter "
- << StaticData::Instance().GetFactorDelimiter()
- << " too many times.");
- if (strict)
- UTIL_THROW_IF(fit, StrayFactorException,
- "You have configured " << factorOrder.size()
- << " factors but the word " << str
- << " contains factor delimiter "
- << StaticData::Instance().GetFactorDelimiter()
- << " too many times.");
- UTIL_THROW_IF(!isNonTerminal && i < factorOrder.size(),util::Exception,
- "Too few factors in string '" << str << "'.");
- }
- else
- {
- bits[0] = str;
- }
- for (size_t k = 0; k < factorOrder.size(); ++k)
- {
- UTIL_THROW_IF(factorOrder[k] >= MAX_NUM_FACTORS, util::Exception,
- "Factor order out of bounds.");
- m_factorArray[factorOrder[k]] = factorCollection.AddFactor(bits[k], isNonTerminal);
- }
+ if (factorDelimiter.size()) {
+ util::TokenIter<util::MultiCharacter> fit(str, factorDelimiter);
+ size_t i = 0;
+ for (; i < MAX_NUM_FACTORS && fit; ++i,++fit)
+ bits[i] = *fit;
+ if (i == MAX_NUM_FACTORS)
+ UTIL_THROW_IF(fit, StrayFactorException,
+ "The hard limit for factors is " << MAX_NUM_FACTORS
+ << ". The word " << str << " contains factor delimiter "
+ << StaticData::Instance().GetFactorDelimiter()
+ << " too many times.");
+ if (strict)
+ UTIL_THROW_IF(fit, StrayFactorException,
+ "You have configured " << factorOrder.size()
+ << " factors but the word " << str
+ << " contains factor delimiter "
+ << StaticData::Instance().GetFactorDelimiter()
+ << " too many times.");
+ UTIL_THROW_IF(!isNonTerminal && i < factorOrder.size(),util::Exception,
+ "Too few factors in string '" << str << "'.");
+ } else {
+ bits[0] = str;
+ }
+ for (size_t k = 0; k < factorOrder.size(); ++k) {
+ UTIL_THROW_IF(factorOrder[k] >= MAX_NUM_FACTORS, util::Exception,
+ "Factor order out of bounds.");
+ m_factorArray[factorOrder[k]] = factorCollection.AddFactor(bits[k], isNonTerminal);
+ }
// assume term/non-term same for all factors
m_isNonTerminal = isNonTerminal;
}
@@ -188,10 +184,10 @@ void Word::OnlyTheseFactors(const FactorMask &factors)
bool Word::IsEpsilon() const
{
- const Factor *factor = m_factorArray[0];
- int compare = factor->GetString().compare(EPSILON);
+ const Factor *factor = m_factorArray[0];
+ int compare = factor->GetString().compare(EPSILON);
- return compare == 0;
+ return compare == 0;
}
TO_STRING_BODY(Word);
diff --git a/moses/XmlOption.cpp b/moses/XmlOption.cpp
index 2f66d647e..3ac4f6cd2 100644
--- a/moses/XmlOption.cpp
+++ b/moses/XmlOption.cpp
@@ -321,32 +321,32 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
// update: add new aligned sentence pair to Mmsapt identified by name
else if (tagName == "update") {
#if PT_UG
- // get model name and aligned sentence pair
- string pdName = ParseXmlTagAttribute(tagContent,"name");
- string source = ParseXmlTagAttribute(tagContent,"source");
- string target = ParseXmlTagAttribute(tagContent,"target");
- string alignment = ParseXmlTagAttribute(tagContent,"alignment");
- // find PhraseDictionary by name
- const vector<PhraseDictionary*> &pds = PhraseDictionary::GetColl();
- PhraseDictionary* pd = NULL;
- for (vector<PhraseDictionary*>::const_iterator i = pds.begin(); i != pds.end(); ++i) {
- PhraseDictionary* curPd = *i;
- if (curPd->GetScoreProducerDescription() == pdName) {
- pd = curPd;
- break;
- }
- }
- if (pd == NULL) {
- TRACE_ERR("ERROR: No PhraseDictionary with name " << pdName << ", no update" << endl);
- return false;
+ // get model name and aligned sentence pair
+ string pdName = ParseXmlTagAttribute(tagContent,"name");
+ string source = ParseXmlTagAttribute(tagContent,"source");
+ string target = ParseXmlTagAttribute(tagContent,"target");
+ string alignment = ParseXmlTagAttribute(tagContent,"alignment");
+ // find PhraseDictionary by name
+ const vector<PhraseDictionary*> &pds = PhraseDictionary::GetColl();
+ PhraseDictionary* pd = NULL;
+ for (vector<PhraseDictionary*>::const_iterator i = pds.begin(); i != pds.end(); ++i) {
+ PhraseDictionary* curPd = *i;
+ if (curPd->GetScoreProducerDescription() == pdName) {
+ pd = curPd;
+ break;
}
- // update model
- VERBOSE(3,"Updating " << pdName << " ||| " << source << " ||| " << target << " ||| " << alignment << endl);
- Mmsapt* pdsa = reinterpret_cast<Mmsapt*>(pd);
- pdsa->add(source, target, alignment);
-#else
- TRACE_ERR("ERROR: recompile with --with-mm to update PhraseDictionary at runtime" << endl);
+ }
+ if (pd == NULL) {
+ TRACE_ERR("ERROR: No PhraseDictionary with name " << pdName << ", no update" << endl);
return false;
+ }
+ // update model
+ VERBOSE(3,"Updating " << pdName << " ||| " << source << " ||| " << target << " ||| " << alignment << endl);
+ Mmsapt* pdsa = reinterpret_cast<Mmsapt*>(pd);
+ pdsa->add(source, target, alignment);
+#else
+ TRACE_ERR("ERROR: recompile with --with-mm to update PhraseDictionary at runtime" << endl);
+ return false;
#endif
}
@@ -356,44 +356,44 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
// for PhraseDictionaryBitextSampling (Mmsapt) models:
// <update name="TranslationModelName" source=" " target=" " alignment=" " />
else if (tagName == "weight-overwrite") {
-
- // is a name->ff map stored anywhere so we don't have to build it every time?
- const vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
- boost::unordered_map<string, FeatureFunction*> map;
- BOOST_FOREACH(FeatureFunction* const& ff, ffs) {
- map[ff->GetScoreProducerDescription()] = ff;
- }
- // update each weight listed
- ScoreComponentCollection allWeights = StaticData::Instance().GetAllWeights();
- boost::unordered_map<string, FeatureFunction*>::iterator ffi;
- string ffName("");
- vector<float> ffWeights;
- vector<string> toks = Tokenize(ParseXmlTagAttribute(tagContent,"weights"));
- BOOST_FOREACH(string const& tok, toks) {
- if (tok.substr(tok.size() - 1, 1) == "=") {
- // start new feature
- if (ffName != "") {
- // set previous feature weights
- if (ffi != map.end()) {
- allWeights.Assign(ffi->second, ffWeights);
- }
- ffWeights.clear();
- }
- ffName = tok.substr(0, tok.size() - 1);
- ffi = map.find(ffName);
- if (ffi == map.end()) {
- TRACE_ERR("ERROR: No FeatureFunction with name " << ffName << ", no weight update" << endl);
- }
- } else {
- // weight for current feature
- ffWeights.push_back(Scan<float>(tok));
+ // is a name->ff map stored anywhere so we don't have to build it every time?
+ const vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
+ boost::unordered_map<string, FeatureFunction*> map;
+ BOOST_FOREACH(FeatureFunction* const& ff, ffs) {
+ map[ff->GetScoreProducerDescription()] = ff;
+ }
+
+ // update each weight listed
+ ScoreComponentCollection allWeights = StaticData::Instance().GetAllWeights();
+ boost::unordered_map<string, FeatureFunction*>::iterator ffi;
+ string ffName("");
+ vector<float> ffWeights;
+ vector<string> toks = Tokenize(ParseXmlTagAttribute(tagContent,"weights"));
+ BOOST_FOREACH(string const& tok, toks) {
+ if (tok.substr(tok.size() - 1, 1) == "=") {
+ // start new feature
+ if (ffName != "") {
+ // set previous feature weights
+ if (ffi != map.end()) {
+ allWeights.Assign(ffi->second, ffWeights);
}
+ ffWeights.clear();
+ }
+ ffName = tok.substr(0, tok.size() - 1);
+ ffi = map.find(ffName);
+ if (ffi == map.end()) {
+ TRACE_ERR("ERROR: No FeatureFunction with name " << ffName << ", no weight update" << endl);
+ }
+ } else {
+ // weight for current feature
+ ffWeights.push_back(Scan<float>(tok));
}
- if (ffi != map.end()) {
- allWeights.Assign(ffi->second, ffWeights);
- }
- StaticData::InstanceNonConst().SetAllWeights(allWeights);
+ }
+ if (ffi != map.end()) {
+ allWeights.Assign(ffi->second, ffWeights);
+ }
+ StaticData::InstanceNonConst().SetAllWeights(allWeights);
}
// default: opening tag that specifies translation options
@@ -401,8 +401,7 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
if (startPos > endPos) {
TRACE_ERR("ERROR: tag " << tagName << " startPos > endPos: " << line << endl);
return false;
- }
- else if (startPos == endPos) {
+ } else if (startPos == endPos) {
TRACE_ERR("WARNING: tag " << tagName << " 0 span: " << line << endl);
continue;
}
diff --git a/phrase-extract/DomainFeature.cpp b/phrase-extract/DomainFeature.cpp
index 4cbaba50c..899eb9f1c 100644
--- a/phrase-extract/DomainFeature.cpp
+++ b/phrase-extract/DomainFeature.cpp
@@ -14,7 +14,7 @@ void Domain::load( const std::string &domainFileName )
Moses::InputFileStream fileS( domainFileName );
istream *fileP = &fileS;
- string line;
+ string line;
while(getline(*fileP, line)) {
// read
vector< string > domainSpecLine = tokenize( line.c_str() );
diff --git a/phrase-extract/ExtractionPhrasePair.cpp b/phrase-extract/ExtractionPhrasePair.cpp
index 2fb93fab2..2f73f36f0 100644
--- a/phrase-extract/ExtractionPhrasePair.cpp
+++ b/phrase-extract/ExtractionPhrasePair.cpp
@@ -324,10 +324,10 @@ std::string ExtractionPhrasePair::CollectAllPropertyValues(const std::string &ke
std::string ExtractionPhrasePair::CollectAllLabelsSeparateLHSAndRHS(const std::string& propertyKey,
- std::set<std::string>& labelSet,
- boost::unordered_map<std::string,float>& countsLabelsLHS,
- boost::unordered_map<std::string, boost::unordered_map<std::string,float>* >& jointCountsRulesTargetLHSAndLabelsLHS,
- Vocabulary &vcbT) const
+ std::set<std::string>& labelSet,
+ boost::unordered_map<std::string,float>& countsLabelsLHS,
+ boost::unordered_map<std::string, boost::unordered_map<std::string,float>* >& jointCountsRulesTargetLHSAndLabelsLHS,
+ Vocabulary &vcbT) const
{
const PROPERTY_VALUES *allPropertyValues = GetProperty( propertyKey );
@@ -340,7 +340,7 @@ std::string ExtractionPhrasePair::CollectAllLabelsSeparateLHSAndRHS(const std::s
std::list< std::pair<std::string,float> > lhsGivenCurrentRhsCounts;
std::ostringstream oss;
- for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin();
+ for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin();
iter!=allPropertyValues->end(); ++iter) {
size_t space = (iter->first).find_last_of(' ');
@@ -379,28 +379,28 @@ std::string ExtractionPhrasePair::CollectAllLabelsSeparateLHSAndRHS(const std::s
ruleTargetLhs.erase(ruleTargetLhs.begin()); // strip square brackets
ruleTargetLhs.erase(ruleTargetLhs.size()-1);
- std::pair< boost::unordered_map<std::string,float>::iterator, bool > insertedCountsLabelsLHS =
- countsLabelsLHS.insert(std::pair<std::string,float>(iter2->first,iter2->second));
+ std::pair< boost::unordered_map<std::string,float>::iterator, bool > insertedCountsLabelsLHS =
+ countsLabelsLHS.insert(std::pair<std::string,float>(iter2->first,iter2->second));
if (!insertedCountsLabelsLHS.second) {
(insertedCountsLabelsLHS.first)->second += iter2->second;
}
- boost::unordered_map<std::string, boost::unordered_map<std::string,float>* >::iterator jointCountsRulesTargetLHSAndLabelsLHSIter =
- jointCountsRulesTargetLHSAndLabelsLHS.find(ruleTargetLhs);
+ boost::unordered_map<std::string, boost::unordered_map<std::string,float>* >::iterator jointCountsRulesTargetLHSAndLabelsLHSIter =
+ jointCountsRulesTargetLHSAndLabelsLHS.find(ruleTargetLhs);
if ( jointCountsRulesTargetLHSAndLabelsLHSIter == jointCountsRulesTargetLHSAndLabelsLHS.end() ) {
boost::unordered_map<std::string,float>* jointCounts = new boost::unordered_map<std::string,float>;
jointCounts->insert(std::pair<std::string,float>(iter2->first,iter2->second));
jointCountsRulesTargetLHSAndLabelsLHS.insert(std::pair<std::string,boost::unordered_map<std::string,float>* >(ruleTargetLhs,jointCounts));
} else {
boost::unordered_map<std::string,float>* jointCounts = jointCountsRulesTargetLHSAndLabelsLHSIter->second;
- std::pair< boost::unordered_map<std::string,float>::iterator, bool > insertedJointCounts =
- jointCounts->insert(std::pair<std::string,float>(iter2->first,iter2->second));
+ std::pair< boost::unordered_map<std::string,float>::iterator, bool > insertedJointCounts =
+ jointCounts->insert(std::pair<std::string,float>(iter2->first,iter2->second));
if (!insertedJointCounts.second) {
(insertedJointCounts.first)->second += iter2->second;
}
}
- }
+ }
}
lhsGivenCurrentRhsCounts.clear();
@@ -410,7 +410,7 @@ std::string ExtractionPhrasePair::CollectAllLabelsSeparateLHSAndRHS(const std::s
currentRhs = rhs;
}
- currentRhsCount += iter->second;
+ currentRhsCount += iter->second;
lhsGivenCurrentRhsCounts.push_back( std::pair<std::string,float>(lhs,iter->second) );
}
@@ -436,28 +436,28 @@ std::string ExtractionPhrasePair::CollectAllLabelsSeparateLHSAndRHS(const std::s
ruleTargetLhs.erase(ruleTargetLhs.begin()); // strip square brackets
ruleTargetLhs.erase(ruleTargetLhs.size()-1);
- std::pair< boost::unordered_map<std::string,float>::iterator, bool > insertedCountsLabelsLHS =
- countsLabelsLHS.insert(std::pair<std::string,float>(iter2->first,iter2->second));
+ std::pair< boost::unordered_map<std::string,float>::iterator, bool > insertedCountsLabelsLHS =
+ countsLabelsLHS.insert(std::pair<std::string,float>(iter2->first,iter2->second));
if (!insertedCountsLabelsLHS.second) {
(insertedCountsLabelsLHS.first)->second += iter2->second;
}
- boost::unordered_map<std::string, boost::unordered_map<std::string,float>* >::iterator jointCountsRulesTargetLHSAndLabelsLHSIter =
- jointCountsRulesTargetLHSAndLabelsLHS.find(ruleTargetLhs);
+ boost::unordered_map<std::string, boost::unordered_map<std::string,float>* >::iterator jointCountsRulesTargetLHSAndLabelsLHSIter =
+ jointCountsRulesTargetLHSAndLabelsLHS.find(ruleTargetLhs);
if ( jointCountsRulesTargetLHSAndLabelsLHSIter == jointCountsRulesTargetLHSAndLabelsLHS.end() ) {
boost::unordered_map<std::string,float>* jointCounts = new boost::unordered_map<std::string,float>;
jointCounts->insert(std::pair<std::string,float>(iter2->first,iter2->second));
jointCountsRulesTargetLHSAndLabelsLHS.insert(std::pair<std::string,boost::unordered_map<std::string,float>* >(ruleTargetLhs,jointCounts));
} else {
boost::unordered_map<std::string,float>* jointCounts = jointCountsRulesTargetLHSAndLabelsLHSIter->second;
- std::pair< boost::unordered_map<std::string,float>::iterator, bool > insertedJointCounts =
- jointCounts->insert(std::pair<std::string,float>(iter2->first,iter2->second));
+ std::pair< boost::unordered_map<std::string,float>::iterator, bool > insertedJointCounts =
+ jointCounts->insert(std::pair<std::string,float>(iter2->first,iter2->second));
if (!insertedJointCounts.second) {
(insertedJointCounts.first)->second += iter2->second;
}
}
- }
+ }
}
std::string allPropertyValuesString(oss.str());
@@ -465,11 +465,11 @@ std::string ExtractionPhrasePair::CollectAllLabelsSeparateLHSAndRHS(const std::s
}
-void ExtractionPhrasePair::CollectAllPhraseOrientations(const std::string &key,
- const std::vector<float> &orientationClassPriorsL2R,
- const std::vector<float> &orientationClassPriorsR2L,
- double smoothingFactor,
- std::ostream &out) const
+void ExtractionPhrasePair::CollectAllPhraseOrientations(const std::string &key,
+ const std::vector<float> &orientationClassPriorsL2R,
+ const std::vector<float> &orientationClassPriorsR2L,
+ double smoothingFactor,
+ std::ostream &out) const
{
assert(orientationClassPriorsL2R.size()==4 && orientationClassPriorsR2L.size()==4); // mono swap dleft dright
@@ -479,12 +479,12 @@ void ExtractionPhrasePair::CollectAllPhraseOrientations(const std::string &key,
return;
}
- // bidirectional MSLR phrase orientation with 2x4 orientation classes:
+ // bidirectional MSLR phrase orientation with 2x4 orientation classes:
// mono swap dright dleft
std::vector<float> orientationClassCountSumL2R(4,0);
std::vector<float> orientationClassCountSumR2L(4,0);
- for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin();
+ for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin();
iter!=allPropertyValues->end(); ++iter) {
std::string l2rOrientationClass, r2lOrientationClass;
try {
@@ -492,12 +492,12 @@ void ExtractionPhrasePair::CollectAllPhraseOrientations(const std::string &key,
tokenizer >> l2rOrientationClass;
tokenizer >> r2lOrientationClass;
if ( tokenizer.peek() != EOF ) {
- UTIL_THROW(util::Exception, "ExtractionPhrasePair"
+ UTIL_THROW(util::Exception, "ExtractionPhrasePair"
<< ": Collecting phrase orientations failed. "
<< "Too many tokens?");
}
} catch (const std::exception &e) {
- UTIL_THROW(util::Exception, "ExtractionPhrasePair"
+ UTIL_THROW(util::Exception, "ExtractionPhrasePair"
<< ": Collecting phrase orientations failed. "
<< "Flawed property value in extract file?");
}
@@ -516,7 +516,7 @@ void ExtractionPhrasePair::CollectAllPhraseOrientations(const std::string &key,
l2rOrientationClassId = 3;
}
if (l2rOrientationClassId == -1) {
- UTIL_THROW(util::Exception, "ExtractionPhrasePair"
+ UTIL_THROW(util::Exception, "ExtractionPhrasePair"
<< ": Collecting phrase orientations failed. "
<< "Unknown orientation class \"" << l2rOrientationClass << "\"." );
}
@@ -534,7 +534,7 @@ void ExtractionPhrasePair::CollectAllPhraseOrientations(const std::string &key,
r2lOrientationClassId = 3;
}
if (r2lOrientationClassId == -1) {
- UTIL_THROW(util::Exception, "ExtractionPhrasePair"
+ UTIL_THROW(util::Exception, "ExtractionPhrasePair"
<< ": Collecting phrase orientations failed. "
<< "Unknown orientation class \"" << r2lOrientationClass << "\"." );
}
diff --git a/phrase-extract/ExtractionPhrasePair.h b/phrase-extract/ExtractionPhrasePair.h
index 368033284..4f1a0cd99 100644
--- a/phrase-extract/ExtractionPhrasePair.h
+++ b/phrase-extract/ExtractionPhrasePair.h
@@ -128,15 +128,15 @@ public:
std::string CollectAllPropertyValues(const std::string &key) const;
std::string CollectAllLabelsSeparateLHSAndRHS(const std::string& propertyKey,
- std::set<std::string>& sourceLabelSet,
- boost::unordered_map<std::string,float>& sourceLHSCounts,
- boost::unordered_map<std::string, boost::unordered_map<std::string,float>* >& sourceRHSAndLHSJointCounts,
- Vocabulary &vcbT) const;
-
- void CollectAllPhraseOrientations(const std::string &key,
- const std::vector<float> &orientationClassPriorsL2R,
- const std::vector<float> &orientationClassPriorsR2L,
- double smoothingFactor,
+ std::set<std::string>& sourceLabelSet,
+ boost::unordered_map<std::string,float>& sourceLHSCounts,
+ boost::unordered_map<std::string, boost::unordered_map<std::string,float>* >& sourceRHSAndLHSJointCounts,
+ Vocabulary &vcbT) const;
+
+ void CollectAllPhraseOrientations(const std::string &key,
+ const std::vector<float> &orientationClassPriorsL2R,
+ const std::vector<float> &orientationClassPriorsR2L,
+ double smoothingFactor,
std::ostream &out) const;
void AddProperties( const std::string &str, float count );
diff --git a/phrase-extract/PhraseExtractionOptions.h b/phrase-extract/PhraseExtractionOptions.h
index 14d1575e1..cf2e4b365 100644
--- a/phrase-extract/PhraseExtractionOptions.h
+++ b/phrase-extract/PhraseExtractionOptions.h
@@ -73,8 +73,8 @@ public:
onlyOutputSpanInfo(false),
gzOutput(false),
flexScoreFlag(false),
- debug(false)
- {}
+ debug(false) {
+ }
//functions for initialization of options
void initAllModelsOutputFlag(const bool initallModelsOutputFlag) {
diff --git a/phrase-extract/PropertiesConsolidator.cpp b/phrase-extract/PropertiesConsolidator.cpp
index 642c48672..d6dc67e6f 100644
--- a/phrase-extract/PropertiesConsolidator.cpp
+++ b/phrase-extract/PropertiesConsolidator.cpp
@@ -31,7 +31,7 @@
namespace MosesTraining
{
-void PropertiesConsolidator::ActivateSourceLabelsProcessing(const std::string &sourceLabelSetFile)
+void PropertiesConsolidator::ActivateSourceLabelsProcessing(const std::string &sourceLabelSetFile)
{
Moses::InputFileStream inFile(sourceLabelSetFile);
@@ -57,7 +57,7 @@ void PropertiesConsolidator::ActivateSourceLabelsProcessing(const std::string &s
}
-std::string PropertiesConsolidator::ProcessPropertiesString(const std::string &propertiesString) const
+std::string PropertiesConsolidator::ProcessPropertiesString(const std::string &propertiesString) const
{
if ( propertiesString.empty() ) {
return propertiesString;
@@ -89,14 +89,14 @@ std::string PropertiesConsolidator::ProcessPropertiesString(const std::string &p
double totalCount;
if (! (tokenizer >> nNTs)) { // first token: number of non-terminals (incl. left-hand side)
- UTIL_THROW2("Not able to read number of non-terminals from SourceLabels property. "
+ UTIL_THROW2("Not able to read number of non-terminals from SourceLabels property. "
<< "Flawed SourceLabels property?");
}
assert( nNTs > 0 );
out << " " << nNTs;
if (! (tokenizer >> totalCount)) { // second token: overall rule count
- UTIL_THROW2("Not able to read overall rule count from SourceLabels property. "
+ UTIL_THROW2("Not able to read overall rule count from SourceLabels property. "
<< "Flawed SourceLabels property?");
}
assert( totalCount > 0.0 );
diff --git a/phrase-extract/ScoreFeatureTest.cpp b/phrase-extract/ScoreFeatureTest.cpp
index 7af06405c..93a452dad 100644
--- a/phrase-extract/ScoreFeatureTest.cpp
+++ b/phrase-extract/ScoreFeatureTest.cpp
@@ -53,17 +53,17 @@ BOOST_AUTO_TEST_CASE(manager_configure_domain_except)
//Check that configure rejects illegal domain arg combinations
ScoreFeatureManager manager;
BOOST_CHECK_THROW(
- manager.configure(boost::assign::list_of("--DomainRatio")("/dev/null")("--DomainIndicator")("/dev/null")),
- ScoreFeatureArgumentException);
+ manager.configure(boost::assign::list_of("--DomainRatio")("/dev/null")("--DomainIndicator")("/dev/null")),
+ ScoreFeatureArgumentException);
BOOST_CHECK_THROW(
- manager.configure(boost::assign::list_of("--SparseDomainSubset")("/dev/null")("--SparseDomainRatio")("/dev/null")),
- ScoreFeatureArgumentException);
+ manager.configure(boost::assign::list_of("--SparseDomainSubset")("/dev/null")("--SparseDomainRatio")("/dev/null")),
+ ScoreFeatureArgumentException);
BOOST_CHECK_THROW(
- manager.configure(boost::assign::list_of("--SparseDomainBlah")("/dev/null")),
- ScoreFeatureArgumentException);
+ manager.configure(boost::assign::list_of("--SparseDomainBlah")("/dev/null")),
+ ScoreFeatureArgumentException);
BOOST_CHECK_THROW(
- manager.configure(boost::assign::list_of("--DomainSubset")),
- ScoreFeatureArgumentException);
+ manager.configure(boost::assign::list_of("--DomainSubset")),
+ ScoreFeatureArgumentException);
}
template <class Expected>
diff --git a/phrase-extract/SentenceAlignment.cpp b/phrase-extract/SentenceAlignment.cpp
index 120c9154d..ee7f27ed9 100644
--- a/phrase-extract/SentenceAlignment.cpp
+++ b/phrase-extract/SentenceAlignment.cpp
@@ -55,10 +55,10 @@ bool SentenceAlignment::processSourceSentence(const char * sourceString, int, bo
}
bool SentenceAlignment::create(const char targetString[],
- const char sourceString[],
- const char alignmentString[],
- const char weightString[],
- int sentenceID, bool boundaryRules)
+ const char sourceString[],
+ const char alignmentString[],
+ const char weightString[],
+ int sentenceID, bool boundaryRules)
{
using namespace std;
this->sentenceID = sentenceID;
diff --git a/phrase-extract/SentenceAlignment.h b/phrase-extract/SentenceAlignment.h
index 576d3279e..a3d6fc6d4 100644
--- a/phrase-extract/SentenceAlignment.h
+++ b/phrase-extract/SentenceAlignment.h
@@ -44,10 +44,10 @@ public:
virtual bool processSourceSentence(const char *, int, bool boundaryRules);
bool create(const char targetString[],
- const char sourceString[],
- const char alignmentString[],
- const char weightString[],
- int sentenceID, bool boundaryRules);
+ const char sourceString[],
+ const char alignmentString[],
+ const char weightString[],
+ int sentenceID, bool boundaryRules);
void invertAlignment();
diff --git a/phrase-extract/SyntaxTree.h b/phrase-extract/SyntaxTree.h
index 8d65b99bd..6ffb5da34 100644
--- a/phrase-extract/SyntaxTree.h
+++ b/phrase-extract/SyntaxTree.h
@@ -97,8 +97,8 @@ protected:
public:
SyntaxTree()
- : m_top(0) // m_top doesn't get set unless ConnectNodes is called.
- , m_size(0) {}
+ : m_top(0) // m_top doesn't get set unless ConnectNodes is called.
+ , m_size(0) {}
~SyntaxTree();
diff --git a/phrase-extract/XmlTree.cpp b/phrase-extract/XmlTree.cpp
index ce7e6837e..6efa1bf5c 100644
--- a/phrase-extract/XmlTree.cpp
+++ b/phrase-extract/XmlTree.cpp
@@ -356,8 +356,7 @@ bool ProcessAndStripXMLTags(string &line, SyntaxTree &tree, set< string > &label
if (startPos > endPos) {
cerr << "ERROR: tag " << tagName << " startPos is bigger than endPos (" << startPos << "-" << endPos << "): " << line << endl;
return false;
- }
- else if (startPos == endPos) {
+ } else if (startPos == endPos) {
cerr << "WARNING: tag " << tagName << ". Ignoring 0 span (" << startPos << "-" << endPos << "): " << line << endl;
continue;
}
diff --git a/phrase-extract/consolidate-direct-main.cpp b/phrase-extract/consolidate-direct-main.cpp
index 40e0e35d4..576cdd568 100644
--- a/phrase-extract/consolidate-direct-main.cpp
+++ b/phrase-extract/consolidate-direct-main.cpp
@@ -54,13 +54,12 @@ bool getLine( istream &fileP, vector< string > &item )
{
if (fileP.eof())
return false;
-
+
string line;
if (getline(fileP, line)) {
item = splitLine(line.c_str());
return false;
- }
- else {
+ } else {
return false;
}
}
diff --git a/phrase-extract/consolidate-main.cpp b/phrase-extract/consolidate-main.cpp
index 2298d0523..88eb1f7d5 100644
--- a/phrase-extract/consolidate-main.cpp
+++ b/phrase-extract/consolidate-main.cpp
@@ -133,12 +133,11 @@ int main(int argc, char* argv[])
bool done = false;
while (!done) {
string single_setting;
- size_t pos;
+ size_t pos;
if ((pos = setting.find(",")) != std::string::npos) {
single_setting = setting.substr(0, pos);
setting.erase(0, pos + 1);
- }
- else {
+ } else {
single_setting = setting;
done = true;
}
@@ -151,12 +150,10 @@ int main(int argc, char* argv[])
if (field == 0) {
minScore0 = threshold;
cerr << "setting minScore0 to " << threshold << endl;
- }
- else if (field == 2) {
+ } else if (field == 2) {
minScore2 = threshold;
cerr << "setting minScore2 to " << threshold << endl;
- }
- else {
+ } else {
cerr << "ERROR: MinScore currently only supported for indirect (0) and direct (2) phrase translation probabilities" << endl;
exit(1);
}
@@ -245,7 +242,7 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
exit(1);
}
- // create properties consolidator
+ // create properties consolidator
// (in case any additional phrase property requires further processing)
MosesTraining::PropertiesConsolidator propertiesConsolidator = MosesTraining::PropertiesConsolidator();
if (sourceLabelsFlag) {
@@ -323,7 +320,7 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
(minScore2 > 0 && adjustedCountEF /countF < minScore2)) {
continue;
}
-
+
// output hierarchical phrase pair (with separated labels)
fileConsolidated << itemDirect[0] << " ||| " << itemDirect[1] << " |||";
@@ -396,7 +393,7 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
fileConsolidated << " |||";
if (itemDirect.size() >= 6) {
//if (sourceLabelsFlag) {
- fileConsolidated << propertiesConsolidator.ProcessPropertiesString(itemDirect[5]);
+ fileConsolidated << propertiesConsolidator.ProcessPropertiesString(itemDirect[5]);
//} else {
// fileConsolidated << itemDirect[5];
//}
diff --git a/phrase-extract/consolidate-reverse-main.cpp b/phrase-extract/consolidate-reverse-main.cpp
index ce59315b9..e2b0ad473 100644
--- a/phrase-extract/consolidate-reverse-main.cpp
+++ b/phrase-extract/consolidate-reverse-main.cpp
@@ -186,13 +186,12 @@ bool getLine( istream &fileP, vector< string > &item )
{
if (fileP.eof())
return false;
-
+
string line;
if (getline(fileP, line)) {
item = splitLine(line.c_str());
return false;
- }
- else {
+ } else {
return false;
}
}
diff --git a/phrase-extract/extract-ghkm/ExtractGHKM.cpp b/phrase-extract/extract-ghkm/ExtractGHKM.cpp
index 7c210541d..5eaed4a3d 100644
--- a/phrase-extract/extract-ghkm/ExtractGHKM.cpp
+++ b/phrase-extract/extract-ghkm/ExtractGHKM.cpp
@@ -69,9 +69,9 @@ int ExtractGHKM::Main(int argc, char *argv[])
// input files are switched prior to extraction and then the source and
// target of the extracted rules are switched on output.
std::string effectiveTargetFile = options.t2s ? options.sourceFile
- : options.targetFile;
+ : options.targetFile;
std::string effectiveSourceFile = options.t2s ? options.targetFile
- : options.sourceFile;
+ : options.sourceFile;
InputFileStream targetStream(effectiveTargetFile);
InputFileStream sourceStream(effectiveSourceFile);
InputFileStream alignmentStream(options.alignmentFile);
@@ -675,16 +675,16 @@ void ExtractGHKM::WriteGlueGrammar(
if (options.sourceLabels) {
out << " {{SourceLabels 2 1 " << sourceLabelGlueTop << " 1 1 " << sourceLabelGlueTop << " 1}}";
}
- if (options.phraseOrientation) {
- out << " {{Orientation 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25}}";
- }
+ if (options.phraseOrientation) {
+ out << " {{Orientation 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25}}";
+ }
out << std::endl;
// top rules
for (std::map<std::string, int>::const_iterator i = topLabelSet.begin();
i != topLabelSet.end(); ++i) {
out << "<s> [X][" << i->first << "] </s> [X] ||| <s> [X][" << i->first << "] </s> [" << topLabel << "] ||| 1 ||| 0-0 1-1 2-2 ||| ||| |||";
- if (options.treeFragments) {
+ if (options.treeFragments) {
out << " {{Tree [" << topLabel << " [SSTART <s>] [" << i->first << "] [SEND </s>]]}}";
}
if (options.sourceLabels) {
@@ -700,11 +700,11 @@ void ExtractGHKM::WriteGlueGrammar(
for(std::set<std::string>::const_iterator i = labelSet.begin();
i != labelSet.end(); i++ ) {
out << "[X][" << topLabel << "] [X][" << *i << "] [X] ||| [X][" << topLabel << "] [X][" << *i << "] [" << topLabel << "] ||| 2.718 ||| 0-0 1-1 ||| ||| |||";
- if (options.treeFragments) {
+ if (options.treeFragments) {
out << " {{Tree [" << topLabel << " ["<< topLabel << "] [" << *i << "]]}}";
}
if (options.sourceLabels) {
- out << " {{SourceLabels 3 2.718 " << sourceLabelGlueTop << " " << sourceLabelGlueX << " 2.718 1 " << sourceLabelGlueTop << " 2.718}}";
+ out << " {{SourceLabels 3 2.718 " << sourceLabelGlueTop << " " << sourceLabelGlueX << " 2.718 1 " << sourceLabelGlueTop << " 2.718}}";
}
if (options.phraseOrientation) {
out << " {{Orientation 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25}}";
@@ -827,8 +827,8 @@ void ExtractGHKM::WriteUnknownWordSoftMatches(
std::ostream &out)
{
for (std::set<std::string>::const_iterator p = labelSet.begin(); p != labelSet.end(); ++p) {
- std::string label = *p;
- out << "UNK " << label << std::endl;
+ std::string label = *p;
+ out << "UNK " << label << std::endl;
}
}
diff --git a/phrase-extract/extract-ghkm/PhraseOrientation.cpp b/phrase-extract/extract-ghkm/PhraseOrientation.cpp
index f9a7af8c2..01b1ee187 100644
--- a/phrase-extract/extract-ghkm/PhraseOrientation.cpp
+++ b/phrase-extract/extract-ghkm/PhraseOrientation.cpp
@@ -82,16 +82,14 @@ PhraseOrientation::PhraseOrientation(int sourceSize,
std::vector<int> alignedCountS(m_countF,0);
for (Moses::AlignmentInfo::const_iterator it=alignTerm.begin();
- it!=alignTerm.end(); ++it)
- {
+ it!=alignTerm.end(); ++it) {
alignedToS[it->first].push_back(it->second);
alignedCountS[it->first]++;
m_alignedToT[it->second].push_back(it->first);
}
for (Moses::AlignmentInfo::const_iterator it=alignNonTerm.begin();
- it!=alignNonTerm.end(); ++it)
- {
+ it!=alignNonTerm.end(); ++it) {
alignedToS[it->first].push_back(it->second);
alignedCountS[it->first]++;
m_alignedToT[it->second].push_back(it->first);
@@ -124,7 +122,7 @@ void PhraseOrientation::Init(int sourceSize,
}
}
- m_minAndMaxAlignedToSourceSpan[ std::pair<int,int>(startF,endF) ] = std::pair<int,int>(minE,maxE);
+ m_minAndMaxAlignedToSourceSpan[ std::pair<int,int>(startF,endF) ] = std::pair<int,int>(minE,maxE);
}
}
@@ -149,7 +147,7 @@ void PhraseOrientation::Init(int sourceSize,
}
}
- m_minAndMaxAlignedToTargetSpan[ std::pair<int,int>(startE,endE) ] = std::pair<int,int>(minF,maxF);
+ m_minAndMaxAlignedToTargetSpan[ std::pair<int,int>(startE,endE) ] = std::pair<int,int>(minF,maxF);
if (maxF >= 0) { // aligned to any source words at all
@@ -197,10 +195,10 @@ void PhraseOrientation::InsertVertex( HSentenceVertices & corners, int x, int y
void PhraseOrientation::InsertPhraseVertices(HSentenceVertices & topLeft,
- HSentenceVertices & topRight,
- HSentenceVertices & bottomLeft,
- HSentenceVertices & bottomRight,
- int startF, int startE, int endF, int endE)
+ HSentenceVertices & topRight,
+ HSentenceVertices & bottomLeft,
+ HSentenceVertices & bottomRight,
+ int startF, int startE, int endF, int endE)
{
InsertVertex(topLeft, startF, startE);
@@ -212,7 +210,7 @@ void PhraseOrientation::InsertPhraseVertices(HSentenceVertices & topLeft,
const std::string PhraseOrientation::GetOrientationInfoString(int startF, int endF, REO_DIR direction) const
{
- boost::unordered_map< std::pair<int,int> , std::pair<int,int> >::const_iterator foundMinMax
+ boost::unordered_map< std::pair<int,int> , std::pair<int,int> >::const_iterator foundMinMax
= m_minAndMaxAlignedToSourceSpan.find( std::pair<int,int>(startF,endF) );
if ( foundMinMax != m_minAndMaxAlignedToSourceSpan.end() ) {
@@ -243,18 +241,18 @@ const std::string PhraseOrientation::GetOrientationInfoString(int startF, int st
hierNextOrient = GetOrientationInfo(startF, startE, endF, endE, REO_DIR_R2L);
switch (direction) {
- case REO_DIR_L2R:
- return GetOrientationString(hierPrevOrient, REO_MODEL_TYPE_MSLR);
- break;
- case REO_DIR_R2L:
- return GetOrientationString(hierNextOrient, REO_MODEL_TYPE_MSLR);
- break;
- case REO_DIR_BIDIR:
- return GetOrientationString(hierPrevOrient, REO_MODEL_TYPE_MSLR) + " " + GetOrientationString(hierNextOrient, REO_MODEL_TYPE_MSLR);
- break;
- default:
- return GetOrientationString(hierPrevOrient, REO_MODEL_TYPE_MSLR) + " " + GetOrientationString(hierNextOrient, REO_MODEL_TYPE_MSLR);
- break;
+ case REO_DIR_L2R:
+ return GetOrientationString(hierPrevOrient, REO_MODEL_TYPE_MSLR);
+ break;
+ case REO_DIR_R2L:
+ return GetOrientationString(hierNextOrient, REO_MODEL_TYPE_MSLR);
+ break;
+ case REO_DIR_BIDIR:
+ return GetOrientationString(hierPrevOrient, REO_MODEL_TYPE_MSLR) + " " + GetOrientationString(hierNextOrient, REO_MODEL_TYPE_MSLR);
+ break;
+ default:
+ return GetOrientationString(hierPrevOrient, REO_MODEL_TYPE_MSLR) + " " + GetOrientationString(hierNextOrient, REO_MODEL_TYPE_MSLR);
+ break;
}
return "PhraseOrientationERROR";
}
@@ -262,7 +260,7 @@ const std::string PhraseOrientation::GetOrientationInfoString(int startF, int st
PhraseOrientation::REO_CLASS PhraseOrientation::GetOrientationInfo(int startF, int endF, REO_DIR direction) const
{
- boost::unordered_map< std::pair<int,int> , std::pair<int,int> >::const_iterator foundMinMax
+ boost::unordered_map< std::pair<int,int> , std::pair<int,int> >::const_iterator foundMinMax
= m_minAndMaxAlignedToSourceSpan.find( std::pair<int,int>(startF,endF) );
if ( foundMinMax != m_minAndMaxAlignedToSourceSpan.end() ) {
@@ -291,25 +289,25 @@ PhraseOrientation::REO_CLASS PhraseOrientation::GetOrientationInfo(int startF, i
if ( direction == REO_DIR_L2R )
return GetOrientHierModel(REO_MODEL_TYPE_MSLR,
- startF, endF, startE, endE, m_countF-1, 0, 0, 1,
- &ge, &le,
+ startF, endF, startE, endE, m_countF-1, 0, 0, 1,
+ &ge, &le,
m_bottomRight, m_bottomLeft);
if ( direction == REO_DIR_R2L )
return GetOrientHierModel(REO_MODEL_TYPE_MSLR,
- endF, startF, endE, startE, 0, m_countF-1, m_countE-1, -1,
- &le, &ge,
+ endF, startF, endE, startE, 0, m_countF-1, m_countE-1, -1,
+ &le, &ge,
m_topLeft, m_topRight);
- return REO_CLASS_UNKNOWN;
+ return REO_CLASS_UNKNOWN;
}
// to be called with countF-1 instead of countF
PhraseOrientation::REO_CLASS PhraseOrientation::GetOrientHierModel(REO_MODEL_TYPE modelType,
- int startF, int endF, int startE, int endE, int countF, int zeroF, int zeroE, int unit,
- bool (*ge)(int, int), bool (*le)(int, int),
- const HSentenceVertices & bottomRight, const HSentenceVertices & bottomLeft) const
+ int startF, int endF, int startE, int endE, int countF, int zeroF, int zeroE, int unit,
+ bool (*ge)(int, int), bool (*le)(int, int),
+ const HSentenceVertices & bottomRight, const HSentenceVertices & bottomLeft) const
{
bool leftSourceSpanIsAligned = ( (startF != zeroF) && SourceSpanIsAligned(zeroF,startF-unit) );
bool topTargetSpanIsAligned = ( (startE != zeroE) && TargetSpanIsAligned(zeroE,startE-unit) );
@@ -320,30 +318,28 @@ PhraseOrientation::REO_CLASS PhraseOrientation::GetOrientHierModel(REO_MODEL_TYP
HSentenceVertices::const_iterator it;
if (//(connectedLeftTop && !connectedRightTop) ||
- ((it = bottomRight.find(startE - unit)) != bottomRight.end() &&
- it->second.find(startF-unit) != it->second.end()))
+ ((it = bottomRight.find(startE - unit)) != bottomRight.end() &&
+ it->second.find(startF-unit) != it->second.end()))
return REO_CLASS_LEFT;
if (modelType == REO_MODEL_TYPE_MONO)
return REO_CLASS_UNKNOWN;
if (//(!connectedLeftTop && connectedRightTop) ||
- ((it = bottomLeft.find(startE - unit)) != bottomLeft.end() &&
- it->second.find(endF + unit) != it->second.end()))
+ ((it = bottomLeft.find(startE - unit)) != bottomLeft.end() &&
+ it->second.find(endF + unit) != it->second.end()))
return REO_CLASS_RIGHT;
if (modelType == REO_MODEL_TYPE_MSD)
return REO_CLASS_UNKNOWN;
- for (int indexF=startF-2*unit; (*ge)(indexF, zeroF); indexF=indexF-unit)
- {
+ for (int indexF=startF-2*unit; (*ge)(indexF, zeroF); indexF=indexF-unit) {
if ((it = bottomRight.find(startE - unit)) != bottomRight.end() &&
- it->second.find(indexF) != it->second.end())
+ it->second.find(indexF) != it->second.end())
return REO_CLASS_DLEFT;
}
- for (int indexF=endF+2*unit; (*le)(indexF, countF); indexF=indexF+unit)
- {
+ for (int indexF=endF+2*unit; (*le)(indexF, countF); indexF=indexF+unit) {
if ((it = bottomLeft.find(startE - unit)) != bottomLeft.end() &&
it->second.find(indexF) != it->second.end())
return REO_CLASS_DRIGHT;
@@ -351,7 +347,7 @@ PhraseOrientation::REO_CLASS PhraseOrientation::GetOrientHierModel(REO_MODEL_TYP
return REO_CLASS_UNKNOWN;
}
-
+
bool PhraseOrientation::SourceSpanIsAligned(int index1, int index2) const
{
return SpanIsAligned(index1, index2, m_minAndMaxAlignedToSourceSpan);
@@ -364,18 +360,14 @@ bool PhraseOrientation::TargetSpanIsAligned(int index1, int index2) const
bool PhraseOrientation::SpanIsAligned(int index1, int index2, const boost::unordered_map< std::pair<int,int> , std::pair<int,int> > &minAndMaxAligned) const
{
- boost::unordered_map< std::pair<int,int> , std::pair<int,int> >::const_iterator itMinAndMaxAligned =
+ boost::unordered_map< std::pair<int,int> , std::pair<int,int> >::const_iterator itMinAndMaxAligned =
minAndMaxAligned.find(std::pair<int,int>(std::min(index1,index2),std::max(index1,index2)));
- if (itMinAndMaxAligned == minAndMaxAligned.end())
- {
+ if (itMinAndMaxAligned == minAndMaxAligned.end()) {
std::cerr << "PhraseOrientation::SourceSpanIsAligned(): Error" << std::endl;
std::exit(1);
- }
- else
- {
- if (itMinAndMaxAligned->second.first == std::numeric_limits<int>::max())
- {
+ } else {
+ if (itMinAndMaxAligned->second.first == std::numeric_limits<int>::max()) {
return false;
}
}
@@ -462,7 +454,7 @@ void PhraseOrientation::WritePriorCounts(std::ostream& out, const REO_MODEL_TYPE
std::map<std::string,float> r2lOrientationPriorCountsMap;
for (int orient=0; orient<=REO_CLASS_UNKNOWN; ++orient) {
l2rOrientationPriorCountsMap[GetOrientationString((REO_CLASS)orient, modelType)] += m_l2rOrientationPriorCounts[orient];
- }
+ }
for (int orient=0; orient<=REO_CLASS_UNKNOWN; ++orient) {
r2lOrientationPriorCountsMap[GetOrientationString((REO_CLASS)orient, modelType)] += m_r2lOrientationPriorCounts[orient];
}
diff --git a/phrase-extract/extract-ghkm/PhraseOrientation.h b/phrase-extract/extract-ghkm/PhraseOrientation.h
index aac9c34d1..d826c127c 100644
--- a/phrase-extract/extract-ghkm/PhraseOrientation.h
+++ b/phrase-extract/extract-ghkm/PhraseOrientation.h
@@ -83,17 +83,23 @@ private:
int startF, int startE, int endF, int endE);
REO_CLASS GetOrientHierModel(REO_MODEL_TYPE modelType,
- int startF, int endF, int startE, int endE, int countF, int zeroF, int zeroE, int unit,
- bool (*ge)(int, int), bool (*lt)(int, int),
- const HSentenceVertices & bottomRight, const HSentenceVertices & bottomLeft) const;
+ int startF, int endF, int startE, int endE, int countF, int zeroF, int zeroE, int unit,
+ bool (*ge)(int, int), bool (*lt)(int, int),
+ const HSentenceVertices & bottomRight, const HSentenceVertices & bottomLeft) const;
bool SpanIsAligned(int index1, int index2, const boost::unordered_map< std::pair<int,int> , std::pair<int,int> > &minAndMaxAligned) const;
bool IsAligned(int fi, int ei) const;
- static bool ge(int first, int second) { return first >= second; };
- static bool le(int first, int second) { return first <= second; };
- static bool lt(int first, int second) { return first < second; };
+ static bool ge(int first, int second) {
+ return first >= second;
+ };
+ static bool le(int first, int second) {
+ return first <= second;
+ };
+ static bool lt(int first, int second) {
+ return first < second;
+ };
const int m_countF;
const int m_countE;
diff --git a/phrase-extract/extract-ghkm/Rule.h b/phrase-extract/extract-ghkm/Rule.h
index 186cfda37..36e24c799 100644
--- a/phrase-extract/extract-ghkm/Rule.h
+++ b/phrase-extract/extract-ghkm/Rule.h
@@ -16,7 +16,8 @@ class Node;
enum SymbolType { Terminal, NonTerminal };
-class Symbol {
+class Symbol
+{
public:
Symbol(const std::string &v, SymbolType t) : m_value(v) , m_type(t) {}
diff --git a/phrase-extract/extract-ghkm/ScfgRule.cpp b/phrase-extract/extract-ghkm/ScfgRule.cpp
index af801d648..cc689d6fb 100644
--- a/phrase-extract/extract-ghkm/ScfgRule.cpp
+++ b/phrase-extract/extract-ghkm/ScfgRule.cpp
@@ -30,7 +30,7 @@ namespace Moses
namespace GHKM
{
-ScfgRule::ScfgRule(const Subgraph &fragment,
+ScfgRule::ScfgRule(const Subgraph &fragment,
const MosesTraining::SyntaxTree *sourceSyntaxTree)
: m_sourceLHS("X", NonTerminal)
, m_targetLHS(fragment.GetRoot()->GetLabel(), NonTerminal)
@@ -124,26 +124,26 @@ ScfgRule::ScfgRule(const Subgraph &fragment,
}
if (sourceSyntaxTree) {
- // Source syntax label for root node (if sourceSyntaxTree available)
- PushSourceLabel(sourceSyntaxTree,fragment.GetRoot(),"XLHS");
- // All non-terminal spans (including the LHS) should have obtained a label
- // (a source-side syntactic constituent label if the span matches, "XLHS" otherwise)
- assert(m_sourceLabels.size() == m_numberOfNonTerminals+1);
+ // Source syntax label for root node (if sourceSyntaxTree available)
+ PushSourceLabel(sourceSyntaxTree,fragment.GetRoot(),"XLHS");
+ // All non-terminal spans (including the LHS) should have obtained a label
+ // (a source-side syntactic constituent label if the span matches, "XLHS" otherwise)
+ assert(m_sourceLabels.size() == m_numberOfNonTerminals+1);
}
}
void ScfgRule::PushSourceLabel(const MosesTraining::SyntaxTree *sourceSyntaxTree,
const Node *node,
- const std::string &nonMatchingLabel)
+ const std::string &nonMatchingLabel)
{
ContiguousSpan span = Closure(node->GetSpan());
if (sourceSyntaxTree->HasNode(span.first,span.second)) { // does a source constituent match the span?
- std::vector<MosesTraining::SyntaxNode*> sourceLabels =
- sourceSyntaxTree->GetNodes(span.first,span.second);
- if (!sourceLabels.empty()) {
- // store the topmost matching label from the source syntax tree
- m_sourceLabels.push_back(sourceLabels.back()->GetLabel());
- }
+ std::vector<MosesTraining::SyntaxNode*> sourceLabels =
+ sourceSyntaxTree->GetNodes(span.first,span.second);
+ if (!sourceLabels.empty()) {
+ // store the topmost matching label from the source syntax tree
+ m_sourceLabels.push_back(sourceLabels.back()->GetLabel());
+ }
} else {
// no matching source-side syntactic constituent: store nonMatchingLabel
m_sourceLabels.push_back(nonMatchingLabel);
@@ -178,7 +178,7 @@ void ScfgRule::UpdateSourceLabelCoocCounts(std::map< std::string, std::map<std::
std::map< std::string, std::map<std::string,float>* >::iterator iter = coocCounts.find(sourceLabel);
if ( iter == coocCounts.end() ) {
std::map<std::string,float> *newCountMap = new std::map<std::string,float>();
- std::pair< std::map< std::string, std::map<std::string,float>* >::iterator, bool > inserted =
+ std::pair< std::map< std::string, std::map<std::string,float>* >::iterator, bool > inserted =
coocCounts.insert( std::pair< std::string, std::map<std::string,float>* >(sourceLabel, newCountMap) );
assert(inserted.second);
countMap = (inserted.first)->second;
diff --git a/phrase-extract/extract-ghkm/ScfgRule.h b/phrase-extract/extract-ghkm/ScfgRule.h
index 6b8abb94e..1b210e0d2 100644
--- a/phrase-extract/extract-ghkm/ScfgRule.h
+++ b/phrase-extract/extract-ghkm/ScfgRule.h
@@ -42,7 +42,7 @@ class Subgraph;
class ScfgRule : public Rule
{
public:
- ScfgRule(const Subgraph &fragment,
+ ScfgRule(const Subgraph &fragment,
const MosesTraining::SyntaxTree *sourceSyntaxTree = 0);
const Symbol &GetSourceLHS() const {
@@ -66,7 +66,7 @@ public:
void PrintSourceLabels(std::ostream &out) const {
for (std::vector<std::string>::const_iterator it = m_sourceLabels.begin();
it != m_sourceLabels.end(); ++it) {
- out << " " << (*it);
+ out << " " << (*it);
}
}
void UpdateSourceLabelCoocCounts(std::map< std::string, std::map<std::string,float>* > &coocCounts,
diff --git a/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp b/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
index d306b845f..9cd1e1cc8 100644
--- a/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
+++ b/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
@@ -183,7 +183,7 @@ void ScfgRuleWriter::WriteSymbol(const Symbol &symbol, std::ostream &out)
}
}
-void ScfgRuleWriter::Write(const ScfgRule &rule, const Subgraph &g, bool printEndl)
+void ScfgRuleWriter::Write(const ScfgRule &rule, const Subgraph &g, bool printEndl)
{
Write(rule,false);
m_fwd << " {{Tree ";
diff --git a/phrase-extract/extract-ghkm/ScfgRuleWriter.h b/phrase-extract/extract-ghkm/ScfgRuleWriter.h
index 240492824..932bd9f8b 100644
--- a/phrase-extract/extract-ghkm/ScfgRuleWriter.h
+++ b/phrase-extract/extract-ghkm/ScfgRuleWriter.h
@@ -44,7 +44,7 @@ public:
void Write(const ScfgRule &rule, bool printEndl=true);
- void Write(const ScfgRule &rule, const Subgraph &g, bool printEndl=true);
+ void Write(const ScfgRule &rule, const Subgraph &g, bool printEndl=true);
private:
// Disallow copying
diff --git a/phrase-extract/extract-ghkm/StsgRule.cpp b/phrase-extract/extract-ghkm/StsgRule.cpp
index a6100ac96..83398f80a 100644
--- a/phrase-extract/extract-ghkm/StsgRule.cpp
+++ b/phrase-extract/extract-ghkm/StsgRule.cpp
@@ -12,7 +12,7 @@ namespace GHKM
{
StsgRule::StsgRule(const Subgraph &fragment)
- : m_targetSide(fragment, true)
+ : m_targetSide(fragment, true)
{
// Source side
@@ -77,7 +77,7 @@ StsgRule::StsgRule(const Subgraph &fragment)
continue;
}
std::map<const Node *, std::vector<int> >::iterator p =
- sinkToSourceIndices.find(leaf);
+ sinkToSourceIndices.find(leaf);
assert(p != sinkToSourceIndices.end());
std::vector<int> &sourceNodes = p->second;
for (std::vector<int>::iterator r = sourceNodes.begin();
diff --git a/phrase-extract/extract-ghkm/Subgraph.h b/phrase-extract/extract-ghkm/Subgraph.h
index b02404beb..4a0b3b692 100644
--- a/phrase-extract/extract-ghkm/Subgraph.h
+++ b/phrase-extract/extract-ghkm/Subgraph.h
@@ -57,12 +57,12 @@ public:
}
Subgraph(const Subgraph &other, bool targetOnly=false)
- : m_root(other.m_root)
- , m_leaves(other.m_leaves)
- , m_depth(other.m_depth)
- , m_size(other.m_size)
- , m_nodeCount(other.m_nodeCount)
- , m_pcfgScore(other.m_pcfgScore) {
+ : m_root(other.m_root)
+ , m_leaves(other.m_leaves)
+ , m_depth(other.m_depth)
+ , m_size(other.m_size)
+ , m_nodeCount(other.m_nodeCount)
+ , m_pcfgScore(other.m_pcfgScore) {
if (targetOnly && m_root->GetType() != SOURCE) {
// Replace any source-word sink nodes with their parents (except for
// the special case where the parent is a non-word tree node -- see
diff --git a/phrase-extract/extract-ghkm/XmlTreeParser.h b/phrase-extract/extract-ghkm/XmlTreeParser.h
index d0209254f..ff0baeace 100644
--- a/phrase-extract/extract-ghkm/XmlTreeParser.h
+++ b/phrase-extract/extract-ghkm/XmlTreeParser.h
@@ -47,9 +47,11 @@ public:
std::auto_ptr<ParseTree> Parse(const std::string &);
static std::auto_ptr<ParseTree> ConvertTree(const MosesTraining::SyntaxNode &,
- const std::vector<std::string> &);
+ const std::vector<std::string> &);
- const std::vector<std::string>& GetWords() { return m_words; };
+ const std::vector<std::string>& GetWords() {
+ return m_words;
+ };
private:
diff --git a/phrase-extract/extract-main.cpp b/phrase-extract/extract-main.cpp
index a7ec0ac92..46c029eff 100644
--- a/phrase-extract/extract-main.cpp
+++ b/phrase-extract/extract-main.cpp
@@ -295,10 +295,10 @@ int main(int argc, char* argv[])
cout << "LOG: PHRASES_BEGIN:" << endl;
}
if (sentence.create( englishString.c_str(),
- foreignString.c_str(),
- alignmentString.c_str(),
- weightString.c_str(),
- i, false)) {
+ foreignString.c_str(),
+ alignmentString.c_str(),
+ weightString.c_str(),
+ i, false)) {
if (options.placeholders.size()) {
sentence.invertAlignment();
}
diff --git a/phrase-extract/extract-mixed-syntax/AlignedSentence.cpp b/phrase-extract/extract-mixed-syntax/AlignedSentence.cpp
index 082878c00..b6b74454e 100644
--- a/phrase-extract/extract-mixed-syntax/AlignedSentence.cpp
+++ b/phrase-extract/extract-mixed-syntax/AlignedSentence.cpp
@@ -15,99 +15,100 @@ using namespace std;
/////////////////////////////////////////////////////////////////////////////////
AlignedSentence::AlignedSentence(int lineNum,
- const std::string &source,
- const std::string &target,
- const std::string &alignment)
-:m_lineNum(lineNum)
+ const std::string &source,
+ const std::string &target,
+ const std::string &alignment)
+ :m_lineNum(lineNum)
{
- PopulateWordVec(m_source, source);
- PopulateWordVec(m_target, target);
- PopulateAlignment(alignment);
+ PopulateWordVec(m_source, source);
+ PopulateWordVec(m_target, target);
+ PopulateAlignment(alignment);
}
-AlignedSentence::~AlignedSentence() {
- Moses::RemoveAllInColl(m_source);
- Moses::RemoveAllInColl(m_target);
+AlignedSentence::~AlignedSentence()
+{
+ Moses::RemoveAllInColl(m_source);
+ Moses::RemoveAllInColl(m_target);
}
void AlignedSentence::PopulateWordVec(Phrase &vec, const std::string &line)
{
- std::vector<string> toks;
- Moses::Tokenize(toks, line);
-
- vec.resize(toks.size());
- for (size_t i = 0; i < vec.size(); ++i) {
- const string &tok = toks[i];
- Word *word = new Word(i, tok);
- vec[i] = word;
- }
+ std::vector<string> toks;
+ Moses::Tokenize(toks, line);
+
+ vec.resize(toks.size());
+ for (size_t i = 0; i < vec.size(); ++i) {
+ const string &tok = toks[i];
+ Word *word = new Word(i, tok);
+ vec[i] = word;
+ }
}
void AlignedSentence::PopulateAlignment(const std::string &line)
{
- vector<string> alignStr;
- Moses::Tokenize(alignStr, line);
-
- for (size_t i = 0; i < alignStr.size(); ++i) {
- vector<int> alignPair;
- Moses::Tokenize(alignPair, alignStr[i], "-");
- assert(alignPair.size() == 2);
-
- int sourcePos = alignPair[0];
- int targetPos = alignPair[1];
-
- if (sourcePos >= m_source.size()) {
- cerr << "ERROR1:AlignedSentence=" << Debug() << endl;
- cerr << "m_source=" << m_source.size() << endl;
- abort();
- }
- assert(sourcePos < m_source.size());
- assert(targetPos < m_target.size());
- Word *sourceWord = m_source[sourcePos];
- Word *targetWord = m_target[targetPos];
-
- sourceWord->AddAlignment(targetWord);
- targetWord->AddAlignment(sourceWord);
- }
+ vector<string> alignStr;
+ Moses::Tokenize(alignStr, line);
+
+ for (size_t i = 0; i < alignStr.size(); ++i) {
+ vector<int> alignPair;
+ Moses::Tokenize(alignPair, alignStr[i], "-");
+ assert(alignPair.size() == 2);
+
+ int sourcePos = alignPair[0];
+ int targetPos = alignPair[1];
+
+ if (sourcePos >= m_source.size()) {
+ cerr << "ERROR1:AlignedSentence=" << Debug() << endl;
+ cerr << "m_source=" << m_source.size() << endl;
+ abort();
+ }
+ assert(sourcePos < m_source.size());
+ assert(targetPos < m_target.size());
+ Word *sourceWord = m_source[sourcePos];
+ Word *targetWord = m_target[targetPos];
+
+ sourceWord->AddAlignment(targetWord);
+ targetWord->AddAlignment(sourceWord);
+ }
}
std::string AlignedSentence::Debug() const
{
stringstream out;
- out << "m_lineNum:";
- out << m_lineNum;
- out << endl;
+ out << "m_lineNum:";
+ out << m_lineNum;
+ out << endl;
- out << "m_source:";
- out << m_source.Debug();
- out << endl;
+ out << "m_source:";
+ out << m_source.Debug();
+ out << endl;
- out << "m_target:";
- out << m_target.Debug();
- out << endl;
+ out << "m_target:";
+ out << m_target.Debug();
+ out << endl;
- out << "consistent phrases:" << endl;
- out << m_consistentPhrases.Debug();
- out << endl;
+ out << "consistent phrases:" << endl;
+ out << m_consistentPhrases.Debug();
+ out << endl;
- return out.str();
+ return out.str();
}
std::vector<int> AlignedSentence::GetSourceAlignmentCount() const
{
- vector<int> ret(m_source.size());
+ vector<int> ret(m_source.size());
- for (size_t i = 0; i < m_source.size(); ++i) {
- const Word &word = *m_source[i];
- ret[i] = word.GetAlignmentIndex().size();
- }
- return ret;
+ for (size_t i = 0; i < m_source.size(); ++i) {
+ const Word &word = *m_source[i];
+ ret[i] = word.GetAlignmentIndex().size();
+ }
+ return ret;
}
void AlignedSentence::Create(const Parameter &params)
{
- CreateConsistentPhrases(params);
- m_consistentPhrases.AddHieroNonTerms(params);
+ CreateConsistentPhrases(params);
+ m_consistentPhrases.AddHieroNonTerms(params);
}
void AlignedSentence::CreateConsistentPhrases(const Parameter &params)
@@ -119,76 +120,76 @@ void AlignedSentence::CreateConsistentPhrases(const Parameter &params)
// check alignments for target phrase startT...endT
for(int lengthT=1;
- lengthT <= params.maxSpan && lengthT <= countT;
- lengthT++) {
- for(int startT=0; startT < countT-(lengthT-1); startT++) {
-
- // that's nice to have
- int endT = startT + lengthT - 1;
-
- // find find aligned source words
- // first: find minimum and maximum source word
- int minS = 9999;
- int maxS = -1;
- vector< int > usedS = GetSourceAlignmentCount();
- for(int ti=startT; ti<=endT; ti++) {
- const Word &word = *m_target[ti];
- const std::set<int> &alignment = word.GetAlignmentIndex();
-
- std::set<int>::const_iterator iterAlign;
- for(iterAlign = alignment.begin(); iterAlign != alignment.end(); ++iterAlign) {
- int si = *iterAlign;
- if (si<minS) {
- minS = si;
- }
- if (si>maxS) {
- maxS = si;
- }
- usedS[ si ]--;
- }
- }
-
- // unaligned phrases are not allowed
- if( maxS == -1 )
- continue;
-
- // source phrase has to be within limits
- size_t width = maxS - minS + 1;
-
- if( width < params.minSpan )
- continue;
-
- if( width > params.maxSpan )
- continue;
-
- // check if source words are aligned to out of bound target words
- bool out_of_bounds = false;
- for(int si=minS; si<=maxS && !out_of_bounds; si++)
- if (usedS[si]>0) {
- out_of_bounds = true;
- }
-
- // if out of bound, you gotta go
- if (out_of_bounds)
- continue;
-
- // done with all the checks, lets go over all consistent phrase pairs
- // start point of source phrase may retreat over unaligned
- for(int startS=minS;
- (startS>=0 &&
- startS>maxS - params.maxSpan && // within length limit
- (startS==minS || m_source[startS]->GetAlignment().size()==0)); // unaligned
- startS--) {
- // end point of source phrase may advance over unaligned
- for(int endS=maxS;
- (endS<countS && endS<startS + params.maxSpan && // within length limit
- (endS==maxS || m_source[endS]->GetAlignment().size()==0)); // unaligned
- endS++) {
-
- // take note that this is a valid phrase alignment
- m_consistentPhrases.Add(startS, endS, startT, endT, params);
- }
- }
- }
+ lengthT <= params.maxSpan && lengthT <= countT;
+ lengthT++) {
+ for(int startT=0; startT < countT-(lengthT-1); startT++) {
+
+ // that's nice to have
+ int endT = startT + lengthT - 1;
+
+ // find find aligned source words
+ // first: find minimum and maximum source word
+ int minS = 9999;
+ int maxS = -1;
+ vector< int > usedS = GetSourceAlignmentCount();
+ for(int ti=startT; ti<=endT; ti++) {
+ const Word &word = *m_target[ti];
+ const std::set<int> &alignment = word.GetAlignmentIndex();
+
+ std::set<int>::const_iterator iterAlign;
+ for(iterAlign = alignment.begin(); iterAlign != alignment.end(); ++iterAlign) {
+ int si = *iterAlign;
+ if (si<minS) {
+ minS = si;
+ }
+ if (si>maxS) {
+ maxS = si;
+ }
+ usedS[ si ]--;
+ }
+ }
+
+ // unaligned phrases are not allowed
+ if( maxS == -1 )
+ continue;
+
+ // source phrase has to be within limits
+ size_t width = maxS - minS + 1;
+
+ if( width < params.minSpan )
+ continue;
+
+ if( width > params.maxSpan )
+ continue;
+
+ // check if source words are aligned to out of bound target words
+ bool out_of_bounds = false;
+ for(int si=minS; si<=maxS && !out_of_bounds; si++)
+ if (usedS[si]>0) {
+ out_of_bounds = true;
+ }
+
+ // if out of bound, you gotta go
+ if (out_of_bounds)
+ continue;
+
+ // done with all the checks, lets go over all consistent phrase pairs
+ // start point of source phrase may retreat over unaligned
+ for(int startS=minS;
+ (startS>=0 &&
+ startS>maxS - params.maxSpan && // within length limit
+ (startS==minS || m_source[startS]->GetAlignment().size()==0)); // unaligned
+ startS--) {
+ // end point of source phrase may advance over unaligned
+ for(int endS=maxS;
+ (endS<countS && endS<startS + params.maxSpan && // within length limit
+ (endS==maxS || m_source[endS]->GetAlignment().size()==0)); // unaligned
+ endS++) {
+
+ // take note that this is a valid phrase alignment
+ m_consistentPhrases.Add(startS, endS, startT, endT, params);
+ }
+ }
+ }
}
}
diff --git a/phrase-extract/extract-mixed-syntax/AlignedSentence.h b/phrase-extract/extract-mixed-syntax/AlignedSentence.h
index 915bdf90c..2d9e21dbe 100644
--- a/phrase-extract/extract-mixed-syntax/AlignedSentence.h
+++ b/phrase-extract/extract-mixed-syntax/AlignedSentence.h
@@ -14,38 +14,41 @@
class Parameter;
-class AlignedSentence {
+class AlignedSentence
+{
public:
- AlignedSentence(int lineNum)
- :m_lineNum(lineNum)
- {}
+ AlignedSentence(int lineNum)
+ :m_lineNum(lineNum) {
+ }
- AlignedSentence(int lineNum,
- const std::string &source,
- const std::string &target,
- const std::string &alignment);
- virtual ~AlignedSentence();
- virtual void Create(const Parameter &params);
+ AlignedSentence(int lineNum,
+ const std::string &source,
+ const std::string &target,
+ const std::string &alignment);
+ virtual ~AlignedSentence();
+ virtual void Create(const Parameter &params);
- const Phrase &GetPhrase(Moses::FactorDirection direction) const
- { return (direction == Moses::Input) ? m_source : m_target; }
+ const Phrase &GetPhrase(Moses::FactorDirection direction) const {
+ return (direction == Moses::Input) ? m_source : m_target;
+ }
- const ConsistentPhrases &GetConsistentPhrases() const
- { return m_consistentPhrases; }
+ const ConsistentPhrases &GetConsistentPhrases() const {
+ return m_consistentPhrases;
+ }
- virtual std::string Debug() const;
+ virtual std::string Debug() const;
- int m_lineNum;
+ int m_lineNum;
protected:
Phrase m_source, m_target;
ConsistentPhrases m_consistentPhrases;
- void CreateConsistentPhrases(const Parameter &params);
- void PopulateWordVec(Phrase &vec, const std::string &line);
+ void CreateConsistentPhrases(const Parameter &params);
+ void PopulateWordVec(Phrase &vec, const std::string &line);
- // m_source and m_target MUST be populated before calling this
- void PopulateAlignment(const std::string &line);
- std::vector<int> GetSourceAlignmentCount() const;
+ // m_source and m_target MUST be populated before calling this
+ void PopulateAlignment(const std::string &line);
+ std::vector<int> GetSourceAlignmentCount() const;
};
diff --git a/phrase-extract/extract-mixed-syntax/AlignedSentenceSyntax.cpp b/phrase-extract/extract-mixed-syntax/AlignedSentenceSyntax.cpp
index 3d63ed044..cb088f5b4 100644
--- a/phrase-extract/extract-mixed-syntax/AlignedSentenceSyntax.cpp
+++ b/phrase-extract/extract-mixed-syntax/AlignedSentenceSyntax.cpp
@@ -13,171 +13,170 @@
using namespace std;
AlignedSentenceSyntax::AlignedSentenceSyntax(int lineNum,
- const std::string &source,
- const std::string &target,
- const std::string &alignment)
-:AlignedSentence(lineNum)
-,m_sourceStr(source)
-,m_targetStr(target)
-,m_alignmentStr(alignment)
+ const std::string &source,
+ const std::string &target,
+ const std::string &alignment)
+ :AlignedSentence(lineNum)
+ ,m_sourceStr(source)
+ ,m_targetStr(target)
+ ,m_alignmentStr(alignment)
{
}
-AlignedSentenceSyntax::~AlignedSentenceSyntax() {
- // TODO Auto-generated destructor stub
+AlignedSentenceSyntax::~AlignedSentenceSyntax()
+{
+ // TODO Auto-generated destructor stub
}
void AlignedSentenceSyntax::Populate(bool isSyntax, int mixedSyntaxType, const Parameter &params,
- string line, Phrase &phrase, SyntaxTree &tree)
+ string line, Phrase &phrase, SyntaxTree &tree)
{
- // parse source and target string
- if (isSyntax) {
- line = "<xml><tree label=\"X\">" + line + "</tree></xml>";
- XMLParse(phrase, tree, line, params);
-
- if (mixedSyntaxType != 0) {
- // mixed syntax. Always add [X] where there isn't 1
- tree.SetHieroLabel(params.hieroNonTerm);
- if (mixedSyntaxType == 2) {
- tree.AddToAll(params.hieroNonTerm);
- }
- }
- }
- else {
- PopulateWordVec(phrase, line);
- tree.SetHieroLabel(params.hieroNonTerm);
- }
+ // parse source and target string
+ if (isSyntax) {
+ line = "<xml><tree label=\"X\">" + line + "</tree></xml>";
+ XMLParse(phrase, tree, line, params);
+
+ if (mixedSyntaxType != 0) {
+ // mixed syntax. Always add [X] where there isn't 1
+ tree.SetHieroLabel(params.hieroNonTerm);
+ if (mixedSyntaxType == 2) {
+ tree.AddToAll(params.hieroNonTerm);
+ }
+ }
+ } else {
+ PopulateWordVec(phrase, line);
+ tree.SetHieroLabel(params.hieroNonTerm);
+ }
}
void AlignedSentenceSyntax::Create(const Parameter &params)
{
- Populate(params.sourceSyntax, params.mixedSyntaxType, params, m_sourceStr,
- m_source, m_sourceTree);
- Populate(params.targetSyntax, params.mixedSyntaxType, params, m_targetStr,
- m_target, m_targetTree);
+ Populate(params.sourceSyntax, params.mixedSyntaxType, params, m_sourceStr,
+ m_source, m_sourceTree);
+ Populate(params.targetSyntax, params.mixedSyntaxType, params, m_targetStr,
+ m_target, m_targetTree);
- PopulateAlignment(m_alignmentStr);
- CreateConsistentPhrases(params);
+ PopulateAlignment(m_alignmentStr);
+ CreateConsistentPhrases(params);
- // create labels
- CreateNonTerms();
+ // create labels
+ CreateNonTerms();
}
void Escape(string &text)
{
- text = Moses::Replace(text, "&", "&amp;");
- text = Moses::Replace(text, "|", "&#124;");
- text = Moses::Replace(text, "<", "&lt;");
- text = Moses::Replace(text, ">", "&gt;");
- text = Moses::Replace(text, "'", "&apos;");
- text = Moses::Replace(text, "\"", "&quot;");
- text = Moses::Replace(text, "[", "&#91;");
- text = Moses::Replace(text, "]", "&#93;");
+ text = Moses::Replace(text, "&", "&amp;");
+ text = Moses::Replace(text, "|", "&#124;");
+ text = Moses::Replace(text, "<", "&lt;");
+ text = Moses::Replace(text, ">", "&gt;");
+ text = Moses::Replace(text, "'", "&apos;");
+ text = Moses::Replace(text, "\"", "&quot;");
+ text = Moses::Replace(text, "[", "&#91;");
+ text = Moses::Replace(text, "]", "&#93;");
}
void AlignedSentenceSyntax::XMLParse(Phrase &output,
- SyntaxTree &tree,
- const pugi::xml_node &parentNode,
- const Parameter &params)
+ SyntaxTree &tree,
+ const pugi::xml_node &parentNode,
+ const Parameter &params)
{
- int childNum = 0;
- for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling())
- {
- string nodeName = childNode.name();
+ int childNum = 0;
+ for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling()) {
+ string nodeName = childNode.name();
- // span label
- string label;
- int startPos = output.size();
+ // span label
+ string label;
+ int startPos = output.size();
- if (!nodeName.empty()) {
- pugi::xml_attribute attribute = childNode.attribute("label");
- label = attribute.as_string();
-
- // recursively call this function. For proper recursive trees
- XMLParse(output, tree, childNode, params);
- }
+ if (!nodeName.empty()) {
+ pugi::xml_attribute attribute = childNode.attribute("label");
+ label = attribute.as_string();
+ // recursively call this function. For proper recursive trees
+ XMLParse(output, tree, childNode, params);
+ }
- // fill phrase vector
- string text = childNode.value();
- Escape(text);
- //cerr << childNum << " " << label << "=" << text << endl;
- std::vector<string> toks;
- Moses::Tokenize(toks, text);
+ // fill phrase vector
+ string text = childNode.value();
+ Escape(text);
+ //cerr << childNum << " " << label << "=" << text << endl;
- for (size_t i = 0; i < toks.size(); ++i) {
- const string &tok = toks[i];
- Word *word = new Word(output.size(), tok);
- output.push_back(word);
- }
+ std::vector<string> toks;
+ Moses::Tokenize(toks, text);
- // is it a labelled span?
- int endPos = output.size() - 1;
+ for (size_t i = 0; i < toks.size(); ++i) {
+ const string &tok = toks[i];
+ Word *word = new Word(output.size(), tok);
+ output.push_back(word);
+ }
- // fill syntax labels
- if (!label.empty()) {
- label = "[" + label + "]";
- tree.Add(startPos, endPos, label, params);
- }
+ // is it a labelled span?
+ int endPos = output.size() - 1;
- ++childNum;
+ // fill syntax labels
+ if (!label.empty()) {
+ label = "[" + label + "]";
+ tree.Add(startPos, endPos, label, params);
}
+ ++childNum;
+ }
+
}
void AlignedSentenceSyntax::XMLParse(Phrase &output,
- SyntaxTree &tree,
- const std::string input,
- const Parameter &params)
+ SyntaxTree &tree,
+ const std::string input,
+ const Parameter &params)
{
- pugi::xml_document doc;
- pugi::xml_parse_result result = doc.load(input.c_str(),
- pugi::parse_default | pugi::parse_comments);
+ pugi::xml_document doc;
+ pugi::xml_parse_result result = doc.load(input.c_str(),
+ pugi::parse_default | pugi::parse_comments);
- pugi::xml_node topNode = doc.child("xml");
- XMLParse(output, tree, topNode, params);
+ pugi::xml_node topNode = doc.child("xml");
+ XMLParse(output, tree, topNode, params);
}
void AlignedSentenceSyntax::CreateNonTerms()
{
- for (int sourceStart = 0; sourceStart < m_source.size(); ++sourceStart) {
- for (int sourceEnd = sourceStart; sourceEnd < m_source.size(); ++sourceEnd) {
- ConsistentPhrases::Coll &coll = m_consistentPhrases.GetColl(sourceStart, sourceEnd);
- const SyntaxTree::Labels &sourceLabels = m_sourceTree.Find(sourceStart, sourceEnd);
+ for (int sourceStart = 0; sourceStart < m_source.size(); ++sourceStart) {
+ for (int sourceEnd = sourceStart; sourceEnd < m_source.size(); ++sourceEnd) {
+ ConsistentPhrases::Coll &coll = m_consistentPhrases.GetColl(sourceStart, sourceEnd);
+ const SyntaxTree::Labels &sourceLabels = m_sourceTree.Find(sourceStart, sourceEnd);
- ConsistentPhrases::Coll::iterator iter;
- for (iter = coll.begin(); iter != coll.end(); ++iter) {
- ConsistentPhrase &cp = **iter;
+ ConsistentPhrases::Coll::iterator iter;
+ for (iter = coll.begin(); iter != coll.end(); ++iter) {
+ ConsistentPhrase &cp = **iter;
- int targetStart = cp.corners[2];
- int targetEnd = cp.corners[3];
- const SyntaxTree::Labels &targetLabels = m_targetTree.Find(targetStart, targetEnd);
+ int targetStart = cp.corners[2];
+ int targetEnd = cp.corners[3];
+ const SyntaxTree::Labels &targetLabels = m_targetTree.Find(targetStart, targetEnd);
- CreateNonTerms(cp, sourceLabels, targetLabels);
- }
- }
- }
+ CreateNonTerms(cp, sourceLabels, targetLabels);
+ }
+ }
+ }
}
void AlignedSentenceSyntax::CreateNonTerms(ConsistentPhrase &cp,
- const SyntaxTree::Labels &sourceLabels,
- const SyntaxTree::Labels &targetLabels)
+ const SyntaxTree::Labels &sourceLabels,
+ const SyntaxTree::Labels &targetLabels)
{
- SyntaxTree::Labels::const_iterator iterSource;
- for (iterSource = sourceLabels.begin(); iterSource != sourceLabels.end(); ++iterSource) {
- const string &sourceLabel = *iterSource;
-
- SyntaxTree::Labels::const_iterator iterTarget;
- for (iterTarget = targetLabels.begin(); iterTarget != targetLabels.end(); ++iterTarget) {
- const string &targetLabel = *iterTarget;
- cp.AddNonTerms(sourceLabel, targetLabel);
- }
- }
+ SyntaxTree::Labels::const_iterator iterSource;
+ for (iterSource = sourceLabels.begin(); iterSource != sourceLabels.end(); ++iterSource) {
+ const string &sourceLabel = *iterSource;
+
+ SyntaxTree::Labels::const_iterator iterTarget;
+ for (iterTarget = targetLabels.begin(); iterTarget != targetLabels.end(); ++iterTarget) {
+ const string &targetLabel = *iterTarget;
+ cp.AddNonTerms(sourceLabel, targetLabel);
+ }
+ }
}
diff --git a/phrase-extract/extract-mixed-syntax/AlignedSentenceSyntax.h b/phrase-extract/extract-mixed-syntax/AlignedSentenceSyntax.h
index 2e9431996..db6764cee 100644
--- a/phrase-extract/extract-mixed-syntax/AlignedSentenceSyntax.h
+++ b/phrase-extract/extract-mixed-syntax/AlignedSentenceSyntax.h
@@ -14,33 +14,33 @@
class AlignedSentenceSyntax : public AlignedSentence
{
public:
- AlignedSentenceSyntax(int lineNum,
- const std::string &source,
- const std::string &target,
- const std::string &alignment);
- virtual ~AlignedSentenceSyntax();
+ AlignedSentenceSyntax(int lineNum,
+ const std::string &source,
+ const std::string &target,
+ const std::string &alignment);
+ virtual ~AlignedSentenceSyntax();
- void Create(const Parameter &params);
+ void Create(const Parameter &params);
- //virtual std::string Debug() const;
+ //virtual std::string Debug() const;
protected:
- std::string m_sourceStr, m_targetStr, m_alignmentStr;
- SyntaxTree m_sourceTree, m_targetTree;
-
- void XMLParse(Phrase &output,
- SyntaxTree &tree,
- const std::string input,
- const Parameter &params);
- void XMLParse(Phrase &output,
- SyntaxTree &tree,
- const pugi::xml_node &parentNode,
- const Parameter &params);
- void CreateNonTerms();
- void CreateNonTerms(ConsistentPhrase &cp,
- const SyntaxTree::Labels &sourceLabels,
- const SyntaxTree::Labels &targetLabels);
- void Populate(bool isSyntax, int mixedSyntaxType, const Parameter &params,
- std::string line, Phrase &phrase, SyntaxTree &tree);
+ std::string m_sourceStr, m_targetStr, m_alignmentStr;
+ SyntaxTree m_sourceTree, m_targetTree;
+
+ void XMLParse(Phrase &output,
+ SyntaxTree &tree,
+ const std::string input,
+ const Parameter &params);
+ void XMLParse(Phrase &output,
+ SyntaxTree &tree,
+ const pugi::xml_node &parentNode,
+ const Parameter &params);
+ void CreateNonTerms();
+ void CreateNonTerms(ConsistentPhrase &cp,
+ const SyntaxTree::Labels &sourceLabels,
+ const SyntaxTree::Labels &targetLabels);
+ void Populate(bool isSyntax, int mixedSyntaxType, const Parameter &params,
+ std::string line, Phrase &phrase, SyntaxTree &tree);
};
diff --git a/phrase-extract/extract-mixed-syntax/ConsistentPhrase.cpp b/phrase-extract/extract-mixed-syntax/ConsistentPhrase.cpp
index bb913da5a..6ac22b1f7 100644
--- a/phrase-extract/extract-mixed-syntax/ConsistentPhrase.cpp
+++ b/phrase-extract/extract-mixed-syntax/ConsistentPhrase.cpp
@@ -13,20 +13,21 @@
using namespace std;
ConsistentPhrase::ConsistentPhrase(
- int sourceStart, int sourceEnd,
- int targetStart, int targetEnd,
- const Parameter &params)
-:corners(4)
-,m_hieroNonTerm(*this, params.hieroNonTerm, params.hieroNonTerm)
+ int sourceStart, int sourceEnd,
+ int targetStart, int targetEnd,
+ const Parameter &params)
+ :corners(4)
+ ,m_hieroNonTerm(*this, params.hieroNonTerm, params.hieroNonTerm)
{
- corners[0] = sourceStart;
- corners[1] = sourceEnd;
- corners[2] = targetStart;
- corners[3] = targetEnd;
+ corners[0] = sourceStart;
+ corners[1] = sourceEnd;
+ corners[2] = targetStart;
+ corners[3] = targetEnd;
}
-ConsistentPhrase::~ConsistentPhrase() {
- // TODO Auto-generated destructor stub
+ConsistentPhrase::~ConsistentPhrase()
+{
+ // TODO Auto-generated destructor stub
}
bool ConsistentPhrase::operator<(const ConsistentPhrase &other) const
@@ -35,29 +36,29 @@ bool ConsistentPhrase::operator<(const ConsistentPhrase &other) const
}
void ConsistentPhrase::AddNonTerms(const std::string &source,
- const std::string &target)
+ const std::string &target)
{
- m_nonTerms.push_back(NonTerm(*this, source, target));
+ m_nonTerms.push_back(NonTerm(*this, source, target));
}
bool ConsistentPhrase::TargetOverlap(const ConsistentPhrase &other) const
{
- if ( other.corners[3] < corners[2] || other.corners[2] > corners[3])
- return false;
+ if ( other.corners[3] < corners[2] || other.corners[2] > corners[3])
+ return false;
- return true;
+ return true;
}
std::string ConsistentPhrase::Debug() const
{
stringstream out;
out << "[" << corners[0] << "-" << corners[1]
- << "][" << corners[2] << "-" << corners[3] << "]";
+ << "][" << corners[2] << "-" << corners[3] << "]";
out << "NT:";
for (size_t i = 0; i < m_nonTerms.size(); ++i) {
- const NonTerm &nonTerm = m_nonTerms[i];
- out << nonTerm.GetLabel(Moses::Input) << ":" << nonTerm.GetLabel(Moses::Output);
+ const NonTerm &nonTerm = m_nonTerms[i];
+ out << nonTerm.GetLabel(Moses::Input) << ":" << nonTerm.GetLabel(Moses::Output);
}
return out.str();
diff --git a/phrase-extract/extract-mixed-syntax/ConsistentPhrase.h b/phrase-extract/extract-mixed-syntax/ConsistentPhrase.h
index 865b4386f..21b358709 100644
--- a/phrase-extract/extract-mixed-syntax/ConsistentPhrase.h
+++ b/phrase-extract/extract-mixed-syntax/ConsistentPhrase.h
@@ -16,29 +16,32 @@
class ConsistentPhrase
{
public:
- typedef std::vector<NonTerm> NonTerms;
+ typedef std::vector<NonTerm> NonTerms;
- std::vector<int> corners;
+ std::vector<int> corners;
- ConsistentPhrase(const ConsistentPhrase &copy); // do not implement
- ConsistentPhrase(int sourceStart, int sourceEnd,
- int targetStart, int targetEnd,
- const Parameter &params);
+ ConsistentPhrase(const ConsistentPhrase &copy); // do not implement
+ ConsistentPhrase(int sourceStart, int sourceEnd,
+ int targetStart, int targetEnd,
+ const Parameter &params);
- virtual ~ConsistentPhrase();
+ virtual ~ConsistentPhrase();
- int GetWidth(Moses::FactorDirection direction) const
- { return (direction == Moses::Input) ? corners[1] - corners[0] + 1 : corners[3] - corners[2] + 1; }
+ int GetWidth(Moses::FactorDirection direction) const {
+ return (direction == Moses::Input) ? corners[1] - corners[0] + 1 : corners[3] - corners[2] + 1;
+ }
- void AddNonTerms(const std::string &source,
- const std::string &target);
- const NonTerms &GetNonTerms() const
- { return m_nonTerms;}
- const NonTerm &GetHieroNonTerm() const
- { return m_hieroNonTerm;}
+ void AddNonTerms(const std::string &source,
+ const std::string &target);
+ const NonTerms &GetNonTerms() const {
+ return m_nonTerms;
+ }
+ const NonTerm &GetHieroNonTerm() const {
+ return m_hieroNonTerm;
+ }
- bool TargetOverlap(const ConsistentPhrase &other) const;
+ bool TargetOverlap(const ConsistentPhrase &other) const;
bool operator<(const ConsistentPhrase &other) const;
diff --git a/phrase-extract/extract-mixed-syntax/ConsistentPhrases.cpp b/phrase-extract/extract-mixed-syntax/ConsistentPhrases.cpp
index 8978c88fa..f03a61840 100644
--- a/phrase-extract/extract-mixed-syntax/ConsistentPhrases.cpp
+++ b/phrase-extract/extract-mixed-syntax/ConsistentPhrases.cpp
@@ -17,35 +17,36 @@ ConsistentPhrases::ConsistentPhrases()
{
}
-ConsistentPhrases::~ConsistentPhrases() {
- for (int start = 0; start < m_coll.size(); ++start) {
- std::vector<Coll> &allSourceStart = m_coll[start];
-
- for (int size = 0; size < allSourceStart.size(); ++size) {
- Coll &coll = allSourceStart[size];
- Moses::RemoveAllInColl(coll);
- }
- }
+ConsistentPhrases::~ConsistentPhrases()
+{
+ for (int start = 0; start < m_coll.size(); ++start) {
+ std::vector<Coll> &allSourceStart = m_coll[start];
+
+ for (int size = 0; size < allSourceStart.size(); ++size) {
+ Coll &coll = allSourceStart[size];
+ Moses::RemoveAllInColl(coll);
+ }
+ }
}
void ConsistentPhrases::Initialize(size_t size)
{
- m_coll.resize(size);
+ m_coll.resize(size);
- for (size_t sourceStart = 0; sourceStart < size; ++sourceStart) {
- std::vector<Coll> &allSourceStart = m_coll[sourceStart];
- allSourceStart.resize(size - sourceStart);
- }
+ for (size_t sourceStart = 0; sourceStart < size; ++sourceStart) {
+ std::vector<Coll> &allSourceStart = m_coll[sourceStart];
+ allSourceStart.resize(size - sourceStart);
+ }
}
void ConsistentPhrases::Add(int sourceStart, int sourceEnd,
- int targetStart, int targetEnd,
- const Parameter &params)
+ int targetStart, int targetEnd,
+ const Parameter &params)
{
Coll &coll = m_coll[sourceStart][sourceEnd - sourceStart];
ConsistentPhrase *cp = new ConsistentPhrase(sourceStart, sourceEnd,
- targetStart, targetEnd,
- params);
+ targetStart, targetEnd,
+ params);
pair<Coll::iterator, bool> inserted = coll.insert(cp);
assert(inserted.second);
@@ -53,51 +54,51 @@ void ConsistentPhrases::Add(int sourceStart, int sourceEnd,
const ConsistentPhrases::Coll &ConsistentPhrases::GetColl(int sourceStart, int sourceEnd) const
{
- const std::vector<Coll> &allSourceStart = m_coll[sourceStart];
- const Coll &ret = allSourceStart[sourceEnd - sourceStart];
- return ret;
+ const std::vector<Coll> &allSourceStart = m_coll[sourceStart];
+ const Coll &ret = allSourceStart[sourceEnd - sourceStart];
+ return ret;
}
ConsistentPhrases::Coll &ConsistentPhrases::GetColl(int sourceStart, int sourceEnd)
{
- std::vector<Coll> &allSourceStart = m_coll[sourceStart];
- Coll &ret = allSourceStart[sourceEnd - sourceStart];
- return ret;
+ std::vector<Coll> &allSourceStart = m_coll[sourceStart];
+ Coll &ret = allSourceStart[sourceEnd - sourceStart];
+ return ret;
}
std::string ConsistentPhrases::Debug() const
{
- std::stringstream out;
- for (int start = 0; start < m_coll.size(); ++start) {
- const std::vector<Coll> &allSourceStart = m_coll[start];
-
- for (int size = 0; size < allSourceStart.size(); ++size) {
- const Coll &coll = allSourceStart[size];
-
- Coll::const_iterator iter;
- for (iter = coll.begin(); iter != coll.end(); ++iter) {
- const ConsistentPhrase &consistentPhrase = **iter;
- out << consistentPhrase.Debug() << endl;
- }
- }
- }
-
- return out.str();
+ std::stringstream out;
+ for (int start = 0; start < m_coll.size(); ++start) {
+ const std::vector<Coll> &allSourceStart = m_coll[start];
+
+ for (int size = 0; size < allSourceStart.size(); ++size) {
+ const Coll &coll = allSourceStart[size];
+
+ Coll::const_iterator iter;
+ for (iter = coll.begin(); iter != coll.end(); ++iter) {
+ const ConsistentPhrase &consistentPhrase = **iter;
+ out << consistentPhrase.Debug() << endl;
+ }
+ }
+ }
+
+ return out.str();
}
void ConsistentPhrases::AddHieroNonTerms(const Parameter &params)
{
- // add [X] labels everywhere
- for (int i = 0; i < m_coll.size(); ++i) {
- vector<Coll> &inner = m_coll[i];
- for (int j = 0; j < inner.size(); ++j) {
- ConsistentPhrases::Coll &coll = inner[j];
- ConsistentPhrases::Coll::iterator iter;
- for (iter = coll.begin(); iter != coll.end(); ++iter) {
- ConsistentPhrase &cp = **iter;
- cp.AddNonTerms(params.hieroNonTerm, params.hieroNonTerm);
- }
- }
- }
+ // add [X] labels everywhere
+ for (int i = 0; i < m_coll.size(); ++i) {
+ vector<Coll> &inner = m_coll[i];
+ for (int j = 0; j < inner.size(); ++j) {
+ ConsistentPhrases::Coll &coll = inner[j];
+ ConsistentPhrases::Coll::iterator iter;
+ for (iter = coll.begin(); iter != coll.end(); ++iter) {
+ ConsistentPhrase &cp = **iter;
+ cp.AddNonTerms(params.hieroNonTerm, params.hieroNonTerm);
+ }
+ }
+ }
}
diff --git a/phrase-extract/extract-mixed-syntax/ConsistentPhrases.h b/phrase-extract/extract-mixed-syntax/ConsistentPhrases.h
index 3daf6b7ff..1347b4665 100644
--- a/phrase-extract/extract-mixed-syntax/ConsistentPhrases.h
+++ b/phrase-extract/extract-mixed-syntax/ConsistentPhrases.h
@@ -14,27 +14,28 @@
class Word;
class Parameter;
-class ConsistentPhrases {
+class ConsistentPhrases
+{
public:
- typedef std::set<ConsistentPhrase*> Coll;
+ typedef std::set<ConsistentPhrase*> Coll;
- ConsistentPhrases();
- virtual ~ConsistentPhrases();
+ ConsistentPhrases();
+ virtual ~ConsistentPhrases();
- void Initialize(size_t size);
+ void Initialize(size_t size);
- void Add(int sourceStart, int sourceEnd,
- int targetStart, int targetEnd,
- const Parameter &params);
+ void Add(int sourceStart, int sourceEnd,
+ int targetStart, int targetEnd,
+ const Parameter &params);
- void AddHieroNonTerms(const Parameter &params);
+ void AddHieroNonTerms(const Parameter &params);
- const Coll &GetColl(int sourceStart, int sourceEnd) const;
- Coll &GetColl(int sourceStart, int sourceEnd);
+ const Coll &GetColl(int sourceStart, int sourceEnd) const;
+ Coll &GetColl(int sourceStart, int sourceEnd);
- std::string Debug() const;
+ std::string Debug() const;
protected:
- std::vector< std::vector<Coll> > m_coll;
+ std::vector< std::vector<Coll> > m_coll;
};
diff --git a/phrase-extract/extract-mixed-syntax/InputFileStream.cpp b/phrase-extract/extract-mixed-syntax/InputFileStream.cpp
index b52d1f920..d111903e6 100644
--- a/phrase-extract/extract-mixed-syntax/InputFileStream.cpp
+++ b/phrase-extract/extract-mixed-syntax/InputFileStream.cpp
@@ -3,17 +3,17 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -27,36 +27,35 @@ using namespace std;
namespace Moses
{
- InputFileStream::InputFileStream(const std::string &filePath)
- : std::istream(NULL)
- , m_streambuf(NULL)
- {
- if (filePath.size() > 3 &&
- filePath.substr(filePath.size() - 3, 3) == ".gz")
- {
- m_streambuf = new gzfilebuf(filePath.c_str());
- } else {
- std::filebuf* fb = new std::filebuf();
- fb = fb->open(filePath.c_str(), std::ios::in);
- if (! fb) {
- cerr << "Can't read " << filePath.c_str() << endl;
- exit(1);
- }
- m_streambuf = fb;
- }
- this->init(m_streambuf);
- }
-
- InputFileStream::~InputFileStream()
- {
- delete m_streambuf;
- m_streambuf = NULL;
- }
-
- void InputFileStream::Close()
- {
- }
-
-
+InputFileStream::InputFileStream(const std::string &filePath)
+ : std::istream(NULL)
+ , m_streambuf(NULL)
+{
+ if (filePath.size() > 3 &&
+ filePath.substr(filePath.size() - 3, 3) == ".gz") {
+ m_streambuf = new gzfilebuf(filePath.c_str());
+ } else {
+ std::filebuf* fb = new std::filebuf();
+ fb = fb->open(filePath.c_str(), std::ios::in);
+ if (! fb) {
+ cerr << "Can't read " << filePath.c_str() << endl;
+ exit(1);
+ }
+ m_streambuf = fb;
+ }
+ this->init(m_streambuf);
+}
+
+InputFileStream::~InputFileStream()
+{
+ delete m_streambuf;
+ m_streambuf = NULL;
+}
+
+void InputFileStream::Close()
+{
+}
+
+
}
diff --git a/phrase-extract/extract-mixed-syntax/InputFileStream.h b/phrase-extract/extract-mixed-syntax/InputFileStream.h
index f10ec2164..e2a31bc82 100644
--- a/phrase-extract/extract-mixed-syntax/InputFileStream.h
+++ b/phrase-extract/extract-mixed-syntax/InputFileStream.h
@@ -3,17 +3,17 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -28,21 +28,21 @@
namespace Moses
{
-
- /** Used in place of std::istream, can read zipped files if it ends in .gz
- */
- class InputFileStream : public std::istream
- {
- protected:
- std::streambuf *m_streambuf;
- public:
-
- InputFileStream(const std::string &filePath);
- ~InputFileStream();
-
- void Close();
- };
-
+
+/** Used in place of std::istream, can read zipped files if it ends in .gz
+ */
+class InputFileStream : public std::istream
+{
+protected:
+ std::streambuf *m_streambuf;
+public:
+
+ InputFileStream(const std::string &filePath);
+ ~InputFileStream();
+
+ void Close();
+};
+
}
#endif
diff --git a/phrase-extract/extract-mixed-syntax/Main.cpp b/phrase-extract/extract-mixed-syntax/Main.cpp
index 10656b577..6d9118014 100644
--- a/phrase-extract/extract-mixed-syntax/Main.cpp
+++ b/phrase-extract/extract-mixed-syntax/Main.cpp
@@ -23,63 +23,59 @@ int main(int argc, char** argv)
namespace po = boost::program_options;
po::options_description desc("Options");
desc.add_options()
- ("help", "Print help messages")
- ("MaxSpan", po::value<int>()->default_value(params.maxSpan), "Max (source) span of a rule. ie. number of words in the source")
- ("MinSpan", po::value<int>()->default_value(params.minSpan), "Min (source) span of a rule.")
- ("GlueGrammar", po::value<string>()->default_value(params.gluePath), "Output glue grammar to here")
- ("SentenceOffset", po::value<long>()->default_value(params.sentenceOffset), "Starting sentence id. Not used")
- ("GZOutput", "Compress extract files")
- ("MaxNonTerm", po::value<int>()->default_value(params.maxNonTerm), "Maximum number of non-terms allowed per rule")
- ("MaxHieroNonTerm", po::value<int>()->default_value(params.maxHieroNonTerm), "Maximum number of Hiero non-term. Usually, --MaxNonTerm is the normal constraint")
- ("MinHoleSource", po::value<int>()->default_value(params.minHoleSource), "Minimum source span for a non-term.")
- ("MinHoleSourceSyntax", po::value<int>()->default_value(params.minHoleSourceSyntax), "Minimum source span for a syntactic non-term (source or target).")
-
- ("SourceSyntax", "Source sentence is a parse tree")
- ("TargetSyntax", "Target sentence is a parse tree")
- ("MixedSyntaxType", po::value<int>()->default_value(params.mixedSyntaxType), "Hieu's Mixed syntax type. 0(default)=no mixed syntax, 1=add [X] only if no syntactic label. 2=add [X] everywhere")
- ("MultiLabel", po::value<int>()->default_value(params.multiLabel), "What to do with multiple labels on the same span. 0(default)=keep them all, 1=keep only top-most, 2=keep only bottom-most")
- ("HieroSourceLHS", "Always use Hiero source LHS? Default = 0")
- ("MaxSpanFreeNonTermSource", po::value<int>()->default_value(params.maxSpanFreeNonTermSource), "Max number of words covered by beginning/end NT. Default = 0 (no limit)")
- ("NoNieceTerminal", "Don't extract rule if 1 of the non-term covers the same word as 1 of the terminals")
- ("MaxScope", po::value<int>()->default_value(params.maxScope), "maximum scope (see Hopkins and Langmead (2010)). Default is HIGH")
- ("MinScope", po::value<int>()->default_value(params.minScope), "min scope.")
-
- ("SpanLength", "Property - span length of RHS each non-term")
-
- ("NonTermContext", "Property - (source) left and right, inside and outside words of each non-term ")
- ("NonTermContextTarget", "Property - (target) left and right, inside and outside words of each non-term")
- ("NonTermContextFactor", po::value<int>()->default_value(params.nonTermContextFactor), "Factor to use for non-term context property.")
-
- ("NumSourceFactors", po::value<int>()->default_value(params.numSourceFactors), "Number of source factors.")
- ("NumTargetFactors", po::value<int>()->default_value(params.numTargetFactors), "Number of target factors.")
-
- ("HieroNonTerm", po::value<string>()->default_value(params.hieroNonTerm), "Hiero non-terminal label, including bracket")
- ("ScopeSpan", po::value<string>()->default_value(params.scopeSpanStr), "Min and max span for rules of each scope. Format is min,max:min,max...")
-
- ("NonTermConsecSource", "Allow consecutive non-terms on the source side")
- ("NonTermConsecSourceMixedSyntax", po::value<int>()->default_value(params.nonTermConsecSourceMixedSyntax), "In mixed syntax mode, what nt can be consecutive. 0=don't allow consec nt. 1(default)=hiero+syntax. 2=syntax+syntax. 3=always allow");
+ ("help", "Print help messages")
+ ("MaxSpan", po::value<int>()->default_value(params.maxSpan), "Max (source) span of a rule. ie. number of words in the source")
+ ("MinSpan", po::value<int>()->default_value(params.minSpan), "Min (source) span of a rule.")
+ ("GlueGrammar", po::value<string>()->default_value(params.gluePath), "Output glue grammar to here")
+ ("SentenceOffset", po::value<long>()->default_value(params.sentenceOffset), "Starting sentence id. Not used")
+ ("GZOutput", "Compress extract files")
+ ("MaxNonTerm", po::value<int>()->default_value(params.maxNonTerm), "Maximum number of non-terms allowed per rule")
+ ("MaxHieroNonTerm", po::value<int>()->default_value(params.maxHieroNonTerm), "Maximum number of Hiero non-term. Usually, --MaxNonTerm is the normal constraint")
+ ("MinHoleSource", po::value<int>()->default_value(params.minHoleSource), "Minimum source span for a non-term.")
+ ("MinHoleSourceSyntax", po::value<int>()->default_value(params.minHoleSourceSyntax), "Minimum source span for a syntactic non-term (source or target).")
+
+ ("SourceSyntax", "Source sentence is a parse tree")
+ ("TargetSyntax", "Target sentence is a parse tree")
+ ("MixedSyntaxType", po::value<int>()->default_value(params.mixedSyntaxType), "Hieu's Mixed syntax type. 0(default)=no mixed syntax, 1=add [X] only if no syntactic label. 2=add [X] everywhere")
+ ("MultiLabel", po::value<int>()->default_value(params.multiLabel), "What to do with multiple labels on the same span. 0(default)=keep them all, 1=keep only top-most, 2=keep only bottom-most")
+ ("HieroSourceLHS", "Always use Hiero source LHS? Default = 0")
+ ("MaxSpanFreeNonTermSource", po::value<int>()->default_value(params.maxSpanFreeNonTermSource), "Max number of words covered by beginning/end NT. Default = 0 (no limit)")
+ ("NoNieceTerminal", "Don't extract rule if 1 of the non-term covers the same word as 1 of the terminals")
+ ("MaxScope", po::value<int>()->default_value(params.maxScope), "maximum scope (see Hopkins and Langmead (2010)). Default is HIGH")
+ ("MinScope", po::value<int>()->default_value(params.minScope), "min scope.")
+
+ ("SpanLength", "Property - span length of RHS each non-term")
+
+ ("NonTermContext", "Property - (source) left and right, inside and outside words of each non-term ")
+ ("NonTermContextTarget", "Property - (target) left and right, inside and outside words of each non-term")
+ ("NonTermContextFactor", po::value<int>()->default_value(params.nonTermContextFactor), "Factor to use for non-term context property.")
+
+ ("NumSourceFactors", po::value<int>()->default_value(params.numSourceFactors), "Number of source factors.")
+ ("NumTargetFactors", po::value<int>()->default_value(params.numTargetFactors), "Number of target factors.")
+
+ ("HieroNonTerm", po::value<string>()->default_value(params.hieroNonTerm), "Hiero non-terminal label, including bracket")
+ ("ScopeSpan", po::value<string>()->default_value(params.scopeSpanStr), "Min and max span for rules of each scope. Format is min,max:min,max...")
+
+ ("NonTermConsecSource", "Allow consecutive non-terms on the source side")
+ ("NonTermConsecSourceMixedSyntax", po::value<int>()->default_value(params.nonTermConsecSourceMixedSyntax), "In mixed syntax mode, what nt can be consecutive. 0=don't allow consec nt. 1(default)=hiero+syntax. 2=syntax+syntax. 3=always allow");
po::variables_map vm;
- try
- {
+ try {
po::store(po::parse_command_line(argc, argv, desc),
vm); // can throw
/** --help option
*/
- if ( vm.count("help") || argc < 5 )
- {
+ if ( vm.count("help") || argc < 5 ) {
std::cout << argv[0] << " target source alignment [options...]" << std::endl
<< desc << std::endl;
return EXIT_SUCCESS;
}
po::notify(vm); // throws on error, so do after help in case
- // there are any problems
- }
- catch(po::error& e)
- {
+ // there are any problems
+ } catch(po::error& e) {
std::cerr << "ERROR: " << e.what() << std::endl << std::endl;
std::cerr << desc << std::endl;
return EXIT_FAILURE;
@@ -116,7 +112,7 @@ int main(int argc, char** argv)
if (vm.count("HieroNonTerm")) params.hieroNonTerm = vm["HieroNonTerm"].as<string>();
if (vm.count("ScopeSpan")) {
- params.SetScopeSpan(vm["ScopeSpan"].as<string>());
+ params.SetScopeSpan(vm["ScopeSpan"].as<string>());
}
if (vm.count("NonTermConsecSource")) params.nonTermConsecSource = true;
@@ -131,8 +127,8 @@ int main(int argc, char** argv)
string pathExtract = argv[4];
string pathExtractInv = pathExtract + ".inv";
if (params.gzOutput) {
- pathExtract += ".gz";
- pathExtractInv += ".gz";
+ pathExtract += ".gz";
+ pathExtractInv += ".gz";
}
Moses::InputFileStream strmTarget(pathTarget);
@@ -146,54 +142,53 @@ int main(int argc, char** argv)
int lineNum = 1;
string lineTarget, lineSource, lineAlignment;
while (getline(strmTarget, lineTarget)) {
- if (lineNum % 10000 == 0) {
- cerr << lineNum << " ";
- }
-
- bool success;
- success = getline(strmSource, lineSource);
- if (!success) {
- throw "Couldn't read source";
- }
- success = getline(strmAlignment, lineAlignment);
- if (!success) {
- throw "Couldn't read alignment";
- }
-
- /*
- cerr << "lineTarget=" << lineTarget << endl;
- cerr << "lineSource=" << lineSource << endl;
- cerr << "lineAlignment=" << lineAlignment << endl;
- */
-
- AlignedSentence *alignedSentence;
-
- if (params.sourceSyntax || params.targetSyntax) {
- alignedSentence = new AlignedSentenceSyntax(lineNum, lineSource, lineTarget, lineAlignment);
- }
- else {
- alignedSentence = new AlignedSentence(lineNum, lineSource, lineTarget, lineAlignment);
- }
-
- alignedSentence->Create(params);
- //cerr << alignedSentence->Debug();
-
- Rules rules(*alignedSentence);
- rules.Extend(params);
- rules.Consolidate(params);
- //cerr << rules.Debug();
-
- rules.Output(extractFile, true, params);
- rules.Output(extractInvFile, false, params);
-
- delete alignedSentence;
-
- ++lineNum;
+ if (lineNum % 10000 == 0) {
+ cerr << lineNum << " ";
+ }
+
+ bool success;
+ success = getline(strmSource, lineSource);
+ if (!success) {
+ throw "Couldn't read source";
+ }
+ success = getline(strmAlignment, lineAlignment);
+ if (!success) {
+ throw "Couldn't read alignment";
+ }
+
+ /*
+ cerr << "lineTarget=" << lineTarget << endl;
+ cerr << "lineSource=" << lineSource << endl;
+ cerr << "lineAlignment=" << lineAlignment << endl;
+ */
+
+ AlignedSentence *alignedSentence;
+
+ if (params.sourceSyntax || params.targetSyntax) {
+ alignedSentence = new AlignedSentenceSyntax(lineNum, lineSource, lineTarget, lineAlignment);
+ } else {
+ alignedSentence = new AlignedSentence(lineNum, lineSource, lineTarget, lineAlignment);
+ }
+
+ alignedSentence->Create(params);
+ //cerr << alignedSentence->Debug();
+
+ Rules rules(*alignedSentence);
+ rules.Extend(params);
+ rules.Consolidate(params);
+ //cerr << rules.Debug();
+
+ rules.Output(extractFile, true, params);
+ rules.Output(extractInvFile, false, params);
+
+ delete alignedSentence;
+
+ ++lineNum;
}
if (!params.gluePath.empty()) {
- Moses::OutputFileStream glueFile(params.gluePath);
- CreateGlueGrammar(glueFile);
+ Moses::OutputFileStream glueFile(params.gluePath);
+ CreateGlueGrammar(glueFile);
}
cerr << "Finished" << endl;
@@ -201,8 +196,8 @@ int main(int argc, char** argv)
void CreateGlueGrammar(Moses::OutputFileStream &glueFile)
{
- glueFile << "<s> [X] ||| <s> [S] ||| 1 ||| ||| 0" << endl
- << "[X][S] </s> [X] ||| [X][S] </s> [S] ||| 1 ||| 0-0 ||| 0" << endl
- << "[X][S] [X][X] [X] ||| [X][S] [X][X] [S] ||| 2.718 ||| 0-0 1-1 ||| 0" << endl;
+ glueFile << "<s> [X] ||| <s> [S] ||| 1 ||| ||| 0" << endl
+ << "[X][S] </s> [X] ||| [X][S] </s> [S] ||| 1 ||| 0-0 ||| 0" << endl
+ << "[X][S] [X][X] [X] ||| [X][S] [X][X] [S] ||| 2.718 ||| 0-0 1-1 ||| 0" << endl;
}
diff --git a/phrase-extract/extract-mixed-syntax/NonTerm.cpp b/phrase-extract/extract-mixed-syntax/NonTerm.cpp
index 5de780a9a..456be8932 100644
--- a/phrase-extract/extract-mixed-syntax/NonTerm.cpp
+++ b/phrase-extract/extract-mixed-syntax/NonTerm.cpp
@@ -14,18 +14,19 @@
using namespace std;
NonTerm::NonTerm(const ConsistentPhrase &consistentPhrase,
- const std::string &source,
- const std::string &target)
-:m_consistentPhrase(&consistentPhrase)
-,m_source(source)
-,m_target(target)
+ const std::string &source,
+ const std::string &target)
+ :m_consistentPhrase(&consistentPhrase)
+ ,m_source(source)
+ ,m_target(target)
{
- // TODO Auto-generated constructor stub
+ // TODO Auto-generated constructor stub
}
-NonTerm::~NonTerm() {
- // TODO Auto-generated destructor stub
+NonTerm::~NonTerm()
+{
+ // TODO Auto-generated destructor stub
}
std::string NonTerm::Debug() const
@@ -43,7 +44,7 @@ void NonTerm::Output(std::ostream &out) const
void NonTerm::Output(std::ostream &out, Moses::FactorDirection direction) const
{
- out << GetLabel(direction);
+ out << GetLabel(direction);
}
const std::string &NonTerm::GetLabel(Moses::FactorDirection direction) const
@@ -53,14 +54,16 @@ const std::string &NonTerm::GetLabel(Moses::FactorDirection direction) const
bool NonTerm::IsHiero(Moses::FactorDirection direction, const Parameter &params) const
{
- const std::string &label = NonTerm::GetLabel(direction);
- return label == params.hieroNonTerm;
+ const std::string &label = NonTerm::GetLabel(direction);
+ return label == params.hieroNonTerm;
}
bool NonTerm::IsHiero(const Parameter &params) const
{
- return IsHiero(Moses::Input, params) && IsHiero(Moses::Output, params);
+ return IsHiero(Moses::Input, params) && IsHiero(Moses::Output, params);
}
int NonTerm::GetWidth(Moses::FactorDirection direction) const
-{ return GetConsistentPhrase().GetWidth(direction); }
+{
+ return GetConsistentPhrase().GetWidth(direction);
+}
diff --git a/phrase-extract/extract-mixed-syntax/NonTerm.h b/phrase-extract/extract-mixed-syntax/NonTerm.h
index 5b3bb9f04..701978731 100644
--- a/phrase-extract/extract-mixed-syntax/NonTerm.h
+++ b/phrase-extract/extract-mixed-syntax/NonTerm.h
@@ -16,24 +16,27 @@ class NonTerm : public RuleSymbol
{
public:
- NonTerm(const ConsistentPhrase &consistentPhrase,
- const std::string &source,
- const std::string &target);
- virtual ~NonTerm();
+ NonTerm(const ConsistentPhrase &consistentPhrase,
+ const std::string &source,
+ const std::string &target);
+ virtual ~NonTerm();
- const ConsistentPhrase &GetConsistentPhrase() const
- { return *m_consistentPhrase; }
+ const ConsistentPhrase &GetConsistentPhrase() const {
+ return *m_consistentPhrase;
+ }
- int GetWidth(Moses::FactorDirection direction) const;
+ int GetWidth(Moses::FactorDirection direction) const;
- virtual bool IsNonTerm() const
- { return true; }
+ virtual bool IsNonTerm() const {
+ return true;
+ }
- std::string GetString() const
- { return m_source + m_target; }
+ std::string GetString() const {
+ return m_source + m_target;
+ }
- virtual std::string Debug() const;
- virtual void Output(std::ostream &out) const;
+ virtual std::string Debug() const;
+ virtual void Output(std::ostream &out) const;
void Output(std::ostream &out, Moses::FactorDirection direction) const;
const std::string &GetLabel(Moses::FactorDirection direction) const;
@@ -41,7 +44,7 @@ public:
bool IsHiero(const Parameter &params) const;
protected:
- const ConsistentPhrase *m_consistentPhrase;
- std::string m_source, m_target;
+ const ConsistentPhrase *m_consistentPhrase;
+ std::string m_source, m_target;
};
diff --git a/phrase-extract/extract-mixed-syntax/Parameter.cpp b/phrase-extract/extract-mixed-syntax/Parameter.cpp
index 4f742e774..ca15ab752 100644
--- a/phrase-extract/extract-mixed-syntax/Parameter.cpp
+++ b/phrase-extract/extract-mixed-syntax/Parameter.cpp
@@ -11,62 +11,63 @@
using namespace std;
Parameter::Parameter()
-:maxSpan(10)
-,minSpan(0)
-,maxNonTerm(2)
-,maxHieroNonTerm(999)
-,maxSymbolsTarget(999)
-,maxSymbolsSource(5)
-,minHoleSource(2)
-,minHoleSourceSyntax(1)
-,sentenceOffset(0)
-,nonTermConsecSource(false)
-,requireAlignedWord(true)
-,fractionalCounting(true)
-,gzOutput(false)
+ :maxSpan(10)
+ ,minSpan(0)
+ ,maxNonTerm(2)
+ ,maxHieroNonTerm(999)
+ ,maxSymbolsTarget(999)
+ ,maxSymbolsSource(5)
+ ,minHoleSource(2)
+ ,minHoleSourceSyntax(1)
+ ,sentenceOffset(0)
+ ,nonTermConsecSource(false)
+ ,requireAlignedWord(true)
+ ,fractionalCounting(true)
+ ,gzOutput(false)
-,hieroNonTerm("[X]")
-,sourceSyntax(false)
-,targetSyntax(false)
+ ,hieroNonTerm("[X]")
+ ,sourceSyntax(false)
+ ,targetSyntax(false)
-,mixedSyntaxType(0)
-,multiLabel(0)
-,nonTermConsecSourceMixed(true)
-,hieroSourceLHS(false)
-,maxSpanFreeNonTermSource(0)
-,nieceTerminal(true)
-,maxScope(UNDEFINED)
-,minScope(0)
+ ,mixedSyntaxType(0)
+ ,multiLabel(0)
+ ,nonTermConsecSourceMixed(true)
+ ,hieroSourceLHS(false)
+ ,maxSpanFreeNonTermSource(0)
+ ,nieceTerminal(true)
+ ,maxScope(UNDEFINED)
+ ,minScope(0)
-,spanLength(false)
-,nonTermContext(false)
-,nonTermContextTarget(false)
-,nonTermContextFactor(0)
+ ,spanLength(false)
+ ,nonTermContext(false)
+ ,nonTermContextTarget(false)
+ ,nonTermContextFactor(0)
-,numSourceFactors(1)
-,numTargetFactors(1)
+ ,numSourceFactors(1)
+ ,numTargetFactors(1)
-,nonTermConsecSourceMixedSyntax(1)
+ ,nonTermConsecSourceMixedSyntax(1)
{}
-Parameter::~Parameter() {
- // TODO Auto-generated destructor stub
+Parameter::~Parameter()
+{
+ // TODO Auto-generated destructor stub
}
void Parameter::SetScopeSpan(const std::string &str)
{
- scopeSpanStr = str;
- vector<string> toks1;
- Moses::Tokenize(toks1, str, ":");
+ scopeSpanStr = str;
+ vector<string> toks1;
+ Moses::Tokenize(toks1, str, ":");
- for (size_t i = 0; i < toks1.size(); ++i) {
- const string &tok1 = toks1[i];
+ for (size_t i = 0; i < toks1.size(); ++i) {
+ const string &tok1 = toks1[i];
- vector<int> toks2;
- Moses::Tokenize<int>(toks2, tok1, ",");
- UTIL_THROW_IF2(toks2.size() != 2, "Format is min,max:min,max... String is " << tok1);
+ vector<int> toks2;
+ Moses::Tokenize<int>(toks2, tok1, ",");
+ UTIL_THROW_IF2(toks2.size() != 2, "Format is min,max:min,max... String is " << tok1);
- std::pair<int,int> values(toks2[0], toks2[1]);
- scopeSpan.push_back(values);
- }
+ std::pair<int,int> values(toks2[0], toks2[1]);
+ scopeSpan.push_back(values);
+ }
}
diff --git a/phrase-extract/extract-mixed-syntax/Phrase.cpp b/phrase-extract/extract-mixed-syntax/Phrase.cpp
index 535e10d6b..613c67a26 100644
--- a/phrase-extract/extract-mixed-syntax/Phrase.cpp
+++ b/phrase-extract/extract-mixed-syntax/Phrase.cpp
@@ -3,12 +3,12 @@
std::string Phrase::Debug() const
{
- std::stringstream out;
+ std::stringstream out;
- for (size_t i = 0; i < size(); ++i) {
- Word &word = *at(i);
- out << word.Debug() << " ";
- }
+ for (size_t i = 0; i < size(); ++i) {
+ Word &word = *at(i);
+ out << word.Debug() << " ";
+ }
- return out.str();
+ return out.str();
}
diff --git a/phrase-extract/extract-mixed-syntax/Phrase.h b/phrase-extract/extract-mixed-syntax/Phrase.h
index 13912cb95..ec0e43bfc 100644
--- a/phrase-extract/extract-mixed-syntax/Phrase.h
+++ b/phrase-extract/extract-mixed-syntax/Phrase.h
@@ -7,13 +7,13 @@
class Phrase : public std::vector<Word*>
{
public:
- Phrase()
- {}
+ Phrase() {
+ }
- Phrase(size_t size)
- :std::vector<Word*>(size)
- {}
+ Phrase(size_t size)
+ :std::vector<Word*>(size) {
+ }
- std::string Debug() const;
+ std::string Debug() const;
};
diff --git a/phrase-extract/extract-mixed-syntax/Rule.cpp b/phrase-extract/extract-mixed-syntax/Rule.cpp
index a3e148e6c..d84cebed3 100644
--- a/phrase-extract/extract-mixed-syntax/Rule.cpp
+++ b/phrase-extract/extract-mixed-syntax/Rule.cpp
@@ -16,79 +16,80 @@
using namespace std;
Rule::Rule(const NonTerm &lhsNonTerm, const AlignedSentence &alignedSentence)
-:m_lhs(lhsNonTerm)
-,m_alignedSentence(alignedSentence)
-,m_isValid(true)
-,m_canRecurse(true)
+ :m_lhs(lhsNonTerm)
+ ,m_alignedSentence(alignedSentence)
+ ,m_isValid(true)
+ ,m_canRecurse(true)
{
- CreateSource();
+ CreateSource();
}
Rule::Rule(const Rule &copy, const NonTerm &nonTerm)
-:m_lhs(copy.m_lhs)
-,m_alignedSentence(copy.m_alignedSentence)
-,m_isValid(true)
-,m_canRecurse(true)
-,m_nonterms(copy.m_nonterms)
+ :m_lhs(copy.m_lhs)
+ ,m_alignedSentence(copy.m_alignedSentence)
+ ,m_isValid(true)
+ ,m_canRecurse(true)
+ ,m_nonterms(copy.m_nonterms)
{
- m_nonterms.push_back(&nonTerm);
- CreateSource();
+ m_nonterms.push_back(&nonTerm);
+ CreateSource();
}
-Rule::~Rule() {
- // TODO Auto-generated destructor stub
+Rule::~Rule()
+{
+ // TODO Auto-generated destructor stub
}
const ConsistentPhrase &Rule::GetConsistentPhrase() const
-{ return m_lhs.GetConsistentPhrase(); }
+{
+ return m_lhs.GetConsistentPhrase();
+}
void Rule::CreateSource()
{
const NonTerm *cp = NULL;
size_t nonTermInd = 0;
if (nonTermInd < m_nonterms.size()) {
- cp = m_nonterms[nonTermInd];
+ cp = m_nonterms[nonTermInd];
}
for (int sourcePos = m_lhs.GetConsistentPhrase().corners[0];
- sourcePos <= m_lhs.GetConsistentPhrase().corners[1];
- ++sourcePos) {
-
- const RuleSymbol *ruleSymbol;
- if (cp && cp->GetConsistentPhrase().corners[0] <= sourcePos && sourcePos <= cp->GetConsistentPhrase().corners[1]) {
- // replace words with non-term
- ruleSymbol = cp;
- sourcePos = cp->GetConsistentPhrase().corners[1];
- if (m_nonterms.size()) {
- cp = m_nonterms[nonTermInd];
- }
-
- // move to next non-term
- ++nonTermInd;
- cp = (nonTermInd < m_nonterms.size()) ? m_nonterms[nonTermInd] : NULL;
- }
- else {
- // terminal
- ruleSymbol = m_alignedSentence.GetPhrase(Moses::Input)[sourcePos];
- }
-
- m_source.Add(ruleSymbol);
+ sourcePos <= m_lhs.GetConsistentPhrase().corners[1];
+ ++sourcePos) {
+
+ const RuleSymbol *ruleSymbol;
+ if (cp && cp->GetConsistentPhrase().corners[0] <= sourcePos && sourcePos <= cp->GetConsistentPhrase().corners[1]) {
+ // replace words with non-term
+ ruleSymbol = cp;
+ sourcePos = cp->GetConsistentPhrase().corners[1];
+ if (m_nonterms.size()) {
+ cp = m_nonterms[nonTermInd];
+ }
+
+ // move to next non-term
+ ++nonTermInd;
+ cp = (nonTermInd < m_nonterms.size()) ? m_nonterms[nonTermInd] : NULL;
+ } else {
+ // terminal
+ ruleSymbol = m_alignedSentence.GetPhrase(Moses::Input)[sourcePos];
+ }
+
+ m_source.Add(ruleSymbol);
}
}
int Rule::GetNextSourcePosForNonTerm() const
{
- if (m_nonterms.empty()) {
- // no non-terms so far. Can start next non-term on left corner
- return m_lhs.GetConsistentPhrase().corners[0];
- }
- else {
- // next non-term can start just left of previous
- const ConsistentPhrase &cp = m_nonterms.back()->GetConsistentPhrase();
- int nextPos = cp.corners[1] + 1;
- return nextPos;
- }
+ if (m_nonterms.empty()) {
+ // no non-terms so far. Can start next non-term on left corner
+ return m_lhs.GetConsistentPhrase().corners[0];
+ } else {
+ // next non-term can start just left of previous
+ const ConsistentPhrase &cp = m_nonterms.back()->GetConsistentPhrase();
+ int nextPos = cp.corners[1] + 1;
+ return nextPos;
+ }
}
std::string Rule::Debug() const
@@ -97,22 +98,22 @@ std::string Rule::Debug() const
// source
for (size_t i = 0; i < m_source.GetSize(); ++i) {
- const RuleSymbol &symbol = *m_source[i];
- out << symbol.Debug() << " ";
+ const RuleSymbol &symbol = *m_source[i];
+ out << symbol.Debug() << " ";
}
// target
out << "||| ";
for (size_t i = 0; i < m_target.GetSize(); ++i) {
- const RuleSymbol &symbol = *m_target[i];
- out << symbol.Debug() << " ";
+ const RuleSymbol &symbol = *m_target[i];
+ out << symbol.Debug() << " ";
}
out << "||| ";
Alignments::const_iterator iterAlign;
for (iterAlign = m_alignments.begin(); iterAlign != m_alignments.end(); ++iterAlign) {
- const std::pair<int,int> &alignPair = *iterAlign;
- out << alignPair.first << "-" << alignPair.second << " ";
+ const std::pair<int,int> &alignPair = *iterAlign;
+ out << alignPair.first << "-" << alignPair.second << " ";
}
// overall range
@@ -124,26 +125,25 @@ std::string Rule::Debug() const
void Rule::Output(std::ostream &out, bool forward, const Parameter &params) const
{
if (forward) {
- // source
- m_source.Output(out);
- m_lhs.Output(out, Moses::Input);
+ // source
+ m_source.Output(out);
+ m_lhs.Output(out, Moses::Input);
- out << " ||| ";
+ out << " ||| ";
- // target
- m_target.Output(out);
- m_lhs.Output(out, Moses::Output);
- }
- else {
- // target
- m_target.Output(out);
- m_lhs.Output(out, Moses::Output);
+ // target
+ m_target.Output(out);
+ m_lhs.Output(out, Moses::Output);
+ } else {
+ // target
+ m_target.Output(out);
+ m_lhs.Output(out, Moses::Output);
- out << " ||| ";
+ out << " ||| ";
- // source
- m_source.Output(out);
- m_lhs.Output(out, Moses::Input);
+ // source
+ m_source.Output(out);
+ m_lhs.Output(out, Moses::Input);
}
out << " ||| ";
@@ -151,14 +151,13 @@ void Rule::Output(std::ostream &out, bool forward, const Parameter &params) cons
// alignment
Alignments::const_iterator iterAlign;
for (iterAlign = m_alignments.begin(); iterAlign != m_alignments.end(); ++iterAlign) {
- const std::pair<int,int> &alignPair = *iterAlign;
+ const std::pair<int,int> &alignPair = *iterAlign;
- if (forward) {
- out << alignPair.first << "-" << alignPair.second << " ";
- }
- else {
- out << alignPair.second << "-" << alignPair.first << " ";
- }
+ if (forward) {
+ out << alignPair.first << "-" << alignPair.second << " ";
+ } else {
+ out << alignPair.second << "-" << alignPair.first << " ";
+ }
}
out << "||| ";
@@ -172,42 +171,42 @@ void Rule::Output(std::ostream &out, bool forward, const Parameter &params) cons
// span length
if (forward && params.spanLength && m_nonterms.size()) {
- out << "{{SpanLength ";
+ out << "{{SpanLength ";
- for (size_t i = 0; i < m_nonterms.size(); ++i) {
- const NonTerm &nonTerm = *m_nonterms[i];
- const ConsistentPhrase &cp = nonTerm.GetConsistentPhrase();
- out << i << "," << cp.GetWidth(Moses::Input) << "," << cp.GetWidth(Moses::Output) << " ";
- }
- out << "}} ";
+ for (size_t i = 0; i < m_nonterms.size(); ++i) {
+ const NonTerm &nonTerm = *m_nonterms[i];
+ const ConsistentPhrase &cp = nonTerm.GetConsistentPhrase();
+ out << i << "," << cp.GetWidth(Moses::Input) << "," << cp.GetWidth(Moses::Output) << " ";
+ }
+ out << "}} ";
}
// non-term context (source)
if (forward && params.nonTermContext && m_nonterms.size()) {
- out << "{{NonTermContext ";
+ out << "{{NonTermContext ";
- int factor = params.nonTermContextFactor;
+ int factor = params.nonTermContextFactor;
- for (size_t i = 0; i < m_nonterms.size(); ++i) {
- const NonTerm &nonTerm = *m_nonterms[i];
- const ConsistentPhrase &cp = nonTerm.GetConsistentPhrase();
- NonTermContext(1, factor, i, cp, out);
- }
- out << "}} ";
+ for (size_t i = 0; i < m_nonterms.size(); ++i) {
+ const NonTerm &nonTerm = *m_nonterms[i];
+ const ConsistentPhrase &cp = nonTerm.GetConsistentPhrase();
+ NonTermContext(1, factor, i, cp, out);
+ }
+ out << "}} ";
}
// non-term context (target)
if (forward && params.nonTermContextTarget && m_nonterms.size()) {
- out << "{{NonTermContextTarget ";
+ out << "{{NonTermContextTarget ";
- int factor = params.nonTermContextFactor;
+ int factor = params.nonTermContextFactor;
- for (size_t i = 0; i < m_nonterms.size(); ++i) {
- const NonTerm &nonTerm = *m_nonterms[i];
- const ConsistentPhrase &cp = nonTerm.GetConsistentPhrase();
- NonTermContext(2, factor, i, cp, out);
- }
- out << "}} ";
+ for (size_t i = 0; i < m_nonterms.size(); ++i) {
+ const NonTerm &nonTerm = *m_nonterms[i];
+ const ConsistentPhrase &cp = nonTerm.GetConsistentPhrase();
+ NonTermContext(2, factor, i, cp, out);
+ }
+ out << "}} ";
}
}
@@ -223,27 +222,24 @@ void Rule::NonTermContext(int sourceTarget, int factor, size_t ntInd, const Cons
const Phrase *phrase;
if (sourceTarget == 1) {
- startPos = cp.corners[0];
- endPos = cp.corners[1];
- phrase = &m_alignedSentence.GetPhrase(Moses::Input);
- }
- else if (sourceTarget == 2) {
- startPos = cp.corners[2];
- endPos = cp.corners[3];
- phrase = &m_alignedSentence.GetPhrase(Moses::Output);
- }
- else {
- abort();
+ startPos = cp.corners[0];
+ endPos = cp.corners[1];
+ phrase = &m_alignedSentence.GetPhrase(Moses::Input);
+ } else if (sourceTarget == 2) {
+ startPos = cp.corners[2];
+ endPos = cp.corners[3];
+ phrase = &m_alignedSentence.GetPhrase(Moses::Output);
+ } else {
+ abort();
}
out << ntInd << " ";
// left outside
if (startPos == 0) {
- out << "<s> ";
- }
- else {
- NonTermContextFactor(factor, *phrase->at(startPos - 1), out);
+ out << "<s> ";
+ } else {
+ NonTermContextFactor(factor, *phrase->at(startPos - 1), out);
}
// left inside
@@ -254,10 +250,9 @@ void Rule::NonTermContext(int sourceTarget, int factor, size_t ntInd, const Cons
// right outside
if (endPos == phrase->size() - 1) {
- out << "</s> ";
- }
- else {
- NonTermContextFactor(factor, *phrase->at(endPos + 1), out);
+ out << "</s> ";
+ } else {
+ NonTermContextFactor(factor, *phrase->at(endPos + 1), out);
}
@@ -269,27 +264,26 @@ void Rule::Prevalidate(const Parameter &params)
// check number of source symbols in rule
if (m_source.GetSize() > params.maxSymbolsSource) {
- m_isValid = false;
+ m_isValid = false;
}
// check that last non-term added isn't too small
if (m_nonterms.size()) {
- const NonTerm &lastNonTerm = *m_nonterms.back();
- const ConsistentPhrase &cp = lastNonTerm.GetConsistentPhrase();
+ const NonTerm &lastNonTerm = *m_nonterms.back();
+ const ConsistentPhrase &cp = lastNonTerm.GetConsistentPhrase();
- int sourceWidth = cp.GetWidth(Moses::Input);
- if (lastNonTerm.IsHiero(params)) {
- if (sourceWidth < params.minHoleSource) {
- m_isValid = false;
- m_canRecurse = false;
- return;
- }
- }
- else if (sourceWidth < params.minHoleSourceSyntax) {
- m_isValid = false;
- m_canRecurse = false;
- return;
- }
+ int sourceWidth = cp.GetWidth(Moses::Input);
+ if (lastNonTerm.IsHiero(params)) {
+ if (sourceWidth < params.minHoleSource) {
+ m_isValid = false;
+ m_canRecurse = false;
+ return;
+ }
+ } else if (sourceWidth < params.minHoleSourceSyntax) {
+ m_isValid = false;
+ m_canRecurse = false;
+ return;
+ }
}
@@ -297,242 +291,240 @@ void Rule::Prevalidate(const Parameter &params)
int numNonTerms = 0;
int numHieroNonTerms = 0;
for (size_t i = 0; i < m_source.GetSize(); ++i) {
- const RuleSymbol *arc = m_source[i];
- if (arc->IsNonTerm()) {
- ++numNonTerms;
- const NonTerm &nonTerm = *static_cast<const NonTerm*>(arc);
- bool isHiero = nonTerm.IsHiero(params);
- if (isHiero) {
- ++numHieroNonTerms;
- }
- }
+ const RuleSymbol *arc = m_source[i];
+ if (arc->IsNonTerm()) {
+ ++numNonTerms;
+ const NonTerm &nonTerm = *static_cast<const NonTerm*>(arc);
+ bool isHiero = nonTerm.IsHiero(params);
+ if (isHiero) {
+ ++numHieroNonTerms;
+ }
+ }
}
if (numNonTerms >= params.maxNonTerm) {
- m_canRecurse = false;
- if (numNonTerms > params.maxNonTerm) {
- m_isValid = false;
- return;
- }
+ m_canRecurse = false;
+ if (numNonTerms > params.maxNonTerm) {
+ m_isValid = false;
+ return;
+ }
}
if (numHieroNonTerms >= params.maxHieroNonTerm) {
- m_canRecurse = false;
- if (numHieroNonTerms > params.maxHieroNonTerm) {
- m_isValid = false;
- return;
- }
+ m_canRecurse = false;
+ if (numHieroNonTerms > params.maxHieroNonTerm) {
+ m_isValid = false;
+ return;
+ }
}
// check if 2 consecutive non-terms in source
if (!params.nonTermConsecSource && m_nonterms.size() >= 2) {
- const NonTerm &lastNonTerm = *m_nonterms.back();
- const NonTerm &secondLastNonTerm = *m_nonterms[m_nonterms.size() - 2];
- if (secondLastNonTerm.GetConsistentPhrase().corners[1] + 1 ==
- lastNonTerm.GetConsistentPhrase().corners[0]) {
- if (params.mixedSyntaxType == 0) {
- // ordinary hiero or syntax model
- m_isValid = false;
- m_canRecurse = false;
- return;
- }
- else {
- // Hieu's mixed syntax
- switch (params.nonTermConsecSourceMixedSyntax) {
- case 0:
- m_isValid = false;
- m_canRecurse = false;
- return;
- case 1:
- if (lastNonTerm.IsHiero(Moses::Input, params)
- && secondLastNonTerm.IsHiero(Moses::Input, params)) {
- m_isValid = false;
- m_canRecurse = false;
- return;
- }
- break;
- case 2:
- if (lastNonTerm.IsHiero(Moses::Input, params)
- || secondLastNonTerm.IsHiero(Moses::Input, params)) {
- m_isValid = false;
- m_canRecurse = false;
- return;
- }
- break;
- case 3:
- break;
- } // switch
- }
- }
+ const NonTerm &lastNonTerm = *m_nonterms.back();
+ const NonTerm &secondLastNonTerm = *m_nonterms[m_nonterms.size() - 2];
+ if (secondLastNonTerm.GetConsistentPhrase().corners[1] + 1 ==
+ lastNonTerm.GetConsistentPhrase().corners[0]) {
+ if (params.mixedSyntaxType == 0) {
+ // ordinary hiero or syntax model
+ m_isValid = false;
+ m_canRecurse = false;
+ return;
+ } else {
+ // Hieu's mixed syntax
+ switch (params.nonTermConsecSourceMixedSyntax) {
+ case 0:
+ m_isValid = false;
+ m_canRecurse = false;
+ return;
+ case 1:
+ if (lastNonTerm.IsHiero(Moses::Input, params)
+ && secondLastNonTerm.IsHiero(Moses::Input, params)) {
+ m_isValid = false;
+ m_canRecurse = false;
+ return;
+ }
+ break;
+ case 2:
+ if (lastNonTerm.IsHiero(Moses::Input, params)
+ || secondLastNonTerm.IsHiero(Moses::Input, params)) {
+ m_isValid = false;
+ m_canRecurse = false;
+ return;
+ }
+ break;
+ case 3:
+ break;
+ } // switch
+ }
+ }
}
//check to see if it overlaps with any other non-terms
if (m_nonterms.size() >= 2) {
- const NonTerm &lastNonTerm = *m_nonterms.back();
+ const NonTerm &lastNonTerm = *m_nonterms.back();
- for (size_t i = 0; i < m_nonterms.size() - 1; ++i) {
- const NonTerm &otherNonTerm = *m_nonterms[i];
- bool overlap = lastNonTerm.GetConsistentPhrase().TargetOverlap(otherNonTerm.GetConsistentPhrase());
+ for (size_t i = 0; i < m_nonterms.size() - 1; ++i) {
+ const NonTerm &otherNonTerm = *m_nonterms[i];
+ bool overlap = lastNonTerm.GetConsistentPhrase().TargetOverlap(otherNonTerm.GetConsistentPhrase());
- if (overlap) {
- m_isValid = false;
- m_canRecurse = false;
- return;
- }
- }
+ if (overlap) {
+ m_isValid = false;
+ m_canRecurse = false;
+ return;
+ }
+ }
}
// check that at least 1 word is aligned
if (params.requireAlignedWord) {
- bool ok = false;
- for (size_t i = 0; i < m_source.GetSize(); ++i) {
- const RuleSymbol &symbol = *m_source[i];
- if (!symbol.IsNonTerm()) {
- const Word &word = static_cast<const Word&>(symbol);
- if (word.GetAlignment().size()) {
- ok = true;
- break;
- }
- }
- }
-
- if (!ok) {
- m_isValid = false;
- m_canRecurse = false;
- return;
- }
+ bool ok = false;
+ for (size_t i = 0; i < m_source.GetSize(); ++i) {
+ const RuleSymbol &symbol = *m_source[i];
+ if (!symbol.IsNonTerm()) {
+ const Word &word = static_cast<const Word&>(symbol);
+ if (word.GetAlignment().size()) {
+ ok = true;
+ break;
+ }
+ }
+ }
+
+ if (!ok) {
+ m_isValid = false;
+ m_canRecurse = false;
+ return;
+ }
}
if (params.maxSpanFreeNonTermSource) {
- const NonTerm *front = dynamic_cast<const NonTerm*>(m_source[0]);
- if (front) {
- int width = front->GetWidth(Moses::Input);
- if (width > params.maxSpanFreeNonTermSource) {
- m_isValid = false;
- m_canRecurse = false;
- return;
- }
- }
-
- const NonTerm *back = dynamic_cast<const NonTerm*>(m_source.Back());
- if (back) {
- int width = back->GetWidth(Moses::Input);
- if (width > params.maxSpanFreeNonTermSource) {
- m_isValid = false;
- m_canRecurse = false;
- return;
- }
- }
+ const NonTerm *front = dynamic_cast<const NonTerm*>(m_source[0]);
+ if (front) {
+ int width = front->GetWidth(Moses::Input);
+ if (width > params.maxSpanFreeNonTermSource) {
+ m_isValid = false;
+ m_canRecurse = false;
+ return;
+ }
+ }
+
+ const NonTerm *back = dynamic_cast<const NonTerm*>(m_source.Back());
+ if (back) {
+ int width = back->GetWidth(Moses::Input);
+ if (width > params.maxSpanFreeNonTermSource) {
+ m_isValid = false;
+ m_canRecurse = false;
+ return;
+ }
+ }
}
if (!params.nieceTerminal) {
- // collect terminal in a rule
- std::set<const Word*> terms;
- for (size_t i = 0; i < m_source.GetSize(); ++i) {
- const Word *word = dynamic_cast<const Word*>(m_source[i]);
- if (word) {
- terms.insert(word);
- }
- }
-
- // look in non-terms
- for (size_t i = 0; i < m_source.GetSize(); ++i) {
- const NonTerm *nonTerm = dynamic_cast<const NonTerm*>(m_source[i]);
- if (nonTerm) {
- const ConsistentPhrase &cp = nonTerm->GetConsistentPhrase();
- bool containTerm = ContainTerm(cp, terms);
-
- if (containTerm) {
- //cerr << "ruleSource=" << *ruleSource << " ";
- //cerr << "ntRange=" << ntRange << endl;
-
- // non-term contains 1 of the terms in the rule.
- m_isValid = false;
- m_canRecurse = false;
- return;
- }
- }
- }
+ // collect terminal in a rule
+ std::set<const Word*> terms;
+ for (size_t i = 0; i < m_source.GetSize(); ++i) {
+ const Word *word = dynamic_cast<const Word*>(m_source[i]);
+ if (word) {
+ terms.insert(word);
+ }
+ }
+
+ // look in non-terms
+ for (size_t i = 0; i < m_source.GetSize(); ++i) {
+ const NonTerm *nonTerm = dynamic_cast<const NonTerm*>(m_source[i]);
+ if (nonTerm) {
+ const ConsistentPhrase &cp = nonTerm->GetConsistentPhrase();
+ bool containTerm = ContainTerm(cp, terms);
+
+ if (containTerm) {
+ //cerr << "ruleSource=" << *ruleSource << " ";
+ //cerr << "ntRange=" << ntRange << endl;
+
+ // non-term contains 1 of the terms in the rule.
+ m_isValid = false;
+ m_canRecurse = false;
+ return;
+ }
+ }
+ }
}
if (params.maxScope != UNDEFINED || params.minScope > 0) {
- int scope = GetScope(params);
- if (scope > params.maxScope) {
- // scope of subsequent rules will be the same or increase
- // therefore can NOT recurse
- m_isValid = false;
- m_canRecurse = false;
- return;
- }
-
- if (scope < params.minScope) {
- // scope of subsequent rules may increase
- // therefore can recurse
- m_isValid = false;
- }
+ int scope = GetScope(params);
+ if (scope > params.maxScope) {
+ // scope of subsequent rules will be the same or increase
+ // therefore can NOT recurse
+ m_isValid = false;
+ m_canRecurse = false;
+ return;
+ }
+
+ if (scope < params.minScope) {
+ // scope of subsequent rules may increase
+ // therefore can recurse
+ m_isValid = false;
+ }
}
// min/max span per scope
if (params.scopeSpan.size()) {
- int scope = GetScope(params);
- if (scope >= params.scopeSpan.size()) {
- // no constraint on it. It's ok
- }
- else {
- const std::pair<int,int> &constraint = params.scopeSpan[scope];
- int sourceWidth = m_lhs.GetWidth(Moses::Input);
- if (sourceWidth < constraint.first || sourceWidth > constraint.second) {
- m_isValid = false;
- m_canRecurse = false;
- return;
- }
- }
+ int scope = GetScope(params);
+ if (scope >= params.scopeSpan.size()) {
+ // no constraint on it. It's ok
+ } else {
+ const std::pair<int,int> &constraint = params.scopeSpan[scope];
+ int sourceWidth = m_lhs.GetWidth(Moses::Input);
+ if (sourceWidth < constraint.first || sourceWidth > constraint.second) {
+ m_isValid = false;
+ m_canRecurse = false;
+ return;
+ }
+ }
}
}
int Rule::GetScope(const Parameter &params) const
{
- size_t scope = 0;
- bool previousIsAmbiguous = false;
-
- if (m_source[0]->IsNonTerm()) {
- scope++;
- previousIsAmbiguous = true;
- }
-
- for (size_t i = 1; i < m_source.GetSize(); ++i) {
- const RuleSymbol *symbol = m_source[i];
- bool isAmbiguous = symbol->IsNonTerm();
- if (isAmbiguous) {
- // mixed syntax
- const NonTerm *nt = static_cast<const NonTerm*>(symbol);
- isAmbiguous = nt->IsHiero(Moses::Input, params);
- }
-
- if (isAmbiguous && previousIsAmbiguous) {
- scope++;
- }
- previousIsAmbiguous = isAmbiguous;
- }
-
- if (previousIsAmbiguous) {
- scope++;
- }
-
- return scope;
-
- /*
+ size_t scope = 0;
+ bool previousIsAmbiguous = false;
+
+ if (m_source[0]->IsNonTerm()) {
+ scope++;
+ previousIsAmbiguous = true;
+ }
+
+ for (size_t i = 1; i < m_source.GetSize(); ++i) {
+ const RuleSymbol *symbol = m_source[i];
+ bool isAmbiguous = symbol->IsNonTerm();
+ if (isAmbiguous) {
+ // mixed syntax
+ const NonTerm *nt = static_cast<const NonTerm*>(symbol);
+ isAmbiguous = nt->IsHiero(Moses::Input, params);
+ }
+
+ if (isAmbiguous && previousIsAmbiguous) {
+ scope++;
+ }
+ previousIsAmbiguous = isAmbiguous;
+ }
+
+ if (previousIsAmbiguous) {
+ scope++;
+ }
+
+ return scope;
+
+ /*
int scope = 0;
if (m_source.GetSize() > 1) {
- const RuleSymbol &front = *m_source.Front();
- if (front.IsNonTerm()) {
- ++scope;
- }
+ const RuleSymbol &front = *m_source.Front();
+ if (front.IsNonTerm()) {
+ ++scope;
+ }
- const RuleSymbol &back = *m_source.Back();
- if (back.IsNonTerm()) {
- ++scope;
- }
+ const RuleSymbol &back = *m_source.Back();
+ if (back.IsNonTerm()) {
+ ++scope;
+ }
}
return scope;
*/
@@ -541,41 +533,41 @@ int Rule::GetScope(const Parameter &params) const
template<typename T>
bool Contains(const T *sought, const set<const T*> &coll)
{
- std::set<const Word*>::const_iterator iter;
- for (iter = coll.begin(); iter != coll.end(); ++iter) {
- const Word *found = *iter;
- if (sought->CompareString(*found) == 0) {
- return true;
- }
- }
- return false;
+ std::set<const Word*>::const_iterator iter;
+ for (iter = coll.begin(); iter != coll.end(); ++iter) {
+ const Word *found = *iter;
+ if (sought->CompareString(*found) == 0) {
+ return true;
+ }
+ }
+ return false;
}
bool Rule::ContainTerm(const ConsistentPhrase &cp, const std::set<const Word*> &terms) const
{
- const Phrase &sourceSentence = m_alignedSentence.GetPhrase(Moses::Input);
+ const Phrase &sourceSentence = m_alignedSentence.GetPhrase(Moses::Input);
- for (int pos = cp.corners[0]; pos <= cp.corners[1]; ++pos) {
- const Word *soughtWord = sourceSentence[pos];
+ for (int pos = cp.corners[0]; pos <= cp.corners[1]; ++pos) {
+ const Word *soughtWord = sourceSentence[pos];
- // find same word in set
- if (Contains(soughtWord, terms)) {
- return true;
- }
- }
- return false;
+ // find same word in set
+ if (Contains(soughtWord, terms)) {
+ return true;
+ }
+ }
+ return false;
}
bool CompareTargetNonTerms(const NonTerm *a, const NonTerm *b)
{
- // compare just start target pos
- return a->GetConsistentPhrase().corners[2] < b->GetConsistentPhrase().corners[2];
+ // compare just start target pos
+ return a->GetConsistentPhrase().corners[2] < b->GetConsistentPhrase().corners[2];
}
void Rule::CreateTarget(const Parameter &params)
{
if (!m_isValid) {
- return;
+ return;
}
vector<const NonTerm*> targetNonTerm(m_nonterms);
@@ -584,32 +576,31 @@ void Rule::CreateTarget(const Parameter &params)
const NonTerm *cp = NULL;
size_t nonTermInd = 0;
if (nonTermInd < targetNonTerm.size()) {
- cp = targetNonTerm[nonTermInd];
+ cp = targetNonTerm[nonTermInd];
}
for (int targetPos = m_lhs.GetConsistentPhrase().corners[2];
- targetPos <= m_lhs.GetConsistentPhrase().corners[3];
- ++targetPos) {
-
- const RuleSymbol *ruleSymbol;
- if (cp && cp->GetConsistentPhrase().corners[2] <= targetPos && targetPos <= cp->GetConsistentPhrase().corners[3]) {
- // replace words with non-term
- ruleSymbol = cp;
- targetPos = cp->GetConsistentPhrase().corners[3];
- if (targetNonTerm.size()) {
- cp = targetNonTerm[nonTermInd];
- }
-
- // move to next non-term
- ++nonTermInd;
- cp = (nonTermInd < targetNonTerm.size()) ? targetNonTerm[nonTermInd] : NULL;
- }
- else {
- // terminal
- ruleSymbol = m_alignedSentence.GetPhrase(Moses::Output)[targetPos];
- }
-
- m_target.Add(ruleSymbol);
+ targetPos <= m_lhs.GetConsistentPhrase().corners[3];
+ ++targetPos) {
+
+ const RuleSymbol *ruleSymbol;
+ if (cp && cp->GetConsistentPhrase().corners[2] <= targetPos && targetPos <= cp->GetConsistentPhrase().corners[3]) {
+ // replace words with non-term
+ ruleSymbol = cp;
+ targetPos = cp->GetConsistentPhrase().corners[3];
+ if (targetNonTerm.size()) {
+ cp = targetNonTerm[nonTermInd];
+ }
+
+ // move to next non-term
+ ++nonTermInd;
+ cp = (nonTermInd < targetNonTerm.size()) ? targetNonTerm[nonTermInd] : NULL;
+ } else {
+ // terminal
+ ruleSymbol = m_alignedSentence.GetPhrase(Moses::Output)[targetPos];
+ }
+
+ m_target.Add(ruleSymbol);
}
CreateAlignments();
@@ -618,45 +609,44 @@ void Rule::CreateTarget(const Parameter &params)
void Rule::CreateAlignments()
{
- int sourceStart = GetConsistentPhrase().corners[0];
- int targetStart = GetConsistentPhrase().corners[2];
+ int sourceStart = GetConsistentPhrase().corners[0];
+ int targetStart = GetConsistentPhrase().corners[2];
for (size_t sourcePos = 0; sourcePos < m_source.GetSize(); ++sourcePos) {
- const RuleSymbol *symbol = m_source[sourcePos];
- if (!symbol->IsNonTerm()) {
- // terminals
- const Word &sourceWord = static_cast<const Word&>(*symbol);
- const std::set<const Word *> &targetWords = sourceWord.GetAlignment();
- CreateAlignments(sourcePos, targetWords);
- }
- else {
- // non-terms. same object in both source & target
- CreateAlignments(sourcePos, symbol);
- }
+ const RuleSymbol *symbol = m_source[sourcePos];
+ if (!symbol->IsNonTerm()) {
+ // terminals
+ const Word &sourceWord = static_cast<const Word&>(*symbol);
+ const std::set<const Word *> &targetWords = sourceWord.GetAlignment();
+ CreateAlignments(sourcePos, targetWords);
+ } else {
+ // non-terms. same object in both source & target
+ CreateAlignments(sourcePos, symbol);
+ }
}
}
void Rule::CreateAlignments(int sourcePos, const std::set<const Word *> &targetWords)
{
- std::set<const Word *>::const_iterator iterTarget;
- for (iterTarget = targetWords.begin(); iterTarget != targetWords.end(); ++iterTarget) {
- const Word *targetWord = *iterTarget;
- CreateAlignments(sourcePos, targetWord);
- }
+ std::set<const Word *>::const_iterator iterTarget;
+ for (iterTarget = targetWords.begin(); iterTarget != targetWords.end(); ++iterTarget) {
+ const Word *targetWord = *iterTarget;
+ CreateAlignments(sourcePos, targetWord);
+ }
}
void Rule::CreateAlignments(int sourcePos, const RuleSymbol *targetSought)
{
- // should be in target phrase
- for (size_t targetPos = 0; targetPos < m_target.GetSize(); ++targetPos) {
- const RuleSymbol *foundSymbol = m_target[targetPos];
- if (targetSought == foundSymbol) {
- pair<int, int> alignPoint(sourcePos, targetPos);
- m_alignments.insert(alignPoint);
- return;
- }
- }
-
- throw "not found";
+ // should be in target phrase
+ for (size_t targetPos = 0; targetPos < m_target.GetSize(); ++targetPos) {
+ const RuleSymbol *foundSymbol = m_target[targetPos];
+ if (targetSought == foundSymbol) {
+ pair<int, int> alignPoint(sourcePos, targetPos);
+ m_alignments.insert(alignPoint);
+ return;
+ }
+ }
+
+ throw "not found";
}
diff --git a/phrase-extract/extract-mixed-syntax/Rule.h b/phrase-extract/extract-mixed-syntax/Rule.h
index 15a142b97..51cdee98e 100644
--- a/phrase-extract/extract-mixed-syntax/Rule.h
+++ b/phrase-extract/extract-mixed-syntax/Rule.h
@@ -16,75 +16,83 @@ class NonTerm;
class Parameter;
-class Rule {
+class Rule
+{
public:
- typedef std::set<std::pair<int,int> > Alignments;
+ typedef std::set<std::pair<int,int> > Alignments;
- Rule(const Rule &copy); // do not implement
+ Rule(const Rule &copy); // do not implement
- // original rule with no non-term
- Rule(const NonTerm &lhsNonTerm, const AlignedSentence &alignedSentence);
+ // original rule with no non-term
+ Rule(const NonTerm &lhsNonTerm, const AlignedSentence &alignedSentence);
- // extend a rule, adding 1 new non-term
- Rule(const Rule &copy, const NonTerm &nonTerm);
+ // extend a rule, adding 1 new non-term
+ Rule(const Rule &copy, const NonTerm &nonTerm);
- virtual ~Rule();
+ virtual ~Rule();
- bool IsValid() const
- { return m_isValid; }
+ bool IsValid() const {
+ return m_isValid;
+ }
- bool CanRecurse() const
- { return m_canRecurse; }
+ bool CanRecurse() const {
+ return m_canRecurse;
+ }
- const NonTerm &GetLHS() const
- { return m_lhs; }
+ const NonTerm &GetLHS() const {
+ return m_lhs;
+ }
- const ConsistentPhrase &GetConsistentPhrase() const;
+ const ConsistentPhrase &GetConsistentPhrase() const;
- int GetNextSourcePosForNonTerm() const;
+ int GetNextSourcePosForNonTerm() const;
- void SetCount(float count)
- { m_count = count; }
- float GetCount() const
- { return m_count; }
+ void SetCount(float count) {
+ m_count = count;
+ }
+ float GetCount() const {
+ return m_count;
+ }
- const Alignments &GetAlignments() const
- { return m_alignments; }
+ const Alignments &GetAlignments() const {
+ return m_alignments;
+ }
- std::string Debug() const;
- void Output(std::ostream &out, bool forward, const Parameter &params) const;
+ std::string Debug() const;
+ void Output(std::ostream &out, bool forward, const Parameter &params) const;
- void Prevalidate(const Parameter &params);
- void CreateTarget(const Parameter &params);
+ void Prevalidate(const Parameter &params);
+ void CreateTarget(const Parameter &params);
- const RulePhrase &GetPhrase(Moses::FactorDirection direction) const
- { return (direction == Moses::Input) ? m_source : m_target; }
+ const RulePhrase &GetPhrase(Moses::FactorDirection direction) const {
+ return (direction == Moses::Input) ? m_source : m_target;
+ }
protected:
- const NonTerm &m_lhs;
- const AlignedSentence &m_alignedSentence;
- RulePhrase m_source, m_target;
- float m_count;
+ const NonTerm &m_lhs;
+ const AlignedSentence &m_alignedSentence;
+ RulePhrase m_source, m_target;
+ float m_count;
- Alignments m_alignments;
+ Alignments m_alignments;
- // in source order
- std::vector<const NonTerm*> m_nonterms;
+ // in source order
+ std::vector<const NonTerm*> m_nonterms;
- bool m_isValid, m_canRecurse;
+ bool m_isValid, m_canRecurse;
- void CreateSource();
- void CreateAlignments();
- void CreateAlignments(int sourcePos, const std::set<const Word *> &targetWords);
- void CreateAlignments(int sourcePos, const RuleSymbol *targetSought);
+ void CreateSource();
+ void CreateAlignments();
+ void CreateAlignments(int sourcePos, const std::set<const Word *> &targetWords);
+ void CreateAlignments(int sourcePos, const RuleSymbol *targetSought);
- bool ContainTerm(const ConsistentPhrase &cp, const std::set<const Word*> &terms) const;
- int GetScope(const Parameter &params) const;
+ bool ContainTerm(const ConsistentPhrase &cp, const std::set<const Word*> &terms) const;
+ int GetScope(const Parameter &params) const;
- void NonTermContext(int sourceTarget, int factors, size_t ntInd, const ConsistentPhrase &cp, std::ostream &out) const;
- // sourceTarget: 1 = source, 2 = target
+ void NonTermContext(int sourceTarget, int factors, size_t ntInd, const ConsistentPhrase &cp, std::ostream &out) const;
+ // sourceTarget: 1 = source, 2 = target
- void NonTermContextFactor(int factor, const Word &word, std::ostream &out) const;
+ void NonTermContextFactor(int factor, const Word &word, std::ostream &out) const;
};
diff --git a/phrase-extract/extract-mixed-syntax/RulePhrase.cpp b/phrase-extract/extract-mixed-syntax/RulePhrase.cpp
index 5c629168b..1894ade7d 100644
--- a/phrase-extract/extract-mixed-syntax/RulePhrase.cpp
+++ b/phrase-extract/extract-mixed-syntax/RulePhrase.cpp
@@ -16,17 +16,17 @@ extern bool g_debug;
int RulePhrase::Compare(const RulePhrase &other) const
{
if (GetSize() != other.GetSize()) {
- return GetSize() < other.GetSize() ? -1 : +1;
+ return GetSize() < other.GetSize() ? -1 : +1;
}
for (size_t i = 0; i < m_coll.size(); ++i) {
- const RuleSymbol &symbol = *m_coll[i];
- const RuleSymbol &otherSymbol = *other.m_coll[i];
- int compare = symbol.Compare(otherSymbol);
+ const RuleSymbol &symbol = *m_coll[i];
+ const RuleSymbol &otherSymbol = *other.m_coll[i];
+ int compare = symbol.Compare(otherSymbol);
- if (compare) {
- return compare;
- }
+ if (compare) {
+ return compare;
+ }
}
return 0;
@@ -35,16 +35,16 @@ int RulePhrase::Compare(const RulePhrase &other) const
void RulePhrase::Output(std::ostream &out) const
{
for (size_t i = 0; i < m_coll.size(); ++i) {
- const RuleSymbol &symbol = *m_coll[i];
- symbol.Output(out);
- out << " ";
+ const RuleSymbol &symbol = *m_coll[i];
+ symbol.Output(out);
+ out << " ";
}
}
std::string RulePhrase::Debug() const
{
- std::stringstream out;
- Output(out);
- return out.str();
+ std::stringstream out;
+ Output(out);
+ return out.str();
}
diff --git a/phrase-extract/extract-mixed-syntax/RulePhrase.h b/phrase-extract/extract-mixed-syntax/RulePhrase.h
index 412169b74..0527293ba 100644
--- a/phrase-extract/extract-mixed-syntax/RulePhrase.h
+++ b/phrase-extract/extract-mixed-syntax/RulePhrase.h
@@ -21,12 +21,12 @@ public:
typedef std::vector<const RuleSymbol*> Coll;
Coll m_coll;
- size_t GetSize() const
- { return m_coll.size(); }
+ size_t GetSize() const {
+ return m_coll.size();
+ }
- void Add(const RuleSymbol *symbol)
- {
- m_coll.push_back(symbol);
+ void Add(const RuleSymbol *symbol) {
+ m_coll.push_back(symbol);
}
const RuleSymbol* operator[](size_t index) const {
diff --git a/phrase-extract/extract-mixed-syntax/RuleSymbol.cpp b/phrase-extract/extract-mixed-syntax/RuleSymbol.cpp
index 933ffc9c2..2f098d1aa 100644
--- a/phrase-extract/extract-mixed-syntax/RuleSymbol.cpp
+++ b/phrase-extract/extract-mixed-syntax/RuleSymbol.cpp
@@ -9,28 +9,29 @@
using namespace std;
-RuleSymbol::RuleSymbol() {
- // TODO Auto-generated constructor stub
+RuleSymbol::RuleSymbol()
+{
+ // TODO Auto-generated constructor stub
}
-RuleSymbol::~RuleSymbol() {
- // TODO Auto-generated destructor stub
+RuleSymbol::~RuleSymbol()
+{
+ // TODO Auto-generated destructor stub
}
int RuleSymbol::Compare(const RuleSymbol &other) const
{
- if (IsNonTerm() != other.IsNonTerm()) {
- return IsNonTerm() ? -1 : +1;
- }
-
- string str = GetString();
- string otherStr = other.GetString();
-
- if (str == otherStr) {
- return 0;
- }
- else {
- return (str < otherStr) ? -1 : +1;
- }
+ if (IsNonTerm() != other.IsNonTerm()) {
+ return IsNonTerm() ? -1 : +1;
+ }
+
+ string str = GetString();
+ string otherStr = other.GetString();
+
+ if (str == otherStr) {
+ return 0;
+ } else {
+ return (str < otherStr) ? -1 : +1;
+ }
}
diff --git a/phrase-extract/extract-mixed-syntax/RuleSymbol.h b/phrase-extract/extract-mixed-syntax/RuleSymbol.h
index c292fcc0d..a9909664d 100644
--- a/phrase-extract/extract-mixed-syntax/RuleSymbol.h
+++ b/phrase-extract/extract-mixed-syntax/RuleSymbol.h
@@ -12,19 +12,20 @@
#include <string>
// base class - terminal or non-term
-class RuleSymbol {
+class RuleSymbol
+{
public:
- RuleSymbol();
- virtual ~RuleSymbol();
+ RuleSymbol();
+ virtual ~RuleSymbol();
- virtual bool IsNonTerm() const = 0;
+ virtual bool IsNonTerm() const = 0;
- virtual std::string Debug() const = 0;
- virtual void Output(std::ostream &out) const = 0;
+ virtual std::string Debug() const = 0;
+ virtual void Output(std::ostream &out) const = 0;
- virtual std::string GetString() const = 0;
+ virtual std::string GetString() const = 0;
- int Compare(const RuleSymbol &other) const;
+ int Compare(const RuleSymbol &other) const;
};
diff --git a/phrase-extract/extract-mixed-syntax/Rules.cpp b/phrase-extract/extract-mixed-syntax/Rules.cpp
index 1b93430e2..b637a212d 100644
--- a/phrase-extract/extract-mixed-syntax/Rules.cpp
+++ b/phrase-extract/extract-mixed-syntax/Rules.cpp
@@ -19,180 +19,177 @@ using namespace std;
extern bool g_debug;
Rules::Rules(const AlignedSentence &alignedSentence)
-:m_alignedSentence(alignedSentence)
+ :m_alignedSentence(alignedSentence)
{
}
-Rules::~Rules() {
- Moses::RemoveAllInColl(m_keepRules);
+Rules::~Rules()
+{
+ Moses::RemoveAllInColl(m_keepRules);
}
void Rules::CreateRules(const ConsistentPhrase &cp,
- const Parameter &params)
+ const Parameter &params)
{
- if (params.hieroSourceLHS) {
- const NonTerm &nonTerm = cp.GetHieroNonTerm();
- CreateRule(nonTerm, params);
- }
- else {
- const ConsistentPhrase::NonTerms &nonTerms = cp.GetNonTerms();
- for (size_t i = 0; i < nonTerms.size(); ++i) {
- const NonTerm &nonTerm = nonTerms[i];
- CreateRule(nonTerm, params);
- }
- }
+ if (params.hieroSourceLHS) {
+ const NonTerm &nonTerm = cp.GetHieroNonTerm();
+ CreateRule(nonTerm, params);
+ } else {
+ const ConsistentPhrase::NonTerms &nonTerms = cp.GetNonTerms();
+ for (size_t i = 0; i < nonTerms.size(); ++i) {
+ const NonTerm &nonTerm = nonTerms[i];
+ CreateRule(nonTerm, params);
+ }
+ }
}
void Rules::CreateRule(const NonTerm &nonTerm,
- const Parameter &params)
+ const Parameter &params)
{
- Rule *rule = new Rule(nonTerm, m_alignedSentence);
+ Rule *rule = new Rule(nonTerm, m_alignedSentence);
- rule->Prevalidate(params);
- rule->CreateTarget(params);
+ rule->Prevalidate(params);
+ rule->CreateTarget(params);
- if (rule->CanRecurse()) {
- Extend(*rule, params);
- }
+ if (rule->CanRecurse()) {
+ Extend(*rule, params);
+ }
- if (rule->IsValid()) {
- m_keepRules.insert(rule);
- }
- else {
- delete rule;
- }
+ if (rule->IsValid()) {
+ m_keepRules.insert(rule);
+ } else {
+ delete rule;
+ }
}
void Rules::Extend(const Parameter &params)
{
- const ConsistentPhrases &allCPS = m_alignedSentence.GetConsistentPhrases();
-
- size_t size = m_alignedSentence.GetPhrase(Moses::Input).size();
- for (size_t sourceStart = 0; sourceStart < size; ++sourceStart) {
- for (size_t sourceEnd = sourceStart; sourceEnd < size; ++sourceEnd) {
- const ConsistentPhrases::Coll &cps = allCPS.GetColl(sourceStart, sourceEnd);
-
- ConsistentPhrases::Coll::const_iterator iter;
- for (iter = cps.begin(); iter != cps.end(); ++iter) {
- const ConsistentPhrase &cp = **iter;
- CreateRules(cp, params);
- }
- }
- }
+ const ConsistentPhrases &allCPS = m_alignedSentence.GetConsistentPhrases();
+
+ size_t size = m_alignedSentence.GetPhrase(Moses::Input).size();
+ for (size_t sourceStart = 0; sourceStart < size; ++sourceStart) {
+ for (size_t sourceEnd = sourceStart; sourceEnd < size; ++sourceEnd) {
+ const ConsistentPhrases::Coll &cps = allCPS.GetColl(sourceStart, sourceEnd);
+
+ ConsistentPhrases::Coll::const_iterator iter;
+ for (iter = cps.begin(); iter != cps.end(); ++iter) {
+ const ConsistentPhrase &cp = **iter;
+ CreateRules(cp, params);
+ }
+ }
+ }
}
void Rules::Extend(const Rule &rule, const Parameter &params)
{
- const ConsistentPhrases &allCPS = m_alignedSentence.GetConsistentPhrases();
- int sourceMin = rule.GetNextSourcePosForNonTerm();
-
- int ruleStart = rule.GetConsistentPhrase().corners[0];
- int ruleEnd = rule.GetConsistentPhrase().corners[1];
-
- for (int sourceStart = sourceMin; sourceStart <= ruleEnd; ++sourceStart) {
- for (int sourceEnd = sourceStart; sourceEnd <= ruleEnd; ++sourceEnd) {
- if (sourceStart == ruleStart && sourceEnd == ruleEnd) {
- // don't cover whole rule with 1 non-term
- continue;
- }
-
- const ConsistentPhrases::Coll &cps = allCPS.GetColl(sourceStart, sourceEnd);
- Extend(rule, cps, params);
- }
- }
+ const ConsistentPhrases &allCPS = m_alignedSentence.GetConsistentPhrases();
+ int sourceMin = rule.GetNextSourcePosForNonTerm();
+
+ int ruleStart = rule.GetConsistentPhrase().corners[0];
+ int ruleEnd = rule.GetConsistentPhrase().corners[1];
+
+ for (int sourceStart = sourceMin; sourceStart <= ruleEnd; ++sourceStart) {
+ for (int sourceEnd = sourceStart; sourceEnd <= ruleEnd; ++sourceEnd) {
+ if (sourceStart == ruleStart && sourceEnd == ruleEnd) {
+ // don't cover whole rule with 1 non-term
+ continue;
+ }
+
+ const ConsistentPhrases::Coll &cps = allCPS.GetColl(sourceStart, sourceEnd);
+ Extend(rule, cps, params);
+ }
+ }
}
void Rules::Extend(const Rule &rule, const ConsistentPhrases::Coll &cps, const Parameter &params)
{
- ConsistentPhrases::Coll::const_iterator iter;
- for (iter = cps.begin(); iter != cps.end(); ++iter) {
- const ConsistentPhrase &cp = **iter;
- Extend(rule, cp, params);
- }
+ ConsistentPhrases::Coll::const_iterator iter;
+ for (iter = cps.begin(); iter != cps.end(); ++iter) {
+ const ConsistentPhrase &cp = **iter;
+ Extend(rule, cp, params);
+ }
}
void Rules::Extend(const Rule &rule, const ConsistentPhrase &cp, const Parameter &params)
{
- const ConsistentPhrase::NonTerms &nonTerms = cp.GetNonTerms();
- for (size_t i = 0; i < nonTerms.size(); ++i) {
- const NonTerm &nonTerm = nonTerms[i];
-
- Rule *newRule = new Rule(rule, nonTerm);
- newRule->Prevalidate(params);
- newRule->CreateTarget(params);
-
- if (newRule->CanRecurse()) {
- // recursively extend
- Extend(*newRule, params);
- }
-
- if (newRule->IsValid()) {
- m_keepRules.insert(newRule);
- }
- else {
- delete newRule;
- }
- }
+ const ConsistentPhrase::NonTerms &nonTerms = cp.GetNonTerms();
+ for (size_t i = 0; i < nonTerms.size(); ++i) {
+ const NonTerm &nonTerm = nonTerms[i];
+
+ Rule *newRule = new Rule(rule, nonTerm);
+ newRule->Prevalidate(params);
+ newRule->CreateTarget(params);
+
+ if (newRule->CanRecurse()) {
+ // recursively extend
+ Extend(*newRule, params);
+ }
+
+ if (newRule->IsValid()) {
+ m_keepRules.insert(newRule);
+ } else {
+ delete newRule;
+ }
+ }
}
std::string Rules::Debug() const
{
- stringstream out;
+ stringstream out;
- std::set<Rule*>::const_iterator iter;
- out << "m_keepRules:" << endl;
- for (iter = m_keepRules.begin(); iter != m_keepRules.end(); ++iter) {
- const Rule &rule = **iter;
- out << rule.Debug() << endl;
- }
+ std::set<Rule*>::const_iterator iter;
+ out << "m_keepRules:" << endl;
+ for (iter = m_keepRules.begin(); iter != m_keepRules.end(); ++iter) {
+ const Rule &rule = **iter;
+ out << rule.Debug() << endl;
+ }
- return out.str();
+ return out.str();
}
void Rules::Output(std::ostream &out, bool forward, const Parameter &params) const
{
- std::set<Rule*, CompareRules>::const_iterator iter;
- for (iter = m_mergeRules.begin(); iter != m_mergeRules.end(); ++iter) {
- const Rule &rule = **iter;
- rule.Output(out, forward, params);
- out << endl;
- }
+ std::set<Rule*, CompareRules>::const_iterator iter;
+ for (iter = m_mergeRules.begin(); iter != m_mergeRules.end(); ++iter) {
+ const Rule &rule = **iter;
+ rule.Output(out, forward, params);
+ out << endl;
+ }
}
void Rules::Consolidate(const Parameter &params)
{
- if (params.fractionalCounting) {
- CalcFractionalCount();
- }
- else {
- std::set<Rule*>::iterator iter;
- for (iter = m_keepRules.begin(); iter != m_keepRules.end(); ++iter) {
- Rule &rule = **iter;
- rule.SetCount(1);
- }
- }
-
- MergeRules(params);
+ if (params.fractionalCounting) {
+ CalcFractionalCount();
+ } else {
+ std::set<Rule*>::iterator iter;
+ for (iter = m_keepRules.begin(); iter != m_keepRules.end(); ++iter) {
+ Rule &rule = **iter;
+ rule.SetCount(1);
+ }
+ }
+
+ MergeRules(params);
}
void Rules::MergeRules(const Parameter &params)
{
- typedef std::set<Rule*, CompareRules> MergeRules;
-
- std::set<Rule*>::const_iterator iterOrig;
- for (iterOrig = m_keepRules.begin(); iterOrig != m_keepRules.end(); ++iterOrig) {
- Rule *origRule = *iterOrig;
-
- pair<MergeRules::iterator, bool> inserted = m_mergeRules.insert(origRule);
- if (!inserted.second) {
- // already there, just add count
- Rule &rule = **inserted.first;
- float newCount = rule.GetCount() + origRule->GetCount();
- rule.SetCount(newCount);
- }
- }
+ typedef std::set<Rule*, CompareRules> MergeRules;
+
+ std::set<Rule*>::const_iterator iterOrig;
+ for (iterOrig = m_keepRules.begin(); iterOrig != m_keepRules.end(); ++iterOrig) {
+ Rule *origRule = *iterOrig;
+
+ pair<MergeRules::iterator, bool> inserted = m_mergeRules.insert(origRule);
+ if (!inserted.second) {
+ // already there, just add count
+ Rule &rule = **inserted.first;
+ float newCount = rule.GetCount() + origRule->GetCount();
+ rule.SetCount(newCount);
+ }
+ }
}
void Rules::CalcFractionalCount()
@@ -204,22 +201,22 @@ void Rules::CalcFractionalCount()
// sort by source AND target ranges
std::set<Rule*>::const_iterator iter;
for (iter = m_keepRules.begin(); iter != m_keepRules.end(); ++iter) {
- Rule *rule = *iter;
- const ConsistentPhrase &cp = rule->GetConsistentPhrase();
- RuleColl &ruleColl = allRules[&cp];
- ruleColl.insert(rule);
+ Rule *rule = *iter;
+ const ConsistentPhrase &cp = rule->GetConsistentPhrase();
+ RuleColl &ruleColl = allRules[&cp];
+ ruleColl.insert(rule);
}
// fractional count
RuleByConsistentPhrase::iterator iterOuter;
for (iterOuter = allRules.begin(); iterOuter != allRules.end(); ++iterOuter) {
- RuleColl &rules = iterOuter->second;
+ RuleColl &rules = iterOuter->second;
- RuleColl::iterator iterInner;
- for (iterInner = rules.begin(); iterInner != rules.end(); ++iterInner) {
- Rule &rule = **iterInner;
- rule.SetCount(1.0f / (float) rules.size());
- }
+ RuleColl::iterator iterInner;
+ for (iterInner = rules.begin(); iterInner != rules.end(); ++iterInner) {
+ Rule &rule = **iterInner;
+ rule.SetCount(1.0f / (float) rules.size());
+ }
}
}
diff --git a/phrase-extract/extract-mixed-syntax/Rules.h b/phrase-extract/extract-mixed-syntax/Rules.h
index 6d8cb122d..b29989a3f 100644
--- a/phrase-extract/extract-mixed-syntax/Rules.h
+++ b/phrase-extract/extract-mixed-syntax/Rules.h
@@ -18,55 +18,55 @@ class AlignedSentence;
class Parameter;
struct CompareRules {
- bool operator()(const Rule *a, const Rule *b)
- {
- int compare;
+ bool operator()(const Rule *a, const Rule *b) {
+ int compare;
- compare = a->GetPhrase(Moses::Input).Compare(b->GetPhrase(Moses::Input));
- if (compare) return compare < 0;
+ compare = a->GetPhrase(Moses::Input).Compare(b->GetPhrase(Moses::Input));
+ if (compare) return compare < 0;
- compare = a->GetPhrase(Moses::Output).Compare(b->GetPhrase(Moses::Output));
- if (compare) return compare < 0;
+ compare = a->GetPhrase(Moses::Output).Compare(b->GetPhrase(Moses::Output));
+ if (compare) return compare < 0;
- if (a->GetAlignments() != b->GetAlignments()) {
- return a->GetAlignments() < b->GetAlignments();
- }
+ if (a->GetAlignments() != b->GetAlignments()) {
+ return a->GetAlignments() < b->GetAlignments();
+ }
- if (a->GetLHS().GetString() != b->GetLHS().GetString()) {
- return a->GetLHS().GetString() < b->GetLHS().GetString();
- }
+ if (a->GetLHS().GetString() != b->GetLHS().GetString()) {
+ return a->GetLHS().GetString() < b->GetLHS().GetString();
+ }
- return false;
- }
+ return false;
+ }
};
-class Rules {
+class Rules
+{
public:
- Rules(const AlignedSentence &alignedSentence);
- virtual ~Rules();
- void Extend(const Parameter &params);
- void Consolidate(const Parameter &params);
+ Rules(const AlignedSentence &alignedSentence);
+ virtual ~Rules();
+ void Extend(const Parameter &params);
+ void Consolidate(const Parameter &params);
- std::string Debug() const;
- void Output(std::ostream &out, bool forward, const Parameter &params) const;
+ std::string Debug() const;
+ void Output(std::ostream &out, bool forward, const Parameter &params) const;
protected:
- const AlignedSentence &m_alignedSentence;
- std::set<Rule*> m_keepRules;
- std::set<Rule*, CompareRules> m_mergeRules;
-
- void Extend(const Rule &rule, const Parameter &params);
- void Extend(const Rule &rule, const ConsistentPhrases::Coll &cps, const Parameter &params);
- void Extend(const Rule &rule, const ConsistentPhrase &cp, const Parameter &params);
-
- // create original rules
- void CreateRules(const ConsistentPhrase &cp,
- const Parameter &params);
- void CreateRule(const NonTerm &nonTerm,
- const Parameter &params);
-
- void MergeRules(const Parameter &params);
- void CalcFractionalCount();
+ const AlignedSentence &m_alignedSentence;
+ std::set<Rule*> m_keepRules;
+ std::set<Rule*, CompareRules> m_mergeRules;
+
+ void Extend(const Rule &rule, const Parameter &params);
+ void Extend(const Rule &rule, const ConsistentPhrases::Coll &cps, const Parameter &params);
+ void Extend(const Rule &rule, const ConsistentPhrase &cp, const Parameter &params);
+
+ // create original rules
+ void CreateRules(const ConsistentPhrase &cp,
+ const Parameter &params);
+ void CreateRule(const NonTerm &nonTerm,
+ const Parameter &params);
+
+ void MergeRules(const Parameter &params);
+ void CalcFractionalCount();
};
diff --git a/phrase-extract/extract-mixed-syntax/SyntaxTree.cpp b/phrase-extract/extract-mixed-syntax/SyntaxTree.cpp
index 472444e7c..b96c66167 100644
--- a/phrase-extract/extract-mixed-syntax/SyntaxTree.cpp
+++ b/phrase-extract/extract-mixed-syntax/SyntaxTree.cpp
@@ -7,41 +7,40 @@ using namespace std;
void SyntaxTree::Add(int startPos, int endPos, const std::string &label, const Parameter &params)
{
- //cerr << "add " << label << " to " << "[" << startPos << "-" << endPos << "]" << endl;
+ //cerr << "add " << label << " to " << "[" << startPos << "-" << endPos << "]" << endl;
- Range range(startPos, endPos);
- Labels &labels = m_coll[range];
+ Range range(startPos, endPos);
+ Labels &labels = m_coll[range];
- bool add = true;
- if (labels.size()) {
- if (params.multiLabel == 1) {
- // delete the label in collection and add new
- assert(labels.size() == 1);
- labels.clear();
- }
- else if (params.multiLabel == 2) {
- // ignore this label
- add = false;
- }
- }
+ bool add = true;
+ if (labels.size()) {
+ if (params.multiLabel == 1) {
+ // delete the label in collection and add new
+ assert(labels.size() == 1);
+ labels.clear();
+ } else if (params.multiLabel == 2) {
+ // ignore this label
+ add = false;
+ }
+ }
- if (add) {
- labels.push_back(label);
- }
+ if (add) {
+ labels.push_back(label);
+ }
}
void SyntaxTree::AddToAll(const std::string &label)
{
- Coll::iterator iter;
- for (iter = m_coll.begin(); iter != m_coll.end(); ++iter) {
- Labels &labels = iter->second;
- labels.push_back(label);
- }
+ Coll::iterator iter;
+ for (iter = m_coll.begin(); iter != m_coll.end(); ++iter) {
+ Labels &labels = iter->second;
+ labels.push_back(label);
+ }
}
const SyntaxTree::Labels &SyntaxTree::Find(int startPos, int endPos) const
{
- Coll::const_iterator iter;
- iter = m_coll.find(Range(startPos, endPos));
- return (iter == m_coll.end()) ? m_defaultLabels : iter->second;
+ Coll::const_iterator iter;
+ iter = m_coll.find(Range(startPos, endPos));
+ return (iter == m_coll.end()) ? m_defaultLabels : iter->second;
}
diff --git a/phrase-extract/extract-mixed-syntax/SyntaxTree.h b/phrase-extract/extract-mixed-syntax/SyntaxTree.h
index 58f718151..202ebff34 100644
--- a/phrase-extract/extract-mixed-syntax/SyntaxTree.h
+++ b/phrase-extract/extract-mixed-syntax/SyntaxTree.h
@@ -1,4 +1,4 @@
-#pragma once
+#pragma once
#include <vector>
#include <map>
@@ -19,7 +19,7 @@ public:
const Labels &Find(int startPos, int endPos) const;
void SetHieroLabel(const std::string &label) {
- m_defaultLabels.push_back(label);
+ m_defaultLabels.push_back(label);
}
diff --git a/phrase-extract/extract-mixed-syntax/Word.cpp b/phrase-extract/extract-mixed-syntax/Word.cpp
index 8ce4f76c6..f36391f1a 100644
--- a/phrase-extract/extract-mixed-syntax/Word.cpp
+++ b/phrase-extract/extract-mixed-syntax/Word.cpp
@@ -11,44 +11,45 @@
using namespace std;
Word::Word(int pos, const std::string &str)
-:m_pos(pos)
-,m_str(str)
+ :m_pos(pos)
+ ,m_str(str)
{
- // TODO Auto-generated constructor stub
+ // TODO Auto-generated constructor stub
}
-Word::~Word() {
- // TODO Auto-generated destructor stub
+Word::~Word()
+{
+ // TODO Auto-generated destructor stub
}
void Word::AddAlignment(const Word *other)
{
- m_alignment.insert(other);
+ m_alignment.insert(other);
}
std::set<int> Word::GetAlignmentIndex() const
{
- std::set<int> ret;
+ std::set<int> ret;
- std::set<const Word *>::const_iterator iter;
- for (iter = m_alignment.begin(); iter != m_alignment.end(); ++iter) {
- const Word &otherWord = **iter;
- int otherPos = otherWord.GetPos();
- ret.insert(otherPos);
- }
+ std::set<const Word *>::const_iterator iter;
+ for (iter = m_alignment.begin(); iter != m_alignment.end(); ++iter) {
+ const Word &otherWord = **iter;
+ int otherPos = otherWord.GetPos();
+ ret.insert(otherPos);
+ }
- return ret;
+ return ret;
}
void Word::Output(std::ostream &out) const
{
- out << m_str;
+ out << m_str;
}
std::string Word::Debug() const
{
- return m_str;
+ return m_str;
}
int Word::CompareString(const Word &other) const
diff --git a/phrase-extract/extract-mixed-syntax/Word.h b/phrase-extract/extract-mixed-syntax/Word.h
index 54419ceb0..80ee20ba9 100644
--- a/phrase-extract/extract-mixed-syntax/Word.h
+++ b/phrase-extract/extract-mixed-syntax/Word.h
@@ -14,36 +14,40 @@
class Word : public RuleSymbol
{
public:
- Word(const Word&); // do not implement
- Word(int pos, const std::string &str);
- virtual ~Word();
+ Word(const Word&); // do not implement
+ Word(int pos, const std::string &str);
+ virtual ~Word();
- virtual bool IsNonTerm() const
- { return false; }
+ virtual bool IsNonTerm() const {
+ return false;
+ }
- std::string GetString() const
- { return m_str; }
+ std::string GetString() const {
+ return m_str;
+ }
- std::string GetString(int factor) const;
+ std::string GetString(int factor) const;
- int GetPos() const
- { return m_pos; }
+ int GetPos() const {
+ return m_pos;
+ }
- void AddAlignment(const Word *other);
+ void AddAlignment(const Word *other);
- const std::set<const Word *> &GetAlignment() const
- { return m_alignment; }
+ const std::set<const Word *> &GetAlignment() const {
+ return m_alignment;
+ }
- std::set<int> GetAlignmentIndex() const;
+ std::set<int> GetAlignmentIndex() const;
- void Output(std::ostream &out) const;
- std::string Debug() const;
+ void Output(std::ostream &out) const;
+ std::string Debug() const;
- int CompareString(const Word &other) const;
+ int CompareString(const Word &other) const;
protected:
- int m_pos; // original position in sentence, NOT in lattice
- std::string m_str;
- std::set<const Word *> m_alignment;
+ int m_pos; // original position in sentence, NOT in lattice
+ std::string m_str;
+ std::set<const Word *> m_alignment;
};
diff --git a/phrase-extract/extract-mixed-syntax/gzfilebuf.h b/phrase-extract/extract-mixed-syntax/gzfilebuf.h
index 885c661f0..b5b0ce87f 100644
--- a/phrase-extract/extract-mixed-syntax/gzfilebuf.h
+++ b/phrase-extract/extract-mixed-syntax/gzfilebuf.h
@@ -5,73 +5,77 @@
#include <zlib.h>
#include <cstring>
-class gzfilebuf : public std::streambuf {
+class gzfilebuf : public std::streambuf
+{
public:
- gzfilebuf(const char *filename)
- { _gzf = gzopen(filename, "rb");
+ gzfilebuf(const char *filename) {
+ _gzf = gzopen(filename, "rb");
setg (_buff+sizeof(int), // beginning of putback area
_buff+sizeof(int), // read position
_buff+sizeof(int)); // end position
}
- ~gzfilebuf() { gzclose(_gzf); }
+ ~gzfilebuf() {
+ gzclose(_gzf);
+ }
protected:
virtual int_type overflow (int_type c) {
- throw;
+ throw;
}
-
+
// write multiple characters
virtual
std::streamsize xsputn (const char* s,
std::streamsize num) {
- throw;
+ throw;
}
-
- virtual std::streampos seekpos ( std::streampos sp, std::ios_base::openmode which = std::ios_base::in | std::ios_base::out ){ throw;
+
+ virtual std::streampos seekpos ( std::streampos sp, std::ios_base::openmode which = std::ios_base::in | std::ios_base::out ) {
+ throw;
}
-
+
//read one character
virtual int_type underflow () {
// is read position before end of _buff?
- if (gptr() < egptr()) {
- return traits_type::to_int_type(*gptr());
- }
-
- /* process size of putback area
- * - use number of characters read
- * - but at most four
- */
- unsigned int numPutback = gptr() - eback();
- if (numPutback > sizeof(int)) {
- numPutback = sizeof(int);
- }
-
- /* copy up to four characters previously read into
- * the putback _buff (area of first four characters)
- */
- std::memmove (_buff+(sizeof(int)-numPutback), gptr()-numPutback,
- numPutback);
-
- // read new characters
- int num = gzread(_gzf, _buff+sizeof(int), _buffsize-sizeof(int));
- if (num <= 0) {
- // ERROR or EOF
- return EOF;
- }
-
- // reset _buff pointers
- setg (_buff+(sizeof(int)-numPutback), // beginning of putback area
- _buff+sizeof(int), // read position
- _buff+sizeof(int)+num); // end of buffer
-
- // return next character
- return traits_type::to_int_type(*gptr());
+ if (gptr() < egptr()) {
+ return traits_type::to_int_type(*gptr());
+ }
+
+ /* process size of putback area
+ * - use number of characters read
+ * - but at most four
+ */
+ unsigned int numPutback = gptr() - eback();
+ if (numPutback > sizeof(int)) {
+ numPutback = sizeof(int);
+ }
+
+ /* copy up to four characters previously read into
+ * the putback _buff (area of first four characters)
+ */
+ std::memmove (_buff+(sizeof(int)-numPutback), gptr()-numPutback,
+ numPutback);
+
+ // read new characters
+ int num = gzread(_gzf, _buff+sizeof(int), _buffsize-sizeof(int));
+ if (num <= 0) {
+ // ERROR or EOF
+ return EOF;
+ }
+
+ // reset _buff pointers
+ setg (_buff+(sizeof(int)-numPutback), // beginning of putback area
+ _buff+sizeof(int), // read position
+ _buff+sizeof(int)+num); // end of buffer
+
+ // return next character
+ return traits_type::to_int_type(*gptr());
}
-
+
std::streamsize xsgetn (char* s,
std::streamsize num) {
return gzread(_gzf,s,num);
}
-
+
private:
gzFile _gzf;
static const unsigned int _buffsize = 1024;
diff --git a/phrase-extract/extract-mixed-syntax/pugixml.cpp b/phrase-extract/extract-mixed-syntax/pugixml.cpp
index 4035ab1cf..f047ea136 100644
--- a/phrase-extract/extract-mixed-syntax/pugixml.cpp
+++ b/phrase-extract/extract-mixed-syntax/pugixml.cpp
@@ -78,7 +78,7 @@
#elif defined(__GNUC__)
# define PUGI__NO_INLINE __attribute__((noinline))
#else
-# define PUGI__NO_INLINE
+# define PUGI__NO_INLINE
#endif
// Simple static assertion
@@ -129,5292 +129,4912 @@ typedef size_t uintptr_t;
#define _UINTPTR_T_DEFINED
# endif
PUGI__NS_BEGIN
- typedef unsigned __int8 uint8_t;
- typedef unsigned __int16 uint16_t;
- typedef unsigned __int32 uint32_t;
+typedef unsigned __int8 uint8_t;
+typedef unsigned __int16 uint16_t;
+typedef unsigned __int32 uint32_t;
PUGI__NS_END
#endif
// Memory allocation
PUGI__NS_BEGIN
- PUGI__FN void* default_allocate(size_t size)
- {
- return malloc(size);
- }
-
- PUGI__FN void default_deallocate(void* ptr)
- {
- free(ptr);
- }
-
- template <typename T>
- struct xml_memory_management_function_storage
- {
- static allocation_function allocate;
- static deallocation_function deallocate;
- };
-
- template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
- template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
-
- typedef xml_memory_management_function_storage<int> xml_memory;
+PUGI__FN void* default_allocate(size_t size)
+{
+ return malloc(size);
+}
+
+PUGI__FN void default_deallocate(void* ptr)
+{
+ free(ptr);
+}
+
+template <typename T>
+struct xml_memory_management_function_storage {
+ static allocation_function allocate;
+ static deallocation_function deallocate;
+};
+
+template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
+template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
+
+typedef xml_memory_management_function_storage<int> xml_memory;
PUGI__NS_END
// String utilities
PUGI__NS_BEGIN
- // Get string length
- PUGI__FN size_t strlength(const char_t* s)
- {
- assert(s);
-
- #ifdef PUGIXML_WCHAR_MODE
- return wcslen(s);
- #else
- return strlen(s);
- #endif
- }
-
- // Compare two strings
- PUGI__FN bool strequal(const char_t* src, const char_t* dst)
- {
- assert(src && dst);
-
- #ifdef PUGIXML_WCHAR_MODE
- return wcscmp(src, dst) == 0;
- #else
- return strcmp(src, dst) == 0;
- #endif
- }
-
- // Compare lhs with [rhs_begin, rhs_end)
- PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
- {
- for (size_t i = 0; i < count; ++i)
- if (lhs[i] != rhs[i])
- return false;
-
- return lhs[count] == 0;
- }
-
+// Get string length
+PUGI__FN size_t strlength(const char_t* s)
+{
+ assert(s);
+
#ifdef PUGIXML_WCHAR_MODE
- // Convert string to wide string, assuming all symbols are ASCII
- PUGI__FN void widen_ascii(wchar_t* dest, const char* source)
- {
- for (const char* i = source; *i; ++i) *dest++ = *i;
- *dest = 0;
- }
+ return wcslen(s);
+#else
+ return strlen(s);
+#endif
+}
+
+// Compare two strings
+PUGI__FN bool strequal(const char_t* src, const char_t* dst)
+{
+ assert(src && dst);
+
+#ifdef PUGIXML_WCHAR_MODE
+ return wcscmp(src, dst) == 0;
+#else
+ return strcmp(src, dst) == 0;
+#endif
+}
+
+// Compare lhs with [rhs_begin, rhs_end)
+PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
+{
+ for (size_t i = 0; i < count; ++i)
+ if (lhs[i] != rhs[i])
+ return false;
+
+ return lhs[count] == 0;
+}
+
+#ifdef PUGIXML_WCHAR_MODE
+// Convert string to wide string, assuming all symbols are ASCII
+PUGI__FN void widen_ascii(wchar_t* dest, const char* source)
+{
+ for (const char* i = source; *i; ++i) *dest++ = *i;
+ *dest = 0;
+}
#endif
PUGI__NS_END
#if !defined(PUGIXML_NO_STL) || !defined(PUGIXML_NO_XPATH)
// auto_ptr-like buffer holder for exception recovery
PUGI__NS_BEGIN
- struct buffer_holder
- {
- void* data;
- void (*deleter)(void*);
-
- buffer_holder(void* data_, void (*deleter_)(void*)): data(data_), deleter(deleter_)
- {
- }
-
- ~buffer_holder()
- {
- if (data) deleter(data);
- }
-
- void* release()
- {
- void* result = data;
- data = 0;
- return result;
- }
- };
+struct buffer_holder {
+ void* data;
+ void (*deleter)(void*);
+
+ buffer_holder(void* data_, void (*deleter_)(void*)): data(data_), deleter(deleter_) {
+ }
+
+ ~buffer_holder() {
+ if (data) deleter(data);
+ }
+
+ void* release() {
+ void* result = data;
+ data = 0;
+ return result;
+ }
+};
PUGI__NS_END
#endif
PUGI__NS_BEGIN
- static const size_t xml_memory_page_size =
- #ifdef PUGIXML_MEMORY_PAGE_SIZE
- PUGIXML_MEMORY_PAGE_SIZE
- #else
- 32768
- #endif
- ;
+static const size_t xml_memory_page_size =
+#ifdef PUGIXML_MEMORY_PAGE_SIZE
+ PUGIXML_MEMORY_PAGE_SIZE
+#else
+ 32768
+#endif
+ ;
+
+static const uintptr_t xml_memory_page_alignment = 32;
+static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1);
+static const uintptr_t xml_memory_page_name_allocated_mask = 16;
+static const uintptr_t xml_memory_page_value_allocated_mask = 8;
+static const uintptr_t xml_memory_page_type_mask = 7;
+
+struct xml_allocator;
+
+struct xml_memory_page {
+ static xml_memory_page* construct(void* memory) {
+ if (!memory) return 0; //$ redundant, left for performance
+
+ xml_memory_page* result = static_cast<xml_memory_page*>(memory);
+
+ result->allocator = 0;
+ result->memory = 0;
+ result->prev = 0;
+ result->next = 0;
+ result->busy_size = 0;
+ result->freed_size = 0;
+
+ return result;
+ }
+
+ xml_allocator* allocator;
- static const uintptr_t xml_memory_page_alignment = 32;
- static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1);
- static const uintptr_t xml_memory_page_name_allocated_mask = 16;
- static const uintptr_t xml_memory_page_value_allocated_mask = 8;
- static const uintptr_t xml_memory_page_type_mask = 7;
+ void* memory;
- struct xml_allocator;
+ xml_memory_page* prev;
+ xml_memory_page* next;
- struct xml_memory_page
- {
- static xml_memory_page* construct(void* memory)
- {
- if (!memory) return 0; //$ redundant, left for performance
+ size_t busy_size;
+ size_t freed_size;
- xml_memory_page* result = static_cast<xml_memory_page*>(memory);
+ char data[1];
+};
- result->allocator = 0;
- result->memory = 0;
- result->prev = 0;
- result->next = 0;
- result->busy_size = 0;
- result->freed_size = 0;
+struct xml_memory_string_header {
+ uint16_t page_offset; // offset from page->data
+ uint16_t full_size; // 0 if string occupies whole page
+};
- return result;
- }
+struct xml_allocator {
+ xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size) {
+ }
- xml_allocator* allocator;
+ xml_memory_page* allocate_page(size_t data_size) {
+ size_t size = offsetof(xml_memory_page, data) + data_size;
- void* memory;
+ // allocate block with some alignment, leaving memory for worst-case padding
+ void* memory = xml_memory::allocate(size + xml_memory_page_alignment);
+ if (!memory) return 0;
- xml_memory_page* prev;
- xml_memory_page* next;
-
- size_t busy_size;
- size_t freed_size;
-
- char data[1];
- };
+ // align upwards to page boundary
+ void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1));
- struct xml_memory_string_header
- {
- uint16_t page_offset; // offset from page->data
- uint16_t full_size; // 0 if string occupies whole page
- };
+ // prepare page structure
+ xml_memory_page* page = xml_memory_page::construct(page_memory);
- struct xml_allocator
- {
- xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
- {
- }
+ page->memory = memory;
+ page->allocator = _root->allocator;
- xml_memory_page* allocate_page(size_t data_size)
- {
- size_t size = offsetof(xml_memory_page, data) + data_size;
+ return page;
+ }
- // allocate block with some alignment, leaving memory for worst-case padding
- void* memory = xml_memory::allocate(size + xml_memory_page_alignment);
- if (!memory) return 0;
+ static void deallocate_page(xml_memory_page* page) {
+ xml_memory::deallocate(page->memory);
+ }
- // align upwards to page boundary
- void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1));
+ void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
- // prepare page structure
- xml_memory_page* page = xml_memory_page::construct(page_memory);
+ void* allocate_memory(size_t size, xml_memory_page*& out_page) {
+ if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page);
- page->memory = memory;
- page->allocator = _root->allocator;
+ void* buf = _root->data + _busy_size;
- return page;
- }
+ _busy_size += size;
- static void deallocate_page(xml_memory_page* page)
- {
- xml_memory::deallocate(page->memory);
- }
+ out_page = _root;
- void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
+ return buf;
+ }
- void* allocate_memory(size_t size, xml_memory_page*& out_page)
- {
- if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page);
+ void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) {
+ if (page == _root) page->busy_size = _busy_size;
- void* buf = _root->data + _busy_size;
+ assert(ptr >= page->data && ptr < page->data + page->busy_size);
+ (void)!ptr;
- _busy_size += size;
+ page->freed_size += size;
+ assert(page->freed_size <= page->busy_size);
- out_page = _root;
+ if (page->freed_size == page->busy_size) {
+ if (page->next == 0) {
+ assert(_root == page);
- return buf;
- }
+ // top page freed, just reset sizes
+ page->busy_size = page->freed_size = 0;
+ _busy_size = 0;
+ } else {
+ assert(_root != page);
+ assert(page->prev);
- void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
- {
- if (page == _root) page->busy_size = _busy_size;
+ // remove from the list
+ page->prev->next = page->next;
+ page->next->prev = page->prev;
- assert(ptr >= page->data && ptr < page->data + page->busy_size);
- (void)!ptr;
+ // deallocate
+ deallocate_page(page);
+ }
+ }
+ }
- page->freed_size += size;
- assert(page->freed_size <= page->busy_size);
+ char_t* allocate_string(size_t length) {
+ // allocate memory for string and header block
+ size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
- if (page->freed_size == page->busy_size)
- {
- if (page->next == 0)
- {
- assert(_root == page);
+ // round size up to pointer alignment boundary
+ size_t full_size = (size + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1);
- // top page freed, just reset sizes
- page->busy_size = page->freed_size = 0;
- _busy_size = 0;
- }
- else
- {
- assert(_root != page);
- assert(page->prev);
+ xml_memory_page* page;
+ xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
- // remove from the list
- page->prev->next = page->next;
- page->next->prev = page->prev;
+ if (!header) return 0;
- // deallocate
- deallocate_page(page);
- }
- }
- }
+ // setup header
+ ptrdiff_t page_offset = reinterpret_cast<char*>(header) - page->data;
- char_t* allocate_string(size_t length)
- {
- // allocate memory for string and header block
- size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
-
- // round size up to pointer alignment boundary
- size_t full_size = (size + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1);
+ assert(page_offset >= 0 && page_offset < (1 << 16));
+ header->page_offset = static_cast<uint16_t>(page_offset);
- xml_memory_page* page;
- xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
+ // full_size == 0 for large strings that occupy the whole page
+ assert(full_size < (1 << 16) || (page->busy_size == full_size && page_offset == 0));
+ header->full_size = static_cast<uint16_t>(full_size < (1 << 16) ? full_size : 0);
- if (!header) return 0;
+ // round-trip through void* to avoid 'cast increases required alignment of target type' warning
+ // header is guaranteed a pointer-sized alignment, which should be enough for char_t
+ return static_cast<char_t*>(static_cast<void*>(header + 1));
+ }
- // setup header
- ptrdiff_t page_offset = reinterpret_cast<char*>(header) - page->data;
+ void deallocate_string(char_t* string) {
+ // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
+ // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
- assert(page_offset >= 0 && page_offset < (1 << 16));
- header->page_offset = static_cast<uint16_t>(page_offset);
+ // get header
+ xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
- // full_size == 0 for large strings that occupy the whole page
- assert(full_size < (1 << 16) || (page->busy_size == full_size && page_offset == 0));
- header->full_size = static_cast<uint16_t>(full_size < (1 << 16) ? full_size : 0);
+ // deallocate
+ size_t page_offset = offsetof(xml_memory_page, data) + header->page_offset;
+ xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
- // round-trip through void* to avoid 'cast increases required alignment of target type' warning
- // header is guaranteed a pointer-sized alignment, which should be enough for char_t
- return static_cast<char_t*>(static_cast<void*>(header + 1));
- }
+ // if full_size == 0 then this string occupies the whole page
+ size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size;
- void deallocate_string(char_t* string)
- {
- // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
- // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
+ deallocate_memory(header, full_size, page);
+ }
- // get header
- xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
+ xml_memory_page* _root;
+ size_t _busy_size;
+};
- // deallocate
- size_t page_offset = offsetof(xml_memory_page, data) + header->page_offset;
- xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
+PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
+{
+ const size_t large_allocation_threshold = xml_memory_page_size / 4;
+
+ xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
+ out_page = page;
- // if full_size == 0 then this string occupies the whole page
- size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size;
+ if (!page) return 0;
- deallocate_memory(header, full_size, page);
- }
+ if (size <= large_allocation_threshold) {
+ _root->busy_size = _busy_size;
- xml_memory_page* _root;
- size_t _busy_size;
- };
+ // insert page at the end of linked list
+ page->prev = _root;
+ _root->next = page;
+ _root = page;
- PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
- {
- const size_t large_allocation_threshold = xml_memory_page_size / 4;
+ _busy_size = size;
+ } else {
+ // insert page before the end of linked list, so that it is deleted as soon as possible
+ // the last page is not deleted even if it's empty (see deallocate_memory)
+ assert(_root->prev);
- xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
- out_page = page;
+ page->prev = _root->prev;
+ page->next = _root;
- if (!page) return 0;
+ _root->prev->next = page;
+ _root->prev = page;
+ }
- if (size <= large_allocation_threshold)
- {
- _root->busy_size = _busy_size;
-
- // insert page at the end of linked list
- page->prev = _root;
- _root->next = page;
- _root = page;
-
- _busy_size = size;
- }
- else
- {
- // insert page before the end of linked list, so that it is deleted as soon as possible
- // the last page is not deleted even if it's empty (see deallocate_memory)
- assert(_root->prev);
-
- page->prev = _root->prev;
- page->next = _root;
-
- _root->prev->next = page;
- _root->prev = page;
- }
-
- // allocate inside page
- page->busy_size = size;
+ // allocate inside page
+ page->busy_size = size;
- return page->data;
- }
+ return page->data;
+}
PUGI__NS_END
namespace pugi
{
- /// A 'name=value' XML attribute structure.
- struct xml_attribute_struct
- {
- /// Default ctor
- xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0)
- {
- }
+/// A 'name=value' XML attribute structure.
+struct xml_attribute_struct {
+ /// Default ctor
+ xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0) {
+ }
+
+ uintptr_t header;
+
+ char_t* name; ///< Pointer to attribute name.
+ char_t* value; ///< Pointer to attribute value.
- uintptr_t header;
+ xml_attribute_struct* prev_attribute_c; ///< Previous attribute (cyclic list)
+ xml_attribute_struct* next_attribute; ///< Next attribute
+};
- char_t* name; ///< Pointer to attribute name.
- char_t* value; ///< Pointer to attribute value.
+/// An XML document tree node.
+struct xml_node_struct {
+ /// Default ctor
+ /// \param type - node type
+ xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) {
+ }
- xml_attribute_struct* prev_attribute_c; ///< Previous attribute (cyclic list)
- xml_attribute_struct* next_attribute; ///< Next attribute
- };
+ uintptr_t header;
- /// An XML document tree node.
- struct xml_node_struct
- {
- /// Default ctor
- /// \param type - node type
- xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
- {
- }
+ xml_node_struct* parent; ///< Pointer to parent
- uintptr_t header;
+ char_t* name; ///< Pointer to element name.
+ char_t* value; ///< Pointer to any associated string data.
- xml_node_struct* parent; ///< Pointer to parent
+ xml_node_struct* first_child; ///< First child
- char_t* name; ///< Pointer to element name.
- char_t* value; ///< Pointer to any associated string data.
+ xml_node_struct* prev_sibling_c; ///< Left brother (cyclic list)
+ xml_node_struct* next_sibling; ///< Right brother
- xml_node_struct* first_child; ///< First child
-
- xml_node_struct* prev_sibling_c; ///< Left brother (cyclic list)
- xml_node_struct* next_sibling; ///< Right brother
-
- xml_attribute_struct* first_attribute; ///< First attribute
- };
+ xml_attribute_struct* first_attribute; ///< First attribute
+};
}
PUGI__NS_BEGIN
- struct xml_document_struct: public xml_node_struct, public xml_allocator
- {
- xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0)
- {
- }
+struct xml_document_struct: public xml_node_struct, public xml_allocator {
+ xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0) {
+ }
- const char_t* buffer;
- };
+ const char_t* buffer;
+};
- inline xml_allocator& get_allocator(const xml_node_struct* node)
- {
- assert(node);
+inline xml_allocator& get_allocator(const xml_node_struct* node)
+{
+ assert(node);
- return *reinterpret_cast<xml_memory_page*>(node->header & xml_memory_page_pointer_mask)->allocator;
- }
+ return *reinterpret_cast<xml_memory_page*>(node->header & xml_memory_page_pointer_mask)->allocator;
+}
PUGI__NS_END
// Low-level DOM operations
PUGI__NS_BEGIN
- inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
- {
- xml_memory_page* page;
- void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page);
-
- return new (memory) xml_attribute_struct(page);
- }
-
- inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
- {
- xml_memory_page* page;
- void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page);
-
- return new (memory) xml_node_struct(page, type);
- }
-
- inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
- {
- uintptr_t header = a->header;
-
- if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name);
- if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value);
-
- alloc.deallocate_memory(a, sizeof(xml_attribute_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));
- }
-
- inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
- {
- uintptr_t header = n->header;
-
- if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name);
- if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value);
-
- for (xml_attribute_struct* attr = n->first_attribute; attr; )
- {
- xml_attribute_struct* next = attr->next_attribute;
-
- destroy_attribute(attr, alloc);
-
- attr = next;
- }
-
- for (xml_node_struct* child = n->first_child; child; )
- {
- xml_node_struct* next = child->next_sibling;
-
- destroy_node(child, alloc);
-
- child = next;
- }
-
- alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));
- }
-
- PUGI__FN_NO_INLINE xml_node_struct* append_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
- {
- xml_node_struct* child = allocate_node(alloc, type);
- if (!child) return 0;
-
- child->parent = node;
-
- xml_node_struct* first_child = node->first_child;
-
- if (first_child)
- {
- xml_node_struct* last_child = first_child->prev_sibling_c;
-
- last_child->next_sibling = child;
- child->prev_sibling_c = last_child;
- first_child->prev_sibling_c = child;
- }
- else
- {
- node->first_child = child;
- child->prev_sibling_c = child;
- }
-
- return child;
- }
-
- PUGI__FN_NO_INLINE xml_attribute_struct* append_attribute_ll(xml_node_struct* node, xml_allocator& alloc)
- {
- xml_attribute_struct* a = allocate_attribute(alloc);
- if (!a) return 0;
-
- xml_attribute_struct* first_attribute = node->first_attribute;
-
- if (first_attribute)
- {
- xml_attribute_struct* last_attribute = first_attribute->prev_attribute_c;
-
- last_attribute->next_attribute = a;
- a->prev_attribute_c = last_attribute;
- first_attribute->prev_attribute_c = a;
- }
- else
- {
- node->first_attribute = a;
- a->prev_attribute_c = a;
- }
-
- return a;
- }
+inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
+{
+ xml_memory_page* page;
+ void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page);
+
+ return new (memory) xml_attribute_struct(page);
+}
+
+inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
+{
+ xml_memory_page* page;
+ void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page);
+
+ return new (memory) xml_node_struct(page, type);
+}
+
+inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
+{
+ uintptr_t header = a->header;
+
+ if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name);
+ if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value);
+
+ alloc.deallocate_memory(a, sizeof(xml_attribute_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));
+}
+
+inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
+{
+ uintptr_t header = n->header;
+
+ if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name);
+ if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value);
+
+ for (xml_attribute_struct* attr = n->first_attribute; attr; ) {
+ xml_attribute_struct* next = attr->next_attribute;
+
+ destroy_attribute(attr, alloc);
+
+ attr = next;
+ }
+
+ for (xml_node_struct* child = n->first_child; child; ) {
+ xml_node_struct* next = child->next_sibling;
+
+ destroy_node(child, alloc);
+
+ child = next;
+ }
+
+ alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));
+}
+
+PUGI__FN_NO_INLINE xml_node_struct* append_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
+{
+ xml_node_struct* child = allocate_node(alloc, type);
+ if (!child) return 0;
+
+ child->parent = node;
+
+ xml_node_struct* first_child = node->first_child;
+
+ if (first_child) {
+ xml_node_struct* last_child = first_child->prev_sibling_c;
+
+ last_child->next_sibling = child;
+ child->prev_sibling_c = last_child;
+ first_child->prev_sibling_c = child;
+ } else {
+ node->first_child = child;
+ child->prev_sibling_c = child;
+ }
+
+ return child;
+}
+
+PUGI__FN_NO_INLINE xml_attribute_struct* append_attribute_ll(xml_node_struct* node, xml_allocator& alloc)
+{
+ xml_attribute_struct* a = allocate_attribute(alloc);
+ if (!a) return 0;
+
+ xml_attribute_struct* first_attribute = node->first_attribute;
+
+ if (first_attribute) {
+ xml_attribute_struct* last_attribute = first_attribute->prev_attribute_c;
+
+ last_attribute->next_attribute = a;
+ a->prev_attribute_c = last_attribute;
+ first_attribute->prev_attribute_c = a;
+ } else {
+ node->first_attribute = a;
+ a->prev_attribute_c = a;
+ }
+
+ return a;
+}
PUGI__NS_END
// Helper classes for code generation
PUGI__NS_BEGIN
- struct opt_false
- {
- enum { value = 0 };
- };
-
- struct opt_true
- {
- enum { value = 1 };
- };
+struct opt_false {
+ enum { value = 0 };
+};
+
+struct opt_true {
+ enum { value = 1 };
+};
PUGI__NS_END
// Unicode utilities
PUGI__NS_BEGIN
- inline uint16_t endian_swap(uint16_t value)
- {
- return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8));
- }
-
- inline uint32_t endian_swap(uint32_t value)
- {
- return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24);
- }
-
- struct utf8_counter
- {
- typedef size_t value_type;
-
- static value_type low(value_type result, uint32_t ch)
- {
- // U+0000..U+007F
- if (ch < 0x80) return result + 1;
- // U+0080..U+07FF
- else if (ch < 0x800) return result + 2;
- // U+0800..U+FFFF
- else return result + 3;
- }
-
- static value_type high(value_type result, uint32_t)
- {
- // U+10000..U+10FFFF
- return result + 4;
- }
- };
-
- struct utf8_writer
- {
- typedef uint8_t* value_type;
-
- static value_type low(value_type result, uint32_t ch)
- {
- // U+0000..U+007F
- if (ch < 0x80)
- {
- *result = static_cast<uint8_t>(ch);
- return result + 1;
- }
- // U+0080..U+07FF
- else if (ch < 0x800)
- {
- result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
- result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
- return result + 2;
- }
- // U+0800..U+FFFF
- else
- {
- result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
- result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
- result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
- return result + 3;
- }
- }
-
- static value_type high(value_type result, uint32_t ch)
- {
- // U+10000..U+10FFFF
- result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
- result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
- result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
- result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
- return result + 4;
- }
-
- static value_type any(value_type result, uint32_t ch)
- {
- return (ch < 0x10000) ? low(result, ch) : high(result, ch);
- }
- };
-
- struct utf16_counter
- {
- typedef size_t value_type;
-
- static value_type low(value_type result, uint32_t)
- {
- return result + 1;
- }
-
- static value_type high(value_type result, uint32_t)
- {
- return result + 2;
- }
- };
-
- struct utf16_writer
- {
- typedef uint16_t* value_type;
-
- static value_type low(value_type result, uint32_t ch)
- {
- *result = static_cast<uint16_t>(ch);
-
- return result + 1;
- }
-
- static value_type high(value_type result, uint32_t ch)
- {
- uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10;
- uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff;
-
- result[0] = static_cast<uint16_t>(0xD800 + msh);
- result[1] = static_cast<uint16_t>(0xDC00 + lsh);
-
- return result + 2;
- }
-
- static value_type any(value_type result, uint32_t ch)
- {
- return (ch < 0x10000) ? low(result, ch) : high(result, ch);
- }
- };
-
- struct utf32_counter
- {
- typedef size_t value_type;
-
- static value_type low(value_type result, uint32_t)
- {
- return result + 1;
- }
-
- static value_type high(value_type result, uint32_t)
- {
- return result + 1;
- }
- };
-
- struct utf32_writer
- {
- typedef uint32_t* value_type;
-
- static value_type low(value_type result, uint32_t ch)
- {
- *result = ch;
-
- return result + 1;
- }
-
- static value_type high(value_type result, uint32_t ch)
- {
- *result = ch;
-
- return result + 1;
- }
-
- static value_type any(value_type result, uint32_t ch)
- {
- *result = ch;
-
- return result + 1;
- }
- };
-
- struct latin1_writer
- {
- typedef uint8_t* value_type;
-
- static value_type low(value_type result, uint32_t ch)
- {
- *result = static_cast<uint8_t>(ch > 255 ? '?' : ch);
-
- return result + 1;
- }
-
- static value_type high(value_type result, uint32_t ch)
- {
- (void)ch;
-
- *result = '?';
-
- return result + 1;
- }
- };
-
- template <size_t size> struct wchar_selector;
-
- template <> struct wchar_selector<2>
- {
- typedef uint16_t type;
- typedef utf16_counter counter;
- typedef utf16_writer writer;
- };
-
- template <> struct wchar_selector<4>
- {
- typedef uint32_t type;
- typedef utf32_counter counter;
- typedef utf32_writer writer;
- };
-
- typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
- typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
-
- template <typename Traits, typename opt_swap = opt_false> struct utf_decoder
- {
- static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result)
- {
- const uint8_t utf8_byte_mask = 0x3f;
-
- while (size)
- {
- uint8_t lead = *data;
-
- // 0xxxxxxx -> U+0000..U+007F
- if (lead < 0x80)
- {
- result = Traits::low(result, lead);
- data += 1;
- size -= 1;
-
- // process aligned single-byte (ascii) blocks
- if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
- {
- // round-trip through void* to silence 'cast increases required alignment of target type' warnings
- while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
- {
- result = Traits::low(result, data[0]);
- result = Traits::low(result, data[1]);
- result = Traits::low(result, data[2]);
- result = Traits::low(result, data[3]);
- data += 4;
- size -= 4;
- }
- }
- }
- // 110xxxxx -> U+0080..U+07FF
- else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
- {
- result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
- data += 2;
- size -= 2;
- }
- // 1110xxxx -> U+0800-U+FFFF
- else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
- {
- result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
- data += 3;
- size -= 3;
- }
- // 11110xxx -> U+10000..U+10FFFF
- else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
- {
- result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
- data += 4;
- size -= 4;
- }
- // 10xxxxxx or 11111xxx -> invalid
- else
- {
- data += 1;
- size -= 1;
- }
- }
-
- return result;
- }
-
- static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result)
- {
- const uint16_t* end = data + size;
-
- while (data < end)
- {
- uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
-
- // U+0000..U+D7FF
- if (lead < 0xD800)
- {
- result = Traits::low(result, lead);
- data += 1;
- }
- // U+E000..U+FFFF
- else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
- {
- result = Traits::low(result, lead);
- data += 1;
- }
- // surrogate pair lead
- else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && data + 1 < end)
- {
- uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
-
- if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
- {
- result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
- data += 2;
- }
- else
- {
- data += 1;
- }
- }
- else
- {
- data += 1;
- }
- }
-
- return result;
- }
-
- static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result)
- {
- const uint32_t* end = data + size;
-
- while (data < end)
- {
- uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
-
- // U+0000..U+FFFF
- if (lead < 0x10000)
- {
- result = Traits::low(result, lead);
- data += 1;
- }
- // U+10000..U+10FFFF
- else
- {
- result = Traits::high(result, lead);
- data += 1;
- }
- }
-
- return result;
- }
-
- static inline typename Traits::value_type decode_latin1_block(const uint8_t* data, size_t size, typename Traits::value_type result)
- {
- for (size_t i = 0; i < size; ++i)
- {
- result = Traits::low(result, data[i]);
- }
-
- return result;
- }
-
- static inline typename Traits::value_type decode_wchar_block_impl(const uint16_t* data, size_t size, typename Traits::value_type result)
- {
- return decode_utf16_block(data, size, result);
- }
-
- static inline typename Traits::value_type decode_wchar_block_impl(const uint32_t* data, size_t size, typename Traits::value_type result)
- {
- return decode_utf32_block(data, size, result);
- }
-
- static inline typename Traits::value_type decode_wchar_block(const wchar_t* data, size_t size, typename Traits::value_type result)
- {
- return decode_wchar_block_impl(reinterpret_cast<const wchar_selector<sizeof(wchar_t)>::type*>(data), size, result);
- }
- };
-
- template <typename T> PUGI__FN void convert_utf_endian_swap(T* result, const T* data, size_t length)
- {
- for (size_t i = 0; i < length; ++i) result[i] = endian_swap(data[i]);
- }
+inline uint16_t endian_swap(uint16_t value)
+{
+ return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8));
+}
+
+inline uint32_t endian_swap(uint32_t value)
+{
+ return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24);
+}
+
+struct utf8_counter {
+ typedef size_t value_type;
+
+ static value_type low(value_type result, uint32_t ch) {
+ // U+0000..U+007F
+ if (ch < 0x80) return result + 1;
+ // U+0080..U+07FF
+ else if (ch < 0x800) return result + 2;
+ // U+0800..U+FFFF
+ else return result + 3;
+ }
+
+ static value_type high(value_type result, uint32_t) {
+ // U+10000..U+10FFFF
+ return result + 4;
+ }
+};
+
+struct utf8_writer {
+ typedef uint8_t* value_type;
+
+ static value_type low(value_type result, uint32_t ch) {
+ // U+0000..U+007F
+ if (ch < 0x80) {
+ *result = static_cast<uint8_t>(ch);
+ return result + 1;
+ }
+ // U+0080..U+07FF
+ else if (ch < 0x800) {
+ result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
+ result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
+ return result + 2;
+ }
+ // U+0800..U+FFFF
+ else {
+ result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
+ result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
+ result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
+ return result + 3;
+ }
+ }
+
+ static value_type high(value_type result, uint32_t ch) {
+ // U+10000..U+10FFFF
+ result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
+ result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
+ result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
+ result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
+ return result + 4;
+ }
+
+ static value_type any(value_type result, uint32_t ch) {
+ return (ch < 0x10000) ? low(result, ch) : high(result, ch);
+ }
+};
+
+struct utf16_counter {
+ typedef size_t value_type;
+
+ static value_type low(value_type result, uint32_t) {
+ return result + 1;
+ }
+
+ static value_type high(value_type result, uint32_t) {
+ return result + 2;
+ }
+};
+
+struct utf16_writer {
+ typedef uint16_t* value_type;
+
+ static value_type low(value_type result, uint32_t ch) {
+ *result = static_cast<uint16_t>(ch);
+
+ return result + 1;
+ }
+
+ static value_type high(value_type result, uint32_t ch) {
+ uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10;
+ uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff;
+
+ result[0] = static_cast<uint16_t>(0xD800 + msh);
+ result[1] = static_cast<uint16_t>(0xDC00 + lsh);
+
+ return result + 2;
+ }
+
+ static value_type any(value_type result, uint32_t ch) {
+ return (ch < 0x10000) ? low(result, ch) : high(result, ch);
+ }
+};
+
+struct utf32_counter {
+ typedef size_t value_type;
+
+ static value_type low(value_type result, uint32_t) {
+ return result + 1;
+ }
+
+ static value_type high(value_type result, uint32_t) {
+ return result + 1;
+ }
+};
+
+struct utf32_writer {
+ typedef uint32_t* value_type;
+
+ static value_type low(value_type result, uint32_t ch) {
+ *result = ch;
+
+ return result + 1;
+ }
+
+ static value_type high(value_type result, uint32_t ch) {
+ *result = ch;
+
+ return result + 1;
+ }
+
+ static value_type any(value_type result, uint32_t ch) {
+ *result = ch;
+
+ return result + 1;
+ }
+};
+
+struct latin1_writer {
+ typedef uint8_t* value_type;
+
+ static value_type low(value_type result, uint32_t ch) {
+ *result = static_cast<uint8_t>(ch > 255 ? '?' : ch);
+
+ return result + 1;
+ }
+
+ static value_type high(value_type result, uint32_t ch) {
+ (void)ch;
+
+ *result = '?';
+
+ return result + 1;
+ }
+};
+
+template <size_t size> struct wchar_selector;
+
+template <> struct wchar_selector<2> {
+ typedef uint16_t type;
+ typedef utf16_counter counter;
+ typedef utf16_writer writer;
+};
+
+template <> struct wchar_selector<4> {
+ typedef uint32_t type;
+ typedef utf32_counter counter;
+ typedef utf32_writer writer;
+};
+
+typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
+typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
+
+template <typename Traits, typename opt_swap = opt_false> struct utf_decoder {
+ static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result) {
+ const uint8_t utf8_byte_mask = 0x3f;
+
+ while (size) {
+ uint8_t lead = *data;
+
+ // 0xxxxxxx -> U+0000..U+007F
+ if (lead < 0x80) {
+ result = Traits::low(result, lead);
+ data += 1;
+ size -= 1;
+
+ // process aligned single-byte (ascii) blocks
+ if ((reinterpret_cast<uintptr_t>(data) & 3) == 0) {
+ // round-trip through void* to silence 'cast increases required alignment of target type' warnings
+ while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0) {
+ result = Traits::low(result, data[0]);
+ result = Traits::low(result, data[1]);
+ result = Traits::low(result, data[2]);
+ result = Traits::low(result, data[3]);
+ data += 4;
+ size -= 4;
+ }
+ }
+ }
+ // 110xxxxx -> U+0080..U+07FF
+ else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80) {
+ result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
+ data += 2;
+ size -= 2;
+ }
+ // 1110xxxx -> U+0800-U+FFFF
+ else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80) {
+ result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
+ data += 3;
+ size -= 3;
+ }
+ // 11110xxx -> U+10000..U+10FFFF
+ else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80) {
+ result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
+ data += 4;
+ size -= 4;
+ }
+ // 10xxxxxx or 11111xxx -> invalid
+ else {
+ data += 1;
+ size -= 1;
+ }
+ }
+
+ return result;
+ }
+
+ static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result) {
+ const uint16_t* end = data + size;
+
+ while (data < end) {
+ uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
+
+ // U+0000..U+D7FF
+ if (lead < 0xD800) {
+ result = Traits::low(result, lead);
+ data += 1;
+ }
+ // U+E000..U+FFFF
+ else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000) {
+ result = Traits::low(result, lead);
+ data += 1;
+ }
+ // surrogate pair lead
+ else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && data + 1 < end) {
+ uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
+
+ if (static_cast<unsigned int>(next - 0xDC00) < 0x400) {
+ result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
+ data += 2;
+ } else {
+ data += 1;
+ }
+ } else {
+ data += 1;
+ }
+ }
+
+ return result;
+ }
+
+ static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result) {
+ const uint32_t* end = data + size;
+
+ while (data < end) {
+ uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
+
+ // U+0000..U+FFFF
+ if (lead < 0x10000) {
+ result = Traits::low(result, lead);
+ data += 1;
+ }
+ // U+10000..U+10FFFF
+ else {
+ result = Traits::high(result, lead);
+ data += 1;
+ }
+ }
+
+ return result;
+ }
+
+ static inline typename Traits::value_type decode_latin1_block(const uint8_t* data, size_t size, typename Traits::value_type result) {
+ for (size_t i = 0; i < size; ++i) {
+ result = Traits::low(result, data[i]);
+ }
+
+ return result;
+ }
+
+ static inline typename Traits::value_type decode_wchar_block_impl(const uint16_t* data, size_t size, typename Traits::value_type result) {
+ return decode_utf16_block(data, size, result);
+ }
+
+ static inline typename Traits::value_type decode_wchar_block_impl(const uint32_t* data, size_t size, typename Traits::value_type result) {
+ return decode_utf32_block(data, size, result);
+ }
+
+ static inline typename Traits::value_type decode_wchar_block(const wchar_t* data, size_t size, typename Traits::value_type result) {
+ return decode_wchar_block_impl(reinterpret_cast<const wchar_selector<sizeof(wchar_t)>::type*>(data), size, result);
+ }
+};
+
+template <typename T> PUGI__FN void convert_utf_endian_swap(T* result, const T* data, size_t length)
+{
+ for (size_t i = 0; i < length; ++i) result[i] = endian_swap(data[i]);
+}
#ifdef PUGIXML_WCHAR_MODE
- PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
- {
- for (size_t i = 0; i < length; ++i) result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
- }
+PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
+{
+ for (size_t i = 0; i < length; ++i) result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
+}
#endif
PUGI__NS_END
PUGI__NS_BEGIN
- enum chartype_t
- {
- ct_parse_pcdata = 1, // \0, &, \r, <
- ct_parse_attr = 2, // \0, &, \r, ', "
- ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab
- ct_space = 8, // \r, \n, space, tab
- ct_parse_cdata = 16, // \0, ], >, \r
- ct_parse_comment = 32, // \0, -, >, \r
- ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
- ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, :
- };
-
- static const unsigned char chartype_table[256] =
- {
- 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
- 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47
- 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63
- 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95
- 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127
-
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
- };
-
- enum chartypex_t
- {
- ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
- ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, "
- ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
- ctx_digit = 8, // 0-9
- ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
- };
-
- static const unsigned char chartypex_table[256] =
- {
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
- 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
- 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
-
- 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
- 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
-
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
- };
-
+enum chartype_t {
+ ct_parse_pcdata = 1, // \0, &, \r, <
+ ct_parse_attr = 2, // \0, &, \r, ', "
+ ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab
+ ct_space = 8, // \r, \n, space, tab
+ ct_parse_cdata = 16, // \0, ], >, \r
+ ct_parse_comment = 32, // \0, -, >, \r
+ ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
+ ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, :
+};
+
+static const unsigned char chartype_table[256] = {
+ 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
+ 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63
+ 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95
+ 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127
+
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
+};
+
+enum chartypex_t {
+ ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
+ ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, "
+ ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
+ ctx_digit = 8, // 0-9
+ ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
+};
+
+static const unsigned char chartypex_table[256] = {
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
+ 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
+
+ 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
+ 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
+
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
+};
+
#ifdef PUGIXML_WCHAR_MODE
- #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
+#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
#else
- #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
+#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
#endif
- #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
- #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
-
- PUGI__FN bool is_little_endian()
- {
- unsigned int ui = 1;
-
- return *reinterpret_cast<unsigned char*>(&ui) == 1;
- }
+#define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
+#define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
- PUGI__FN xml_encoding get_wchar_encoding()
- {
- PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
-
- if (sizeof(wchar_t) == 2)
- return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
- else
- return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
- }
+PUGI__FN bool is_little_endian()
+{
+ unsigned int ui = 1;
- PUGI__FN xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3)
- {
- // look for BOM in first few bytes
- if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
- if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
- if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
- if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
- if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
+ return *reinterpret_cast<unsigned char*>(&ui) == 1;
+}
- // look for <, <? or <?xm in various encodings
- if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
- if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
- if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
- if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
- if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return encoding_utf8;
+PUGI__FN xml_encoding get_wchar_encoding()
+{
+ PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
- // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
- if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
- if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
+ if (sizeof(wchar_t) == 2)
+ return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+ else
+ return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+}
- // no known BOM detected, assume utf8
- return encoding_utf8;
- }
+PUGI__FN xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3)
+{
+ // look for BOM in first few bytes
+ if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
+ if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
+ if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
+ if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
+ if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
+
+ // look for <, <? or <?xm in various encodings
+ if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
+ if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
+ if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
+ if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
+ if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return encoding_utf8;
+
+ // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
+ if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
+ if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
+
+ // no known BOM detected, assume utf8
+ return encoding_utf8;
+}
- PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
- {
- // replace wchar encoding with utf implementation
- if (encoding == encoding_wchar) return get_wchar_encoding();
+PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
+{
+ // replace wchar encoding with utf implementation
+ if (encoding == encoding_wchar) return get_wchar_encoding();
- // replace utf16 encoding with utf16 with specific endianness
- if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+ // replace utf16 encoding with utf16 with specific endianness
+ if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
- // replace utf32 encoding with utf32 with specific endianness
- if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+ // replace utf32 encoding with utf32 with specific endianness
+ if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
- // only do autodetection if no explicit encoding is requested
- if (encoding != encoding_auto) return encoding;
+ // only do autodetection if no explicit encoding is requested
+ if (encoding != encoding_auto) return encoding;
- // skip encoding autodetection if input buffer is too small
- if (size < 4) return encoding_utf8;
+ // skip encoding autodetection if input buffer is too small
+ if (size < 4) return encoding_utf8;
- // try to guess encoding (based on XML specification, Appendix F.1)
- const uint8_t* data = static_cast<const uint8_t*>(contents);
+ // try to guess encoding (based on XML specification, Appendix F.1)
+ const uint8_t* data = static_cast<const uint8_t*>(contents);
- PUGI__DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
+ PUGI__DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
- return guess_buffer_encoding(d0, d1, d2, d3);
- }
+ return guess_buffer_encoding(d0, d1, d2, d3);
+}
- PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
- {
- if (is_mutable)
- {
- out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
- }
- else
- {
- void* buffer = xml_memory::allocate(size > 0 ? size : 1);
- if (!buffer) return false;
+PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+{
+ if (is_mutable) {
+ out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
+ } else {
+ void* buffer = xml_memory::allocate(size > 0 ? size : 1);
+ if (!buffer) return false;
- memcpy(buffer, contents, size);
+ memcpy(buffer, contents, size);
- out_buffer = static_cast<char_t*>(buffer);
- }
+ out_buffer = static_cast<char_t*>(buffer);
+ }
- out_length = size / sizeof(char_t);
+ out_length = size / sizeof(char_t);
- return true;
- }
+ return true;
+}
#ifdef PUGIXML_WCHAR_MODE
- PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
- {
- return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
- (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
- }
+PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
+{
+ return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
+ (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
+}
+
+PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+{
+ const char_t* data = static_cast<const char_t*>(contents);
- PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
- {
- const char_t* data = static_cast<const char_t*>(contents);
-
- if (is_mutable)
- {
- out_buffer = const_cast<char_t*>(data);
- }
- else
- {
- out_buffer = static_cast<char_t*>(xml_memory::allocate(size > 0 ? size : 1));
- if (!out_buffer) return false;
- }
+ if (is_mutable) {
+ out_buffer = const_cast<char_t*>(data);
+ } else {
+ out_buffer = static_cast<char_t*>(xml_memory::allocate(size > 0 ? size : 1));
+ if (!out_buffer) return false;
+ }
- out_length = size / sizeof(char_t);
+ out_length = size / sizeof(char_t);
- convert_wchar_endian_swap(out_buffer, data, out_length);
+ convert_wchar_endian_swap(out_buffer, data, out_length);
- return true;
- }
+ return true;
+}
- PUGI__FN bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
- {
- const uint8_t* data = static_cast<const uint8_t*>(contents);
+PUGI__FN bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
+{
+ const uint8_t* data = static_cast<const uint8_t*>(contents);
- // first pass: get length in wchar_t units
- out_length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
+ // first pass: get length in wchar_t units
+ out_length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
- // allocate buffer of suitable length
- out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
- if (!out_buffer) return false;
+ // allocate buffer of suitable length
+ out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
+ if (!out_buffer) return false;
- // second pass: convert utf8 input to wchar_t
- wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
- wchar_writer::value_type out_end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, out_begin);
+ // second pass: convert utf8 input to wchar_t
+ wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
+ wchar_writer::value_type out_end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, out_begin);
- assert(out_end == out_begin + out_length);
- (void)!out_end;
+ assert(out_end == out_begin + out_length);
+ (void)!out_end;
- return true;
- }
+ return true;
+}
- template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
- {
- const uint16_t* data = static_cast<const uint16_t*>(contents);
- size_t length = size / sizeof(uint16_t);
+template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
+{
+ const uint16_t* data = static_cast<const uint16_t*>(contents);
+ size_t length = size / sizeof(uint16_t);
- // first pass: get length in wchar_t units
- out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf16_block(data, length, 0);
+ // first pass: get length in wchar_t units
+ out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf16_block(data, length, 0);
- // allocate buffer of suitable length
- out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
- if (!out_buffer) return false;
+ // allocate buffer of suitable length
+ out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
+ if (!out_buffer) return false;
- // second pass: convert utf16 input to wchar_t
- wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
- wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf16_block(data, length, out_begin);
+ // second pass: convert utf16 input to wchar_t
+ wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
+ wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf16_block(data, length, out_begin);
- assert(out_end == out_begin + out_length);
- (void)!out_end;
+ assert(out_end == out_begin + out_length);
+ (void)!out_end;
- return true;
- }
+ return true;
+}
- template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
- {
- const uint32_t* data = static_cast<const uint32_t*>(contents);
- size_t length = size / sizeof(uint32_t);
+template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
+{
+ const uint32_t* data = static_cast<const uint32_t*>(contents);
+ size_t length = size / sizeof(uint32_t);
- // first pass: get length in wchar_t units
- out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf32_block(data, length, 0);
+ // first pass: get length in wchar_t units
+ out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf32_block(data, length, 0);
- // allocate buffer of suitable length
- out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
- if (!out_buffer) return false;
+ // allocate buffer of suitable length
+ out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
+ if (!out_buffer) return false;
- // second pass: convert utf32 input to wchar_t
- wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
- wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf32_block(data, length, out_begin);
+ // second pass: convert utf32 input to wchar_t
+ wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
+ wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf32_block(data, length, out_begin);
- assert(out_end == out_begin + out_length);
- (void)!out_end;
+ assert(out_end == out_begin + out_length);
+ (void)!out_end;
- return true;
- }
+ return true;
+}
- PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
- {
- const uint8_t* data = static_cast<const uint8_t*>(contents);
+PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
+{
+ const uint8_t* data = static_cast<const uint8_t*>(contents);
- // get length in wchar_t units
- out_length = size;
+ // get length in wchar_t units
+ out_length = size;
- // allocate buffer of suitable length
- out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
- if (!out_buffer) return false;
+ // allocate buffer of suitable length
+ out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
+ if (!out_buffer) return false;
- // convert latin1 input to wchar_t
- wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
- wchar_writer::value_type out_end = utf_decoder<wchar_writer>::decode_latin1_block(data, size, out_begin);
+ // convert latin1 input to wchar_t
+ wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
+ wchar_writer::value_type out_end = utf_decoder<wchar_writer>::decode_latin1_block(data, size, out_begin);
- assert(out_end == out_begin + out_length);
- (void)!out_end;
+ assert(out_end == out_begin + out_length);
+ (void)!out_end;
- return true;
- }
+ return true;
+}
- PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
- {
- // get native encoding
- xml_encoding wchar_encoding = get_wchar_encoding();
+PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
+{
+ // get native encoding
+ xml_encoding wchar_encoding = get_wchar_encoding();
- // fast path: no conversion required
- if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+ // fast path: no conversion required
+ if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
- // only endian-swapping is required
- if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
+ // only endian-swapping is required
+ if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
- // source encoding is utf8
- if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size);
+ // source encoding is utf8
+ if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size);
- // source encoding is utf16
- if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
- {
- xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+ // source encoding is utf16
+ if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) {
+ xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
- return (native_encoding == encoding) ?
- convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
- convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
- }
+ return (native_encoding == encoding) ?
+ convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
+ convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
+ }
- // source encoding is utf32
- if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
- {
- xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+ // source encoding is utf32
+ if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) {
+ xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
- return (native_encoding == encoding) ?
- convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
- convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
- }
+ return (native_encoding == encoding) ?
+ convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
+ convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
+ }
- // source encoding is latin1
- if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size);
+ // source encoding is latin1
+ if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size);
- assert(!"Invalid encoding");
- return false;
- }
+ assert(!"Invalid encoding");
+ return false;
+}
#else
- template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
- {
- const uint16_t* data = static_cast<const uint16_t*>(contents);
- size_t length = size / sizeof(uint16_t);
+template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
+{
+ const uint16_t* data = static_cast<const uint16_t*>(contents);
+ size_t length = size / sizeof(uint16_t);
- // first pass: get length in utf8 units
- out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf16_block(data, length, 0);
+ // first pass: get length in utf8 units
+ out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf16_block(data, length, 0);
- // allocate buffer of suitable length
- out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
- if (!out_buffer) return false;
+ // allocate buffer of suitable length
+ out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
+ if (!out_buffer) return false;
- // second pass: convert utf16 input to utf8
- uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
- uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf16_block(data, length, out_begin);
+ // second pass: convert utf16 input to utf8
+ uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
+ uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf16_block(data, length, out_begin);
- assert(out_end == out_begin + out_length);
- (void)!out_end;
+ assert(out_end == out_begin + out_length);
+ (void)!out_end;
- return true;
- }
+ return true;
+}
- template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
- {
- const uint32_t* data = static_cast<const uint32_t*>(contents);
- size_t length = size / sizeof(uint32_t);
+template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
+{
+ const uint32_t* data = static_cast<const uint32_t*>(contents);
+ size_t length = size / sizeof(uint32_t);
- // first pass: get length in utf8 units
- out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf32_block(data, length, 0);
+ // first pass: get length in utf8 units
+ out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf32_block(data, length, 0);
- // allocate buffer of suitable length
- out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
- if (!out_buffer) return false;
+ // allocate buffer of suitable length
+ out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
+ if (!out_buffer) return false;
- // second pass: convert utf32 input to utf8
- uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
- uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf32_block(data, length, out_begin);
+ // second pass: convert utf32 input to utf8
+ uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
+ uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf32_block(data, length, out_begin);
- assert(out_end == out_begin + out_length);
- (void)!out_end;
+ assert(out_end == out_begin + out_length);
+ (void)!out_end;
- return true;
- }
+ return true;
+}
- PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
- {
- for (size_t i = 0; i < size; ++i)
- if (data[i] > 127)
- return i;
+PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
+{
+ for (size_t i = 0; i < size; ++i)
+ if (data[i] > 127)
+ return i;
- return size;
- }
+ return size;
+}
- PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
- {
- const uint8_t* data = static_cast<const uint8_t*>(contents);
+PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+{
+ const uint8_t* data = static_cast<const uint8_t*>(contents);
- // get size of prefix that does not need utf8 conversion
- size_t prefix_length = get_latin1_7bit_prefix_length(data, size);
- assert(prefix_length <= size);
+ // get size of prefix that does not need utf8 conversion
+ size_t prefix_length = get_latin1_7bit_prefix_length(data, size);
+ assert(prefix_length <= size);
- const uint8_t* postfix = data + prefix_length;
- size_t postfix_length = size - prefix_length;
+ const uint8_t* postfix = data + prefix_length;
+ size_t postfix_length = size - prefix_length;
- // if no conversion is needed, just return the original buffer
- if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+ // if no conversion is needed, just return the original buffer
+ if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
- // first pass: get length in utf8 units
- out_length = prefix_length + utf_decoder<utf8_counter>::decode_latin1_block(postfix, postfix_length, 0);
+ // first pass: get length in utf8 units
+ out_length = prefix_length + utf_decoder<utf8_counter>::decode_latin1_block(postfix, postfix_length, 0);
- // allocate buffer of suitable length
- out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
- if (!out_buffer) return false;
+ // allocate buffer of suitable length
+ out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
+ if (!out_buffer) return false;
- // second pass: convert latin1 input to utf8
- memcpy(out_buffer, data, prefix_length);
+ // second pass: convert latin1 input to utf8
+ memcpy(out_buffer, data, prefix_length);
- uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
- uint8_t* out_end = utf_decoder<utf8_writer>::decode_latin1_block(postfix, postfix_length, out_begin + prefix_length);
+ uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
+ uint8_t* out_end = utf_decoder<utf8_writer>::decode_latin1_block(postfix, postfix_length, out_begin + prefix_length);
- assert(out_end == out_begin + out_length);
- (void)!out_end;
+ assert(out_end == out_begin + out_length);
+ (void)!out_end;
- return true;
- }
+ return true;
+}
- PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
- {
- // fast path: no conversion required
- if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
+{
+ // fast path: no conversion required
+ if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
- // source encoding is utf16
- if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
- {
- xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+ // source encoding is utf16
+ if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) {
+ xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
- return (native_encoding == encoding) ?
- convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
- convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
- }
+ return (native_encoding == encoding) ?
+ convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
+ convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
+ }
- // source encoding is utf32
- if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
- {
- xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+ // source encoding is utf32
+ if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) {
+ xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
- return (native_encoding == encoding) ?
- convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
- convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
- }
+ return (native_encoding == encoding) ?
+ convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
+ convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
+ }
- // source encoding is latin1
- if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
+ // source encoding is latin1
+ if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
- assert(!"Invalid encoding");
- return false;
- }
+ assert(!"Invalid encoding");
+ return false;
+}
#endif
- PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
- {
- // get length in utf8 characters
- return utf_decoder<utf8_counter>::decode_wchar_block(str, length, 0);
- }
-
- PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
- {
- // convert to utf8
- uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
- uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(str, length, begin);
-
- assert(begin + size == end);
- (void)!end;
-
- // zero-terminate
- buffer[size] = 0;
- }
-
+PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
+{
+ // get length in utf8 characters
+ return utf_decoder<utf8_counter>::decode_wchar_block(str, length, 0);
+}
+
+PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
+{
+ // convert to utf8
+ uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
+ uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(str, length, begin);
+
+ assert(begin + size == end);
+ (void)!end;
+
+ // zero-terminate
+ buffer[size] = 0;
+}
+
#ifndef PUGIXML_NO_STL
- PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
- {
- // first pass: get length in utf8 characters
- size_t size = as_utf8_begin(str, length);
+PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
+{
+ // first pass: get length in utf8 characters
+ size_t size = as_utf8_begin(str, length);
+
+ // allocate resulting string
+ std::string result;
+ result.resize(size);
+
+ // second pass: convert to utf8
+ if (size > 0) as_utf8_end(&result[0], size, str, length);
+
+ return result;
+}
+
+PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
+{
+ const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
+
+ // first pass: get length in wchar_t units
+ size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
- // allocate resulting string
- std::string result;
- result.resize(size);
+ // allocate resulting string
+ std::basic_string<wchar_t> result;
+ result.resize(length);
- // second pass: convert to utf8
- if (size > 0) as_utf8_end(&result[0], size, str, length);
+ // second pass: convert to wchar_t
+ if (length > 0) {
+ wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
+ wchar_writer::value_type end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, begin);
- return result;
- }
+ assert(begin + length == end);
+ (void)!end;
+ }
- PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
- {
- const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
+ return result;
+}
+#endif
+
+inline bool strcpy_insitu_allow(size_t length, uintptr_t allocated, char_t* target)
+{
+ assert(target);
+ size_t target_length = strlength(target);
+
+ // always reuse document buffer memory if possible
+ if (!allocated) return target_length >= length;
+
+ // reuse heap memory if waste is not too great
+ const size_t reuse_threshold = 32;
+
+ return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
+}
+
+PUGI__FN bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source)
+{
+ size_t source_length = strlength(source);
+
+ if (source_length == 0) {
+ // empty string and null pointer are equivalent, so just deallocate old memory
+ xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
+
+ if (header & header_mask) alloc->deallocate_string(dest);
+
+ // mark the string as not allocated
+ dest = 0;
+ header &= ~header_mask;
+
+ return true;
+ } else if (dest && strcpy_insitu_allow(source_length, header & header_mask, dest)) {
+ // we can reuse old buffer, so just copy the new data (including zero terminator)
+ memcpy(dest, source, (source_length + 1) * sizeof(char_t));
+
+ return true;
+ } else {
+ xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
+
+ // allocate new buffer
+ char_t* buf = alloc->allocate_string(source_length + 1);
+ if (!buf) return false;
+
+ // copy the string (including zero terminator)
+ memcpy(buf, source, (source_length + 1) * sizeof(char_t));
+
+ // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
+ if (header & header_mask) alloc->deallocate_string(dest);
+
+ // the string is now allocated, so set the flag
+ dest = buf;
+ header |= header_mask;
+
+ return true;
+ }
+}
+
+struct gap {
+ char_t* end;
+ size_t size;
+
+ gap(): end(0), size(0) {
+ }
+
+ // Push new gap, move s count bytes further (skipping the gap).
+ // Collapse previous gap.
+ void push(char_t*& s, size_t count) {
+ if (end) { // there was a gap already; collapse it
+ // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
+ assert(s >= end);
+ memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
+ }
+
+ s += count; // end of current gap
+
+ // "merge" two gaps
+ end = s;
+ size += count;
+ }
+
+ // Collapse all gaps, return past-the-end pointer
+ char_t* flush(char_t* s) {
+ if (end) {
+ // Move [old_gap_end, current_pos) to [old_gap_start, ...)
+ assert(s >= end);
+ memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
+
+ return s - size;
+ } else return s;
+ }
+};
+
+PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
+{
+ char_t* stre = s + 1;
+
+ switch (*stre) {
+ case '#': { // &#...
+ unsigned int ucsc = 0;
+
+ if (stre[1] == 'x') { // &#x... (hex code)
+ stre += 2;
+
+ char_t ch = *stre;
+
+ if (ch == ';') return stre;
+
+ for (;;) {
+ if (static_cast<unsigned int>(ch - '0') <= 9)
+ ucsc = 16 * ucsc + (ch - '0');
+ else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
+ ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
+ else if (ch == ';')
+ break;
+ else // cancel
+ return stre;
+
+ ch = *++stre;
+ }
+
+ ++stre;
+ } else { // &#... (dec code)
+ char_t ch = *++stre;
+
+ if (ch == ';') return stre;
+
+ for (;;) {
+ if (static_cast<unsigned int>(ch - '0') <= 9)
+ ucsc = 10 * ucsc + (ch - '0');
+ else if (ch == ';')
+ break;
+ else // cancel
+ return stre;
+
+ ch = *++stre;
+ }
+
+ ++stre;
+ }
+
+#ifdef PUGIXML_WCHAR_MODE
+ s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
+#else
+ s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
+#endif
+
+ g.push(s, stre - s);
+ return stre;
+ }
+
+ case 'a': { // &a
+ ++stre;
+
+ if (*stre == 'm') { // &am
+ if (*++stre == 'p' && *++stre == ';') { // &amp;
+ *s++ = '&';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ } else if (*stre == 'p') { // &ap
+ if (*++stre == 'o' && *++stre == 's' && *++stre == ';') { // &apos;
+ *s++ = '\'';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ }
+ break;
+ }
+
+ case 'g': { // &g
+ if (*++stre == 't' && *++stre == ';') { // &gt;
+ *s++ = '>';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ break;
+ }
+
+ case 'l': { // &l
+ if (*++stre == 't' && *++stre == ';') { // &lt;
+ *s++ = '<';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ break;
+ }
+
+ case 'q': { // &q
+ if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') { // &quot;
+ *s++ = '"';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ return stre;
+}
+
+// Utility macro for last character handling
+#define ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
+
+PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
+{
+ gap g;
+
+ while (true) {
+ while (!PUGI__IS_CHARTYPE(*s, ct_parse_comment)) ++s;
+
+ if (*s == '\r') { // Either a single 0x0d or 0x0d 0x0a pair
+ *s++ = '\n'; // replace first one with 0x0a
+
+ if (*s == '\n') g.push(s, 1);
+ } else if (s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')) { // comment ends here
+ *g.flush(s) = 0;
+
+ return s + (s[2] == '>' ? 3 : 2);
+ } else if (*s == 0) {
+ return 0;
+ } else ++s;
+ }
+}
+
+PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
+{
+ gap g;
+
+ while (true) {
+ while (!PUGI__IS_CHARTYPE(*s, ct_parse_cdata)) ++s;
+
+ if (*s == '\r') { // Either a single 0x0d or 0x0d 0x0a pair
+ *s++ = '\n'; // replace first one with 0x0a
+
+ if (*s == '\n') g.push(s, 1);
+ } else if (s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')) { // CDATA ends here
+ *g.flush(s) = 0;
+
+ return s + 1;
+ } else if (*s == 0) {
+ return 0;
+ } else ++s;
+ }
+}
+
+typedef char_t* (*strconv_pcdata_t)(char_t*);
+
+template <typename opt_eol, typename opt_escape> struct strconv_pcdata_impl {
+ static char_t* parse(char_t* s) {
+ gap g;
+
+ while (true) {
+ while (!PUGI__IS_CHARTYPE(*s, ct_parse_pcdata)) ++s;
+
+ if (*s == '<') { // PCDATA ends here
+ *g.flush(s) = 0;
+
+ return s + 1;
+ } else if (opt_eol::value && *s == '\r') { // Either a single 0x0d or 0x0d 0x0a pair
+ *s++ = '\n'; // replace first one with 0x0a
+
+ if (*s == '\n') g.push(s, 1);
+ } else if (opt_escape::value && *s == '&') {
+ s = strconv_escape(s, g);
+ } else if (*s == 0) {
+ return s;
+ } else ++s;
+ }
+ }
+};
+
+PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
+{
+ PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20);
+
+ switch ((optmask >> 4) & 3) { // get bitmask for flags (eol escapes)
+ case 0:
+ return strconv_pcdata_impl<opt_false, opt_false>::parse;
+ case 1:
+ return strconv_pcdata_impl<opt_false, opt_true>::parse;
+ case 2:
+ return strconv_pcdata_impl<opt_true, opt_false>::parse;
+ case 3:
+ return strconv_pcdata_impl<opt_true, opt_true>::parse;
+ default:
+ return 0; // should not get here
+ }
+}
+
+typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
+
+template <typename opt_escape> struct strconv_attribute_impl {
+ static char_t* parse_wnorm(char_t* s, char_t end_quote) {
+ gap g;
+
+ // trim leading whitespaces
+ if (PUGI__IS_CHARTYPE(*s, ct_space)) {
+ char_t* str = s;
+
+ do ++str;
+ while (PUGI__IS_CHARTYPE(*str, ct_space));
+
+ g.push(s, str - s);
+ }
+
+ while (true) {
+ while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr_ws | ct_space)) ++s;
+
+ if (*s == end_quote) {
+ char_t* str = g.flush(s);
+
+ do *str-- = 0;
+ while (PUGI__IS_CHARTYPE(*str, ct_space));
+
+ return s + 1;
+ } else if (PUGI__IS_CHARTYPE(*s, ct_space)) {
+ *s++ = ' ';
+
+ if (PUGI__IS_CHARTYPE(*s, ct_space)) {
+ char_t* str = s + 1;
+ while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
+
+ g.push(s, str - s);
+ }
+ } else if (opt_escape::value && *s == '&') {
+ s = strconv_escape(s, g);
+ } else if (!*s) {
+ return 0;
+ } else ++s;
+ }
+ }
+
+ static char_t* parse_wconv(char_t* s, char_t end_quote) {
+ gap g;
+
+ while (true) {
+ while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr_ws)) ++s;
+
+ if (*s == end_quote) {
+ *g.flush(s) = 0;
+
+ return s + 1;
+ } else if (PUGI__IS_CHARTYPE(*s, ct_space)) {
+ if (*s == '\r') {
+ *s++ = ' ';
+
+ if (*s == '\n') g.push(s, 1);
+ } else *s++ = ' ';
+ } else if (opt_escape::value && *s == '&') {
+ s = strconv_escape(s, g);
+ } else if (!*s) {
+ return 0;
+ } else ++s;
+ }
+ }
+
+ static char_t* parse_eol(char_t* s, char_t end_quote) {
+ gap g;
+
+ while (true) {
+ while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr)) ++s;
+
+ if (*s == end_quote) {
+ *g.flush(s) = 0;
+
+ return s + 1;
+ } else if (*s == '\r') {
+ *s++ = '\n';
+
+ if (*s == '\n') g.push(s, 1);
+ } else if (opt_escape::value && *s == '&') {
+ s = strconv_escape(s, g);
+ } else if (!*s) {
+ return 0;
+ } else ++s;
+ }
+ }
+
+ static char_t* parse_simple(char_t* s, char_t end_quote) {
+ gap g;
+
+ while (true) {
+ while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr)) ++s;
+
+ if (*s == end_quote) {
+ *g.flush(s) = 0;
+
+ return s + 1;
+ } else if (opt_escape::value && *s == '&') {
+ s = strconv_escape(s, g);
+ } else if (!*s) {
+ return 0;
+ } else ++s;
+ }
+ }
+};
+
+PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
+{
+ PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
+
+ switch ((optmask >> 4) & 15) { // get bitmask for flags (wconv wnorm eol escapes)
+ case 0:
+ return strconv_attribute_impl<opt_false>::parse_simple;
+ case 1:
+ return strconv_attribute_impl<opt_true>::parse_simple;
+ case 2:
+ return strconv_attribute_impl<opt_false>::parse_eol;
+ case 3:
+ return strconv_attribute_impl<opt_true>::parse_eol;
+ case 4:
+ return strconv_attribute_impl<opt_false>::parse_wconv;
+ case 5:
+ return strconv_attribute_impl<opt_true>::parse_wconv;
+ case 6:
+ return strconv_attribute_impl<opt_false>::parse_wconv;
+ case 7:
+ return strconv_attribute_impl<opt_true>::parse_wconv;
+ case 8:
+ return strconv_attribute_impl<opt_false>::parse_wnorm;
+ case 9:
+ return strconv_attribute_impl<opt_true>::parse_wnorm;
+ case 10:
+ return strconv_attribute_impl<opt_false>::parse_wnorm;
+ case 11:
+ return strconv_attribute_impl<opt_true>::parse_wnorm;
+ case 12:
+ return strconv_attribute_impl<opt_false>::parse_wnorm;
+ case 13:
+ return strconv_attribute_impl<opt_true>::parse_wnorm;
+ case 14:
+ return strconv_attribute_impl<opt_false>::parse_wnorm;
+ case 15:
+ return strconv_attribute_impl<opt_true>::parse_wnorm;
+ default:
+ return 0; // should not get here
+ }
+}
+
+inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
+{
+ xml_parse_result result;
+ result.status = status;
+ result.offset = offset;
+
+ return result;
+}
+
+struct xml_parser {
+ xml_allocator alloc;
+ char_t* error_offset;
+ xml_parse_status error_status;
+
+ // Parser utilities.
+#define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
+#define PUGI__OPTSET(OPT) ( optmsk & (OPT) )
+#define PUGI__PUSHNODE(TYPE) { cursor = append_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
+#define PUGI__POPNODE() { cursor = cursor->parent; }
+#define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
+#define PUGI__SCANWHILE(X) { while ((X)) ++s; }
+#define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; }
+#define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0)
+#define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
+
+ xml_parser(const xml_allocator& alloc_): alloc(alloc_), error_offset(0), error_status(status_ok) {
+ }
+
+ // DOCTYPE consists of nested sections of the following possible types:
+ // <!-- ... -->, <? ... ?>, "...", '...'
+ // <![...]]>
+ // <!...>
+ // First group can not contain nested groups
+ // Second group can contain nested groups of the same type
+ // Third group can contain all other groups
+ char_t* parse_doctype_primitive(char_t* s) {
+ if (*s == '"' || *s == '\'') {
+ // quoted string
+ char_t ch = *s++;
+ PUGI__SCANFOR(*s == ch);
+ if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+ s++;
+ } else if (s[0] == '<' && s[1] == '?') {
+ // <? ... ?>
+ s += 2;
+ PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
+ if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+ s += 2;
+ } else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-') {
+ s += 4;
+ PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
+ if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+ s += 4;
+ } else PUGI__THROW_ERROR(status_bad_doctype, s);
+
+ return s;
+ }
+
+ char_t* parse_doctype_ignore(char_t* s) {
+ assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
+ s++;
+
+ while (*s) {
+ if (s[0] == '<' && s[1] == '!' && s[2] == '[') {
+ // nested ignore section
+ s = parse_doctype_ignore(s);
+ if (!s) return s;
+ } else if (s[0] == ']' && s[1] == ']' && s[2] == '>') {
+ // ignore section end
+ s += 3;
+
+ return s;
+ } else s++;
+ }
+
+ PUGI__THROW_ERROR(status_bad_doctype, s);
+ }
+
+ char_t* parse_doctype_group(char_t* s, char_t endch, bool toplevel) {
+ assert(s[0] == '<' && s[1] == '!');
+ s++;
+
+ while (*s) {
+ if (s[0] == '<' && s[1] == '!' && s[2] != '-') {
+ if (s[2] == '[') {
+ // ignore
+ s = parse_doctype_ignore(s);
+ if (!s) return s;
+ } else {
+ // some control group
+ s = parse_doctype_group(s, endch, false);
+ if (!s) return s;
+ }
+ } else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') {
+ // unknown tag (forbidden), or some primitive group
+ s = parse_doctype_primitive(s);
+ if (!s) return s;
+ } else if (*s == '>') {
+ s++;
+
+ return s;
+ } else s++;
+ }
+
+ if (!toplevel || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
+
+ return s;
+ }
+
+ char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch) {
+ // parse node contents, starting with exclamation mark
+ ++s;
+
+ if (*s == '-') { // '<!-...'
+ ++s;
+
+ if (*s == '-') { // '<!--...'
+ ++s;
+
+ if (PUGI__OPTSET(parse_comments)) {
+ PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
+ cursor->value = s; // Save the offset.
+ }
+
+ if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments)) {
+ s = strconv_comment(s, endch);
+
+ if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
+ } else {
+ // Scan for terminating '-->'.
+ PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>'));
+ PUGI__CHECK_ERROR(status_bad_comment, s);
+
+ if (PUGI__OPTSET(parse_comments))
+ *s = 0; // Zero-terminate this segment at the first terminating '-'.
+
+ s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
+ }
+ } else PUGI__THROW_ERROR(status_bad_comment, s);
+ } else if (*s == '[') {
+ // '<![CDATA[...'
+ if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[') {
+ ++s;
+
+ if (PUGI__OPTSET(parse_cdata)) {
+ PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
+ cursor->value = s; // Save the offset.
+
+ if (PUGI__OPTSET(parse_eol)) {
+ s = strconv_cdata(s, endch);
+
+ if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
+ } else {
+ // Scan for terminating ']]>'.
+ PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
+ PUGI__CHECK_ERROR(status_bad_cdata, s);
+
+ *s++ = 0; // Zero-terminate this segment.
+ }
+ } else { // Flagged for discard, but we still have to scan for the terminator.
+ // Scan for terminating ']]>'.
+ PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
+ PUGI__CHECK_ERROR(status_bad_cdata, s);
+
+ ++s;
+ }
+
+ s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
+ } else PUGI__THROW_ERROR(status_bad_cdata, s);
+ } else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && ENDSWITH(s[6], 'E')) {
+ s -= 2;
+
+ if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
- // first pass: get length in wchar_t units
- size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
+ char_t* mark = s + 9;
+
+ s = parse_doctype_group(s, endch, true);
+ if (!s) return s;
+
+ if (PUGI__OPTSET(parse_doctype)) {
+ while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
- // allocate resulting string
- std::basic_string<wchar_t> result;
- result.resize(length);
+ PUGI__PUSHNODE(node_doctype);
+
+ cursor->value = mark;
- // second pass: convert to wchar_t
- if (length > 0)
- {
- wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
- wchar_writer::value_type end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, begin);
+ assert((s[0] == 0 && endch == '>') || s[-1] == '>');
+ s[*s == 0 ? 0 : -1] = 0;
- assert(begin + length == end);
- (void)!end;
- }
+ PUGI__POPNODE();
+ }
+ } else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
+ else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
+ else PUGI__THROW_ERROR(status_unrecognized_tag, s);
- return result;
- }
+ return s;
+ }
+
+ char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch) {
+ // load into registers
+ xml_node_struct* cursor = ref_cursor;
+ char_t ch = 0;
+
+ // parse node contents, starting with question mark
+ ++s;
+
+ // read PI target
+ char_t* target = s;
+
+ if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
+
+ PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
+ PUGI__CHECK_ERROR(status_bad_pi, s);
+
+ // determine node type; stricmp / strcasecmp is not portable
+ bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
+
+ if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi)) {
+ if (declaration) {
+ // disallow non top-level declarations
+ if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
+
+ PUGI__PUSHNODE(node_declaration);
+ } else {
+ PUGI__PUSHNODE(node_pi);
+ }
+
+ cursor->name = target;
+
+ PUGI__ENDSEG();
+
+ // parse value/attributes
+ if (ch == '?') {
+ // empty node
+ if (!ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
+ s += (*s == '>');
+
+ PUGI__POPNODE();
+ } else if (PUGI__IS_CHARTYPE(ch, ct_space)) {
+ PUGI__SKIPWS();
+
+ // scan for tag end
+ char_t* value = s;
+
+ PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
+ PUGI__CHECK_ERROR(status_bad_pi, s);
+
+ if (declaration) {
+ // replace ending ? with / so that 'element' terminates properly
+ *s = '/';
+
+ // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
+ s = value;
+ } else {
+ // store value and step over >
+ cursor->value = value;
+ PUGI__POPNODE();
+
+ PUGI__ENDSEG();
+
+ s += (*s == '>');
+ }
+ } else PUGI__THROW_ERROR(status_bad_pi, s);
+ } else {
+ // scan for tag end
+ PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
+ PUGI__CHECK_ERROR(status_bad_pi, s);
+
+ s += (s[1] == '>' ? 2 : 1);
+ }
+
+ // store from registers
+ ref_cursor = cursor;
+
+ return s;
+ }
+
+ char_t* parse(char_t* s, xml_node_struct* xmldoc, unsigned int optmsk, char_t endch) {
+ strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
+ strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
+
+ char_t ch = 0;
+ xml_node_struct* cursor = xmldoc;
+ char_t* mark = s;
+
+ while (*s != 0) {
+ if (*s == '<') {
+ ++s;
+
+LOC_TAG:
+ if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) { // '<#...'
+ PUGI__PUSHNODE(node_element); // Append a new node to the tree.
+
+ cursor->name = s;
+
+ PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
+ PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
+
+ if (ch == '>') {
+ // end of tag
+ } else if (PUGI__IS_CHARTYPE(ch, ct_space)) {
+LOC_ATTRIBUTES:
+ while (true) {
+ PUGI__SKIPWS(); // Eat any whitespace.
+
+ if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) { // <... #...
+ xml_attribute_struct* a = append_attribute_ll(cursor, alloc); // Make space for this attribute.
+ if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
+
+ a->name = s; // Save the offset.
+
+ PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
+ PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
+
+ PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
+ PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
+
+ if (PUGI__IS_CHARTYPE(ch, ct_space)) {
+ PUGI__SKIPWS(); // Eat any whitespace.
+ PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
+
+ ch = *s;
+ ++s;
+ }
+
+ if (ch == '=') { // '<... #=...'
+ PUGI__SKIPWS(); // Eat any whitespace.
+
+ if (*s == '"' || *s == '\'') { // '<... #="...'
+ ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
+ ++s; // Step over the quote.
+ a->value = s; // Save the offset.
+
+ s = strconv_attribute(s, ch);
+
+ if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
+
+ // After this line the loop continues from the start;
+ // Whitespaces, / and > are ok, symbols and EOF are wrong,
+ // everything else will be detected
+ if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
+ } else PUGI__THROW_ERROR(status_bad_attribute, s);
+ } else PUGI__THROW_ERROR(status_bad_attribute, s);
+ } else if (*s == '/') {
+ ++s;
+
+ if (*s == '>') {
+ PUGI__POPNODE();
+ s++;
+ break;
+ } else if (*s == 0 && endch == '>') {
+ PUGI__POPNODE();
+ break;
+ } else PUGI__THROW_ERROR(status_bad_start_element, s);
+ } else if (*s == '>') {
+ ++s;
+
+ break;
+ } else if (*s == 0 && endch == '>') {
+ break;
+ } else PUGI__THROW_ERROR(status_bad_start_element, s);
+ }
+
+ // !!!
+ } else if (ch == '/') { // '<#.../'
+ if (!ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
+
+ PUGI__POPNODE(); // Pop.
+
+ s += (*s == '>');
+ } else if (ch == 0) {
+ // we stepped over null terminator, backtrack & handle closing tag
+ --s;
+
+ if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
+ } else PUGI__THROW_ERROR(status_bad_start_element, s);
+ } else if (*s == '/') {
+ ++s;
+
+ char_t* name = cursor->name;
+ if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s);
+
+ while (PUGI__IS_CHARTYPE(*s, ct_symbol)) {
+ if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s);
+ }
+
+ if (*name) {
+ if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
+ else PUGI__THROW_ERROR(status_end_element_mismatch, s);
+ }
+
+ PUGI__POPNODE(); // Pop.
+
+ PUGI__SKIPWS();
+
+ if (*s == 0) {
+ if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
+ } else {
+ if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
+ ++s;
+ }
+ } else if (*s == '?') { // '<?...'
+ s = parse_question(s, cursor, optmsk, endch);
+ if (!s) return s;
+
+ assert(cursor);
+ if ((cursor->header & xml_memory_page_type_mask) + 1 == node_declaration) goto LOC_ATTRIBUTES;
+ } else if (*s == '!') { // '<!...'
+ s = parse_exclamation(s, cursor, optmsk, endch);
+ if (!s) return s;
+ } else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
+ else PUGI__THROW_ERROR(status_unrecognized_tag, s);
+ } else {
+ mark = s; // Save this offset while searching for a terminator.
+
+ PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
+
+ if (*s == '<') {
+ // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
+ assert(mark != s);
+
+ if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single)) {
+ continue;
+ } else if (PUGI__OPTSET(parse_ws_pcdata_single)) {
+ if (s[1] != '/' || cursor->first_child) continue;
+ }
+ }
+
+ s = mark;
+
+ if (cursor->parent) {
+ PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
+ cursor->value = s; // Save the offset.
+
+ s = strconv_pcdata(s);
+
+ PUGI__POPNODE(); // Pop since this is a standalone.
+
+ if (!*s) break;
+ } else {
+ PUGI__SCANFOR(*s == '<'); // '...<'
+ if (!*s) break;
+
+ ++s;
+ }
+
+ // We're after '<'
+ goto LOC_TAG;
+ }
+ }
+
+ // check that last tag is closed
+ if (cursor != xmldoc) PUGI__THROW_ERROR(status_end_element_mismatch, s);
+
+ return s;
+ }
+
+ static xml_parse_result parse(char_t* buffer, size_t length, xml_node_struct* root, unsigned int optmsk) {
+ xml_document_struct* xmldoc = static_cast<xml_document_struct*>(root);
+
+ // store buffer for offset_debug
+ xmldoc->buffer = buffer;
+
+ // early-out for empty documents
+ if (length == 0) return make_parse_result(status_ok);
+
+ // create parser on stack
+ xml_parser parser(*xmldoc);
+
+ // save last character and make buffer zero-terminated (speeds up parsing)
+ char_t endch = buffer[length - 1];
+ buffer[length - 1] = 0;
+
+ // perform actual parsing
+ parser.parse(buffer, xmldoc, optmsk, endch);
+
+ xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
+ assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
+
+ // update allocator state
+ *static_cast<xml_allocator*>(xmldoc) = parser.alloc;
+
+ // since we removed last character, we have to handle the only possible false positive
+ if (result && endch == '<') {
+ // there's no possible well-formed document with < at the end
+ return make_parse_result(status_unrecognized_tag, length);
+ }
+
+ return result;
+ }
+};
+
+// Output facilities
+PUGI__FN xml_encoding get_write_native_encoding()
+{
+#ifdef PUGIXML_WCHAR_MODE
+ return get_wchar_encoding();
+#else
+ return encoding_utf8;
#endif
+}
+
+PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
+{
+ // replace wchar encoding with utf implementation
+ if (encoding == encoding_wchar) return get_wchar_encoding();
+
+ // replace utf16 encoding with utf16 with specific endianness
+ if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+ // replace utf32 encoding with utf32 with specific endianness
+ if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
- inline bool strcpy_insitu_allow(size_t length, uintptr_t allocated, char_t* target)
- {
- assert(target);
- size_t target_length = strlength(target);
-
- // always reuse document buffer memory if possible
- if (!allocated) return target_length >= length;
-
- // reuse heap memory if waste is not too great
- const size_t reuse_threshold = 32;
-
- return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
- }
-
- PUGI__FN bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source)
- {
- size_t source_length = strlength(source);
-
- if (source_length == 0)
- {
- // empty string and null pointer are equivalent, so just deallocate old memory
- xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
-
- if (header & header_mask) alloc->deallocate_string(dest);
-
- // mark the string as not allocated
- dest = 0;
- header &= ~header_mask;
-
- return true;
- }
- else if (dest && strcpy_insitu_allow(source_length, header & header_mask, dest))
- {
- // we can reuse old buffer, so just copy the new data (including zero terminator)
- memcpy(dest, source, (source_length + 1) * sizeof(char_t));
-
- return true;
- }
- else
- {
- xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
-
- // allocate new buffer
- char_t* buf = alloc->allocate_string(source_length + 1);
- if (!buf) return false;
-
- // copy the string (including zero terminator)
- memcpy(buf, source, (source_length + 1) * sizeof(char_t));
-
- // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
- if (header & header_mask) alloc->deallocate_string(dest);
-
- // the string is now allocated, so set the flag
- dest = buf;
- header |= header_mask;
-
- return true;
- }
- }
-
- struct gap
- {
- char_t* end;
- size_t size;
-
- gap(): end(0), size(0)
- {
- }
-
- // Push new gap, move s count bytes further (skipping the gap).
- // Collapse previous gap.
- void push(char_t*& s, size_t count)
- {
- if (end) // there was a gap already; collapse it
- {
- // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
- assert(s >= end);
- memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
- }
-
- s += count; // end of current gap
-
- // "merge" two gaps
- end = s;
- size += count;
- }
-
- // Collapse all gaps, return past-the-end pointer
- char_t* flush(char_t* s)
- {
- if (end)
- {
- // Move [old_gap_end, current_pos) to [old_gap_start, ...)
- assert(s >= end);
- memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
-
- return s - size;
- }
- else return s;
- }
- };
-
- PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
- {
- char_t* stre = s + 1;
-
- switch (*stre)
- {
- case '#': // &#...
- {
- unsigned int ucsc = 0;
-
- if (stre[1] == 'x') // &#x... (hex code)
- {
- stre += 2;
-
- char_t ch = *stre;
-
- if (ch == ';') return stre;
-
- for (;;)
- {
- if (static_cast<unsigned int>(ch - '0') <= 9)
- ucsc = 16 * ucsc + (ch - '0');
- else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
- ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
- else if (ch == ';')
- break;
- else // cancel
- return stre;
-
- ch = *++stre;
- }
-
- ++stre;
- }
- else // &#... (dec code)
- {
- char_t ch = *++stre;
-
- if (ch == ';') return stre;
-
- for (;;)
- {
- if (static_cast<unsigned int>(ch - '0') <= 9)
- ucsc = 10 * ucsc + (ch - '0');
- else if (ch == ';')
- break;
- else // cancel
- return stre;
-
- ch = *++stre;
- }
-
- ++stre;
- }
-
- #ifdef PUGIXML_WCHAR_MODE
- s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
- #else
- s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
- #endif
-
- g.push(s, stre - s);
- return stre;
- }
-
- case 'a': // &a
- {
- ++stre;
-
- if (*stre == 'm') // &am
- {
- if (*++stre == 'p' && *++stre == ';') // &amp;
- {
- *s++ = '&';
- ++stre;
-
- g.push(s, stre - s);
- return stre;
- }
- }
- else if (*stre == 'p') // &ap
- {
- if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
- {
- *s++ = '\'';
- ++stre;
-
- g.push(s, stre - s);
- return stre;
- }
- }
- break;
- }
-
- case 'g': // &g
- {
- if (*++stre == 't' && *++stre == ';') // &gt;
- {
- *s++ = '>';
- ++stre;
-
- g.push(s, stre - s);
- return stre;
- }
- break;
- }
-
- case 'l': // &l
- {
- if (*++stre == 't' && *++stre == ';') // &lt;
- {
- *s++ = '<';
- ++stre;
-
- g.push(s, stre - s);
- return stre;
- }
- break;
- }
-
- case 'q': // &q
- {
- if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
- {
- *s++ = '"';
- ++stre;
-
- g.push(s, stre - s);
- return stre;
- }
- break;
- }
-
- default:
- break;
- }
-
- return stre;
- }
-
- // Utility macro for last character handling
- #define ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
-
- PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
- {
- gap g;
-
- while (true)
- {
- while (!PUGI__IS_CHARTYPE(*s, ct_parse_comment)) ++s;
-
- if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
- {
- *s++ = '\n'; // replace first one with 0x0a
-
- if (*s == '\n') g.push(s, 1);
- }
- else if (s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')) // comment ends here
- {
- *g.flush(s) = 0;
-
- return s + (s[2] == '>' ? 3 : 2);
- }
- else if (*s == 0)
- {
- return 0;
- }
- else ++s;
- }
- }
-
- PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
- {
- gap g;
-
- while (true)
- {
- while (!PUGI__IS_CHARTYPE(*s, ct_parse_cdata)) ++s;
-
- if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
- {
- *s++ = '\n'; // replace first one with 0x0a
-
- if (*s == '\n') g.push(s, 1);
- }
- else if (s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')) // CDATA ends here
- {
- *g.flush(s) = 0;
-
- return s + 1;
- }
- else if (*s == 0)
- {
- return 0;
- }
- else ++s;
- }
- }
-
- typedef char_t* (*strconv_pcdata_t)(char_t*);
-
- template <typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
- {
- static char_t* parse(char_t* s)
- {
- gap g;
-
- while (true)
- {
- while (!PUGI__IS_CHARTYPE(*s, ct_parse_pcdata)) ++s;
-
- if (*s == '<') // PCDATA ends here
- {
- *g.flush(s) = 0;
-
- return s + 1;
- }
- else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
- {
- *s++ = '\n'; // replace first one with 0x0a
-
- if (*s == '\n') g.push(s, 1);
- }
- else if (opt_escape::value && *s == '&')
- {
- s = strconv_escape(s, g);
- }
- else if (*s == 0)
- {
- return s;
- }
- else ++s;
- }
- }
- };
-
- PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
- {
- PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20);
-
- switch ((optmask >> 4) & 3) // get bitmask for flags (eol escapes)
- {
- case 0: return strconv_pcdata_impl<opt_false, opt_false>::parse;
- case 1: return strconv_pcdata_impl<opt_false, opt_true>::parse;
- case 2: return strconv_pcdata_impl<opt_true, opt_false>::parse;
- case 3: return strconv_pcdata_impl<opt_true, opt_true>::parse;
- default: return 0; // should not get here
- }
- }
-
- typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
-
- template <typename opt_escape> struct strconv_attribute_impl
- {
- static char_t* parse_wnorm(char_t* s, char_t end_quote)
- {
- gap g;
-
- // trim leading whitespaces
- if (PUGI__IS_CHARTYPE(*s, ct_space))
- {
- char_t* str = s;
-
- do ++str;
- while (PUGI__IS_CHARTYPE(*str, ct_space));
-
- g.push(s, str - s);
- }
-
- while (true)
- {
- while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr_ws | ct_space)) ++s;
-
- if (*s == end_quote)
- {
- char_t* str = g.flush(s);
-
- do *str-- = 0;
- while (PUGI__IS_CHARTYPE(*str, ct_space));
-
- return s + 1;
- }
- else if (PUGI__IS_CHARTYPE(*s, ct_space))
- {
- *s++ = ' ';
-
- if (PUGI__IS_CHARTYPE(*s, ct_space))
- {
- char_t* str = s + 1;
- while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
-
- g.push(s, str - s);
- }
- }
- else if (opt_escape::value && *s == '&')
- {
- s = strconv_escape(s, g);
- }
- else if (!*s)
- {
- return 0;
- }
- else ++s;
- }
- }
-
- static char_t* parse_wconv(char_t* s, char_t end_quote)
- {
- gap g;
-
- while (true)
- {
- while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr_ws)) ++s;
-
- if (*s == end_quote)
- {
- *g.flush(s) = 0;
-
- return s + 1;
- }
- else if (PUGI__IS_CHARTYPE(*s, ct_space))
- {
- if (*s == '\r')
- {
- *s++ = ' ';
-
- if (*s == '\n') g.push(s, 1);
- }
- else *s++ = ' ';
- }
- else if (opt_escape::value && *s == '&')
- {
- s = strconv_escape(s, g);
- }
- else if (!*s)
- {
- return 0;
- }
- else ++s;
- }
- }
-
- static char_t* parse_eol(char_t* s, char_t end_quote)
- {
- gap g;
-
- while (true)
- {
- while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr)) ++s;
-
- if (*s == end_quote)
- {
- *g.flush(s) = 0;
-
- return s + 1;
- }
- else if (*s == '\r')
- {
- *s++ = '\n';
-
- if (*s == '\n') g.push(s, 1);
- }
- else if (opt_escape::value && *s == '&')
- {
- s = strconv_escape(s, g);
- }
- else if (!*s)
- {
- return 0;
- }
- else ++s;
- }
- }
-
- static char_t* parse_simple(char_t* s, char_t end_quote)
- {
- gap g;
-
- while (true)
- {
- while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr)) ++s;
-
- if (*s == end_quote)
- {
- *g.flush(s) = 0;
-
- return s + 1;
- }
- else if (opt_escape::value && *s == '&')
- {
- s = strconv_escape(s, g);
- }
- else if (!*s)
- {
- return 0;
- }
- else ++s;
- }
- }
- };
-
- PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
- {
- PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
-
- switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes)
- {
- case 0: return strconv_attribute_impl<opt_false>::parse_simple;
- case 1: return strconv_attribute_impl<opt_true>::parse_simple;
- case 2: return strconv_attribute_impl<opt_false>::parse_eol;
- case 3: return strconv_attribute_impl<opt_true>::parse_eol;
- case 4: return strconv_attribute_impl<opt_false>::parse_wconv;
- case 5: return strconv_attribute_impl<opt_true>::parse_wconv;
- case 6: return strconv_attribute_impl<opt_false>::parse_wconv;
- case 7: return strconv_attribute_impl<opt_true>::parse_wconv;
- case 8: return strconv_attribute_impl<opt_false>::parse_wnorm;
- case 9: return strconv_attribute_impl<opt_true>::parse_wnorm;
- case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
- case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
- case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
- case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
- case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
- case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
- default: return 0; // should not get here
- }
- }
-
- inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
- {
- xml_parse_result result;
- result.status = status;
- result.offset = offset;
-
- return result;
- }
-
- struct xml_parser
- {
- xml_allocator alloc;
- char_t* error_offset;
- xml_parse_status error_status;
-
- // Parser utilities.
- #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
- #define PUGI__OPTSET(OPT) ( optmsk & (OPT) )
- #define PUGI__PUSHNODE(TYPE) { cursor = append_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
- #define PUGI__POPNODE() { cursor = cursor->parent; }
- #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
- #define PUGI__SCANWHILE(X) { while ((X)) ++s; }
- #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; }
- #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0)
- #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
-
- xml_parser(const xml_allocator& alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
- {
- }
-
- // DOCTYPE consists of nested sections of the following possible types:
- // <!-- ... -->, <? ... ?>, "...", '...'
- // <![...]]>
- // <!...>
- // First group can not contain nested groups
- // Second group can contain nested groups of the same type
- // Third group can contain all other groups
- char_t* parse_doctype_primitive(char_t* s)
- {
- if (*s == '"' || *s == '\'')
- {
- // quoted string
- char_t ch = *s++;
- PUGI__SCANFOR(*s == ch);
- if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
-
- s++;
- }
- else if (s[0] == '<' && s[1] == '?')
- {
- // <? ... ?>
- s += 2;
- PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
- if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
-
- s += 2;
- }
- else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
- {
- s += 4;
- PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
- if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
-
- s += 4;
- }
- else PUGI__THROW_ERROR(status_bad_doctype, s);
-
- return s;
- }
-
- char_t* parse_doctype_ignore(char_t* s)
- {
- assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
- s++;
-
- while (*s)
- {
- if (s[0] == '<' && s[1] == '!' && s[2] == '[')
- {
- // nested ignore section
- s = parse_doctype_ignore(s);
- if (!s) return s;
- }
- else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
- {
- // ignore section end
- s += 3;
-
- return s;
- }
- else s++;
- }
-
- PUGI__THROW_ERROR(status_bad_doctype, s);
- }
-
- char_t* parse_doctype_group(char_t* s, char_t endch, bool toplevel)
- {
- assert(s[0] == '<' && s[1] == '!');
- s++;
-
- while (*s)
- {
- if (s[0] == '<' && s[1] == '!' && s[2] != '-')
- {
- if (s[2] == '[')
- {
- // ignore
- s = parse_doctype_ignore(s);
- if (!s) return s;
- }
- else
- {
- // some control group
- s = parse_doctype_group(s, endch, false);
- if (!s) return s;
- }
- }
- else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
- {
- // unknown tag (forbidden), or some primitive group
- s = parse_doctype_primitive(s);
- if (!s) return s;
- }
- else if (*s == '>')
- {
- s++;
-
- return s;
- }
- else s++;
- }
-
- if (!toplevel || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
-
- return s;
- }
-
- char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
- {
- // parse node contents, starting with exclamation mark
- ++s;
-
- if (*s == '-') // '<!-...'
- {
- ++s;
-
- if (*s == '-') // '<!--...'
- {
- ++s;
-
- if (PUGI__OPTSET(parse_comments))
- {
- PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
- cursor->value = s; // Save the offset.
- }
-
- if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
- {
- s = strconv_comment(s, endch);
-
- if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
- }
- else
- {
- // Scan for terminating '-->'.
- PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>'));
- PUGI__CHECK_ERROR(status_bad_comment, s);
-
- if (PUGI__OPTSET(parse_comments))
- *s = 0; // Zero-terminate this segment at the first terminating '-'.
-
- s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
- }
- }
- else PUGI__THROW_ERROR(status_bad_comment, s);
- }
- else if (*s == '[')
- {
- // '<![CDATA[...'
- if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
- {
- ++s;
-
- if (PUGI__OPTSET(parse_cdata))
- {
- PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
- cursor->value = s; // Save the offset.
-
- if (PUGI__OPTSET(parse_eol))
- {
- s = strconv_cdata(s, endch);
-
- if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
- }
- else
- {
- // Scan for terminating ']]>'.
- PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
- PUGI__CHECK_ERROR(status_bad_cdata, s);
-
- *s++ = 0; // Zero-terminate this segment.
- }
- }
- else // Flagged for discard, but we still have to scan for the terminator.
- {
- // Scan for terminating ']]>'.
- PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
- PUGI__CHECK_ERROR(status_bad_cdata, s);
-
- ++s;
- }
-
- s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
- }
- else PUGI__THROW_ERROR(status_bad_cdata, s);
- }
- else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && ENDSWITH(s[6], 'E'))
- {
- s -= 2;
-
- if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
-
- char_t* mark = s + 9;
-
- s = parse_doctype_group(s, endch, true);
- if (!s) return s;
-
- if (PUGI__OPTSET(parse_doctype))
- {
- while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
-
- PUGI__PUSHNODE(node_doctype);
-
- cursor->value = mark;
-
- assert((s[0] == 0 && endch == '>') || s[-1] == '>');
- s[*s == 0 ? 0 : -1] = 0;
-
- PUGI__POPNODE();
- }
- }
- else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
- else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
- else PUGI__THROW_ERROR(status_unrecognized_tag, s);
-
- return s;
- }
-
- char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
- {
- // load into registers
- xml_node_struct* cursor = ref_cursor;
- char_t ch = 0;
-
- // parse node contents, starting with question mark
- ++s;
-
- // read PI target
- char_t* target = s;
-
- if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
-
- PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
- PUGI__CHECK_ERROR(status_bad_pi, s);
-
- // determine node type; stricmp / strcasecmp is not portable
- bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
-
- if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
- {
- if (declaration)
- {
- // disallow non top-level declarations
- if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
-
- PUGI__PUSHNODE(node_declaration);
- }
- else
- {
- PUGI__PUSHNODE(node_pi);
- }
-
- cursor->name = target;
-
- PUGI__ENDSEG();
-
- // parse value/attributes
- if (ch == '?')
- {
- // empty node
- if (!ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
- s += (*s == '>');
-
- PUGI__POPNODE();
- }
- else if (PUGI__IS_CHARTYPE(ch, ct_space))
- {
- PUGI__SKIPWS();
-
- // scan for tag end
- char_t* value = s;
-
- PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
- PUGI__CHECK_ERROR(status_bad_pi, s);
-
- if (declaration)
- {
- // replace ending ? with / so that 'element' terminates properly
- *s = '/';
-
- // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
- s = value;
- }
- else
- {
- // store value and step over >
- cursor->value = value;
- PUGI__POPNODE();
-
- PUGI__ENDSEG();
-
- s += (*s == '>');
- }
- }
- else PUGI__THROW_ERROR(status_bad_pi, s);
- }
- else
- {
- // scan for tag end
- PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
- PUGI__CHECK_ERROR(status_bad_pi, s);
-
- s += (s[1] == '>' ? 2 : 1);
- }
-
- // store from registers
- ref_cursor = cursor;
-
- return s;
- }
-
- char_t* parse(char_t* s, xml_node_struct* xmldoc, unsigned int optmsk, char_t endch)
- {
- strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
- strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
-
- char_t ch = 0;
- xml_node_struct* cursor = xmldoc;
- char_t* mark = s;
-
- while (*s != 0)
- {
- if (*s == '<')
- {
- ++s;
-
- LOC_TAG:
- if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
- {
- PUGI__PUSHNODE(node_element); // Append a new node to the tree.
-
- cursor->name = s;
-
- PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
- PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
-
- if (ch == '>')
- {
- // end of tag
- }
- else if (PUGI__IS_CHARTYPE(ch, ct_space))
- {
- LOC_ATTRIBUTES:
- while (true)
- {
- PUGI__SKIPWS(); // Eat any whitespace.
-
- if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
- {
- xml_attribute_struct* a = append_attribute_ll(cursor, alloc); // Make space for this attribute.
- if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
-
- a->name = s; // Save the offset.
-
- PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
- PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
-
- PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
- PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
-
- if (PUGI__IS_CHARTYPE(ch, ct_space))
- {
- PUGI__SKIPWS(); // Eat any whitespace.
- PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
-
- ch = *s;
- ++s;
- }
-
- if (ch == '=') // '<... #=...'
- {
- PUGI__SKIPWS(); // Eat any whitespace.
-
- if (*s == '"' || *s == '\'') // '<... #="...'
- {
- ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
- ++s; // Step over the quote.
- a->value = s; // Save the offset.
-
- s = strconv_attribute(s, ch);
-
- if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
-
- // After this line the loop continues from the start;
- // Whitespaces, / and > are ok, symbols and EOF are wrong,
- // everything else will be detected
- if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
- }
- else PUGI__THROW_ERROR(status_bad_attribute, s);
- }
- else PUGI__THROW_ERROR(status_bad_attribute, s);
- }
- else if (*s == '/')
- {
- ++s;
-
- if (*s == '>')
- {
- PUGI__POPNODE();
- s++;
- break;
- }
- else if (*s == 0 && endch == '>')
- {
- PUGI__POPNODE();
- break;
- }
- else PUGI__THROW_ERROR(status_bad_start_element, s);
- }
- else if (*s == '>')
- {
- ++s;
-
- break;
- }
- else if (*s == 0 && endch == '>')
- {
- break;
- }
- else PUGI__THROW_ERROR(status_bad_start_element, s);
- }
-
- // !!!
- }
- else if (ch == '/') // '<#.../'
- {
- if (!ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
-
- PUGI__POPNODE(); // Pop.
-
- s += (*s == '>');
- }
- else if (ch == 0)
- {
- // we stepped over null terminator, backtrack & handle closing tag
- --s;
-
- if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
- }
- else PUGI__THROW_ERROR(status_bad_start_element, s);
- }
- else if (*s == '/')
- {
- ++s;
-
- char_t* name = cursor->name;
- if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s);
-
- while (PUGI__IS_CHARTYPE(*s, ct_symbol))
- {
- if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s);
- }
-
- if (*name)
- {
- if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
- else PUGI__THROW_ERROR(status_end_element_mismatch, s);
- }
-
- PUGI__POPNODE(); // Pop.
-
- PUGI__SKIPWS();
-
- if (*s == 0)
- {
- if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
- }
- else
- {
- if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
- ++s;
- }
- }
- else if (*s == '?') // '<?...'
- {
- s = parse_question(s, cursor, optmsk, endch);
- if (!s) return s;
-
- assert(cursor);
- if ((cursor->header & xml_memory_page_type_mask) + 1 == node_declaration) goto LOC_ATTRIBUTES;
- }
- else if (*s == '!') // '<!...'
- {
- s = parse_exclamation(s, cursor, optmsk, endch);
- if (!s) return s;
- }
- else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
- else PUGI__THROW_ERROR(status_unrecognized_tag, s);
- }
- else
- {
- mark = s; // Save this offset while searching for a terminator.
-
- PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
-
- if (*s == '<')
- {
- // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
- assert(mark != s);
-
- if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single))
- {
- continue;
- }
- else if (PUGI__OPTSET(parse_ws_pcdata_single))
- {
- if (s[1] != '/' || cursor->first_child) continue;
- }
- }
-
- s = mark;
-
- if (cursor->parent)
- {
- PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
- cursor->value = s; // Save the offset.
-
- s = strconv_pcdata(s);
-
- PUGI__POPNODE(); // Pop since this is a standalone.
-
- if (!*s) break;
- }
- else
- {
- PUGI__SCANFOR(*s == '<'); // '...<'
- if (!*s) break;
-
- ++s;
- }
-
- // We're after '<'
- goto LOC_TAG;
- }
- }
-
- // check that last tag is closed
- if (cursor != xmldoc) PUGI__THROW_ERROR(status_end_element_mismatch, s);
-
- return s;
- }
-
- static xml_parse_result parse(char_t* buffer, size_t length, xml_node_struct* root, unsigned int optmsk)
- {
- xml_document_struct* xmldoc = static_cast<xml_document_struct*>(root);
-
- // store buffer for offset_debug
- xmldoc->buffer = buffer;
-
- // early-out for empty documents
- if (length == 0) return make_parse_result(status_ok);
-
- // create parser on stack
- xml_parser parser(*xmldoc);
-
- // save last character and make buffer zero-terminated (speeds up parsing)
- char_t endch = buffer[length - 1];
- buffer[length - 1] = 0;
-
- // perform actual parsing
- parser.parse(buffer, xmldoc, optmsk, endch);
-
- xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
- assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
-
- // update allocator state
- *static_cast<xml_allocator*>(xmldoc) = parser.alloc;
-
- // since we removed last character, we have to handle the only possible false positive
- if (result && endch == '<')
- {
- // there's no possible well-formed document with < at the end
- return make_parse_result(status_unrecognized_tag, length);
- }
-
- return result;
- }
- };
-
- // Output facilities
- PUGI__FN xml_encoding get_write_native_encoding()
- {
- #ifdef PUGIXML_WCHAR_MODE
- return get_wchar_encoding();
- #else
- return encoding_utf8;
- #endif
- }
-
- PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
- {
- // replace wchar encoding with utf implementation
- if (encoding == encoding_wchar) return get_wchar_encoding();
-
- // replace utf16 encoding with utf16 with specific endianness
- if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
-
- // replace utf32 encoding with utf32 with specific endianness
- if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
-
- // only do autodetection if no explicit encoding is requested
- if (encoding != encoding_auto) return encoding;
-
- // assume utf8 encoding
- return encoding_utf8;
- }
+ // only do autodetection if no explicit encoding is requested
+ if (encoding != encoding_auto) return encoding;
+
+ // assume utf8 encoding
+ return encoding_utf8;
+}
#ifdef PUGIXML_WCHAR_MODE
- PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
- {
- assert(length > 0);
+PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
+{
+ assert(length > 0);
- // discard last character if it's the lead of a surrogate pair
- return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
- }
+ // discard last character if it's the lead of a surrogate pair
+ return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
+}
- PUGI__FN size_t convert_buffer(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
- {
- // only endian-swapping is required
- if (need_endian_swap_utf(encoding, get_wchar_encoding()))
- {
- convert_wchar_endian_swap(r_char, data, length);
+PUGI__FN size_t convert_buffer(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
+{
+ // only endian-swapping is required
+ if (need_endian_swap_utf(encoding, get_wchar_encoding())) {
+ convert_wchar_endian_swap(r_char, data, length);
- return length * sizeof(char_t);
- }
-
- // convert to utf8
- if (encoding == encoding_utf8)
- {
- uint8_t* dest = r_u8;
- uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(data, length, dest);
+ return length * sizeof(char_t);
+ }
- return static_cast<size_t>(end - dest);
- }
+ // convert to utf8
+ if (encoding == encoding_utf8) {
+ uint8_t* dest = r_u8;
+ uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(data, length, dest);
- // convert to utf16
- if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
- {
- uint16_t* dest = r_u16;
+ return static_cast<size_t>(end - dest);
+ }
- // convert to native utf16
- uint16_t* end = utf_decoder<utf16_writer>::decode_wchar_block(data, length, dest);
+ // convert to utf16
+ if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) {
+ uint16_t* dest = r_u16;
- // swap if necessary
- xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+ // convert to native utf16
+ uint16_t* end = utf_decoder<utf16_writer>::decode_wchar_block(data, length, dest);
- if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
+ // swap if necessary
+ xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
- return static_cast<size_t>(end - dest) * sizeof(uint16_t);
- }
+ if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
- // convert to utf32
- if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
- {
- uint32_t* dest = r_u32;
+ return static_cast<size_t>(end - dest) * sizeof(uint16_t);
+ }
- // convert to native utf32
- uint32_t* end = utf_decoder<utf32_writer>::decode_wchar_block(data, length, dest);
+ // convert to utf32
+ if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) {
+ uint32_t* dest = r_u32;
- // swap if necessary
- xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+ // convert to native utf32
+ uint32_t* end = utf_decoder<utf32_writer>::decode_wchar_block(data, length, dest);
- if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
+ // swap if necessary
+ xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
- return static_cast<size_t>(end - dest) * sizeof(uint32_t);
- }
+ if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
- // convert to latin1
- if (encoding == encoding_latin1)
- {
- uint8_t* dest = r_u8;
- uint8_t* end = utf_decoder<latin1_writer>::decode_wchar_block(data, length, dest);
+ return static_cast<size_t>(end - dest) * sizeof(uint32_t);
+ }
- return static_cast<size_t>(end - dest);
- }
+ // convert to latin1
+ if (encoding == encoding_latin1) {
+ uint8_t* dest = r_u8;
+ uint8_t* end = utf_decoder<latin1_writer>::decode_wchar_block(data, length, dest);
- assert(!"Invalid encoding");
- return 0;
- }
+ return static_cast<size_t>(end - dest);
+ }
+
+ assert(!"Invalid encoding");
+ return 0;
+}
#else
- PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
- {
- assert(length > 4);
+PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
+{
+ assert(length > 4);
+
+ for (size_t i = 1; i <= 4; ++i) {
+ uint8_t ch = static_cast<uint8_t>(data[length - i]);
- for (size_t i = 1; i <= 4; ++i)
- {
- uint8_t ch = static_cast<uint8_t>(data[length - i]);
+ // either a standalone character or a leading one
+ if ((ch & 0xc0) != 0x80) return length - i;
+ }
- // either a standalone character or a leading one
- if ((ch & 0xc0) != 0x80) return length - i;
- }
+ // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
+ return length;
+}
- // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
- return length;
- }
+PUGI__FN size_t convert_buffer(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
+{
+ if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) {
+ uint16_t* dest = r_u16;
- PUGI__FN size_t convert_buffer(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
- {
- if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
- {
- uint16_t* dest = r_u16;
+ // convert to native utf16
+ uint16_t* end = utf_decoder<utf16_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
- // convert to native utf16
- uint16_t* end = utf_decoder<utf16_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
+ // swap if necessary
+ xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
- // swap if necessary
- xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+ if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
- if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
+ return static_cast<size_t>(end - dest) * sizeof(uint16_t);
+ }
- return static_cast<size_t>(end - dest) * sizeof(uint16_t);
- }
+ if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) {
+ uint32_t* dest = r_u32;
- if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
- {
- uint32_t* dest = r_u32;
+ // convert to native utf32
+ uint32_t* end = utf_decoder<utf32_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
- // convert to native utf32
- uint32_t* end = utf_decoder<utf32_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
+ // swap if necessary
+ xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
- // swap if necessary
- xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+ if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
- if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
+ return static_cast<size_t>(end - dest) * sizeof(uint32_t);
+ }
- return static_cast<size_t>(end - dest) * sizeof(uint32_t);
- }
+ if (encoding == encoding_latin1) {
+ uint8_t* dest = r_u8;
+ uint8_t* end = utf_decoder<latin1_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
- if (encoding == encoding_latin1)
- {
- uint8_t* dest = r_u8;
- uint8_t* end = utf_decoder<latin1_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
+ return static_cast<size_t>(end - dest);
+ }
- return static_cast<size_t>(end - dest);
- }
+ assert(!"Invalid encoding");
+ return 0;
+}
+#endif
- assert(!"Invalid encoding");
- return 0;
- }
+class xml_buffered_writer
+{
+ xml_buffered_writer(const xml_buffered_writer&);
+ xml_buffered_writer& operator=(const xml_buffered_writer&);
+
+public:
+ xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding)) {
+ PUGI__STATIC_ASSERT(bufcapacity >= 8);
+ }
+
+ ~xml_buffered_writer() {
+ flush();
+ }
+
+ void flush() {
+ flush(buffer, bufsize);
+ bufsize = 0;
+ }
+
+ void flush(const char_t* data, size_t size) {
+ if (size == 0) return;
+
+ // fast path, just write data
+ if (encoding == get_write_native_encoding())
+ writer.write(data, size * sizeof(char_t));
+ else {
+ // convert chunk
+ size_t result = convert_buffer(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
+ assert(result <= sizeof(scratch));
+
+ // write data
+ writer.write(scratch.data_u8, result);
+ }
+ }
+
+ void write(const char_t* data, size_t length) {
+ if (bufsize + length > bufcapacity) {
+ // flush the remaining buffer contents
+ flush();
+
+ // handle large chunks
+ if (length > bufcapacity) {
+ if (encoding == get_write_native_encoding()) {
+ // fast path, can just write data chunk
+ writer.write(data, length * sizeof(char_t));
+ return;
+ }
+
+ // need to convert in suitable chunks
+ while (length > bufcapacity) {
+ // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
+ // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
+ size_t chunk_size = get_valid_length(data, bufcapacity);
+
+ // convert chunk and write
+ flush(data, chunk_size);
+
+ // iterate
+ data += chunk_size;
+ length -= chunk_size;
+ }
+
+ // small tail is copied below
+ bufsize = 0;
+ }
+ }
+
+ memcpy(buffer + bufsize, data, length * sizeof(char_t));
+ bufsize += length;
+ }
+
+ void write(const char_t* data) {
+ write(data, strlength(data));
+ }
+
+ void write(char_t d0) {
+ if (bufsize + 1 > bufcapacity) flush();
+
+ buffer[bufsize + 0] = d0;
+ bufsize += 1;
+ }
+
+ void write(char_t d0, char_t d1) {
+ if (bufsize + 2 > bufcapacity) flush();
+
+ buffer[bufsize + 0] = d0;
+ buffer[bufsize + 1] = d1;
+ bufsize += 2;
+ }
+
+ void write(char_t d0, char_t d1, char_t d2) {
+ if (bufsize + 3 > bufcapacity) flush();
+
+ buffer[bufsize + 0] = d0;
+ buffer[bufsize + 1] = d1;
+ buffer[bufsize + 2] = d2;
+ bufsize += 3;
+ }
+
+ void write(char_t d0, char_t d1, char_t d2, char_t d3) {
+ if (bufsize + 4 > bufcapacity) flush();
+
+ buffer[bufsize + 0] = d0;
+ buffer[bufsize + 1] = d1;
+ buffer[bufsize + 2] = d2;
+ buffer[bufsize + 3] = d3;
+ bufsize += 4;
+ }
+
+ void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) {
+ if (bufsize + 5 > bufcapacity) flush();
+
+ buffer[bufsize + 0] = d0;
+ buffer[bufsize + 1] = d1;
+ buffer[bufsize + 2] = d2;
+ buffer[bufsize + 3] = d3;
+ buffer[bufsize + 4] = d4;
+ bufsize += 5;
+ }
+
+ void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) {
+ if (bufsize + 6 > bufcapacity) flush();
+
+ buffer[bufsize + 0] = d0;
+ buffer[bufsize + 1] = d1;
+ buffer[bufsize + 2] = d2;
+ buffer[bufsize + 3] = d3;
+ buffer[bufsize + 4] = d4;
+ buffer[bufsize + 5] = d5;
+ bufsize += 6;
+ }
+
+ // utf8 maximum expansion: x4 (-> utf32)
+ // utf16 maximum expansion: x2 (-> utf32)
+ // utf32 maximum expansion: x1
+ enum {
+ bufcapacitybytes =
+#ifdef PUGIXML_MEMORY_OUTPUT_STACK
+ PUGIXML_MEMORY_OUTPUT_STACK
+#else
+ 10240
#endif
+ ,
+ bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
+ };
+
+ char_t buffer[bufcapacity];
- class xml_buffered_writer
- {
- xml_buffered_writer(const xml_buffered_writer&);
- xml_buffered_writer& operator=(const xml_buffered_writer&);
-
- public:
- xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
- {
- PUGI__STATIC_ASSERT(bufcapacity >= 8);
- }
-
- ~xml_buffered_writer()
- {
- flush();
- }
-
- void flush()
- {
- flush(buffer, bufsize);
- bufsize = 0;
- }
-
- void flush(const char_t* data, size_t size)
- {
- if (size == 0) return;
-
- // fast path, just write data
- if (encoding == get_write_native_encoding())
- writer.write(data, size * sizeof(char_t));
- else
- {
- // convert chunk
- size_t result = convert_buffer(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
- assert(result <= sizeof(scratch));
-
- // write data
- writer.write(scratch.data_u8, result);
- }
- }
-
- void write(const char_t* data, size_t length)
- {
- if (bufsize + length > bufcapacity)
- {
- // flush the remaining buffer contents
- flush();
-
- // handle large chunks
- if (length > bufcapacity)
- {
- if (encoding == get_write_native_encoding())
- {
- // fast path, can just write data chunk
- writer.write(data, length * sizeof(char_t));
- return;
- }
-
- // need to convert in suitable chunks
- while (length > bufcapacity)
- {
- // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
- // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
- size_t chunk_size = get_valid_length(data, bufcapacity);
-
- // convert chunk and write
- flush(data, chunk_size);
-
- // iterate
- data += chunk_size;
- length -= chunk_size;
- }
-
- // small tail is copied below
- bufsize = 0;
- }
- }
-
- memcpy(buffer + bufsize, data, length * sizeof(char_t));
- bufsize += length;
- }
-
- void write(const char_t* data)
- {
- write(data, strlength(data));
- }
-
- void write(char_t d0)
- {
- if (bufsize + 1 > bufcapacity) flush();
-
- buffer[bufsize + 0] = d0;
- bufsize += 1;
- }
-
- void write(char_t d0, char_t d1)
- {
- if (bufsize + 2 > bufcapacity) flush();
-
- buffer[bufsize + 0] = d0;
- buffer[bufsize + 1] = d1;
- bufsize += 2;
- }
-
- void write(char_t d0, char_t d1, char_t d2)
- {
- if (bufsize + 3 > bufcapacity) flush();
-
- buffer[bufsize + 0] = d0;
- buffer[bufsize + 1] = d1;
- buffer[bufsize + 2] = d2;
- bufsize += 3;
- }
-
- void write(char_t d0, char_t d1, char_t d2, char_t d3)
- {
- if (bufsize + 4 > bufcapacity) flush();
-
- buffer[bufsize + 0] = d0;
- buffer[bufsize + 1] = d1;
- buffer[bufsize + 2] = d2;
- buffer[bufsize + 3] = d3;
- bufsize += 4;
- }
-
- void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
- {
- if (bufsize + 5 > bufcapacity) flush();
-
- buffer[bufsize + 0] = d0;
- buffer[bufsize + 1] = d1;
- buffer[bufsize + 2] = d2;
- buffer[bufsize + 3] = d3;
- buffer[bufsize + 4] = d4;
- bufsize += 5;
- }
-
- void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
- {
- if (bufsize + 6 > bufcapacity) flush();
-
- buffer[bufsize + 0] = d0;
- buffer[bufsize + 1] = d1;
- buffer[bufsize + 2] = d2;
- buffer[bufsize + 3] = d3;
- buffer[bufsize + 4] = d4;
- buffer[bufsize + 5] = d5;
- bufsize += 6;
- }
-
- // utf8 maximum expansion: x4 (-> utf32)
- // utf16 maximum expansion: x2 (-> utf32)
- // utf32 maximum expansion: x1
- enum
- {
- bufcapacitybytes =
- #ifdef PUGIXML_MEMORY_OUTPUT_STACK
- PUGIXML_MEMORY_OUTPUT_STACK
- #else
- 10240
- #endif
- ,
- bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
- };
-
- char_t buffer[bufcapacity];
-
- union
- {
- uint8_t data_u8[4 * bufcapacity];
- uint16_t data_u16[2 * bufcapacity];
- uint32_t data_u32[bufcapacity];
- char_t data_char[bufcapacity];
- } scratch;
-
- xml_writer& writer;
- size_t bufsize;
- xml_encoding encoding;
- };
-
- PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
- {
- while (*s)
- {
- const char_t* prev = s;
-
- // While *s is a usual symbol
- while (!PUGI__IS_CHARTYPEX(*s, type)) ++s;
-
- writer.write(prev, static_cast<size_t>(s - prev));
-
- switch (*s)
- {
- case 0: break;
- case '&':
- writer.write('&', 'a', 'm', 'p', ';');
- ++s;
- break;
- case '<':
- writer.write('&', 'l', 't', ';');
- ++s;
- break;
- case '>':
- writer.write('&', 'g', 't', ';');
- ++s;
- break;
- case '"':
- writer.write('&', 'q', 'u', 'o', 't', ';');
- ++s;
- break;
- default: // s is not a usual symbol
- {
- unsigned int ch = static_cast<unsigned int>(*s++);
- assert(ch < 32);
-
- writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
- }
- }
- }
- }
-
- PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
- {
- if (flags & format_no_escapes)
- writer.write(s);
- else
- text_output_escaped(writer, s, type);
- }
-
- PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
- {
- do
- {
- writer.write('<', '!', '[', 'C', 'D');
- writer.write('A', 'T', 'A', '[');
-
- const char_t* prev = s;
-
- // look for ]]> sequence - we can't output it as is since it terminates CDATA
- while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
-
- // skip ]] if we stopped at ]]>, > will go to the next CDATA section
- if (*s) s += 2;
-
- writer.write(prev, static_cast<size_t>(s - prev));
-
- writer.write(']', ']', '>');
- }
- while (*s);
- }
-
- PUGI__FN void node_output_attributes(xml_buffered_writer& writer, const xml_node& node, unsigned int flags)
- {
- const char_t* default_name = PUGIXML_TEXT(":anonymous");
-
- for (xml_attribute a = node.first_attribute(); a; a = a.next_attribute())
- {
- writer.write(' ');
- writer.write(a.name()[0] ? a.name() : default_name);
- writer.write('=', '"');
-
- text_output(writer, a.value(), ctx_special_attr, flags);
-
- writer.write('"');
- }
- }
-
- PUGI__FN void node_output(xml_buffered_writer& writer, const xml_node& node, const char_t* indent, unsigned int flags, unsigned int depth)
- {
- const char_t* default_name = PUGIXML_TEXT(":anonymous");
-
- if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
- for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
-
- switch (node.type())
- {
- case node_document:
- {
- for (xml_node n = node.first_child(); n; n = n.next_sibling())
- node_output(writer, n, indent, flags, depth);
- break;
- }
-
- case node_element:
- {
- const char_t* name = node.name()[0] ? node.name() : default_name;
-
- writer.write('<');
- writer.write(name);
-
- node_output_attributes(writer, node, flags);
-
- if (flags & format_raw)
- {
- if (!node.first_child())
- writer.write(' ', '/', '>');
- else
- {
- writer.write('>');
-
- for (xml_node n = node.first_child(); n; n = n.next_sibling())
- node_output(writer, n, indent, flags, depth + 1);
-
- writer.write('<', '/');
- writer.write(name);
- writer.write('>');
- }
- }
- else if (!node.first_child())
- writer.write(' ', '/', '>', '\n');
- else if (node.first_child() == node.last_child() && (node.first_child().type() == node_pcdata || node.first_child().type() == node_cdata))
- {
- writer.write('>');
-
- if (node.first_child().type() == node_pcdata)
- text_output(writer, node.first_child().value(), ctx_special_pcdata, flags);
- else
- text_output_cdata(writer, node.first_child().value());
-
- writer.write('<', '/');
- writer.write(name);
- writer.write('>', '\n');
- }
- else
- {
- writer.write('>', '\n');
-
- for (xml_node n = node.first_child(); n; n = n.next_sibling())
- node_output(writer, n, indent, flags, depth + 1);
-
- if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
- for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
-
- writer.write('<', '/');
- writer.write(name);
- writer.write('>', '\n');
- }
-
- break;
- }
-
- case node_pcdata:
- text_output(writer, node.value(), ctx_special_pcdata, flags);
- if ((flags & format_raw) == 0) writer.write('\n');
- break;
-
- case node_cdata:
- text_output_cdata(writer, node.value());
- if ((flags & format_raw) == 0) writer.write('\n');
- break;
-
- case node_comment:
- writer.write('<', '!', '-', '-');
- writer.write(node.value());
- writer.write('-', '-', '>');
- if ((flags & format_raw) == 0) writer.write('\n');
- break;
-
- case node_pi:
- case node_declaration:
- writer.write('<', '?');
- writer.write(node.name()[0] ? node.name() : default_name);
-
- if (node.type() == node_declaration)
- {
- node_output_attributes(writer, node, flags);
- }
- else if (node.value()[0])
- {
- writer.write(' ');
- writer.write(node.value());
- }
-
- writer.write('?', '>');
- if ((flags & format_raw) == 0) writer.write('\n');
- break;
-
- case node_doctype:
- writer.write('<', '!', 'D', 'O', 'C');
- writer.write('T', 'Y', 'P', 'E');
-
- if (node.value()[0])
- {
- writer.write(' ');
- writer.write(node.value());
- }
-
- writer.write('>');
- if ((flags & format_raw) == 0) writer.write('\n');
- break;
-
- default:
- assert(!"Invalid node type");
- }
- }
-
- inline bool has_declaration(const xml_node& node)
- {
- for (xml_node child = node.first_child(); child; child = child.next_sibling())
- {
- xml_node_type type = child.type();
-
- if (type == node_declaration) return true;
- if (type == node_element) return false;
- }
-
- return false;
- }
-
- inline bool allow_insert_child(xml_node_type parent, xml_node_type child)
- {
- if (parent != node_document && parent != node_element) return false;
- if (child == node_document || child == node_null) return false;
- if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
-
- return true;
- }
-
- PUGI__FN void recursive_copy_skip(xml_node& dest, const xml_node& source, const xml_node& skip)
- {
- assert(dest.type() == source.type());
-
- switch (source.type())
- {
- case node_element:
- {
- dest.set_name(source.name());
-
- for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())
- dest.append_attribute(a.name()).set_value(a.value());
-
- for (xml_node c = source.first_child(); c; c = c.next_sibling())
- {
- if (c == skip) continue;
-
- xml_node cc = dest.append_child(c.type());
- assert(cc);
-
- recursive_copy_skip(cc, c, skip);
- }
-
- break;
- }
-
- case node_pcdata:
- case node_cdata:
- case node_comment:
- case node_doctype:
- dest.set_value(source.value());
- break;
-
- case node_pi:
- dest.set_name(source.name());
- dest.set_value(source.value());
- break;
-
- case node_declaration:
- {
- dest.set_name(source.name());
-
- for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())
- dest.append_attribute(a.name()).set_value(a.value());
-
- break;
- }
-
- default:
- assert(!"Invalid node type");
- }
- }
-
- inline bool is_text_node(xml_node_struct* node)
- {
- xml_node_type type = static_cast<xml_node_type>((node->header & impl::xml_memory_page_type_mask) + 1);
-
- return type == node_pcdata || type == node_cdata;
- }
-
- // get value with conversion functions
- PUGI__FN int get_value_int(const char_t* value, int def)
- {
- if (!value) return def;
-
- #ifdef PUGIXML_WCHAR_MODE
- return static_cast<int>(wcstol(value, 0, 10));
- #else
- return static_cast<int>(strtol(value, 0, 10));
- #endif
- }
-
- PUGI__FN unsigned int get_value_uint(const char_t* value, unsigned int def)
- {
- if (!value) return def;
-
- #ifdef PUGIXML_WCHAR_MODE
- return static_cast<unsigned int>(wcstoul(value, 0, 10));
- #else
- return static_cast<unsigned int>(strtoul(value, 0, 10));
- #endif
- }
-
- PUGI__FN double get_value_double(const char_t* value, double def)
- {
- if (!value) return def;
-
- #ifdef PUGIXML_WCHAR_MODE
- return wcstod(value, 0);
- #else
- return strtod(value, 0);
- #endif
- }
-
- PUGI__FN float get_value_float(const char_t* value, float def)
- {
- if (!value) return def;
-
- #ifdef PUGIXML_WCHAR_MODE
- return static_cast<float>(wcstod(value, 0));
- #else
- return static_cast<float>(strtod(value, 0));
- #endif
- }
-
- PUGI__FN bool get_value_bool(const char_t* value, bool def)
- {
- if (!value) return def;
-
- // only look at first char
- char_t first = *value;
-
- // 1*, t* (true), T* (True), y* (yes), Y* (YES)
- return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
- }
-
- // set value with conversion functions
- PUGI__FN bool set_value_buffer(char_t*& dest, uintptr_t& header, uintptr_t header_mask, char (&buf)[128])
- {
- #ifdef PUGIXML_WCHAR_MODE
- char_t wbuf[128];
- impl::widen_ascii(wbuf, buf);
-
- return strcpy_insitu(dest, header, header_mask, wbuf);
- #else
- return strcpy_insitu(dest, header, header_mask, buf);
- #endif
- }
-
- PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, int value)
- {
- char buf[128];
- sprintf(buf, "%d", value);
-
- return set_value_buffer(dest, header, header_mask, buf);
- }
-
- PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, unsigned int value)
- {
- char buf[128];
- sprintf(buf, "%u", value);
-
- return set_value_buffer(dest, header, header_mask, buf);
- }
-
- PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, double value)
- {
- char buf[128];
- sprintf(buf, "%g", value);
-
- return set_value_buffer(dest, header, header_mask, buf);
- }
-
- PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, bool value)
- {
- return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
- }
-
- // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
- PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
- {
- #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
- // there are 64-bit versions of fseek/ftell, let's use them
- typedef __int64 length_type;
-
- _fseeki64(file, 0, SEEK_END);
- length_type length = _ftelli64(file);
- _fseeki64(file, 0, SEEK_SET);
- #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && !defined(__STRICT_ANSI__)
- // there are 64-bit versions of fseek/ftell, let's use them
- typedef off64_t length_type;
-
- fseeko64(file, 0, SEEK_END);
- length_type length = ftello64(file);
- fseeko64(file, 0, SEEK_SET);
- #else
- // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
- typedef long length_type;
-
- fseek(file, 0, SEEK_END);
- length_type length = ftell(file);
- fseek(file, 0, SEEK_SET);
- #endif
-
- // check for I/O errors
- if (length < 0) return status_io_error;
-
- // check for overflow
- size_t result = static_cast<size_t>(length);
-
- if (static_cast<length_type>(result) != length) return status_out_of_memory;
-
- // finalize
- out_result = result;
-
- return status_ok;
- }
-
- PUGI__FN xml_parse_result load_file_impl(xml_document& doc, FILE* file, unsigned int options, xml_encoding encoding)
- {
- if (!file) return make_parse_result(status_file_not_found);
-
- // get file size (can result in I/O errors)
- size_t size = 0;
- xml_parse_status size_status = get_file_size(file, size);
-
- if (size_status != status_ok)
- {
- fclose(file);
- return make_parse_result(size_status);
- }
-
- // allocate buffer for the whole file
- char* contents = static_cast<char*>(xml_memory::allocate(size > 0 ? size : 1));
-
- if (!contents)
- {
- fclose(file);
- return make_parse_result(status_out_of_memory);
- }
-
- // read file in memory
- size_t read_size = fread(contents, 1, size, file);
- fclose(file);
-
- if (read_size != size)
- {
- xml_memory::deallocate(contents);
- return make_parse_result(status_io_error);
- }
-
- return doc.load_buffer_inplace_own(contents, size, options, encoding);
- }
+ union {
+ uint8_t data_u8[4 * bufcapacity];
+ uint16_t data_u16[2 * bufcapacity];
+ uint32_t data_u32[bufcapacity];
+ char_t data_char[bufcapacity];
+ } scratch;
+
+ xml_writer& writer;
+ size_t bufsize;
+ xml_encoding encoding;
+};
+
+PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
+{
+ while (*s) {
+ const char_t* prev = s;
+
+ // While *s is a usual symbol
+ while (!PUGI__IS_CHARTYPEX(*s, type)) ++s;
+
+ writer.write(prev, static_cast<size_t>(s - prev));
+
+ switch (*s) {
+ case 0:
+ break;
+ case '&':
+ writer.write('&', 'a', 'm', 'p', ';');
+ ++s;
+ break;
+ case '<':
+ writer.write('&', 'l', 't', ';');
+ ++s;
+ break;
+ case '>':
+ writer.write('&', 'g', 't', ';');
+ ++s;
+ break;
+ case '"':
+ writer.write('&', 'q', 'u', 'o', 't', ';');
+ ++s;
+ break;
+ default: { // s is not a usual symbol
+ unsigned int ch = static_cast<unsigned int>(*s++);
+ assert(ch < 32);
+
+ writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
+ }
+ }
+ }
+}
+
+PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
+{
+ if (flags & format_no_escapes)
+ writer.write(s);
+ else
+ text_output_escaped(writer, s, type);
+}
+
+PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
+{
+ do {
+ writer.write('<', '!', '[', 'C', 'D');
+ writer.write('A', 'T', 'A', '[');
+
+ const char_t* prev = s;
+
+ // look for ]]> sequence - we can't output it as is since it terminates CDATA
+ while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
+
+ // skip ]] if we stopped at ]]>, > will go to the next CDATA section
+ if (*s) s += 2;
+
+ writer.write(prev, static_cast<size_t>(s - prev));
+
+ writer.write(']', ']', '>');
+ } while (*s);
+}
+
+PUGI__FN void node_output_attributes(xml_buffered_writer& writer, const xml_node& node, unsigned int flags)
+{
+ const char_t* default_name = PUGIXML_TEXT(":anonymous");
+
+ for (xml_attribute a = node.first_attribute(); a; a = a.next_attribute()) {
+ writer.write(' ');
+ writer.write(a.name()[0] ? a.name() : default_name);
+ writer.write('=', '"');
+
+ text_output(writer, a.value(), ctx_special_attr, flags);
+
+ writer.write('"');
+ }
+}
+
+PUGI__FN void node_output(xml_buffered_writer& writer, const xml_node& node, const char_t* indent, unsigned int flags, unsigned int depth)
+{
+ const char_t* default_name = PUGIXML_TEXT(":anonymous");
+
+ if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
+ for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
+
+ switch (node.type()) {
+ case node_document: {
+ for (xml_node n = node.first_child(); n; n = n.next_sibling())
+ node_output(writer, n, indent, flags, depth);
+ break;
+ }
+
+ case node_element: {
+ const char_t* name = node.name()[0] ? node.name() : default_name;
+
+ writer.write('<');
+ writer.write(name);
+
+ node_output_attributes(writer, node, flags);
+
+ if (flags & format_raw) {
+ if (!node.first_child())
+ writer.write(' ', '/', '>');
+ else {
+ writer.write('>');
+
+ for (xml_node n = node.first_child(); n; n = n.next_sibling())
+ node_output(writer, n, indent, flags, depth + 1);
+
+ writer.write('<', '/');
+ writer.write(name);
+ writer.write('>');
+ }
+ } else if (!node.first_child())
+ writer.write(' ', '/', '>', '\n');
+ else if (node.first_child() == node.last_child() && (node.first_child().type() == node_pcdata || node.first_child().type() == node_cdata)) {
+ writer.write('>');
+
+ if (node.first_child().type() == node_pcdata)
+ text_output(writer, node.first_child().value(), ctx_special_pcdata, flags);
+ else
+ text_output_cdata(writer, node.first_child().value());
+
+ writer.write('<', '/');
+ writer.write(name);
+ writer.write('>', '\n');
+ } else {
+ writer.write('>', '\n');
+
+ for (xml_node n = node.first_child(); n; n = n.next_sibling())
+ node_output(writer, n, indent, flags, depth + 1);
+
+ if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
+ for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
+
+ writer.write('<', '/');
+ writer.write(name);
+ writer.write('>', '\n');
+ }
+
+ break;
+ }
+
+ case node_pcdata:
+ text_output(writer, node.value(), ctx_special_pcdata, flags);
+ if ((flags & format_raw) == 0) writer.write('\n');
+ break;
+
+ case node_cdata:
+ text_output_cdata(writer, node.value());
+ if ((flags & format_raw) == 0) writer.write('\n');
+ break;
+
+ case node_comment:
+ writer.write('<', '!', '-', '-');
+ writer.write(node.value());
+ writer.write('-', '-', '>');
+ if ((flags & format_raw) == 0) writer.write('\n');
+ break;
+
+ case node_pi:
+ case node_declaration:
+ writer.write('<', '?');
+ writer.write(node.name()[0] ? node.name() : default_name);
+
+ if (node.type() == node_declaration) {
+ node_output_attributes(writer, node, flags);
+ } else if (node.value()[0]) {
+ writer.write(' ');
+ writer.write(node.value());
+ }
+
+ writer.write('?', '>');
+ if ((flags & format_raw) == 0) writer.write('\n');
+ break;
+
+ case node_doctype:
+ writer.write('<', '!', 'D', 'O', 'C');
+ writer.write('T', 'Y', 'P', 'E');
+
+ if (node.value()[0]) {
+ writer.write(' ');
+ writer.write(node.value());
+ }
+
+ writer.write('>');
+ if ((flags & format_raw) == 0) writer.write('\n');
+ break;
+
+ default:
+ assert(!"Invalid node type");
+ }
+}
+
+inline bool has_declaration(const xml_node& node)
+{
+ for (xml_node child = node.first_child(); child; child = child.next_sibling()) {
+ xml_node_type type = child.type();
+
+ if (type == node_declaration) return true;
+ if (type == node_element) return false;
+ }
+
+ return false;
+}
+
+inline bool allow_insert_child(xml_node_type parent, xml_node_type child)
+{
+ if (parent != node_document && parent != node_element) return false;
+ if (child == node_document || child == node_null) return false;
+ if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
+
+ return true;
+}
+
+PUGI__FN void recursive_copy_skip(xml_node& dest, const xml_node& source, const xml_node& skip)
+{
+ assert(dest.type() == source.type());
+
+ switch (source.type()) {
+ case node_element: {
+ dest.set_name(source.name());
+
+ for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())
+ dest.append_attribute(a.name()).set_value(a.value());
+
+ for (xml_node c = source.first_child(); c; c = c.next_sibling()) {
+ if (c == skip) continue;
+
+ xml_node cc = dest.append_child(c.type());
+ assert(cc);
+
+ recursive_copy_skip(cc, c, skip);
+ }
+
+ break;
+ }
+
+ case node_pcdata:
+ case node_cdata:
+ case node_comment:
+ case node_doctype:
+ dest.set_value(source.value());
+ break;
+
+ case node_pi:
+ dest.set_name(source.name());
+ dest.set_value(source.value());
+ break;
+
+ case node_declaration: {
+ dest.set_name(source.name());
+
+ for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())
+ dest.append_attribute(a.name()).set_value(a.value());
+
+ break;
+ }
+
+ default:
+ assert(!"Invalid node type");
+ }
+}
+
+inline bool is_text_node(xml_node_struct* node)
+{
+ xml_node_type type = static_cast<xml_node_type>((node->header & impl::xml_memory_page_type_mask) + 1);
+
+ return type == node_pcdata || type == node_cdata;
+}
+
+// get value with conversion functions
+PUGI__FN int get_value_int(const char_t* value, int def)
+{
+ if (!value) return def;
+
+#ifdef PUGIXML_WCHAR_MODE
+ return static_cast<int>(wcstol(value, 0, 10));
+#else
+ return static_cast<int>(strtol(value, 0, 10));
+#endif
+}
+
+PUGI__FN unsigned int get_value_uint(const char_t* value, unsigned int def)
+{
+ if (!value) return def;
+
+#ifdef PUGIXML_WCHAR_MODE
+ return static_cast<unsigned int>(wcstoul(value, 0, 10));
+#else
+ return static_cast<unsigned int>(strtoul(value, 0, 10));
+#endif
+}
+
+PUGI__FN double get_value_double(const char_t* value, double def)
+{
+ if (!value) return def;
+
+#ifdef PUGIXML_WCHAR_MODE
+ return wcstod(value, 0);
+#else
+ return strtod(value, 0);
+#endif
+}
+
+PUGI__FN float get_value_float(const char_t* value, float def)
+{
+ if (!value) return def;
+
+#ifdef PUGIXML_WCHAR_MODE
+ return static_cast<float>(wcstod(value, 0));
+#else
+ return static_cast<float>(strtod(value, 0));
+#endif
+}
+
+PUGI__FN bool get_value_bool(const char_t* value, bool def)
+{
+ if (!value) return def;
+
+ // only look at first char
+ char_t first = *value;
+
+ // 1*, t* (true), T* (True), y* (yes), Y* (YES)
+ return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
+}
+
+// set value with conversion functions
+PUGI__FN bool set_value_buffer(char_t*& dest, uintptr_t& header, uintptr_t header_mask, char (&buf)[128])
+{
+#ifdef PUGIXML_WCHAR_MODE
+ char_t wbuf[128];
+ impl::widen_ascii(wbuf, buf);
+
+ return strcpy_insitu(dest, header, header_mask, wbuf);
+#else
+ return strcpy_insitu(dest, header, header_mask, buf);
+#endif
+}
+
+PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, int value)
+{
+ char buf[128];
+ sprintf(buf, "%d", value);
+
+ return set_value_buffer(dest, header, header_mask, buf);
+}
+
+PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, unsigned int value)
+{
+ char buf[128];
+ sprintf(buf, "%u", value);
+
+ return set_value_buffer(dest, header, header_mask, buf);
+}
+
+PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, double value)
+{
+ char buf[128];
+ sprintf(buf, "%g", value);
+
+ return set_value_buffer(dest, header, header_mask, buf);
+}
+
+PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, bool value)
+{
+ return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
+}
+
+// we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
+PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
+{
+#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
+ // there are 64-bit versions of fseek/ftell, let's use them
+ typedef __int64 length_type;
+
+ _fseeki64(file, 0, SEEK_END);
+ length_type length = _ftelli64(file);
+ _fseeki64(file, 0, SEEK_SET);
+#elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && !defined(__STRICT_ANSI__)
+ // there are 64-bit versions of fseek/ftell, let's use them
+ typedef off64_t length_type;
+
+ fseeko64(file, 0, SEEK_END);
+ length_type length = ftello64(file);
+ fseeko64(file, 0, SEEK_SET);
+#else
+ // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
+ typedef long length_type;
+
+ fseek(file, 0, SEEK_END);
+ length_type length = ftell(file);
+ fseek(file, 0, SEEK_SET);
+#endif
+
+ // check for I/O errors
+ if (length < 0) return status_io_error;
+
+ // check for overflow
+ size_t result = static_cast<size_t>(length);
+
+ if (static_cast<length_type>(result) != length) return status_out_of_memory;
+
+ // finalize
+ out_result = result;
+
+ return status_ok;
+}
+
+PUGI__FN xml_parse_result load_file_impl(xml_document& doc, FILE* file, unsigned int options, xml_encoding encoding)
+{
+ if (!file) return make_parse_result(status_file_not_found);
+
+ // get file size (can result in I/O errors)
+ size_t size = 0;
+ xml_parse_status size_status = get_file_size(file, size);
+
+ if (size_status != status_ok) {
+ fclose(file);
+ return make_parse_result(size_status);
+ }
+
+ // allocate buffer for the whole file
+ char* contents = static_cast<char*>(xml_memory::allocate(size > 0 ? size : 1));
+
+ if (!contents) {
+ fclose(file);
+ return make_parse_result(status_out_of_memory);
+ }
+
+ // read file in memory
+ size_t read_size = fread(contents, 1, size, file);
+ fclose(file);
+
+ if (read_size != size) {
+ xml_memory::deallocate(contents);
+ return make_parse_result(status_io_error);
+ }
+
+ return doc.load_buffer_inplace_own(contents, size, options, encoding);
+}
#ifndef PUGIXML_NO_STL
- template <typename T> struct xml_stream_chunk
- {
- static xml_stream_chunk* create()
- {
- void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
-
- return new (memory) xml_stream_chunk();
- }
-
- static void destroy(void* ptr)
- {
- xml_stream_chunk* chunk = static_cast<xml_stream_chunk*>(ptr);
-
- // free chunk chain
- while (chunk)
- {
- xml_stream_chunk* next = chunk->next;
- xml_memory::deallocate(chunk);
- chunk = next;
- }
- }
-
- xml_stream_chunk(): next(0), size(0)
- {
- }
-
- xml_stream_chunk* next;
- size_t size;
-
- T data[xml_memory_page_size / sizeof(T)];
- };
-
- template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
- {
- buffer_holder chunks(0, xml_stream_chunk<T>::destroy);
-
- // read file to a chunk list
- size_t total = 0;
- xml_stream_chunk<T>* last = 0;
-
- while (!stream.eof())
- {
- // allocate new chunk
- xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
- if (!chunk) return status_out_of_memory;
-
- // append chunk to list
- if (last) last = last->next = chunk;
- else chunks.data = last = chunk;
-
- // read data to chunk
- stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
- chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
-
- // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
- if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
-
- // guard against huge files (chunk size is small enough to make this overflow check work)
- if (total + chunk->size < total) return status_out_of_memory;
- total += chunk->size;
- }
-
- // copy chunk list to a contiguous buffer
- char* buffer = static_cast<char*>(xml_memory::allocate(total));
- if (!buffer) return status_out_of_memory;
-
- char* write = buffer;
-
- for (xml_stream_chunk<T>* chunk = static_cast<xml_stream_chunk<T>*>(chunks.data); chunk; chunk = chunk->next)
- {
- assert(write + chunk->size <= buffer + total);
- memcpy(write, chunk->data, chunk->size);
- write += chunk->size;
- }
-
- assert(write == buffer + total);
-
- // return buffer
- *out_buffer = buffer;
- *out_size = total;
-
- return status_ok;
- }
-
- template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
- {
- // get length of remaining data in stream
- typename std::basic_istream<T>::pos_type pos = stream.tellg();
- stream.seekg(0, std::ios::end);
- std::streamoff length = stream.tellg() - pos;
- stream.seekg(pos);
-
- if (stream.fail() || pos < 0) return status_io_error;
-
- // guard against huge files
- size_t read_length = static_cast<size_t>(length);
-
- if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
-
- // read stream data into memory (guard against stream exceptions with buffer holder)
- buffer_holder buffer(xml_memory::allocate((read_length > 0 ? read_length : 1) * sizeof(T)), xml_memory::deallocate);
- if (!buffer.data) return status_out_of_memory;
+template <typename T> struct xml_stream_chunk {
+ static xml_stream_chunk* create() {
+ void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
+
+ return new (memory) xml_stream_chunk();
+ }
+
+ static void destroy(void* ptr) {
+ xml_stream_chunk* chunk = static_cast<xml_stream_chunk*>(ptr);
+
+ // free chunk chain
+ while (chunk) {
+ xml_stream_chunk* next = chunk->next;
+ xml_memory::deallocate(chunk);
+ chunk = next;
+ }
+ }
- stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
+ xml_stream_chunk(): next(0), size(0) {
+ }
- // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
- if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
+ xml_stream_chunk* next;
+ size_t size;
+
+ T data[xml_memory_page_size / sizeof(T)];
+};
+
+template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
+{
+ buffer_holder chunks(0, xml_stream_chunk<T>::destroy);
+
+ // read file to a chunk list
+ size_t total = 0;
+ xml_stream_chunk<T>* last = 0;
+
+ while (!stream.eof()) {
+ // allocate new chunk
+ xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
+ if (!chunk) return status_out_of_memory;
+
+ // append chunk to list
+ if (last) last = last->next = chunk;
+ else chunks.data = last = chunk;
+
+ // read data to chunk
+ stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
+ chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
+
+ // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
+ if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
+
+ // guard against huge files (chunk size is small enough to make this overflow check work)
+ if (total + chunk->size < total) return status_out_of_memory;
+ total += chunk->size;
+ }
+
+ // copy chunk list to a contiguous buffer
+ char* buffer = static_cast<char*>(xml_memory::allocate(total));
+ if (!buffer) return status_out_of_memory;
+
+ char* write = buffer;
+
+ for (xml_stream_chunk<T>* chunk = static_cast<xml_stream_chunk<T>*>(chunks.data); chunk; chunk = chunk->next) {
+ assert(write + chunk->size <= buffer + total);
+ memcpy(write, chunk->data, chunk->size);
+ write += chunk->size;
+ }
+
+ assert(write == buffer + total);
+
+ // return buffer
+ *out_buffer = buffer;
+ *out_size = total;
+
+ return status_ok;
+}
+
+template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
+{
+ // get length of remaining data in stream
+ typename std::basic_istream<T>::pos_type pos = stream.tellg();
+ stream.seekg(0, std::ios::end);
+ std::streamoff length = stream.tellg() - pos;
+ stream.seekg(pos);
- // return buffer
- size_t actual_length = static_cast<size_t>(stream.gcount());
- assert(actual_length <= read_length);
+ if (stream.fail() || pos < 0) return status_io_error;
- *out_buffer = buffer.release();
- *out_size = actual_length * sizeof(T);
+ // guard against huge files
+ size_t read_length = static_cast<size_t>(length);
- return status_ok;
- }
+ if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
- template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding)
- {
- void* buffer = 0;
- size_t size = 0;
+ // read stream data into memory (guard against stream exceptions with buffer holder)
+ buffer_holder buffer(xml_memory::allocate((read_length > 0 ? read_length : 1) * sizeof(T)), xml_memory::deallocate);
+ if (!buffer.data) return status_out_of_memory;
- // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
- xml_parse_status status = (stream.tellg() < 0) ? load_stream_data_noseek(stream, &buffer, &size) : load_stream_data_seek(stream, &buffer, &size);
- if (status != status_ok) return make_parse_result(status);
+ stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
- return doc.load_buffer_inplace_own(buffer, size, options, encoding);
- }
+ // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
+ if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
+
+ // return buffer
+ size_t actual_length = static_cast<size_t>(stream.gcount());
+ assert(actual_length <= read_length);
+
+ *out_buffer = buffer.release();
+ *out_size = actual_length * sizeof(T);
+
+ return status_ok;
+}
+
+template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding)
+{
+ void* buffer = 0;
+ size_t size = 0;
+
+ // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
+ xml_parse_status status = (stream.tellg() < 0) ? load_stream_data_noseek(stream, &buffer, &size) : load_stream_data_seek(stream, &buffer, &size);
+ if (status != status_ok) return make_parse_result(status);
+
+ return doc.load_buffer_inplace_own(buffer, size, options, encoding);
+}
#endif
#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && !defined(__STRICT_ANSI__))
- PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
- {
- return _wfopen(path, mode);
- }
+PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
+{
+ return _wfopen(path, mode);
+}
#else
- PUGI__FN char* convert_path_heap(const wchar_t* str)
- {
- assert(str);
+PUGI__FN char* convert_path_heap(const wchar_t* str)
+{
+ assert(str);
- // first pass: get length in utf8 characters
- size_t length = wcslen(str);
- size_t size = as_utf8_begin(str, length);
+ // first pass: get length in utf8 characters
+ size_t length = wcslen(str);
+ size_t size = as_utf8_begin(str, length);
- // allocate resulting string
- char* result = static_cast<char*>(xml_memory::allocate(size + 1));
- if (!result) return 0;
+ // allocate resulting string
+ char* result = static_cast<char*>(xml_memory::allocate(size + 1));
+ if (!result) return 0;
- // second pass: convert to utf8
- as_utf8_end(result, size, str, length);
+ // second pass: convert to utf8
+ as_utf8_end(result, size, str, length);
- return result;
- }
+ return result;
+}
- PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
- {
- // there is no standard function to open wide paths, so our best bet is to try utf8 path
- char* path_utf8 = convert_path_heap(path);
- if (!path_utf8) return 0;
+PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
+{
+ // there is no standard function to open wide paths, so our best bet is to try utf8 path
+ char* path_utf8 = convert_path_heap(path);
+ if (!path_utf8) return 0;
- // convert mode to ASCII (we mirror _wfopen interface)
- char mode_ascii[4] = {0};
- for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
+ // convert mode to ASCII (we mirror _wfopen interface)
+ char mode_ascii[4] = {0};
+ for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
- // try to open the utf8 path
- FILE* result = fopen(path_utf8, mode_ascii);
+ // try to open the utf8 path
+ FILE* result = fopen(path_utf8, mode_ascii);
- // free dummy buffer
- xml_memory::deallocate(path_utf8);
+ // free dummy buffer
+ xml_memory::deallocate(path_utf8);
- return result;
- }
+ return result;
+}
#endif
- PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
- {
- if (!file) return false;
+PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
+{
+ if (!file) return false;
- xml_writer_file writer(file);
- doc.save(writer, indent, flags, encoding);
+ xml_writer_file writer(file);
+ doc.save(writer, indent, flags, encoding);
- int result = ferror(file);
+ int result = ferror(file);
- fclose(file);
+ fclose(file);
- return result == 0;
- }
+ return result == 0;
+}
PUGI__NS_END
namespace pugi
{
- PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
- {
- }
+PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
+{
+}
- PUGI__FN void xml_writer_file::write(const void* data, size_t size)
- {
- size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
- (void)!result; // unfortunately we can't do proper error handling here
- }
+PUGI__FN void xml_writer_file::write(const void* data, size_t size)
+{
+ size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
+ (void)!result; // unfortunately we can't do proper error handling here
+}
#ifndef PUGIXML_NO_STL
- PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
- {
- }
-
- PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
- {
- }
-
- PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
- {
- if (narrow_stream)
- {
- assert(!wide_stream);
- narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
- }
- else
- {
- assert(wide_stream);
- assert(size % sizeof(wchar_t) == 0);
-
- wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
- }
- }
+PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
+{
+}
+
+PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
+{
+}
+
+PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
+{
+ if (narrow_stream) {
+ assert(!wide_stream);
+ narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
+ } else {
+ assert(wide_stream);
+ assert(size % sizeof(wchar_t) == 0);
+
+ wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
+ }
+}
#endif
- PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
- {
- }
-
- PUGI__FN xml_tree_walker::~xml_tree_walker()
- {
- }
-
- PUGI__FN int xml_tree_walker::depth() const
- {
- return _depth;
- }
-
- PUGI__FN bool xml_tree_walker::begin(xml_node&)
- {
- return true;
- }
-
- PUGI__FN bool xml_tree_walker::end(xml_node&)
- {
- return true;
- }
-
- PUGI__FN xml_attribute::xml_attribute(): _attr(0)
- {
- }
-
- PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
- {
- }
-
- PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
- {
- }
-
- PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
- {
- return _attr ? unspecified_bool_xml_attribute : 0;
- }
-
- PUGI__FN bool xml_attribute::operator!() const
- {
- return !_attr;
- }
-
- PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
- {
- return (_attr == r._attr);
- }
-
- PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
- {
- return (_attr != r._attr);
- }
-
- PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
- {
- return (_attr < r._attr);
- }
-
- PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
- {
- return (_attr > r._attr);
- }
-
- PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
- {
- return (_attr <= r._attr);
- }
-
- PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
- {
- return (_attr >= r._attr);
- }
-
- PUGI__FN xml_attribute xml_attribute::next_attribute() const
- {
- return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
- }
-
- PUGI__FN xml_attribute xml_attribute::previous_attribute() const
- {
- return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
- }
-
- PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
- {
- return (_attr && _attr->value) ? _attr->value : def;
- }
-
- PUGI__FN int xml_attribute::as_int(int def) const
- {
- return impl::get_value_int(_attr ? _attr->value : 0, def);
- }
-
- PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
- {
- return impl::get_value_uint(_attr ? _attr->value : 0, def);
- }
-
- PUGI__FN double xml_attribute::as_double(double def) const
- {
- return impl::get_value_double(_attr ? _attr->value : 0, def);
- }
-
- PUGI__FN float xml_attribute::as_float(float def) const
- {
- return impl::get_value_float(_attr ? _attr->value : 0, def);
- }
-
- PUGI__FN bool xml_attribute::as_bool(bool def) const
- {
- return impl::get_value_bool(_attr ? _attr->value : 0, def);
- }
-
- PUGI__FN bool xml_attribute::empty() const
- {
- return !_attr;
- }
-
- PUGI__FN const char_t* xml_attribute::name() const
- {
- return (_attr && _attr->name) ? _attr->name : PUGIXML_TEXT("");
- }
-
- PUGI__FN const char_t* xml_attribute::value() const
- {
- return (_attr && _attr->value) ? _attr->value : PUGIXML_TEXT("");
- }
-
- PUGI__FN size_t xml_attribute::hash_value() const
- {
- return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
- }
-
- PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
- {
- return _attr;
- }
-
- PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
- {
- set_value(rhs);
- return *this;
- }
-
- PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
- {
- set_value(rhs);
- return *this;
- }
-
- PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
- {
- set_value(rhs);
- return *this;
- }
-
- PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
- {
- set_value(rhs);
- return *this;
- }
-
- PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
- {
- set_value(rhs);
- return *this;
- }
-
- PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
- {
- if (!_attr) return false;
-
- return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs);
- }
-
- PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
- {
- if (!_attr) return false;
-
- return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
- }
-
- PUGI__FN bool xml_attribute::set_value(int rhs)
- {
- if (!_attr) return false;
-
- return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
- }
-
- PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
- {
- if (!_attr) return false;
-
- return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
- }
-
- PUGI__FN bool xml_attribute::set_value(double rhs)
- {
- if (!_attr) return false;
-
- return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
- }
-
- PUGI__FN bool xml_attribute::set_value(bool rhs)
- {
- if (!_attr) return false;
-
- return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
- }
+PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
+{
+}
+
+PUGI__FN xml_tree_walker::~xml_tree_walker()
+{
+}
+
+PUGI__FN int xml_tree_walker::depth() const
+{
+ return _depth;
+}
+
+PUGI__FN bool xml_tree_walker::begin(xml_node&)
+{
+ return true;
+}
+
+PUGI__FN bool xml_tree_walker::end(xml_node&)
+{
+ return true;
+}
+
+PUGI__FN xml_attribute::xml_attribute(): _attr(0)
+{
+}
+
+PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
+{
+}
+
+PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
+{
+}
+
+PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
+{
+ return _attr ? unspecified_bool_xml_attribute : 0;
+}
+
+PUGI__FN bool xml_attribute::operator!() const
+{
+ return !_attr;
+}
+
+PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
+{
+ return (_attr == r._attr);
+}
+
+PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
+{
+ return (_attr != r._attr);
+}
+
+PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
+{
+ return (_attr < r._attr);
+}
+
+PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
+{
+ return (_attr > r._attr);
+}
+
+PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
+{
+ return (_attr <= r._attr);
+}
+
+PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
+{
+ return (_attr >= r._attr);
+}
+
+PUGI__FN xml_attribute xml_attribute::next_attribute() const
+{
+ return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
+}
+
+PUGI__FN xml_attribute xml_attribute::previous_attribute() const
+{
+ return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
+}
+
+PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
+{
+ return (_attr && _attr->value) ? _attr->value : def;
+}
+
+PUGI__FN int xml_attribute::as_int(int def) const
+{
+ return impl::get_value_int(_attr ? _attr->value : 0, def);
+}
+
+PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
+{
+ return impl::get_value_uint(_attr ? _attr->value : 0, def);
+}
+
+PUGI__FN double xml_attribute::as_double(double def) const
+{
+ return impl::get_value_double(_attr ? _attr->value : 0, def);
+}
+
+PUGI__FN float xml_attribute::as_float(float def) const
+{
+ return impl::get_value_float(_attr ? _attr->value : 0, def);
+}
+
+PUGI__FN bool xml_attribute::as_bool(bool def) const
+{
+ return impl::get_value_bool(_attr ? _attr->value : 0, def);
+}
+
+PUGI__FN bool xml_attribute::empty() const
+{
+ return !_attr;
+}
+
+PUGI__FN const char_t* xml_attribute::name() const
+{
+ return (_attr && _attr->name) ? _attr->name : PUGIXML_TEXT("");
+}
+
+PUGI__FN const char_t* xml_attribute::value() const
+{
+ return (_attr && _attr->value) ? _attr->value : PUGIXML_TEXT("");
+}
+
+PUGI__FN size_t xml_attribute::hash_value() const
+{
+ return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
+}
+
+PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
+{
+ return _attr;
+}
+
+PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
+{
+ set_value(rhs);
+ return *this;
+}
+
+PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
+{
+ set_value(rhs);
+ return *this;
+}
+
+PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
+{
+ set_value(rhs);
+ return *this;
+}
+
+PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
+{
+ set_value(rhs);
+ return *this;
+}
+
+PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
+{
+ set_value(rhs);
+ return *this;
+}
+
+PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
+{
+ if (!_attr) return false;
+
+ return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs);
+}
+
+PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
+{
+ if (!_attr) return false;
+
+ return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+PUGI__FN bool xml_attribute::set_value(int rhs)
+{
+ if (!_attr) return false;
+
+ return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
+{
+ if (!_attr) return false;
+
+ return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+PUGI__FN bool xml_attribute::set_value(double rhs)
+{
+ if (!_attr) return false;
+
+ return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+PUGI__FN bool xml_attribute::set_value(bool rhs)
+{
+ if (!_attr) return false;
+
+ return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
#ifdef __BORLANDC__
- PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
- {
- return (bool)lhs && rhs;
- }
-
- PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
- {
- return (bool)lhs || rhs;
- }
+PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
+{
+ return (bool)lhs && rhs;
+}
+
+PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
+{
+ return (bool)lhs || rhs;
+}
#endif
- PUGI__FN xml_node::xml_node(): _root(0)
- {
- }
-
- PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
- {
- }
-
- PUGI__FN static void unspecified_bool_xml_node(xml_node***)
- {
- }
-
- PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
- {
- return _root ? unspecified_bool_xml_node : 0;
- }
-
- PUGI__FN bool xml_node::operator!() const
- {
- return !_root;
- }
-
- PUGI__FN xml_node::iterator xml_node::begin() const
- {
- return iterator(_root ? _root->first_child : 0, _root);
- }
-
- PUGI__FN xml_node::iterator xml_node::end() const
- {
- return iterator(0, _root);
- }
-
- PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
- {
- return attribute_iterator(_root ? _root->first_attribute : 0, _root);
- }
-
- PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
- {
- return attribute_iterator(0, _root);
- }
-
- PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
- {
- return xml_object_range<xml_node_iterator>(begin(), end());
- }
-
- PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
- {
- return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_), name_), xml_named_node_iterator());
- }
-
- PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
- {
- return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
- }
-
- PUGI__FN bool xml_node::operator==(const xml_node& r) const
- {
- return (_root == r._root);
- }
-
- PUGI__FN bool xml_node::operator!=(const xml_node& r) const
- {
- return (_root != r._root);
- }
-
- PUGI__FN bool xml_node::operator<(const xml_node& r) const
- {
- return (_root < r._root);
- }
-
- PUGI__FN bool xml_node::operator>(const xml_node& r) const
- {
- return (_root > r._root);
- }
-
- PUGI__FN bool xml_node::operator<=(const xml_node& r) const
- {
- return (_root <= r._root);
- }
-
- PUGI__FN bool xml_node::operator>=(const xml_node& r) const
- {
- return (_root >= r._root);
- }
-
- PUGI__FN bool xml_node::empty() const
- {
- return !_root;
- }
-
- PUGI__FN const char_t* xml_node::name() const
- {
- return (_root && _root->name) ? _root->name : PUGIXML_TEXT("");
- }
-
- PUGI__FN xml_node_type xml_node::type() const
- {
- return _root ? static_cast<xml_node_type>((_root->header & impl::xml_memory_page_type_mask) + 1) : node_null;
- }
-
- PUGI__FN const char_t* xml_node::value() const
- {
- return (_root && _root->value) ? _root->value : PUGIXML_TEXT("");
- }
-
- PUGI__FN xml_node xml_node::child(const char_t* name_) const
- {
- if (!_root) return xml_node();
-
- for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
- if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
-
- return xml_node();
- }
-
- PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
- {
- if (!_root) return xml_attribute();
-
- for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
- if (i->name && impl::strequal(name_, i->name))
- return xml_attribute(i);
-
- return xml_attribute();
- }
-
- PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
- {
- if (!_root) return xml_node();
-
- for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
- if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
-
- return xml_node();
- }
-
- PUGI__FN xml_node xml_node::next_sibling() const
- {
- if (!_root) return xml_node();
-
- if (_root->next_sibling) return xml_node(_root->next_sibling);
- else return xml_node();
- }
-
- PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
- {
- if (!_root) return xml_node();
-
- for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
- if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
-
- return xml_node();
- }
-
- PUGI__FN xml_node xml_node::previous_sibling() const
- {
- if (!_root) return xml_node();
-
- if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
- else return xml_node();
- }
-
- PUGI__FN xml_node xml_node::parent() const
- {
- return _root ? xml_node(_root->parent) : xml_node();
- }
-
- PUGI__FN xml_node xml_node::root() const
- {
- if (!_root) return xml_node();
-
- impl::xml_memory_page* page = reinterpret_cast<impl::xml_memory_page*>(_root->header & impl::xml_memory_page_pointer_mask);
-
- return xml_node(static_cast<impl::xml_document_struct*>(page->allocator));
- }
-
- PUGI__FN xml_text xml_node::text() const
- {
- return xml_text(_root);
- }
-
- PUGI__FN const char_t* xml_node::child_value() const
- {
- if (!_root) return PUGIXML_TEXT("");
-
- for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
- if (i->value && impl::is_text_node(i))
- return i->value;
-
- return PUGIXML_TEXT("");
- }
-
- PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
- {
- return child(name_).child_value();
- }
-
- PUGI__FN xml_attribute xml_node::first_attribute() const
- {
- return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
- }
-
- PUGI__FN xml_attribute xml_node::last_attribute() const
- {
- return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
- }
-
- PUGI__FN xml_node xml_node::first_child() const
- {
- return _root ? xml_node(_root->first_child) : xml_node();
- }
-
- PUGI__FN xml_node xml_node::last_child() const
- {
- return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
- }
-
- PUGI__FN bool xml_node::set_name(const char_t* rhs)
- {
- switch (type())
- {
- case node_pi:
- case node_declaration:
- case node_element:
- return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs);
-
- default:
- return false;
- }
- }
-
- PUGI__FN bool xml_node::set_value(const char_t* rhs)
- {
- switch (type())
- {
- case node_pi:
- case node_cdata:
- case node_pcdata:
- case node_comment:
- case node_doctype:
- return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs);
-
- default:
- return false;
- }
- }
-
- PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
- {
- if (type() != node_element && type() != node_declaration) return xml_attribute();
-
- xml_attribute a(impl::append_attribute_ll(_root, impl::get_allocator(_root)));
- a.set_name(name_);
-
- return a;
- }
-
- PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
- {
- if (type() != node_element && type() != node_declaration) return xml_attribute();
-
- xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
- if (!a) return xml_attribute();
-
- a.set_name(name_);
-
- xml_attribute_struct* head = _root->first_attribute;
-
- if (head)
- {
- a._attr->prev_attribute_c = head->prev_attribute_c;
- head->prev_attribute_c = a._attr;
- }
- else
- a._attr->prev_attribute_c = a._attr;
-
- a._attr->next_attribute = head;
- _root->first_attribute = a._attr;
-
- return a;
- }
-
- PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
- {
- if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();
-
- // check that attribute belongs to *this
- xml_attribute_struct* cur = attr._attr;
-
- while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;
-
- if (cur != _root->first_attribute) return xml_attribute();
-
- xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
- if (!a) return xml_attribute();
-
- a.set_name(name_);
-
- if (attr._attr->prev_attribute_c->next_attribute)
- attr._attr->prev_attribute_c->next_attribute = a._attr;
- else
- _root->first_attribute = a._attr;
-
- a._attr->prev_attribute_c = attr._attr->prev_attribute_c;
- a._attr->next_attribute = attr._attr;
- attr._attr->prev_attribute_c = a._attr;
-
- return a;
- }
-
- PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
- {
- if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();
-
- // check that attribute belongs to *this
- xml_attribute_struct* cur = attr._attr;
-
- while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;
-
- if (cur != _root->first_attribute) return xml_attribute();
-
- xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
- if (!a) return xml_attribute();
-
- a.set_name(name_);
-
- if (attr._attr->next_attribute)
- attr._attr->next_attribute->prev_attribute_c = a._attr;
- else
- _root->first_attribute->prev_attribute_c = a._attr;
-
- a._attr->next_attribute = attr._attr->next_attribute;
- a._attr->prev_attribute_c = attr._attr;
- attr._attr->next_attribute = a._attr;
-
- return a;
- }
-
- PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
- {
- if (!proto) return xml_attribute();
-
- xml_attribute result = append_attribute(proto.name());
- result.set_value(proto.value());
-
- return result;
- }
-
- PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
- {
- if (!proto) return xml_attribute();
-
- xml_attribute result = prepend_attribute(proto.name());
- result.set_value(proto.value());
-
- return result;
- }
-
- PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
- {
- if (!proto) return xml_attribute();
-
- xml_attribute result = insert_attribute_after(proto.name(), attr);
- result.set_value(proto.value());
-
- return result;
- }
-
- PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
- {
- if (!proto) return xml_attribute();
-
- xml_attribute result = insert_attribute_before(proto.name(), attr);
- result.set_value(proto.value());
-
- return result;
- }
-
- PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
- {
- if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
-
- xml_node n(impl::append_node(_root, impl::get_allocator(_root), type_));
-
- if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
-
- return n;
- }
-
- PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
- {
- if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
-
- xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
- if (!n) return xml_node();
-
- n._root->parent = _root;
-
- xml_node_struct* head = _root->first_child;
-
- if (head)
- {
- n._root->prev_sibling_c = head->prev_sibling_c;
- head->prev_sibling_c = n._root;
- }
- else
- n._root->prev_sibling_c = n._root;
-
- n._root->next_sibling = head;
- _root->first_child = n._root;
-
- if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
+PUGI__FN xml_node::xml_node(): _root(0)
+{
+}
+
+PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
+{
+}
+
+PUGI__FN static void unspecified_bool_xml_node(xml_node***)
+{
+}
+
+PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
+{
+ return _root ? unspecified_bool_xml_node : 0;
+}
+
+PUGI__FN bool xml_node::operator!() const
+{
+ return !_root;
+}
+
+PUGI__FN xml_node::iterator xml_node::begin() const
+{
+ return iterator(_root ? _root->first_child : 0, _root);
+}
+
+PUGI__FN xml_node::iterator xml_node::end() const
+{
+ return iterator(0, _root);
+}
+
+PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
+{
+ return attribute_iterator(_root ? _root->first_attribute : 0, _root);
+}
+
+PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
+{
+ return attribute_iterator(0, _root);
+}
+
+PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
+{
+ return xml_object_range<xml_node_iterator>(begin(), end());
+}
+
+PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
+{
+ return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_), name_), xml_named_node_iterator());
+}
+
+PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
+{
+ return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
+}
+
+PUGI__FN bool xml_node::operator==(const xml_node& r) const
+{
+ return (_root == r._root);
+}
+
+PUGI__FN bool xml_node::operator!=(const xml_node& r) const
+{
+ return (_root != r._root);
+}
+
+PUGI__FN bool xml_node::operator<(const xml_node& r) const
+{
+ return (_root < r._root);
+}
+
+PUGI__FN bool xml_node::operator>(const xml_node& r) const
+{
+ return (_root > r._root);
+}
+
+PUGI__FN bool xml_node::operator<=(const xml_node& r) const
+{
+ return (_root <= r._root);
+}
+
+PUGI__FN bool xml_node::operator>=(const xml_node& r) const
+{
+ return (_root >= r._root);
+}
+
+PUGI__FN bool xml_node::empty() const
+{
+ return !_root;
+}
+
+PUGI__FN const char_t* xml_node::name() const
+{
+ return (_root && _root->name) ? _root->name : PUGIXML_TEXT("");
+}
+
+PUGI__FN xml_node_type xml_node::type() const
+{
+ return _root ? static_cast<xml_node_type>((_root->header & impl::xml_memory_page_type_mask) + 1) : node_null;
+}
+
+PUGI__FN const char_t* xml_node::value() const
+{
+ return (_root && _root->value) ? _root->value : PUGIXML_TEXT("");
+}
+
+PUGI__FN xml_node xml_node::child(const char_t* name_) const
+{
+ if (!_root) return xml_node();
+
+ for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
+ if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
+
+ return xml_node();
+}
+
+PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
+{
+ if (!_root) return xml_attribute();
+
+ for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
+ if (i->name && impl::strequal(name_, i->name))
+ return xml_attribute(i);
+
+ return xml_attribute();
+}
+
+PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
+{
+ if (!_root) return xml_node();
+
+ for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
+ if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
+
+ return xml_node();
+}
+
+PUGI__FN xml_node xml_node::next_sibling() const
+{
+ if (!_root) return xml_node();
+
+ if (_root->next_sibling) return xml_node(_root->next_sibling);
+ else return xml_node();
+}
+
+PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
+{
+ if (!_root) return xml_node();
+
+ for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
+ if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
+
+ return xml_node();
+}
+
+PUGI__FN xml_node xml_node::previous_sibling() const
+{
+ if (!_root) return xml_node();
+
+ if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
+ else return xml_node();
+}
+
+PUGI__FN xml_node xml_node::parent() const
+{
+ return _root ? xml_node(_root->parent) : xml_node();
+}
+
+PUGI__FN xml_node xml_node::root() const
+{
+ if (!_root) return xml_node();
+
+ impl::xml_memory_page* page = reinterpret_cast<impl::xml_memory_page*>(_root->header & impl::xml_memory_page_pointer_mask);
- return n;
- }
+ return xml_node(static_cast<impl::xml_document_struct*>(page->allocator));
+}
- PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
- {
- if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
- if (!node._root || node._root->parent != _root) return xml_node();
-
- xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
- if (!n) return xml_node();
+PUGI__FN xml_text xml_node::text() const
+{
+ return xml_text(_root);
+}
- n._root->parent = _root;
-
- if (node._root->prev_sibling_c->next_sibling)
- node._root->prev_sibling_c->next_sibling = n._root;
- else
- _root->first_child = n._root;
-
- n._root->prev_sibling_c = node._root->prev_sibling_c;
- n._root->next_sibling = node._root;
- node._root->prev_sibling_c = n._root;
+PUGI__FN const char_t* xml_node::child_value() const
+{
+ if (!_root) return PUGIXML_TEXT("");
- if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
+ for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
+ if (i->value && impl::is_text_node(i))
+ return i->value;
- return n;
- }
+ return PUGIXML_TEXT("");
+}
- PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
- {
- if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
- if (!node._root || node._root->parent != _root) return xml_node();
-
- xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
- if (!n) return xml_node();
+PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
+{
+ return child(name_).child_value();
+}
- n._root->parent = _root;
-
- if (node._root->next_sibling)
- node._root->next_sibling->prev_sibling_c = n._root;
- else
- _root->first_child->prev_sibling_c = n._root;
-
- n._root->next_sibling = node._root->next_sibling;
- n._root->prev_sibling_c = node._root;
- node._root->next_sibling = n._root;
+PUGI__FN xml_attribute xml_node::first_attribute() const
+{
+ return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
+}
- if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
+PUGI__FN xml_attribute xml_node::last_attribute() const
+{
+ return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
+}
- return n;
- }
+PUGI__FN xml_node xml_node::first_child() const
+{
+ return _root ? xml_node(_root->first_child) : xml_node();
+}
- PUGI__FN xml_node xml_node::append_child(const char_t* name_)
- {
- xml_node result = append_child(node_element);
+PUGI__FN xml_node xml_node::last_child() const
+{
+ return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
+}
- result.set_name(name_);
+PUGI__FN bool xml_node::set_name(const char_t* rhs)
+{
+ switch (type()) {
+ case node_pi:
+ case node_declaration:
+ case node_element:
+ return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs);
+
+ default:
+ return false;
+ }
+}
- return result;
- }
+PUGI__FN bool xml_node::set_value(const char_t* rhs)
+{
+ switch (type()) {
+ case node_pi:
+ case node_cdata:
+ case node_pcdata:
+ case node_comment:
+ case node_doctype:
+ return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs);
+
+ default:
+ return false;
+ }
+}
- PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
- {
- xml_node result = prepend_child(node_element);
+PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
+{
+ if (type() != node_element && type() != node_declaration) return xml_attribute();
+
+ xml_attribute a(impl::append_attribute_ll(_root, impl::get_allocator(_root)));
+ a.set_name(name_);
- result.set_name(name_);
+ return a;
+}
+
+PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
+{
+ if (type() != node_element && type() != node_declaration) return xml_attribute();
- return result;
- }
+ xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
+ if (!a) return xml_attribute();
- PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
- {
- xml_node result = insert_child_after(node_element, node);
+ a.set_name(name_);
- result.set_name(name_);
+ xml_attribute_struct* head = _root->first_attribute;
+
+ if (head) {
+ a._attr->prev_attribute_c = head->prev_attribute_c;
+ head->prev_attribute_c = a._attr;
+ } else
+ a._attr->prev_attribute_c = a._attr;
+
+ a._attr->next_attribute = head;
+ _root->first_attribute = a._attr;
+
+ return a;
+}
- return result;
- }
-
- PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
- {
- xml_node result = insert_child_before(node_element, node);
+PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
+{
+ if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();
- result.set_name(name_);
-
- return result;
- }
-
- PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
- {
- xml_node result = append_child(proto.type());
-
- if (result) impl::recursive_copy_skip(result, proto, result);
-
- return result;
- }
-
- PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
- {
- xml_node result = prepend_child(proto.type());
+ // check that attribute belongs to *this
+ xml_attribute_struct* cur = attr._attr;
- if (result) impl::recursive_copy_skip(result, proto, result);
+ while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;
- return result;
- }
+ if (cur != _root->first_attribute) return xml_attribute();
- PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
- {
- xml_node result = insert_child_after(proto.type(), node);
+ xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
+ if (!a) return xml_attribute();
- if (result) impl::recursive_copy_skip(result, proto, result);
+ a.set_name(name_);
- return result;
- }
+ if (attr._attr->prev_attribute_c->next_attribute)
+ attr._attr->prev_attribute_c->next_attribute = a._attr;
+ else
+ _root->first_attribute = a._attr;
- PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
- {
- xml_node result = insert_child_before(proto.type(), node);
+ a._attr->prev_attribute_c = attr._attr->prev_attribute_c;
+ a._attr->next_attribute = attr._attr;
+ attr._attr->prev_attribute_c = a._attr;
- if (result) impl::recursive_copy_skip(result, proto, result);
+ return a;
+}
- return result;
- }
+PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
+{
+ if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();
- PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
- {
- return remove_attribute(attribute(name_));
- }
+ // check that attribute belongs to *this
+ xml_attribute_struct* cur = attr._attr;
- PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
- {
- if (!_root || !a._attr) return false;
+ while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;
- // check that attribute belongs to *this
- xml_attribute_struct* attr = a._attr;
+ if (cur != _root->first_attribute) return xml_attribute();
- while (attr->prev_attribute_c->next_attribute) attr = attr->prev_attribute_c;
+ xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
+ if (!a) return xml_attribute();
- if (attr != _root->first_attribute) return false;
+ a.set_name(name_);
- if (a._attr->next_attribute) a._attr->next_attribute->prev_attribute_c = a._attr->prev_attribute_c;
- else if (_root->first_attribute) _root->first_attribute->prev_attribute_c = a._attr->prev_attribute_c;
-
- if (a._attr->prev_attribute_c->next_attribute) a._attr->prev_attribute_c->next_attribute = a._attr->next_attribute;
- else _root->first_attribute = a._attr->next_attribute;
+ if (attr._attr->next_attribute)
+ attr._attr->next_attribute->prev_attribute_c = a._attr;
+ else
+ _root->first_attribute->prev_attribute_c = a._attr;
- impl::destroy_attribute(a._attr, impl::get_allocator(_root));
+ a._attr->next_attribute = attr._attr->next_attribute;
+ a._attr->prev_attribute_c = attr._attr;
+ attr._attr->next_attribute = a._attr;
- return true;
- }
+ return a;
+}
- PUGI__FN bool xml_node::remove_child(const char_t* name_)
- {
- return remove_child(child(name_));
- }
+PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
+{
+ if (!proto) return xml_attribute();
- PUGI__FN bool xml_node::remove_child(const xml_node& n)
- {
- if (!_root || !n._root || n._root->parent != _root) return false;
+ xml_attribute result = append_attribute(proto.name());
+ result.set_value(proto.value());
- if (n._root->next_sibling) n._root->next_sibling->prev_sibling_c = n._root->prev_sibling_c;
- else if (_root->first_child) _root->first_child->prev_sibling_c = n._root->prev_sibling_c;
-
- if (n._root->prev_sibling_c->next_sibling) n._root->prev_sibling_c->next_sibling = n._root->next_sibling;
- else _root->first_child = n._root->next_sibling;
-
- impl::destroy_node(n._root, impl::get_allocator(_root));
+ return result;
+}
- return true;
- }
+PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
+{
+ if (!proto) return xml_attribute();
- PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
- {
- if (!_root) return xml_node();
-
- for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
- if (i->name && impl::strequal(name_, i->name))
- {
- for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
- if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value))
- return xml_node(i);
- }
+ xml_attribute result = prepend_attribute(proto.name());
+ result.set_value(proto.value());
- return xml_node();
- }
+ return result;
+}
- PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
- {
- if (!_root) return xml_node();
-
- for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
- for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
- if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value))
- return xml_node(i);
+PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
+{
+ if (!proto) return xml_attribute();
- return xml_node();
- }
+ xml_attribute result = insert_attribute_after(proto.name(), attr);
+ result.set_value(proto.value());
+
+ return result;
+}
+
+PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
+{
+ if (!proto) return xml_attribute();
+
+ xml_attribute result = insert_attribute_before(proto.name(), attr);
+ result.set_value(proto.value());
+
+ return result;
+}
+
+PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
+{
+ if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
+
+ xml_node n(impl::append_node(_root, impl::get_allocator(_root), type_));
+
+ if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
+
+ return n;
+}
+
+PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
+{
+ if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
+
+ xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
+ if (!n) return xml_node();
+
+ n._root->parent = _root;
+
+ xml_node_struct* head = _root->first_child;
+
+ if (head) {
+ n._root->prev_sibling_c = head->prev_sibling_c;
+ head->prev_sibling_c = n._root;
+ } else
+ n._root->prev_sibling_c = n._root;
+
+ n._root->next_sibling = head;
+ _root->first_child = n._root;
+
+ if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
+
+ return n;
+}
+
+PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
+{
+ if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
+ if (!node._root || node._root->parent != _root) return xml_node();
+
+ xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
+ if (!n) return xml_node();
+
+ n._root->parent = _root;
+
+ if (node._root->prev_sibling_c->next_sibling)
+ node._root->prev_sibling_c->next_sibling = n._root;
+ else
+ _root->first_child = n._root;
+
+ n._root->prev_sibling_c = node._root->prev_sibling_c;
+ n._root->next_sibling = node._root;
+ node._root->prev_sibling_c = n._root;
+
+ if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
+
+ return n;
+}
+
+PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
+{
+ if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
+ if (!node._root || node._root->parent != _root) return xml_node();
+
+ xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
+ if (!n) return xml_node();
+
+ n._root->parent = _root;
+
+ if (node._root->next_sibling)
+ node._root->next_sibling->prev_sibling_c = n._root;
+ else
+ _root->first_child->prev_sibling_c = n._root;
+
+ n._root->next_sibling = node._root->next_sibling;
+ n._root->prev_sibling_c = node._root;
+ node._root->next_sibling = n._root;
+
+ if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
+
+ return n;
+}
+
+PUGI__FN xml_node xml_node::append_child(const char_t* name_)
+{
+ xml_node result = append_child(node_element);
+
+ result.set_name(name_);
+
+ return result;
+}
+
+PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
+{
+ xml_node result = prepend_child(node_element);
+
+ result.set_name(name_);
+
+ return result;
+}
+
+PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
+{
+ xml_node result = insert_child_after(node_element, node);
+
+ result.set_name(name_);
+
+ return result;
+}
+
+PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
+{
+ xml_node result = insert_child_before(node_element, node);
+
+ result.set_name(name_);
+
+ return result;
+}
+
+PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
+{
+ xml_node result = append_child(proto.type());
+
+ if (result) impl::recursive_copy_skip(result, proto, result);
+
+ return result;
+}
+
+PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
+{
+ xml_node result = prepend_child(proto.type());
+
+ if (result) impl::recursive_copy_skip(result, proto, result);
+
+ return result;
+}
+
+PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
+{
+ xml_node result = insert_child_after(proto.type(), node);
+
+ if (result) impl::recursive_copy_skip(result, proto, result);
+
+ return result;
+}
+
+PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
+{
+ xml_node result = insert_child_before(proto.type(), node);
+
+ if (result) impl::recursive_copy_skip(result, proto, result);
+
+ return result;
+}
+
+PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
+{
+ return remove_attribute(attribute(name_));
+}
+
+PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
+{
+ if (!_root || !a._attr) return false;
+
+ // check that attribute belongs to *this
+ xml_attribute_struct* attr = a._attr;
+
+ while (attr->prev_attribute_c->next_attribute) attr = attr->prev_attribute_c;
+
+ if (attr != _root->first_attribute) return false;
+
+ if (a._attr->next_attribute) a._attr->next_attribute->prev_attribute_c = a._attr->prev_attribute_c;
+ else if (_root->first_attribute) _root->first_attribute->prev_attribute_c = a._attr->prev_attribute_c;
+
+ if (a._attr->prev_attribute_c->next_attribute) a._attr->prev_attribute_c->next_attribute = a._attr->next_attribute;
+ else _root->first_attribute = a._attr->next_attribute;
+
+ impl::destroy_attribute(a._attr, impl::get_allocator(_root));
+
+ return true;
+}
+
+PUGI__FN bool xml_node::remove_child(const char_t* name_)
+{
+ return remove_child(child(name_));
+}
+
+PUGI__FN bool xml_node::remove_child(const xml_node& n)
+{
+ if (!_root || !n._root || n._root->parent != _root) return false;
+
+ if (n._root->next_sibling) n._root->next_sibling->prev_sibling_c = n._root->prev_sibling_c;
+ else if (_root->first_child) _root->first_child->prev_sibling_c = n._root->prev_sibling_c;
+
+ if (n._root->prev_sibling_c->next_sibling) n._root->prev_sibling_c->next_sibling = n._root->next_sibling;
+ else _root->first_child = n._root->next_sibling;
+
+ impl::destroy_node(n._root, impl::get_allocator(_root));
+
+ return true;
+}
+
+PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
+{
+ if (!_root) return xml_node();
+
+ for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
+ if (i->name && impl::strequal(name_, i->name)) {
+ for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
+ if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value))
+ return xml_node(i);
+ }
+
+ return xml_node();
+}
+
+PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
+{
+ if (!_root) return xml_node();
+
+ for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
+ for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
+ if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value))
+ return xml_node(i);
+
+ return xml_node();
+}
#ifndef PUGIXML_NO_STL
- PUGI__FN string_t xml_node::path(char_t delimiter) const
- {
- xml_node cursor = *this; // Make a copy.
-
- string_t result = cursor.name();
-
- while (cursor.parent())
- {
- cursor = cursor.parent();
-
- string_t temp = cursor.name();
- temp += delimiter;
- temp += result;
- result.swap(temp);
- }
-
- return result;
- }
+PUGI__FN string_t xml_node::path(char_t delimiter) const
+{
+ xml_node cursor = *this; // Make a copy.
+
+ string_t result = cursor.name();
+
+ while (cursor.parent()) {
+ cursor = cursor.parent();
+
+ string_t temp = cursor.name();
+ temp += delimiter;
+ temp += result;
+ result.swap(temp);
+ }
+
+ return result;
+}
#endif
- PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
- {
- xml_node found = *this; // Current search context.
-
- if (!_root || !path_ || !path_[0]) return found;
-
- if (path_[0] == delimiter)
- {
- // Absolute path; e.g. '/foo/bar'
- found = found.root();
- ++path_;
- }
-
- const char_t* path_segment = path_;
-
- while (*path_segment == delimiter) ++path_segment;
-
- const char_t* path_segment_end = path_segment;
-
- while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
-
- if (path_segment == path_segment_end) return found;
-
- const char_t* next_segment = path_segment_end;
-
- while (*next_segment == delimiter) ++next_segment;
-
- if (*path_segment == '.' && path_segment + 1 == path_segment_end)
- return found.first_element_by_path(next_segment, delimiter);
- else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
- return found.parent().first_element_by_path(next_segment, delimiter);
- else
- {
- for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling)
- {
- if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
- {
- xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
-
- if (subsearch) return subsearch;
- }
- }
-
- return xml_node();
- }
- }
-
- PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
- {
- walker._depth = -1;
-
- xml_node arg_begin = *this;
- if (!walker.begin(arg_begin)) return false;
-
- xml_node cur = first_child();
-
- if (cur)
- {
- ++walker._depth;
-
- do
- {
- xml_node arg_for_each = cur;
- if (!walker.for_each(arg_for_each))
- return false;
-
- if (cur.first_child())
- {
- ++walker._depth;
- cur = cur.first_child();
- }
- else if (cur.next_sibling())
- cur = cur.next_sibling();
- else
- {
- // Borland C++ workaround
- while (!cur.next_sibling() && cur != *this && !cur.parent().empty())
- {
- --walker._depth;
- cur = cur.parent();
- }
-
- if (cur != *this)
- cur = cur.next_sibling();
- }
- }
- while (cur && cur != *this);
- }
-
- assert(walker._depth == -1);
-
- xml_node arg_end = *this;
- return walker.end(arg_end);
- }
-
- PUGI__FN size_t xml_node::hash_value() const
- {
- return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
- }
-
- PUGI__FN xml_node_struct* xml_node::internal_object() const
- {
- return _root;
- }
-
- PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
- {
- if (!_root) return;
-
- impl::xml_buffered_writer buffered_writer(writer, encoding);
-
- impl::node_output(buffered_writer, *this, indent, flags, depth);
- }
+PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
+{
+ xml_node found = *this; // Current search context.
+
+ if (!_root || !path_ || !path_[0]) return found;
+
+ if (path_[0] == delimiter) {
+ // Absolute path; e.g. '/foo/bar'
+ found = found.root();
+ ++path_;
+ }
+
+ const char_t* path_segment = path_;
+
+ while (*path_segment == delimiter) ++path_segment;
+
+ const char_t* path_segment_end = path_segment;
+
+ while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
+
+ if (path_segment == path_segment_end) return found;
+
+ const char_t* next_segment = path_segment_end;
+
+ while (*next_segment == delimiter) ++next_segment;
+
+ if (*path_segment == '.' && path_segment + 1 == path_segment_end)
+ return found.first_element_by_path(next_segment, delimiter);
+ else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
+ return found.parent().first_element_by_path(next_segment, delimiter);
+ else {
+ for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling) {
+ if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment))) {
+ xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
+
+ if (subsearch) return subsearch;
+ }
+ }
+
+ return xml_node();
+ }
+}
+
+PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
+{
+ walker._depth = -1;
+
+ xml_node arg_begin = *this;
+ if (!walker.begin(arg_begin)) return false;
+
+ xml_node cur = first_child();
+
+ if (cur) {
+ ++walker._depth;
+
+ do {
+ xml_node arg_for_each = cur;
+ if (!walker.for_each(arg_for_each))
+ return false;
+
+ if (cur.first_child()) {
+ ++walker._depth;
+ cur = cur.first_child();
+ } else if (cur.next_sibling())
+ cur = cur.next_sibling();
+ else {
+ // Borland C++ workaround
+ while (!cur.next_sibling() && cur != *this && !cur.parent().empty()) {
+ --walker._depth;
+ cur = cur.parent();
+ }
+
+ if (cur != *this)
+ cur = cur.next_sibling();
+ }
+ } while (cur && cur != *this);
+ }
+
+ assert(walker._depth == -1);
+
+ xml_node arg_end = *this;
+ return walker.end(arg_end);
+}
+
+PUGI__FN size_t xml_node::hash_value() const
+{
+ return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
+}
+
+PUGI__FN xml_node_struct* xml_node::internal_object() const
+{
+ return _root;
+}
+
+PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
+{
+ if (!_root) return;
+
+ impl::xml_buffered_writer buffered_writer(writer, encoding);
+
+ impl::node_output(buffered_writer, *this, indent, flags, depth);
+}
#ifndef PUGIXML_NO_STL
- PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
- {
- xml_writer_stream writer(stream);
+PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
+{
+ xml_writer_stream writer(stream);
- print(writer, indent, flags, encoding, depth);
- }
+ print(writer, indent, flags, encoding, depth);
+}
- PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
- {
- xml_writer_stream writer(stream);
+PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
+{
+ xml_writer_stream writer(stream);
- print(writer, indent, flags, encoding_wchar, depth);
- }
+ print(writer, indent, flags, encoding_wchar, depth);
+}
#endif
- PUGI__FN ptrdiff_t xml_node::offset_debug() const
- {
- xml_node_struct* r = root()._root;
+PUGI__FN ptrdiff_t xml_node::offset_debug() const
+{
+ xml_node_struct* r = root()._root;
- if (!r) return -1;
+ if (!r) return -1;
- const char_t* buffer = static_cast<impl::xml_document_struct*>(r)->buffer;
+ const char_t* buffer = static_cast<impl::xml_document_struct*>(r)->buffer;
- if (!buffer) return -1;
+ if (!buffer) return -1;
- switch (type())
- {
- case node_document:
- return 0;
+ switch (type()) {
+ case node_document:
+ return 0;
- case node_element:
- case node_declaration:
- case node_pi:
- return (_root->header & impl::xml_memory_page_name_allocated_mask) ? -1 : _root->name - buffer;
+ case node_element:
+ case node_declaration:
+ case node_pi:
+ return (_root->header & impl::xml_memory_page_name_allocated_mask) ? -1 : _root->name - buffer;
- case node_pcdata:
- case node_cdata:
- case node_comment:
- case node_doctype:
- return (_root->header & impl::xml_memory_page_value_allocated_mask) ? -1 : _root->value - buffer;
+ case node_pcdata:
+ case node_cdata:
+ case node_comment:
+ case node_doctype:
+ return (_root->header & impl::xml_memory_page_value_allocated_mask) ? -1 : _root->value - buffer;
- default:
- return -1;
- }
- }
+ default:
+ return -1;
+ }
+}
#ifdef __BORLANDC__
- PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
- {
- return (bool)lhs && rhs;
- }
-
- PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
- {
- return (bool)lhs || rhs;
- }
+PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
+{
+ return (bool)lhs && rhs;
+}
+
+PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
+{
+ return (bool)lhs || rhs;
+}
#endif
- PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
- {
- }
+PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
+{
+}
- PUGI__FN xml_node_struct* xml_text::_data() const
- {
- if (!_root || impl::is_text_node(_root)) return _root;
+PUGI__FN xml_node_struct* xml_text::_data() const
+{
+ if (!_root || impl::is_text_node(_root)) return _root;
- for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
- if (impl::is_text_node(node))
- return node;
+ for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
+ if (impl::is_text_node(node))
+ return node;
- return 0;
- }
+ return 0;
+}
- PUGI__FN xml_node_struct* xml_text::_data_new()
- {
- xml_node_struct* d = _data();
- if (d) return d;
+PUGI__FN xml_node_struct* xml_text::_data_new()
+{
+ xml_node_struct* d = _data();
+ if (d) return d;
- return xml_node(_root).append_child(node_pcdata).internal_object();
- }
+ return xml_node(_root).append_child(node_pcdata).internal_object();
+}
- PUGI__FN xml_text::xml_text(): _root(0)
- {
- }
+PUGI__FN xml_text::xml_text(): _root(0)
+{
+}
- PUGI__FN static void unspecified_bool_xml_text(xml_text***)
- {
- }
+PUGI__FN static void unspecified_bool_xml_text(xml_text***)
+{
+}
- PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
- {
- return _data() ? unspecified_bool_xml_text : 0;
- }
+PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
+{
+ return _data() ? unspecified_bool_xml_text : 0;
+}
- PUGI__FN bool xml_text::operator!() const
- {
- return !_data();
- }
+PUGI__FN bool xml_text::operator!() const
+{
+ return !_data();
+}
- PUGI__FN bool xml_text::empty() const
- {
- return _data() == 0;
- }
+PUGI__FN bool xml_text::empty() const
+{
+ return _data() == 0;
+}
- PUGI__FN const char_t* xml_text::get() const
- {
- xml_node_struct* d = _data();
+PUGI__FN const char_t* xml_text::get() const
+{
+ xml_node_struct* d = _data();
- return (d && d->value) ? d->value : PUGIXML_TEXT("");
- }
+ return (d && d->value) ? d->value : PUGIXML_TEXT("");
+}
- PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
- {
- xml_node_struct* d = _data();
+PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
+{
+ xml_node_struct* d = _data();
- return (d && d->value) ? d->value : def;
- }
+ return (d && d->value) ? d->value : def;
+}
- PUGI__FN int xml_text::as_int(int def) const
- {
- xml_node_struct* d = _data();
+PUGI__FN int xml_text::as_int(int def) const
+{
+ xml_node_struct* d = _data();
- return impl::get_value_int(d ? d->value : 0, def);
- }
+ return impl::get_value_int(d ? d->value : 0, def);
+}
- PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
- {
- xml_node_struct* d = _data();
+PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
+{
+ xml_node_struct* d = _data();
- return impl::get_value_uint(d ? d->value : 0, def);
- }
+ return impl::get_value_uint(d ? d->value : 0, def);
+}
- PUGI__FN double xml_text::as_double(double def) const
- {
- xml_node_struct* d = _data();
+PUGI__FN double xml_text::as_double(double def) const
+{
+ xml_node_struct* d = _data();
- return impl::get_value_double(d ? d->value : 0, def);
- }
+ return impl::get_value_double(d ? d->value : 0, def);
+}
- PUGI__FN float xml_text::as_float(float def) const
- {
- xml_node_struct* d = _data();
+PUGI__FN float xml_text::as_float(float def) const
+{
+ xml_node_struct* d = _data();
- return impl::get_value_float(d ? d->value : 0, def);
- }
+ return impl::get_value_float(d ? d->value : 0, def);
+}
- PUGI__FN bool xml_text::as_bool(bool def) const
- {
- xml_node_struct* d = _data();
+PUGI__FN bool xml_text::as_bool(bool def) const
+{
+ xml_node_struct* d = _data();
- return impl::get_value_bool(d ? d->value : 0, def);
- }
+ return impl::get_value_bool(d ? d->value : 0, def);
+}
- PUGI__FN bool xml_text::set(const char_t* rhs)
- {
- xml_node_struct* dn = _data_new();
+PUGI__FN bool xml_text::set(const char_t* rhs)
+{
+ xml_node_struct* dn = _data_new();
- return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
- }
+ return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
+}
- PUGI__FN bool xml_text::set(int rhs)
- {
- xml_node_struct* dn = _data_new();
+PUGI__FN bool xml_text::set(int rhs)
+{
+ xml_node_struct* dn = _data_new();
- return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
- }
+ return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
+}
- PUGI__FN bool xml_text::set(unsigned int rhs)
- {
- xml_node_struct* dn = _data_new();
+PUGI__FN bool xml_text::set(unsigned int rhs)
+{
+ xml_node_struct* dn = _data_new();
- return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
- }
+ return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
+}
- PUGI__FN bool xml_text::set(double rhs)
- {
- xml_node_struct* dn = _data_new();
+PUGI__FN bool xml_text::set(double rhs)
+{
+ xml_node_struct* dn = _data_new();
- return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
- }
+ return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
+}
- PUGI__FN bool xml_text::set(bool rhs)
- {
- xml_node_struct* dn = _data_new();
+PUGI__FN bool xml_text::set(bool rhs)
+{
+ xml_node_struct* dn = _data_new();
- return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
- }
+ return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
+}
- PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
- {
- set(rhs);
- return *this;
- }
+PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
+{
+ set(rhs);
+ return *this;
+}
- PUGI__FN xml_text& xml_text::operator=(int rhs)
- {
- set(rhs);
- return *this;
- }
+PUGI__FN xml_text& xml_text::operator=(int rhs)
+{
+ set(rhs);
+ return *this;
+}
- PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
- {
- set(rhs);
- return *this;
- }
+PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
+{
+ set(rhs);
+ return *this;
+}
- PUGI__FN xml_text& xml_text::operator=(double rhs)
- {
- set(rhs);
- return *this;
- }
+PUGI__FN xml_text& xml_text::operator=(double rhs)
+{
+ set(rhs);
+ return *this;
+}
- PUGI__FN xml_text& xml_text::operator=(bool rhs)
- {
- set(rhs);
- return *this;
- }
+PUGI__FN xml_text& xml_text::operator=(bool rhs)
+{
+ set(rhs);
+ return *this;
+}
- PUGI__FN xml_node xml_text::data() const
- {
- return xml_node(_data());
- }
+PUGI__FN xml_node xml_text::data() const
+{
+ return xml_node(_data());
+}
#ifdef __BORLANDC__
- PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
- {
- return (bool)lhs && rhs;
- }
-
- PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
- {
- return (bool)lhs || rhs;
- }
+PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
+{
+ return (bool)lhs && rhs;
+}
+
+PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
+{
+ return (bool)lhs || rhs;
+}
#endif
- PUGI__FN xml_node_iterator::xml_node_iterator()
- {
- }
-
- PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
- {
- }
-
- PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
- {
- }
-
- PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
- {
- return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
- }
-
- PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
- {
- return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
- }
-
- PUGI__FN xml_node& xml_node_iterator::operator*() const
- {
- assert(_wrap._root);
- return _wrap;
- }
-
- PUGI__FN xml_node* xml_node_iterator::operator->() const
- {
- assert(_wrap._root);
- return const_cast<xml_node*>(&_wrap); // BCC32 workaround
- }
-
- PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()
- {
- assert(_wrap._root);
- _wrap._root = _wrap._root->next_sibling;
- return *this;
- }
-
- PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
- {
- xml_node_iterator temp = *this;
- ++*this;
- return temp;
- }
-
- PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()
- {
- _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
- return *this;
- }
-
- PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
- {
- xml_node_iterator temp = *this;
- --*this;
- return temp;
- }
-
- PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
- {
- }
-
- PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
- {
- }
-
- PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
- {
- }
-
- PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
- {
- return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
- }
-
- PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
- {
- return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
- }
-
- PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
- {
- assert(_wrap._attr);
- return _wrap;
- }
-
- PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
- {
- assert(_wrap._attr);
- return const_cast<xml_attribute*>(&_wrap); // BCC32 workaround
- }
-
- PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
- {
- assert(_wrap._attr);
- _wrap._attr = _wrap._attr->next_attribute;
- return *this;
- }
-
- PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
- {
- xml_attribute_iterator temp = *this;
- ++*this;
- return temp;
- }
-
- PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
- {
- _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
- return *this;
- }
-
- PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
- {
- xml_attribute_iterator temp = *this;
- --*this;
- return temp;
- }
-
- PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
- {
- }
-
- PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _node(node), _name(name)
- {
- }
-
- PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
- {
- return _node == rhs._node;
- }
-
- PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
- {
- return _node != rhs._node;
- }
-
- PUGI__FN xml_node& xml_named_node_iterator::operator*() const
- {
- assert(_node._root);
- return _node;
- }
-
- PUGI__FN xml_node* xml_named_node_iterator::operator->() const
- {
- assert(_node._root);
- return const_cast<xml_node*>(&_node); // BCC32 workaround
- }
-
- PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
- {
- assert(_node._root);
- _node = _node.next_sibling(_name);
- return *this;
- }
-
- PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
- {
- xml_named_node_iterator temp = *this;
- ++*this;
- return temp;
- }
-
- PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
- {
- }
-
- PUGI__FN xml_parse_result::operator bool() const
- {
- return status == status_ok;
- }
-
- PUGI__FN const char* xml_parse_result::description() const
- {
- switch (status)
- {
- case status_ok: return "No error";
-
- case status_file_not_found: return "File was not found";
- case status_io_error: return "Error reading from file/stream";
- case status_out_of_memory: return "Could not allocate memory";
- case status_internal_error: return "Internal error occurred";
-
- case status_unrecognized_tag: return "Could not determine tag type";
-
- case status_bad_pi: return "Error parsing document declaration/processing instruction";
- case status_bad_comment: return "Error parsing comment";
- case status_bad_cdata: return "Error parsing CDATA section";
- case status_bad_doctype: return "Error parsing document type declaration";
- case status_bad_pcdata: return "Error parsing PCDATA section";
- case status_bad_start_element: return "Error parsing start element tag";
- case status_bad_attribute: return "Error parsing element attribute";
- case status_bad_end_element: return "Error parsing end element tag";
- case status_end_element_mismatch: return "Start-end tags mismatch";
-
- default: return "Unknown error";
- }
- }
-
- PUGI__FN xml_document::xml_document(): _buffer(0)
- {
- create();
- }
-
- PUGI__FN xml_document::~xml_document()
- {
- destroy();
- }
-
- PUGI__FN void xml_document::reset()
- {
- destroy();
- create();
- }
-
- PUGI__FN void xml_document::reset(const xml_document& proto)
- {
- reset();
-
- for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
- append_copy(cur);
- }
-
- PUGI__FN void xml_document::create()
- {
- // initialize sentinel page
- PUGI__STATIC_ASSERT(offsetof(impl::xml_memory_page, data) + sizeof(impl::xml_document_struct) + impl::xml_memory_page_alignment <= sizeof(_memory));
-
- // align upwards to page boundary
- void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(_memory) + (impl::xml_memory_page_alignment - 1)) & ~(impl::xml_memory_page_alignment - 1));
-
- // prepare page structure
- impl::xml_memory_page* page = impl::xml_memory_page::construct(page_memory);
-
- page->busy_size = impl::xml_memory_page_size;
-
- // allocate new root
- _root = new (page->data) impl::xml_document_struct(page);
- _root->prev_sibling_c = _root;
-
- // setup sentinel page
- page->allocator = static_cast<impl::xml_document_struct*>(_root);
- }
-
- PUGI__FN void xml_document::destroy()
- {
- // destroy static storage
- if (_buffer)
- {
- impl::xml_memory::deallocate(_buffer);
- _buffer = 0;
- }
-
- // destroy dynamic storage, leave sentinel page (it's in static memory)
- if (_root)
- {
- impl::xml_memory_page* root_page = reinterpret_cast<impl::xml_memory_page*>(_root->header & impl::xml_memory_page_pointer_mask);
- assert(root_page && !root_page->prev && !root_page->memory);
-
- // destroy all pages
- for (impl::xml_memory_page* page = root_page->next; page; )
- {
- impl::xml_memory_page* next = page->next;
-
- impl::xml_allocator::deallocate_page(page);
-
- page = next;
- }
-
- // cleanup root page
- root_page->allocator = 0;
- root_page->next = 0;
- root_page->busy_size = root_page->freed_size = 0;
-
- _root = 0;
- }
- }
+PUGI__FN xml_node_iterator::xml_node_iterator()
+{
+}
+
+PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
+{
+}
+
+PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
+{
+}
+
+PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
+{
+ return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
+}
+
+PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
+{
+ return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
+}
+
+PUGI__FN xml_node& xml_node_iterator::operator*() const
+{
+ assert(_wrap._root);
+ return _wrap;
+}
+
+PUGI__FN xml_node* xml_node_iterator::operator->() const
+{
+ assert(_wrap._root);
+ return const_cast<xml_node*>(&_wrap); // BCC32 workaround
+}
+
+PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()
+{
+ assert(_wrap._root);
+ _wrap._root = _wrap._root->next_sibling;
+ return *this;
+}
+
+PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
+{
+ xml_node_iterator temp = *this;
+ ++*this;
+ return temp;
+}
+
+PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()
+{
+ _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
+ return *this;
+}
+
+PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
+{
+ xml_node_iterator temp = *this;
+ --*this;
+ return temp;
+}
+
+PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
+{
+}
+
+PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
+{
+}
+
+PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
+{
+}
+
+PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
+{
+ return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
+}
+
+PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
+{
+ return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
+}
+
+PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
+{
+ assert(_wrap._attr);
+ return _wrap;
+}
+
+PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
+{
+ assert(_wrap._attr);
+ return const_cast<xml_attribute*>(&_wrap); // BCC32 workaround
+}
+
+PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
+{
+ assert(_wrap._attr);
+ _wrap._attr = _wrap._attr->next_attribute;
+ return *this;
+}
+
+PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
+{
+ xml_attribute_iterator temp = *this;
+ ++*this;
+ return temp;
+}
+
+PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
+{
+ _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
+ return *this;
+}
+
+PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
+{
+ xml_attribute_iterator temp = *this;
+ --*this;
+ return temp;
+}
+
+PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
+{
+}
+
+PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _node(node), _name(name)
+{
+}
+
+PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
+{
+ return _node == rhs._node;
+}
+
+PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
+{
+ return _node != rhs._node;
+}
+
+PUGI__FN xml_node& xml_named_node_iterator::operator*() const
+{
+ assert(_node._root);
+ return _node;
+}
+
+PUGI__FN xml_node* xml_named_node_iterator::operator->() const
+{
+ assert(_node._root);
+ return const_cast<xml_node*>(&_node); // BCC32 workaround
+}
+
+PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
+{
+ assert(_node._root);
+ _node = _node.next_sibling(_name);
+ return *this;
+}
+
+PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
+{
+ xml_named_node_iterator temp = *this;
+ ++*this;
+ return temp;
+}
+
+PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
+{
+}
+
+PUGI__FN xml_parse_result::operator bool() const
+{
+ return status == status_ok;
+}
+
+PUGI__FN const char* xml_parse_result::description() const
+{
+ switch (status) {
+ case status_ok:
+ return "No error";
+
+ case status_file_not_found:
+ return "File was not found";
+ case status_io_error:
+ return "Error reading from file/stream";
+ case status_out_of_memory:
+ return "Could not allocate memory";
+ case status_internal_error:
+ return "Internal error occurred";
+
+ case status_unrecognized_tag:
+ return "Could not determine tag type";
+
+ case status_bad_pi:
+ return "Error parsing document declaration/processing instruction";
+ case status_bad_comment:
+ return "Error parsing comment";
+ case status_bad_cdata:
+ return "Error parsing CDATA section";
+ case status_bad_doctype:
+ return "Error parsing document type declaration";
+ case status_bad_pcdata:
+ return "Error parsing PCDATA section";
+ case status_bad_start_element:
+ return "Error parsing start element tag";
+ case status_bad_attribute:
+ return "Error parsing element attribute";
+ case status_bad_end_element:
+ return "Error parsing end element tag";
+ case status_end_element_mismatch:
+ return "Start-end tags mismatch";
+
+ default:
+ return "Unknown error";
+ }
+}
+
+PUGI__FN xml_document::xml_document(): _buffer(0)
+{
+ create();
+}
+
+PUGI__FN xml_document::~xml_document()
+{
+ destroy();
+}
+
+PUGI__FN void xml_document::reset()
+{
+ destroy();
+ create();
+}
+
+PUGI__FN void xml_document::reset(const xml_document& proto)
+{
+ reset();
+
+ for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
+ append_copy(cur);
+}
+
+PUGI__FN void xml_document::create()
+{
+ // initialize sentinel page
+ PUGI__STATIC_ASSERT(offsetof(impl::xml_memory_page, data) + sizeof(impl::xml_document_struct) + impl::xml_memory_page_alignment <= sizeof(_memory));
+
+ // align upwards to page boundary
+ void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(_memory) + (impl::xml_memory_page_alignment - 1)) & ~(impl::xml_memory_page_alignment - 1));
+
+ // prepare page structure
+ impl::xml_memory_page* page = impl::xml_memory_page::construct(page_memory);
+
+ page->busy_size = impl::xml_memory_page_size;
+
+ // allocate new root
+ _root = new (page->data) impl::xml_document_struct(page);
+ _root->prev_sibling_c = _root;
+
+ // setup sentinel page
+ page->allocator = static_cast<impl::xml_document_struct*>(_root);
+}
+
+PUGI__FN void xml_document::destroy()
+{
+ // destroy static storage
+ if (_buffer) {
+ impl::xml_memory::deallocate(_buffer);
+ _buffer = 0;
+ }
+
+ // destroy dynamic storage, leave sentinel page (it's in static memory)
+ if (_root) {
+ impl::xml_memory_page* root_page = reinterpret_cast<impl::xml_memory_page*>(_root->header & impl::xml_memory_page_pointer_mask);
+ assert(root_page && !root_page->prev && !root_page->memory);
+
+ // destroy all pages
+ for (impl::xml_memory_page* page = root_page->next; page; ) {
+ impl::xml_memory_page* next = page->next;
+
+ impl::xml_allocator::deallocate_page(page);
+
+ page = next;
+ }
+
+ // cleanup root page
+ root_page->allocator = 0;
+ root_page->next = 0;
+ root_page->busy_size = root_page->freed_size = 0;
+
+ _root = 0;
+ }
+}
#ifndef PUGIXML_NO_STL
- PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
- {
- reset();
+PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
+{
+ reset();
- return impl::load_stream_impl(*this, stream, options, encoding);
- }
+ return impl::load_stream_impl(*this, stream, options, encoding);
+}
- PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
- {
- reset();
+PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
+{
+ reset();
- return impl::load_stream_impl(*this, stream, options, encoding_wchar);
- }
+ return impl::load_stream_impl(*this, stream, options, encoding_wchar);
+}
#endif
- PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
- {
- // Force native encoding (skip autodetection)
- #ifdef PUGIXML_WCHAR_MODE
- xml_encoding encoding = encoding_wchar;
- #else
- xml_encoding encoding = encoding_utf8;
- #endif
-
- return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
- }
-
- PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
- {
- reset();
-
- FILE* file = fopen(path_, "rb");
-
- return impl::load_file_impl(*this, file, options, encoding);
- }
-
- PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
- {
- reset();
-
- FILE* file = impl::open_file_wide(path_, L"rb");
-
- return impl::load_file_impl(*this, file, options, encoding);
- }
-
- PUGI__FN xml_parse_result xml_document::load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own)
- {
- reset();
-
- // check input buffer
- assert(contents || size == 0);
-
- // get actual encoding
- xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
-
- // get private buffer
- char_t* buffer = 0;
- size_t length = 0;
-
- if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
-
- // delete original buffer if we performed a conversion
- if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
-
- // parse
- xml_parse_result res = impl::xml_parser::parse(buffer, length, _root, options);
-
- // remember encoding
- res.encoding = buffer_encoding;
-
- // grab onto buffer if it's our buffer, user is responsible for deallocating contens himself
- if (own || buffer != contents) _buffer = buffer;
-
- return res;
- }
-
- PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
- {
- return load_buffer_impl(const_cast<void*>(contents), size, options, encoding, false, false);
- }
-
- PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
- {
- return load_buffer_impl(contents, size, options, encoding, true, false);
- }
-
- PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
- {
- return load_buffer_impl(contents, size, options, encoding, true, true);
- }
-
- PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
- {
- impl::xml_buffered_writer buffered_writer(writer, encoding);
-
- if ((flags & format_write_bom) && encoding != encoding_latin1)
- {
- // BOM always represents the codepoint U+FEFF, so just write it in native encoding
- #ifdef PUGIXML_WCHAR_MODE
- unsigned int bom = 0xfeff;
- buffered_writer.write(static_cast<wchar_t>(bom));
- #else
- buffered_writer.write('\xef', '\xbb', '\xbf');
- #endif
- }
-
- if (!(flags & format_no_declaration) && !impl::has_declaration(*this))
- {
- buffered_writer.write(PUGIXML_TEXT("<?xml version=\"1.0\""));
- if (encoding == encoding_latin1) buffered_writer.write(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
- buffered_writer.write('?', '>');
- if (!(flags & format_raw)) buffered_writer.write('\n');
- }
-
- impl::node_output(buffered_writer, *this, indent, flags, 0);
- }
+PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
+{
+ // Force native encoding (skip autodetection)
+#ifdef PUGIXML_WCHAR_MODE
+ xml_encoding encoding = encoding_wchar;
+#else
+ xml_encoding encoding = encoding_utf8;
+#endif
+
+ return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
+}
+
+PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
+{
+ reset();
+
+ FILE* file = fopen(path_, "rb");
+
+ return impl::load_file_impl(*this, file, options, encoding);
+}
+
+PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
+{
+ reset();
+
+ FILE* file = impl::open_file_wide(path_, L"rb");
+
+ return impl::load_file_impl(*this, file, options, encoding);
+}
+
+PUGI__FN xml_parse_result xml_document::load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own)
+{
+ reset();
+
+ // check input buffer
+ assert(contents || size == 0);
+
+ // get actual encoding
+ xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
+
+ // get private buffer
+ char_t* buffer = 0;
+ size_t length = 0;
+
+ if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
+
+ // delete original buffer if we performed a conversion
+ if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
+
+ // parse
+ xml_parse_result res = impl::xml_parser::parse(buffer, length, _root, options);
+
+ // remember encoding
+ res.encoding = buffer_encoding;
+
+ // grab onto buffer if it's our buffer, user is responsible for deallocating contens himself
+ if (own || buffer != contents) _buffer = buffer;
+
+ return res;
+}
+
+PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
+{
+ return load_buffer_impl(const_cast<void*>(contents), size, options, encoding, false, false);
+}
+
+PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
+{
+ return load_buffer_impl(contents, size, options, encoding, true, false);
+}
+
+PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
+{
+ return load_buffer_impl(contents, size, options, encoding, true, true);
+}
+
+PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
+{
+ impl::xml_buffered_writer buffered_writer(writer, encoding);
+
+ if ((flags & format_write_bom) && encoding != encoding_latin1) {
+ // BOM always represents the codepoint U+FEFF, so just write it in native encoding
+#ifdef PUGIXML_WCHAR_MODE
+ unsigned int bom = 0xfeff;
+ buffered_writer.write(static_cast<wchar_t>(bom));
+#else
+ buffered_writer.write('\xef', '\xbb', '\xbf');
+#endif
+ }
+
+ if (!(flags & format_no_declaration) && !impl::has_declaration(*this)) {
+ buffered_writer.write(PUGIXML_TEXT("<?xml version=\"1.0\""));
+ if (encoding == encoding_latin1) buffered_writer.write(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
+ buffered_writer.write('?', '>');
+ if (!(flags & format_raw)) buffered_writer.write('\n');
+ }
+
+ impl::node_output(buffered_writer, *this, indent, flags, 0);
+}
#ifndef PUGIXML_NO_STL
- PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
- {
- xml_writer_stream writer(stream);
+PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
+{
+ xml_writer_stream writer(stream);
- save(writer, indent, flags, encoding);
- }
+ save(writer, indent, flags, encoding);
+}
- PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
- {
- xml_writer_stream writer(stream);
+PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
+{
+ xml_writer_stream writer(stream);
- save(writer, indent, flags, encoding_wchar);
- }
+ save(writer, indent, flags, encoding_wchar);
+}
#endif
- PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
- {
- FILE* file = fopen(path_, (flags & format_save_file_text) ? "w" : "wb");
- return impl::save_file_impl(*this, file, indent, flags, encoding);
- }
+PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
+{
+ FILE* file = fopen(path_, (flags & format_save_file_text) ? "w" : "wb");
+ return impl::save_file_impl(*this, file, indent, flags, encoding);
+}
- PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
- {
- FILE* file = impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb");
- return impl::save_file_impl(*this, file, indent, flags, encoding);
- }
+PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
+{
+ FILE* file = impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb");
+ return impl::save_file_impl(*this, file, indent, flags, encoding);
+}
- PUGI__FN xml_node xml_document::document_element() const
- {
- for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
- if ((i->header & impl::xml_memory_page_type_mask) + 1 == node_element)
- return xml_node(i);
+PUGI__FN xml_node xml_document::document_element() const
+{
+ for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
+ if ((i->header & impl::xml_memory_page_type_mask) + 1 == node_element)
+ return xml_node(i);
- return xml_node();
- }
+ return xml_node();
+}
#ifndef PUGIXML_NO_STL
- PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
- {
- assert(str);
-
- return impl::as_utf8_impl(str, wcslen(str));
- }
-
- PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
- {
- return impl::as_utf8_impl(str.c_str(), str.size());
- }
-
- PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
- {
- assert(str);
-
- return impl::as_wide_impl(str, strlen(str));
- }
-
- PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
- {
- return impl::as_wide_impl(str.c_str(), str.size());
- }
+PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
+{
+ assert(str);
+
+ return impl::as_utf8_impl(str, wcslen(str));
+}
+
+PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
+{
+ return impl::as_utf8_impl(str.c_str(), str.size());
+}
+
+PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
+{
+ assert(str);
+
+ return impl::as_wide_impl(str, strlen(str));
+}
+
+PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
+{
+ return impl::as_wide_impl(str.c_str(), str.size());
+}
#endif
- PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
- {
- impl::xml_memory::allocate = allocate;
- impl::xml_memory::deallocate = deallocate;
- }
+PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
+{
+ impl::xml_memory::allocate = allocate;
+ impl::xml_memory::deallocate = deallocate;
+}
- PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
- {
- return impl::xml_memory::allocate;
- }
+PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
+{
+ return impl::xml_memory::allocate;
+}
- PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
- {
- return impl::xml_memory::deallocate;
- }
+PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
+{
+ return impl::xml_memory::deallocate;
+}
}
#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
namespace std
{
- // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
- PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
- {
- return std::bidirectional_iterator_tag();
- }
+// Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
+PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
+{
+ return std::bidirectional_iterator_tag();
+}
- PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
- {
- return std::bidirectional_iterator_tag();
- }
+PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
+{
+ return std::bidirectional_iterator_tag();
+}
- PUGI__FN std::forward_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
- {
- return std::forward_iterator_tag();
- }
+PUGI__FN std::forward_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
+{
+ return std::forward_iterator_tag();
+}
}
#endif
#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
namespace std
{
- // Workarounds for (non-standard) iterator category detection
- PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
- {
- return std::bidirectional_iterator_tag();
- }
+// Workarounds for (non-standard) iterator category detection
+PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
+{
+ return std::bidirectional_iterator_tag();
+}
- PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
- {
- return std::bidirectional_iterator_tag();
- }
+PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
+{
+ return std::bidirectional_iterator_tag();
+}
- PUGI__FN std::forward_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
- {
- return std::forward_iterator_tag();
- }
+PUGI__FN std::forward_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
+{
+ return std::forward_iterator_tag();
+}
}
#endif
@@ -5422,4770 +5042,4336 @@ namespace std
// STL replacements
PUGI__NS_BEGIN
- struct equal_to
- {
- template <typename T> bool operator()(const T& lhs, const T& rhs) const
- {
- return lhs == rhs;
- }
- };
-
- struct not_equal_to
- {
- template <typename T> bool operator()(const T& lhs, const T& rhs) const
- {
- return lhs != rhs;
- }
- };
-
- struct less
- {
- template <typename T> bool operator()(const T& lhs, const T& rhs) const
- {
- return lhs < rhs;
- }
- };
-
- struct less_equal
- {
- template <typename T> bool operator()(const T& lhs, const T& rhs) const
- {
- return lhs <= rhs;
- }
- };
-
- template <typename T> void swap(T& lhs, T& rhs)
- {
- T temp = lhs;
- lhs = rhs;
- rhs = temp;
- }
-
- template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
- {
- I result = begin;
-
- for (I it = begin + 1; it != end; ++it)
- if (pred(*it, *result))
- result = it;
-
- return result;
- }
-
- template <typename I> void reverse(I begin, I end)
- {
- while (begin + 1 < end) swap(*begin++, *--end);
- }
-
- template <typename I> I unique(I begin, I end)
- {
- // fast skip head
- while (begin + 1 < end && *begin != *(begin + 1)) begin++;
-
- if (begin == end) return begin;
-
- // last written element
- I write = begin++;
-
- // merge unique elements
- while (begin != end)
- {
- if (*begin != *write)
- *++write = *begin++;
- else
- begin++;
- }
-
- // past-the-end (write points to live element)
- return write + 1;
- }
-
- template <typename I> void copy_backwards(I begin, I end, I target)
- {
- while (begin != end) *--target = *--end;
- }
-
- template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
- {
- assert(begin != end);
-
- for (I it = begin + 1; it != end; ++it)
- {
- T val = *it;
-
- if (pred(val, *begin))
- {
- // move to front
- copy_backwards(begin, it, it + 1);
- *begin = val;
- }
- else
- {
- I hole = it;
-
- // move hole backwards
- while (pred(val, *(hole - 1)))
- {
- *hole = *(hole - 1);
- hole--;
- }
-
- // fill hole with element
- *hole = val;
- }
- }
- }
-
- // std variant for elements with ==
- template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
- {
- I eqbeg = middle, eqend = middle + 1;
-
- // expand equal range
- while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg;
- while (eqend != end && *eqend == *eqbeg) ++eqend;
-
- // process outer elements
- I ltend = eqbeg, gtbeg = eqend;
-
- for (;;)
- {
- // find the element from the right side that belongs to the left one
- for (; gtbeg != end; ++gtbeg)
- if (!pred(*eqbeg, *gtbeg))
- {
- if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++);
- else break;
- }
-
- // find the element from the left side that belongs to the right one
- for (; ltend != begin; --ltend)
- if (!pred(*(ltend - 1), *eqbeg))
- {
- if (*eqbeg == *(ltend - 1)) swap(*(ltend - 1), *--eqbeg);
- else break;
- }
-
- // scanned all elements
- if (gtbeg == end && ltend == begin)
- {
- *out_eqbeg = eqbeg;
- *out_eqend = eqend;
- return;
- }
-
- // make room for elements by moving equal area
- if (gtbeg == end)
- {
- if (--ltend != --eqbeg) swap(*ltend, *eqbeg);
- swap(*eqbeg, *--eqend);
- }
- else if (ltend == begin)
- {
- if (eqend != gtbeg) swap(*eqbeg, *eqend);
- ++eqend;
- swap(*gtbeg++, *eqbeg++);
- }
- else swap(*gtbeg++, *--ltend);
- }
- }
-
- template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
- {
- if (pred(*middle, *first)) swap(*middle, *first);
- if (pred(*last, *middle)) swap(*last, *middle);
- if (pred(*middle, *first)) swap(*middle, *first);
- }
-
- template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
- {
- if (last - first <= 40)
- {
- // median of three for small chunks
- median3(first, middle, last, pred);
- }
- else
- {
- // median of nine
- size_t step = (last - first + 1) / 8;
-
- median3(first, first + step, first + 2 * step, pred);
- median3(middle - step, middle, middle + step, pred);
- median3(last - 2 * step, last - step, last, pred);
- median3(first + step, middle, last - step, pred);
- }
- }
-
- template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
- {
- // sort large chunks
- while (end - begin > 32)
- {
- // find median element
- I middle = begin + (end - begin) / 2;
- median(begin, middle, end - 1, pred);
-
- // partition in three chunks (< = >)
- I eqbeg, eqend;
- partition(begin, middle, end, pred, &eqbeg, &eqend);
-
- // loop on larger half
- if (eqbeg - begin > end - eqend)
- {
- sort(eqend, end, pred);
- end = eqbeg;
- }
- else
- {
- sort(begin, eqbeg, pred);
- begin = eqend;
- }
- }
-
- // insertion sort small chunk
- if (begin != end) insertion_sort(begin, end, pred, &*begin);
- }
+struct equal_to {
+ template <typename T> bool operator()(const T& lhs, const T& rhs) const {
+ return lhs == rhs;
+ }
+};
+
+struct not_equal_to {
+ template <typename T> bool operator()(const T& lhs, const T& rhs) const {
+ return lhs != rhs;
+ }
+};
+
+struct less {
+ template <typename T> bool operator()(const T& lhs, const T& rhs) const {
+ return lhs < rhs;
+ }
+};
+
+struct less_equal {
+ template <typename T> bool operator()(const T& lhs, const T& rhs) const {
+ return lhs <= rhs;
+ }
+};
+
+template <typename T> void swap(T& lhs, T& rhs)
+{
+ T temp = lhs;
+ lhs = rhs;
+ rhs = temp;
+}
+
+template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
+{
+ I result = begin;
+
+ for (I it = begin + 1; it != end; ++it)
+ if (pred(*it, *result))
+ result = it;
+
+ return result;
+}
+
+template <typename I> void reverse(I begin, I end)
+{
+ while (begin + 1 < end) swap(*begin++, *--end);
+}
+
+template <typename I> I unique(I begin, I end)
+{
+ // fast skip head
+ while (begin + 1 < end && *begin != *(begin + 1)) begin++;
+
+ if (begin == end) return begin;
+
+ // last written element
+ I write = begin++;
+
+ // merge unique elements
+ while (begin != end) {
+ if (*begin != *write)
+ *++write = *begin++;
+ else
+ begin++;
+ }
+
+ // past-the-end (write points to live element)
+ return write + 1;
+}
+
+template <typename I> void copy_backwards(I begin, I end, I target)
+{
+ while (begin != end) *--target = *--end;
+}
+
+template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
+{
+ assert(begin != end);
+
+ for (I it = begin + 1; it != end; ++it) {
+ T val = *it;
+
+ if (pred(val, *begin)) {
+ // move to front
+ copy_backwards(begin, it, it + 1);
+ *begin = val;
+ } else {
+ I hole = it;
+
+ // move hole backwards
+ while (pred(val, *(hole - 1))) {
+ *hole = *(hole - 1);
+ hole--;
+ }
+
+ // fill hole with element
+ *hole = val;
+ }
+ }
+}
+
+// std variant for elements with ==
+template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
+{
+ I eqbeg = middle, eqend = middle + 1;
+
+ // expand equal range
+ while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg;
+ while (eqend != end && *eqend == *eqbeg) ++eqend;
+
+ // process outer elements
+ I ltend = eqbeg, gtbeg = eqend;
+
+ for (;;) {
+ // find the element from the right side that belongs to the left one
+ for (; gtbeg != end; ++gtbeg)
+ if (!pred(*eqbeg, *gtbeg)) {
+ if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++);
+ else break;
+ }
+
+ // find the element from the left side that belongs to the right one
+ for (; ltend != begin; --ltend)
+ if (!pred(*(ltend - 1), *eqbeg)) {
+ if (*eqbeg == *(ltend - 1)) swap(*(ltend - 1), *--eqbeg);
+ else break;
+ }
+
+ // scanned all elements
+ if (gtbeg == end && ltend == begin) {
+ *out_eqbeg = eqbeg;
+ *out_eqend = eqend;
+ return;
+ }
+
+ // make room for elements by moving equal area
+ if (gtbeg == end) {
+ if (--ltend != --eqbeg) swap(*ltend, *eqbeg);
+ swap(*eqbeg, *--eqend);
+ } else if (ltend == begin) {
+ if (eqend != gtbeg) swap(*eqbeg, *eqend);
+ ++eqend;
+ swap(*gtbeg++, *eqbeg++);
+ } else swap(*gtbeg++, *--ltend);
+ }
+}
+
+template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
+{
+ if (pred(*middle, *first)) swap(*middle, *first);
+ if (pred(*last, *middle)) swap(*last, *middle);
+ if (pred(*middle, *first)) swap(*middle, *first);
+}
+
+template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
+{
+ if (last - first <= 40) {
+ // median of three for small chunks
+ median3(first, middle, last, pred);
+ } else {
+ // median of nine
+ size_t step = (last - first + 1) / 8;
+
+ median3(first, first + step, first + 2 * step, pred);
+ median3(middle - step, middle, middle + step, pred);
+ median3(last - 2 * step, last - step, last, pred);
+ median3(first + step, middle, last - step, pred);
+ }
+}
+
+template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
+{
+ // sort large chunks
+ while (end - begin > 32) {
+ // find median element
+ I middle = begin + (end - begin) / 2;
+ median(begin, middle, end - 1, pred);
+
+ // partition in three chunks (< = >)
+ I eqbeg, eqend;
+ partition(begin, middle, end, pred, &eqbeg, &eqend);
+
+ // loop on larger half
+ if (eqbeg - begin > end - eqend) {
+ sort(eqend, end, pred);
+ end = eqbeg;
+ } else {
+ sort(begin, eqbeg, pred);
+ begin = eqend;
+ }
+ }
+
+ // insertion sort small chunk
+ if (begin != end) insertion_sort(begin, end, pred, &*begin);
+}
PUGI__NS_END
// Allocator used for AST and evaluation stacks
PUGI__NS_BEGIN
- struct xpath_memory_block
- {
- xpath_memory_block* next;
-
- char data[
- #ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
- PUGIXML_MEMORY_XPATH_PAGE_SIZE
- #else
- 4096
- #endif
- ];
- };
-
- class xpath_allocator
- {
- xpath_memory_block* _root;
- size_t _root_size;
-
- public:
- #ifdef PUGIXML_NO_EXCEPTIONS
- jmp_buf* error_handler;
- #endif
-
- xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size)
- {
- #ifdef PUGIXML_NO_EXCEPTIONS
- error_handler = 0;
- #endif
- }
-
- void* allocate_nothrow(size_t size)
- {
- const size_t block_capacity = sizeof(_root->data);
-
- // align size so that we're able to store pointers in subsequent blocks
- size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
-
- if (_root_size + size <= block_capacity)
- {
- void* buf = _root->data + _root_size;
- _root_size += size;
- return buf;
- }
- else
- {
- size_t block_data_size = (size > block_capacity) ? size : block_capacity;
- size_t block_size = block_data_size + offsetof(xpath_memory_block, data);
-
- xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
- if (!block) return 0;
-
- block->next = _root;
-
- _root = block;
- _root_size = size;
-
- return block->data;
- }
- }
-
- void* allocate(size_t size)
- {
- void* result = allocate_nothrow(size);
-
- if (!result)
- {
- #ifdef PUGIXML_NO_EXCEPTIONS
- assert(error_handler);
- longjmp(*error_handler, 1);
- #else
- throw std::bad_alloc();
- #endif
- }
-
- return result;
- }
-
- void* reallocate(void* ptr, size_t old_size, size_t new_size)
- {
- // align size so that we're able to store pointers in subsequent blocks
- old_size = (old_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
- new_size = (new_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
-
- // we can only reallocate the last object
- assert(ptr == 0 || static_cast<char*>(ptr) + old_size == _root->data + _root_size);
-
- // adjust root size so that we have not allocated the object at all
- bool only_object = (_root_size == old_size);
-
- if (ptr) _root_size -= old_size;
-
- // allocate a new version (this will obviously reuse the memory if possible)
- void* result = allocate(new_size);
- assert(result);
-
- // we have a new block
- if (result != ptr && ptr)
- {
- // copy old data
- assert(new_size > old_size);
- memcpy(result, ptr, old_size);
-
- // free the previous page if it had no other objects
- if (only_object)
- {
- assert(_root->data == result);
- assert(_root->next);
-
- xpath_memory_block* next = _root->next->next;
-
- if (next)
- {
- // deallocate the whole page, unless it was the first one
- xml_memory::deallocate(_root->next);
- _root->next = next;
- }
- }
- }
-
- return result;
- }
-
- void revert(const xpath_allocator& state)
- {
- // free all new pages
- xpath_memory_block* cur = _root;
-
- while (cur != state._root)
- {
- xpath_memory_block* next = cur->next;
-
- xml_memory::deallocate(cur);
-
- cur = next;
- }
-
- // restore state
- _root = state._root;
- _root_size = state._root_size;
- }
-
- void release()
- {
- xpath_memory_block* cur = _root;
- assert(cur);
-
- while (cur->next)
- {
- xpath_memory_block* next = cur->next;
-
- xml_memory::deallocate(cur);
-
- cur = next;
- }
- }
- };
-
- struct xpath_allocator_capture
- {
- xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
- {
- }
-
- ~xpath_allocator_capture()
- {
- _target->revert(_state);
- }
-
- xpath_allocator* _target;
- xpath_allocator _state;
- };
-
- struct xpath_stack
- {
- xpath_allocator* result;
- xpath_allocator* temp;
- };
-
- struct xpath_stack_data
- {
- xpath_memory_block blocks[2];
- xpath_allocator result;
- xpath_allocator temp;
- xpath_stack stack;
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- jmp_buf error_handler;
- #endif
-
- xpath_stack_data(): result(blocks + 0), temp(blocks + 1)
- {
- blocks[0].next = blocks[1].next = 0;
-
- stack.result = &result;
- stack.temp = &temp;
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- result.error_handler = temp.error_handler = &error_handler;
- #endif
- }
-
- ~xpath_stack_data()
- {
- result.release();
- temp.release();
- }
- };
+struct xpath_memory_block {
+ xpath_memory_block* next;
+
+ char data[
+#ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
+ PUGIXML_MEMORY_XPATH_PAGE_SIZE
+#else
+ 4096
+#endif
+ ];
+};
+
+class xpath_allocator
+{
+ xpath_memory_block* _root;
+ size_t _root_size;
+
+public:
+#ifdef PUGIXML_NO_EXCEPTIONS
+ jmp_buf* error_handler;
+#endif
+
+ xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size) {
+#ifdef PUGIXML_NO_EXCEPTIONS
+ error_handler = 0;
+#endif
+ }
+
+ void* allocate_nothrow(size_t size) {
+ const size_t block_capacity = sizeof(_root->data);
+
+ // align size so that we're able to store pointers in subsequent blocks
+ size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
+
+ if (_root_size + size <= block_capacity) {
+ void* buf = _root->data + _root_size;
+ _root_size += size;
+ return buf;
+ } else {
+ size_t block_data_size = (size > block_capacity) ? size : block_capacity;
+ size_t block_size = block_data_size + offsetof(xpath_memory_block, data);
+
+ xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
+ if (!block) return 0;
+
+ block->next = _root;
+
+ _root = block;
+ _root_size = size;
+
+ return block->data;
+ }
+ }
+
+ void* allocate(size_t size) {
+ void* result = allocate_nothrow(size);
+
+ if (!result) {
+#ifdef PUGIXML_NO_EXCEPTIONS
+ assert(error_handler);
+ longjmp(*error_handler, 1);
+#else
+ throw std::bad_alloc();
+#endif
+ }
+
+ return result;
+ }
+
+ void* reallocate(void* ptr, size_t old_size, size_t new_size) {
+ // align size so that we're able to store pointers in subsequent blocks
+ old_size = (old_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
+ new_size = (new_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
+
+ // we can only reallocate the last object
+ assert(ptr == 0 || static_cast<char*>(ptr) + old_size == _root->data + _root_size);
+
+ // adjust root size so that we have not allocated the object at all
+ bool only_object = (_root_size == old_size);
+
+ if (ptr) _root_size -= old_size;
+
+ // allocate a new version (this will obviously reuse the memory if possible)
+ void* result = allocate(new_size);
+ assert(result);
+
+ // we have a new block
+ if (result != ptr && ptr) {
+ // copy old data
+ assert(new_size > old_size);
+ memcpy(result, ptr, old_size);
+
+ // free the previous page if it had no other objects
+ if (only_object) {
+ assert(_root->data == result);
+ assert(_root->next);
+
+ xpath_memory_block* next = _root->next->next;
+
+ if (next) {
+ // deallocate the whole page, unless it was the first one
+ xml_memory::deallocate(_root->next);
+ _root->next = next;
+ }
+ }
+ }
+
+ return result;
+ }
+
+ void revert(const xpath_allocator& state) {
+ // free all new pages
+ xpath_memory_block* cur = _root;
+
+ while (cur != state._root) {
+ xpath_memory_block* next = cur->next;
+
+ xml_memory::deallocate(cur);
+
+ cur = next;
+ }
+
+ // restore state
+ _root = state._root;
+ _root_size = state._root_size;
+ }
+
+ void release() {
+ xpath_memory_block* cur = _root;
+ assert(cur);
+
+ while (cur->next) {
+ xpath_memory_block* next = cur->next;
+
+ xml_memory::deallocate(cur);
+
+ cur = next;
+ }
+ }
+};
+
+struct xpath_allocator_capture {
+ xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc) {
+ }
+
+ ~xpath_allocator_capture() {
+ _target->revert(_state);
+ }
+
+ xpath_allocator* _target;
+ xpath_allocator _state;
+};
+
+struct xpath_stack {
+ xpath_allocator* result;
+ xpath_allocator* temp;
+};
+
+struct xpath_stack_data {
+ xpath_memory_block blocks[2];
+ xpath_allocator result;
+ xpath_allocator temp;
+ xpath_stack stack;
+
+#ifdef PUGIXML_NO_EXCEPTIONS
+ jmp_buf error_handler;
+#endif
+
+ xpath_stack_data(): result(blocks + 0), temp(blocks + 1) {
+ blocks[0].next = blocks[1].next = 0;
+
+ stack.result = &result;
+ stack.temp = &temp;
+
+#ifdef PUGIXML_NO_EXCEPTIONS
+ result.error_handler = temp.error_handler = &error_handler;
+#endif
+ }
+
+ ~xpath_stack_data() {
+ result.release();
+ temp.release();
+ }
+};
PUGI__NS_END
// String class
PUGI__NS_BEGIN
- class xpath_string
- {
- const char_t* _buffer;
- bool _uses_heap;
-
- static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
- {
- char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
- assert(result);
-
- memcpy(result, string, length * sizeof(char_t));
- result[length] = 0;
-
- return result;
- }
-
- static char_t* duplicate_string(const char_t* string, xpath_allocator* alloc)
- {
- return duplicate_string(string, strlength(string), alloc);
- }
-
- public:
- xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false)
- {
- }
-
- explicit xpath_string(const char_t* str, xpath_allocator* alloc)
- {
- bool empty_ = (*str == 0);
-
- _buffer = empty_ ? PUGIXML_TEXT("") : duplicate_string(str, alloc);
- _uses_heap = !empty_;
- }
-
- explicit xpath_string(const char_t* str, bool use_heap): _buffer(str), _uses_heap(use_heap)
- {
- }
-
- xpath_string(const char_t* begin, const char_t* end, xpath_allocator* alloc)
- {
- assert(begin <= end);
-
- bool empty_ = (begin == end);
-
- _buffer = empty_ ? PUGIXML_TEXT("") : duplicate_string(begin, static_cast<size_t>(end - begin), alloc);
- _uses_heap = !empty_;
- }
-
- void append(const xpath_string& o, xpath_allocator* alloc)
- {
- // skip empty sources
- if (!*o._buffer) return;
-
- // fast append for constant empty target and constant source
- if (!*_buffer && !_uses_heap && !o._uses_heap)
- {
- _buffer = o._buffer;
- }
- else
- {
- // need to make heap copy
- size_t target_length = strlength(_buffer);
- size_t source_length = strlength(o._buffer);
- size_t result_length = target_length + source_length;
-
- // allocate new buffer
- char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
- assert(result);
-
- // append first string to the new buffer in case there was no reallocation
- if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
-
- // append second string to the new buffer
- memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
- result[result_length] = 0;
-
- // finalize
- _buffer = result;
- _uses_heap = true;
- }
- }
-
- const char_t* c_str() const
- {
- return _buffer;
- }
-
- size_t length() const
- {
- return strlength(_buffer);
- }
-
- char_t* data(xpath_allocator* alloc)
- {
- // make private heap copy
- if (!_uses_heap)
- {
- _buffer = duplicate_string(_buffer, alloc);
- _uses_heap = true;
- }
-
- return const_cast<char_t*>(_buffer);
- }
-
- bool empty() const
- {
- return *_buffer == 0;
- }
-
- bool operator==(const xpath_string& o) const
- {
- return strequal(_buffer, o._buffer);
- }
-
- bool operator!=(const xpath_string& o) const
- {
- return !strequal(_buffer, o._buffer);
- }
-
- bool uses_heap() const
- {
- return _uses_heap;
- }
- };
-
- PUGI__FN xpath_string xpath_string_const(const char_t* str)
- {
- return xpath_string(str, false);
- }
+class xpath_string
+{
+ const char_t* _buffer;
+ bool _uses_heap;
+
+ static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc) {
+ char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
+ assert(result);
+
+ memcpy(result, string, length * sizeof(char_t));
+ result[length] = 0;
+
+ return result;
+ }
+
+ static char_t* duplicate_string(const char_t* string, xpath_allocator* alloc) {
+ return duplicate_string(string, strlength(string), alloc);
+ }
+
+public:
+ xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false) {
+ }
+
+ explicit xpath_string(const char_t* str, xpath_allocator* alloc) {
+ bool empty_ = (*str == 0);
+
+ _buffer = empty_ ? PUGIXML_TEXT("") : duplicate_string(str, alloc);
+ _uses_heap = !empty_;
+ }
+
+ explicit xpath_string(const char_t* str, bool use_heap): _buffer(str), _uses_heap(use_heap) {
+ }
+
+ xpath_string(const char_t* begin, const char_t* end, xpath_allocator* alloc) {
+ assert(begin <= end);
+
+ bool empty_ = (begin == end);
+
+ _buffer = empty_ ? PUGIXML_TEXT("") : duplicate_string(begin, static_cast<size_t>(end - begin), alloc);
+ _uses_heap = !empty_;
+ }
+
+ void append(const xpath_string& o, xpath_allocator* alloc) {
+ // skip empty sources
+ if (!*o._buffer) return;
+
+ // fast append for constant empty target and constant source
+ if (!*_buffer && !_uses_heap && !o._uses_heap) {
+ _buffer = o._buffer;
+ } else {
+ // need to make heap copy
+ size_t target_length = strlength(_buffer);
+ size_t source_length = strlength(o._buffer);
+ size_t result_length = target_length + source_length;
+
+ // allocate new buffer
+ char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
+ assert(result);
+
+ // append first string to the new buffer in case there was no reallocation
+ if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
+
+ // append second string to the new buffer
+ memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
+ result[result_length] = 0;
+
+ // finalize
+ _buffer = result;
+ _uses_heap = true;
+ }
+ }
+
+ const char_t* c_str() const {
+ return _buffer;
+ }
+
+ size_t length() const {
+ return strlength(_buffer);
+ }
+
+ char_t* data(xpath_allocator* alloc) {
+ // make private heap copy
+ if (!_uses_heap) {
+ _buffer = duplicate_string(_buffer, alloc);
+ _uses_heap = true;
+ }
+
+ return const_cast<char_t*>(_buffer);
+ }
+
+ bool empty() const {
+ return *_buffer == 0;
+ }
+
+ bool operator==(const xpath_string& o) const {
+ return strequal(_buffer, o._buffer);
+ }
+
+ bool operator!=(const xpath_string& o) const {
+ return !strequal(_buffer, o._buffer);
+ }
+
+ bool uses_heap() const {
+ return _uses_heap;
+ }
+};
+
+PUGI__FN xpath_string xpath_string_const(const char_t* str)
+{
+ return xpath_string(str, false);
+}
PUGI__NS_END
PUGI__NS_BEGIN
- PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
- {
- while (*pattern && *string == *pattern)
- {
- string++;
- pattern++;
- }
-
- return *pattern == 0;
- }
-
- PUGI__FN const char_t* find_char(const char_t* s, char_t c)
- {
- #ifdef PUGIXML_WCHAR_MODE
- return wcschr(s, c);
- #else
- return strchr(s, c);
- #endif
- }
-
- PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
- {
- #ifdef PUGIXML_WCHAR_MODE
- // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
- return (*p == 0) ? s : wcsstr(s, p);
- #else
- return strstr(s, p);
- #endif
- }
-
- // Converts symbol to lower case, if it is an ASCII one
- PUGI__FN char_t tolower_ascii(char_t ch)
- {
- return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
- }
-
- PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
- {
- if (na.attribute())
- return xpath_string_const(na.attribute().value());
- else
- {
- const xml_node& n = na.node();
-
- switch (n.type())
- {
- case node_pcdata:
- case node_cdata:
- case node_comment:
- case node_pi:
- return xpath_string_const(n.value());
-
- case node_document:
- case node_element:
- {
- xpath_string result;
-
- xml_node cur = n.first_child();
-
- while (cur && cur != n)
- {
- if (cur.type() == node_pcdata || cur.type() == node_cdata)
- result.append(xpath_string_const(cur.value()), alloc);
-
- if (cur.first_child())
- cur = cur.first_child();
- else if (cur.next_sibling())
- cur = cur.next_sibling();
- else
- {
- while (!cur.next_sibling() && cur != n)
- cur = cur.parent();
-
- if (cur != n) cur = cur.next_sibling();
- }
- }
-
- return result;
- }
-
- default:
- return xpath_string();
- }
- }
- }
-
- PUGI__FN unsigned int node_height(xml_node n)
- {
- unsigned int result = 0;
-
- while (n)
- {
- ++result;
- n = n.parent();
- }
-
- return result;
- }
-
- PUGI__FN bool node_is_before(xml_node ln, unsigned int lh, xml_node rn, unsigned int rh)
- {
- // normalize heights
- for (unsigned int i = rh; i < lh; i++) ln = ln.parent();
- for (unsigned int j = lh; j < rh; j++) rn = rn.parent();
-
- // one node is the ancestor of the other
- if (ln == rn) return lh < rh;
-
- // find common ancestor
- while (ln.parent() != rn.parent())
- {
- ln = ln.parent();
- rn = rn.parent();
- }
-
- // there is no common ancestor (the shared parent is null), nodes are from different documents
- if (!ln.parent()) return ln < rn;
-
- // determine sibling order
- for (; ln; ln = ln.next_sibling())
- if (ln == rn)
- return true;
-
- return false;
- }
-
- PUGI__FN bool node_is_ancestor(xml_node parent, xml_node node)
- {
- while (node && node != parent) node = node.parent();
-
- return parent && node == parent;
- }
-
- PUGI__FN const void* document_order(const xpath_node& xnode)
- {
- xml_node_struct* node = xnode.node().internal_object();
-
- if (node)
- {
- if (node->name && (node->header & xml_memory_page_name_allocated_mask) == 0) return node->name;
- if (node->value && (node->header & xml_memory_page_value_allocated_mask) == 0) return node->value;
- return 0;
- }
-
- xml_attribute_struct* attr = xnode.attribute().internal_object();
-
- if (attr)
- {
- if ((attr->header & xml_memory_page_name_allocated_mask) == 0) return attr->name;
- if ((attr->header & xml_memory_page_value_allocated_mask) == 0) return attr->value;
- return 0;
- }
-
- return 0;
- }
-
- struct document_order_comparator
- {
- bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
- {
- // optimized document order based check
- const void* lo = document_order(lhs);
- const void* ro = document_order(rhs);
-
- if (lo && ro) return lo < ro;
-
- // slow comparison
- xml_node ln = lhs.node(), rn = rhs.node();
-
- // compare attributes
- if (lhs.attribute() && rhs.attribute())
- {
- // shared parent
- if (lhs.parent() == rhs.parent())
- {
- // determine sibling order
- for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
- if (a == rhs.attribute())
- return true;
-
- return false;
- }
-
- // compare attribute parents
- ln = lhs.parent();
- rn = rhs.parent();
- }
- else if (lhs.attribute())
- {
- // attributes go after the parent element
- if (lhs.parent() == rhs.node()) return false;
-
- ln = lhs.parent();
- }
- else if (rhs.attribute())
- {
- // attributes go after the parent element
- if (rhs.parent() == lhs.node()) return true;
-
- rn = rhs.parent();
- }
-
- if (ln == rn) return false;
-
- unsigned int lh = node_height(ln);
- unsigned int rh = node_height(rn);
-
- return node_is_before(ln, lh, rn, rh);
- }
- };
-
- struct duplicate_comparator
- {
- bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
- {
- if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true;
- else return rhs.attribute() ? false : lhs.node() < rhs.node();
- }
- };
-
- PUGI__FN double gen_nan()
- {
- #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
- union { float f; uint32_t i; } u[sizeof(float) == sizeof(uint32_t) ? 1 : -1];
- u[0].i = 0x7fc00000;
- return u[0].f;
- #else
- // fallback
- const volatile double zero = 0.0;
- return zero / zero;
- #endif
- }
-
- PUGI__FN bool is_nan(double value)
- {
- #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
- return !!_isnan(value);
- #elif defined(fpclassify) && defined(FP_NAN)
- return fpclassify(value) == FP_NAN;
- #else
- // fallback
- const volatile double v = value;
- return v != v;
- #endif
- }
-
- PUGI__FN const char_t* convert_number_to_string_special(double value)
- {
- #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
- if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
- if (_isnan(value)) return PUGIXML_TEXT("NaN");
- return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
- #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
- switch (fpclassify(value))
- {
- case FP_NAN:
- return PUGIXML_TEXT("NaN");
-
- case FP_INFINITE:
- return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
-
- case FP_ZERO:
- return PUGIXML_TEXT("0");
-
- default:
- return 0;
- }
- #else
- // fallback
- const volatile double v = value;
-
- if (v == 0) return PUGIXML_TEXT("0");
- if (v != v) return PUGIXML_TEXT("NaN");
- if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
- return 0;
- #endif
- }
-
- PUGI__FN bool convert_number_to_boolean(double value)
- {
- return (value != 0 && !is_nan(value));
- }
-
- PUGI__FN void truncate_zeros(char* begin, char* end)
- {
- while (begin != end && end[-1] == '0') end--;
-
- *end = 0;
- }
-
- // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
+PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
+{
+ while (*pattern && *string == *pattern) {
+ string++;
+ pattern++;
+ }
+
+ return *pattern == 0;
+}
+
+PUGI__FN const char_t* find_char(const char_t* s, char_t c)
+{
+#ifdef PUGIXML_WCHAR_MODE
+ return wcschr(s, c);
+#else
+ return strchr(s, c);
+#endif
+}
+
+PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
+{
+#ifdef PUGIXML_WCHAR_MODE
+ // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
+ return (*p == 0) ? s : wcsstr(s, p);
+#else
+ return strstr(s, p);
+#endif
+}
+
+// Converts symbol to lower case, if it is an ASCII one
+PUGI__FN char_t tolower_ascii(char_t ch)
+{
+ return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
+}
+
+PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
+{
+ if (na.attribute())
+ return xpath_string_const(na.attribute().value());
+ else {
+ const xml_node& n = na.node();
+
+ switch (n.type()) {
+ case node_pcdata:
+ case node_cdata:
+ case node_comment:
+ case node_pi:
+ return xpath_string_const(n.value());
+
+ case node_document:
+ case node_element: {
+ xpath_string result;
+
+ xml_node cur = n.first_child();
+
+ while (cur && cur != n) {
+ if (cur.type() == node_pcdata || cur.type() == node_cdata)
+ result.append(xpath_string_const(cur.value()), alloc);
+
+ if (cur.first_child())
+ cur = cur.first_child();
+ else if (cur.next_sibling())
+ cur = cur.next_sibling();
+ else {
+ while (!cur.next_sibling() && cur != n)
+ cur = cur.parent();
+
+ if (cur != n) cur = cur.next_sibling();
+ }
+ }
+
+ return result;
+ }
+
+ default:
+ return xpath_string();
+ }
+ }
+}
+
+PUGI__FN unsigned int node_height(xml_node n)
+{
+ unsigned int result = 0;
+
+ while (n) {
+ ++result;
+ n = n.parent();
+ }
+
+ return result;
+}
+
+PUGI__FN bool node_is_before(xml_node ln, unsigned int lh, xml_node rn, unsigned int rh)
+{
+ // normalize heights
+ for (unsigned int i = rh; i < lh; i++) ln = ln.parent();
+ for (unsigned int j = lh; j < rh; j++) rn = rn.parent();
+
+ // one node is the ancestor of the other
+ if (ln == rn) return lh < rh;
+
+ // find common ancestor
+ while (ln.parent() != rn.parent()) {
+ ln = ln.parent();
+ rn = rn.parent();
+ }
+
+ // there is no common ancestor (the shared parent is null), nodes are from different documents
+ if (!ln.parent()) return ln < rn;
+
+ // determine sibling order
+ for (; ln; ln = ln.next_sibling())
+ if (ln == rn)
+ return true;
+
+ return false;
+}
+
+PUGI__FN bool node_is_ancestor(xml_node parent, xml_node node)
+{
+ while (node && node != parent) node = node.parent();
+
+ return parent && node == parent;
+}
+
+PUGI__FN const void* document_order(const xpath_node& xnode)
+{
+ xml_node_struct* node = xnode.node().internal_object();
+
+ if (node) {
+ if (node->name && (node->header & xml_memory_page_name_allocated_mask) == 0) return node->name;
+ if (node->value && (node->header & xml_memory_page_value_allocated_mask) == 0) return node->value;
+ return 0;
+ }
+
+ xml_attribute_struct* attr = xnode.attribute().internal_object();
+
+ if (attr) {
+ if ((attr->header & xml_memory_page_name_allocated_mask) == 0) return attr->name;
+ if ((attr->header & xml_memory_page_value_allocated_mask) == 0) return attr->value;
+ return 0;
+ }
+
+ return 0;
+}
+
+struct document_order_comparator {
+ bool operator()(const xpath_node& lhs, const xpath_node& rhs) const {
+ // optimized document order based check
+ const void* lo = document_order(lhs);
+ const void* ro = document_order(rhs);
+
+ if (lo && ro) return lo < ro;
+
+ // slow comparison
+ xml_node ln = lhs.node(), rn = rhs.node();
+
+ // compare attributes
+ if (lhs.attribute() && rhs.attribute()) {
+ // shared parent
+ if (lhs.parent() == rhs.parent()) {
+ // determine sibling order
+ for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
+ if (a == rhs.attribute())
+ return true;
+
+ return false;
+ }
+
+ // compare attribute parents
+ ln = lhs.parent();
+ rn = rhs.parent();
+ } else if (lhs.attribute()) {
+ // attributes go after the parent element
+ if (lhs.parent() == rhs.node()) return false;
+
+ ln = lhs.parent();
+ } else if (rhs.attribute()) {
+ // attributes go after the parent element
+ if (rhs.parent() == lhs.node()) return true;
+
+ rn = rhs.parent();
+ }
+
+ if (ln == rn) return false;
+
+ unsigned int lh = node_height(ln);
+ unsigned int rh = node_height(rn);
+
+ return node_is_before(ln, lh, rn, rh);
+ }
+};
+
+struct duplicate_comparator {
+ bool operator()(const xpath_node& lhs, const xpath_node& rhs) const {
+ if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true;
+ else return rhs.attribute() ? false : lhs.node() < rhs.node();
+ }
+};
+
+PUGI__FN double gen_nan()
+{
+#if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
+ union {
+ float f;
+ uint32_t i;
+ } u[sizeof(float) == sizeof(uint32_t) ? 1 : -1];
+ u[0].i = 0x7fc00000;
+ return u[0].f;
+#else
+ // fallback
+ const volatile double zero = 0.0;
+ return zero / zero;
+#endif
+}
+
+PUGI__FN bool is_nan(double value)
+{
+#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
+ return !!_isnan(value);
+#elif defined(fpclassify) && defined(FP_NAN)
+ return fpclassify(value) == FP_NAN;
+#else
+ // fallback
+ const volatile double v = value;
+ return v != v;
+#endif
+}
+
+PUGI__FN const char_t* convert_number_to_string_special(double value)
+{
+#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
+ if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
+ if (_isnan(value)) return PUGIXML_TEXT("NaN");
+ return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
+#elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
+ switch (fpclassify(value)) {
+ case FP_NAN:
+ return PUGIXML_TEXT("NaN");
+
+ case FP_INFINITE:
+ return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
+
+ case FP_ZERO:
+ return PUGIXML_TEXT("0");
+
+ default:
+ return 0;
+ }
+#else
+ // fallback
+ const volatile double v = value;
+
+ if (v == 0) return PUGIXML_TEXT("0");
+ if (v != v) return PUGIXML_TEXT("NaN");
+ if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
+ return 0;
+#endif
+}
+
+PUGI__FN bool convert_number_to_boolean(double value)
+{
+ return (value != 0 && !is_nan(value));
+}
+
+PUGI__FN void truncate_zeros(char* begin, char* end)
+{
+ while (begin != end && end[-1] == '0') end--;
+
+ *end = 0;
+}
+
+// gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
- PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
- {
- // get base values
- int sign, exponent;
- _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);
-
- // truncate redundant zeros
- truncate_zeros(buffer, buffer + strlen(buffer));
-
- // fill results
- *out_mantissa = buffer;
- *out_exponent = exponent;
- }
+PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
+{
+ // get base values
+ int sign, exponent;
+ _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);
+
+ // truncate redundant zeros
+ truncate_zeros(buffer, buffer + strlen(buffer));
+
+ // fill results
+ *out_mantissa = buffer;
+ *out_exponent = exponent;
+}
+#else
+PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
+{
+ // get a scientific notation value with IEEE DBL_DIG decimals
+ sprintf(buffer, "%.*e", DBL_DIG, value);
+ assert(strlen(buffer) < buffer_size);
+ (void)!buffer_size;
+
+ // get the exponent (possibly negative)
+ char* exponent_string = strchr(buffer, 'e');
+ assert(exponent_string);
+
+ int exponent = atoi(exponent_string + 1);
+
+ // extract mantissa string: skip sign
+ char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
+ assert(mantissa[0] != '0' && mantissa[1] == '.');
+
+ // divide mantissa by 10 to eliminate integer part
+ mantissa[1] = mantissa[0];
+ mantissa++;
+ exponent++;
+
+ // remove extra mantissa digits and zero-terminate mantissa
+ truncate_zeros(mantissa, exponent_string);
+
+ // fill results
+ *out_mantissa = mantissa;
+ *out_exponent = exponent;
+}
+#endif
+
+PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
+{
+ // try special number conversion
+ const char_t* special = convert_number_to_string_special(value);
+ if (special) return xpath_string_const(special);
+
+ // get mantissa + exponent form
+ char mantissa_buffer[64];
+
+ char* mantissa;
+ int exponent;
+ convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent);
+
+ // make the number!
+ char_t result[512];
+ char_t* s = result;
+
+ // sign
+ if (value < 0) *s++ = '-';
+
+ // integer part
+ if (exponent <= 0) {
+ *s++ = '0';
+ } else {
+ while (exponent > 0) {
+ assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9);
+ *s++ = *mantissa ? *mantissa++ : '0';
+ exponent--;
+ }
+ }
+
+ // fractional part
+ if (*mantissa) {
+ // decimal point
+ *s++ = '.';
+
+ // extra zeroes from negative exponent
+ while (exponent < 0) {
+ *s++ = '0';
+ exponent++;
+ }
+
+ // extra mantissa digits
+ while (*mantissa) {
+ assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
+ *s++ = *mantissa++;
+ }
+ }
+
+ // zero-terminate
+ assert(s < result + sizeof(result) / sizeof(result[0]));
+ *s = 0;
+
+ return xpath_string(result, alloc);
+}
+
+PUGI__FN bool check_string_to_number_format(const char_t* string)
+{
+ // parse leading whitespace
+ while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
+
+ // parse sign
+ if (*string == '-') ++string;
+
+ if (!*string) return false;
+
+ // if there is no integer part, there should be a decimal part with at least one digit
+ if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
+
+ // parse integer part
+ while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
+
+ // parse decimal part
+ if (*string == '.') {
+ ++string;
+
+ while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
+ }
+
+ // parse trailing whitespace
+ while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
+
+ return *string == 0;
+}
+
+PUGI__FN double convert_string_to_number(const char_t* string)
+{
+ // check string format
+ if (!check_string_to_number_format(string)) return gen_nan();
+
+ // parse string
+#ifdef PUGIXML_WCHAR_MODE
+ return wcstod(string, 0);
#else
- PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
- {
- // get a scientific notation value with IEEE DBL_DIG decimals
- sprintf(buffer, "%.*e", DBL_DIG, value);
- assert(strlen(buffer) < buffer_size);
- (void)!buffer_size;
-
- // get the exponent (possibly negative)
- char* exponent_string = strchr(buffer, 'e');
- assert(exponent_string);
-
- int exponent = atoi(exponent_string + 1);
-
- // extract mantissa string: skip sign
- char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
- assert(mantissa[0] != '0' && mantissa[1] == '.');
-
- // divide mantissa by 10 to eliminate integer part
- mantissa[1] = mantissa[0];
- mantissa++;
- exponent++;
-
- // remove extra mantissa digits and zero-terminate mantissa
- truncate_zeros(mantissa, exponent_string);
-
- // fill results
- *out_mantissa = mantissa;
- *out_exponent = exponent;
- }
+ return atof(string);
#endif
+}
+
+PUGI__FN bool convert_string_to_number(const char_t* begin, const char_t* end, double* out_result)
+{
+ char_t buffer[32];
+
+ size_t length = static_cast<size_t>(end - begin);
+ char_t* scratch = buffer;
+
+ if (length >= sizeof(buffer) / sizeof(buffer[0])) {
+ // need to make dummy on-heap copy
+ scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
+ if (!scratch) return false;
+ }
+
+ // copy string to zero-terminated buffer and perform conversion
+ memcpy(scratch, begin, length * sizeof(char_t));
+ scratch[length] = 0;
+
+ *out_result = convert_string_to_number(scratch);
+
+ // free dummy buffer
+ if (scratch != buffer) xml_memory::deallocate(scratch);
+
+ return true;
+}
+
+PUGI__FN double round_nearest(double value)
+{
+ return floor(value + 0.5);
+}
+
+PUGI__FN double round_nearest_nzero(double value)
+{
+ // same as round_nearest, but returns -0 for [-0.5, -0]
+ // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
+ return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
+}
+
+PUGI__FN const char_t* qualified_name(const xpath_node& node)
+{
+ return node.attribute() ? node.attribute().name() : node.node().name();
+}
+
+PUGI__FN const char_t* local_name(const xpath_node& node)
+{
+ const char_t* name = qualified_name(node);
+ const char_t* p = find_char(name, ':');
+
+ return p ? p + 1 : name;
+}
+
+struct namespace_uri_predicate {
+ const char_t* prefix;
+ size_t prefix_length;
+
+ namespace_uri_predicate(const char_t* name) {
+ const char_t* pos = find_char(name, ':');
+
+ prefix = pos ? name : 0;
+ prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
+ }
+
+ bool operator()(const xml_attribute& a) const {
+ const char_t* name = a.name();
+
+ if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
+
+ return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
+ }
+};
+
+PUGI__FN const char_t* namespace_uri(const xml_node& node)
+{
+ namespace_uri_predicate pred = node.name();
+
+ xml_node p = node;
+
+ while (p) {
+ xml_attribute a = p.find_attribute(pred);
+
+ if (a) return a.value();
+
+ p = p.parent();
+ }
+
+ return PUGIXML_TEXT("");
+}
+
+PUGI__FN const char_t* namespace_uri(const xml_attribute& attr, const xml_node& parent)
+{
+ namespace_uri_predicate pred = attr.name();
+
+ // Default namespace does not apply to attributes
+ if (!pred.prefix) return PUGIXML_TEXT("");
+
+ xml_node p = parent;
+
+ while (p) {
+ xml_attribute a = p.find_attribute(pred);
+
+ if (a) return a.value();
+
+ p = p.parent();
+ }
+
+ return PUGIXML_TEXT("");
+}
+
+PUGI__FN const char_t* namespace_uri(const xpath_node& node)
+{
+ return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
+}
- PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
- {
- // try special number conversion
- const char_t* special = convert_number_to_string_special(value);
- if (special) return xpath_string_const(special);
-
- // get mantissa + exponent form
- char mantissa_buffer[64];
-
- char* mantissa;
- int exponent;
- convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent);
-
- // make the number!
- char_t result[512];
- char_t* s = result;
-
- // sign
- if (value < 0) *s++ = '-';
-
- // integer part
- if (exponent <= 0)
- {
- *s++ = '0';
- }
- else
- {
- while (exponent > 0)
- {
- assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9);
- *s++ = *mantissa ? *mantissa++ : '0';
- exponent--;
- }
- }
-
- // fractional part
- if (*mantissa)
- {
- // decimal point
- *s++ = '.';
-
- // extra zeroes from negative exponent
- while (exponent < 0)
- {
- *s++ = '0';
- exponent++;
- }
-
- // extra mantissa digits
- while (*mantissa)
- {
- assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
- *s++ = *mantissa++;
- }
- }
-
- // zero-terminate
- assert(s < result + sizeof(result) / sizeof(result[0]));
- *s = 0;
-
- return xpath_string(result, alloc);
- }
-
- PUGI__FN bool check_string_to_number_format(const char_t* string)
- {
- // parse leading whitespace
- while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
-
- // parse sign
- if (*string == '-') ++string;
-
- if (!*string) return false;
-
- // if there is no integer part, there should be a decimal part with at least one digit
- if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
-
- // parse integer part
- while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
-
- // parse decimal part
- if (*string == '.')
- {
- ++string;
-
- while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
- }
-
- // parse trailing whitespace
- while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
-
- return *string == 0;
- }
-
- PUGI__FN double convert_string_to_number(const char_t* string)
- {
- // check string format
- if (!check_string_to_number_format(string)) return gen_nan();
-
- // parse string
- #ifdef PUGIXML_WCHAR_MODE
- return wcstod(string, 0);
- #else
- return atof(string);
- #endif
- }
-
- PUGI__FN bool convert_string_to_number(const char_t* begin, const char_t* end, double* out_result)
- {
- char_t buffer[32];
-
- size_t length = static_cast<size_t>(end - begin);
- char_t* scratch = buffer;
-
- if (length >= sizeof(buffer) / sizeof(buffer[0]))
- {
- // need to make dummy on-heap copy
- scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
- if (!scratch) return false;
- }
-
- // copy string to zero-terminated buffer and perform conversion
- memcpy(scratch, begin, length * sizeof(char_t));
- scratch[length] = 0;
-
- *out_result = convert_string_to_number(scratch);
-
- // free dummy buffer
- if (scratch != buffer) xml_memory::deallocate(scratch);
-
- return true;
- }
-
- PUGI__FN double round_nearest(double value)
- {
- return floor(value + 0.5);
- }
-
- PUGI__FN double round_nearest_nzero(double value)
- {
- // same as round_nearest, but returns -0 for [-0.5, -0]
- // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
- return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
- }
-
- PUGI__FN const char_t* qualified_name(const xpath_node& node)
- {
- return node.attribute() ? node.attribute().name() : node.node().name();
- }
-
- PUGI__FN const char_t* local_name(const xpath_node& node)
- {
- const char_t* name = qualified_name(node);
- const char_t* p = find_char(name, ':');
-
- return p ? p + 1 : name;
- }
-
- struct namespace_uri_predicate
- {
- const char_t* prefix;
- size_t prefix_length;
-
- namespace_uri_predicate(const char_t* name)
- {
- const char_t* pos = find_char(name, ':');
-
- prefix = pos ? name : 0;
- prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
- }
-
- bool operator()(const xml_attribute& a) const
- {
- const char_t* name = a.name();
-
- if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
-
- return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
- }
- };
-
- PUGI__FN const char_t* namespace_uri(const xml_node& node)
- {
- namespace_uri_predicate pred = node.name();
-
- xml_node p = node;
-
- while (p)
- {
- xml_attribute a = p.find_attribute(pred);
-
- if (a) return a.value();
-
- p = p.parent();
- }
-
- return PUGIXML_TEXT("");
- }
-
- PUGI__FN const char_t* namespace_uri(const xml_attribute& attr, const xml_node& parent)
- {
- namespace_uri_predicate pred = attr.name();
-
- // Default namespace does not apply to attributes
- if (!pred.prefix) return PUGIXML_TEXT("");
-
- xml_node p = parent;
-
- while (p)
- {
- xml_attribute a = p.find_attribute(pred);
-
- if (a) return a.value();
-
- p = p.parent();
- }
-
- return PUGIXML_TEXT("");
- }
-
- PUGI__FN const char_t* namespace_uri(const xpath_node& node)
- {
- return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
- }
-
- PUGI__FN void normalize_space(char_t* buffer)
- {
- char_t* write = buffer;
-
- for (char_t* it = buffer; *it; )
- {
- char_t ch = *it++;
-
- if (PUGI__IS_CHARTYPE(ch, ct_space))
- {
- // replace whitespace sequence with single space
- while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
-
- // avoid leading spaces
- if (write != buffer) *write++ = ' ';
- }
- else *write++ = ch;
- }
-
- // remove trailing space
- if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
-
- // zero-terminate
- *write = 0;
- }
-
- PUGI__FN void translate(char_t* buffer, const char_t* from, const char_t* to)
- {
- size_t to_length = strlength(to);
-
- char_t* write = buffer;
-
- while (*buffer)
- {
- PUGI__DMC_VOLATILE char_t ch = *buffer++;
-
- const char_t* pos = find_char(from, ch);
-
- if (!pos)
- *write++ = ch; // do not process
- else if (static_cast<size_t>(pos - from) < to_length)
- *write++ = to[pos - from]; // replace
- }
-
- // zero-terminate
- *write = 0;
- }
-
- struct xpath_variable_boolean: xpath_variable
- {
- xpath_variable_boolean(): value(false)
- {
- }
-
- bool value;
- char_t name[1];
- };
-
- struct xpath_variable_number: xpath_variable
- {
- xpath_variable_number(): value(0)
- {
- }
-
- double value;
- char_t name[1];
- };
-
- struct xpath_variable_string: xpath_variable
- {
- xpath_variable_string(): value(0)
- {
- }
-
- ~xpath_variable_string()
- {
- if (value) xml_memory::deallocate(value);
- }
-
- char_t* value;
- char_t name[1];
- };
-
- struct xpath_variable_node_set: xpath_variable
- {
- xpath_node_set value;
- char_t name[1];
- };
-
- static const xpath_node_set dummy_node_set;
-
- PUGI__FN unsigned int hash_string(const char_t* str)
- {
- // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
- unsigned int result = 0;
-
- while (*str)
- {
- result += static_cast<unsigned int>(*str++);
- result += result << 10;
- result ^= result >> 6;
- }
-
- result += result << 3;
- result ^= result >> 11;
- result += result << 15;
-
- return result;
- }
-
- template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
- {
- size_t length = strlength(name);
- if (length == 0) return 0; // empty variable names are invalid
-
- // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
- void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
- if (!memory) return 0;
-
- T* result = new (memory) T();
-
- memcpy(result->name, name, (length + 1) * sizeof(char_t));
-
- return result;
- }
-
- PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
- {
- switch (type)
- {
- case xpath_type_node_set:
- return new_xpath_variable<xpath_variable_node_set>(name);
-
- case xpath_type_number:
- return new_xpath_variable<xpath_variable_number>(name);
-
- case xpath_type_string:
- return new_xpath_variable<xpath_variable_string>(name);
-
- case xpath_type_boolean:
- return new_xpath_variable<xpath_variable_boolean>(name);
-
- default:
- return 0;
- }
- }
-
- template <typename T> PUGI__FN void delete_xpath_variable(T* var)
- {
- var->~T();
- xml_memory::deallocate(var);
- }
-
- PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
- {
- switch (type)
- {
- case xpath_type_node_set:
- delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
- break;
-
- case xpath_type_number:
- delete_xpath_variable(static_cast<xpath_variable_number*>(var));
- break;
-
- case xpath_type_string:
- delete_xpath_variable(static_cast<xpath_variable_string*>(var));
- break;
-
- case xpath_type_boolean:
- delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
- break;
-
- default:
- assert(!"Invalid variable type");
- }
- }
-
- PUGI__FN xpath_variable* get_variable(xpath_variable_set* set, const char_t* begin, const char_t* end)
- {
- char_t buffer[32];
-
- size_t length = static_cast<size_t>(end - begin);
- char_t* scratch = buffer;
-
- if (length >= sizeof(buffer) / sizeof(buffer[0]))
- {
- // need to make dummy on-heap copy
- scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
- if (!scratch) return 0;
- }
-
- // copy string to zero-terminated buffer and perform lookup
- memcpy(scratch, begin, length * sizeof(char_t));
- scratch[length] = 0;
-
- xpath_variable* result = set->get(scratch);
-
- // free dummy buffer
- if (scratch != buffer) xml_memory::deallocate(scratch);
-
- return result;
- }
+PUGI__FN void normalize_space(char_t* buffer)
+{
+ char_t* write = buffer;
+
+ for (char_t* it = buffer; *it; ) {
+ char_t ch = *it++;
+
+ if (PUGI__IS_CHARTYPE(ch, ct_space)) {
+ // replace whitespace sequence with single space
+ while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
+
+ // avoid leading spaces
+ if (write != buffer) *write++ = ' ';
+ } else *write++ = ch;
+ }
+
+ // remove trailing space
+ if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
+
+ // zero-terminate
+ *write = 0;
+}
+
+PUGI__FN void translate(char_t* buffer, const char_t* from, const char_t* to)
+{
+ size_t to_length = strlength(to);
+
+ char_t* write = buffer;
+
+ while (*buffer) {
+ PUGI__DMC_VOLATILE char_t ch = *buffer++;
+
+ const char_t* pos = find_char(from, ch);
+
+ if (!pos)
+ *write++ = ch; // do not process
+ else if (static_cast<size_t>(pos - from) < to_length)
+ *write++ = to[pos - from]; // replace
+ }
+
+ // zero-terminate
+ *write = 0;
+}
+
+struct xpath_variable_boolean: xpath_variable {
+ xpath_variable_boolean(): value(false) {
+ }
+
+ bool value;
+ char_t name[1];
+};
+
+struct xpath_variable_number: xpath_variable {
+ xpath_variable_number(): value(0) {
+ }
+
+ double value;
+ char_t name[1];
+};
+
+struct xpath_variable_string: xpath_variable {
+ xpath_variable_string(): value(0) {
+ }
+
+ ~xpath_variable_string() {
+ if (value) xml_memory::deallocate(value);
+ }
+
+ char_t* value;
+ char_t name[1];
+};
+
+struct xpath_variable_node_set: xpath_variable {
+ xpath_node_set value;
+ char_t name[1];
+};
+
+static const xpath_node_set dummy_node_set;
+
+PUGI__FN unsigned int hash_string(const char_t* str)
+{
+ // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
+ unsigned int result = 0;
+
+ while (*str) {
+ result += static_cast<unsigned int>(*str++);
+ result += result << 10;
+ result ^= result >> 6;
+ }
+
+ result += result << 3;
+ result ^= result >> 11;
+ result += result << 15;
+
+ return result;
+}
+
+template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
+{
+ size_t length = strlength(name);
+ if (length == 0) return 0; // empty variable names are invalid
+
+ // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
+ void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
+ if (!memory) return 0;
+
+ T* result = new (memory) T();
+
+ memcpy(result->name, name, (length + 1) * sizeof(char_t));
+
+ return result;
+}
+
+PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
+{
+ switch (type) {
+ case xpath_type_node_set:
+ return new_xpath_variable<xpath_variable_node_set>(name);
+
+ case xpath_type_number:
+ return new_xpath_variable<xpath_variable_number>(name);
+
+ case xpath_type_string:
+ return new_xpath_variable<xpath_variable_string>(name);
+
+ case xpath_type_boolean:
+ return new_xpath_variable<xpath_variable_boolean>(name);
+
+ default:
+ return 0;
+ }
+}
+
+template <typename T> PUGI__FN void delete_xpath_variable(T* var)
+{
+ var->~T();
+ xml_memory::deallocate(var);
+}
+
+PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
+{
+ switch (type) {
+ case xpath_type_node_set:
+ delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
+ break;
+
+ case xpath_type_number:
+ delete_xpath_variable(static_cast<xpath_variable_number*>(var));
+ break;
+
+ case xpath_type_string:
+ delete_xpath_variable(static_cast<xpath_variable_string*>(var));
+ break;
+
+ case xpath_type_boolean:
+ delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
+ break;
+
+ default:
+ assert(!"Invalid variable type");
+ }
+}
+
+PUGI__FN xpath_variable* get_variable(xpath_variable_set* set, const char_t* begin, const char_t* end)
+{
+ char_t buffer[32];
+
+ size_t length = static_cast<size_t>(end - begin);
+ char_t* scratch = buffer;
+
+ if (length >= sizeof(buffer) / sizeof(buffer[0])) {
+ // need to make dummy on-heap copy
+ scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
+ if (!scratch) return 0;
+ }
+
+ // copy string to zero-terminated buffer and perform lookup
+ memcpy(scratch, begin, length * sizeof(char_t));
+ scratch[length] = 0;
+
+ xpath_variable* result = set->get(scratch);
+
+ // free dummy buffer
+ if (scratch != buffer) xml_memory::deallocate(scratch);
+
+ return result;
+}
PUGI__NS_END
// Internal node set class
PUGI__NS_BEGIN
- PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
- {
- xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
-
- if (type == xpath_node_set::type_unsorted)
- {
- sort(begin, end, document_order_comparator());
-
- type = xpath_node_set::type_sorted;
- }
-
- if (type != order) reverse(begin, end);
-
- return order;
- }
-
- PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
- {
- if (begin == end) return xpath_node();
-
- switch (type)
- {
- case xpath_node_set::type_sorted:
- return *begin;
-
- case xpath_node_set::type_sorted_reverse:
- return *(end - 1);
-
- case xpath_node_set::type_unsorted:
- return *min_element(begin, end, document_order_comparator());
-
- default:
- assert(!"Invalid node set type");
- return xpath_node();
- }
- }
-
- class xpath_node_set_raw
- {
- xpath_node_set::type_t _type;
-
- xpath_node* _begin;
- xpath_node* _end;
- xpath_node* _eos;
-
- public:
- xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
- {
- }
-
- xpath_node* begin() const
- {
- return _begin;
- }
-
- xpath_node* end() const
- {
- return _end;
- }
-
- bool empty() const
- {
- return _begin == _end;
- }
-
- size_t size() const
- {
- return static_cast<size_t>(_end - _begin);
- }
-
- xpath_node first() const
- {
- return xpath_first(_begin, _end, _type);
- }
-
- void push_back(const xpath_node& node, xpath_allocator* alloc)
- {
- if (_end == _eos)
- {
- size_t capacity = static_cast<size_t>(_eos - _begin);
-
- // get new capacity (1.5x rule)
- size_t new_capacity = capacity + capacity / 2 + 1;
-
- // reallocate the old array or allocate a new one
- xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
- assert(data);
-
- // finalize
- _begin = data;
- _end = data + capacity;
- _eos = data + new_capacity;
- }
-
- *_end++ = node;
- }
-
- void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
- {
- size_t size_ = static_cast<size_t>(_end - _begin);
- size_t capacity = static_cast<size_t>(_eos - _begin);
- size_t count = static_cast<size_t>(end_ - begin_);
-
- if (size_ + count > capacity)
- {
- // reallocate the old array or allocate a new one
- xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
- assert(data);
-
- // finalize
- _begin = data;
- _end = data + size_;
- _eos = data + size_ + count;
- }
-
- memcpy(_end, begin_, count * sizeof(xpath_node));
- _end += count;
- }
-
- void sort_do()
- {
- _type = xpath_sort(_begin, _end, _type, false);
- }
-
- void truncate(xpath_node* pos)
- {
- assert(_begin <= pos && pos <= _end);
-
- _end = pos;
- }
-
- void remove_duplicates()
- {
- if (_type == xpath_node_set::type_unsorted)
- sort(_begin, _end, duplicate_comparator());
-
- _end = unique(_begin, _end);
- }
-
- xpath_node_set::type_t type() const
- {
- return _type;
- }
-
- void set_type(xpath_node_set::type_t value)
- {
- _type = value;
- }
- };
+PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
+{
+ xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
+
+ if (type == xpath_node_set::type_unsorted) {
+ sort(begin, end, document_order_comparator());
+
+ type = xpath_node_set::type_sorted;
+ }
+
+ if (type != order) reverse(begin, end);
+
+ return order;
+}
+
+PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
+{
+ if (begin == end) return xpath_node();
+
+ switch (type) {
+ case xpath_node_set::type_sorted:
+ return *begin;
+
+ case xpath_node_set::type_sorted_reverse:
+ return *(end - 1);
+
+ case xpath_node_set::type_unsorted:
+ return *min_element(begin, end, document_order_comparator());
+
+ default:
+ assert(!"Invalid node set type");
+ return xpath_node();
+ }
+}
+
+class xpath_node_set_raw
+{
+ xpath_node_set::type_t _type;
+
+ xpath_node* _begin;
+ xpath_node* _end;
+ xpath_node* _eos;
+
+public:
+ xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0) {
+ }
+
+ xpath_node* begin() const {
+ return _begin;
+ }
+
+ xpath_node* end() const {
+ return _end;
+ }
+
+ bool empty() const {
+ return _begin == _end;
+ }
+
+ size_t size() const {
+ return static_cast<size_t>(_end - _begin);
+ }
+
+ xpath_node first() const {
+ return xpath_first(_begin, _end, _type);
+ }
+
+ void push_back(const xpath_node& node, xpath_allocator* alloc) {
+ if (_end == _eos) {
+ size_t capacity = static_cast<size_t>(_eos - _begin);
+
+ // get new capacity (1.5x rule)
+ size_t new_capacity = capacity + capacity / 2 + 1;
+
+ // reallocate the old array or allocate a new one
+ xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
+ assert(data);
+
+ // finalize
+ _begin = data;
+ _end = data + capacity;
+ _eos = data + new_capacity;
+ }
+
+ *_end++ = node;
+ }
+
+ void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc) {
+ size_t size_ = static_cast<size_t>(_end - _begin);
+ size_t capacity = static_cast<size_t>(_eos - _begin);
+ size_t count = static_cast<size_t>(end_ - begin_);
+
+ if (size_ + count > capacity) {
+ // reallocate the old array or allocate a new one
+ xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
+ assert(data);
+
+ // finalize
+ _begin = data;
+ _end = data + size_;
+ _eos = data + size_ + count;
+ }
+
+ memcpy(_end, begin_, count * sizeof(xpath_node));
+ _end += count;
+ }
+
+ void sort_do() {
+ _type = xpath_sort(_begin, _end, _type, false);
+ }
+
+ void truncate(xpath_node* pos) {
+ assert(_begin <= pos && pos <= _end);
+
+ _end = pos;
+ }
+
+ void remove_duplicates() {
+ if (_type == xpath_node_set::type_unsorted)
+ sort(_begin, _end, duplicate_comparator());
+
+ _end = unique(_begin, _end);
+ }
+
+ xpath_node_set::type_t type() const {
+ return _type;
+ }
+
+ void set_type(xpath_node_set::type_t value) {
+ _type = value;
+ }
+};
PUGI__NS_END
PUGI__NS_BEGIN
- struct xpath_context
- {
- xpath_node n;
- size_t position, size;
-
- xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
- {
- }
- };
-
- enum lexeme_t
- {
- lex_none = 0,
- lex_equal,
- lex_not_equal,
- lex_less,
- lex_greater,
- lex_less_or_equal,
- lex_greater_or_equal,
- lex_plus,
- lex_minus,
- lex_multiply,
- lex_union,
- lex_var_ref,
- lex_open_brace,
- lex_close_brace,
- lex_quoted_string,
- lex_number,
- lex_slash,
- lex_double_slash,
- lex_open_square_brace,
- lex_close_square_brace,
- lex_string,
- lex_comma,
- lex_axis_attribute,
- lex_dot,
- lex_double_dot,
- lex_double_colon,
- lex_eof
- };
-
- struct xpath_lexer_string
- {
- const char_t* begin;
- const char_t* end;
-
- xpath_lexer_string(): begin(0), end(0)
- {
- }
-
- bool operator==(const char_t* other) const
- {
- size_t length = static_cast<size_t>(end - begin);
-
- return strequalrange(other, begin, length);
- }
- };
-
- class xpath_lexer
- {
- const char_t* _cur;
- const char_t* _cur_lexeme_pos;
- xpath_lexer_string _cur_lexeme_contents;
-
- lexeme_t _cur_lexeme;
-
- public:
- explicit xpath_lexer(const char_t* query): _cur(query)
- {
- next();
- }
-
- const char_t* state() const
- {
- return _cur;
- }
-
- void next()
- {
- const char_t* cur = _cur;
-
- while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
-
- // save lexeme position for error reporting
- _cur_lexeme_pos = cur;
-
- switch (*cur)
- {
- case 0:
- _cur_lexeme = lex_eof;
- break;
-
- case '>':
- if (*(cur+1) == '=')
- {
- cur += 2;
- _cur_lexeme = lex_greater_or_equal;
- }
- else
- {
- cur += 1;
- _cur_lexeme = lex_greater;
- }
- break;
-
- case '<':
- if (*(cur+1) == '=')
- {
- cur += 2;
- _cur_lexeme = lex_less_or_equal;
- }
- else
- {
- cur += 1;
- _cur_lexeme = lex_less;
- }
- break;
-
- case '!':
- if (*(cur+1) == '=')
- {
- cur += 2;
- _cur_lexeme = lex_not_equal;
- }
- else
- {
- _cur_lexeme = lex_none;
- }
- break;
-
- case '=':
- cur += 1;
- _cur_lexeme = lex_equal;
-
- break;
-
- case '+':
- cur += 1;
- _cur_lexeme = lex_plus;
-
- break;
-
- case '-':
- cur += 1;
- _cur_lexeme = lex_minus;
-
- break;
-
- case '*':
- cur += 1;
- _cur_lexeme = lex_multiply;
-
- break;
-
- case '|':
- cur += 1;
- _cur_lexeme = lex_union;
-
- break;
-
- case '$':
- cur += 1;
-
- if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
- {
- _cur_lexeme_contents.begin = cur;
-
- while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
-
- if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
- {
- cur++; // :
-
- while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
- }
-
- _cur_lexeme_contents.end = cur;
-
- _cur_lexeme = lex_var_ref;
- }
- else
- {
- _cur_lexeme = lex_none;
- }
-
- break;
-
- case '(':
- cur += 1;
- _cur_lexeme = lex_open_brace;
-
- break;
-
- case ')':
- cur += 1;
- _cur_lexeme = lex_close_brace;
-
- break;
-
- case '[':
- cur += 1;
- _cur_lexeme = lex_open_square_brace;
-
- break;
-
- case ']':
- cur += 1;
- _cur_lexeme = lex_close_square_brace;
-
- break;
-
- case ',':
- cur += 1;
- _cur_lexeme = lex_comma;
-
- break;
-
- case '/':
- if (*(cur+1) == '/')
- {
- cur += 2;
- _cur_lexeme = lex_double_slash;
- }
- else
- {
- cur += 1;
- _cur_lexeme = lex_slash;
- }
- break;
-
- case '.':
- if (*(cur+1) == '.')
- {
- cur += 2;
- _cur_lexeme = lex_double_dot;
- }
- else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
- {
- _cur_lexeme_contents.begin = cur; // .
-
- ++cur;
-
- while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
-
- _cur_lexeme_contents.end = cur;
-
- _cur_lexeme = lex_number;
- }
- else
- {
- cur += 1;
- _cur_lexeme = lex_dot;
- }
- break;
-
- case '@':
- cur += 1;
- _cur_lexeme = lex_axis_attribute;
-
- break;
-
- case '"':
- case '\'':
- {
- char_t terminator = *cur;
-
- ++cur;
-
- _cur_lexeme_contents.begin = cur;
- while (*cur && *cur != terminator) cur++;
- _cur_lexeme_contents.end = cur;
-
- if (!*cur)
- _cur_lexeme = lex_none;
- else
- {
- cur += 1;
- _cur_lexeme = lex_quoted_string;
- }
-
- break;
- }
-
- case ':':
- if (*(cur+1) == ':')
- {
- cur += 2;
- _cur_lexeme = lex_double_colon;
- }
- else
- {
- _cur_lexeme = lex_none;
- }
- break;
-
- default:
- if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
- {
- _cur_lexeme_contents.begin = cur;
-
- while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
-
- if (*cur == '.')
- {
- cur++;
-
- while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
- }
-
- _cur_lexeme_contents.end = cur;
-
- _cur_lexeme = lex_number;
- }
- else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
- {
- _cur_lexeme_contents.begin = cur;
-
- while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
-
- if (cur[0] == ':')
- {
- if (cur[1] == '*') // namespace test ncname:*
- {
- cur += 2; // :*
- }
- else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
- {
- cur++; // :
-
- while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
- }
- }
-
- _cur_lexeme_contents.end = cur;
-
- _cur_lexeme = lex_string;
- }
- else
- {
- _cur_lexeme = lex_none;
- }
- }
-
- _cur = cur;
- }
-
- lexeme_t current() const
- {
- return _cur_lexeme;
- }
-
- const char_t* current_pos() const
- {
- return _cur_lexeme_pos;
- }
-
- const xpath_lexer_string& contents() const
- {
- assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
-
- return _cur_lexeme_contents;
- }
- };
-
- enum ast_type_t
- {
- ast_op_or, // left or right
- ast_op_and, // left and right
- ast_op_equal, // left = right
- ast_op_not_equal, // left != right
- ast_op_less, // left < right
- ast_op_greater, // left > right
- ast_op_less_or_equal, // left <= right
- ast_op_greater_or_equal, // left >= right
- ast_op_add, // left + right
- ast_op_subtract, // left - right
- ast_op_multiply, // left * right
- ast_op_divide, // left / right
- ast_op_mod, // left % right
- ast_op_negate, // left - right
- ast_op_union, // left | right
- ast_predicate, // apply predicate to set; next points to next predicate
- ast_filter, // select * from left where right
- ast_filter_posinv, // select * from left where right; proximity position invariant
- ast_string_constant, // string constant
- ast_number_constant, // number constant
- ast_variable, // variable
- ast_func_last, // last()
- ast_func_position, // position()
- ast_func_count, // count(left)
- ast_func_id, // id(left)
- ast_func_local_name_0, // local-name()
- ast_func_local_name_1, // local-name(left)
- ast_func_namespace_uri_0, // namespace-uri()
- ast_func_namespace_uri_1, // namespace-uri(left)
- ast_func_name_0, // name()
- ast_func_name_1, // name(left)
- ast_func_string_0, // string()
- ast_func_string_1, // string(left)
- ast_func_concat, // concat(left, right, siblings)
- ast_func_starts_with, // starts_with(left, right)
- ast_func_contains, // contains(left, right)
- ast_func_substring_before, // substring-before(left, right)
- ast_func_substring_after, // substring-after(left, right)
- ast_func_substring_2, // substring(left, right)
- ast_func_substring_3, // substring(left, right, third)
- ast_func_string_length_0, // string-length()
- ast_func_string_length_1, // string-length(left)
- ast_func_normalize_space_0, // normalize-space()
- ast_func_normalize_space_1, // normalize-space(left)
- ast_func_translate, // translate(left, right, third)
- ast_func_boolean, // boolean(left)
- ast_func_not, // not(left)
- ast_func_true, // true()
- ast_func_false, // false()
- ast_func_lang, // lang(left)
- ast_func_number_0, // number()
- ast_func_number_1, // number(left)
- ast_func_sum, // sum(left)
- ast_func_floor, // floor(left)
- ast_func_ceiling, // ceiling(left)
- ast_func_round, // round(left)
- ast_step, // process set left with step
- ast_step_root // select root node
- };
-
- enum axis_t
- {
- axis_ancestor,
- axis_ancestor_or_self,
- axis_attribute,
- axis_child,
- axis_descendant,
- axis_descendant_or_self,
- axis_following,
- axis_following_sibling,
- axis_namespace,
- axis_parent,
- axis_preceding,
- axis_preceding_sibling,
- axis_self
- };
-
- enum nodetest_t
- {
- nodetest_none,
- nodetest_name,
- nodetest_type_node,
- nodetest_type_comment,
- nodetest_type_pi,
- nodetest_type_text,
- nodetest_pi,
- nodetest_all,
- nodetest_all_in_namespace
- };
-
- template <axis_t N> struct axis_to_type
- {
- static const axis_t axis;
- };
-
- template <axis_t N> const axis_t axis_to_type<N>::axis = N;
-
- class xpath_ast_node
- {
- private:
- // node type
- char _type;
- char _rettype;
-
- // for ast_step / ast_predicate
- char _axis;
- char _test;
-
- // tree node structure
- xpath_ast_node* _left;
- xpath_ast_node* _right;
- xpath_ast_node* _next;
-
- union
- {
- // value for ast_string_constant
- const char_t* string;
- // value for ast_number_constant
- double number;
- // variable for ast_variable
- xpath_variable* variable;
- // node test for ast_step (node name/namespace/node type/pi target)
- const char_t* nodetest;
- } _data;
-
- xpath_ast_node(const xpath_ast_node&);
- xpath_ast_node& operator=(const xpath_ast_node&);
-
- template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
- {
- xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
-
- if (lt != xpath_type_node_set && rt != xpath_type_node_set)
- {
- if (lt == xpath_type_boolean || rt == xpath_type_boolean)
- return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
- else if (lt == xpath_type_number || rt == xpath_type_number)
- return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
- else if (lt == xpath_type_string || rt == xpath_type_string)
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_string ls = lhs->eval_string(c, stack);
- xpath_string rs = rhs->eval_string(c, stack);
-
- return comp(ls, rs);
- }
- }
- else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
-
- for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
- for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
- {
- xpath_allocator_capture cri(stack.result);
-
- if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
- return true;
- }
-
- return false;
- }
- else
- {
- if (lt == xpath_type_node_set)
- {
- swap(lhs, rhs);
- swap(lt, rt);
- }
-
- if (lt == xpath_type_boolean)
- return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
- else if (lt == xpath_type_number)
- {
- xpath_allocator_capture cr(stack.result);
-
- double l = lhs->eval_number(c, stack);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
-
- for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
- {
- xpath_allocator_capture cri(stack.result);
-
- if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
- return true;
- }
-
- return false;
- }
- else if (lt == xpath_type_string)
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_string l = lhs->eval_string(c, stack);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
-
- for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
- {
- xpath_allocator_capture cri(stack.result);
-
- if (comp(l, string_value(*ri, stack.result)))
- return true;
- }
-
- return false;
- }
- }
-
- assert(!"Wrong types");
- return false;
- }
-
- template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
- {
- xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
-
- if (lt != xpath_type_node_set && rt != xpath_type_node_set)
- return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
- else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
-
- for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
- {
- xpath_allocator_capture cri(stack.result);
-
- double l = convert_string_to_number(string_value(*li, stack.result).c_str());
-
- for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
- {
- xpath_allocator_capture crii(stack.result);
-
- if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
- return true;
- }
- }
-
- return false;
- }
- else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
- {
- xpath_allocator_capture cr(stack.result);
-
- double l = lhs->eval_number(c, stack);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
-
- for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
- {
- xpath_allocator_capture cri(stack.result);
-
- if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
- return true;
- }
-
- return false;
- }
- else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
- double r = rhs->eval_number(c, stack);
-
- for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
- {
- xpath_allocator_capture cri(stack.result);
-
- if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
- return true;
- }
-
- return false;
- }
- else
- {
- assert(!"Wrong types");
- return false;
- }
- }
-
- void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
- {
- assert(ns.size() >= first);
-
- size_t i = 1;
- size_t size = ns.size() - first;
-
- xpath_node* last = ns.begin() + first;
-
- // remove_if... or well, sort of
- for (xpath_node* it = last; it != ns.end(); ++it, ++i)
- {
- xpath_context c(*it, i, size);
-
- if (expr->rettype() == xpath_type_number)
- {
- if (expr->eval_number(c, stack) == i)
- *last++ = *it;
- }
- else if (expr->eval_boolean(c, stack))
- *last++ = *it;
- }
-
- ns.truncate(last);
- }
-
- void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack)
- {
- if (ns.size() == first) return;
-
- for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
- {
- apply_predicate(ns, first, pred->_left, stack);
- }
- }
-
- void step_push(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& parent, xpath_allocator* alloc)
- {
- if (!a) return;
-
- const char_t* name = a.name();
-
- // There are no attribute nodes corresponding to attributes that declare namespaces
- // That is, "xmlns:..." or "xmlns"
- if (starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')) return;
-
- switch (_test)
- {
- case nodetest_name:
- if (strequal(name, _data.nodetest)) ns.push_back(xpath_node(a, parent), alloc);
- break;
-
- case nodetest_type_node:
- case nodetest_all:
- ns.push_back(xpath_node(a, parent), alloc);
- break;
-
- case nodetest_all_in_namespace:
- if (starts_with(name, _data.nodetest))
- ns.push_back(xpath_node(a, parent), alloc);
- break;
-
- default:
- ;
- }
- }
-
- void step_push(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc)
- {
- if (!n) return;
-
- switch (_test)
- {
- case nodetest_name:
- if (n.type() == node_element && strequal(n.name(), _data.nodetest)) ns.push_back(n, alloc);
- break;
-
- case nodetest_type_node:
- ns.push_back(n, alloc);
- break;
-
- case nodetest_type_comment:
- if (n.type() == node_comment)
- ns.push_back(n, alloc);
- break;
-
- case nodetest_type_text:
- if (n.type() == node_pcdata || n.type() == node_cdata)
- ns.push_back(n, alloc);
- break;
-
- case nodetest_type_pi:
- if (n.type() == node_pi)
- ns.push_back(n, alloc);
- break;
-
- case nodetest_pi:
- if (n.type() == node_pi && strequal(n.name(), _data.nodetest))
- ns.push_back(n, alloc);
- break;
-
- case nodetest_all:
- if (n.type() == node_element)
- ns.push_back(n, alloc);
- break;
-
- case nodetest_all_in_namespace:
- if (n.type() == node_element && starts_with(n.name(), _data.nodetest))
- ns.push_back(n, alloc);
- break;
-
- default:
- assert(!"Unknown axis");
- }
- }
-
- template <class T> void step_fill(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc, T)
- {
- const axis_t axis = T::axis;
-
- switch (axis)
- {
- case axis_attribute:
- {
- for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute())
- step_push(ns, a, n, alloc);
-
- break;
- }
-
- case axis_child:
- {
- for (xml_node c = n.first_child(); c; c = c.next_sibling())
- step_push(ns, c, alloc);
-
- break;
- }
-
- case axis_descendant:
- case axis_descendant_or_self:
- {
- if (axis == axis_descendant_or_self)
- step_push(ns, n, alloc);
-
- xml_node cur = n.first_child();
-
- while (cur && cur != n)
- {
- step_push(ns, cur, alloc);
-
- if (cur.first_child())
- cur = cur.first_child();
- else if (cur.next_sibling())
- cur = cur.next_sibling();
- else
- {
- while (!cur.next_sibling() && cur != n)
- cur = cur.parent();
-
- if (cur != n) cur = cur.next_sibling();
- }
- }
-
- break;
- }
-
- case axis_following_sibling:
- {
- for (xml_node c = n.next_sibling(); c; c = c.next_sibling())
- step_push(ns, c, alloc);
-
- break;
- }
-
- case axis_preceding_sibling:
- {
- for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling())
- step_push(ns, c, alloc);
-
- break;
- }
-
- case axis_following:
- {
- xml_node cur = n;
-
- // exit from this node so that we don't include descendants
- while (cur && !cur.next_sibling()) cur = cur.parent();
- cur = cur.next_sibling();
-
- for (;;)
- {
- step_push(ns, cur, alloc);
-
- if (cur.first_child())
- cur = cur.first_child();
- else if (cur.next_sibling())
- cur = cur.next_sibling();
- else
- {
- while (cur && !cur.next_sibling()) cur = cur.parent();
- cur = cur.next_sibling();
-
- if (!cur) break;
- }
- }
-
- break;
- }
-
- case axis_preceding:
- {
- xml_node cur = n;
-
- while (cur && !cur.previous_sibling()) cur = cur.parent();
- cur = cur.previous_sibling();
-
- for (;;)
- {
- if (cur.last_child())
- cur = cur.last_child();
- else
- {
- // leaf node, can't be ancestor
- step_push(ns, cur, alloc);
-
- if (cur.previous_sibling())
- cur = cur.previous_sibling();
- else
- {
- do
- {
- cur = cur.parent();
- if (!cur) break;
-
- if (!node_is_ancestor(cur, n)) step_push(ns, cur, alloc);
- }
- while (!cur.previous_sibling());
-
- cur = cur.previous_sibling();
-
- if (!cur) break;
- }
- }
- }
-
- break;
- }
-
- case axis_ancestor:
- case axis_ancestor_or_self:
- {
- if (axis == axis_ancestor_or_self)
- step_push(ns, n, alloc);
-
- xml_node cur = n.parent();
-
- while (cur)
- {
- step_push(ns, cur, alloc);
-
- cur = cur.parent();
- }
-
- break;
- }
-
- case axis_self:
- {
- step_push(ns, n, alloc);
-
- break;
- }
-
- case axis_parent:
- {
- if (n.parent()) step_push(ns, n.parent(), alloc);
-
- break;
- }
-
- default:
- assert(!"Unimplemented axis");
- }
- }
-
- template <class T> void step_fill(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& p, xpath_allocator* alloc, T v)
- {
- const axis_t axis = T::axis;
-
- switch (axis)
- {
- case axis_ancestor:
- case axis_ancestor_or_self:
- {
- if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
- step_push(ns, a, p, alloc);
-
- xml_node cur = p;
-
- while (cur)
- {
- step_push(ns, cur, alloc);
-
- cur = cur.parent();
- }
-
- break;
- }
-
- case axis_descendant_or_self:
- case axis_self:
- {
- if (_test == nodetest_type_node) // reject attributes based on principal node type test
- step_push(ns, a, p, alloc);
-
- break;
- }
-
- case axis_following:
- {
- xml_node cur = p;
-
- for (;;)
- {
- if (cur.first_child())
- cur = cur.first_child();
- else if (cur.next_sibling())
- cur = cur.next_sibling();
- else
- {
- while (cur && !cur.next_sibling()) cur = cur.parent();
- cur = cur.next_sibling();
-
- if (!cur) break;
- }
-
- step_push(ns, cur, alloc);
- }
-
- break;
- }
-
- case axis_parent:
- {
- step_push(ns, p, alloc);
-
- break;
- }
-
- case axis_preceding:
- {
- // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
- step_fill(ns, p, alloc, v);
- break;
- }
-
- default:
- assert(!"Unimplemented axis");
- }
- }
-
- template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, T v)
- {
- const axis_t axis = T::axis;
- bool attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
-
- xpath_node_set_raw ns;
- ns.set_type((axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling) ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted);
-
- if (_left)
- {
- xpath_node_set_raw s = _left->eval_node_set(c, stack);
-
- // self axis preserves the original order
- if (axis == axis_self) ns.set_type(s.type());
-
- for (const xpath_node* it = s.begin(); it != s.end(); ++it)
- {
- size_t size = ns.size();
-
- // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
- if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
-
- if (it->node())
- step_fill(ns, it->node(), stack.result, v);
- else if (attributes)
- step_fill(ns, it->attribute(), it->parent(), stack.result, v);
-
- apply_predicates(ns, size, stack);
- }
- }
- else
- {
- if (c.n.node())
- step_fill(ns, c.n.node(), stack.result, v);
- else if (attributes)
- step_fill(ns, c.n.attribute(), c.n.parent(), stack.result, v);
-
- apply_predicates(ns, 0, stack);
- }
-
- // child, attribute and self axes always generate unique set of nodes
- // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
- if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
- ns.remove_duplicates();
-
- return ns;
- }
-
- public:
- xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
- _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
- {
- assert(type == ast_string_constant);
- _data.string = value;
- }
-
- xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
- _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
- {
- assert(type == ast_number_constant);
- _data.number = value;
- }
-
- xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
- _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
- {
- assert(type == ast_variable);
- _data.variable = value;
- }
-
- xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
- _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
- {
- }
-
- xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
- _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
- {
- _data.nodetest = contents;
- }
-
- void set_next(xpath_ast_node* value)
- {
- _next = value;
- }
-
- void set_right(xpath_ast_node* value)
- {
- _right = value;
- }
-
- bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
- {
- switch (_type)
- {
- case ast_op_or:
- return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
-
- case ast_op_and:
- return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
-
- case ast_op_equal:
- return compare_eq(_left, _right, c, stack, equal_to());
-
- case ast_op_not_equal:
- return compare_eq(_left, _right, c, stack, not_equal_to());
-
- case ast_op_less:
- return compare_rel(_left, _right, c, stack, less());
-
- case ast_op_greater:
- return compare_rel(_right, _left, c, stack, less());
-
- case ast_op_less_or_equal:
- return compare_rel(_left, _right, c, stack, less_equal());
-
- case ast_op_greater_or_equal:
- return compare_rel(_right, _left, c, stack, less_equal());
-
- case ast_func_starts_with:
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_string lr = _left->eval_string(c, stack);
- xpath_string rr = _right->eval_string(c, stack);
-
- return starts_with(lr.c_str(), rr.c_str());
- }
-
- case ast_func_contains:
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_string lr = _left->eval_string(c, stack);
- xpath_string rr = _right->eval_string(c, stack);
-
- return find_substring(lr.c_str(), rr.c_str()) != 0;
- }
-
- case ast_func_boolean:
- return _left->eval_boolean(c, stack);
-
- case ast_func_not:
- return !_left->eval_boolean(c, stack);
-
- case ast_func_true:
- return true;
-
- case ast_func_false:
- return false;
-
- case ast_func_lang:
- {
- if (c.n.attribute()) return false;
-
- xpath_allocator_capture cr(stack.result);
-
- xpath_string lang = _left->eval_string(c, stack);
-
- for (xml_node n = c.n.node(); n; n = n.parent())
- {
- xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
-
- if (a)
- {
- const char_t* value = a.value();
-
- // strnicmp / strncasecmp is not portable
- for (const char_t* lit = lang.c_str(); *lit; ++lit)
- {
- if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
- ++value;
- }
-
- return *value == 0 || *value == '-';
- }
- }
-
- return false;
- }
-
- case ast_variable:
- {
- assert(_rettype == _data.variable->type());
-
- if (_rettype == xpath_type_boolean)
- return _data.variable->get_boolean();
-
- // fallthrough to type conversion
- }
-
- default:
- {
- switch (_rettype)
- {
- case xpath_type_number:
- return convert_number_to_boolean(eval_number(c, stack));
-
- case xpath_type_string:
- {
- xpath_allocator_capture cr(stack.result);
-
- return !eval_string(c, stack).empty();
- }
-
- case xpath_type_node_set:
- {
- xpath_allocator_capture cr(stack.result);
-
- return !eval_node_set(c, stack).empty();
- }
-
- default:
- assert(!"Wrong expression for return type boolean");
- return false;
- }
- }
- }
- }
-
- double eval_number(const xpath_context& c, const xpath_stack& stack)
- {
- switch (_type)
- {
- case ast_op_add:
- return _left->eval_number(c, stack) + _right->eval_number(c, stack);
-
- case ast_op_subtract:
- return _left->eval_number(c, stack) - _right->eval_number(c, stack);
-
- case ast_op_multiply:
- return _left->eval_number(c, stack) * _right->eval_number(c, stack);
-
- case ast_op_divide:
- return _left->eval_number(c, stack) / _right->eval_number(c, stack);
-
- case ast_op_mod:
- return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
-
- case ast_op_negate:
- return -_left->eval_number(c, stack);
-
- case ast_number_constant:
- return _data.number;
-
- case ast_func_last:
- return static_cast<double>(c.size);
-
- case ast_func_position:
- return static_cast<double>(c.position);
-
- case ast_func_count:
- {
- xpath_allocator_capture cr(stack.result);
-
- return static_cast<double>(_left->eval_node_set(c, stack).size());
- }
-
- case ast_func_string_length_0:
- {
- xpath_allocator_capture cr(stack.result);
-
- return static_cast<double>(string_value(c.n, stack.result).length());
- }
-
- case ast_func_string_length_1:
- {
- xpath_allocator_capture cr(stack.result);
-
- return static_cast<double>(_left->eval_string(c, stack).length());
- }
-
- case ast_func_number_0:
- {
- xpath_allocator_capture cr(stack.result);
-
- return convert_string_to_number(string_value(c.n, stack.result).c_str());
- }
-
- case ast_func_number_1:
- return _left->eval_number(c, stack);
-
- case ast_func_sum:
- {
- xpath_allocator_capture cr(stack.result);
-
- double r = 0;
-
- xpath_node_set_raw ns = _left->eval_node_set(c, stack);
-
- for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
- {
- xpath_allocator_capture cri(stack.result);
-
- r += convert_string_to_number(string_value(*it, stack.result).c_str());
- }
-
- return r;
- }
-
- case ast_func_floor:
- {
- double r = _left->eval_number(c, stack);
-
- return r == r ? floor(r) : r;
- }
-
- case ast_func_ceiling:
- {
- double r = _left->eval_number(c, stack);
-
- return r == r ? ceil(r) : r;
- }
-
- case ast_func_round:
- return round_nearest_nzero(_left->eval_number(c, stack));
-
- case ast_variable:
- {
- assert(_rettype == _data.variable->type());
-
- if (_rettype == xpath_type_number)
- return _data.variable->get_number();
-
- // fallthrough to type conversion
- }
-
- default:
- {
- switch (_rettype)
- {
- case xpath_type_boolean:
- return eval_boolean(c, stack) ? 1 : 0;
-
- case xpath_type_string:
- {
- xpath_allocator_capture cr(stack.result);
-
- return convert_string_to_number(eval_string(c, stack).c_str());
- }
-
- case xpath_type_node_set:
- {
- xpath_allocator_capture cr(stack.result);
-
- return convert_string_to_number(eval_string(c, stack).c_str());
- }
-
- default:
- assert(!"Wrong expression for return type number");
- return 0;
- }
-
- }
- }
- }
-
- xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
- {
- assert(_type == ast_func_concat);
-
- xpath_allocator_capture ct(stack.temp);
-
- // count the string number
- size_t count = 1;
- for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
-
- // gather all strings
- xpath_string static_buffer[4];
- xpath_string* buffer = static_buffer;
-
- // allocate on-heap for large concats
- if (count > sizeof(static_buffer) / sizeof(static_buffer[0]))
- {
- buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
- assert(buffer);
- }
-
- // evaluate all strings to temporary stack
- xpath_stack swapped_stack = {stack.temp, stack.result};
-
- buffer[0] = _left->eval_string(c, swapped_stack);
-
- size_t pos = 1;
- for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
- assert(pos == count);
-
- // get total length
- size_t length = 0;
- for (size_t i = 0; i < count; ++i) length += buffer[i].length();
-
- // create final string
- char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
- assert(result);
-
- char_t* ri = result;
-
- for (size_t j = 0; j < count; ++j)
- for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
- *ri++ = *bi;
-
- *ri = 0;
-
- return xpath_string(result, true);
- }
-
- xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
- {
- switch (_type)
- {
- case ast_string_constant:
- return xpath_string_const(_data.string);
-
- case ast_func_local_name_0:
- {
- xpath_node na = c.n;
-
- return xpath_string_const(local_name(na));
- }
-
- case ast_func_local_name_1:
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_node_set_raw ns = _left->eval_node_set(c, stack);
- xpath_node na = ns.first();
-
- return xpath_string_const(local_name(na));
- }
-
- case ast_func_name_0:
- {
- xpath_node na = c.n;
-
- return xpath_string_const(qualified_name(na));
- }
-
- case ast_func_name_1:
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_node_set_raw ns = _left->eval_node_set(c, stack);
- xpath_node na = ns.first();
-
- return xpath_string_const(qualified_name(na));
- }
-
- case ast_func_namespace_uri_0:
- {
- xpath_node na = c.n;
-
- return xpath_string_const(namespace_uri(na));
- }
-
- case ast_func_namespace_uri_1:
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_node_set_raw ns = _left->eval_node_set(c, stack);
- xpath_node na = ns.first();
-
- return xpath_string_const(namespace_uri(na));
- }
-
- case ast_func_string_0:
- return string_value(c.n, stack.result);
-
- case ast_func_string_1:
- return _left->eval_string(c, stack);
-
- case ast_func_concat:
- return eval_string_concat(c, stack);
-
- case ast_func_substring_before:
- {
- xpath_allocator_capture cr(stack.temp);
-
- xpath_stack swapped_stack = {stack.temp, stack.result};
-
- xpath_string s = _left->eval_string(c, swapped_stack);
- xpath_string p = _right->eval_string(c, swapped_stack);
-
- const char_t* pos = find_substring(s.c_str(), p.c_str());
-
- return pos ? xpath_string(s.c_str(), pos, stack.result) : xpath_string();
- }
-
- case ast_func_substring_after:
- {
- xpath_allocator_capture cr(stack.temp);
-
- xpath_stack swapped_stack = {stack.temp, stack.result};
-
- xpath_string s = _left->eval_string(c, swapped_stack);
- xpath_string p = _right->eval_string(c, swapped_stack);
-
- const char_t* pos = find_substring(s.c_str(), p.c_str());
- if (!pos) return xpath_string();
-
- const char_t* result = pos + p.length();
-
- return s.uses_heap() ? xpath_string(result, stack.result) : xpath_string_const(result);
- }
-
- case ast_func_substring_2:
- {
- xpath_allocator_capture cr(stack.temp);
-
- xpath_stack swapped_stack = {stack.temp, stack.result};
-
- xpath_string s = _left->eval_string(c, swapped_stack);
- size_t s_length = s.length();
-
- double first = round_nearest(_right->eval_number(c, stack));
-
- if (is_nan(first)) return xpath_string(); // NaN
- else if (first >= s_length + 1) return xpath_string();
-
- size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
- assert(1 <= pos && pos <= s_length + 1);
-
- const char_t* rbegin = s.c_str() + (pos - 1);
-
- return s.uses_heap() ? xpath_string(rbegin, stack.result) : xpath_string_const(rbegin);
- }
-
- case ast_func_substring_3:
- {
- xpath_allocator_capture cr(stack.temp);
-
- xpath_stack swapped_stack = {stack.temp, stack.result};
-
- xpath_string s = _left->eval_string(c, swapped_stack);
- size_t s_length = s.length();
-
- double first = round_nearest(_right->eval_number(c, stack));
- double last = first + round_nearest(_right->_next->eval_number(c, stack));
-
- if (is_nan(first) || is_nan(last)) return xpath_string();
- else if (first >= s_length + 1) return xpath_string();
- else if (first >= last) return xpath_string();
- else if (last < 1) return xpath_string();
-
- size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
- size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last);
-
- assert(1 <= pos && pos <= end && end <= s_length + 1);
- const char_t* rbegin = s.c_str() + (pos - 1);
- const char_t* rend = s.c_str() + (end - 1);
-
- return (end == s_length + 1 && !s.uses_heap()) ? xpath_string_const(rbegin) : xpath_string(rbegin, rend, stack.result);
- }
-
- case ast_func_normalize_space_0:
- {
- xpath_string s = string_value(c.n, stack.result);
-
- normalize_space(s.data(stack.result));
-
- return s;
- }
-
- case ast_func_normalize_space_1:
- {
- xpath_string s = _left->eval_string(c, stack);
-
- normalize_space(s.data(stack.result));
-
- return s;
- }
-
- case ast_func_translate:
- {
- xpath_allocator_capture cr(stack.temp);
-
- xpath_stack swapped_stack = {stack.temp, stack.result};
-
- xpath_string s = _left->eval_string(c, stack);
- xpath_string from = _right->eval_string(c, swapped_stack);
- xpath_string to = _right->_next->eval_string(c, swapped_stack);
-
- translate(s.data(stack.result), from.c_str(), to.c_str());
-
- return s;
- }
-
- case ast_variable:
- {
- assert(_rettype == _data.variable->type());
-
- if (_rettype == xpath_type_string)
- return xpath_string_const(_data.variable->get_string());
-
- // fallthrough to type conversion
- }
-
- default:
- {
- switch (_rettype)
- {
- case xpath_type_boolean:
- return xpath_string_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
-
- case xpath_type_number:
- return convert_number_to_string(eval_number(c, stack), stack.result);
-
- case xpath_type_node_set:
- {
- xpath_allocator_capture cr(stack.temp);
-
- xpath_stack swapped_stack = {stack.temp, stack.result};
-
- xpath_node_set_raw ns = eval_node_set(c, swapped_stack);
- return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
- }
-
- default:
- assert(!"Wrong expression for return type string");
- return xpath_string();
- }
- }
- }
- }
-
- xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack)
- {
- switch (_type)
- {
- case ast_op_union:
- {
- xpath_allocator_capture cr(stack.temp);
-
- xpath_stack swapped_stack = {stack.temp, stack.result};
-
- xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack);
- xpath_node_set_raw rs = _right->eval_node_set(c, stack);
-
- // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
- rs.set_type(xpath_node_set::type_unsorted);
-
- rs.append(ls.begin(), ls.end(), stack.result);
- rs.remove_duplicates();
-
- return rs;
- }
-
- case ast_filter:
- case ast_filter_posinv:
- {
- xpath_node_set_raw set = _left->eval_node_set(c, stack);
-
- // either expression is a number or it contains position() call; sort by document order
- if (_type == ast_filter) set.sort_do();
-
- apply_predicate(set, 0, _right, stack);
-
- return set;
- }
-
- case ast_func_id:
- return xpath_node_set_raw();
-
- case ast_step:
- {
- switch (_axis)
- {
- case axis_ancestor:
- return step_do(c, stack, axis_to_type<axis_ancestor>());
-
- case axis_ancestor_or_self:
- return step_do(c, stack, axis_to_type<axis_ancestor_or_self>());
-
- case axis_attribute:
- return step_do(c, stack, axis_to_type<axis_attribute>());
-
- case axis_child:
- return step_do(c, stack, axis_to_type<axis_child>());
-
- case axis_descendant:
- return step_do(c, stack, axis_to_type<axis_descendant>());
-
- case axis_descendant_or_self:
- return step_do(c, stack, axis_to_type<axis_descendant_or_self>());
-
- case axis_following:
- return step_do(c, stack, axis_to_type<axis_following>());
-
- case axis_following_sibling:
- return step_do(c, stack, axis_to_type<axis_following_sibling>());
-
- case axis_namespace:
- // namespaced axis is not supported
- return xpath_node_set_raw();
-
- case axis_parent:
- return step_do(c, stack, axis_to_type<axis_parent>());
-
- case axis_preceding:
- return step_do(c, stack, axis_to_type<axis_preceding>());
-
- case axis_preceding_sibling:
- return step_do(c, stack, axis_to_type<axis_preceding_sibling>());
-
- case axis_self:
- return step_do(c, stack, axis_to_type<axis_self>());
-
- default:
- assert(!"Unknown axis");
- return xpath_node_set_raw();
- }
- }
-
- case ast_step_root:
- {
- assert(!_right); // root step can't have any predicates
-
- xpath_node_set_raw ns;
-
- ns.set_type(xpath_node_set::type_sorted);
-
- if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
- else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
-
- return ns;
- }
-
- case ast_variable:
- {
- assert(_rettype == _data.variable->type());
-
- if (_rettype == xpath_type_node_set)
- {
- const xpath_node_set& s = _data.variable->get_node_set();
-
- xpath_node_set_raw ns;
-
- ns.set_type(s.type());
- ns.append(s.begin(), s.end(), stack.result);
-
- return ns;
- }
-
- // fallthrough to type conversion
- }
-
- default:
- assert(!"Wrong expression for return type node set");
- return xpath_node_set_raw();
- }
- }
-
- bool is_posinv()
- {
- switch (_type)
- {
- case ast_func_position:
- return false;
-
- case ast_string_constant:
- case ast_number_constant:
- case ast_variable:
- return true;
-
- case ast_step:
- case ast_step_root:
- return true;
-
- case ast_predicate:
- case ast_filter:
- case ast_filter_posinv:
- return true;
-
- default:
- if (_left && !_left->is_posinv()) return false;
-
- for (xpath_ast_node* n = _right; n; n = n->_next)
- if (!n->is_posinv()) return false;
-
- return true;
- }
- }
-
- xpath_value_type rettype() const
- {
- return static_cast<xpath_value_type>(_rettype);
- }
- };
-
- struct xpath_parser
- {
- xpath_allocator* _alloc;
- xpath_lexer _lexer;
-
- const char_t* _query;
- xpath_variable_set* _variables;
-
- xpath_parse_result* _result;
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- jmp_buf _error_handler;
- #endif
-
- void throw_error(const char* message)
- {
- _result->error = message;
- _result->offset = _lexer.current_pos() - _query;
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- longjmp(_error_handler, 1);
- #else
- throw xpath_exception(*_result);
- #endif
- }
-
- void throw_error_oom()
- {
- #ifdef PUGIXML_NO_EXCEPTIONS
- throw_error("Out of memory");
- #else
- throw std::bad_alloc();
- #endif
- }
-
- void* alloc_node()
- {
- void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node));
-
- if (!result) throw_error_oom();
-
- return result;
- }
-
- const char_t* alloc_string(const xpath_lexer_string& value)
- {
- if (value.begin)
- {
- size_t length = static_cast<size_t>(value.end - value.begin);
-
- char_t* c = static_cast<char_t*>(_alloc->allocate_nothrow((length + 1) * sizeof(char_t)));
- if (!c) throw_error_oom();
-
- memcpy(c, value.begin, length * sizeof(char_t));
- c[length] = 0;
-
- return c;
- }
- else return 0;
- }
-
- xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2])
- {
- assert(argc <= 1);
-
- if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
-
- return new (alloc_node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]);
- }
-
- xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
- {
- switch (name.begin[0])
- {
- case 'b':
- if (name == PUGIXML_TEXT("boolean") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);
-
- break;
-
- case 'c':
- if (name == PUGIXML_TEXT("count") && argc == 1)
- {
- if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
- return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]);
- }
- else if (name == PUGIXML_TEXT("contains") && argc == 2)
- return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("concat") && argc >= 2)
- return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
-
- break;
-
- case 'f':
- if (name == PUGIXML_TEXT("false") && argc == 0)
- return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean);
- else if (name == PUGIXML_TEXT("floor") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);
-
- break;
-
- case 'i':
- if (name == PUGIXML_TEXT("id") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);
-
- break;
-
- case 'l':
- if (name == PUGIXML_TEXT("last") && argc == 0)
- return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number);
- else if (name == PUGIXML_TEXT("lang") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);
- else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
- return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args);
-
- break;
-
- case 'n':
- if (name == PUGIXML_TEXT("name") && argc <= 1)
- return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args);
- else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
- return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);
- else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
- return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("not") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);
- else if (name == PUGIXML_TEXT("number") && argc <= 1)
- return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
-
- break;
-
- case 'p':
- if (name == PUGIXML_TEXT("position") && argc == 0)
- return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number);
-
- break;
-
- case 'r':
- if (name == PUGIXML_TEXT("round") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]);
-
- break;
-
- case 's':
- if (name == PUGIXML_TEXT("string") && argc <= 1)
- return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
- else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
- return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_string, args[0]);
- else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
- return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
- else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
- return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
- return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
- return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("sum") && argc == 1)
- {
- if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
- return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);
- }
-
- break;
-
- case 't':
- if (name == PUGIXML_TEXT("translate") && argc == 3)
- return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("true") && argc == 0)
- return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean);
-
- break;
-
- default:
- break;
- }
-
- throw_error("Unrecognized function or wrong parameter count");
-
- return 0;
- }
-
- axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
- {
- specified = true;
-
- switch (name.begin[0])
- {
- case 'a':
- if (name == PUGIXML_TEXT("ancestor"))
- return axis_ancestor;
- else if (name == PUGIXML_TEXT("ancestor-or-self"))
- return axis_ancestor_or_self;
- else if (name == PUGIXML_TEXT("attribute"))
- return axis_attribute;
-
- break;
-
- case 'c':
- if (name == PUGIXML_TEXT("child"))
- return axis_child;
-
- break;
-
- case 'd':
- if (name == PUGIXML_TEXT("descendant"))
- return axis_descendant;
- else if (name == PUGIXML_TEXT("descendant-or-self"))
- return axis_descendant_or_self;
-
- break;
-
- case 'f':
- if (name == PUGIXML_TEXT("following"))
- return axis_following;
- else if (name == PUGIXML_TEXT("following-sibling"))
- return axis_following_sibling;
-
- break;
-
- case 'n':
- if (name == PUGIXML_TEXT("namespace"))
- return axis_namespace;
-
- break;
-
- case 'p':
- if (name == PUGIXML_TEXT("parent"))
- return axis_parent;
- else if (name == PUGIXML_TEXT("preceding"))
- return axis_preceding;
- else if (name == PUGIXML_TEXT("preceding-sibling"))
- return axis_preceding_sibling;
-
- break;
-
- case 's':
- if (name == PUGIXML_TEXT("self"))
- return axis_self;
-
- break;
-
- default:
- break;
- }
-
- specified = false;
- return axis_child;
- }
-
- nodetest_t parse_node_test_type(const xpath_lexer_string& name)
- {
- switch (name.begin[0])
- {
- case 'c':
- if (name == PUGIXML_TEXT("comment"))
- return nodetest_type_comment;
-
- break;
-
- case 'n':
- if (name == PUGIXML_TEXT("node"))
- return nodetest_type_node;
-
- break;
-
- case 'p':
- if (name == PUGIXML_TEXT("processing-instruction"))
- return nodetest_type_pi;
-
- break;
-
- case 't':
- if (name == PUGIXML_TEXT("text"))
- return nodetest_type_text;
-
- break;
-
- default:
- break;
- }
-
- return nodetest_none;
- }
-
- // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
- xpath_ast_node* parse_primary_expression()
- {
- switch (_lexer.current())
- {
- case lex_var_ref:
- {
- xpath_lexer_string name = _lexer.contents();
-
- if (!_variables)
- throw_error("Unknown variable: variable set is not provided");
-
- xpath_variable* var = get_variable(_variables, name.begin, name.end);
-
- if (!var)
- throw_error("Unknown variable: variable set does not contain the given name");
-
- _lexer.next();
-
- return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var);
- }
-
- case lex_open_brace:
- {
- _lexer.next();
-
- xpath_ast_node* n = parse_expression();
-
- if (_lexer.current() != lex_close_brace)
- throw_error("Unmatched braces");
-
- _lexer.next();
-
- return n;
- }
-
- case lex_quoted_string:
- {
- const char_t* value = alloc_string(_lexer.contents());
-
- xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value);
- _lexer.next();
-
- return n;
- }
-
- case lex_number:
- {
- double value = 0;
-
- if (!convert_string_to_number(_lexer.contents().begin, _lexer.contents().end, &value))
- throw_error_oom();
-
- xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value);
- _lexer.next();
-
- return n;
- }
-
- case lex_string:
- {
- xpath_ast_node* args[2] = {0};
- size_t argc = 0;
-
- xpath_lexer_string function = _lexer.contents();
- _lexer.next();
-
- xpath_ast_node* last_arg = 0;
-
- if (_lexer.current() != lex_open_brace)
- throw_error("Unrecognized function call");
- _lexer.next();
-
- if (_lexer.current() != lex_close_brace)
- args[argc++] = parse_expression();
-
- while (_lexer.current() != lex_close_brace)
- {
- if (_lexer.current() != lex_comma)
- throw_error("No comma between function arguments");
- _lexer.next();
-
- xpath_ast_node* n = parse_expression();
-
- if (argc < 2) args[argc] = n;
- else last_arg->set_next(n);
-
- argc++;
- last_arg = n;
- }
-
- _lexer.next();
-
- return parse_function(function, argc, args);
- }
-
- default:
- throw_error("Unrecognizable primary expression");
-
- return 0;
- }
- }
-
- // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
- // Predicate ::= '[' PredicateExpr ']'
- // PredicateExpr ::= Expr
- xpath_ast_node* parse_filter_expression()
- {
- xpath_ast_node* n = parse_primary_expression();
-
- while (_lexer.current() == lex_open_square_brace)
- {
- _lexer.next();
-
- xpath_ast_node* expr = parse_expression();
-
- if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set");
-
- bool posinv = expr->rettype() != xpath_type_number && expr->is_posinv();
-
- n = new (alloc_node()) xpath_ast_node(posinv ? ast_filter_posinv : ast_filter, xpath_type_node_set, n, expr);
-
- if (_lexer.current() != lex_close_square_brace)
- throw_error("Unmatched square brace");
-
- _lexer.next();
- }
-
- return n;
- }
-
- // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
- // AxisSpecifier ::= AxisName '::' | '@'?
- // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
- // NameTest ::= '*' | NCName ':' '*' | QName
- // AbbreviatedStep ::= '.' | '..'
- xpath_ast_node* parse_step(xpath_ast_node* set)
- {
- if (set && set->rettype() != xpath_type_node_set)
- throw_error("Step has to be applied to node set");
-
- bool axis_specified = false;
- axis_t axis = axis_child; // implied child axis
-
- if (_lexer.current() == lex_axis_attribute)
- {
- axis = axis_attribute;
- axis_specified = true;
-
- _lexer.next();
- }
- else if (_lexer.current() == lex_dot)
- {
- _lexer.next();
-
- return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0);
- }
- else if (_lexer.current() == lex_double_dot)
- {
- _lexer.next();
-
- return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0);
- }
-
- nodetest_t nt_type = nodetest_none;
- xpath_lexer_string nt_name;
-
- if (_lexer.current() == lex_string)
- {
- // node name test
- nt_name = _lexer.contents();
- _lexer.next();
-
- // was it an axis name?
- if (_lexer.current() == lex_double_colon)
- {
- // parse axis name
- if (axis_specified) throw_error("Two axis specifiers in one step");
-
- axis = parse_axis_name(nt_name, axis_specified);
-
- if (!axis_specified) throw_error("Unknown axis");
-
- // read actual node test
- _lexer.next();
-
- if (_lexer.current() == lex_multiply)
- {
- nt_type = nodetest_all;
- nt_name = xpath_lexer_string();
- _lexer.next();
- }
- else if (_lexer.current() == lex_string)
- {
- nt_name = _lexer.contents();
- _lexer.next();
- }
- else throw_error("Unrecognized node test");
- }
-
- if (nt_type == nodetest_none)
- {
- // node type test or processing-instruction
- if (_lexer.current() == lex_open_brace)
- {
- _lexer.next();
-
- if (_lexer.current() == lex_close_brace)
- {
- _lexer.next();
-
- nt_type = parse_node_test_type(nt_name);
-
- if (nt_type == nodetest_none) throw_error("Unrecognized node type");
-
- nt_name = xpath_lexer_string();
- }
- else if (nt_name == PUGIXML_TEXT("processing-instruction"))
- {
- if (_lexer.current() != lex_quoted_string)
- throw_error("Only literals are allowed as arguments to processing-instruction()");
-
- nt_type = nodetest_pi;
- nt_name = _lexer.contents();
- _lexer.next();
-
- if (_lexer.current() != lex_close_brace)
- throw_error("Unmatched brace near processing-instruction()");
- _lexer.next();
- }
- else
- throw_error("Unmatched brace near node type test");
-
- }
- // QName or NCName:*
- else
- {
- if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
- {
- nt_name.end--; // erase *
-
- nt_type = nodetest_all_in_namespace;
- }
- else nt_type = nodetest_name;
- }
- }
- }
- else if (_lexer.current() == lex_multiply)
- {
- nt_type = nodetest_all;
- _lexer.next();
- }
- else throw_error("Unrecognized node test");
-
- xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name));
-
- xpath_ast_node* last = 0;
-
- while (_lexer.current() == lex_open_square_brace)
- {
- _lexer.next();
-
- xpath_ast_node* expr = parse_expression();
-
- xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, xpath_type_node_set, expr);
-
- if (_lexer.current() != lex_close_square_brace)
- throw_error("Unmatched square brace");
- _lexer.next();
-
- if (last) last->set_next(pred);
- else n->set_right(pred);
-
- last = pred;
- }
-
- return n;
- }
-
- // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
- xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
- {
- xpath_ast_node* n = parse_step(set);
-
- while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
- {
- lexeme_t l = _lexer.current();
- _lexer.next();
-
- if (l == lex_double_slash)
- n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
-
- n = parse_step(n);
- }
-
- return n;
- }
-
- // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
- // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
- xpath_ast_node* parse_location_path()
- {
- if (_lexer.current() == lex_slash)
- {
- _lexer.next();
-
- xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
-
- // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
- lexeme_t l = _lexer.current();
-
- if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
- return parse_relative_location_path(n);
- else
- return n;
- }
- else if (_lexer.current() == lex_double_slash)
- {
- _lexer.next();
-
- xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
- n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
-
- return parse_relative_location_path(n);
- }
-
- // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
- return parse_relative_location_path(0);
- }
-
- // PathExpr ::= LocationPath
- // | FilterExpr
- // | FilterExpr '/' RelativeLocationPath
- // | FilterExpr '//' RelativeLocationPath
- xpath_ast_node* parse_path_expression()
- {
- // Clarification.
- // PathExpr begins with either LocationPath or FilterExpr.
- // FilterExpr begins with PrimaryExpr
- // PrimaryExpr begins with '$' in case of it being a variable reference,
- // '(' in case of it being an expression, string literal, number constant or
- // function call.
-
- if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
- _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
- _lexer.current() == lex_string)
- {
- if (_lexer.current() == lex_string)
- {
- // This is either a function call, or not - if not, we shall proceed with location path
- const char_t* state = _lexer.state();
-
- while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
-
- if (*state != '(') return parse_location_path();
-
- // This looks like a function call; however this still can be a node-test. Check it.
- if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path();
- }
-
- xpath_ast_node* n = parse_filter_expression();
-
- if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
- {
- lexeme_t l = _lexer.current();
- _lexer.next();
-
- if (l == lex_double_slash)
- {
- if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set");
-
- n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
- }
-
- // select from location path
- return parse_relative_location_path(n);
- }
-
- return n;
- }
- else return parse_location_path();
- }
-
- // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
- xpath_ast_node* parse_union_expression()
- {
- xpath_ast_node* n = parse_path_expression();
-
- while (_lexer.current() == lex_union)
- {
- _lexer.next();
-
- xpath_ast_node* expr = parse_union_expression();
-
- if (n->rettype() != xpath_type_node_set || expr->rettype() != xpath_type_node_set)
- throw_error("Union operator has to be applied to node sets");
-
- n = new (alloc_node()) xpath_ast_node(ast_op_union, xpath_type_node_set, n, expr);
- }
-
- return n;
- }
-
- // UnaryExpr ::= UnionExpr | '-' UnaryExpr
- xpath_ast_node* parse_unary_expression()
- {
- if (_lexer.current() == lex_minus)
- {
- _lexer.next();
-
- xpath_ast_node* expr = parse_unary_expression();
-
- return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
- }
- else return parse_union_expression();
- }
-
- // MultiplicativeExpr ::= UnaryExpr
- // | MultiplicativeExpr '*' UnaryExpr
- // | MultiplicativeExpr 'div' UnaryExpr
- // | MultiplicativeExpr 'mod' UnaryExpr
- xpath_ast_node* parse_multiplicative_expression()
- {
- xpath_ast_node* n = parse_unary_expression();
-
- while (_lexer.current() == lex_multiply || (_lexer.current() == lex_string &&
- (_lexer.contents() == PUGIXML_TEXT("mod") || _lexer.contents() == PUGIXML_TEXT("div"))))
- {
- ast_type_t op = _lexer.current() == lex_multiply ? ast_op_multiply :
- _lexer.contents().begin[0] == 'd' ? ast_op_divide : ast_op_mod;
- _lexer.next();
-
- xpath_ast_node* expr = parse_unary_expression();
-
- n = new (alloc_node()) xpath_ast_node(op, xpath_type_number, n, expr);
- }
-
- return n;
- }
-
- // AdditiveExpr ::= MultiplicativeExpr
- // | AdditiveExpr '+' MultiplicativeExpr
- // | AdditiveExpr '-' MultiplicativeExpr
- xpath_ast_node* parse_additive_expression()
- {
- xpath_ast_node* n = parse_multiplicative_expression();
-
- while (_lexer.current() == lex_plus || _lexer.current() == lex_minus)
- {
- lexeme_t l = _lexer.current();
-
- _lexer.next();
-
- xpath_ast_node* expr = parse_multiplicative_expression();
-
- n = new (alloc_node()) xpath_ast_node(l == lex_plus ? ast_op_add : ast_op_subtract, xpath_type_number, n, expr);
- }
-
- return n;
- }
-
- // RelationalExpr ::= AdditiveExpr
- // | RelationalExpr '<' AdditiveExpr
- // | RelationalExpr '>' AdditiveExpr
- // | RelationalExpr '<=' AdditiveExpr
- // | RelationalExpr '>=' AdditiveExpr
- xpath_ast_node* parse_relational_expression()
- {
- xpath_ast_node* n = parse_additive_expression();
-
- while (_lexer.current() == lex_less || _lexer.current() == lex_less_or_equal ||
- _lexer.current() == lex_greater || _lexer.current() == lex_greater_or_equal)
- {
- lexeme_t l = _lexer.current();
- _lexer.next();
-
- xpath_ast_node* expr = parse_additive_expression();
-
- n = new (alloc_node()) xpath_ast_node(l == lex_less ? ast_op_less : l == lex_greater ? ast_op_greater :
- l == lex_less_or_equal ? ast_op_less_or_equal : ast_op_greater_or_equal, xpath_type_boolean, n, expr);
- }
-
- return n;
- }
-
- // EqualityExpr ::= RelationalExpr
- // | EqualityExpr '=' RelationalExpr
- // | EqualityExpr '!=' RelationalExpr
- xpath_ast_node* parse_equality_expression()
- {
- xpath_ast_node* n = parse_relational_expression();
-
- while (_lexer.current() == lex_equal || _lexer.current() == lex_not_equal)
- {
- lexeme_t l = _lexer.current();
-
- _lexer.next();
-
- xpath_ast_node* expr = parse_relational_expression();
-
- n = new (alloc_node()) xpath_ast_node(l == lex_equal ? ast_op_equal : ast_op_not_equal, xpath_type_boolean, n, expr);
- }
-
- return n;
- }
-
- // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
- xpath_ast_node* parse_and_expression()
- {
- xpath_ast_node* n = parse_equality_expression();
-
- while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("and"))
- {
- _lexer.next();
-
- xpath_ast_node* expr = parse_equality_expression();
-
- n = new (alloc_node()) xpath_ast_node(ast_op_and, xpath_type_boolean, n, expr);
- }
-
- return n;
- }
-
- // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
- xpath_ast_node* parse_or_expression()
- {
- xpath_ast_node* n = parse_and_expression();
-
- while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("or"))
- {
- _lexer.next();
-
- xpath_ast_node* expr = parse_and_expression();
-
- n = new (alloc_node()) xpath_ast_node(ast_op_or, xpath_type_boolean, n, expr);
- }
-
- return n;
- }
-
- // Expr ::= OrExpr
- xpath_ast_node* parse_expression()
- {
- return parse_or_expression();
- }
-
- xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result)
- {
- }
-
- xpath_ast_node* parse()
- {
- xpath_ast_node* result = parse_expression();
-
- if (_lexer.current() != lex_eof)
- {
- // there are still unparsed tokens left, error
- throw_error("Incorrect query");
- }
-
- return result;
- }
-
- static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
- {
- xpath_parser parser(query, variables, alloc, result);
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- int error = setjmp(parser._error_handler);
-
- return (error == 0) ? parser.parse() : 0;
- #else
- return parser.parse();
- #endif
- }
- };
-
- struct xpath_query_impl
- {
- static xpath_query_impl* create()
- {
- void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
-
- return new (memory) xpath_query_impl();
- }
-
- static void destroy(void* ptr)
- {
- if (!ptr) return;
-
- // free all allocated pages
- static_cast<xpath_query_impl*>(ptr)->alloc.release();
-
- // free allocator memory (with the first page)
- xml_memory::deallocate(ptr);
- }
-
- xpath_query_impl(): root(0), alloc(&block)
- {
- block.next = 0;
- }
-
- xpath_ast_node* root;
- xpath_allocator alloc;
- xpath_memory_block block;
- };
-
- PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd)
- {
- if (!impl) return xpath_string();
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- if (setjmp(sd.error_handler)) return xpath_string();
- #endif
-
- xpath_context c(n, 1, 1);
-
- return impl->root->eval_string(c, sd.stack);
- }
+struct xpath_context {
+ xpath_node n;
+ size_t position, size;
+
+ xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_) {
+ }
+};
+
+enum lexeme_t {
+ lex_none = 0,
+ lex_equal,
+ lex_not_equal,
+ lex_less,
+ lex_greater,
+ lex_less_or_equal,
+ lex_greater_or_equal,
+ lex_plus,
+ lex_minus,
+ lex_multiply,
+ lex_union,
+ lex_var_ref,
+ lex_open_brace,
+ lex_close_brace,
+ lex_quoted_string,
+ lex_number,
+ lex_slash,
+ lex_double_slash,
+ lex_open_square_brace,
+ lex_close_square_brace,
+ lex_string,
+ lex_comma,
+ lex_axis_attribute,
+ lex_dot,
+ lex_double_dot,
+ lex_double_colon,
+ lex_eof
+};
+
+struct xpath_lexer_string {
+ const char_t* begin;
+ const char_t* end;
+
+ xpath_lexer_string(): begin(0), end(0) {
+ }
+
+ bool operator==(const char_t* other) const {
+ size_t length = static_cast<size_t>(end - begin);
+
+ return strequalrange(other, begin, length);
+ }
+};
+
+class xpath_lexer
+{
+ const char_t* _cur;
+ const char_t* _cur_lexeme_pos;
+ xpath_lexer_string _cur_lexeme_contents;
+
+ lexeme_t _cur_lexeme;
+
+public:
+ explicit xpath_lexer(const char_t* query): _cur(query) {
+ next();
+ }
+
+ const char_t* state() const {
+ return _cur;
+ }
+
+ void next() {
+ const char_t* cur = _cur;
+
+ while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
+
+ // save lexeme position for error reporting
+ _cur_lexeme_pos = cur;
+
+ switch (*cur) {
+ case 0:
+ _cur_lexeme = lex_eof;
+ break;
+
+ case '>':
+ if (*(cur+1) == '=') {
+ cur += 2;
+ _cur_lexeme = lex_greater_or_equal;
+ } else {
+ cur += 1;
+ _cur_lexeme = lex_greater;
+ }
+ break;
+
+ case '<':
+ if (*(cur+1) == '=') {
+ cur += 2;
+ _cur_lexeme = lex_less_or_equal;
+ } else {
+ cur += 1;
+ _cur_lexeme = lex_less;
+ }
+ break;
+
+ case '!':
+ if (*(cur+1) == '=') {
+ cur += 2;
+ _cur_lexeme = lex_not_equal;
+ } else {
+ _cur_lexeme = lex_none;
+ }
+ break;
+
+ case '=':
+ cur += 1;
+ _cur_lexeme = lex_equal;
+
+ break;
+
+ case '+':
+ cur += 1;
+ _cur_lexeme = lex_plus;
+
+ break;
+
+ case '-':
+ cur += 1;
+ _cur_lexeme = lex_minus;
+
+ break;
+
+ case '*':
+ cur += 1;
+ _cur_lexeme = lex_multiply;
+
+ break;
+
+ case '|':
+ cur += 1;
+ _cur_lexeme = lex_union;
+
+ break;
+
+ case '$':
+ cur += 1;
+
+ if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) {
+ _cur_lexeme_contents.begin = cur;
+
+ while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
+
+ if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) { // qname
+ cur++; // :
+
+ while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
+ }
+
+ _cur_lexeme_contents.end = cur;
+
+ _cur_lexeme = lex_var_ref;
+ } else {
+ _cur_lexeme = lex_none;
+ }
+
+ break;
+
+ case '(':
+ cur += 1;
+ _cur_lexeme = lex_open_brace;
+
+ break;
+
+ case ')':
+ cur += 1;
+ _cur_lexeme = lex_close_brace;
+
+ break;
+
+ case '[':
+ cur += 1;
+ _cur_lexeme = lex_open_square_brace;
+
+ break;
+
+ case ']':
+ cur += 1;
+ _cur_lexeme = lex_close_square_brace;
+
+ break;
+
+ case ',':
+ cur += 1;
+ _cur_lexeme = lex_comma;
+
+ break;
+
+ case '/':
+ if (*(cur+1) == '/') {
+ cur += 2;
+ _cur_lexeme = lex_double_slash;
+ } else {
+ cur += 1;
+ _cur_lexeme = lex_slash;
+ }
+ break;
+
+ case '.':
+ if (*(cur+1) == '.') {
+ cur += 2;
+ _cur_lexeme = lex_double_dot;
+ } else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit)) {
+ _cur_lexeme_contents.begin = cur; // .
+
+ ++cur;
+
+ while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
+
+ _cur_lexeme_contents.end = cur;
+
+ _cur_lexeme = lex_number;
+ } else {
+ cur += 1;
+ _cur_lexeme = lex_dot;
+ }
+ break;
+
+ case '@':
+ cur += 1;
+ _cur_lexeme = lex_axis_attribute;
+
+ break;
+
+ case '"':
+ case '\'': {
+ char_t terminator = *cur;
+
+ ++cur;
+
+ _cur_lexeme_contents.begin = cur;
+ while (*cur && *cur != terminator) cur++;
+ _cur_lexeme_contents.end = cur;
+
+ if (!*cur)
+ _cur_lexeme = lex_none;
+ else {
+ cur += 1;
+ _cur_lexeme = lex_quoted_string;
+ }
+
+ break;
+ }
+
+ case ':':
+ if (*(cur+1) == ':') {
+ cur += 2;
+ _cur_lexeme = lex_double_colon;
+ } else {
+ _cur_lexeme = lex_none;
+ }
+ break;
+
+ default:
+ if (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) {
+ _cur_lexeme_contents.begin = cur;
+
+ while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
+
+ if (*cur == '.') {
+ cur++;
+
+ while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
+ }
+
+ _cur_lexeme_contents.end = cur;
+
+ _cur_lexeme = lex_number;
+ } else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) {
+ _cur_lexeme_contents.begin = cur;
+
+ while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
+
+ if (cur[0] == ':') {
+ if (cur[1] == '*') { // namespace test ncname:*
+ cur += 2; // :*
+ } else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) { // namespace test qname
+ cur++; // :
+
+ while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
+ }
+ }
+
+ _cur_lexeme_contents.end = cur;
+
+ _cur_lexeme = lex_string;
+ } else {
+ _cur_lexeme = lex_none;
+ }
+ }
+
+ _cur = cur;
+ }
+
+ lexeme_t current() const {
+ return _cur_lexeme;
+ }
+
+ const char_t* current_pos() const {
+ return _cur_lexeme_pos;
+ }
+
+ const xpath_lexer_string& contents() const {
+ assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
+
+ return _cur_lexeme_contents;
+ }
+};
+
+enum ast_type_t {
+ ast_op_or, // left or right
+ ast_op_and, // left and right
+ ast_op_equal, // left = right
+ ast_op_not_equal, // left != right
+ ast_op_less, // left < right
+ ast_op_greater, // left > right
+ ast_op_less_or_equal, // left <= right
+ ast_op_greater_or_equal, // left >= right
+ ast_op_add, // left + right
+ ast_op_subtract, // left - right
+ ast_op_multiply, // left * right
+ ast_op_divide, // left / right
+ ast_op_mod, // left % right
+ ast_op_negate, // left - right
+ ast_op_union, // left | right
+ ast_predicate, // apply predicate to set; next points to next predicate
+ ast_filter, // select * from left where right
+ ast_filter_posinv, // select * from left where right; proximity position invariant
+ ast_string_constant, // string constant
+ ast_number_constant, // number constant
+ ast_variable, // variable
+ ast_func_last, // last()
+ ast_func_position, // position()
+ ast_func_count, // count(left)
+ ast_func_id, // id(left)
+ ast_func_local_name_0, // local-name()
+ ast_func_local_name_1, // local-name(left)
+ ast_func_namespace_uri_0, // namespace-uri()
+ ast_func_namespace_uri_1, // namespace-uri(left)
+ ast_func_name_0, // name()
+ ast_func_name_1, // name(left)
+ ast_func_string_0, // string()
+ ast_func_string_1, // string(left)
+ ast_func_concat, // concat(left, right, siblings)
+ ast_func_starts_with, // starts_with(left, right)
+ ast_func_contains, // contains(left, right)
+ ast_func_substring_before, // substring-before(left, right)
+ ast_func_substring_after, // substring-after(left, right)
+ ast_func_substring_2, // substring(left, right)
+ ast_func_substring_3, // substring(left, right, third)
+ ast_func_string_length_0, // string-length()
+ ast_func_string_length_1, // string-length(left)
+ ast_func_normalize_space_0, // normalize-space()
+ ast_func_normalize_space_1, // normalize-space(left)
+ ast_func_translate, // translate(left, right, third)
+ ast_func_boolean, // boolean(left)
+ ast_func_not, // not(left)
+ ast_func_true, // true()
+ ast_func_false, // false()
+ ast_func_lang, // lang(left)
+ ast_func_number_0, // number()
+ ast_func_number_1, // number(left)
+ ast_func_sum, // sum(left)
+ ast_func_floor, // floor(left)
+ ast_func_ceiling, // ceiling(left)
+ ast_func_round, // round(left)
+ ast_step, // process set left with step
+ ast_step_root // select root node
+};
+
+enum axis_t {
+ axis_ancestor,
+ axis_ancestor_or_self,
+ axis_attribute,
+ axis_child,
+ axis_descendant,
+ axis_descendant_or_self,
+ axis_following,
+ axis_following_sibling,
+ axis_namespace,
+ axis_parent,
+ axis_preceding,
+ axis_preceding_sibling,
+ axis_self
+};
+
+enum nodetest_t {
+ nodetest_none,
+ nodetest_name,
+ nodetest_type_node,
+ nodetest_type_comment,
+ nodetest_type_pi,
+ nodetest_type_text,
+ nodetest_pi,
+ nodetest_all,
+ nodetest_all_in_namespace
+};
+
+template <axis_t N> struct axis_to_type {
+ static const axis_t axis;
+};
+
+template <axis_t N> const axis_t axis_to_type<N>::axis = N;
+
+class xpath_ast_node
+{
+private:
+ // node type
+ char _type;
+ char _rettype;
+
+ // for ast_step / ast_predicate
+ char _axis;
+ char _test;
+
+ // tree node structure
+ xpath_ast_node* _left;
+ xpath_ast_node* _right;
+ xpath_ast_node* _next;
+
+ union {
+ // value for ast_string_constant
+ const char_t* string;
+ // value for ast_number_constant
+ double number;
+ // variable for ast_variable
+ xpath_variable* variable;
+ // node test for ast_step (node name/namespace/node type/pi target)
+ const char_t* nodetest;
+ } _data;
+
+ xpath_ast_node(const xpath_ast_node&);
+ xpath_ast_node& operator=(const xpath_ast_node&);
+
+ template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) {
+ xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
+
+ if (lt != xpath_type_node_set && rt != xpath_type_node_set) {
+ if (lt == xpath_type_boolean || rt == xpath_type_boolean)
+ return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
+ else if (lt == xpath_type_number || rt == xpath_type_number)
+ return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
+ else if (lt == xpath_type_string || rt == xpath_type_string) {
+ xpath_allocator_capture cr(stack.result);
+
+ xpath_string ls = lhs->eval_string(c, stack);
+ xpath_string rs = rhs->eval_string(c, stack);
+
+ return comp(ls, rs);
+ }
+ } else if (lt == xpath_type_node_set && rt == xpath_type_node_set) {
+ xpath_allocator_capture cr(stack.result);
+
+ xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
+
+ for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
+ for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) {
+ xpath_allocator_capture cri(stack.result);
+
+ if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
+ return true;
+ }
+
+ return false;
+ } else {
+ if (lt == xpath_type_node_set) {
+ swap(lhs, rhs);
+ swap(lt, rt);
+ }
+
+ if (lt == xpath_type_boolean)
+ return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
+ else if (lt == xpath_type_number) {
+ xpath_allocator_capture cr(stack.result);
+
+ double l = lhs->eval_number(c, stack);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
+
+ for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) {
+ xpath_allocator_capture cri(stack.result);
+
+ if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
+ return true;
+ }
+
+ return false;
+ } else if (lt == xpath_type_string) {
+ xpath_allocator_capture cr(stack.result);
+
+ xpath_string l = lhs->eval_string(c, stack);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
+
+ for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) {
+ xpath_allocator_capture cri(stack.result);
+
+ if (comp(l, string_value(*ri, stack.result)))
+ return true;
+ }
+
+ return false;
+ }
+ }
+
+ assert(!"Wrong types");
+ return false;
+ }
+
+ template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) {
+ xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
+
+ if (lt != xpath_type_node_set && rt != xpath_type_node_set)
+ return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
+ else if (lt == xpath_type_node_set && rt == xpath_type_node_set) {
+ xpath_allocator_capture cr(stack.result);
+
+ xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
+
+ for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) {
+ xpath_allocator_capture cri(stack.result);
+
+ double l = convert_string_to_number(string_value(*li, stack.result).c_str());
+
+ for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) {
+ xpath_allocator_capture crii(stack.result);
+
+ if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
+ return true;
+ }
+ }
+
+ return false;
+ } else if (lt != xpath_type_node_set && rt == xpath_type_node_set) {
+ xpath_allocator_capture cr(stack.result);
+
+ double l = lhs->eval_number(c, stack);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
+
+ for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) {
+ xpath_allocator_capture cri(stack.result);
+
+ if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
+ return true;
+ }
+
+ return false;
+ } else if (lt == xpath_type_node_set && rt != xpath_type_node_set) {
+ xpath_allocator_capture cr(stack.result);
+
+ xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
+ double r = rhs->eval_number(c, stack);
+
+ for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) {
+ xpath_allocator_capture cri(stack.result);
+
+ if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
+ return true;
+ }
+
+ return false;
+ } else {
+ assert(!"Wrong types");
+ return false;
+ }
+ }
+
+ void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack) {
+ assert(ns.size() >= first);
+
+ size_t i = 1;
+ size_t size = ns.size() - first;
+
+ xpath_node* last = ns.begin() + first;
+
+ // remove_if... or well, sort of
+ for (xpath_node* it = last; it != ns.end(); ++it, ++i) {
+ xpath_context c(*it, i, size);
+
+ if (expr->rettype() == xpath_type_number) {
+ if (expr->eval_number(c, stack) == i)
+ *last++ = *it;
+ } else if (expr->eval_boolean(c, stack))
+ *last++ = *it;
+ }
+
+ ns.truncate(last);
+ }
+
+ void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack) {
+ if (ns.size() == first) return;
+
+ for (xpath_ast_node* pred = _right; pred; pred = pred->_next) {
+ apply_predicate(ns, first, pred->_left, stack);
+ }
+ }
+
+ void step_push(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& parent, xpath_allocator* alloc) {
+ if (!a) return;
+
+ const char_t* name = a.name();
+
+ // There are no attribute nodes corresponding to attributes that declare namespaces
+ // That is, "xmlns:..." or "xmlns"
+ if (starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')) return;
+
+ switch (_test) {
+ case nodetest_name:
+ if (strequal(name, _data.nodetest)) ns.push_back(xpath_node(a, parent), alloc);
+ break;
+
+ case nodetest_type_node:
+ case nodetest_all:
+ ns.push_back(xpath_node(a, parent), alloc);
+ break;
+
+ case nodetest_all_in_namespace:
+ if (starts_with(name, _data.nodetest))
+ ns.push_back(xpath_node(a, parent), alloc);
+ break;
+
+ default:
+ ;
+ }
+ }
+
+ void step_push(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc) {
+ if (!n) return;
+
+ switch (_test) {
+ case nodetest_name:
+ if (n.type() == node_element && strequal(n.name(), _data.nodetest)) ns.push_back(n, alloc);
+ break;
+
+ case nodetest_type_node:
+ ns.push_back(n, alloc);
+ break;
+
+ case nodetest_type_comment:
+ if (n.type() == node_comment)
+ ns.push_back(n, alloc);
+ break;
+
+ case nodetest_type_text:
+ if (n.type() == node_pcdata || n.type() == node_cdata)
+ ns.push_back(n, alloc);
+ break;
+
+ case nodetest_type_pi:
+ if (n.type() == node_pi)
+ ns.push_back(n, alloc);
+ break;
+
+ case nodetest_pi:
+ if (n.type() == node_pi && strequal(n.name(), _data.nodetest))
+ ns.push_back(n, alloc);
+ break;
+
+ case nodetest_all:
+ if (n.type() == node_element)
+ ns.push_back(n, alloc);
+ break;
+
+ case nodetest_all_in_namespace:
+ if (n.type() == node_element && starts_with(n.name(), _data.nodetest))
+ ns.push_back(n, alloc);
+ break;
+
+ default:
+ assert(!"Unknown axis");
+ }
+ }
+
// Evaluates one axis step starting from node 'n', pushing every candidate on
// axis T::axis through step_push (which applies the node test). The axis is a
// compile-time constant (axis_to_type tag), so the switch folds away.
// All descendant/following/preceding walks are iterative (no recursion) and
// visit nodes in the axis' natural document order.
template <class T> void step_fill(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc, T) {
    const axis_t axis = T::axis;

    switch (axis) {
    case axis_attribute: {
        for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute())
            step_push(ns, a, n, alloc);

        break;
    }

    case axis_child: {
        for (xml_node c = n.first_child(); c; c = c.next_sibling())
            step_push(ns, c, alloc);

        break;
    }

    case axis_descendant:
    case axis_descendant_or_self: {
        if (axis == axis_descendant_or_self)
            step_push(ns, n, alloc);

        // depth-first pre-order walk of the subtree rooted at n, excluding n
        xml_node cur = n.first_child();

        while (cur && cur != n) {
            step_push(ns, cur, alloc);

            if (cur.first_child())
                cur = cur.first_child();
            else if (cur.next_sibling())
                cur = cur.next_sibling();
            else {
                // climb back up until a sibling exists or we reach n again
                while (!cur.next_sibling() && cur != n)
                    cur = cur.parent();

                if (cur != n) cur = cur.next_sibling();
            }
        }

        break;
    }

    case axis_following_sibling: {
        for (xml_node c = n.next_sibling(); c; c = c.next_sibling())
            step_push(ns, c, alloc);

        break;
    }

    case axis_preceding_sibling: {
        for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling())
            step_push(ns, c, alloc);

        break;
    }

    case axis_following: {
        xml_node cur = n;

        // exit from this node so that we don't include descendants
        while (cur && !cur.next_sibling()) cur = cur.parent();
        cur = cur.next_sibling();

        for (;;) {
            step_push(ns, cur, alloc);

            if (cur.first_child())
                cur = cur.first_child();
            else if (cur.next_sibling())
                cur = cur.next_sibling();
            else {
                while (cur && !cur.next_sibling()) cur = cur.parent();
                cur = cur.next_sibling();

                if (!cur) break;
            }
        }

        break;
    }

    case axis_preceding: {
        xml_node cur = n;

        // start from the nearest preceding sibling of n or of an ancestor of n
        while (cur && !cur.previous_sibling()) cur = cur.parent();
        cur = cur.previous_sibling();

        for (;;) {
            if (cur.last_child())
                cur = cur.last_child();
            else {
                // leaf node, can't be ancestor
                step_push(ns, cur, alloc);

                if (cur.previous_sibling())
                    cur = cur.previous_sibling();
                else {
                    do {
                        cur = cur.parent();
                        if (!cur) break;

                        // ancestors of n are excluded from the preceding:: axis
                        if (!node_is_ancestor(cur, n)) step_push(ns, cur, alloc);
                    } while (!cur.previous_sibling());

                    cur = cur.previous_sibling();

                    if (!cur) break;
                }
            }
        }

        break;
    }

    case axis_ancestor:
    case axis_ancestor_or_self: {
        if (axis == axis_ancestor_or_self)
            step_push(ns, n, alloc);

        xml_node cur = n.parent();

        while (cur) {
            step_push(ns, cur, alloc);

            cur = cur.parent();
        }

        break;
    }

    case axis_self: {
        step_push(ns, n, alloc);

        break;
    }

    case axis_parent: {
        if (n.parent()) step_push(ns, n.parent(), alloc);

        break;
    }

    default:
        assert(!"Unimplemented axis");
    }
}
+
// Evaluates one axis step starting from attribute 'a' whose owner element is
// 'p'. Only the axes that can produce results from an attribute context node
// are handled; child/descendant/sibling axes of an attribute are empty and
// are never dispatched here (see step_do's 'attributes' filter).
template <class T> void step_fill(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& p, xpath_allocator* alloc, T v) {
    const axis_t axis = T::axis;

    switch (axis) {
    case axis_ancestor:
    case axis_ancestor_or_self: {
        if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
            step_push(ns, a, p, alloc);

        // ancestors of an attribute are its owner element and that element's ancestors
        xml_node cur = p;

        while (cur) {
            step_push(ns, cur, alloc);

            cur = cur.parent();
        }

        break;
    }

    case axis_descendant_or_self:
    case axis_self: {
        if (_test == nodetest_type_node) // reject attributes based on principal node type test
            step_push(ns, a, p, alloc);

        break;
    }

    case axis_following: {
        // everything after the owner element in document order, descending subtrees
        xml_node cur = p;

        for (;;) {
            if (cur.first_child())
                cur = cur.first_child();
            else if (cur.next_sibling())
                cur = cur.next_sibling();
            else {
                while (cur && !cur.next_sibling()) cur = cur.parent();
                cur = cur.next_sibling();

                if (!cur) break;
            }

            step_push(ns, cur, alloc);
        }

        break;
    }

    case axis_parent: {
        // the parent of an attribute is its owner element
        step_push(ns, p, alloc);

        break;
    }

    case axis_preceding: {
        // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
        step_fill(ns, p, alloc, v);
        break;
    }

    default:
        assert(!"Unimplemented axis");
    }
}
+
// Drives one location step: evaluates the left subexpression (or uses the
// context node), runs step_fill for every input node, applies this step's
// predicates, and maintains the result set's sortedness flag so that later
// sorting/deduplication can be skipped when provably unnecessary.
template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, T v) {
    const axis_t axis = T::axis;
    // axes that can yield results when the context node is an attribute
    bool attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);

    xpath_node_set_raw ns;
    // reverse axes produce reverse document order; everything else is forward
    ns.set_type((axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling) ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted);

    if (_left) {
        xpath_node_set_raw s = _left->eval_node_set(c, stack);

        // self axis preserves the original order
        if (axis == axis_self) ns.set_type(s.type());

        for (const xpath_node* it = s.begin(); it != s.end(); ++it) {
            size_t size = ns.size();

            // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
            if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);

            if (it->node())
                step_fill(ns, it->node(), stack.result, v);
            else if (attributes)
                step_fill(ns, it->attribute(), it->parent(), stack.result, v);

            // predicates only filter the nodes contributed by this input node
            apply_predicates(ns, size, stack);
        }
    } else {
        if (c.n.node())
            step_fill(ns, c.n.node(), stack.result, v);
        else if (attributes)
            step_fill(ns, c.n.attribute(), c.n.parent(), stack.result, v);

        apply_predicates(ns, 0, stack);
    }

    // child, attribute and self axes always generate unique set of nodes
    // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
    if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
        ns.remove_duplicates();

    return ns;
}
+
+public:
// Constructs a string-constant leaf node (ast_string_constant only).
xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
    _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) {
    assert(type == ast_string_constant);
    _data.string = value;
}
+
// Constructs a numeric-constant leaf node (ast_number_constant only).
xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
    _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) {
    assert(type == ast_number_constant);
    _data.number = value;
}
+
// Constructs a variable-reference leaf node (ast_variable only); the variable
// object is owned by the caller-provided variable set, not by the AST.
xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
    _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) {
    assert(type == ast_variable);
    _data.variable = value;
}
+
// Constructs an operator/function node with up to two operands; further
// arguments are chained through _next via set_next().
xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
    _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0) {
}
+
// Constructs a location-step node (axis + node test + optional name/target in
// 'contents'); steps always produce a node set.
xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
    _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0) {
    _data.nodetest = contents;
}
+
// Links the next node in a sibling chain (extra function arguments, predicates).
void set_next(xpath_ast_node* value) {
    _next = value;
}
+
// Sets the right operand after construction (used when it is parsed later).
void set_right(xpath_ast_node* value) {
    _right = value;
}
+
// Evaluates this subtree as a boolean. Node types with a native boolean
// meaning are handled directly; everything else falls through to the default
// case, which converts from the node's static return type (_rettype) using
// the standard XPath conversion rules.
bool eval_boolean(const xpath_context& c, const xpath_stack& stack) {
    switch (_type) {
    case ast_op_or:
        // || and && give XPath's required short-circuit evaluation
        return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);

    case ast_op_and:
        return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);

    case ast_op_equal:
        return compare_eq(_left, _right, c, stack, equal_to());

    case ast_op_not_equal:
        return compare_eq(_left, _right, c, stack, not_equal_to());

    case ast_op_less:
        return compare_rel(_left, _right, c, stack, less());

    case ast_op_greater:
        // a > b is evaluated as b < a (operands swapped)
        return compare_rel(_right, _left, c, stack, less());

    case ast_op_less_or_equal:
        return compare_rel(_left, _right, c, stack, less_equal());

    case ast_op_greater_or_equal:
        return compare_rel(_right, _left, c, stack, less_equal());

    case ast_func_starts_with: {
        // capture releases the temporary strings when this scope exits
        xpath_allocator_capture cr(stack.result);

        xpath_string lr = _left->eval_string(c, stack);
        xpath_string rr = _right->eval_string(c, stack);

        return starts_with(lr.c_str(), rr.c_str());
    }

    case ast_func_contains: {
        xpath_allocator_capture cr(stack.result);

        xpath_string lr = _left->eval_string(c, stack);
        xpath_string rr = _right->eval_string(c, stack);

        return find_substring(lr.c_str(), rr.c_str()) != 0;
    }

    case ast_func_boolean:
        return _left->eval_boolean(c, stack);

    case ast_func_not:
        return !_left->eval_boolean(c, stack);

    case ast_func_true:
        return true;

    case ast_func_false:
        return false;

    case ast_func_lang: {
        // lang() is defined for element context nodes only
        if (c.n.attribute()) return false;

        xpath_allocator_capture cr(stack.result);

        xpath_string lang = _left->eval_string(c, stack);

        // find the nearest ancestor-or-self xml:lang attribute
        for (xml_node n = c.n.node(); n; n = n.parent()) {
            xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));

            if (a) {
                const char_t* value = a.value();

                // strnicmp / strncasecmp is not portable
                for (const char_t* lit = lang.c_str(); *lit; ++lit) {
                    if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
                    ++value;
                }

                // exact match or a sublanguage prefix ("en" matches "en-us")
                return *value == 0 || *value == '-';
            }
        }

        return false;
    }

    case ast_variable: {
        assert(_rettype == _data.variable->type());

        if (_rettype == xpath_type_boolean)
            return _data.variable->get_boolean();

        // fallthrough to type conversion
    }

    default: {
        // convert from the expression's static type per XPath rules
        switch (_rettype) {
        case xpath_type_number:
            return convert_number_to_boolean(eval_number(c, stack));

        case xpath_type_string: {
            xpath_allocator_capture cr(stack.result);

            return !eval_string(c, stack).empty();
        }

        case xpath_type_node_set: {
            xpath_allocator_capture cr(stack.result);

            return !eval_node_set(c, stack).empty();
        }

        default:
            assert(!"Wrong expression for return type boolean");
            return false;
        }
    }
    }
}
+
// Evaluates this subtree as a double. Node types with a native numeric
// meaning are handled directly; the default case converts from the node's
// static return type (_rettype).
double eval_number(const xpath_context& c, const xpath_stack& stack) {
    switch (_type) {
    case ast_op_add:
        return _left->eval_number(c, stack) + _right->eval_number(c, stack);

    case ast_op_subtract:
        return _left->eval_number(c, stack) - _right->eval_number(c, stack);

    case ast_op_multiply:
        return _left->eval_number(c, stack) * _right->eval_number(c, stack);

    case ast_op_divide:
        // IEEE division: div by zero yields +/-inf or NaN, as XPath requires
        return _left->eval_number(c, stack) / _right->eval_number(c, stack);

    case ast_op_mod:
        return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));

    case ast_op_negate:
        return -_left->eval_number(c, stack);

    case ast_number_constant:
        return _data.number;

    case ast_func_last:
        return static_cast<double>(c.size);

    case ast_func_position:
        return static_cast<double>(c.position);

    case ast_func_count: {
        xpath_allocator_capture cr(stack.result);

        return static_cast<double>(_left->eval_node_set(c, stack).size());
    }

    case ast_func_string_length_0: {
        // string-length() with no argument uses the context node's string value
        xpath_allocator_capture cr(stack.result);

        return static_cast<double>(string_value(c.n, stack.result).length());
    }

    case ast_func_string_length_1: {
        xpath_allocator_capture cr(stack.result);

        return static_cast<double>(_left->eval_string(c, stack).length());
    }

    case ast_func_number_0: {
        xpath_allocator_capture cr(stack.result);

        return convert_string_to_number(string_value(c.n, stack.result).c_str());
    }

    case ast_func_number_1:
        return _left->eval_number(c, stack);

    case ast_func_sum: {
        xpath_allocator_capture cr(stack.result);

        double r = 0;

        xpath_node_set_raw ns = _left->eval_node_set(c, stack);

        for (const xpath_node* it = ns.begin(); it != ns.end(); ++it) {
            // per-iteration capture frees each node's temporary string value
            xpath_allocator_capture cri(stack.result);

            r += convert_string_to_number(string_value(*it, stack.result).c_str());
        }

        return r;
    }

    case ast_func_floor: {
        double r = _left->eval_number(c, stack);

        // r == r filters out NaN, which must be propagated unchanged
        return r == r ? floor(r) : r;
    }

    case ast_func_ceiling: {
        double r = _left->eval_number(c, stack);

        return r == r ? ceil(r) : r;
    }

    case ast_func_round:
        return round_nearest_nzero(_left->eval_number(c, stack));

    case ast_variable: {
        assert(_rettype == _data.variable->type());

        if (_rettype == xpath_type_number)
            return _data.variable->get_number();

        // fallthrough to type conversion
    }

    default: {
        switch (_rettype) {
        case xpath_type_boolean:
            return eval_boolean(c, stack) ? 1 : 0;

        case xpath_type_string: {
            xpath_allocator_capture cr(stack.result);

            return convert_string_to_number(eval_string(c, stack).c_str());
        }

        case xpath_type_node_set: {
            // node set converts via its string value (first node), then to number
            xpath_allocator_capture cr(stack.result);

            return convert_string_to_number(eval_string(c, stack).c_str());
        }

        default:
            assert(!"Wrong expression for return type number");
            return 0;
        }

    }
    }
}
+
// Implements concat(): evaluates every argument (left operand plus the
// _right/_next chain) into the temporary stack, then copies them into a
// single string allocated from the result stack. The argument strings and
// any heap buffer are released when 'ct' goes out of scope.
xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack) {
    assert(_type == ast_func_concat);

    xpath_allocator_capture ct(stack.temp);

    // count the string number
    size_t count = 1;
    for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;

    // gather all strings
    xpath_string static_buffer[4];
    xpath_string* buffer = static_buffer;

    // allocate on-heap for large concats
    if (count > sizeof(static_buffer) / sizeof(static_buffer[0])) {
        buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
        assert(buffer);
    }

    // evaluate all strings to temporary stack
    // (stacks are swapped so argument temporaries don't pin the result stack)
    xpath_stack swapped_stack = {stack.temp, stack.result};

    buffer[0] = _left->eval_string(c, swapped_stack);

    size_t pos = 1;
    for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
    assert(pos == count);

    // get total length
    size_t length = 0;
    for (size_t i = 0; i < count; ++i) length += buffer[i].length();

    // create final string
    char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
    assert(result);

    char_t* ri = result;

    for (size_t j = 0; j < count; ++j)
        for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
            *ri++ = *bi;

    *ri = 0;

    return xpath_string(result, true);
}
+
// Evaluates this subtree as a string. String-producing node types are handled
// directly; the default case converts from the node's static return type.
// Intermediate values are evaluated on the swapped (temp) stack wherever the
// final string is built on the result stack, so temporaries can be reclaimed.
xpath_string eval_string(const xpath_context& c, const xpath_stack& stack) {
    switch (_type) {
    case ast_string_constant:
        return xpath_string_const(_data.string);

    case ast_func_local_name_0: {
        xpath_node na = c.n;

        return xpath_string_const(local_name(na));
    }

    case ast_func_local_name_1: {
        xpath_allocator_capture cr(stack.result);

        // per XPath, the one-argument form uses the set's first node
        xpath_node_set_raw ns = _left->eval_node_set(c, stack);
        xpath_node na = ns.first();

        return xpath_string_const(local_name(na));
    }

    case ast_func_name_0: {
        xpath_node na = c.n;

        return xpath_string_const(qualified_name(na));
    }

    case ast_func_name_1: {
        xpath_allocator_capture cr(stack.result);

        xpath_node_set_raw ns = _left->eval_node_set(c, stack);
        xpath_node na = ns.first();

        return xpath_string_const(qualified_name(na));
    }

    case ast_func_namespace_uri_0: {
        xpath_node na = c.n;

        return xpath_string_const(namespace_uri(na));
    }

    case ast_func_namespace_uri_1: {
        xpath_allocator_capture cr(stack.result);

        xpath_node_set_raw ns = _left->eval_node_set(c, stack);
        xpath_node na = ns.first();

        return xpath_string_const(namespace_uri(na));
    }

    case ast_func_string_0:
        return string_value(c.n, stack.result);

    case ast_func_string_1:
        return _left->eval_string(c, stack);

    case ast_func_concat:
        return eval_string_concat(c, stack);

    case ast_func_substring_before: {
        xpath_allocator_capture cr(stack.temp);

        xpath_stack swapped_stack = {stack.temp, stack.result};

        xpath_string s = _left->eval_string(c, swapped_stack);
        xpath_string p = _right->eval_string(c, swapped_stack);

        const char_t* pos = find_substring(s.c_str(), p.c_str());

        // empty string when the separator is not found
        return pos ? xpath_string(s.c_str(), pos, stack.result) : xpath_string();
    }

    case ast_func_substring_after: {
        xpath_allocator_capture cr(stack.temp);

        xpath_stack swapped_stack = {stack.temp, stack.result};

        xpath_string s = _left->eval_string(c, swapped_stack);
        xpath_string p = _right->eval_string(c, swapped_stack);

        const char_t* pos = find_substring(s.c_str(), p.c_str());
        if (!pos) return xpath_string();

        const char_t* result = pos + p.length();

        // only copy when the source lives on the (temporary) heap
        return s.uses_heap() ? xpath_string(result, stack.result) : xpath_string_const(result);
    }

    case ast_func_substring_2: {
        xpath_allocator_capture cr(stack.temp);

        xpath_stack swapped_stack = {stack.temp, stack.result};

        xpath_string s = _left->eval_string(c, swapped_stack);
        size_t s_length = s.length();

        // XPath positions are 1-based and rounded to the nearest integer
        double first = round_nearest(_right->eval_number(c, stack));

        if (is_nan(first)) return xpath_string(); // NaN
        else if (first >= s_length + 1) return xpath_string();

        size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
        assert(1 <= pos && pos <= s_length + 1);

        const char_t* rbegin = s.c_str() + (pos - 1);

        return s.uses_heap() ? xpath_string(rbegin, stack.result) : xpath_string_const(rbegin);
    }

    case ast_func_substring_3: {
        xpath_allocator_capture cr(stack.temp);

        xpath_stack swapped_stack = {stack.temp, stack.result};

        xpath_string s = _left->eval_string(c, swapped_stack);
        size_t s_length = s.length();

        // third argument is chained through _right->_next
        double first = round_nearest(_right->eval_number(c, stack));
        double last = first + round_nearest(_right->_next->eval_number(c, stack));

        if (is_nan(first) || is_nan(last)) return xpath_string();
        else if (first >= s_length + 1) return xpath_string();
        else if (first >= last) return xpath_string();
        else if (last < 1) return xpath_string();

        size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
        size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last);

        assert(1 <= pos && pos <= end && end <= s_length + 1);
        const char_t* rbegin = s.c_str() + (pos - 1);
        const char_t* rend = s.c_str() + (end - 1);

        return (end == s_length + 1 && !s.uses_heap()) ? xpath_string_const(rbegin) : xpath_string(rbegin, rend, stack.result);
    }

    case ast_func_normalize_space_0: {
        xpath_string s = string_value(c.n, stack.result);

        // normalize in place on a mutable copy owned by the result stack
        normalize_space(s.data(stack.result));

        return s;
    }

    case ast_func_normalize_space_1: {
        xpath_string s = _left->eval_string(c, stack);

        normalize_space(s.data(stack.result));

        return s;
    }

    case ast_func_translate: {
        xpath_allocator_capture cr(stack.temp);

        xpath_stack swapped_stack = {stack.temp, stack.result};

        // source string goes on the result stack (it becomes the return value)
        xpath_string s = _left->eval_string(c, stack);
        xpath_string from = _right->eval_string(c, swapped_stack);
        xpath_string to = _right->_next->eval_string(c, swapped_stack);

        translate(s.data(stack.result), from.c_str(), to.c_str());

        return s;
    }

    case ast_variable: {
        assert(_rettype == _data.variable->type());

        if (_rettype == xpath_type_string)
            return xpath_string_const(_data.variable->get_string());

        // fallthrough to type conversion
    }

    default: {
        switch (_rettype) {
        case xpath_type_boolean:
            return xpath_string_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));

        case xpath_type_number:
            return convert_number_to_string(eval_number(c, stack), stack.result);

        case xpath_type_node_set: {
            xpath_allocator_capture cr(stack.temp);

            xpath_stack swapped_stack = {stack.temp, stack.result};

            // node set converts to the string value of its first node
            xpath_node_set_raw ns = eval_node_set(c, swapped_stack);
            return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
        }

        default:
            assert(!"Wrong expression for return type string");
            return xpath_string();
        }
    }
    }
}
+
// Evaluates this subtree as a node set. Only union, filter, id(), steps and
// node-set variables can produce node sets; anything else is a static type
// error (caught by the parser, asserted here).
xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack) {
    switch (_type) {
    case ast_op_union: {
        xpath_allocator_capture cr(stack.temp);

        xpath_stack swapped_stack = {stack.temp, stack.result};

        // left set on the temp stack, right set on the result stack (it is
        // the one that survives as the merged result)
        xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack);
        xpath_node_set_raw rs = _right->eval_node_set(c, stack);

        // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
        rs.set_type(xpath_node_set::type_unsorted);

        rs.append(ls.begin(), ls.end(), stack.result);
        rs.remove_duplicates();

        return rs;
    }

    case ast_filter:
    case ast_filter_posinv: {
        xpath_node_set_raw set = _left->eval_node_set(c, stack);

        // either expression is a number or it contains position() call; sort by document order
        if (_type == ast_filter) set.sort_do();

        apply_predicate(set, 0, _right, stack);

        return set;
    }

    case ast_func_id:
        // id() is not supported; it always evaluates to an empty set
        return xpath_node_set_raw();

    case ast_step: {
        // dispatch the runtime axis value to the compile-time-tagged step_do
        switch (_axis) {
        case axis_ancestor:
            return step_do(c, stack, axis_to_type<axis_ancestor>());

        case axis_ancestor_or_self:
            return step_do(c, stack, axis_to_type<axis_ancestor_or_self>());

        case axis_attribute:
            return step_do(c, stack, axis_to_type<axis_attribute>());

        case axis_child:
            return step_do(c, stack, axis_to_type<axis_child>());

        case axis_descendant:
            return step_do(c, stack, axis_to_type<axis_descendant>());

        case axis_descendant_or_self:
            return step_do(c, stack, axis_to_type<axis_descendant_or_self>());

        case axis_following:
            return step_do(c, stack, axis_to_type<axis_following>());

        case axis_following_sibling:
            return step_do(c, stack, axis_to_type<axis_following_sibling>());

        case axis_namespace:
            // namespaced axis is not supported
            return xpath_node_set_raw();

        case axis_parent:
            return step_do(c, stack, axis_to_type<axis_parent>());

        case axis_preceding:
            return step_do(c, stack, axis_to_type<axis_preceding>());

        case axis_preceding_sibling:
            return step_do(c, stack, axis_to_type<axis_preceding_sibling>());

        case axis_self:
            return step_do(c, stack, axis_to_type<axis_self>());

        default:
            assert(!"Unknown axis");
            return xpath_node_set_raw();
        }
    }

    case ast_step_root: {
        assert(!_right); // root step can't have any predicates

        xpath_node_set_raw ns;

        ns.set_type(xpath_node_set::type_sorted);

        // '/' yields the document root of whatever the context node belongs to
        if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
        else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);

        return ns;
    }

    case ast_variable: {
        assert(_rettype == _data.variable->type());

        if (_rettype == xpath_type_node_set) {
            const xpath_node_set& s = _data.variable->get_node_set();

            // copy the variable's nodes into stack-allocated storage
            xpath_node_set_raw ns;

            ns.set_type(s.type());
            ns.append(s.begin(), s.end(), stack.result);

            return ns;
        }

        // fallthrough to type conversion
    }

    default:
        assert(!"Wrong expression for return type node set");
        return xpath_node_set_raw();
    }
}
+
+ bool is_posinv() {
+ switch (_type) {
+ case ast_func_position:
+ return false;
+
+ case ast_string_constant:
+ case ast_number_constant:
+ case ast_variable:
+ return true;
+
+ case ast_step:
+ case ast_step_root:
+ return true;
+
+ case ast_predicate:
+ case ast_filter:
+ case ast_filter_posinv:
+ return true;
+
+ default:
+ if (_left && !_left->is_posinv()) return false;
+
+ for (xpath_ast_node* n = _right; n; n = n->_next)
+ if (!n->is_posinv()) return false;
+
+ return true;
+ }
+ }
+
// Returns the expression's static result type (stored as char to save space).
xpath_value_type rettype() const {
    return static_cast<xpath_value_type>(_rettype);
}
+};
+
+struct xpath_parser {
xpath_allocator* _alloc;        // arena for AST nodes and parsed strings
xpath_lexer _lexer;             // tokenizer over the query text

const char_t* _query;           // original query text (for error offsets)
xpath_variable_set* _variables; // optional $variable bindings; may be null

xpath_parse_result* _result;    // error message/offset are written here

#ifdef PUGIXML_NO_EXCEPTIONS
jmp_buf _error_handler;         // longjmp target when exceptions are disabled
#endif
+
// Records the error message and the offset of the current lexer position
// within the query, then aborts parsing: longjmp in no-exceptions builds,
// xpath_exception otherwise. Does not return.
void throw_error(const char* message) {
    _result->error = message;
    _result->offset = _lexer.current_pos() - _query;

#ifdef PUGIXML_NO_EXCEPTIONS
    longjmp(_error_handler, 1);
#else
    throw xpath_exception(*_result);
#endif
}
+
// Reports an out-of-memory condition: routed through throw_error in
// no-exceptions builds, std::bad_alloc otherwise. Does not return.
void throw_error_oom() {
#ifdef PUGIXML_NO_EXCEPTIONS
    throw_error("Out of memory");
#else
    throw std::bad_alloc();
#endif
}
+
// Allocates raw storage for one xpath_ast_node from the parser arena;
// reports OOM via throw_error_oom (which does not return).
void* alloc_node() {
    void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node));

    if (!result) throw_error_oom();

    return result;
}
+
+ const char_t* alloc_string(const xpath_lexer_string& value) {
+ if (value.begin) {
+ size_t length = static_cast<size_t>(value.end - value.begin);
+
+ char_t* c = static_cast<char_t*>(_alloc->allocate_nothrow((length + 1) * sizeof(char_t)));
+ if (!c) throw_error_oom();
+
+ memcpy(c, value.begin, length * sizeof(char_t));
+ c[length] = 0;
+
+ return c;
+ } else return 0;
+ }
+
// Builds a node for a function with an optional single node-set argument
// (local-name/name/namespace-uri): type0 for the 0-arg form, type1 for the
// 1-arg form. Rejects a non-node-set argument with a parse error.
xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2]) {
    assert(argc <= 1);

    if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");

    // args[0] is 0 when argc == 0, which the 0-arg node type ignores
    return new (alloc_node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]);
}
+
+ xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2]) {
+ switch (name.begin[0]) {
+ case 'b':
+ if (name == PUGIXML_TEXT("boolean") && argc == 1)
+ return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);
+
+ break;
+
+ case 'c':
+ if (name == PUGIXML_TEXT("count") && argc == 1) {
+ if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
+ return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]);
+ } else if (name == PUGIXML_TEXT("contains") && argc == 2)
+ return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_string, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("concat") && argc >= 2)
+ return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
+ return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
+
+ break;
+
+ case 'f':
+ if (name == PUGIXML_TEXT("false") && argc == 0)
+ return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean);
+ else if (name == PUGIXML_TEXT("floor") && argc == 1)
+ return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);
+
+ break;
+
+ case 'i':
+ if (name == PUGIXML_TEXT("id") && argc == 1)
+ return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);
+
+ break;
+
+ case 'l':
+ if (name == PUGIXML_TEXT("last") && argc == 0)
+ return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number);
+ else if (name == PUGIXML_TEXT("lang") && argc == 1)
+ return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);
+ else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
+ return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args);
+
+ break;
+
+ case 'n':
+ if (name == PUGIXML_TEXT("name") && argc <= 1)
+ return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args);
+ else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
+ return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);
+ else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
+ return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("not") && argc == 1)
+ return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);
+ else if (name == PUGIXML_TEXT("number") && argc <= 1)
+ return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
+
+ break;
+
+ case 'p':
+ if (name == PUGIXML_TEXT("position") && argc == 0)
+ return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number);
+
+ break;
+
+ case 'r':
+ if (name == PUGIXML_TEXT("round") && argc == 1)
+ return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]);
+
+ break;
+
+ case 's':
+ if (name == PUGIXML_TEXT("string") && argc <= 1)
+ return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
+ else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
+ return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_string, args[0]);
+ else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
+ return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
+ return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
+ return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
+ return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("sum") && argc == 1) {
+ if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
+ return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);
+ }
+
+ break;
+
+ case 't':
+ if (name == PUGIXML_TEXT("translate") && argc == 3)
+ return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("true") && argc == 0)
+ return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean);
+
+ break;
+
+ default:
+ break;
+ }
+
+ throw_error("Unrecognized function or wrong parameter count");
+
+ return 0;
+ }
+
// Maps an axis name token to its axis_t value, dispatching on the first
// character. On an unrecognized name sets 'specified' to false and returns
// the default axis (child); callers use 'specified' to distinguish "no axis
// given" from an explicit child axis.
axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) {
    specified = true;

    switch (name.begin[0]) {
    case 'a':
        if (name == PUGIXML_TEXT("ancestor"))
            return axis_ancestor;
        else if (name == PUGIXML_TEXT("ancestor-or-self"))
            return axis_ancestor_or_self;
        else if (name == PUGIXML_TEXT("attribute"))
            return axis_attribute;

        break;

    case 'c':
        if (name == PUGIXML_TEXT("child"))
            return axis_child;

        break;

    case 'd':
        if (name == PUGIXML_TEXT("descendant"))
            return axis_descendant;
        else if (name == PUGIXML_TEXT("descendant-or-self"))
            return axis_descendant_or_self;

        break;

    case 'f':
        if (name == PUGIXML_TEXT("following"))
            return axis_following;
        else if (name == PUGIXML_TEXT("following-sibling"))
            return axis_following_sibling;

        break;

    case 'n':
        if (name == PUGIXML_TEXT("namespace"))
            return axis_namespace;

        break;

    case 'p':
        if (name == PUGIXML_TEXT("parent"))
            return axis_parent;
        else if (name == PUGIXML_TEXT("preceding"))
            return axis_preceding;
        else if (name == PUGIXML_TEXT("preceding-sibling"))
            return axis_preceding_sibling;

        break;

    case 's':
        if (name == PUGIXML_TEXT("self"))
            return axis_self;

        break;

    default:
        break;
    }

    // not an axis name; child is XPath's default axis
    specified = false;
    return axis_child;
}
+
// Maps a node-type test name (comment/node/processing-instruction/text) to
// its nodetest_t value; returns nodetest_none for anything else, which the
// caller treats as an ordinary name test.
nodetest_t parse_node_test_type(const xpath_lexer_string& name) {
    switch (name.begin[0]) {
    case 'c':
        if (name == PUGIXML_TEXT("comment"))
            return nodetest_type_comment;

        break;

    case 'n':
        if (name == PUGIXML_TEXT("node"))
            return nodetest_type_node;

        break;

    case 'p':
        if (name == PUGIXML_TEXT("processing-instruction"))
            return nodetest_type_pi;

        break;

    case 't':
        if (name == PUGIXML_TEXT("text"))
            return nodetest_type_text;

        break;

    default:
        break;
    }

    return nodetest_none;
}
+
// PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
// Parses one primary expression at the current lexer position and advances
// past it. Function-call arguments beyond the first two are chained via
// set_next on the previous argument, which is how parse_function and the
// 3+-argument evaluators (substring, translate) receive them.
xpath_ast_node* parse_primary_expression() {
    switch (_lexer.current()) {
    case lex_var_ref: {
        xpath_lexer_string name = _lexer.contents();

        if (!_variables)
            throw_error("Unknown variable: variable set is not provided");

        xpath_variable* var = get_variable(_variables, name.begin, name.end);

        if (!var)
            throw_error("Unknown variable: variable set does not contain the given name");

        _lexer.next();

        // variables are bound at parse time; the node's type is the variable's
        return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var);
    }

    case lex_open_brace: {
        // parenthesized subexpression
        _lexer.next();

        xpath_ast_node* n = parse_expression();

        if (_lexer.current() != lex_close_brace)
            throw_error("Unmatched braces");

        _lexer.next();

        return n;
    }

    case lex_quoted_string: {
        // copy the literal out of the lexer buffer into arena-owned storage
        const char_t* value = alloc_string(_lexer.contents());

        xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value);
        _lexer.next();

        return n;
    }

    case lex_number: {
        double value = 0;

        if (!convert_string_to_number(_lexer.contents().begin, _lexer.contents().end, &value))
            throw_error_oom();

        xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value);
        _lexer.next();

        return n;
    }

    case lex_string: {
        // bare name followed by '(' — a function call
        xpath_ast_node* args[2] = {0};
        size_t argc = 0;

        xpath_lexer_string function = _lexer.contents();
        _lexer.next();

        xpath_ast_node* last_arg = 0;

        if (_lexer.current() != lex_open_brace)
            throw_error("Unrecognized function call");
        _lexer.next();

        if (_lexer.current() != lex_close_brace)
            args[argc++] = parse_expression();

        while (_lexer.current() != lex_close_brace) {
            if (_lexer.current() != lex_comma)
                throw_error("No comma between function arguments");
            _lexer.next();

            xpath_ast_node* n = parse_expression();

            // first two arguments go into args[]; the rest chain off args[1]
            if (argc < 2) args[argc] = n;
            else last_arg->set_next(n);

            argc++;
            last_arg = n;
        }

        _lexer.next();

        return parse_function(function, argc, args);
    }

    default:
        throw_error("Unrecognizable primary expression");

        return 0;
    }
}
+
+ // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
+ // Predicate ::= '[' PredicateExpr ']'
+ // PredicateExpr ::= Expr
+ xpath_ast_node* parse_filter_expression() {
+ xpath_ast_node* n = parse_primary_expression();
+
+ while (_lexer.current() == lex_open_square_brace) {
+ _lexer.next();
+
+ xpath_ast_node* expr = parse_expression();
+
+ if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set");
+
+ bool posinv = expr->rettype() != xpath_type_number && expr->is_posinv();
+
+ n = new (alloc_node()) xpath_ast_node(posinv ? ast_filter_posinv : ast_filter, xpath_type_node_set, n, expr);
+
+ if (_lexer.current() != lex_close_square_brace)
+ throw_error("Unmatched square brace");
+
+ _lexer.next();
+ }
+
+ return n;
+ }
+
+ // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
+ // AxisSpecifier ::= AxisName '::' | '@'?
+ // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
+ // NameTest ::= '*' | NCName ':' '*' | QName
+ // AbbreviatedStep ::= '.' | '..'
+ xpath_ast_node* parse_step(xpath_ast_node* set) {
+ if (set && set->rettype() != xpath_type_node_set)
+ throw_error("Step has to be applied to node set");
+
+ bool axis_specified = false;
+ axis_t axis = axis_child; // implied child axis
+
+ if (_lexer.current() == lex_axis_attribute) {
+ axis = axis_attribute;
+ axis_specified = true;
+
+ _lexer.next();
+ } else if (_lexer.current() == lex_dot) {
+ _lexer.next();
+
+ return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0);
+ } else if (_lexer.current() == lex_double_dot) {
+ _lexer.next();
+
+ return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0);
+ }
+
+ nodetest_t nt_type = nodetest_none;
+ xpath_lexer_string nt_name;
+
+ if (_lexer.current() == lex_string) {
+ // node name test
+ nt_name = _lexer.contents();
+ _lexer.next();
+
+ // was it an axis name?
+ if (_lexer.current() == lex_double_colon) {
+ // parse axis name
+ if (axis_specified) throw_error("Two axis specifiers in one step");
+
+ axis = parse_axis_name(nt_name, axis_specified);
+
+ if (!axis_specified) throw_error("Unknown axis");
+
+ // read actual node test
+ _lexer.next();
+
+ if (_lexer.current() == lex_multiply) {
+ nt_type = nodetest_all;
+ nt_name = xpath_lexer_string();
+ _lexer.next();
+ } else if (_lexer.current() == lex_string) {
+ nt_name = _lexer.contents();
+ _lexer.next();
+ } else throw_error("Unrecognized node test");
+ }
+
+ if (nt_type == nodetest_none) {
+ // node type test or processing-instruction
+ if (_lexer.current() == lex_open_brace) {
+ _lexer.next();
+
+ if (_lexer.current() == lex_close_brace) {
+ _lexer.next();
+
+ nt_type = parse_node_test_type(nt_name);
+
+ if (nt_type == nodetest_none) throw_error("Unrecognized node type");
+
+ nt_name = xpath_lexer_string();
+ } else if (nt_name == PUGIXML_TEXT("processing-instruction")) {
+ if (_lexer.current() != lex_quoted_string)
+ throw_error("Only literals are allowed as arguments to processing-instruction()");
+
+ nt_type = nodetest_pi;
+ nt_name = _lexer.contents();
+ _lexer.next();
+
+ if (_lexer.current() != lex_close_brace)
+ throw_error("Unmatched brace near processing-instruction()");
+ _lexer.next();
+ } else
+ throw_error("Unmatched brace near node type test");
+
+ }
+ // QName or NCName:*
+ else {
+ if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') { // NCName:*
+ nt_name.end--; // erase *
+
+ nt_type = nodetest_all_in_namespace;
+ } else nt_type = nodetest_name;
+ }
+ }
+ } else if (_lexer.current() == lex_multiply) {
+ nt_type = nodetest_all;
+ _lexer.next();
+ } else throw_error("Unrecognized node test");
+
+ xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name));
+
+ xpath_ast_node* last = 0;
+
+ while (_lexer.current() == lex_open_square_brace) {
+ _lexer.next();
+
+ xpath_ast_node* expr = parse_expression();
+
+ xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, xpath_type_node_set, expr);
+
+ if (_lexer.current() != lex_close_square_brace)
+ throw_error("Unmatched square brace");
+ _lexer.next();
+
+ if (last) last->set_next(pred);
+ else n->set_right(pred);
+
+ last = pred;
+ }
+
+ return n;
+ }
+
+ // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
+ xpath_ast_node* parse_relative_location_path(xpath_ast_node* set) {
+ xpath_ast_node* n = parse_step(set);
+
+ while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) {
+ lexeme_t l = _lexer.current();
+ _lexer.next();
+
+ if (l == lex_double_slash)
+ n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
+
+ n = parse_step(n);
+ }
+
+ return n;
+ }
+
+ // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
+ // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
+ xpath_ast_node* parse_location_path() {
+ if (_lexer.current() == lex_slash) {
+ _lexer.next();
+
+ xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
+
+ // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
+ lexeme_t l = _lexer.current();
+
+ if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
+ return parse_relative_location_path(n);
+ else
+ return n;
+ } else if (_lexer.current() == lex_double_slash) {
+ _lexer.next();
+
+ xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
+ n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
+
+ return parse_relative_location_path(n);
+ }
+
+ // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
+ return parse_relative_location_path(0);
+ }
+
+ // PathExpr ::= LocationPath
+ // | FilterExpr
+ // | FilterExpr '/' RelativeLocationPath
+ // | FilterExpr '//' RelativeLocationPath
+ xpath_ast_node* parse_path_expression() {
+ // Clarification.
+ // PathExpr begins with either LocationPath or FilterExpr.
+ // FilterExpr begins with PrimaryExpr
+ // PrimaryExpr begins with '$' in case of it being a variable reference,
+ // '(' in case of it being an expression, string literal, number constant or
+ // function call.
+
+ if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
+ _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
+ _lexer.current() == lex_string) {
+ if (_lexer.current() == lex_string) {
+ // This is either a function call, or not - if not, we shall proceed with location path
+ const char_t* state = _lexer.state();
+
+ while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
+
+ if (*state != '(') return parse_location_path();
+
+ // This looks like a function call; however this still can be a node-test. Check it.
+ if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path();
+ }
+
+ xpath_ast_node* n = parse_filter_expression();
+
+ if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) {
+ lexeme_t l = _lexer.current();
+ _lexer.next();
+
+ if (l == lex_double_slash) {
+ if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set");
+
+ n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
+ }
+
+ // select from location path
+ return parse_relative_location_path(n);
+ }
+
+ return n;
+ } else return parse_location_path();
+ }
+
+ // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
+ xpath_ast_node* parse_union_expression() {
+ xpath_ast_node* n = parse_path_expression();
+
+ while (_lexer.current() == lex_union) {
+ _lexer.next();
+
+ xpath_ast_node* expr = parse_union_expression();
+
+ if (n->rettype() != xpath_type_node_set || expr->rettype() != xpath_type_node_set)
+ throw_error("Union operator has to be applied to node sets");
+
+ n = new (alloc_node()) xpath_ast_node(ast_op_union, xpath_type_node_set, n, expr);
+ }
+
+ return n;
+ }
+
+ // UnaryExpr ::= UnionExpr | '-' UnaryExpr
+ xpath_ast_node* parse_unary_expression() {
+ if (_lexer.current() == lex_minus) {
+ _lexer.next();
+
+ xpath_ast_node* expr = parse_unary_expression();
+
+ return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
+ } else return parse_union_expression();
+ }
+
+ // MultiplicativeExpr ::= UnaryExpr
+ // | MultiplicativeExpr '*' UnaryExpr
+ // | MultiplicativeExpr 'div' UnaryExpr
+ // | MultiplicativeExpr 'mod' UnaryExpr
+ xpath_ast_node* parse_multiplicative_expression() {
+ xpath_ast_node* n = parse_unary_expression();
+
+ while (_lexer.current() == lex_multiply || (_lexer.current() == lex_string &&
+ (_lexer.contents() == PUGIXML_TEXT("mod") || _lexer.contents() == PUGIXML_TEXT("div")))) {
+ ast_type_t op = _lexer.current() == lex_multiply ? ast_op_multiply :
+ _lexer.contents().begin[0] == 'd' ? ast_op_divide : ast_op_mod;
+ _lexer.next();
+
+ xpath_ast_node* expr = parse_unary_expression();
+
+ n = new (alloc_node()) xpath_ast_node(op, xpath_type_number, n, expr);
+ }
+
+ return n;
+ }
+
+ // AdditiveExpr ::= MultiplicativeExpr
+ // | AdditiveExpr '+' MultiplicativeExpr
+ // | AdditiveExpr '-' MultiplicativeExpr
+ xpath_ast_node* parse_additive_expression() {
+ xpath_ast_node* n = parse_multiplicative_expression();
+
+ while (_lexer.current() == lex_plus || _lexer.current() == lex_minus) {
+ lexeme_t l = _lexer.current();
+
+ _lexer.next();
+
+ xpath_ast_node* expr = parse_multiplicative_expression();
+
+ n = new (alloc_node()) xpath_ast_node(l == lex_plus ? ast_op_add : ast_op_subtract, xpath_type_number, n, expr);
+ }
+
+ return n;
+ }
+
+ // RelationalExpr ::= AdditiveExpr
+ // | RelationalExpr '<' AdditiveExpr
+ // | RelationalExpr '>' AdditiveExpr
+ // | RelationalExpr '<=' AdditiveExpr
+ // | RelationalExpr '>=' AdditiveExpr
+ xpath_ast_node* parse_relational_expression() {
+ xpath_ast_node* n = parse_additive_expression();
+
+ while (_lexer.current() == lex_less || _lexer.current() == lex_less_or_equal ||
+ _lexer.current() == lex_greater || _lexer.current() == lex_greater_or_equal) {
+ lexeme_t l = _lexer.current();
+ _lexer.next();
+
+ xpath_ast_node* expr = parse_additive_expression();
+
+ n = new (alloc_node()) xpath_ast_node(l == lex_less ? ast_op_less : l == lex_greater ? ast_op_greater :
+ l == lex_less_or_equal ? ast_op_less_or_equal : ast_op_greater_or_equal, xpath_type_boolean, n, expr);
+ }
+
+ return n;
+ }
+
+ // EqualityExpr ::= RelationalExpr
+ // | EqualityExpr '=' RelationalExpr
+ // | EqualityExpr '!=' RelationalExpr
+ xpath_ast_node* parse_equality_expression() {
+ xpath_ast_node* n = parse_relational_expression();
+
+ while (_lexer.current() == lex_equal || _lexer.current() == lex_not_equal) {
+ lexeme_t l = _lexer.current();
+
+ _lexer.next();
+
+ xpath_ast_node* expr = parse_relational_expression();
+
+ n = new (alloc_node()) xpath_ast_node(l == lex_equal ? ast_op_equal : ast_op_not_equal, xpath_type_boolean, n, expr);
+ }
+
+ return n;
+ }
+
+ // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
+ xpath_ast_node* parse_and_expression() {
+ xpath_ast_node* n = parse_equality_expression();
+
+ while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("and")) {
+ _lexer.next();
+
+ xpath_ast_node* expr = parse_equality_expression();
+
+ n = new (alloc_node()) xpath_ast_node(ast_op_and, xpath_type_boolean, n, expr);
+ }
+
+ return n;
+ }
+
+ // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
+ xpath_ast_node* parse_or_expression() {
+ xpath_ast_node* n = parse_and_expression();
+
+ while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("or")) {
+ _lexer.next();
+
+ xpath_ast_node* expr = parse_and_expression();
+
+ n = new (alloc_node()) xpath_ast_node(ast_op_or, xpath_type_boolean, n, expr);
+ }
+
+ return n;
+ }
+
+ // Expr ::= OrExpr
+ xpath_ast_node* parse_expression() {
+ return parse_or_expression();
+ }
+
+ xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result) {
+ }
+
+ xpath_ast_node* parse() {
+ xpath_ast_node* result = parse_expression();
+
+ if (_lexer.current() != lex_eof) {
+ // there are still unparsed tokens left, error
+ throw_error("Incorrect query");
+ }
+
+ return result;
+ }
+
+ static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result) {
+ xpath_parser parser(query, variables, alloc, result);
+
+#ifdef PUGIXML_NO_EXCEPTIONS
+ int error = setjmp(parser._error_handler);
+
+ return (error == 0) ? parser.parse() : 0;
+#else
+ return parser.parse();
+#endif
+ }
+};
+
+struct xpath_query_impl {
+ static xpath_query_impl* create() {
+ void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
+
+ return new (memory) xpath_query_impl();
+ }
+
+ static void destroy(void* ptr) {
+ if (!ptr) return;
+
+ // free all allocated pages
+ static_cast<xpath_query_impl*>(ptr)->alloc.release();
+
+ // free allocator memory (with the first page)
+ xml_memory::deallocate(ptr);
+ }
+
+ xpath_query_impl(): root(0), alloc(&block) {
+ block.next = 0;
+ }
+
+ xpath_ast_node* root;
+ xpath_allocator alloc;
+ xpath_memory_block block;
+};
+
+PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd)
+{
+ if (!impl) return xpath_string();
+
+#ifdef PUGIXML_NO_EXCEPTIONS
+ if (setjmp(sd.error_handler)) return xpath_string();
+#endif
+
+ xpath_context c(n, 1, 1);
+
+ return impl->root->eval_string(c, sd.stack);
+}
PUGI__NS_END
namespace pugi
{
#ifndef PUGIXML_NO_EXCEPTIONS
- PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
- {
- assert(_result.error);
- }
-
- PUGI__FN const char* xpath_exception::what() const throw()
- {
- return _result.error;
- }
-
- PUGI__FN const xpath_parse_result& xpath_exception::result() const
- {
- return _result;
- }
+PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
+{
+ assert(_result.error);
+}
+
+PUGI__FN const char* xpath_exception::what() const throw()
+{
+ return _result.error;
+}
+
+PUGI__FN const xpath_parse_result& xpath_exception::result() const
+{
+ return _result;
+}
#endif
-
- PUGI__FN xpath_node::xpath_node()
- {
- }
-
- PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
- {
- }
-
- PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
- {
- }
-
- PUGI__FN xml_node xpath_node::node() const
- {
- return _attribute ? xml_node() : _node;
- }
-
- PUGI__FN xml_attribute xpath_node::attribute() const
- {
- return _attribute;
- }
-
- PUGI__FN xml_node xpath_node::parent() const
- {
- return _attribute ? _node : _node.parent();
- }
-
- PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
- {
- }
-
- PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
- {
- return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
- }
-
- PUGI__FN bool xpath_node::operator!() const
- {
- return !(_node || _attribute);
- }
-
- PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
- {
- return _node == n._node && _attribute == n._attribute;
- }
-
- PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
- {
- return _node != n._node || _attribute != n._attribute;
- }
+
+PUGI__FN xpath_node::xpath_node()
+{
+}
+
+PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
+{
+}
+
+PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
+{
+}
+
+PUGI__FN xml_node xpath_node::node() const
+{
+ return _attribute ? xml_node() : _node;
+}
+
+PUGI__FN xml_attribute xpath_node::attribute() const
+{
+ return _attribute;
+}
+
+PUGI__FN xml_node xpath_node::parent() const
+{
+ return _attribute ? _node : _node.parent();
+}
+
+PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
+{
+}
+
+PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
+{
+ return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
+}
+
+PUGI__FN bool xpath_node::operator!() const
+{
+ return !(_node || _attribute);
+}
+
+PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
+{
+ return _node == n._node && _attribute == n._attribute;
+}
+
+PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
+{
+ return _node != n._node || _attribute != n._attribute;
+}
#ifdef __BORLANDC__
- PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
- {
- return (bool)lhs && rhs;
- }
-
- PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
- {
- return (bool)lhs || rhs;
- }
+PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
+{
+ return (bool)lhs && rhs;
+}
+
+PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
+{
+ return (bool)lhs || rhs;
+}
+#endif
+
+PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_)
+{
+ assert(begin_ <= end_);
+
+ size_t size_ = static_cast<size_t>(end_ - begin_);
+
+ if (size_ <= 1) {
+ // deallocate old buffer
+ if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
+
+ // use internal buffer
+ if (begin_ != end_) _storage = *begin_;
+
+ _begin = &_storage;
+ _end = &_storage + size_;
+ } else {
+ // make heap copy
+ xpath_node* storage = static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
+
+ if (!storage) {
+#ifdef PUGIXML_NO_EXCEPTIONS
+ return;
+#else
+ throw std::bad_alloc();
#endif
+ }
+
+ memcpy(storage, begin_, size_ * sizeof(xpath_node));
+
+ // deallocate old buffer
+ if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
+
+ // finalize
+ _begin = storage;
+ _end = storage + size_;
+ }
+}
+
+PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage)
+{
+}
+
+PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_), _begin(&_storage), _end(&_storage)
+{
+ _assign(begin_, end_);
+}
+
+PUGI__FN xpath_node_set::~xpath_node_set()
+{
+ if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
+}
+
+PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(ns._type), _begin(&_storage), _end(&_storage)
+{
+ _assign(ns._begin, ns._end);
+}
+
+PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
+{
+ if (this == &ns) return *this;
+
+ _type = ns._type;
+ _assign(ns._begin, ns._end);
+
+ return *this;
+}
+
+PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
+{
+ return _type;
+}
+
+PUGI__FN size_t xpath_node_set::size() const
+{
+ return _end - _begin;
+}
+
+PUGI__FN bool xpath_node_set::empty() const
+{
+ return _begin == _end;
+}
+
+PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
+{
+ assert(index < size());
+ return _begin[index];
+}
+
+PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
+{
+ return _begin;
+}
- PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_)
- {
- assert(begin_ <= end_);
-
- size_t size_ = static_cast<size_t>(end_ - begin_);
-
- if (size_ <= 1)
- {
- // deallocate old buffer
- if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
-
- // use internal buffer
- if (begin_ != end_) _storage = *begin_;
-
- _begin = &_storage;
- _end = &_storage + size_;
- }
- else
- {
- // make heap copy
- xpath_node* storage = static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
-
- if (!storage)
- {
- #ifdef PUGIXML_NO_EXCEPTIONS
- return;
- #else
- throw std::bad_alloc();
- #endif
- }
-
- memcpy(storage, begin_, size_ * sizeof(xpath_node));
-
- // deallocate old buffer
- if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
-
- // finalize
- _begin = storage;
- _end = storage + size_;
- }
- }
-
- PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage)
- {
- }
-
- PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_), _begin(&_storage), _end(&_storage)
- {
- _assign(begin_, end_);
- }
-
- PUGI__FN xpath_node_set::~xpath_node_set()
- {
- if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
- }
-
- PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(ns._type), _begin(&_storage), _end(&_storage)
- {
- _assign(ns._begin, ns._end);
- }
-
- PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
- {
- if (this == &ns) return *this;
-
- _type = ns._type;
- _assign(ns._begin, ns._end);
-
- return *this;
- }
-
- PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
- {
- return _type;
- }
-
- PUGI__FN size_t xpath_node_set::size() const
- {
- return _end - _begin;
- }
-
- PUGI__FN bool xpath_node_set::empty() const
- {
- return _begin == _end;
- }
-
- PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
- {
- assert(index < size());
- return _begin[index];
- }
-
- PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
- {
- return _begin;
- }
-
- PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
- {
- return _end;
- }
-
- PUGI__FN void xpath_node_set::sort(bool reverse)
- {
- _type = impl::xpath_sort(_begin, _end, _type, reverse);
- }
-
- PUGI__FN xpath_node xpath_node_set::first() const
- {
- return impl::xpath_first(_begin, _end, _type);
- }
-
- PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
- {
- }
-
- PUGI__FN xpath_parse_result::operator bool() const
- {
- return error == 0;
- }
-
- PUGI__FN const char* xpath_parse_result::description() const
- {
- return error ? error : "No error";
- }
-
- PUGI__FN xpath_variable::xpath_variable()
- {
- }
-
- PUGI__FN const char_t* xpath_variable::name() const
- {
- switch (_type)
- {
- case xpath_type_node_set:
- return static_cast<const impl::xpath_variable_node_set*>(this)->name;
-
- case xpath_type_number:
- return static_cast<const impl::xpath_variable_number*>(this)->name;
-
- case xpath_type_string:
- return static_cast<const impl::xpath_variable_string*>(this)->name;
-
- case xpath_type_boolean:
- return static_cast<const impl::xpath_variable_boolean*>(this)->name;
-
- default:
- assert(!"Invalid variable type");
- return 0;
- }
- }
-
- PUGI__FN xpath_value_type xpath_variable::type() const
- {
- return _type;
- }
-
- PUGI__FN bool xpath_variable::get_boolean() const
- {
- return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
- }
-
- PUGI__FN double xpath_variable::get_number() const
- {
- return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
- }
-
- PUGI__FN const char_t* xpath_variable::get_string() const
- {
- const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
- return value ? value : PUGIXML_TEXT("");
- }
-
- PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
- {
- return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
- }
-
- PUGI__FN bool xpath_variable::set(bool value)
- {
- if (_type != xpath_type_boolean) return false;
-
- static_cast<impl::xpath_variable_boolean*>(this)->value = value;
- return true;
- }
-
- PUGI__FN bool xpath_variable::set(double value)
- {
- if (_type != xpath_type_number) return false;
-
- static_cast<impl::xpath_variable_number*>(this)->value = value;
- return true;
- }
-
- PUGI__FN bool xpath_variable::set(const char_t* value)
- {
- if (_type != xpath_type_string) return false;
-
- impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
-
- // duplicate string
- size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
-
- char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
- if (!copy) return false;
-
- memcpy(copy, value, size);
-
- // replace old string
- if (var->value) impl::xml_memory::deallocate(var->value);
- var->value = copy;
-
- return true;
- }
-
- PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
- {
- if (_type != xpath_type_node_set) return false;
-
- static_cast<impl::xpath_variable_node_set*>(this)->value = value;
- return true;
- }
-
- PUGI__FN xpath_variable_set::xpath_variable_set()
- {
- for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) _data[i] = 0;
- }
-
- PUGI__FN xpath_variable_set::~xpath_variable_set()
- {
- for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
- {
- xpath_variable* var = _data[i];
-
- while (var)
- {
- xpath_variable* next = var->_next;
-
- impl::delete_xpath_variable(var->_type, var);
-
- var = next;
- }
- }
- }
-
- PUGI__FN xpath_variable* xpath_variable_set::find(const char_t* name) const
- {
- const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
- size_t hash = impl::hash_string(name) % hash_size;
-
- // look for existing variable
- for (xpath_variable* var = _data[hash]; var; var = var->_next)
- if (impl::strequal(var->name(), name))
- return var;
-
- return 0;
- }
-
- PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
- {
- const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
- size_t hash = impl::hash_string(name) % hash_size;
-
- // look for existing variable
- for (xpath_variable* var = _data[hash]; var; var = var->_next)
- if (impl::strequal(var->name(), name))
- return var->type() == type ? var : 0;
-
- // add new variable
- xpath_variable* result = impl::new_xpath_variable(type, name);
-
- if (result)
- {
- result->_type = type;
- result->_next = _data[hash];
-
- _data[hash] = result;
- }
-
- return result;
- }
-
- PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
- {
- xpath_variable* var = add(name, xpath_type_boolean);
- return var ? var->set(value) : false;
- }
-
- PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
- {
- xpath_variable* var = add(name, xpath_type_number);
- return var ? var->set(value) : false;
- }
-
- PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
- {
- xpath_variable* var = add(name, xpath_type_string);
- return var ? var->set(value) : false;
- }
-
- PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
- {
- xpath_variable* var = add(name, xpath_type_node_set);
- return var ? var->set(value) : false;
- }
-
- PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
- {
- return find(name);
- }
-
- PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
- {
- return find(name);
- }
-
- PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
- {
- impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
-
- if (!qimpl)
- {
- #ifdef PUGIXML_NO_EXCEPTIONS
- _result.error = "Out of memory";
- #else
- throw std::bad_alloc();
- #endif
- }
- else
- {
- impl::buffer_holder impl_holder(qimpl, impl::xpath_query_impl::destroy);
-
- qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
-
- if (qimpl->root)
- {
- _impl = static_cast<impl::xpath_query_impl*>(impl_holder.release());
- _result.error = 0;
- }
- }
- }
-
- PUGI__FN xpath_query::~xpath_query()
- {
- impl::xpath_query_impl::destroy(_impl);
- }
-
- PUGI__FN xpath_value_type xpath_query::return_type() const
- {
- if (!_impl) return xpath_type_none;
-
- return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
- }
-
- PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
- {
- if (!_impl) return false;
-
- impl::xpath_context c(n, 1, 1);
- impl::xpath_stack_data sd;
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- if (setjmp(sd.error_handler)) return false;
- #endif
-
- return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
- }
-
- PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
- {
- if (!_impl) return impl::gen_nan();
-
- impl::xpath_context c(n, 1, 1);
- impl::xpath_stack_data sd;
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- if (setjmp(sd.error_handler)) return impl::gen_nan();
- #endif
-
- return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
- }
+PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
+{
+ return _end;
+}
+
+PUGI__FN void xpath_node_set::sort(bool reverse)
+{
+ _type = impl::xpath_sort(_begin, _end, _type, reverse);
+}
+
+PUGI__FN xpath_node xpath_node_set::first() const
+{
+ return impl::xpath_first(_begin, _end, _type);
+}
+
+PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
+{
+}
+
+PUGI__FN xpath_parse_result::operator bool() const
+{
+ return error == 0;
+}
+
+PUGI__FN const char* xpath_parse_result::description() const
+{
+ return error ? error : "No error";
+}
+
+PUGI__FN xpath_variable::xpath_variable()
+{
+}
+
+PUGI__FN const char_t* xpath_variable::name() const
+{
+ switch (_type) {
+ case xpath_type_node_set:
+ return static_cast<const impl::xpath_variable_node_set*>(this)->name;
+
+ case xpath_type_number:
+ return static_cast<const impl::xpath_variable_number*>(this)->name;
+
+ case xpath_type_string:
+ return static_cast<const impl::xpath_variable_string*>(this)->name;
+
+ case xpath_type_boolean:
+ return static_cast<const impl::xpath_variable_boolean*>(this)->name;
+
+ default:
+ assert(!"Invalid variable type");
+ return 0;
+ }
+}
+
+PUGI__FN xpath_value_type xpath_variable::type() const
+{
+ return _type;
+}
+
+PUGI__FN bool xpath_variable::get_boolean() const
+{
+ return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
+}
+
+PUGI__FN double xpath_variable::get_number() const
+{
+ return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
+}
+
+PUGI__FN const char_t* xpath_variable::get_string() const
+{
+ const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
+ return value ? value : PUGIXML_TEXT("");
+}
+
+PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
+{
+ return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
+}
+
+PUGI__FN bool xpath_variable::set(bool value)
+{
+ if (_type != xpath_type_boolean) return false;
+
+ static_cast<impl::xpath_variable_boolean*>(this)->value = value;
+ return true;
+}
+
+PUGI__FN bool xpath_variable::set(double value)
+{
+ if (_type != xpath_type_number) return false;
+
+ static_cast<impl::xpath_variable_number*>(this)->value = value;
+ return true;
+}
+
+PUGI__FN bool xpath_variable::set(const char_t* value)
+{
+ if (_type != xpath_type_string) return false;
+
+ impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
+
+ // duplicate string
+ size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
+
+ char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
+ if (!copy) return false;
+
+ memcpy(copy, value, size);
+
+ // replace old string
+ if (var->value) impl::xml_memory::deallocate(var->value);
+ var->value = copy;
+
+ return true;
+}
+
+PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
+{
+ if (_type != xpath_type_node_set) return false;
+
+ static_cast<impl::xpath_variable_node_set*>(this)->value = value;
+ return true;
+}
+
+PUGI__FN xpath_variable_set::xpath_variable_set()
+{
+ for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) _data[i] = 0;
+}
+
+PUGI__FN xpath_variable_set::~xpath_variable_set()
+{
+ for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) {
+ xpath_variable* var = _data[i];
+
+ while (var) {
+ xpath_variable* next = var->_next;
+
+ impl::delete_xpath_variable(var->_type, var);
+
+ var = next;
+ }
+ }
+}
+
+PUGI__FN xpath_variable* xpath_variable_set::find(const char_t* name) const
+{
+ const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
+ size_t hash = impl::hash_string(name) % hash_size;
+
+ // look for existing variable
+ for (xpath_variable* var = _data[hash]; var; var = var->_next)
+ if (impl::strequal(var->name(), name))
+ return var;
+
+ return 0;
+}
+
+PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
+{
+ const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
+ size_t hash = impl::hash_string(name) % hash_size;
+
+ // look for existing variable
+ for (xpath_variable* var = _data[hash]; var; var = var->_next)
+ if (impl::strequal(var->name(), name))
+ return var->type() == type ? var : 0;
+
+ // add new variable
+ xpath_variable* result = impl::new_xpath_variable(type, name);
+
+ if (result) {
+ result->_type = type;
+ result->_next = _data[hash];
+
+ _data[hash] = result;
+ }
+
+ return result;
+}
+
+PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
+{
+ xpath_variable* var = add(name, xpath_type_boolean);
+ return var ? var->set(value) : false;
+}
+
+PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
+{
+ xpath_variable* var = add(name, xpath_type_number);
+ return var ? var->set(value) : false;
+}
+
+PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
+{
+ xpath_variable* var = add(name, xpath_type_string);
+ return var ? var->set(value) : false;
+}
+
+PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
+{
+ xpath_variable* var = add(name, xpath_type_node_set);
+ return var ? var->set(value) : false;
+}
+
+PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
+{
+ return find(name);
+}
+
+PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
+{
+ return find(name);
+}
+
+PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
+{
+ impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
+
+ if (!qimpl) {
+#ifdef PUGIXML_NO_EXCEPTIONS
+ _result.error = "Out of memory";
+#else
+ throw std::bad_alloc();
+#endif
+ } else {
+ impl::buffer_holder impl_holder(qimpl, impl::xpath_query_impl::destroy);
+
+ qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
+
+ if (qimpl->root) {
+ _impl = static_cast<impl::xpath_query_impl*>(impl_holder.release());
+ _result.error = 0;
+ }
+ }
+}
+
+PUGI__FN xpath_query::~xpath_query()
+{
+ impl::xpath_query_impl::destroy(_impl);
+}
+
+PUGI__FN xpath_value_type xpath_query::return_type() const
+{
+ if (!_impl) return xpath_type_none;
+
+ return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
+}
+
+PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
+{
+ if (!_impl) return false;
+
+ impl::xpath_context c(n, 1, 1);
+ impl::xpath_stack_data sd;
+
+#ifdef PUGIXML_NO_EXCEPTIONS
+ if (setjmp(sd.error_handler)) return false;
+#endif
+
+ return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
+}
+
+PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
+{
+ if (!_impl) return impl::gen_nan();
+
+ impl::xpath_context c(n, 1, 1);
+ impl::xpath_stack_data sd;
+
+#ifdef PUGIXML_NO_EXCEPTIONS
+ if (setjmp(sd.error_handler)) return impl::gen_nan();
+#endif
+
+ return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
+}
#ifndef PUGIXML_NO_STL
- PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
- {
- impl::xpath_stack_data sd;
+PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
+{
+ impl::xpath_stack_data sd;
- return impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd).c_str();
- }
+ return impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd).c_str();
+}
#endif
- PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
- {
- impl::xpath_stack_data sd;
-
- impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
-
- size_t full_size = r.length() + 1;
-
- if (capacity > 0)
- {
- size_t size = (full_size < capacity) ? full_size : capacity;
- assert(size > 0);
-
- memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
- buffer[size - 1] = 0;
- }
-
- return full_size;
- }
-
- PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
- {
- if (!_impl) return xpath_node_set();
-
- impl::xpath_ast_node* root = static_cast<impl::xpath_query_impl*>(_impl)->root;
-
- if (root->rettype() != xpath_type_node_set)
- {
- #ifdef PUGIXML_NO_EXCEPTIONS
- return xpath_node_set();
- #else
- xpath_parse_result res;
- res.error = "Expression does not evaluate to node set";
-
- throw xpath_exception(res);
- #endif
- }
-
- impl::xpath_context c(n, 1, 1);
- impl::xpath_stack_data sd;
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- if (setjmp(sd.error_handler)) return xpath_node_set();
- #endif
-
- impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack);
-
- return xpath_node_set(r.begin(), r.end(), r.type());
- }
-
- PUGI__FN const xpath_parse_result& xpath_query::result() const
- {
- return _result;
- }
-
- PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
- {
- }
-
- PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
- {
- return _impl ? unspecified_bool_xpath_query : 0;
- }
-
- PUGI__FN bool xpath_query::operator!() const
- {
- return !_impl;
- }
-
- PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
- {
- xpath_query q(query, variables);
- return select_single_node(q);
- }
-
- PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
- {
- xpath_node_set s = query.evaluate_node_set(*this);
- return s.empty() ? xpath_node() : s.first();
- }
-
- PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
- {
- xpath_query q(query, variables);
- return select_nodes(q);
- }
-
- PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
- {
- return query.evaluate_node_set(*this);
- }
+PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
+{
+ impl::xpath_stack_data sd;
+
+ impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
+
+ size_t full_size = r.length() + 1;
+
+ if (capacity > 0) {
+ size_t size = (full_size < capacity) ? full_size : capacity;
+ assert(size > 0);
+
+ memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
+ buffer[size - 1] = 0;
+ }
+
+ return full_size;
+}
+
+PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
+{
+ if (!_impl) return xpath_node_set();
+
+ impl::xpath_ast_node* root = static_cast<impl::xpath_query_impl*>(_impl)->root;
+
+ if (root->rettype() != xpath_type_node_set) {
+#ifdef PUGIXML_NO_EXCEPTIONS
+ return xpath_node_set();
+#else
+ xpath_parse_result res;
+ res.error = "Expression does not evaluate to node set";
+
+ throw xpath_exception(res);
+#endif
+ }
+
+ impl::xpath_context c(n, 1, 1);
+ impl::xpath_stack_data sd;
+
+#ifdef PUGIXML_NO_EXCEPTIONS
+ if (setjmp(sd.error_handler)) return xpath_node_set();
+#endif
+
+ impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack);
+
+ return xpath_node_set(r.begin(), r.end(), r.type());
+}
+
+PUGI__FN const xpath_parse_result& xpath_query::result() const
+{
+ return _result;
+}
+
+PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
+{
+}
+
+PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
+{
+ return _impl ? unspecified_bool_xpath_query : 0;
+}
+
+PUGI__FN bool xpath_query::operator!() const
+{
+ return !_impl;
+}
+
+PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
+{
+ xpath_query q(query, variables);
+ return select_single_node(q);
+}
+
+PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
+{
+ xpath_node_set s = query.evaluate_node_set(*this);
+ return s.empty() ? xpath_node() : s.first();
+}
+
+PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
+{
+ xpath_query q(query, variables);
+ return select_nodes(q);
+}
+
+PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
+{
+ return query.evaluate_node_set(*this);
+}
}
#endif
@@ -10238,7 +9424,7 @@ namespace pugi
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
diff --git a/phrase-extract/filter-rule-table/StringBasedFilter.cpp b/phrase-extract/filter-rule-table/StringBasedFilter.cpp
index 1d8b69698..6e67cee17 100644
--- a/phrase-extract/filter-rule-table/StringBasedFilter.cpp
+++ b/phrase-extract/filter-rule-table/StringBasedFilter.cpp
@@ -8,7 +8,7 @@ namespace FilterRuleTable
{
StringBasedFilter::StringBasedFilter(
- const std::vector<std::vector<std::string> > &sentences)
+ const std::vector<std::vector<std::string> > &sentences)
{
}
diff --git a/phrase-extract/filter-rule-table/StringBasedFilter.h b/phrase-extract/filter-rule-table/StringBasedFilter.h
index e74e174eb..31444e586 100644
--- a/phrase-extract/filter-rule-table/StringBasedFilter.h
+++ b/phrase-extract/filter-rule-table/StringBasedFilter.h
@@ -12,8 +12,9 @@ namespace Syntax
namespace FilterRuleTable
{
-class StringBasedFilter {
- public:
+class StringBasedFilter
+{
+public:
StringBasedFilter(const std::vector<std::vector<std::string> > &);
void Filter(std::istream &, std::ostream &);
diff --git a/phrase-extract/filter-rule-table/TreeBasedFilter.cpp b/phrase-extract/filter-rule-table/TreeBasedFilter.cpp
index f53c2faa7..fee03641a 100644
--- a/phrase-extract/filter-rule-table/TreeBasedFilter.cpp
+++ b/phrase-extract/filter-rule-table/TreeBasedFilter.cpp
@@ -14,25 +14,25 @@ namespace FilterRuleTable
{
TreeBasedFilter::TreeBasedFilter(
- const std::vector<boost::shared_ptr<StringTree> > &sentences)
+ const std::vector<boost::shared_ptr<StringTree> > &sentences)
{
// Convert each StringTree to an IdTree.
m_sentences.reserve(sentences.size());
for (std::vector<boost::shared_ptr<StringTree> >::const_iterator p =
- sentences.begin(); p != sentences.end(); ++p) {
+ sentences.begin(); p != sentences.end(); ++p) {
m_sentences.push_back(boost::shared_ptr<IdTree>(StringTreeToIdTree(**p)));
}
m_labelToTree.resize(m_testVocab.Size());
// Construct a map from root labels to IdTree nodes.
for (std::vector<boost::shared_ptr<IdTree> >::const_iterator p =
- m_sentences.begin(); p != m_sentences.end(); ++p) {
+ m_sentences.begin(); p != m_sentences.end(); ++p) {
AddNodesToMap(**p);
}
}
TreeBasedFilter::IdTree *TreeBasedFilter::StringTreeToIdTree(
- const StringTree &s)
+ const StringTree &s)
{
IdTree *t = new IdTree(m_testVocab.Insert(s.value()));
const std::vector<StringTree*> &sChildren = s.children();
@@ -164,8 +164,8 @@ bool TreeBasedFilter::MatchFragment(const IdTree &fragment,
}
TreeBasedFilter::IdTree *TreeBasedFilter::BuildTree(
- const std::vector<TreeFragmentToken> &tokens, int &i,
- std::vector<IdTree *> &leaves)
+ const std::vector<TreeFragmentToken> &tokens, int &i,
+ std::vector<IdTree *> &leaves)
{
// The subtree starting at tokens[i] is either:
// 1. a single non-variable symbol (like NP or dog), or
@@ -175,8 +175,8 @@ TreeBasedFilter::IdTree *TreeBasedFilter::BuildTree(
// First check for case 1.
if (tokens[i].type == TreeFragmentToken_WORD) {
Vocabulary::IdType id = m_testVocab.Lookup(tokens[i++].value,
- StringPieceCompatibleHash(),
- StringPieceCompatibleEquals());
+ StringPieceCompatibleHash(),
+ StringPieceCompatibleEquals());
if (id == Vocabulary::NullId()) {
return 0;
}
@@ -193,8 +193,8 @@ TreeBasedFilter::IdTree *TreeBasedFilter::BuildTree(
// Read the root symbol of the subtree.
Vocabulary::IdType id = m_testVocab.Lookup(tokens[i++].value,
- StringPieceCompatibleHash(),
- StringPieceCompatibleEquals());
+ StringPieceCompatibleHash(),
+ StringPieceCompatibleEquals());
if (id == Vocabulary::NullId()) {
return 0;
}
diff --git a/phrase-extract/filter-rule-table/TreeBasedFilter.h b/phrase-extract/filter-rule-table/TreeBasedFilter.h
index e22731fe8..f30c9dd97 100644
--- a/phrase-extract/filter-rule-table/TreeBasedFilter.h
+++ b/phrase-extract/filter-rule-table/TreeBasedFilter.h
@@ -22,8 +22,9 @@ namespace FilterRuleTable
// Filters a rule table (currently assumed to be tree-to-string, STSG),
// discarding rules that cannot be applied to a given set of test sentences.
-class TreeBasedFilter {
- public:
+class TreeBasedFilter
+{
+public:
// Initialize the filter for a given set of test sentences.
TreeBasedFilter(const std::vector<boost::shared_ptr<StringTree> > &);
@@ -46,7 +47,7 @@ class TreeBasedFilter {
//
void Filter(std::istream &in, std::ostream &out);
- private:
+private:
// Maps source-side symbols (terminals and non-terminals) from strings to
// integers.
typedef NumberedSet<std::string, std::size_t> Vocabulary;
diff --git a/phrase-extract/pcfg-extract/options.h b/phrase-extract/pcfg-extract/options.h
index ffaa3bb17..5a6da80ea 100644
--- a/phrase-extract/pcfg-extract/options.h
+++ b/phrase-extract/pcfg-extract/options.h
@@ -23,9 +23,12 @@
#include <string>
-namespace MosesTraining {
-namespace Syntax {
-namespace PCFG {
+namespace MosesTraining
+{
+namespace Syntax
+{
+namespace PCFG
+{
struct Options {
std::string corpus_file;
diff --git a/phrase-extract/pcfg-extract/pcfg_extract.h b/phrase-extract/pcfg-extract/pcfg_extract.h
index 835564341..5882e45da 100644
--- a/phrase-extract/pcfg-extract/pcfg_extract.h
+++ b/phrase-extract/pcfg-extract/pcfg_extract.h
@@ -23,13 +23,17 @@
#include "pcfg-common/tool.h"
-namespace MosesTraining {
-namespace Syntax {
-namespace PCFG {
+namespace MosesTraining
+{
+namespace Syntax
+{
+namespace PCFG
+{
struct Options;
-class PcfgExtract : public Tool {
+class PcfgExtract : public Tool
+{
public:
PcfgExtract() : Tool("pcfg-extract") {}
virtual int Main(int, char *[]);
diff --git a/phrase-extract/pcfg-extract/rule_collection.h b/phrase-extract/pcfg-extract/rule_collection.h
index 66fa98657..3d9a9f98b 100644
--- a/phrase-extract/pcfg-extract/rule_collection.h
+++ b/phrase-extract/pcfg-extract/rule_collection.h
@@ -27,13 +27,17 @@
#include "pcfg-common/pcfg.h"
-namespace MosesTraining {
-namespace Syntax {
-namespace PCFG {
+namespace MosesTraining
+{
+namespace Syntax
+{
+namespace PCFG
+{
// Contains PCFG rules and their counts.
-class RuleCollection {
- public:
+class RuleCollection
+{
+public:
typedef boost::unordered_map<std::vector<std::size_t>, std::size_t> RhsCountMap;
typedef boost::unordered_map<std::size_t, RhsCountMap> Map;
typedef Map::iterator iterator;
@@ -41,16 +45,24 @@ class RuleCollection {
RuleCollection() {}
- iterator begin() { return collection_.begin(); }
- const_iterator begin() const { return collection_.begin(); }
+ iterator begin() {
+ return collection_.begin();
+ }
+ const_iterator begin() const {
+ return collection_.begin();
+ }
- iterator end() { return collection_.end(); }
- const_iterator end() const { return collection_.end(); }
+ iterator end() {
+ return collection_.end();
+ }
+ const_iterator end() const {
+ return collection_.end();
+ }
void Add(std::size_t, const std::vector<std::size_t> &);
void CreatePcfg(Pcfg &);
- private:
+private:
Map collection_;
};
diff --git a/phrase-extract/pcfg-extract/rule_extractor.h b/phrase-extract/pcfg-extract/rule_extractor.h
index 1dddd796f..f35460909 100644
--- a/phrase-extract/pcfg-extract/rule_extractor.h
+++ b/phrase-extract/pcfg-extract/rule_extractor.h
@@ -25,18 +25,22 @@
#include "rule_collection.h"
-namespace MosesTraining {
-namespace Syntax {
-namespace PCFG {
+namespace MosesTraining
+{
+namespace Syntax
+{
+namespace PCFG
+{
class PcfgTree;
// Extracts PCFG rules from syntax trees and adds them to a RuleCollection.
-class RuleExtractor {
- public:
+class RuleExtractor
+{
+public:
RuleExtractor(Vocabulary &);
void Extract(const PcfgTree &, RuleCollection &) const;
- private:
+private:
Vocabulary &non_term_vocab_;
};
diff --git a/phrase-extract/pcfg-score/options.h b/phrase-extract/pcfg-score/options.h
index bbd56d6d0..b2e6c282a 100644
--- a/phrase-extract/pcfg-score/options.h
+++ b/phrase-extract/pcfg-score/options.h
@@ -23,9 +23,12 @@
#include <string>
-namespace MosesTraining {
-namespace Syntax {
-namespace PCFG {
+namespace MosesTraining
+{
+namespace Syntax
+{
+namespace PCFG
+{
struct Options {
std::string pcfg_file;
diff --git a/phrase-extract/pcfg-score/pcfg_score.h b/phrase-extract/pcfg-score/pcfg_score.h
index fb9971c35..b0b4a77cd 100644
--- a/phrase-extract/pcfg-score/pcfg_score.h
+++ b/phrase-extract/pcfg-score/pcfg_score.h
@@ -23,17 +23,21 @@
#include "pcfg-common/tool.h"
-namespace MosesTraining {
-namespace Syntax {
-namespace PCFG {
+namespace MosesTraining
+{
+namespace Syntax
+{
+namespace PCFG
+{
struct Options;
-class PcfgScore : public Tool {
- public:
+class PcfgScore : public Tool
+{
+public:
PcfgScore() : Tool("pcfg-score") {}
virtual int Main(int, char *[]);
- private:
+private:
void ProcessOptions(int, char *[], Options &) const;
};
diff --git a/phrase-extract/pcfg-score/tree_scorer.h b/phrase-extract/pcfg-score/tree_scorer.h
index 3cf4693a6..8b1afcc3a 100644
--- a/phrase-extract/pcfg-score/tree_scorer.h
+++ b/phrase-extract/pcfg-score/tree_scorer.h
@@ -25,19 +25,23 @@
#include "pcfg-common/pcfg_tree.h"
#include "pcfg-common/typedef.h"
-namespace MosesTraining {
-namespace Syntax {
-namespace PCFG {
-
-class TreeScorer {
- public:
+namespace MosesTraining
+{
+namespace Syntax
+{
+namespace PCFG
+{
+
+class TreeScorer
+{
+public:
TreeScorer(const Pcfg &, const Vocabulary &);
// Score tree according to PCFG. Returns false if unsuccessful (due to
// missing rule).
bool Score(PcfgTree &) const;
- private:
+private:
const Pcfg &pcfg_;
const Vocabulary &non_term_vocab_;
};
diff --git a/phrase-extract/score-main.cpp b/phrase-extract/score-main.cpp
index 2a8413c48..97ce0b220 100644
--- a/phrase-extract/score-main.cpp
+++ b/phrase-extract/score-main.cpp
@@ -75,13 +75,13 @@ bool phraseOrientationPriorsFlag = false;
boost::unordered_map<std::string,float> sourceLHSCounts;
boost::unordered_map<std::string, boost::unordered_map<std::string,float>* > targetLHSAndSourceLHSJointCounts;
std::set<std::string> sourceLabelSet;
-std::map<std::string,size_t> sourceLabels;
+std::map<std::string,size_t> sourceLabels;
std::vector<std::string> sourceLabelsByIndex;
boost::unordered_map<std::string,float> targetPreferenceLHSCounts;
boost::unordered_map<std::string, boost::unordered_map<std::string,float>* > ruleTargetLHSAndTargetPreferenceLHSJointCounts;
std::set<std::string> targetPreferenceLabelSet;
-std::map<std::string,size_t> targetPreferenceLabels;
+std::map<std::string,size_t> targetPreferenceLabels;
std::vector<std::string> targetPreferenceLabelsByIndex;
std::vector<float> orientationClassPriorsL2R(4,0); // mono swap dright dleft
@@ -95,17 +95,17 @@ Vocabulary vcbS;
std::vector<std::string> tokenize( const char [] );
void processLine( std::string line,
- int lineID, bool includeSentenceIdFlag, int &sentenceId,
+ int lineID, bool includeSentenceIdFlag, int &sentenceId,
PHRASE *phraseSource, PHRASE *phraseTarget, ALIGNMENT *targetToSourceAlignment,
std::string &additionalPropertiesString,
float &count, float &pcfgSum );
void writeCountOfCounts( const std::string &fileNameCountOfCounts );
void writeLeftHandSideLabelCounts( const boost::unordered_map<std::string,float> &countsLabelLHS,
const boost::unordered_map<std::string, boost::unordered_map<std::string,float>* > &jointCountsLabelLHS,
- const std::string &fileNameLeftHandSideSourceLabelCounts,
+ const std::string &fileNameLeftHandSideSourceLabelCounts,
const std::string &fileNameLeftHandSideTargetSourceLabelCounts );
void writeLabelSet( const std::set<std::string> &labelSet, const std::string &fileName );
-void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile,
+void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile,
const ScoreFeatureManager& featureManager, const MaybeLog& maybeLogProb );
void outputPhrasePair(const ExtractionPhrasePair &phrasePair, float, int, ostream &phraseTableFile, const ScoreFeatureManager &featureManager, const MaybeLog &maybeLog );
double computeLexicalTranslation( const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource );
@@ -122,7 +122,7 @@ void invertAlignment( const PHRASE *phraseSource, const PHRASE *phraseTarget, co
int main(int argc, char* argv[])
{
- std::cerr << "Score v2.1 -- "
+ std::cerr << "Score v2.1 -- "
<< "scoring methods for extracted rules" << std::endl;
ScoreFeatureManager featureManager;
@@ -207,7 +207,7 @@ int main(int argc, char* argv[])
} else if (strcmp(argv[i],"--UnalignedFunctionWordPenalty") == 0) {
unalignedFWFlag = true;
if (i+1==argc) {
- std::cerr << "ERROR: specify function words file for unaligned function word penalty!" << std::endl;
+ std::cerr << "ERROR: specify function words file for unaligned function word penalty!" << std::endl;
exit(1);
}
fileNameFunctionWords = argv[++i];
@@ -229,7 +229,7 @@ int main(int argc, char* argv[])
} else if (strcmp(argv[i],"--PhraseOrientationPriors") == 0) {
phraseOrientationPriorsFlag = true;
if (i+1==argc) {
- std::cerr << "ERROR: specify priors file for phrase orientation!" << std::endl;
+ std::cerr << "ERROR: specify priors file for phrase orientation!" << std::endl;
exit(1);
}
fileNamePhraseOrientationPriors = argv[++i];
@@ -294,8 +294,8 @@ int main(int argc, char* argv[])
Moses::OutputFileStream *outputFile = new Moses::OutputFileStream();
bool success = outputFile->Open(fileNamePhraseTable);
if (!success) {
- std::cerr << "ERROR: could not open file phrase table file "
- << fileNamePhraseTable << std::endl;
+ std::cerr << "ERROR: could not open file phrase table file "
+ << fileNamePhraseTable << std::endl;
exit(1);
}
phraseTableFile = outputFile;
@@ -321,12 +321,12 @@ int main(int argc, char* argv[])
tmpPhraseSource = new PHRASE();
tmpPhraseTarget = new PHRASE();
tmpTargetToSourceAlignment = new ALIGNMENT();
- processLine( std::string(line),
+ processLine( std::string(line),
i, featureManager.includeSentenceId(), tmpSentenceId,
- tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment,
+ tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment,
tmpAdditionalPropertiesString,
tmpCount, tmpPcfgSum);
- phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget,
+ phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget,
tmpTargetToSourceAlignment,
tmpCount, tmpPcfgSum );
phrasePair->AddProperties( tmpAdditionalPropertiesString, tmpCount );
@@ -356,14 +356,16 @@ int main(int argc, char* argv[])
tmpPhraseTarget = new PHRASE();
tmpTargetToSourceAlignment = new ALIGNMENT();
tmpAdditionalPropertiesString.clear();
- processLine( std::string(line),
+ processLine( std::string(line),
i, featureManager.includeSentenceId(), tmpSentenceId,
- tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment,
+ tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment,
tmpAdditionalPropertiesString,
- tmpCount, tmpPcfgSum);
+ tmpCount, tmpPcfgSum);
bool matchesPrevious = false;
- bool sourceMatch = true; bool targetMatch = true; bool alignmentMatch = true; // be careful with these,
+ bool sourceMatch = true;
+ bool targetMatch = true;
+ bool alignmentMatch = true; // be careful with these,
// ExtractionPhrasePair::Matches() checks them in order and does not continue with the others
// once the first of them has been found to have to be set to false
@@ -398,7 +400,7 @@ int main(int argc, char* argv[])
if ( !phrasePairsWithSameSource.empty() &&
!sourceMatch ) {
processPhrasePairs( phrasePairsWithSameSource, *phraseTableFile, featureManager, maybeLogProb );
- for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
+ for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
iter!=phrasePairsWithSameSource.end(); ++iter) {
delete *iter;
}
@@ -415,8 +417,8 @@ int main(int argc, char* argv[])
}
}
- phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget,
- tmpTargetToSourceAlignment,
+ phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget,
+ tmpTargetToSourceAlignment,
tmpCount, tmpPcfgSum );
phrasePair->AddProperties( tmpAdditionalPropertiesString, tmpCount );
featureManager.addPropertiesToPhrasePair( *phrasePair, tmpCount, tmpSentenceId );
@@ -430,7 +432,7 @@ int main(int argc, char* argv[])
}
processPhrasePairs( phrasePairsWithSameSource, *phraseTableFile, featureManager, maybeLogProb );
- for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
+ for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
iter!=phrasePairsWithSameSource.end(); ++iter) {
delete *iter;
}
@@ -454,7 +456,7 @@ int main(int argc, char* argv[])
if (sourceSyntaxLabelsFlag && sourceSyntaxLabelCountsLHSFlag && !inverseFlag) {
writeLeftHandSideLabelCounts( sourceLHSCounts,
targetLHSAndSourceLHSJointCounts,
- fileNameLeftHandSideSourceLabelCounts,
+ fileNameLeftHandSideSourceLabelCounts,
fileNameLeftHandSideTargetSourceLabelCounts );
}
@@ -463,14 +465,14 @@ int main(int argc, char* argv[])
writeLabelSet( targetPreferenceLabelSet, fileNameTargetPreferenceLabelSet );
writeLeftHandSideLabelCounts( targetPreferenceLHSCounts,
ruleTargetLHSAndTargetPreferenceLHSJointCounts,
- fileNameLeftHandSideTargetPreferenceLabelCounts,
+ fileNameLeftHandSideTargetPreferenceLabelCounts,
fileNameLeftHandSideRuleTargetTargetPreferenceLabelCounts );
}
}
void processLine( std::string line,
- int lineID, bool includeSentenceIdFlag, int &sentenceId,
+ int lineID, bool includeSentenceIdFlag, int &sentenceId,
PHRASE *phraseSource, PHRASE *phraseTarget, ALIGNMENT *targetToSourceAlignment,
std::string &additionalPropertiesString,
float &count, float &pcfgSum )
@@ -625,7 +627,7 @@ void writeLabelSet( const std::set<std::string> &labelSet, const std::string &fi
}
-void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile,
+void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile,
const ScoreFeatureManager& featureManager, const MaybeLog& maybeLogProb )
{
if (phrasePairsWithSameSource.size() == 0) {
@@ -637,23 +639,23 @@ void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSa
//std::cerr << "phrasePairs.size() = " << phrasePairs.size() << std::endl;
// loop through phrase pairs
- for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
+ for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
iter!=phrasePairsWithSameSource.end(); ++iter) {
// add to total count
totalSource += (*iter)->GetCount();
}
// output the distinct phrase pairs, one at a time
- for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
+ for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
iter!=phrasePairsWithSameSource.end(); ++iter) {
// add to total count
outputPhrasePair( **iter, totalSource, phrasePairsWithSameSource.size(), phraseTableFile, featureManager, maybeLogProb );
}
}
-void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
- float totalCount, int distinctCount,
- ostream &phraseTableFile,
+void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
+ float totalCount, int distinctCount,
+ ostream &phraseTableFile,
const ScoreFeatureManager& featureManager,
const MaybeLog& maybeLogProb )
{
@@ -708,45 +710,45 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
// alignment
if ( hierarchicalFlag ) {
- // always output alignment if hiero style
- assert(phraseTarget->size() == bestAlignmentT2S->size()+1);
- std::vector<std::string> alignment;
- for ( size_t j = 0; j < phraseTarget->size() - 1; ++j ) {
- if ( isNonTerminal(vcbT.getWord( phraseTarget->at(j) ))) {
- if ( bestAlignmentT2S->at(j).size() != 1 ) {
- std::cerr << "Error: unequal numbers of non-terminals. Make sure the text does not contain words in square brackets (like [xxx])." << std::endl;
- phraseTableFile.flush();
- assert(bestAlignmentT2S->at(j).size() == 1);
- }
- size_t sourcePos = *(bestAlignmentT2S->at(j).begin());
- //phraseTableFile << sourcePos << "-" << j << " ";
+ // always output alignment if hiero style
+ assert(phraseTarget->size() == bestAlignmentT2S->size()+1);
+ std::vector<std::string> alignment;
+ for ( size_t j = 0; j < phraseTarget->size() - 1; ++j ) {
+ if ( isNonTerminal(vcbT.getWord( phraseTarget->at(j) ))) {
+ if ( bestAlignmentT2S->at(j).size() != 1 ) {
+ std::cerr << "Error: unequal numbers of non-terminals. Make sure the text does not contain words in square brackets (like [xxx])." << std::endl;
+ phraseTableFile.flush();
+ assert(bestAlignmentT2S->at(j).size() == 1);
+ }
+ size_t sourcePos = *(bestAlignmentT2S->at(j).begin());
+ //phraseTableFile << sourcePos << "-" << j << " ";
+ std::stringstream point;
+ point << sourcePos << "-" << j;
+ alignment.push_back(point.str());
+ } else {
+ for ( std::set<size_t>::iterator setIter = (bestAlignmentT2S->at(j)).begin();
+ setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) {
+ size_t sourcePos = *setIter;
std::stringstream point;
point << sourcePos << "-" << j;
alignment.push_back(point.str());
- } else {
- for ( std::set<size_t>::iterator setIter = (bestAlignmentT2S->at(j)).begin();
- setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) {
- size_t sourcePos = *setIter;
- std::stringstream point;
- point << sourcePos << "-" << j;
- alignment.push_back(point.str());
- }
}
}
- // now print all alignments, sorted by source index
- sort(alignment.begin(), alignment.end());
- for (size_t i = 0; i < alignment.size(); ++i) {
- phraseTableFile << alignment[i] << " ";
- }
+ }
+ // now print all alignments, sorted by source index
+ sort(alignment.begin(), alignment.end());
+ for (size_t i = 0; i < alignment.size(); ++i) {
+ phraseTableFile << alignment[i] << " ";
+ }
} else if ( !inverseFlag && wordAlignmentFlag) {
- // alignment info in pb model
- for (size_t j = 0; j < bestAlignmentT2S->size(); ++j) {
- for ( std::set<size_t>::iterator setIter = (bestAlignmentT2S->at(j)).begin();
- setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) {
- size_t sourcePos = *setIter;
- phraseTableFile << sourcePos << "-" << j << " ";
- }
+ // alignment info in pb model
+ for (size_t j = 0; j < bestAlignmentT2S->size(); ++j) {
+ for ( std::set<size_t>::iterator setIter = (bestAlignmentT2S->at(j)).begin();
+ setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) {
+ size_t sourcePos = *setIter;
+ phraseTableFile << sourcePos << "-" << j << " ";
}
+ }
}
phraseTableFile << " ||| ";
@@ -818,10 +820,10 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
if (sourceSyntaxLabelsFlag) {
std::string sourceLabelCounts;
sourceLabelCounts = phrasePair.CollectAllLabelsSeparateLHSAndRHS("SourceLabels",
- sourceLabelSet,
- sourceLHSCounts,
- targetLHSAndSourceLHSJointCounts,
- vcbT);
+ sourceLabelSet,
+ sourceLHSCounts,
+ targetLHSAndSourceLHSJointCounts,
+ vcbT);
if ( !sourceLabelCounts.empty() ) {
phraseTableFile << " {{SourceLabels "
<< nNTs // for convenience: number of non-terminal symbols in this rule (incl. left hand side NT)
@@ -835,10 +837,10 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
if (targetPreferenceLabelsFlag) {
std::string targetPreferenceLabelCounts;
targetPreferenceLabelCounts = phrasePair.CollectAllLabelsSeparateLHSAndRHS("TargetPreferences",
- targetPreferenceLabelSet,
- targetPreferenceLHSCounts,
- ruleTargetLHSAndTargetPreferenceLHSJointCounts,
- vcbT);
+ targetPreferenceLabelSet,
+ targetPreferenceLHSCounts,
+ ruleTargetLHSAndTargetPreferenceLHSJointCounts,
+ vcbT);
if ( !targetPreferenceLabelCounts.empty() ) {
phraseTableFile << " {{TargetPreferences "
<< nNTs // for convenience: number of non-terminal symbols in this rule (incl. left hand side NT)
@@ -858,17 +860,17 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
}
if (spanLength && !inverseFlag) {
- string propValue = phrasePair.CollectAllPropertyValues("SpanLength");
- if (!propValue.empty()) {
- phraseTableFile << " {{SpanLength " << propValue << "}}";
- }
+ string propValue = phrasePair.CollectAllPropertyValues("SpanLength");
+ if (!propValue.empty()) {
+ phraseTableFile << " {{SpanLength " << propValue << "}}";
+ }
}
if (nonTermContext && !inverseFlag) {
- string propValue = phrasePair.CollectAllPropertyValues("NonTermContext");
- if (!propValue.empty()) {
- phraseTableFile << " {{NonTermContext " << propValue << "}}";
- }
+ string propValue = phrasePair.CollectAllPropertyValues("NonTermContext");
+ if (!propValue.empty()) {
+ phraseTableFile << " {{NonTermContext " << propValue << "}}";
+ }
}
phraseTableFile << std::endl;
@@ -876,12 +878,12 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
-void loadOrientationPriors(const std::string &fileNamePhraseOrientationPriors,
- std::vector<float> &orientationClassPriorsL2R,
+void loadOrientationPriors(const std::string &fileNamePhraseOrientationPriors,
+ std::vector<float> &orientationClassPriorsL2R,
std::vector<float> &orientationClassPriorsR2L)
{
assert(orientationClassPriorsL2R.size()==4 && orientationClassPriorsR2L.size()==4); // mono swap dright dleft
-
+
std::cerr << "Loading phrase orientation priors from " << fileNamePhraseOrientationPriors;
ifstream inFile;
inFile.open(fileNamePhraseOrientationPriors.c_str());
@@ -908,7 +910,7 @@ void loadOrientationPriors(const std::string &fileNamePhraseOrientationPriors,
r2lFlag = true;
}
if (!l2rFlag && !r2lFlag) {
- std::cerr << " - ERROR: malformed line in orientation priors file" << std::endl;
+ std::cerr << " - ERROR: malformed line in orientation priors file" << std::endl;
}
key.erase(0,4);
@@ -926,7 +928,7 @@ void loadOrientationPriors(const std::string &fileNamePhraseOrientationPriors,
orientationClassId = 3;
}
if (orientationClassId == -1) {
- std::cerr << " - ERROR: malformed line in orientation priors file" << std::endl;
+ std::cerr << " - ERROR: malformed line in orientation priors file" << std::endl;
}
float count;
@@ -971,7 +973,7 @@ bool calcCrossedNonTerm( size_t targetPos, size_t sourcePos, const ALIGNMENT *al
// skip
} else {
const std::set<size_t> &sourceSet = alignmentTargetToSource->at(currTarget);
- for (std::set<size_t>::const_iterator iter = sourceSet.begin();
+ for (std::set<size_t>::const_iterator iter = sourceSet.begin();
iter != sourceSet.end(); ++iter) {
size_t currSource = *iter;
@@ -1103,9 +1105,9 @@ void LexicalTable::load( const string &fileName )
std::vector<string> token = tokenize( line.c_str() );
if (token.size() != 3) {
- std::cerr << "line " << i << " in " << fileName
- << " has wrong number of tokens, skipping:" << std::endl
- << token.size() << " " << token[0] << " " << line << std::endl;
+ std::cerr << "line " << i << " in " << fileName
+ << " has wrong number of tokens, skipping:" << std::endl
+ << token.size() << " " << token[0] << " " << line << std::endl;
continue;
}
@@ -1184,15 +1186,16 @@ void printTargetPhrase(const PHRASE *phraseSource, const PHRASE *phraseTarget,
void invertAlignment(const PHRASE *phraseSource, const PHRASE *phraseTarget,
- const ALIGNMENT *inTargetToSourceAlignment, ALIGNMENT *outSourceToTargetAlignment) {
-// typedef std::vector< std::set<size_t> > ALIGNMENT;
+ const ALIGNMENT *inTargetToSourceAlignment, ALIGNMENT *outSourceToTargetAlignment)
+{
+// typedef std::vector< std::set<size_t> > ALIGNMENT;
outSourceToTargetAlignment->clear();
size_t numberOfSourceSymbols = (hierarchicalFlag ? phraseSource->size()-1 : phraseSource->size());
outSourceToTargetAlignment->resize(numberOfSourceSymbols);
// add alignment point
for (size_t targetPosition = 0; targetPosition < inTargetToSourceAlignment->size(); ++targetPosition) {
- for ( std::set<size_t>::iterator setIter = (inTargetToSourceAlignment->at(targetPosition)).begin();
+ for ( std::set<size_t>::iterator setIter = (inTargetToSourceAlignment->at(targetPosition)).begin();
setIter != (inTargetToSourceAlignment->at(targetPosition)).end(); ++setIter ) {
size_t sourcePosition = *setIter;
outSourceToTargetAlignment->at(sourcePosition).insert(targetPosition);
diff --git a/phrase-extract/score-stsg/LexicalTable.cpp b/phrase-extract/score-stsg/LexicalTable.cpp
index d5d7ce6ab..48815ba26 100644
--- a/phrase-extract/score-stsg/LexicalTable.cpp
+++ b/phrase-extract/score-stsg/LexicalTable.cpp
@@ -13,8 +13,8 @@ namespace ScoreStsg
{
LexicalTable::LexicalTable(Vocabulary &srcVocab, Vocabulary &tgtVocab)
- : m_srcVocab(srcVocab)
- , m_tgtVocab(tgtVocab)
+ : m_srcVocab(srcVocab)
+ , m_tgtVocab(tgtVocab)
{
}
diff --git a/phrase-extract/score-stsg/RuleGroup.h b/phrase-extract/score-stsg/RuleGroup.h
index 8d9933263..1d03eeeb1 100644
--- a/phrase-extract/score-stsg/RuleGroup.h
+++ b/phrase-extract/score-stsg/RuleGroup.h
@@ -38,17 +38,27 @@ public:
typedef std::vector<DistinctRule>::const_iterator ConstIterator;
// Begin and End iterators for iterating over the group's distinct rules.
- ConstIterator Begin() const { return m_distinctRules.begin(); }
- ConstIterator End() const { return m_distinctRules.end(); }
+ ConstIterator Begin() const {
+ return m_distinctRules.begin();
+ }
+ ConstIterator End() const {
+ return m_distinctRules.end();
+ }
// Get the current source-side value.
- const std::string &GetSource() const { return m_source; }
+ const std::string &GetSource() const {
+ return m_source;
+ }
// Get the number of distinct rules.
- int GetSize() const { return m_distinctRules.size(); }
+ int GetSize() const {
+ return m_distinctRules.size();
+ }
// Get the total count.
- int GetTotalCount() const { return m_totalCount; }
+ int GetTotalCount() const {
+ return m_totalCount;
+ }
// Clear the rule group and set a new source-side value. This must be
// done once for every new source-side value, prior to the first call to
diff --git a/phrase-extract/score-stsg/RuleSymbol.h b/phrase-extract/score-stsg/RuleSymbol.h
index efefe6266..e8cd96458 100644
--- a/phrase-extract/score-stsg/RuleSymbol.h
+++ b/phrase-extract/score-stsg/RuleSymbol.h
@@ -9,8 +9,7 @@ namespace Syntax
namespace ScoreStsg
{
-struct RuleSymbol
-{
+struct RuleSymbol {
StringPiece value;
bool isNonTerminal;
};
diff --git a/phrase-extract/score-stsg/RuleTableWriter.h b/phrase-extract/score-stsg/RuleTableWriter.h
index 340a4bf19..4f7df9924 100644
--- a/phrase-extract/score-stsg/RuleTableWriter.h
+++ b/phrase-extract/score-stsg/RuleTableWriter.h
@@ -19,8 +19,8 @@ class RuleTableWriter
{
public:
RuleTableWriter(const Options &options, Moses::OutputFileStream &out)
- : m_options(options)
- , m_out(out) {}
+ : m_options(options)
+ , m_out(out) {}
void WriteLine(const TokenizedRuleHalf &, const TokenizedRuleHalf &,
const std::string &, double, double, int, int, int);
diff --git a/phrase-extract/score-stsg/ScoreStsg.cpp b/phrase-extract/score-stsg/ScoreStsg.cpp
index 642c5dc05..09395e21e 100644
--- a/phrase-extract/score-stsg/ScoreStsg.cpp
+++ b/phrase-extract/score-stsg/ScoreStsg.cpp
@@ -35,10 +35,10 @@ namespace ScoreStsg
const int ScoreStsg::kCountOfCountsMax = 10;
ScoreStsg::ScoreStsg()
- : m_name("score-stsg")
- , m_lexTable(m_srcVocab, m_tgtVocab)
- , m_countOfCounts(kCountOfCountsMax, 0)
- , m_totalDistinct(0)
+ : m_name("score-stsg")
+ , m_lexTable(m_srcVocab, m_tgtVocab)
+ , m_countOfCounts(kCountOfCountsMax, 0)
+ , m_totalDistinct(0)
{
}
@@ -278,8 +278,8 @@ double ScoreStsg::ComputeLexProb(const std::vector<RuleSymbol> &sourceFrontier,
continue;
}
Vocabulary::IdType tgtId = m_tgtVocab.Lookup(targetFrontier[i].value,
- StringPieceCompatibleHash(),
- StringPieceCompatibleEquals());
+ StringPieceCompatibleHash(),
+ StringPieceCompatibleEquals());
const std::set<std::size_t> &srcIndices = tgtToSrc[i];
if (srcIndices.empty()) {
// Explain unaligned word by NULL.
@@ -289,9 +289,9 @@ double ScoreStsg::ComputeLexProb(const std::vector<RuleSymbol> &sourceFrontier,
for (std::set<std::size_t>::const_iterator p = srcIndices.begin();
p != srcIndices.end(); ++p) {
Vocabulary::IdType srcId =
- m_srcVocab.Lookup(sourceFrontier[*p].value,
- StringPieceCompatibleHash(),
- StringPieceCompatibleEquals());
+ m_srcVocab.Lookup(sourceFrontier[*p].value,
+ StringPieceCompatibleHash(),
+ StringPieceCompatibleEquals());
thisWordScore += m_lexTable.PermissiveLookup(srcId, tgtId);
}
lexScore *= thisWordScore / static_cast<double>(srcIndices.size());
@@ -343,8 +343,8 @@ void ScoreStsg::ProcessOptions(int argc, char *argv[], Options &options) const
"output log probabilities")
("MinCountHierarchical",
po::value(&options.minCountHierarchical)->
- default_value(options.minCountHierarchical),
- "filter out rules with frequency < arg (except fully lexical rules)")
+ default_value(options.minCountHierarchical),
+ "filter out rules with frequency < arg (except fully lexical rules)")
("NegLogProb",
"output negative log probabilities")
("NoLex",
diff --git a/phrase-extract/score-stsg/TokenizedRuleHalf.h b/phrase-extract/score-stsg/TokenizedRuleHalf.h
index 2fbb80f38..7d2b74216 100644
--- a/phrase-extract/score-stsg/TokenizedRuleHalf.h
+++ b/phrase-extract/score-stsg/TokenizedRuleHalf.h
@@ -20,8 +20,7 @@ namespace ScoreStsg
// that 'tokens' and 'frontierSymbols' use StringPiece objects that depend on
// the original string. Therefore changing the value of 'string' invalidates
// both 'tokens' and 'frontierSymbols'.
-struct TokenizedRuleHalf
-{
+struct TokenizedRuleHalf {
bool IsFullyLexical() const;
bool IsString() const;
bool IsTree() const;
diff --git a/phrase-extract/score-stsg/Vocabulary.h b/phrase-extract/score-stsg/Vocabulary.h
index db31c73f5..6370544f4 100644
--- a/phrase-extract/score-stsg/Vocabulary.h
+++ b/phrase-extract/score-stsg/Vocabulary.h
@@ -4,9 +4,12 @@
#include "syntax-common/numbered_set.h"
-namespace MosesTraining {
-namespace Syntax {
-namespace ScoreStsg {
+namespace MosesTraining
+{
+namespace Syntax
+{
+namespace ScoreStsg
+{
typedef NumberedSet<std::string, std::size_t> Vocabulary;
diff --git a/symal/cmd.h b/symal/cmd.h
index 01a00abc0..a728dda78 100644
--- a/symal/cmd.h
+++ b/symal/cmd.h
@@ -33,14 +33,14 @@ extern "C" {
#endif
#if defined(__STDC__)
- int DeclareParams(char *, ...);
+int DeclareParams(char *, ...);
#else
- int DeclareParams();
+int DeclareParams();
#endif
- int GetParams(int *n, char ***a,char *CmdFileName),
- SPrintParams(),
- PrintParams();
+int GetParams(int *n, char ***a,char *CmdFileName),
+ SPrintParams(),
+ PrintParams();
#ifdef __cplusplus
}
diff --git a/vw/Classifier.h b/vw/Classifier.h
index 8b307d86e..31bd6ff82 100644
--- a/vw/Classifier.h
+++ b/vw/Classifier.h
@@ -13,12 +13,12 @@
#include <boost/thread/condition_variable.hpp>
#include <boost/thread/locks.hpp>
#include <boost/thread/mutex.hpp>
-#include <boost/iostreams/filtering_stream.hpp>
+#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include "../util/string_piece.hh"
#include "../moses/Util.h"
-// forward declarations to avoid dependency on VW
+// forward declarations to avoid dependency on VW
struct vw;
class ezexample;
@@ -46,7 +46,7 @@ public:
* Throws away current label-dependent features (so that features for another label/class can now be set).
*/
virtual void Train(const StringPiece &label, float loss) = 0;
-
+
/**
* Predict the loss (inverse of score) of current example.
* Throws away current label-dependent features (so that features for another label/class can now be set).
@@ -54,13 +54,11 @@ public:
virtual float Predict(const StringPiece &label) = 0;
// helper methods for indicator features
- void AddLabelIndependentFeature(const StringPiece &name)
- {
+ void AddLabelIndependentFeature(const StringPiece &name) {
AddLabelIndependentFeature(name, 1.0);
}
- void AddLabelDependentFeature(const StringPiece &name)
- {
+ void AddLabelDependentFeature(const StringPiece &name) {
AddLabelDependentFeature(name, 1.0);
}
@@ -70,8 +68,7 @@ protected:
/**
* Escape special characters in a unified way.
*/
- static std::string EscapeSpecialChars(const std::string &str)
- {
+ static std::string EscapeSpecialChars(const std::string &str) {
std::string out;
out = Moses::Replace(str, "\\", "_/_");
out = Moses::Replace(out, "|", "\\/");
@@ -89,7 +86,7 @@ protected:
const std::string VW_DEFAULT_OPTIONS = " --hash all --noconstant -q st -t --ldf_override s ";
const std::string VW_DEFAULT_PARSER_OPTIONS = " --quiet --hash all --noconstant -q st -t --csoaa_ldf s ";
-/**
+/**
* Produce VW training file (does not use the VW library!)
*/
class VWTrainer : public Classifier
@@ -143,7 +140,7 @@ protected:
private:
// instantiation by classifier factory
- VWPredictor(vw * instance, const std::string &vwOption);
+ VWPredictor(vw * instance, const std::string &vwOption);
};
/**
@@ -157,7 +154,7 @@ public:
/**
* Creates VWPredictor instances to be used by individual threads.
*/
- ClassifierFactory(const std::string &modelFile, const std::string &vwOptions);
+ ClassifierFactory(const std::string &modelFile, const std::string &vwOptions);
/**
* Creates VWTrainer instances (which write features to a file).
diff --git a/vw/ClassifierFactory.cpp b/vw/ClassifierFactory.cpp
index 16c313fb1..286bf84a6 100644
--- a/vw/ClassifierFactory.cpp
+++ b/vw/ClassifierFactory.cpp
@@ -6,15 +6,15 @@
namespace Discriminative
{
-ClassifierFactory::ClassifierFactory(const std::string &modelFile, const std::string &vwOptions)
+ClassifierFactory::ClassifierFactory(const std::string &modelFile, const std::string &vwOptions)
: m_vwOptions(vwOptions), m_train(false)
{
m_VWInstance = VW::initialize(VW_DEFAULT_OPTIONS + " -i " + modelFile + vwOptions);
}
-ClassifierFactory::ClassifierFactory(const std::string &modelFilePrefix)
+ClassifierFactory::ClassifierFactory(const std::string &modelFilePrefix)
: m_lastId(0), m_train(true)
-{
+{
if (modelFilePrefix.size() > 3 && modelFilePrefix.substr(modelFilePrefix.size() - 3, 3) == ".gz") {
m_modelFilePrefix = modelFilePrefix.substr(0, modelFilePrefix.size() - 3);
m_gzip = true;
@@ -24,22 +24,22 @@ ClassifierFactory::ClassifierFactory(const std::string &modelFilePrefix)
}
}
-ClassifierFactory::~ClassifierFactory()
+ClassifierFactory::~ClassifierFactory()
{
if (! m_train)
VW::finish(*m_VWInstance);
}
-ClassifierFactory::ClassifierPtr ClassifierFactory::operator()()
+ClassifierFactory::ClassifierPtr ClassifierFactory::operator()()
{
if (m_train) {
boost::unique_lock<boost::mutex> lock(m_mutex); // avoid possible race for m_lastId
return ClassifierFactory::ClassifierPtr(
- new VWTrainer(m_modelFilePrefix + "." + Moses::SPrint(m_lastId++) + (m_gzip ? ".gz" : "")));
+ new VWTrainer(m_modelFilePrefix + "." + Moses::SPrint(m_lastId++) + (m_gzip ? ".gz" : "")));
} else {
return ClassifierFactory::ClassifierPtr(
- new VWPredictor(m_VWInstance, VW_DEFAULT_PARSER_OPTIONS + m_vwOptions));
- }
+ new VWPredictor(m_VWInstance, VW_DEFAULT_PARSER_OPTIONS + m_vwOptions));
+ }
}
}
diff --git a/vw/Normalizer.h b/vw/Normalizer.h
index 0f927a145..74d94a79f 100644
--- a/vw/Normalizer.h
+++ b/vw/Normalizer.h
@@ -7,16 +7,17 @@
namespace Discriminative
{
-class Normalizer {
+class Normalizer
+{
public:
virtual void operator()(std::vector<float> &losses) const = 0;
virtual ~Normalizer() {}
};
-class SquaredLossNormalizer : public Normalizer {
+class SquaredLossNormalizer : public Normalizer
+{
public:
- virtual void operator()(std::vector<float> &losses) const
- {
+ virtual void operator()(std::vector<float> &losses) const {
// This is (?) a good choice for sqrt loss (default loss function in VW)
float sum = 0;
@@ -44,10 +45,10 @@ public:
virtual ~SquaredLossNormalizer() {}
};
-class LogisticLossNormalizer : public Normalizer {
+class LogisticLossNormalizer : public Normalizer
+{
public:
- virtual void operator()(std::vector<float> &losses) const
- {
+ virtual void operator()(std::vector<float> &losses) const {
float sum = 0;
std::vector<float>::iterator it;
for (it = losses.begin(); it != losses.end(); it++) {
diff --git a/vw/VWPredictor.cpp b/vw/VWPredictor.cpp
index 3185d7783..95158363c 100644
--- a/vw/VWPredictor.cpp
+++ b/vw/VWPredictor.cpp
@@ -5,7 +5,8 @@
#include "ezexample.h"
#include "../moses/Util.h"
-namespace Discriminative {
+namespace Discriminative
+{
using namespace std;