Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUlrich Germann <ugermann@inf.ed.ac.uk>2017-05-25 01:22:04 +0300
committerUlrich Germann <ugermann@inf.ed.ac.uk>2017-05-25 01:22:04 +0300
commit0eec9270f2f70226e43409fef93860f9142f0d22 (patch)
tree076aec75d4dee0006cb747c837ea296ec1ee7d78
parent66bd0452308f9a99c13652f6d5b400b908ee714b (diff)
parentb8de7c352840a9786af832774831c51c3863ec60 (diff)
Merge branch 'master' of https://github.com/moses-smt/mosesdecoder
-rw-r--r--.travis.yml24
-rw-r--r--BUILD-INSTRUCTIONS.txt1
-rw-r--r--Jamroot3
-rw-r--r--contrib/moses2/ArcLists.cpp129
-rw-r--r--contrib/moses2/FF/FFState.cpp0
-rw-r--r--contrib/moses2/FF/PointerState.cpp0
-rw-r--r--contrib/moses2/FF/SkeletonStatefulFF.h48
-rw-r--r--contrib/moses2/FF/SkeletonStatelessFF.h34
-rw-r--r--contrib/moses2/LM/LanguageModelDALM.cpp246
-rw-r--r--contrib/moses2/LM/LanguageModelDALM.h75
-rw-r--r--contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp248
-rw-r--r--contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp125
-rw-r--r--contrib/moses2/PhraseBased/Manager.cpp280
-rw-r--r--contrib/moses2/PhraseBased/Normal/Search.cpp161
-rw-r--r--contrib/moses2/PhraseBased/Sentence.cpp174
-rw-r--r--contrib/moses2/SCFG/Sentence.cpp155
-rw-r--r--contrib/moses2/SCFG/nbest/KBestExtractor.cpp74
-rw-r--r--contrib/moses2/SCFG/nbest/NBest.cpp193
-rw-r--r--contrib/moses2/SCFG/nbest/NBest.h99
-rw-r--r--contrib/moses2/SCFG/nbest/NBests.cpp111
-rw-r--r--contrib/moses2/SCFG/nbest/NBests.h53
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp466
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.h142
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp222
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/PhraseTableCompact.h68
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/StoreTarget.cpp266
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/StoreTarget.h51
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/StoreVocab.h64
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/hash.cpp44
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/line_splitter.hh59
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/storing.cpp303
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/storing.hh95
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/vocabid.cpp59
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/vocabid.hh29
-rw-r--r--contrib/moses2/TranslationModel/Transliteration.h91
-rw-r--r--contrib/moses2/TranslationModel/UnknownWordPenalty.h89
-rw-r--r--contrib/moses2/defer/CubePruningBitmapStack/Misc.cpp161
-rw-r--r--contrib/moses2/defer/CubePruningBitmapStack/Misc.h111
-rw-r--r--contrib/moses2/defer/CubePruningBitmapStack/Search.cpp206
-rw-r--r--contrib/moses2/defer/CubePruningBitmapStack/Search.h57
-rw-r--r--contrib/moses2/defer/CubePruningBitmapStack/Stack.cpp303
-rw-r--r--contrib/moses2/defer/CubePruningBitmapStack/Stack.h109
-rw-r--r--contrib/moses2/defer/CubePruningCardinalStack/Misc.cpp161
-rw-r--r--contrib/moses2/defer/CubePruningCardinalStack/Misc.h112
-rw-r--r--contrib/moses2/defer/CubePruningCardinalStack/Search.cpp206
-rw-r--r--contrib/moses2/defer/CubePruningCardinalStack/Search.h57
-rw-r--r--contrib/moses2/defer/CubePruningCardinalStack/Stack.h68
-rw-r--r--contrib/moses2/defer/CubePruningPerBitmap/Misc.cpp161
-rw-r--r--contrib/moses2/defer/CubePruningPerBitmap/Misc.h113
-rw-r--r--contrib/moses2/defer/CubePruningPerBitmap/Search.cpp273
-rw-r--r--contrib/moses2/defer/CubePruningPerBitmap/Search.h66
-rw-r--r--contrib/moses2/defer/CubePruningPerBitmap/Stacks.cpp72
-rw-r--r--contrib/moses2/defer/CubePruningPerBitmap/Stacks.h51
-rw-r--r--contrib/moses2/defer/CubePruningPerMiniStack/Misc.cpp161
-rw-r--r--contrib/moses2/defer/CubePruningPerMiniStack/Misc.h113
-rw-r--r--contrib/moses2/defer/CubePruningPerMiniStack/Search.cpp248
-rw-r--r--contrib/moses2/defer/CubePruningPerMiniStack/Search.h66
-rw-r--r--contrib/moses2/defer/CubePruningPerMiniStack/Stacks.cpp72
-rw-r--r--contrib/moses2/defer/CubePruningPerMiniStack/Stacks.h51
-rw-r--r--contrib/moses2/legacy/Bitmap.h244
-rw-r--r--contrib/moses2/parameters/AllOptions.cpp123
-rw-r--r--contrib/moses2/parameters/AllOptions.h51
-rw-r--r--contrib/moses2/parameters/BeamSearchOptions.h15
-rw-r--r--contrib/moses2/parameters/BookkeepingOptions.h18
-rw-r--r--contrib/moses2/parameters/CubePruningOptions.cpp80
-rw-r--r--contrib/moses2/parameters/CubePruningOptions.h25
-rw-r--r--contrib/moses2/parameters/InputOptions.cpp102
-rw-r--r--contrib/moses2/parameters/InputOptions.h32
-rw-r--r--contrib/moses2/parameters/LMBR_Options.cpp39
-rw-r--r--contrib/moses2/parameters/LMBR_Options.h26
-rw-r--r--contrib/moses2/parameters/MBR_Options.cpp26
-rw-r--r--contrib/moses2/parameters/MBR_Options.h21
-rw-r--r--contrib/moses2/parameters/OOVHandlingOptions.cpp50
-rw-r--r--contrib/moses2/parameters/OOVHandlingOptions.h27
-rw-r--r--contrib/moses2/parameters/OptionsBaseClass.cpp30
-rw-r--r--contrib/moses2/parameters/OptionsBaseClass.h20
-rw-r--r--contrib/moses2/parameters/ReorderingOptions.cpp31
-rw-r--r--contrib/moses2/parameters/ReorderingOptions.h20
-rw-r--r--contrib/moses2/parameters/ReportingOptions.cpp152
-rw-r--r--contrib/moses2/parameters/ReportingOptions.h70
-rw-r--r--contrib/moses2/parameters/SearchOptions.cpp107
-rw-r--r--contrib/moses2/parameters/SearchOptions.h54
-rw-r--r--contrib/moses2/parameters/ServerOptions.h43
-rw-r--r--contrib/moses2/pugixml.cpp12444
-rw-r--r--contrib/moses2/pugixml.hpp1400
-rw-r--r--contrib/other-builds/.metadata/.plugins/org.eclipse.cdt.make.core/specs.cpp1
-rw-r--r--contrib/other-builds/CreateOnDiskPt/.cproject27
-rw-r--r--contrib/other-builds/CreateProbingPT/.cproject5
-rw-r--r--contrib/other-builds/CreateProbingPT/.project2
-rw-r--r--contrib/other-builds/OnDiskPt/.cproject25
-rw-r--r--contrib/other-builds/consolidate/.cproject23
-rw-r--r--contrib/other-builds/extract-ghkm/.cproject27
-rw-r--r--contrib/other-builds/extract-mixed-syntax/.cproject27
-rw-r--r--contrib/other-builds/extract-rules/.cproject23
-rw-r--r--contrib/other-builds/extract/.cproject23
-rw-r--r--contrib/other-builds/extractor/.cproject23
-rw-r--r--contrib/other-builds/lm/.cproject36
-rw-r--r--contrib/other-builds/mert_lib/.cproject25
-rw-r--r--contrib/other-builds/moses-cmd/.cproject39
-rw-r--r--contrib/other-builds/moses-cmd/.project1
-rw-r--r--contrib/other-builds/moses/.cproject12
-rw-r--r--contrib/other-builds/moses/.project1015
-rw-r--r--contrib/other-builds/moses2-cmd/.cproject (renamed from contrib/moses2-cmd/.cproject)37
-rw-r--r--contrib/other-builds/moses2-cmd/.project (renamed from contrib/moses2-cmd/.project)5
-rw-r--r--contrib/other-builds/moses2/.cproject (renamed from contrib/moses2/.cproject)1
-rw-r--r--contrib/other-builds/moses2/.project1621
-rw-r--r--contrib/other-builds/probingpt/.cproject120
-rw-r--r--contrib/other-builds/probingpt/.project (renamed from contrib/moses2/.project)11
-rw-r--r--contrib/other-builds/score/.cproject23
-rw-r--r--contrib/other-builds/search/.cproject26
-rw-r--r--contrib/other-builds/server/.cproject23
-rw-r--r--contrib/other-builds/util/.cproject25
-rw-r--r--mert/ForestRescore.cpp2
-rw-r--r--misc/Jamfile7
-rw-r--r--misc/misc.xcodeproj/project.pbxproj323
-rw-r--r--misc/processLexicalTable.vcxproj108
-rw-r--r--misc/processPhraseTable.vcxproj108
-rw-r--r--moses-cmd/MainVW.cpp13
-rw-r--r--moses/FF/ExampleStatefulFF.cpp (renamed from moses/FF/SkeletonStatefulFF.cpp)20
-rw-r--r--moses/FF/ExampleStatefulFF.h (renamed from moses/FF/SkeletonStatefulFF.h)12
-rw-r--r--moses/FF/ExampleStatelessFF.cpp (renamed from moses/FF/SkeletonStatelessFF.cpp)16
-rw-r--r--moses/FF/ExampleStatelessFF.h (renamed from moses/FF/SkeletonStatelessFF.h)4
-rw-r--r--moses/FF/ExampleTranslationOptionListFeature.h (renamed from moses/FF/SkeletonTranslationOptionListFeature.h)4
-rw-r--r--moses/FF/Factory.cpp26
-rw-r--r--moses/GenerationDictionary.cpp9
-rw-r--r--moses/Jamfile4
-rw-r--r--moses/LM/ExampleLM.cpp (renamed from moses/LM/SkeletonLM.cpp)8
-rw-r--r--moses/LM/ExampleLM.h (renamed from moses/LM/SkeletonLM.h)6
-rw-r--r--moses/LM/Jamfile2
-rw-r--r--moses/StaticData.cpp9
-rw-r--r--moses/TrainingTask.h12
-rw-r--r--moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.cpp (renamed from moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp)20
-rw-r--r--moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.h (renamed from moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h)14
-rw-r--r--moses/TranslationModel/ExamplePT.cpp (renamed from moses/TranslationModel/SkeletonPT.cpp)26
-rw-r--r--moses/TranslationModel/ExamplePT.h (renamed from moses/TranslationModel/SkeletonPT.h)6
-rw-r--r--moses/TranslationModel/PhraseDictionaryMemoryPerSentence.cpp2
-rw-r--r--moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp1
-rw-r--r--moses/TranslationModel/PhraseDictionaryTransliteration.cpp1
-rw-r--r--moses/TranslationModel/ProbingPT.cpp (renamed from moses/TranslationModel/ProbingPT/ProbingPT.cpp)33
-rw-r--r--moses/TranslationModel/ProbingPT.h (renamed from moses/TranslationModel/ProbingPT/ProbingPT.h)15
-rw-r--r--moses/TranslationModel/ProbingPT/Jamfile8
-rw-r--r--moses/TranslationModel/ProbingPT/StoreVocab.cpp13
-rw-r--r--moses/TranslationModel/ProbingPT/hash.hh17
-rw-r--r--moses/TranslationModel/ProbingPT/line_splitter.cpp103
-rw-r--r--moses/TranslationModel/ProbingPT/probing_hash_utils.cpp50
-rw-r--r--moses/TranslationModel/ProbingPT/probing_hash_utils.hh51
-rw-r--r--moses/TranslationModel/ProbingPT/querying.cpp141
-rw-r--r--moses/TranslationModel/ProbingPT/querying.hh66
-rw-r--r--moses/parameters/AllOptions.cpp5
-rw-r--r--moses/parameters/CubePruningOptions.cpp5
-rw-r--r--moses/parameters/InputOptions.cpp5
-rw-r--r--moses/parameters/NBestOptions.cpp5
-rw-r--r--moses/parameters/OOVHandlingOptions.cpp5
-rw-r--r--moses/parameters/OptionsBaseClass.cpp5
-rw-r--r--moses/parameters/ReportingOptions.cpp14
-rw-r--r--moses/parameters/SearchOptions.cpp5
-rw-r--r--moses/parameters/SyntaxOptions.cpp5
-rw-r--r--moses/server/TranslationRequest.cpp19
-rw-r--r--moses/server/TranslationRequest.h4
-rw-r--r--moses2/AlignmentInfo.cpp (renamed from contrib/moses2/AlignmentInfo.cpp)0
-rw-r--r--moses2/AlignmentInfo.h (renamed from contrib/moses2/AlignmentInfo.h)0
-rw-r--r--moses2/AlignmentInfoCollection.cpp (renamed from contrib/moses2/AlignmentInfoCollection.cpp)0
-rw-r--r--moses2/AlignmentInfoCollection.h (renamed from contrib/moses2/AlignmentInfoCollection.h)0
-rw-r--r--moses2/ArcLists.cpp127
-rw-r--r--moses2/ArcLists.h (renamed from contrib/moses2/ArcLists.h)2
-rw-r--r--moses2/Array.h (renamed from contrib/moses2/Array.h)41
-rw-r--r--moses2/EstimatedScores.cpp (renamed from contrib/moses2/EstimatedScores.cpp)4
-rw-r--r--moses2/EstimatedScores.h (renamed from contrib/moses2/EstimatedScores.h)6
-rw-r--r--moses2/FF/Distortion.cpp (renamed from contrib/moses2/FF/Distortion.cpp)56
-rw-r--r--moses2/FF/Distortion.h (renamed from contrib/moses2/FF/Distortion.h)23
-rw-r--r--moses2/FF/ExampleStatefulFF.cpp (renamed from contrib/moses2/FF/SkeletonStatefulFF.cpp)44
-rw-r--r--moses2/FF/ExampleStatefulFF.h46
-rw-r--r--moses2/FF/ExampleStatelessFF.cpp (renamed from contrib/moses2/FF/SkeletonStatelessFF.cpp)14
-rw-r--r--moses2/FF/ExampleStatelessFF.h34
-rw-r--r--moses2/FF/FFState.cpp1
-rw-r--r--moses2/FF/FFState.h (renamed from contrib/moses2/FF/FFState.h)15
-rw-r--r--moses2/FF/FeatureFunction.cpp (renamed from contrib/moses2/FF/FeatureFunction.cpp)21
-rw-r--r--moses2/FF/FeatureFunction.h (renamed from contrib/moses2/FF/FeatureFunction.h)44
-rw-r--r--moses2/FF/FeatureFunctions.cpp (renamed from contrib/moses2/FF/FeatureFunctions.cpp)82
-rw-r--r--moses2/FF/FeatureFunctions.h (renamed from contrib/moses2/FF/FeatureFunctions.h)37
-rw-r--r--moses2/FF/FeatureRegistry.cpp (renamed from contrib/moses2/FF/FeatureRegistry.cpp)27
-rw-r--r--moses2/FF/FeatureRegistry.h (renamed from contrib/moses2/FF/FeatureRegistry.h)22
-rw-r--r--moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp (renamed from contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp)22
-rw-r--r--moses2/FF/LexicalReordering/BidirectionalReorderingState.h (renamed from contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.h)8
-rw-r--r--moses2/FF/LexicalReordering/HReorderingBackwardState.cpp (renamed from contrib/moses2/FF/LexicalReordering/HReorderingBackwardState.cpp)16
-rw-r--r--moses2/FF/LexicalReordering/HReorderingBackwardState.h (renamed from contrib/moses2/FF/LexicalReordering/HReorderingBackwardState.h)6
-rw-r--r--moses2/FF/LexicalReordering/HReorderingForwardState.cpp (renamed from contrib/moses2/FF/LexicalReordering/HReorderingForwardState.cpp)22
-rw-r--r--moses2/FF/LexicalReordering/HReorderingForwardState.h (renamed from contrib/moses2/FF/LexicalReordering/HReorderingForwardState.h)6
-rw-r--r--moses2/FF/LexicalReordering/LRModel.cpp (renamed from contrib/moses2/FF/LexicalReordering/LRModel.cpp)91
-rw-r--r--moses2/FF/LexicalReordering/LRModel.h (renamed from contrib/moses2/FF/LexicalReordering/LRModel.h)30
-rw-r--r--moses2/FF/LexicalReordering/LRState.cpp (renamed from contrib/moses2/FF/LexicalReordering/LRState.cpp)12
-rw-r--r--moses2/FF/LexicalReordering/LRState.h (renamed from contrib/moses2/FF/LexicalReordering/LRState.h)8
-rw-r--r--moses2/FF/LexicalReordering/LexicalReordering.cpp (renamed from contrib/moses2/FF/LexicalReordering/LexicalReordering.cpp)84
-rw-r--r--moses2/FF/LexicalReordering/LexicalReordering.h (renamed from contrib/moses2/FF/LexicalReordering/LexicalReordering.h)37
-rw-r--r--moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp (renamed from contrib/moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp)28
-rw-r--r--moses2/FF/LexicalReordering/PhraseBasedReorderingState.h (renamed from contrib/moses2/FF/LexicalReordering/PhraseBasedReorderingState.h)11
-rw-r--r--moses2/FF/LexicalReordering/ReorderingStack.cpp (renamed from contrib/moses2/FF/LexicalReordering/ReorderingStack.cpp)17
-rw-r--r--moses2/FF/LexicalReordering/ReorderingStack.h (renamed from contrib/moses2/FF/LexicalReordering/ReorderingStack.h)0
-rw-r--r--moses2/FF/OSM/KenOSM.cpp (renamed from contrib/moses2/FF/OSM/KenOSM.cpp)0
-rw-r--r--moses2/FF/OSM/KenOSM.h (renamed from contrib/moses2/FF/OSM/KenOSM.h)0
-rw-r--r--moses2/FF/OSM/OpSequenceModel.cpp (renamed from contrib/moses2/FF/OSM/OpSequenceModel.cpp)4
-rw-r--r--moses2/FF/OSM/OpSequenceModel.h (renamed from contrib/moses2/FF/OSM/OpSequenceModel.h)20
-rw-r--r--moses2/FF/OSM/osmHyp.cpp (renamed from contrib/moses2/FF/OSM/osmHyp.cpp)0
-rw-r--r--moses2/FF/OSM/osmHyp.h (renamed from contrib/moses2/FF/OSM/osmHyp.h)5
-rw-r--r--moses2/FF/PhrasePenalty.cpp (renamed from contrib/moses2/FF/PhrasePenalty.cpp)10
-rw-r--r--moses2/FF/PhrasePenalty.h (renamed from contrib/moses2/FF/PhrasePenalty.h)8
-rw-r--r--moses2/FF/PointerState.cpp6
-rw-r--r--moses2/FF/PointerState.h (renamed from contrib/moses2/FF/PointerState.h)18
-rw-r--r--moses2/FF/StatefulFeatureFunction.cpp (renamed from contrib/moses2/FF/StatefulFeatureFunction.cpp)10
-rw-r--r--moses2/FF/StatefulFeatureFunction.h (renamed from contrib/moses2/FF/StatefulFeatureFunction.h)20
-rw-r--r--moses2/FF/StatelessFeatureFunction.cpp (renamed from contrib/moses2/FF/StatelessFeatureFunction.cpp)2
-rw-r--r--moses2/FF/StatelessFeatureFunction.h (renamed from contrib/moses2/FF/StatelessFeatureFunction.h)0
-rw-r--r--moses2/FF/WordPenalty.cpp (renamed from contrib/moses2/FF/WordPenalty.cpp)10
-rw-r--r--moses2/FF/WordPenalty.h (renamed from contrib/moses2/FF/WordPenalty.h)8
-rw-r--r--moses2/HypothesisBase.cpp (renamed from contrib/moses2/HypothesisBase.cpp)16
-rw-r--r--moses2/HypothesisBase.h (renamed from contrib/moses2/HypothesisBase.h)34
-rw-r--r--moses2/HypothesisColl.cpp (renamed from contrib/moses2/HypothesisColl.cpp)213
-rw-r--r--moses2/HypothesisColl.h (renamed from contrib/moses2/HypothesisColl.h)22
-rw-r--r--moses2/InMemoryTrie/InMemoryTrie.h (renamed from contrib/moses2/MorphoTrie/MorphTrie.h)34
-rw-r--r--moses2/InMemoryTrie/Node.h (renamed from contrib/moses2/MorphoTrie/Node.h)26
-rw-r--r--moses2/InMemoryTrie/utils.h (renamed from contrib/moses2/MorphoTrie/utils.h)4
-rw-r--r--moses2/InputPathBase.cpp (renamed from contrib/moses2/InputPathBase.cpp)4
-rw-r--r--moses2/InputPathBase.h (renamed from contrib/moses2/InputPathBase.h)2
-rw-r--r--moses2/InputPathsBase.cpp (renamed from contrib/moses2/InputPathsBase.cpp)0
-rw-r--r--moses2/InputPathsBase.h (renamed from contrib/moses2/InputPathsBase.h)15
-rw-r--r--moses2/InputType.cpp (renamed from contrib/moses2/InputType.cpp)42
-rw-r--r--moses2/InputType.h (renamed from contrib/moses2/InputType.h)48
-rw-r--r--moses2/Jamfile (renamed from contrib/moses2/Jamfile)52
-rw-r--r--moses2/LM/GPULM.cpp (renamed from contrib/moses2/LM/GPULM.cpp)59
-rw-r--r--moses2/LM/GPULM.h (renamed from contrib/moses2/LM/GPULM.h)31
-rw-r--r--moses2/LM/KENLM.cpp (renamed from contrib/moses2/LM/KENLM.cpp)155
-rw-r--r--moses2/LM/KENLM.h (renamed from contrib/moses2/LM/KENLM.h)31
-rw-r--r--moses2/LM/KENLMBatch.cpp (renamed from contrib/moses2/LM/KENLMBatch.cpp)122
-rw-r--r--moses2/LM/KENLMBatch.h (renamed from contrib/moses2/LM/KENLMBatch.h)29
-rw-r--r--moses2/LM/LanguageModel.cpp (renamed from contrib/moses2/LM/LanguageModel.cpp)68
-rw-r--r--moses2/LM/LanguageModel.h (renamed from contrib/moses2/LM/LanguageModel.h)41
-rw-r--r--moses2/Main.cpp (renamed from contrib/moses2/Main.cpp)44
-rw-r--r--moses2/Main.h (renamed from contrib/moses2/Main.h)3
-rw-r--r--moses2/ManagerBase.cpp (renamed from contrib/moses2/ManagerBase.cpp)20
-rw-r--r--moses2/ManagerBase.h (renamed from contrib/moses2/ManagerBase.h)27
-rw-r--r--moses2/MemPool.cpp (renamed from contrib/moses2/MemPool.cpp)10
-rw-r--r--moses2/MemPool.h (renamed from contrib/moses2/MemPool.h)51
-rw-r--r--moses2/MemPoolAllocator.h (renamed from contrib/moses2/MemPoolAllocator.h)31
-rw-r--r--moses2/Phrase.cpp (renamed from contrib/moses2/Phrase.cpp)0
-rw-r--r--moses2/Phrase.h (renamed from contrib/moses2/Phrase.h)29
-rw-r--r--moses2/PhraseBased/CubePruningMiniStack/Misc.cpp (renamed from contrib/moses2/PhraseBased/CubePruningMiniStack/Misc.cpp)40
-rw-r--r--moses2/PhraseBased/CubePruningMiniStack/Misc.h (renamed from contrib/moses2/PhraseBased/CubePruningMiniStack/Misc.h)21
-rw-r--r--moses2/PhraseBased/CubePruningMiniStack/Search.cpp248
-rw-r--r--moses2/PhraseBased/CubePruningMiniStack/Search.h (renamed from contrib/moses2/PhraseBased/CubePruningMiniStack/Search.h)0
-rw-r--r--moses2/PhraseBased/CubePruningMiniStack/Stack.cpp123
-rw-r--r--moses2/PhraseBased/CubePruningMiniStack/Stack.h (renamed from contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.h)12
-rw-r--r--moses2/PhraseBased/Hypothesis.cpp (renamed from contrib/moses2/PhraseBased/Hypothesis.cpp)38
-rw-r--r--moses2/PhraseBased/Hypothesis.h (renamed from contrib/moses2/PhraseBased/Hypothesis.h)28
-rw-r--r--moses2/PhraseBased/InputPath.cpp (renamed from contrib/moses2/PhraseBased/InputPath.cpp)10
-rw-r--r--moses2/PhraseBased/InputPath.h (renamed from contrib/moses2/PhraseBased/InputPath.h)7
-rw-r--r--moses2/PhraseBased/InputPaths.cpp (renamed from contrib/moses2/PhraseBased/InputPaths.cpp)0
-rw-r--r--moses2/PhraseBased/InputPaths.h (renamed from contrib/moses2/PhraseBased/InputPaths.h)13
-rw-r--r--moses2/PhraseBased/Manager.cpp278
-rw-r--r--moses2/PhraseBased/Manager.h (renamed from contrib/moses2/PhraseBased/Manager.h)22
-rw-r--r--moses2/PhraseBased/Normal/Search.cpp161
-rw-r--r--moses2/PhraseBased/Normal/Search.h (renamed from contrib/moses2/PhraseBased/Normal/Search.h)4
-rw-r--r--moses2/PhraseBased/Normal/Stack.cpp (renamed from contrib/moses2/PhraseBased/Normal/Stack.cpp)2
-rw-r--r--moses2/PhraseBased/Normal/Stack.h (renamed from contrib/moses2/PhraseBased/Normal/Stack.h)0
-rw-r--r--moses2/PhraseBased/Normal/Stacks.cpp (renamed from contrib/moses2/PhraseBased/Normal/Stacks.cpp)7
-rw-r--r--moses2/PhraseBased/Normal/Stacks.h (renamed from contrib/moses2/PhraseBased/Normal/Stacks.h)14
-rw-r--r--moses2/PhraseBased/PhraseImpl.cpp (renamed from contrib/moses2/PhraseBased/PhraseImpl.cpp)2
-rw-r--r--moses2/PhraseBased/PhraseImpl.h (renamed from contrib/moses2/PhraseBased/PhraseImpl.h)5
-rw-r--r--moses2/PhraseBased/ReorderingConstraint.cpp (renamed from contrib/moses2/PhraseBased/ReorderingConstraint.cpp)4
-rw-r--r--moses2/PhraseBased/ReorderingConstraint.h (renamed from contrib/moses2/PhraseBased/ReorderingConstraint.h)0
-rw-r--r--moses2/PhraseBased/Search.cpp (renamed from contrib/moses2/PhraseBased/Search.cpp)11
-rw-r--r--moses2/PhraseBased/Search.h (renamed from contrib/moses2/PhraseBased/Search.h)8
-rw-r--r--moses2/PhraseBased/Sentence.cpp173
-rw-r--r--moses2/PhraseBased/Sentence.h (renamed from contrib/moses2/PhraseBased/Sentence.h)20
-rw-r--r--moses2/PhraseBased/TargetPhraseImpl.cpp (renamed from contrib/moses2/PhraseBased/TargetPhraseImpl.cpp)8
-rw-r--r--moses2/PhraseBased/TargetPhraseImpl.h (renamed from contrib/moses2/PhraseBased/TargetPhraseImpl.h)17
-rw-r--r--moses2/PhraseBased/TargetPhrases.cpp (renamed from contrib/moses2/PhraseBased/TargetPhrases.cpp)10
-rw-r--r--moses2/PhraseBased/TargetPhrases.h (renamed from contrib/moses2/PhraseBased/TargetPhrases.h)15
-rw-r--r--moses2/PhraseBased/TrellisPath.cpp (renamed from contrib/moses2/PhraseBased/TrellisPath.cpp)18
-rw-r--r--moses2/PhraseBased/TrellisPath.h (renamed from contrib/moses2/PhraseBased/TrellisPath.h)17
-rw-r--r--moses2/PhraseImplTemplate.h (renamed from contrib/moses2/PhraseImplTemplate.h)30
-rw-r--r--moses2/Recycler.cpp (renamed from contrib/moses2/Recycler.cpp)0
-rw-r--r--moses2/Recycler.h (renamed from contrib/moses2/Recycler.h)24
-rw-r--r--moses2/SCFG/ActiveChart.cpp (renamed from contrib/moses2/SCFG/ActiveChart.cpp)32
-rw-r--r--moses2/SCFG/ActiveChart.h (renamed from contrib/moses2/SCFG/ActiveChart.h)41
-rw-r--r--moses2/SCFG/Hypothesis.cpp (renamed from contrib/moses2/SCFG/Hypothesis.cpp)62
-rw-r--r--moses2/SCFG/Hypothesis.h (renamed from contrib/moses2/SCFG/Hypothesis.h)35
-rw-r--r--moses2/SCFG/InputPath.cpp (renamed from contrib/moses2/SCFG/InputPath.cpp)20
-rw-r--r--moses2/SCFG/InputPath.h (renamed from contrib/moses2/SCFG/InputPath.h)17
-rw-r--r--moses2/SCFG/InputPaths.cpp (renamed from contrib/moses2/SCFG/InputPaths.cpp)2
-rw-r--r--moses2/SCFG/InputPaths.h (renamed from contrib/moses2/SCFG/InputPaths.h)3
-rw-r--r--moses2/SCFG/Manager.cpp (renamed from contrib/moses2/SCFG/Manager.cpp)61
-rw-r--r--moses2/SCFG/Manager.h (renamed from contrib/moses2/SCFG/Manager.h)37
-rw-r--r--moses2/SCFG/Misc.cpp (renamed from contrib/moses2/SCFG/Misc.cpp)75
-rw-r--r--moses2/SCFG/Misc.h (renamed from contrib/moses2/SCFG/Misc.h)68
-rw-r--r--moses2/SCFG/PhraseImpl.cpp (renamed from contrib/moses2/SCFG/PhraseImpl.cpp)2
-rw-r--r--moses2/SCFG/PhraseImpl.h (renamed from contrib/moses2/SCFG/PhraseImpl.h)5
-rw-r--r--moses2/SCFG/Sentence.cpp154
-rw-r--r--moses2/SCFG/Sentence.h (renamed from contrib/moses2/SCFG/Sentence.h)20
-rw-r--r--moses2/SCFG/Stack.cpp (renamed from contrib/moses2/SCFG/Stack.cpp)12
-rw-r--r--moses2/SCFG/Stack.h (renamed from contrib/moses2/SCFG/Stack.h)7
-rw-r--r--moses2/SCFG/Stacks.cpp (renamed from contrib/moses2/SCFG/Stacks.cpp)0
-rw-r--r--moses2/SCFG/Stacks.h (renamed from contrib/moses2/SCFG/Stacks.h)15
-rw-r--r--moses2/SCFG/TargetPhraseImpl.cpp (renamed from contrib/moses2/SCFG/TargetPhraseImpl.cpp)28
-rw-r--r--moses2/SCFG/TargetPhraseImpl.h (renamed from contrib/moses2/SCFG/TargetPhraseImpl.h)19
-rw-r--r--moses2/SCFG/TargetPhrases.cpp (renamed from contrib/moses2/SCFG/TargetPhrases.cpp)10
-rw-r--r--moses2/SCFG/TargetPhrases.h (renamed from contrib/moses2/SCFG/TargetPhrases.h)17
-rw-r--r--moses2/SCFG/Word.cpp (renamed from contrib/moses2/SCFG/Word.cpp)82
-rw-r--r--moses2/SCFG/Word.h (renamed from contrib/moses2/SCFG/Word.h)22
-rw-r--r--moses2/SCFG/nbest/KBestExtractor.cpp74
-rw-r--r--moses2/SCFG/nbest/KBestExtractor.h (renamed from contrib/moses2/SCFG/nbest/KBestExtractor.h)0
-rw-r--r--moses2/SCFG/nbest/NBest.cpp192
-rw-r--r--moses2/SCFG/nbest/NBest.h99
-rw-r--r--moses2/SCFG/nbest/NBestColl.cpp (renamed from contrib/moses2/SCFG/nbest/NBestColl.cpp)31
-rw-r--r--moses2/SCFG/nbest/NBestColl.h (renamed from contrib/moses2/SCFG/nbest/NBestColl.h)10
-rw-r--r--moses2/SCFG/nbest/NBests.cpp109
-rw-r--r--moses2/SCFG/nbest/NBests.h54
-rw-r--r--moses2/Scores.cpp (renamed from contrib/moses2/Scores.cpp)38
-rw-r--r--moses2/Scores.h (renamed from contrib/moses2/Scores.h)27
-rw-r--r--moses2/SubPhrase.cpp (renamed from contrib/moses2/SubPhrase.cpp)0
-rw-r--r--moses2/SubPhrase.h (renamed from contrib/moses2/SubPhrase.h)22
-rw-r--r--moses2/System.cpp (renamed from contrib/moses2/System.cpp)80
-rw-r--r--moses2/System.h (renamed from contrib/moses2/System.h)0
-rw-r--r--moses2/TargetPhrase.cpp (renamed from contrib/moses2/TargetPhrase.cpp)0
-rw-r--r--moses2/TargetPhrase.h (renamed from contrib/moses2/TargetPhrase.h)96
-rw-r--r--moses2/TranslationModel/CompactPT/BlockHashIndex.cpp (renamed from contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.cpp)32
-rw-r--r--moses2/TranslationModel/CompactPT/BlockHashIndex.h (renamed from contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h)19
-rw-r--r--moses2/TranslationModel/CompactPT/CanonicalHuffman.h (renamed from contrib/moses2/TranslationModel/CompactPT/CanonicalHuffman.h)84
-rw-r--r--moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp (renamed from contrib/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp)2
-rw-r--r--moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.h (renamed from contrib/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.h)9
-rw-r--r--moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp (renamed from contrib/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp)39
-rw-r--r--moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.h (renamed from contrib/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.h)44
-rw-r--r--moses2/TranslationModel/CompactPT/ListCoders.h (renamed from contrib/moses2/TranslationModel/CompactPT/ListCoders.h)37
-rw-r--r--moses2/TranslationModel/CompactPT/MmapAllocator.h (renamed from contrib/moses2/TranslationModel/CompactPT/MmapAllocator.h)89
-rw-r--r--moses2/TranslationModel/CompactPT/MonotonicVector.h (renamed from contrib/moses2/TranslationModel/CompactPT/MonotonicVector.h)63
-rw-r--r--moses2/TranslationModel/CompactPT/MurmurHash3.cpp (renamed from contrib/moses2/TranslationModel/CompactPT/MurmurHash3.cpp)4
-rw-r--r--moses2/TranslationModel/CompactPT/MurmurHash3.h (renamed from contrib/moses2/TranslationModel/CompactPT/MurmurHash3.h)7
-rw-r--r--moses2/TranslationModel/CompactPT/PackedArray.h (renamed from contrib/moses2/TranslationModel/CompactPT/PackedArray.h)48
-rw-r--r--moses2/TranslationModel/CompactPT/StringVector.h (renamed from contrib/moses2/TranslationModel/CompactPT/StringVector.h)110
-rw-r--r--moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp (renamed from contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp)0
-rw-r--r--moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h (renamed from contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h)6
-rw-r--r--moses2/TranslationModel/CompactPT/ThrowingFwrite.cpp (renamed from contrib/moses2/TranslationModel/CompactPT/ThrowingFwrite.cpp)0
-rw-r--r--moses2/TranslationModel/CompactPT/ThrowingFwrite.h (renamed from contrib/moses2/TranslationModel/CompactPT/ThrowingFwrite.h)0
-rw-r--r--moses2/TranslationModel/Memory/Node.h (renamed from contrib/moses2/TranslationModel/Memory/Node.h)43
-rw-r--r--moses2/TranslationModel/Memory/PhraseTableMemory.cpp (renamed from contrib/moses2/TranslationModel/Memory/PhraseTableMemory.cpp)63
-rw-r--r--moses2/TranslationModel/Memory/PhraseTableMemory.h (renamed from contrib/moses2/TranslationModel/Memory/PhraseTableMemory.h)44
-rw-r--r--moses2/TranslationModel/PhraseTable.cpp (renamed from contrib/moses2/TranslationModel/PhraseTable.cpp)97
-rw-r--r--moses2/TranslationModel/PhraseTable.h (renamed from contrib/moses2/TranslationModel/PhraseTable.h)85
-rw-r--r--moses2/TranslationModel/ProbingPT.cpp (renamed from contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp)338
-rw-r--r--moses2/TranslationModel/ProbingPT.h (renamed from contrib/moses2/TranslationModel/ProbingPT/ProbingPT.h)111
-rw-r--r--moses2/TranslationModel/Transliteration.cpp (renamed from contrib/moses2/TranslationModel/Transliteration.cpp)92
-rw-r--r--moses2/TranslationModel/Transliteration.h91
-rw-r--r--moses2/TranslationModel/UnknownWordPenalty.cpp (renamed from contrib/moses2/TranslationModel/UnknownWordPenalty.cpp)118
-rw-r--r--moses2/TranslationModel/UnknownWordPenalty.h89
-rw-r--r--moses2/TranslationTask.cpp (renamed from contrib/moses2/TranslationTask.cpp)11
-rw-r--r--moses2/TranslationTask.h (renamed from contrib/moses2/TranslationTask.h)0
-rw-r--r--moses2/TrellisPaths.cpp (renamed from contrib/moses2/TrellisPaths.cpp)0
-rw-r--r--moses2/TrellisPaths.h (renamed from contrib/moses2/TrellisPaths.h)25
-rw-r--r--moses2/TypeDef.cpp (renamed from contrib/moses2/TypeDef.cpp)0
-rw-r--r--moses2/TypeDef.h (renamed from contrib/moses2/TypeDef.h)9
-rw-r--r--moses2/Vector.cpp (renamed from contrib/moses2/Vector.cpp)0
-rw-r--r--moses2/Vector.h (renamed from contrib/moses2/Vector.h)6
-rw-r--r--moses2/Weights.cpp (renamed from contrib/moses2/Weights.cpp)4
-rw-r--r--moses2/Weights.h (renamed from contrib/moses2/Weights.h)3
-rw-r--r--moses2/Word.cpp (renamed from contrib/moses2/Word.cpp)16
-rw-r--r--moses2/Word.h (renamed from contrib/moses2/Word.h)14
-rw-r--r--moses2/defer/CubePruningBitmapStack/Misc.cpp159
-rw-r--r--moses2/defer/CubePruningBitmapStack/Misc.h111
-rw-r--r--moses2/defer/CubePruningBitmapStack/Search.cpp206
-rw-r--r--moses2/defer/CubePruningBitmapStack/Search.h57
-rw-r--r--moses2/defer/CubePruningBitmapStack/Stack.cpp299
-rw-r--r--moses2/defer/CubePruningBitmapStack/Stack.h114
-rw-r--r--moses2/defer/CubePruningCardinalStack/Misc.cpp159
-rw-r--r--moses2/defer/CubePruningCardinalStack/Misc.h112
-rw-r--r--moses2/defer/CubePruningCardinalStack/Search.cpp206
-rw-r--r--moses2/defer/CubePruningCardinalStack/Search.h57
-rw-r--r--moses2/defer/CubePruningCardinalStack/Stack.cpp (renamed from contrib/moses2/defer/CubePruningCardinalStack/Stack.cpp)104
-rw-r--r--moses2/defer/CubePruningCardinalStack/Stack.h71
-rw-r--r--moses2/defer/CubePruningPerBitmap/Misc.cpp159
-rw-r--r--moses2/defer/CubePruningPerBitmap/Misc.h113
-rw-r--r--moses2/defer/CubePruningPerBitmap/Search.cpp271
-rw-r--r--moses2/defer/CubePruningPerBitmap/Search.h66
-rw-r--r--moses2/defer/CubePruningPerBitmap/Stacks.cpp72
-rw-r--r--moses2/defer/CubePruningPerBitmap/Stacks.h55
-rw-r--r--moses2/defer/CubePruningPerMiniStack/Misc.cpp159
-rw-r--r--moses2/defer/CubePruningPerMiniStack/Misc.h113
-rw-r--r--moses2/defer/CubePruningPerMiniStack/Search.cpp246
-rw-r--r--moses2/defer/CubePruningPerMiniStack/Search.h66
-rw-r--r--moses2/defer/CubePruningPerMiniStack/Stacks.cpp72
-rw-r--r--moses2/defer/CubePruningPerMiniStack/Stacks.h55
-rw-r--r--moses2/legacy/Bitmap.cpp (renamed from contrib/moses2/legacy/Bitmap.cpp)6
-rw-r--r--moses2/legacy/Bitmap.h240
-rw-r--r--moses2/legacy/Bitmaps.cpp (renamed from contrib/moses2/legacy/Bitmaps.cpp)13
-rw-r--r--moses2/legacy/Bitmaps.h (renamed from contrib/moses2/legacy/Bitmaps.h)5
-rw-r--r--moses2/legacy/Factor.cpp (renamed from contrib/moses2/legacy/Factor.cpp)0
-rw-r--r--moses2/legacy/Factor.h (renamed from contrib/moses2/legacy/Factor.h)21
-rw-r--r--moses2/legacy/FactorCollection.cpp (renamed from contrib/moses2/legacy/FactorCollection.cpp)11
-rw-r--r--moses2/legacy/FactorCollection.h (renamed from contrib/moses2/legacy/FactorCollection.h)27
-rw-r--r--moses2/legacy/InputFileStream.cpp (renamed from contrib/moses2/legacy/InputFileStream.cpp)5
-rw-r--r--moses2/legacy/InputFileStream.h (renamed from contrib/moses2/legacy/InputFileStream.h)0
-rw-r--r--moses2/legacy/Matrix.cpp (renamed from contrib/moses2/legacy/Matrix.cpp)0
-rw-r--r--moses2/legacy/Matrix.h (renamed from contrib/moses2/legacy/Matrix.h)29
-rw-r--r--moses2/legacy/OutputCollector.h (renamed from contrib/moses2/legacy/OutputCollector.h)49
-rw-r--r--moses2/legacy/OutputFileStream.cpp (renamed from contrib/moses2/legacy/OutputFileStream.cpp)9
-rw-r--r--moses2/legacy/OutputFileStream.h (renamed from contrib/moses2/legacy/OutputFileStream.h)0
-rw-r--r--moses2/legacy/Parameter.cpp (renamed from contrib/moses2/legacy/Parameter.cpp)274
-rw-r--r--moses2/legacy/Parameter.h (renamed from contrib/moses2/legacy/Parameter.h)45
-rw-r--r--moses2/legacy/Range.cpp (renamed from contrib/moses2/legacy/Range.cpp)0
-rw-r--r--moses2/legacy/Range.h (renamed from contrib/moses2/legacy/Range.h)28
-rw-r--r--moses2/legacy/ThreadPool.cpp (renamed from contrib/moses2/legacy/ThreadPool.cpp)21
-rw-r--r--moses2/legacy/ThreadPool.h (renamed from contrib/moses2/legacy/ThreadPool.h)23
-rw-r--r--moses2/legacy/Timer.cpp (renamed from contrib/moses2/legacy/Timer.cpp)5
-rw-r--r--moses2/legacy/Timer.h (renamed from contrib/moses2/legacy/Timer.h)0
-rw-r--r--moses2/legacy/Util2.cpp (renamed from contrib/moses2/legacy/Util2.cpp)7
-rw-r--r--moses2/legacy/Util2.h (renamed from contrib/moses2/legacy/Util2.h)82
-rw-r--r--moses2/legacy/gzfilebuf.h (renamed from contrib/moses2/legacy/gzfilebuf.h)31
-rw-r--r--moses2/legacy/xmlrpc-c.h10
-rw-r--r--moses2/parameters/AllOptions.cpp118
-rw-r--r--moses2/parameters/AllOptions.h50
-rw-r--r--moses2/parameters/BeamSearchOptions.h14
-rw-r--r--moses2/parameters/BookkeepingOptions.cpp (renamed from contrib/moses2/parameters/BookkeepingOptions.cpp)0
-rw-r--r--moses2/parameters/BookkeepingOptions.h17
-rw-r--r--moses2/parameters/ContextParameters.cpp (renamed from contrib/moses2/parameters/ContextParameters.cpp)20
-rw-r--r--moses2/parameters/ContextParameters.h (renamed from contrib/moses2/parameters/ContextParameters.h)2
-rw-r--r--moses2/parameters/CubePruningOptions.cpp76
-rw-r--r--moses2/parameters/CubePruningOptions.h24
-rw-r--r--moses2/parameters/InputOptions.cpp99
-rw-r--r--moses2/parameters/InputOptions.h31
-rw-r--r--moses2/parameters/LMBR_Options.cpp39
-rw-r--r--moses2/parameters/LMBR_Options.h25
-rw-r--r--moses2/parameters/LookupOptions.h (renamed from contrib/moses2/parameters/LookupOptions.h)11
-rw-r--r--moses2/parameters/MBR_Options.cpp26
-rw-r--r--moses2/parameters/MBR_Options.h20
-rw-r--r--moses2/parameters/NBestOptions.cpp (renamed from contrib/moses2/parameters/NBestOptions.cpp)30
-rw-r--r--moses2/parameters/NBestOptions.h (renamed from contrib/moses2/parameters/NBestOptions.h)3
-rw-r--r--moses2/parameters/OOVHandlingOptions.cpp48
-rw-r--r--moses2/parameters/OOVHandlingOptions.h26
-rw-r--r--moses2/parameters/OptionsBaseClass.cpp29
-rw-r--r--moses2/parameters/OptionsBaseClass.h19
-rw-r--r--moses2/parameters/ReorderingOptions.cpp31
-rw-r--r--moses2/parameters/ReorderingOptions.h19
-rw-r--r--moses2/parameters/ReportingOptions.cpp152
-rw-r--r--moses2/parameters/ReportingOptions.h69
-rw-r--r--moses2/parameters/SearchOptions.cpp106
-rw-r--r--moses2/parameters/SearchOptions.h53
-rw-r--r--moses2/parameters/ServerOptions.cpp (renamed from contrib/moses2/parameters/ServerOptions.cpp)32
-rw-r--r--moses2/parameters/ServerOptions.h41
-rw-r--r--moses2/parameters/SyntaxOptions.cpp (renamed from contrib/moses2/parameters/SyntaxOptions.cpp)8
-rw-r--r--moses2/parameters/SyntaxOptions.h (renamed from contrib/moses2/parameters/SyntaxOptions.h)3
-rw-r--r--moses2/pugiconfig.hpp (renamed from contrib/moses2/pugiconfig.hpp)2
-rw-r--r--moses2/pugixml.cpp11456
-rw-r--r--moses2/pugixml.hpp1391
-rw-r--r--moses2/server/Server.cpp (renamed from contrib/moses2/server/Server.cpp)44
-rw-r--r--moses2/server/Server.h (renamed from contrib/moses2/server/Server.h)0
-rw-r--r--moses2/server/TranslationRequest.cpp (renamed from contrib/moses2/server/TranslationRequest.cpp)24
-rw-r--r--moses2/server/TranslationRequest.h (renamed from contrib/moses2/server/TranslationRequest.h)10
-rw-r--r--moses2/server/Translator.cpp (renamed from contrib/moses2/server/Translator.cpp)10
-rw-r--r--moses2/server/Translator.h (renamed from contrib/moses2/server/Translator.h)2
-rw-r--r--phrase-extract/consolidate-direct.vcxproj98
-rw-r--r--phrase-extract/consolidate.vcxproj100
-rw-r--r--phrase-extract/extract-lex.vcxproj89
-rw-r--r--phrase-extract/extract-rules.vcxproj111
-rw-r--r--phrase-extract/extract.vcxproj103
-rw-r--r--phrase-extract/phrase-extract.sln56
-rw-r--r--phrase-extract/score.vcxproj106
-rw-r--r--probingpt/CreateProbingPT.cpp (renamed from misc/CreateProbingPT.cpp)12
-rw-r--r--probingpt/InputFileStream.cpp59
-rw-r--r--probingpt/InputFileStream.h46
-rw-r--r--probingpt/Jamfile32
-rw-r--r--probingpt/OutputFileStream.cpp87
-rw-r--r--probingpt/OutputFileStream.h81
-rw-r--r--probingpt/StoreTarget.cpp (renamed from moses/TranslationModel/ProbingPT/StoreTarget.cpp)26
-rw-r--r--probingpt/StoreTarget.h (renamed from moses/TranslationModel/ProbingPT/StoreTarget.h)2
-rw-r--r--probingpt/StoreVocab.cpp (renamed from contrib/moses2/TranslationModel/ProbingPT/StoreVocab.cpp)2
-rw-r--r--probingpt/StoreVocab.h (renamed from moses/TranslationModel/ProbingPT/StoreVocab.h)6
-rw-r--r--probingpt/gzfilebuf.h94
-rw-r--r--probingpt/hash.cpp (renamed from moses/TranslationModel/ProbingPT/hash.cpp)4
-rw-r--r--probingpt/hash.h (renamed from contrib/moses2/TranslationModel/ProbingPT/hash.hh)2
-rw-r--r--probingpt/line_splitter.cpp (renamed from contrib/moses2/TranslationModel/ProbingPT/line_splitter.cpp)4
-rw-r--r--probingpt/line_splitter.h (renamed from moses/TranslationModel/ProbingPT/line_splitter.hh)7
-rw-r--r--probingpt/probing_hash_utils.cpp (renamed from contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp)4
-rw-r--r--probingpt/probing_hash_utils.h (renamed from contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh)18
-rw-r--r--probingpt/querying.cpp (renamed from contrib/moses2/TranslationModel/ProbingPT/querying.cpp)79
-rw-r--r--probingpt/querying.h (renamed from contrib/moses2/TranslationModel/ProbingPT/querying.hh)28
-rw-r--r--probingpt/storing.cpp (renamed from moses/TranslationModel/ProbingPT/storing.cpp)24
-rw-r--r--probingpt/storing.h (renamed from moses/TranslationModel/ProbingPT/storing.hh)8
-rw-r--r--probingpt/util.cpp24
-rw-r--r--probingpt/util.h24
-rw-r--r--probingpt/vocabid.cpp (renamed from moses/TranslationModel/ProbingPT/vocabid.cpp)10
-rw-r--r--probingpt/vocabid.h (renamed from moses/TranslationModel/ProbingPT/vocabid.hh)4
-rw-r--r--regression-testing/Jamfile2
-rw-r--r--scripts/docker/Dockerfile.ubuntu.basic26
-rw-r--r--scripts/docker/Dockerfile.ubuntu.fastlightpbmt47
-rw-r--r--scripts/ems/experiment.meta58
-rwxr-xr-xscripts/ems/support/reference-from-sgm.perl2
-rwxr-xr-xscripts/generic/binarize4moses2.perl3
-rwxr-xr-xscripts/generic/bsbleu.py17
-rwxr-xr-xscripts/generic/mteval-v12.pl3
-rwxr-xr-xscripts/generic/mteval-v13a.pl3
-rw-r--r--scripts/generic/mteval-v14.pl1179
-rwxr-xr-xscripts/recaser/train-truecaser.perl6
-rwxr-xr-xscripts/training/rdlm/train_rdlm.py5
-rw-r--r--util/tempfile.hh4
502 files changed, 26838 insertions, 28530 deletions
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 000000000..c80b60de5
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,24 @@
+sudo: false
+dist: trusty
+language: c
+compiler: gcc
+env:
+ matrix:
+addons:
+ apt:
+ sources:
+ - ubuntu-toolchain-r-test
+ packages:
+ - subversion
+ - automake
+ - libtool
+ - zlib1g-dev
+ - libbz2-dev
+ - liblzma-dev
+ - libboost-all-dev
+ - libgoogle-perftools-dev
+ - libxmlrpc-c++.*-dev
+ - cmake
+ - csh
+script:
+- ./bjam -j4
diff --git a/BUILD-INSTRUCTIONS.txt b/BUILD-INSTRUCTIONS.txt
index a41582bfa..7b9bc3a8a 100644
--- a/BUILD-INSTRUCTIONS.txt
+++ b/BUILD-INSTRUCTIONS.txt
@@ -7,4 +7,3 @@ into the source tree from elsewhere:
* "bjam-files" is taken from Boost.
* "util" and "lm" are taken from KenLM: https://github.com/kpu/kenlm
-
diff --git a/Jamroot b/Jamroot
index d9fc811dd..6cc7c9427 100644
--- a/Jamroot
+++ b/Jamroot
@@ -316,7 +316,8 @@ contrib/c++tokenizer//tokenizer
contrib/expected-bleu-training//train-expected-bleu
contrib/expected-bleu-training//prepare-expected-bleu-training
-contrib/moses2//programs
+probingpt//programs
+moses2//programs
;
diff --git a/contrib/moses2/ArcLists.cpp b/contrib/moses2/ArcLists.cpp
deleted file mode 100644
index edc985465..000000000
--- a/contrib/moses2/ArcLists.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * ArcList.cpp
- *
- * Created on: 26 Oct 2015
- * Author: hieu
- */
-#include <iostream>
-#include <sstream>
-#include <algorithm>
-#include <boost/foreach.hpp>
-#include "ArcLists.h"
-#include "HypothesisBase.h"
-#include "util/exception.hh"
-
-using namespace std;
-
-namespace Moses2
-{
-
-ArcLists::ArcLists()
-{
- // TODO Auto-generated constructor stub
-
-}
-
-ArcLists::~ArcLists()
-{
- BOOST_FOREACH(const Coll::value_type &collPair, m_coll){
- const ArcList *arcList = collPair.second;
- delete arcList;
- }
-}
-
-void ArcLists::AddArc(bool added, const HypothesisBase *currHypo,
- const HypothesisBase *otherHypo)
-{
- //cerr << added << " " << currHypo << " " << otherHypo << endl;
- ArcList *arcList;
- if (added) {
- // we're winners!
- if (otherHypo) {
- // there was a existing losing hypo
- arcList = &GetAndDetachArcList(otherHypo);
- }
- else {
- // there was no existing hypo
- arcList = new ArcList;
- }
- m_coll[currHypo] = arcList;
- }
- else {
- // we're losers!
- // there should be a winner, we're not doing beam pruning
- UTIL_THROW_IF2(otherHypo == NULL, "There must have been a winning hypo");
- arcList = &GetArcList(otherHypo);
- }
-
- // in any case, add the curr hypo
- arcList->push_back(currHypo);
-}
-
-ArcList &ArcLists::GetArcList(const HypothesisBase *hypo)
-{
- Coll::iterator iter = m_coll.find(hypo);
- UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
- ArcList &arcList = *iter->second;
- return arcList;
-}
-
-const ArcList &ArcLists::GetArcList(const HypothesisBase *hypo) const
-{
- Coll::const_iterator iter = m_coll.find(hypo);
-
- if (iter == m_coll.end()) {
- cerr << "looking for:" << hypo << " have " << m_coll.size() << " :";
- BOOST_FOREACH(const Coll::value_type &collPair, m_coll){
- const HypothesisBase *hypo = collPair.first;
- cerr << hypo << " ";
- }
- }
-
- UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list for " << hypo);
- ArcList &arcList = *iter->second;
- return arcList;
-}
-
-ArcList &ArcLists::GetAndDetachArcList(const HypothesisBase *hypo)
-{
- Coll::iterator iter = m_coll.find(hypo);
- UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
- ArcList &arcList = *iter->second;
-
- m_coll.erase(iter);
-
- return arcList;
-}
-
-void ArcLists::Sort()
-{
- BOOST_FOREACH(Coll::value_type &collPair, m_coll){
- ArcList &list = *collPair.second;
- std::sort(list.begin(), list.end(), HypothesisFutureScoreOrderer() );
- }
-}
-
-void ArcLists::Delete(const HypothesisBase *hypo)
-{
- //cerr << "hypo=" << hypo->Debug() << endl;
- //cerr << "m_coll=" << m_coll.size() << endl;
- Coll::iterator iter = m_coll.find(hypo);
- UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
- ArcList *arcList = iter->second;
-
- m_coll.erase(iter);
- delete arcList;
-}
-
-std::string ArcLists::Debug(const System &system) const
-{
- stringstream strm;
- BOOST_FOREACH(const Coll::value_type &collPair, m_coll){
- const ArcList *arcList = collPair.second;
- strm << arcList << "(" << arcList->size() << ") ";
- }
- return strm.str();
-}
-
-}
-
diff --git a/contrib/moses2/FF/FFState.cpp b/contrib/moses2/FF/FFState.cpp
deleted file mode 100644
index e69de29bb..000000000
--- a/contrib/moses2/FF/FFState.cpp
+++ /dev/null
diff --git a/contrib/moses2/FF/PointerState.cpp b/contrib/moses2/FF/PointerState.cpp
deleted file mode 100644
index e69de29bb..000000000
--- a/contrib/moses2/FF/PointerState.cpp
+++ /dev/null
diff --git a/contrib/moses2/FF/SkeletonStatefulFF.h b/contrib/moses2/FF/SkeletonStatefulFF.h
deleted file mode 100644
index bfa3ad870..000000000
--- a/contrib/moses2/FF/SkeletonStatefulFF.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * SkeletonStatefulFF.h
- *
- * Created on: 27 Oct 2015
- * Author: hieu
- */
-
-#ifndef SKELETONSTATEFULFF_H_
-#define SKELETONSTATEFULFF_H_
-
-#include "StatefulFeatureFunction.h"
-
-namespace Moses2
-{
-
-class SkeletonStatefulFF: public StatefulFeatureFunction
-{
-public:
- SkeletonStatefulFF(size_t startInd, const std::string &line);
- virtual ~SkeletonStatefulFF();
-
- virtual FFState* BlankState(MemPool &pool, const System &sys) const;
- virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const;
-
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- virtual void EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const;
-
- virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const;
-
-};
-
-}
-
-#endif /* SKELETONSTATEFULFF_H_ */
diff --git a/contrib/moses2/FF/SkeletonStatelessFF.h b/contrib/moses2/FF/SkeletonStatelessFF.h
deleted file mode 100644
index 9be14bffe..000000000
--- a/contrib/moses2/FF/SkeletonStatelessFF.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * SkeletonStatefulFF.h
- *
- * Created on: 27 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include "StatelessFeatureFunction.h"
-
-namespace Moses2
-{
-
-class SkeletonStatelessFF: public StatelessFeatureFunction
-{
-public:
- SkeletonStatelessFF(size_t startInd, const std::string &line);
- virtual ~SkeletonStatelessFF();
-
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
-};
-
-}
-
diff --git a/contrib/moses2/LM/LanguageModelDALM.cpp b/contrib/moses2/LM/LanguageModelDALM.cpp
deleted file mode 100644
index 7d3e8242b..000000000
--- a/contrib/moses2/LM/LanguageModelDALM.cpp
+++ /dev/null
@@ -1,246 +0,0 @@
-/*
- * LanguageModelDALM.cpp
- *
- * Created on: 5 Dec 2015
- * Author: hieu
- */
-
-#include "LanguageModelDALM.h"
-#include "../TypeDef.h"
-#include "../System.h"
-#include "dalm.h"
-#include "util/exception.hh"
-#include "../legacy/InputFileStream.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-//////////////////////////////////////////////////////////////////////////////////////////
-class Murmur: public DALM::State::HashFunction
-{
-public:
- Murmur(std::size_t seed=0): seed(seed) {
- }
- virtual std::size_t operator()(const DALM::VocabId *words, std::size_t size) const {
- return util::MurmurHashNative(words, sizeof(DALM::VocabId) * size, seed);
- }
-private:
- std::size_t seed;
-};
-
-//////////////////////////////////////////////////////////////////////////////////////////
-class DALMState : public FFState
-{
-private:
- DALM::State state;
-
-public:
- DALMState() {
- }
-
- DALMState(const DALMState &from) {
- state = from.state;
- }
-
- virtual ~DALMState() {
- }
-
- void reset(const DALMState &from) {
- state = from.state;
- }
-
- virtual int Compare(const FFState& other) const {
- const DALMState &o = static_cast<const DALMState &>(other);
- if(state.get_count() < o.state.get_count()) return -1;
- else if(state.get_count() > o.state.get_count()) return 1;
- else return state.compare(o.state);
- }
-
- virtual size_t hash() const {
- // imitate KenLM
- return state.hash(Murmur());
- }
-
- virtual bool operator==(const FFState& other) const {
- const DALMState &o = static_cast<const DALMState &>(other);
- return state.compare(o.state) == 0;
- }
-
- DALM::State &get_state() {
- return state;
- }
-
- void refresh() {
- state.refresh();
- }
-
- virtual std::string ToString() const
- { return "DALM state"; }
-
-};
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-inline void read_ini(const char *inifile, string &model, string &words, string &wordstxt)
-{
- ifstream ifs(inifile);
- string line;
-
- getline(ifs, line);
- while(ifs) {
- unsigned int pos = line.find("=");
- string key = line.substr(0, pos);
- string value = line.substr(pos+1, line.size()-pos);
- if(key=="MODEL") {
- model = value;
- } else if(key=="WORDS") {
- words = value;
- } else if(key=="WORDSTXT") {
- wordstxt = value;
- }
- getline(ifs, line);
- }
-}
-/////////////////////////
-
-LanguageModelDALM::LanguageModelDALM(size_t startInd, const std::string &line)
-:StatefulFeatureFunction(startInd, line)
-{
- ReadParameters();
-}
-
-LanguageModelDALM::~LanguageModelDALM() {
- // TODO Auto-generated destructor stub
-}
-
-void LanguageModelDALM::Load(System &system)
-{
- /////////////////////
- // READING INIFILE //
- /////////////////////
- string inifile= m_filePath + "/dalm.ini";
-
- string model; // Path to the double-array file.
- string words; // Path to the vocabulary file.
- string wordstxt; //Path to the vocabulary file in text format.
- read_ini(inifile.c_str(), model, words, wordstxt);
-
- model = m_filePath + "/" + model;
- words = m_filePath + "/" + words;
- wordstxt = m_filePath + "/" + wordstxt;
-
- UTIL_THROW_IF(model.empty() || words.empty() || wordstxt.empty(),
- util::FileOpenException,
- "Failed to read DALM ini file " << m_filePath << ". Probably doesn't exist");
-
- ////////////////
- // LOADING LM //
- ////////////////
-
- // Preparing a logger object.
- m_logger = new DALM::Logger(stderr);
- m_logger->setLevel(DALM::LOGGER_INFO);
-
- // Load the vocabulary file.
- m_vocab = new DALM::Vocabulary(words, *m_logger);
-
- // Load the language model.
- m_lm = new DALM::LM(model, *m_vocab, m_nGramOrder, *m_logger);
-
- wid_start = m_vocab->lookup(BOS_);
- wid_end = m_vocab->lookup(EOS_);
-
- // vocab mapping
- CreateVocabMapping(wordstxt, system);
-
- m_beginSentenceFactor = system.GetVocab().AddFactor(BOS_, system);
-}
-
-void LanguageModelDALM::CreateVocabMapping(const std::string &wordstxt, const System &system)
-{
- InputFileStream vocabStrm(wordstxt);
-
- std::vector< std::pair<std::size_t, DALM::VocabId> > vlist;
- string line;
- std::size_t max_fid = 0;
- while(getline(vocabStrm, line)) {
- const Factor *factor = system.GetVocab().AddFactor(line, system);
- std::size_t fid = factor->GetId();
- DALM::VocabId wid = m_vocab->lookup(line.c_str());
-
- vlist.push_back(std::pair<std::size_t, DALM::VocabId>(fid, wid));
- if(max_fid < fid) max_fid = fid;
- }
-
- for(std::size_t i = 0; i < m_vocabMap.size(); i++) {
- m_vocabMap[i] = m_vocab->unk();
- }
-
- m_vocabMap.resize(max_fid+1, m_vocab->unk());
- std::vector< std::pair<std::size_t, DALM::VocabId> >::iterator it = vlist.begin();
- while(it != vlist.end()) {
- std::pair<std::size_t, DALM::VocabId> &entry = *it;
- m_vocabMap[entry.first] = entry.second;
-
- ++it;
- }
-}
-
-void LanguageModelDALM::SetParameter(const std::string& key, const std::string& value)
-{
- if (key == "factor") {
- m_factorType = Scan<FactorType>(value);
- } else if (key == "order") {
- m_nGramOrder = Scan<size_t>(value);
- } else if (key == "path") {
- m_filePath = value;
- } else {
- StatefulFeatureFunction::SetParameter(key, value);
- }
- m_ContextSize = m_nGramOrder-1;
-}
-
-FFState* LanguageModelDALM::BlankState(MemPool &pool, const System &sys) const
-{
- DALMState *state = new DALMState();
- return state;
-}
-
-void LanguageModelDALM::EmptyHypothesisState(FFState &state,
- const ManagerBase &mgr,
- const InputType &input,
- const Hypothesis &hypo) const
-{
- DALMState &dalmState = static_cast<DALMState&>(state);
- m_lm->init_state(dalmState.get_state());
-}
-
- void LanguageModelDALM::EvaluateInIsolation(MemPool &pool,
- const System &system,
- const Phrase &source,
- const TargetPhraseImpl &targetPhrase,
- Scores &scores,
- SCORE &estimatedScore) const
- {
-
- }
-
-void LanguageModelDALM::EvaluateWhenApplied(const ManagerBase &mgr,
-const Hypothesis &hypo,
-const FFState &prevState,
-Scores &scores,
-FFState &state) const
-{
-
-}
-
-void LanguageModelDALM::EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-}
-
diff --git a/contrib/moses2/LM/LanguageModelDALM.h b/contrib/moses2/LM/LanguageModelDALM.h
deleted file mode 100644
index cbbeca97d..000000000
--- a/contrib/moses2/LM/LanguageModelDALM.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * LanguageModelDALM.h
- *
- * Created on: 5 Dec 2015
- * Author: hieu
- */
-
-#pragma once
-#include "../FF/StatefulFeatureFunction.h"
-#include "../legacy/Util2.h"
-#include "../legacy/Factor.h"
-
-namespace DALM
-{
-class Logger;
-class Vocabulary;
-class State;
-class LM;
-union Fragment;
-class Gap;
-
-typedef unsigned int VocabId;
-}
-
-namespace Moses2
-{
-
-class LanguageModelDALM: public StatefulFeatureFunction
-{
-public:
- LanguageModelDALM(size_t startInd, const std::string &line);
- virtual ~LanguageModelDALM();
-
- virtual void Load(System &system);
- virtual void SetParameter(const std::string& key, const std::string& value);
-
- virtual FFState* BlankState(MemPool &pool, const System &sys) const;
- virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const;
-
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- virtual void EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const;
-
- virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const;
-
-protected:
- FactorType m_factorType;
-
- std::string m_filePath;
- size_t m_nGramOrder; //! max n-gram length contained in this LM
- size_t m_ContextSize;
-
- DALM::Logger *m_logger;
- DALM::Vocabulary *m_vocab;
- DALM::LM *m_lm;
- DALM::VocabId wid_start, wid_end;
-
- const Factor *m_beginSentenceFactor;
-
- mutable std::vector<DALM::VocabId> m_vocabMap;
-
- void CreateVocabMapping(const std::string &wordstxt, const System &system);
-
-};
-
-}
-
diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp b/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp
deleted file mode 100644
index 94baafeb9..000000000
--- a/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * Search.cpp
- *
- * Created on: 16 Nov 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include "Search.h"
-#include "Stack.h"
-#include "../Manager.h"
-#include "../Hypothesis.h"
-#include "../TrellisPath.h"
-#include "../Sentence.h"
-#include "../../TrellisPaths.h"
-#include "../../InputPathsBase.h"
-#include "../../InputPathBase.h"
-#include "../../System.h"
-#include "../../TranslationTask.h"
-#include "../../legacy/Util2.h"
-#include "../../PhraseBased/TargetPhrases.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningMiniStack
-{
-
-////////////////////////////////////////////////////////////////////////
-Search::Search(Manager &mgr) :
- Moses2::Search(mgr), m_stack(mgr), m_cubeEdgeAlloc(mgr.GetPool())
-
-, m_queue(QueueItemOrderer(),
- std::vector<QueueItem*, MemPoolAllocator<QueueItem*> >(
- MemPoolAllocator<QueueItem*>(mgr.GetPool())))
-
-, m_seenPositions(
- MemPoolAllocator<CubeEdge::SeenPositionItem>(mgr.GetPool()))
-
-, m_queueItemRecycler(MemPoolAllocator<QueueItem*>(mgr.GetPool()))
-
-{
-}
-
-Search::~Search()
-{
-}
-
-void Search::Decode()
-{
- const Sentence &sentence = static_cast<const Sentence&>(mgr.GetInput());
-
- // init cue edges
- m_cubeEdges.resize(sentence.GetSize() + 1);
- for (size_t i = 0; i < m_cubeEdges.size(); ++i) {
- m_cubeEdges[i] = new (mgr.GetPool().Allocate<CubeEdges>()) CubeEdges(
- m_cubeEdgeAlloc);
- }
-
- const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
- Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
- initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(),
- initBitmap);
- initHypo->EmptyHypothesisState(mgr.GetInput());
- //cerr << "initHypo=" << *initHypo << endl;
-
- m_stack.Add(initHypo, mgr.GetHypoRecycle(), mgr.arcLists);
- PostDecode(0);
-
- for (size_t stackInd = 1; stackInd < sentence.GetSize() + 1;
- ++stackInd) {
- //cerr << "stackInd=" << stackInd << endl;
- m_stack.Clear();
- Decode(stackInd);
- PostDecode(stackInd);
-
- //m_stack.DebugCounts();
- }
-
-}
-
-void Search::Decode(size_t stackInd)
-{
- Recycler<HypothesisBase*> &hypoRecycler = mgr.GetHypoRecycle();
-
- // reuse queue from previous stack. Clear it first
- std::vector<QueueItem*, MemPoolAllocator<QueueItem*> > &container = Container(
- m_queue);
- //cerr << "container=" << container.size() << endl;
- BOOST_FOREACH(QueueItem *item, container){
- // recycle unused hypos from queue
- Hypothesis *hypo = item->hypo;
- hypoRecycler.Recycle(hypo);
-
- // recycle queue item
- m_queueItemRecycler.push_back(item);
- }
- container.clear();
-
- m_seenPositions.clear();
-
- // add top hypo from every edge into queue
- CubeEdges &edges = *m_cubeEdges[stackInd];
-
- BOOST_FOREACH(CubeEdge *edge, edges){
- //cerr << *edge << " ";
- edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
- }
-
- /*
- cerr << "edges: ";
- boost::unordered_set<const Bitmap*> uniqueBM;
- BOOST_FOREACH(CubeEdge *edge, edges) {
- uniqueBM.insert(&edge->newBitmap);
- //cerr << *edge << " ";
- }
- cerr << edges.size() << " " << uniqueBM.size();
- cerr << endl;
- */
-
- size_t pops = 0;
- while (!m_queue.empty() && pops < mgr.system.options.cube.pop_limit) {
- // get best hypo from queue, add to stack
- //cerr << "queue=" << queue.size() << endl;
- QueueItem *item = m_queue.top();
- m_queue.pop();
-
- CubeEdge *edge = item->edge;
-
- // add hypo to stack
- Hypothesis *hypo = item->hypo;
-
- if (mgr.system.options.cube.lazy_scoring) {
- hypo->EvaluateWhenApplied();
- }
-
- //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
- m_stack.Add(hypo, hypoRecycler, mgr.arcLists);
-
- edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
-
- ++pops;
- }
-
- // create hypo from every edge. Increase diversity
- if (mgr.system.options.cube.diversity) {
- while (!m_queue.empty()) {
- QueueItem *item = m_queue.top();
- m_queue.pop();
-
- if (item->hypoIndex == 0 && item->tpIndex == 0) {
- // add hypo to stack
- Hypothesis *hypo = item->hypo;
- //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
- m_stack.Add(hypo, hypoRecycler, mgr.arcLists);
- }
- }
- }
-}
-
-void Search::PostDecode(size_t stackInd)
-{
- MemPool &pool = mgr.GetPool();
-
- const InputPaths &paths = mgr.GetInputPaths();
- const Matrix<InputPath*> &pathMatrix = paths.GetMatrix();
- size_t inputSize = pathMatrix.GetRows();
- size_t numPaths = pathMatrix.GetCols();
-
- BOOST_FOREACH(const Stack::Coll::value_type &val, m_stack.GetColl()){
- const Bitmap &hypoBitmap = *val.first.first;
- size_t firstGap = hypoBitmap.GetFirstGapPos();
- size_t hypoEndPos = val.first.second;
-
- Moses2::HypothesisColl &hypos = *val.second;
-
- //cerr << "key=" << hypoBitmap << " " << firstGap << " " << inputSize << endl;
-
- // create edges to next hypos from existing hypos
- for (size_t startPos = firstGap; startPos < inputSize; ++startPos) {
- for (size_t pathInd = 0; pathInd < numPaths; ++pathInd) {
- const InputPath *path = pathMatrix.GetValue(startPos, pathInd);
-
- if (path == NULL) {
- break;
- }
- if (path->GetNumRules() == 0) {
- continue;
- }
-
- const Range &pathRange = path->range;
- //cerr << "pathRange=" << pathRange << endl;
- if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
- continue;
- }
-
- const ReorderingConstraint &reorderingConstraint = mgr.GetInput().GetReorderingConstraint();
- if (!reorderingConstraint.Check(hypoBitmap, startPos, pathRange.GetEndPos())) {
- continue;
- }
-
- const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
- size_t numWords = newBitmap.GetNumWordsCovered();
-
- CubeEdges &edges = *m_cubeEdges[numWords];
-
- // sort hypo for a particular bitmap and hypoEndPos
- const Hypotheses &sortedHypos = hypos.GetSortedAndPrunedHypos(mgr, mgr.arcLists);
-
- size_t numPt = mgr.system.mappings.size();
- for (size_t i = 0; i < numPt; ++i) {
- const TargetPhrases *tps = path->targetPhrases[i];
- if (tps && tps->GetSize()) {
- CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, sortedHypos, *path, *tps, newBitmap);
- edges.push_back(edge);
- }
- }
- }
- }
- }
-}
-
-const Hypothesis *Search::GetBestHypo() const
-{
- const Hypothesis *bestHypo = m_stack.GetBestHypo();
- return bestHypo;
-}
-
-void Search::AddInitialTrellisPaths(TrellisPaths<TrellisPath> &paths) const
-{
- const Stack::Coll &coll = m_stack.GetColl();
- BOOST_FOREACH(const Stack::Coll::value_type &val, coll){
- Moses2::HypothesisColl &hypos = *val.second;
- const Hypotheses &sortedHypos = hypos.GetSortedAndPrunedHypos(mgr, mgr.arcLists);
-
- BOOST_FOREACH(const HypothesisBase *hypoBase, sortedHypos) {
- const Hypothesis *hypo = static_cast<const Hypothesis*>(hypoBase);
- TrellisPath *path = new TrellisPath(hypo, mgr.arcLists);
- paths.Add(path);
- }
- }
-}
-
-}
-
-}
-
diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp b/contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp
deleted file mode 100644
index e2b81f0ba..000000000
--- a/contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Stack.cpp
- *
- * Created on: 24 Oct 2015
- * Author: hieu
- */
-#include <algorithm>
-#include <boost/foreach.hpp>
-#include "Stack.h"
-#include "../Hypothesis.h"
-#include "../Manager.h"
-#include "../../Scores.h"
-#include "../../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningMiniStack
-{
-Stack::Stack(const Manager &mgr) :
- m_mgr(mgr), m_coll(
- MemPoolAllocator<std::pair<HypoCoverage, Moses2::HypothesisColl*> >(
- mgr.GetPool())), m_miniStackRecycler(
- MemPoolAllocator<Moses2::HypothesisColl*>(mgr.GetPool()))
-{
-}
-
-Stack::~Stack()
-{
- BOOST_FOREACH(const Coll::value_type &val, m_coll){
- const Moses2::HypothesisColl *miniStack = val.second;
- delete miniStack;
- }
-
- while (!m_miniStackRecycler.empty()) {
- Moses2::HypothesisColl *miniStack = m_miniStackRecycler.back();
- m_miniStackRecycler.pop_back();
- delete miniStack;
-
- }
-}
-
-void Stack::Add(Hypothesis *hypo, Recycler<HypothesisBase*> &hypoRecycle,
- ArcLists &arcLists)
-{
- HypoCoverage key(&hypo->GetBitmap(), hypo->GetInputPath().range.GetEndPos());
- Moses2::HypothesisColl &coll = GetMiniStack(key);
- coll.Add(m_mgr, hypo, hypoRecycle, arcLists);
-}
-
-const Hypothesis *Stack::GetBestHypo() const
-{
- SCORE bestScore = -std::numeric_limits<SCORE>::infinity();
- const HypothesisBase *bestHypo = NULL;
- BOOST_FOREACH(const Coll::value_type &val, m_coll){
- const Moses2::HypothesisColl &hypos = *val.second;
- const Moses2::HypothesisBase *hypo = hypos.GetBestHypo();
-
- if (hypo && hypo->GetFutureScore() > bestScore) {
- bestScore = hypo->GetFutureScore();
- bestHypo = hypo;
- }
- }
- return &bestHypo->Cast<Hypothesis>();
-}
-
-size_t Stack::GetHypoSize() const
-{
- size_t ret = 0;
- BOOST_FOREACH(const Coll::value_type &val, m_coll){
- const Moses2::HypothesisColl &hypos = *val.second;
- ret += hypos.GetSize();
- }
- return ret;
-}
-
-Moses2::HypothesisColl &Stack::GetMiniStack(const HypoCoverage &key)
-{
- Moses2::HypothesisColl *ret;
- Coll::iterator iter = m_coll.find(key);
- if (iter == m_coll.end()) {
- if (m_miniStackRecycler.empty()) {
- ret = new Moses2::HypothesisColl(m_mgr);
- }
- else {
- ret = m_miniStackRecycler.back();
- ret->Clear();
- m_miniStackRecycler.pop_back();
- }
-
- m_coll[key] = ret;
- }
- else {
- ret = iter->second;
- }
- return *ret;
-}
-
-void Stack::Clear()
-{
- BOOST_FOREACH(const Coll::value_type &val, m_coll){
- Moses2::HypothesisColl *miniStack = val.second;
- m_miniStackRecycler.push_back(miniStack);
- }
-
- m_coll.clear();
-}
-
-void Stack::DebugCounts()
-{
- cerr << "counts=";
- BOOST_FOREACH(const Coll::value_type &val, GetColl()){
- const Moses2::HypothesisColl &miniStack = *val.second;
- size_t count = miniStack.GetSize();
- cerr << count << " ";
- }
- cerr << endl;
-}
-
-}
-
-}
-
diff --git a/contrib/moses2/PhraseBased/Manager.cpp b/contrib/moses2/PhraseBased/Manager.cpp
deleted file mode 100644
index 158b72592..000000000
--- a/contrib/moses2/PhraseBased/Manager.cpp
+++ /dev/null
@@ -1,280 +0,0 @@
-/*
- * Manager.cpp
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include <boost/functional/hash.hpp>
-#include <boost/unordered_set.hpp>
-#include <vector>
-#include <sstream>
-#include "Manager.h"
-#include "TargetPhraseImpl.h"
-#include "InputPath.h"
-#include "Sentence.h"
-
-#include "Normal/Search.h"
-#include "CubePruningMiniStack/Search.h"
-
-/*
- #include "CubePruningPerMiniStack/Search.h"
- #include "CubePruningPerBitmap/Search.h"
- #include "CubePruningCardinalStack/Search.h"
- #include "CubePruningBitmapStack/Search.h"
- */
-#include "../TrellisPaths.h"
-#include "../System.h"
-#include "../Phrase.h"
-#include "../InputPathsBase.h"
-#include "../TranslationModel/PhraseTable.h"
-#include "../TranslationModel/UnknownWordPenalty.h"
-#include "../legacy/Range.h"
-#include "../PhraseBased/TargetPhrases.h"
-
-using namespace std;
-
-namespace Moses2
-{
-Manager::Manager(System &sys, const TranslationTask &task,
- const std::string &inputStr, long translationId) :
- ManagerBase(sys, task, inputStr, translationId)
-,m_search(NULL)
-,m_bitmaps(NULL)
-{
- //cerr << translationId << " inputStr=" << inputStr << endl;
-}
-
-Manager::~Manager()
-{
- //cerr << "Start ~Manager " << this << endl;
- delete m_search;
- delete m_bitmaps;
- //cerr << "Finish ~Manager " << this << endl;
-}
-
-void Manager::Init()
-{
- // init pools etc
- InitPools();
-
- FactorCollection &vocab = system.GetVocab();
- m_input = Moses2::Sentence::CreateFromString(GetPool(), vocab, system, m_inputStr);
-
- m_bitmaps = new Bitmaps(GetPool());
-
- const PhraseTable &firstPt = *system.featureFunctions.phraseTables[0];
- m_initPhrase = new (GetPool().Allocate<TargetPhraseImpl>()) TargetPhraseImpl(
- GetPool(), firstPt, system, 0);
-
- const Sentence &sentence = static_cast<const Sentence&>(GetInput());
- //cerr << "sentence=" << sentence.GetSize() << " " << sentence.Debug(system) << endl;
-
- m_inputPaths.Init(sentence, *this);
-
- // xml
- const UnknownWordPenalty *unkWP = system.featureFunctions.GetUnknownWordPenalty();
- UTIL_THROW_IF2(unkWP == NULL, "There must be a UnknownWordPenalty FF");
- unkWP->ProcessXML(*this, GetPool(), sentence, m_inputPaths);
-
- // lookup with every pt
- const std::vector<const PhraseTable*> &pts = system.mappings;
- for (size_t i = 0; i < pts.size(); ++i) {
- const PhraseTable &pt = *pts[i];
- //cerr << "Looking up from " << pt.GetName() << endl;
- pt.Lookup(*this, m_inputPaths);
- }
- //m_inputPaths.DeleteUnusedPaths();
- CalcFutureScore();
-
- m_bitmaps->Init(sentence.GetSize(), vector<bool>(0));
-
- switch (system.options.search.algo) {
- case Normal:
- m_search = new NSNormal::Search(*this);
- break;
- case NormalBatch:
- //m_search = new NSBatch::Search(*this);
- UTIL_THROW2("Not implemented");
- break;
- case CubePruning:
- case CubePruningMiniStack:
- m_search = new NSCubePruningMiniStack::Search(*this);
- break;
- /*
- case CubePruningPerMiniStack:
- m_search = new NSCubePruningPerMiniStack::Search(*this);
- break;
- case CubePruningPerBitmap:
- m_search = new NSCubePruningPerBitmap::Search(*this);
- break;
- case CubePruningCardinalStack:
- m_search = new NSCubePruningCardinalStack::Search(*this);
- break;
- case CubePruningBitmapStack:
- m_search = new NSCubePruningBitmapStack::Search(*this);
- break;
- */
- default:
- UTIL_THROW2("Unknown search algorithm");
- }
-}
-
-void Manager::Decode()
-{
- //cerr << "Start Decode " << this << endl;
-
- Init();
- m_search->Decode();
-
- //cerr << "Finished Decode " << this << endl;
-}
-
-void Manager::CalcFutureScore()
-{
- const Sentence &sentence = static_cast<const Sentence&>(GetInput());
- size_t size = sentence.GetSize();
- m_estimatedScores =
- new (GetPool().Allocate<EstimatedScores>()) EstimatedScores(GetPool(),
- size);
- m_estimatedScores->InitTriangle(-numeric_limits<SCORE>::infinity());
-
- // walk all the translation options and record the cheapest option for each span
- BOOST_FOREACH(const InputPathBase *path, m_inputPaths){
- const Range &range = path->range;
- SCORE bestScore = -numeric_limits<SCORE>::infinity();
-
- size_t numPt = system.mappings.size();
- for (size_t i = 0; i < numPt; ++i) {
- const TargetPhrases *tps = static_cast<const InputPath*>(path)->targetPhrases[i];
- if (tps) {
- BOOST_FOREACH(const TargetPhraseImpl *tp, *tps) {
- SCORE score = tp->GetFutureScore();
- if (score > bestScore) {
- bestScore = score;
- }
- }
- }
- }
- m_estimatedScores->SetValue(range.GetStartPos(), range.GetEndPos(), bestScore);
- }
-
- // now fill all the cells in the strictly upper triangle
- // there is no way to modify the diagonal now, in the case
- // where no translation option covers a single-word span,
- // we leave the +inf in the matrix
- // like in chart parsing we want each cell to contain the highest score
- // of the full-span trOpt or the sum of scores of joining two smaller spans
-
- for (size_t colstart = 1; colstart < size; colstart++) {
- for (size_t diagshift = 0; diagshift < size - colstart; diagshift++) {
- size_t sPos = diagshift;
- size_t ePos = colstart + diagshift;
- for (size_t joinAt = sPos; joinAt < ePos; joinAt++) {
- float joinedScore = m_estimatedScores->GetValue(sPos, joinAt)
- + m_estimatedScores->GetValue(joinAt + 1, ePos);
- // uncomment to see the cell filling scheme
- // TRACE_ERR("[" << sPos << "," << ePos << "] <-? ["
- // << sPos << "," << joinAt << "]+["
- // << joinAt+1 << "," << ePos << "] (colstart: "
- // << colstart << ", diagshift: " << diagshift << ")"
- // << endl);
-
- if (joinedScore > m_estimatedScores->GetValue(sPos, ePos)) m_estimatedScores->SetValue(
- sPos, ePos, joinedScore);
- }
- }
- }
-
- //cerr << "Square matrix:" << endl;
- //cerr << *m_estimatedScores << endl;
-}
-
-std::string Manager::OutputBest() const
-{
- stringstream out;
- Moses2::FixPrecision(out);
-
- const Hypothesis *bestHypo = m_search->GetBestHypo();
- if (bestHypo) {
- if (system.options.output.ReportHypoScore) {
- out << bestHypo->GetScores().GetTotalScore() << " ";
- }
-
- bestHypo->OutputToStream(out);
- //cerr << "BEST TRANSLATION: " << *bestHypo;
- }
- else {
- if (system.options.output.ReportHypoScore) {
- out << "0 ";
- }
- //cerr << "NO TRANSLATION " << m_input->GetTranslationId() << endl;
- }
-
- return out.str();
- //cerr << endl;
-}
-
-std::string Manager::OutputNBest()
-{
- arcLists.Sort();
-
- boost::unordered_set<size_t> distinctHypos;
-
- TrellisPaths<TrellisPath> contenders;
- m_search->AddInitialTrellisPaths(contenders);
-
- long transId = GetTranslationId();
-
- // MAIN LOOP
- stringstream out;
- //Moses2::FixPrecision(out);
-
- size_t maxIter = system.options.nbest.nbest_size * system.options.nbest.factor;
- size_t bestInd = 0;
- for (size_t i = 0; i < maxIter; ++i) {
- if (bestInd > system.options.nbest.nbest_size || contenders.empty()) {
- break;
- }
-
- //cerr << "bestInd=" << bestInd << endl;
- TrellisPath *path = contenders.Get();
-
- bool ok = false;
- if (system.options.nbest.only_distinct) {
- string tgtPhrase = path->OutputTargetPhrase(system);
- //cerr << "tgtPhrase=" << tgtPhrase << endl;
- boost::hash<std::string> string_hash;
- size_t hash = string_hash(tgtPhrase);
-
- if (distinctHypos.insert(hash).second) {
- ok = true;
- }
- }
- else {
- ok = true;
- }
-
- if (ok) {
- ++bestInd;
- out << transId << " ||| ";
- path->OutputToStream(out, system);
- out << "\n";
- }
-
- // create next paths
- path->CreateDeviantPaths(contenders, arcLists, GetPool(), system);
-
- delete path;
- }
-
- return out.str();
-}
-
-std::string Manager::OutputTransOpt()
-{
- return "";
-}
-
-}
-
diff --git a/contrib/moses2/PhraseBased/Normal/Search.cpp b/contrib/moses2/PhraseBased/Normal/Search.cpp
deleted file mode 100644
index 7c5026a7c..000000000
--- a/contrib/moses2/PhraseBased/Normal/Search.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * SearchNormal.cpp
- *
- * Created on: 25 Oct 2015
- * Author: hieu
- */
-
-#include "Search.h"
-#include <algorithm>
-#include <boost/foreach.hpp>
-#include "Stack.h"
-#include "../Manager.h"
-#include "../TrellisPath.h"
-#include "../Sentence.h"
-#include "../../TrellisPaths.h"
-#include "../../InputPathsBase.h"
-#include "../../Phrase.h"
-#include "../../System.h"
-#include "../../PhraseBased/TargetPhrases.h"
-
-using namespace std;
-
-namespace Moses2
-{
-namespace NSNormal
-{
-
-Search::Search(Manager &mgr)
-:Moses2::Search(mgr)
-, m_stacks(mgr)
-{
- // TODO Auto-generated constructor stub
-
-}
-
-Search::~Search()
-{
- // TODO Auto-generated destructor stub
-}
-
-void Search::Decode()
-{
- // init stacks
- const Sentence &sentence = static_cast<const Sentence&>(mgr.GetInput());
- m_stacks.Init(mgr, sentence.GetSize() + 1);
-
- const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
- Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
- initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(),
- initBitmap);
- initHypo->EmptyHypothesisState(mgr.GetInput());
-
- m_stacks.Add(initHypo, mgr.GetHypoRecycle(), mgr.arcLists);
-
- for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) {
- Decode(stackInd);
- //cerr << m_stacks << endl;
-
- // delete stack to save mem
- if (stackInd < m_stacks.GetSize() - 1) {
- m_stacks.Delete(stackInd);
- }
- //cerr << m_stacks.Debug(mgr.system) << endl;
- }
-}
-
-void Search::Decode(size_t stackInd)
-{
- //cerr << "stackInd=" << stackInd << endl;
- Stack &stack = m_stacks[stackInd];
- if (&stack == &m_stacks.Back()) {
- // last stack. don't do anythin
- return;
- }
-
- const Hypotheses &hypos = stack.GetSortedAndPrunedHypos(mgr, mgr.arcLists);
- //cerr << "hypos=" << hypos.size() << endl;
-
- const InputPaths &paths = mgr.GetInputPaths();
-
- BOOST_FOREACH(const InputPathBase *path, paths){
- BOOST_FOREACH(const HypothesisBase *hypo, hypos) {
- Extend(*static_cast<const Hypothesis*>(hypo), *static_cast<const InputPath*>(path));
- }
- }
-}
-
-void Search::Extend(const Hypothesis &hypo, const InputPath &path)
-{
- const Bitmap &hypoBitmap = hypo.GetBitmap();
- const Range &hypoRange = hypo.GetInputPath().range;
- const Range &pathRange = path.range;
-
- if (!CanExtend(hypoBitmap, hypoRange.GetEndPos(), pathRange)) {
- return;
- }
-
- const ReorderingConstraint &reorderingConstraint = mgr.GetInput().GetReorderingConstraint();
- if (!reorderingConstraint.Check(hypoBitmap, pathRange.GetStartPos(), pathRange.GetEndPos())) {
- return;
- }
-
- // extend this hypo
- const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
- //SCORE estimatedScore = mgr.GetEstimatedScores().CalcFutureScore2(bitmap, pathRange.GetStartPos(), pathRange.GetEndPos());
- SCORE estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
-
- size_t numPt = mgr.system.mappings.size();
- const TargetPhrases **tpsAllPt = path.targetPhrases;
- for (size_t i = 0; i < numPt; ++i) {
- const TargetPhrases *tps = tpsAllPt[i];
- if (tps) {
- Extend(hypo, *tps, path, newBitmap, estimatedScore);
- }
- }
-}
-
-void Search::Extend(const Hypothesis &hypo, const TargetPhrases &tps,
- const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore)
-{
- BOOST_FOREACH(const TargetPhraseImpl *tp, tps){
- Extend(hypo, *tp, path, newBitmap, estimatedScore);
- }
-}
-
-void Search::Extend(const Hypothesis &hypo, const TargetPhraseImpl &tp,
- const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore)
-{
- Hypothesis *newHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
- newHypo->Init(mgr, hypo, path, tp, newBitmap, estimatedScore);
- newHypo->EvaluateWhenApplied();
-
- m_stacks.Add(newHypo, mgr.GetHypoRecycle(), mgr.arcLists);
-
- //m_arcLists.AddArc(stackAdded.added, newHypo, stackAdded.other);
- //stack.Prune(mgr.GetHypoRecycle(), mgr.system.stackSize, mgr.system.stackSize * 2);
-
-}
-
-const Hypothesis *Search::GetBestHypo() const
-{
- const Stack &lastStack = m_stacks.Back();
- const Hypothesis *best = lastStack.GetBestHypo<Hypothesis>();
- return best;
-}
-
-void Search::AddInitialTrellisPaths(TrellisPaths<TrellisPath> &paths) const
-{
- const Stack &lastStack = m_stacks.Back();
- const Hypotheses &hypos = lastStack.GetSortedAndPrunedHypos(mgr, mgr.arcLists);
-
- BOOST_FOREACH(const HypothesisBase *hypoBase, hypos){
- const Hypothesis *hypo = static_cast<const Hypothesis*>(hypoBase);
- TrellisPath *path = new TrellisPath(hypo, mgr.arcLists);
- paths.Add(path);
- }
-}
-
-} // namespace
-}
-
diff --git a/contrib/moses2/PhraseBased/Sentence.cpp b/contrib/moses2/PhraseBased/Sentence.cpp
deleted file mode 100644
index dbedf878e..000000000
--- a/contrib/moses2/PhraseBased/Sentence.cpp
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Sentence.cpp
- *
- * Created on: 14 Dec 2015
- * Author: hieu
- */
-#include <boost/property_tree/ptree.hpp>
-#include <boost/property_tree/xml_parser.hpp>
-#include "Sentence.h"
-#include "../System.h"
-#include "../parameters/AllOptions.h"
-#include "../legacy/Util2.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-Sentence *Sentence::CreateFromString(MemPool &pool, FactorCollection &vocab,
- const System &system, const std::string &str)
-{
- Sentence *ret;
-
- if (system.options.input.xml_policy) {
- // xml
- ret = CreateFromStringXML(pool, vocab, system, str);
- }
- else {
- // no xml
- //cerr << "PB Sentence" << endl;
- std::vector<std::string> toks = Tokenize(str);
-
- size_t size = toks.size();
- ret = new (pool.Allocate<Sentence>()) Sentence(pool, size);
- ret->PhraseImplTemplate<Word>::CreateFromString(vocab, system, toks, false);
- }
-
- //cerr << "REORDERING CONSTRAINTS:" << ret->GetReorderingConstraint() << endl;
- //cerr << "ret=" << ret->Debug(system) << endl;
-
- return ret;
-}
-
-Sentence *Sentence::CreateFromStringXML(MemPool &pool, FactorCollection &vocab,
- const System &system, const std::string &str)
-{
- Sentence *ret;
-
- vector<XMLOption*> xmlOptions;
- pugi::xml_document doc;
-
- string str2 = "<xml>" + str + "</xml>";
- pugi::xml_parse_result result = doc.load(str2.c_str(),
- pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_eol | pugi::parse_comments);
- pugi::xml_node topNode = doc.child("xml");
-
- std::vector<std::string> toks;
- XMLParse(pool, system, 0, topNode, toks, xmlOptions);
-
- // debug
- /*
- cerr << "xmloptions:" << endl;
- for (size_t i = 0; i < xmlOptions.size(); ++i) {
- cerr << xmlOptions[i]->Debug(system) << endl;
- }
- */
-
- // create words
- size_t size = toks.size();
- ret = new (pool.Allocate<Sentence>()) Sentence(pool, size);
- ret->PhraseImplTemplate<Word>::CreateFromString(vocab, system, toks, false);
-
- // xml
- ret->Init(system, size, system.options.reordering.max_distortion);
-
- ReorderingConstraint &reorderingConstraint = ret->GetReorderingConstraint();
-
- // set reordering walls, if "-monotone-at-punction" is set
- if (system.options.reordering.monotone_at_punct && ret->GetSize()) {
- reorderingConstraint.SetMonotoneAtPunctuation(*ret);
- }
-
- // set walls obtained from xml
- for(size_t i=0; i<xmlOptions.size(); i++) {
- const XMLOption *xmlOption = xmlOptions[i];
- if(strcmp(xmlOption->GetNodeName(), "wall") == 0) {
- UTIL_THROW_IF2(xmlOption->startPos > ret->GetSize(), "wall is beyond the sentence"); // no buggy walls, please
- reorderingConstraint.SetWall(xmlOption->startPos - 1, true);
- }
- else if (strcmp(xmlOption->GetNodeName(), "zone") == 0) {
- reorderingConstraint.SetZone( xmlOption->startPos, xmlOption->startPos + xmlOption->phraseSize -1 );
- }
- else if (strcmp(xmlOption->GetNodeName(), "ne") == 0) {
- FactorType placeholderFactor = system.options.input.placeholder_factor;
- UTIL_THROW_IF2(placeholderFactor == NOT_FOUND,
- "Placeholder XML in input. Must have argument -placeholder-factor [NUM]");
- UTIL_THROW_IF2(xmlOption->phraseSize != 1,
- "Placeholder must only cover 1 word");
-
- const Factor *factor = vocab.AddFactor(xmlOption->GetEntity(), system, false);
- (*ret)[xmlOption->startPos][placeholderFactor] = factor;
- }
- else {
- // default - forced translation. Add to class variable
- ret->AddXMLOption(system, xmlOption);
- }
- }
- reorderingConstraint.FinalizeWalls();
-
- return ret;
-}
-
-void Sentence::XMLParse(
- MemPool &pool,
- const System &system,
- size_t depth,
- const pugi::xml_node &parentNode,
- std::vector<std::string> &toks,
- vector<XMLOption*> &xmlOptions)
-{ // pugixml
- for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling()) {
- string nodeName = childNode.name();
- //cerr << depth << " nodeName=" << nodeName << endl;
-
- int startPos = toks.size();
-
- string value = childNode.value();
- if (!value.empty()) {
- //cerr << depth << "childNode text=" << value << endl;
- std::vector<std::string> subPhraseToks = Tokenize(value);
- for (size_t i = 0; i < subPhraseToks.size(); ++i) {
- toks.push_back(subPhraseToks[i]);
- }
- }
-
- if (!nodeName.empty()) {
- XMLOption *xmlOption = new (pool.Allocate<XMLOption>()) XMLOption(pool, nodeName, startPos);
-
- pugi::xml_attribute attr;
- attr = childNode.attribute("translation");
- if (!attr.empty()) {
- xmlOption->SetTranslation(pool, attr.as_string());
- }
-
- attr = childNode.attribute("entity");
- if (!attr.empty()) {
- xmlOption->SetEntity(pool, attr.as_string());
- }
-
- attr = childNode.attribute("prob");
- if (!attr.empty()) {
- xmlOption->prob = attr.as_float();
- }
-
- xmlOptions.push_back(xmlOption);
-
- // recursively call this function. For proper recursive trees
- XMLParse(pool, system, depth + 1, childNode, toks, xmlOptions);
-
- size_t endPos = toks.size();
- xmlOption->phraseSize = endPos - startPos;
-
- /*
- cerr << "xmlOptions=";
- xmlOption->Debug(cerr, system);
- cerr << endl;
- */
- }
-
- }
-}
-
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/SCFG/Sentence.cpp b/contrib/moses2/SCFG/Sentence.cpp
deleted file mode 100644
index 5e69a7e23..000000000
--- a/contrib/moses2/SCFG/Sentence.cpp
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Sentence.cpp
- *
- * Created on: 14 Dec 2015
- * Author: hieu
- */
-
-#include "Sentence.h"
-#include "../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-namespace SCFG
-{
-Sentence *Sentence::CreateFromString(MemPool &pool, FactorCollection &vocab,
- const System &system, const std::string &str, long translationId)
-{
- //cerr << "SCFG Sentence" << endl;
-
- Sentence *ret;
-
- if (system.options.input.xml_policy) {
- // xml
- ret = CreateFromStringXML(pool, vocab, system, str);
- //cerr << "ret=" << ret->Debug(system) << endl;
- }
- else {
- std::vector<std::string> toks = Tokenize(str);
- size_t size = toks.size() + 2;
-
- ret = new (pool.Allocate<SCFG::Sentence>()) Sentence(pool, size);
- ret->PhraseImplTemplate<SCFG::Word>::CreateFromString(vocab, system, toks, true);
-
- }
-
- return ret;
-}
-
-Sentence *Sentence::CreateFromStringXML(MemPool &pool, FactorCollection &vocab,
- const System &system, const std::string &str)
-{
- Sentence *ret;
-
- vector<XMLOption*> xmlOptions;
- pugi::xml_document doc;
-
- string str2 = "<xml>" + str + "</xml>";
- pugi::xml_parse_result result = doc.load(str2.c_str(),
- pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_eol | pugi::parse_comments);
- pugi::xml_node topNode = doc.child("xml");
-
- std::vector<std::string> toks;
- XMLParse(pool, system, 0, topNode, toks, xmlOptions);
-
- // debug
- /*
- cerr << "xmloptions:" << endl;
- for (size_t i = 0; i < xmlOptions.size(); ++i) {
- cerr << xmlOptions[i]->Debug(system) << endl;
- }
- */
-
- // create words
- size_t size = toks.size() + 2;
- ret = new (pool.Allocate<Sentence>()) Sentence(pool, size);
- ret->PhraseImplTemplate<SCFG::Word>::CreateFromString(vocab, system, toks, true);
-
- // xml
- for(size_t i=0; i<xmlOptions.size(); i++) {
- const XMLOption *xmlOption = xmlOptions[i];
- if (strcmp(xmlOption->GetNodeName(), "ne") == 0) {
- FactorType placeholderFactor = system.options.input.placeholder_factor;
- UTIL_THROW_IF2(placeholderFactor == NOT_FOUND,
- "Placeholder XML in input. Must have argument -placeholder-factor [NUM]");
- UTIL_THROW_IF2(xmlOption->phraseSize != 1,
- "Placeholder must only cover 1 word");
-
- const Factor *factor = vocab.AddFactor(xmlOption->GetEntity(), system, false);
- (*ret)[xmlOption->startPos + 1][placeholderFactor] = factor;
- }
- else {
- // default - forced translation. Add to class variable
- ret->AddXMLOption(system, xmlOption);
- }
- }
-
- //cerr << "ret=" << ret->Debug(system) << endl;
- return ret;
-}
-
-void Sentence::XMLParse(
- MemPool &pool,
- const System &system,
- size_t depth,
- const pugi::xml_node &parentNode,
- std::vector<std::string> &toks,
- vector<XMLOption*> &xmlOptions)
-{ // pugixml
- for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling()) {
- string nodeName = childNode.name();
- //cerr << depth << " nodeName=" << nodeName << endl;
-
- int startPos = toks.size();
-
- string value = childNode.value();
- if (!value.empty()) {
- //cerr << depth << "childNode text=" << value << endl;
- std::vector<std::string> subPhraseToks = Tokenize(value);
- for (size_t i = 0; i < subPhraseToks.size(); ++i) {
- toks.push_back(subPhraseToks[i]);
- }
- }
-
- if (!nodeName.empty()) {
- XMLOption *xmlOption = new (pool.Allocate<XMLOption>()) XMLOption(pool, nodeName, startPos);
-
- pugi::xml_attribute attr;
- attr = childNode.attribute("translation");
- if (!attr.empty()) {
- xmlOption->SetTranslation(pool, attr.as_string());
- }
-
- attr = childNode.attribute("entity");
- if (!attr.empty()) {
- xmlOption->SetEntity(pool, attr.as_string());
- }
-
- attr = childNode.attribute("prob");
- if (!attr.empty()) {
- xmlOption->prob = attr.as_float();
- }
-
- xmlOptions.push_back(xmlOption);
-
- // recursively call this function. For proper recursive trees
- XMLParse(pool, system, depth + 1, childNode, toks, xmlOptions);
-
- size_t endPos = toks.size();
- xmlOption->phraseSize = endPos - startPos;
-
- /*
- cerr << "xmlOptions=";
- xmlOption->Debug(cerr, system);
- cerr << endl;
- */
- }
-
- }
-}
-
-}
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/SCFG/nbest/KBestExtractor.cpp b/contrib/moses2/SCFG/nbest/KBestExtractor.cpp
deleted file mode 100644
index ae7ec8634..000000000
--- a/contrib/moses2/SCFG/nbest/KBestExtractor.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * KBestExtractor.cpp
- *
- * Created on: 2 Aug 2016
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include <sstream>
-#include "KBestExtractor.h"
-#include "../Manager.h"
-#include "../Hypothesis.h"
-#include "../Stacks.h"
-#include "../Stack.h"
-#include "../Sentence.h"
-#include "../../System.h"
-#include "../../Scores.h"
-#include "../../legacy/Util2.h"
-
-using namespace std;
-
-namespace Moses2
-{
-//bool g_debug = false;
-
-namespace SCFG
-{
-/////////////////////////////////////////////////////////////
-KBestExtractor::KBestExtractor(const SCFG::Manager &mgr)
-:m_mgr(mgr)
-{
-
-}
-
-KBestExtractor::~KBestExtractor()
-{
-}
-
-void KBestExtractor::OutputToStream(std::stringstream &strm)
-{
- //cerr << "1" << flush;
- const Stack &lastStack = m_mgr.GetStacks().GetLastStack();
- UTIL_THROW_IF2(lastStack.GetColl().size() != 1, "Only suppose to be 1 hypo coll in last stack");
- UTIL_THROW_IF2(lastStack.GetColl().begin()->second == NULL, "NULL hypo collection");
-
- const Hypotheses &hypos = lastStack.GetColl().begin()->second->GetSortedAndPrunedHypos(m_mgr, m_mgr.arcLists);
- UTIL_THROW_IF2(hypos.size() != 1, "Only suppose to be 1 hypo in collection");
- const HypothesisBase *hypo = hypos[0];
-
- const ArcLists &arcLists = m_mgr.arcLists;
- const ArcList &arcList = arcLists.GetArcList(hypo);
- NBests &nbests = m_nbestColl.GetOrCreateNBests(m_mgr, arcList);
-
- size_t ind = 0;
- while (nbests.Extend(m_mgr, m_nbestColl, ind)) {
- const NBest &deriv = nbests.Get(ind);
- strm << m_mgr.GetTranslationId() << " ||| ";
- //cerr << "1" << flush;
- strm << deriv.GetStringExclSentenceMarkers();
- //cerr << "2" << flush;
- strm << " ||| ";
- deriv.GetScores().OutputBreakdown(strm, m_mgr.system);
- //cerr << "3" << flush;
- strm << "||| ";
- strm << deriv.GetScores().GetTotalScore();
- //cerr << "4" << flush;
-
- strm << endl;
-
- ++ind;
- }
-}
-
-}
-} /* namespace Moses2 */
diff --git a/contrib/moses2/SCFG/nbest/NBest.cpp b/contrib/moses2/SCFG/nbest/NBest.cpp
deleted file mode 100644
index 99c005ee3..000000000
--- a/contrib/moses2/SCFG/nbest/NBest.cpp
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * NBest.cpp
- *
- * Created on: 24 Aug 2016
- * Author: hieu
- */
-#include <sstream>
-#include <boost/foreach.hpp>
-#include "util/exception.hh"
-#include "NBest.h"
-#include "NBests.h"
-#include "NBestColl.h"
-#include "../Manager.h"
-#include "../TargetPhraseImpl.h"
-#include "../../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-namespace SCFG
-{
-
-NBest::NBest(
- const SCFG::Manager &mgr,
- const ArcList &varcList,
- size_t vind,
- NBestColl &nbestColl)
-:arcList(&varcList)
-,arcInd(vind)
-{
- const SCFG::Hypothesis &hypo = GetHypo();
-
- // copy scores from best hypo
- MemPool &pool = mgr.GetPool();
- m_scores = new (pool.Allocate<Scores>())
- Scores(mgr.system, pool, mgr.system.featureFunctions.GetNumScores(), hypo.GetScores());
-
- // children
- const ArcLists &arcLists = mgr.arcLists;
- //const SCFG::TargetPhraseImpl &tp = hypo.GetTargetPhrase();
-
- const Vector<const Hypothesis*> &prevHypos = hypo.GetPrevHypos();
- for (size_t i = 0; i < prevHypos.size(); ++i) {
- const SCFG::Hypothesis *prevHypo = prevHypos[i];
- const ArcList &childArc = arcLists.GetArcList(prevHypo);
- NBests &childNBests = nbestColl.GetOrCreateNBests(mgr, childArc);
- Child child(&childNBests, 0);
- children.push_back(child);
- }
-
- stringstream strm;
- OutputToStream(mgr, strm);
- m_str = strm.str();
-}
-
-NBest::NBest(const SCFG::Manager &mgr,
- const NBest &orig,
- size_t childInd,
- NBestColl &nbestColl)
-:arcList(orig.arcList)
-,arcInd(orig.arcInd)
-,children(orig.children)
-{
- Child &child = children[childInd];
- size_t &ind = child.second;
- ++ind;
- UTIL_THROW_IF2(ind >= child.first->GetSize(),
- "out of bound:" << ind << ">=" << child.first->GetSize());
-
- // scores
- MemPool &pool = mgr.GetPool();
- m_scores = new (pool.Allocate<Scores>())
- Scores(mgr.system,
- pool,
- mgr.system.featureFunctions.GetNumScores(),
- orig.GetScores());
-
- const Scores &origScores = orig.GetChild(childInd).GetScores();
- const Scores &newScores = GetChild(childInd).GetScores();
-
- m_scores->MinusEquals(mgr.system, origScores);
- m_scores->PlusEquals(mgr.system, newScores);
-
- stringstream strm;
- OutputToStream(mgr, strm);
- m_str = strm.str();
-}
-
-const SCFG::Hypothesis &NBest::GetHypo() const
-{
- const HypothesisBase *hypoBase = (*arcList)[arcInd];
- const SCFG::Hypothesis &hypo = *static_cast<const SCFG::Hypothesis*>(hypoBase);
- return hypo;
-}
-
-const NBest &NBest::GetChild(size_t ind) const
-{
- const Child &child = children[ind];
- const NBests &nbests = *child.first;
- const NBest &nbest = nbests.Get(child.second);
- return nbest;
-}
-
-
-void NBest::CreateDeviants(
- const SCFG::Manager &mgr,
- NBestColl &nbestColl,
- Contenders &contenders) const
-{
- if (arcInd + 1 < arcList->size()) {
- // to use next arclist, all children must be 1st. Not sure if this is correct
- bool ok = true;
- BOOST_FOREACH(const Child &child, children) {
- if (child.second) {
- ok = false;
- break;
- }
- }
-
- if (ok) {
- NBest *next = new NBest(mgr, *arcList, arcInd + 1, nbestColl);
- contenders.push(next);
- }
- }
-
- for (size_t childInd = 0; childInd < children.size(); ++childInd) {
- const Child &child = children[childInd];
- NBests &childNBests = *child.first;
- bool extended = childNBests.Extend(mgr, nbestColl, child.second + 1);
- if (extended) {
- //cerr << "HH1 " << childInd << endl;
- NBest *next = new NBest(mgr, *this, childInd, nbestColl);
-
- //cerr << "HH2 " << childInd << endl;
- contenders.push(next);
- //cerr << "HH3 " << childInd << endl;
- }
- }
-}
-
-void NBest::OutputToStream(
- const SCFG::Manager &mgr,
- std::stringstream &strm) const
-{
- const SCFG::Hypothesis &hypo = GetHypo();
- //strm << &hypo << " ";
-
- const SCFG::TargetPhraseImpl &tp = hypo.GetTargetPhrase();
-
- for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
- const SCFG::Word &word = tp[targetPos];
- //cerr << "word " << pos << "=" << word << endl;
- if (word.isNonTerminal) {
- //cerr << "is nt" << endl;
- // non-term. fill out with prev hypo
- size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[targetPos];
-
- UTIL_THROW_IF2(nonTermInd >= children.size(), "Out of bounds:" << nonTermInd << ">=" << children.size());
-
- const NBest &nbest = GetChild(nonTermInd);
- strm << nbest.GetString();
- }
- else {
- //cerr << "not nt" << endl;
- word.OutputToStream(hypo.GetManager(), targetPos, hypo, strm);
-
- strm << " ";
- }
- }
-}
-
-std::string NBest::Debug(const System &system) const
-{
- stringstream strm;
- strm << GetScores().GetTotalScore() << " "
- << arcList << "("
- << arcList->size() << ")["
- << arcInd << "] ";
- for (size_t i = 0; i < children.size(); ++i) {
- const Child &child = children[i];
- const NBest &childNBest = child.first->Get(child.second);
-
- strm << child.first << "("
- << child.first->GetSize() << ")["
- << child.second << "]";
- strm << childNBest.GetScores().GetTotalScore() << " ";
- }
- return strm.str();
-}
-
-}
-}
diff --git a/contrib/moses2/SCFG/nbest/NBest.h b/contrib/moses2/SCFG/nbest/NBest.h
deleted file mode 100644
index fa21866bb..000000000
--- a/contrib/moses2/SCFG/nbest/NBest.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * NBest.h
- *
- * Created on: 24 Aug 2016
- * Author: hieu
- */
-
-#pragma once
-#include <queue>
-#include <vector>
-#include <string>
-#include <stdlib.h>
-#include "../../Scores.h"
-#include "../../ArcLists.h"
-
-namespace Moses2
-{
-class Scores;
-class System;
-
-namespace SCFG
-{
-class NBest;
-class NBests;
-class NBestScoreOrderer;
-class Manager;
-class NBestColl;
-class Hypothesis;
-
-/////////////////////////////////////////////////////////////
-typedef std::priority_queue<NBest*, std::vector<NBest*>, NBestScoreOrderer> Contenders;
-
-/////////////////////////////////////////////////////////////
-class NBest
-{
-public:
- const ArcList *arcList;
- size_t arcInd;
-
- typedef std::pair<NBests*, size_t> Child; // key to another NBest
- typedef std::vector<Child> Children;
- Children children;
-
- NBest(const SCFG::Manager &mgr,
- const ArcList &varcList,
- size_t vind,
- NBestColl &nbestColl);
-
- NBest(const SCFG::Manager &mgr,
- const NBest &orig,
- size_t childInd,
- NBestColl &nbestColl);
-
-
- void CreateDeviants(
- const SCFG::Manager &mgr,
- NBestColl &nbestColl,
- Contenders &contenders) const;
-
- const Scores &GetScores() const
- { return *m_scores; }
-
- const NBest &GetChild(size_t ind) const;
-
- const std::string &GetString() const
- { return m_str; }
-
- std::string GetStringExclSentenceMarkers() const
- {
- std::string ret = m_str.substr(4, m_str.size() - 10);
- return ret;
- }
-
- std::string Debug(const System &system) const;
-
-protected:
- Scores *m_scores;
- std::string m_str;
-
- const SCFG::Hypothesis &GetHypo() const;
-
- void OutputToStream(
- const SCFG::Manager &mgr,
- std::stringstream &strm) const;
-};
-
-/////////////////////////////////////////////////////////////
-class NBestScoreOrderer
-{
-public:
- bool operator()(const NBest* a, const NBest* b) const
- {
- return a->GetScores().GetTotalScore() < b->GetScores().GetTotalScore();
- }
-};
-
-}
-}
-
diff --git a/contrib/moses2/SCFG/nbest/NBests.cpp b/contrib/moses2/SCFG/nbest/NBests.cpp
deleted file mode 100644
index ea7e835dc..000000000
--- a/contrib/moses2/SCFG/nbest/NBests.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * NBests.cpp
- *
- * Created on: 24 Aug 2016
- * Author: hieu
- */
-
-#include <boost/foreach.hpp>
-#include "NBests.h"
-#include "../Manager.h"
-#include "../../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-namespace SCFG
-{
-NBests::NBests(const SCFG::Manager &mgr,
- const ArcList &arcList,
- NBestColl &nbestColl)
-:indIter(0)
-{
- // best
- NBest *contender = new NBest(mgr, arcList, 0, nbestColl);
- contenders.push(contender);
- bool extended = Extend(mgr, nbestColl, 0);
- assert(extended);
-}
-
-NBests::~NBests()
-{
- BOOST_FOREACH(const NBest *nbest, m_coll) {
- delete nbest;
- }
-
- // delete bad contenders left in queue
- while (!contenders.empty()) {
- NBest *contender = contenders.top();
- contenders.pop();
- delete contender;
- }
-}
-
-bool NBests::Extend(const SCFG::Manager &mgr,
- NBestColl &nbestColl,
- size_t ind)
-{
- if (ind < m_coll.size()) {
- // asking for 1 we've dont already
- return true;
- }
-
- assert(ind == m_coll.size());
-
- // checks
- if (ind >= mgr.system.options.nbest.nbest_size) {
- return false;
- }
-
- size_t maxIter = mgr.system.options.nbest.nbest_size * mgr.system.options.nbest.factor;
-
- // MAIN LOOP, create 1 new deriv.
- // The loop is for distinct nbest
- bool ok = false;
- while (!ok) {
- ++indIter;
- if (indIter > maxIter) {
- return false;
- }
-
- if (contenders.empty()) {
- return false;
- }
-
- NBest *contender = contenders.top();
- contenders.pop();
-
- contender->CreateDeviants(mgr, nbestColl, contenders);
-
- if (mgr.system.options.nbest.only_distinct) {
- const string &tgtPhrase = contender->GetString();
- //cerr << "tgtPhrase=" << tgtPhrase << endl;
- boost::hash<std::string> string_hash;
- size_t hash = string_hash(tgtPhrase);
-
- if (distinctHypos.insert(hash).second) {
- ok = true;
- }
- }
- else {
- ok = true;
- }
-
- if (ok) {
- Add(contender);
- //cerr << best->GetScores().GetTotalScore() << " ";
- //cerr << best->Debug(mgr.system) << endl;
- return true;
- }
- else {
- delete contender;
- }
- }
-
- return false;
-}
-
-}
-}
-
diff --git a/contrib/moses2/SCFG/nbest/NBests.h b/contrib/moses2/SCFG/nbest/NBests.h
deleted file mode 100644
index a9cb93a5d..000000000
--- a/contrib/moses2/SCFG/nbest/NBests.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * NBests.h
- *
- * Created on: 24 Aug 2016
- * Author: hieu
- */
-
-#pragma once
-#include <boost/unordered_set.hpp>
-#include "NBest.h"
-
-namespace Moses2
-{
-namespace SCFG
-{
-
-class NBests
-{
-public:
- Contenders contenders;
- boost::unordered_set<size_t> distinctHypos;
-
- NBests(const SCFG::Manager &mgr,
- const ArcList &arcList,
- NBestColl &nbestColl);
-
- virtual ~NBests();
-
- size_t GetSize() const
- { return m_coll.size(); }
-
- const NBest &Get(size_t ind) const
- { return *m_coll[ind]; }
-
- bool Extend(const SCFG::Manager &mgr,
- NBestColl &nbestColl,
- size_t ind);
-
-protected:
- std::vector<const NBest*> m_coll;
- size_t indIter;
-
- void Add(const NBest *nbest)
- {
- m_coll.push_back(nbest);
- }
-
-};
-
-
-}
-}
-
diff --git a/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp b/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp
deleted file mode 100644
index 7860fed94..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp
+++ /dev/null
@@ -1,466 +0,0 @@
-// $Id$
-// vim:tabstop=2
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (C) 2006 University of Edinburgh
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-
-#include <deque>
-
-#include "PhraseDecoder.h"
-#include "../../System.h"
-#include "../../SubPhrase.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-PhraseDecoder::PhraseDecoder(
- PhraseTableCompact &phraseDictionary,
- const std::vector<FactorType>* input,
- const std::vector<FactorType>* output,
- size_t numScoreComponent
- // , const std::vector<float>* weight
-)
- : m_coding(None), m_numScoreComponent(numScoreComponent),
- m_containsAlignmentInfo(true), m_maxRank(0),
- m_symbolTree(0), m_multipleScoreTrees(false),
- m_scoreTrees(1), m_alignTree(0),
- m_phraseDictionary(phraseDictionary), m_input(input), m_output(output),
- // m_weight(weight),
- m_separator(" ||| ")
-{ }
-
-PhraseDecoder::~PhraseDecoder()
-{
- if(m_symbolTree)
- delete m_symbolTree;
-
- for(size_t i = 0; i < m_scoreTrees.size(); i++)
- if(m_scoreTrees[i])
- delete m_scoreTrees[i];
-
- if(m_alignTree)
- delete m_alignTree;
-}
-
-inline unsigned PhraseDecoder::GetSourceSymbolId(std::string& symbol)
-{
- boost::unordered_map<std::string, unsigned>::iterator it
- = m_sourceSymbolsMap.find(symbol);
- if(it != m_sourceSymbolsMap.end())
- return it->second;
-
- size_t idx = m_sourceSymbols.find(symbol);
- m_sourceSymbolsMap[symbol] = idx;
- return idx;
-}
-
-inline std::string PhraseDecoder::GetTargetSymbol(unsigned idx) const
-{
- if(idx < m_targetSymbols.size())
- return m_targetSymbols[idx];
- return std::string("##ERROR##");
-}
-
-inline size_t PhraseDecoder::GetREncType(unsigned encodedSymbol)
-{
- return (encodedSymbol >> 30) + 1;
-}
-
-inline size_t PhraseDecoder::GetPREncType(unsigned encodedSymbol)
-{
- return (encodedSymbol >> 31) + 1;
-}
-
-inline unsigned PhraseDecoder::GetTranslation(unsigned srcIdx, size_t rank)
-{
- size_t srcTrgIdx = m_lexicalTableIndex[srcIdx];
- return m_lexicalTable[srcTrgIdx + rank].second;
-}
-
-size_t PhraseDecoder::GetMaxSourcePhraseLength()
-{
- return m_maxPhraseLength;
-}
-
-inline unsigned PhraseDecoder::DecodeREncSymbol1(unsigned encodedSymbol)
-{
- return encodedSymbol &= ~(3 << 30);
-}
-
-inline unsigned PhraseDecoder::DecodeREncSymbol2Rank(unsigned encodedSymbol)
-{
- return encodedSymbol &= ~(255 << 24);
-}
-
-inline unsigned PhraseDecoder::DecodeREncSymbol2Position(unsigned encodedSymbol)
-{
- encodedSymbol &= ~(3 << 30);
- encodedSymbol >>= 24;
- return encodedSymbol;
-}
-
-inline unsigned PhraseDecoder::DecodeREncSymbol3(unsigned encodedSymbol)
-{
- return encodedSymbol &= ~(3 << 30);
-}
-
-inline unsigned PhraseDecoder::DecodePREncSymbol1(unsigned encodedSymbol)
-{
- return encodedSymbol &= ~(1 << 31);
-}
-
-inline int PhraseDecoder::DecodePREncSymbol2Left(unsigned encodedSymbol)
-{
- return ((encodedSymbol >> 25) & 63) - 32;
-}
-
-inline int PhraseDecoder::DecodePREncSymbol2Right(unsigned encodedSymbol)
-{
- return ((encodedSymbol >> 19) & 63) - 32;
-}
-
-inline unsigned PhraseDecoder::DecodePREncSymbol2Rank(unsigned encodedSymbol)
-{
- return (encodedSymbol & 524287);
-}
-
-size_t PhraseDecoder::Load(std::FILE* in)
-{
- size_t start = std::ftell(in);
- size_t read = 0;
-
- read += std::fread(&m_coding, sizeof(m_coding), 1, in);
- read += std::fread(&m_numScoreComponent, sizeof(m_numScoreComponent), 1, in);
- read += std::fread(&m_containsAlignmentInfo, sizeof(m_containsAlignmentInfo), 1, in);
- read += std::fread(&m_maxRank, sizeof(m_maxRank), 1, in);
- read += std::fread(&m_maxPhraseLength, sizeof(m_maxPhraseLength), 1, in);
-
- if(m_coding == REnc) {
- m_sourceSymbols.load(in);
-
- size_t size;
- read += std::fread(&size, sizeof(size_t), 1, in);
- m_lexicalTableIndex.resize(size);
- read += std::fread(&m_lexicalTableIndex[0], sizeof(size_t), size, in);
-
- read += std::fread(&size, sizeof(size_t), 1, in);
- m_lexicalTable.resize(size);
- read += std::fread(&m_lexicalTable[0], sizeof(SrcTrg), size, in);
- }
-
- m_targetSymbols.load(in);
-
- m_symbolTree = new CanonicalHuffman<unsigned>(in);
-
- read += std::fread(&m_multipleScoreTrees, sizeof(m_multipleScoreTrees), 1, in);
- if(m_multipleScoreTrees) {
- m_scoreTrees.resize(m_numScoreComponent);
- for(size_t i = 0; i < m_numScoreComponent; i++)
- m_scoreTrees[i] = new CanonicalHuffman<float>(in);
- } else {
- m_scoreTrees.resize(1);
- m_scoreTrees[0] = new CanonicalHuffman<float>(in);
- }
-
- if(m_containsAlignmentInfo)
- m_alignTree = new CanonicalHuffman<AlignPoint>(in);
-
- size_t end = std::ftell(in);
- return end - start;
-}
-
-std::string PhraseDecoder::MakeSourceKey(std::string &source)
-{
- return source + m_separator;
-}
-
-TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(
- const ManagerBase &mgr,
- const Phrase<Word> &sourcePhrase,
- bool topLevel,
- bool eval)
-{
-
- // Not using TargetPhraseCollection avoiding "new" operator
- // which can introduce heavy locking with multiple threads
- TargetPhraseVectorPtr tpv(new TargetPhraseVector());
- size_t bitsLeft = 0;
-
- if(m_coding == PREnc) {
- std::pair<TargetPhraseVectorPtr, size_t> cachedPhraseColl
- = m_decodingCache.Retrieve(sourcePhrase);
-
- // Has been cached and is complete or does not need to be completed
- if(cachedPhraseColl.first != NULL && (!topLevel || cachedPhraseColl.second == 0))
- return cachedPhraseColl.first;
-
- // Has been cached, but is incomplete
- else if(cachedPhraseColl.first != NULL) {
- bitsLeft = cachedPhraseColl.second;
- tpv->resize(cachedPhraseColl.first->size());
- std::copy(cachedPhraseColl.first->begin(),
- cachedPhraseColl.first->end(),
- tpv->begin());
- }
- }
-
- // Retrieve source phrase identifier
- std::string sourcePhraseString = sourcePhrase.GetString(*m_input);
- size_t sourcePhraseId = m_phraseDictionary.m_hash[MakeSourceKey(sourcePhraseString)];
- /*
- cerr << "sourcePhraseString=" << sourcePhraseString << " "
- << sourcePhraseId
- << endl;
- */
- if(sourcePhraseId != m_phraseDictionary.m_hash.GetSize()) {
- // Retrieve compressed and encoded target phrase collection
- std::string encodedPhraseCollection;
- if(m_phraseDictionary.m_inMemory)
- encodedPhraseCollection = m_phraseDictionary.m_targetPhrasesMemory[sourcePhraseId].str();
- else
- encodedPhraseCollection = m_phraseDictionary.m_targetPhrasesMapped[sourcePhraseId].str();
-
- BitWrapper<> encodedBitStream(encodedPhraseCollection);
- if(m_coding == PREnc && bitsLeft)
- encodedBitStream.SeekFromEnd(bitsLeft);
-
- // Decompress and decode target phrase collection
- TargetPhraseVectorPtr decodedPhraseColl =
- DecodeCollection(mgr, tpv, encodedBitStream, sourcePhrase, topLevel, eval);
-
- return decodedPhraseColl;
- } else
- return TargetPhraseVectorPtr();
-}
-
-TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
- const ManagerBase &mgr,
- TargetPhraseVectorPtr tpv,
- BitWrapper<> &encodedBitStream,
- const Phrase<Word> &sourcePhrase,
- bool topLevel,
- bool eval)
-{
- const System &system = mgr.system;
- FactorCollection &vocab = system.GetVocab();
-
- bool extending = tpv->size();
- size_t bitsLeft = encodedBitStream.TellFromEnd();
-
- std::vector<int> sourceWords;
- if(m_coding == REnc) {
- for(size_t i = 0; i < sourcePhrase.GetSize(); i++) {
- std::string sourceWord
- = sourcePhrase[i].GetString(*m_input);
- unsigned idx = GetSourceSymbolId(sourceWord);
- sourceWords.push_back(idx);
- }
- }
-
- unsigned phraseStopSymbol = 0;
- AlignPoint alignStopSymbol(-1, -1);
-
- std::vector<float> scores;
- std::set<AlignPointSizeT> alignment;
-
- enum DecodeState { New, Symbol, Score, Alignment, Add } state = New;
-
- size_t srcSize = sourcePhrase.GetSize();
-
- TPCompact* targetPhrase = NULL;
- while(encodedBitStream.TellFromEnd()) {
-
- if(state == New) {
- // Creating new TargetPhrase on the heap
- tpv->push_back(TPCompact());
- targetPhrase = &tpv->back();
-
- alignment.clear();
- scores.clear();
-
- state = Symbol;
- }
-
- if(state == Symbol) {
- unsigned symbol = m_symbolTree->Read(encodedBitStream);
- if(symbol == phraseStopSymbol) {
- state = Score;
- } else {
- if(m_coding == REnc) {
- std::string wordString;
- size_t type = GetREncType(symbol);
-
- if(type == 1) {
- unsigned decodedSymbol = DecodeREncSymbol1(symbol);
- wordString = GetTargetSymbol(decodedSymbol);
- } else if (type == 2) {
- size_t rank = DecodeREncSymbol2Rank(symbol);
- size_t srcPos = DecodeREncSymbol2Position(symbol);
-
- if(srcPos >= sourceWords.size())
- return TargetPhraseVectorPtr();
-
- wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank));
- if(m_phraseDictionary.m_useAlignmentInfo) {
- size_t trgPos = targetPhrase->words.size();
- alignment.insert(AlignPoint(srcPos, trgPos));
- }
- } else if(type == 3) {
- size_t rank = DecodeREncSymbol3(symbol);
- size_t srcPos = targetPhrase->words.size();
-
- if(srcPos >= sourceWords.size())
- return TargetPhraseVectorPtr();
-
- wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank));
- if(m_phraseDictionary.m_useAlignmentInfo) {
- size_t trgPos = srcPos;
- alignment.insert(AlignPoint(srcPos, trgPos));
- }
- }
-
- Word word;
- word.CreateFromString(vocab, system, wordString);
- targetPhrase->words.push_back(word);
- } else if(m_coding == PREnc) {
- // if the symbol is just a word
- if(GetPREncType(symbol) == 1) {
- unsigned decodedSymbol = DecodePREncSymbol1(symbol);
-
- Word word;
- word.CreateFromString(vocab, system, GetTargetSymbol(decodedSymbol));
- targetPhrase->words.push_back(word);
- }
- // if the symbol is a subphrase pointer
- else {
- int left = DecodePREncSymbol2Left(symbol);
- int right = DecodePREncSymbol2Right(symbol);
- unsigned rank = DecodePREncSymbol2Rank(symbol);
-
- int srcStart = left + targetPhrase->words.size();
- int srcEnd = srcSize - right - 1;
-
- // false positive consistency check
- if(0 > srcStart || srcStart > srcEnd || unsigned(srcEnd) >= srcSize)
- return TargetPhraseVectorPtr();
-
- // false positive consistency check
- if(m_maxRank && rank > m_maxRank)
- return TargetPhraseVectorPtr();
-
- // set subphrase by default to itself
- TargetPhraseVectorPtr subTpv = tpv;
-
- // if range smaller than source phrase retrieve subphrase
- if(unsigned(srcEnd - srcStart + 1) != srcSize) {
- SubPhrase<Word> subPhrase = sourcePhrase.GetSubPhrase(srcStart, srcEnd - srcStart + 1);
- subTpv = CreateTargetPhraseCollection(mgr, subPhrase, false);
- } else {
- // false positive consistency check
- if(rank >= tpv->size()-1)
- return TargetPhraseVectorPtr();
- }
-
- // false positive consistency check
- if(subTpv != NULL && rank < subTpv->size()) {
- // insert the subphrase into the main target phrase
- TPCompact& subTp = subTpv->at(rank);
- if(m_phraseDictionary.m_useAlignmentInfo) {
- // reconstruct the alignment data based on the alignment of the subphrase
- for(std::set<AlignPointSizeT>::const_iterator it = subTp.alignment.begin();
- it != subTp.alignment.end(); it++) {
- alignment.insert(AlignPointSizeT(srcStart + it->first,
- targetPhrase->words.size() + it->second));
- }
- }
-
- std::copy(subTp.words.begin(), subTp.words.end(), std::back_inserter(targetPhrase->words));
- } else
- return TargetPhraseVectorPtr();
- }
- } else {
- Word word;
- word.CreateFromString(vocab, system, GetTargetSymbol(symbol));
- targetPhrase->words.push_back(word);
- }
- }
- } else if(state == Score) {
- size_t idx = m_multipleScoreTrees ? scores.size() : 0;
- float score = m_scoreTrees[idx]->Read(encodedBitStream);
- scores.push_back(score);
-
- if(scores.size() == m_numScoreComponent) {
- targetPhrase->scores = scores;
-
- if(m_containsAlignmentInfo)
- state = Alignment;
- else
- state = Add;
- }
- } else if(state == Alignment) {
- AlignPoint alignPoint = m_alignTree->Read(encodedBitStream);
- if(alignPoint == alignStopSymbol) {
- state = Add;
- } else {
- if(m_phraseDictionary.m_useAlignmentInfo)
- alignment.insert(AlignPointSizeT(alignPoint));
- }
- }
-
- if(state == Add) {
- if(m_phraseDictionary.m_useAlignmentInfo) {
- size_t sourceSize = sourcePhrase.GetSize();
- size_t targetSize = targetPhrase->words.size();
- for(std::set<AlignPointSizeT>::iterator it = alignment.begin(); it != alignment.end(); it++) {
- if(it->first >= sourceSize || it->second >= targetSize)
- return TargetPhraseVectorPtr();
- }
- targetPhrase->alignment = alignment;
- }
-
- if(m_coding == PREnc) {
- if(!m_maxRank || tpv->size() <= m_maxRank)
- bitsLeft = encodedBitStream.TellFromEnd();
-
- if(!topLevel && m_maxRank && tpv->size() >= m_maxRank)
- break;
- }
-
- if(encodedBitStream.TellFromEnd() <= 8)
- break;
-
- state = New;
- }
- }
-
- if(m_coding == PREnc && !extending) {
- bitsLeft = bitsLeft > 8 ? bitsLeft : 0;
- m_decodingCache.Cache(sourcePhrase, tpv, bitsLeft, m_maxRank);
- }
-
- return tpv;
-}
-
-void PhraseDecoder::PruneCache()
-{
- m_decodingCache.Prune();
-}
-
-}
diff --git a/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.h b/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.h
deleted file mode 100644
index 79faa38a6..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.h
+++ /dev/null
@@ -1,142 +0,0 @@
-// $Id$
-// vim:tabstop=2
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (C) 2006 University of Edinburgh
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-
-#pragma once
-
-#include <sstream>
-#include <vector>
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
-#include <string>
-#include <iterator>
-#include <algorithm>
-#include <sys/stat.h>
-
-#include "PhraseTableCompact.h"
-#include "StringVector.h"
-#include "CanonicalHuffman.h"
-#include "TargetPhraseCollectionCache.h"
-
-#include "../../Phrase.h"
-#include "../../ManagerBase.h"
-
-namespace Moses2
-{
-
-class PhraseTableCompact;
-
-class PhraseDecoder
-{
-protected:
-
- friend class PhraseTableCompact;
-
- typedef std::pair<unsigned char, unsigned char> AlignPoint;
- typedef std::pair<unsigned, unsigned> SrcTrg;
-
- enum Coding { None, REnc, PREnc } m_coding;
-
- size_t m_numScoreComponent;
- bool m_containsAlignmentInfo;
- size_t m_maxRank;
- size_t m_maxPhraseLength;
-
- boost::unordered_map<std::string, unsigned> m_sourceSymbolsMap;
- StringVector<unsigned char, unsigned, std::allocator> m_sourceSymbols;
- StringVector<unsigned char, unsigned, std::allocator> m_targetSymbols;
-
- std::vector<size_t> m_lexicalTableIndex;
- std::vector<SrcTrg> m_lexicalTable;
-
- CanonicalHuffman<unsigned>* m_symbolTree;
-
- bool m_multipleScoreTrees;
- std::vector<CanonicalHuffman<float>*> m_scoreTrees;
-
- CanonicalHuffman<AlignPoint>* m_alignTree;
-
- TargetPhraseCollectionCache m_decodingCache;
-
- PhraseTableCompact& m_phraseDictionary;
-
- // ***********************************************
-
- const std::vector<FactorType>* m_input;
- const std::vector<FactorType>* m_output;
-
- std::string m_separator;
-
- // ***********************************************
-
- unsigned GetSourceSymbolId(std::string& s);
- std::string GetTargetSymbol(unsigned id) const;
-
- size_t GetREncType(unsigned encodedSymbol);
- size_t GetPREncType(unsigned encodedSymbol);
-
- unsigned GetTranslation(unsigned srcIdx, size_t rank);
-
- size_t GetMaxSourcePhraseLength();
-
- unsigned DecodeREncSymbol1(unsigned encodedSymbol);
- unsigned DecodeREncSymbol2Rank(unsigned encodedSymbol);
- unsigned DecodeREncSymbol2Position(unsigned encodedSymbol);
- unsigned DecodeREncSymbol3(unsigned encodedSymbol);
-
- unsigned DecodePREncSymbol1(unsigned encodedSymbol);
- int DecodePREncSymbol2Left(unsigned encodedSymbol);
- int DecodePREncSymbol2Right(unsigned encodedSymbol);
- unsigned DecodePREncSymbol2Rank(unsigned encodedSymbol);
-
- std::string MakeSourceKey(std::string &);
-
-public:
-
- PhraseDecoder(
- PhraseTableCompact &phraseDictionary,
- const std::vector<FactorType>* input,
- const std::vector<FactorType>* output,
- size_t numScoreComponent
- );
-
- ~PhraseDecoder();
-
- size_t Load(std::FILE* in);
-
- TargetPhraseVectorPtr CreateTargetPhraseCollection(
- const ManagerBase &mgr,
- const Phrase<Word> &sourcePhrase,
- bool topLevel = false,
- bool eval = true);
-
- TargetPhraseVectorPtr DecodeCollection(
- const ManagerBase &mgr,
- TargetPhraseVectorPtr tpv,
- BitWrapper<> &encodedBitStream,
- const Phrase<Word> &sourcePhrase,
- bool topLevel,
- bool eval);
-
- void PruneCache();
-};
-
-}
-
diff --git a/contrib/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp b/contrib/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp
deleted file mode 100644
index 49244df1b..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp
+++ /dev/null
@@ -1,222 +0,0 @@
-#include <boost/algorithm/string/predicate.hpp>
-#include <boost/thread/tss.hpp>
-#include "PhraseTableCompact.h"
-#include "PhraseDecoder.h"
-#include "../../PhraseBased/InputPath.h"
-#include "../../PhraseBased/Manager.h"
-#include "../../PhraseBased/TargetPhrases.h"
-#include "../../PhraseBased/TargetPhraseImpl.h"
-#include "../../PhraseBased/Sentence.h"
-
-using namespace std;
-using namespace boost::algorithm;
-
-namespace Moses2
-{
-bool PhraseTableCompact::s_inMemoryByDefault = false;
-
-PhraseTableCompact::PhraseTableCompact(size_t startInd, const std::string &line)
-:PhraseTable(startInd, line)
-,m_inMemory(s_inMemoryByDefault)
-,m_useAlignmentInfo(true)
-,m_hash(10, 16)
-,m_phraseDecoder(0)
-{
- ReadParameters();
-}
-
-PhraseTableCompact::~PhraseTableCompact()
-{
-
-}
-
-void PhraseTableCompact::Load(System &system)
-{
- std::string tFilePath = m_path;
-
- std::string suffix = ".minphr";
- if (!ends_with(tFilePath, suffix)) tFilePath += suffix;
- if (!FileExists(tFilePath))
- throw runtime_error("Error: File " + tFilePath + " does not exist.");
-
- m_phraseDecoder
- = new PhraseDecoder(*this, &m_input, &m_output, GetNumScores());
-
- std::FILE* pFile = std::fopen(tFilePath.c_str() , "r");
-
- size_t indexSize;
- //if(m_inMemory)
- // Load source phrase index into memory
- indexSize = m_hash.Load(pFile);
- // else
- // Keep source phrase index on disk
- //indexSize = m_hash.LoadIndex(pFile);
-
- size_t coderSize = m_phraseDecoder->Load(pFile);
-
- size_t phraseSize;
- if(m_inMemory) {
- // Load target phrase collections into memory
- phraseSize = m_targetPhrasesMemory.load(pFile, false);
- }
- else {
- // Keep target phrase collections on disk
- phraseSize = m_targetPhrasesMapped.load(pFile, true);
- }
-
- UTIL_THROW_IF2(indexSize == 0 || coderSize == 0 || phraseSize == 0,
- "Not successfully loaded");
-}
-
-void PhraseTableCompact::SetParameter(const std::string& key, const std::string& value)
-{
- if (key == "blah") {
-
- }
- else {
- PhraseTable::SetParameter(key, value);
- }
-}
-
-void PhraseTableCompact::CleanUpAfterSentenceProcessing() const
-{
- //if(!m_sentenceCache.get())
- // m_sentenceCache.reset(new PhraseCache());
-
- m_phraseDecoder->PruneCache();
- //m_sentenceCache->clear();
-}
-
-
-// pb
-void PhraseTableCompact::Lookup(const Manager &mgr, InputPathsBase &inputPaths) const
-{
- size_t inputSize = static_cast<const Sentence&>(mgr.GetInput()).GetSize();
- InputPaths &inputPathsCast = static_cast<InputPaths&>(inputPaths);
-
- for (size_t i = 0; i < inputSize; ++i) {
- for (size_t startPos = 0; startPos < inputSize; ++startPos) {
- size_t endPos = startPos + i;
- if (endPos >= inputSize) {
- break;
- }
- InputPath *path = inputPathsCast.GetMatrix().GetValue(startPos, i);
- //cerr << "path=" << path->Debug(mgr.system) << endl;
- TargetPhrases *tps = Lookup(mgr, mgr.GetPool(), *path);
- path->AddTargetPhrases(*this, tps);
- }
- }
-}
-
-TargetPhrases *PhraseTableCompact::Lookup(const Manager &mgr, MemPool &pool,
- InputPath &inputPath) const
-{
- TargetPhrases *ret = NULL;
-
- const Phrase<Word> &sourcePhrase = inputPath.subPhrase;
- //cerr << "sourcePhrase=" << sourcePhrase.Debug(mgr.system) << endl;
-
- // There is no souch source phrase if source phrase is longer than longest
- // observed source phrase during compilation
- if(sourcePhrase.GetSize() > m_phraseDecoder->GetMaxSourcePhraseLength())
- return ret;
-
- // Retrieve target phrase collection from phrase table
- TargetPhraseVectorPtr decodedPhraseColl
- = m_phraseDecoder->CreateTargetPhraseCollection(mgr, sourcePhrase, true, true);
-
- if(decodedPhraseColl != NULL && decodedPhraseColl->size()) {
- TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl));
- //TargetPhraseCollection::shared_ptr phraseColl(new TargetPhraseCollection);
- ret = new (pool.Allocate<TargetPhrases>()) TargetPhrases(pool, decodedPhraseColl->size());
-
- for (size_t i = 0; i < decodedPhraseColl->size(); ++i) {
- const TPCompact &tpCompact = decodedPhraseColl->at(i);
- const TargetPhraseImpl *tp = CreateTargetPhrase(mgr, tpCompact, sourcePhrase);
-
- ret->AddTargetPhrase(*tp);
- }
-
- ret->SortAndPrune(m_tableLimit);
- mgr.system.featureFunctions.EvaluateAfterTablePruning(pool, *ret, sourcePhrase);
-
- //cerr << "RET2=" << ret->Debug(mgr.system) << endl;
- /*
- // Cache phrase pair for clean-up or retrieval with PREnc
- const_cast<PhraseDictionaryCompact*>(this)->CacheForCleanup(phraseColl);
-
- return phraseColl;
- */
- }
-
- return ret;
-
-}
-
-const TargetPhraseImpl *PhraseTableCompact::CreateTargetPhrase(
- const Manager &mgr,
- const TPCompact &tpCompact,
- const Phrase<Word> &sourcePhrase) const
-{
- MemPool &pool = mgr.GetPool();
-
- size_t size = tpCompact.words.size();
- TargetPhraseImpl *ret = new TargetPhraseImpl(pool, *this, mgr.system, size);
-
- // words
- for (size_t i = 0; i < size; ++i) {
- const Word &compactWord = tpCompact.words[i];
- Word &tpWord = (*ret)[i];
- tpWord = compactWord;
- }
-
- // scores
- Scores &scores = ret->GetScores();
- scores.Assign(mgr.system, *this, tpCompact.scores);
-
- // align
- ret->SetAlignTerm(tpCompact.alignment);
-
- // score
- mgr.system.featureFunctions.EvaluateInIsolation(pool, mgr.system, sourcePhrase, *ret);
-
- // Cache phrase pair for clean-up or retrieval with PREnc
- //const_cast<PhraseDictionaryCompact*>(this)->CacheForCleanup(phraseColl);
-
- //cerr << "ret=" << ret->Debug(mgr.system) << endl;
- return ret;
-}
-
-
-// scfg
-void PhraseTableCompact::InitActiveChart(
- MemPool &pool,
- const SCFG::Manager &mgr,
- SCFG::InputPath &path) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-void PhraseTableCompact::Lookup(
- MemPool &pool,
- const SCFG::Manager &mgr,
- size_t maxChartSpan,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-void PhraseTableCompact::LookupGivenNode(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::ActiveChartEntry &prevEntry,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-}
diff --git a/contrib/moses2/TranslationModel/CompactPT/PhraseTableCompact.h b/contrib/moses2/TranslationModel/CompactPT/PhraseTableCompact.h
deleted file mode 100644
index 84ea7e4b2..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/PhraseTableCompact.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#pragma once
-#include "../PhraseTable.h"
-#include "BlockHashIndex.h"
-
-namespace Moses2
-{
-class PhraseDecoder;
-class TPCompact;
-
-class PhraseTableCompact: public PhraseTable
-{
-public:
- PhraseTableCompact(size_t startInd, const std::string &line);
- virtual ~PhraseTableCompact();
- void Load(System &system);
- virtual void SetParameter(const std::string& key, const std::string& value);
-
- virtual void CleanUpAfterSentenceProcessing() const;
-
- virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool,
- InputPath &inputPath) const;
-
- // scfg
- virtual void InitActiveChart(
- MemPool &pool,
- const SCFG::Manager &mgr,
- SCFG::InputPath &path) const;
-
- virtual void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const;
-
- virtual void Lookup(
- MemPool &pool,
- const SCFG::Manager &mgr,
- size_t maxChartSpan,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const;
-
-protected:
- static bool s_inMemoryByDefault;
- bool m_inMemory;
- bool m_useAlignmentInfo;
-
- BlockHashIndex m_hash;
-
- StringVector<unsigned char, size_t, MmapAllocator> m_targetPhrasesMapped;
- StringVector<unsigned char, size_t, std::allocator> m_targetPhrasesMemory;
-
- friend class PhraseDecoder;
- PhraseDecoder* m_phraseDecoder;
-
- const TargetPhraseImpl *CreateTargetPhrase(
- const Manager &mgr,
- const TPCompact &tpCompact,
- const Phrase<Word> &sourcePhrase) const;
-
- // SCFG
- virtual void LookupGivenNode(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::ActiveChartEntry &prevEntry,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const;
-
-};
-
-}
diff --git a/contrib/moses2/TranslationModel/ProbingPT/StoreTarget.cpp b/contrib/moses2/TranslationModel/ProbingPT/StoreTarget.cpp
deleted file mode 100644
index 326aaea5f..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/StoreTarget.cpp
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * StoreTarget.cpp
- *
- * Created on: 19 Jan 2016
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include "StoreTarget.h"
-#include "line_splitter.hh"
-#include "probing_hash_utils.hh"
-#include "../../legacy/OutputFileStream.h"
-#include "../../legacy/Util2.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-StoreTarget::StoreTarget(const std::string &basepath)
-:m_basePath(basepath)
-,m_vocab(basepath + "/TargetVocab.dat")
-{
- std::string path = basepath + "/TargetColl.dat";
- m_fileTargetColl.open(path.c_str(),
- std::ios::out | std::ios::binary | std::ios::ate | std::ios::trunc);
- if (!m_fileTargetColl.is_open()) {
- throw "can't create file ";
- }
-
-}
-
-StoreTarget::~StoreTarget()
-{
- assert(m_coll.empty());
- m_fileTargetColl.close();
-
- // vocab
- m_vocab.Save();
-}
-
-uint64_t StoreTarget::Save()
-{
- uint64_t ret = m_fileTargetColl.tellp();
-
- // save to disk
- uint64_t numTP = m_coll.size();
- m_fileTargetColl.write((char*) &numTP, sizeof(uint64_t));
-
- for (size_t i = 0; i < m_coll.size(); ++i) {
- Save(*m_coll[i]);
- }
-
- // clear coll
- RemoveAllInColl(m_coll);
- m_coll.clear();
-
- // starting position of coll
- return ret;
-}
-
-void StoreTarget::Save(const target_text &rule)
-{
- // metadata for each tp
- TargetPhraseInfo tpInfo;
- tpInfo.alignTerm = GetAlignId(rule.word_align_term);
- tpInfo.alignNonTerm = GetAlignId(rule.word_align_non_term);
- tpInfo.numWords = rule.target_phrase.size();
- tpInfo.propLength = rule.property.size();
-
- //cerr << "TPInfo=" << sizeof(TPInfo);
- m_fileTargetColl.write((char*) &tpInfo, sizeof(TargetPhraseInfo));
-
- // scores
- for (size_t i = 0; i < rule.prob.size(); ++i) {
- float prob = rule.prob[i];
- m_fileTargetColl.write((char*) &prob, sizeof(prob));
- }
-
- // tp
- for (size_t i = 0; i < rule.target_phrase.size(); ++i) {
- uint32_t vocabId = rule.target_phrase[i];
- m_fileTargetColl.write((char*) &vocabId, sizeof(vocabId));
- }
-
- // prop TODO
-
-}
-
-void StoreTarget::SaveAlignment()
-{
- std::string path = m_basePath + "/Alignments.dat";
- Moses2::OutputFileStream file(path);
-
- BOOST_FOREACH(Alignments::value_type &valPair, m_aligns) {
- file << valPair.second << "\t";
-
- const std::vector<size_t> &aligns = valPair.first;
- BOOST_FOREACH(size_t align, aligns) {
- file << align << " ";
- }
- file << endl;
- }
-
-}
-
-void StoreTarget::Append(const line_text &line, bool log_prob, bool scfg)
-{
- target_text *rule = new target_text;
- //cerr << "line.target_phrase=" << line.target_phrase << endl;
-
- // target_phrase
- vector<bool> nonTerms;
- util::TokenIter<util::SingleCharacter> it;
- it = util::TokenIter<util::SingleCharacter>(line.target_phrase,
- util::SingleCharacter(' '));
- while (it) {
- StringPiece word = *it;
- //cerr << "word=" << word << endl;
-
- bool nonTerm = false;
- if (scfg) {
- // not really sure how to handle factored SCFG and NT
- if (scfg && word[0] == '[' && word[word.size() - 1] == ']') {
- //cerr << "NON-TERM=" << tok << " " << nonTerms.size() << endl;
- nonTerm = true;
- }
- nonTerms.push_back(nonTerm);
- }
-
- util::TokenIter<util::SingleCharacter> itFactor;
- itFactor = util::TokenIter<util::SingleCharacter>(word,
- util::SingleCharacter('|'));
- while (itFactor) {
- StringPiece factor = *itFactor;
-
- string factorStr = factor.as_string();
- uint32_t vocabId = m_vocab.GetVocabId(factorStr);
-
- rule->target_phrase.push_back(vocabId);
-
- itFactor++;
- }
-
- it++;
- }
-
- // probs
- it = util::TokenIter<util::SingleCharacter>(line.prob,
- util::SingleCharacter(' '));
- while (it) {
- string tok = it->as_string();
- float prob = Scan<float>(tok);
-
- if (log_prob) {
- prob = FloorScore(log(prob));
- if (prob == 0.0f) prob = 0.0000000001;
- }
-
- rule->prob.push_back(prob);
- it++;
- }
-
- /*
- cerr << "nonTerms=";
- for (size_t i = 0; i < nonTerms.size(); ++i) {
- cerr << nonTerms[i] << " ";
- }
- cerr << endl;
- */
-
- // alignment
- it = util::TokenIter<util::SingleCharacter>(line.word_align,
- util::SingleCharacter(' '));
- while (it) {
- string tokPair = Trim(it->as_string());
- if (tokPair.empty()) {
- break;
- }
-
- vector<size_t> alignPair = Tokenize<size_t>(tokPair, "-");
- assert(alignPair.size() == 2);
-
- bool nonTerm = false;
- size_t sourcePos = alignPair[0];
- size_t targetPos = alignPair[1];
- if (scfg) {
- nonTerm = nonTerms[targetPos];
- }
-
- //cerr << targetPos << "=" << nonTerm << endl;
-
- if (nonTerm) {
- rule->word_align_non_term.push_back(sourcePos);
- rule->word_align_non_term.push_back(targetPos);
- //cerr << (int) rule->word_all1.back() << " ";
- }
- else {
- rule->word_align_term.push_back(sourcePos);
- rule->word_align_term.push_back(targetPos);
- }
-
- it++;
- }
-
- // extra scores
- string prop = line.property.as_string();
- AppendLexRO(prop, rule->prob, log_prob);
-
- //cerr << "line.property=" << line.property << endl;
- //cerr << "prop=" << prop << endl;
-
- // properties
- /*
- for (size_t i = 0; i < prop.size(); ++i) {
- rule->property.push_back(prop[i]);
- }
- */
- m_coll.push_back(rule);
-}
-
-uint32_t StoreTarget::GetAlignId(const std::vector<size_t> &align)
-{
- boost::unordered_map<std::vector<size_t>, uint32_t>::iterator iter =
- m_aligns.find(align);
- if (iter == m_aligns.end()) {
- uint32_t ind = m_aligns.size();
- m_aligns[align] = ind;
- return ind;
- }
- else {
- return iter->second;
- }
-}
-
-void StoreTarget::AppendLexRO(std::string &prop, std::vector<float> &retvector,
- bool log_prob) const
-{
- size_t startPos = prop.find("{{LexRO ");
-
- if (startPos != string::npos) {
- size_t endPos = prop.find("}}", startPos + 8);
- string lexProb = prop.substr(startPos + 8, endPos - startPos - 8);
- //cerr << "lexProb=" << lexProb << endl;
-
- // append lex probs to pt probs
- vector<float> scores = Tokenize<float>(lexProb);
-
- if (log_prob) {
- for (size_t i = 0; i < scores.size(); ++i) {
- scores[i] = FloorScore(log(scores[i]));
- if (scores[i] == 0.0f) scores[i] = 0.0000000001;
- }
- }
-
- for (size_t i = 0; i < scores.size(); ++i) {
- retvector.push_back(scores[i]);
- }
-
- // exclude LexRO property from property column
- prop = prop.substr(0, startPos)
- + prop.substr(endPos + 2, prop.size() - endPos - 2);
- //cerr << "line.property_to_be_binarized=" << line.property_to_be_binarized << "AAAA" << endl;
- }
-}
-
-} /* namespace Moses2 */
diff --git a/contrib/moses2/TranslationModel/ProbingPT/StoreTarget.h b/contrib/moses2/TranslationModel/ProbingPT/StoreTarget.h
deleted file mode 100644
index 6fc3b1f66..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/StoreTarget.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * StoreTarget.h
- *
- * Created on: 19 Jan 2016
- * Author: hieu
- */
-#pragma once
-#include <string>
-#include <fstream>
-#include <vector>
-#include <inttypes.h>
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
-#include "StoreVocab.h"
-
-namespace Moses2
-{
-
-class line_text;
-class target_text;
-
-class StoreTarget
-{
-public:
- StoreTarget(const std::string &basepath);
- virtual ~StoreTarget();
-
- uint64_t Save();
- void SaveAlignment();
-
- void Append(const line_text &line, bool log_prob, bool scfg);
-protected:
- std::string m_basePath;
- std::fstream m_fileTargetColl;
- StoreVocab<uint32_t> m_vocab;
-
- typedef boost::unordered_map<std::vector<size_t>, uint32_t> Alignments;
- Alignments m_aligns;
-
- std::vector<target_text*> m_coll;
-
- uint32_t GetAlignId(const std::vector<size_t> &align);
- void Save(const target_text &rule);
-
- void AppendLexRO(std::string &prop, std::vector<float> &retvector,
- bool log_prob) const;
-
-};
-
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/TranslationModel/ProbingPT/StoreVocab.h b/contrib/moses2/TranslationModel/ProbingPT/StoreVocab.h
deleted file mode 100644
index e9808707a..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/StoreVocab.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * StoreVocab.h
- *
- * Created on: 15 Jun 2016
- * Author: hieu
- */
-#pragma once
-#include <string>
-#include <boost/unordered_map.hpp>
-#include "../../legacy/OutputFileStream.h"
-#include "../../legacy/Util2.h"
-
-namespace Moses2
-{
-
-template<typename VOCABID>
-class StoreVocab
-{
-protected:
- std::string m_path;
-
- typedef boost::unordered_map<std::string, VOCABID> Coll;
- Coll m_vocab;
-
-public:
- StoreVocab(const std::string &path)
- :m_path(path)
- {}
-
- virtual ~StoreVocab() {}
-
- VOCABID GetVocabId(const std::string &word)
- {
- typename Coll::iterator iter = m_vocab.find(word);
- if (iter == m_vocab.end()) {
- VOCABID ind = m_vocab.size() + 1;
- m_vocab[word] = ind;
- return ind;
- }
- else {
- return iter->second;
- }
- }
-
- void Insert(VOCABID id, const std::string &word)
- {
- m_vocab[word] = id;
- }
-
- void Save()
- {
- OutputFileStream strme(m_path);
-
- typename Coll::const_iterator iter;
- for (iter = m_vocab.begin(); iter != m_vocab.end(); ++iter) {
- strme << iter->first << "\t" << iter->second << std::endl;
- }
-
- strme.Close();
- }
-};
-
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/TranslationModel/ProbingPT/hash.cpp b/contrib/moses2/TranslationModel/ProbingPT/hash.cpp
deleted file mode 100644
index aab5ee2b3..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/hash.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-#include <iostream>
-#include "hash.hh"
-
-using namespace std;
-
-namespace Moses2
-{
-
-uint64_t getHash(StringPiece text)
-{
- std::size_t len = text.size();
- uint64_t key = util::MurmurHashNative(text.data(), len);
- return key;
-}
-
-std::vector<uint64_t> getVocabIDs(const StringPiece &textin)
-{
- //Tokenize
- std::vector<uint64_t> output;
-
- util::TokenIter<util::SingleCharacter> itWord(textin, util::SingleCharacter(' '));
-
- while (itWord) {
- StringPiece word = *itWord;
- uint64_t id = 0;
-
- util::TokenIter<util::SingleCharacter> itFactor(word, util::SingleCharacter('|'));
- while (itFactor) {
- StringPiece factor = *itFactor;
- //cerr << "factor=" << factor << endl;
-
- id += getHash(factor);
- itFactor++;
- }
-
- output.push_back(id);
- itWord++;
- }
-
- return output;
-}
-
-}
-
diff --git a/contrib/moses2/TranslationModel/ProbingPT/line_splitter.hh b/contrib/moses2/TranslationModel/ProbingPT/line_splitter.hh
deleted file mode 100644
index 3b086b44a..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/line_splitter.hh
+++ /dev/null
@@ -1,59 +0,0 @@
-#pragma once
-
-#include "util/string_piece.hh"
-#include "util/tokenize_piece.hh"
-#include "util/file_piece.hh"
-#include <vector>
-#include <cstdlib> //atof
-#include "util/string_piece.hh" //Tokenization and work with StringPiece
-#include "util/tokenize_piece.hh"
-#include <vector>
-
-namespace Moses2
-{
-
-//Struct for holding processed line
-struct line_text
-{
- StringPiece source_phrase;
- StringPiece target_phrase;
- StringPiece prob;
- StringPiece word_align;
- StringPiece counts;
- StringPiece sparse_score;
- StringPiece property;
- std::string property_to_be_binarized;
-};
-
-//Struct for holding processed line
-struct target_text
-{
- std::vector<unsigned int> target_phrase;
- std::vector<float> prob;
- std::vector<size_t> word_align_term;
- std::vector<size_t> word_align_non_term;
- std::vector<char> counts;
- std::vector<char> sparse_score;
- std::vector<char> property;
-
- /*
- void Reset()
- {
- target_phrase.clear();
- prob.clear();
- word_all1.clear();
- counts.clear();
- sparse_score.clear();
- property.clear();
- }
- */
-};
-
-//Ask if it's better to have it receive a pointer to a line_text struct
-line_text splitLine(const StringPiece &textin, bool scfg);
-void reformatSCFG(line_text &output);
-
-std::vector<unsigned char> splitWordAll1(const StringPiece &textin);
-
-}
-
diff --git a/contrib/moses2/TranslationModel/ProbingPT/storing.cpp b/contrib/moses2/TranslationModel/ProbingPT/storing.cpp
deleted file mode 100644
index 75cdcc038..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/storing.cpp
+++ /dev/null
@@ -1,303 +0,0 @@
-#include <sys/stat.h>
-#include <boost/foreach.hpp>
-#include "line_splitter.hh"
-#include "storing.hh"
-#include "StoreTarget.h"
-#include "StoreVocab.h"
-#include "../../legacy/Util2.h"
-#include "../../legacy/InputFileStream.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-///////////////////////////////////////////////////////////////////////
-void Node::Add(Table &table, const SourcePhrase &sourcePhrase, size_t pos)
-{
- if (pos < sourcePhrase.size()) {
- uint64_t vocabId = sourcePhrase[pos];
-
- Node *child;
- Children::iterator iter = m_children.find(vocabId);
- if (iter == m_children.end()) {
- // New node. Write other children then discard them
- BOOST_FOREACH(Children::value_type &valPair, m_children) {
- Node &otherChild = valPair.second;
- otherChild.Write(table);
- }
- m_children.clear();
-
- // create new node
- child = &m_children[vocabId];
- assert(!child->done);
- child->key = key + (vocabId << pos);
- }
- else {
- child = &iter->second;
- }
-
- child->Add(table, sourcePhrase, pos + 1);
- }
- else {
- // this node was written previously 'cos it has rules
- done = true;
- }
-}
-
-void Node::Write(Table &table)
-{
- //cerr << "START write " << done << " " << key << endl;
- BOOST_FOREACH(Children::value_type &valPair, m_children) {
- Node &child = valPair.second;
- child.Write(table);
- }
-
- if (!done) {
- // save
- Entry sourceEntry;
- sourceEntry.value = NONE;
- sourceEntry.key = key;
-
- //Put into table
- table.Insert(sourceEntry);
- }
-}
-
-///////////////////////////////////////////////////////////////////////
-void createProbingPT(const std::string &phrasetable_path,
- const std::string &basepath, int num_scores, int num_lex_scores,
- bool log_prob, int max_cache_size, bool scfg)
-{
- std::cerr << "Starting..." << std::endl;
-
- //Get basepath and create directory if missing
- mkdir(basepath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
-
- StoreTarget storeTarget(basepath);
-
- //Get uniq lines:
- unsigned long uniq_entries = countUniqueSource(phrasetable_path);
-
- //Source phrase vocabids
- StoreVocab<uint64_t> sourceVocab(basepath + "/source_vocabids");
-
- //Read the file
- util::FilePiece filein(phrasetable_path.c_str());
-
- //Init the probing hash table
- size_t size = Table::Size(uniq_entries, 1.2);
- char * mem = new char[size];
- memset(mem, 0, size);
- Table sourceEntries(mem, size);
-
- std::priority_queue<CacheItem*, std::vector<CacheItem*>, CacheItemOrderer> cache;
- float totalSourceCount = 0;
-
- //Keep track of the size of each group of target phrases
- size_t line_num = 0;
-
- //Read everything and processs
- std::string prevSource;
-
- Node sourcePhrases;
- sourcePhrases.done = true;
- sourcePhrases.key = 0;
-
- while (true) {
- try {
- //Process line read
- line_text line;
- line = splitLine(filein.ReadLine(), scfg);
- //cerr << "line=" << line.source_phrase << endl;
-
- ++line_num;
- if (line_num % 1000000 == 0) {
- std::cerr << line_num << " " << std::flush;
- }
-
- //Add source phrases to vocabularyIDs
- add_to_map(sourceVocab, line.source_phrase);
-
- if (prevSource.empty()) {
- // 1st line
- prevSource = line.source_phrase.as_string();
- storeTarget.Append(line, log_prob, scfg);
- }
- else if (prevSource == line.source_phrase) {
- //If we still have the same line, just append to it:
- storeTarget.Append(line, log_prob, scfg);
- }
- else {
- assert(prevSource != line.source_phrase);
-
- //Create a new entry even
-
- // save
- uint64_t targetInd = storeTarget.Save();
-
- // next line
- storeTarget.Append(line, log_prob, scfg);
-
- //Create an entry for the previous source phrase:
- Entry sourceEntry;
- sourceEntry.value = targetInd;
- //The key is the sum of hashes of individual words bitshifted by their position in the phrase.
- //Probably not entirerly correct, but fast and seems to work fine in practise.
- std::vector<uint64_t> vocabid_source = getVocabIDs(prevSource);
- if (scfg) {
- // storing prefixes?
- sourcePhrases.Add(sourceEntries, vocabid_source);
- }
- sourceEntry.key = getKey(vocabid_source);
-
- /*
- cerr << "prevSource=" << prevSource << flush
- << " vocabids=" << Debug(vocabid_source) << flush
- << " key=" << sourceEntry.key << endl;
- */
- //Put into table
- sourceEntries.Insert(sourceEntry);
-
- // update cache - CURRENT source phrase, not prev
- if (max_cache_size) {
- std::string countStr = line.counts.as_string();
- countStr = Trim(countStr);
- if (!countStr.empty()) {
- std::vector<float> toks = Tokenize<float>(countStr);
- //cerr << "CACHE:" << line.source_phrase << " " << countStr << " " << toks[1] << endl;
-
- if (toks.size() >= 2) {
- totalSourceCount += toks[1];
-
- // compute key for CURRENT source
- std::vector<uint64_t> currVocabidSource = getVocabIDs(line.source_phrase.as_string());
- uint64_t currKey = getKey(currVocabidSource);
-
- CacheItem *item = new CacheItem(
- Trim(line.source_phrase.as_string()),
- currKey,
- toks[1]);
- cache.push(item);
-
- if (max_cache_size > 0 && cache.size() > max_cache_size) {
- cache.pop();
- }
- }
- }
- }
-
- //Set prevLine
- prevSource = line.source_phrase.as_string();
- }
-
- }
- catch (util::EndOfFileException e) {
- std::cerr
- << "Reading phrase table finished, writing remaining files to disk."
- << std::endl;
-
- //After the final entry is constructed we need to add it to the phrase_table
- //Create an entry for the previous source phrase:
- uint64_t targetInd = storeTarget.Save();
-
- Entry sourceEntry;
- sourceEntry.value = targetInd;
-
- //The key is the sum of hashes of individual words. Probably not entirerly correct, but fast
- std::vector<uint64_t> vocabid_source = getVocabIDs(prevSource);
- sourceEntry.key = getKey(vocabid_source);
-
- //Put into table
- sourceEntries.Insert(sourceEntry);
-
- break;
- }
- }
-
- sourcePhrases.Write(sourceEntries);
-
- storeTarget.SaveAlignment();
-
- serialize_table(mem, size, (basepath + "/probing_hash.dat"));
-
- sourceVocab.Save();
-
- serialize_cache(cache, (basepath + "/cache"), totalSourceCount);
-
- delete[] mem;
-
- //Write configfile
- std::ofstream configfile;
- configfile.open((basepath + "/config").c_str());
- configfile << "API_VERSION\t" << API_VERSION << '\n';
- configfile << "uniq_entries\t" << uniq_entries << '\n';
- configfile << "num_scores\t" << num_scores << '\n';
- configfile << "num_lex_scores\t" << num_lex_scores << '\n';
- configfile << "log_prob\t" << log_prob << '\n';
- configfile.close();
-}
-
-size_t countUniqueSource(const std::string &path)
-{
- size_t ret = 0;
- InputFileStream strme(path);
-
- std::string line, prevSource;
- while (std::getline(strme, line)) {
- std::vector<std::string> toks = TokenizeMultiCharSeparator(line, "|||");
- assert(toks.size() != 0);
-
- if (prevSource != toks[0]) {
- prevSource = toks[0];
- ++ret;
- }
- }
-
- return ret;
-}
-
-void serialize_cache(
- std::priority_queue<CacheItem*, std::vector<CacheItem*>, CacheItemOrderer> &cache,
- const std::string &path, float totalSourceCount)
-{
- std::vector<const CacheItem*> vec(cache.size());
-
- size_t ind = cache.size() - 1;
- while (!cache.empty()) {
- const CacheItem *item = cache.top();
- vec[ind] = item;
- cache.pop();
- --ind;
- }
-
- std::ofstream os(path.c_str());
-
- os << totalSourceCount << std::endl;
- for (size_t i = 0; i < vec.size(); ++i) {
- const CacheItem *item = vec[i];
- os << item->count << "\t" << item->sourceKey << "\t" << item->source << std::endl;
- delete item;
- }
-
- os.close();
-}
-
-uint64_t getKey(const std::vector<uint64_t> &vocabid_source)
-{
- return Moses2::getKey(vocabid_source.data(), vocabid_source.size());
-}
-
-std::vector<uint64_t> CreatePrefix(const std::vector<uint64_t> &vocabid_source, size_t endPos)
-{
- assert(endPos < vocabid_source.size());
-
- std::vector<uint64_t> ret(endPos + 1);
- for (size_t i = 0; i <= endPos; ++i) {
- ret[i] = vocabid_source[i];
- }
- return ret;
-}
-
-}
-
diff --git a/contrib/moses2/TranslationModel/ProbingPT/storing.hh b/contrib/moses2/TranslationModel/ProbingPT/storing.hh
deleted file mode 100644
index 10d7050d3..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/storing.hh
+++ /dev/null
@@ -1,95 +0,0 @@
-#pragma once
-
-#include <boost/unordered_set.hpp>
-#include <boost/unordered_map.hpp>
-#include <cstdio>
-#include <sstream>
-#include <fstream>
-#include <iostream>
-#include <string>
-#include <queue>
-#include <sys/stat.h> //mkdir
-
-#include "hash.hh" //Includes line_splitter
-#include "probing_hash_utils.hh"
-
-#include "util/file_piece.hh"
-#include "util/file.hh"
-#include "vocabid.hh"
-
-namespace Moses2
-{
-typedef std::vector<uint64_t> SourcePhrase;
-
-
-class Node
-{
- typedef boost::unordered_map<uint64_t, Node> Children;
- Children m_children;
-
-public:
- uint64_t key;
- bool done;
-
- Node()
- :done(false)
- {}
-
- void Add(Table &table, const SourcePhrase &sourcePhrase, size_t pos = 0);
- void Write(Table &table);
-};
-
-
-void createProbingPT(const std::string &phrasetable_path,
- const std::string &basepath, int num_scores, int num_lex_scores,
- bool log_prob, int max_cache_size, bool scfg);
-uint64_t getKey(const std::vector<uint64_t> &source_phrase);
-
-std::vector<uint64_t> CreatePrefix(const std::vector<uint64_t> &vocabid_source, size_t endPos);
-
-template<typename T>
-std::string Debug(const std::vector<T> &vec)
-{
- std::stringstream strm;
- for (size_t i = 0; i < vec.size(); ++i) {
- strm << vec[i] << " ";
- }
- return strm.str();
-}
-
-size_t countUniqueSource(const std::string &path);
-
-class CacheItem
-{
-public:
- std::string source;
- uint64_t sourceKey;
- float count;
- CacheItem(const std::string &vSource, uint64_t vSourceKey, float vCount)
- :source(vSource)
- ,sourceKey(vSourceKey)
- ,count(vCount)
- {
- }
-
- bool operator<(const CacheItem &other) const
- {
- return count > other.count;
- }
-};
-
-class CacheItemOrderer
-{
-public:
- bool operator()(const CacheItem* a, const CacheItem* b) const
- {
- return (*a) < (*b);
- }
-};
-
-void serialize_cache(
- std::priority_queue<CacheItem*, std::vector<CacheItem*>, CacheItemOrderer> &cache,
- const std::string &path, float totalSourceCount);
-
-}
-
diff --git a/contrib/moses2/TranslationModel/ProbingPT/vocabid.cpp b/contrib/moses2/TranslationModel/ProbingPT/vocabid.cpp
deleted file mode 100644
index 696373ee5..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/vocabid.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-#include <boost/foreach.hpp>
-#include "vocabid.hh"
-#include "StoreVocab.h"
-#include "../../legacy/Util2.h"
-
-namespace Moses2
-{
-
-void add_to_map(StoreVocab<uint64_t> &sourceVocab,
- const StringPiece &textin)
-{
- //Tokenize
- util::TokenIter<util::SingleCharacter> itWord(textin, util::SingleCharacter(' '));
-
- while (itWord) {
- StringPiece word = *itWord;
-
- util::TokenIter<util::SingleCharacter> itFactor(word, util::SingleCharacter('|'));
- while (itFactor) {
- StringPiece factor = *itFactor;
-
- sourceVocab.Insert(getHash(factor), factor.as_string());
- itFactor++;
- }
- itWord++;
- }
-}
-
-void serialize_map(const std::map<uint64_t, std::string> &karta,
- const std::string &filename)
-{
- std::ofstream os(filename.c_str());
-
- std::map<uint64_t, std::string>::const_iterator iter;
- for (iter = karta.begin(); iter != karta.end(); ++iter) {
- os << iter->first << '\t' << iter->second << std::endl;
- }
-
- os.close();
-}
-
-void read_map(std::map<uint64_t, std::string> &karta, const char* filename)
-{
- std::ifstream is(filename);
-
- std::string line;
- while (getline(is, line)) {
- std::vector<std::string> toks = Tokenize(line, "\t");
- assert(toks.size() == 2);
- uint64_t ind = Scan<uint64_t>(toks[1]);
- karta[ind] = toks[0];
- }
-
- //Close the stream after we are done.
- is.close();
-}
-
-}
-
diff --git a/contrib/moses2/TranslationModel/ProbingPT/vocabid.hh b/contrib/moses2/TranslationModel/ProbingPT/vocabid.hh
deleted file mode 100644
index 55d99d453..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/vocabid.hh
+++ /dev/null
@@ -1,29 +0,0 @@
-//Serialization
-#include <boost/serialization/serialization.hpp>
-#include <boost/serialization/map.hpp>
-#include <boost/archive/text_iarchive.hpp>
-#include <boost/archive/text_oarchive.hpp>
-#include <fstream>
-#include <iostream>
-#include <vector>
-
-#include <map> //Container
-#include "hash.hh" //Hash of elements
-
-#include "util/string_piece.hh" //Tokenization and work with StringPiece
-#include "util/tokenize_piece.hh"
-
-namespace Moses2
-{
-template<typename VOCABID>
-class StoreVocab;
-
-void add_to_map(StoreVocab<uint64_t> &sourceVocab,
- const StringPiece &textin);
-
-void serialize_map(const std::map<uint64_t, std::string> &karta,
- const std::string &filename);
-
-void read_map(std::map<uint64_t, std::string> &karta, const char* filename);
-
-}
diff --git a/contrib/moses2/TranslationModel/Transliteration.h b/contrib/moses2/TranslationModel/Transliteration.h
deleted file mode 100644
index 15f262ac8..000000000
--- a/contrib/moses2/TranslationModel/Transliteration.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Transliteration.h
- *
- * Created on: 28 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include "PhraseTable.h"
-
-namespace Moses2
-{
-class Sentence;
-class InputPaths;
-class Range;
-
-class Transliteration: public PhraseTable
-{
-public:
- Transliteration(size_t startInd, const std::string &line);
- virtual ~Transliteration();
-
- void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const;
- virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool,
- InputPath &inputPath) const;
-
- virtual void
- EvaluateInIsolation(const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- virtual void InitActiveChart(
- MemPool &pool,
- const SCFG::Manager &mgr,
- SCFG::InputPath &path) const;
-
- void Lookup(MemPool &pool,
- const SCFG::Manager &mgr,
- size_t maxChartSpan,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const;
-
- void LookupUnary(MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const;
-
-protected:
- virtual void LookupNT(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const Moses2::Range &subPhraseRange,
- const SCFG::InputPath &prevPath,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &outPath) const;
-
- virtual void LookupGivenWord(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::InputPath &prevPath,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const;
-
- virtual void LookupGivenNode(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::ActiveChartEntry &prevEntry,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const;
-
- void SetParameter(const std::string& key, const std::string& value);
-
-protected:
- std::string m_filePath;
- std::string m_mosesDir, m_scriptDir, m_externalDir, m_inputLang, m_outputLang;
-
- std::vector<TargetPhraseImpl*> CreateTargetPhrases(
- const Manager &mgr,
- MemPool &pool,
- const SubPhrase<Moses2::Word> &sourcePhrase,
- const std::string &outDir) const;
-
-};
-
-}
-
diff --git a/contrib/moses2/TranslationModel/UnknownWordPenalty.h b/contrib/moses2/TranslationModel/UnknownWordPenalty.h
deleted file mode 100644
index 52c235a36..000000000
--- a/contrib/moses2/TranslationModel/UnknownWordPenalty.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * UnknownWordPenalty.h
- *
- * Created on: 28 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include "PhraseTable.h"
-
-namespace Moses2
-{
-class Sentence;
-class InputPaths;
-class Range;
-
-class UnknownWordPenalty: public PhraseTable
-{
-public:
- UnknownWordPenalty(size_t startInd, const std::string &line);
- virtual ~UnknownWordPenalty();
-
- virtual void SetParameter(const std::string& key, const std::string& value);
-
- void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const;
- virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool,
- InputPath &inputPath) const;
-
- void ProcessXML(
- const Manager &mgr,
- MemPool &pool,
- const Sentence &sentence,
- InputPaths &inputPaths) const;
-
- virtual void
- EvaluateInIsolation(const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- virtual void InitActiveChart(
- MemPool &pool,
- const SCFG::Manager &mgr,
- SCFG::InputPath &path) const;
-
- void Lookup(MemPool &pool,
- const SCFG::Manager &mgr,
- size_t maxChartSpan,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const;
-
- void LookupUnary(MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const;
-
-protected:
- virtual void LookupNT(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const Moses2::Range &subPhraseRange,
- const SCFG::InputPath &prevPath,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &outPath) const;
-
- virtual void LookupGivenWord(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::InputPath &prevPath,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const;
-
- virtual void LookupGivenNode(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::ActiveChartEntry &prevEntry,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const;
-protected:
- bool m_drop;
- std::string m_prefix, m_suffix;
-};
-
-}
-
diff --git a/contrib/moses2/defer/CubePruningBitmapStack/Misc.cpp b/contrib/moses2/defer/CubePruningBitmapStack/Misc.cpp
deleted file mode 100644
index 5eb7893f2..000000000
--- a/contrib/moses2/defer/CubePruningBitmapStack/Misc.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * CubePruning.cpp
- *
- * Created on: 27 Nov 2015
- * Author: hieu
- */
-
-#include "Misc.h"
-#include "Stack.h"
-#include "../Manager.h"
-#include "../../MemPool.h"
-#include "../../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningBitmapStack
-{
-
-////////////////////////////////////////////////////////////////////////
-QueueItem *QueueItem::Create(QueueItem *currItem,
- Manager &mgr,
- CubeEdge &edge,
- size_t hypoIndex,
- size_t tpIndex,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- QueueItem *ret;
- if (currItem) {
- // reuse incoming queue item to create new item
- ret = currItem;
- ret->Init(mgr, edge, hypoIndex, tpIndex);
- }
- else if (!queueItemRecycler.empty()) {
- // use item from recycle bin
- ret = queueItemRecycler.back();
- ret->Init(mgr, edge, hypoIndex, tpIndex);
- queueItemRecycler.pop_back();
- }
- else {
- // create new item
- ret = new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge, hypoIndex, tpIndex);
- }
-
- return ret;
-}
-
-QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
-:edge(&edge)
-,hypoIndex(hypoIndex)
-,tpIndex(tpIndex)
-{
- CreateHypothesis(mgr);
-}
-
-void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
-{
- this->edge = &edge;
- this->hypoIndex = hypoIndex;
- this->tpIndex = tpIndex;
-
- CreateHypothesis(mgr);
-}
-
-void QueueItem::CreateHypothesis(Manager &mgr)
-{
- const Hypothesis *prevHypo = edge->hypos[hypoIndex];
- const TargetPhrase &tp = edge->tps[tpIndex];
-
- //cerr << "hypoIndex=" << hypoIndex << endl;
- //cerr << "edge.hypos=" << edge.hypos.size() << endl;
- //cerr << prevHypo << endl;
- //cerr << *prevHypo << endl;
-
- hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
- hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore);
- hypo->EvaluateWhenApplied();
-}
-
-////////////////////////////////////////////////////////////////////////
-CubeEdge::CubeEdge(
- Manager &mgr,
- const Hypotheses &hypos,
- const InputPath &path,
- const TargetPhrases &tps,
- const Bitmap &newBitmap)
-:hypos(hypos)
-,path(path)
-,tps(tps)
-,newBitmap(newBitmap)
-{
- estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
-}
-
-std::ostream& operator<<(std::ostream &out, const CubeEdge &obj)
-{
- out << obj.newBitmap;
- return out;
-}
-
-bool
-CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const
-{
- //UTIL_THROW_IF2(x >= (1<<17), "Error");
- //UTIL_THROW_IF2(y >= (1<<17), "Error");
-
- SeenPositionItem val(this, (x<<16) + y);
- std::pair<SeenPositions::iterator, bool> pairRet = seenPositions.insert(val);
- return pairRet.second;
-}
-
-void CubeEdge::CreateFirst(Manager &mgr,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- assert(hypos.size());
- assert(tps.GetSize());
-
- QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler);
- queue.push(item);
- bool setSeen = SetSeenPosition(0, 0, seenPositions);
- assert(setSeen);
-}
-
-void CubeEdge::CreateNext(Manager &mgr,
- QueueItem *item,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- size_t hypoIndex = item->hypoIndex;
- size_t tpIndex = item->tpIndex;
-
- if (hypoIndex + 1 < hypos.size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) {
- // reuse incoming queue item to create new item
- QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler);
- assert(newItem == item);
- queue.push(newItem);
- item = NULL;
- }
-
- if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) {
- QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler);
- queue.push(newItem);
- item = NULL;
- }
-
- if (item) {
- // recycle unused queue item
- queueItemRecycler.push_back(item);
- }
-}
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningBitmapStack/Misc.h b/contrib/moses2/defer/CubePruningBitmapStack/Misc.h
deleted file mode 100644
index 00f3fa865..000000000
--- a/contrib/moses2/defer/CubePruningBitmapStack/Misc.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * CubePruning.h
- *
- * Created on: 27 Nov 2015
- * Author: hieu
- */
-#pragma once
-#include <boost/pool/pool_alloc.hpp>
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
-#include <vector>
-#include <queue>
-#include "../../legacy/Range.h"
-#include "../Hypothesis.h"
-#include "../../TypeDef.h"
-#include "../../Vector.h"
-#include "Stack.h"
-
-namespace Moses2
-{
-
-class Manager;
-class InputPath;
-class TargetPhrases;
-class Bitmap;
-
-namespace NSCubePruningBitmapStack
-{
-class CubeEdge;
-
-///////////////////////////////////////////
-class QueueItem
-{
- ~QueueItem(); // NOT IMPLEMENTED. Use MemPool
-public:
- static QueueItem *Create(QueueItem *currItem,
- Manager &mgr,
- CubeEdge &edge,
- size_t hypoIndex,
- size_t tpIndex,
- std::deque<QueueItem*> &queueItemRecycler);
- QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
-
- void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
-
- CubeEdge *edge;
- size_t hypoIndex, tpIndex;
- Hypothesis *hypo;
-
-protected:
- void CreateHypothesis(Manager &mgr);
-};
-
-///////////////////////////////////////////
-class QueueItemOrderer
-{
-public:
- bool operator()(QueueItem* itemA, QueueItem* itemB) const {
- HypothesisFutureScoreOrderer orderer;
- return !orderer(itemA->hypo, itemB->hypo);
- }
-};
-
-///////////////////////////////////////////
-class CubeEdge
-{
- friend std::ostream& operator<<(std::ostream &, const CubeEdge &);
-
-public:
- typedef std::priority_queue<QueueItem*,
- std::vector<QueueItem*>,
- QueueItemOrderer> Queue;
-
- typedef std::pair<const CubeEdge*, int> SeenPositionItem;
- typedef boost::unordered_set<SeenPositionItem,
- boost::hash<SeenPositionItem>,
- std::equal_to<SeenPositionItem> > SeenPositions;
-
- const Hypotheses &hypos;
- const InputPath &path;
- const TargetPhrases &tps;
- const Bitmap &newBitmap;
- SCORE estimatedScore;
-
- CubeEdge(Manager &mgr,
- const Hypotheses &hypos,
- const InputPath &path,
- const TargetPhrases &tps,
- const Bitmap &newBitmap);
-
- bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const;
-
- void CreateFirst(Manager &mgr,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler);
- void CreateNext(Manager &mgr,
- QueueItem *item,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler);
-
-protected:
-
-};
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningBitmapStack/Search.cpp b/contrib/moses2/defer/CubePruningBitmapStack/Search.cpp
deleted file mode 100644
index 6188edfa4..000000000
--- a/contrib/moses2/defer/CubePruningBitmapStack/Search.cpp
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * Search.cpp
- *
- * Created on: 16 Nov 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include "Search.h"
-#include "Stack.h"
-#include "../Manager.h"
-#include "../Hypothesis.h"
-#include "../../InputPaths.h"
-#include "../../InputPath.h"
-#include "../../System.h"
-#include "../../Sentence.h"
-#include "../../TranslationTask.h"
-#include "../../legacy/Util2.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningBitmapStack
-{
-
-////////////////////////////////////////////////////////////////////////
-Search::Search(Manager &mgr)
-:Moses2::Search(mgr)
-,m_stack(mgr)
-
-,m_queue(QueueItemOrderer(), std::vector<QueueItem*>() )
-
-,m_seenPositions()
-{
-}
-
-Search::~Search()
-{
-}
-
-void Search::Decode()
-{
- // init cue edges
- m_cubeEdges.resize(mgr.GetInput().GetSize() + 1);
- for (size_t i = 0; i < m_cubeEdges.size(); ++i) {
- m_cubeEdges[i] = new (mgr.GetPool().Allocate<CubeEdges>()) CubeEdges();
- }
-
- const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
- Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
- initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap);
- initHypo->EmptyHypothesisState(mgr.GetInput());
-
- m_stack.Add(initHypo, mgr.GetHypoRecycle());
- PostDecode(0);
-
- for (size_t stackInd = 1; stackInd < mgr.GetInput().GetSize() + 1; ++stackInd) {
- //cerr << "stackInd=" << stackInd << endl;
- m_stack.Clear();
- Decode(stackInd);
- PostDecode(stackInd);
-
- //m_stack.DebugCounts();
- //cerr << m_stacks << endl;
- }
-
-}
-
-void Search::Decode(size_t stackInd)
-{
- Recycler<Hypothesis*> &hypoRecycler = mgr.GetHypoRecycle();
-
- // reuse queue from previous stack. Clear it first
- std::vector<QueueItem*> &container = Container(m_queue);
- //cerr << "container=" << container.size() << endl;
- BOOST_FOREACH(QueueItem *item, container) {
- // recycle unused hypos from queue
- Hypothesis *hypo = item->hypo;
- hypoRecycler.Recycle(hypo);
-
- // recycle queue item
- m_queueItemRecycler.push_back(item);
- }
- container.clear();
-
- m_seenPositions.clear();
-
- // add top hypo from every edge into queue
- CubeEdges &edges = *m_cubeEdges[stackInd];
-
- BOOST_FOREACH(CubeEdge *edge, edges) {
- //cerr << *edge << " ";
- edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
- }
-
- /*
- cerr << "edges: ";
- boost::unordered_set<const Bitmap*> uniqueBM;
- BOOST_FOREACH(CubeEdge *edge, edges) {
- uniqueBM.insert(&edge->newBitmap);
- //cerr << *edge << " ";
- }
- cerr << edges.size() << " " << uniqueBM.size();
- cerr << endl;
- */
-
- size_t pops = 0;
- while (!m_queue.empty() && pops < mgr.system.popLimit) {
- // get best hypo from queue, add to stack
- //cerr << "queue=" << queue.size() << endl;
- QueueItem *item = m_queue.top();
- m_queue.pop();
-
- CubeEdge *edge = item->edge;
-
- // add hypo to stack
- Hypothesis *hypo = item->hypo;
- //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
- m_stack.Add(hypo, hypoRecycler);
-
- edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
-
- ++pops;
- }
-
- /*
- // create hypo from every edge. Increase diversity
- while (!m_queue.empty()) {
- QueueItem *item = m_queue.top();
- m_queue.pop();
-
- if (item->hypoIndex == 0 && item->tpIndex == 0) {
- CubeEdge &edge = item->edge;
-
- // add hypo to stack
- Hypothesis *hypo = item->hypo;
- //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
- m_stacks.Add(hypo, mgr.GetHypoRecycle());
- }
- }
- */
-}
-
-void Search::PostDecode(size_t stackInd)
-{
- MemPool &pool = mgr.GetPool();
-
- Stack::SortedHypos sortedHypos = m_stack.GetSortedAndPruneHypos(mgr);
-
- BOOST_FOREACH(const Stack::SortedHypos::value_type &val, sortedHypos) {
- const Bitmap &hypoBitmap = *val.first.first;
- size_t hypoEndPos = val.first.second;
- //cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl;
-
- // create edges to next hypos from existing hypos
- const InputPaths &paths = mgr.GetInputPaths();
-
- BOOST_FOREACH(const InputPath *path, paths) {
- const Range &pathRange = path->range;
- //cerr << "pathRange=" << pathRange << endl;
-
- if (!path->IsUsed()) {
- continue;
- }
- if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
- continue;
- }
-
- const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
- size_t numWords = newBitmap.GetNumWordsCovered();
-
- CubeEdges &edges = *m_cubeEdges[numWords];
-
- // sort hypo for a particular bitmap and hypoEndPos
- Hypotheses &sortedHypos = *val.second;
-
- size_t numPt = mgr.system.mappings.size();
- for (size_t i = 0; i < numPt; ++i) {
- const TargetPhrases *tps = path->targetPhrases[i];
- if (tps && tps->GetSize()) {
- CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, sortedHypos, *path, *tps, newBitmap);
- edges.push_back(edge);
- }
- }
- }
- }
-
-}
-
-const Hypothesis *Search::GetBestHypo() const
-{
- std::vector<const Hypothesis*> sortedHypos = m_stack.GetBestHypos(1);
-
- const Hypothesis *best = NULL;
- if (sortedHypos.size()) {
- best = sortedHypos[0];
- }
- return best;
-}
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningBitmapStack/Search.h b/contrib/moses2/defer/CubePruningBitmapStack/Search.h
deleted file mode 100644
index 7e58ba91f..000000000
--- a/contrib/moses2/defer/CubePruningBitmapStack/Search.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Search.h
- *
- * Created on: 16 Nov 2015
- * Author: hieu
- */
-
-#pragma once
-#include <boost/pool/pool_alloc.hpp>
-#include "../Search.h"
-#include "Misc.h"
-#include "Stack.h"
-#include "../../legacy/Range.h"
-
-namespace Moses2
-{
-
-class Bitmap;
-class Hypothesis;
-class InputPath;
-class TargetPhrases;
-
-namespace NSCubePruningBitmapStack
-{
-
-class Search : public Moses2::Search
-{
-public:
- Search(Manager &mgr);
- virtual ~Search();
-
- virtual void Decode();
- const Hypothesis *GetBestHypo() const;
-
-protected:
- Stack m_stack;
-
- CubeEdge::Queue m_queue;
- CubeEdge::SeenPositions m_seenPositions;
-
- // CUBE PRUNING VARIABLES
- // setup
- typedef std::vector<CubeEdge*> CubeEdges;
- std::vector<CubeEdges*> m_cubeEdges;
-
- std::deque<QueueItem*> m_queueItemRecycler;
-
- // CUBE PRUNING
- // decoding
- void Decode(size_t stackInd);
- void PostDecode(size_t stackInd);
-};
-
-}
-
-}
-
diff --git a/contrib/moses2/defer/CubePruningBitmapStack/Stack.cpp b/contrib/moses2/defer/CubePruningBitmapStack/Stack.cpp
deleted file mode 100644
index 4dfa3b6f4..000000000
--- a/contrib/moses2/defer/CubePruningBitmapStack/Stack.cpp
+++ /dev/null
@@ -1,303 +0,0 @@
-/*
- * Stack.cpp
- *
- * Created on: 24 Oct 2015
- * Author: hieu
- */
-#include <algorithm>
-#include <boost/foreach.hpp>
-#include "Stack.h"
-#include "../Hypothesis.h"
-#include "../Manager.h"
-#include "../../Scores.h"
-#include "../../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningBitmapStack
-{
-MiniStack::MiniStack(const Manager &mgr)
-:m_coll()
-,m_sortedHypos(NULL)
-{}
-
-StackAdd MiniStack::Add(const Hypothesis *hypo)
-{
- std::pair<_HCType::iterator, bool> addRet = m_coll.insert(hypo);
-
- // CHECK RECOMBINATION
- if (addRet.second) {
- // equiv hypo doesn't exists
- return StackAdd(true, NULL);
- }
- else {
- const Hypothesis *hypoExisting = *addRet.first;
- if (hypo->GetScores().GetTotalScore() > hypoExisting->GetScores().GetTotalScore()) {
- // incoming hypo is better than the one we have
- const Hypothesis *const &hypoExisting1 = *addRet.first;
- const Hypothesis *&hypoExisting2 = const_cast<const Hypothesis *&>(hypoExisting1);
- hypoExisting2 = hypo;
-
- return StackAdd(true, const_cast<Hypothesis*>(hypoExisting));
- }
- else {
- // already storing the best hypo. discard incoming hypo
- return StackAdd(false, const_cast<Hypothesis*>(hypo));
- }
- }
-
- assert(false);
-}
-
-Hypotheses &MiniStack::GetSortedAndPruneHypos(const Manager &mgr) const
-{
- if (m_sortedHypos == NULL) {
- // create sortedHypos first
- MemPool &pool = mgr.GetPool();
- m_sortedHypos = new (pool.Allocate< Vector<const Hypothesis*> >()) Vector<const Hypothesis*>(pool, m_coll.size());
-
- size_t ind = 0;
- BOOST_FOREACH(const Hypothesis *hypo, m_coll) {
- (*m_sortedHypos)[ind] = hypo;
- ++ind;
- }
-
- SortAndPruneHypos(mgr);
- }
-
- return *m_sortedHypos;
-}
-
-void MiniStack::SortAndPruneHypos(const Manager &mgr) const
-{
- size_t stackSize = mgr.system.stackSize;
- Recycler<Hypothesis*> &recycler = mgr.GetHypoRecycle();
-
- /*
- cerr << "UNSORTED hypos:" << endl;
- for (size_t i = 0; i < hypos.size(); ++i) {
- const Hypothesis *hypo = hypos[i];
- cerr << *hypo << endl;
- }
- cerr << endl;
- */
- Hypotheses::iterator iterMiddle;
- iterMiddle = (stackSize == 0 || m_sortedHypos->size() < stackSize)
- ? m_sortedHypos->end()
- : m_sortedHypos->begin() + stackSize;
-
- std::partial_sort(m_sortedHypos->begin(), iterMiddle, m_sortedHypos->end(),
- HypothesisFutureScoreOrderer());
-
- // prune
- if (stackSize && m_sortedHypos->size() > stackSize) {
- for (size_t i = stackSize; i < m_sortedHypos->size(); ++i) {
- Hypothesis *hypo = const_cast<Hypothesis*>((*m_sortedHypos)[i]);
- recycler.Recycle(hypo);
- }
- m_sortedHypos->resize(stackSize);
- }
-
- /*
- cerr << "sorted hypos:" << endl;
- for (size_t i = 0; i < hypos.size(); ++i) {
- const Hypothesis *hypo = hypos[i];
- cerr << hypo << " " << *hypo << endl;
- }
- cerr << endl;
- */
-
-}
-
-void MiniStack::Clear()
-{
- m_sortedHypos = NULL;
- m_coll.clear();
-}
-
-///////////////////////////////////////////////////////////////
-Stack::Stack(const Manager &mgr)
-:m_mgr(mgr)
-,m_coll()
-,m_miniStackRecycler()
-{
-}
-
-Stack::~Stack() {
- // TODO Auto-generated destructor stub
-}
-
-void Stack::Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle)
-{
- HypoCoverageInternal key = &hypo->GetBitmap();
- StackAdd added = GetMiniStack(key).Add(hypo);
-
- if (added.toBeDeleted) {
- hypoRecycle.Recycle(added.toBeDeleted);
- }
-}
-
-std::vector<const Hypothesis*> Stack::GetBestHypos(size_t num) const
-{
- std::vector<const Hypothesis*> ret;
- BOOST_FOREACH(const Coll::value_type &val, m_coll) {
- const MiniStack::_HCType &hypos = val.second->GetColl();
- ret.insert(ret.end(), hypos.begin(), hypos.end());
- }
-
- std::vector<const Hypothesis*>::iterator iterMiddle;
- iterMiddle = (num == 0 || ret.size() < num)
- ? ret.end()
- : ret.begin()+num;
-
- std::partial_sort(ret.begin(), iterMiddle, ret.end(),
- HypothesisFutureScoreOrderer());
-
- return ret;
-}
-
-size_t Stack::GetHypoSize() const
-{
- size_t ret = 0;
- BOOST_FOREACH(const Coll::value_type &val, m_coll) {
- const MiniStack::_HCType &hypos = val.second->GetColl();
- ret += hypos.size();
- }
- return ret;
-}
-
-MiniStack &Stack::GetMiniStack(const HypoCoverageInternal &key)
-{
- MiniStack *ret;
- Coll::iterator iter = m_coll.find(key);
- if (iter == m_coll.end()) {
- if (m_miniStackRecycler.empty()) {
- ret = new (m_mgr.GetPool().Allocate<MiniStack>()) MiniStack(m_mgr);
- }
- else {
- ret = m_miniStackRecycler.back();
- ret->Clear();
- m_miniStackRecycler.pop_back();
- }
-
- m_coll[key] = ret;
- }
- else {
- ret = iter->second;
- }
- return *ret;
-}
-
-void Stack::Clear()
-{
- BOOST_FOREACH(const Coll::value_type &val, m_coll) {
- MiniStack *miniStack = val.second;
- m_miniStackRecycler.push_back(miniStack);
- }
-
- m_coll.clear();
-}
-
-Stack::SortedHypos Stack::GetSortedAndPruneHypos(const Manager &mgr) const
-{
- SortedHypos ret;
-
- MemPool &pool = mgr.GetPool();
-
- // prune and sort
- Hypotheses *allHypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool, GetHypoSize());
- size_t i = 0;
-
- BOOST_FOREACH(const Coll::value_type &val, m_coll) {
- const MiniStack *miniStack = val.second;
- const MiniStack::MiniStack::_HCType &hypos = miniStack->GetColl();
-
- BOOST_FOREACH(const Hypothesis *hypo, hypos) {
- (*allHypos)[i++] = hypo;
- }
- }
-
- SortAndPruneHypos(mgr, *allHypos);
-
- // divide hypos by [bitmap, last end pos]
- BOOST_FOREACH(const Hypothesis *hypo, *allHypos) {
- HypoCoverage key(&hypo->GetBitmap(), hypo->GetInputPath().range.GetEndPos());
-
- Hypotheses *hypos;
- SortedHypos::iterator iter;
- iter = ret.find(key);
- if (iter == ret.end()) {
- hypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool);
- ret[key] = hypos;
- }
- else {
- hypos = iter->second;
- }
- hypos->push_back(hypo);
- }
-
- return ret;
-}
-
-void Stack::SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const
-{
- size_t stackSize = mgr.system.stackSize;
- Recycler<Hypothesis*> &recycler = mgr.GetHypoRecycle();
-
- /*
- cerr << "UNSORTED hypos:" << endl;
- for (size_t i = 0; i < hypos.size(); ++i) {
- const Hypothesis *hypo = hypos[i];
- cerr << *hypo << endl;
- }
- cerr << endl;
- */
- Hypotheses::iterator iterMiddle;
- iterMiddle = (stackSize == 0 || hypos.size() < stackSize)
- ? hypos.end()
- : hypos.begin() + stackSize;
-
- std::partial_sort(hypos.begin(), iterMiddle, hypos.end(),
- HypothesisFutureScoreOrderer());
-
- // prune
- if (stackSize && hypos.size() > stackSize) {
- for (size_t i = stackSize; i < hypos.size(); ++i) {
- Hypothesis *hypo = const_cast<Hypothesis*>(hypos[i]);
- recycler.Recycle(hypo);
- }
- hypos.resize(stackSize);
- }
-
- /*
- cerr << "sorted hypos:" << endl;
- for (size_t i = 0; i < hypos.size(); ++i) {
- const Hypothesis *hypo = hypos[i];
- cerr << hypo << " " << *hypo << endl;
- }
- cerr << endl;
- */
-
-}
-
-
-void Stack::DebugCounts()
-{
- /*
- cerr << "counts=";
- BOOST_FOREACH(const Coll::value_type &val, GetColl()) {
- const NSCubePruning::MiniStack &miniStack = *val.second;
- size_t count = miniStack.GetColl().size();
- cerr << count << " ";
- }
- cerr << endl;
- */
-}
-
-}
-
-}
-
diff --git a/contrib/moses2/defer/CubePruningBitmapStack/Stack.h b/contrib/moses2/defer/CubePruningBitmapStack/Stack.h
deleted file mode 100644
index d0687ec59..000000000
--- a/contrib/moses2/defer/CubePruningBitmapStack/Stack.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Stack.h
- *
- * Created on: 24 Oct 2015
- * Author: hieu
- */
-#pragma once
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
-#include <deque>
-#include "../Hypothesis.h"
-#include "../../TypeDef.h"
-#include "../../Vector.h"
-#include "../../MemPool.h"
-#include "../../Recycler.h"
-#include "../../legacy/Util2.h"
-
-namespace Moses2
-{
-
-class Manager;
-
-namespace NSCubePruningBitmapStack
-{
-typedef Vector<const Hypothesis*> Hypotheses;
-
-class MiniStack
-{
-public:
- typedef boost::unordered_set<const Hypothesis*,
- UnorderedComparer<Hypothesis>,
- UnorderedComparer<Hypothesis>
- > _HCType;
-
- MiniStack(const Manager &mgr);
-
- StackAdd Add(const Hypothesis *hypo);
-
- _HCType &GetColl()
- { return m_coll; }
-
- const _HCType &GetColl() const
- { return m_coll; }
-
- void Clear();
-
- Hypotheses &GetSortedAndPruneHypos(const Manager &mgr) const;
-
-protected:
- _HCType m_coll;
- mutable Hypotheses *m_sortedHypos;
-
- void SortAndPruneHypos(const Manager &mgr) const;
-
-};
-
-/////////////////////////////////////////////
-class Stack {
-protected:
-
-
-public:
- typedef std::pair<const Bitmap*, size_t> HypoCoverage;
- // bitmap and current endPos of hypos
- typedef boost::unordered_map<HypoCoverage, Hypotheses*> SortedHypos;
-
- typedef const Bitmap* HypoCoverageInternal;
- typedef boost::unordered_map<HypoCoverageInternal, MiniStack*
- ,boost::hash<HypoCoverageInternal>
- ,std::equal_to<HypoCoverageInternal>
- > Coll;
-
-
- Stack(const Manager &mgr);
- virtual ~Stack();
-
- size_t GetHypoSize() const;
-
- Coll &GetColl()
- { return m_coll; }
- const Coll &GetColl() const
- { return m_coll; }
-
- void Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle);
-
- MiniStack &GetMiniStack(const HypoCoverageInternal &key);
-
- std::vector<const Hypothesis*> GetBestHypos(size_t num) const;
- void Clear();
-
- SortedHypos GetSortedAndPruneHypos(const Manager &mgr) const;
- void SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const;
-
- void DebugCounts();
-
-protected:
- const Manager &m_mgr;
- Coll m_coll;
-
- std::deque<MiniStack*> m_miniStackRecycler;
-
-
-};
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningCardinalStack/Misc.cpp b/contrib/moses2/defer/CubePruningCardinalStack/Misc.cpp
deleted file mode 100644
index 8918fdf52..000000000
--- a/contrib/moses2/defer/CubePruningCardinalStack/Misc.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * CubePruning.cpp
- *
- * Created on: 27 Nov 2015
- * Author: hieu
- */
-
-#include "Misc.h"
-#include "Stack.h"
-#include "../Manager.h"
-#include "../../MemPool.h"
-#include "../../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningCardinalStack
-{
-
-////////////////////////////////////////////////////////////////////////
-QueueItem *QueueItem::Create(QueueItem *currItem,
- Manager &mgr,
- CubeEdge &edge,
- size_t hypoIndex,
- size_t tpIndex,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- QueueItem *ret;
- if (currItem) {
- // reuse incoming queue item to create new item
- ret = currItem;
- ret->Init(mgr, edge, hypoIndex, tpIndex);
- }
- else if (!queueItemRecycler.empty()) {
- // use item from recycle bin
- ret = queueItemRecycler.back();
- ret->Init(mgr, edge, hypoIndex, tpIndex);
- queueItemRecycler.pop_back();
- }
- else {
- // create new item
- ret = new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge, hypoIndex, tpIndex);
- }
-
- return ret;
-}
-
-QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
-:edge(&edge)
-,hypoIndex(hypoIndex)
-,tpIndex(tpIndex)
-{
- CreateHypothesis(mgr);
-}
-
-void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
-{
- this->edge = &edge;
- this->hypoIndex = hypoIndex;
- this->tpIndex = tpIndex;
-
- CreateHypothesis(mgr);
-}
-
-void QueueItem::CreateHypothesis(Manager &mgr)
-{
- const Hypothesis *prevHypo = edge->hypos[hypoIndex];
- const TargetPhrase &tp = edge->tps[tpIndex];
-
- //cerr << "hypoIndex=" << hypoIndex << endl;
- //cerr << "edge.hypos=" << edge.hypos.size() << endl;
- //cerr << prevHypo << endl;
- //cerr << *prevHypo << endl;
-
- hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
- hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore);
- hypo->EvaluateWhenApplied();
-}
-
-////////////////////////////////////////////////////////////////////////
-CubeEdge::CubeEdge(
- Manager &mgr,
- const Hypotheses &hypos,
- const InputPath &path,
- const TargetPhrases &tps,
- const Bitmap &newBitmap)
-:hypos(hypos)
-,path(path)
-,tps(tps)
-,newBitmap(newBitmap)
-{
- estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
-}
-
-std::ostream& operator<<(std::ostream &out, const CubeEdge &obj)
-{
- out << obj.newBitmap;
- return out;
-}
-
-bool
-CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const
-{
- //UTIL_THROW_IF2(x >= (1<<17), "Error");
- //UTIL_THROW_IF2(y >= (1<<17), "Error");
-
- SeenPositionItem val(this, (x<<16) + y);
- std::pair<SeenPositions::iterator, bool> pairRet = seenPositions.insert(val);
- return pairRet.second;
-}
-
-void CubeEdge::CreateFirst(Manager &mgr,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- assert(hypos.size());
- assert(tps.GetSize());
-
- QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler);
- queue.push(item);
- bool setSeen = SetSeenPosition(0, 0, seenPositions);
- assert(setSeen);
-}
-
-void CubeEdge::CreateNext(Manager &mgr,
- QueueItem *item,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- size_t hypoIndex = item->hypoIndex;
- size_t tpIndex = item->tpIndex;
-
- if (hypoIndex + 1 < hypos.size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) {
- // reuse incoming queue item to create new item
- QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler);
- assert(newItem == item);
- queue.push(newItem);
- item = NULL;
- }
-
- if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) {
- QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler);
- queue.push(newItem);
- item = NULL;
- }
-
- if (item) {
- // recycle unused queue item
- queueItemRecycler.push_back(item);
- }
-}
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningCardinalStack/Misc.h b/contrib/moses2/defer/CubePruningCardinalStack/Misc.h
deleted file mode 100644
index b86c88519..000000000
--- a/contrib/moses2/defer/CubePruningCardinalStack/Misc.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * CubePruning.h
- *
- * Created on: 27 Nov 2015
- * Author: hieu
- */
-#pragma once
-#include <boost/pool/pool_alloc.hpp>
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
-#include <vector>
-#include <queue>
-#include "../../legacy/Range.h"
-#include "../Hypothesis.h"
-#include "../../TypeDef.h"
-#include "../../Vector.h"
-#include "Stack.h"
-
-namespace Moses2
-{
-
-class Manager;
-class InputPath;
-class TargetPhrases;
-class Bitmap;
-
-namespace NSCubePruningCardinalStack
-{
-class CubeEdge;
-
-///////////////////////////////////////////
-class QueueItem
-{
- ~QueueItem(); // NOT IMPLEMENTED. Use MemPool
-public:
- static QueueItem *Create(QueueItem *currItem,
- Manager &mgr,
- CubeEdge &edge,
- size_t hypoIndex,
- size_t tpIndex,
- std::deque<QueueItem*> &queueItemRecycler);
- QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
-
- void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
-
- CubeEdge *edge;
- size_t hypoIndex, tpIndex;
- Hypothesis *hypo;
-
-protected:
- void CreateHypothesis(Manager &mgr);
-};
-
-///////////////////////////////////////////
-class QueueItemOrderer
-{
-public:
- bool operator()(QueueItem* itemA, QueueItem* itemB) const {
- HypothesisFutureScoreOrderer orderer;
- return !orderer(itemA->hypo, itemB->hypo);
- }
-};
-
-///////////////////////////////////////////
-class CubeEdge
-{
- friend std::ostream& operator<<(std::ostream &, const CubeEdge &);
-
-public:
- typedef std::priority_queue<QueueItem*,
- std::vector<QueueItem*>,
- QueueItemOrderer> Queue;
-
- typedef std::pair<const CubeEdge*, int> SeenPositionItem;
- typedef boost::unordered_set<SeenPositionItem,
- boost::hash<SeenPositionItem>,
- std::equal_to<SeenPositionItem>
- > SeenPositions;
-
- const Hypotheses &hypos;
- const InputPath &path;
- const TargetPhrases &tps;
- const Bitmap &newBitmap;
- SCORE estimatedScore;
-
- CubeEdge(Manager &mgr,
- const Hypotheses &hypos,
- const InputPath &path,
- const TargetPhrases &tps,
- const Bitmap &newBitmap);
-
- bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const;
-
- void CreateFirst(Manager &mgr,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler);
- void CreateNext(Manager &mgr,
- QueueItem *item,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler);
-
-protected:
-
-};
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningCardinalStack/Search.cpp b/contrib/moses2/defer/CubePruningCardinalStack/Search.cpp
deleted file mode 100644
index d4899ae46..000000000
--- a/contrib/moses2/defer/CubePruningCardinalStack/Search.cpp
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * Search.cpp
- *
- * Created on: 16 Nov 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include "Search.h"
-#include "Stack.h"
-#include "../Manager.h"
-#include "../Hypothesis.h"
-#include "../../InputPaths.h"
-#include "../../InputPath.h"
-#include "../../System.h"
-#include "../../Sentence.h"
-#include "../../TranslationTask.h"
-#include "../../legacy/Util2.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningCardinalStack
-{
-
-////////////////////////////////////////////////////////////////////////
-Search::Search(Manager &mgr)
-:Moses2::Search(mgr)
-,m_stack(mgr)
-
-,m_queue(QueueItemOrderer(), std::vector<QueueItem* >() )
-
-,m_seenPositions()
-{
-}
-
-Search::~Search()
-{
-}
-
-void Search::Decode()
-{
- // init cue edges
- m_cubeEdges.resize(mgr.GetInput().GetSize() + 1);
- for (size_t i = 0; i < m_cubeEdges.size(); ++i) {
- m_cubeEdges[i] = new (mgr.GetPool().Allocate<CubeEdges>()) CubeEdges();
- }
-
- const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
- Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
- initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap);
- initHypo->EmptyHypothesisState(mgr.GetInput());
-
- m_stack.Add(initHypo, mgr.GetHypoRecycle());
- PostDecode(0);
-
- for (size_t stackInd = 1; stackInd < mgr.GetInput().GetSize() + 1; ++stackInd) {
- //cerr << "stackInd=" << stackInd << endl;
- m_stack.Clear();
- Decode(stackInd);
- PostDecode(stackInd);
-
- //m_stack.DebugCounts();
- //cerr << m_stacks << endl;
- }
-
-}
-
-void Search::Decode(size_t stackInd)
-{
- Recycler<Hypothesis*> &hypoRecycler = mgr.GetHypoRecycle();
-
- // reuse queue from previous stack. Clear it first
- std::vector<QueueItem*> &container = Container(m_queue);
- //cerr << "container=" << container.size() << endl;
- BOOST_FOREACH(QueueItem *item, container) {
- // recycle unused hypos from queue
- Hypothesis *hypo = item->hypo;
- hypoRecycler.Recycle(hypo);
-
- // recycle queue item
- m_queueItemRecycler.push_back(item);
- }
- container.clear();
-
- m_seenPositions.clear();
-
- // add top hypo from every edge into queue
- CubeEdges &edges = *m_cubeEdges[stackInd];
-
- BOOST_FOREACH(CubeEdge *edge, edges) {
- //cerr << *edge << " ";
- edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
- }
-
- /*
- cerr << "edges: ";
- boost::unordered_set<const Bitmap*> uniqueBM;
- BOOST_FOREACH(CubeEdge *edge, edges) {
- uniqueBM.insert(&edge->newBitmap);
- //cerr << *edge << " ";
- }
- cerr << edges.size() << " " << uniqueBM.size();
- cerr << endl;
- */
-
- size_t pops = 0;
- while (!m_queue.empty() && pops < mgr.system.popLimit) {
- // get best hypo from queue, add to stack
- //cerr << "queue=" << queue.size() << endl;
- QueueItem *item = m_queue.top();
- m_queue.pop();
-
- CubeEdge *edge = item->edge;
-
- // add hypo to stack
- Hypothesis *hypo = item->hypo;
- //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
- m_stack.Add(hypo, hypoRecycler);
-
- edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
-
- ++pops;
- }
-
- /*
- // create hypo from every edge. Increase diversity
- while (!m_queue.empty()) {
- QueueItem *item = m_queue.top();
- m_queue.pop();
-
- if (item->hypoIndex == 0 && item->tpIndex == 0) {
- CubeEdge &edge = item->edge;
-
- // add hypo to stack
- Hypothesis *hypo = item->hypo;
- //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
- m_stacks.Add(hypo, mgr.GetHypoRecycle());
- }
- }
- */
-}
-
-void Search::PostDecode(size_t stackInd)
-{
- MemPool &pool = mgr.GetPool();
-
- Stack::SortedHypos sortedHypos = m_stack.GetSortedAndPruneHypos(mgr);
-
- BOOST_FOREACH(const Stack::SortedHypos::value_type &val, sortedHypos) {
- const Bitmap &hypoBitmap = *val.first.first;
- size_t hypoEndPos = val.first.second;
- //cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl;
-
- // create edges to next hypos from existing hypos
- const InputPaths &paths = mgr.GetInputPaths();
-
- BOOST_FOREACH(const InputPath *path, paths) {
- const Range &pathRange = path->range;
- //cerr << "pathRange=" << pathRange << endl;
-
- if (!path->IsUsed()) {
- continue;
- }
- if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
- continue;
- }
-
- const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
- size_t numWords = newBitmap.GetNumWordsCovered();
-
- CubeEdges &edges = *m_cubeEdges[numWords];
-
- // sort hypo for a particular bitmap and hypoEndPos
- Hypotheses &sortedHypos = *val.second;
-
- size_t numPt = mgr.system.mappings.size();
- for (size_t i = 0; i < numPt; ++i) {
- const TargetPhrases *tps = path->targetPhrases[i];
- if (tps && tps->GetSize()) {
- CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, sortedHypos, *path, *tps, newBitmap);
- edges.push_back(edge);
- }
- }
- }
- }
-
-}
-
-const Hypothesis *Search::GetBestHypo() const
-{
- std::vector<const Hypothesis*> sortedHypos = m_stack.GetBestHypos(1);
-
- const Hypothesis *best = NULL;
- if (sortedHypos.size()) {
- best = sortedHypos[0];
- }
- return best;
-}
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningCardinalStack/Search.h b/contrib/moses2/defer/CubePruningCardinalStack/Search.h
deleted file mode 100644
index e772926a2..000000000
--- a/contrib/moses2/defer/CubePruningCardinalStack/Search.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Search.h
- *
- * Created on: 16 Nov 2015
- * Author: hieu
- */
-
-#pragma once
-#include <boost/pool/pool_alloc.hpp>
-#include "../Search.h"
-#include "Misc.h"
-#include "Stack.h"
-#include "../../legacy/Range.h"
-
-namespace Moses2
-{
-
-class Bitmap;
-class Hypothesis;
-class InputPath;
-class TargetPhrases;
-
-namespace NSCubePruningCardinalStack
-{
-
-class Search : public Moses2::Search
-{
-public:
- Search(Manager &mgr);
- virtual ~Search();
-
- virtual void Decode();
- const Hypothesis *GetBestHypo() const;
-
-protected:
- Stack m_stack;
-
- CubeEdge::Queue m_queue;
- CubeEdge::SeenPositions m_seenPositions;
-
- // CUBE PRUNING VARIABLES
- // setup
- typedef std::vector<CubeEdge*> CubeEdges;
- std::vector<CubeEdges*> m_cubeEdges;
-
- std::deque<QueueItem*> m_queueItemRecycler;
-
- // CUBE PRUNING
- // decoding
- void Decode(size_t stackInd);
- void PostDecode(size_t stackInd);
-};
-
-}
-
-}
-
diff --git a/contrib/moses2/defer/CubePruningCardinalStack/Stack.h b/contrib/moses2/defer/CubePruningCardinalStack/Stack.h
deleted file mode 100644
index d6ae80577..000000000
--- a/contrib/moses2/defer/CubePruningCardinalStack/Stack.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Stack.h
- *
- * Created on: 24 Oct 2015
- * Author: hieu
- */
-#pragma once
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
-#include <deque>
-#include "../Hypothesis.h"
-#include "../../TypeDef.h"
-#include "../../Vector.h"
-#include "../../MemPool.h"
-#include "../../Recycler.h"
-#include "../../legacy/Util2.h"
-
-namespace Moses2
-{
-
-class Manager;
-
-namespace NSCubePruningCardinalStack
-{
-typedef Vector<const Hypothesis*> Hypotheses;
-
-
-/////////////////////////////////////////////
-class Stack {
-protected:
- typedef boost::unordered_set<const Hypothesis*,
- UnorderedComparer<Hypothesis>,
- UnorderedComparer<Hypothesis>
- > _HCType;
-
-public:
- typedef std::pair<const Bitmap*, size_t> HypoCoverage;
- typedef boost::unordered_map<HypoCoverage, Hypotheses*> SortedHypos;
-
- Stack(const Manager &mgr);
- virtual ~Stack();
-
- size_t GetHypoSize() const;
-
- _HCType &GetColl()
- { return m_coll; }
- const _HCType &GetColl() const
- { return m_coll; }
-
- void Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle);
-
- std::vector<const Hypothesis*> GetBestHypos(size_t num) const;
- void Clear();
-
- SortedHypos GetSortedAndPruneHypos(const Manager &mgr) const;
- void SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const;
-
-protected:
- const Manager &m_mgr;
- _HCType m_coll;
-
-};
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Misc.cpp b/contrib/moses2/defer/CubePruningPerBitmap/Misc.cpp
deleted file mode 100644
index 7b324e244..000000000
--- a/contrib/moses2/defer/CubePruningPerBitmap/Misc.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * CubePruning.cpp
- *
- * Created on: 27 Nov 2015
- * Author: hieu
- */
-
-#include "Misc.h"
-#include "../Manager.h"
-#include "../../MemPool.h"
-#include "../../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningPerBitmap
-{
-
-////////////////////////////////////////////////////////////////////////
-QueueItem *QueueItem::Create(QueueItem *currItem,
- Manager &mgr,
- CubeEdge &edge,
- size_t hypoIndex,
- size_t tpIndex,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- QueueItem *ret;
- if (currItem) {
- // reuse incoming queue item to create new item
- ret = currItem;
- ret->Init(mgr, edge, hypoIndex, tpIndex);
- }
- else if (!queueItemRecycler.empty()) {
- // use item from recycle bin
- ret = queueItemRecycler.back();
- ret->Init(mgr, edge, hypoIndex, tpIndex);
- queueItemRecycler.pop_back();
- }
- else {
- // create new item
- ret = new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge, hypoIndex, tpIndex);
- }
-
- return ret;
-}
-
-QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
-:edge(&edge)
-,hypoIndex(hypoIndex)
-,tpIndex(tpIndex)
-{
- CreateHypothesis(mgr);
-}
-
-void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
-{
- this->edge = &edge;
- this->hypoIndex = hypoIndex;
- this->tpIndex = tpIndex;
-
- CreateHypothesis(mgr);
-}
-
-void QueueItem::CreateHypothesis(Manager &mgr)
-{
- const Hypothesis *prevHypo = edge->miniStack.GetSortedAndPruneHypos(mgr)[hypoIndex];
- const TargetPhrase &tp = edge->tps[tpIndex];
-
- //cerr << "hypoIndex=" << hypoIndex << endl;
- //cerr << "edge.hypos=" << edge.hypos.size() << endl;
- //cerr << prevHypo << endl;
- //cerr << *prevHypo << endl;
-
- hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
- hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore);
- hypo->EvaluateWhenApplied();
-}
-
-////////////////////////////////////////////////////////////////////////
-CubeEdge::CubeEdge(
- Manager &mgr,
- const NSCubePruningMiniStack::MiniStack &miniStack,
- const InputPath &path,
- const TargetPhrases &tps,
- const Bitmap &newBitmap)
-:miniStack(miniStack)
-,path(path)
-,tps(tps)
-,newBitmap(newBitmap)
-{
- estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
-}
-
-std::ostream& operator<<(std::ostream &out, const CubeEdge &obj)
-{
- out << obj.newBitmap;
- return out;
-}
-
-bool
-CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const
-{
- //UTIL_THROW_IF2(x >= (1<<17), "Error");
- //UTIL_THROW_IF2(y >= (1<<17), "Error");
-
- SeenPositionItem val(this, (x<<16) + y);
- std::pair<SeenPositions::iterator, bool> pairRet = seenPositions.insert(val);
- return pairRet.second;
-}
-
-void CubeEdge::CreateFirst(Manager &mgr,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- if (miniStack.GetSortedAndPruneHypos(mgr).size()) {
- assert(tps.GetSize());
-
- QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler);
- queue.push(item);
- bool setSeen = SetSeenPosition(0, 0, seenPositions);
- assert(setSeen);
- }
-}
-
-void CubeEdge::CreateNext(Manager &mgr,
- QueueItem *item,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- size_t hypoIndex = item->hypoIndex;
- size_t tpIndex = item->tpIndex;
-
- if (hypoIndex + 1 < miniStack.GetSortedAndPruneHypos(mgr).size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) {
- // reuse incoming queue item to create new item
- QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler);
- assert(newItem == item);
- queue.push(newItem);
- item = NULL;
- }
-
- if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) {
- QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler);
- queue.push(newItem);
- item = NULL;
- }
-
- if (item) {
- // recycle unused queue item
- queueItemRecycler.push_back(item);
- }
-}
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Misc.h b/contrib/moses2/defer/CubePruningPerBitmap/Misc.h
deleted file mode 100644
index 77b5ba9c3..000000000
--- a/contrib/moses2/defer/CubePruningPerBitmap/Misc.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * CubePruning.h
- *
- * Created on: 27 Nov 2015
- * Author: hieu
- */
-#pragma once
-#include <boost/pool/pool_alloc.hpp>
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
-#include <vector>
-#include <queue>
-#include "../../legacy/Range.h"
-#include "../Hypothesis.h"
-#include "../../TypeDef.h"
-#include "../../Vector.h"
-#include "../CubePruningMiniStack/Stack.h"
-
-namespace Moses2
-{
-
-class Manager;
-class InputPath;
-class TargetPhrases;
-class Bitmap;
-
-namespace NSCubePruningPerBitmap
-{
-class CubeEdge;
-
-///////////////////////////////////////////
-class QueueItem
-{
- ~QueueItem(); // NOT IMPLEMENTED. Use MemPool
-public:
- static QueueItem *Create(QueueItem *currItem,
- Manager &mgr,
- CubeEdge &edge,
- size_t hypoIndex,
- size_t tpIndex,
- std::deque<QueueItem*> &queueItemRecycler);
- QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
-
- void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
-
- CubeEdge *edge;
- size_t hypoIndex, tpIndex;
- Hypothesis *hypo;
-
-protected:
- void CreateHypothesis(Manager &mgr);
-};
-
-///////////////////////////////////////////
-class QueueItemOrderer
-{
-public:
- bool operator()(QueueItem* itemA, QueueItem* itemB) const {
- HypothesisFutureScoreOrderer orderer;
- return !orderer(itemA->hypo, itemB->hypo);
- }
-};
-
-///////////////////////////////////////////
-class CubeEdge
-{
- friend std::ostream& operator<<(std::ostream &, const CubeEdge &);
-
-public:
- typedef std::priority_queue<QueueItem*,
- std::vector<QueueItem*>,
- QueueItemOrderer> Queue;
-
- typedef std::pair<const CubeEdge*, int> SeenPositionItem;
- typedef boost::unordered_set<SeenPositionItem,
- boost::hash<SeenPositionItem>,
- std::equal_to<SeenPositionItem>
- > SeenPositions;
-
- const NSCubePruningMiniStack::MiniStack &miniStack;
- const InputPath &path;
- const TargetPhrases &tps;
- const Bitmap &newBitmap;
- SCORE estimatedScore;
-
- CubeEdge(Manager &mgr,
- const NSCubePruningMiniStack::MiniStack &miniStack,
- const InputPath &path,
- const TargetPhrases &tps,
- const Bitmap &newBitmap);
-
- bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const;
-
- void CreateFirst(Manager &mgr,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler);
- void CreateNext(Manager &mgr,
- QueueItem *item,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler);
-
-
-protected:
-
-};
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Search.cpp b/contrib/moses2/defer/CubePruningPerBitmap/Search.cpp
deleted file mode 100644
index b0eddcc21..000000000
--- a/contrib/moses2/defer/CubePruningPerBitmap/Search.cpp
+++ /dev/null
@@ -1,273 +0,0 @@
-/*
- * Search.cpp
- *
- * Created on: 16 Nov 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include "Search.h"
-#include "../Manager.h"
-#include "../Hypothesis.h"
-#include "../../InputPaths.h"
-#include "../../InputPath.h"
-#include "../../System.h"
-#include "../../Sentence.h"
-#include "../../TranslationTask.h"
-#include "../../legacy/Util2.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningPerBitmap
-{
-
-////////////////////////////////////////////////////////////////////////
-Search::Search(Manager &mgr)
-:Moses2::Search(mgr)
-,m_stacks(mgr)
-
-,m_queue(QueueItemOrderer(),
- std::vector<QueueItem*>() )
-
-,m_seenPositions()
-{
-}
-
-Search::~Search()
-{
-}
-
-void Search::Decode()
-{
- // init stacks
- m_stacks.Init(mgr.GetInput().GetSize() + 1);
-
- const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
- Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
- initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap);
- initHypo->EmptyHypothesisState(mgr.GetInput());
-
- m_stacks.Add(initHypo, mgr.GetHypoRecycle());
-
- for (size_t stackInd = 0; stackInd < m_stacks.GetSize() - 1; ++stackInd) {
- CreateSearchGraph(stackInd);
- }
-
- for (size_t stackInd = 1; stackInd < m_stacks.GetSize(); ++stackInd) {
- //cerr << "stackInd=" << stackInd << endl;
- Decode(stackInd);
-
- //cerr << m_stacks << endl;
- }
-
- //DebugCounts();
-}
-
-void Search::Decode(size_t stackInd)
-{
- NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
-
- // FOR EACH BITMAP IN EACH STACK
- boost::unordered_map<const Bitmap*, vector<NSCubePruningMiniStack::MiniStack*> > uniqueBM;
-
- BOOST_FOREACH(NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
- NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
-
- const Bitmap *bitmap = val.first.first;
- uniqueBM[bitmap].push_back(&miniStack);
- }
-
- // decode each bitmap
- boost::unordered_map<const Bitmap*, vector<NSCubePruningMiniStack::MiniStack*> >::iterator iter;
- for (iter = uniqueBM.begin(); iter != uniqueBM.end(); ++iter) {
- const vector<NSCubePruningMiniStack::MiniStack*> &miniStacks = iter->second;
- Decode(miniStacks);
- }
-
- /*
- // FOR EACH STACK
- vector<NSCubePruningMiniStack::MiniStack*> miniStacks;
- BOOST_FOREACH(NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
- NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
-
- miniStacks.push_back(&miniStack);
- }
- Decode(miniStacks);
- */
-}
-
-void Search::Decode(const vector<NSCubePruningMiniStack::MiniStack*> &miniStacks)
-{
- Recycler<Hypothesis*> &hypoRecycler = mgr.GetHypoRecycle();
-
- // reuse queue from previous stack. Clear it first
- std::vector<QueueItem*> &container = Container(m_queue);
- //cerr << "container=" << container.size() << endl;
- BOOST_FOREACH(QueueItem *item, container) {
- // recycle unused hypos from queue
- Hypothesis *hypo = item->hypo;
- hypoRecycler.Recycle(hypo);
-
- // recycle queue item
- m_queueItemRecycler.push_back(item);
- }
- container.clear();
-
- m_seenPositions.clear();
-
- BOOST_FOREACH(NSCubePruningMiniStack::MiniStack *miniStack, miniStacks) {
- // add top hypo from every edge into queue
- CubeEdges &edges = *m_cubeEdges[miniStack];
-
- BOOST_FOREACH(CubeEdge *edge, edges) {
- //cerr << "edge=" << *edge << endl;
- edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
- }
- }
-
- size_t pops = 0;
- while (!m_queue.empty() && pops < mgr.system.popLimit) {
- // get best hypo from queue, add to stack
- //cerr << "queue=" << queue.size() << endl;
- QueueItem *item = m_queue.top();
- m_queue.pop();
-
- CubeEdge *edge = item->edge;
-
- // add hypo to stack
- Hypothesis *hypo = item->hypo;
- //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
- m_stacks.Add(hypo, hypoRecycler);
-
- edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
-
- ++pops;
- }
-
- /*
- // create hypo from every edge. Increase diversity
- while (!m_queue.empty()) {
- QueueItem *item = m_queue.top();
- m_queue.pop();
-
- if (item->hypoIndex == 0 && item->tpIndex == 0) {
- CubeEdge &edge = item->edge;
-
- // add hypo to stack
- Hypothesis *hypo = item->hypo;
- //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
- m_stacks.Add(hypo, mgr.GetHypoRecycle());
- }
- }
- */
-}
-
-
-void Search::CreateSearchGraph(size_t stackInd)
-{
- NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
- MemPool &pool = mgr.GetPool();
-
- BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
- const Bitmap &hypoBitmap = *val.first.first;
- size_t hypoEndPos = val.first.second;
- //cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl;
-
- // create edges to next hypos from existing hypos
- const InputPaths &paths = mgr.GetInputPaths();
-
- BOOST_FOREACH(const InputPath *path, paths) {
- const Range &pathRange = path->range;
- //cerr << "pathRange=" << pathRange << endl;
-
- if (!path->IsUsed()) {
- continue;
- }
- if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
- continue;
- }
-
- const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
-
- // sort hypo for a particular bitmap and hypoEndPos
- const NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
-
-
- // add cube edge
- size_t numPt = mgr.system.mappings.size();
- for (size_t i = 0; i < numPt; ++i) {
- const TargetPhrases *tps = path->targetPhrases[i];
- if (tps && tps->GetSize()) {
- // create next mini stack
- NSCubePruningMiniStack::MiniStack &nextMiniStack = m_stacks.GetMiniStack(newBitmap, pathRange);
-
- CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, miniStack, *path, *tps, newBitmap);
-
- CubeEdges *edges;
- boost::unordered_map<NSCubePruningMiniStack::MiniStack*, CubeEdges*>::iterator iter = m_cubeEdges.find(&nextMiniStack);
- if (iter == m_cubeEdges.end()) {
- edges = new (pool.Allocate<CubeEdges>()) CubeEdges();
- m_cubeEdges[&nextMiniStack] = edges;
- }
- else {
- edges = iter->second;
- }
-
- edges->push_back(edge);
- }
- }
- }
- }
-
-}
-
-
-const Hypothesis *Search::GetBestHypo() const
-{
- const NSCubePruningMiniStack::Stack &lastStack = m_stacks.Back();
- std::vector<const Hypothesis*> sortedHypos = lastStack.GetBestHypos(1);
-
- const Hypothesis *best = NULL;
- if (sortedHypos.size()) {
- best = sortedHypos[0];
- }
- return best;
-}
-
-void Search::DebugCounts()
-{
- std::map<size_t, size_t> counts;
-
- for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) {
- //cerr << "stackInd=" << stackInd << endl;
- const NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
- BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
- const NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
- size_t count = miniStack.GetColl().size();
-
- if (counts.find(count) == counts.end()) {
- counts[count] = 0;
- }
- else {
- ++counts[count];
- }
- }
- //cerr << m_stacks << endl;
- }
-
- std::map<size_t, size_t>::const_iterator iter;
- for (iter = counts.begin(); iter != counts.end(); ++iter) {
- cerr << iter->first << "=" << iter->second << " ";
- }
- cerr << endl;
-}
-
-
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Search.h b/contrib/moses2/defer/CubePruningPerBitmap/Search.h
deleted file mode 100644
index 913095e25..000000000
--- a/contrib/moses2/defer/CubePruningPerBitmap/Search.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Search.h
- *
- * Created on: 16 Nov 2015
- * Author: hieu
- */
-
-#pragma once
-#include <boost/pool/pool_alloc.hpp>
-#include <boost/unordered_map.hpp>
-#include "../Search.h"
-#include "Misc.h"
-#include "Stacks.h"
-#include "../../legacy/Range.h"
-
-namespace Moses2
-{
-
-class Bitmap;
-class Hypothesis;
-class InputPath;
-class TargetPhrases;
-
-namespace NSCubePruningMiniStack
-{
-class MiniStack;
-}
-
-namespace NSCubePruningPerBitmap
-{
-
-class Search : public Moses2::Search
-{
-public:
- Search(Manager &mgr);
- virtual ~Search();
-
- virtual void Decode();
- const Hypothesis *GetBestHypo() const;
-
-protected:
- Stacks m_stacks;
-
- CubeEdge::Queue m_queue;
- CubeEdge::SeenPositions m_seenPositions;
-
- // CUBE PRUNING VARIABLES
- // setup
- typedef std::vector<CubeEdge*> CubeEdges;
- boost::unordered_map<NSCubePruningMiniStack::MiniStack*, CubeEdges*> m_cubeEdges;
-
- std::deque<QueueItem*> m_queueItemRecycler;
-
- // CUBE PRUNING
- // decoding
- void CreateSearchGraph(size_t stackInd);
- void Decode(size_t stackInd);
- void Decode(const std::vector<NSCubePruningMiniStack::MiniStack*> &miniStacks);
-
- void DebugCounts();
-};
-
-}
-
-}
-
diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Stacks.cpp b/contrib/moses2/defer/CubePruningPerBitmap/Stacks.cpp
deleted file mode 100644
index ca29f52c0..000000000
--- a/contrib/moses2/defer/CubePruningPerBitmap/Stacks.cpp
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Stacks.cpp
- *
- * Created on: 6 Nov 2015
- * Author: hieu
- */
-
-#include "Stacks.h"
-#include "../../System.h"
-#include "../Manager.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningPerBitmap
-{
-
-Stacks::Stacks(const Manager &mgr)
-:m_mgr(mgr)
-{
-}
-
-Stacks::~Stacks()
-{
-}
-
-void Stacks::Init(size_t numStacks)
-{
- m_stacks.resize(numStacks);
- for (size_t i = 0; i < m_stacks.size(); ++i) {
- m_stacks[i] = new (m_mgr.GetPool().Allocate<NSCubePruningMiniStack::Stack>()) NSCubePruningMiniStack::Stack(m_mgr);
- }
-}
-
-
-std::ostream& operator<<(std::ostream &out, const Stacks &obj)
-{
- for (size_t i = 0; i < obj.GetSize(); ++i) {
- const NSCubePruningMiniStack::Stack &stack = *obj.m_stacks[i];
- out << stack.GetHypoSize() << " ";
- }
-
- return out;
-}
-
-void Stacks::Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle)
-{
- size_t numWordsCovered = hypo->GetBitmap().GetNumWordsCovered();
- //cerr << "numWordsCovered=" << numWordsCovered << endl;
- NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered];
- stack.Add(hypo, hypoRecycle);
-
-}
-
-NSCubePruningMiniStack::MiniStack &Stacks::GetMiniStack(const Bitmap &newBitmap, const Range &pathRange)
-{
- size_t numWordsCovered = newBitmap.GetNumWordsCovered();
- //cerr << "numWordsCovered=" << numWordsCovered << endl;
- NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered];
-
- NSCubePruningMiniStack::Stack::HypoCoverage key(&newBitmap, pathRange.GetEndPos());
- stack.GetMiniStack(key);
-
-}
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Stacks.h b/contrib/moses2/defer/CubePruningPerBitmap/Stacks.h
deleted file mode 100644
index 5729fa613..000000000
--- a/contrib/moses2/defer/CubePruningPerBitmap/Stacks.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Stacks.h
- *
- * Created on: 6 Nov 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include <vector>
-#include "../CubePruningMiniStack/Stack.h"
-#include "../../Recycler.h"
-
-namespace Moses2
-{
-class Manager;
-
-namespace NSCubePruningPerBitmap
-{
-
-class Stacks {
- friend std::ostream& operator<<(std::ostream &, const Stacks &);
-public:
- Stacks(const Manager &mgr);
- virtual ~Stacks();
-
- void Init(size_t numStacks);
-
- size_t GetSize() const
- { return m_stacks.size(); }
-
- const NSCubePruningMiniStack::Stack &Back() const
- { return *m_stacks.back(); }
-
- NSCubePruningMiniStack::Stack &operator[](size_t ind)
- { return *m_stacks[ind]; }
-
- void Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle);
- NSCubePruningMiniStack::MiniStack &GetMiniStack(const Bitmap &newBitmap, const Range &pathRange);
-
-protected:
- const Manager &m_mgr;
- std::vector<NSCubePruningMiniStack::Stack*> m_stacks;
-};
-
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningPerMiniStack/Misc.cpp b/contrib/moses2/defer/CubePruningPerMiniStack/Misc.cpp
deleted file mode 100644
index 935882aa0..000000000
--- a/contrib/moses2/defer/CubePruningPerMiniStack/Misc.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * CubePruning.cpp
- *
- * Created on: 27 Nov 2015
- * Author: hieu
- */
-
-#include "Misc.h"
-#include "../Manager.h"
-#include "../../MemPool.h"
-#include "../../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningPerMiniStack
-{
-
-////////////////////////////////////////////////////////////////////////
-QueueItem *QueueItem::Create(QueueItem *currItem,
- Manager &mgr,
- CubeEdge &edge,
- size_t hypoIndex,
- size_t tpIndex,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- QueueItem *ret;
- if (currItem) {
- // reuse incoming queue item to create new item
- ret = currItem;
- ret->Init(mgr, edge, hypoIndex, tpIndex);
- }
- else if (!queueItemRecycler.empty()) {
- // use item from recycle bin
- ret = queueItemRecycler.back();
- ret->Init(mgr, edge, hypoIndex, tpIndex);
- queueItemRecycler.pop_back();
- }
- else {
- // create new item
- ret = new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge, hypoIndex, tpIndex);
- }
-
- return ret;
-}
-
-QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
-:edge(&edge)
-,hypoIndex(hypoIndex)
-,tpIndex(tpIndex)
-{
- CreateHypothesis(mgr);
-}
-
-void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
-{
- this->edge = &edge;
- this->hypoIndex = hypoIndex;
- this->tpIndex = tpIndex;
-
- CreateHypothesis(mgr);
-}
-
-void QueueItem::CreateHypothesis(Manager &mgr)
-{
- const Hypothesis *prevHypo = edge->miniStack.GetSortedAndPruneHypos(mgr)[hypoIndex];
- const TargetPhrase &tp = edge->tps[tpIndex];
-
- //cerr << "hypoIndex=" << hypoIndex << endl;
- //cerr << "edge.hypos=" << edge.hypos.size() << endl;
- //cerr << prevHypo << endl;
- //cerr << *prevHypo << endl;
-
- hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
- hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore);
- hypo->EvaluateWhenApplied();
-}
-
-////////////////////////////////////////////////////////////////////////
-CubeEdge::CubeEdge(
- Manager &mgr,
- const NSCubePruningMiniStack::MiniStack &miniStack,
- const InputPath &path,
- const TargetPhrases &tps,
- const Bitmap &newBitmap)
-:miniStack(miniStack)
-,path(path)
-,tps(tps)
-,newBitmap(newBitmap)
-{
- estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
-}
-
-std::ostream& operator<<(std::ostream &out, const CubeEdge &obj)
-{
- out << obj.newBitmap;
- return out;
-}
-
-bool
-CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const
-{
- //UTIL_THROW_IF2(x >= (1<<17), "Error");
- //UTIL_THROW_IF2(y >= (1<<17), "Error");
-
- SeenPositionItem val(this, (x<<16) + y);
- std::pair<SeenPositions::iterator, bool> pairRet = seenPositions.insert(val);
- return pairRet.second;
-}
-
-void CubeEdge::CreateFirst(Manager &mgr,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- if (miniStack.GetSortedAndPruneHypos(mgr).size()) {
- assert(tps.GetSize());
-
- QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler);
- queue.push(item);
- bool setSeen = SetSeenPosition(0, 0, seenPositions);
- assert(setSeen);
- }
-}
-
-void CubeEdge::CreateNext(Manager &mgr,
- QueueItem *item,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- size_t hypoIndex = item->hypoIndex;
- size_t tpIndex = item->tpIndex;
-
- if (hypoIndex + 1 < miniStack.GetSortedAndPruneHypos(mgr).size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) {
- // reuse incoming queue item to create new item
- QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler);
- assert(newItem == item);
- queue.push(newItem);
- item = NULL;
- }
-
- if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) {
- QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler);
- queue.push(newItem);
- item = NULL;
- }
-
- if (item) {
- // recycle unused queue item
- queueItemRecycler.push_back(item);
- }
-}
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningPerMiniStack/Misc.h b/contrib/moses2/defer/CubePruningPerMiniStack/Misc.h
deleted file mode 100644
index 4a3935422..000000000
--- a/contrib/moses2/defer/CubePruningPerMiniStack/Misc.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * CubePruning.h
- *
- * Created on: 27 Nov 2015
- * Author: hieu
- */
-#pragma once
-#include <boost/pool/pool_alloc.hpp>
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
-#include <vector>
-#include <queue>
-#include "../../legacy/Range.h"
-#include "../Hypothesis.h"
-#include "../../TypeDef.h"
-#include "../../Vector.h"
-#include "../CubePruningMiniStack/Stack.h"
-
-namespace Moses2
-{
-
-class Manager;
-class InputPath;
-class TargetPhrases;
-class Bitmap;
-
-namespace NSCubePruningPerMiniStack
-{
-class CubeEdge;
-
-///////////////////////////////////////////
-class QueueItem
-{
- ~QueueItem(); // NOT IMPLEMENTED. Use MemPool
-public:
- static QueueItem *Create(QueueItem *currItem,
- Manager &mgr,
- CubeEdge &edge,
- size_t hypoIndex,
- size_t tpIndex,
- std::deque<QueueItem*> &queueItemRecycler);
- QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
-
- void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
-
- CubeEdge *edge;
- size_t hypoIndex, tpIndex;
- Hypothesis *hypo;
-
-protected:
- void CreateHypothesis(Manager &mgr);
-};
-
-///////////////////////////////////////////
-class QueueItemOrderer
-{
-public:
- bool operator()(QueueItem* itemA, QueueItem* itemB) const {
- HypothesisFutureScoreOrderer orderer;
- return !orderer(itemA->hypo, itemB->hypo);
- }
-};
-
-///////////////////////////////////////////
-class CubeEdge
-{
- friend std::ostream& operator<<(std::ostream &, const CubeEdge &);
-
-public:
- typedef std::priority_queue<QueueItem*,
- std::vector<QueueItem*>,
- QueueItemOrderer> Queue;
-
- typedef std::pair<const CubeEdge*, int> SeenPositionItem;
- typedef boost::unordered_set<SeenPositionItem,
- boost::hash<SeenPositionItem>,
- std::equal_to<SeenPositionItem>
- > SeenPositions;
-
- const NSCubePruningMiniStack::MiniStack &miniStack;
- const InputPath &path;
- const TargetPhrases &tps;
- const Bitmap &newBitmap;
- SCORE estimatedScore;
-
- CubeEdge(Manager &mgr,
- const NSCubePruningMiniStack::MiniStack &miniStack,
- const InputPath &path,
- const TargetPhrases &tps,
- const Bitmap &newBitmap);
-
- bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const;
-
- void CreateFirst(Manager &mgr,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler);
- void CreateNext(Manager &mgr,
- QueueItem *item,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler);
-
-
-protected:
-
-};
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningPerMiniStack/Search.cpp b/contrib/moses2/defer/CubePruningPerMiniStack/Search.cpp
deleted file mode 100644
index fe993daf0..000000000
--- a/contrib/moses2/defer/CubePruningPerMiniStack/Search.cpp
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * Search.cpp
- *
- * Created on: 16 Nov 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include "Search.h"
-#include "../Manager.h"
-#include "../Hypothesis.h"
-#include "../../InputPaths.h"
-#include "../../InputPath.h"
-#include "../../System.h"
-#include "../../Sentence.h"
-#include "../../TranslationTask.h"
-#include "../../legacy/Util2.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningPerMiniStack
-{
-
-////////////////////////////////////////////////////////////////////////
-Search::Search(Manager &mgr)
-:Moses2::Search(mgr)
-,m_stacks(mgr)
-
-,m_queue(QueueItemOrderer(),
- std::vector<QueueItem*>() )
-
-,m_seenPositions()
-{
-}
-
-Search::~Search()
-{
-}
-
-void Search::Decode()
-{
- // init stacks
- m_stacks.Init(mgr.GetInput().GetSize() + 1);
-
- const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
- Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
- initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap);
- initHypo->EmptyHypothesisState(mgr.GetInput());
-
- m_stacks.Add(initHypo, mgr.GetHypoRecycle());
-
- for (size_t stackInd = 0; stackInd < m_stacks.GetSize() - 1; ++stackInd) {
- CreateSearchGraph(stackInd);
- }
-
- for (size_t stackInd = 1; stackInd < m_stacks.GetSize(); ++stackInd) {
- //cerr << "stackInd=" << stackInd << endl;
- Decode(stackInd);
-
- //cerr << m_stacks << endl;
- }
-
- //DebugCounts();
-}
-
-void Search::Decode(size_t stackInd)
-{
- NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
- BOOST_FOREACH(NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
- NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
- Decode(miniStack);
- }
-
-}
-
-void Search::Decode(NSCubePruningMiniStack::MiniStack &miniStack)
-{
- Recycler<Hypothesis*> &hypoRecycler = mgr.GetHypoRecycle();
-
- // reuse queue from previous stack. Clear it first
- std::vector<QueueItem*> &container = Container(m_queue);
- //cerr << "container=" << container.size() << endl;
- BOOST_FOREACH(QueueItem *item, container) {
- // recycle unused hypos from queue
- Hypothesis *hypo = item->hypo;
- hypoRecycler.Recycle(hypo);
-
- // recycle queue item
- m_queueItemRecycler.push_back(item);
- }
- container.clear();
-
- m_seenPositions.clear();
-
- // add top hypo from every edge into queue
- CubeEdges &edges = *m_cubeEdges[&miniStack];
-
- BOOST_FOREACH(CubeEdge *edge, edges) {
- //cerr << "edge=" << *edge << endl;
- edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
- }
-
- size_t pops = 0;
- while (!m_queue.empty() && pops < mgr.system.popLimit) {
- // get best hypo from queue, add to stack
- //cerr << "queue=" << queue.size() << endl;
- QueueItem *item = m_queue.top();
- m_queue.pop();
-
- CubeEdge *edge = item->edge;
-
- // add hypo to stack
- Hypothesis *hypo = item->hypo;
- //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
- m_stacks.Add(hypo, hypoRecycler);
-
- edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
-
- ++pops;
- }
-
- /*
- // create hypo from every edge. Increase diversity
- while (!m_queue.empty()) {
- QueueItem *item = m_queue.top();
- m_queue.pop();
-
- if (item->hypoIndex == 0 && item->tpIndex == 0) {
- CubeEdge &edge = item->edge;
-
- // add hypo to stack
- Hypothesis *hypo = item->hypo;
- //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
- m_stacks.Add(hypo, mgr.GetHypoRecycle());
- }
- }
- */
-}
-
-
-void Search::CreateSearchGraph(size_t stackInd)
-{
- NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
- MemPool &pool = mgr.GetPool();
-
- BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
- const Bitmap &hypoBitmap = *val.first.first;
- size_t hypoEndPos = val.first.second;
- //cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl;
-
- // create edges to next hypos from existing hypos
- const InputPaths &paths = mgr.GetInputPaths();
-
- BOOST_FOREACH(const InputPath *path, paths) {
- const Range &pathRange = path->range;
- //cerr << "pathRange=" << pathRange << endl;
-
- if (!path->IsUsed()) {
- continue;
- }
- if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
- continue;
- }
-
- const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
-
- // sort hypo for a particular bitmap and hypoEndPos
- const NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
-
-
- // add cube edge
- size_t numPt = mgr.system.mappings.size();
- for (size_t i = 0; i < numPt; ++i) {
- const TargetPhrases *tps = path->targetPhrases[i];
- if (tps && tps->GetSize()) {
- // create next mini stack
- NSCubePruningMiniStack::MiniStack &nextMiniStack = m_stacks.GetMiniStack(newBitmap, pathRange);
-
- CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, miniStack, *path, *tps, newBitmap);
-
- CubeEdges *edges;
- boost::unordered_map<NSCubePruningMiniStack::MiniStack*, CubeEdges*>::iterator iter = m_cubeEdges.find(&nextMiniStack);
- if (iter == m_cubeEdges.end()) {
- edges = new (pool.Allocate<CubeEdges>()) CubeEdges();
- m_cubeEdges[&nextMiniStack] = edges;
- }
- else {
- edges = iter->second;
- }
-
- edges->push_back(edge);
- }
- }
- }
- }
-
-}
-
-
-const Hypothesis *Search::GetBestHypo() const
-{
- const NSCubePruningMiniStack::Stack &lastStack = m_stacks.Back();
- std::vector<const Hypothesis*> sortedHypos = lastStack.GetBestHypos(1);
-
- const Hypothesis *best = NULL;
- if (sortedHypos.size()) {
- best = sortedHypos[0];
- }
- return best;
-}
-
-void Search::DebugCounts()
-{
- std::map<size_t, size_t> counts;
-
- for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) {
- //cerr << "stackInd=" << stackInd << endl;
- const NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
- BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
- const NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
- size_t count = miniStack.GetColl().size();
-
- if (counts.find(count) == counts.end()) {
- counts[count] = 0;
- }
- else {
- ++counts[count];
- }
- }
- //cerr << m_stacks << endl;
- }
-
- std::map<size_t, size_t>::const_iterator iter;
- for (iter = counts.begin(); iter != counts.end(); ++iter) {
- cerr << iter->first << "=" << iter->second << " ";
- }
- cerr << endl;
-}
-
-
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningPerMiniStack/Search.h b/contrib/moses2/defer/CubePruningPerMiniStack/Search.h
deleted file mode 100644
index be256360e..000000000
--- a/contrib/moses2/defer/CubePruningPerMiniStack/Search.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Search.h
- *
- * Created on: 16 Nov 2015
- * Author: hieu
- */
-
-#pragma once
-#include <boost/pool/pool_alloc.hpp>
-#include <boost/unordered_map.hpp>
-#include "../Search.h"
-#include "Misc.h"
-#include "Stacks.h"
-#include "../../legacy/Range.h"
-
-namespace Moses2
-{
-
-class Bitmap;
-class Hypothesis;
-class InputPath;
-class TargetPhrases;
-
-namespace NSCubePruningMiniStack
-{
-class MiniStack;
-}
-
-namespace NSCubePruningPerMiniStack
-{
-
-// Search implementation of the NSCubePruningPerMiniStack namespace.
-// NOTE(review): the algorithm itself lives in Search.cpp; only the interface
-// and the bookkeeping members are declared here.
-class Search : public Moses2::Search
-{
-public:
-  Search(Manager &mgr);
-  virtual ~Search();
-
-  virtual void Decode();
-  const Hypothesis *GetBestHypo() const;
-
-protected:
-  // List of cube edges that feed one mini stack.
-  typedef std::vector<CubeEdge*> CubeEdges;
-
-  // Cube pruning: decoding steps.
-  void CreateSearchGraph(size_t stackInd);
-  void Decode(size_t stackInd);
-  void Decode(NSCubePruningMiniStack::MiniStack &miniStack);
-
-  void DebugCounts();
-
-  Stacks m_stacks;
-
-  CubeEdge::Queue m_queue;
-  CubeEdge::SeenPositions m_seenPositions;
-
-  // Cube pruning: setup. Maps a mini stack to the edges that lead into it.
-  boost::unordered_map<NSCubePruningMiniStack::MiniStack*, CubeEdges*> m_cubeEdges;
-
-  std::deque<QueueItem*> m_queueItemRecycler;
-};
-
-}
-
-}
-
diff --git a/contrib/moses2/defer/CubePruningPerMiniStack/Stacks.cpp b/contrib/moses2/defer/CubePruningPerMiniStack/Stacks.cpp
deleted file mode 100644
index 86bf5d1b8..000000000
--- a/contrib/moses2/defer/CubePruningPerMiniStack/Stacks.cpp
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Stacks.cpp
- *
- * Created on: 6 Nov 2015
- * Author: hieu
- */
-
-#include "Stacks.h"
-#include "../../System.h"
-#include "../Manager.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningPerMiniStack
-{
-
-// Stores the manager reference; the stacks themselves are created by Init().
-Stacks::Stacks(const Manager &mgr) : m_mgr(mgr) {}
-
-// Nothing is released here: no cleanup of the Stack pointers is performed.
-Stacks::~Stacks() {}
-
-// Resizes the container to numStacks entries and fills every slot with a new
-// Stack constructed (placement new) in memory obtained from the manager's pool.
-void Stacks::Init(size_t numStacks)
-{
-  typedef NSCubePruningMiniStack::Stack StackType;
-
-  m_stacks.resize(numStacks);
-
-  std::vector<StackType*>::iterator slot;
-  for (slot = m_stacks.begin(); slot != m_stacks.end(); ++slot) {
-    void *mem = m_mgr.GetPool().Allocate<StackType>();
-    *slot = new (mem) StackType(m_mgr);
-  }
-}
-
-
-// Prints the hypothesis count of every stack, each followed by one space.
-std::ostream& operator<<(std::ostream &out, const Stacks &obj)
-{
-  std::vector<NSCubePruningMiniStack::Stack*>::const_iterator it;
-  for (it = obj.m_stacks.begin(); it != obj.m_stacks.end(); ++it) {
-    out << (*it)->GetHypoSize() << " ";
-  }
-  return out;
-}
-
-// Adds hypo to the stack whose index equals the number of source words the
-// hypothesis' bitmap reports as covered.
-void Stacks::Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle)
-{
-  const size_t stackInd = hypo->GetBitmap().GetNumWordsCovered();
-  m_stacks[stackInd]->Add(hypo, hypoRecycle);
-}
-
-// Looks up the mini stack for a coverage bitmap and the end position of the
-// path range just translated.
-//   newBitmap: coverage after applying the path; its covered-word count
-//              selects the stack.
-//   pathRange: range of the applied path; only its end position is used.
-// Returns a reference to the mini stack handed back by Stack::GetMiniStack.
-NSCubePruningMiniStack::MiniStack &Stacks::GetMiniStack(const Bitmap &newBitmap, const Range &pathRange)
-{
-  size_t numWordsCovered = newBitmap.GetNumWordsCovered();
-  NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered];
-
-  NSCubePruningMiniStack::Stack::HypoCoverage key(&newBitmap, pathRange.GetEndPos());
-
-  // The result must be returned: the function used to flow off its end, which
-  // is undefined behaviour for a function returning a reference.
-  return stack.GetMiniStack(key);
-}
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningPerMiniStack/Stacks.h b/contrib/moses2/defer/CubePruningPerMiniStack/Stacks.h
deleted file mode 100644
index 94ebe4618..000000000
--- a/contrib/moses2/defer/CubePruningPerMiniStack/Stacks.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Stacks.h
- *
- * Created on: 6 Nov 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include <vector>
-#include "../CubePruningMiniStack/Stack.h"
-#include "../../Recycler.h"
-
-namespace Moses2
-{
-class Manager;
-
-namespace NSCubePruningPerMiniStack
-{
-
-// Sequence of NSCubePruningMiniStack::Stack pointers, indexed by position.
-class Stacks
-{
-  friend std::ostream& operator<<(std::ostream &, const Stacks &);
-
-public:
-  Stacks(const Manager &mgr);
-  virtual ~Stacks();
-
-  // Creates numStacks stacks.
-  void Init(size_t numStacks);
-
-  // Number of stacks.
-  size_t GetSize() const {
-    return m_stacks.size();
-  }
-
-  // Last stack.
-  const NSCubePruningMiniStack::Stack &Back() const {
-    return *m_stacks.back();
-  }
-
-  // Stack at position ind (no bounds check).
-  NSCubePruningMiniStack::Stack &operator[](size_t ind) {
-    return *m_stacks[ind];
-  }
-
-  void Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle);
-  NSCubePruningMiniStack::MiniStack &GetMiniStack(const Bitmap &newBitmap, const Range &pathRange);
-
-protected:
-  const Manager &m_mgr;
-  std::vector<NSCubePruningMiniStack::Stack*> m_stacks;
-};
-
-
-}
-
-}
-
-
diff --git a/contrib/moses2/legacy/Bitmap.h b/contrib/moses2/legacy/Bitmap.h
deleted file mode 100644
index e6a0f7948..000000000
--- a/contrib/moses2/legacy/Bitmap.h
+++ /dev/null
@@ -1,244 +0,0 @@
-// $Id$
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#pragma once
-
-#include <algorithm>
-#include <limits>
-#include <vector>
-#include <iostream>
-#include <cstring>
-#include <cmath>
-#include <cstdlib>
-#include "Range.h"
-#include "../Array.h"
-
-namespace Moses2
-{
-class MemPool;
-
-typedef unsigned long WordsBitmapID;
-
-/** Vector of boolean to represent whether a word has been translated or not.
- *
- * Implemented using an Array of char (see m_bitmap), which is usually the same
- * representation for the elements that a C array of bool would use. A vector of
- * bool, or a Boost dynamic_bitset, could be much more efficient in theory.
- * Unfortunately algorithms like std::find() are not optimized for vector<bool>
- * on gcc or clang, and dynamic_bitset lacks all the optimized search operations
- * we want. Only benchmarking will tell what works best. Perhaps dynamic_bitset
- * could still be a dramatic improvement, if we flip the meaning of the bits
- * around so we can use its find_first() and find_next() for the most common
- * searches.
- */
-class Bitmap
-{
- friend std::ostream& operator<<(std::ostream& out, const Bitmap& bitmap);
-private:
- Array<char> m_bitmap; //! Ticks of words in sentence that have been done.
- size_t m_firstGap; //! Cached position of first gap, or NOT_FOUND.
- size_t m_numWordsCovered; //! Running count of positions set to true (maintained by SetValue / SetValueNonOverlap).
-
- Bitmap(); // not implemented
- Bitmap& operator=(const Bitmap& other); // private; no definition is visible in this header
-
- /** Update the first gap, when bits are flipped.
-  * startPos..endPos is the inclusive range that has just been set to value.
-  * When the cached gap lies inside a range set to true, the scan for the
-  * next gap starts right after endPos. */
- void UpdateFirstGap(size_t startPos, size_t endPos, bool value)
- {
- if (value) {
- //may remove gap
- if (startPos <= m_firstGap && m_firstGap <= endPos) {
- m_firstGap = NOT_FOUND;
- for (size_t i = endPos + 1; i < m_bitmap.size(); ++i) {
- if (!m_bitmap[i]) {
- m_firstGap = i;
- break;
- }
- }
- }
-
- }
- else {
- //setting positions to false, may add new gap
- if (startPos < m_firstGap) {
- m_firstGap = startPos;
- }
- }
- }
-
- //! set every position of range (inclusive) to true. The covered count grows by range.GetNumWordsCovered() without looking at the previous values, so the range presumably must not overlap positions that are already set.
- void
- SetValueNonOverlap(Range const& range) {
- size_t startPos = range.GetStartPos();
- size_t endPos = range.GetEndPos();
-
- for(size_t pos = startPos; pos <= endPos; pos++) {
- m_bitmap[pos] = true;
- }
-
- m_numWordsCovered += range.GetNumWordsCovered();
- UpdateFirstGap(startPos, endPos, true);
- }
-
- public:
- //! Create Bitmap of length size (the MemPool is presumably where the storage comes from - confirm in Bitmap.cpp); values are supplied through Init().
- explicit Bitmap(MemPool &pool, size_t size);
-
- void Init(const std::vector<bool>& initializer);
- void Init(const Bitmap &copy, const Range &range);
-
- //! Count of words translated.
- size_t GetNumWordsCovered() const {
- return m_numWordsCovered;
- }
-
- //! position of 1st word not yet translated, or NOT_FOUND if everything already translated
- size_t GetFirstGapPos() const {
- return m_firstGap;
- }
-
- //! position of last word not yet translated, or NOT_FOUND if everything already translated
- size_t GetLastGapPos() const {
- for (int pos = int(m_bitmap.size()) - 1; pos >= 0; pos--) {
- if (!m_bitmap[pos]) {
- return pos;
- }
- }
- // no gap found: every position is translated
- return NOT_FOUND;
- }
-
- //! position of last translated word, or NOT_FOUND if nothing has been translated
- size_t GetLastPos() const {
- for (int pos = int(m_bitmap.size()) - 1; pos >= 0; pos--) {
- if (m_bitmap[pos]) {
- return pos;
- }
- }
- // no translated position found
- return NOT_FOUND;
- }
-
- //! whether a word has been translated at a particular position
- bool GetValue(size_t pos) const {
- return bool(m_bitmap[pos]);
- }
- //! set value at a particular position; keeps the cached first gap and the covered count in sync
- void SetValue( size_t pos, bool value ) {
- bool origValue = m_bitmap[pos];
- if (origValue == value) {
- // do nothing
- }
- else {
- m_bitmap[pos] = value;
- UpdateFirstGap(pos, pos, value);
- if (value) {
- ++m_numWordsCovered;
- }
- else {
- --m_numWordsCovered;
- }
- }
- }
-
- //! whether every word has been translated
- bool IsComplete() const {
- return GetSize() == GetNumWordsCovered();
- }
- //! whether the wordrange overlaps with any translated word in this bitmap
- bool Overlap(const Range &compare) const {
- for (size_t pos = compare.GetStartPos(); pos <= compare.GetEndPos(); pos++) {
- if (m_bitmap[pos])
- return true;
- }
- return false;
- }
- //! number of elements
- size_t GetSize() const {
- return m_bitmap.size();
- }
-
- inline size_t GetEdgeToTheLeftOf(size_t l) const { // moves l left while the position before it is untranslated; returns where it stops
- if (l == 0) return l;
- while (l && !m_bitmap[l-1]) {
- --l;
- }
- return l;
- }
-
- inline size_t GetEdgeToTheRightOf(size_t r) const { // index just before the first translated position after r; the last index when there is none
- if (r+1 == m_bitmap.size()) return r;
- return (
- std::find(m_bitmap.begin() + r + 1, m_bitmap.end(), true) -
- m_bitmap.begin()
- ) - 1;
- }
-
- //! converts bitmap into an integer ID: it consists of two parts: the first 16 bit are the pattern between the first gap and the last word-1, the second 16 bit are the number of filled positions. enforces a sentence length limit of 65535 and a max distortion of 16
- WordsBitmapID GetID() const {
- assert(m_bitmap.size() < (1<<16));
-
- size_t start = GetFirstGapPos();
- if (start == NOT_FOUND) start = m_bitmap.size(); // nothing left
-
- size_t end = GetLastPos();
- if (end == NOT_FOUND) end = 0;// nothing translated yet
-
- assert(end < start || end-start <= 16);
- WordsBitmapID id = 0;
- for(size_t pos = end; pos > start; pos--) { // walks from the last translated word down to just above the first gap
- id = id*2 + (int) GetValue(pos);
- }
- return id + (1<<16) * start; // low part: bit pattern, high part: first-gap position
- }
-
- //! converts bitmap into an integer ID, with an additional span covered
- WordsBitmapID GetIDPlus( size_t startPos, size_t endPos ) const {
- assert(m_bitmap.size() < (1<<16));
-
- size_t start = GetFirstGapPos();
- if (start == NOT_FOUND) start = m_bitmap.size(); // nothing left
-
- size_t end = GetLastPos();
- if (end == NOT_FOUND) end = 0;// nothing translated yet
-
- if (start == startPos) start = endPos+1; // the extra span begins at the first gap, so the gap moves past it
- if (end < endPos) end = endPos; // the extra span extends beyond the last translated word
-
- assert(end < start || end-start <= 16);
- WordsBitmapID id = 0;
- for(size_t pos = end; pos > start; pos--) {
- id = id*2;
- if (GetValue(pos) || (startPos<=pos && pos<=endPos))
- id++;
- }
- return id + (1<<16) * start;
- }
-
- // for unordered_set in stack
- size_t hash() const;
- bool operator==(const Bitmap& other) const;
- bool operator!=(const Bitmap& other) const {
- return !(*this == other);
- }
-
- };
-
- }
diff --git a/contrib/moses2/parameters/AllOptions.cpp b/contrib/moses2/parameters/AllOptions.cpp
deleted file mode 100644
index c4171d807..000000000
--- a/contrib/moses2/parameters/AllOptions.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#include "../legacy/Parameter.h"
-#include "AllOptions.h"
-
-namespace Moses2
-{
-  // Default construction: both top-level flags start out false.
-  AllOptions::AllOptions()
-    : mira(false), use_legacy_pt(false)
-  {}
-
-  // Constructs the options and immediately fills them from param via init().
-  // The flags are given the same defaults as in the default constructor first:
-  // init() never assigns use_legacy_pt, and it can return before reaching
-  // mira when a sub-option group fails, which previously left both members
-  // with indeterminate values.
-  // NOTE: the result of init() is not reported to the caller (unchanged).
-  AllOptions::
-  AllOptions(Parameter const& param)
-    : mira(false)
-    , use_legacy_pt(false)
-  {
-    init(param);
-  }
-
-  // Initialises every option group from param, in a fixed order, stopping at
-  // the first group whose init() fails. On success the "mira" flag is read and
-  // the result of sanity_check() is returned.
-  bool
-  AllOptions::
-  init(Parameter const& param)
-  {
-    // && short-circuits, so groups after a failing one are not initialised.
-    const bool groupsOk = search.init(param)
-                          && cube.init(param)
-                          && nbest.init(param)
-                          && reordering.init(param)
-                          && context.init(param)
-                          && input.init(param)
-                          && mbr.init(param)
-                          && lmbr.init(param)
-                          && output.init(param)
-                          && unk.init(param)
-                          && server.init(param)
-                          && syntax.init(param);
-    if (!groupsOk) {
-      return false;
-    }
-
-    param.SetParameter(mira, "mira", false);
-
-    return sanity_check();
-  }
-
-  // Checks option combinations and derives dependent settings.
-  // Returns false (after printing to stderr) when lattice MBR or consensus
-  // decoding is requested while MBR is already enabled; returns true otherwise.
-  // Side effects: may enable mbr, may switch off output.RecoverPath, and
-  // turns nbest.enabled on when any feature listed below is active.
-  bool
-  AllOptions::
-  sanity_check()
-  {
-    using namespace std;
-
-    if (lmbr.enabled && mbr.enabled) {
-      cerr << "Error: Cannot use both n-best mbr and lattice mbr together" << endl;
-      return false;
-    }
-    if (lmbr.enabled) {
-      mbr.enabled = true;
-    }
-
-    // mbr.enabled may just have been switched on by the lattice MBR case.
-    if (search.consensus && mbr.enabled) {
-      cerr << "Error: Cannot use consensus decoding together with mbr"
-           << endl;
-      return false;
-    }
-    if (search.consensus) {
-      mbr.enabled = true;
-    }
-
-    // RecoverPath should only be used with confusion net or word lattice input
-    const bool recoverOnPlainText = output.RecoverPath && input.input_type == SentenceInput;
-    if (recoverOnPlainText) {
-      TRACE_ERR("--recover-input-path should only be used with "
-                <<"confusion net or word lattice input!\n");
-      output.RecoverPath = false;
-    }
-
-    // set m_nbest_options.enabled = true if necessary:
-    if (!nbest.enabled) {
-      const bool wantsSearchGraph = !output.SearchGraph.empty()
-                                    || !output.SearchGraphExtended.empty()
-                                    || !output.SearchGraphSLF.empty()
-                                    || !output.SearchGraphHG.empty()
-                                    || !output.SearchGraphPB.empty();
-      nbest.enabled = (mira || search.consensus
-                       || nbest.nbest_size > 0
-                       || mbr.enabled || lmbr.enabled
-                       || wantsSearchGraph
-                       || output.lattice_sample_size != 0);
-    }
-
-    return true;
-  }
-
-#ifdef HAVE_XMLRPC_C
-  // Applies per-request overrides from an XML-RPC parameter map to every
-  // option group except syntax, stopping at the first failing group, then
-  // returns the result of sanity_check().
-  bool
-  AllOptions::
-  update(std::map<std::string,xmlrpc_c::value>const& param)
-  {
-    const bool groupsOk = search.update(param)
-                          && cube.update(param)
-                          && nbest.update(param)
-                          && reordering.update(param)
-                          && context.update(param)
-                          && input.update(param)
-                          && mbr.update(param)
-                          && lmbr.update(param)
-                          && output.update(param)
-                          && unk.update(param)
-                          && server.update(param);
-    //if (!syntax.update(param)) return false;
-    if (!groupsOk) {
-      return false;
-    }
-    return sanity_check();
-  }
-#endif
-
-  // True when n-best entries must be distinct: either requested explicitly
-  // or implied by MBR, lattice MBR, lattice sampling, or one of the
-  // search-graph outputs tested below.
-  bool
-  AllOptions::
-  NBestDistinct() const
-  {
-    if (nbest.only_distinct || mbr.enabled || lmbr.enabled) {
-      return true;
-    }
-    if (output.lattice_sample_size) {
-      return true;
-    }
-    return !output.SearchGraph.empty()
-           || !output.SearchGraphExtended.empty()
-           || !output.SearchGraphSLF.empty()
-           || !output.SearchGraphHG.empty();
-  }
-
-
-}
diff --git a/contrib/moses2/parameters/AllOptions.h b/contrib/moses2/parameters/AllOptions.h
deleted file mode 100644
index 694a8a347..000000000
--- a/contrib/moses2/parameters/AllOptions.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include <boost/shared_ptr.hpp>
-#include "OptionsBaseClass.h"
-#include "SearchOptions.h"
-#include "CubePruningOptions.h"
-#include "NBestOptions.h"
-#include "ReorderingOptions.h"
-#include "ContextParameters.h"
-#include "InputOptions.h"
-#include "MBR_Options.h"
-#include "LMBR_Options.h"
-#include "ReportingOptions.h"
-#include "OOVHandlingOptions.h"
-#include "ServerOptions.h"
-#include "SyntaxOptions.h"
-
-namespace Moses2
-{
-  // Bundles all decoder option groups plus two top-level flags.
-  struct AllOptions : public OptionsBaseClass
-  {
-    typedef boost::shared_ptr<AllOptions const> ptr;
-
-    AllOptions();
-    AllOptions(Parameter const& param);
-
-    // Fill all groups from the command-line / config parameters.
-    bool init(Parameter const& param);
-    // Validate option combinations and derive dependent settings.
-    bool sanity_check();
-    // Apply overrides from an XML-RPC parameter map.
-    bool update(std::map<std::string,xmlrpc_c::value>const& param);
-    bool NBestDistinct() const;
-
-    SearchOptions search;
-    CubePruningOptions cube;
-    NBestOptions nbest;
-    ReorderingOptions reordering;
-    ContextParameters context;
-    InputOptions input;
-    MBR_Options mbr;
-    LMBR_Options lmbr;
-    ReportingOptions output;
-    OOVHandlingOptions unk;
-    ServerOptions server;
-    SyntaxOptions syntax;
-    bool mira;
-    bool use_legacy_pt;
-    // StackOptions stack;
-    // BeamSearchOptions beam;
-  };
-
-}
diff --git a/contrib/moses2/parameters/BeamSearchOptions.h b/contrib/moses2/parameters/BeamSearchOptions.h
deleted file mode 100644
index d67c43438..000000000
--- a/contrib/moses2/parameters/BeamSearchOptions.h
+++ /dev/null
@@ -1,15 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include "OptionsBaseClass.h"
-namespace Moses2
-{
-
-  // Option group for beam search; declares no data members.
-  struct BeamSearchOptions : public OptionsBaseClass
-  {
-    BeamSearchOptions(Parameter const& param);
-    bool init(Parameter const& param);
-  };
-
-}
diff --git a/contrib/moses2/parameters/BookkeepingOptions.h b/contrib/moses2/parameters/BookkeepingOptions.h
deleted file mode 100644
index ad7c78301..000000000
--- a/contrib/moses2/parameters/BookkeepingOptions.h
+++ /dev/null
@@ -1,18 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include "OptionsBaseClass.h"
-
-namespace Moses2
-{
-class Parameter;
-
-  // Option group holding the single need_alignment_info flag.
-  struct BookkeepingOptions : public OptionsBaseClass
-  {
-    BookkeepingOptions();
-    bool init(Parameter const& param);
-
-    bool need_alignment_info;
-  };
-
-
-
-}
diff --git a/contrib/moses2/parameters/CubePruningOptions.cpp b/contrib/moses2/parameters/CubePruningOptions.cpp
deleted file mode 100644
index 35663e61d..000000000
--- a/contrib/moses2/parameters/CubePruningOptions.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#include "CubePruningOptions.h"
-#include "../TypeDef.h"
-#include "../legacy/Parameter.h"
-
-namespace Moses2
-{
-
-  // Defaults: library-wide pop limit and diversity constants, lazy scoring
-  // and deterministic search both off.
-  CubePruningOptions::CubePruningOptions()
-    : pop_limit(DEFAULT_CUBE_PRUNING_POP_LIMIT),
-      diversity(DEFAULT_CUBE_PRUNING_DIVERSITY),
-      lazy_scoring(false),
-      deterministic_search(false)
-  {
-  }
-
-  // Reads the cube pruning settings from param, falling back to the built-in
-  // defaults. deterministic_search is not read here. Always returns true.
-  bool
-  CubePruningOptions::
-  init(Parameter const& param)
-  {
-    param.SetParameter(pop_limit,
-                       "cube-pruning-pop-limit",
-                       DEFAULT_CUBE_PRUNING_POP_LIMIT);
-
-    param.SetParameter(diversity,
-                       "cube-pruning-diversity",
-                       DEFAULT_CUBE_PRUNING_DIVERSITY);
-
-    param.SetParameter(lazy_scoring,
-                       "cube-pruning-lazy-scoring",
-                       false);
-
-    //param.SetParameter(deterministic_search, "cube-pruning-deterministic-search", false);
-    return true;
-  }
-
-#ifdef HAVE_XMLRPC_C
-  // Interprets an on/off style RPC string.
-  // Returns true for "true"/"on"/"1" and false for "false"/"off"/"0";
-  // throws xmlrpc_c::fault(errmsg, CODE_PARSE) for any other text.
-  static bool
-  parse_on_off(std::string const& spec, char const* errmsg)
-  {
-    if (spec == "true" || spec == "on" || spec == "1") return true;
-    if (spec == "false" || spec == "off" || spec == "0") return false;
-    throw xmlrpc_c::fault(errmsg, xmlrpc_c::fault::CODE_PARSE);
-  }
-
-  // Applies per-request overrides from an XML-RPC parameter map.
-  // Recognised keys: cube-pruning-pop-limit, cube-pruning-diversity (ints),
-  // cube-pruning-lazy-scoring, cube-pruning-deterministic-search (on/off
-  // strings). Keys that are absent leave the current value untouched.
-  // Returns true.
-  // Throws xmlrpc_c::fault (CODE_PARSE) when an on/off value is not
-  // recognised. The fault object used to be constructed and discarded, so a
-  // bad value was silently ignored. Values processed before the faulty key
-  // stay applied.
-  bool
-  CubePruningOptions::
-  update(std::map<std::string,xmlrpc_c::value>const& params)
-  {
-    typedef std::map<std::string, xmlrpc_c::value> params_t;
-
-    params_t::const_iterator si = params.find("cube-pruning-pop-limit");
-    if (si != params.end()) pop_limit = xmlrpc_c::value_int(si->second);
-
-    si = params.find("cube-pruning-diversity");
-    if (si != params.end()) diversity = xmlrpc_c::value_int(si->second);
-
-    si = params.find("cube-pruning-lazy-scoring");
-    if (si != params.end())
-      {
-        std::string spec = xmlrpc_c::value_string(si->second);
-        lazy_scoring
-          = parse_on_off(spec, "Error parsing specification for cube-pruning-lazy-scoring");
-      }
-
-    si = params.find("cube-pruning-deterministic-search");
-    if (si != params.end())
-      {
-        std::string spec = xmlrpc_c::value_string(si->second);
-        deterministic_search
-          = parse_on_off(spec, "Error parsing specification for cube-pruning-deterministic-search");
-      }
-
-    return true;
-  }
-#endif
-
-
-}
diff --git a/contrib/moses2/parameters/CubePruningOptions.h b/contrib/moses2/parameters/CubePruningOptions.h
deleted file mode 100644
index 2e9c898dc..000000000
--- a/contrib/moses2/parameters/CubePruningOptions.h
+++ /dev/null
@@ -1,25 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include "OptionsBaseClass.h"
-
-namespace Moses2
-{
-
-  // Settings for cube pruning.
-  struct CubePruningOptions : public OptionsBaseClass
-  {
-    CubePruningOptions();
-    CubePruningOptions(Parameter const& param);
-
-    bool init(Parameter const& param);
-    bool update(std::map<std::string,xmlrpc_c::value>const& params);
-
-    size_t pop_limit;
-    size_t diversity;
-    bool lazy_scoring;
-    bool deterministic_search;
-  };
-
-}
diff --git a/contrib/moses2/parameters/InputOptions.cpp b/contrib/moses2/parameters/InputOptions.cpp
deleted file mode 100644
index c008e98c4..000000000
--- a/contrib/moses2/parameters/InputOptions.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#include "InputOptions.h"
-#include <vector>
-#include <iostream>
-// #include "moses/StaticData.h"
-#include "moses/TypeDef.h"
-#include "../legacy/Parameter.h"
-
-namespace Moses2
-{
-
-  // Defaults: plain sentence input, XML passed through, "<" / ">" as XML
-  // brackets, a single input factor 0, "|" as factor delimiter and no
-  // placeholder factor.
-  InputOptions::InputOptions()
-    : continue_partial_translation(false),
-      input_type(SentenceInput),
-      xml_policy(XmlPassThrough),
-      factor_order(1, 0),
-      factor_delimiter("|"),
-      placeholder_factor(NOT_FOUND),
-      xml_brackets("<", ">")
-  {
-  }
-
-  // Reads all input-related settings from param. Terminates the process with
-  // exit(1) when "xml-brackets" does not consist of exactly two tokens;
-  // otherwise returns true.
-  bool
-  InputOptions::
-  init(Parameter const& param)
-  {
-    param.SetParameter(input_type, "inputtype", SentenceInput);
-#if 0
-    if (input_type == SentenceInput)
-      { VERBOSE(2, "input type is: text input"); }
-    else if (input_type == ConfusionNetworkInput)
-      { VERBOSE(2, "input type is: confusion net"); }
-    else if (input_type == WordLatticeInput)
-      { VERBOSE(2, "input type is: word lattice"); }
-    else if (input_type == TreeInputType)
-      { VERBOSE(2, "input type is: tree"); }
-    else if (input_type == TabbedSentenceInput)
-      { VERBOSE(2, "input type is: tabbed sentence"); }
-    else if (input_type == ForestInputType)
-      { VERBOSE(2, "input type is: forest"); }
-#endif
-
-    param.SetParameter(continue_partial_translation,
-                       "continue-partial-translation", false);
-    param.SetParameter<XmlInputType>(xml_policy, "xml-input", XmlPassThrough);
-
-    // specify XML tags opening and closing brackets for XML option
-    // Do we really want this to be configurable???? UG
-    const PARAM_VEC *bracketSpec = param.GetParam("xml-brackets");
-    if (bracketSpec && bracketSpec->size()) {
-      // Only the first value is used; it must hold two blank-separated tokens.
-      std::vector<std::string> brackets = Tokenize(bracketSpec->at(0));
-      if (brackets.size() != 2) {
-        std::cerr << "invalid xml-brackets value, "
-                  << "must specify exactly 2 blank-delimited strings "
-                  << "for XML tags opening and closing brackets"
-                  << std::endl;
-        exit(1);
-      }
-
-      xml_brackets.first = brackets[0];
-      xml_brackets.second = brackets[1];
-
-#if 0
-      VERBOSE(1,"XML tags opening and closing brackets for XML input are: "
-              << xml_brackets.first << " and "
-              << xml_brackets.second << std::endl);
-#endif
-    }
-
-    // Input factors: fall back to the single factor 0 when none are given.
-    const PARAM_VEC *factorSpec = param.GetParam("input-factors");
-    if (factorSpec) {
-      factor_order = Scan<FactorType>(*factorSpec);
-    }
-    if (factor_order.empty()) {
-      factor_order.assign(1,0);
-    }
-    param.SetParameter(placeholder_factor, "placeholder-factor", NOT_FOUND);
-
-    param.SetParameter<std::string>(factor_delimiter, "factor-delimiter", "|");
-    param.SetParameter<std::string>(input_file_path,"input-file","");
-
-    return true;
-  }
-
-
-#ifdef HAVE_XMLRPC_C
-  // Applies the per-request "xml-input" override, if present. Returns true.
-  bool
-  InputOptions::
-  update(std::map<std::string,xmlrpc_c::value>const& param)
-  {
-    typedef std::map<std::string, xmlrpc_c::value> params_t;
-
-    params_t::const_iterator const hit = param.find("xml-input");
-    if (hit == param.end()) {
-      return true;
-    }
-
-    xml_policy = Scan<XmlInputType>(xmlrpc_c::value_string(hit->second));
-    return true;
-  }
-#endif
-
-}
diff --git a/contrib/moses2/parameters/InputOptions.h b/contrib/moses2/parameters/InputOptions.h
deleted file mode 100644
index dd3be80e1..000000000
--- a/contrib/moses2/parameters/InputOptions.h
+++ /dev/null
@@ -1,32 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include <string>
-#include "OptionsBaseClass.h"
-#include "../TypeDef.h"
-
-namespace Moses2
-{
-  // Settings that describe how the decoder reads its input.
-  struct InputOptions : public OptionsBaseClass
-  {
-    InputOptions();
-
-    bool init(Parameter const& param);
-    bool update(std::map<std::string,xmlrpc_c::value>const& param);
-
-    bool continue_partial_translation;
-    InputTypeEnum input_type;
-    // pass through, ignore, exclusive, inclusive
-    XmlInputType xml_policy;
-    // input factor order
-    std::vector<FactorType> factor_order;
-    std::string factor_delimiter;
-    // where to store original text for placeholders
-    FactorType placeholder_factor;
-    std::string input_file_path;
-    // strings to use as XML tags' opening and closing brackets.
-    // Default are "<" and ">"
-    std::pair<std::string,std::string> xml_brackets;
-  };
-
-}
-
diff --git a/contrib/moses2/parameters/LMBR_Options.cpp b/contrib/moses2/parameters/LMBR_Options.cpp
deleted file mode 100644
index 25febd616..000000000
--- a/contrib/moses2/parameters/LMBR_Options.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#include "LMBR_Options.h"
-#include "../legacy/Parameter.h"
-
-namespace Moses2
-{
-
-  // Built-in defaults for lattice MBR decoding.
-  // Each value mirrors the fallback that init() passes to SetParameter for
-  // the same option. map_weight was 0.8f here while init() falls back to
-  // 0.0f for "lmbr-map-weight"; it is now 0.0f so that a default-constructed
-  // object and an object initialised from an empty parameter set agree.
-  LMBR_Options::
-  LMBR_Options()
-    : enabled(false)
-    , use_lattice_hyp_set(false)
-    , precision(0.8f)
-    , ratio(0.6f)
-    , map_weight(0.0f)
-    , pruning_factor(30)
-  { }
-
-  // Reads the lattice MBR settings from param, using the listed fallbacks
-  // for options that are absent. "lmbr-thetas" is only applied when given.
-  // Always returns true.
-  bool
-  LMBR_Options::
-  init(Parameter const& param)
-  {
-    param.SetParameter(enabled,
-                       "lminimum-bayes-risk", false);
-
-    param.SetParameter(ratio,
-                       "lmbr-r", 0.6f);
-    param.SetParameter(precision,
-                       "lmbr-p", 0.8f);
-    param.SetParameter(map_weight,
-                       "lmbr-map-weight", 0.0f);
-    param.SetParameter(pruning_factor,
-                       "lmbr-pruning-factor", size_t(30));
-    param.SetParameter(use_lattice_hyp_set,
-                       "lattice-hypo-set", false);
-
-    PARAM_VEC const* thetaSpec = param.GetParam("lmbr-thetas");
-    if (thetaSpec) {
-      theta = Scan<float>(*thetaSpec);
-    }
-
-    return true;
-  }
-
-
-
-
-}
diff --git a/contrib/moses2/parameters/LMBR_Options.h b/contrib/moses2/parameters/LMBR_Options.h
deleted file mode 100644
index c084f04b9..000000000
--- a/contrib/moses2/parameters/LMBR_Options.h
+++ /dev/null
@@ -1,26 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include <vector>
-#include "OptionsBaseClass.h"
-
-namespace Moses2
-{
-
-  // Options for lattice minimum Bayes risk decoding
-  struct LMBR_Options : public OptionsBaseClass
-  {
-    LMBR_Options();
-    bool init(Parameter const& param);
-
-    bool enabled;
-    //! to use nbest as hypothesis set during lattice MBR
-    bool use_lattice_hyp_set;
-    //! unigram precision theta - see Tromble et al 08 for more details
-    float precision;
-    //! decaying factor for ngram thetas - see Tromble et al 08
-    float ratio;
-    //! Weight given to the map solution. See Kumar et al 09
-    float map_weight;
-    //! average number of nodes per word wanted in pruned lattice
-    size_t pruning_factor;
-    //! theta(s) for lattice mbr calculation
-    std::vector<float> theta;
-  };
-
-}
-
diff --git a/contrib/moses2/parameters/MBR_Options.cpp b/contrib/moses2/parameters/MBR_Options.cpp
deleted file mode 100644
index 669ee94cc..000000000
--- a/contrib/moses2/parameters/MBR_Options.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#include "MBR_Options.h"
-#include "../legacy/Parameter.h"
-
-namespace Moses2
-{
-
-  // Defaults: MBR off, 200 candidates, scale 1.0.
-  MBR_Options::MBR_Options()
-    : enabled(false), size(200), scale(1.0f)
-  {
-  }
-
-
-  // Reads the MBR settings from param with the fallbacks given below.
-  // Always returns true.
-  bool
-  MBR_Options::
-  init(Parameter const& param)
-  {
-    param.SetParameter(enabled,
-                       "minimum-bayes-risk", false);
-    param.SetParameter<size_t>(size,
-                               "mbr-size", 200);
-    param.SetParameter(scale,
-                       "mbr-scale", 1.0f);
-    return true;
-  }
-
-}
diff --git a/contrib/moses2/parameters/MBR_Options.h b/contrib/moses2/parameters/MBR_Options.h
deleted file mode 100644
index 47ff45551..000000000
--- a/contrib/moses2/parameters/MBR_Options.h
+++ /dev/null
@@ -1,21 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include "OptionsBaseClass.h"
-namespace Moses2
-{
-
-  // Options for minimum Bayes risk decoding
-  struct MBR_Options : public OptionsBaseClass
-  {
-    MBR_Options();
-    bool init(Parameter const& param);
-
-    bool enabled;
-    //! number of translation candidates considered
-    size_t size;
-    /*! scaling factor for computing marginal probability
-     * of candidate translation */
-    float scale;
-  };
-
-}
-
diff --git a/contrib/moses2/parameters/OOVHandlingOptions.cpp b/contrib/moses2/parameters/OOVHandlingOptions.cpp
deleted file mode 100644
index 65f79584e..000000000
--- a/contrib/moses2/parameters/OOVHandlingOptions.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#include "OOVHandlingOptions.h"
-#include <vector>
-#include <iostream>
-#include "moses/StaticData.h"
-#include "moses/TypeDef.h"
-#include "../legacy/Parameter.h"
-
-namespace Moses2
-{
-
-  // Defaults: unknown words are neither dropped nor marked, the prefix is
-  // "UNK", the suffix is empty, and both remaining flags are off.
-  OOVHandlingOptions::OOVHandlingOptions()
-    : drop(false),
-      mark(false),
-      prefix("UNK"),
-      suffix(""),
-      word_deletion_enabled(false),
-      always_create_direct_transopt(false)
-  {
-  }
-
-  // Reads the unknown-word handling settings from param with the fallbacks
-  // given below. Always returns true.
-  bool
-  OOVHandlingOptions::
-  init(Parameter const& param)
-  {
-    // boolean switches
-    param.SetParameter(drop,
-                       "drop-unknown", false);
-    param.SetParameter(mark,
-                       "mark-unknown", false);
-    param.SetParameter(word_deletion_enabled,
-                       "phrase-drop-allowed", false);
-    param.SetParameter(always_create_direct_transopt,
-                       "always-create-direct-transopt", false);
-
-    // decoration applied to unknown words
-    param.SetParameter<std::string>(prefix,
-                                    "unknown-word-prefix", "UNK");
-    param.SetParameter<std::string>(suffix,
-                                    "unknown-word-suffix", "");
-    return true;
-  }
-
-
-#ifdef HAVE_XMLRPC_C
-  // Per-request override hook. Currently no key is evaluated: the parameter
-  // map is ignored and true is returned.
-  bool
-  OOVHandlingOptions::
-  update(std::map<std::string,xmlrpc_c::value>const& param)
-  {
-    typedef std::map<std::string, xmlrpc_c::value> params_t;
-    // Disabled template for a future override:
-    // params_t::const_iterator si = param.find("xml-input");
-    // if (si != param.end())
-    //   xml_policy = Scan<XmlInputType>(xmlrpc_c::value_string(si->second));
-    return true;
-  }
-#endif
-
-}
diff --git a/contrib/moses2/parameters/OOVHandlingOptions.h b/contrib/moses2/parameters/OOVHandlingOptions.h
deleted file mode 100644
index d11284220..000000000
--- a/contrib/moses2/parameters/OOVHandlingOptions.h
+++ /dev/null
@@ -1,27 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include <string>
-#include "OptionsBaseClass.h"
-
-namespace Moses2
-{
-  // Settings that control how out-of-vocabulary words are handled.
-  struct OOVHandlingOptions : public OptionsBaseClass
-  {
-    OOVHandlingOptions();
-
-    bool init(Parameter const& param);
-    bool update(std::map<std::string,xmlrpc_c::value>const& param);
-
-    bool drop;
-    bool mark;
-    std::string prefix;
-    std::string suffix;
-
-    bool word_deletion_enabled;
-    bool always_create_direct_transopt;
-  };
-
-}
-
diff --git a/contrib/moses2/parameters/OptionsBaseClass.cpp b/contrib/moses2/parameters/OptionsBaseClass.cpp
deleted file mode 100644
index c523a1333..000000000
--- a/contrib/moses2/parameters/OptionsBaseClass.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
-#include "OptionsBaseClass.h"
-#include "moses/Util.h"
-#include "../legacy/Parameter.h"
-
-namespace Moses2
-{
-
-#ifdef HAVE_XMLRPC_C
-  // Base implementation of the override hook: ignores params, returns true.
-  bool
-  OptionsBaseClass::update(std::map<std::string,xmlrpc_c::value>const& params)
-  {
-    return true;
-  }
-#endif
-
-#ifdef HAVE_XMLRPC_C
-  // Looks up key in an XML-RPC parameter map and converts its string value
-  // to bool with Scan<bool>; returns dfltval when the key is absent.
-  bool
-  OptionsBaseClass::
-  check(std::map<std::string, xmlrpc_c::value> const& param,
-        std::string const key, bool dfltval)
-  {
-    typedef std::map<std::string, xmlrpc_c::value> params_t;
-
-    params_t::const_iterator const hit = param.find(key);
-    if (hit != param.end()) {
-      return Scan<bool>(xmlrpc_c::value_string(hit->second));
-    }
-    return dfltval;
-  }
-#endif
-}
diff --git a/contrib/moses2/parameters/OptionsBaseClass.h b/contrib/moses2/parameters/OptionsBaseClass.h
deleted file mode 100644
index 05914ed82..000000000
--- a/contrib/moses2/parameters/OptionsBaseClass.h
+++ /dev/null
@@ -1,20 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include "moses/xmlrpc-c.h"
-#include <string>
-#include <map>
-namespace Moses2
-{
-class Parameter;
-
-  // Common base of all option groups.
-  struct OptionsBaseClass
-  {
-#ifdef HAVE_XMLRPC_C
-    // Override hook for per-request XML-RPC parameters.
-    virtual bool update(std::map<std::string,xmlrpc_c::value>const& params);
-#endif
-    // Boolean lookup of key in an XML-RPC parameter map, with a default.
-    bool check(std::map<std::string, xmlrpc_c::value> const& param,
-               std::string const key,
-               bool dfltval);
-  };
-}
diff --git a/contrib/moses2/parameters/ReorderingOptions.cpp b/contrib/moses2/parameters/ReorderingOptions.cpp
deleted file mode 100644
index 5fef5c54a..000000000
--- a/contrib/moses2/parameters/ReorderingOptions.cpp
+++ /dev/null
@@ -1,31 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#include "ReorderingOptions.h"
-#include "../legacy/Parameter.h"
-
-namespace Moses2
-{
-
- ReorderingOptions::
- ReorderingOptions()
- : max_distortion(-1)
- , monotone_at_punct(false)
- , use_early_distortion_cost(false)
- {}
-
-
- ReorderingOptions::
- ReorderingOptions(Parameter const& param)
- {
- init(param);
- }
-
- bool
- ReorderingOptions::
- init(Parameter const& param)
- {
- param.SetParameter(max_distortion, "distortion-limit", -1);
- param.SetParameter(monotone_at_punct, "monotone-at-punctuation", false);
- param.SetParameter(use_early_distortion_cost, "early-distortion-cost", false);
- return true;
- }
-}
diff --git a/contrib/moses2/parameters/ReorderingOptions.h b/contrib/moses2/parameters/ReorderingOptions.h
deleted file mode 100644
index 6bdc1f043..000000000
--- a/contrib/moses2/parameters/ReorderingOptions.h
+++ /dev/null
@@ -1,20 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include "OptionsBaseClass.h"
-namespace Moses2
-{
-
- struct
- ReorderingOptions : public OptionsBaseClass
- {
- int max_distortion;
- bool monotone_at_punct;
- bool use_early_distortion_cost;
- bool init(Parameter const& param);
- ReorderingOptions(Parameter const& param);
- ReorderingOptions();
- };
-
-}
-
diff --git a/contrib/moses2/parameters/ReportingOptions.cpp b/contrib/moses2/parameters/ReportingOptions.cpp
deleted file mode 100644
index adc27baf2..000000000
--- a/contrib/moses2/parameters/ReportingOptions.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#include "ReportingOptions.h"
-#include "../legacy/Parameter.h"
-
-namespace Moses2
-{
- using namespace std;
-
- ReportingOptions::
- ReportingOptions()
- : start_translation_id(0)
- , ReportAllFactors(false)
- , ReportSegmentation(0)
- , PrintAlignmentInfo(false)
- , PrintAllDerivations(false)
- , PrintTranslationOptions(false)
- , WA_SortOrder(NoSort)
- , WordGraph(false)
- , DontPruneSearchGraph(false)
- , RecoverPath(false)
- , ReportHypoScore(false)
- , PrintID(false)
- , PrintPassThrough(false)
- , include_lhs_in_search_graph(false)
- , lattice_sample_size(0)
- {
- factor_order.assign(1,0);
- factor_delimiter = "|";
- }
-
- bool
- ReportingOptions::
- init(Parameter const& param)
- {
- param.SetParameter<long>(start_translation_id, "start-translation-id", 0);
-
- // including factors in the output
- param.SetParameter(ReportAllFactors, "report-all-factors", false);
-
- // segmentation reporting
- ReportSegmentation = (param.GetParam("report-segmentation-enriched")
- ? 2 : param.GetParam("report-segmentation")
- ? 1 : 0);
-
- // word alignment reporting
- param.SetParameter(PrintAlignmentInfo, "print-alignment-info", false);
- param.SetParameter(WA_SortOrder, "sort-word-alignment", NoSort);
- std::string e; // hack to save us param.SetParameter<string>(...)
- param.SetParameter(AlignmentOutputFile,"alignment-output-file", e);
-
-
- param.SetParameter(PrintAllDerivations, "print-all-derivations", false);
- param.SetParameter(PrintTranslationOptions, "print-translation-option", false);
-
- // output a word graph
- PARAM_VEC const* params;
- params = param.GetParam("output-word-graph");
- WordGraph = (params && params->size() == 2); // what are the two options?
-
- // dump the search graph
- param.SetParameter(SearchGraph, "output-search-graph", e);
- param.SetParameter(SearchGraphExtended, "output-search-graph-extended", e);
- param.SetParameter(SearchGraphSLF,"output-search-graph-slf", e);
- param.SetParameter(SearchGraphHG, "output-search-graph-hypergraph", e);
-#ifdef HAVE_PROTOBUF
- param.SetParameter(SearchGraphPB, "output-search-graph-pb", e);
-#endif
-
- param.SetParameter(DontPruneSearchGraph, "unpruned-search-graph", false);
- param.SetParameter(include_lhs_in_search_graph,
- "include-lhs-in-search-graph", false );
-
-
- // miscellaneous
- param.SetParameter(RecoverPath, "recover-input-path",false);
- param.SetParameter(ReportHypoScore, "output-hypo-score",false);
- param.SetParameter(PrintID, "print-id",false);
- param.SetParameter(PrintPassThrough, "print-passthrough",false);
- param.SetParameter(detailed_all_transrep_filepath,
- "translation-all-details", e);
- param.SetParameter(detailed_transrep_filepath, "translation-details", e);
- param.SetParameter(detailed_tree_transrep_filepath,
- "tree-translation-details", e);
-
- params = param.GetParam("lattice-samples");
- if (params) {
- if (params->size() ==2 ) {
- lattice_sample_filepath = params->at(0);
- lattice_sample_size = Scan<size_t>(params->at(1));
- } else {
- std::cerr <<"wrong format for switch -lattice-samples file size";
- return false;
- }
- }
-
-
- if (ReportAllFactors) {
- factor_order.clear();
- for (size_t i = 0; i < MAX_NUM_FACTORS; ++i)
- factor_order.push_back(i);
- } else {
- params= param.GetParam("output-factors");
- if (params) factor_order = Scan<FactorType>(*params);
- if (factor_order.empty()) factor_order.assign(1,0);
- }
-
- param.SetParameter(factor_delimiter, "factor-delimiter", std::string("|"));
- param.SetParameter(factor_delimiter, "output-factor-delimiter", factor_delimiter);
-
- return true;
- }
-
-#ifdef HAVE_XMLRPC_C
- bool
- ReportingOptions::
- update(std::map<std::string, xmlrpc_c::value>const& param)
- {
- ReportAllFactors = check(param, "report-all-factors", ReportAllFactors);
-
-
- std::map<std::string, xmlrpc_c::value>::const_iterator m;
- m = param.find("output-factors");
- if (m != param.end()) {
- factor_order=Tokenize<FactorType>(xmlrpc_c::value_string(m->second),",");
- }
-
- if (ReportAllFactors) {
- factor_order.clear();
- for (size_t i = 0; i < MAX_NUM_FACTORS; ++i)
- factor_order.push_back(i);
- }
-
- m = param.find("align");
- if (m != param.end() && Scan<bool>(xmlrpc_c::value_string(m->second)))
- ReportSegmentation = 1;
-
- PrintAlignmentInfo = check(param,"word-align",PrintAlignmentInfo);
-
- m = param.find("factor-delimiter");
- if (m != param.end()) {
- factor_delimiter = Trim(xmlrpc_c::value_string(m->second));
- }
-
- m = param.find("output-factor-delimiter");
- if (m != param.end()) {
- factor_delimiter = Trim(xmlrpc_c::value_string(m->second));
- }
-
- return true;
- }
-#endif
-}
diff --git a/contrib/moses2/parameters/ReportingOptions.h b/contrib/moses2/parameters/ReportingOptions.h
deleted file mode 100644
index c96809aec..000000000
--- a/contrib/moses2/parameters/ReportingOptions.h
+++ /dev/null
@@ -1,70 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include <vector>
-#include "OptionsBaseClass.h"
-#include "../TypeDef.h"
-
-namespace Moses2
-{
-
- struct
- ReportingOptions : public OptionsBaseClass
- {
- long start_translation_id;
-
- std::vector<FactorType> factor_order;
- std::string factor_delimiter;
-
- bool ReportAllFactors; // m_reportAllFactors;
- int ReportSegmentation; // 0: no 1: m_reportSegmentation 2: ..._enriched
-
- bool PrintAlignmentInfo; // m_PrintAlignmentInfo
- bool PrintAllDerivations;
- bool PrintTranslationOptions;
-
- WordAlignmentSort WA_SortOrder; // 0: no, 1: target order
- std::string AlignmentOutputFile;
-
- bool WordGraph;
-
- std::string SearchGraph;
- std::string SearchGraphExtended;
- std::string SearchGraphSLF;
- std::string SearchGraphHG;
- std::string SearchGraphPB;
- bool DontPruneSearchGraph;
-
- bool RecoverPath; // recover input path?
- bool ReportHypoScore;
-
- bool PrintID;
- bool PrintPassThrough;
-
- // transrep = translation reporting
- std::string detailed_transrep_filepath;
- std::string detailed_tree_transrep_filepath;
- std::string detailed_all_transrep_filepath;
- bool include_lhs_in_search_graph;
-
-
- std::string lattice_sample_filepath;
- size_t lattice_sample_size;
-
- bool init(Parameter const& param);
-
- /// do we need to keep the search graph from decoding?
- bool NeedSearchGraph() const {
- return !(SearchGraph.empty() && SearchGraphExtended.empty());
- }
-
-#ifdef HAVE_XMLRPC_C
- bool update(std::map<std::string, xmlrpc_c::value>const& param);
-#endif
-
-
- ReportingOptions();
- };
-
-}
-
diff --git a/contrib/moses2/parameters/SearchOptions.cpp b/contrib/moses2/parameters/SearchOptions.cpp
deleted file mode 100644
index 98c1789ea..000000000
--- a/contrib/moses2/parameters/SearchOptions.cpp
+++ /dev/null
@@ -1,107 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#include "SearchOptions.h"
-#include "../legacy/Parameter.h"
-
-namespace Moses2
-{
-
- SearchOptions::
- SearchOptions()
- : algo(Normal)
- , stack_size(DEFAULT_MAX_HYPOSTACK_SIZE)
- , stack_diversity(0)
- , disable_discarding(false)
- , max_phrase_length(DEFAULT_MAX_PHRASE_LENGTH)
- , max_trans_opt_per_cov(DEFAULT_MAX_TRANS_OPT_SIZE)
- , max_partial_trans_opt(DEFAULT_MAX_PART_TRANS_OPT_SIZE)
- , beam_width(DEFAULT_BEAM_WIDTH)
- , timeout(0)
- , consensus(false)
- , early_discarding_threshold(DEFAULT_EARLY_DISCARDING_THRESHOLD)
- , trans_opt_threshold(DEFAULT_TRANSLATION_OPTION_THRESHOLD)
- { }
-
- SearchOptions::
- SearchOptions(Parameter const& param)
- : stack_diversity(0)
- {
- init(param);
- }
-
- bool
- SearchOptions::
- init(Parameter const& param)
- {
- param.SetParameter(algo, "search-algorithm", Normal);
- param.SetParameter(stack_size, "stack", DEFAULT_MAX_HYPOSTACK_SIZE);
- param.SetParameter(stack_diversity, "stack-diversity", size_t(0));
- param.SetParameter(beam_width, "beam-threshold", DEFAULT_BEAM_WIDTH);
- param.SetParameter(early_discarding_threshold, "early-discarding-threshold",
- DEFAULT_EARLY_DISCARDING_THRESHOLD);
- param.SetParameter(timeout, "time-out", 0);
- param.SetParameter(max_phrase_length, "max-phrase-length",
- DEFAULT_MAX_PHRASE_LENGTH);
- param.SetParameter(trans_opt_threshold, "translation-option-threshold",
- DEFAULT_TRANSLATION_OPTION_THRESHOLD);
- param.SetParameter(max_trans_opt_per_cov, "max-trans-opt-per-coverage",
- DEFAULT_MAX_TRANS_OPT_SIZE);
- param.SetParameter(max_partial_trans_opt, "max-partial-trans-opt",
- DEFAULT_MAX_PART_TRANS_OPT_SIZE);
-
- param.SetParameter(consensus, "consensus-decoding", false);
- param.SetParameter(disable_discarding, "disable-discarding", false);
-
- // transformation to log of a few scores
- beam_width = TransformScore(beam_width);
- trans_opt_threshold = TransformScore(trans_opt_threshold);
- early_discarding_threshold = TransformScore(early_discarding_threshold);
-
- return true;
- }
-
- bool
- is_syntax(SearchAlgorithm algo)
- {
- return (algo == CYKPlus || algo == ChartIncremental ||
- algo == SyntaxS2T || algo == SyntaxT2S ||
- algo == SyntaxF2S || algo == SyntaxT2S_SCFG);
- }
-
-#ifdef HAVE_XMLRPC_C
- bool
- SearchOptions::
- update(std::map<std::string,xmlrpc_c::value>const& params)
- {
- typedef std::map<std::string, xmlrpc_c::value> params_t;
-
- params_t::const_iterator si = params.find("search-algorithm");
- if (si != params.end())
- {
- // use named parameters
- std::string spec = xmlrpc_c::value_string(si->second);
- if (spec == "normal" || spec == "0") algo = Normal;
- else if (spec == "cube" || spec == "1") algo = CubePruning;
- else throw xmlrpc_c::fault("Unsupported search algorithm",
- xmlrpc_c::fault::CODE_PARSE);
- }
-
- si = params.find("stack");
- if (si != params.end()) stack_size = xmlrpc_c::value_int(si->second);
-
- si = params.find("stack-diversity");
- if (si != params.end()) stack_diversity = xmlrpc_c::value_int(si->second);
-
- si = params.find("beam-threshold");
- if (si != params.end()) beam_width = xmlrpc_c::value_double(si->second);
-
- si = params.find("time-out");
- if (si != params.end()) timeout = xmlrpc_c::value_int(si->second);
-
- si = params.find("max-phrase-length");
- if (si != params.end()) max_phrase_length = xmlrpc_c::value_int(si->second);
-
- return true;
- }
-#endif
-
-}
diff --git a/contrib/moses2/parameters/SearchOptions.h b/contrib/moses2/parameters/SearchOptions.h
deleted file mode 100644
index 3de0a979a..000000000
--- a/contrib/moses2/parameters/SearchOptions.h
+++ /dev/null
@@ -1,54 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include <limits>
-#include "OptionsBaseClass.h"
-#include "../TypeDef.h"
-
-namespace Moses2
-{
-
- bool is_syntax(SearchAlgorithm algo);
-
- struct
- SearchOptions : public OptionsBaseClass
- {
- SearchAlgorithm algo;
-
- // stack decoding
- size_t stack_size; // maxHypoStackSize;
- size_t stack_diversity; // minHypoStackDiversity;
- bool disable_discarding;
- // Disable discarding of bad hypotheses from HypothesisStackNormal
- size_t max_phrase_length;
- size_t max_trans_opt_per_cov;
- size_t max_partial_trans_opt;
- // beam search
- float beam_width;
-
- int timeout;
-
- bool consensus; //! Use Consensus decoding (DeNero et al 2009)
-
- // reordering options
- // bool reorderingConstraint; //! use additional reordering constraints
- // bool useEarlyDistortionCost;
-
- float early_discarding_threshold;
- float trans_opt_threshold;
-
- bool init(Parameter const& param);
- SearchOptions(Parameter const& param);
- SearchOptions();
-
- bool
- UseEarlyDiscarding() const {
- return early_discarding_threshold != -std::numeric_limits<float>::infinity();
- }
-
- bool
- update(std::map<std::string,xmlrpc_c::value>const& params);
-
- };
-
-}
diff --git a/contrib/moses2/parameters/ServerOptions.h b/contrib/moses2/parameters/ServerOptions.h
deleted file mode 100644
index d662d1499..000000000
--- a/contrib/moses2/parameters/ServerOptions.h
+++ /dev/null
@@ -1,43 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include <map>
-#include <stdint.h>
-#include <xmlrpc-c/base.hpp>
-#include <xmlrpc-c/registry.hpp>
-#include <xmlrpc-c/server_abyss.hpp>
-
-namespace Moses2
-{
-class Parameter;
-
- struct
- ServerOptions
- {
- bool is_serial;
- uint32_t numThreads; // might not be used any more, actually
-
- size_t sessionTimeout; // this is related to Moses translation sessions
- size_t sessionCacheSize; // this is related to Moses translation sessions
-
- int port; // this is for the abyss server
- std::string logfile; // this is for the abyss server
- int maxConn; // this is for the abyss server
- int maxConnBacklog; // this is for the abyss server
- int keepaliveTimeout; // this is for the abyss server
- int keepaliveMaxConn; // this is for the abyss server
- int timeout; // this is for the abyss server
-
- bool init(Parameter const& param);
- ServerOptions(Parameter const& param);
- ServerOptions();
-
- bool
- update(std::map<std::string,xmlrpc_c::value>const& params)
- {
- return true;
- }
-
- };
-
-}
diff --git a/contrib/moses2/pugixml.cpp b/contrib/moses2/pugixml.cpp
deleted file mode 100644
index 737733e64..000000000
--- a/contrib/moses2/pugixml.cpp
+++ /dev/null
@@ -1,12444 +0,0 @@
-/**
- * pugixml parser - version 1.7
- * --------------------------------------------------------
- * Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
- * Report bugs and download new versions at http://pugixml.org/
- *
- * This library is distributed under the MIT License. See notice at the end
- * of this file.
- *
- * This work is based on the pugxml parser, which is:
- * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
- */
-
-#ifndef SOURCE_PUGIXML_CPP
-#define SOURCE_PUGIXML_CPP
-
-#include "pugixml.hpp"
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <limits.h>
-
-#ifdef PUGIXML_WCHAR_MODE
-# include <wchar.h>
-#endif
-
-#ifndef PUGIXML_NO_XPATH
-# include <math.h>
-# include <float.h>
-# ifdef PUGIXML_NO_EXCEPTIONS
-# include <setjmp.h>
-# endif
-#endif
-
-#ifndef PUGIXML_NO_STL
-# include <istream>
-# include <ostream>
-# include <string>
-#endif
-
-// For placement new
-#include <new>
-
-#ifdef _MSC_VER
-# pragma warning(push)
-# pragma warning(disable: 4127) // conditional expression is constant
-# pragma warning(disable: 4324) // structure was padded due to __declspec(align())
-# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
-# pragma warning(disable: 4702) // unreachable code
-# pragma warning(disable: 4996) // this function or variable may be unsafe
-# pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged
-#endif
-
-#ifdef __INTEL_COMPILER
-# pragma warning(disable: 177) // function was declared but never referenced
-# pragma warning(disable: 279) // controlling expression is constant
-# pragma warning(disable: 1478 1786) // function was declared "deprecated"
-# pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
-#endif
-
-#if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
-# pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
-#endif
-
-#ifdef __BORLANDC__
-# pragma option push
-# pragma warn -8008 // condition is always false
-# pragma warn -8066 // unreachable code
-#endif
-
-#ifdef __SNC__
-// Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
-# pragma diag_suppress=178 // function was declared but never referenced
-# pragma diag_suppress=237 // controlling expression is constant
-#endif
-
-// Inlining controls
-#if defined(_MSC_VER) && _MSC_VER >= 1300
-# define PUGI__NO_INLINE __declspec(noinline)
-#elif defined(__GNUC__)
-# define PUGI__NO_INLINE __attribute__((noinline))
-#else
-# define PUGI__NO_INLINE
-#endif
-
-// Branch weight controls
-#if defined(__GNUC__)
-# define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
-#else
-# define PUGI__UNLIKELY(cond) (cond)
-#endif
-
-// Simple static assertion
-#define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
-
-// Digital Mars C++ bug workaround for passing char loaded from memory via stack
-#ifdef __DMC__
-# define PUGI__DMC_VOLATILE volatile
-#else
-# define PUGI__DMC_VOLATILE
-#endif
-
-// Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
-#if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
-using std::memcpy;
-using std::memmove;
-using std::memset;
-#endif
-
-// In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
-#if defined(_MSC_VER) && !defined(__S3E__)
-# define PUGI__MSVC_CRT_VERSION _MSC_VER
-#endif
-
-#ifdef PUGIXML_HEADER_ONLY
-# define PUGI__NS_BEGIN namespace pugi { namespace impl {
-# define PUGI__NS_END } }
-# define PUGI__FN inline
-# define PUGI__FN_NO_INLINE inline
-#else
-# if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
-# define PUGI__NS_BEGIN namespace pugi { namespace impl {
-# define PUGI__NS_END } }
-# else
-# define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
-# define PUGI__NS_END } } }
-# endif
-# define PUGI__FN
-# define PUGI__FN_NO_INLINE PUGI__NO_INLINE
-#endif
-
-// uintptr_t
-#if !defined(_MSC_VER) || _MSC_VER >= 1600
-# include <stdint.h>
-#else
-namespace pugi
-{
-# ifndef _UINTPTR_T_DEFINED
- typedef size_t uintptr_t;
-# endif
-
- typedef unsigned __int8 uint8_t;
- typedef unsigned __int16 uint16_t;
- typedef unsigned __int32 uint32_t;
-}
-#endif
-
-// Memory allocation
-PUGI__NS_BEGIN
- PUGI__FN void* default_allocate(size_t size)
- {
- return malloc(size);
- }
-
- PUGI__FN void default_deallocate(void* ptr)
- {
- free(ptr);
- }
-
- template <typename T>
- struct xml_memory_management_function_storage
- {
- static allocation_function allocate;
- static deallocation_function deallocate;
- };
-
- // Global allocation functions are stored in class statics so that in header mode linker deduplicates them
- // Without a template<> we'll get multiple definitions of the same static
- template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
- template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
-
- typedef xml_memory_management_function_storage<int> xml_memory;
-PUGI__NS_END
-
-// String utilities
-PUGI__NS_BEGIN
- // Get string length
- PUGI__FN size_t strlength(const char_t* s)
- {
- assert(s);
-
- #ifdef PUGIXML_WCHAR_MODE
- return wcslen(s);
- #else
- return strlen(s);
- #endif
- }
-
- // Compare two strings
- PUGI__FN bool strequal(const char_t* src, const char_t* dst)
- {
- assert(src && dst);
-
- #ifdef PUGIXML_WCHAR_MODE
- return wcscmp(src, dst) == 0;
- #else
- return strcmp(src, dst) == 0;
- #endif
- }
-
- // Compare lhs with [rhs_begin, rhs_end)
- PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
- {
- for (size_t i = 0; i < count; ++i)
- if (lhs[i] != rhs[i])
- return false;
-
- return lhs[count] == 0;
- }
-
- // Get length of wide string, even if CRT lacks wide character support
- PUGI__FN size_t strlength_wide(const wchar_t* s)
- {
- assert(s);
-
- #ifdef PUGIXML_WCHAR_MODE
- return wcslen(s);
- #else
- const wchar_t* end = s;
- while (*end) end++;
- return static_cast<size_t>(end - s);
- #endif
- }
-PUGI__NS_END
-
-// auto_ptr-like object for exception recovery
-PUGI__NS_BEGIN
- template <typename T, typename D = void(*)(T*)> struct auto_deleter
- {
- T* data;
- D deleter;
-
- auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
- {
- }
-
- ~auto_deleter()
- {
- if (data) deleter(data);
- }
-
- T* release()
- {
- T* result = data;
- data = 0;
- return result;
- }
- };
-PUGI__NS_END
-
-#ifdef PUGIXML_COMPACT
-PUGI__NS_BEGIN
- class compact_hash_table
- {
- public:
- compact_hash_table(): _items(0), _capacity(0), _count(0)
- {
- }
-
- void clear()
- {
- if (_items)
- {
- xml_memory::deallocate(_items);
- _items = 0;
- _capacity = 0;
- _count = 0;
- }
- }
-
- void** find(const void* key)
- {
- assert(key);
-
- if (_capacity == 0) return 0;
-
- size_t hashmod = _capacity - 1;
- size_t bucket = hash(key) & hashmod;
-
- for (size_t probe = 0; probe <= hashmod; ++probe)
- {
- item_t& probe_item = _items[bucket];
-
- if (probe_item.key == key)
- return &probe_item.value;
-
- if (probe_item.key == 0)
- return 0;
-
- // hash collision, quadratic probing
- bucket = (bucket + probe + 1) & hashmod;
- }
-
- assert(!"Hash table is full");
- return 0;
- }
-
- void** insert(const void* key)
- {
- assert(key);
- assert(_capacity != 0 && _count < _capacity - _capacity / 4);
-
- size_t hashmod = _capacity - 1;
- size_t bucket = hash(key) & hashmod;
-
- for (size_t probe = 0; probe <= hashmod; ++probe)
- {
- item_t& probe_item = _items[bucket];
-
- if (probe_item.key == 0)
- {
- probe_item.key = key;
- _count++;
- return &probe_item.value;
- }
-
- if (probe_item.key == key)
- return &probe_item.value;
-
- // hash collision, quadratic probing
- bucket = (bucket + probe + 1) & hashmod;
- }
-
- assert(!"Hash table is full");
- return 0;
- }
-
- bool reserve()
- {
- if (_count + 16 >= _capacity - _capacity / 4)
- return rehash();
-
- return true;
- }
-
- private:
- struct item_t
- {
- const void* key;
- void* value;
- };
-
- item_t* _items;
- size_t _capacity;
-
- size_t _count;
-
- bool rehash();
-
- static unsigned int hash(const void* key)
- {
- unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));
-
- // MurmurHash3 32-bit finalizer
- h ^= h >> 16;
- h *= 0x85ebca6bu;
- h ^= h >> 13;
- h *= 0xc2b2ae35u;
- h ^= h >> 16;
-
- return h;
- }
- };
-
- PUGI__FN_NO_INLINE bool compact_hash_table::rehash()
- {
- compact_hash_table rt;
- rt._capacity = (_capacity == 0) ? 32 : _capacity * 2;
- rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * rt._capacity));
-
- if (!rt._items)
- return false;
-
- memset(rt._items, 0, sizeof(item_t) * rt._capacity);
-
- for (size_t i = 0; i < _capacity; ++i)
- if (_items[i].key)
- *rt.insert(_items[i].key) = _items[i].value;
-
- if (_items)
- xml_memory::deallocate(_items);
-
- _capacity = rt._capacity;
- _items = rt._items;
-
- assert(_count == rt._count);
-
- return true;
- }
-
-PUGI__NS_END
-#endif
-
-PUGI__NS_BEGIN
- static const size_t xml_memory_page_size =
- #ifdef PUGIXML_MEMORY_PAGE_SIZE
- PUGIXML_MEMORY_PAGE_SIZE
- #else
- 32768
- #endif
- ;
-
-#ifdef PUGIXML_COMPACT
- static const uintptr_t xml_memory_block_alignment = 4;
-
- static const uintptr_t xml_memory_page_alignment = sizeof(void*);
-#else
- static const uintptr_t xml_memory_block_alignment = sizeof(void*);
-
- static const uintptr_t xml_memory_page_alignment = 64;
- static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1);
-#endif
-
- // extra metadata bits
- static const uintptr_t xml_memory_page_contents_shared_mask = 32;
- static const uintptr_t xml_memory_page_name_allocated_mask = 16;
- static const uintptr_t xml_memory_page_value_allocated_mask = 8;
- static const uintptr_t xml_memory_page_type_mask = 7;
-
- // combined masks for string uniqueness
- static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
- static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;
-
-#ifdef PUGIXML_COMPACT
- #define PUGI__GETPAGE_IMPL(header) (header).get_page()
-#else
- #define PUGI__GETPAGE_IMPL(header) reinterpret_cast<impl::xml_memory_page*>((header) & impl::xml_memory_page_pointer_mask)
-#endif
-
- #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
- #define PUGI__NODETYPE(n) static_cast<xml_node_type>(((n)->header & impl::xml_memory_page_type_mask) + 1)
-
- struct xml_allocator;
-
- struct xml_memory_page
- {
- static xml_memory_page* construct(void* memory)
- {
- xml_memory_page* result = static_cast<xml_memory_page*>(memory);
-
- result->allocator = 0;
- result->prev = 0;
- result->next = 0;
- result->busy_size = 0;
- result->freed_size = 0;
-
- #ifdef PUGIXML_COMPACT
- result->compact_string_base = 0;
- result->compact_shared_parent = 0;
- result->compact_page_marker = 0;
- #endif
-
- return result;
- }
-
- xml_allocator* allocator;
-
- xml_memory_page* prev;
- xml_memory_page* next;
-
- size_t busy_size;
- size_t freed_size;
-
- #ifdef PUGIXML_COMPACT
- char_t* compact_string_base;
- void* compact_shared_parent;
- uint32_t* compact_page_marker;
- #endif
- };
-
- struct xml_memory_string_header
- {
- uint16_t page_offset; // offset from page->data
- uint16_t full_size; // 0 if string occupies whole page
- };
-
- struct xml_allocator
- {
- xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
- {
- #ifdef PUGIXML_COMPACT
- _hash = 0;
- #endif
- }
-
- xml_memory_page* allocate_page(size_t data_size)
- {
- size_t size = sizeof(xml_memory_page) + data_size;
-
- // allocate block with some alignment, leaving memory for worst-case padding
- void* memory = xml_memory::allocate(size + xml_memory_page_alignment);
- if (!memory) return 0;
-
- // align to next page boundary (note: this guarantees at least 1 usable byte before the page)
- char* page_memory = reinterpret_cast<char*>((reinterpret_cast<uintptr_t>(memory) + xml_memory_page_alignment) & ~(xml_memory_page_alignment - 1));
-
- // prepare page structure
- xml_memory_page* page = xml_memory_page::construct(page_memory);
- assert(page);
-
- page->allocator = _root->allocator;
-
- // record the offset for freeing the memory block
- assert(page_memory > memory && page_memory - static_cast<char*>(memory) <= 127);
- page_memory[-1] = static_cast<char>(page_memory - static_cast<char*>(memory));
-
- return page;
- }
-
- static void deallocate_page(xml_memory_page* page)
- {
- char* page_memory = reinterpret_cast<char*>(page);
-
- xml_memory::deallocate(page_memory - page_memory[-1]);
- }
-
- void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
-
- void* allocate_memory(size_t size, xml_memory_page*& out_page)
- {
- if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
- return allocate_memory_oob(size, out_page);
-
- void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;
-
- _busy_size += size;
-
- out_page = _root;
-
- return buf;
- }
-
- #ifdef PUGIXML_COMPACT
- void* allocate_object(size_t size, xml_memory_page*& out_page)
- {
- void* result = allocate_memory(size + sizeof(uint32_t), out_page);
- if (!result) return 0;
-
- // adjust for marker
- ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);
-
- if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment))
- {
- // insert new marker
- uint32_t* marker = static_cast<uint32_t*>(result);
-
- *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page));
- out_page->compact_page_marker = marker;
-
- // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
- // this will make sure deallocate_memory correctly tracks the size
- out_page->freed_size += sizeof(uint32_t);
-
- return marker + 1;
- }
- else
- {
- // roll back uint32_t part
- _busy_size -= sizeof(uint32_t);
-
- return result;
- }
- }
- #else
- void* allocate_object(size_t size, xml_memory_page*& out_page)
- {
- return allocate_memory(size, out_page);
- }
- #endif
-
- void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
- {
- if (page == _root) page->busy_size = _busy_size;
-
- assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
- (void)!ptr;
-
- page->freed_size += size;
- assert(page->freed_size <= page->busy_size);
-
- if (page->freed_size == page->busy_size)
- {
- if (page->next == 0)
- {
- assert(_root == page);
-
- // top page freed, just reset sizes
- page->busy_size = 0;
- page->freed_size = 0;
-
- #ifdef PUGIXML_COMPACT
- // reset compact state to maximize efficiency
- page->compact_string_base = 0;
- page->compact_shared_parent = 0;
- page->compact_page_marker = 0;
- #endif
-
- _busy_size = 0;
- }
- else
- {
- assert(_root != page);
- assert(page->prev);
-
- // remove from the list
- page->prev->next = page->next;
- page->next->prev = page->prev;
-
- // deallocate
- deallocate_page(page);
- }
- }
- }
-
- char_t* allocate_string(size_t length)
- {
- static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;
-
- PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
-
- // allocate memory for string and header block
- size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
-
- // round size up to block alignment boundary
- size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);
-
- xml_memory_page* page;
- xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
-
- if (!header) return 0;
-
- // setup header
- ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);
-
- assert(page_offset % xml_memory_block_alignment == 0);
- assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
- header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);
-
- // full_size == 0 for large strings that occupy the whole page
- assert(full_size % xml_memory_block_alignment == 0);
- assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
- header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);
-
- // round-trip through void* to avoid 'cast increases required alignment of target type' warning
- // header is guaranteed a pointer-sized alignment, which should be enough for char_t
- return static_cast<char_t*>(static_cast<void*>(header + 1));
- }
-
- void deallocate_string(char_t* string) // Releases a string obtained from allocate_string by decoding the header stored just before it.
- {
- // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
- // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
-
- // get header
- xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
- assert(header);
-
- // deallocate
- size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment; // byte distance from the page start to the header (inverse of the encoding in allocate_string)
- xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
-
- // if full_size == 0 then this string occupies the whole page
- size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;
-
- deallocate_memory(header, full_size, page);
- }
-
- // Forwards to _hash->reserve() in PUGIXML_COMPACT builds; all other builds report success unconditionally.
- bool reserve()
- {
- #ifndef PUGIXML_COMPACT
- return true;
- #else
- return _hash->reserve();
- #endif
- }
-
- xml_memory_page* _root;
- size_t _busy_size;
-
- #ifdef PUGIXML_COMPACT
- compact_hash_table* _hash;
- #endif
- };
-
- PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) // Obtains a fresh page for `size` bytes; returns the address just past the page structure, or 0 if allocate_page fails (out_page is then null).
- {
- const size_t large_allocation_threshold = xml_memory_page_size / 4; // requests above this get a dedicated page sized to the request
-
- xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
- out_page = page;
-
- if (!page) return 0;
-
- if (size <= large_allocation_threshold)
- {
- _root->busy_size = _busy_size; // write the cached busy size back to the old root page before switching roots
-
- // insert page at the end of linked list
- page->prev = _root;
- _root->next = page;
- _root = page;
-
- _busy_size = size; // the new root page starts with this allocation only
- }
- else
- {
- // insert page before the end of linked list, so that it is deleted as soon as possible
- // the last page is not deleted even if it's empty (see deallocate_memory)
- assert(_root->prev);
-
- page->prev = _root->prev;
- page->next = _root;
-
- _root->prev->next = page;
- _root->prev = page;
-
- page->busy_size = size;
- }
-
- return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
- }
-PUGI__NS_END
-
-#ifdef PUGIXML_COMPACT
-PUGI__NS_BEGIN
- static const uintptr_t compact_alignment_log2 = 2;
- static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
-
- class compact_header // Two-byte object header: one byte locating the owning page's marker, one byte of flags.
- {
- public:
- compact_header(xml_memory_page* page, unsigned int flags)
- {
- PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
-
- ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker)); // byte distance from the page marker to this header
- assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment);
-
- _page = static_cast<unsigned char>(offset >> compact_alignment_log2); // stored in compact_alignment units so it fits one byte
- _flags = static_cast<unsigned char>(flags);
- }
-
- void operator&=(uintptr_t mod) // masks the flag byte
- {
- _flags &= static_cast<unsigned char>(mod);
- }
-
- void operator|=(uintptr_t mod) // sets bits in the flag byte
- {
- _flags |= static_cast<unsigned char>(mod);
- }
-
- uintptr_t operator&(uintptr_t mod) const // tests bits of the flag byte
- {
- return _flags & mod;
- }
-
- xml_memory_page* get_page() const // recovers the owning page from the stored marker offset
- {
- // round-trip through void* to silence 'cast increases required alignment of target type' warnings
- const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
- const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker)); // the uint32_t read at the marker is subtracted from the marker address to reach the page
-
- return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
- }
-
- private:
- unsigned char _page;
- unsigned char _flags;
- };
-
- // Resolves the memory page owning `object` through the compact_header found header_offset bytes before it.
- PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset)
- {
- const char* header_address = static_cast<const char*>(object) - header_offset;
-
- return reinterpret_cast<const compact_header*>(header_address)->get_page();
- }
-
- template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object) // Reads the pointer stored for `object` in the hash table of the allocator owning its page.
- {
- return static_cast<T*>(*compact_get_page(object, header_offset)->allocator->_hash->find(object)); // dereferences the find() result directly; presumably callers only ask for keys stored via compact_set_value - verify
- }
-
- template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value) // Stores `value` for key `object` in the hash table of the allocator owning the object's page.
- {
- *compact_get_page(object, header_offset)->allocator->_hash->insert(object) = value;
- }
-
- // One-byte relative pointer used by the PUGIXML_COMPACT structures.
- // _data encoding: 0 = null; 1..254 = distance to the target from 'this' aligned down to compact_alignment,
- // counted in compact_alignment units and biased by 'start'; 255 = target out of range, the real pointer is
- // kept out of line via compact_set_value / compact_get_value.
- template <typename T, int header_offset, int start = -126> class compact_pointer
- {
- public:
- compact_pointer(): _data(0)
- {
- }
-
- void operator=(const compact_pointer& rhs)
- {
- // 'rhs + 0' forces conversion to T* so the value is re-encoded relative to this object's own address
- *this = rhs + 0;
- }
-
- void operator=(T* value)
- {
- if (value)
- {
- // value is guaranteed to be compact-aligned; 'this' is not
- // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
- // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
- // compensate for arithmetic shift rounding for negative values
- ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
- ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
-
- if (static_cast<uintptr_t>(offset) <= 253)
- _data = static_cast<unsigned char>(offset + 1);
- else
- {
- compact_set_value<header_offset>(this, value);
-
- _data = 255;
- }
- }
- else
- _data = 0;
- }
-
- operator T*() const
- {
- if (_data)
- {
- if (_data < 255)
- {
- uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
-
- // (_data - 1 + start) is negative for backward references; left-shifting a negative value is
- // undefined behaviour before C++20, so scale by multiplication instead (unsigned wrap-around
- // in uintptr_t arithmetic yields the intended address)
- return reinterpret_cast<T*>(base + (_data - 1 + start) * compact_alignment);
- }
- else
- return compact_get_value<header_offset, T>(this);
- }
- else
- return 0;
- }
-
- T* operator->() const
- {
- return *this;
- }
-
- private:
- unsigned char _data;
- };
-
- // Two-byte relative pointer used for the parent link of PUGIXML_COMPACT nodes.
- // _data encoding: 0 = null; 1..65533 = backward distance to the target from 'this' aligned down to
- // compact_alignment, in compact_alignment units; 65534 = target is the page's compact_shared_parent;
- // 65535 = the real pointer is kept out of line via compact_set_value / compact_get_value.
- template <typename T, int header_offset> class compact_pointer_parent
- {
- public:
- compact_pointer_parent(): _data(0)
- {
- }
-
- void operator=(const compact_pointer_parent& rhs)
- {
- // 'rhs + 0' forces conversion to T* so the value is re-encoded relative to this object's own address
- *this = rhs + 0;
- }
-
- void operator=(T* value)
- {
- if (value)
- {
- // value is guaranteed to be compact-aligned; 'this' is not
- // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
- // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
- // compensate for arithmetic shift behavior for negative values
- ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
- ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
-
- // direct encoding stores offset + 1 and must stay below 65534, which the decoder reserves
- // as the shared-parent marker; offset 65533 therefore takes the fallback path below
- if (static_cast<uintptr_t>(offset) <= 65532)
- {
- _data = static_cast<unsigned short>(offset + 1);
- }
- else
- {
- xml_memory_page* page = compact_get_page(this, header_offset);
-
- if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
- page->compact_shared_parent = value;
-
- if (page->compact_shared_parent == value)
- {
- _data = 65534;
- }
- else
- {
- compact_set_value<header_offset>(this, value);
-
- _data = 65535;
- }
- }
- }
- else
- {
- _data = 0;
- }
- }
-
- operator T*() const
- {
- if (_data)
- {
- if (_data < 65534)
- {
- uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
-
- // (_data - 1 - 65533) is always negative here; left-shifting a negative value is undefined
- // behaviour before C++20, so scale by multiplication instead (unsigned wrap-around in
- // uintptr_t arithmetic yields the intended address)
- return reinterpret_cast<T*>(base + (_data - 1 - 65533) * compact_alignment);
- }
- else if (_data == 65534)
- return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
- else
- return compact_get_value<header_offset, T>(this);
- }
- else
- return 0;
- }
-
- T* operator->() const
- {
- return *this;
- }
-
- private:
- uint16_t _data;
- };
-
- template <int header_offset, int base_offset> class compact_string // One-byte string pointer: 0 = null, 1..254 = small offset combined with a shared 16-bit base, 255 = pointer kept out of line via compact_set_value.
- {
- public:
- compact_string(): _data(0)
- {
- }
-
- void operator=(const compact_string& rhs)
- {
- *this = rhs + 0; // 'rhs + 0' converts to char_t* so the value is re-encoded for this object
- }
-
- void operator=(char_t* value)
- {
- if (value)
- {
- xml_memory_page* page = compact_get_page(this, header_offset);
-
- if (PUGI__UNLIKELY(page->compact_string_base == 0))
- page->compact_string_base = value; // the first string assigned on a page becomes that page's base
-
- ptrdiff_t offset = value - page->compact_string_base;
-
- if (static_cast<uintptr_t>(offset) < (65535 << 7)) // negative offsets become huge after the cast and fail this test
- {
- // round-trip through void* to silence 'cast increases required alignment of target type' warnings
- uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset)); // 16-bit field located base_offset bytes before this member
-
- if (*base == 0)
- {
- *base = static_cast<uint16_t>((offset >> 7) + 1); // upper part of the offset, +1 so that 0 means "unset"
- _data = static_cast<unsigned char>((offset & 127) + 1); // low 7 bits, +1 so that 0 means null
- }
- else
- {
- ptrdiff_t remainder = offset - ((*base - 1) << 7); // distance from the base that is already recorded
-
- if (static_cast<uintptr_t>(remainder) <= 253)
- {
- _data = static_cast<unsigned char>(remainder + 1);
- }
- else
- {
- compact_set_value<header_offset>(this, value);
-
- _data = 255;
- }
- }
- }
- else
- {
- compact_set_value<header_offset>(this, value);
-
- _data = 255;
- }
- }
- else
- {
- _data = 0;
- }
- }
-
- operator char_t*() const
- {
- if (_data)
- {
- if (_data < 255)
- {
- xml_memory_page* page = compact_get_page(this, header_offset);
-
- // round-trip through void* to silence 'cast increases required alignment of target type' warnings
- const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
- assert(*base);
-
- ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1); // inverse of the encoding in operator=(char_t*)
-
- return page->compact_string_base + offset;
- }
- else
- {
- return compact_get_value<header_offset, char_t>(this);
- }
- }
- else
- return 0;
- }
-
- private:
- unsigned char _data;
- };
-PUGI__NS_END
-#endif
-
-#ifdef PUGIXML_COMPACT
-namespace pugi
-{
- struct xml_attribute_struct // PUGIXML_COMPACT attribute record; the constructor asserts it is exactly 8 bytes.
- {
- xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
- {
- PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
- }
-
- impl::compact_header header;
-
- uint16_t namevalue_base; // 16-bit base shared by name and value (reached through their base_offset template arguments)
-
- impl::compact_string<4, 2> name; // template arguments are <header_offset, base_offset>; presumably this member's byte distance back to header / namevalue_base - confirm against layout
- impl::compact_string<5, 3> value;
-
- impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c;
- impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute; // start = 0: direct encoding covers non-negative offsets only
- };
-
- struct xml_node_struct // PUGIXML_COMPACT node record; the constructor asserts it is exactly 12 bytes.
- {
- xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type - 1), namevalue_base(0) // header flags are initialised to type - 1
- {
- PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
- }
-
- impl::compact_header header;
-
- uint16_t namevalue_base; // 16-bit base shared by name and value (reached through their base_offset template arguments)
-
- impl::compact_string<4, 2> name;
- impl::compact_string<5, 3> value;
-
- impl::compact_pointer_parent<xml_node_struct, 6> parent;
-
- impl::compact_pointer<xml_node_struct, 8, 0> first_child;
-
- impl::compact_pointer<xml_node_struct, 9> prev_sibling_c;
- impl::compact_pointer<xml_node_struct, 10, 0> next_sibling;
-
- impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute;
- };
-}
-#else
-namespace pugi
-{
- struct xml_attribute_struct // Attribute record for builds without PUGIXML_COMPACT: plain pointers plus a header word.
- {
- xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0)
- {
- }
-
- uintptr_t header; // initialised from the owning page address; destroy_attribute also tests allocation flag bits in it
-
- char_t* name;
- char_t* value;
-
- xml_attribute_struct* prev_attribute_c; // the first attribute's prev_attribute_c refers to the last attribute (see append_attribute)
- xml_attribute_struct* next_attribute;
- };
-
- struct xml_node_struct // Node record for builds without PUGIXML_COMPACT: plain pointers plus a header word.
- {
- xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
- {
- }
-
- uintptr_t header; // owning page address OR-ed with (type - 1); destroy_node also tests allocation flag bits in it
-
- char_t* name;
- char_t* value;
-
- xml_node_struct* parent;
-
- xml_node_struct* first_child;
-
- xml_node_struct* prev_sibling_c; // the first child's prev_sibling_c refers to the last child (see append_node)
- xml_node_struct* next_sibling;
-
- xml_attribute_struct* first_attribute;
- };
-}
-#endif
-
-PUGI__NS_BEGIN
- struct xml_extra_buffer // Singly linked list entry holding one character buffer.
- {
- char_t* buffer;
- xml_extra_buffer* next; // next entry in the list
- };
-
- struct xml_document_struct: public xml_node_struct, public xml_allocator // Document root: a node of type node_document that is also the allocator.
- {
- xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
- {
- #ifdef PUGIXML_COMPACT
- _hash = &hash; // point the allocator's hash table pointer at the table embedded in this struct
- #endif
- }
-
- const char_t* buffer;
-
- xml_extra_buffer* extra_buffers; // head of a list of xml_extra_buffer entries
-
- #ifdef PUGIXML_COMPACT
- compact_hash_table hash;
- #endif
- };
-
- // Returns the allocator recorded on the memory page that holds `object` (which must be non-null).
- template <typename Object> inline xml_allocator& get_allocator(const Object* object)
- {
- assert(object);
-
- xml_allocator* owner = PUGI__GETPAGE(object)->allocator;
-
- return *owner;
- }
-
- // Returns the document that owns `object` (which must be non-null): the page's allocator downcast to
- // xml_document_struct, which derives from xml_allocator.
- template <typename Object> inline xml_document_struct& get_document(const Object* object)
- {
- assert(object);
-
- xml_allocator* owner = PUGI__GETPAGE(object)->allocator;
-
- return *static_cast<xml_document_struct*>(owner);
- }
-PUGI__NS_END
-
-// Low-level DOM operations
-PUGI__NS_BEGIN
- // Allocates storage from `alloc` and constructs an attribute in it; returns 0 when allocation fails.
- inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
- {
- xml_memory_page* owner_page;
- void* storage = alloc.allocate_object(sizeof(xml_attribute_struct), owner_page);
-
- if (storage)
- return new (storage) xml_attribute_struct(owner_page);
-
- return 0;
- }
-
- // Allocates storage from `alloc` and constructs a node of the given type in it; returns 0 when allocation fails.
- inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
- {
- xml_memory_page* owner_page;
- void* storage = alloc.allocate_object(sizeof(xml_node_struct), owner_page);
-
- if (storage)
- return new (storage) xml_node_struct(owner_page, type);
-
- return 0;
- }
-
- inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc) // Releases an attribute's allocator-owned strings, then the attribute record itself.
- {
- if (a->header & impl::xml_memory_page_name_allocated_mask) // name is freed only when the header flags it as allocated
- alloc.deallocate_string(a->name);
-
- if (a->header & impl::xml_memory_page_value_allocated_mask) // same for value
- alloc.deallocate_string(a->value);
-
- alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
- }
-
- // Releases a node: its allocator-owned name/value strings, every attribute, every child subtree
- // (recursively), and finally the node record itself.
- inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
- {
- if (n->header & impl::xml_memory_page_name_allocated_mask)
- alloc.deallocate_string(n->name);
-
- if (n->header & impl::xml_memory_page_value_allocated_mask)
- alloc.deallocate_string(n->value);
-
- // the successor is read before each element is destroyed
- xml_attribute_struct* attr = n->first_attribute;
-
- while (attr)
- {
- xml_attribute_struct* following = attr->next_attribute;
-
- destroy_attribute(attr, alloc);
-
- attr = following;
- }
-
- xml_node_struct* child = n->first_child;
-
- while (child)
- {
- xml_node_struct* following = child->next_sibling;
-
- destroy_node(child, alloc);
-
- child = following;
- }
-
- alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
- }
-
- inline void append_node(xml_node_struct* child, xml_node_struct* node) // Links child as the last child of node; child->next_sibling is not written here.
- {
- child->parent = node;
-
- xml_node_struct* head = node->first_child;
-
- if (head)
- {
- xml_node_struct* tail = head->prev_sibling_c; // the first child's prev_sibling_c refers to the last child
-
- tail->next_sibling = child;
- child->prev_sibling_c = tail;
- head->prev_sibling_c = child; // child is the new last child
- }
- else
- {
- node->first_child = child;
- child->prev_sibling_c = child; // an only child is both first and last
- }
- }
-
- inline void prepend_node(xml_node_struct* child, xml_node_struct* node) // Links child as the first child of node.
- {
- child->parent = node;
-
- xml_node_struct* head = node->first_child;
-
- if (head)
- {
- child->prev_sibling_c = head->prev_sibling_c; // the new first child takes over the reference to the last child
- head->prev_sibling_c = child;
- }
- else
- child->prev_sibling_c = child; // an only child is both first and last
-
- child->next_sibling = head;
- node->first_child = child;
- }
-
- inline void insert_node_after(xml_node_struct* child, xml_node_struct* node) // Links child into node's sibling list immediately after node.
- {
- xml_node_struct* parent = node->parent;
-
- child->parent = parent;
-
- if (node->next_sibling)
- node->next_sibling->prev_sibling_c = child;
- else
- parent->first_child->prev_sibling_c = child; // node was last, so the first child's last-child reference moves to child
-
- child->next_sibling = node->next_sibling;
- child->prev_sibling_c = node;
-
- node->next_sibling = child;
- }
-
- inline void insert_node_before(xml_node_struct* child, xml_node_struct* node) // Links child into node's sibling list immediately before node.
- {
- xml_node_struct* parent = node->parent;
-
- child->parent = parent;
-
- if (node->prev_sibling_c->next_sibling) // a null next_sibling here means prev_sibling_c is the last child, i.e. node is the first child
- node->prev_sibling_c->next_sibling = child;
- else
- parent->first_child = child;
-
- child->prev_sibling_c = node->prev_sibling_c;
- child->next_sibling = node;
-
- node->prev_sibling_c = child;
- }
-
- inline void remove_node(xml_node_struct* node) // Unlinks node from its parent's child list and clears node's parent/sibling links; nothing is deallocated.
- {
- xml_node_struct* parent = node->parent;
-
- if (node->next_sibling)
- node->next_sibling->prev_sibling_c = node->prev_sibling_c;
- else
- parent->first_child->prev_sibling_c = node->prev_sibling_c; // node was last: the first child's last-child reference moves to node's predecessor
-
- if (node->prev_sibling_c->next_sibling) // a null next_sibling here means node is the first child
- node->prev_sibling_c->next_sibling = node->next_sibling;
- else
- parent->first_child = node->next_sibling;
-
- node->parent = 0;
- node->prev_sibling_c = 0;
- node->next_sibling = 0;
- }
-
- inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node) // Links attr as the last attribute of node; attr->next_attribute is not written here.
- {
- xml_attribute_struct* head = node->first_attribute;
-
- if (head)
- {
- xml_attribute_struct* tail = head->prev_attribute_c; // the first attribute's prev_attribute_c refers to the last attribute
-
- tail->next_attribute = attr;
- attr->prev_attribute_c = tail;
- head->prev_attribute_c = attr; // attr is the new last attribute
- }
- else
- {
- node->first_attribute = attr;
- attr->prev_attribute_c = attr; // an only attribute is both first and last
- }
- }
-
- inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node) // Links attr as the first attribute of node.
- {
- xml_attribute_struct* head = node->first_attribute;
-
- if (head)
- {
- attr->prev_attribute_c = head->prev_attribute_c; // the new first attribute takes over the reference to the last attribute
- head->prev_attribute_c = attr;
- }
- else
- attr->prev_attribute_c = attr; // an only attribute is both first and last
-
- attr->next_attribute = head;
- node->first_attribute = attr;
- }
-
- inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) // Links attr into node's attribute list immediately after place.
- {
- if (place->next_attribute)
- place->next_attribute->prev_attribute_c = attr;
- else
- node->first_attribute->prev_attribute_c = attr; // place was last, so the first attribute's last-attribute reference moves to attr
-
- attr->next_attribute = place->next_attribute;
- attr->prev_attribute_c = place;
- place->next_attribute = attr;
- }
-
- inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) // Links attr into node's attribute list immediately before place.
- {
- if (place->prev_attribute_c->next_attribute) // a null next_attribute here means place is the first attribute
- place->prev_attribute_c->next_attribute = attr;
- else
- node->first_attribute = attr;
-
- attr->prev_attribute_c = place->prev_attribute_c;
- attr->next_attribute = place;
- place->prev_attribute_c = attr;
- }
-
- inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node) // Unlinks attr from node's attribute list and clears attr's links; nothing is deallocated.
- {
- if (attr->next_attribute)
- attr->next_attribute->prev_attribute_c = attr->prev_attribute_c;
- else
- node->first_attribute->prev_attribute_c = attr->prev_attribute_c; // attr was last: the first attribute's last-attribute reference moves to attr's predecessor
-
- if (attr->prev_attribute_c->next_attribute) // a null next_attribute here means attr is the first attribute
- attr->prev_attribute_c->next_attribute = attr->next_attribute;
- else
- node->first_attribute = attr->next_attribute;
-
- attr->prev_attribute_c = 0;
- attr->next_attribute = 0;
- }
-
- // Creates a node of the given type and appends it to node's children.
- // Returns 0, leaving the tree untouched, when alloc.reserve() or the allocation fails.
- PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
- {
- xml_node_struct* created = alloc.reserve() ? allocate_node(alloc, type) : 0;
-
- if (created)
- append_node(created, node);
-
- return created;
- }
-
- // Creates an attribute and appends it to node's attribute list.
- // Returns 0, leaving the node untouched, when alloc.reserve() or the allocation fails.
- PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
- {
- xml_attribute_struct* created = alloc.reserve() ? allocate_attribute(alloc) : 0;
-
- if (created)
- append_attribute(created, node);
-
- return created;
- }
-PUGI__NS_END
-
-// Helper classes for code generation
-PUGI__NS_BEGIN
- struct opt_false // Compile-time "false" tag; used as the opt_swap argument of utf16_decoder / utf32_decoder to disable byte swapping.
- {
- enum { value = 0 };
- };
-
- struct opt_true // Compile-time "true" tag, counterpart of opt_false.
- {
- enum { value = 1 };
- };
-PUGI__NS_END
-
-// Unicode utilities
-PUGI__NS_BEGIN
- // Reverses the byte order of a 16-bit value.
- inline uint16_t endian_swap(uint16_t value)
- {
- const unsigned int low_byte = value & 0xff;
- const unsigned int high_byte = value >> 8;
-
- return static_cast<uint16_t>((low_byte << 8) | high_byte);
- }
-
- // Reverses the byte order of a 32-bit value.
- inline uint32_t endian_swap(uint32_t value)
- {
- const uint32_t byte0 = value & 0xff;
- const uint32_t byte1 = value & 0xff00;
- const uint32_t byte2 = value & 0xff0000;
- const uint32_t byte3 = value >> 24;
-
- return (byte0 << 24) | (byte1 << 8) | (byte2 >> 8) | byte3;
- }
-
- // Output traits that count the UTF-8 bytes needed for each code point instead of writing them.
- struct utf8_counter
- {
- typedef size_t value_type;
-
- // code points below U+10000: 1 byte (< U+0080), 2 bytes (< U+0800), otherwise 3 bytes
- static value_type low(value_type result, uint32_t ch)
- {
- const value_type length = (ch < 0x80) ? 1 : (ch < 0x800) ? 2 : 3;
-
- return result + length;
- }
-
- // U+10000..U+10FFFF always take 4 bytes
- static value_type high(value_type result, uint32_t)
- {
- return result + 4;
- }
- };
-
- // Output traits that encode code points as UTF-8; every function returns the advanced write pointer.
- struct utf8_writer
- {
- typedef uint8_t* value_type;
-
- // encodes a code point below U+10000 as 1, 2 or 3 bytes
- static value_type low(value_type result, uint32_t ch)
- {
- if (ch < 0x80)
- {
- // U+0000..U+007F
- *result++ = static_cast<uint8_t>(ch);
- return result;
- }
-
- if (ch < 0x800)
- {
- // U+0080..U+07FF
- *result++ = static_cast<uint8_t>(0xC0 | (ch >> 6));
- *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
- return result;
- }
-
- // U+0800..U+FFFF
- *result++ = static_cast<uint8_t>(0xE0 | (ch >> 12));
- *result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
- *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
- return result;
- }
-
- // encodes U+10000..U+10FFFF as 4 bytes
- static value_type high(value_type result, uint32_t ch)
- {
- *result++ = static_cast<uint8_t>(0xF0 | (ch >> 18));
- *result++ = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
- *result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
- *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
- return result;
- }
-
- // dispatches on the code point range
- static value_type any(value_type result, uint32_t ch)
- {
- if (ch < 0x10000)
- return low(result, ch);
-
- return high(result, ch);
- }
- };
-
- // Output traits that count UTF-16 code units: one per code point below U+10000, two (a surrogate pair) above.
- struct utf16_counter
- {
- typedef size_t value_type;
-
- static value_type low(value_type result, uint32_t)
- {
- return 1 + result;
- }
-
- static value_type high(value_type result, uint32_t)
- {
- return 2 + result;
- }
- };
-
- // Output traits that encode code points as UTF-16 code units; every function returns the advanced write pointer.
- struct utf16_writer
- {
- typedef uint16_t* value_type;
-
- // code points below U+10000 are stored as a single unit
- static value_type low(value_type result, uint32_t ch)
- {
- result[0] = static_cast<uint16_t>(ch);
-
- return result + 1;
- }
-
- // code points from U+10000 are split into a high/low surrogate pair
- static value_type high(value_type result, uint32_t ch)
- {
- const uint32_t bits = static_cast<uint32_t>(ch - 0x10000);
-
- *result++ = static_cast<uint16_t>(0xD800 + (bits >> 10));
- *result++ = static_cast<uint16_t>(0xDC00 + (bits & 0x3ff));
-
- return result;
- }
-
- // dispatches on the code point range
- static value_type any(value_type result, uint32_t ch)
- {
- if (ch < 0x10000)
- return low(result, ch);
-
- return high(result, ch);
- }
- };
-
- // Output traits that count UTF-32 code units: every code point takes exactly one.
- struct utf32_counter
- {
- typedef size_t value_type;
-
- static value_type low(value_type result, uint32_t)
- {
- return 1 + result;
- }
-
- static value_type high(value_type result, uint32_t)
- {
- return 1 + result;
- }
- };
-
- // Output traits that store each code point as one UTF-32 unit; every function returns the advanced write pointer.
- struct utf32_writer
- {
- typedef uint32_t* value_type;
-
- static value_type low(value_type result, uint32_t ch)
- {
- result[0] = ch;
-
- return result + 1;
- }
-
- // identical to low(): UTF-32 needs no special handling above U+FFFF
- static value_type high(value_type result, uint32_t ch)
- {
- return low(result, ch);
- }
-
- static value_type any(value_type result, uint32_t ch)
- {
- return low(result, ch);
- }
- };
-
- // Output traits that write one Latin-1 byte per code point, substituting '?' for anything above 255.
- struct latin1_writer
- {
- typedef uint8_t* value_type;
-
- static value_type low(value_type result, uint32_t ch)
- {
- if (ch > 255)
- ch = '?';
-
- result[0] = static_cast<uint8_t>(ch);
-
- return result + 1;
- }
-
- // code points from U+10000 never fit, so the replacement is unconditional
- static value_type high(value_type result, uint32_t)
- {
- result[0] = '?';
-
- return result + 1;
- }
- };
-
- struct utf8_decoder // Decodes UTF-8 and feeds each code point to an output traits type.
- {
- typedef uint8_t type;
-
- template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) // size is in bytes; code points below U+10000 go to Traits::low, the rest to Traits::high; returns the accumulated result
- {
- const uint8_t utf8_byte_mask = 0x3f; // payload bits of a continuation byte
-
- while (size)
- {
- uint8_t lead = *data;
-
- // 0xxxxxxx -> U+0000..U+007F
- if (lead < 0x80)
- {
- result = Traits::low(result, lead);
- data += 1;
- size -= 1;
-
- // process aligned single-byte (ascii) blocks
- if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
- {
- // round-trip through void* to silence 'cast increases required alignment of target type' warnings
- while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0) // four bytes at once: proceed only if none has its high bit set
- {
- result = Traits::low(result, data[0]);
- result = Traits::low(result, data[1]);
- result = Traits::low(result, data[2]);
- result = Traits::low(result, data[3]);
- data += 4;
- size -= 4;
- }
- }
- }
- // 110xxxxx -> U+0080..U+07FF
- else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
- {
- result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
- data += 2;
- size -= 2;
- }
- // 1110xxxx -> U+0800-U+FFFF
- else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
- {
- result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
- data += 3;
- size -= 3;
- }
- // 11110xxx -> U+10000..U+10FFFF
- else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
- {
- result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
- data += 4;
- size -= 4;
- }
- // 10xxxxxx or 11111xxx -> invalid
- else // also reached for truncated sequences or bad continuation bytes; one byte is skipped with no output
- {
- data += 1;
- size -= 1;
- }
- }
-
- return result;
- }
- };
-
- // Decodes UTF-16 code units (byte-swapped first when opt_swap::value is set) and feeds each code point
- // to an output traits type. Unpaired surrogates are skipped without producing output.
- template <typename opt_swap> struct utf16_decoder
- {
- typedef uint16_t type;
-
- // size is the number of 16-bit units; returns the accumulated traits result
- template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
- {
- while (size)
- {
- const uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
-
- // units consumed by this step; only a valid surrogate pair consumes two
- size_t consumed = 1;
-
- if (lead < 0xD800 || lead >= 0xE000)
- {
- // U+0000..U+D7FF and U+E000..U+FFFF
- result = Traits::low(result, lead);
- }
- else if (lead < 0xDC00 && size >= 2)
- {
- // surrogate pair lead
- const uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
-
- if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
- {
- result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
- consumed = 2;
- }
- }
-
- data += consumed;
- size -= consumed;
- }
-
- return result;
- }
- };
-
- // Decodes UTF-32 units (byte-swapped first when opt_swap::value is set) and feeds each one to an
- // output traits type; values are passed through without range validation.
- template <typename opt_swap> struct utf32_decoder
- {
- typedef uint32_t type;
-
- // size is the number of 32-bit units; returns the accumulated traits result
- template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
- {
- for (; size; ++data, --size)
- {
- const uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
-
- // U+0000..U+FFFF go to low(), everything else to high()
- if (lead < 0x10000)
- result = Traits::low(result, lead);
- else
- result = Traits::high(result, lead);
- }
-
- return result;
- }
- };
-
- // Decodes Latin-1: every input byte is a code point and is forwarded to Traits::low.
- struct latin1_decoder
- {
- typedef uint8_t type;
-
- // size is in bytes; returns the accumulated traits result
- template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
- {
- for (const uint8_t* end = data + size; data != end; ++data)
- result = Traits::low(result, *data);
-
- return result;
- }
- };
-
- template <size_t size> struct wchar_selector; // maps sizeof(wchar_t) to a unit type and UTF helpers; only sizes 2 and 4 are specialized
-
- template <> struct wchar_selector<2> // 2-byte wchar_t is handled as UTF-16
- {
- typedef uint16_t type;
- typedef utf16_counter counter;
- typedef utf16_writer writer;
- typedef utf16_decoder<opt_false> decoder; // opt_false: no byte swapping
- };
-
- template <> struct wchar_selector<4> // 4-byte wchar_t is handled as UTF-32
- {
- typedef uint32_t type;
- typedef utf32_counter counter;
- typedef utf32_writer writer;
- typedef utf32_decoder<opt_false> decoder; // opt_false: no byte swapping
- };
-
- typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter; // counter matching this platform's wchar_t width
- typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer; // writer matching this platform's wchar_t width
-
- struct wchar_decoder // Decoder for wchar_t input; forwards to the UTF-16 or UTF-32 decoder chosen by sizeof(wchar_t).
- {
- typedef wchar_t type;
-
- template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
- {
- typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
-
- return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits); // wchar_t data is reinterpreted as the same-width unsigned unit type
- }
- };
-
-#ifdef PUGIXML_WCHAR_MODE
- // Writes `length` wchar_t units from data to result with each unit's byte order reversed.
- PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
- {
- typedef wchar_selector<sizeof(wchar_t)>::type unit_type;
-
- for (size_t index = 0; index != length; ++index)
- {
- const unit_type swapped = endian_swap(static_cast<unit_type>(data[index]));
-
- result[index] = static_cast<wchar_t>(swapped);
- }
- }
-#endif
-PUGI__NS_END
-
-PUGI__NS_BEGIN
- enum chartype_t // bit flags stored per character in chartype_table
- {
- ct_parse_pcdata = 1, // \0, &, \r, <
- ct_parse_attr = 2, // \0, &, \r, ', "
- ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab
- ct_space = 8, // \r, \n, space, tab
- ct_parse_cdata = 16, // \0, ], >, \r
- ct_parse_comment = 32, // \0, -, >, \r
- ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
- ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, :
- };
-
- static const unsigned char chartype_table[256] = // OR-ed chartype_t flags, indexed by character code (queried through PUGI__IS_CHARTYPE)
- {
- 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
- 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47
- 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63
- 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95
- 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127
-
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
- };
-
- enum chartypex_t // bit flags stored per character in chartypex_table
- {
- ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
- ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, "
- ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
- ctx_digit = 8, // 0-9
- ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
- };
-
- static const unsigned char chartypex_table[256] = // OR-ed chartypex_t flags, indexed by character code (queried through PUGI__IS_CHARTYPEX)
- {
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
- 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
- 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
-
- 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
- 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
-
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
- };
-
-#ifdef PUGIXML_WCHAR_MODE
- #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct)) // wide mode: every character >= 128 uses the table entry for 128
-#else
- #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct)) // narrow mode: index by the byte value
-#endif
- // Both wrappers yield a non-zero value when character c has any of the flag bits ct set in the respective table.
- #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
- #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
-
- // Detects host byte order at run time: true when the lowest-addressed byte of the integer 1 is 1.
- PUGI__FN bool is_little_endian()
- {
- unsigned int probe = 1;
- const unsigned char* first_byte = reinterpret_cast<unsigned char*>(&probe);
-
- return *first_byte == 1;
- }
-
- // Maps wchar_t to a concrete encoding: UTF-16 for 2-byte wchar_t, UTF-32 for 4-byte, in host byte order.
- PUGI__FN xml_encoding get_wchar_encoding()
- {
- PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
-
- const bool little = is_little_endian();
-
- if (sizeof(wchar_t) == 2)
- return little ? encoding_utf16_le : encoding_utf16_be;
-
- return little ? encoding_utf32_le : encoding_utf32_be;
- }
-
- PUGI__FN xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3) // Guesses the encoding from the first four bytes of a buffer; the first matching check wins, so the order of the checks matters.
- {
- // look for BOM in first few bytes
- if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
- if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le; // must precede the UTF-16 LE BOM test, which matches the same first two bytes
- if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
- if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
- if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
-
- // look for <, <? or <?xm in various encodings
- if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
- if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
- if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
- if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
- if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return encoding_utf8;
-
- // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
- if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
- if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
-
- // no known BOM detected, assume utf8
- return encoding_utf8;
- }
-
- // Resolves the requested encoding to a concrete one: the wchar / utf16 / utf32 placeholders
- // become encodings with the host byte order, any other explicit encoding is returned as is,
- // and encoding_auto inspects the first bytes of the buffer.
- PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
- {
-     switch (encoding)
-     {
-     case encoding_wchar:
-         return get_wchar_encoding();
-
-     case encoding_utf16:
-         return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
-
-     case encoding_utf32:
-         return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
-
-     case encoding_auto:
-         // fall out of the switch and autodetect below
-         break;
-
-     default:
-         // an explicit encoding was requested, no autodetection
-         return encoding;
-     }
-
-     // fewer than four bytes are not enough to look for a signature
-     if (size < 4) return encoding_utf8;
-
-     // guess from the leading bytes (based on XML specification, Appendix F.1)
-     const uint8_t* bytes = static_cast<const uint8_t*>(contents);
-
-     PUGI__DMC_VOLATILE uint8_t d0 = bytes[0], d1 = bytes[1], d2 = bytes[2], d3 = bytes[3];
-
-     return guess_buffer_encoding(d0, d1, d2, d3);
- }
-
- // Provides a writable character buffer for `contents`. Mutable input is handed back as is;
- // otherwise a zero-terminated copy is allocated. Returns false only if that allocation fails.
- PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
- {
-     const size_t count = size / sizeof(char_t);
-
-     if (is_mutable)
-     {
-         // in-place use: nothing is appended, so the reported length is the character count
-         out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
-         out_length = count;
-
-         return true;
-     }
-
-     // private copy with room for the terminator
-     char_t* copy = static_cast<char_t*>(xml_memory::allocate((count + 1) * sizeof(char_t)));
-     if (!copy) return false;
-
-     if (!contents)
-         assert(count == 0);
-     else
-         memcpy(copy, contents, count * sizeof(char_t));
-
-     copy[count] = 0;
-
-     // the reported length includes the terminator
-     out_buffer = copy;
-     out_length = count + 1;
-
-     return true;
- }
-
-#ifdef PUGIXML_WCHAR_MODE
- // True when `le` and `re` are the same UTF width (16 or 32 bit) with opposite byte orders,
- // i.e. converting from one to the other only requires swapping bytes.
- PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
- {
-     if (le == encoding_utf16_be) return re == encoding_utf16_le;
-     if (le == encoding_utf16_le) return re == encoding_utf16_be;
-     if (le == encoding_utf32_be) return re == encoding_utf32_le;
-     if (le == encoding_utf32_le) return re == encoding_utf32_be;
-
-     return false;
- }
-
- // Produces a buffer holding the code units of `contents` passed through
- // convert_wchar_endian_swap: converted in place when the input is mutable, otherwise
- // written to a newly allocated, zero-terminated copy. Returns false if allocation fails.
- PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
- {
-     const char_t* source = static_cast<const char_t*>(contents);
-     const size_t count = size / sizeof(char_t);
-
-     if (is_mutable)
-     {
-         // source and destination are the same memory
-         char_t* inplace = const_cast<char_t*>(source);
-
-         convert_wchar_endian_swap(inplace, source, count);
-
-         out_buffer = inplace;
-         out_length = count;
-
-         return true;
-     }
-
-     char_t* copy = static_cast<char_t*>(xml_memory::allocate((count + 1) * sizeof(char_t)));
-     if (!copy) return false;
-
-     convert_wchar_endian_swap(copy, source, count);
-     copy[count] = 0;
-
-     // the reported length includes the terminator
-     out_buffer = copy;
-     out_length = count + 1;
-
-     return true;
- }
-
- // Decodes `contents` with decoder D and re-encodes it as wchar_t units into a newly
- // allocated, zero-terminated buffer. The decoder runs twice: once to count the output
- // units and once to write them. Returns false if the allocation fails.
- template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
- {
-     typedef typename D::type unit_t;
-     typedef wchar_writer::value_type out_iterator;
-
-     const unit_t* input = static_cast<const unit_t*>(contents);
-     const size_t input_length = size / sizeof(unit_t);
-
-     // counting pass: how many wchar_t units the converted text needs
-     const size_t length = D::process(input, input_length, 0, wchar_counter());
-
-     // room for the text plus the terminator
-     char_t* storage = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
-     if (!storage) return false;
-
-     // writing pass: decode the input again, this time emitting wchar_t units
-     out_iterator first = reinterpret_cast<out_iterator>(storage);
-     out_iterator last = D::process(input, input_length, first, wchar_writer());
-
-     assert(last == first + length);
-     *last = 0;
-
-     out_buffer = storage;
-     out_length = length + 1;
-
-     return true;
- }
-
- // Wide-character build: converts `contents` from `encoding` into a wchar_t buffer.
- // Input already in the native wchar_t encoding is used directly (or copied), input that
- // differs only in byte order is byte-swapped, everything else is decoded and re-encoded.
- // Returns false on allocation failure or for an encoding that is not handled here.
- PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
- {
-     const xml_encoding wchar_encoding = get_wchar_encoding();
-
-     // fast path: no conversion required
-     if (encoding == wchar_encoding)
-         return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
-
-     // same width, opposite byte order: only endian-swapping is required
-     if (need_endian_swap_utf(encoding, wchar_encoding))
-         return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
-
-     switch (encoding)
-     {
-     case encoding_utf8:
-         return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
-
-     case encoding_utf16_be:
-     case encoding_utf16_le:
-     {
-         const xml_encoding host_utf16 = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
-
-         // opt_true is selected for data that is not in the host byte order
-         if (encoding == host_utf16)
-             return convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>());
-
-         return convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
-     }
-
-     case encoding_utf32_be:
-     case encoding_utf32_le:
-     {
-         const xml_encoding host_utf32 = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
-
-         if (encoding == host_utf32)
-             return convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>());
-
-         return convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
-     }
-
-     case encoding_latin1:
-         return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
-
-     default:
-         break;
-     }
-
-     assert(!"Invalid encoding");
-     return false;
- }
-#else
- // Decodes `contents` with decoder D and re-encodes it as UTF-8 into a newly allocated,
- // zero-terminated buffer. The decoder runs twice: once to count the output bytes and
- // once to write them. Returns false if the allocation fails.
- template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
- {
-     typedef typename D::type unit_t;
-
-     const unit_t* input = static_cast<const unit_t*>(contents);
-     const size_t input_length = size / sizeof(unit_t);
-
-     // counting pass: how many utf8 units the converted text needs
-     const size_t length = D::process(input, input_length, 0, utf8_counter());
-
-     // room for the text plus the terminator
-     char_t* storage = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
-     if (!storage) return false;
-
-     // writing pass: decode the input again, this time emitting utf8
-     uint8_t* first = reinterpret_cast<uint8_t*>(storage);
-     uint8_t* last = D::process(input, input_length, first, utf8_writer());
-
-     assert(last == first + length);
-     *last = 0;
-
-     out_buffer = storage;
-     out_length = length + 1;
-
-     return true;
- }
-
- // Returns the number of leading bytes of `data` whose value is at most 127;
- // the result equals `size` when no byte exceeds 127.
- PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
- {
-     size_t prefix = 0;
-
-     while (prefix < size && data[prefix] <= 127)
-         ++prefix;
-
-     return prefix;
- }
-
- // Converts latin1 input to UTF-8. The leading run of 7-bit bytes needs no conversion and is
- // copied verbatim; if the whole input is 7-bit, the original buffer is used through
- // get_mutable_buffer. Returns false if an allocation fails.
- PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
- {
-     const uint8_t* bytes = static_cast<const uint8_t*>(contents);
-
-     // length of the prefix that does not need utf8 conversion
-     const size_t ascii_length = get_latin1_7bit_prefix_length(bytes, size);
-     assert(ascii_length <= size);
-
-     // nothing beyond the prefix: just return the original buffer
-     if (ascii_length == size) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
-
-     const uint8_t* tail = bytes + ascii_length;
-     const size_t tail_length = size - ascii_length;
-
-     // counting pass: prefix bytes plus the utf8 size of the remaining bytes
-     const size_t length = ascii_length + latin1_decoder::process(tail, tail_length, 0, utf8_counter());
-
-     // room for the text plus the terminator
-     char_t* storage = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
-     if (!storage) return false;
-
-     uint8_t* first = reinterpret_cast<uint8_t*>(storage);
-
-     // writing pass: copy the prefix, then convert the remaining latin1 bytes behind it
-     memcpy(storage, bytes, ascii_length);
-
-     uint8_t* last = latin1_decoder::process(tail, tail_length, first + ascii_length, utf8_writer());
-
-     assert(last == first + length);
-     *last = 0;
-
-     out_buffer = storage;
-     out_length = length + 1;
-
-     return true;
- }
-
- // Narrow build: converts `contents` from `encoding` into a UTF-8 buffer. UTF-8 input is
- // used directly (or copied); UTF-16, UTF-32 and latin1 input is decoded and re-encoded.
- // Returns false on allocation failure or for an encoding that is not handled here.
- PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
- {
-     switch (encoding)
-     {
-     case encoding_utf8:
-         // fast path: no conversion required
-         return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
-
-     case encoding_utf16_be:
-     case encoding_utf16_le:
-     {
-         const xml_encoding host_utf16 = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
-
-         // opt_true is selected for data that is not in the host byte order
-         if (encoding == host_utf16)
-             return convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>());
-
-         return convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
-     }
-
-     case encoding_utf32_be:
-     case encoding_utf32_le:
-     {
-         const xml_encoding host_utf32 = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
-
-         if (encoding == host_utf32)
-             return convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>());
-
-         return convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
-     }
-
-     case encoding_latin1:
-         return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
-
-     default:
-         break;
-     }
-
-     assert(!"Invalid encoding");
-     return false;
- }
-#endif
-
- // Counting half of the wide-to-UTF-8 conversion: returns the number of utf8 units
- // needed to represent the `length` wide characters at `str`.
- PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
- {
-     size_t utf8_units = wchar_decoder::process(str, length, 0, utf8_counter());
-
-     return utf8_units;
- }
-
- // Writing half of the wide-to-UTF-8 conversion: encodes `str` into `buffer`.
- // The output is asserted to fill exactly `size` units; no terminator is written here.
- PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
- {
-     uint8_t* out_first = reinterpret_cast<uint8_t*>(buffer);
-     uint8_t* out_last = wchar_decoder::process(str, length, out_first, utf8_writer());
-
-     assert(out_first + size == out_last);
-
-     // reference both values so they count as used when the assertion is compiled out
-     (void)!out_last;
-     (void)!size;
- }
-
-#ifndef PUGIXML_NO_STL
- // Converts `length` wide characters at `str` to a UTF-8 std::string.
- PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
- {
-     // first pass: measure, so the string is sized exactly once
-     const size_t size = as_utf8_begin(str, length);
-
-     std::string result(size, '\0');
-
-     // second pass: write into the string's storage (skipped for an empty result)
-     if (size != 0) as_utf8_end(&result[0], size, str, length);
-
-     return result;
- }
-
- // Converts `size` bytes of UTF-8 at `str` to a wide string.
- PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
- {
-     typedef wchar_writer::value_type out_iterator;
-
-     const uint8_t* bytes = reinterpret_cast<const uint8_t*>(str);
-
-     // first pass: number of wchar_t units in the result
-     const size_t length = utf8_decoder::process(bytes, size, 0, wchar_counter());
-
-     std::basic_string<wchar_t> result(length, L'\0');
-
-     // nothing to write for an empty result
-     if (length == 0) return result;
-
-     // second pass: decode straight into the string's storage
-     out_iterator first = reinterpret_cast<out_iterator>(&result[0]);
-     out_iterator last = utf8_decoder::process(bytes, size, first, wchar_writer());
-
-     assert(first + length == last);
-     (void)!last;
-
-     return result;
- }
-#endif
-
- // Decides whether a string of `length` characters may be written over the existing
- // string at `target` instead of allocating new storage for it.
- template <typename Header>
- inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
- {
-     // never reuse shared memory
-     if (header & xml_memory_page_contents_shared_mask) return false;
-
-     const size_t capacity = strlength(target);
-
-     // in every remaining case the new text has to fit into the old one
-     if (capacity < length) return false;
-
-     // header_mask bit clear: always reuse document buffer memory when the text fits
-     if ((header & header_mask) == 0) return true;
-
-     // heap memory: reuse short strings always, longer ones only if less than half is wasted
-     const size_t reuse_threshold = 32;
-     const size_t waste = capacity - length;
-
-     return capacity < reuse_threshold || waste < capacity / 2;
- }
-
- // Assigns `source_length` characters from `source` to the string slot `dest`, reusing the
- // current storage when strcpy_insitu_allow permits it and allocating otherwise. The
- // header_mask bit in `header` records whether `dest` owns allocated memory.
- // Returns false if memory could not be reserved or allocated; `dest` is then unchanged.
- template <typename String, typename Header>
- PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
- {
-     const size_t byte_count = source_length * sizeof(char_t);
-
-     if (source_length == 0)
-     {
-         // empty string and null pointer are equivalent: release any owned memory
-         xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
-
-         if (header & header_mask) alloc->deallocate_string(dest);
-
-         // mark the string as not allocated
-         dest = 0;
-         header &= ~header_mask;
-
-         return true;
-     }
-
-     if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
-     {
-         // the old buffer is large enough: overwrite it and terminate
-         memcpy(dest, source, byte_count);
-         dest[source_length] = 0;
-
-         return true;
-     }
-
-     xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
-
-     if (!alloc->reserve()) return false;
-
-     // new buffer with room for the terminator
-     char_t* copy = alloc->allocate_string(source_length + 1);
-     if (!copy) return false;
-
-     memcpy(copy, source, byte_count);
-     copy[source_length] = 0;
-
-     // release the old buffer only now, to protect against overlapping memory and allocation failures
-     if (header & header_mask) alloc->deallocate_string(dest);
-
-     // the string is now allocated, so set the flag
-     dest = copy;
-     header |= header_mask;
-
-     return true;
- }
-
- // Bookkeeping for characters removed from a string that is rewritten in place.
- // Removed ranges are merged into a single "gap"; the text that follows a gap is
- // shifted left lazily, when the next gap is pushed or when flush() is called.
- struct gap
- {
-     char_t* end;  // end of the most recent gap (0 while nothing has been removed)
-     size_t size;  // combined size of all gaps pushed so far, in characters
-
-     gap(): end(0), size(0)
-     {
-     }
-
-     // Register `count` characters starting at s as removed and advance s past them.
-     // Text kept since the previous gap is first moved left over that gap.
-     void push(char_t*& s, size_t count)
-     {
-         if (end) close_up(s);
-
-         s += count; // end of current gap
-
-         // the new gap absorbs the collapsed one
-         end = s;
-         size += count;
-     }
-
-     // Collapse the pending gap and return the past-the-end pointer of the compacted text.
-     char_t* flush(char_t* s)
-     {
-         if (!end) return s;
-
-         close_up(s);
-
-         return s - size;
-     }
-
-     // Move the kept text [end, upto) left by `size` characters.
-     void close_up(char_t* upto)
-     {
-         assert(upto >= end);
-         memmove(end - size, end, reinterpret_cast<char*>(upto) - reinterpret_cast<char*>(end));
-     }
- };
- /* Decodes the character or entity reference that starts at the '&' pointed to by s.
-  * Handles &#<decimal>; and &#x<hex>; as well as the named entities amp, apos, gt, lt, quot.
-  * On success the replacement is written over the start of the reference, the unused rest
-  * of the reference is registered with g as a gap, and the position just past the ';' is
-  * returned. If the text is not a recognized reference nothing is modified and the
-  * position where matching stopped is returned.
-  */
- PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
- {
- char_t* stre = s + 1;
- // stre is the read cursor; s stays on the '&' until a replacement is written
- switch (*stre)
- {
- case '#': // &#...
- {
- unsigned int ucsc = 0;
-
- if (stre[1] == 'x') // &#x... (hex code)
- {
- stre += 2;
-
- char_t ch = *stre;
-
- if (ch == ';') return stre;
- // accumulate hex digits; (ch | ' ') maps ASCII upper-case letters to lower case
- for (;;)
- {
- if (static_cast<unsigned int>(ch - '0') <= 9)
- ucsc = 16 * ucsc + (ch - '0');
- else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
- ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
- else if (ch == ';')
- break;
- else // cancel
- return stre;
-
- ch = *++stre;
- }
-
- ++stre;
- }
- else // &#... (dec code)
- {
- char_t ch = *++stre;
-
- if (ch == ';') return stre;
- // accumulate decimal digits until the terminating ';'
- for (;;)
- {
- if (static_cast<unsigned int>(static_cast<unsigned int>(ch) - '0') <= 9)
- ucsc = 10 * ucsc + (ch - '0');
- else if (ch == ';')
- break;
- else // cancel
- return stre;
-
- ch = *++stre;
- }
-
- ++stre;
- }
- // encode the code point at s; s receives the writer's return value, which the gap below treats as the end of the output
- #ifdef PUGIXML_WCHAR_MODE
- s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
- #else
- s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
- #endif
-
- g.push(s, stre - s);
- return stre;
- }
-
- case 'a': // &a
- {
- ++stre;
-
- if (*stre == 'm') // &am
- {
- if (*++stre == 'p' && *++stre == ';') // &amp;
- {
- *s++ = '&';
- ++stre;
-
- g.push(s, stre - s);
- return stre;
- }
- }
- else if (*stre == 'p') // &ap
- {
- if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
- {
- *s++ = '\'';
- ++stre;
-
- g.push(s, stre - s);
- return stre;
- }
- }
- break;
- }
-
- case 'g': // &g
- {
- if (*++stre == 't' && *++stre == ';') // &gt;
- {
- *s++ = '>';
- ++stre;
-
- g.push(s, stre - s);
- return stre;
- }
- break;
- }
-
- case 'l': // &l
- {
- if (*++stre == 't' && *++stre == ';') // &lt;
- {
- *s++ = '<';
- ++stre;
-
- g.push(s, stre - s);
- return stre;
- }
- break;
- }
-
- case 'q': // &q
- {
- if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
- {
- *s++ = '"';
- ++stre;
-
- g.push(s, stre - s);
- return stre;
- }
- break;
- }
-
- default:
- break;
- }
- // not a recognized reference: resume scanning where matching stopped
- return stre;
- }
- /* The helpers below are textual macros that rely on names from the scope they expand in:
-  * s (current position), endch (PUGI__ENDSWITH), optmsk (PUGI__OPTSET), cursor and alloc
-  * (PUGI__PUSHNODE / PUGI__POPNODE), ch (PUGI__ENDSEG), ss (the character under test in the
-  * condition passed to PUGI__SCANWHILE_UNROLL), and error_offset / error_status
-  * (PUGI__THROW_ERROR, which returns a null char_t* from the enclosing function).
-  */
- // Parser utilities
- #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
- #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
- #define PUGI__OPTSET(OPT) ( optmsk & (OPT) )
- #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
- #define PUGI__POPNODE() { cursor = cursor->parent; }
- #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
- #define PUGI__SCANWHILE(X) { while (X) ++s; }
- #define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
- #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; }
- #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0)
- #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
-
- // Normalizes line endings inside a comment in place ("\r" and "\r\n" become "\n") and
- // zero-terminates the text at the closing "--". Returns the position after the
- // terminator, or 0 if the buffer ends before the comment is closed. `endch` is consulted
- // when the character after "--" is the zero terminator (see PUGI__ENDSWITH).
- PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
- {
-     gap removed;
-
-     for (;;)
-     {
-         // skip everything that needs no special handling
-         PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
-
-         // ran out of input before the comment was closed
-         if (*s == 0) return 0;
-
-         if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
-         {
-             *s++ = '\n'; // replace first one with 0x0a
-
-             // drop the 0x0a of a pair
-             if (*s == '\n') removed.push(s, 1);
-
-             continue;
-         }
-
-         const bool terminator = s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>');
-
-         if (!terminator)
-         {
-             ++s;
-             continue;
-         }
-
-         // comment ends here
-         *removed.flush(s) = 0;
-
-         return s + (s[2] == '>' ? 3 : 2);
-     }
- }
-
- // Normalizes line endings inside a CDATA section in place ("\r" and "\r\n" become "\n")
- // and zero-terminates the text at the closing "]]". Returns the position of the second
- // ']', or 0 if the buffer ends before the section is closed. `endch` is consulted when
- // the character after "]]" is the zero terminator (see PUGI__ENDSWITH).
- PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
- {
-     gap removed;
-
-     for (;;)
-     {
-         // skip everything that needs no special handling
-         PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
-
-         // ran out of input before the section was closed
-         if (*s == 0) return 0;
-
-         if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
-         {
-             *s++ = '\n'; // replace first one with 0x0a
-
-             // drop the 0x0a of a pair
-             if (*s == '\n') removed.push(s, 1);
-
-             continue;
-         }
-
-         const bool terminator = s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>');
-
-         if (!terminator)
-         {
-             ++s;
-             continue;
-         }
-
-         // CDATA ends here
-         *removed.flush(s) = 0;
-
-         return s + 1;
-     }
- }
-
- typedef char_t* (*strconv_pcdata_t)(char_t*);
-
- // In-place converter for character data between tags. The three option types select
- // trailing-whitespace trimming (opt_trim), end-of-line normalization (opt_eol) and
- // decoding of '&' references (opt_escape).
- template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
- {
-     // Zero-terminate the text [begin, end), first stepping back over trailing
-     // whitespace when trimming is enabled.
-     static void finish(char_t* begin, char_t* end)
-     {
-         if (opt_trim::value)
-             while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
-                 --end;
-
-         *end = 0;
-     }
-
-     // Converts the text starting at s up to the next '<' or the end of the buffer.
-     // Returns the position after the '<', or the position of the terminating zero.
-     static char_t* parse(char_t* s)
-     {
-         gap removed;
-
-         char_t* const begin = s;
-
-         for (;;)
-         {
-             // skip everything that needs no special handling
-             PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata));
-
-             if (*s == '<') // PCDATA ends here
-             {
-                 finish(begin, removed.flush(s));
-
-                 return s + 1;
-             }
-
-             if (*s == 0) // end of the buffer
-             {
-                 finish(begin, removed.flush(s));
-
-                 return s;
-             }
-
-             if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
-             {
-                 *s++ = '\n'; // replace first one with 0x0a
-
-                 if (*s == '\n') removed.push(s, 1);
-             }
-             else if (opt_escape::value && *s == '&')
-             {
-                 s = strconv_escape(s, removed);
-             }
-             else
-             {
-                 ++s;
-             }
-         }
-     }
- };
-
- // Selects the strconv_pcdata_impl instantiation that matches the parse_trim_pcdata,
- // parse_eol and parse_escapes bits of `optmask`.
- PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
- {
-     PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
-
-     // template arguments are <trim, eol, escape>; the index is built as trim*4 + eol*2 + escape
-     static const strconv_pcdata_t converters[8] =
-     {
-         strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse,
-         strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse,
-         strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse,
-         strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse,
-         strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse,
-         strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse,
-         strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse,
-         strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse
-     };
-
-     // bits 4-5 (escapes, eol) land in index bits 0-1, bit 11 (trim) in index bit 2
-     const unsigned int escapes_eol = (optmask >> 4) & 3;
-     const unsigned int trim = (optmask >> 9) & 4;
-
-     return converters[trim | escapes_eol];
- }
-
- typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
- /* In-place converters for attribute values. Each parse_* function scans from s to the
-  * closing end_quote, zero-terminates the value there and returns the position just past
-  * the quote, or 0 if the buffer ends first. When opt_escape is set, '&' references are
-  * decoded through strconv_escape.
-  *   parse_wnorm  - trims leading/trailing whitespace and collapses inner runs to one space
-  *   parse_wconv  - replaces each whitespace character with a space ("\r\n" becomes one space)
-  *   parse_eol    - normalizes "\r" and "\r\n" to "\n"
-  *   parse_simple - performs no whitespace processing
-  */
- template <typename opt_escape> struct strconv_attribute_impl
- {
- static char_t* parse_wnorm(char_t* s, char_t end_quote)
- {
- gap g;
-
- // trim leading whitespaces
- if (PUGI__IS_CHARTYPE(*s, ct_space))
- {
- char_t* str = s;
-
- do ++str;
- while (PUGI__IS_CHARTYPE(*str, ct_space));
-
- g.push(s, str - s);
- }
-
- while (true)
- {
- PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));
-
- if (*s == end_quote)
- {
- char_t* str = g.flush(s);
- // terminate, then walk backwards zeroing trailing whitespace
- do *str-- = 0;
- while (PUGI__IS_CHARTYPE(*str, ct_space));
-
- return s + 1;
- }
- else if (PUGI__IS_CHARTYPE(*s, ct_space))
- {
- *s++ = ' ';
- // any further whitespace in this run is removed
- if (PUGI__IS_CHARTYPE(*s, ct_space))
- {
- char_t* str = s + 1;
- while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
-
- g.push(s, str - s);
- }
- }
- else if (opt_escape::value && *s == '&')
- {
- s = strconv_escape(s, g);
- }
- else if (!*s)
- {
- return 0;
- }
- else ++s;
- }
- }
- // Whitespace conversion: every whitespace character becomes ' '.
- static char_t* parse_wconv(char_t* s, char_t end_quote)
- {
- gap g;
-
- while (true)
- {
- PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));
-
- if (*s == end_quote)
- {
- *g.flush(s) = 0;
-
- return s + 1;
- }
- else if (PUGI__IS_CHARTYPE(*s, ct_space))
- {
- if (*s == '\r')
- {
- *s++ = ' ';
-
- if (*s == '\n') g.push(s, 1);
- }
- else *s++ = ' ';
- }
- else if (opt_escape::value && *s == '&')
- {
- s = strconv_escape(s, g);
- }
- else if (!*s)
- {
- return 0;
- }
- else ++s;
- }
- }
- // End-of-line normalization only.
- static char_t* parse_eol(char_t* s, char_t end_quote)
- {
- gap g;
-
- while (true)
- {
- PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
-
- if (*s == end_quote)
- {
- *g.flush(s) = 0;
-
- return s + 1;
- }
- else if (*s == '\r')
- {
- *s++ = '\n';
-
- if (*s == '\n') g.push(s, 1);
- }
- else if (opt_escape::value && *s == '&')
- {
- s = strconv_escape(s, g);
- }
- else if (!*s)
- {
- return 0;
- }
- else ++s;
- }
- }
- // No whitespace processing; only escapes (if enabled) and the closing quote are handled.
- static char_t* parse_simple(char_t* s, char_t end_quote)
- {
- gap g;
-
- while (true)
- {
- PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
-
- if (*s == end_quote)
- {
- *g.flush(s) = 0;
-
- return s + 1;
- }
- else if (opt_escape::value && *s == '&')
- {
- s = strconv_escape(s, g);
- }
- else if (!*s)
- {
- return 0;
- }
- else ++s;
- }
- }
- };
-
- // Selects the attribute value converter that matches the parse_escapes, parse_eol,
- // parse_wconv_attribute and parse_wnorm_attribute bits of `optmask`. Whitespace
- // normalization takes precedence over whitespace conversion, which takes precedence
- // over end-of-line handling.
- PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
- {
-     PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
-
-     typedef strconv_attribute_impl<opt_false> keep_escapes;
-     typedef strconv_attribute_impl<opt_true> decode_escapes;
-
-     // index bits: 0 = escapes, 1 = eol, 2 = wconv, 3 = wnorm
-     static const strconv_attribute_t converters[16] =
-     {
-         keep_escapes::parse_simple, decode_escapes::parse_simple, // 0, 1
-         keep_escapes::parse_eol,    decode_escapes::parse_eol,    // 2, 3
-         keep_escapes::parse_wconv,  decode_escapes::parse_wconv,  // 4, 5
-         keep_escapes::parse_wconv,  decode_escapes::parse_wconv,  // 6, 7
-         keep_escapes::parse_wnorm,  decode_escapes::parse_wnorm,  // 8, 9
-         keep_escapes::parse_wnorm,  decode_escapes::parse_wnorm,  // 10, 11
-         keep_escapes::parse_wnorm,  decode_escapes::parse_wnorm,  // 12, 13
-         keep_escapes::parse_wnorm,  decode_escapes::parse_wnorm   // 14, 15
-     };
-
-     return converters[(optmask >> 4) & 15];
- }
-
- // Builds an xml_parse_result carrying the given status and buffer offset (0 by default).
- inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
- {
-     xml_parse_result outcome;
-
-     outcome.offset = offset;
-     outcome.status = status;
-
-     return outcome;
- }
-
- struct xml_parser
- {
- xml_allocator alloc;
- xml_allocator* alloc_state;
- char_t* error_offset;
- xml_parse_status error_status;
-
- // Starts with a private copy of the caller's allocator, remembers where that allocator
- // lives (alloc_state), and begins with no error recorded.
- xml_parser(xml_allocator* alloc_)
-     : alloc(*alloc_)
-     , alloc_state(alloc_)
-     , error_offset(0)
-     , error_status(status_ok)
- {
- }
-
- // Copies the parser's working allocator back into the allocator it was created from.
- ~xml_parser()
- {
-     xml_allocator& origin = *alloc_state;
-
-     origin = alloc;
- }
- /* Skips one primitive DOCTYPE item starting at s - a quoted string, a <? ... ?> section or
-  * a <!-- ... --> comment - and returns the position just past it. Any other input, or an
-  * item still open when the terminating zero is reached, reports status_bad_doctype
-  * (PUGI__THROW_ERROR records the error and returns 0).
-  */
- // DOCTYPE consists of nested sections of the following possible types:
- // <!-- ... -->, <? ... ?>, "...", '...'
- // <![...]]>
- // <!...>
- // First group can not contain nested groups
- // Second group can contain nested groups of the same type
- // Third group can contain all other groups
- char_t* parse_doctype_primitive(char_t* s)
- {
- if (*s == '"' || *s == '\'')
- {
- // quoted string
- char_t ch = *s++;
- PUGI__SCANFOR(*s == ch);
- if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
-
- s++;
- }
- else if (s[0] == '<' && s[1] == '?')
- {
- // <? ... ?>
- s += 2;
- PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
- if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
-
- s += 2;
- }
- else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
- {
- s += 4;
- PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
- if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
-
- s += 3;
- }
- else PUGI__THROW_ERROR(status_bad_doctype, s);
-
- return s;
- }
-
- // Skips a "<![ ... ]]>" section of a DOCTYPE, including sections of the same kind nested
- // inside it. Returns the position just past the matching "]]>"; reports
- // status_bad_doctype if the buffer ends first.
- char_t* parse_doctype_ignore(char_t* s)
- {
-     assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
-     s += 3;
-
-     // number of nested sections that are currently open inside the outermost one
-     size_t nesting = 0;
-
-     while (*s != 0)
-     {
-         const bool opens = s[0] == '<' && s[1] == '!' && s[2] == '[';
-         const bool closes = !opens && s[0] == ']' && s[1] == ']' && s[2] == '>';
-
-         if (!opens && !closes)
-         {
-             ++s;
-             continue;
-         }
-
-         // both markers are three characters long
-         s += 3;
-
-         if (opens)
-         {
-             // nested ignore section
-             ++nesting;
-             continue;
-         }
-
-         // ignore section end: done once the outermost section is closed
-         if (nesting == 0)
-             return s;
-
-         --nesting;
-     }
-
-     PUGI__THROW_ERROR(status_bad_doctype, s);
- }
- /* Skips a DOCTYPE group that starts with "<!" at s, stepping over nested "<!...>" groups,
-  * "<![...]]>" sections and primitive items. Returns a pointer to the group's closing '>'
-  * (not past it). If the terminating zero is reached instead, that position is returned
-  * only when no nested group is open and endch is '>'; otherwise status_bad_doctype is
-  * reported. Errors from the nested helpers are passed through as a null return.
-  */
- char_t* parse_doctype_group(char_t* s, char_t endch)
- {
- size_t depth = 0;
-
- assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
- s += 2;
-
- while (*s)
- {
- if (s[0] == '<' && s[1] == '!' && s[2] != '-')
- {
- if (s[2] == '[')
- {
- // ignore
- s = parse_doctype_ignore(s);
- if (!s) return s;
- }
- else
- {
- // some control group
- s += 2;
- depth++;
- }
- }
- else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
- {
- // unknown tag (forbidden), or some primitive group
- s = parse_doctype_primitive(s);
- if (!s) return s;
- }
- else if (*s == '>')
- {
- if (depth == 0)
- return s;
-
- depth--;
- s++;
- }
- else s++;
- }
- // end of buffer: only acceptable for the outermost group when endch is '>'
- if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
-
- return s;
- }
- /* Parses the construct that follows '<' when s points at '!': a comment (<!-- ... -->), a
-  * CDATA section (<![CDATA[ ... ]]>) or a DOCTYPE declaration. Depending on optmsk a
-  * node_comment, node_cdata or node_doctype node is created through append_new_node(cursor, ...)
-  * with its value pointing into the buffer, which is zero-terminated in place.
-  * Returns the position after the construct, or 0 after recording the error status and
-  * offset (see PUGI__THROW_ERROR). cursor is taken by value, so the caller's cursor is unchanged.
-  */
- char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
- {
- // parse node contents, starting with exclamation mark
- ++s;
-
- if (*s == '-') // '<!-...'
- {
- ++s;
-
- if (*s == '-') // '<!--...'
- {
- ++s;
-
- if (PUGI__OPTSET(parse_comments))
- {
- PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
- cursor->value = s; // Save the offset.
- }
-
- if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
- {
- s = strconv_comment(s, endch);
-
- if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
- }
- else
- {
- // Scan for terminating '-->'.
- PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
- PUGI__CHECK_ERROR(status_bad_comment, s);
-
- if (PUGI__OPTSET(parse_comments))
- *s = 0; // Zero-terminate this segment at the first terminating '-'.
-
- s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
- }
- }
- else PUGI__THROW_ERROR(status_bad_comment, s);
- }
- else if (*s == '[')
- {
- // '<![CDATA[...'
- if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
- {
- ++s;
-
- if (PUGI__OPTSET(parse_cdata))
- {
- PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
- cursor->value = s; // Save the offset.
-
- if (PUGI__OPTSET(parse_eol))
- {
- s = strconv_cdata(s, endch);
-
- if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
- }
- else
- {
- // Scan for terminating ']]>'.
- PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
- PUGI__CHECK_ERROR(status_bad_cdata, s);
-
- *s++ = 0; // Zero-terminate this segment.
- }
- }
- else // Flagged for discard, but we still have to scan for the terminator.
- {
- // Scan for terminating ']]>'.
- PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
- PUGI__CHECK_ERROR(status_bad_cdata, s);
-
- ++s;
- }
-
- s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
- }
- else PUGI__THROW_ERROR(status_bad_cdata, s);
- }
- else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
- {
- s -= 2;
- // s is now two characters before the 'D', where parse_doctype_group expects the "<!" start
- if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
- // "<!DOCTYPE" is 9 characters long; mark is where the doctype value begins
- char_t* mark = s + 9;
-
- s = parse_doctype_group(s, endch);
- if (!s) return s;
-
- assert((*s == 0 && endch == '>') || *s == '>');
- if (*s) *s++ = 0;
-
- if (PUGI__OPTSET(parse_doctype))
- {
- while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
-
- PUGI__PUSHNODE(node_doctype);
-
- cursor->value = mark;
- }
- }
- else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
- else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
- else PUGI__THROW_ERROR(status_unrecognized_tag, s);
-
- return s;
- }
- /* Parses a processing instruction or XML declaration; s points at the '?' following '<'.
-  * When the matching option (parse_declaration or parse_pi) is set, a node is created and
-  * named after the target. A PI stores the remaining text as its value and the cursor
-  * returns to the parent; a declaration leaves the cursor on the new node and returns the
-  * start of its attribute text. Without the option the instruction is only skipped.
-  * On success the (possibly changed) cursor is written back to ref_cursor and the position
-  * to continue from is returned; on error 0 is returned through PUGI__THROW_ERROR.
-  */
- char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
- {
- // load into registers
- xml_node_struct* cursor = ref_cursor;
- char_t ch = 0;
-
- // parse node contents, starting with question mark
- ++s;
-
- // read PI target
- char_t* target = s;
-
- if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
-
- PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
- PUGI__CHECK_ERROR(status_bad_pi, s);
-
- // determine node type; stricmp / strcasecmp is not portable
- bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
-
- if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
- {
- if (declaration)
- {
- // disallow non top-level declarations
- if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
-
- PUGI__PUSHNODE(node_declaration);
- }
- else
- {
- PUGI__PUSHNODE(node_pi);
- }
-
- cursor->name = target;
- // terminate the target name; ch receives the character that followed it
- PUGI__ENDSEG();
-
- // parse value/attributes
- if (ch == '?')
- {
- // empty node
- if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
- s += (*s == '>');
-
- PUGI__POPNODE();
- }
- else if (PUGI__IS_CHARTYPE(ch, ct_space))
- {
- PUGI__SKIPWS();
-
- // scan for tag end
- char_t* value = s;
-
- PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
- PUGI__CHECK_ERROR(status_bad_pi, s);
-
- if (declaration)
- {
- // replace ending ? with / so that 'element' terminates properly
- *s = '/';
-
- // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
- s = value;
- }
- else
- {
- // store value and step over >
- cursor->value = value;
-
- PUGI__POPNODE();
-
- PUGI__ENDSEG();
-
- s += (*s == '>');
- }
- }
- else PUGI__THROW_ERROR(status_bad_pi, s);
- }
- else
- {
- // scan for tag end
- PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
- PUGI__CHECK_ERROR(status_bad_pi, s);
-
- s += (s[1] == '>' ? 2 : 1);
- }
-
- // store from registers
- ref_cursor = cursor;
-
- return s;
- }
-
- char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
- { // Tag-by-tag parsing loop over the zero-terminated buffer s: nodes are attached under root and their name/value pointers are stored pointing into the buffer itself. endch is the character the caller removed from the end of the buffer. NOTE(review): the PUGI__ macros are defined outside this view; PUGI__THROW_ERROR presumably records the error and returns early - confirm.
- strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
- strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
- // ch receives the delimiter saved by PUGI__ENDSEG, cursor is the node currently being filled, mark remembers where a PCDATA run started
- char_t ch = 0;
- xml_node_struct* cursor = root;
- char_t* mark = s;
-
- while (*s != 0)
- {
- if (*s == '<')
- {
- ++s;
- // LOC_TAG is also reached by goto from the PCDATA branch at the bottom of the loop, with s already past '<'
- LOC_TAG:
- if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
- {
- PUGI__PUSHNODE(node_element); // Append a new node to the tree.
-
- cursor->name = s;
-
- PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
- PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
-
- if (ch == '>')
- {
- // end of tag
- }
- else if (PUGI__IS_CHARTYPE(ch, ct_space))
- {
- LOC_ATTRIBUTES: // also reached by goto after parse_question() leaves cursor on a declaration node
- while (true)
- {
- PUGI__SKIPWS(); // Eat any whitespace.
-
- if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
- {
- xml_attribute_struct* a = append_new_attribute(cursor, alloc); // Make space for this attribute.
- if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
-
- a->name = s; // Save the offset.
-
- PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
- PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
-
- if (PUGI__IS_CHARTYPE(ch, ct_space))
- {
- PUGI__SKIPWS(); // Eat any whitespace.
-
- ch = *s;
- ++s;
- }
-
- if (ch == '=') // '<... #=...'
- {
- PUGI__SKIPWS(); // Eat any whitespace.
-
- if (*s == '"' || *s == '\'') // '<... #="...'
- {
- ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
- ++s; // Step over the quote.
- a->value = s; // Save the offset.
-
- s = strconv_attribute(s, ch);
-
- if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
-
- // After this line the loop continues from the start;
- // Whitespaces, / and > are ok, symbols and EOF are wrong,
- // everything else will be detected
- if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
- }
- else PUGI__THROW_ERROR(status_bad_attribute, s);
- }
- else PUGI__THROW_ERROR(status_bad_attribute, s);
- }
- else if (*s == '/')
- {
- ++s;
-
- if (*s == '>')
- {
- PUGI__POPNODE();
- s++;
- break;
- }
- else if (*s == 0 && endch == '>') // buffer ended right after '/'; endch says the removed last character was the closing '>'
- {
- PUGI__POPNODE();
- break;
- }
- else PUGI__THROW_ERROR(status_bad_start_element, s);
- }
- else if (*s == '>')
- {
- ++s;
-
- break;
- }
- else if (*s == 0 && endch == '>') // the tag's closing '>' was the removed last character
- {
- break;
- }
- else PUGI__THROW_ERROR(status_bad_start_element, s);
- }
-
- // !!!
- }
- else if (ch == '/') // '<#.../'
- {
- if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
-
- PUGI__POPNODE(); // Pop.
-
- s += (*s == '>');
- }
- else if (ch == 0)
- {
- // we stepped over null terminator, backtrack & handle closing tag
- --s;
-
- if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
- }
- else PUGI__THROW_ERROR(status_bad_start_element, s);
- }
- else if (*s == '/')
- {
- ++s;
-
- char_t* name = cursor->name;
- if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s);
- // compare the closing tag name against the name of the currently open node, character by character
- while (PUGI__IS_CHARTYPE(*s, ct_symbol))
- {
- if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s);
- }
-
- if (*name)
- {
- if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s); // only the removed last character of the name is missing, so the name matches but no '>' follows
- else PUGI__THROW_ERROR(status_end_element_mismatch, s);
- }
-
- PUGI__POPNODE(); // Pop.
-
- PUGI__SKIPWS();
-
- if (*s == 0)
- {
- if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
- }
- else
- {
- if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
- ++s;
- }
- }
- else if (*s == '?') // '<?...'
- {
- s = parse_question(s, cursor, optmsk, endch);
- if (!s) return s;
-
- assert(cursor);
- if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES; // declaration attributes are parsed by the regular attribute loop
- }
- else if (*s == '!') // '<!...'
- {
- s = parse_exclamation(s, cursor, optmsk, endch);
- if (!s) return s;
- }
- else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
- else PUGI__THROW_ERROR(status_unrecognized_tag, s);
- }
- else
- {
- mark = s; // Save this offset while searching for a terminator.
-
- PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
-
- if (*s == '<' || !*s)
- {
- // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
- assert(mark != s);
-
- if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
- {
- continue;
- }
- else if (PUGI__OPTSET(parse_ws_pcdata_single))
- {
- if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue; // keep the whitespace only when a closing tag follows and the current node has no children yet
- }
- }
-
- if (!PUGI__OPTSET(parse_trim_pcdata))
- s = mark;
-
- if (cursor->parent || PUGI__OPTSET(parse_fragment))
- {
- PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
- cursor->value = s; // Save the offset.
-
- s = strconv_pcdata(s);
-
- PUGI__POPNODE(); // Pop since this is a standalone.
-
- if (!*s) break;
- }
- else
- {
- PUGI__SCANFOR(*s == '<'); // '...<'
- if (!*s) break;
-
- ++s;
- }
-
- // We're after '<'
- goto LOC_TAG;
- }
- }
-
- // check that last tag is closed
- if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
-
- return s;
- }
-
- #ifdef PUGIXML_WCHAR_MODE
- // Steps over a leading U+FEFF code unit, if the buffer starts with one.
- static char_t* parse_skip_bom(char_t* s)
- {
-     const unsigned int bom_value = 0xfeff;
-
-     if (s[0] == static_cast<wchar_t>(bom_value))
-         return s + 1;
-
-     return s;
- }
- #else
- // Steps over a leading three-byte UTF-8 byte order mark (EF BB BF), if the buffer starts with one.
- static char_t* parse_skip_bom(char_t* s)
- {
-     const bool starts_with_bom = s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf';
-
-     return starts_with_bom ? s + 3 : s;
- }
- #endif
-
- // Reports whether node, or any sibling reachable through next_sibling, is an element node.
- static bool has_element_node_siblings(xml_node_struct* node)
- {
-     for (xml_node_struct* it = node; it; it = it->next_sibling)
-     {
-         if (PUGI__NODETYPE(it) == node_element)
-             return true;
-     }
-
-     return false;
- }
-
- static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
- { // Parses length characters of buffer into children of root and returns the resulting status and offset. The buffer is modified: its last character is overwritten with a zero terminator and passed to parse_tree separately as endch.
- // early-out for empty documents
- if (length == 0)
- return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
-
- // get last child of the root before parsing
- xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
-
- // create parser on stack
- xml_parser parser(static_cast<xml_allocator*>(xmldoc));
-
- // save last character and make buffer zero-terminated (speeds up parsing)
- char_t endch = buffer[length - 1];
- buffer[length - 1] = 0;
-
- // skip BOM to make sure it does not end up as part of parse output
- char_t* buffer_data = parse_skip_bom(buffer);
-
- // perform actual parsing
- parser.parse_tree(buffer_data, root, optmsk, endch);
- // the return value of parse_tree is not used; the outcome is read from the parser's error_status / error_offset members
- xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
- assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
-
- if (result)
- {
- // since we removed last character, we have to handle the only possible false positive (stray <)
- if (endch == '<')
- return make_parse_result(status_unrecognized_tag, length - 1);
-
- // check if there are any element nodes parsed
- xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0; // first node added by this call: the one after the previous last child, or the first child if root was empty
-
- if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
- return make_parse_result(status_no_document_element, length - 1);
- }
- else
- {
- // roll back offset if it occurs on a null terminator in the source buffer
- if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
- result.offset--;
- }
-
- return result;
- }
- };
-
- // Output facilities
- // Returns the encoding in which char_t data is held in memory: the wchar_t encoding in wide-character builds, UTF-8 otherwise.
- PUGI__FN xml_encoding get_write_native_encoding()
- {
- #ifdef PUGIXML_WCHAR_MODE
-     const xml_encoding native = get_wchar_encoding();
- #else
-     const xml_encoding native = encoding_utf8;
- #endif
-
-     return native;
- }
-
- // Resolves a user-requested output encoding to a concrete one:
- // wchar and the endian-agnostic utf16/utf32 values are replaced with specific encodings,
- // encoding_auto becomes utf8, and every other value is returned unchanged.
- PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
- {
-     switch (encoding)
-     {
-     case encoding_wchar:
-         // replace wchar encoding with utf implementation
-         return get_wchar_encoding();
-
-     case encoding_utf16:
-         // pick the utf16 flavour matching the host byte order
-         return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
-
-     case encoding_utf32:
-         // pick the utf32 flavour matching the host byte order
-         return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
-
-     case encoding_auto:
-         // nothing to detect when writing, so assume utf8
-         return encoding_utf8;
-
-     default:
-         // an explicit encoding was requested
-         return encoding;
-     }
- }
-
- // Runs decoder D over length characters of data, writing through writer T into dest.
- // Returns the number of bytes written to dest.
- template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
- {
-     PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
-
-     const typename D::type* input = reinterpret_cast<const typename D::type*>(data);
-     typename T::value_type output_end = D::process(input, length, dest, T());
-
-     const size_t units_written = static_cast<size_t>(output_end - dest);
-
-     return units_written * sizeof(*dest);
- }
-
- // Same conversion as the overload without opt_swap; when opt_swap is set, every written unit is
- // additionally passed through endian_swap. Returns the number of bytes written to dest.
- template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
- {
-     PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
-
-     const typename D::type* input = reinterpret_cast<const typename D::type*>(data);
-     typename T::value_type output_end = D::process(input, length, dest, T());
-
-     if (opt_swap)
-     {
-         typename T::value_type unit = dest;
-
-         while (unit != output_end)
-         {
-             *unit = endian_swap(*unit);
-             ++unit;
-         }
-     }
-
-     const size_t units_written = static_cast<size_t>(output_end - dest);
-
-     return units_written * sizeof(*dest);
- }
-
-#ifdef PUGIXML_WCHAR_MODE
- // Returns how many of the first length characters can be converted as a unit:
- // with a 2-byte wchar_t, a trailing lead surrogate (0xD800..0xDBFF) is left out.
- PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
- {
-     if (length < 1) return 0;
-
-     // the subtraction wraps for values below 0xD800, so a single unsigned compare covers the range
-     const bool ends_with_lead_surrogate = sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400;
-
-     if (ends_with_lead_surrogate)
-         return length - 1;
-
-     return length;
- }
-
- // Converts length wide characters from data into the requested encoding, writing into whichever
- // scratch pointer matches the target unit size. Returns the number of bytes produced.
- PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
- {
-     // target differs from the wchar_t encoding by byte order only
-     if (need_endian_swap_utf(encoding, get_wchar_encoding()))
-     {
-         convert_wchar_endian_swap(r_char, data, length);
-
-         return length * sizeof(char_t);
-     }
-
-     switch (encoding)
-     {
-     case encoding_utf8:
-         return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
-
-     case encoding_utf16_be:
-     case encoding_utf16_le:
-     {
-         const xml_encoding host_utf16 = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
-         const bool swap_needed = host_utf16 != encoding;
-
-         return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), swap_needed);
-     }
-
-     case encoding_utf32_be:
-     case encoding_utf32_le:
-     {
-         const xml_encoding host_utf32 = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
-         const bool swap_needed = host_utf32 != encoding;
-
-         return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), swap_needed);
-     }
-
-     case encoding_latin1:
-         return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
-
-     default:
-         assert(!"Invalid encoding");
-         return 0;
-     }
- }
-#else
- // Returns the length of the prefix that ends just before the last byte that is not a UTF-8
- // continuation byte (10xxxxxx), looking at the final four bytes only. Inputs shorter than
- // five bytes yield 0.
- PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
- {
-     if (length < 5) return 0;
-
-     size_t back = 1;
-
-     while (back <= 4)
-     {
-         const uint8_t byte = static_cast<uint8_t>(data[length - back]);
-
-         // a standalone or leading byte: cut the chunk right before it
-         if ((byte & 0xc0) != 0x80)
-             return length - back;
-
-         ++back;
-     }
-
-     // four continuation bytes in a row: the tail is malformed anyway, so keep the whole chunk
-     return length;
- }
-
- // Converts length UTF-8 characters from data into utf16, utf32 or latin1, writing into whichever
- // scratch pointer matches the target unit size. Returns the number of bytes produced.
- PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
- {
-     switch (encoding)
-     {
-     case encoding_utf16_be:
-     case encoding_utf16_le:
-     {
-         const xml_encoding host_utf16 = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
-         const bool swap_needed = host_utf16 != encoding;
-
-         return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), swap_needed);
-     }
-
-     case encoding_utf32_be:
-     case encoding_utf32_le:
-     {
-         const xml_encoding host_utf32 = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
-         const bool swap_needed = host_utf32 != encoding;
-
-         return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), swap_needed);
-     }
-
-     case encoding_latin1:
-         return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
-
-     default:
-         assert(!"Invalid encoding");
-         return 0;
-     }
- }
-#endif
-
- class xml_buffered_writer
- { // Collects char_t output in a fixed-size member buffer and passes it to an xml_writer, converting through the scratch union when the target encoding is not the native one. Copy construction and assignment are declared but not defined here.
- xml_buffered_writer(const xml_buffered_writer&);
- xml_buffered_writer& operator=(const xml_buffered_writer&);
-
- public:
- xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
- {
- PUGI__STATIC_ASSERT(bufcapacity >= 8);
- }
- // writes out the buffered characters and empties the buffer; always returns 0, which the write() overloads use as the new buffer offset
- size_t flush()
- {
- flush(buffer, bufsize);
- bufsize = 0;
- return 0;
- }
- // writes size characters from data to the underlying writer, converting them first if needed; does not touch bufsize
- void flush(const char_t* data, size_t size)
- {
- if (size == 0) return;
-
- // fast path, just write data
- if (encoding == get_write_native_encoding())
- writer.write(data, size * sizeof(char_t));
- else
- {
- // convert chunk
- size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
- assert(result <= sizeof(scratch));
-
- // write data
- writer.write(scratch.data_u8, result);
- }
- }
- // flushes the buffer, then outputs data: large inputs go to the writer directly (in chunks when conversion is needed), and whatever remains is copied into the buffer
- void write_direct(const char_t* data, size_t length)
- {
- // flush the remaining buffer contents
- flush();
-
- // handle large chunks
- if (length > bufcapacity)
- {
- if (encoding == get_write_native_encoding())
- {
- // fast path, can just write data chunk
- writer.write(data, length * sizeof(char_t));
- return;
- }
-
- // need to convert in suitable chunks
- while (length > bufcapacity)
- {
- // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
- // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
- size_t chunk_size = get_valid_length(data, bufcapacity);
- assert(chunk_size);
-
- // convert chunk and write
- flush(data, chunk_size);
-
- // iterate
- data += chunk_size;
- length -= chunk_size;
- }
-
- // small tail is copied below
- bufsize = 0;
- }
-
- memcpy(buffer + bufsize, data, length * sizeof(char_t));
- bufsize += length;
- }
- // appends length characters to the buffer when they fit, otherwise defers to write_direct
- void write_buffer(const char_t* data, size_t length)
- {
- size_t offset = bufsize;
-
- if (offset + length <= bufcapacity)
- {
- memcpy(buffer + offset, data, length * sizeof(char_t));
- bufsize = offset + length;
- }
- else
- {
- write_direct(data, length);
- }
- }
- // appends a zero-terminated string: characters are copied until the buffer is full, and the remainder goes through write_direct
- void write_string(const char_t* data)
- {
- // write the part of the string that fits in the buffer
- size_t offset = bufsize;
-
- while (*data && offset < bufcapacity)
- buffer[offset++] = *data++;
-
- // write the rest
- if (offset < bufcapacity)
- {
- bufsize = offset;
- }
- else
- {
- // backtrack a bit if we have split the codepoint
- size_t length = offset - bufsize;
- size_t extra = length - get_valid_length(data - length, length);
-
- bufsize = offset - extra;
-
- write_direct(data - extra, strlength(data) + extra);
- }
- }
- // write() overloads for one to six characters: each flushes first when the characters would not fit, then stores them at the current offset
- void write(char_t d0)
- {
- size_t offset = bufsize;
- if (offset > bufcapacity - 1) offset = flush();
-
- buffer[offset + 0] = d0;
- bufsize = offset + 1;
- }
-
- void write(char_t d0, char_t d1)
- {
- size_t offset = bufsize;
- if (offset > bufcapacity - 2) offset = flush();
-
- buffer[offset + 0] = d0;
- buffer[offset + 1] = d1;
- bufsize = offset + 2;
- }
-
- void write(char_t d0, char_t d1, char_t d2)
- {
- size_t offset = bufsize;
- if (offset > bufcapacity - 3) offset = flush();
-
- buffer[offset + 0] = d0;
- buffer[offset + 1] = d1;
- buffer[offset + 2] = d2;
- bufsize = offset + 3;
- }
-
- void write(char_t d0, char_t d1, char_t d2, char_t d3)
- {
- size_t offset = bufsize;
- if (offset > bufcapacity - 4) offset = flush();
-
- buffer[offset + 0] = d0;
- buffer[offset + 1] = d1;
- buffer[offset + 2] = d2;
- buffer[offset + 3] = d3;
- bufsize = offset + 4;
- }
-
- void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
- {
- size_t offset = bufsize;
- if (offset > bufcapacity - 5) offset = flush();
-
- buffer[offset + 0] = d0;
- buffer[offset + 1] = d1;
- buffer[offset + 2] = d2;
- buffer[offset + 3] = d3;
- buffer[offset + 4] = d4;
- bufsize = offset + 5;
- }
-
- void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
- {
- size_t offset = bufsize;
- if (offset > bufcapacity - 6) offset = flush();
-
- buffer[offset + 0] = d0;
- buffer[offset + 1] = d1;
- buffer[offset + 2] = d2;
- buffer[offset + 3] = d3;
- buffer[offset + 4] = d4;
- buffer[offset + 5] = d5;
- bufsize = offset + 6;
- }
-
- // utf8 maximum expansion: x4 (-> utf32)
- // utf16 maximum expansion: x2 (-> utf32)
- // utf32 maximum expansion: x1
- enum
- {
- bufcapacitybytes =
- #ifdef PUGIXML_MEMORY_OUTPUT_STACK
- PUGIXML_MEMORY_OUTPUT_STACK
- #else
- 10240
- #endif
- ,
- bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4) // per character: one char_t in buffer plus up to 4 bytes in scratch
- };
-
- char_t buffer[bufcapacity];
- // conversion target for flush(); the members alias the same storage and each holds bufcapacity converted characters at its unit size
- union
- {
- uint8_t data_u8[4 * bufcapacity];
- uint16_t data_u16[2 * bufcapacity];
- uint32_t data_u32[bufcapacity];
- char_t data_char[bufcapacity];
- } scratch;
-
- xml_writer& writer;
- size_t bufsize; // number of characters currently held in buffer
- xml_encoding encoding; // resolved output encoding, computed once in the constructor
- };
-
- // Writes s with escaping: runs of ordinary characters are copied through unchanged, '&', '<', '>' and '"'
- // become named entities, and any other character selected by type is written as a two-digit decimal
- // character reference.
- PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
- {
-     while (*s)
-     {
-         const char_t* run_start = s;
-
-         // advance s past the run of characters that need no escaping
-         PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));
-
-         writer.write_buffer(run_start, static_cast<size_t>(s - run_start));
-
-         const char_t special = *s;
-
-         if (special == 0)
-         {
-             // end of string; the loop condition terminates the loop
-             continue;
-         }
-         else if (special == '&')
-         {
-             writer.write('&', 'a', 'm', 'p', ';');
-             ++s;
-         }
-         else if (special == '<')
-         {
-             writer.write('&', 'l', 't', ';');
-             ++s;
-         }
-         else if (special == '>')
-         {
-             writer.write('&', 'g', 't', ';');
-             ++s;
-         }
-         else if (special == '"')
-         {
-             writer.write('&', 'q', 'u', 'o', 't', ';');
-             ++s;
-         }
-         else
-         {
-             // anything else that stopped the scan is expected to be a control character
-             unsigned int ch = static_cast<unsigned int>(*s++);
-             assert(ch < 32);
-
-             const char_t tens = static_cast<char_t>((ch / 10) + '0');
-             const char_t ones = static_cast<char_t>((ch % 10) + '0');
-
-             writer.write('&', '#', tens, ones, ';');
-         }
-     }
- }
-
- // Writes s verbatim when format_no_escapes is set in flags, and with escaping otherwise.
- PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
- {
-     const bool verbatim = (flags & format_no_escapes) != 0;
-
-     if (verbatim)
-     {
-         writer.write_string(s);
-         return;
-     }
-
-     text_output_escaped(writer, s, type);
- }
-
- // Writes s as one or more CDATA sections. Because "]]>" would end a section, the text is split
- // after the "]]" and the '>' starts the next section. At least one section is always written.
- PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
- {
-     for (;;)
-     {
-         writer.write('<', '!', '[', 'C', 'D');
-         writer.write('A', 'T', 'A', '[');
-
-         const char_t* section_start = s;
-
-         // stop at the end of the string or at the first ]]> sequence
-         for (; *s; ++s)
-         {
-             if (s[0] == ']' && s[1] == ']' && s[2] == '>')
-                 break;
-         }
-
-         // keep the ]] inside this section
-         if (*s) s += 2;
-
-         writer.write_buffer(section_start, static_cast<size_t>(s - section_start));
-
-         writer.write(']', ']', '>');
-
-         if (!*s) break;
-     }
- }
-
- // Writes the indent string depth times. Indent strings of one to four characters go through the
- // fixed-arity write() overloads; every other length goes through write_buffer.
- PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
- {
-     unsigned int level = 0;
-
-     if (indent_length == 1)
-     {
-         for (; level < depth; ++level)
-             writer.write(indent[0]);
-     }
-     else if (indent_length == 2)
-     {
-         for (; level < depth; ++level)
-             writer.write(indent[0], indent[1]);
-     }
-     else if (indent_length == 3)
-     {
-         for (; level < depth; ++level)
-             writer.write(indent[0], indent[1], indent[2]);
-     }
-     else if (indent_length == 4)
-     {
-         for (; level < depth; ++level)
-             writer.write(indent[0], indent[1], indent[2], indent[3]);
-     }
-     else
-     {
-         for (; level < depth; ++level)
-             writer.write_buffer(indent, indent_length);
-     }
- }
-
- // Writes s as a comment. A '-' that is followed by another '-' or by the end of the text is
- // written as "- ", so the body never contains "--" and never ends in '-'.
- PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
- {
-     writer.write('<', '!', '-', '-');
-
-     while (*s)
-     {
-         const char_t* run_start = s;
-
-         // advance to the next '-' that needs a space after it, or to the end of the text
-         for (; *s; ++s)
-         {
-             if (s[0] == '-' && (s[1] == '-' || s[1] == 0))
-                 break;
-         }
-
-         writer.write_buffer(run_start, static_cast<size_t>(s - run_start));
-
-         if (!*s) break;
-
-         assert(*s == '-');
-
-         writer.write('-', ' ');
-         ++s;
-     }
-
-     writer.write('-', '-', '>');
- }
-
- // Writes the value of a processing instruction. Every "?>" in the text is written as "? >",
- // so the value cannot terminate the instruction early.
- PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
- {
-     while (*s)
-     {
-         const char_t* run_start = s;
-
-         // advance to the next ?> sequence, or to the end of the text
-         for (; *s; ++s)
-         {
-             if (s[0] == '?' && s[1] == '>')
-                 break;
-         }
-
-         writer.write_buffer(run_start, static_cast<size_t>(s - run_start));
-
-         if (!*s) break;
-
-         assert(s[0] == '?' && s[1] == '>');
-
-         writer.write('?', ' ', '>');
-         s += 2;
-     }
- }
-
- // Writes every attribute of node as name="value". Each attribute is preceded by a single space, or,
- // when format_indent_attributes is set without format_raw, by a newline and indentation one level
- // deeper than the node. Attributes without a name are written as ":anonymous".
- PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
- {
-     const char_t* default_name = PUGIXML_TEXT(":anonymous");
-
-     const bool one_per_line = (flags & (format_indent_attributes | format_raw)) == format_indent_attributes;
-
-     xml_attribute_struct* attr = node->first_attribute;
-
-     while (attr)
-     {
-         if (one_per_line)
-         {
-             writer.write('\n');
-
-             text_output_indent(writer, indent, indent_length, depth + 1);
-         }
-         else
-         {
-             writer.write(' ');
-         }
-
-         writer.write_string(attr->name ? attr->name + 0 : default_name);
-         writer.write('=', '"');
-
-         if (attr->value)
-             text_output(writer, attr->value, ctx_special_attr, flags);
-
-         writer.write('"');
-
-         attr = attr->next_attribute;
-     }
- }
-
- // Writes the opening tag of an element, including its attributes. A node without children is
- // written as a self-closing tag and false is returned; otherwise the tag is left open and true
- // is returned.
- PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
- {
-     const char_t* default_name = PUGIXML_TEXT(":anonymous");
-     const char_t* tag_name = node->name ? node->name + 0 : default_name;
-
-     writer.write('<');
-     writer.write_string(tag_name);
-
-     if (node->first_attribute)
-         node_output_attributes(writer, node, indent, indent_length, flags, depth);
-
-     if (node->first_child)
-     {
-         writer.write('>');
-
-         return true;
-     }
-
-     writer.write(' ', '/', '>');
-
-     return false;
- }
-
- // Writes the closing tag of an element; a node without a name is written as ":anonymous".
- PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
- {
-     const char_t* default_name = PUGIXML_TEXT(":anonymous");
-     const char_t* tag_name = default_name;
-
-     if (node->name)
-         tag_name = node->name + 0;
-
-     writer.write('<', '/');
-     writer.write_string(tag_name);
-     writer.write('>');
- }
-
- // Writes a node that has no children of its own to output: pcdata, cdata, comment, processing
- // instruction, declaration or doctype. Any other node type trips an assertion.
- PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
- {
-     const char_t* default_name = PUGIXML_TEXT(":anonymous");
-     const xml_node_type type = PUGI__NODETYPE(node);
-
-     if (type == node_pcdata)
-     {
-         text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
-     }
-     else if (type == node_cdata)
-     {
-         text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
-     }
-     else if (type == node_comment)
-     {
-         node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
-     }
-     else if (type == node_pi)
-     {
-         writer.write('<', '?');
-         writer.write_string(node->name ? node->name + 0 : default_name);
-
-         // the value is optional; it is separated from the target by one space
-         if (node->value)
-         {
-             writer.write(' ');
-             node_output_pi_value(writer, node->value);
-         }
-
-         writer.write('?', '>');
-     }
-     else if (type == node_declaration)
-     {
-         writer.write('<', '?');
-         writer.write_string(node->name ? node->name + 0 : default_name);
-         // format_raw keeps the declaration's attributes on one line
-         node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
-         writer.write('?', '>');
-     }
-     else if (type == node_doctype)
-     {
-         writer.write('<', '!', 'D', 'O', 'C');
-         writer.write('T', 'Y', 'P', 'E');
-
-         if (node->value)
-         {
-             writer.write(' ');
-             writer.write_string(node->value);
-         }
-
-         writer.write('>');
-     }
-     else
-     {
-         assert(!"Invalid node type");
-     }
- }
-
- enum indent_flags_t
- { // bit flags tracked by node_output between nodes
- indent_newline = 1, // a line break is written before the next node (unless format_raw is set)
- indent_indent = 2 // indentation is written before the next node (when the indent length is non-zero)
- };
-
- PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
- { // Writes root and all of its descendants without recursion, walking first_child / next_sibling / parent links; indent_flags carries the line break and indentation decision from one node to the next.
- size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
- unsigned int indent_flags = indent_indent;
-
- xml_node_struct* node = root;
-
- do
- {
- assert(node);
-
- // begin writing current node
- if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
- {
- node_output_simple(writer, node, flags);
- // no line break or indentation is written right after text content
- indent_flags = 0;
- }
- else
- {
- if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
- writer.write('\n');
-
- if ((indent_flags & indent_indent) && indent_length)
- text_output_indent(writer, indent, indent_length, depth);
-
- if (PUGI__NODETYPE(node) == node_element)
- {
- indent_flags = indent_newline | indent_indent;
-
- if (node_output_start(writer, node, indent, indent_length, flags, depth))
- {
- node = node->first_child; // the element has children: descend, its closing tag is written on the way back up
- depth++;
- continue;
- }
- }
- else if (PUGI__NODETYPE(node) == node_document)
- {
- indent_flags = indent_indent;
-
- if (node->first_child)
- {
- node = node->first_child; // the document node itself produces no output and does not increase depth
- continue;
- }
- }
- else
- {
- node_output_simple(writer, node, flags);
-
- indent_flags = indent_newline | indent_indent;
- }
- }
-
- // continue to the next node
- while (node != root)
- {
- if (node->next_sibling)
- {
- node = node->next_sibling;
- break;
- }
-
- node = node->parent;
-
- // write closing node
- if (PUGI__NODETYPE(node) == node_element)
- {
- depth--;
-
- if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
- writer.write('\n');
-
- if ((indent_flags & indent_indent) && indent_length)
- text_output_indent(writer, indent, indent_length, depth);
-
- node_output_end(writer, node);
-
- indent_flags = indent_newline | indent_indent;
- }
- }
- }
- while (node != root);
- // final line break after the last node, subject to the same flags
- if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
- writer.write('\n');
- }
-
- // Reports whether a declaration node appears among the children of node before the first
- // element child.
- PUGI__FN bool has_declaration(xml_node_struct* node)
- {
-     xml_node_struct* child = node->first_child;
-
-     while (child)
-     {
-         const xml_node_type type = PUGI__NODETYPE(child);
-
-         if (type == node_declaration) return true;
-
-         // the search stops at the first element
-         if (type == node_element) break;
-
-         child = child->next_sibling;
-     }
-
-     return false;
- }
-
- // Reports whether attr is one of the attributes in node's attribute list (compared by pointer).
- PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
- {
-     xml_attribute_struct* candidate = node->first_attribute;
-
-     while (candidate)
-     {
-         if (candidate == attr)
-             return true;
-
-         candidate = candidate->next_attribute;
-     }
-
-     return false;
- }
-
- // Only element and declaration nodes can carry attributes.
- PUGI__FN bool allow_insert_attribute(xml_node_type parent)
- {
-     if (parent == node_element) return true;
-     if (parent == node_declaration) return true;
-
-     return false;
- }
-
- // Decides whether a node of type child may be inserted under a node of type parent:
- // only documents and elements accept children, document and null nodes are never children,
- // and declaration and doctype nodes are accepted only directly under a document.
- PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
- {
-     const bool parent_is_document = (parent == node_document);
-
-     if (!parent_is_document && parent != node_element) return false;
-
-     if (child == node_document || child == node_null) return false;
-
-     const bool document_only_child = (child == node_declaration || child == node_doctype);
-
-     return parent_is_document || !document_only_child;
- }
-
- // Decides whether child may be moved under parent: the node types must be compatible, both nodes
- // must share the same root, and parent must not be child itself or lie inside child's subtree.
- PUGI__FN bool allow_move(xml_node parent, xml_node child)
- {
-     // check that child can be a child of parent
-     if (!allow_insert_child(parent.type(), child.type()))
-         return false;
-
-     // check that node is not moved between documents
-     if (parent.root() != child.root())
-         return false;
-
-     // walk from parent up to the root; meeting child on the way means parent is in its subtree
-     for (xml_node ancestor = parent; ancestor; ancestor = ancestor.parent())
-     {
-         if (ancestor == child)
-             return false;
-     }
-
-     return true;
- }
-
- // Copies the string source into dest, which must be empty. When both nodes use the same allocator
- // (alloc is non-null) and the source string was not separately allocated, the pointer is shared
- // instead of copied and both headers are marked as shared. A null source leaves dest untouched.
- template <typename String, typename Header>
- PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
- {
-     assert(!dest && (header & header_mask) == 0);
-
-     if (!source) return;
-
-     const bool can_share = alloc && (source_header & header_mask) == 0;
-
-     if (!can_share)
-     {
-         strcpy_insitu(dest, header, header_mask, source, strlength(source));
-         return;
-     }
-
-     dest = source;
-
-     // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
-     header |= xml_memory_page_contents_shared_mask;
-     source_header |= xml_memory_page_contents_shared_mask;
- }
-
- // Copies name, value and all attributes from sn to dn. Attributes that could not be appended
- // are skipped.
- PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
- {
-     node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
-     node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
-
-     xml_attribute_struct* src_attr = sn->first_attribute;
-
-     while (src_attr)
-     {
-         xml_attribute_struct* dst_attr = append_new_attribute(dn, get_allocator(dn));
-
-         if (dst_attr)
-         {
-             node_copy_string(dst_attr->name, dst_attr->header, xml_memory_page_name_allocated_mask, src_attr->name, src_attr->header, shared_alloc);
-             node_copy_string(dst_attr->value, dst_attr->header, xml_memory_page_value_allocated_mask, src_attr->value, src_attr->header, shared_alloc);
-         }
-
-         src_attr = src_attr->next_attribute;
-     }
- }
-
- PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
- { // Copies the contents of sn and its whole subtree into dn without recursion; sit walks the source tree while dit tracks the destination node that new copies are appended to.
- xml_allocator& alloc = get_allocator(dn);
- xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0; // non-null only when source and destination use the same allocator, which lets node_copy_string share strings
-
- node_copy_contents(dn, sn, shared_alloc);
-
- xml_node_struct* dit = dn;
- xml_node_struct* sit = sn->first_child;
-
- while (sit && sit != sn)
- {
- if (sit != dn) // the destination node itself is not copied if it is met inside the source subtree
- {
- xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));
-
- if (copy)
- {
- node_copy_contents(copy, sit, shared_alloc);
-
- if (sit->first_child)
- {
- dit = copy;
- sit = sit->first_child;
- continue;
- }
- }
- }
-
- // continue to the next node
- do
- {
- if (sit->next_sibling)
- {
- sit = sit->next_sibling;
- break;
- }
- // no more siblings: step both cursors up one level
- sit = sit->parent;
- dit = dit->parent;
- }
- while (sit != sn);
- }
- }
-
- // Copies name and value from attribute sa to attribute da. Strings may be shared instead of
- // copied when both attributes use the same allocator.
- PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
- {
-     xml_allocator& dest_alloc = get_allocator(da);
-     xml_allocator* shared_alloc = 0;
-
-     if (&dest_alloc == &get_allocator(sa))
-         shared_alloc = &dest_alloc;
-
-     node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
-     node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
- }
-
- // A text node is either a pcdata node or a cdata node.
- inline bool is_text_node(xml_node_struct* node)
- {
-     switch (PUGI__NODETYPE(node))
-     {
-     case node_pcdata:
-     case node_cdata:
-         return true;
-
-     default:
-         return false;
-     }
- }
-
- // get value with conversion functions; string_to_integer parses optional whitespace, an optional sign and a decimal or 0x-prefixed hexadecimal number into the unsigned type U, clamping to maxpos for positive values and to the negation of minneg for negative ones
- template <typename U> U string_to_integer(const char_t* value, U minneg, U maxpos)
- {
- U result = 0;
- const char_t* s = value;
-
- while (PUGI__IS_CHARTYPE(*s, ct_space))
- s++;
-
- bool negative = (*s == '-');
-
- s += (*s == '+' || *s == '-');
-
- bool overflow = false;
-
- if (s[0] == '0' && (s[1] | ' ') == 'x') // OR-ing with ' ' lower-cases an ASCII letter, so this accepts both 0x and 0X
- {
- s += 2;
-
- // since overflow detection relies on length of the sequence skip leading zeros
- while (*s == '0')
- s++;
-
- const char_t* start = s;
-
- for (;;)
- {
- if (static_cast<unsigned>(*s - '0') < 10)
- result = result * 16 + (*s - '0');
- else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
- result = result * 16 + ((*s | ' ') - 'a' + 10);
- else
- break;
-
- s++;
- }
-
- size_t digits = static_cast<size_t>(s - start);
- // each hexadecimal digit is four bits, so more than two digits per byte cannot fit in U
- overflow = digits > sizeof(U) * 2;
- }
- else
- {
- // since overflow detection relies on length of the sequence skip leading zeros
- while (*s == '0')
- s++;
-
- const char_t* start = s;
-
- for (;;)
- {
- if (static_cast<unsigned>(*s - '0') < 10)
- result = result * 10 + (*s - '0');
- else
- break;
-
- s++;
- }
-
- size_t digits = static_cast<size_t>(s - start);
-
- PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);
- // digit count and leading digit of the largest value of U: 18446744073709551615, 4294967295 or 65535
- const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
- const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
- const size_t high_bit = sizeof(U) * 8 - 1;
- // a number with the maximum digit count fits only if its leading digit is below max_lead, or equals max_lead while the accumulated result still has its top bit set
- overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
- }
-
- if (negative)
- return (overflow || result > minneg) ? 0 - minneg : 0 - result;
- else
- return (overflow || result > maxpos) ? maxpos : result;
- }
-
- // Parses value as a signed int; out-of-range input is clamped to INT_MIN / INT_MAX.
- PUGI__FN int get_value_int(const char_t* value)
- {
-     // magnitude of INT_MIN, computed in unsigned arithmetic
-     const unsigned int min_magnitude = 0 - static_cast<unsigned int>(INT_MIN);
-
-     return string_to_integer<unsigned int>(value, min_magnitude, INT_MAX);
- }
-
- // Parses value as an unsigned int. The negative limit passed to string_to_integer is 0 and the
- // positive limit is UINT_MAX.
- PUGI__FN unsigned int get_value_uint(const char_t* value)
- {
-     const unsigned int parsed = string_to_integer<unsigned int>(value, 0, UINT_MAX);
-
-     return parsed;
- }
-
- // Parses value as a double using wcstod in wide-character builds and strtod otherwise.
- PUGI__FN double get_value_double(const char_t* value)
- {
- #ifdef PUGIXML_WCHAR_MODE
-     const double parsed = wcstod(value, 0);
- #else
-     const double parsed = strtod(value, 0);
- #endif
-
-     return parsed;
- }
-
- // Parses value as a double (wcstod in wide-character builds, strtod otherwise) and narrows
- // the result to float.
- PUGI__FN float get_value_float(const char_t* value)
- {
- #ifdef PUGIXML_WCHAR_MODE
-     const double parsed = wcstod(value, 0);
- #else
-     const double parsed = strtod(value, 0);
- #endif
-
-     return static_cast<float>(parsed);
- }
-
- // Interprets value as a boolean by looking at its first character only.
- PUGI__FN bool get_value_bool(const char_t* value)
- {
-     switch (*value)
-     {
-     // 1*, t* (true), T* (True), y* (yes), Y* (YES)
-     case '1':
-     case 't':
-     case 'T':
-     case 'y':
-     case 'Y':
-         return true;
-
-     default:
-         return false;
-     }
- }
-
-#ifdef PUGIXML_HAS_LONG_LONG
- // Parses value as a long long via string_to_integer, passing the magnitudes of LLONG_MIN and LLONG_MAX as limits.
- PUGI__FN long long get_value_llong(const char_t* value)
- {
-     const unsigned long long min_magnitude = 0 - static_cast<unsigned long long>(LLONG_MIN);
-     const unsigned long long max_magnitude = LLONG_MAX;
-
-     return string_to_integer<unsigned long long>(value, min_magnitude, max_magnitude);
- }
-
- // Parses value as an unsigned long long via string_to_integer with limits 0 and ULLONG_MAX.
- PUGI__FN unsigned long long get_value_ullong(const char_t* value)
- {
-     const unsigned long long min_magnitude = 0;
-     const unsigned long long max_magnitude = ULLONG_MAX;
-
-     return string_to_integer<unsigned long long>(value, min_magnitude, max_magnitude);
- }
-#endif
-
- // Writes the decimal form of value right-aligned into [begin, end) and returns a pointer to its first character.
- // The text is not zero-terminated; it extends up to end. When negative is set, value is negated (in type U)
- // before formatting and the result starts with '-'.
- template <typename U>
- PUGI__FN char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative)
- {
-     U magnitude = negative ? 0 - value : value;
-     char_t* cursor = end - 1;
-
-     // emit digits from least to most significant, moving towards begin
-     do
-     {
-         *cursor = static_cast<char_t>('0' + (magnitude % 10));
-         --cursor;
-         magnitude /= 10;
-     }
-     while (magnitude);
-
-     assert(cursor >= begin);
-     (void)begin;
-
-     // the sign slot is always written; it is only part of the result for negative values
-     *cursor = '-';
-
-     return negative ? cursor : cursor + 1;
- }
-
- // set value with conversion functions
- // Stores the zero-terminated narrow string buf into dest via strcpy_insitu,
- // widening it character by character first in wide-character builds.
- template <typename String, typename Header>
- PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
- {
- #ifdef PUGIXML_WCHAR_MODE
-     char_t wbuf[128];
-     assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));
-
-     size_t length = 0;
-
-     while (buf[length])
-     {
-         wbuf[length] = buf[length];
-         ++length;
-     }
-
-     return strcpy_insitu(dest, header, header_mask, wbuf, length);
- #else
-     const size_t length = strlen(buf);
-
-     return strcpy_insitu(dest, header, header_mask, buf, length);
- #endif
- }
-
- // Formats a signed int as decimal text and stores it into dest.
- template <typename String, typename Header>
- PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, int value)
- {
-     char_t digits[64];
-     char_t* last = digits + sizeof(digits) / sizeof(digits[0]);
-
-     const bool is_negative = value < 0;
-     char_t* first = integer_to_string<unsigned int>(digits, last, value, is_negative);
-
-     return strcpy_insitu(dest, header, header_mask, first, last - first);
- }
-
- // Formats an unsigned int as decimal text and stores it into dest.
- template <typename String, typename Header>
- PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned int value)
- {
-     char_t digits[64];
-     char_t* last = digits + sizeof(digits) / sizeof(digits[0]);
-
-     char_t* first = integer_to_string<unsigned int>(digits, last, value, false);
-
-     return strcpy_insitu(dest, header, header_mask, first, last - first);
- }
-
- // Formats a float with "%.9g" and stores the text into dest.
- template <typename String, typename Header>
- PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value)
- {
-     char text[128];
-
-     // value undergoes the usual float-to-double promotion for the variadic call
-     sprintf(text, "%.9g", value);
-
-     return set_value_ascii(dest, header, header_mask, text);
- }
-
- // Formats a double with "%.17g" and stores the text into dest.
- template <typename String, typename Header>
- PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value)
- {
-     char text[128];
-
-     sprintf(text, "%.17g", value);
-
-     return set_value_ascii(dest, header, header_mask, text);
- }
-
- // Stores the literal "true" or "false" into dest.
- template <typename String, typename Header>
- PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, bool value)
- {
-     if (value)
-         return strcpy_insitu(dest, header, header_mask, PUGIXML_TEXT("true"), 4);
-
-     return strcpy_insitu(dest, header, header_mask, PUGIXML_TEXT("false"), 5);
- }
-
-#ifdef PUGIXML_HAS_LONG_LONG
- // Formats a signed long long as decimal text and stores it into dest.
- template <typename String, typename Header>
- PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, long long value)
- {
-     char_t digits[64];
-     char_t* last = digits + sizeof(digits) / sizeof(digits[0]);
-
-     const bool is_negative = value < 0;
-     char_t* first = integer_to_string<unsigned long long>(digits, last, value, is_negative);
-
-     return strcpy_insitu(dest, header, header_mask, first, last - first);
- }
-
- // Formats an unsigned long long as decimal text and stores it into dest.
- template <typename String, typename Header>
- PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned long long value)
- {
-     char_t digits[64];
-     char_t* last = digits + sizeof(digits) / sizeof(digits[0]);
-
-     char_t* first = integer_to_string<unsigned long long>(digits, last, value, false);
-
-     return strcpy_insitu(dest, header, header_mask, first, last - first);
- }
-#endif
-
- // Converts the input buffer to the internal encoding, parses it into root and returns the parse result.
- // Returns status_io_error for a null buffer with non-zero size and status_out_of_memory if conversion fails.
- // When own is set, the original buffer is released if conversion produced a different buffer.
- // *out_buffer is only written when the resulting buffer is owned by the document (own set, or a converted copy).
- PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
- {
- // check input buffer
- if (!contents && size) return make_parse_result(status_io_error);
-
- // get actual encoding
- xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
-
- // get private buffer
- char_t* buffer = 0;
- size_t length = 0;
-
- if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
-
- // delete original buffer if we performed a conversion
- if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
-
- // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
- if (own || buffer != contents) *out_buffer = buffer;
-
- // store buffer for offset_debug
- doc->buffer = buffer;
-
- // parse
- xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
-
- // remember encoding
- res.encoding = buffer_encoding;
-
- return res;
- }
-
- // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
- // On success stores the length in out_result and returns status_ok, leaving the file positioned at its start.
- // Returns status_io_error if the tell call reports a negative length and status_out_of_memory if the length does not fit in size_t.
- // NOTE(review): the return values of the seek calls are not checked; only the tell result is validated.
- PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
- {
- #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
- // there are 64-bit versions of fseek/ftell, let's use them
- typedef __int64 length_type;
-
- _fseeki64(file, 0, SEEK_END);
- length_type length = _ftelli64(file);
- _fseeki64(file, 0, SEEK_SET);
- #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
- // there are 64-bit versions of fseek/ftell, let's use them
- typedef off64_t length_type;
-
- fseeko64(file, 0, SEEK_END);
- length_type length = ftello64(file);
- fseeko64(file, 0, SEEK_SET);
- #else
- // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
- typedef long length_type;
-
- fseek(file, 0, SEEK_END);
- length_type length = ftell(file);
- fseek(file, 0, SEEK_SET);
- #endif
-
- // check for I/O errors
- if (length < 0) return status_io_error;
-
- // check for overflow
- size_t result = static_cast<size_t>(length);
-
- // the round trip through size_t changes the value only when length does not fit
- if (static_cast<length_type>(result) != length) return status_out_of_memory;
-
- // finalize
- out_result = result;
-
- return status_ok;
- }
-
- // Appends a zero terminator to buffer when the encoding is one for which conversion will not add it,
- // and returns the buffer size to use afterwards (unchanged when no terminator was written).
- // This function assumes that buffer has extra sizeof(char_t) writable bytes after size
- PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
- {
- #ifdef PUGIXML_WCHAR_MODE
-     xml_encoding wchar_encoding = get_wchar_encoding();
-
-     // only native-width data (possibly byte-swapped) is terminated here
-     const bool terminate_here = encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding);
-
-     if (!terminate_here) return size;
-
-     char_t* text = static_cast<char_t*>(buffer);
-     size_t length = size / sizeof(char_t);
-
-     text[length] = 0;
-
-     return (length + 1) * sizeof(char_t);
- #else
-     if (encoding != encoding_utf8) return size;
-
-     char* text = static_cast<char*>(buffer);
-
-     text[size] = 0;
-
-     return size + 1;
- #endif
- }
-
- // Reads the whole file into a freshly allocated buffer and passes it to load_buffer_impl as an owned, mutable buffer.
- // Returns status_file_not_found for a null file, the get_file_size status on failure, status_out_of_memory if
- // allocation fails and status_io_error on a short read.
- PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
- {
-     if (!file) return make_parse_result(status_file_not_found);
-
-     // determine how many bytes to read (can result in I/O errors)
-     size_t file_size = 0;
-     const xml_parse_status size_status = get_file_size(file, file_size);
-
-     if (size_status != status_ok) return make_parse_result(size_status);
-
-     // leave room for the terminator written by zero_terminate_buffer
-     const size_t suffix_size = sizeof(char_t);
-
-     char* data = static_cast<char*>(xml_memory::allocate(file_size + suffix_size));
-
-     if (!data) return make_parse_result(status_out_of_memory);
-
-     // read file in memory; a short read is treated as an I/O error
-     if (fread(data, 1, file_size, file) != file_size)
-     {
-         xml_memory::deallocate(data);
-         return make_parse_result(status_io_error);
-     }
-
-     const xml_encoding real_encoding = get_buffer_encoding(encoding, data, file_size);
-     const size_t parse_size = zero_terminate_buffer(data, file_size, real_encoding);
-
-     return load_buffer_impl(doc, doc, data, parse_size, options, real_encoding, true, true, out_buffer);
- }
-
-#ifndef PUGIXML_NO_STL
- // Node of a singly linked list of fixed-size buffers used to accumulate stream data.
- // size is the number of bytes of data that are in use (set by the code that fills the chunk).
- template <typename T> struct xml_stream_chunk
- {
-     // Allocates a chunk through xml_memory and constructs it in place; returns 0 if allocation fails.
-     static xml_stream_chunk* create()
-     {
-         void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
-
-         return memory ? new (memory) xml_stream_chunk() : 0;
-     }
-
-     // Releases chunk and every chunk linked after it.
-     static void destroy(xml_stream_chunk* chunk)
-     {
-         for (xml_stream_chunk* following = 0; chunk; chunk = following)
-         {
-             // fetch the link before the memory is released
-             following = chunk->next;
-
-             xml_memory::deallocate(chunk);
-         }
-     }
-
-     xml_stream_chunk(): next(0), size(0)
-     {
-     }
-
-     xml_stream_chunk* next;
-     size_t size;
-
-     T data[xml_memory_page_size / sizeof(T)];
- };
-
- // Reads the stream to EOF without seeking: data is collected in a chain of chunks and then copied
- // into a single buffer allocated with xml_memory. On success *out_buffer receives that buffer and
- // *out_size its length in bytes (excluding the extra sizeof(char_t) bytes reserved after the data).
- // Returns status_out_of_memory on allocation failure or size overflow, status_io_error on stream errors.
- template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
- {
- // the holder destroys the chunk chain on every exit path
- auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy);
-
- // read file to a chunk list
- size_t total = 0;
- xml_stream_chunk<T>* last = 0;
-
- while (!stream.eof())
- {
- // allocate new chunk
- xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
- if (!chunk) return status_out_of_memory;
-
- // append chunk to list
- if (last) last = last->next = chunk;
- else chunks.data = last = chunk;
-
- // read data to chunk
- stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
- chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
-
- // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
- if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
-
- // guard against huge files (chunk size is small enough to make this overflow check work)
- if (total + chunk->size < total) return status_out_of_memory;
- total += chunk->size;
- }
-
- // extra room after the data for a terminating char_t
- size_t max_suffix_size = sizeof(char_t);
-
- // copy chunk list to a contiguous buffer
- char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
- if (!buffer) return status_out_of_memory;
-
- char* write = buffer;
-
- for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
- {
- assert(write + chunk->size <= buffer + total);
- memcpy(write, chunk->data, chunk->size);
- write += chunk->size;
- }
-
- assert(write == buffer + total);
-
- // return buffer
- *out_buffer = buffer;
- *out_size = total;
-
- return status_ok;
- }
-
- // Reads the rest of a seekable stream in one call: the remaining length is measured with tellg/seekg,
- // a buffer of that many T elements plus sizeof(char_t) bytes is allocated, and the data is read into it.
- // On success *out_buffer receives the buffer and *out_size the number of bytes actually read.
- // Returns status_io_error on stream failures and status_out_of_memory on allocation failure or oversized length.
- template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
- {
- // get length of remaining data in stream
- typename std::basic_istream<T>::pos_type pos = stream.tellg();
- stream.seekg(0, std::ios::end);
- std::streamoff length = stream.tellg() - pos;
- stream.seekg(pos);
-
- if (stream.fail() || pos < 0) return status_io_error;
-
- // guard against huge files
- size_t read_length = static_cast<size_t>(length);
-
- if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
-
- // extra room after the data for a terminating char_t
- size_t max_suffix_size = sizeof(char_t);
-
- // read stream data into memory (guard against stream exceptions with buffer holder)
- auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
- if (!buffer.data) return status_out_of_memory;
-
- stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
-
- // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
- if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
-
- // return buffer
- size_t actual_length = static_cast<size_t>(stream.gcount());
- assert(actual_length <= read_length);
-
- // ownership of the buffer passes to the caller
- *out_buffer = buffer.release();
- *out_size = actual_length * sizeof(T);
-
- return status_ok;
- }
-
- // Loads the remaining contents of stream into memory and parses them via load_buffer_impl.
- // Returns status_io_error if the stream is already in a failed state, or the status of the data loader on failure.
- template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
- {
-     // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
-     if (stream.fail()) return make_parse_result(status_io_error);
-
-     void* data = 0;
-     size_t data_size = 0;
-     xml_parse_status status;
-
-     // prefer the seek-based loader, since it's faster and takes less memory
-     if (stream.tellg() < 0)
-     {
-         // clear error flags that could be set by a failing tellg
-         stream.clear();
-
-         status = load_stream_data_noseek(stream, &data, &data_size);
-     }
-     else
-     {
-         status = load_stream_data_seek(stream, &data, &data_size);
-     }
-
-     if (status != status_ok) return make_parse_result(status);
-
-     const xml_encoding real_encoding = get_buffer_encoding(encoding, data, data_size);
-     const size_t parse_size = zero_terminate_buffer(data, data_size, real_encoding);
-
-     return load_buffer_impl(doc, doc, data, parse_size, options, real_encoding, true, true, out_buffer);
- }
-#endif
-
-#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
- // Opens a file given a wide-character path by forwarding to _wfopen.
- PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
- {
-     FILE* handle = _wfopen(path, mode);
-
-     return handle;
- }
-#else
- // Converts a wide-character path to a zero-terminated UTF-8 string allocated with xml_memory.
- // Returns 0 if allocation fails; the caller releases the result with xml_memory::deallocate.
- PUGI__FN char* convert_path_heap(const wchar_t* str)
- {
-     assert(str);
-
-     // first pass: measure the UTF-8 representation
-     const size_t wide_length = strlength_wide(str);
-     const size_t utf8_size = as_utf8_begin(str, wide_length);
-
-     char* utf8 = static_cast<char*>(xml_memory::allocate(utf8_size + 1));
-
-     if (utf8)
-     {
-         // second pass: convert and zero-terminate
-         as_utf8_end(utf8, utf8_size, str, wide_length);
-         utf8[utf8_size] = 0;
-     }
-
-     return utf8;
- }
-
- // Opens a file given a wide-character path on platforms without _wfopen by converting the path to UTF-8
- // and calling fopen. Returns 0 if the mode string does not fit the conversion buffer, if the path cannot
- // be converted (allocation failure), or if fopen fails.
- PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
- {
-     // convert mode to ASCII (we mirror _wfopen interface); the buffer holds at most 3 characters plus the
-     // terminator, so longer modes are rejected instead of overflowing it
-     char mode_ascii[4] = {0};
-
-     for (size_t i = 0; mode[i]; ++i)
-     {
-         if (i + 1 >= sizeof(mode_ascii)) return 0;
-
-         mode_ascii[i] = static_cast<char>(mode[i]);
-     }
-
-     // there is no standard function to open wide paths, so our best bet is to try utf8 path
-     char* path_utf8 = convert_path_heap(path);
-     if (!path_utf8) return 0;
-
-     // try to open the utf8 path
-     FILE* result = fopen(path_utf8, mode_ascii);
-
-     // free dummy buffer
-     xml_memory::deallocate(path_utf8);
-
-     return result;
- }
-#endif
-
- // Serializes doc to file and reports whether the write succeeded.
- // Returns false for a null file, or if flushing the stream fails or its error indicator is set afterwards.
- PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
- {
-     if (!file) return false;
-
-     xml_writer_file writer(file);
-     doc.save(writer, indent, flags, encoding);
-
-     // buffered data may only hit the device on flush, so flush before checking for errors
-     return fflush(file) == 0 && ferror(file) == 0;
- }
-
- // Scope guard that clears a node's name on construction and restores the saved name on destruction.
- struct name_null_sentry
- {
-     xml_node_struct* node;
-     char_t* name;
-
-     // Remembers the current name of node_ and replaces it with null.
-     name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name)
-     {
-         node_->name = 0;
-     }
-
-     // Puts the remembered name back.
-     ~name_null_sentry()
-     {
-         node->name = name;
-     }
- };
-PUGI__NS_END
-
-namespace pugi
-{
- // Stores the opaque file handle; write() casts it to FILE*.
- PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
- {
- }
-
- // Writes size bytes from data to the wrapped FILE*; the fwrite result is deliberately ignored.
- PUGI__FN void xml_writer_file::write(const void* data, size_t size)
- {
-     FILE* stream = static_cast<FILE*>(file);
-
-     const size_t written = fwrite(data, 1, size, stream);
-
-     // unfortunately we can't do proper error handling here
-     (void)!written;
- }
-
-#ifndef PUGIXML_NO_STL
- // Targets a narrow-character stream; the wide stream pointer is left null.
- PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
- {
- }
-
- // Targets a wide-character stream; the narrow stream pointer is left null.
- PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
- {
- }
-
- // Forwards size bytes from data to whichever stream this writer was constructed with.
- // For the wide stream, size is expected to be a multiple of sizeof(wchar_t) and is converted to a character count.
- PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
- {
-     if (!narrow_stream)
-     {
-         assert(wide_stream);
-         assert(size % sizeof(wchar_t) == 0);
-
-         const wchar_t* wide_data = reinterpret_cast<const wchar_t*>(data);
-         const std::streamsize wide_count = static_cast<std::streamsize>(size / sizeof(wchar_t));
-
-         wide_stream->write(wide_data, wide_count);
-         return;
-     }
-
-     assert(!wide_stream);
-
-     const char* narrow_data = reinterpret_cast<const char*>(data);
-
-     narrow_stream->write(narrow_data, static_cast<std::streamsize>(size));
- }
-#endif
-
- // Creates a walker with its depth counter set to 0.
- PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
- {
- }
-
- // Empty out-of-line destructor; performs no cleanup.
- PUGI__FN xml_tree_walker::~xml_tree_walker()
- {
- }
-
- // Returns the stored depth counter.
- PUGI__FN int xml_tree_walker::depth() const
- {
- return _depth;
- }
-
- // Default implementation: ignores the node and returns true.
- PUGI__FN bool xml_tree_walker::begin(xml_node&)
- {
- return true;
- }
-
- // Default implementation: ignores the node and returns true.
- PUGI__FN bool xml_tree_walker::end(xml_node&)
- {
- return true;
- }
-
- // Creates an empty handle (null internal pointer).
- PUGI__FN xml_attribute::xml_attribute(): _attr(0)
- {
- }
-
- // Creates a handle that refers to the given internal attribute object.
- PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
- {
- }
-
- // Empty function whose address serves as the non-null result of xml_attribute's unspecified_bool_type conversion.
- PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
- {
- }
-
- // Safe-bool conversion: yields a non-null value exactly when the handle is non-empty.
- PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
- {
-     if (!_attr) return 0;
-
-     return unspecified_bool_xml_attribute;
- }
-
- // True when the handle is empty.
- PUGI__FN bool xml_attribute::operator!() const
- {
-     return _attr == 0;
- }
-
- // Handles are equal when they refer to the same internal object.
- PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
- {
-     return r._attr == _attr;
- }
-
- // Handles differ when they refer to different internal objects.
- PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
- {
-     return r._attr != _attr;
- }
-
- // Orders handles by the address of the internal object.
- PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
- {
-     return r._attr > _attr;
- }
-
- // Orders handles by the address of the internal object.
- PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
- {
-     return r._attr < _attr;
- }
-
- // Orders handles by the address of the internal object.
- PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
- {
-     return r._attr >= _attr;
- }
-
- // Orders handles by the address of the internal object.
- PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
- {
-     return r._attr <= _attr;
- }
-
- // Returns a handle to the following attribute, or an empty handle when this handle is empty.
- PUGI__FN xml_attribute xml_attribute::next_attribute() const
- {
-     if (!_attr) return xml_attribute();
-
-     return xml_attribute(_attr->next_attribute);
- }
-
- // Returns a handle to the preceding attribute. The result is empty when this handle is empty or when
- // the object reached through prev_attribute_c has no next_attribute link.
- PUGI__FN xml_attribute xml_attribute::previous_attribute() const
- {
-     if (_attr && _attr->prev_attribute_c->next_attribute)
-         return xml_attribute(_attr->prev_attribute_c);
-
-     return xml_attribute();
- }
-
- // Returns the attribute value, or def when the handle is empty or has no value.
- PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
- {
-     if (_attr && _attr->value) return _attr->value + 0;
-
-     return def;
- }
-
- // Returns the value converted with get_value_int, or def when the handle is empty or has no value.
- PUGI__FN int xml_attribute::as_int(int def) const
- {
-     if (_attr && _attr->value) return impl::get_value_int(_attr->value);
-
-     return def;
- }
-
- // Returns the value converted with get_value_uint, or def when the handle is empty or has no value.
- PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
- {
-     if (_attr && _attr->value) return impl::get_value_uint(_attr->value);
-
-     return def;
- }
-
- // Returns the value converted with get_value_double, or def when the handle is empty or has no value.
- PUGI__FN double xml_attribute::as_double(double def) const
- {
-     if (_attr && _attr->value) return impl::get_value_double(_attr->value);
-
-     return def;
- }
-
- // Returns the value converted with get_value_float, or def when the handle is empty or has no value.
- PUGI__FN float xml_attribute::as_float(float def) const
- {
-     if (_attr && _attr->value) return impl::get_value_float(_attr->value);
-
-     return def;
- }
-
- // Returns the value converted with get_value_bool, or def when the handle is empty or has no value.
- PUGI__FN bool xml_attribute::as_bool(bool def) const
- {
-     if (_attr && _attr->value) return impl::get_value_bool(_attr->value);
-
-     return def;
- }
-
-#ifdef PUGIXML_HAS_LONG_LONG
- // Returns the value converted with get_value_llong, or def when the handle is empty or has no value.
- PUGI__FN long long xml_attribute::as_llong(long long def) const
- {
-     if (_attr && _attr->value) return impl::get_value_llong(_attr->value);
-
-     return def;
- }
-
- // Returns the value converted with get_value_ullong, or def when the handle is empty or has no value.
- PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
- {
-     if (_attr && _attr->value) return impl::get_value_ullong(_attr->value);
-
-     return def;
- }
-#endif
-
- // True when the handle does not refer to an attribute.
- PUGI__FN bool xml_attribute::empty() const
- {
-     return _attr == 0;
- }
-
- // Returns the attribute name, or an empty string when the handle is empty or has no name.
- PUGI__FN const char_t* xml_attribute::name() const
- {
-     if (_attr && _attr->name) return _attr->name + 0;
-
-     return PUGIXML_TEXT("");
- }
-
- // Returns the attribute value, or an empty string when the handle is empty or has no value.
- PUGI__FN const char_t* xml_attribute::value() const
- {
-     if (_attr && _attr->value) return _attr->value + 0;
-
-     return PUGIXML_TEXT("");
- }
-
- // Derives a hash from the address of the internal object divided by the size of xml_attribute_struct.
- PUGI__FN size_t xml_attribute::hash_value() const
- {
-     const uintptr_t address = reinterpret_cast<uintptr_t>(_attr);
-
-     return static_cast<size_t>(address / sizeof(xml_attribute_struct));
- }
-
- // Exposes the raw internal pointer held by this handle (null for an empty handle).
- PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
- {
-     xml_attribute_struct* raw = _attr;
-
-     return raw;
- }
-
- // Assigns a string value through set_value; the set_value result is ignored.
- PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
- {
-     xml_attribute& self = *this;
-
-     self.set_value(rhs);
-
-     return self;
- }
-
- // Assigns an int value through set_value; the set_value result is ignored.
- PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
- {
-     xml_attribute& self = *this;
-
-     self.set_value(rhs);
-
-     return self;
- }
-
- // Assigns an unsigned int value through set_value; the set_value result is ignored.
- PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
- {
-     xml_attribute& self = *this;
-
-     self.set_value(rhs);
-
-     return self;
- }
-
- // Assigns a double value through set_value; the set_value result is ignored.
- PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
- {
-     xml_attribute& self = *this;
-
-     self.set_value(rhs);
-
-     return self;
- }
-
- // Assigns a float value through set_value; the set_value result is ignored.
- PUGI__FN xml_attribute& xml_attribute::operator=(float rhs)
- {
-     xml_attribute& self = *this;
-
-     self.set_value(rhs);
-
-     return self;
- }
-
- // Assigns a bool value through set_value; the set_value result is ignored.
- PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
- {
-     xml_attribute& self = *this;
-
-     self.set_value(rhs);
-
-     return self;
- }
-
-#ifdef PUGIXML_HAS_LONG_LONG
- // Assigns a long long value through set_value; the set_value result is ignored.
- PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
- {
-     xml_attribute& self = *this;
-
-     self.set_value(rhs);
-
-     return self;
- }
-
- // Assigns an unsigned long long value through set_value; the set_value result is ignored.
- PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
- {
-     xml_attribute& self = *this;
-
-     self.set_value(rhs);
-
-     return self;
- }
-#endif
-
- // Replaces the attribute name with a copy of rhs. Returns false for an empty handle,
- // otherwise the result of strcpy_insitu.
- PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
- {
-     if (_attr)
-         return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
-
-     return false;
- }
-
- // Replaces the attribute value with a copy of rhs. Returns false for an empty handle,
- // otherwise the result of strcpy_insitu.
- PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
- {
-     if (_attr)
-         return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
-
-     return false;
- }
-
- // Sets the value to the text form of an int. Returns false for an empty handle.
- PUGI__FN bool xml_attribute::set_value(int rhs)
- {
-     if (_attr)
-         return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
-
-     return false;
- }
-
- // Sets the value to the text form of an unsigned int. Returns false for an empty handle.
- PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
- {
-     if (_attr)
-         return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
-
-     return false;
- }
-
- // Sets the value to the text form of a double. Returns false for an empty handle.
- PUGI__FN bool xml_attribute::set_value(double rhs)
- {
-     if (_attr)
-         return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
-
-     return false;
- }
-
- // Sets the value to the text form of a float. Returns false for an empty handle.
- PUGI__FN bool xml_attribute::set_value(float rhs)
- {
-     if (_attr)
-         return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
-
-     return false;
- }
-
- // Sets the value to the text form of a bool. Returns false for an empty handle.
- PUGI__FN bool xml_attribute::set_value(bool rhs)
- {
-     if (_attr)
-         return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
-
-     return false;
- }
-
-#ifdef PUGIXML_HAS_LONG_LONG
- // Sets the value to the text form of a long long. Returns false for an empty handle.
- PUGI__FN bool xml_attribute::set_value(long long rhs)
- {
-     if (_attr)
-         return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
-
-     return false;
- }
-
- // Sets the value to the text form of an unsigned long long. Returns false for an empty handle.
- PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
- {
-     if (_attr)
-         return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
-
-     return false;
- }
-#endif
-
-#ifdef __BORLANDC__
- // Borland-only helper: logical AND of an attribute handle (converted to bool) with a bool.
- PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
- {
-     if (!(bool)lhs) return false;
-
-     return rhs;
- }
-
- // Borland-only helper: logical OR of an attribute handle (converted to bool) with a bool.
- PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
- {
-     if ((bool)lhs) return true;
-
-     return rhs;
- }
-#endif
-
- // Creates an empty handle (null internal pointer).
- PUGI__FN xml_node::xml_node(): _root(0)
- {
- }
-
- // Creates a handle that refers to the given internal node object.
- PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
- {
- }
-
- // Empty function whose address serves as the non-null result of xml_node's unspecified_bool_type conversion.
- PUGI__FN static void unspecified_bool_xml_node(xml_node***)
- {
- }
-
- // Safe-bool conversion: yields a non-null value exactly when the handle is non-empty.
- PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
- {
-     if (!_root) return 0;
-
-     return unspecified_bool_xml_node;
- }
-
- // True when the handle is empty.
- PUGI__FN bool xml_node::operator!() const
- {
-     return _root == 0;
- }
-
- // Returns an iterator positioned at the first child (or at null for an empty handle).
- PUGI__FN xml_node::iterator xml_node::begin() const
- {
-     xml_node_struct* first = _root ? _root->first_child + 0 : 0;
-
-     return iterator(first, _root);
- }
-
- // Returns the past-the-end child iterator: a null position paired with this node as parent.
- PUGI__FN xml_node::iterator xml_node::end() const
- {
-     iterator past_last(0, _root);
-
-     return past_last;
- }
-
- // Returns an iterator positioned at the first attribute (or at null for an empty handle).
- PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
- {
-     xml_attribute_struct* first = _root ? _root->first_attribute + 0 : 0;
-
-     return attribute_iterator(first, _root);
- }
-
- // Returns the past-the-end attribute iterator: a null position paired with this node as parent.
- PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
- {
-     attribute_iterator past_last(0, _root);
-
-     return past_last;
- }
-
- // Returns the range [begin(), end()) over all children.
- PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
- {
-     typedef xml_object_range<xml_node_iterator> range_type;
-
-     return range_type(begin(), end());
- }
-
- // Returns a range of named-node iterators that starts at the first child called name_ and ends at a null position.
- PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
- {
-     xml_named_node_iterator first(child(name_)._root, _root, name_);
-     xml_named_node_iterator past_last(0, _root, name_);
-
-     return xml_object_range<xml_named_node_iterator>(first, past_last);
- }
-
- // Returns the range [attributes_begin(), attributes_end()) over all attributes.
- PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
- {
-     typedef xml_object_range<xml_attribute_iterator> range_type;
-
-     return range_type(attributes_begin(), attributes_end());
- }
-
- // Handles are equal when they refer to the same internal object.
- PUGI__FN bool xml_node::operator==(const xml_node& r) const
- {
-     return r._root == _root;
- }
-
- // Handles differ when they refer to different internal objects.
- PUGI__FN bool xml_node::operator!=(const xml_node& r) const
- {
-     return r._root != _root;
- }
-
- // Orders handles by the address of the internal object.
- PUGI__FN bool xml_node::operator<(const xml_node& r) const
- {
-     return r._root > _root;
- }
-
- // Orders handles by the address of the internal object.
- PUGI__FN bool xml_node::operator>(const xml_node& r) const
- {
-     return r._root < _root;
- }
-
- // Orders handles by the address of the internal object.
- PUGI__FN bool xml_node::operator<=(const xml_node& r) const
- {
-     return r._root >= _root;
- }
-
- // Orders handles by the address of the internal object.
- PUGI__FN bool xml_node::operator>=(const xml_node& r) const
- {
-     return r._root <= _root;
- }
-
- // True when the handle does not refer to a node.
- PUGI__FN bool xml_node::empty() const
- {
-     return _root == 0;
- }
-
- // Returns the node name, or an empty string when the handle is empty or has no name.
- PUGI__FN const char_t* xml_node::name() const
- {
-     if (_root && _root->name) return _root->name + 0;
-
-     return PUGIXML_TEXT("");
- }
-
- // Returns the node type, or node_null for an empty handle.
- PUGI__FN xml_node_type xml_node::type() const
- {
-     if (!_root) return node_null;
-
-     return PUGI__NODETYPE(_root);
- }
-
- // Returns the node value, or an empty string when the handle is empty or has no value.
- PUGI__FN const char_t* xml_node::value() const
- {
-     if (_root && _root->value) return _root->value + 0;
-
-     return PUGIXML_TEXT("");
- }
-
- // Returns the first child whose name equals name_, or an empty handle if there is none.
- PUGI__FN xml_node xml_node::child(const char_t* name_) const
- {
-     if (!_root) return xml_node();
-
-     xml_node_struct* candidate = _root->first_child;
-
-     while (candidate)
-     {
-         if (candidate->name && impl::strequal(name_, candidate->name))
-             return xml_node(candidate);
-
-         candidate = candidate->next_sibling;
-     }
-
-     return xml_node();
- }
-
- // Returns the first attribute whose name equals name_, or an empty handle if there is none.
- PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
- {
-     if (!_root) return xml_attribute();
-
-     xml_attribute_struct* candidate = _root->first_attribute;
-
-     while (candidate)
-     {
-         if (candidate->name && impl::strequal(name_, candidate->name))
-             return xml_attribute(candidate);
-
-         candidate = candidate->next_attribute;
-     }
-
-     return xml_attribute();
- }
-
- // Returns the first following sibling whose name equals name_, or an empty handle if there is none.
- PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
- {
-     if (!_root) return xml_node();
-
-     xml_node_struct* candidate = _root->next_sibling;
-
-     while (candidate)
-     {
-         if (candidate->name && impl::strequal(name_, candidate->name))
-             return xml_node(candidate);
-
-         candidate = candidate->next_sibling;
-     }
-
-     return xml_node();
- }
-
- // Returns a handle to the following sibling, or an empty handle when this handle is empty.
- PUGI__FN xml_node xml_node::next_sibling() const
- {
-     if (!_root) return xml_node();
-
-     return xml_node(_root->next_sibling);
- }
-
- // Walks backwards through prev_sibling_c links and returns the first node whose name equals name_.
- // The walk stops at a node that has no next_sibling link; returns an empty handle if nothing matches.
- PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
- {
-     if (!_root) return xml_node();
-
-     xml_node_struct* candidate = _root->prev_sibling_c;
-
-     while (candidate->next_sibling)
-     {
-         if (candidate->name && impl::strequal(name_, candidate->name))
-             return xml_node(candidate);
-
-         candidate = candidate->prev_sibling_c;
-     }
-
-     return xml_node();
- }
-
- // Looks up an attribute by name, starting the search at hint_ and wrapping around to the first attribute.
- // On a match, hint_ is advanced to the attribute after the match and the match is returned.
- // Returns an empty handle (leaving hint_ untouched) if the node is empty or no attribute matches.
- PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const
- {
- xml_attribute_struct* hint = hint_._attr;
-
- // if hint is not an attribute of node, behavior is not defined
- assert(!hint || (_root && impl::is_attribute_of(hint, _root)));
-
- if (!_root) return xml_attribute();
-
- // optimistically search from hint up until the end
- for (xml_attribute_struct* i = hint; i; i = i->next_attribute)
- if (i->name && impl::strequal(name_, i->name))
- {
- // update hint to maximize efficiency of searching for consecutive attributes
- hint_._attr = i->next_attribute;
-
- return xml_attribute(i);
- }
-
- // wrap around and search from the first attribute until the hint
- // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails
- for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute)
- if (j->name && impl::strequal(name_, j->name))
- {
- // update hint to maximize efficiency of searching for consecutive attributes
- hint_._attr = j->next_attribute;
-
- return xml_attribute(j);
- }
-
- return xml_attribute();
- }
-
- // Returns a handle to the preceding sibling. The result is empty when this handle is empty or when
- // the node reached through prev_sibling_c has no next_sibling link.
- PUGI__FN xml_node xml_node::previous_sibling() const
- {
-     if (_root && _root->prev_sibling_c->next_sibling)
-         return xml_node(_root->prev_sibling_c);
-
-     return xml_node();
- }
-
- // Returns a handle to the parent node, or an empty handle when this handle is empty.
- PUGI__FN xml_node xml_node::parent() const
- {
-     if (!_root) return xml_node();
-
-     return xml_node(_root->parent);
- }
-
- // Returns a handle to the document obtained via impl::get_document, or an empty handle when this handle is empty.
- PUGI__FN xml_node xml_node::root() const
- {
-     if (!_root) return xml_node();
-
-     return xml_node(&impl::get_document(_root));
- }
-
- // Returns an xml_text object constructed from this node's internal pointer.
- PUGI__FN xml_text xml_node::text() const
- {
-     xml_text wrapper(_root);
-
-     return wrapper;
- }
-
- // Returns the value of the first child that is a text node (per impl::is_text_node) and has a value,
- // or an empty string if there is no such child.
- PUGI__FN const char_t* xml_node::child_value() const
- {
-     if (!_root) return PUGIXML_TEXT("");
-
-     xml_node_struct* candidate = _root->first_child;
-
-     while (candidate)
-     {
-         if (impl::is_text_node(candidate) && candidate->value)
-             return candidate->value;
-
-         candidate = candidate->next_sibling;
-     }
-
-     return PUGIXML_TEXT("");
- }
-
- // Returns child_value() of the first child called name_.
- PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
- {
-     const xml_node named_child = child(name_);
-
-     return named_child.child_value();
- }
-
- // Returns a handle to the first attribute, or an empty handle when this handle is empty.
- PUGI__FN xml_attribute xml_node::first_attribute() const
- {
-     if (!_root) return xml_attribute();
-
-     return xml_attribute(_root->first_attribute);
- }
-
- // Returns a handle to the last attribute, reached through the first attribute's prev_attribute_c link;
- // empty when the handle is empty or the node has no attributes.
- PUGI__FN xml_attribute xml_node::last_attribute() const
- {
-     if (_root && _root->first_attribute)
-         return xml_attribute(_root->first_attribute->prev_attribute_c);
-
-     return xml_attribute();
- }
-
- // Returns a handle to the first child, or an empty handle when this handle is empty.
- PUGI__FN xml_node xml_node::first_child() const
- {
-     if (!_root) return xml_node();
-
-     return xml_node(_root->first_child);
- }
-
- // Returns a handle to the last child, reached through the first child's prev_sibling_c link;
- // empty when the handle is empty or the node has no children.
- PUGI__FN xml_node xml_node::last_child() const
- {
-     if (_root && _root->first_child)
-         return xml_node(_root->first_child->prev_sibling_c);
-
-     return xml_node();
- }
-
- // Replaces the node name with a copy of rhs. Only element, PI and declaration nodes accept a name;
- // returns false for any other type (including an empty handle), otherwise the result of strcpy_insitu.
- PUGI__FN bool xml_node::set_name(const char_t* rhs)
- {
-     switch (_root ? PUGI__NODETYPE(_root) : node_null)
-     {
-     case node_element:
-     case node_pi:
-     case node_declaration:
-         return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
-
-     default:
-         return false;
-     }
- }
-
- // Replaces the node value with a copy of rhs. Only PCDATA, CDATA, comment, PI and doctype nodes accept a value;
- // returns false for any other type (including an empty handle), otherwise the result of strcpy_insitu.
- PUGI__FN bool xml_node::set_value(const char_t* rhs)
- {
-     switch (_root ? PUGI__NODETYPE(_root) : node_null)
-     {
-     case node_pcdata:
-     case node_cdata:
-     case node_comment:
-     case node_pi:
-     case node_doctype:
-         return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
-
-     default:
-         return false;
-     }
- }
-
- // Creates a new attribute called name_ and links it with impl::append_attribute.
- // Returns an empty handle if this node type cannot hold attributes or if allocation fails.
- PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
- {
-     if (!impl::allow_insert_attribute(type())) return xml_attribute();
-
-     impl::xml_allocator& allocator = impl::get_allocator(_root);
-
-     if (!allocator.reserve()) return xml_attribute();
-
-     xml_attribute created(impl::allocate_attribute(allocator));
-
-     if (created.empty()) return xml_attribute();
-
-     // link first, then name; the set_name result is not checked
-     impl::append_attribute(created._attr, _root);
-     created.set_name(name_);
-
-     return created;
- }
-
- // Creates a new attribute called name_ and links it with impl::prepend_attribute.
- // Returns an empty handle if this node type cannot hold attributes or if allocation fails.
- PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
- {
-     if (!impl::allow_insert_attribute(type())) return xml_attribute();
-
-     impl::xml_allocator& allocator = impl::get_allocator(_root);
-
-     if (!allocator.reserve()) return xml_attribute();
-
-     xml_attribute created(impl::allocate_attribute(allocator));
-
-     if (created.empty()) return xml_attribute();
-
-     // link first, then name; the set_name result is not checked
-     impl::prepend_attribute(created._attr, _root);
-     created.set_name(name_);
-
-     return created;
- }
-
- // Creates a new attribute called name_ and links it after attr.
- // Returns an empty handle if this node type cannot hold attributes, if attr is empty or does not
- // belong to this node, or if allocation fails.
- PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
- {
-     if (!impl::allow_insert_attribute(type())) return xml_attribute();
-
-     const bool anchor_ok = attr && impl::is_attribute_of(attr._attr, _root);
-
-     if (!anchor_ok) return xml_attribute();
-
-     impl::xml_allocator& allocator = impl::get_allocator(_root);
-
-     if (!allocator.reserve()) return xml_attribute();
-
-     xml_attribute created(impl::allocate_attribute(allocator));
-
-     if (created.empty()) return xml_attribute();
-
-     // link first, then name; the set_name result is not checked
-     impl::insert_attribute_after(created._attr, attr._attr, _root);
-     created.set_name(name_);
-
-     return created;
- }
-
- // Creates a new attribute called name_ and links it before attr.
- // Returns an empty handle if this node type cannot hold attributes, if attr is empty or does not
- // belong to this node, or if allocation fails.
- PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
- {
-     if (!impl::allow_insert_attribute(type())) return xml_attribute();
-
-     const bool anchor_ok = attr && impl::is_attribute_of(attr._attr, _root);
-
-     if (!anchor_ok) return xml_attribute();
-
-     impl::xml_allocator& allocator = impl::get_allocator(_root);
-
-     if (!allocator.reserve()) return xml_attribute();
-
-     xml_attribute created(impl::allocate_attribute(allocator));
-
-     if (created.empty()) return xml_attribute();
-
-     // link first, then name; the set_name result is not checked
-     impl::insert_attribute_before(created._attr, attr._attr, _root);
-     created.set_name(name_);
-
-     return created;
- }
-
- PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
- {
- if (!proto) return xml_attribute();
- if (!impl::allow_insert_attribute(type())) return xml_attribute();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_attribute();
-
- xml_attribute a(impl::allocate_attribute(alloc));
- if (!a) return xml_attribute();
-
- impl::append_attribute(a._attr, _root);
- impl::node_copy_attribute(a._attr, proto._attr);
-
- return a;
- }
-
- PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
- {
- if (!proto) return xml_attribute();
- if (!impl::allow_insert_attribute(type())) return xml_attribute();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_attribute();
-
- xml_attribute a(impl::allocate_attribute(alloc));
- if (!a) return xml_attribute();
-
- impl::prepend_attribute(a._attr, _root);
- impl::node_copy_attribute(a._attr, proto._attr);
-
- return a;
- }
-
- PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
- {
- if (!proto) return xml_attribute();
- if (!impl::allow_insert_attribute(type())) return xml_attribute();
- if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_attribute();
-
- xml_attribute a(impl::allocate_attribute(alloc));
- if (!a) return xml_attribute();
-
- impl::insert_attribute_after(a._attr, attr._attr, _root);
- impl::node_copy_attribute(a._attr, proto._attr);
-
- return a;
- }
-
- PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
- {
- if (!proto) return xml_attribute();
- if (!impl::allow_insert_attribute(type())) return xml_attribute();
- if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_attribute();
-
- xml_attribute a(impl::allocate_attribute(alloc));
- if (!a) return xml_attribute();
-
- impl::insert_attribute_before(a._attr, attr._attr, _root);
- impl::node_copy_attribute(a._attr, proto._attr);
-
- return a;
- }
-
- PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
- {
- if (!impl::allow_insert_child(type(), type_)) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- xml_node n(impl::allocate_node(alloc, type_));
- if (!n) return xml_node();
-
- impl::append_node(n._root, _root);
-
- if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
-
- return n;
- }
-
- PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
- {
- if (!impl::allow_insert_child(type(), type_)) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- xml_node n(impl::allocate_node(alloc, type_));
- if (!n) return xml_node();
-
- impl::prepend_node(n._root, _root);
-
- if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
-
- return n;
- }
-
- PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
- {
- if (!impl::allow_insert_child(type(), type_)) return xml_node();
- if (!node._root || node._root->parent != _root) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- xml_node n(impl::allocate_node(alloc, type_));
- if (!n) return xml_node();
-
- impl::insert_node_before(n._root, node._root);
-
- if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
-
- return n;
- }
-
- PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
- {
- if (!impl::allow_insert_child(type(), type_)) return xml_node();
- if (!node._root || node._root->parent != _root) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- xml_node n(impl::allocate_node(alloc, type_));
- if (!n) return xml_node();
-
- impl::insert_node_after(n._root, node._root);
-
- if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
-
- return n;
- }
-
- PUGI__FN xml_node xml_node::append_child(const char_t* name_)
- {
- xml_node result = append_child(node_element);
-
- result.set_name(name_);
-
- return result;
- }
-
- PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
- {
- xml_node result = prepend_child(node_element);
-
- result.set_name(name_);
-
- return result;
- }
-
- PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
- {
- xml_node result = insert_child_after(node_element, node);
-
- result.set_name(name_);
-
- return result;
- }
-
- PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
- {
- xml_node result = insert_child_before(node_element, node);
-
- result.set_name(name_);
-
- return result;
- }
-
- PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
- {
- xml_node_type type_ = proto.type();
- if (!impl::allow_insert_child(type(), type_)) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- xml_node n(impl::allocate_node(alloc, type_));
- if (!n) return xml_node();
-
- impl::append_node(n._root, _root);
- impl::node_copy_tree(n._root, proto._root);
-
- return n;
- }
-
- PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
- {
- xml_node_type type_ = proto.type();
- if (!impl::allow_insert_child(type(), type_)) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- xml_node n(impl::allocate_node(alloc, type_));
- if (!n) return xml_node();
-
- impl::prepend_node(n._root, _root);
- impl::node_copy_tree(n._root, proto._root);
-
- return n;
- }
-
- PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
- {
- xml_node_type type_ = proto.type();
- if (!impl::allow_insert_child(type(), type_)) return xml_node();
- if (!node._root || node._root->parent != _root) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- xml_node n(impl::allocate_node(alloc, type_));
- if (!n) return xml_node();
-
- impl::insert_node_after(n._root, node._root);
- impl::node_copy_tree(n._root, proto._root);
-
- return n;
- }
-
- PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
- {
- xml_node_type type_ = proto.type();
- if (!impl::allow_insert_child(type(), type_)) return xml_node();
- if (!node._root || node._root->parent != _root) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- xml_node n(impl::allocate_node(alloc, type_));
- if (!n) return xml_node();
-
- impl::insert_node_before(n._root, node._root);
- impl::node_copy_tree(n._root, proto._root);
-
- return n;
- }
-
- PUGI__FN xml_node xml_node::append_move(const xml_node& moved)
- {
- if (!impl::allow_move(*this, moved)) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
- impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
-
- impl::remove_node(moved._root);
- impl::append_node(moved._root, _root);
-
- return moved;
- }
-
- PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved)
- {
- if (!impl::allow_move(*this, moved)) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
- impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
-
- impl::remove_node(moved._root);
- impl::prepend_node(moved._root, _root);
-
- return moved;
- }
-
- PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
- {
- if (!impl::allow_move(*this, moved)) return xml_node();
- if (!node._root || node._root->parent != _root) return xml_node();
- if (moved._root == node._root) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
- impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
-
- impl::remove_node(moved._root);
- impl::insert_node_after(moved._root, node._root);
-
- return moved;
- }
-
- PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
- {
- if (!impl::allow_move(*this, moved)) return xml_node();
- if (!node._root || node._root->parent != _root) return xml_node();
- if (moved._root == node._root) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
- impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
-
- impl::remove_node(moved._root);
- impl::insert_node_before(moved._root, node._root);
-
- return moved;
- }
-
- PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
- {
- return remove_attribute(attribute(name_));
- }
-
- PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
- {
- if (!_root || !a._attr) return false;
- if (!impl::is_attribute_of(a._attr, _root)) return false;
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return false;
-
- impl::remove_attribute(a._attr, _root);
- impl::destroy_attribute(a._attr, alloc);
-
- return true;
- }
-
- PUGI__FN bool xml_node::remove_child(const char_t* name_)
- {
- return remove_child(child(name_));
- }
-
- PUGI__FN bool xml_node::remove_child(const xml_node& n)
- {
- if (!_root || !n._root || n._root->parent != _root) return false;
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return false;
-
- impl::remove_node(n._root);
- impl::destroy_node(n._root, alloc);
-
- return true;
- }
-
- PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
- {
- // append_buffer is only valid for elements/documents
- if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);
-
- // get document node
- impl::xml_document_struct* doc = &impl::get_document(_root);
-
- // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense
- doc->header |= impl::xml_memory_page_contents_shared_mask;
-
- // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
- impl::xml_memory_page* page = 0;
- impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer), page));
- (void)page;
-
- if (!extra) return impl::make_parse_result(status_out_of_memory);
-
- // add extra buffer to the list
- extra->buffer = 0;
- extra->next = doc->extra_buffers;
- doc->extra_buffers = extra;
-
- // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
- impl::name_null_sentry sentry(_root);
-
- return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer);
- }
-
- PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
- {
- if (!_root) return xml_node();
-
- for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
- if (i->name && impl::strequal(name_, i->name))
- {
- for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
- if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
- return xml_node(i);
- }
-
- return xml_node();
- }
-
- PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
- {
- if (!_root) return xml_node();
-
- for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
- for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
- if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
- return xml_node(i);
-
- return xml_node();
- }
-
-#ifndef PUGIXML_NO_STL
- PUGI__FN string_t xml_node::path(char_t delimiter) const
- {
- if (!_root) return string_t();
-
- size_t offset = 0;
-
- for (xml_node_struct* i = _root; i; i = i->parent)
- {
- offset += (i != _root);
- offset += i->name ? impl::strlength(i->name) : 0;
- }
-
- string_t result;
- result.resize(offset);
-
- for (xml_node_struct* j = _root; j; j = j->parent)
- {
- if (j != _root)
- result[--offset] = delimiter;
-
- if (j->name && *j->name)
- {
- size_t length = impl::strlength(j->name);
-
- offset -= length;
- memcpy(&result[offset], j->name, length * sizeof(char_t));
- }
- }
-
- assert(offset == 0);
-
- return result;
- }
-#endif
-
- PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
- {
- xml_node found = *this; // Current search context.
-
- if (!_root || !path_ || !path_[0]) return found;
-
- if (path_[0] == delimiter)
- {
- // Absolute path; e.g. '/foo/bar'
- found = found.root();
- ++path_;
- }
-
- const char_t* path_segment = path_;
-
- while (*path_segment == delimiter) ++path_segment;
-
- const char_t* path_segment_end = path_segment;
-
- while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
-
- if (path_segment == path_segment_end) return found;
-
- const char_t* next_segment = path_segment_end;
-
- while (*next_segment == delimiter) ++next_segment;
-
- if (*path_segment == '.' && path_segment + 1 == path_segment_end)
- return found.first_element_by_path(next_segment, delimiter);
- else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
- return found.parent().first_element_by_path(next_segment, delimiter);
- else
- {
- for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling)
- {
- if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
- {
- xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
-
- if (subsearch) return subsearch;
- }
- }
-
- return xml_node();
- }
- }
-
- PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
- {
- walker._depth = -1;
-
- xml_node arg_begin = *this;
- if (!walker.begin(arg_begin)) return false;
-
- xml_node cur = first_child();
-
- if (cur)
- {
- ++walker._depth;
-
- do
- {
- xml_node arg_for_each = cur;
- if (!walker.for_each(arg_for_each))
- return false;
-
- if (cur.first_child())
- {
- ++walker._depth;
- cur = cur.first_child();
- }
- else if (cur.next_sibling())
- cur = cur.next_sibling();
- else
- {
- // Borland C++ workaround
- while (!cur.next_sibling() && cur != *this && !cur.parent().empty())
- {
- --walker._depth;
- cur = cur.parent();
- }
-
- if (cur != *this)
- cur = cur.next_sibling();
- }
- }
- while (cur && cur != *this);
- }
-
- assert(walker._depth == -1);
-
- xml_node arg_end = *this;
- return walker.end(arg_end);
- }
-
- PUGI__FN size_t xml_node::hash_value() const
- {
- return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
- }
-
- PUGI__FN xml_node_struct* xml_node::internal_object() const
- {
- return _root;
- }
-
- PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
- {
- if (!_root) return;
-
- impl::xml_buffered_writer buffered_writer(writer, encoding);
-
- impl::node_output(buffered_writer, _root, indent, flags, depth);
-
- buffered_writer.flush();
- }
-
-#ifndef PUGIXML_NO_STL
- PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
- {
- xml_writer_stream writer(stream);
-
- print(writer, indent, flags, encoding, depth);
- }
-
- PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
- {
- xml_writer_stream writer(stream);
-
- print(writer, indent, flags, encoding_wchar, depth);
- }
-#endif
-
- PUGI__FN ptrdiff_t xml_node::offset_debug() const
- {
- if (!_root) return -1;
-
- impl::xml_document_struct& doc = impl::get_document(_root);
-
- // we can determine the offset reliably only if there is exactly once parse buffer
- if (!doc.buffer || doc.extra_buffers) return -1;
-
- switch (type())
- {
- case node_document:
- return 0;
-
- case node_element:
- case node_declaration:
- case node_pi:
- return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1;
-
- case node_pcdata:
- case node_cdata:
- case node_comment:
- case node_doctype:
- return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1;
-
- default:
- return -1;
- }
- }
-
-#ifdef __BORLANDC__
- PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
- {
- return (bool)lhs && rhs;
- }
-
- PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
- {
- return (bool)lhs || rhs;
- }
-#endif
-
- PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
- {
- }
-
- PUGI__FN xml_node_struct* xml_text::_data() const
- {
- if (!_root || impl::is_text_node(_root)) return _root;
-
- for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
- if (impl::is_text_node(node))
- return node;
-
- return 0;
- }
-
- PUGI__FN xml_node_struct* xml_text::_data_new()
- {
- xml_node_struct* d = _data();
- if (d) return d;
-
- return xml_node(_root).append_child(node_pcdata).internal_object();
- }
-
- PUGI__FN xml_text::xml_text(): _root(0)
- {
- }
-
- PUGI__FN static void unspecified_bool_xml_text(xml_text***)
- {
- }
-
- PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
- {
- return _data() ? unspecified_bool_xml_text : 0;
- }
-
- PUGI__FN bool xml_text::operator!() const
- {
- return !_data();
- }
-
- PUGI__FN bool xml_text::empty() const
- {
- return _data() == 0;
- }
-
- PUGI__FN const char_t* xml_text::get() const
- {
- xml_node_struct* d = _data();
-
- return (d && d->value) ? d->value + 0 : PUGIXML_TEXT("");
- }
-
- PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
- {
- xml_node_struct* d = _data();
-
- return (d && d->value) ? d->value + 0 : def;
- }
-
- PUGI__FN int xml_text::as_int(int def) const
- {
- xml_node_struct* d = _data();
-
- return (d && d->value) ? impl::get_value_int(d->value) : def;
- }
-
- PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
- {
- xml_node_struct* d = _data();
-
- return (d && d->value) ? impl::get_value_uint(d->value) : def;
- }
-
- PUGI__FN double xml_text::as_double(double def) const
- {
- xml_node_struct* d = _data();
-
- return (d && d->value) ? impl::get_value_double(d->value) : def;
- }
-
- PUGI__FN float xml_text::as_float(float def) const
- {
- xml_node_struct* d = _data();
-
- return (d && d->value) ? impl::get_value_float(d->value) : def;
- }
-
- PUGI__FN bool xml_text::as_bool(bool def) const
- {
- xml_node_struct* d = _data();
-
- return (d && d->value) ? impl::get_value_bool(d->value) : def;
- }
-
-#ifdef PUGIXML_HAS_LONG_LONG
- PUGI__FN long long xml_text::as_llong(long long def) const
- {
- xml_node_struct* d = _data();
-
- return (d && d->value) ? impl::get_value_llong(d->value) : def;
- }
-
- PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
- {
- xml_node_struct* d = _data();
-
- return (d && d->value) ? impl::get_value_ullong(d->value) : def;
- }
-#endif
-
- PUGI__FN bool xml_text::set(const char_t* rhs)
- {
- xml_node_struct* dn = _data_new();
-
- return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false;
- }
-
- PUGI__FN bool xml_text::set(int rhs)
- {
- xml_node_struct* dn = _data_new();
-
- return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
- }
-
- PUGI__FN bool xml_text::set(unsigned int rhs)
- {
- xml_node_struct* dn = _data_new();
-
- return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
- }
-
- PUGI__FN bool xml_text::set(float rhs)
- {
- xml_node_struct* dn = _data_new();
-
- return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
- }
-
- PUGI__FN bool xml_text::set(double rhs)
- {
- xml_node_struct* dn = _data_new();
-
- return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
- }
-
- PUGI__FN bool xml_text::set(bool rhs)
- {
- xml_node_struct* dn = _data_new();
-
- return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
- }
-
-#ifdef PUGIXML_HAS_LONG_LONG
- PUGI__FN bool xml_text::set(long long rhs)
- {
- xml_node_struct* dn = _data_new();
-
- return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
- }
-
- PUGI__FN bool xml_text::set(unsigned long long rhs)
- {
- xml_node_struct* dn = _data_new();
-
- return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
- }
-#endif
-
- PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
- {
- set(rhs);
- return *this;
- }
-
- PUGI__FN xml_text& xml_text::operator=(int rhs)
- {
- set(rhs);
- return *this;
- }
-
- PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
- {
- set(rhs);
- return *this;
- }
-
- PUGI__FN xml_text& xml_text::operator=(double rhs)
- {
- set(rhs);
- return *this;
- }
-
- PUGI__FN xml_text& xml_text::operator=(float rhs)
- {
- set(rhs);
- return *this;
- }
-
- PUGI__FN xml_text& xml_text::operator=(bool rhs)
- {
- set(rhs);
- return *this;
- }
-
-#ifdef PUGIXML_HAS_LONG_LONG
- PUGI__FN xml_text& xml_text::operator=(long long rhs)
- {
- set(rhs);
- return *this;
- }
-
- PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
- {
- set(rhs);
- return *this;
- }
-#endif
-
- PUGI__FN xml_node xml_text::data() const
- {
- return xml_node(_data());
- }
-
-#ifdef __BORLANDC__
- PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
- {
- return (bool)lhs && rhs;
- }
-
- PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
- {
- return (bool)lhs || rhs;
- }
-#endif
-
- PUGI__FN xml_node_iterator::xml_node_iterator()
- {
- }
-
- PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
- {
- }
-
- PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
- {
- }
-
- PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
- {
- return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
- }
-
- PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
- {
- return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
- }
-
- PUGI__FN xml_node& xml_node_iterator::operator*() const
- {
- assert(_wrap._root);
- return _wrap;
- }
-
- PUGI__FN xml_node* xml_node_iterator::operator->() const
- {
- assert(_wrap._root);
- return const_cast<xml_node*>(&_wrap); // BCC32 workaround
- }
-
- PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()
- {
- assert(_wrap._root);
- _wrap._root = _wrap._root->next_sibling;
- return *this;
- }
-
- PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
- {
- xml_node_iterator temp = *this;
- ++*this;
- return temp;
- }
-
- PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()
- {
- _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
- return *this;
- }
-
- PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
- {
- xml_node_iterator temp = *this;
- --*this;
- return temp;
- }
-
- PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
- {
- }
-
- PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
- {
- }
-
- PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
- {
- }
-
- PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
- {
- return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
- }
-
- PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
- {
- return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
- }
-
- PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
- {
- assert(_wrap._attr);
- return _wrap;
- }
-
- PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
- {
- assert(_wrap._attr);
- return const_cast<xml_attribute*>(&_wrap); // BCC32 workaround
- }
-
- PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
- {
- assert(_wrap._attr);
- _wrap._attr = _wrap._attr->next_attribute;
- return *this;
- }
-
- PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
- {
- xml_attribute_iterator temp = *this;
- ++*this;
- return temp;
- }
-
- PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
- {
- _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
- return *this;
- }
-
- PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
- {
- xml_attribute_iterator temp = *this;
- --*this;
- return temp;
- }
-
- PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
- {
- }
-
- PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
- {
- }
-
- PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
- {
- }
-
- PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
- {
- return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
- }
-
- PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
- {
- return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
- }
-
- PUGI__FN xml_node& xml_named_node_iterator::operator*() const
- {
- assert(_wrap._root);
- return _wrap;
- }
-
- PUGI__FN xml_node* xml_named_node_iterator::operator->() const
- {
- assert(_wrap._root);
- return const_cast<xml_node*>(&_wrap); // BCC32 workaround
- }
-
- PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
- {
- assert(_wrap._root);
- _wrap = _wrap.next_sibling(_name);
- return *this;
- }
-
- PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
- {
- xml_named_node_iterator temp = *this;
- ++*this;
- return temp;
- }
-
- PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--()
- {
- if (_wrap._root)
- _wrap = _wrap.previous_sibling(_name);
- else
- {
- _wrap = _parent.last_child();
-
- if (!impl::strequal(_wrap.name(), _name))
- _wrap = _wrap.previous_sibling(_name);
- }
-
- return *this;
- }
-
- PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
- {
- xml_named_node_iterator temp = *this;
- --*this;
- return temp;
- }
-
- PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
- {
- }
-
- PUGI__FN xml_parse_result::operator bool() const
- {
- return status == status_ok;
- }
-
- PUGI__FN const char* xml_parse_result::description() const
- {
- switch (status)
- {
- case status_ok: return "No error";
-
- case status_file_not_found: return "File was not found";
- case status_io_error: return "Error reading from file/stream";
- case status_out_of_memory: return "Could not allocate memory";
- case status_internal_error: return "Internal error occurred";
-
- case status_unrecognized_tag: return "Could not determine tag type";
-
- case status_bad_pi: return "Error parsing document declaration/processing instruction";
- case status_bad_comment: return "Error parsing comment";
- case status_bad_cdata: return "Error parsing CDATA section";
- case status_bad_doctype: return "Error parsing document type declaration";
- case status_bad_pcdata: return "Error parsing PCDATA section";
- case status_bad_start_element: return "Error parsing start element tag";
- case status_bad_attribute: return "Error parsing element attribute";
- case status_bad_end_element: return "Error parsing end element tag";
- case status_end_element_mismatch: return "Start-end tags mismatch";
-
- case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
-
- case status_no_document_element: return "No document element found";
-
- default: return "Unknown error";
- }
- }
-
- PUGI__FN xml_document::xml_document(): _buffer(0)
- {
- create();
- }
-
- PUGI__FN xml_document::~xml_document()
- {
- destroy();
- }
-
- PUGI__FN void xml_document::reset()
- {
- destroy();
- create();
- }
-
- PUGI__FN void xml_document::reset(const xml_document& proto)
- {
- reset();
-
- for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
- append_copy(cur);
- }
-
- PUGI__FN void xml_document::create()
- {
- assert(!_root);
-
- #ifdef PUGIXML_COMPACT
- const size_t page_offset = sizeof(uint32_t);
- #else
- const size_t page_offset = 0;
- #endif
-
- // initialize sentinel page
- PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + impl::xml_memory_page_alignment - sizeof(void*) + page_offset <= sizeof(_memory));
-
- // align upwards to page boundary
- void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(_memory) + (impl::xml_memory_page_alignment - 1)) & ~(impl::xml_memory_page_alignment - 1));
-
- // prepare page structure
- impl::xml_memory_page* page = impl::xml_memory_page::construct(page_memory);
- assert(page);
-
- page->busy_size = impl::xml_memory_page_size;
-
- // setup first page marker
- #ifdef PUGIXML_COMPACT
- // round-trip through void* to avoid 'cast increases required alignment of target type' warning
- page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
- *page->compact_page_marker = sizeof(impl::xml_memory_page);
- #endif
-
- // allocate new root
- _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page);
- _root->prev_sibling_c = _root;
-
- // setup sentinel page
- page->allocator = static_cast<impl::xml_document_struct*>(_root);
-
- // verify the document allocation
- assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
- }
-
- PUGI__FN void xml_document::destroy()
- {
- assert(_root);
-
- // destroy static storage
- if (_buffer)
- {
- impl::xml_memory::deallocate(_buffer);
- _buffer = 0;
- }
-
- // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
- for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
- {
- if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
- }
-
- // destroy dynamic storage, leave sentinel page (it's in static memory)
- impl::xml_memory_page* root_page = PUGI__GETPAGE(_root);
- assert(root_page && !root_page->prev);
- assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
-
- for (impl::xml_memory_page* page = root_page->next; page; )
- {
- impl::xml_memory_page* next = page->next;
-
- impl::xml_allocator::deallocate_page(page);
-
- page = next;
- }
-
- #ifdef PUGIXML_COMPACT
- // destroy hash table
- static_cast<impl::xml_document_struct*>(_root)->hash.clear();
- #endif
-
- _root = 0;
- }
-
-#ifndef PUGIXML_NO_STL
- PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
- {
- reset();
-
- return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer);
- }
-
- PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
- {
- reset();
-
- return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
- }
-#endif
-
- PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
- {
- // Force native encoding (skip autodetection)
- #ifdef PUGIXML_WCHAR_MODE
- xml_encoding encoding = encoding_wchar;
- #else
- xml_encoding encoding = encoding_utf8;
- #endif
-
- return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
- }
-
- PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
- {
- return load_string(contents, options);
- }
-
- PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
- {
- reset();
-
- using impl::auto_deleter; // MSVC7 workaround
- auto_deleter<FILE, int(*)(FILE*)> file(fopen(path_, "rb"), fclose);
-
- return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
- }
-
- PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
- {
- reset();
-
- using impl::auto_deleter; // MSVC7 workaround
- auto_deleter<FILE, int(*)(FILE*)> file(impl::open_file_wide(path_, L"rb"), fclose);
-
- return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
- }
-
- PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
- {
- reset();
-
- return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
- }
-
- PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
- {
- reset();
-
- return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);
- }
-
- PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
- {
- reset();
-
- return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);
- }
-
- PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
- {
- impl::xml_buffered_writer buffered_writer(writer, encoding);
-
- if ((flags & format_write_bom) && encoding != encoding_latin1)
- {
- // BOM always represents the codepoint U+FEFF, so just write it in native encoding
- #ifdef PUGIXML_WCHAR_MODE
- unsigned int bom = 0xfeff;
- buffered_writer.write(static_cast<wchar_t>(bom));
- #else
- buffered_writer.write('\xef', '\xbb', '\xbf');
- #endif
- }
-
- if (!(flags & format_no_declaration) && !impl::has_declaration(_root))
- {
- buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
- if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
- buffered_writer.write('?', '>');
- if (!(flags & format_raw)) buffered_writer.write('\n');
- }
-
- impl::node_output(buffered_writer, _root, indent, flags, 0);
-
- buffered_writer.flush();
- }
-
-#ifndef PUGIXML_NO_STL
- PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
- {
- xml_writer_stream writer(stream);
-
- save(writer, indent, flags, encoding);
- }
-
- PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
- {
- xml_writer_stream writer(stream);
-
- save(writer, indent, flags, encoding_wchar);
- }
-#endif
-
- PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
- {
- using impl::auto_deleter; // MSVC7 workaround
- auto_deleter<FILE, int(*)(FILE*)> file(fopen(path_, (flags & format_save_file_text) ? "w" : "wb"), fclose);
-
- return impl::save_file_impl(*this, file.data, indent, flags, encoding);
- }
-
- PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
- {
- using impl::auto_deleter; // MSVC7 workaround
- auto_deleter<FILE, int(*)(FILE*)> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), fclose);
-
- return impl::save_file_impl(*this, file.data, indent, flags, encoding);
- }
-
- PUGI__FN xml_node xml_document::document_element() const
- {
- assert(_root);
-
- for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
- if (PUGI__NODETYPE(i) == node_element)
- return xml_node(i);
-
- return xml_node();
- }
-
-#ifndef PUGIXML_NO_STL
- PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
- {
- assert(str);
-
- return impl::as_utf8_impl(str, impl::strlength_wide(str));
- }
-
- PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
- {
- return impl::as_utf8_impl(str.c_str(), str.size());
- }
-
- PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
- {
- assert(str);
-
- return impl::as_wide_impl(str, strlen(str));
- }
-
- PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
- {
- return impl::as_wide_impl(str.c_str(), str.size());
- }
-#endif
-
- PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
- {
- impl::xml_memory::allocate = allocate;
- impl::xml_memory::deallocate = deallocate;
- }
-
- PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
- {
- return impl::xml_memory::allocate;
- }
-
- PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
- {
- return impl::xml_memory::deallocate;
- }
-}
-
-#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
-namespace std
-{
- // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
- PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
- {
- return std::bidirectional_iterator_tag();
- }
-
- PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
- {
- return std::bidirectional_iterator_tag();
- }
-
- PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
- {
- return std::bidirectional_iterator_tag();
- }
-}
-#endif
-
-#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
-namespace std
-{
- // Workarounds for (non-standard) iterator category detection
- PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
- {
- return std::bidirectional_iterator_tag();
- }
-
- PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
- {
- return std::bidirectional_iterator_tag();
- }
-
- PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
- {
- return std::bidirectional_iterator_tag();
- }
-}
-#endif
-
-#ifndef PUGIXML_NO_XPATH
-// STL replacements
-PUGI__NS_BEGIN
- struct equal_to
- {
- template <typename T> bool operator()(const T& lhs, const T& rhs) const
- {
- return lhs == rhs;
- }
- };
-
- struct not_equal_to
- {
- template <typename T> bool operator()(const T& lhs, const T& rhs) const
- {
- return lhs != rhs;
- }
- };
-
- struct less
- {
- template <typename T> bool operator()(const T& lhs, const T& rhs) const
- {
- return lhs < rhs;
- }
- };
-
- struct less_equal
- {
- template <typename T> bool operator()(const T& lhs, const T& rhs) const
- {
- return lhs <= rhs;
- }
- };
-
- template <typename T> void swap(T& lhs, T& rhs)
- {
- T temp = lhs;
- lhs = rhs;
- rhs = temp;
- }
-
- template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
- {
- I result = begin;
-
- for (I it = begin + 1; it != end; ++it)
- if (pred(*it, *result))
- result = it;
-
- return result;
- }
-
- template <typename I> void reverse(I begin, I end)
- {
- while (end - begin > 1) swap(*begin++, *--end);
- }
-
- template <typename I> I unique(I begin, I end)
- {
- // fast skip head
- while (end - begin > 1 && *begin != *(begin + 1)) begin++;
-
- if (begin == end) return begin;
-
- // last written element
- I write = begin++;
-
- // merge unique elements
- while (begin != end)
- {
- if (*begin != *write)
- *++write = *begin++;
- else
- begin++;
- }
-
- // past-the-end (write points to live element)
- return write + 1;
- }
-
- template <typename I> void copy_backwards(I begin, I end, I target)
- {
- while (begin != end) *--target = *--end;
- }
-
- template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
- {
- assert(begin != end);
-
- for (I it = begin + 1; it != end; ++it)
- {
- T val = *it;
-
- if (pred(val, *begin))
- {
- // move to front
- copy_backwards(begin, it, it + 1);
- *begin = val;
- }
- else
- {
- I hole = it;
-
- // move hole backwards
- while (pred(val, *(hole - 1)))
- {
- *hole = *(hole - 1);
- hole--;
- }
-
- // fill hole with element
- *hole = val;
- }
- }
- }
-
- // std variant for elements with ==
- template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
- {
- I eqbeg = middle, eqend = middle + 1;
-
- // expand equal range
- while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg;
- while (eqend != end && *eqend == *eqbeg) ++eqend;
-
- // process outer elements
- I ltend = eqbeg, gtbeg = eqend;
-
- for (;;)
- {
- // find the element from the right side that belongs to the left one
- for (; gtbeg != end; ++gtbeg)
- if (!pred(*eqbeg, *gtbeg))
- {
- if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++);
- else break;
- }
-
- // find the element from the left side that belongs to the right one
- for (; ltend != begin; --ltend)
- if (!pred(*(ltend - 1), *eqbeg))
- {
- if (*eqbeg == *(ltend - 1)) swap(*(ltend - 1), *--eqbeg);
- else break;
- }
-
- // scanned all elements
- if (gtbeg == end && ltend == begin)
- {
- *out_eqbeg = eqbeg;
- *out_eqend = eqend;
- return;
- }
-
- // make room for elements by moving equal area
- if (gtbeg == end)
- {
- if (--ltend != --eqbeg) swap(*ltend, *eqbeg);
- swap(*eqbeg, *--eqend);
- }
- else if (ltend == begin)
- {
- if (eqend != gtbeg) swap(*eqbeg, *eqend);
- ++eqend;
- swap(*gtbeg++, *eqbeg++);
- }
- else swap(*gtbeg++, *--ltend);
- }
- }
-
- template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
- {
- if (pred(*middle, *first)) swap(*middle, *first);
- if (pred(*last, *middle)) swap(*last, *middle);
- if (pred(*middle, *first)) swap(*middle, *first);
- }
-
- template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
- {
- if (last - first <= 40)
- {
- // median of three for small chunks
- median3(first, middle, last, pred);
- }
- else
- {
- // median of nine
- size_t step = (last - first + 1) / 8;
-
- median3(first, first + step, first + 2 * step, pred);
- median3(middle - step, middle, middle + step, pred);
- median3(last - 2 * step, last - step, last, pred);
- median3(first + step, middle, last - step, pred);
- }
- }
-
- template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
- {
- // sort large chunks
- while (end - begin > 32)
- {
- // find median element
- I middle = begin + (end - begin) / 2;
- median(begin, middle, end - 1, pred);
-
- // partition in three chunks (< = >)
- I eqbeg, eqend;
- partition(begin, middle, end, pred, &eqbeg, &eqend);
-
- // loop on larger half
- if (eqbeg - begin > end - eqend)
- {
- sort(eqend, end, pred);
- end = eqbeg;
- }
- else
- {
- sort(begin, eqbeg, pred);
- begin = eqend;
- }
- }
-
- // insertion sort small chunk
- if (begin != end) insertion_sort(begin, end, pred, &*begin);
- }
-PUGI__NS_END
-
-// Allocator used for AST and evaluation stacks
-PUGI__NS_BEGIN
- static const size_t xpath_memory_page_size =
- #ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
- PUGIXML_MEMORY_XPATH_PAGE_SIZE
- #else
- 4096
- #endif
- ;
-
- static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*);
-
- struct xpath_memory_block
- {
- xpath_memory_block* next;
- size_t capacity;
-
- union
- {
- char data[xpath_memory_page_size];
- double alignment;
- };
- };
-
- class xpath_allocator
- {
- xpath_memory_block* _root;
- size_t _root_size;
-
- public:
- #ifdef PUGIXML_NO_EXCEPTIONS
- jmp_buf* error_handler;
- #endif
-
- xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size)
- {
- #ifdef PUGIXML_NO_EXCEPTIONS
- error_handler = 0;
- #endif
- }
-
- void* allocate_nothrow(size_t size)
- {
- // round size up to block alignment boundary
- size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
-
- if (_root_size + size <= _root->capacity)
- {
- void* buf = &_root->data[0] + _root_size;
- _root_size += size;
- return buf;
- }
- else
- {
- // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
- size_t block_capacity_base = sizeof(_root->data);
- size_t block_capacity_req = size + block_capacity_base / 4;
- size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;
-
- size_t block_size = block_capacity + offsetof(xpath_memory_block, data);
-
- xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
- if (!block) return 0;
-
- block->next = _root;
- block->capacity = block_capacity;
-
- _root = block;
- _root_size = size;
-
- return block->data;
- }
- }
-
- void* allocate(size_t size)
- {
- void* result = allocate_nothrow(size);
-
- if (!result)
- {
- #ifdef PUGIXML_NO_EXCEPTIONS
- assert(error_handler);
- longjmp(*error_handler, 1);
- #else
- throw std::bad_alloc();
- #endif
- }
-
- return result;
- }
-
- void* reallocate(void* ptr, size_t old_size, size_t new_size)
- {
- // round size up to block alignment boundary
- old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
- new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
-
- // we can only reallocate the last object
- assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);
-
- // adjust root size so that we have not allocated the object at all
- bool only_object = (_root_size == old_size);
-
- if (ptr) _root_size -= old_size;
-
- // allocate a new version (this will obviously reuse the memory if possible)
- void* result = allocate(new_size);
- assert(result);
-
- // we have a new block
- if (result != ptr && ptr)
- {
- // copy old data
- assert(new_size >= old_size);
- memcpy(result, ptr, old_size);
-
- // free the previous page if it had no other objects
- if (only_object)
- {
- assert(_root->data == result);
- assert(_root->next);
-
- xpath_memory_block* next = _root->next->next;
-
- if (next)
- {
- // deallocate the whole page, unless it was the first one
- xml_memory::deallocate(_root->next);
- _root->next = next;
- }
- }
- }
-
- return result;
- }
-
- void revert(const xpath_allocator& state)
- {
- // free all new pages
- xpath_memory_block* cur = _root;
-
- while (cur != state._root)
- {
- xpath_memory_block* next = cur->next;
-
- xml_memory::deallocate(cur);
-
- cur = next;
- }
-
- // restore state
- _root = state._root;
- _root_size = state._root_size;
- }
-
- void release()
- {
- xpath_memory_block* cur = _root;
- assert(cur);
-
- while (cur->next)
- {
- xpath_memory_block* next = cur->next;
-
- xml_memory::deallocate(cur);
-
- cur = next;
- }
- }
- };
-
- struct xpath_allocator_capture
- {
- xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
- {
- }
-
- ~xpath_allocator_capture()
- {
- _target->revert(_state);
- }
-
- xpath_allocator* _target;
- xpath_allocator _state;
- };
-
- struct xpath_stack
- {
- xpath_allocator* result;
- xpath_allocator* temp;
- };
-
- struct xpath_stack_data
- {
- xpath_memory_block blocks[2];
- xpath_allocator result;
- xpath_allocator temp;
- xpath_stack stack;
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- jmp_buf error_handler;
- #endif
-
- xpath_stack_data(): result(blocks + 0), temp(blocks + 1)
- {
- blocks[0].next = blocks[1].next = 0;
- blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
-
- stack.result = &result;
- stack.temp = &temp;
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- result.error_handler = temp.error_handler = &error_handler;
- #endif
- }
-
- ~xpath_stack_data()
- {
- result.release();
- temp.release();
- }
- };
-PUGI__NS_END
-
-// String class
-PUGI__NS_BEGIN
- class xpath_string
- {
- const char_t* _buffer;
- bool _uses_heap;
- size_t _length_heap;
-
- static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
- {
- char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
- assert(result);
-
- memcpy(result, string, length * sizeof(char_t));
- result[length] = 0;
-
- return result;
- }
-
- xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap)
- {
- }
-
- public:
- static xpath_string from_const(const char_t* str)
- {
- return xpath_string(str, false, 0);
- }
-
- static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end)
- {
- assert(begin <= end && *end == 0);
-
- return xpath_string(begin, true, static_cast<size_t>(end - begin));
- }
-
- static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc)
- {
- assert(begin <= end);
-
- size_t length = static_cast<size_t>(end - begin);
-
- return length == 0 ? xpath_string() : xpath_string(duplicate_string(begin, length, alloc), true, length);
- }
-
- xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0)
- {
- }
-
- void append(const xpath_string& o, xpath_allocator* alloc)
- {
- // skip empty sources
- if (!*o._buffer) return;
-
- // fast append for constant empty target and constant source
- if (!*_buffer && !_uses_heap && !o._uses_heap)
- {
- _buffer = o._buffer;
- }
- else
- {
- // need to make heap copy
- size_t target_length = length();
- size_t source_length = o.length();
- size_t result_length = target_length + source_length;
-
- // allocate new buffer
- char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
- assert(result);
-
- // append first string to the new buffer in case there was no reallocation
- if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
-
- // append second string to the new buffer
- memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
- result[result_length] = 0;
-
- // finalize
- _buffer = result;
- _uses_heap = true;
- _length_heap = result_length;
- }
- }
-
- const char_t* c_str() const
- {
- return _buffer;
- }
-
- size_t length() const
- {
- return _uses_heap ? _length_heap : strlength(_buffer);
- }
-
- char_t* data(xpath_allocator* alloc)
- {
- // make private heap copy
- if (!_uses_heap)
- {
- size_t length_ = strlength(_buffer);
-
- _buffer = duplicate_string(_buffer, length_, alloc);
- _uses_heap = true;
- _length_heap = length_;
- }
-
- return const_cast<char_t*>(_buffer);
- }
-
- bool empty() const
- {
- return *_buffer == 0;
- }
-
- bool operator==(const xpath_string& o) const
- {
- return strequal(_buffer, o._buffer);
- }
-
- bool operator!=(const xpath_string& o) const
- {
- return !strequal(_buffer, o._buffer);
- }
-
- bool uses_heap() const
- {
- return _uses_heap;
- }
- };
-PUGI__NS_END
-
-PUGI__NS_BEGIN
- PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
- {
- while (*pattern && *string == *pattern)
- {
- string++;
- pattern++;
- }
-
- return *pattern == 0;
- }
-
- PUGI__FN const char_t* find_char(const char_t* s, char_t c)
- {
- #ifdef PUGIXML_WCHAR_MODE
- return wcschr(s, c);
- #else
- return strchr(s, c);
- #endif
- }
-
- PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
- {
- #ifdef PUGIXML_WCHAR_MODE
- // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
- return (*p == 0) ? s : wcsstr(s, p);
- #else
- return strstr(s, p);
- #endif
- }
-
- // Converts symbol to lower case, if it is an ASCII one
- PUGI__FN char_t tolower_ascii(char_t ch)
- {
- return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
- }
-
- PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
- {
- if (na.attribute())
- return xpath_string::from_const(na.attribute().value());
- else
- {
- xml_node n = na.node();
-
- switch (n.type())
- {
- case node_pcdata:
- case node_cdata:
- case node_comment:
- case node_pi:
- return xpath_string::from_const(n.value());
-
- case node_document:
- case node_element:
- {
- xpath_string result;
-
- xml_node cur = n.first_child();
-
- while (cur && cur != n)
- {
- if (cur.type() == node_pcdata || cur.type() == node_cdata)
- result.append(xpath_string::from_const(cur.value()), alloc);
-
- if (cur.first_child())
- cur = cur.first_child();
- else if (cur.next_sibling())
- cur = cur.next_sibling();
- else
- {
- while (!cur.next_sibling() && cur != n)
- cur = cur.parent();
-
- if (cur != n) cur = cur.next_sibling();
- }
- }
-
- return result;
- }
-
- default:
- return xpath_string();
- }
- }
- }
-
- PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn)
- {
- assert(ln->parent == rn->parent);
-
- // there is no common ancestor (the shared parent is null), nodes are from different documents
- if (!ln->parent) return ln < rn;
-
- // determine sibling order
- xml_node_struct* ls = ln;
- xml_node_struct* rs = rn;
-
- while (ls && rs)
- {
- if (ls == rn) return true;
- if (rs == ln) return false;
-
- ls = ls->next_sibling;
- rs = rs->next_sibling;
- }
-
- // if rn sibling chain ended ln must be before rn
- return !rs;
- }
-
- PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn)
- {
- // find common ancestor at the same depth, if any
- xml_node_struct* lp = ln;
- xml_node_struct* rp = rn;
-
- while (lp && rp && lp->parent != rp->parent)
- {
- lp = lp->parent;
- rp = rp->parent;
- }
-
- // parents are the same!
- if (lp && rp) return node_is_before_sibling(lp, rp);
-
- // nodes are at different depths, need to normalize heights
- bool left_higher = !lp;
-
- while (lp)
- {
- lp = lp->parent;
- ln = ln->parent;
- }
-
- while (rp)
- {
- rp = rp->parent;
- rn = rn->parent;
- }
-
- // one node is the ancestor of the other
- if (ln == rn) return left_higher;
-
- // find common ancestor... again
- while (ln->parent != rn->parent)
- {
- ln = ln->parent;
- rn = rn->parent;
- }
-
- return node_is_before_sibling(ln, rn);
- }
-
- PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node)
- {
- while (node && node != parent) node = node->parent;
-
- return parent && node == parent;
- }
-
- PUGI__FN const void* document_buffer_order(const xpath_node& xnode)
- {
- xml_node_struct* node = xnode.node().internal_object();
-
- if (node)
- {
- if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0)
- {
- if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name;
- if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value;
- }
-
- return 0;
- }
-
- xml_attribute_struct* attr = xnode.attribute().internal_object();
-
- if (attr)
- {
- if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0)
- {
- if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name;
- if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value;
- }
-
- return 0;
- }
-
- return 0;
- }
-
- struct document_order_comparator
- {
- bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
- {
- // optimized document order based check
- const void* lo = document_buffer_order(lhs);
- const void* ro = document_buffer_order(rhs);
-
- if (lo && ro) return lo < ro;
-
- // slow comparison
- xml_node ln = lhs.node(), rn = rhs.node();
-
- // compare attributes
- if (lhs.attribute() && rhs.attribute())
- {
- // shared parent
- if (lhs.parent() == rhs.parent())
- {
- // determine sibling order
- for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
- if (a == rhs.attribute())
- return true;
-
- return false;
- }
-
- // compare attribute parents
- ln = lhs.parent();
- rn = rhs.parent();
- }
- else if (lhs.attribute())
- {
- // attributes go after the parent element
- if (lhs.parent() == rhs.node()) return false;
-
- ln = lhs.parent();
- }
- else if (rhs.attribute())
- {
- // attributes go after the parent element
- if (rhs.parent() == lhs.node()) return true;
-
- rn = rhs.parent();
- }
-
- if (ln == rn) return false;
-
- if (!ln || !rn) return ln < rn;
-
- return node_is_before(ln.internal_object(), rn.internal_object());
- }
- };
-
- struct duplicate_comparator
- {
- bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
- {
- if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true;
- else return rhs.attribute() ? false : lhs.node() < rhs.node();
- }
- };
-
- PUGI__FN double gen_nan()
- {
- #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
- union { float f; uint32_t i; } u[sizeof(float) == sizeof(uint32_t) ? 1 : -1];
- u[0].i = 0x7fc00000;
- return u[0].f;
- #else
- // fallback
- const volatile double zero = 0.0;
- return zero / zero;
- #endif
- }
-
- PUGI__FN bool is_nan(double value)
- {
- #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
- return !!_isnan(value);
- #elif defined(fpclassify) && defined(FP_NAN)
- return fpclassify(value) == FP_NAN;
- #else
- // fallback
- const volatile double v = value;
- return v != v;
- #endif
- }
-
- PUGI__FN const char_t* convert_number_to_string_special(double value)
- {
- #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
- if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
- if (_isnan(value)) return PUGIXML_TEXT("NaN");
- return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
- #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
- switch (fpclassify(value))
- {
- case FP_NAN:
- return PUGIXML_TEXT("NaN");
-
- case FP_INFINITE:
- return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
-
- case FP_ZERO:
- return PUGIXML_TEXT("0");
-
- default:
- return 0;
- }
- #else
- // fallback
- const volatile double v = value;
-
- if (v == 0) return PUGIXML_TEXT("0");
- if (v != v) return PUGIXML_TEXT("NaN");
- if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
- return 0;
- #endif
- }
-
- PUGI__FN bool convert_number_to_boolean(double value)
- {
- return (value != 0 && !is_nan(value));
- }
-
- PUGI__FN void truncate_zeros(char* begin, char* end)
- {
- while (begin != end && end[-1] == '0') end--;
-
- *end = 0;
- }
-
- // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
-#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
- PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
- {
- // get base values
- int sign, exponent;
- _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);
-
- // truncate redundant zeros
- truncate_zeros(buffer, buffer + strlen(buffer));
-
- // fill results
- *out_mantissa = buffer;
- *out_exponent = exponent;
- }
-#else
- PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
- {
- // get a scientific notation value with IEEE DBL_DIG decimals
- sprintf(buffer, "%.*e", DBL_DIG, value);
- assert(strlen(buffer) < buffer_size);
- (void)!buffer_size;
-
- // get the exponent (possibly negative)
- char* exponent_string = strchr(buffer, 'e');
- assert(exponent_string);
-
- int exponent = atoi(exponent_string + 1);
-
- // extract mantissa string: skip sign
- char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
- assert(mantissa[0] != '0' && mantissa[1] == '.');
-
- // divide mantissa by 10 to eliminate integer part
- mantissa[1] = mantissa[0];
- mantissa++;
- exponent++;
-
- // remove extra mantissa digits and zero-terminate mantissa
- truncate_zeros(mantissa, exponent_string);
-
- // fill results
- *out_mantissa = mantissa;
- *out_exponent = exponent;
- }
-#endif
-
- // Renders a double as a plain decimal string: optional '-', integer digits,
- // then an optional '.' followed by fraction digits. The text is written into
- // memory obtained from alloc, except for values that
- // convert_number_to_string_special maps to a constant string.
- PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
- {
-     // special values map to constant strings and need no allocation
-     if (const char_t* fixed = convert_number_to_string_special(value))
-         return xpath_string::from_const(fixed);
-
-     // split the value into a digit string and a decimal exponent
-     char digit_storage[32];
-     char* digits;
-     int exp10;
-     convert_number_to_mantissa_exponent(value, digit_storage, sizeof(digit_storage), &digits, &exp10);
-
-     // every digit, |exponent| padding zeroes, plus a fixed margin of 4 characters
-     size_t capacity = strlen(digit_storage) + (exp10 > 0 ? exp10 : -exp10) + 4;
-     char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * capacity));
-     assert(result);
-
-     char_t* out = result;
-
-     // leading minus for negative values
-     if (value < 0) *out++ = '-';
-
-     // integer part: a lone '0', or exp10 digits padded with zeroes once the mantissa runs out
-     if (exp10 <= 0)
-     {
-         *out++ = '0';
-     }
-     else
-     {
-         for (; exp10 > 0; exp10--)
-         {
-             assert(*digits == 0 || static_cast<unsigned int>(static_cast<unsigned int>(*digits) - '0') <= 9);
-             *out++ = *digits ? *digits++ : '0';
-         }
-     }
-
-     // fractional part is emitted only when mantissa digits remain
-     if (*digits)
-     {
-         *out++ = '.';
-
-         // a negative exponent contributes leading zeroes after the point
-         for (; exp10 < 0; exp10++)
-             *out++ = '0';
-
-         // remaining mantissa digits
-         while (*digits)
-         {
-             assert(static_cast<unsigned int>(*digits - '0') <= 9);
-             *out++ = *digits++;
-         }
-     }
-
-     // terminate; the assert guards the capacity estimate above
-     assert(out < result + capacity);
-     *out = 0;
-
-     return xpath_string::from_heap_preallocated(result, out);
- }
-
- // Returns true when the string is: optional whitespace, optional '-', digits
- // with an optional '.' and more digits (or '.' followed by at least one digit),
- // optional whitespace, end of string.
- PUGI__FN bool check_string_to_number_format(const char_t* string)
- {
-     const char_t* p = string;
-
-     // leading whitespace
-     while (PUGI__IS_CHARTYPE(*p, ct_space)) ++p;
-
-     // optional sign
-     if (*p == '-') ++p;
-
-     // nothing left after the sign
-     if (!*p) return false;
-
-     // need either an integer digit or ".<digit>"
-     bool starts_with_digit = PUGI__IS_CHARTYPEX(p[0], ctx_digit);
-     bool starts_with_fraction = !starts_with_digit && p[0] == '.' && PUGI__IS_CHARTYPEX(p[1], ctx_digit);
-
-     if (!starts_with_digit && !starts_with_fraction) return false;
-
-     // integer digits
-     while (PUGI__IS_CHARTYPEX(*p, ctx_digit)) ++p;
-
-     // optional fraction
-     if (*p == '.')
-     {
-         ++p;
-
-         while (PUGI__IS_CHARTYPEX(*p, ctx_digit)) ++p;
-     }
-
-     // trailing whitespace
-     while (PUGI__IS_CHARTYPE(*p, ct_space)) ++p;
-
-     // anything left over makes the string invalid
-     return *p == 0;
- }
-
- // Converts a zero-terminated string to a double. Strings rejected by
- // check_string_to_number_format yield gen_nan(); accepted strings are parsed
- // with wcstod or strtod depending on PUGIXML_WCHAR_MODE.
- PUGI__FN double convert_string_to_number(const char_t* string)
- {
-     const bool well_formed = check_string_to_number_format(string);
-
-     if (!well_formed) return gen_nan();
-
- #ifndef PUGIXML_WCHAR_MODE
-     return strtod(string, 0);
- #else
-     return wcstod(string, 0);
- #endif
- }
-
- // Converts the character range [begin, end) to a double and stores it in
- // *out_result. The range is first copied into a zero-terminated buffer: the
- // caller's 32-element scratch array when it fits, otherwise a temporary heap
- // block. Returns false only if that heap allocation fails.
- PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
- {
-     const size_t capacity = sizeof(buffer) / sizeof(buffer[0]);
-     const size_t length = static_cast<size_t>(end - begin);
-
-     // only allocate when the text plus terminator does not fit in the scratch array
-     char_t* heap_copy = 0;
-
-     if (length >= capacity)
-     {
-         heap_copy = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
-         if (!heap_copy) return false;
-     }
-
-     char_t* scratch = heap_copy ? heap_copy : buffer;
-
-     // build the zero-terminated copy and convert it
-     memcpy(scratch, begin, length * sizeof(char_t));
-     scratch[length] = 0;
-
-     *out_result = convert_string_to_number(scratch);
-
-     // release the temporary heap block, if one was needed
-     if (heap_copy) xml_memory::deallocate(heap_copy);
-
-     return true;
- }
-
- // Rounds to an integer by adding one half and taking the floor.
- PUGI__FN double round_nearest(double value)
- {
-     const double shifted = value + 0.5;
-
-     return floor(shifted);
- }
-
- // Like round_nearest, except that inputs in [-0.5, 0] go through ceil so that
- // the sign of zero is preserved (negative inputs give -0, +0 stays +0).
- PUGI__FN double round_nearest_nzero(double value)
- {
-     if (value >= -0.5 && value <= 0)
-         return ceil(value);
-
-     return floor(value + 0.5);
- }
-
- // Returns the name of the attribute when the xpath node holds one,
- // otherwise the name of the underlying xml node.
- PUGI__FN const char_t* qualified_name(const xpath_node& node)
- {
-     if (node.attribute())
-         return node.attribute().name();
-
-     return node.node().name();
- }
-
- // Returns the part of the qualified name after the ':' located by find_char,
- // or the whole name when there is no ':'.
- PUGI__FN const char_t* local_name(const xpath_node& node)
- {
-     const char_t* full = qualified_name(node);
-     const char_t* colon = find_char(full, ':');
-
-     if (!colon) return full;
-
-     return colon + 1;
- }
-
- // Predicate over attributes that matches the namespace declaration for a
- // given qualified name: "xmlns:<prefix>" when the name has a prefix, plain
- // "xmlns" otherwise.
- struct namespace_uri_predicate
- {
-     const char_t* prefix;
-     size_t prefix_length;
-
-     // Splits name at the ':' located by find_char; prefix stays null when there is none.
-     namespace_uri_predicate(const char_t* name)
-     {
-         const char_t* colon = find_char(name, ':');
-
-         if (colon)
-         {
-             prefix = name;
-             prefix_length = static_cast<size_t>(colon - name);
-         }
-         else
-         {
-             prefix = 0;
-             prefix_length = 0;
-         }
-     }
-
-     bool operator()(xml_attribute a) const
-     {
-         const char_t* attr_name = a.name();
-
-         // only xmlns... attributes can declare a namespace
-         if (!starts_with(attr_name, PUGIXML_TEXT("xmlns"))) return false;
-
-         // no prefix: the attribute must be exactly "xmlns"
-         if (!prefix) return attr_name[5] == 0;
-
-         // prefixed: the text after "xmlns:" must equal the prefix
-         return attr_name[5] == ':' && strequalrange(attr_name + 6, prefix, prefix_length);
-     }
- };
-
- // Walks from node up through its ancestors and returns the value of the first
- // matching namespace declaration attribute, or an empty string if none exists.
- PUGI__FN const char_t* namespace_uri(xml_node node)
- {
-     namespace_uri_predicate pred = node.name();
-
-     for (xml_node cur = node; cur; cur = cur.parent())
-     {
-         xml_attribute decl = cur.find_attribute(pred);
-
-         if (decl) return decl.value();
-     }
-
-     return PUGIXML_TEXT("");
- }
-
- // Namespace lookup for an attribute: starts at parent and walks up the
- // ancestors. Attributes without a prefix always get an empty string because
- // the default namespace does not apply to attributes.
- PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent)
- {
-     namespace_uri_predicate pred = attr.name();
-
-     if (pred.prefix)
-     {
-         for (xml_node cur = parent; cur; cur = cur.parent())
-         {
-             xml_attribute decl = cur.find_attribute(pred);
-
-             if (decl) return decl.value();
-         }
-     }
-
-     return PUGIXML_TEXT("");
- }
-
- // Dispatches to the attribute or the node overload depending on what the
- // xpath node holds.
- PUGI__FN const char_t* namespace_uri(const xpath_node& node)
- {
-     if (node.attribute())
-         return namespace_uri(node.attribute(), node.parent());
-
-     return namespace_uri(node.node());
- }
-
- // Collapses every whitespace run in buffer to a single ' ' in place, dropping
- // leading and trailing whitespace. Returns a pointer to the new terminator.
- PUGI__FN char_t* normalize_space(char_t* buffer)
- {
-     char_t* out = buffer;
-     char_t* in = buffer;
-
-     while (*in)
-     {
-         char_t ch = *in++;
-
-         if (!PUGI__IS_CHARTYPE(ch, ct_space))
-         {
-             *out++ = ch;
-             continue;
-         }
-
-         // swallow the rest of the whitespace run
-         while (PUGI__IS_CHARTYPE(*in, ct_space)) in++;
-
-         // emit one space, but never at the very start
-         if (out != buffer) *out++ = ' ';
-     }
-
-     // a run at the end leaves one space behind; drop it
-     if (out != buffer && PUGI__IS_CHARTYPE(out[-1], ct_space)) out--;
-
-     *out = 0;
-
-     return out;
- }
-
- // In-place character mapping: a character found in from at index i is
- // replaced by to[i] when i < to_length and removed otherwise; characters not
- // in from are kept. Returns a pointer to the new terminator.
- PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
- {
-     char_t* out = buffer;
-
-     for (char_t* in = buffer; *in; )
-     {
-         PUGI__DMC_VOLATILE char_t ch = *in++;
-
-         const char_t* hit = find_char(from, ch);
-
-         if (hit)
-         {
-             size_t index = static_cast<size_t>(hit - from);
-
-             // characters beyond the end of 'to' are dropped
-             if (index < to_length) *out++ = to[index];
-         }
-         else
-         {
-             // not listed in 'from': copy through unchanged
-             *out++ = ch;
-         }
-     }
-
-     *out = 0;
-
-     return out;
- }
-
- // Builds a 128-entry lookup table for translate(): entry c holds the
- // replacement for character code c, c itself when unmapped, or 128 when the
- // character must be removed. Returns 0 when either string contains a code
- // >= 128 or when the allocator cannot provide memory for the table.
- PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
- {
-     unsigned char table[128] = {0};
-
-     for (; *from; ++from)
-     {
-         const unsigned int src = static_cast<unsigned int>(*from);
-         const unsigned int dst = static_cast<unsigned int>(*to);
-
-         // the table only covers codes below 128
-         if (src >= 128 || dst >= 128)
-             return 0;
-
-         // first mapping wins; code=128 means "skip character"
-         if (table[src] == 0)
-             table[src] = static_cast<unsigned char>(dst != 0 ? dst : 128);
-
-         // 'to' stops advancing once it is exhausted
-         if (dst != 0) ++to;
-     }
-
-     // unmapped characters translate to themselves
-     for (int code = 0; code < 128; ++code)
-     {
-         if (table[code] == 0)
-             table[code] = static_cast<unsigned char>(code);
-     }
-
-     void* storage = alloc->allocate_nothrow(sizeof(table));
-
-     if (storage)
-         memcpy(storage, table, sizeof(table));
-
-     return static_cast<unsigned char*>(storage);
- }
-
- // Applies a 128-entry table to buffer in place: characters with code < 128
- // are replaced by their table entry, or removed when the entry is 128;
- // other characters are copied unchanged. Returns a pointer to the new terminator.
- PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table)
- {
-     char_t* out = buffer;
-
-     for (char_t* in = buffer; *in; )
-     {
-         char_t ch = *in++;
-         unsigned int code_point = static_cast<unsigned int>(ch);
-
-         if (code_point >= 128)
-         {
-             // outside the table: keep as is
-             *out++ = ch;
-             continue;
-         }
-
-         unsigned char mapped = table[code_point];
-
-         // code=128 means "skip character": the store still happens, but the
-         // write pointer only advances when bit 7 is clear, so no branch is needed
-         *out = static_cast<char_t>(mapped);
-         out += 1 - (mapped >> 7);
-     }
-
-     *out = 0;
-
-     return out;
- }
-
- // False for namespace declarations ("xmlns" or "xmlns:..."), true for every
- // other attribute name.
- inline bool is_xpath_attribute(const char_t* name)
- {
-     if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return true;
-
-     // starts with "xmlns": still an ordinary attribute unless that is the whole
-     // name or a ':' follows
-     return name[5] != 0 && name[5] != ':';
- }
-
- // Boolean-typed XPath variable; value starts out false.
- struct xpath_variable_boolean: xpath_variable
- {
- xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false)
- {
- }
-
- bool value;
- // first character of the name; new_xpath_variable over-allocates the object
- // so that the full zero-terminated name is stored starting here
- char_t name[1];
- };
-
- // Number-typed XPath variable; value starts out 0.
- struct xpath_variable_number: xpath_variable
- {
- xpath_variable_number(): xpath_variable(xpath_type_number), value(0)
- {
- }
-
- double value;
- // first character of the name; new_xpath_variable over-allocates the object
- // so that the full zero-terminated name is stored starting here
- char_t name[1];
- };
-
- // String-typed XPath variable; value starts out null and, when set, is
- // released through xml_memory::deallocate by the destructor.
- struct xpath_variable_string: xpath_variable
- {
- xpath_variable_string(): xpath_variable(xpath_type_string), value(0)
- {
- }
-
- ~xpath_variable_string()
- {
- if (value) xml_memory::deallocate(value);
- }
-
- char_t* value;
- // first character of the name; new_xpath_variable over-allocates the object
- // so that the full zero-terminated name is stored starting here
- char_t name[1];
- };
-
- // Node-set-typed XPath variable; value is default-constructed.
- struct xpath_variable_node_set: xpath_variable
- {
- xpath_variable_node_set(): xpath_variable(xpath_type_node_set)
- {
- }
-
- xpath_node_set value;
- // first character of the name; new_xpath_variable over-allocates the object
- // so that the full zero-terminated name is stored starting here
- char_t name[1];
- };
-
- // Shared default-constructed node set.
- // NOTE(review): presumably handed out by reference where a node set is
- // requested but none is available - confirm at the use sites.
- static const xpath_node_set dummy_node_set;
-
- // Jenkins one-at-a-time hash of a zero-terminated string
- // (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
- PUGI__FN unsigned int hash_string(const char_t* str)
- {
-     unsigned int h = 0;
-
-     // mix in one character at a time
-     for (const char_t* p = str; *p; ++p)
-     {
-         h += static_cast<unsigned int>(*p);
-         h += h << 10;
-         h ^= h >> 6;
-     }
-
-     // final avalanche
-     h += h << 3;
-     h ^= h >> 11;
-     h += h << 15;
-
-     return h;
- }
-
- // Allocates and constructs a variable of concrete type T with the name stored
- // inline after the object. Returns 0 for an empty name or on allocation failure.
- template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
- {
-     const size_t name_length = strlength(name);
-
-     // empty variable names are invalid
-     if (name_length == 0) return 0;
-
-     // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
-     // (T::name[1] already accounts for the terminator)
-     const size_t bytes = sizeof(T) + name_length * sizeof(char_t);
-
-     void* raw = xml_memory::allocate(bytes);
-     if (!raw) return 0;
-
-     T* variable = new (raw) T();
-
-     // copy the name including its terminator
-     memcpy(variable->name, name, (name_length + 1) * sizeof(char_t));
-
-     return variable;
- }
-
- // Creates a variable of the concrete struct that corresponds to type.
- // Returns 0 for any other type value or when creation fails.
- PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
- {
-     if (type == xpath_type_node_set)
-         return new_xpath_variable<xpath_variable_node_set>(name);
-
-     if (type == xpath_type_number)
-         return new_xpath_variable<xpath_variable_number>(name);
-
-     if (type == xpath_type_string)
-         return new_xpath_variable<xpath_variable_string>(name);
-
-     if (type == xpath_type_boolean)
-         return new_xpath_variable<xpath_variable_boolean>(name);
-
-     return 0;
- }
-
- // Destroys a variable created by placement new: runs the destructor of the
- // concrete type explicitly, then returns the memory to xml_memory.
- template <typename T> PUGI__FN void delete_xpath_variable(T* var)
- {
-     T* doomed = var;
-
-     doomed->~T();
-
-     xml_memory::deallocate(doomed);
- }
-
- // Destroys var as the concrete struct that corresponds to type.
- // Any other type value trips an assertion and leaves var untouched.
- PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
- {
-     if (type == xpath_type_node_set)
-         delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
-     else if (type == xpath_type_number)
-         delete_xpath_variable(static_cast<xpath_variable_number*>(var));
-     else if (type == xpath_type_string)
-         delete_xpath_variable(static_cast<xpath_variable_string*>(var));
-     else if (type == xpath_type_boolean)
-         delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
-     else
-         assert(!"Invalid variable type");
- }
-
- // Copies the value of rhs into lhs through lhs->set(), choosing the overload
- // from the type reported by rhs. Returns the result of set(), or false
- // (after an assertion) for an unrecognized type.
- PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs)
- {
-     const xpath_value_type source_type = rhs->type();
-
-     if (source_type == xpath_type_node_set)
-         return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
-
-     if (source_type == xpath_type_number)
-         return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
-
-     if (source_type == xpath_type_string)
-         return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
-
-     if (source_type == xpath_type_boolean)
-         return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
-
-     assert(!"Invalid variable type");
-     return false;
- }
-
- // Looks up the variable named by the character range [begin, end) in set and
- // stores the result of set->get() in *out_result. The name is first copied
- // into a zero-terminated buffer: the caller's 32-element scratch array when
- // it fits, otherwise a temporary heap block. Returns false only if that heap
- // allocation fails.
- PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result)
- {
-     const size_t capacity = sizeof(buffer) / sizeof(buffer[0]);
-     const size_t length = static_cast<size_t>(end - begin);
-
-     // only allocate when the name plus terminator does not fit in the scratch array
-     char_t* heap_copy = 0;
-
-     if (length >= capacity)
-     {
-         heap_copy = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
-         if (!heap_copy) return false;
-     }
-
-     char_t* scratch = heap_copy ? heap_copy : buffer;
-
-     // build the zero-terminated name and perform the lookup
-     memcpy(scratch, begin, length * sizeof(char_t));
-     scratch[length] = 0;
-
-     *out_result = set->get(scratch);
-
-     // release the temporary heap block, if one was needed
-     if (heap_copy) xml_memory::deallocate(heap_copy);
-
-     return true;
- }
-PUGI__NS_END
-
-// Internal node set class
-PUGI__NS_BEGIN
- // Classifies [begin, end) by comparing each adjacent pair with
- // document_order_comparator: sorted when every pair compares true,
- // sorted_reverse when every pair compares false, unsorted when they disagree.
- // Ranges with fewer than two nodes count as sorted.
- PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
- {
-     if (end - begin < 2)
-         return xpath_node_set::type_sorted;
-
-     document_order_comparator cmp;
-
-     // the first pair decides which direction all other pairs must agree with
-     const bool ascending = cmp(begin[0], begin[1]);
-
-     const xpath_node* cur = begin + 1;
-
-     while (cur + 1 < end)
-     {
-         if (cmp(cur[0], cur[1]) != ascending)
-             return xpath_node_set::type_unsorted;
-
-         ++cur;
-     }
-
-     if (ascending)
-         return xpath_node_set::type_sorted;
-
-     return xpath_node_set::type_sorted_reverse;
- }
-
- // Brings [begin, end) into document order (rev == false) or reverse document
- // order (rev == true), given its current ordering type. Sorting is skipped
- // when the range is already ordered; at most a reversal is done then.
- // Returns the resulting ordering type.
- PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
- {
-     const xpath_node_set::type_t wanted = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
-
-     xpath_node_set::type_t current = type;
-
-     if (current == xpath_node_set::type_unsorted)
-     {
-         // the nodes may be ordered even though the set is not marked as such
-         current = xpath_get_order(begin, end);
-
-         if (current == xpath_node_set::type_unsorted)
-         {
-             sort(begin, end, document_order_comparator());
-
-             current = xpath_node_set::type_sorted;
-         }
-     }
-
-     // ordered, but in the opposite direction
-     if (current != wanted) reverse(begin, end);
-
-     return wanted;
- }
-
- // Returns the node of [begin, end) that comes first in document order, using
- // the ordering type to avoid a search where possible. An empty range gives
- // a default-constructed xpath_node.
- PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
- {
-     if (begin == end) return xpath_node();
-
-     // document order: the first element
-     if (type == xpath_node_set::type_sorted)
-         return *begin;
-
-     // reverse document order: the last element
-     if (type == xpath_node_set::type_sorted_reverse)
-         return *(end - 1);
-
-     // no known order: search for the minimum
-     if (type == xpath_node_set::type_unsorted)
-         return *min_element(begin, end, document_order_comparator());
-
-     assert(!"Invalid node set type");
-     return xpath_node();
- }
-
- // Growable array of xpath_node whose storage comes from an xpath_allocator
- // passed to each growing operation. The class has no destructor and never
- // frees the storage itself. It also tracks the ordering type of its contents.
- class xpath_node_set_raw
- {
- xpath_node_set::type_t _type;
-
- // [_begin, _end) holds the elements, [_begin, _eos) is the allocated capacity
- xpath_node* _begin;
- xpath_node* _end;
- xpath_node* _eos;
-
- public:
- // Starts empty, unsorted and without storage.
- xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
- {
- }
-
- xpath_node* begin() const
- {
- return _begin;
- }
-
- xpath_node* end() const
- {
- return _end;
- }
-
- bool empty() const
- {
- return _begin == _end;
- }
-
- size_t size() const
- {
- return static_cast<size_t>(_end - _begin);
- }
-
- // First node in document order according to the current ordering type.
- xpath_node first() const
- {
- return xpath_first(_begin, _end, _type);
- }
-
- // Slow path of push_back: grows the storage, then appends node.
- void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
-
- // Appends node, growing the storage only when it is full.
- void push_back(const xpath_node& node, xpath_allocator* alloc)
- {
- if (_end != _eos)
- *_end++ = node;
- else
- push_back_grow(node, alloc);
- }
-
- // Appends the range [begin_, end_), growing the storage to exactly the
- // required size when the current capacity is too small.
- void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
- {
- if (begin_ == end_) return;
-
- size_t size_ = static_cast<size_t>(_end - _begin);
- size_t capacity = static_cast<size_t>(_eos - _begin);
- size_t count = static_cast<size_t>(end_ - begin_);
-
- if (size_ + count > capacity)
- {
- // reallocate the old array or allocate a new one
- xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
- assert(data);
-
- // finalize
- _begin = data;
- _end = data + size_;
- _eos = data + size_ + count;
- }
-
- // nodes are copied as raw bytes
- memcpy(_end, begin_, count * sizeof(xpath_node));
- _end += count;
- }
-
- // Puts the contents into document order and records the new ordering type.
- void sort_do()
- {
- _type = xpath_sort(_begin, _end, _type, false);
- }
-
- // Drops every element from pos onwards; pos must lie within [_begin, _end].
- void truncate(xpath_node* pos)
- {
- assert(_begin <= pos && pos <= _end);
-
- _end = pos;
- }
-
- // Removes adjacent equal nodes; an unsorted set is first sorted with
- // duplicate_comparator so that equal nodes become adjacent. The ordering
- // type is left as it was.
- void remove_duplicates()
- {
- if (_type == xpath_node_set::type_unsorted)
- sort(_begin, _end, duplicate_comparator());
-
- _end = unique(_begin, _end);
- }
-
- xpath_node_set::type_t type() const
- {
- return _type;
- }
-
- void set_type(xpath_node_set::type_t value)
- {
- _type = value;
- }
- };
-
- // Out-of-line slow path of push_back: enlarges the storage to
- // capacity * 1.5 + 1 elements through the allocator and appends node.
- // The new end is placed at the old capacity, i.e. the set is treated as full.
- PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc)
- {
-     const size_t old_capacity = static_cast<size_t>(_eos - _begin);
-
-     // get new capacity (1.5x rule)
-     const size_t grown_capacity = old_capacity + old_capacity / 2 + 1;
-
-     const size_t old_bytes = old_capacity * sizeof(xpath_node);
-     const size_t grown_bytes = grown_capacity * sizeof(xpath_node);
-
-     // reallocate the old array or allocate a new one
-     xpath_node* storage = static_cast<xpath_node*>(alloc->reallocate(_begin, old_bytes, grown_bytes));
-     assert(storage);
-
-     // adopt the new storage
-     _begin = storage;
-     _end = storage + old_capacity;
-     _eos = storage + grown_capacity;
-
-     // there is room now
-     *_end++ = node;
- }
-PUGI__NS_END
-
-PUGI__NS_BEGIN
- // Evaluation context handed to the AST evaluators: the context node plus the
- // position and size values. The predicate helpers in this file pass a 1-based
- // position and the size of the set being filtered.
- struct xpath_context
- {
- xpath_node n;
- size_t position, size;
-
- xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
- {
- }
- };
-
- // Token kinds produced by xpath_lexer.
- enum lexeme_t
- {
- lex_none = 0, // input that does not start a valid token
- lex_equal, // =
- lex_not_equal, // !=
- lex_less, // <
- lex_greater, // >
- lex_less_or_equal, // <=
- lex_greater_or_equal, // >=
- lex_plus, // +
- lex_minus, // -
- lex_multiply, // *
- lex_union, // |
- lex_var_ref, // $name or $prefix:name; contents exclude the '$'
- lex_open_brace, // (
- lex_close_brace, // )
- lex_quoted_string, // '...' or "..."; contents exclude the quotes
- lex_number, // digits with optional fraction, or .digits
- lex_slash, // /
- lex_double_slash, // //
- lex_open_square_brace, // [
- lex_close_square_brace, // ]
- lex_string, // name, prefix:name or prefix:*
- lex_comma, // ,
- lex_axis_attribute, // @
- lex_dot, // .
- lex_double_dot, // ..
- lex_double_colon, // ::
- lex_eof // end of input
- };
-
- // Non-owning view of the character range [begin, end) inside the query text.
- struct xpath_lexer_string
- {
-     const char_t* begin;
-     const char_t* end;
-
-     // Starts as an empty range with both pointers null.
-     xpath_lexer_string(): begin(0), end(0)
-     {
-     }
-
-     // Compares the range against a zero-terminated string via strequalrange.
-     bool operator==(const char_t* other) const
-     {
-         const size_t range_length = static_cast<size_t>(end - begin);
-
-         return strequalrange(other, begin, range_length);
-     }
- };
-
- // Tokenizer for XPath query text. The constructor scans the first token;
- // next() advances to the following one. For tokens that carry text
- // (variable reference, number, name, quoted string) contents() gives the
- // character range inside the original query.
- class xpath_lexer
- {
- // position right after the current token
- const char_t* _cur;
- // start of the current token (after skipped whitespace)
- const char_t* _cur_lexeme_pos;
- xpath_lexer_string _cur_lexeme_contents;
-
- lexeme_t _cur_lexeme;
-
- public:
- explicit xpath_lexer(const char_t* query): _cur(query)
- {
- next();
- }
-
- // Current scan position, i.e. the first character after the current token.
- const char_t* state() const
- {
- return _cur;
- }
-
- // Scans the next token. Input that does not form a valid token produces
- // lex_none; the stray '!', '$' and ':' cases and the default case do so
- // without advancing past the offending character.
- void next()
- {
- const char_t* cur = _cur;
-
- while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
-
- // save lexeme position for error reporting
- _cur_lexeme_pos = cur;
-
- switch (*cur)
- {
- case 0:
- _cur_lexeme = lex_eof;
- break;
-
- case '>':
- if (*(cur+1) == '=')
- {
- cur += 2;
- _cur_lexeme = lex_greater_or_equal;
- }
- else
- {
- cur += 1;
- _cur_lexeme = lex_greater;
- }
- break;
-
- case '<':
- if (*(cur+1) == '=')
- {
- cur += 2;
- _cur_lexeme = lex_less_or_equal;
- }
- else
- {
- cur += 1;
- _cur_lexeme = lex_less;
- }
- break;
-
- case '!':
- if (*(cur+1) == '=')
- {
- cur += 2;
- _cur_lexeme = lex_not_equal;
- }
- else
- {
- // a lone '!' is not a token
- _cur_lexeme = lex_none;
- }
- break;
-
- case '=':
- cur += 1;
- _cur_lexeme = lex_equal;
-
- break;
-
- case '+':
- cur += 1;
- _cur_lexeme = lex_plus;
-
- break;
-
- case '-':
- cur += 1;
- _cur_lexeme = lex_minus;
-
- break;
-
- case '*':
- cur += 1;
- _cur_lexeme = lex_multiply;
-
- break;
-
- case '|':
- cur += 1;
- _cur_lexeme = lex_union;
-
- break;
-
- case '$':
- cur += 1;
-
- if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
- {
- // contents cover the name only, without the '$'
- _cur_lexeme_contents.begin = cur;
-
- while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
-
- if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
- {
- cur++; // :
-
- while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
- }
-
- _cur_lexeme_contents.end = cur;
-
- _cur_lexeme = lex_var_ref;
- }
- else
- {
- _cur_lexeme = lex_none;
- }
-
- break;
-
- case '(':
- cur += 1;
- _cur_lexeme = lex_open_brace;
-
- break;
-
- case ')':
- cur += 1;
- _cur_lexeme = lex_close_brace;
-
- break;
-
- case '[':
- cur += 1;
- _cur_lexeme = lex_open_square_brace;
-
- break;
-
- case ']':
- cur += 1;
- _cur_lexeme = lex_close_square_brace;
-
- break;
-
- case ',':
- cur += 1;
- _cur_lexeme = lex_comma;
-
- break;
-
- case '/':
- if (*(cur+1) == '/')
- {
- cur += 2;
- _cur_lexeme = lex_double_slash;
- }
- else
- {
- cur += 1;
- _cur_lexeme = lex_slash;
- }
- break;
-
- case '.':
- if (*(cur+1) == '.')
- {
- cur += 2;
- _cur_lexeme = lex_double_dot;
- }
- else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
- {
- // number that starts with the decimal point, e.g. ".5"
- _cur_lexeme_contents.begin = cur; // .
-
- ++cur;
-
- while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
-
- _cur_lexeme_contents.end = cur;
-
- _cur_lexeme = lex_number;
- }
- else
- {
- cur += 1;
- _cur_lexeme = lex_dot;
- }
- break;
-
- case '@':
- cur += 1;
- _cur_lexeme = lex_axis_attribute;
-
- break;
-
- case '"':
- case '\'':
- {
- // the string ends at the next occurrence of the same quote character
- char_t terminator = *cur;
-
- ++cur;
-
- _cur_lexeme_contents.begin = cur;
- while (*cur && *cur != terminator) cur++;
- _cur_lexeme_contents.end = cur;
-
- // reaching the end of input first means the string is unterminated
- if (!*cur)
- _cur_lexeme = lex_none;
- else
- {
- cur += 1;
- _cur_lexeme = lex_quoted_string;
- }
-
- break;
- }
-
- case ':':
- if (*(cur+1) == ':')
- {
- cur += 2;
- _cur_lexeme = lex_double_colon;
- }
- else
- {
- // a lone ':' is not a token
- _cur_lexeme = lex_none;
- }
- break;
-
- default:
- if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
- {
- // digits with an optional '.' and further digits
- _cur_lexeme_contents.begin = cur;
-
- while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
-
- if (*cur == '.')
- {
- cur++;
-
- while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
- }
-
- _cur_lexeme_contents.end = cur;
-
- _cur_lexeme = lex_number;
- }
- else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
- {
- _cur_lexeme_contents.begin = cur;
-
- while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
-
- // a single ':' continues the name; any other ':' is left for the next token
- if (cur[0] == ':')
- {
- if (cur[1] == '*') // namespace test ncname:*
- {
- cur += 2; // :*
- }
- else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
- {
- cur++; // :
-
- while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
- }
- }
-
- _cur_lexeme_contents.end = cur;
-
- _cur_lexeme = lex_string;
- }
- else
- {
- _cur_lexeme = lex_none;
- }
- }
-
- _cur = cur;
- }
-
- // Kind of the current token.
- lexeme_t current() const
- {
- return _cur_lexeme;
- }
-
- // Start of the current token in the query text.
- const char_t* current_pos() const
- {
- return _cur_lexeme_pos;
- }
-
- // Text range of the current token; only valid for the token kinds that carry text.
- const xpath_lexer_string& contents() const
- {
- assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
-
- return _cur_lexeme_contents;
- }
- };
-
- // Node kinds of the XPath expression tree. "left", "right" and "third" in
- // the comments name the operand sub-expressions of a node.
- enum ast_type_t
- {
- ast_unknown,
- ast_op_or, // left or right
- ast_op_and, // left and right
- ast_op_equal, // left = right
- ast_op_not_equal, // left != right
- ast_op_less, // left < right
- ast_op_greater, // left > right
- ast_op_less_or_equal, // left <= right
- ast_op_greater_or_equal, // left >= right
- ast_op_add, // left + right
- ast_op_subtract, // left - right
- ast_op_multiply, // left * right
- ast_op_divide, // left div right
- ast_op_mod, // left mod right
- ast_op_negate, // - left (unary minus)
- ast_op_union, // left | right
- ast_predicate, // apply predicate to set; next points to next predicate
- ast_filter, // select * from left where right
- ast_string_constant, // string constant
- ast_number_constant, // number constant
- ast_variable, // variable
- ast_func_last, // last()
- ast_func_position, // position()
- ast_func_count, // count(left)
- ast_func_id, // id(left)
- ast_func_local_name_0, // local-name()
- ast_func_local_name_1, // local-name(left)
- ast_func_namespace_uri_0, // namespace-uri()
- ast_func_namespace_uri_1, // namespace-uri(left)
- ast_func_name_0, // name()
- ast_func_name_1, // name(left)
- ast_func_string_0, // string()
- ast_func_string_1, // string(left)
- ast_func_concat, // concat(left, right, siblings)
- ast_func_starts_with, // starts-with(left, right)
- ast_func_contains, // contains(left, right)
- ast_func_substring_before, // substring-before(left, right)
- ast_func_substring_after, // substring-after(left, right)
- ast_func_substring_2, // substring(left, right)
- ast_func_substring_3, // substring(left, right, third)
- ast_func_string_length_0, // string-length()
- ast_func_string_length_1, // string-length(left)
- ast_func_normalize_space_0, // normalize-space()
- ast_func_normalize_space_1, // normalize-space(left)
- ast_func_translate, // translate(left, right, third)
- ast_func_boolean, // boolean(left)
- ast_func_not, // not(left)
- ast_func_true, // true()
- ast_func_false, // false()
- ast_func_lang, // lang(left)
- ast_func_number_0, // number()
- ast_func_number_1, // number(left)
- ast_func_sum, // sum(left)
- ast_func_floor, // floor(left)
- ast_func_ceiling, // ceiling(left)
- ast_func_round, // round(left)
- ast_step, // process set left with step
- ast_step_root, // select root node
-
- ast_opt_translate_table, // translate(left, right, third) where right/third are constants
- ast_opt_compare_attribute // @name = 'string'
- };
-
- // Axes of an XPath location step; each enumerator is named after the
- // corresponding XPath axis.
- enum axis_t
- {
- axis_ancestor,
- axis_ancestor_or_self,
- axis_attribute,
- axis_child,
- axis_descendant,
- axis_descendant_or_self,
- axis_following,
- axis_following_sibling,
- axis_namespace,
- axis_parent,
- axis_preceding,
- axis_preceding_sibling,
- axis_self
- };
-
- // Node tests of a location step, as checked by xpath_ast_node::step_push.
- enum nodetest_t
- {
- nodetest_none,
- nodetest_name, // name equals the stored node test string
- nodetest_type_node, // any node (for attributes: any non-xmlns attribute)
- nodetest_type_comment, // comment nodes
- nodetest_type_pi, // processing instructions
- nodetest_type_text, // pcdata and cdata nodes
- nodetest_pi, // processing instruction whose name equals the stored string
- nodetest_all, // any element (for attributes: any non-xmlns attribute)
- nodetest_all_in_namespace // name starts with the stored node test string
- };
-
- // Classification of a predicate expression, stored in the node's _test field.
- // apply_predicate sends predicate_constant and predicate_constant_one to the
- // constant-number path; every other value takes the per-node evaluation path.
- // NOTE(review): the exact meaning of predicate_posinv is decided where the
- // values are assigned, outside this part of the file - confirm there.
- enum predicate_t
- {
- predicate_default,
- predicate_posinv,
- predicate_constant,
- predicate_constant_one
- };
-
- // How much of a node set the caller of eval_node_set needs. eval_once uses it
- // to decide whether predicate application may stop after the first match.
- // NOTE(review): presumably all = whole set, any = some node,
- // first = first node in document order - confirm at the evaluators.
- enum nodeset_eval_t
- {
- nodeset_eval_all,
- nodeset_eval_any,
- nodeset_eval_first
- };
-
- // Turns an axis_t value into a distinct type whose static member axis holds
- // that value, so that an axis can be passed as a tag argument (step_fill
- // reads T::axis from such a tag).
- template <axis_t N> struct axis_to_type
- {
- static const axis_t axis;
- };
-
- // out-of-class definition of the static member
- template <axis_t N> const axis_t axis_to_type<N>::axis = N;
-
- class xpath_ast_node
- {
- private:
- // node type
- char _type;
- char _rettype;
-
- // for ast_step
- char _axis;
-
- // for ast_step/ast_predicate/ast_filter
- char _test;
-
- // tree node structure
- xpath_ast_node* _left;
- xpath_ast_node* _right;
- xpath_ast_node* _next;
-
- union
- {
- // value for ast_string_constant
- const char_t* string;
- // value for ast_number_constant
- double number;
- // variable for ast_variable
- xpath_variable* variable;
- // node test for ast_step (node name/namespace/node type/pi target)
- const char_t* nodetest;
- // table for ast_opt_translate_table
- const unsigned char* table;
- } _data;
-
- xpath_ast_node(const xpath_ast_node&);
- xpath_ast_node& operator=(const xpath_ast_node&);
-
- // Evaluates an equality-style comparison (comp is the comparison functor)
- // between two sub-expressions, picking the conversion from their static
- // return types:
- //  - neither is a node set: compare as booleans if either is boolean, else
- //    as numbers if either is a number, else as strings;
- //  - both are node sets: true if any pair of node string values satisfies comp;
- //  - exactly one is a node set: the operands are swapped if needed so that the
- //    node set is on the right, then the other operand is compared as a boolean
- //    against the set, or as a number/string against each node's string value.
- // Temporary strings and node sets live in stack.result and are released by
- // the xpath_allocator_capture objects when their scope ends.
- template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
- {
- xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
-
- if (lt != xpath_type_node_set && rt != xpath_type_node_set)
- {
- if (lt == xpath_type_boolean || rt == xpath_type_boolean)
- return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
- else if (lt == xpath_type_number || rt == xpath_type_number)
- return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
- else if (lt == xpath_type_string || rt == xpath_type_string)
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_string ls = lhs->eval_string(c, stack);
- xpath_string rs = rhs->eval_string(c, stack);
-
- return comp(ls, rs);
- }
- }
- else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
-
- // existential semantics: one matching pair is enough
- for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
- for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
- {
- // release the per-pair string values after each comparison
- xpath_allocator_capture cri(stack.result);
-
- if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
- return true;
- }
-
- return false;
- }
- else
- {
- // normalize so that rhs is the node set
- if (lt == xpath_type_node_set)
- {
- swap(lhs, rhs);
- swap(lt, rt);
- }
-
- if (lt == xpath_type_boolean)
- return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
- else if (lt == xpath_type_number)
- {
- xpath_allocator_capture cr(stack.result);
-
- double l = lhs->eval_number(c, stack);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
-
- for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
- {
- xpath_allocator_capture cri(stack.result);
-
- if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
- return true;
- }
-
- return false;
- }
- else if (lt == xpath_type_string)
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_string l = lhs->eval_string(c, stack);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
-
- for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
- {
- xpath_allocator_capture cri(stack.result);
-
- if (comp(l, string_value(*ri, stack.result)))
- return true;
- }
-
- return false;
- }
- }
-
- // reached only when the return types match none of the cases above
- assert(!"Wrong types");
- return false;
- }
-
- // Tells whether predicate application may stop after the first match: for a
- // sorted set whenever the caller does not need all nodes, for any other
- // ordering only when the caller asked for any node.
- static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval)
- {
-     if (type == xpath_node_set::type_sorted)
-         return eval != nodeset_eval_all;
-
-     return eval == nodeset_eval_any;
- }
-
- // Evaluates a relational comparison (comp is the comparison functor) between
- // two sub-expressions. All operands are compared as numbers: non-node-set
- // operands through eval_number, node sets through the number conversion of
- // each node's string value. With node sets involved the result is true as
- // soon as one node (or one pair of nodes) satisfies comp. Operand order is
- // preserved, so comp always receives (left, right).
- // Temporaries live in stack.result and are released by the
- // xpath_allocator_capture objects when their scope ends.
- template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
- {
- xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
-
- if (lt != xpath_type_node_set && rt != xpath_type_node_set)
- return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
- else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
-
- for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
- {
- xpath_allocator_capture cri(stack.result);
-
- // the left value is converted once per left node
- double l = convert_string_to_number(string_value(*li, stack.result).c_str());
-
- for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
- {
- xpath_allocator_capture crii(stack.result);
-
- if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
- return true;
- }
- }
-
- return false;
- }
- else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
- {
- xpath_allocator_capture cr(stack.result);
-
- double l = lhs->eval_number(c, stack);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
-
- for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
- {
- xpath_allocator_capture cri(stack.result);
-
- if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
- return true;
- }
-
- return false;
- }
- else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
- double r = rhs->eval_number(c, stack);
-
- for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
- {
- xpath_allocator_capture cri(stack.result);
-
- if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
- return true;
- }
-
- return false;
- }
- else
- {
- // the four cases above cover every combination
- assert(!"Wrong types");
- return false;
- }
- }
-
- // Filters the nodes of ns from index first onwards in place, keeping those
- // for which expr evaluates to true. Each node is evaluated with its 1-based
- // position and the size of the filtered range as context. With once set,
- // filtering stops after the first kept node and the rest is discarded.
- static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
- {
-     assert(ns.size() >= first);
-     assert(expr->rettype() != xpath_type_number);
-
-     const size_t size = ns.size() - first;
-
-     // kept nodes are compacted towards the start of the filtered range
-     xpath_node* keep = ns.begin() + first;
-
-     size_t position = 1;
-
-     for (xpath_node* it = keep; it != ns.end(); ++it, ++position)
-     {
-         xpath_context c(*it, position, size);
-
-         if (!expr->eval_boolean(c, stack)) continue;
-
-         *keep++ = *it;
-
-         if (once) break;
-     }
-
-     ns.truncate(keep);
- }
-
- // Filters the nodes of ns from index first onwards in place, keeping those
- // whose 1-based position equals the number expr evaluates to in that node's
- // context. With once set, filtering stops after the first kept node and the
- // rest is discarded.
- static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
- {
-     assert(ns.size() >= first);
-     assert(expr->rettype() == xpath_type_number);
-
-     const size_t size = ns.size() - first;
-
-     // kept nodes are compacted towards the start of the filtered range
-     xpath_node* keep = ns.begin() + first;
-
-     size_t position = 1;
-
-     for (xpath_node* it = keep; it != ns.end(); ++it, ++position)
-     {
-         xpath_context c(*it, position, size);
-
-         if (expr->eval_number(c, stack) == position)
-         {
-             *keep++ = *it;
-
-             if (once) break;
-         }
-     }
-
-     ns.truncate(keep);
- }
-
- // Numeric predicate whose value does not depend on the node: expr is
- // evaluated once (with an empty context node) and, when the result is a whole
- // number within [1, size], only the node at that 1-based position is kept.
- // In every other case the filtered range becomes empty.
- static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
- {
-     assert(ns.size() >= first);
-     assert(expr->rettype() == xpath_type_number);
-
-     const size_t size = ns.size() - first;
-
-     xpath_node* out = ns.begin() + first;
-
-     xpath_context c(xpath_node(), 1, size);
-
-     const double wanted = expr->eval_number(c, stack);
-
-     const bool in_range = wanted >= 1.0 && wanted <= size;
-
-     if (in_range)
-     {
-         const size_t index = static_cast<size_t>(wanted);
-
-         // fractional positions never match
-         if (wanted == index)
-         {
-             xpath_node picked = out[index - 1];
-
-             *out++ = picked;
-         }
-     }
-
-     ns.truncate(out);
- }
-
- // Applies this predicate/filter node to the nodes of ns from index first
- // onwards, choosing the constant-number, per-node-number or boolean filter
- // from _test and the return type of the predicate expression (_right).
- // Does nothing when there are no nodes to filter.
- void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once)
- {
-     if (ns.size() == first) return;
-
-     assert(_type == ast_filter || _type == ast_predicate);
-
-     const bool constant_number = (_test == predicate_constant || _test == predicate_constant_one);
-
-     if (constant_number)
-     {
-         apply_predicate_number_const(ns, first, _right, stack);
-         return;
-     }
-
-     if (_right->rettype() == xpath_type_number)
-     {
-         apply_predicate_number(ns, first, _right, stack, once);
-         return;
-     }
-
-     apply_predicate_boolean(ns, first, _right, stack, once);
- }
-
- // Applies the chain of predicates that starts at _right (linked through
- // _next) to the nodes of ns from index first onwards. Only the last
- // predicate of the chain may stop early, and only when eval_once allows it
- // for the set's ordering and the requested evaluation mode.
- void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval)
- {
-     if (ns.size() == first) return;
-
-     const bool last_once = eval_once(ns.type(), eval);
-
-     xpath_ast_node* pred = _right;
-
-     while (pred)
-     {
-         const bool is_last = !pred->_next;
-
-         pred->apply_predicate(ns, first, stack, is_last && last_once);
-
-         pred = pred->_next;
-     }
- }
-
- // Appends attribute a (owned by parent) to ns when it passes this step's
- // node test and is not a namespace declaration. Returns true when the
- // attribute was appended.
- bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc)
- {
-     assert(a);
-
-     const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");
-
-     bool accepted = false;
-
-     switch (_test)
-     {
-     case nodetest_name:
-         // exact name match
-         accepted = strequal(name, _data.nodetest) && is_xpath_attribute(name);
-         break;
-
-     case nodetest_type_node:
-     case nodetest_all:
-         // every ordinary attribute
-         accepted = is_xpath_attribute(name);
-         break;
-
-     case nodetest_all_in_namespace:
-         // name starts with the stored node test string
-         accepted = starts_with(name, _data.nodetest) && is_xpath_attribute(name);
-         break;
-
-     default:
-         ;
-     }
-
-     if (!accepted) return false;
-
-     ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
-
-     return true;
- }
-
- // Tests node n against this step's node test (_test, with the name/prefix in _data.nodetest)
- // and appends it to ns on a match.
- // Returns true when the node was appended, false otherwise.
- bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc)
- {
-     assert(n);
-
-     xml_node_type type = PUGI__NODETYPE(n);
-
-     switch (_test)
-     {
-     case nodetest_name:
-         // element with exactly the requested name
-         if (type == node_element && n->name && strequal(n->name, _data.nodetest))
-         {
-             ns.push_back(xml_node(n), alloc);
-             return true;
-         }
-         break;
-
-     case nodetest_type_node:
-         // node() accepts every node unconditionally
-         ns.push_back(xml_node(n), alloc);
-         return true;
-
-     case nodetest_type_comment:
-         if (type == node_comment)
-         {
-             ns.push_back(xml_node(n), alloc);
-             return true;
-         }
-         break;
-
-     case nodetest_type_text:
-         // both plain character data and CDATA sections count as text
-         if (type == node_pcdata || type == node_cdata)
-         {
-             ns.push_back(xml_node(n), alloc);
-             return true;
-         }
-         break;
-
-     case nodetest_type_pi:
-         if (type == node_pi)
-         {
-             ns.push_back(xml_node(n), alloc);
-             return true;
-         }
-         break;
-
-     case nodetest_pi:
-         // processing instruction with exactly the requested target name
-         if (type == node_pi && n->name && strequal(n->name, _data.nodetest))
-         {
-             ns.push_back(xml_node(n), alloc);
-             return true;
-         }
-         break;
-
-     case nodetest_all:
-         if (type == node_element)
-         {
-             ns.push_back(xml_node(n), alloc);
-             return true;
-         }
-         break;
-
-     case nodetest_all_in_namespace:
-         // element whose name starts with the stored prefix
-         if (type == node_element && n->name && starts_with(n->name, _data.nodetest))
-         {
-             ns.push_back(xml_node(n), alloc);
-             return true;
-         }
-         break;
-
-     default:
-         // the switch is over the node test, not the axis, so say so in the diagnostic
-         assert(!"Unknown node test");
-     }
-
-     return false;
- }
-
- template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T) // Walks the axis T::axis starting from node n and offers each visited node to step_push(ns, ..., alloc); with once set, most axes return as soon as one node has been accepted
- { // the unnamed T argument is used only for its static member T::axis
- const axis_t axis = T::axis;
-
- switch (axis)
- {
- case axis_attribute:
- {
- for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
- if (step_push(ns, a, n, alloc) & once) // true only when the attribute was pushed AND once is requested
- return;
-
- break;
- }
-
- case axis_child:
- {
- for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
- if (step_push(ns, c, alloc) & once)
- return;
-
- break;
- }
-
- case axis_descendant:
- case axis_descendant_or_self:
- {
- if (axis == axis_descendant_or_self) // the "-or-self" variant offers n itself first
- if (step_push(ns, n, alloc) & once)
- return;
-
- xml_node_struct* cur = n->first_child;
-
- while (cur) // depth-first walk over the subtree below n
- {
- if (step_push(ns, cur, alloc) & once)
- return;
-
- if (cur->first_child)
- cur = cur->first_child;
- else
- {
- while (!cur->next_sibling) // no more siblings here: climb until one is found
- {
- cur = cur->parent;
-
- if (cur == n) return; // climbed back to the start node: the subtree is exhausted
- }
-
- cur = cur->next_sibling;
- }
- }
-
- break;
- }
-
- case axis_following_sibling:
- {
- for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
- if (step_push(ns, c, alloc) & once)
- return;
-
- break;
- }
-
- case axis_preceding_sibling:
- {
- for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c) // NOTE(review): termination relies on prev_sibling_c of the first sibling leading to a node with null next_sibling (presumably the last sibling) - confirm against xml_node_struct
- if (step_push(ns, c, alloc) & once)
- return;
-
- break;
- }
-
- case axis_following:
- {
- xml_node_struct* cur = n;
-
- // exit from this node so that we don't include descendants
- while (!cur->next_sibling)
- {
- cur = cur->parent;
-
- if (!cur) return; // ran out of ancestors: nothing follows n
- }
-
- cur = cur->next_sibling;
-
- while (cur) // same depth-first walk as for descendants, but it continues up to the top of the tree
- {
- if (step_push(ns, cur, alloc) & once)
- return;
-
- if (cur->first_child)
- cur = cur->first_child;
- else
- {
- while (!cur->next_sibling)
- {
- cur = cur->parent;
-
- if (!cur) return;
- }
-
- cur = cur->next_sibling;
- }
- }
-
- break;
- }
-
- case axis_preceding:
- {
- xml_node_struct* cur = n;
-
- // exit from this node so that we don't include descendants
- while (!cur->prev_sibling_c->next_sibling) // presumably true when cur has no previous sibling - confirm against xml_node_struct
- {
- cur = cur->parent;
-
- if (!cur) return;
- }
-
- cur = cur->prev_sibling_c;
-
- while (cur) // walk backwards; a node is offered only after its children have been handled
- {
- if (cur->first_child)
- cur = cur->first_child->prev_sibling_c; // jump to the node reached through the first child's prev_sibling_c (presumably the last child)
- else
- {
- // leaf node, can't be ancestor
- if (step_push(ns, cur, alloc) & once)
- return;
-
- while (!cur->prev_sibling_c->next_sibling)
- {
- cur = cur->parent;
-
- if (!cur) return;
-
- if (!node_is_ancestor(cur, n)) // parents reached while climbing are offered unless node_is_ancestor(cur, n) holds
- if (step_push(ns, cur, alloc) & once)
- return;
- }
-
- cur = cur->prev_sibling_c;
- }
- }
-
- break;
- }
-
- case axis_ancestor:
- case axis_ancestor_or_self:
- {
- if (axis == axis_ancestor_or_self) // the "-or-self" variant offers n itself first
- if (step_push(ns, n, alloc) & once)
- return;
-
- xml_node_struct* cur = n->parent;
-
- while (cur) // follow parent links until they run out
- {
- if (step_push(ns, cur, alloc) & once)
- return;
-
- cur = cur->parent;
- }
-
- break;
- }
-
- case axis_self:
- {
- step_push(ns, n, alloc); // single candidate, so once is irrelevant
-
- break;
- }
-
- case axis_parent:
- {
- if (n->parent)
- step_push(ns, n->parent, alloc); // single candidate, so once is irrelevant
-
- break;
- }
-
- default:
- assert(!"Unimplemented axis");
- }
- }
-
- template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v) // Walks the axis T::axis starting from attribute a, whose owning node is p, and offers visited nodes to step_push; with once set, returns after the first accepted node
- { // v is only forwarded to the node overload for the preceding axis
- const axis_t axis = T::axis;
-
- switch (axis)
- {
- case axis_ancestor:
- case axis_ancestor_or_self:
- {
- if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
- if (step_push(ns, a, p, alloc) & once)
- return;
-
- xml_node_struct* cur = p; // the owning node is the attribute's first ancestor
-
- while (cur)
- {
- if (step_push(ns, cur, alloc) & once)
- return;
-
- cur = cur->parent;
- }
-
- break;
- }
-
- case axis_descendant_or_self:
- case axis_self:
- {
- if (_test == nodetest_type_node) // reject attributes based on principal node type test
- step_push(ns, a, p, alloc); // only the attribute itself is a candidate on these axes
-
- break;
- }
-
- case axis_following:
- {
- xml_node_struct* cur = p;
-
- while (cur) // unlike the node overload, the walk starts by descending into p's children, so they are included
- {
- if (cur->first_child)
- cur = cur->first_child;
- else
- {
- while (!cur->next_sibling) // no more siblings here: climb until one is found
- {
- cur = cur->parent;
-
- if (!cur) return;
- }
-
- cur = cur->next_sibling;
- }
-
- if (step_push(ns, cur, alloc) & once) // cur is advanced before being offered, so p itself is never pushed
- return;
- }
-
- break;
- }
-
- case axis_parent:
- {
- step_push(ns, p, alloc);
-
- break;
- }
-
- case axis_preceding:
- {
- // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
- step_fill(ns, p, alloc, once, v);
- break;
- }
-
- default:
- assert(!"Unimplemented axis");
- }
- }
-
- // Dispatches an axis walk for an xpath_node: element-like nodes go to the node overload,
- // attributes go to the attribute overload, but only on axes for which that overload has a case.
- template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v)
- {
-     const axis_t axis = T::axis;
-
-     // axes handled by the attribute overload of step_fill
-     const bool axis_has_attributes =
-         (axis == axis_ancestor ||
-          axis == axis_ancestor_or_self ||
-          axis == axis_descendant_or_self ||
-          axis == axis_following ||
-          axis == axis_parent ||
-          axis == axis_preceding ||
-          axis == axis_self);
-
-     if (xn.node())
-     {
-         step_fill(ns, xn.node().internal_object(), alloc, once, v);
-         return;
-     }
-
-     if (!axis_has_attributes) return;
-
-     if (xn.attribute() && xn.parent())
-     {
-         step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v);
-     }
- }
-
- template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v) // Evaluates one location step on axis T::axis: fills a node set from the context node (or from every node of the _left sub-expression), then applies the predicates in _right
- {
- const axis_t axis = T::axis;
- const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
- const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; // the order in which a single walk of this axis produces nodes
-
- bool once = // stop filling after the first accepted node when any of the following holds
- (axis == axis_attribute && _test == nodetest_name) || // attribute selected by exact name
- (!_right && eval_once(axis_type, eval)) || // no predicates, and eval_once allows it for this order and eval mode
- (_right && !_right->_next && _right->_test == predicate_constant_one); // exactly one predicate, classified as the constant 1
-
- xpath_node_set_raw ns;
- ns.set_type(axis_type);
-
- if (_left)
- {
- xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all); // the preceding part of the path is always evaluated in full
-
- // self axis preserves the original order
- if (axis == axis_self) ns.set_type(s.type());
-
- for (const xpath_node* it = s.begin(); it != s.end(); ++it)
- {
- size_t size = ns.size(); // nodes added for *it start at this index
-
- // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
- if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
-
- step_fill(ns, *it, stack.result, once, v);
- if (_right) apply_predicates(ns, size, stack, eval); // predicates see only the nodes just added for *it
- }
- }
- else
- {
- step_fill(ns, c.n, stack.result, once, v); // no left operand: the step starts at the context node
- if (_right) apply_predicates(ns, 0, stack, eval);
- }
-
- // child, attribute and self axes always generate unique set of nodes
- // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
- if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
- ns.remove_duplicates();
-
- return ns;
- }
-
- public:
- // Creates a string-constant node. The pointer is stored as-is in _data.string (the text is not copied here).
- xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
-     _type(static_cast<char>(type)),
-     _rettype(static_cast<char>(rettype_)),
-     _axis(0),
-     _test(0),
-     _left(0),
-     _right(0),
-     _next(0)
- {
-     assert(type == ast_string_constant);
-
-     _data.string = value;
- }
-
- // Creates a number-constant node holding value in _data.number.
- xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
-     _type(static_cast<char>(type)),
-     _rettype(static_cast<char>(rettype_)),
-     _axis(0),
-     _test(0),
-     _left(0),
-     _right(0),
-     _next(0)
- {
-     assert(type == ast_number_constant);
-
-     _data.number = value;
- }
-
- // Creates a variable-reference node. The variable pointer is stored in _data.variable.
- xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
-     _type(static_cast<char>(type)),
-     _rettype(static_cast<char>(rettype_)),
-     _axis(0),
-     _test(0),
-     _left(0),
-     _right(0),
-     _next(0)
- {
-     assert(type == ast_variable);
-
-     _data.variable = value;
- }
-
- // Creates a generic operator/function node with up to two operands; _data is left untouched.
- xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
-     _type(static_cast<char>(type)),
-     _rettype(static_cast<char>(rettype_)),
-     _axis(0),
-     _test(0),
-     _left(left),
-     _right(right),
-     _next(0)
- {
-     // nothing else to initialize
- }
-
- // Creates a location-step node: left is the preceding part of the path, axis/test describe the step,
- // and contents is the name used by the node test (stored as-is in _data.nodetest).
- xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
-     _type(static_cast<char>(type)),
-     _rettype(xpath_type_node_set),
-     _axis(static_cast<char>(axis)),
-     _test(static_cast<char>(test)),
-     _left(left),
-     _right(0),
-     _next(0)
- {
-     assert(type == ast_step);
-
-     _data.nodetest = contents;
- }
-
- // Creates a filter or predicate node; test records how the predicate has been classified.
- xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test):
-     _type(static_cast<char>(type)),
-     _rettype(xpath_type_node_set),
-     _axis(0),
-     _test(static_cast<char>(test)),
-     _left(left),
-     _right(right),
-     _next(0)
- {
-     assert(type == ast_filter || type == ast_predicate);
- }
-
- // Links value as the next node in this node's sibling chain.
- void set_next(xpath_ast_node* value)
- {
-     this->_next = value;
- }
-
- // Replaces the right operand of this node with value.
- void set_right(xpath_ast_node* value)
- {
-     this->_right = value;
- }
-
- bool eval_boolean(const xpath_context& c, const xpath_stack& stack) // Evaluates this AST node in context c and returns the result as a boolean
- { // c.n is the context node; stack carries the allocators that are handed to sub-evaluations
- switch (_type)
- {
- case ast_op_or:
- return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack); // short-circuit: the right operand is not evaluated when the left one is true
-
- case ast_op_and:
- return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack); // short-circuit: the right operand is not evaluated when the left one is false
-
- case ast_op_equal:
- return compare_eq(_left, _right, c, stack, equal_to());
-
- case ast_op_not_equal:
- return compare_eq(_left, _right, c, stack, not_equal_to());
-
- case ast_op_less:
- return compare_rel(_left, _right, c, stack, less());
-
- case ast_op_greater:
- return compare_rel(_right, _left, c, stack, less()); // a > b is computed as b < a by swapping the operands
-
- case ast_op_less_or_equal:
- return compare_rel(_left, _right, c, stack, less_equal());
-
- case ast_op_greater_or_equal:
- return compare_rel(_right, _left, c, stack, less_equal()); // a >= b is computed as b <= a by swapping the operands
-
- case ast_func_starts_with:
- {
- xpath_allocator_capture cr(stack.result); // presumably releases what the string evaluations allocate from stack.result when cr leaves scope - confirm against xpath_allocator_capture
-
- xpath_string lr = _left->eval_string(c, stack);
- xpath_string rr = _right->eval_string(c, stack);
-
- return starts_with(lr.c_str(), rr.c_str());
- }
-
- case ast_func_contains:
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_string lr = _left->eval_string(c, stack);
- xpath_string rr = _right->eval_string(c, stack);
-
- return find_substring(lr.c_str(), rr.c_str()) != 0;
- }
-
- case ast_func_boolean:
- return _left->eval_boolean(c, stack);
-
- case ast_func_not:
- return !_left->eval_boolean(c, stack);
-
- case ast_func_true:
- return true;
-
- case ast_func_false:
- return false;
-
- case ast_func_lang:
- {
- if (c.n.attribute()) return false; // an attribute context node never matches
-
- xpath_allocator_capture cr(stack.result);
-
- xpath_string lang = _left->eval_string(c, stack);
-
- for (xml_node n = c.n.node(); n; n = n.parent()) // look for xml:lang on the context node, then on each ancestor in turn
- {
- xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
-
- if (a) // the first xml:lang attribute found decides the result
- {
- const char_t* value = a.value();
-
- // strnicmp / strncasecmp is not portable
- for (const char_t* lit = lang.c_str(); *lit; ++lit)
- {
- if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
- ++value;
- }
-
- return *value == 0 || *value == '-'; // the argument must match the whole value or be followed by a '-' suffix
- }
- }
-
- return false;
- }
-
- case ast_opt_compare_attribute:
- {
- const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string(); // the right operand is either a string literal or a variable
-
- xml_attribute attr = c.n.node().attribute(_left->_data.nodetest); // direct attribute lookup by the name stored in the left step
-
- return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
- }
-
- case ast_variable:
- {
- assert(_rettype == _data.variable->type());
-
- if (_rettype == xpath_type_boolean)
- return _data.variable->get_boolean();
-
- // fallthrough to type conversion
- }
-
- default:
- {
- switch (_rettype) // the node does not produce a boolean itself: evaluate it in its own type, then convert
- {
- case xpath_type_number:
- return convert_number_to_boolean(eval_number(c, stack));
-
- case xpath_type_string:
- {
- xpath_allocator_capture cr(stack.result);
-
- return !eval_string(c, stack).empty(); // a string converts to true when it is not empty
- }
-
- case xpath_type_node_set:
- {
- xpath_allocator_capture cr(stack.result);
-
- return !eval_node_set(c, stack, nodeset_eval_any).empty(); // a node set converts to true when it is not empty
- }
-
- default:
- assert(!"Wrong expression for return type boolean");
- return false;
- }
- }
- }
- }
-
- double eval_number(const xpath_context& c, const xpath_stack& stack) // Evaluates this AST node in context c and returns the result as a number
- { // c supplies the context node, position and size; stack carries the allocators handed to sub-evaluations
- switch (_type)
- {
- case ast_op_add:
- return _left->eval_number(c, stack) + _right->eval_number(c, stack);
-
- case ast_op_subtract:
- return _left->eval_number(c, stack) - _right->eval_number(c, stack);
-
- case ast_op_multiply:
- return _left->eval_number(c, stack) * _right->eval_number(c, stack);
-
- case ast_op_divide:
- return _left->eval_number(c, stack) / _right->eval_number(c, stack); // plain floating-point division, no special handling of a zero divisor
-
- case ast_op_mod:
- return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
-
- case ast_op_negate:
- return -_left->eval_number(c, stack);
-
- case ast_number_constant:
- return _data.number;
-
- case ast_func_last:
- return static_cast<double>(c.size);
-
- case ast_func_position:
- return static_cast<double>(c.position);
-
- case ast_func_count:
- {
- xpath_allocator_capture cr(stack.result); // presumably releases what the node-set evaluation allocates from stack.result when cr leaves scope - confirm against xpath_allocator_capture
-
- return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
- }
-
- case ast_func_string_length_0:
- {
- xpath_allocator_capture cr(stack.result);
-
- return static_cast<double>(string_value(c.n, stack.result).length()); // no argument: uses the string value of the context node
- }
-
- case ast_func_string_length_1:
- {
- xpath_allocator_capture cr(stack.result);
-
- return static_cast<double>(_left->eval_string(c, stack).length());
- }
-
- case ast_func_number_0:
- {
- xpath_allocator_capture cr(stack.result);
-
- return convert_string_to_number(string_value(c.n, stack.result).c_str()); // no argument: converts the string value of the context node
- }
-
- case ast_func_number_1:
- return _left->eval_number(c, stack);
-
- case ast_func_sum:
- {
- xpath_allocator_capture cr(stack.result);
-
- double r = 0;
-
- xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);
-
- for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
- {
- xpath_allocator_capture cri(stack.result); // separate capture for each node's string value
-
- r += convert_string_to_number(string_value(*it, stack.result).c_str());
- }
-
- return r;
- }
-
- case ast_func_floor:
- {
- double r = _left->eval_number(c, stack);
-
- return r == r ? floor(r) : r; // r != r only for NaN, which is returned unchanged
- }
-
- case ast_func_ceiling:
- {
- double r = _left->eval_number(c, stack);
-
- return r == r ? ceil(r) : r; // r != r only for NaN, which is returned unchanged
- }
-
- case ast_func_round:
- return round_nearest_nzero(_left->eval_number(c, stack));
-
- case ast_variable:
- {
- assert(_rettype == _data.variable->type());
-
- if (_rettype == xpath_type_number)
- return _data.variable->get_number();
-
- // fallthrough to type conversion
- }
-
- default:
- {
- switch (_rettype) // the node does not produce a number itself: evaluate it in its own type, then convert
- {
- case xpath_type_boolean:
- return eval_boolean(c, stack) ? 1 : 0;
-
- case xpath_type_string:
- {
- xpath_allocator_capture cr(stack.result);
-
- return convert_string_to_number(eval_string(c, stack).c_str());
- }
-
- case xpath_type_node_set:
- {
- xpath_allocator_capture cr(stack.result);
-
- return convert_string_to_number(eval_string(c, stack).c_str()); // a node set is converted through its string form
- }
-
- default:
- assert(!"Wrong expression for return type number");
- return 0;
- }
-
- }
- }
- }
-
- xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack) // Evaluates concat(): the first argument is _left, the remaining ones are the chain starting at _right; returns the joined string allocated from stack.result
- {
- assert(_type == ast_func_concat);
-
- xpath_allocator_capture ct(stack.temp); // presumably releases the temporary argument strings when ct leaves scope - confirm against xpath_allocator_capture
-
- // count the string number
- size_t count = 1; // starts at 1 to account for _left
- for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
-
- // gather all strings
- xpath_string static_buffer[4]; // up to four arguments need no extra allocation
- xpath_string* buffer = static_buffer;
-
- // allocate on-heap for large concats
- if (count > sizeof(static_buffer) / sizeof(static_buffer[0]))
- {
- buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
- assert(buffer);
- }
-
- // evaluate all strings to temporary stack
- xpath_stack swapped_stack = {stack.temp, stack.result}; // the two allocators exchanged (assuming the member order is result, temp)
-
- buffer[0] = _left->eval_string(c, swapped_stack);
-
- size_t pos = 1;
- for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
- assert(pos == count);
-
- // get total length
- size_t length = 0;
- for (size_t i = 0; i < count; ++i) length += buffer[i].length();
-
- // create final string
- char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t))); // +1 for the terminating zero
- assert(result);
-
- char_t* ri = result;
-
- for (size_t j = 0; j < count; ++j)
- for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
- *ri++ = *bi;
-
- *ri = 0;
-
- return xpath_string::from_heap_preallocated(result, ri);
- }
-
- xpath_string eval_string(const xpath_context& c, const xpath_stack& stack) // Evaluates this AST node in context c and returns the result as a string
- { // c.n is the context node; heap results are allocated from stack.result, scratch data from stack.temp
- switch (_type)
- {
- case ast_string_constant:
- return xpath_string::from_const(_data.string);
-
- case ast_func_local_name_0:
- {
- xpath_node na = c.n; // no argument: the function applies to the context node
-
- return xpath_string::from_const(local_name(na));
- }
-
- case ast_func_local_name_1:
- {
- xpath_allocator_capture cr(stack.result); // presumably releases the node set allocated below when cr leaves scope - confirm against xpath_allocator_capture
-
- xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
- xpath_node na = ns.first(); // only the node returned by first() is used
-
- return xpath_string::from_const(local_name(na));
- }
-
- case ast_func_name_0:
- {
- xpath_node na = c.n;
-
- return xpath_string::from_const(qualified_name(na));
- }
-
- case ast_func_name_1:
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
- xpath_node na = ns.first();
-
- return xpath_string::from_const(qualified_name(na));
- }
-
- case ast_func_namespace_uri_0:
- {
- xpath_node na = c.n;
-
- return xpath_string::from_const(namespace_uri(na));
- }
-
- case ast_func_namespace_uri_1:
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
- xpath_node na = ns.first();
-
- return xpath_string::from_const(namespace_uri(na));
- }
-
- case ast_func_string_0:
- return string_value(c.n, stack.result);
-
- case ast_func_string_1:
- return _left->eval_string(c, stack);
-
- case ast_func_concat:
- return eval_string_concat(c, stack);
-
- case ast_func_substring_before:
- {
- xpath_allocator_capture cr(stack.temp);
-
- xpath_stack swapped_stack = {stack.temp, stack.result}; // the two allocators exchanged (assuming the member order is result, temp), so the operands are built as temporaries
-
- xpath_string s = _left->eval_string(c, swapped_stack);
- xpath_string p = _right->eval_string(c, swapped_stack);
-
- const char_t* pos = find_substring(s.c_str(), p.c_str());
-
- return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string(); // the part of s before the match, or an empty string when there is no match
- }
-
- case ast_func_substring_after:
- {
- xpath_allocator_capture cr(stack.temp);
-
- xpath_stack swapped_stack = {stack.temp, stack.result};
-
- xpath_string s = _left->eval_string(c, swapped_stack);
- xpath_string p = _right->eval_string(c, swapped_stack);
-
- const char_t* pos = find_substring(s.c_str(), p.c_str());
- if (!pos) return xpath_string(); // no match: empty string
-
- const char_t* rbegin = pos + p.length(); // the result starts right after the matched text
- const char_t* rend = s.c_str() + s.length();
-
- return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); // heap-backed input is copied into stack.result; otherwise the tail pointer is reused
- }
-
- case ast_func_substring_2:
- {
- xpath_allocator_capture cr(stack.temp);
-
- xpath_stack swapped_stack = {stack.temp, stack.result};
-
- xpath_string s = _left->eval_string(c, swapped_stack);
- size_t s_length = s.length();
-
- double first = round_nearest(_right->eval_number(c, stack)); // requested 1-based start position, rounded
-
- if (is_nan(first)) return xpath_string(); // NaN
- else if (first >= s_length + 1) return xpath_string(); // start lies past the end of the string
-
- size_t pos = first < 1 ? 1 : static_cast<size_t>(first); // positions below 1 are clamped to the first character
- assert(1 <= pos && pos <= s_length + 1);
-
- const char_t* rbegin = s.c_str() + (pos - 1);
- const char_t* rend = s.c_str() + s.length();
-
- return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
- }
-
- case ast_func_substring_3:
- {
- xpath_allocator_capture cr(stack.temp);
-
- xpath_stack swapped_stack = {stack.temp, stack.result};
-
- xpath_string s = _left->eval_string(c, swapped_stack);
- size_t s_length = s.length();
-
- double first = round_nearest(_right->eval_number(c, stack));
- double last = first + round_nearest(_right->_next->eval_number(c, stack)); // one past the last requested position: rounded start plus rounded length
-
- if (is_nan(first) || is_nan(last)) return xpath_string();
- else if (first >= s_length + 1) return xpath_string(); // start lies past the end of the string
- else if (first >= last) return xpath_string(); // empty or negative range
- else if (last < 1) return xpath_string(); // range ends before the first character
-
- size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
- size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last); // the end is clamped to one past the last character
-
- assert(1 <= pos && pos <= end && end <= s_length + 1);
- const char_t* rbegin = s.c_str() + (pos - 1);
- const char_t* rend = s.c_str() + (end - 1);
-
- return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result); // the tail pointer is reused only when the range reaches the end of a string that is not heap-backed
- }
-
- case ast_func_normalize_space_0:
- {
- xpath_string s = string_value(c.n, stack.result);
-
- char_t* begin = s.data(stack.result); // NOTE(review): presumably yields a writable buffer in stack.result - confirm against xpath_string::data
- char_t* end = normalize_space(begin);
-
- return xpath_string::from_heap_preallocated(begin, end);
- }
-
- case ast_func_normalize_space_1:
- {
- xpath_string s = _left->eval_string(c, stack);
-
- char_t* begin = s.data(stack.result);
- char_t* end = normalize_space(begin);
-
- return xpath_string::from_heap_preallocated(begin, end);
- }
-
- case ast_func_translate:
- {
- xpath_allocator_capture cr(stack.temp);
-
- xpath_stack swapped_stack = {stack.temp, stack.result};
-
- xpath_string s = _left->eval_string(c, stack); // the subject uses the normal stack; only the from/to arguments use the swapped one
- xpath_string from = _right->eval_string(c, swapped_stack);
- xpath_string to = _right->_next->eval_string(c, swapped_stack);
-
- char_t* begin = s.data(stack.result);
- char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());
-
- return xpath_string::from_heap_preallocated(begin, end);
- }
-
- case ast_opt_translate_table:
- {
- xpath_string s = _left->eval_string(c, stack);
-
- char_t* begin = s.data(stack.result);
- char_t* end = translate_table(begin, _data.table); // uses the table stored in _data.table instead of from/to strings
-
- return xpath_string::from_heap_preallocated(begin, end);
- }
-
- case ast_variable:
- {
- assert(_rettype == _data.variable->type());
-
- if (_rettype == xpath_type_string)
- return xpath_string::from_const(_data.variable->get_string());
-
- // fallthrough to type conversion
- }
-
- default:
- {
- switch (_rettype) // the node does not produce a string itself: evaluate it in its own type, then convert
- {
- case xpath_type_boolean:
- return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
-
- case xpath_type_number:
- return convert_number_to_string(eval_number(c, stack), stack.result);
-
- case xpath_type_node_set:
- {
- xpath_allocator_capture cr(stack.temp);
-
- xpath_stack swapped_stack = {stack.temp, stack.result};
-
- xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
- return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result); // an empty set gives an empty string, otherwise the string value of the first node
- }
-
- default:
- assert(!"Wrong expression for return type string");
- return xpath_string();
- }
- }
- }
- }
-
- xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval) // Evaluates this AST node in context c and returns the resulting node set; eval is forwarded to sub-evaluations and to eval_once
- {
- switch (_type)
- {
- case ast_op_union:
- {
- xpath_allocator_capture cr(stack.temp); // presumably releases the temporary left-hand set when cr leaves scope - confirm against xpath_allocator_capture
-
- xpath_stack swapped_stack = {stack.temp, stack.result}; // the two allocators exchanged (assuming the member order is result, temp)
-
- xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval); // the left operand is evaluated with the swapped stack
- xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval); // the right operand uses the normal stack and becomes the returned set
-
- // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
- rs.set_type(xpath_node_set::type_unsorted);
-
- rs.append(ls.begin(), ls.end(), stack.result);
- rs.remove_duplicates();
-
- return rs;
- }
-
- case ast_filter:
- {
- xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all); // a predicate classified as the constant 1 requests nodeset_eval_first for the input set
-
- // either expression is a number or it contains position() call; sort by document order
- if (_test != predicate_posinv) set.sort_do();
-
- bool once = eval_once(set.type(), eval);
-
- apply_predicate(set, 0, stack, once);
-
- return set;
- }
-
- case ast_func_id:
- return xpath_node_set_raw(); // this implementation always returns an empty set for id()
-
- case ast_step:
- {
- switch (_axis) // turn the runtime axis value into a compile-time tag for step_do
- {
- case axis_ancestor:
- return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
-
- case axis_ancestor_or_self:
- return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());
-
- case axis_attribute:
- return step_do(c, stack, eval, axis_to_type<axis_attribute>());
-
- case axis_child:
- return step_do(c, stack, eval, axis_to_type<axis_child>());
-
- case axis_descendant:
- return step_do(c, stack, eval, axis_to_type<axis_descendant>());
-
- case axis_descendant_or_self:
- return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());
-
- case axis_following:
- return step_do(c, stack, eval, axis_to_type<axis_following>());
-
- case axis_following_sibling:
- return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());
-
- case axis_namespace:
- // namespaced axis is not supported
- return xpath_node_set_raw();
-
- case axis_parent:
- return step_do(c, stack, eval, axis_to_type<axis_parent>());
-
- case axis_preceding:
- return step_do(c, stack, eval, axis_to_type<axis_preceding>());
-
- case axis_preceding_sibling:
- return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());
-
- case axis_self:
- return step_do(c, stack, eval, axis_to_type<axis_self>());
-
- default:
- assert(!"Unknown axis");
- return xpath_node_set_raw();
- }
- }
-
- case ast_step_root:
- {
- assert(!_right); // root step can't have any predicates
-
- xpath_node_set_raw ns;
-
- ns.set_type(xpath_node_set::type_sorted);
-
- if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
- else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result); // for an attribute context node, root() is taken from its parent
-
- return ns;
- }
-
- case ast_variable:
- {
- assert(_rettype == _data.variable->type());
-
- if (_rettype == xpath_type_node_set)
- {
- const xpath_node_set& s = _data.variable->get_node_set();
-
- xpath_node_set_raw ns;
-
- ns.set_type(s.type());
- ns.append(s.begin(), s.end(), stack.result); // the variable's nodes are copied into a new raw set
-
- return ns;
- }
-
- // fallthrough to type conversion
- }
-
- default:
- assert(!"Wrong expression for return type node set"); // no conversion to a node set is implemented here
- return xpath_node_set_raw();
- }
- }
-
- // Optimizes the whole subtree bottom-up: operands and following siblings first, then this node.
- void optimize(xpath_allocator* alloc)
- {
-     if (_left)
-     {
-         _left->optimize(alloc);
-     }
-
-     if (_right)
-     {
-         _right->optimize(alloc);
-     }
-
-     if (_next)
-     {
-         _next->optimize(alloc);
-     }
-
-     // children are done, so the rewrites below see already-optimized operands
-     optimize_self(alloc);
- }
-
- void optimize_self(xpath_allocator* alloc) // Applies local rewrites to this node only; alloc is used solely to build the translate() lookup table
- {
- // Rewrite [position()=expr] with [expr]
- // Note that this step has to go before classification to recognize [position()=1]
- if ((_type == ast_filter || _type == ast_predicate) &&
- _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number)
- {
- _right = _right->_right; // keep only the numeric operand of the comparison
- }
-
- // Classify filter/predicate ops to perform various optimizations during evaluation
- if (_type == ast_filter || _type == ast_predicate)
- {
- assert(_test == predicate_default);
-
- if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
- _test = predicate_constant_one; // literal [1]
- else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
- _test = predicate_constant; // a number literal, a numeric variable or last()
- else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
- _test = predicate_posinv; // non-numeric predicate for which is_posinv_expr() holds
- }
-
- // Rewrite descendant-or-self::node()/child::foo with descendant::foo
- // The former is a full form of //foo, the latter is much faster since it executes the node test immediately
- // Do a similar kind of rewrite for self/descendant/descendant-or-self axes
- // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
- if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && _left &&
- _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
- is_posinv_step())
- {
- if (_axis == axis_child || _axis == axis_descendant)
- _axis = axis_descendant;
- else
- _axis = axis_descendant_or_self;
-
- _left = _left->_left; // drop the descendant-or-self::node() step from the chain
- }
-
- // Use optimized lookup table implementation for translate() with constant arguments
- if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
- {
- unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
-
- if (table) // the node is left unchanged when no table was produced
- {
- _type = ast_opt_translate_table;
- _data.table = table;
- }
- }
-
- // Use optimized path for @attr = 'value' or @attr = $value
- if (_type == ast_op_equal &&
- _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
- (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string)))
- {
- _type = ast_opt_compare_attribute; // only the node type changes; the operands stay in place
- }
- }
-
- // Reports whether this expression is treated as positionally invariant:
- // position() and last() are not; constants, variables, steps, filters and predicates are;
- // anything else is invariant only if its left operand and every node of the _right chain are.
- bool is_posinv_expr() const
- {
-     switch (_type)
-     {
-     case ast_func_position:
-     case ast_func_last:
-         return false;
-
-     case ast_string_constant:
-     case ast_number_constant:
-     case ast_variable:
-     case ast_step:
-     case ast_step_root:
-     case ast_predicate:
-     case ast_filter:
-         return true;
-
-     default:
-         break;
-     }
-
-     // generic operator/function: inspect the operands
-     if (_left && !_left->is_posinv_expr()) return false;
-
-     xpath_ast_node* arg = _right;
-
-     while (arg)
-     {
-         if (!arg->is_posinv_expr()) return false;
-
-         arg = arg->_next;
-     }
-
-     return true;
- }
-
- // Reports whether every predicate attached to this step has been classified as predicate_posinv.
- // A step without predicates qualifies trivially.
- bool is_posinv_step() const
- {
-     assert(_type == ast_step);
-
-     xpath_ast_node* pred = _right;
-
-     while (pred)
-     {
-         assert(pred->_type == ast_predicate);
-
-         if (pred->_test != predicate_posinv) return false;
-
-         pred = pred->_next;
-     }
-
-     return true;
- }
-
- // Returns the value type this node evaluates to (stored compactly in _rettype).
- xpath_value_type rettype() const
- {
-     const xpath_value_type result = static_cast<xpath_value_type>(_rettype);
-
-     return result;
- }
- };
-
- struct xpath_parser
- {
- xpath_allocator* _alloc;
- xpath_lexer _lexer;
-
- const char_t* _query;
- xpath_variable_set* _variables;
-
- xpath_parse_result* _result;
-
- char_t _scratch[32];
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- jmp_buf _error_handler;
- #endif
-
- // Records message and the current lexer offset within the query in *_result, then aborts parsing:
- // by throwing xpath_exception, or by longjmp to _error_handler when PUGIXML_NO_EXCEPTIONS is defined.
- void throw_error(const char* message)
- {
-     _result->offset = _lexer.current_pos() - _query;
-     _result->error = message;
-
- #ifndef PUGIXML_NO_EXCEPTIONS
-     throw xpath_exception(*_result);
- #else
-     longjmp(_error_handler, 1);
- #endif
- }
-
- // Reports an allocation failure: std::bad_alloc when exceptions are enabled,
- // otherwise the regular parse-error path with an "Out of memory" message.
- void throw_error_oom()
- {
- #ifndef PUGIXML_NO_EXCEPTIONS
-     throw std::bad_alloc();
- #else
-     throw_error("Out of memory");
- #endif
- }
-
- // Allocates raw storage for one xpath_ast_node from _alloc; reports out-of-memory via throw_error_oom().
- void* alloc_node()
- {
-     void* memory = _alloc->allocate_nothrow(sizeof(xpath_ast_node));
-
-     if (memory) return memory;
-
-     throw_error_oom();
-
-     // reached only if throw_error_oom() returns; memory is null here
-     return memory;
- }
-
- // Copies the lexer token [value.begin, value.end) into a newly allocated, zero-terminated string.
- // Returns 0 when the token has no data (value.begin is null); reports out-of-memory via throw_error_oom().
- const char_t* alloc_string(const xpath_lexer_string& value)
- {
-     if (!value.begin) return 0;
-
-     size_t char_count = static_cast<size_t>(value.end - value.begin);
-
-     // one extra character for the terminator
-     char_t* copy = static_cast<char_t*>(_alloc->allocate_nothrow((char_count + 1) * sizeof(char_t)));
-     if (!copy) throw_error_oom();
-     assert(copy); // workaround for clang static analysis
-
-     memcpy(copy, value.begin, char_count * sizeof(char_t));
-     copy[char_count] = 0;
-
-     return copy;
- }
-
- xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2])
- {
- assert(argc <= 1);
-
- if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
-
- return new (alloc_node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]);
- }
-
- xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
- {
- switch (name.begin[0])
- {
- case 'b':
- if (name == PUGIXML_TEXT("boolean") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);
-
- break;
-
- case 'c':
- if (name == PUGIXML_TEXT("count") && argc == 1)
- {
- if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
- return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]);
- }
- else if (name == PUGIXML_TEXT("contains") && argc == 2)
- return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
- else if (name == PUGIXML_TEXT("concat") && argc >= 2)
- return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
-
- break;
-
- case 'f':
- if (name == PUGIXML_TEXT("false") && argc == 0)
- return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean);
- else if (name == PUGIXML_TEXT("floor") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);
-
- break;
-
- case 'i':
- if (name == PUGIXML_TEXT("id") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);
-
- break;
-
- case 'l':
- if (name == PUGIXML_TEXT("last") && argc == 0)
- return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number);
- else if (name == PUGIXML_TEXT("lang") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);
- else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
- return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args);
-
- break;
-
- case 'n':
- if (name == PUGIXML_TEXT("name") && argc <= 1)
- return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args);
- else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
- return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);
- else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
- return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("not") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);
- else if (name == PUGIXML_TEXT("number") && argc <= 1)
- return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
-
- break;
-
- case 'p':
- if (name == PUGIXML_TEXT("position") && argc == 0)
- return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number);
-
- break;
-
- case 'r':
- if (name == PUGIXML_TEXT("round") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]);
-
- break;
-
- case 's':
- if (name == PUGIXML_TEXT("string") && argc <= 1)
- return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
- else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
- return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
- else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
- return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
- else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
- return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
- return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
- return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("sum") && argc == 1)
- {
- if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
- return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);
- }
-
- break;
-
- case 't':
- if (name == PUGIXML_TEXT("translate") && argc == 3)
- return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("true") && argc == 0)
- return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean);
-
- break;
-
- default:
- break;
- }
-
- throw_error("Unrecognized function or wrong parameter count");
-
- return 0;
- }
-
- axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
- {
- specified = true;
-
- switch (name.begin[0])
- {
- case 'a':
- if (name == PUGIXML_TEXT("ancestor"))
- return axis_ancestor;
- else if (name == PUGIXML_TEXT("ancestor-or-self"))
- return axis_ancestor_or_self;
- else if (name == PUGIXML_TEXT("attribute"))
- return axis_attribute;
-
- break;
-
- case 'c':
- if (name == PUGIXML_TEXT("child"))
- return axis_child;
-
- break;
-
- case 'd':
- if (name == PUGIXML_TEXT("descendant"))
- return axis_descendant;
- else if (name == PUGIXML_TEXT("descendant-or-self"))
- return axis_descendant_or_self;
-
- break;
-
- case 'f':
- if (name == PUGIXML_TEXT("following"))
- return axis_following;
- else if (name == PUGIXML_TEXT("following-sibling"))
- return axis_following_sibling;
-
- break;
-
- case 'n':
- if (name == PUGIXML_TEXT("namespace"))
- return axis_namespace;
-
- break;
-
- case 'p':
- if (name == PUGIXML_TEXT("parent"))
- return axis_parent;
- else if (name == PUGIXML_TEXT("preceding"))
- return axis_preceding;
- else if (name == PUGIXML_TEXT("preceding-sibling"))
- return axis_preceding_sibling;
-
- break;
-
- case 's':
- if (name == PUGIXML_TEXT("self"))
- return axis_self;
-
- break;
-
- default:
- break;
- }
-
- specified = false;
- return axis_child;
- }
-
- nodetest_t parse_node_test_type(const xpath_lexer_string& name)
- {
- switch (name.begin[0])
- {
- case 'c':
- if (name == PUGIXML_TEXT("comment"))
- return nodetest_type_comment;
-
- break;
-
- case 'n':
- if (name == PUGIXML_TEXT("node"))
- return nodetest_type_node;
-
- break;
-
- case 'p':
- if (name == PUGIXML_TEXT("processing-instruction"))
- return nodetest_type_pi;
-
- break;
-
- case 't':
- if (name == PUGIXML_TEXT("text"))
- return nodetest_type_text;
-
- break;
-
- default:
- break;
- }
-
- return nodetest_none;
- }
-
- // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
- xpath_ast_node* parse_primary_expression()
- {
- switch (_lexer.current())
- {
- case lex_var_ref:
- {
- xpath_lexer_string name = _lexer.contents();
-
- if (!_variables)
- throw_error("Unknown variable: variable set is not provided");
-
- xpath_variable* var = 0;
- if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var))
- throw_error_oom();
-
- if (!var)
- throw_error("Unknown variable: variable set does not contain the given name");
-
- _lexer.next();
-
- return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var);
- }
-
- case lex_open_brace:
- {
- _lexer.next();
-
- xpath_ast_node* n = parse_expression();
-
- if (_lexer.current() != lex_close_brace)
- throw_error("Unmatched braces");
-
- _lexer.next();
-
- return n;
- }
-
- case lex_quoted_string:
- {
- const char_t* value = alloc_string(_lexer.contents());
-
- xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value);
- _lexer.next();
-
- return n;
- }
-
- case lex_number:
- {
- double value = 0;
-
- if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
- throw_error_oom();
-
- xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value);
- _lexer.next();
-
- return n;
- }
-
- case lex_string:
- {
- xpath_ast_node* args[2] = {0};
- size_t argc = 0;
-
- xpath_lexer_string function = _lexer.contents();
- _lexer.next();
-
- xpath_ast_node* last_arg = 0;
-
- if (_lexer.current() != lex_open_brace)
- throw_error("Unrecognized function call");
- _lexer.next();
-
- if (_lexer.current() != lex_close_brace)
- args[argc++] = parse_expression();
-
- while (_lexer.current() != lex_close_brace)
- {
- if (_lexer.current() != lex_comma)
- throw_error("No comma between function arguments");
- _lexer.next();
-
- xpath_ast_node* n = parse_expression();
-
- if (argc < 2) args[argc] = n;
- else last_arg->set_next(n);
-
- argc++;
- last_arg = n;
- }
-
- _lexer.next();
-
- return parse_function(function, argc, args);
- }
-
- default:
- throw_error("Unrecognizable primary expression");
-
- return 0;
- }
- }
-
- // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
- // Predicate ::= '[' PredicateExpr ']'
- // PredicateExpr ::= Expr
- xpath_ast_node* parse_filter_expression()
- {
- xpath_ast_node* n = parse_primary_expression();
-
- while (_lexer.current() == lex_open_square_brace)
- {
- _lexer.next();
-
- xpath_ast_node* expr = parse_expression();
-
- if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set");
-
- n = new (alloc_node()) xpath_ast_node(ast_filter, n, expr, predicate_default);
-
- if (_lexer.current() != lex_close_square_brace)
- throw_error("Unmatched square brace");
-
- _lexer.next();
- }
-
- return n;
- }
-
- // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
- // AxisSpecifier ::= AxisName '::' | '@'?
- // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
- // NameTest ::= '*' | NCName ':' '*' | QName
- // AbbreviatedStep ::= '.' | '..'
- xpath_ast_node* parse_step(xpath_ast_node* set)
- {
- if (set && set->rettype() != xpath_type_node_set)
- throw_error("Step has to be applied to node set");
-
- bool axis_specified = false;
- axis_t axis = axis_child; // implied child axis
-
- if (_lexer.current() == lex_axis_attribute)
- {
- axis = axis_attribute;
- axis_specified = true;
-
- _lexer.next();
- }
- else if (_lexer.current() == lex_dot)
- {
- _lexer.next();
-
- return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0);
- }
- else if (_lexer.current() == lex_double_dot)
- {
- _lexer.next();
-
- return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0);
- }
-
- nodetest_t nt_type = nodetest_none;
- xpath_lexer_string nt_name;
-
- if (_lexer.current() == lex_string)
- {
- // node name test
- nt_name = _lexer.contents();
- _lexer.next();
-
- // was it an axis name?
- if (_lexer.current() == lex_double_colon)
- {
- // parse axis name
- if (axis_specified) throw_error("Two axis specifiers in one step");
-
- axis = parse_axis_name(nt_name, axis_specified);
-
- if (!axis_specified) throw_error("Unknown axis");
-
- // read actual node test
- _lexer.next();
-
- if (_lexer.current() == lex_multiply)
- {
- nt_type = nodetest_all;
- nt_name = xpath_lexer_string();
- _lexer.next();
- }
- else if (_lexer.current() == lex_string)
- {
- nt_name = _lexer.contents();
- _lexer.next();
- }
- else throw_error("Unrecognized node test");
- }
-
- if (nt_type == nodetest_none)
- {
- // node type test or processing-instruction
- if (_lexer.current() == lex_open_brace)
- {
- _lexer.next();
-
- if (_lexer.current() == lex_close_brace)
- {
- _lexer.next();
-
- nt_type = parse_node_test_type(nt_name);
-
- if (nt_type == nodetest_none) throw_error("Unrecognized node type");
-
- nt_name = xpath_lexer_string();
- }
- else if (nt_name == PUGIXML_TEXT("processing-instruction"))
- {
- if (_lexer.current() != lex_quoted_string)
- throw_error("Only literals are allowed as arguments to processing-instruction()");
-
- nt_type = nodetest_pi;
- nt_name = _lexer.contents();
- _lexer.next();
-
- if (_lexer.current() != lex_close_brace)
- throw_error("Unmatched brace near processing-instruction()");
- _lexer.next();
- }
- else
- throw_error("Unmatched brace near node type test");
-
- }
- // QName or NCName:*
- else
- {
- if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
- {
- nt_name.end--; // erase *
-
- nt_type = nodetest_all_in_namespace;
- }
- else nt_type = nodetest_name;
- }
- }
- }
- else if (_lexer.current() == lex_multiply)
- {
- nt_type = nodetest_all;
- _lexer.next();
- }
- else throw_error("Unrecognized node test");
-
- xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name));
-
- xpath_ast_node* last = 0;
-
- while (_lexer.current() == lex_open_square_brace)
- {
- _lexer.next();
-
- xpath_ast_node* expr = parse_expression();
-
- xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, 0, expr, predicate_default);
-
- if (_lexer.current() != lex_close_square_brace)
- throw_error("Unmatched square brace");
- _lexer.next();
-
- if (last) last->set_next(pred);
- else n->set_right(pred);
-
- last = pred;
- }
-
- return n;
- }
-
- // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
- xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
- {
- xpath_ast_node* n = parse_step(set);
-
- while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
- {
- lexeme_t l = _lexer.current();
- _lexer.next();
-
- if (l == lex_double_slash)
- n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
-
- n = parse_step(n);
- }
-
- return n;
- }
-
- // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
- // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
- xpath_ast_node* parse_location_path()
- {
- if (_lexer.current() == lex_slash)
- {
- _lexer.next();
-
- xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
-
- // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
- lexeme_t l = _lexer.current();
-
- if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
- return parse_relative_location_path(n);
- else
- return n;
- }
- else if (_lexer.current() == lex_double_slash)
- {
- _lexer.next();
-
- xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
- n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
-
- return parse_relative_location_path(n);
- }
-
- // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
- return parse_relative_location_path(0);
- }
-
- // PathExpr ::= LocationPath
- // | FilterExpr
- // | FilterExpr '/' RelativeLocationPath
- // | FilterExpr '//' RelativeLocationPath
- // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
- // UnaryExpr ::= UnionExpr | '-' UnaryExpr
- xpath_ast_node* parse_path_or_unary_expression()
- {
- // Clarification.
- // PathExpr begins with either LocationPath or FilterExpr.
- // FilterExpr begins with PrimaryExpr
- // PrimaryExpr begins with '$' in case of it being a variable reference,
- // '(' in case of it being an expression, string literal, number constant or
- // function call.
-
- if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
- _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
- _lexer.current() == lex_string)
- {
- if (_lexer.current() == lex_string)
- {
- // This is either a function call, or not - if not, we shall proceed with location path
- const char_t* state = _lexer.state();
-
- while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
-
- if (*state != '(') return parse_location_path();
-
- // This looks like a function call; however this still can be a node-test. Check it.
- if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path();
- }
-
- xpath_ast_node* n = parse_filter_expression();
-
- if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
- {
- lexeme_t l = _lexer.current();
- _lexer.next();
-
- if (l == lex_double_slash)
- {
- if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set");
-
- n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
- }
-
- // select from location path
- return parse_relative_location_path(n);
- }
-
- return n;
- }
- else if (_lexer.current() == lex_minus)
- {
- _lexer.next();
-
- // precedence 7+ - only parses union expressions
- xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7);
-
- return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
- }
- else
- return parse_location_path();
- }
-
- struct binary_op_t
- {
- ast_type_t asttype;
- xpath_value_type rettype;
- int precedence;
-
- binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)
- {
- }
-
- binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
- {
- }
-
- static binary_op_t parse(xpath_lexer& lexer)
- {
- switch (lexer.current())
- {
- case lex_string:
- if (lexer.contents() == PUGIXML_TEXT("or"))
- return binary_op_t(ast_op_or, xpath_type_boolean, 1);
- else if (lexer.contents() == PUGIXML_TEXT("and"))
- return binary_op_t(ast_op_and, xpath_type_boolean, 2);
- else if (lexer.contents() == PUGIXML_TEXT("div"))
- return binary_op_t(ast_op_divide, xpath_type_number, 6);
- else if (lexer.contents() == PUGIXML_TEXT("mod"))
- return binary_op_t(ast_op_mod, xpath_type_number, 6);
- else
- return binary_op_t();
-
- case lex_equal:
- return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
-
- case lex_not_equal:
- return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
-
- case lex_less:
- return binary_op_t(ast_op_less, xpath_type_boolean, 4);
-
- case lex_greater:
- return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
-
- case lex_less_or_equal:
- return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
-
- case lex_greater_or_equal:
- return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
-
- case lex_plus:
- return binary_op_t(ast_op_add, xpath_type_number, 5);
-
- case lex_minus:
- return binary_op_t(ast_op_subtract, xpath_type_number, 5);
-
- case lex_multiply:
- return binary_op_t(ast_op_multiply, xpath_type_number, 6);
-
- case lex_union:
- return binary_op_t(ast_op_union, xpath_type_node_set, 7);
-
- default:
- return binary_op_t();
- }
- }
- };
-
- xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
- {
- binary_op_t op = binary_op_t::parse(_lexer);
-
- while (op.asttype != ast_unknown && op.precedence >= limit)
- {
- _lexer.next();
-
- xpath_ast_node* rhs = parse_path_or_unary_expression();
-
- binary_op_t nextop = binary_op_t::parse(_lexer);
-
- while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
- {
- rhs = parse_expression_rec(rhs, nextop.precedence);
-
- nextop = binary_op_t::parse(_lexer);
- }
-
- if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
- throw_error("Union operator has to be applied to node sets");
-
- lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs);
-
- op = binary_op_t::parse(_lexer);
- }
-
- return lhs;
- }
-
- // Expr ::= OrExpr
- // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
- // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
- // EqualityExpr ::= RelationalExpr
- // | EqualityExpr '=' RelationalExpr
- // | EqualityExpr '!=' RelationalExpr
- // RelationalExpr ::= AdditiveExpr
- // | RelationalExpr '<' AdditiveExpr
- // | RelationalExpr '>' AdditiveExpr
- // | RelationalExpr '<=' AdditiveExpr
- // | RelationalExpr '>=' AdditiveExpr
- // AdditiveExpr ::= MultiplicativeExpr
- // | AdditiveExpr '+' MultiplicativeExpr
- // | AdditiveExpr '-' MultiplicativeExpr
- // MultiplicativeExpr ::= UnaryExpr
- // | MultiplicativeExpr '*' UnaryExpr
- // | MultiplicativeExpr 'div' UnaryExpr
- // | MultiplicativeExpr 'mod' UnaryExpr
- xpath_ast_node* parse_expression()
- {
- return parse_expression_rec(parse_path_or_unary_expression(), 0);
- }
-
- xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result)
- {
- }
-
- xpath_ast_node* parse()
- {
- xpath_ast_node* result = parse_expression();
-
- if (_lexer.current() != lex_eof)
- {
- // there are still unparsed tokens left, error
- throw_error("Incorrect query");
- }
-
- return result;
- }
-
- static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
- {
- xpath_parser parser(query, variables, alloc, result);
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- int error = setjmp(parser._error_handler);
-
- return (error == 0) ? parser.parse() : 0;
- #else
- return parser.parse();
- #endif
- }
- };
-
- struct xpath_query_impl
- {
- static xpath_query_impl* create()
- {
- void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
- if (!memory) return 0;
-
- return new (memory) xpath_query_impl();
- }
-
- static void destroy(xpath_query_impl* impl)
- {
- // free all allocated pages
- impl->alloc.release();
-
- // free allocator memory (with the first page)
- xml_memory::deallocate(impl);
- }
-
- xpath_query_impl(): root(0), alloc(&block)
- {
- block.next = 0;
- block.capacity = sizeof(block.data);
- }
-
- xpath_ast_node* root;
- xpath_allocator alloc;
- xpath_memory_block block;
- };
-
- PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd)
- {
- if (!impl) return xpath_string();
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- if (setjmp(sd.error_handler)) return xpath_string();
- #endif
-
- xpath_context c(n, 1, 1);
-
- return impl->root->eval_string(c, sd.stack);
- }
-
- PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl)
- {
- if (!impl) return 0;
-
- if (impl->root->rettype() != xpath_type_node_set)
- {
- #ifdef PUGIXML_NO_EXCEPTIONS
- return 0;
- #else
- xpath_parse_result res;
- res.error = "Expression does not evaluate to node set";
-
- throw xpath_exception(res);
- #endif
- }
-
- return impl->root;
- }
-PUGI__NS_END
-
-namespace pugi
-{
-#ifndef PUGIXML_NO_EXCEPTIONS
- PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
- {
- assert(_result.error);
- }
-
- PUGI__FN const char* xpath_exception::what() const throw()
- {
- return _result.error;
- }
-
- PUGI__FN const xpath_parse_result& xpath_exception::result() const
- {
- return _result;
- }
-#endif
-
- PUGI__FN xpath_node::xpath_node()
- {
- }
-
- PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
- {
- }
-
- PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
- {
- }
-
- PUGI__FN xml_node xpath_node::node() const
- {
- return _attribute ? xml_node() : _node;
- }
-
- PUGI__FN xml_attribute xpath_node::attribute() const
- {
- return _attribute;
- }
-
- PUGI__FN xml_node xpath_node::parent() const
- {
- return _attribute ? _node : _node.parent();
- }
-
- PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
- {
- }
-
- PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
- {
- return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
- }
-
- PUGI__FN bool xpath_node::operator!() const
- {
- return !(_node || _attribute);
- }
-
- PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
- {
- return _node == n._node && _attribute == n._attribute;
- }
-
- PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
- {
- return _node != n._node || _attribute != n._attribute;
- }
-
-#ifdef __BORLANDC__
- PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
- {
- return (bool)lhs && rhs;
- }
-
- PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
- {
- return (bool)lhs || rhs;
- }
-#endif
-
- PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_)
- {
- assert(begin_ <= end_);
-
- size_t size_ = static_cast<size_t>(end_ - begin_);
-
- if (size_ <= 1)
- {
- // deallocate old buffer
- if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
-
- // use internal buffer
- if (begin_ != end_) _storage = *begin_;
-
- _begin = &_storage;
- _end = &_storage + size_;
- _type = type_;
- }
- else
- {
- // make heap copy
- xpath_node* storage = static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
-
- if (!storage)
- {
- #ifdef PUGIXML_NO_EXCEPTIONS
- return;
- #else
- throw std::bad_alloc();
- #endif
- }
-
- memcpy(storage, begin_, size_ * sizeof(xpath_node));
-
- // deallocate old buffer
- if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
-
- // finalize
- _begin = storage;
- _end = storage + size_;
- _type = type_;
- }
- }
-
-#if __cplusplus >= 201103
- PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs)
- {
- _type = rhs._type;
- _storage = rhs._storage;
- _begin = (rhs._begin == &rhs._storage) ? &_storage : rhs._begin;
- _end = _begin + (rhs._end - rhs._begin);
-
- rhs._type = type_unsorted;
- rhs._begin = &rhs._storage;
- rhs._end = rhs._begin;
- }
-#endif
-
- PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage)
- {
- }
-
- PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(&_storage), _end(&_storage)
- {
- _assign(begin_, end_, type_);
- }
-
- PUGI__FN xpath_node_set::~xpath_node_set()
- {
- if (_begin != &_storage)
- impl::xml_memory::deallocate(_begin);
- }
-
- PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(&_storage), _end(&_storage)
- {
- _assign(ns._begin, ns._end, ns._type);
- }
-
- PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
- {
- if (this == &ns) return *this;
-
- _assign(ns._begin, ns._end, ns._type);
-
- return *this;
- }
-
-#if __cplusplus >= 201103
- PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs): _type(type_unsorted), _begin(&_storage), _end(&_storage)
- {
- _move(rhs);
- }
-
- PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs)
- {
- if (this == &rhs) return *this;
-
- if (_begin != &_storage)
- impl::xml_memory::deallocate(_begin);
-
- _move(rhs);
-
- return *this;
- }
-#endif
-
- PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
- {
- return _type;
- }
-
- PUGI__FN size_t xpath_node_set::size() const
- {
- return _end - _begin;
- }
-
- PUGI__FN bool xpath_node_set::empty() const
- {
- return _begin == _end;
- }
-
- PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
- {
- assert(index < size());
- return _begin[index];
- }
-
- PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
- {
- return _begin;
- }
-
- PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
- {
- return _end;
- }
-
- PUGI__FN void xpath_node_set::sort(bool reverse)
- {
- _type = impl::xpath_sort(_begin, _end, _type, reverse);
- }
-
- PUGI__FN xpath_node xpath_node_set::first() const
- {
- return impl::xpath_first(_begin, _end, _type);
- }
-
- PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
- {
- }
-
- PUGI__FN xpath_parse_result::operator bool() const
- {
- return error == 0;
- }
-
- PUGI__FN const char* xpath_parse_result::description() const
- {
- return error ? error : "No error";
- }
-
- PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0)
- {
- }
-
- PUGI__FN const char_t* xpath_variable::name() const
- {
- switch (_type)
- {
- case xpath_type_node_set:
- return static_cast<const impl::xpath_variable_node_set*>(this)->name;
-
- case xpath_type_number:
- return static_cast<const impl::xpath_variable_number*>(this)->name;
-
- case xpath_type_string:
- return static_cast<const impl::xpath_variable_string*>(this)->name;
-
- case xpath_type_boolean:
- return static_cast<const impl::xpath_variable_boolean*>(this)->name;
-
- default:
- assert(!"Invalid variable type");
- return 0;
- }
- }
-
- PUGI__FN xpath_value_type xpath_variable::type() const
- {
- return _type;
- }
-
- PUGI__FN bool xpath_variable::get_boolean() const
- {
- return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
- }
-
- PUGI__FN double xpath_variable::get_number() const
- {
- return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
- }
-
- PUGI__FN const char_t* xpath_variable::get_string() const
- {
- const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
- return value ? value : PUGIXML_TEXT("");
- }
-
- PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
- {
- return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
- }
-
- PUGI__FN bool xpath_variable::set(bool value)
- {
- if (_type != xpath_type_boolean) return false;
-
- static_cast<impl::xpath_variable_boolean*>(this)->value = value;
- return true;
- }
-
- PUGI__FN bool xpath_variable::set(double value)
- {
- if (_type != xpath_type_number) return false;
-
- static_cast<impl::xpath_variable_number*>(this)->value = value;
- return true;
- }
-
- PUGI__FN bool xpath_variable::set(const char_t* value)
- {
- if (_type != xpath_type_string) return false;
-
- impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
-
- // duplicate string
- size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
-
- char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
- if (!copy) return false;
-
- memcpy(copy, value, size);
-
- // replace old string
- if (var->value) impl::xml_memory::deallocate(var->value);
- var->value = copy;
-
- return true;
- }
-
- PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
- {
- if (_type != xpath_type_node_set) return false;
-
- static_cast<impl::xpath_variable_node_set*>(this)->value = value;
- return true;
- }
-
- PUGI__FN xpath_variable_set::xpath_variable_set()
- {
- for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
- _data[i] = 0;
- }
-
- PUGI__FN xpath_variable_set::~xpath_variable_set()
- {
- for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
- _destroy(_data[i]);
- }
-
- PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs)
- {
- for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
- _data[i] = 0;
-
- _assign(rhs);
- }
-
- PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs)
- {
- if (this == &rhs) return *this;
-
- _assign(rhs);
-
- return *this;
- }
-
-#if __cplusplus >= 201103
- PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs)
- {
- for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
- {
- _data[i] = rhs._data[i];
- rhs._data[i] = 0;
- }
- }
-
- PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs)
- {
- for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
- {
- _destroy(_data[i]);
-
- _data[i] = rhs._data[i];
- rhs._data[i] = 0;
- }
-
- return *this;
- }
-#endif
-
- PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs)
- {
- xpath_variable_set temp;
-
- for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
- if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i]))
- return;
-
- _swap(temp);
- }
-
- PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs)
- {
- for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
- {
- xpath_variable* chain = _data[i];
-
- _data[i] = rhs._data[i];
- rhs._data[i] = chain;
- }
- }
-
- PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const
- {
- const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
- size_t hash = impl::hash_string(name) % hash_size;
-
- // look for existing variable
- for (xpath_variable* var = _data[hash]; var; var = var->_next)
- if (impl::strequal(var->name(), name))
- return var;
-
- return 0;
- }
-
- PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result)
- {
- xpath_variable* last = 0;
-
- while (var)
- {
- // allocate storage for new variable
- xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name());
- if (!nvar) return false;
-
- // link the variable to the result immediately to handle failures gracefully
- if (last)
- last->_next = nvar;
- else
- *out_result = nvar;
-
- last = nvar;
-
- // copy the value; this can fail due to out-of-memory conditions
- if (!impl::copy_xpath_variable(nvar, var)) return false;
-
- var = var->_next;
- }
-
- return true;
- }
-
- PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var)
- {
- while (var)
- {
- xpath_variable* next = var->_next;
-
- impl::delete_xpath_variable(var->_type, var);
-
- var = next;
- }
- }
-
- PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
- {
- const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
- size_t hash = impl::hash_string(name) % hash_size;
-
- // look for existing variable
- for (xpath_variable* var = _data[hash]; var; var = var->_next)
- if (impl::strequal(var->name(), name))
- return var->type() == type ? var : 0;
-
- // add new variable
- xpath_variable* result = impl::new_xpath_variable(type, name);
-
- if (result)
- {
- result->_next = _data[hash];
-
- _data[hash] = result;
- }
-
- return result;
- }
-
- PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
- {
- xpath_variable* var = add(name, xpath_type_boolean);
- return var ? var->set(value) : false;
- }
-
- PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
- {
- xpath_variable* var = add(name, xpath_type_number);
- return var ? var->set(value) : false;
- }
-
- PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
- {
- xpath_variable* var = add(name, xpath_type_string);
- return var ? var->set(value) : false;
- }
-
- PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
- {
- xpath_variable* var = add(name, xpath_type_node_set);
- return var ? var->set(value) : false;
- }
-
- PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
- {
- return _find(name);
- }
-
- PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
- {
- return _find(name);
- }
-
- PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
- {
- impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
-
- if (!qimpl)
- {
- #ifdef PUGIXML_NO_EXCEPTIONS
- _result.error = "Out of memory";
- #else
- throw std::bad_alloc();
- #endif
- }
- else
- {
- using impl::auto_deleter; // MSVC7 workaround
- auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy);
-
- qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
-
- if (qimpl->root)
- {
- qimpl->root->optimize(&qimpl->alloc);
-
- _impl = impl.release();
- _result.error = 0;
- }
- }
- }
-
- PUGI__FN xpath_query::xpath_query(): _impl(0)
- {
- }
-
- PUGI__FN xpath_query::~xpath_query()
- {
- if (_impl)
- impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
- }
-
-#if __cplusplus >= 201103
- PUGI__FN xpath_query::xpath_query(xpath_query&& rhs)
- {
- _impl = rhs._impl;
- _result = rhs._result;
- rhs._impl = 0;
- rhs._result = xpath_parse_result();
- }
-
- PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs)
- {
- if (this == &rhs) return *this;
-
- if (_impl)
- impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
-
- _impl = rhs._impl;
- _result = rhs._result;
- rhs._impl = 0;
- rhs._result = xpath_parse_result();
-
- return *this;
- }
-#endif
-
- PUGI__FN xpath_value_type xpath_query::return_type() const
- {
- if (!_impl) return xpath_type_none;
-
- return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
- }
-
- PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
- {
- if (!_impl) return false;
-
- impl::xpath_context c(n, 1, 1);
- impl::xpath_stack_data sd;
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- if (setjmp(sd.error_handler)) return false;
- #endif
-
- return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
- }
-
- PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
- {
- if (!_impl) return impl::gen_nan();
-
- impl::xpath_context c(n, 1, 1);
- impl::xpath_stack_data sd;
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- if (setjmp(sd.error_handler)) return impl::gen_nan();
- #endif
-
- return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
- }
-
-#ifndef PUGIXML_NO_STL
- PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
- {
- impl::xpath_stack_data sd;
-
- impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
-
- return string_t(r.c_str(), r.length());
- }
-#endif
-
- PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
- {
- impl::xpath_stack_data sd;
-
- impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
-
- size_t full_size = r.length() + 1;
-
- if (capacity > 0)
- {
- size_t size = (full_size < capacity) ? full_size : capacity;
- assert(size > 0);
-
- memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
- buffer[size - 1] = 0;
- }
-
- return full_size;
- }
-
- PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
- {
- impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
- if (!root) return xpath_node_set();
-
- impl::xpath_context c(n, 1, 1);
- impl::xpath_stack_data sd;
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- if (setjmp(sd.error_handler)) return xpath_node_set();
- #endif
-
- impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
-
- return xpath_node_set(r.begin(), r.end(), r.type());
- }
-
- PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const
- {
- impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
- if (!root) return xpath_node();
-
- impl::xpath_context c(n, 1, 1);
- impl::xpath_stack_data sd;
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- if (setjmp(sd.error_handler)) return xpath_node();
- #endif
-
- impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
-
- return r.first();
- }
-
- PUGI__FN const xpath_parse_result& xpath_query::result() const
- {
- return _result;
- }
-
- PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
- {
- }
-
- PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
- {
- return _impl ? unspecified_bool_xpath_query : 0;
- }
-
- PUGI__FN bool xpath_query::operator!() const
- {
- return !_impl;
- }
-
- PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const
- {
- xpath_query q(query, variables);
- return select_node(q);
- }
-
- PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const
- {
- return query.evaluate_node(*this);
- }
-
- PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
- {
- xpath_query q(query, variables);
- return select_nodes(q);
- }
-
- PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
- {
- return query.evaluate_node_set(*this);
- }
-
- PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
- {
- xpath_query q(query, variables);
- return select_single_node(q);
- }
-
- PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
- {
- return query.evaluate_node(*this);
- }
-}
-
-#endif
-
-#ifdef __BORLANDC__
-# pragma option pop
-#endif
-
-// Intel C++ does not properly keep warning state for function templates,
-// so popping warning state at the end of translation unit leads to warnings in the middle.
-#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
-# pragma warning(pop)
-#endif
-
-// Undefine all local macros (makes sure we're not leaking macros in header-only mode)
-#undef PUGI__NO_INLINE
-#undef PUGI__UNLIKELY
-#undef PUGI__STATIC_ASSERT
-#undef PUGI__DMC_VOLATILE
-#undef PUGI__MSVC_CRT_VERSION
-#undef PUGI__NS_BEGIN
-#undef PUGI__NS_END
-#undef PUGI__FN
-#undef PUGI__FN_NO_INLINE
-#undef PUGI__GETPAGE_IMPL
-#undef PUGI__GETPAGE
-#undef PUGI__NODETYPE
-#undef PUGI__IS_CHARTYPE_IMPL
-#undef PUGI__IS_CHARTYPE
-#undef PUGI__IS_CHARTYPEX
-#undef PUGI__ENDSWITH
-#undef PUGI__SKIPWS
-#undef PUGI__OPTSET
-#undef PUGI__PUSHNODE
-#undef PUGI__POPNODE
-#undef PUGI__SCANFOR
-#undef PUGI__SCANWHILE
-#undef PUGI__SCANWHILE_UNROLL
-#undef PUGI__ENDSEG
-#undef PUGI__THROW_ERROR
-#undef PUGI__CHECK_ERROR
-
-#endif
-
-/**
- * Copyright (c) 2006-2015 Arseny Kapoulkine
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
diff --git a/contrib/moses2/pugixml.hpp b/contrib/moses2/pugixml.hpp
deleted file mode 100644
index 9f7c3fbcf..000000000
--- a/contrib/moses2/pugixml.hpp
+++ /dev/null
@@ -1,1400 +0,0 @@
-/**
- * pugixml parser - version 1.7
- * --------------------------------------------------------
- * Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
- * Report bugs and download new versions at http://pugixml.org/
- *
- * This library is distributed under the MIT License. See notice at the end
- * of this file.
- *
- * This work is based on the pugxml parser, which is:
- * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
- */
-
-#ifndef PUGIXML_VERSION
-// Define version macro; evaluates to major * 100 + minor so that it's safe to use in less-than comparisons
-# define PUGIXML_VERSION 170
-#endif
-
-// Include user configuration file (this can define various configuration macros)
-#include "pugiconfig.hpp"
-
-#ifndef HEADER_PUGIXML_HPP
-#define HEADER_PUGIXML_HPP
-
-// Include stddef.h for size_t and ptrdiff_t
-#include <stddef.h>
-
-// Include exception header for XPath
-#if !defined(PUGIXML_NO_XPATH) && !defined(PUGIXML_NO_EXCEPTIONS)
-# include <exception>
-#endif
-
-// Include STL headers
-#ifndef PUGIXML_NO_STL
-# include <iterator>
-# include <iosfwd>
-# include <string>
-#endif
-
-// Macro for deprecated features
-#ifndef PUGIXML_DEPRECATED
-# if defined(__GNUC__)
-# define PUGIXML_DEPRECATED __attribute__((deprecated))
-# elif defined(_MSC_VER) && _MSC_VER >= 1300
-# define PUGIXML_DEPRECATED __declspec(deprecated)
-# else
-# define PUGIXML_DEPRECATED
-# endif
-#endif
-
-// If no API is defined, assume default
-#ifndef PUGIXML_API
-# define PUGIXML_API
-#endif
-
-// If no API for classes is defined, assume default
-#ifndef PUGIXML_CLASS
-# define PUGIXML_CLASS PUGIXML_API
-#endif
-
-// If no API for functions is defined, assume default
-#ifndef PUGIXML_FUNCTION
-# define PUGIXML_FUNCTION PUGIXML_API
-#endif
-
-// If the platform is known to have long long support, enable long long functions
-#ifndef PUGIXML_HAS_LONG_LONG
-# if __cplusplus >= 201103
-# define PUGIXML_HAS_LONG_LONG
-# elif defined(_MSC_VER) && _MSC_VER >= 1400
-# define PUGIXML_HAS_LONG_LONG
-# endif
-#endif
-
-// Character interface macros
-#ifdef PUGIXML_WCHAR_MODE
-# define PUGIXML_TEXT(t) L ## t
-# define PUGIXML_CHAR wchar_t
-#else
-# define PUGIXML_TEXT(t) t
-# define PUGIXML_CHAR char
-#endif
-
-namespace pugi
-{
- // Character type used for all internal storage and operations; depends on PUGIXML_WCHAR_MODE
- typedef PUGIXML_CHAR char_t;
-
-#ifndef PUGIXML_NO_STL
- // String type used for operations that work with STL string; depends on PUGIXML_WCHAR_MODE
- typedef std::basic_string<PUGIXML_CHAR, std::char_traits<PUGIXML_CHAR>, std::allocator<PUGIXML_CHAR> > string_t;
-#endif
-}
-
-// The PugiXML namespace
-namespace pugi
-{
- // Tree node types
- enum xml_node_type
- {
- node_null, // Empty (null) node handle
- node_document, // A document tree's absolute root
- node_element, // Element tag, i.e. '<node/>'
- node_pcdata, // Plain character data, i.e. 'text'
- node_cdata, // Character data, i.e. '<![CDATA[text]]>'
- node_comment, // Comment tag, i.e. '<!-- text -->'
- node_pi, // Processing instruction, i.e. '<?name?>'
- node_declaration, // Document declaration, i.e. '<?xml version="1.0"?>'
- node_doctype // Document type declaration, i.e. '<!DOCTYPE doc>'
- };
-
- // Parsing options
-
- // Minimal parsing mode (equivalent to turning all other flags off).
- // Only elements and PCDATA sections are added to the DOM tree, no text conversions are performed.
- const unsigned int parse_minimal = 0x0000;
-
- // This flag determines if processing instructions (node_pi) are added to the DOM tree. This flag is off by default.
- const unsigned int parse_pi = 0x0001;
-
- // This flag determines if comments (node_comment) are added to the DOM tree. This flag is off by default.
- const unsigned int parse_comments = 0x0002;
-
- // This flag determines if CDATA sections (node_cdata) are added to the DOM tree. This flag is on by default.
- const unsigned int parse_cdata = 0x0004;
-
- // This flag determines if plain character data (node_pcdata) that consist only of whitespace are added to the DOM tree.
- // This flag is off by default; turning it on usually results in slower parsing and more memory consumption.
- const unsigned int parse_ws_pcdata = 0x0008;
-
- // This flag determines if character and entity references are expanded during parsing. This flag is on by default.
- const unsigned int parse_escapes = 0x0010;
-
- // This flag determines if EOL characters are normalized (converted to #xA) during parsing. This flag is on by default.
- const unsigned int parse_eol = 0x0020;
-
- // This flag determines if attribute values are normalized using CDATA normalization rules during parsing. This flag is on by default.
- const unsigned int parse_wconv_attribute = 0x0040;
-
- // This flag determines if attribute values are normalized using NMTOKENS normalization rules during parsing. This flag is off by default.
- const unsigned int parse_wnorm_attribute = 0x0080;
-
- // This flag determines if document declaration (node_declaration) is added to the DOM tree. This flag is off by default.
- const unsigned int parse_declaration = 0x0100;
-
- // This flag determines if document type declaration (node_doctype) is added to the DOM tree. This flag is off by default.
- const unsigned int parse_doctype = 0x0200;
-
- // This flag determines if plain character data (node_pcdata) that is the only child of the parent node and that consists only
- // of whitespace is added to the DOM tree.
- // This flag is off by default; turning it on may result in slower parsing and more memory consumption.
- const unsigned int parse_ws_pcdata_single = 0x0400;
-
- // This flag determines if leading and trailing whitespace is to be removed from plain character data. This flag is off by default.
- const unsigned int parse_trim_pcdata = 0x0800;
-
- // This flag determines if plain character data that does not have a parent node is added to the DOM tree, and if an empty document
- // is a valid document. This flag is off by default.
- const unsigned int parse_fragment = 0x1000;
-
- // The default parsing mode.
- // Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,
- // End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
- const unsigned int parse_default = parse_cdata | parse_escapes | parse_wconv_attribute | parse_eol;
-
- // The full parsing mode.
- // Nodes of all types are added to the DOM tree, character/reference entities are expanded,
- // End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
- const unsigned int parse_full = parse_default | parse_pi | parse_comments | parse_declaration | parse_doctype;
-
- // These flags determine the encoding of input data for XML document
- enum xml_encoding
- {
- encoding_auto, // Auto-detect input encoding using BOM or < / <? detection; use UTF8 if BOM is not found
- encoding_utf8, // UTF8 encoding
- encoding_utf16_le, // Little-endian UTF16
- encoding_utf16_be, // Big-endian UTF16
- encoding_utf16, // UTF16 with native endianness
- encoding_utf32_le, // Little-endian UTF32
- encoding_utf32_be, // Big-endian UTF32
- encoding_utf32, // UTF32 with native endianness
- encoding_wchar, // The same encoding wchar_t has (either UTF16 or UTF32)
- encoding_latin1
- };
-
- // Formatting flags
-
- // Indent the nodes that are written to output stream with as many indentation strings as deep the node is in DOM tree. This flag is on by default.
- const unsigned int format_indent = 0x01;
-
- // Write encoding-specific BOM to the output stream. This flag is off by default.
- const unsigned int format_write_bom = 0x02;
-
- // Use raw output mode (no indentation and no line breaks are written). This flag is off by default.
- const unsigned int format_raw = 0x04;
-
- // Omit default XML declaration even if there is no declaration in the document. This flag is off by default.
- const unsigned int format_no_declaration = 0x08;
-
- // Don't escape attribute values and PCDATA contents. This flag is off by default.
- const unsigned int format_no_escapes = 0x10;
-
- // Open file using text mode in xml_document::save_file. This enables special character (i.e. new-line) conversions on some systems. This flag is off by default.
- const unsigned int format_save_file_text = 0x20;
-
- // Write every attribute on a new line with appropriate indentation. This flag is off by default.
- const unsigned int format_indent_attributes = 0x40;
-
- // The default set of formatting flags.
- // Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none.
- const unsigned int format_default = format_indent;
-
- // Forward declarations
- struct xml_attribute_struct;
- struct xml_node_struct;
-
- class xml_node_iterator;
- class xml_attribute_iterator;
- class xml_named_node_iterator;
-
- class xml_tree_walker;
-
- struct xml_parse_result;
-
- class xml_node;
-
- class xml_text;
-
- #ifndef PUGIXML_NO_XPATH
- class xpath_node;
- class xpath_node_set;
- class xpath_query;
- class xpath_variable_set;
- #endif
-
- // Range-based for loop support
- template <typename It> class xml_object_range
- {
- public:
- typedef It const_iterator;
- typedef It iterator;
-
- xml_object_range(It b, It e): _begin(b), _end(e)
- {
- }
-
- It begin() const { return _begin; }
- It end() const { return _end; }
-
- private:
- It _begin, _end;
- };
-
- // Writer interface for node printing (see xml_node::print)
- class PUGIXML_CLASS xml_writer
- {
- public:
- virtual ~xml_writer() {}
-
- // Write memory chunk into stream/file/whatever
- virtual void write(const void* data, size_t size) = 0;
- };
-
- // xml_writer implementation for FILE*
- class PUGIXML_CLASS xml_writer_file: public xml_writer
- {
- public:
- // Construct writer from a FILE* object; void* is used to avoid header dependencies on stdio
- xml_writer_file(void* file);
-
- virtual void write(const void* data, size_t size);
-
- private:
- void* file;
- };
-
- #ifndef PUGIXML_NO_STL
- // xml_writer implementation for streams
- class PUGIXML_CLASS xml_writer_stream: public xml_writer
- {
- public:
- // Construct writer from an output stream object
- xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream);
- xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream);
-
- virtual void write(const void* data, size_t size);
-
- private:
- std::basic_ostream<char, std::char_traits<char> >* narrow_stream;
- std::basic_ostream<wchar_t, std::char_traits<wchar_t> >* wide_stream;
- };
- #endif
-
- // A light-weight handle for manipulating attributes in DOM tree
- class PUGIXML_CLASS xml_attribute
- {
- friend class xml_attribute_iterator;
- friend class xml_node;
-
- private:
- xml_attribute_struct* _attr;
-
- typedef void (*unspecified_bool_type)(xml_attribute***);
-
- public:
- // Default constructor. Constructs an empty attribute.
- xml_attribute();
-
- // Constructs attribute from internal pointer
- explicit xml_attribute(xml_attribute_struct* attr);
-
- // Safe bool conversion operator
- operator unspecified_bool_type() const;
-
- // Borland C++ workaround
- bool operator!() const;
-
- // Comparison operators (compares wrapped attribute pointers)
- bool operator==(const xml_attribute& r) const;
- bool operator!=(const xml_attribute& r) const;
- bool operator<(const xml_attribute& r) const;
- bool operator>(const xml_attribute& r) const;
- bool operator<=(const xml_attribute& r) const;
- bool operator>=(const xml_attribute& r) const;
-
- // Check if attribute is empty
- bool empty() const;
-
- // Get attribute name/value, or "" if attribute is empty
- const char_t* name() const;
- const char_t* value() const;
-
- // Get attribute value, or the default value if attribute is empty
- const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const;
-
- // Get attribute value as a number, or the default value if conversion did not succeed or attribute is empty
- int as_int(int def = 0) const;
- unsigned int as_uint(unsigned int def = 0) const;
- double as_double(double def = 0) const;
- float as_float(float def = 0) const;
-
- #ifdef PUGIXML_HAS_LONG_LONG
- long long as_llong(long long def = 0) const;
- unsigned long long as_ullong(unsigned long long def = 0) const;
- #endif
-
- // Get attribute value as bool (returns true if first character is in '1tTyY' set), or the default value if attribute is empty
- bool as_bool(bool def = false) const;
-
- // Set attribute name/value (returns false if attribute is empty or there is not enough memory)
- bool set_name(const char_t* rhs);
- bool set_value(const char_t* rhs);
-
- // Set attribute value with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
- bool set_value(int rhs);
- bool set_value(unsigned int rhs);
- bool set_value(double rhs);
- bool set_value(float rhs);
- bool set_value(bool rhs);
-
- #ifdef PUGIXML_HAS_LONG_LONG
- bool set_value(long long rhs);
- bool set_value(unsigned long long rhs);
- #endif
-
- // Set attribute value (equivalent to set_value without error checking)
- xml_attribute& operator=(const char_t* rhs);
- xml_attribute& operator=(int rhs);
- xml_attribute& operator=(unsigned int rhs);
- xml_attribute& operator=(double rhs);
- xml_attribute& operator=(float rhs);
- xml_attribute& operator=(bool rhs);
-
- #ifdef PUGIXML_HAS_LONG_LONG
- xml_attribute& operator=(long long rhs);
- xml_attribute& operator=(unsigned long long rhs);
- #endif
-
- // Get next/previous attribute in the attribute list of the parent node
- xml_attribute next_attribute() const;
- xml_attribute previous_attribute() const;
-
- // Get hash value (unique for handles to the same object)
- size_t hash_value() const;
-
- // Get internal pointer
- xml_attribute_struct* internal_object() const;
- };
-
-#ifdef __BORLANDC__
- // Borland C++ workaround
- bool PUGIXML_FUNCTION operator&&(const xml_attribute& lhs, bool rhs);
- bool PUGIXML_FUNCTION operator||(const xml_attribute& lhs, bool rhs);
-#endif
-
- // A light-weight handle for manipulating nodes in DOM tree
- class PUGIXML_CLASS xml_node
- {
- friend class xml_attribute_iterator;
- friend class xml_node_iterator;
- friend class xml_named_node_iterator;
-
- protected:
- xml_node_struct* _root;
-
- typedef void (*unspecified_bool_type)(xml_node***);
-
- public:
- // Default constructor. Constructs an empty node.
- xml_node();
-
- // Constructs node from internal pointer
- explicit xml_node(xml_node_struct* p);
-
- // Safe bool conversion operator
- operator unspecified_bool_type() const;
-
- // Borland C++ workaround
- bool operator!() const;
-
- // Comparison operators (compares wrapped node pointers)
- bool operator==(const xml_node& r) const;
- bool operator!=(const xml_node& r) const;
- bool operator<(const xml_node& r) const;
- bool operator>(const xml_node& r) const;
- bool operator<=(const xml_node& r) const;
- bool operator>=(const xml_node& r) const;
-
- // Check if node is empty.
- bool empty() const;
-
- // Get node type
- xml_node_type type() const;
-
- // Get node name, or "" if node is empty or it has no name
- const char_t* name() const;
-
- // Get node value, or "" if node is empty or it has no value
- // Note: For <node>text</node> node.value() does not return "text"! Use child_value() or text() methods to access text inside nodes.
- const char_t* value() const;
-
- // Get attribute list
- xml_attribute first_attribute() const;
- xml_attribute last_attribute() const;
-
- // Get children list
- xml_node first_child() const;
- xml_node last_child() const;
-
- // Get next/previous sibling in the children list of the parent node
- xml_node next_sibling() const;
- xml_node previous_sibling() const;
-
- // Get parent node
- xml_node parent() const;
-
- // Get root of DOM tree this node belongs to
- xml_node root() const;
-
- // Get text object for the current node
- xml_text text() const;
-
- // Get child, attribute or next/previous sibling with the specified name
- xml_node child(const char_t* name) const;
- xml_attribute attribute(const char_t* name) const;
- xml_node next_sibling(const char_t* name) const;
- xml_node previous_sibling(const char_t* name) const;
-
- // Get attribute, starting the search from a hint (and updating hint so that searching for a sequence of attributes is fast)
- xml_attribute attribute(const char_t* name, xml_attribute& hint) const;
-
- // Get child value of current node; that is, value of the first child node of type PCDATA/CDATA
- const char_t* child_value() const;
-
- // Get child value of child with specified name. Equivalent to child(name).child_value().
- const char_t* child_value(const char_t* name) const;
-
- // Set node name/value (returns false if node is empty, there is not enough memory, or node can not have name/value)
- bool set_name(const char_t* rhs);
- bool set_value(const char_t* rhs);
-
- // Add attribute with specified name. Returns added attribute, or empty attribute on errors.
- xml_attribute append_attribute(const char_t* name);
- xml_attribute prepend_attribute(const char_t* name);
- xml_attribute insert_attribute_after(const char_t* name, const xml_attribute& attr);
- xml_attribute insert_attribute_before(const char_t* name, const xml_attribute& attr);
-
- // Add a copy of the specified attribute. Returns added attribute, or empty attribute on errors.
- xml_attribute append_copy(const xml_attribute& proto);
- xml_attribute prepend_copy(const xml_attribute& proto);
- xml_attribute insert_copy_after(const xml_attribute& proto, const xml_attribute& attr);
- xml_attribute insert_copy_before(const xml_attribute& proto, const xml_attribute& attr);
-
- // Add child node with specified type. Returns added node, or empty node on errors.
- xml_node append_child(xml_node_type type = node_element);
- xml_node prepend_child(xml_node_type type = node_element);
- xml_node insert_child_after(xml_node_type type, const xml_node& node);
- xml_node insert_child_before(xml_node_type type, const xml_node& node);
-
- // Add child element with specified name. Returns added node, or empty node on errors.
- xml_node append_child(const char_t* name);
- xml_node prepend_child(const char_t* name);
- xml_node insert_child_after(const char_t* name, const xml_node& node);
- xml_node insert_child_before(const char_t* name, const xml_node& node);
-
- // Add a copy of the specified node as a child. Returns added node, or empty node on errors.
- xml_node append_copy(const xml_node& proto);
- xml_node prepend_copy(const xml_node& proto);
- xml_node insert_copy_after(const xml_node& proto, const xml_node& node);
- xml_node insert_copy_before(const xml_node& proto, const xml_node& node);
-
- // Move the specified node to become a child of this node. Returns moved node, or empty node on errors.
- xml_node append_move(const xml_node& moved);
- xml_node prepend_move(const xml_node& moved);
- xml_node insert_move_after(const xml_node& moved, const xml_node& node);
- xml_node insert_move_before(const xml_node& moved, const xml_node& node);
-
- // Remove specified attribute
- bool remove_attribute(const xml_attribute& a);
- bool remove_attribute(const char_t* name);
-
- // Remove specified child
- bool remove_child(const xml_node& n);
- bool remove_child(const char_t* name);
-
- // Parses buffer as an XML document fragment and appends all nodes as children of the current node.
- // Copies/converts the buffer, so it may be deleted or changed after the function returns.
- // Note: append_buffer allocates memory that has the lifetime of the owning document; removing the appended nodes does not immediately reclaim that memory.
- xml_parse_result append_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
-
- // Find attribute using predicate. Returns first attribute for which predicate returned true.
- template <typename Predicate> xml_attribute find_attribute(Predicate pred) const
- {
- if (!_root) return xml_attribute();
-
- for (xml_attribute attrib = first_attribute(); attrib; attrib = attrib.next_attribute())
- if (pred(attrib))
- return attrib;
-
- return xml_attribute();
- }
-
- // Find child node using predicate. Returns first child for which predicate returned true.
- template <typename Predicate> xml_node find_child(Predicate pred) const
- {
- if (!_root) return xml_node();
-
- for (xml_node node = first_child(); node; node = node.next_sibling())
- if (pred(node))
- return node;
-
- return xml_node();
- }
-
- // Find node from subtree using predicate. Returns first node from subtree (depth-first), for which predicate returned true.
- template <typename Predicate> xml_node find_node(Predicate pred) const
- {
- if (!_root) return xml_node();
-
- xml_node cur = first_child();
-
- while (cur._root && cur._root != _root)
- {
- if (pred(cur)) return cur;
-
- if (cur.first_child()) cur = cur.first_child();
- else if (cur.next_sibling()) cur = cur.next_sibling();
- else
- {
- while (!cur.next_sibling() && cur._root != _root) cur = cur.parent();
-
- if (cur._root != _root) cur = cur.next_sibling();
- }
- }
-
- return xml_node();
- }
-
- // Find child node by attribute name/value
- xml_node find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const;
- xml_node find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const;
-
- #ifndef PUGIXML_NO_STL
- // Get the absolute node path from root as a text string.
- string_t path(char_t delimiter = '/') const;
- #endif
-
- // Search for a node by path consisting of node names and . or .. elements.
- xml_node first_element_by_path(const char_t* path, char_t delimiter = '/') const;
-
- // Recursively traverse subtree with xml_tree_walker
- bool traverse(xml_tree_walker& walker);
-
- #ifndef PUGIXML_NO_XPATH
- // Select single node by evaluating XPath query. Returns first node from the resulting node set.
- xpath_node select_node(const char_t* query, xpath_variable_set* variables = 0) const;
- xpath_node select_node(const xpath_query& query) const;
-
- // Select node set by evaluating XPath query
- xpath_node_set select_nodes(const char_t* query, xpath_variable_set* variables = 0) const;
- xpath_node_set select_nodes(const xpath_query& query) const;
-
- // (deprecated: use select_node instead) Select single node by evaluating XPath query.
- xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const;
- xpath_node select_single_node(const xpath_query& query) const;
-
- #endif
-
- // Print subtree using a writer object
- void print(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
-
- #ifndef PUGIXML_NO_STL
- // Print subtree to stream
- void print(std::basic_ostream<char, std::char_traits<char> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
- void print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, unsigned int depth = 0) const;
- #endif
-
- // Child nodes iterators
- typedef xml_node_iterator iterator;
-
- iterator begin() const;
- iterator end() const;
-
- // Attribute iterators
- typedef xml_attribute_iterator attribute_iterator;
-
- attribute_iterator attributes_begin() const;
- attribute_iterator attributes_end() const;
-
- // Range-based for support
- xml_object_range<xml_node_iterator> children() const;
- xml_object_range<xml_named_node_iterator> children(const char_t* name) const;
- xml_object_range<xml_attribute_iterator> attributes() const;
-
- // Get node offset in parsed file/string (in char_t units) for debugging purposes
- ptrdiff_t offset_debug() const;
-
- // Get hash value (unique for handles to the same object)
- size_t hash_value() const;
-
- // Get internal pointer
- xml_node_struct* internal_object() const;
- };
-
-#ifdef __BORLANDC__
- // Borland C++ workaround
- bool PUGIXML_FUNCTION operator&&(const xml_node& lhs, bool rhs);
- bool PUGIXML_FUNCTION operator||(const xml_node& lhs, bool rhs);
-#endif
-
- // A helper for working with text inside PCDATA nodes
- class PUGIXML_CLASS xml_text
- {
- friend class xml_node;
-
- xml_node_struct* _root;
-
- typedef void (*unspecified_bool_type)(xml_text***);
-
- explicit xml_text(xml_node_struct* root);
-
- xml_node_struct* _data_new();
- xml_node_struct* _data() const;
-
- public:
- // Default constructor. Constructs an empty object.
- xml_text();
-
- // Safe bool conversion operator
- operator unspecified_bool_type() const;
-
- // Borland C++ workaround
- bool operator!() const;
-
- // Check if text object is empty
- bool empty() const;
-
- // Get text, or "" if object is empty
- const char_t* get() const;
-
- // Get text, or the default value if object is empty
- const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const;
-
- // Get text as a number, or the default value if conversion did not succeed or object is empty
- int as_int(int def = 0) const;
- unsigned int as_uint(unsigned int def = 0) const;
- double as_double(double def = 0) const;
- float as_float(float def = 0) const;
-
- #ifdef PUGIXML_HAS_LONG_LONG
- long long as_llong(long long def = 0) const;
- unsigned long long as_ullong(unsigned long long def = 0) const;
- #endif
-
- // Get text as bool (returns true if first character is in '1tTyY' set), or the default value if object is empty
- bool as_bool(bool def = false) const;
-
- // Set text (returns false if object is empty or there is not enough memory)
- bool set(const char_t* rhs);
-
- // Set text with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
- bool set(int rhs);
- bool set(unsigned int rhs);
- bool set(double rhs);
- bool set(float rhs);
- bool set(bool rhs);
-
- #ifdef PUGIXML_HAS_LONG_LONG
- bool set(long long rhs);
- bool set(unsigned long long rhs);
- #endif
-
- // Set text (equivalent to set without error checking)
- xml_text& operator=(const char_t* rhs);
- xml_text& operator=(int rhs);
- xml_text& operator=(unsigned int rhs);
- xml_text& operator=(double rhs);
- xml_text& operator=(float rhs);
- xml_text& operator=(bool rhs);
-
- #ifdef PUGIXML_HAS_LONG_LONG
- xml_text& operator=(long long rhs);
- xml_text& operator=(unsigned long long rhs);
- #endif
-
- // Get the data node (node_pcdata or node_cdata) for this object
- xml_node data() const;
- };
-
-#ifdef __BORLANDC__
- // Borland C++ workaround
- bool PUGIXML_FUNCTION operator&&(const xml_text& lhs, bool rhs);
- bool PUGIXML_FUNCTION operator||(const xml_text& lhs, bool rhs);
-#endif
-
- // Child node iterator (a bidirectional iterator over a collection of xml_node)
- class PUGIXML_CLASS xml_node_iterator
- {
- friend class xml_node;
-
- private:
- mutable xml_node _wrap;
- xml_node _parent;
-
- xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent);
-
- public:
- // Iterator traits
- typedef ptrdiff_t difference_type;
- typedef xml_node value_type;
- typedef xml_node* pointer;
- typedef xml_node& reference;
-
- #ifndef PUGIXML_NO_STL
- typedef std::bidirectional_iterator_tag iterator_category;
- #endif
-
- // Default constructor
- xml_node_iterator();
-
- // Construct an iterator which points to the specified node
- xml_node_iterator(const xml_node& node);
-
- // Iterator operators
- bool operator==(const xml_node_iterator& rhs) const;
- bool operator!=(const xml_node_iterator& rhs) const;
-
- xml_node& operator*() const;
- xml_node* operator->() const;
-
- const xml_node_iterator& operator++();
- xml_node_iterator operator++(int);
-
- const xml_node_iterator& operator--();
- xml_node_iterator operator--(int);
- };
-
- // Attribute iterator (a bidirectional iterator over a collection of xml_attribute)
- class PUGIXML_CLASS xml_attribute_iterator
- {
- friend class xml_node;
-
- private:
- mutable xml_attribute _wrap;
- xml_node _parent;
-
- xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent);
-
- public:
- // Iterator traits
- typedef ptrdiff_t difference_type;
- typedef xml_attribute value_type;
- typedef xml_attribute* pointer;
- typedef xml_attribute& reference;
-
- #ifndef PUGIXML_NO_STL
- typedef std::bidirectional_iterator_tag iterator_category;
- #endif
-
- // Default constructor
- xml_attribute_iterator();
-
- // Construct an iterator which points to the specified attribute
- xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent);
-
- // Iterator operators
- bool operator==(const xml_attribute_iterator& rhs) const;
- bool operator!=(const xml_attribute_iterator& rhs) const;
-
- xml_attribute& operator*() const;
- xml_attribute* operator->() const;
-
- const xml_attribute_iterator& operator++();
- xml_attribute_iterator operator++(int);
-
- const xml_attribute_iterator& operator--();
- xml_attribute_iterator operator--(int);
- };
-
- // Named node range helper
- class PUGIXML_CLASS xml_named_node_iterator
- {
- friend class xml_node;
-
- public:
- // Iterator traits
- typedef ptrdiff_t difference_type;
- typedef xml_node value_type;
- typedef xml_node* pointer;
- typedef xml_node& reference;
-
- #ifndef PUGIXML_NO_STL
- typedef std::bidirectional_iterator_tag iterator_category;
- #endif
-
- // Default constructor
- xml_named_node_iterator();
-
- // Construct an iterator which points to the specified node
- xml_named_node_iterator(const xml_node& node, const char_t* name);
-
- // Iterator operators
- bool operator==(const xml_named_node_iterator& rhs) const;
- bool operator!=(const xml_named_node_iterator& rhs) const;
-
- xml_node& operator*() const;
- xml_node* operator->() const;
-
- const xml_named_node_iterator& operator++();
- xml_named_node_iterator operator++(int);
-
- const xml_named_node_iterator& operator--();
- xml_named_node_iterator operator--(int);
-
- private:
- mutable xml_node _wrap;
- xml_node _parent;
- const char_t* _name;
-
- xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name);
- };
-
- // Abstract tree walker class (see xml_node::traverse)
- class PUGIXML_CLASS xml_tree_walker
- {
- friend class xml_node;
-
- private:
- int _depth;
-
- protected:
- // Get current traversal depth
- int depth() const;
-
- public:
- xml_tree_walker();
- virtual ~xml_tree_walker();
-
- // Callback that is called when traversal begins
- virtual bool begin(xml_node& node);
-
- // Callback that is called for each node traversed
- virtual bool for_each(xml_node& node) = 0;
-
- // Callback that is called when traversal ends
- virtual bool end(xml_node& node);
- };
-
- // Parsing status, returned as part of xml_parse_result object
- enum xml_parse_status
- {
- status_ok = 0, // No error
-
- status_file_not_found, // File was not found during load_file()
- status_io_error, // Error reading from file/stream
- status_out_of_memory, // Could not allocate memory
- status_internal_error, // Internal error occurred
-
- status_unrecognized_tag, // Parser could not determine tag type
-
- status_bad_pi, // Parsing error occurred while parsing document declaration/processing instruction
- status_bad_comment, // Parsing error occurred while parsing comment
- status_bad_cdata, // Parsing error occurred while parsing CDATA section
- status_bad_doctype, // Parsing error occurred while parsing document type declaration
- status_bad_pcdata, // Parsing error occurred while parsing PCDATA section
- status_bad_start_element, // Parsing error occurred while parsing start element tag
- status_bad_attribute, // Parsing error occurred while parsing element attribute
- status_bad_end_element, // Parsing error occurred while parsing end element tag
- status_end_element_mismatch,// There was a mismatch of start-end tags (closing tag had incorrect name, some tag was not closed or there was an excessive closing tag)
-
- status_append_invalid_root, // Unable to append nodes since root type is not node_element or node_document (exclusive to xml_node::append_buffer)
-
- status_no_document_element // Parsing resulted in a document without element nodes
- };
-
- // Parsing result
- struct PUGIXML_CLASS xml_parse_result
- {
- // Parsing status (see xml_parse_status)
- xml_parse_status status;
-
- // Last parsed offset (in char_t units from start of input data)
- ptrdiff_t offset;
-
- // Source document encoding
- xml_encoding encoding;
-
- // Default constructor, initializes object to failed state
- xml_parse_result();
-
- // Cast to bool operator
- operator bool() const;
-
- // Get error description
- const char* description() const;
- };
-
- // Document class (DOM tree root)
- class PUGIXML_CLASS xml_document: public xml_node
- {
- private:
- char_t* _buffer;
-
- char _memory[192];
-
- // Non-copyable semantics
- xml_document(const xml_document&);
- xml_document& operator=(const xml_document&);
-
- void create();
- void destroy();
-
- public:
- // Default constructor, makes empty document
- xml_document();
-
- // Destructor, invalidates all node/attribute handles to this document
- ~xml_document();
-
- // Removes all nodes, leaving the empty document
- void reset();
-
- // Removes all nodes, then copies the entire contents of the specified document
- void reset(const xml_document& proto);
-
- #ifndef PUGIXML_NO_STL
- // Load document from stream.
- xml_parse_result load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
- xml_parse_result load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options = parse_default);
- #endif
-
- // (deprecated: use load_string instead) Load document from zero-terminated string. No encoding conversions are applied.
- xml_parse_result load(const char_t* contents, unsigned int options = parse_default);
-
- // Load document from zero-terminated string. No encoding conversions are applied.
- xml_parse_result load_string(const char_t* contents, unsigned int options = parse_default);
-
- // Load document from file
- xml_parse_result load_file(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
- xml_parse_result load_file(const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
-
- // Load document from buffer. Copies/converts the buffer, so it may be deleted or changed after the function returns.
- xml_parse_result load_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
-
- // Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data).
- // You should ensure that buffer data will persist throughout the document's lifetime, and free the buffer memory manually once document is destroyed.
- xml_parse_result load_buffer_inplace(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
-
- // Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data).
- // You should allocate the buffer with pugixml allocation function; document will free the buffer when it is no longer needed (you can't use it anymore).
- xml_parse_result load_buffer_inplace_own(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
-
- // Save XML document to writer (semantics is slightly different from xml_node::print, see documentation for details).
- void save(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
-
- #ifndef PUGIXML_NO_STL
- // Save XML document to stream (semantics is slightly different from xml_node::print, see documentation for details).
- void save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
- void save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default) const;
- #endif
-
- // Save XML to file
- bool save_file(const char* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
- bool save_file(const wchar_t* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
-
- // Get document element
- xml_node document_element() const;
- };
-
-#ifndef PUGIXML_NO_XPATH
- // XPath query return type
- enum xpath_value_type
- {
- xpath_type_none, // Unknown type (query failed to compile)
- xpath_type_node_set, // Node set (xpath_node_set)
- xpath_type_number, // Number
- xpath_type_string, // String
- xpath_type_boolean // Boolean
- };
-
- // XPath parsing result
- struct PUGIXML_CLASS xpath_parse_result
- {
- // Error message (0 if no error)
- const char* error;
-
- // Last parsed offset (in char_t units from string start)
- ptrdiff_t offset;
-
- // Default constructor, initializes object to failed state
- xpath_parse_result();
-
- // Cast to bool operator
- operator bool() const;
-
- // Get error description
- const char* description() const;
- };
-
- // A single XPath variable
- class PUGIXML_CLASS xpath_variable
- {
- friend class xpath_variable_set;
-
- protected:
- xpath_value_type _type;
- xpath_variable* _next;
-
- xpath_variable(xpath_value_type type);
-
- // Non-copyable semantics
- xpath_variable(const xpath_variable&);
- xpath_variable& operator=(const xpath_variable&);
-
- public:
- // Get variable name
- const char_t* name() const;
-
- // Get variable type
- xpath_value_type type() const;
-
- // Get variable value; no type conversion is performed, default value (false, NaN, empty string, empty node set) is returned on type mismatch error
- bool get_boolean() const;
- double get_number() const;
- const char_t* get_string() const;
- const xpath_node_set& get_node_set() const;
-
- // Set variable value; no type conversion is performed, false is returned on type mismatch error
- bool set(bool value);
- bool set(double value);
- bool set(const char_t* value);
- bool set(const xpath_node_set& value);
- };
-
- // A set of XPath variables
- class PUGIXML_CLASS xpath_variable_set
- {
- private:
- xpath_variable* _data[64];
-
- void _assign(const xpath_variable_set& rhs);
- void _swap(xpath_variable_set& rhs);
-
- xpath_variable* _find(const char_t* name) const;
-
- static bool _clone(xpath_variable* var, xpath_variable** out_result);
- static void _destroy(xpath_variable* var);
-
- public:
- // Default constructor/destructor
- xpath_variable_set();
- ~xpath_variable_set();
-
- // Copy constructor/assignment operator
- xpath_variable_set(const xpath_variable_set& rhs);
- xpath_variable_set& operator=(const xpath_variable_set& rhs);
-
- #if __cplusplus >= 201103
- // Move semantics support
- xpath_variable_set(xpath_variable_set&& rhs);
- xpath_variable_set& operator=(xpath_variable_set&& rhs);
- #endif
-
- // Add a new variable or get the existing one, if the types match
- xpath_variable* add(const char_t* name, xpath_value_type type);
-
- // Set value of an existing variable; no type conversion is performed, false is returned if there is no such variable or if types mismatch
- bool set(const char_t* name, bool value);
- bool set(const char_t* name, double value);
- bool set(const char_t* name, const char_t* value);
- bool set(const char_t* name, const xpath_node_set& value);
-
- // Get existing variable by name
- xpath_variable* get(const char_t* name);
- const xpath_variable* get(const char_t* name) const;
- };
-
- // A compiled XPath query object
- class PUGIXML_CLASS xpath_query
- {
- private:
- void* _impl;
- xpath_parse_result _result;
-
- typedef void (*unspecified_bool_type)(xpath_query***);
-
- // Non-copyable semantics
- xpath_query(const xpath_query&);
- xpath_query& operator=(const xpath_query&);
-
- public:
- // Construct a compiled object from XPath expression.
- // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on compilation errors.
- explicit xpath_query(const char_t* query, xpath_variable_set* variables = 0);
-
- // Constructor
- xpath_query();
-
- // Destructor
- ~xpath_query();
-
- #if __cplusplus >= 201103
- // Move semantics support
- xpath_query(xpath_query&& rhs);
- xpath_query& operator=(xpath_query&& rhs);
- #endif
-
- // Get query expression return type
- xpath_value_type return_type() const;
-
- // Evaluate expression as boolean value in the specified context; performs type conversion if necessary.
- // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
- bool evaluate_boolean(const xpath_node& n) const;
-
- // Evaluate expression as double value in the specified context; performs type conversion if necessary.
- // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
- double evaluate_number(const xpath_node& n) const;
-
- #ifndef PUGIXML_NO_STL
- // Evaluate expression as string value in the specified context; performs type conversion if necessary.
- // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
- string_t evaluate_string(const xpath_node& n) const;
- #endif
-
- // Evaluate expression as string value in the specified context; performs type conversion if necessary.
- // At most capacity characters are written to the destination buffer, full result size is returned (includes terminating zero).
- // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
- // If PUGIXML_NO_EXCEPTIONS is defined, returns empty set instead.
- size_t evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const;
-
- // Evaluate expression as node set in the specified context.
- // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on type mismatch and std::bad_alloc on out of memory errors.
- // If PUGIXML_NO_EXCEPTIONS is defined, returns empty node set instead.
- xpath_node_set evaluate_node_set(const xpath_node& n) const;
-
- // Evaluate expression as node set in the specified context.
- // Return first node in document order, or empty node if node set is empty.
- // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on type mismatch and std::bad_alloc on out of memory errors.
- // If PUGIXML_NO_EXCEPTIONS is defined, returns empty node instead.
- xpath_node evaluate_node(const xpath_node& n) const;
-
- // Get parsing result (used to get compilation errors in PUGIXML_NO_EXCEPTIONS mode)
- const xpath_parse_result& result() const;
-
- // Safe bool conversion operator
- operator unspecified_bool_type() const;
-
- // Borland C++ workaround
- bool operator!() const;
- };
-
- #ifndef PUGIXML_NO_EXCEPTIONS
- // XPath exception class
- class PUGIXML_CLASS xpath_exception: public std::exception
- {
- private:
- xpath_parse_result _result;
-
- public:
- // Construct exception from parse result
- explicit xpath_exception(const xpath_parse_result& result);
-
- // Get error message
- virtual const char* what() const throw();
-
- // Get parse result
- const xpath_parse_result& result() const;
- };
- #endif
-
- // XPath node class (either xml_node or xml_attribute)
- class PUGIXML_CLASS xpath_node
- {
- private:
- xml_node _node;
- xml_attribute _attribute;
-
- typedef void (*unspecified_bool_type)(xpath_node***);
-
- public:
- // Default constructor; constructs empty XPath node
- xpath_node();
-
- // Construct XPath node from XML node/attribute
- xpath_node(const xml_node& node);
- xpath_node(const xml_attribute& attribute, const xml_node& parent);
-
- // Get node/attribute, if any
- xml_node node() const;
- xml_attribute attribute() const;
-
- // Get parent of contained node/attribute
- xml_node parent() const;
-
- // Safe bool conversion operator
- operator unspecified_bool_type() const;
-
- // Borland C++ workaround
- bool operator!() const;
-
- // Comparison operators
- bool operator==(const xpath_node& n) const;
- bool operator!=(const xpath_node& n) const;
- };
-
-#ifdef __BORLANDC__
- // Borland C++ workaround
- bool PUGIXML_FUNCTION operator&&(const xpath_node& lhs, bool rhs);
- bool PUGIXML_FUNCTION operator||(const xpath_node& lhs, bool rhs);
-#endif
-
- // A fixed-size collection of XPath nodes
- class PUGIXML_CLASS xpath_node_set
- {
- public:
- // Collection type
- enum type_t
- {
- type_unsorted, // Not ordered
- type_sorted, // Sorted by document order (ascending)
- type_sorted_reverse // Sorted by document order (descending)
- };
-
- // Constant iterator type
- typedef const xpath_node* const_iterator;
-
- // We define non-constant iterator to be the same as constant iterator so that various generic algorithms (i.e. boost foreach) work
- typedef const xpath_node* iterator;
-
- // Default constructor. Constructs empty set.
- xpath_node_set();
-
- // Constructs a set from iterator range; data is not checked for duplicates and is not sorted according to provided type, so be careful
- xpath_node_set(const_iterator begin, const_iterator end, type_t type = type_unsorted);
-
- // Destructor
- ~xpath_node_set();
-
- // Copy constructor/assignment operator
- xpath_node_set(const xpath_node_set& ns);
- xpath_node_set& operator=(const xpath_node_set& ns);
-
- #if __cplusplus >= 201103
- // Move semantics support
- xpath_node_set(xpath_node_set&& rhs);
- xpath_node_set& operator=(xpath_node_set&& rhs);
- #endif
-
- // Get collection type
- type_t type() const;
-
- // Get collection size
- size_t size() const;
-
- // Indexing operator
- const xpath_node& operator[](size_t index) const;
-
- // Collection iterators
- const_iterator begin() const;
- const_iterator end() const;
-
- // Sort the collection in ascending/descending order by document order
- void sort(bool reverse = false);
-
- // Get first node in the collection by document order
- xpath_node first() const;
-
- // Check if collection is empty
- bool empty() const;
-
- private:
- type_t _type;
-
- xpath_node _storage;
-
- xpath_node* _begin;
- xpath_node* _end;
-
- void _assign(const_iterator begin, const_iterator end, type_t type);
- void _move(xpath_node_set& rhs);
- };
-#endif
-
-#ifndef PUGIXML_NO_STL
- // Convert wide string to UTF8
- std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const wchar_t* str);
- std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >& str);
-
- // Convert UTF8 to wide string
- std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const char* str);
- std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const std::basic_string<char, std::char_traits<char>, std::allocator<char> >& str);
-#endif
-
- // Memory allocation function interface; returns pointer to allocated memory or NULL on failure
- typedef void* (*allocation_function)(size_t size);
-
- // Memory deallocation function interface
- typedef void (*deallocation_function)(void* ptr);
-
- // Override default memory management functions. All subsequent allocations/deallocations will be performed via supplied functions.
- void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate);
-
- // Get current memory management functions
- allocation_function PUGIXML_FUNCTION get_memory_allocation_function();
- deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function();
-}
-
-#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
-namespace std
-{
- // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
- std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_node_iterator&);
- std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_attribute_iterator&);
- std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_named_node_iterator&);
-}
-#endif
-
-#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
-namespace std
-{
- // Workarounds for (non-standard) iterator category detection
- std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_node_iterator&);
- std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_attribute_iterator&);
- std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_named_node_iterator&);
-}
-#endif
-
-#endif
-
-// Make sure implementation is included in header-only mode
-// Use macro expansion in #include to work around QMake (QTBUG-11923)
-#if defined(PUGIXML_HEADER_ONLY) && !defined(PUGIXML_SOURCE)
-# define PUGIXML_SOURCE "pugixml.cpp"
-# include PUGIXML_SOURCE
-#endif
-
-/**
- * Copyright (c) 2006-2015 Arseny Kapoulkine
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
diff --git a/contrib/other-builds/.metadata/.plugins/org.eclipse.cdt.make.core/specs.cpp b/contrib/other-builds/.metadata/.plugins/org.eclipse.cdt.make.core/specs.cpp
deleted file mode 100644
index 8b1378917..000000000
--- a/contrib/other-builds/.metadata/.plugins/org.eclipse.cdt.make.core/specs.cpp
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/contrib/other-builds/CreateOnDiskPt/.cproject b/contrib/other-builds/CreateOnDiskPt/.cproject
index 95c0a6a01..5109ea082 100644
--- a/contrib/other-builds/CreateOnDiskPt/.cproject
+++ b/contrib/other-builds/CreateOnDiskPt/.cproject
@@ -5,7 +5,7 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.602770742" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -21,20 +21,21 @@
<builder buildPath="${workspace_loc:/CreateOnDiskPt}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.1448999623" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.2139008298" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.2008193341" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
- <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.627728792" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1832148270" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.1681469807" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.627728792" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1832148270" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.1681469807" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
</option>
- <option id="gnu.cpp.compiler.option.preprocessor.def.425758466" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+ <option id="gnu.cpp.compiler.option.preprocessor.def.425758466" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" useByScannerDiscovery="false" valueType="definedSymbols">
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.285185442" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.587301391" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.2116328611" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.debug.option.debugging.level.2129089003" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.2116328611" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.debug.option.debugging.level.2129089003" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.c.compiler.option.dialect.std.1726327101" superClass="gnu.c.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.c.compiler.dialect.c11" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1464765114" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.606542044" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
@@ -65,12 +66,14 @@
<listOptionValue builtIn="false" value="boost_program_options"/>
<listOptionValue builtIn="false" value="pthread"/>
<listOptionValue builtIn="false" value="z"/>
+ <listOptionValue builtIn="false" value="probingpt"/>
<listOptionValue builtIn="false" value="bz2"/>
<listOptionValue builtIn="false" value="dl"/>
<listOptionValue builtIn="false" value="rt"/>
</option>
<option id="gnu.cpp.link.option.paths.815001500" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/probingpt/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../xmlrpc-c/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/search/Debug&quot;"/>
@@ -98,7 +101,7 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.168814843" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -114,13 +117,13 @@
<builder buildPath="${workspace_loc:/CreateOnDiskPt}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.361379130" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.799410017" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1404799808" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
- <option id="gnu.cpp.compiler.exe.release.option.optimization.level.696270987" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.release.option.debugging.level.1052942304" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.optimization.level.696270987" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.debugging.level.1052942304" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2139553528" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.1633770352" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1936692829" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.release.option.debugging.level.2077864052" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1936692829" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.release.option.debugging.level.2077864052" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1045097629" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.455462639" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
diff --git a/contrib/other-builds/CreateProbingPT/.cproject b/contrib/other-builds/CreateProbingPT/.cproject
index ef52fa87a..30c840321 100644
--- a/contrib/other-builds/CreateProbingPT/.cproject
+++ b/contrib/other-builds/CreateProbingPT/.cproject
@@ -31,13 +31,14 @@
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
</option>
- <option id="gnu.cpp.compiler.option.dialect.std.582049299" name="Language standard" superClass="gnu.cpp.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.cpp.compiler.dialect.default" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.dialect.std.582049299" name="Language standard" superClass="gnu.cpp.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.cpp.compiler.dialect.c++11" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1025890847" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.901839598" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1207177372" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<option id="gnu.cpp.link.option.paths.1842543896" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/probingpt/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
<listOptionValue builtIn="false" value="/opt/local/lib"/>
@@ -54,6 +55,7 @@
<listOptionValue builtIn="false" value="boost_program_options"/>
<listOptionValue builtIn="false" value="dl"/>
<listOptionValue builtIn="false" value="pthread"/>
+ <listOptionValue builtIn="false" value="probingpt"/>
<listOptionValue builtIn="false" value="rt"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1820802929" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
@@ -145,4 +147,5 @@
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
+ <storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
</cproject>
diff --git a/contrib/other-builds/CreateProbingPT/.project b/contrib/other-builds/CreateProbingPT/.project
index 1cf1441b2..6e863a77e 100644
--- a/contrib/other-builds/CreateProbingPT/.project
+++ b/contrib/other-builds/CreateProbingPT/.project
@@ -30,7 +30,7 @@
<link>
<name>CreateProbingPT.cpp</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/misc/CreateProbingPT.cpp</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/probingpt/CreateProbingPT.cpp</locationURI>
</link>
</linkedResources>
</projectDescription>
diff --git a/contrib/other-builds/OnDiskPt/.cproject b/contrib/other-builds/OnDiskPt/.cproject
index f551380fd..e8f1a07e8 100644
--- a/contrib/other-builds/OnDiskPt/.cproject
+++ b/contrib/other-builds/OnDiskPt/.cproject
@@ -20,7 +20,7 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
+ <configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.725420545" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
<targetPlatform binaryParser="org.eclipse.cdt.core.MachO64;org.eclipse.cdt.core.ELF" id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.1586272140" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
@@ -37,14 +37,14 @@
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.901309550" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.2001028511" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
- <option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.676959181" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.1484480101" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.1556683035" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.676959181" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.1484480101" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.1556683035" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
<listOptionValue builtIn="false" value="/opt/local/include"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
</option>
- <option id="gnu.cpp.compiler.option.preprocessor.def.1052680347" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+ <option id="gnu.cpp.compiler.option.preprocessor.def.1052680347" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" useByScannerDiscovery="false" valueType="definedSymbols">
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
<listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
<listOptionValue builtIn="false" value="HAVE_BOOST"/>
@@ -54,8 +54,9 @@
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1930757481" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1161943634" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.576529322" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.426851981" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.576529322" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.426851981" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.c.compiler.option.dialect.std.488444816" superClass="gnu.c.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.c.compiler.dialect.c11" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1925590121" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
</toolChain>
@@ -82,7 +83,7 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.701931933" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.701931933" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
<folderInfo id="cdt.managedbuild.config.macosx.exe.release.701931933." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.5036266" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
<targetPlatform binaryParser="org.eclipse.cdt.core.MachO64;org.eclipse.cdt.core.ELF" id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.396818757" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
@@ -99,13 +100,13 @@
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.385722535" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.983488413" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
- <option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.21058138" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.1704184753" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.21058138" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.1704184753" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1034344194" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.1029035384" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.171488636" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.843129626" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.171488636" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.843129626" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1014721928" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
</toolChain>
diff --git a/contrib/other-builds/consolidate/.cproject b/contrib/other-builds/consolidate/.cproject
index 410fbcb8c..d4b35e500 100644
--- a/contrib/other-builds/consolidate/.cproject
+++ b/contrib/other-builds/consolidate/.cproject
@@ -5,7 +5,7 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.2091728208" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -21,17 +21,18 @@
<builder buildPath="${workspace_loc:/consolidate}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.1286696537" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1571215005" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1626949654" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
- <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.1186248186" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1416850495" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.534201039" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.1186248186" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1416850495" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.534201039" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/include&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1468157552" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.82249493" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.83105790" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.debug.option.debugging.level.937329669" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.83105790" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.debug.option.debugging.level.937329669" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.c.compiler.option.dialect.std.1673130461" superClass="gnu.c.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.c.compiler.dialect.c11" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.461173729" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1950007837" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
@@ -90,7 +91,7 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.185559773" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -106,13 +107,13 @@
<builder buildPath="${workspace_loc:/consolidate}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.1812036307" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1942293389" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.520681695" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
- <option id="gnu.cpp.compiler.exe.release.option.optimization.level.649091161" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.release.option.debugging.level.1279967053" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.optimization.level.649091161" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.debugging.level.1279967053" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.624630717" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.233526141" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1882834640" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.release.option.debugging.level.1438334736" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1882834640" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.release.option.debugging.level.1438334736" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1338220126" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.2105674082" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
diff --git a/contrib/other-builds/extract-ghkm/.cproject b/contrib/other-builds/extract-ghkm/.cproject
index 4a07699dc..d0ebe75f8 100644
--- a/contrib/other-builds/extract-ghkm/.cproject
+++ b/contrib/other-builds/extract-ghkm/.cproject
@@ -5,33 +5,34 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.1975272196" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
+ <extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.1975272196" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.1975272196" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.1975272196." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1513645956" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.621141597" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
<builder buildPath="${workspace_loc:/extract-ghkm}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.1641243676" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.150240237" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.494510261" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
- <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.520735766" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.730994342" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.1461708548" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.520735766" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.730994342" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.1461708548" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1669405610" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.849972124" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.154971011" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.debug.option.debugging.level.600284918" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.154971011" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.debug.option.debugging.level.600284918" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.c.compiler.option.dialect.std.799123490" superClass="gnu.c.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.c.compiler.dialect.c11" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2129236570" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1041890522" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
@@ -61,29 +62,29 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1834059581" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
+ <extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1834059581" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1834059581" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.1834059581." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.154645030" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.483189041" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
<builder buildPath="${workspace_loc:/extract-ghkm}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.882065438" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1816735709" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.788831102" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
- <option id="gnu.cpp.compiler.exe.release.option.optimization.level.1367749352" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.release.option.debugging.level.1361465069" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.optimization.level.1367749352" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.debugging.level.1361465069" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.162097682" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.394449415" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.573463904" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.release.option.debugging.level.361552728" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.573463904" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.release.option.debugging.level.361552728" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.769108402" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.1636823200" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
diff --git a/contrib/other-builds/extract-mixed-syntax/.cproject b/contrib/other-builds/extract-mixed-syntax/.cproject
index f246b0c32..3507b8755 100644
--- a/contrib/other-builds/extract-mixed-syntax/.cproject
+++ b/contrib/other-builds/extract-mixed-syntax/.cproject
@@ -5,25 +5,25 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.1409305044" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
+ <extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.1409305044" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.1409305044" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.1409305044." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1388217813" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.933039924" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
<builder buildPath="${workspace_loc:/extract-mixed-syntax}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.48110463" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.98916974" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1188224255" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
- <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.391351501" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1590628643" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.968781133" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.391351501" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1590628643" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.968781133" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../phrase-extract&quot;"/>
@@ -31,8 +31,9 @@
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1981472807" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.902271411" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.736647824" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.debug.option.debugging.level.2105683691" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.736647824" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.debug.option.debugging.level.2105683691" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.c.compiler.option.dialect.std.1299117505" superClass="gnu.c.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.c.compiler.dialect.c11" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1947641767" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.966210211" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
@@ -65,29 +66,29 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1529383679" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
+ <extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1529383679" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1529383679" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.1529383679." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.1048718406" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.456212753" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
<builder buildPath="${workspace_loc:/extract-mixed-syntax}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.1570266419" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.577209301" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1943090599" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
- <option id="gnu.cpp.compiler.exe.release.option.optimization.level.1506916262" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.release.option.debugging.level.2132167444" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.optimization.level.1506916262" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.debugging.level.2132167444" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.619145487" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.2063838952" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.391536740" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.release.option.debugging.level.147725572" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.391536740" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.release.option.debugging.level.147725572" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1423330814" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.1089231126" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
diff --git a/contrib/other-builds/extract-rules/.cproject b/contrib/other-builds/extract-rules/.cproject
index afeef551b..6867c15f9 100644
--- a/contrib/other-builds/extract-rules/.cproject
+++ b/contrib/other-builds/extract-rules/.cproject
@@ -5,7 +5,7 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.1909818145" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -21,9 +21,9 @@
<builder buildPath="${workspace_loc:/extract-rules}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.1538811811" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.417385938" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.274036343" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
- <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.1227466042" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.640603457" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.231971122" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.1227466042" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.640603457" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.231971122" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
@@ -31,8 +31,9 @@
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.61884195" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.212337827" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.831633145" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.debug.option.debugging.level.1948518292" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.831633145" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.debug.option.debugging.level.1948518292" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.c.compiler.option.dialect.std.1484037341" superClass="gnu.c.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.c.compiler.dialect.c11" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1036034505" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.982611610" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
@@ -62,7 +63,7 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.130284564" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -78,13 +79,13 @@
<builder buildPath="${workspace_loc:/extract-rules}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.1972638661" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1382194499" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.605692631" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
- <option id="gnu.cpp.compiler.exe.release.option.optimization.level.1543139461" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.release.option.debugging.level.307019882" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.optimization.level.1543139461" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.debugging.level.307019882" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.771498068" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.1332689416" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1372281360" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.release.option.debugging.level.2028047264" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1372281360" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.release.option.debugging.level.2028047264" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1645644335" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.586184465" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
diff --git a/contrib/other-builds/extract/.cproject b/contrib/other-builds/extract/.cproject
index 4c80306be..63e57b8b7 100644
--- a/contrib/other-builds/extract/.cproject
+++ b/contrib/other-builds/extract/.cproject
@@ -5,7 +5,7 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.2119725657" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -21,17 +21,18 @@
<builder buildPath="${workspace_loc:/extract}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.1816006533" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.876593881" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1859867372" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
- <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.1585316374" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.535775760" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.874182289" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.1585316374" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.535775760" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.874182289" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1355287045" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.1202195555" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.1840757183" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.debug.option.debugging.level.876682032" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.1840757183" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.debug.option.debugging.level.876682032" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.c.compiler.option.dialect.std.1297324451" superClass="gnu.c.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.c.compiler.dialect.c11" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.676382830" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.83617569" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
@@ -62,7 +63,7 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1230189043" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -78,13 +79,13 @@
<builder buildPath="${workspace_loc:/extract}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.872962284" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1342549060" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1229278587" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
- <option id="gnu.cpp.compiler.exe.release.option.optimization.level.509799885" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.release.option.debugging.level.682561415" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.optimization.level.509799885" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.debugging.level.682561415" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1043901368" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.1628542348" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1033362550" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.release.option.debugging.level.429156793" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1033362550" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.release.option.debugging.level.429156793" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.389761516" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.1299282565" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
diff --git a/contrib/other-builds/extractor/.cproject b/contrib/other-builds/extractor/.cproject
index 79805f176..728ed4410 100644
--- a/contrib/other-builds/extractor/.cproject
+++ b/contrib/other-builds/extractor/.cproject
@@ -5,7 +5,7 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.1133345948" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -21,17 +21,18 @@
<builder buildPath="${workspace_loc:/extractor/Debug}" id="cdt.managedbuild.target.gnu.builder.exe.debug.238577912" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1956867596" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1512268277" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
- <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.2143789149" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.285958391" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.966722418" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.2143789149" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.285958391" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.966722418" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1839105433" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.554846982" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.538786560" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.debug.option.debugging.level.2125704556" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.538786560" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.debug.option.debugging.level.2125704556" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.c.compiler.option.dialect.std.511347863" superClass="gnu.c.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.c.compiler.dialect.c11" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.100176353" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1048685119" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
@@ -69,7 +70,7 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1385955159" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -85,13 +86,13 @@
<builder buildPath="${workspace_loc:/extractor/Release}" id="cdt.managedbuild.target.gnu.builder.exe.release.1583162909" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.141140356" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.2048722912" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
- <option id="gnu.cpp.compiler.exe.release.option.optimization.level.1971624451" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.release.option.debugging.level.582466413" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.optimization.level.1971624451" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.debugging.level.582466413" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1466533418" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.328232610" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.447164665" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.release.option.debugging.level.28848417" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.447164665" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.release.option.debugging.level.28848417" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1088446293" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.1134906841" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
diff --git a/contrib/other-builds/lm/.cproject b/contrib/other-builds/lm/.cproject
index 3455890f7..4f428751d 100644
--- a/contrib/other-builds/lm/.cproject
+++ b/contrib/other-builds/lm/.cproject
@@ -11,16 +11,16 @@
</externalSetting>
</externalSettings>
<extensions>
+ <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
+ <configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.640882096" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
<targetPlatform binaryParser="org.eclipse.cdt.core.MachO64;org.eclipse.cdt.core.ELF" id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.793478365" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
@@ -37,14 +37,14 @@
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.775866405" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1024092140" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
- <option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.586969644" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.7139692" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.1988092227" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.586969644" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.7139692" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.1988092227" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
<listOptionValue builtIn="false" value="/opt/local/include"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../&quot;"/>
</option>
- <option id="gnu.cpp.compiler.option.preprocessor.def.1980966336" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+ <option id="gnu.cpp.compiler.option.preprocessor.def.1980966336" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" useByScannerDiscovery="false" valueType="definedSymbols">
<listOptionValue builtIn="false" value="HAVE_BOOST"/>
<listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
@@ -53,15 +53,13 @@
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.20502600" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.34201722" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.934764060" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.2078705375" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.934764060" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.2078705375" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.c.compiler.option.dialect.std.170940382" superClass="gnu.c.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.c.compiler.dialect.c11" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1028526865" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
</toolChain>
</folderInfo>
- <fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750.279457772" name="corpus_count_test.cc" rcbsApplicability="disable" resourcePath="builder/corpus_count_test.cc" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1024092140.654966100">
- <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1024092140.654966100" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1024092140"/>
- </fileInfo>
<sourceEntries>
<entry excluding="builder/corpus_count_test.cc|builder/adjust_counts_test.cc|wrappers|left_test.cc|model_test.cc" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
@@ -73,17 +71,17 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.203229648" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
+ <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.203229648" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.203229648" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
<folderInfo id="cdt.managedbuild.config.macosx.exe.release.203229648." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.1942852701" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
<targetPlatform binaryParser="org.eclipse.cdt.core.MachO64;org.eclipse.cdt.core.ELF" id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.2107180060" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
@@ -100,13 +98,13 @@
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.2103660404" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.2026817795" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
- <option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.1671568858" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.230723898" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.1671568858" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.230723898" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1058671602" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.990116990" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.1934130159" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.1848737807" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.1934130159" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.1848737807" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1294441742" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
</toolChain>
diff --git a/contrib/other-builds/mert_lib/.cproject b/contrib/other-builds/mert_lib/.cproject
index 908ecf784..a3deea5c0 100644
--- a/contrib/other-builds/mert_lib/.cproject
+++ b/contrib/other-builds/mert_lib/.cproject
@@ -11,7 +11,7 @@
</externalSetting>
</externalSettings>
<extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -26,20 +26,21 @@
<builder buildPath="${workspace_loc:/mert_lib/Debug}" id="cdt.managedbuild.target.gnu.builder.lib.debug.1369910974" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.lib.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.lib.debug.89397980" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.lib.debug"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.329920537" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug">
- <option id="gnu.cpp.compiler.lib.debug.option.optimization.level.469164841" name="Optimization Level" superClass="gnu.cpp.compiler.lib.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.lib.debug.option.debugging.level.1050747398" name="Debug Level" superClass="gnu.cpp.compiler.lib.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.1565260476" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <option id="gnu.cpp.compiler.lib.debug.option.optimization.level.469164841" name="Optimization Level" superClass="gnu.cpp.compiler.lib.debug.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.lib.debug.option.debugging.level.1050747398" name="Debug Level" superClass="gnu.cpp.compiler.lib.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.1565260476" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
</option>
- <option id="gnu.cpp.compiler.option.preprocessor.def.2072043013" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+ <option id="gnu.cpp.compiler.option.preprocessor.def.2072043013" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" useByScannerDiscovery="false" valueType="definedSymbols">
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1183866856" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.lib.debug.1365367786" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.lib.debug">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.lib.debug.option.optimization.level.2015372664" name="Optimization Level" superClass="gnu.c.compiler.lib.debug.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.lib.debug.option.debugging.level.1305426004" name="Debug Level" superClass="gnu.c.compiler.lib.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.lib.debug.option.optimization.level.2015372664" name="Optimization Level" superClass="gnu.c.compiler.lib.debug.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.lib.debug.option.debugging.level.1305426004" name="Debug Level" superClass="gnu.c.compiler.lib.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.c.compiler.option.dialect.std.1074385956" superClass="gnu.c.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.c.compiler.dialect.c11" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1773858729" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.base.665057130" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.base"/>
@@ -66,7 +67,7 @@
</externalSetting>
</externalSettings>
<extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -81,13 +82,13 @@
<builder buildPath="${workspace_loc:/mert_lib/Release}" id="cdt.managedbuild.target.gnu.builder.lib.release.1571526654" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.lib.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.lib.release.135514273" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.lib.release"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.lib.release.1908677536" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.lib.release">
- <option id="gnu.cpp.compiler.lib.release.option.optimization.level.2066806653" name="Optimization Level" superClass="gnu.cpp.compiler.lib.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.lib.release.option.debugging.level.1745834437" name="Debug Level" superClass="gnu.cpp.compiler.lib.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.lib.release.option.optimization.level.2066806653" name="Optimization Level" superClass="gnu.cpp.compiler.lib.release.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.lib.release.option.debugging.level.1745834437" name="Debug Level" superClass="gnu.cpp.compiler.lib.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1107467937" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.lib.release.698587478" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.lib.release">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.lib.release.option.optimization.level.1553399912" name="Optimization Level" superClass="gnu.c.compiler.lib.release.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.lib.release.option.debugging.level.1151183164" name="Debug Level" superClass="gnu.c.compiler.lib.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.lib.release.option.optimization.level.1553399912" name="Optimization Level" superClass="gnu.c.compiler.lib.release.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.lib.release.option.debugging.level.1151183164" name="Debug Level" superClass="gnu.c.compiler.lib.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.599680392" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.base.612246593" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.base"/>
diff --git a/contrib/other-builds/moses-cmd/.cproject b/contrib/other-builds/moses-cmd/.cproject
index 05ff2d1e3..2dde393bc 100644
--- a/contrib/other-builds/moses-cmd/.cproject
+++ b/contrib/other-builds/moses-cmd/.cproject
@@ -5,32 +5,32 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.461114338" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
- <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.461114338" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.461114338" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.461114338." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1896491482" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform binaryParser="org.eclipse.cdt.core.ELF;org.eclipse.cdt.core.MachO64" id="cdt.managedbuild.target.gnu.platform.exe.debug.2144309834" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
<builder buildPath="${workspace_loc:/moses-cmd/Debug}" id="cdt.managedbuild.target.gnu.builder.exe.debug.56664170" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1278274354" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.626095182" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
- <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.2084031389" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.811344734" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.2118465683" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.2084031389" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.811344734" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.2118465683" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
<listOptionValue builtIn="false" value="/opt/local/include/"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/include&quot;"/>
</option>
- <option id="gnu.cpp.compiler.option.preprocessor.def.849384962" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+ <option id="gnu.cpp.compiler.option.preprocessor.def.849384962" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" useByScannerDiscovery="false" valueType="definedSymbols">
<listOptionValue builtIn="false" value="WITH_THREADS"/>
<listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
<listOptionValue builtIn="false" value="HAVE_BOOST"/>
@@ -39,14 +39,16 @@
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.363379373" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.504208780" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.782785840" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.debug.option.debugging.level.1722468661" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.782785840" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.debug.option.debugging.level.1722468661" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.c.compiler.option.dialect.std.1946460401" superClass="gnu.c.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.c.compiler.dialect.c11" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.860636318" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.2096997198" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1546774818" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.paths.523170942" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/probingpt/Debug&quot;"/>
<listOptionValue builtIn="false" value="/home/hieu/workspace/xmlrpc-c/xmlrpc-c-1.39.07/lib"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../xmlrpc-c/lib&quot;"/>
@@ -85,6 +87,7 @@
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
<listOptionValue builtIn="false" value="dl"/>
+ <listOptionValue builtIn="false" value="probingpt"/>
<listOptionValue builtIn="false" value="rt"/>
</option>
<option id="gnu.cpp.link.option.userobjs.1542590830" name="Other objects" superClass="gnu.cpp.link.option.userobjs"/>
@@ -109,30 +112,30 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.2121690436" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
- <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.2121690436" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.2121690436" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.2121690436." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.1577734572" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform binaryParser="org.eclipse.cdt.core.ELF;org.eclipse.cdt.core.MachO64" id="cdt.managedbuild.target.gnu.platform.exe.release.1535487925" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
<builder buildPath="${workspace_loc:/moses-cmd/Release}" id="cdt.managedbuild.target.gnu.builder.exe.release.2122426151" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.441254004" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.376987001" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
- <option id="gnu.cpp.compiler.exe.release.option.optimization.level.1276092407" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.release.option.debugging.level.1794377625" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.optimization.level.1276092407" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.debugging.level.1794377625" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.93276909" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.1553350132" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.93522212" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.release.option.debugging.level.1860716465" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.93522212" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.release.option.debugging.level.1860716465" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1508465135" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.1658143889" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
@@ -171,10 +174,10 @@
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="refreshScope" versionNumber="2">
- <configuration configurationName="Release">
+ <configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/moses-cmd"/>
</configuration>
- <configuration configurationName="Debug">
+ <configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/moses-cmd"/>
</configuration>
</storageModule>
diff --git a/contrib/other-builds/moses-cmd/.project b/contrib/other-builds/moses-cmd/.project
index 312c61654..9aca6fe7a 100644
--- a/contrib/other-builds/moses-cmd/.project
+++ b/contrib/other-builds/moses-cmd/.project
@@ -6,6 +6,7 @@
<project>lm</project>
<project>moses</project>
<project>OnDiskPt</project>
+ <project>probingpt</project>
<project>search</project>
<project>util</project>
</projects>
diff --git a/contrib/other-builds/moses/.cproject b/contrib/other-builds/moses/.cproject
index 491caa587..5080c02ad 100644
--- a/contrib/other-builds/moses/.cproject
+++ b/contrib/other-builds/moses/.cproject
@@ -36,7 +36,6 @@
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.53427549" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" useByScannerDiscovery="false" valueType="definedSymbols">
<listOptionValue builtIn="false" value="HAVE_XMLRPC_C"/>
- <listOptionValue builtIn="false" value="PT_UG"/>
<listOptionValue builtIn="false" value="MOSES_VERSION_ID=0"/>
<listOptionValue builtIn="false" value="HAVE_CMPH"/>
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
@@ -51,6 +50,7 @@
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.1313249282" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.146557271" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.debug.option.debugging.level.1656486500" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.c.compiler.option.dialect.std.1722325939" superClass="gnu.c.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.c.compiler.dialect.c11" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.570559630" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1471271407" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
@@ -76,7 +76,7 @@
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1459438132" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.871386239" name="LDHT.h" rcbsApplicability="disable" resourcePath="LM/LDHT.h" toolsToInvoke=""/>
<sourceEntries>
- <entry excluding="TranslationModel/UG/ptable-lookup.cc|TranslationModel/UG/ptable-lookup-corpus.cc|TranslationModel/UG/mm/test-http-client.cc|TranslationModel/UG/ptable-describe-features.cc|TranslationModel/UG/count-ptable-features.cc|TranslationModel/UG/try-align2.cc|TranslationModel/UG/try-align.cc|TranslationModel/UG/spe-check-coverage3.cc|TranslationModel/UG/spe-check-coverage2.cc|TranslationModel/UG/spe-check-coverage.cc|TranslationModel/UG/sim-pe.cc|TranslationModel/UG/generic/stringdist|TranslationModel/UG/mm/test-dynamic-im-tsa.cc|TranslationModel/UG/mm/mtt.count.cc|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|LM/bilingual-lm|LM/MaxEntSRI.h|LM/MaxEntSRI.cpp|LM/BilingualLM.h|LM/BilingualLM.cpp|LM/Rand.h|LM/Rand.cpp|LM/LDHT.h|LM/LDHT.cpp|LM/ORLM.h|LM/ORLM.cpp|LM/NeuralLMWrapper.h|LM/NeuralLMWrapper.cpp|LM/SRI.h|LM/SRI.cpp|LM/IRST.h|LM/IRST.cpp|LM/DALMWrapper.h|LM/DALMWrapper.cpp|LM/oxlm|TranslationModel/UG/util" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
+ <entry excluding="TranslationModel/UG|TranslationModel/UG/ptable-lookup.cc|TranslationModel/UG/ptable-lookup-corpus.cc|TranslationModel/UG/mm/test-http-client.cc|TranslationModel/UG/ptable-describe-features.cc|TranslationModel/UG/count-ptable-features.cc|TranslationModel/UG/try-align2.cc|TranslationModel/UG/try-align.cc|TranslationModel/UG/spe-check-coverage3.cc|TranslationModel/UG/spe-check-coverage2.cc|TranslationModel/UG/spe-check-coverage.cc|TranslationModel/UG/sim-pe.cc|TranslationModel/UG/generic/stringdist|TranslationModel/UG/mm/test-dynamic-im-tsa.cc|TranslationModel/UG/mm/mtt.count.cc|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|LM/bilingual-lm|LM/MaxEntSRI.h|LM/MaxEntSRI.cpp|LM/BilingualLM.h|LM/BilingualLM.cpp|LM/Rand.h|LM/Rand.cpp|LM/LDHT.h|LM/LDHT.cpp|LM/ORLM.h|LM/ORLM.cpp|LM/NeuralLMWrapper.h|LM/NeuralLMWrapper.cpp|LM/SRI.h|LM/SRI.cpp|LM/IRST.h|LM/IRST.cpp|LM/DALMWrapper.h|LM/DALMWrapper.cpp|LM/oxlm|TranslationModel/UG/util" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>
@@ -102,13 +102,13 @@
<builder buildPath="${workspace_loc:/moses}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.391025866" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1623685179" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1914197251" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
- <option id="gnu.cpp.compiler.exe.release.option.optimization.level.2144875045" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.release.option.debugging.level.9472765" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.optimization.level.2144875045" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.debugging.level.9472765" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1143887599" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.1469504539" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1950806117" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.release.option.debugging.level.1109082339" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1950806117" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.release.option.debugging.level.1109082339" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2103068478" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.105686784" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project
index adb9ad47e..a57a9df90 100644
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@@ -1201,6 +1201,31 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/EditOps.h</locationURI>
</link>
<link>
+ <name>FF/ExampleStatefulFF.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/FF/ExampleStatefulFF.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/ExampleStatefulFF.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/FF/ExampleStatefulFF.h</locationURI>
+ </link>
+ <link>
+ <name>FF/ExampleStatelessFF.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/FF/ExampleStatelessFF.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/ExampleStatelessFF.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/FF/ExampleStatelessFF.h</locationURI>
+ </link>
+ <link>
+ <name>FF/ExampleTranslationOptionListFeature.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/FF/ExampleTranslationOptionListFeature.h</locationURI>
+ </link>
+ <link>
<name>FF/FFState.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/FFState.cpp</locationURI>
@@ -1421,26 +1446,6 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SetSourcePhrase.h</locationURI>
</link>
<link>
- <name>FF/SkeletonStatefulFF.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/FF/SkeletonStatefulFF.cpp</locationURI>
- </link>
- <link>
- <name>FF/SkeletonStatefulFF.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/FF/SkeletonStatefulFF.h</locationURI>
- </link>
- <link>
- <name>FF/SkeletonStatelessFF.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/FF/SkeletonStatelessFF.cpp</locationURI>
- </link>
- <link>
- <name>FF/SkeletonStatelessFF.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/FF/SkeletonStatelessFF.h</locationURI>
- </link>
- <link>
<name>FF/SoftMatchingFeature.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SoftMatchingFeature.cpp</locationURI>
@@ -1696,6 +1701,16 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/DALMWrapper.h</locationURI>
</link>
<link>
+ <name>LM/ExampleLM.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/LM/ExampleLM.cpp</locationURI>
+ </link>
+ <link>
+ <name>LM/ExampleLM.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/LM/ExampleLM.h</locationURI>
+ </link>
+ <link>
<name>LM/IRST.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/IRST.cpp</locationURI>
@@ -1846,16 +1861,6 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/SingleFactor.h</locationURI>
</link>
<link>
- <name>LM/SkeletonLM.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/LM/SkeletonLM.cpp</locationURI>
- </link>
- <link>
- <name>LM/SkeletonLM.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/LM/SkeletonLM.h</locationURI>
- </link>
- <link>
<name>LM/backward.arpa</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/backward.arpa</locationURI>
@@ -2141,6 +2146,16 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
+ <name>TranslationModel/ExamplePT.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/ExamplePT.cpp</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/ExamplePT.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/ExamplePT.h</locationURI>
+ </link>
+ <link>
<name>TranslationModel/PhraseDictionary.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionary.cpp</locationURI>
@@ -2151,11 +2166,6 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionary.h</locationURI>
</link>
<link>
- <name>TranslationModel/PhraseDictionaryDynSuffixArray.README</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryDynSuffixArray.README</locationURI>
- </link>
- <link>
<name>TranslationModel/PhraseDictionaryDynamicCacheBased.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp</locationURI>
@@ -2276,9 +2286,14 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryTreeAdaptor.h</locationURI>
</link>
<link>
- <name>TranslationModel/ProbingPT</name>
- <type>2</type>
- <locationURI>virtual:/virtual</locationURI>
+ <name>TranslationModel/ProbingPT.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT.cpp</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/ProbingPT.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT.h</locationURI>
</link>
<link>
<name>TranslationModel/RuleTable</name>
@@ -2291,16 +2306,6 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>TranslationModel/SkeletonPT.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/SkeletonPT.cpp</locationURI>
- </link>
- <link>
- <name>TranslationModel/SkeletonPT.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/SkeletonPT.h</locationURI>
- </link>
- <link>
<name>TranslationModel/UG</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
@@ -3136,6 +3141,16 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.h</locationURI>
</link>
<link>
+ <name>TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.cpp</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.h</locationURI>
+ </link>
+ <link>
<name>TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp</locationURI>
@@ -3166,16 +3181,6 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.h</locationURI>
</link>
<link>
- <name>TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp</locationURI>
- </link>
- <link>
- <name>TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h</locationURI>
- </link>
- <link>
<name>TranslationModel/CYKPlusParser/CompletedRuleCollection.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.cpp</locationURI>
@@ -3331,6 +3336,11 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/StringVector.h</locationURI>
</link>
<link>
+ <name>TranslationModel/CompactPT/StringVectorTemp.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/StringVectorTemp.h</locationURI>
+ </link>
+ <link>
<name>TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp</locationURI>
@@ -3356,101 +3366,6 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>TranslationModel/ProbingPT/Jamfile</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/Jamfile</locationURI>
- </link>
- <link>
- <name>TranslationModel/ProbingPT/ProbingPT.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/ProbingPT.cpp</locationURI>
- </link>
- <link>
- <name>TranslationModel/ProbingPT/ProbingPT.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/ProbingPT.h</locationURI>
- </link>
- <link>
- <name>TranslationModel/ProbingPT/StoreTarget.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/StoreTarget.cpp</locationURI>
- </link>
- <link>
- <name>TranslationModel/ProbingPT/StoreTarget.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/StoreTarget.h</locationURI>
- </link>
- <link>
- <name>TranslationModel/ProbingPT/StoreVocab.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/StoreVocab.cpp</locationURI>
- </link>
- <link>
- <name>TranslationModel/ProbingPT/StoreVocab.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/StoreVocab.h</locationURI>
- </link>
- <link>
- <name>TranslationModel/ProbingPT/hash.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/hash.cpp</locationURI>
- </link>
- <link>
- <name>TranslationModel/ProbingPT/hash.hh</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/hash.hh</locationURI>
- </link>
- <link>
- <name>TranslationModel/ProbingPT/line_splitter.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/line_splitter.cpp</locationURI>
- </link>
- <link>
- <name>TranslationModel/ProbingPT/line_splitter.hh</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/line_splitter.hh</locationURI>
- </link>
- <link>
- <name>TranslationModel/ProbingPT/probing_hash_utils.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/probing_hash_utils.cpp</locationURI>
- </link>
- <link>
- <name>TranslationModel/ProbingPT/probing_hash_utils.hh</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/probing_hash_utils.hh</locationURI>
- </link>
- <link>
- <name>TranslationModel/ProbingPT/querying.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/querying.cpp</locationURI>
- </link>
- <link>
- <name>TranslationModel/ProbingPT/querying.hh</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/querying.hh</locationURI>
- </link>
- <link>
- <name>TranslationModel/ProbingPT/storing.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/storing.cpp</locationURI>
- </link>
- <link>
- <name>TranslationModel/ProbingPT/storing.hh</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/storing.hh</locationURI>
- </link>
- <link>
- <name>TranslationModel/ProbingPT/vocabid.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/vocabid.cpp</locationURI>
- </link>
- <link>
- <name>TranslationModel/ProbingPT/vocabid.hh</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/vocabid.hh</locationURI>
- </link>
- <link>
<name>TranslationModel/RuleTable/Loader.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/RuleTable/Loader.h</locationURI>
@@ -3646,11 +3561,46 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
+ <name>TranslationModel/UG/bitext-find.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bitext-find.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/check-coverage.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/check-coverage.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/check-coverage2.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/check-coverage2.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/check-coverage3.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/check-coverage3.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/check-coverage5.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/check-coverage5.cc</locationURI>
+ </link>
+ <link>
<name>TranslationModel/UG/count-ptable-features.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/count-ptable-features.cc</locationURI>
</link>
<link>
+ <name>TranslationModel/UG/filter-pt.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/filter-pt.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/fuzzy.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/fuzzy.cc</locationURI>
+ </link>
+ <link>
<name>TranslationModel/UG/generic</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
@@ -3706,6 +3656,16 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_coherence.h</locationURI>
</link>
<link>
+ <name>TranslationModel/UG/sapt_pscore_cumulative_bias.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_cumulative_bias.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/sapt_pscore_length_ratio.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_length_ratio.h</locationURI>
+ </link>
+ <link>
<name>TranslationModel/UG/sapt_pscore_lex1.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_lex1.h</locationURI>
@@ -3771,6 +3731,26 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/spe-check-coverage3.cc</locationURI>
</link>
<link>
+ <name>TranslationModel/UG/test-boost-threadpool.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/test-boost-threadpool.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/test-domspec.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/test-domspec.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/test-iptr.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/test-iptr.cc</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/test-ranked-phrase-lookup.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/test-ranked-phrase-lookup.cc</locationURI>
+ </link>
+ <link>
<name>TranslationModel/UG/try-align.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/try-align.cc</locationURI>
@@ -3881,12 +3861,12 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>TranslationModel/CompactPT/bin/gcc-4.8</name>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>TranslationModel/UG/bin/gcc-4.8</name>
+ <name>TranslationModel/UG/bin/darwin-4.2.1</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
@@ -3936,9 +3916,9 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/Jamfile</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/Makefile</name>
+ <name>TranslationModel/UG/mm/Makefile.x</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/Makefile</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/Makefile.x</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin</name>
@@ -4436,17 +4416,17 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeSearcher.h</locationURI>
</link>
<link>
- <name>TranslationModel/CompactPT/bin/gcc-4.8/release</name>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>TranslationModel/UG/bin/gcc-4.8/release</name>
+ <name>TranslationModel/UG/bin/darwin-4.2.1/release</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>TranslationModel/UG/generic/bin/gcc-4.8</name>
+ <name>TranslationModel/UG/generic/bin/darwin-4.2.1</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
@@ -4531,12 +4511,7 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/threading/ug_thread_safe_counter.h</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1</name>
- <type>2</type>
- <locationURI>virtual:/virtual</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
@@ -4561,27 +4536,32 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on</name>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on</name>
+ <name>TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>TranslationModel/UG/generic/bin/gcc-4.8/release</name>
+ <name>TranslationModel/UG/bin/darwin-4.2.1/release/link-static</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release</name>
+ <name>TranslationModel/UG/generic/bin/darwin-4.2.1/release</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
@@ -4601,27 +4581,42 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static</name>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static</name>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on</name>
+ <name>TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on</name>
+ <name>TranslationModel/UG/bin/darwin-4.2.1/release/link-static/threading-multi</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on</name>
+ <name>TranslationModel/UG/generic/bin/darwin-4.2.1/release/debug-symbols-on</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
@@ -5181,27 +5176,97 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/Rand.o</locationURI>
</link>
<link>
- <name>TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi</name>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi</name>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/BlockHashIndex.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/BlockHashIndex.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/CmphStringVectorAdapter.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/CmphStringVectorAdapter.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/LexicalReorderingTableCompact.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/LexicalReorderingTableCompact.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/LexicalReorderingTableCreator.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/LexicalReorderingTableCreator.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/MurmurHash3.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/MurmurHash3.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/PhraseDecoder.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/PhraseDecoder.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/PhraseDictionaryCompact.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/PhraseDictionaryCompact.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/PhraseTableCreator.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/PhraseTableCreator.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/TargetPhraseCollectionCache.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/TargetPhraseCollectionCache.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/ThrowingFwrite.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/ThrowingFwrite.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/bin/darwin-4.2.1/release/link-static/threading-multi/TargetPhraseCollectionCache.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/link-static/threading-multi/TargetPhraseCollectionCache.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/bin/darwin-4.2.1/release/link-static/threading-multi/mmsapt.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/link-static/threading-multi/mmsapt.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/bin/darwin-4.2.1/release/link-static/threading-multi/mmsapt_align.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/link-static/threading-multi/mmsapt_align.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/bin/darwin-4.2.1/release/debug-symbols-on/link-static</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static</name>
+ <name>TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static/threading-multi</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
@@ -5686,136 +5751,271 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/bin/BackwardTest.test/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/SkeletonLM.o</locationURI>
</link>
<link>
- <name>TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/BlockHashIndex.o</name>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/BlockHashIndex.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/BlockHashIndex.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/BlockHashIndex.o</locationURI>
</link>
<link>
- <name>TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/CmphStringVectorAdapter.o</name>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/CmphStringVectorAdapter.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/CmphStringVectorAdapter.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/CmphStringVectorAdapter.o</locationURI>
</link>
<link>
- <name>TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/LexicalReorderingTableCompact.o</name>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LexicalReorderingTableCompact.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/LexicalReorderingTableCompact.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LexicalReorderingTableCompact.o</locationURI>
</link>
<link>
- <name>TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/LexicalReorderingTableCreator.o</name>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LexicalReorderingTableCreator.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/LexicalReorderingTableCreator.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LexicalReorderingTableCreator.o</locationURI>
</link>
<link>
- <name>TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/MurmurHash3.o</name>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/MurmurHash3.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/MurmurHash3.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/MurmurHash3.o</locationURI>
</link>
<link>
- <name>TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/PhraseDecoder.o</name>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDecoder.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/PhraseDecoder.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDecoder.o</locationURI>
</link>
<link>
- <name>TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryCompact.o</name>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryCompact.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryCompact.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryCompact.o</locationURI>
</link>
<link>
- <name>TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/PhraseTableCreator.o</name>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseTableCreator.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/PhraseTableCreator.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseTableCreator.o</locationURI>
</link>
<link>
- <name>TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ThrowingFwrite.o</name>
+ <name>TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ThrowingFwrite.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ThrowingFwrite.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ThrowingFwrite.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/count-ptable-features</name>
+ <name>TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/count-ptable-features</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/count-ptable-features</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/count-ptable-features</locationURI>
</link>
<link>
- <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/count-ptable-features.o</name>
+ <name>TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/count-ptable-features.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/count-ptable-features.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/count-ptable-features.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmsapt.o</name>
+ <name>TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmsapt.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmsapt.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmsapt.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmsapt_align.o</name>
+ <name>TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmsapt_align.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmsapt_align.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmsapt_align.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-describe-features</name>
+ <name>TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ptable-describe-features</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-describe-features</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ptable-describe-features</locationURI>
</link>
<link>
- <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-describe-features.o</name>
+ <name>TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ptable-describe-features.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-describe-features.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ptable-describe-features.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup</name>
+ <name>TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ptable-lookup</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ptable-lookup</locationURI>
</link>
<link>
- <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup-corpus</name>
+ <name>TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ptable-lookup.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup-corpus</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ptable-lookup.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup-corpus.o</name>
+ <name>TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup-corpus.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage</locationURI>
</link>
<link>
- <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup.o</name>
+ <name>TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage</name>
+ <name>TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/try-align</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/try-align</locationURI>
</link>
<link>
- <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage.o</name>
+ <name>TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/try-align.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/try-align.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/try-align</name>
+ <name>TranslationModel/UG/generic/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static/threading-multi/ug_get_options.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/try-align</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static/threading-multi/ug_get_options.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/try-align.o</name>
+ <name>TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static/threading-multi/ug_splice_arglist.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/try-align.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static/threading-multi/ug_splice_arglist.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi</name>
- <type>2</type>
- <locationURI>virtual:/virtual</locationURI>
+ <name>TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static/threading-multi/ug_stream.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static/threading-multi/ug_stream.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
- <type>2</type>
- <locationURI>virtual:/virtual</locationURI>
+ <name>TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static/threading-multi/ug_thread_pool.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static/threading-multi/ug_thread_pool.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi</name>
+ <name>TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static/threading-multi/ug_thread_safe_counter.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static/threading-multi/ug_thread_safe_counter.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/mmlex-build</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/mmlex-build</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/mmlex-build.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/mmlex-build.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/mtt-build</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/mtt-build</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/mtt-build.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/mtt-build.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/num_read_write.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/num_read_write.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/symal2mam</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/symal2mam</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/symal2mam.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/symal2mam.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/tpt_pickler.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/tpt_pickler.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/tpt_tightindex.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/tpt_tightindex.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/tpt_tokenindex.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/tpt_tokenindex.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_bitext.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_bitext.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_bitext_jstats.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_bitext_jstats.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_bitext_pstats.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_bitext_pstats.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_conll_record.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_conll_record.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_corpus_token.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_corpus_token.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_deptree.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_deptree.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_http_client.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_http_client.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_im_bitext.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_im_bitext.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_lexical_reordering.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_lexical_reordering.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_load_primer.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_load_primer.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_phrasepair.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_phrasepair.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_sampling_bias.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_sampling_bias.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_tsa_array_entry.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_tsa_array_entry.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_ttrack_base.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_ttrack_base.o</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_ttrack_position.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_ttrack_position.o</locationURI>
+ </link>
+ <link>
<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/FF/LexicalReordering/LexicalReordering.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/FF/LexicalReordering/LexicalReordering.o</locationURI>
@@ -6046,389 +6246,194 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TranslationModel/fuzzy-match/create_xml.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_get_options.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_get_options.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_splice_arglist.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_splice_arglist.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_stream.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_stream.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_thread_safe_counter.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_thread_safe_counter.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/num_read_write.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/num_read_write.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_pickler.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_pickler.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tightindex.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tightindex.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tokenindex.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tokenindex.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext_jstats.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext_jstats.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext_pstats.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext_pstats.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_conll_record.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_conll_record.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_corpus_token.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_corpus_token.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_deptree.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_deptree.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_http_client.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_http_client.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_im_bitext.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_im_bitext.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_lexical_reordering.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_lexical_reordering.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_load_primer.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_load_primer.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_phrasepair.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_phrasepair.o</locationURI>
- </link>
- <link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_sampling_bias.o</name>
+ <name>TranslationModel/UG/generic/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_get_options.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_sampling_bias.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_get_options.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_tsa_array_entry.o</name>
+ <name>TranslationModel/UG/generic/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_splice_arglist.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_tsa_array_entry.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_splice_arglist.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_base.o</name>
+ <name>TranslationModel/UG/generic/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_stream.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_base.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_stream.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_position.o</name>
+ <name>TranslationModel/UG/generic/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_thread_safe_counter.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_position.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_thread_safe_counter.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/calc-coverage</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/calc-coverage</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/calc-coverage.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/calc-coverage.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam2symal</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam2symal</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam2symal.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam2symal.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam_verify</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam_verify</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam_verify.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam_verify.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-build</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-build</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-build.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-build.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-build</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-build</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-build.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-build.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-count-words</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-count-words</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-count-words.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-count-words.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-demo1</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-demo1</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-demo1.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-demo1.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-dump</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-dump</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-dump.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-dump.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/num_read_write.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/num_read_write.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/num_read_write.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/num_read_write.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/symal2mam</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/symal2mam</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/symal2mam.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/symal2mam.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_pickler.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_pickler.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_pickler.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_pickler.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_tightindex.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tightindex.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_tightindex.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tightindex.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_tokenindex.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tokenindex.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_tokenindex.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tokenindex.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_bitext.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_bitext.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_conll_record.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_conll_record.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_conll_record.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_conll_record.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_corpus_token.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_corpus_token.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_corpus_token.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_corpus_token.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_deptree.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_deptree.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_deptree.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_deptree.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_load_primer.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_load_primer.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_load_primer.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_load_primer.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_mmbitext.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_mmbitext.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_mmbitext.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_mmbitext.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_phrasepair.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_phrasepair.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_phrasepair.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_phrasepair.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_tsa_array_entry.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_tsa_array_entry.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_tsa_array_entry.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_tsa_array_entry.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_base.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_base.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_base.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_base.o</locationURI>
</link>
<link>
- <name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_position.o</name>
+ <name>TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_position.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_position.o</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_position.o</locationURI>
</link>
<link>
<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/Syntax/S2T/Parsers/Scope3Parser</name>
diff --git a/contrib/moses2-cmd/.cproject b/contrib/other-builds/moses2-cmd/.cproject
index 9f4548c68..9fd7f85e4 100644
--- a/contrib/moses2-cmd/.cproject
+++ b/contrib/other-builds/moses2-cmd/.cproject
@@ -5,7 +5,7 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.597260676" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -21,28 +21,29 @@
<builder buildPath="${workspace_loc:/moses2-cmd}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.219597164" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.2087910158" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1546967275" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
- <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.826148068" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1303802900" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.368826329" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.826148068" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1303802900" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.368826329" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
<listOptionValue builtIn="false" value="/opt/local/include/"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../xmlrpc-c/include&quot;"/>
</option>
- <option id="gnu.cpp.compiler.option.preprocessor.def.758438174" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+ <option id="gnu.cpp.compiler.option.preprocessor.def.758438174" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" useByScannerDiscovery="false" valueType="definedSymbols">
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.123491630" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.848723608" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.1977842293" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.debug.option.debugging.level.322285470" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.1977842293" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.debug.option.debugging.level.322285470" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.c.compiler.option.dialect.std.1409988791" superClass="gnu.c.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.c.compiler.dialect.c11" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1011859741" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1706155110" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.24079646" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
- <option id="gnu.cpp.link.option.libs.587418382" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
+ <option id="gnu.cpp.link.option.libs.587418382" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" useByScannerDiscovery="false" valueType="libs">
<listOptionValue builtIn="false" value="moses2"/>
<listOptionValue builtIn="false" value="xmlrpc_xmltok"/>
<listOptionValue builtIn="false" value="xmlrpc_xmlparse"/>
@@ -56,6 +57,7 @@
<listOptionValue builtIn="false" value="xmlrpc++"/>
<listOptionValue builtIn="false" value="xmlrpc"/>
<listOptionValue builtIn="false" value="cmph"/>
+ <listOptionValue builtIn="false" value="probingpt"/>
<listOptionValue builtIn="false" value="search"/>
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="lm"/>
@@ -71,12 +73,13 @@
<listOptionValue builtIn="false" value="dl"/>
<listOptionValue builtIn="false" value="rt"/>
</option>
- <option id="gnu.cpp.link.option.paths.1920945405" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
+ <option id="gnu.cpp.link.option.paths.1920945405" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" useByScannerDiscovery="false" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../xmlrpc-c/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../moses2/Debug&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses2/Debug&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/probingpt/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/lm/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/OnDiskPt/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>
@@ -101,7 +104,7 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.347900682" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -117,13 +120,13 @@
<builder buildPath="${workspace_loc:/moses2-cmd}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.249336616" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.475854190" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1047605391" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
- <option id="gnu.cpp.compiler.exe.release.option.optimization.level.881009789" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.release.option.debugging.level.695719104" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.optimization.level.881009789" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.debugging.level.695719104" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2077834205" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.534514015" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.301062410" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.release.option.debugging.level.1891262877" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.301062410" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.release.option.debugging.level.1891262877" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.176623232" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.1762742642" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
@@ -163,10 +166,10 @@
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="refreshScope" versionNumber="2">
- <configuration configurationName="Release">
+ <configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/moses2-cmd"/>
</configuration>
- <configuration configurationName="Debug">
+ <configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/moses2-cmd"/>
</configuration>
</storageModule>
diff --git a/contrib/moses2-cmd/.project b/contrib/other-builds/moses2-cmd/.project
index 5e0e0e2b1..7b1b96ecc 100644
--- a/contrib/moses2-cmd/.project
+++ b/contrib/other-builds/moses2-cmd/.project
@@ -6,6 +6,7 @@
<project>lm</project>
<project>moses</project>
<project>moses2</project>
+ <project>probingpt</project>
<project>util</project>
</projects>
<buildSpec>
@@ -32,12 +33,12 @@
<link>
<name>Main.cpp</name>
<type>1</type>
- <locationURI>PARENT-1-PROJECT_LOC/moses2/Main.cpp</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/Main.cpp</locationURI>
</link>
<link>
<name>Main.h</name>
<type>1</type>
- <locationURI>PARENT-1-PROJECT_LOC/moses2/Main.h</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/Main.h</locationURI>
</link>
</linkedResources>
</projectDescription>
diff --git a/contrib/moses2/.cproject b/contrib/other-builds/moses2/.cproject
index 82b82d591..c905c998e 100644
--- a/contrib/moses2/.cproject
+++ b/contrib/other-builds/moses2/.cproject
@@ -49,6 +49,7 @@
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
<listOptionValue builtIn="false" value="WITH_THREADS"/>
</option>
+ <option id="gnu.cpp.compiler.option.dialect.std.1836172568" superClass="gnu.cpp.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.cpp.compiler.dialect.c++11" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2101942464" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1439481930" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
diff --git a/contrib/other-builds/moses2/.project b/contrib/other-builds/moses2/.project
new file mode 100644
index 000000000..1588c88b6
--- /dev/null
+++ b/contrib/other-builds/moses2/.project
@@ -0,0 +1,1621 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+ <name>moses2</name>
+ <comment></comment>
+ <projects>
+ <project>moses</project>
+ <project>util</project>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
+ <triggers>clean,full,incremental,</triggers>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ <buildCommand>
+ <name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
+ <triggers>full,incremental,</triggers>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.eclipse.cdt.core.cnature</nature>
+ <nature>org.eclipse.cdt.core.ccnature</nature>
+ <nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
+ <nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
+ </natures>
+ <linkedResources>
+ <link>
+ <name>AlignmentInfo.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/AlignmentInfo.cpp</locationURI>
+ </link>
+ <link>
+ <name>AlignmentInfo.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/AlignmentInfo.h</locationURI>
+ </link>
+ <link>
+ <name>AlignmentInfoCollection.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/AlignmentInfoCollection.cpp</locationURI>
+ </link>
+ <link>
+ <name>AlignmentInfoCollection.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/AlignmentInfoCollection.h</locationURI>
+ </link>
+ <link>
+ <name>ArcLists.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/ArcLists.cpp</locationURI>
+ </link>
+ <link>
+ <name>ArcLists.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/ArcLists.h</locationURI>
+ </link>
+ <link>
+ <name>Array.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/Array.h</locationURI>
+ </link>
+ <link>
+ <name>EstimatedScores.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/EstimatedScores.cpp</locationURI>
+ </link>
+ <link>
+ <name>EstimatedScores.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/EstimatedScores.h</locationURI>
+ </link>
+ <link>
+ <name>FF</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>HypothesisBase.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/HypothesisBase.cpp</locationURI>
+ </link>
+ <link>
+ <name>HypothesisBase.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/HypothesisBase.h</locationURI>
+ </link>
+ <link>
+ <name>HypothesisColl.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/HypothesisColl.cpp</locationURI>
+ </link>
+ <link>
+ <name>HypothesisColl.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/HypothesisColl.h</locationURI>
+ </link>
+ <link>
+ <name>InMemoryTrie</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>InputPathBase.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/InputPathBase.cpp</locationURI>
+ </link>
+ <link>
+ <name>InputPathBase.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/InputPathBase.h</locationURI>
+ </link>
+ <link>
+ <name>InputPathsBase.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/InputPathsBase.cpp</locationURI>
+ </link>
+ <link>
+ <name>InputPathsBase.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/InputPathsBase.h</locationURI>
+ </link>
+ <link>
+ <name>InputType.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/InputType.cpp</locationURI>
+ </link>
+ <link>
+ <name>InputType.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/InputType.h</locationURI>
+ </link>
+ <link>
+ <name>Jamfile</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/Jamfile</locationURI>
+ </link>
+ <link>
+ <name>LM</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>Main.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/Main.cpp</locationURI>
+ </link>
+ <link>
+ <name>ManagerBase.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/ManagerBase.cpp</locationURI>
+ </link>
+ <link>
+ <name>ManagerBase.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/ManagerBase.h</locationURI>
+ </link>
+ <link>
+ <name>MemPool.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/MemPool.cpp</locationURI>
+ </link>
+ <link>
+ <name>MemPool.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/MemPool.h</locationURI>
+ </link>
+ <link>
+ <name>MemPoolAllocator.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/MemPoolAllocator.h</locationURI>
+ </link>
+ <link>
+ <name>Phrase.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/Phrase.cpp</locationURI>
+ </link>
+ <link>
+ <name>Phrase.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/Phrase.h</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>PhraseImplTemplate.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseImplTemplate.h</locationURI>
+ </link>
+ <link>
+ <name>Recycler.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/Recycler.cpp</locationURI>
+ </link>
+ <link>
+ <name>Recycler.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/Recycler.h</locationURI>
+ </link>
+ <link>
+ <name>SCFG</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>Scores.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/Scores.cpp</locationURI>
+ </link>
+ <link>
+ <name>Scores.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/Scores.h</locationURI>
+ </link>
+ <link>
+ <name>SubPhrase.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SubPhrase.cpp</locationURI>
+ </link>
+ <link>
+ <name>SubPhrase.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SubPhrase.h</locationURI>
+ </link>
+ <link>
+ <name>System.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/System.cpp</locationURI>
+ </link>
+ <link>
+ <name>System.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/System.h</locationURI>
+ </link>
+ <link>
+ <name>TargetPhrase.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TargetPhrase.cpp</locationURI>
+ </link>
+ <link>
+ <name>TargetPhrase.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TargetPhrase.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationTask.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationTask.cpp</locationURI>
+ </link>
+ <link>
+ <name>TranslationTask.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationTask.h</locationURI>
+ </link>
+ <link>
+ <name>TrellisPaths.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TrellisPaths.cpp</locationURI>
+ </link>
+ <link>
+ <name>TrellisPaths.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TrellisPaths.h</locationURI>
+ </link>
+ <link>
+ <name>TypeDef.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TypeDef.cpp</locationURI>
+ </link>
+ <link>
+ <name>TypeDef.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TypeDef.h</locationURI>
+ </link>
+ <link>
+ <name>Vector.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/Vector.cpp</locationURI>
+ </link>
+ <link>
+ <name>Vector.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/Vector.h</locationURI>
+ </link>
+ <link>
+ <name>Weights.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/Weights.cpp</locationURI>
+ </link>
+ <link>
+ <name>Weights.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/Weights.h</locationURI>
+ </link>
+ <link>
+ <name>Word.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/Word.cpp</locationURI>
+ </link>
+ <link>
+ <name>Word.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/Word.h</locationURI>
+ </link>
+ <link>
+ <name>defer</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>legacy</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>parameters</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>pugiconfig.hpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/pugiconfig.hpp</locationURI>
+ </link>
+ <link>
+ <name>pugixml.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/pugixml.cpp</locationURI>
+ </link>
+ <link>
+ <name>pugixml.hpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/pugixml.hpp</locationURI>
+ </link>
+ <link>
+ <name>server</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>FF/Distortion.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/Distortion.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/Distortion.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/Distortion.h</locationURI>
+ </link>
+ <link>
+ <name>FF/ExampleStatefulFF.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/ExampleStatefulFF.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/ExampleStatefulFF.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/ExampleStatefulFF.h</locationURI>
+ </link>
+ <link>
+ <name>FF/ExampleStatelessFF.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/ExampleStatelessFF.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/ExampleStatelessFF.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/ExampleStatelessFF.h</locationURI>
+ </link>
+ <link>
+ <name>FF/FFState.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/FFState.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/FFState.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/FFState.h</locationURI>
+ </link>
+ <link>
+ <name>FF/FeatureFunction.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/FeatureFunction.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/FeatureFunction.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/FeatureFunction.h</locationURI>
+ </link>
+ <link>
+ <name>FF/FeatureFunctions.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/FeatureFunctions.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/FeatureFunctions.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/FeatureFunctions.h</locationURI>
+ </link>
+ <link>
+ <name>FF/FeatureRegistry.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/FeatureRegistry.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/FeatureRegistry.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/FeatureRegistry.h</locationURI>
+ </link>
+ <link>
+ <name>FF/LexicalReordering</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>FF/OSM</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>FF/PhrasePenalty.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/PhrasePenalty.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/PhrasePenalty.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/PhrasePenalty.h</locationURI>
+ </link>
+ <link>
+ <name>FF/PointerState.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/PointerState.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/PointerState.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/PointerState.h</locationURI>
+ </link>
+ <link>
+ <name>FF/StatefulFeatureFunction.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/StatefulFeatureFunction.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/StatefulFeatureFunction.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/StatefulFeatureFunction.h</locationURI>
+ </link>
+ <link>
+ <name>FF/StatelessFeatureFunction.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/StatelessFeatureFunction.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/StatelessFeatureFunction.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/StatelessFeatureFunction.h</locationURI>
+ </link>
+ <link>
+ <name>FF/WordPenalty.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/WordPenalty.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/WordPenalty.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/WordPenalty.h</locationURI>
+ </link>
+ <link>
+ <name>InMemoryTrie/InMemoryTrie.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/InMemoryTrie/InMemoryTrie.h</locationURI>
+ </link>
+ <link>
+ <name>InMemoryTrie/Node.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/InMemoryTrie/Node.h</locationURI>
+ </link>
+ <link>
+ <name>InMemoryTrie/utils.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/InMemoryTrie/utils.h</locationURI>
+ </link>
+ <link>
+ <name>LM/GPULM.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/LM/GPULM.cpp</locationURI>
+ </link>
+ <link>
+ <name>LM/GPULM.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/LM/GPULM.h</locationURI>
+ </link>
+ <link>
+ <name>LM/KENLM.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/LM/KENLM.cpp</locationURI>
+ </link>
+ <link>
+ <name>LM/KENLM.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/LM/KENLM.h</locationURI>
+ </link>
+ <link>
+ <name>LM/KENLMBatch.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/LM/KENLMBatch.cpp</locationURI>
+ </link>
+ <link>
+ <name>LM/KENLMBatch.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/LM/KENLMBatch.h</locationURI>
+ </link>
+ <link>
+ <name>LM/LanguageModel.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/LM/LanguageModel.cpp</locationURI>
+ </link>
+ <link>
+ <name>LM/LanguageModel.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/LM/LanguageModel.h</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/CubePruningMiniStack</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/Hypothesis.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/Hypothesis.cpp</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/Hypothesis.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/Hypothesis.h</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/InputPath.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/InputPath.cpp</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/InputPath.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/InputPath.h</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/InputPaths.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/InputPaths.cpp</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/InputPaths.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/InputPaths.h</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/Manager.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/Manager.cpp</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/Manager.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/Manager.h</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/Normal</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/PhraseImpl.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/PhraseImpl.cpp</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/PhraseImpl.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/PhraseImpl.h</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/ReorderingConstraint.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/ReorderingConstraint.cpp</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/ReorderingConstraint.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/ReorderingConstraint.h</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/Search.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/Search.cpp</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/Search.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/Search.h</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/Sentence.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/Sentence.cpp</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/Sentence.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/Sentence.h</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/TargetPhraseImpl.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/TargetPhraseImpl.cpp</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/TargetPhraseImpl.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/TargetPhraseImpl.h</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/TargetPhrases.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/TargetPhrases.cpp</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/TargetPhrases.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/TargetPhrases.h</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/TrellisPath.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/TrellisPath.cpp</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/TrellisPath.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/TrellisPath.h</locationURI>
+ </link>
+ <link>
+ <name>SCFG/ActiveChart.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/ActiveChart.cpp</locationURI>
+ </link>
+ <link>
+ <name>SCFG/ActiveChart.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/ActiveChart.h</locationURI>
+ </link>
+ <link>
+ <name>SCFG/Hypothesis.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/Hypothesis.cpp</locationURI>
+ </link>
+ <link>
+ <name>SCFG/Hypothesis.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/Hypothesis.h</locationURI>
+ </link>
+ <link>
+ <name>SCFG/InputPath.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/InputPath.cpp</locationURI>
+ </link>
+ <link>
+ <name>SCFG/InputPath.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/InputPath.h</locationURI>
+ </link>
+ <link>
+ <name>SCFG/InputPaths.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/InputPaths.cpp</locationURI>
+ </link>
+ <link>
+ <name>SCFG/InputPaths.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/InputPaths.h</locationURI>
+ </link>
+ <link>
+ <name>SCFG/Manager.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/Manager.cpp</locationURI>
+ </link>
+ <link>
+ <name>SCFG/Manager.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/Manager.h</locationURI>
+ </link>
+ <link>
+ <name>SCFG/Misc.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/Misc.cpp</locationURI>
+ </link>
+ <link>
+ <name>SCFG/Misc.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/Misc.h</locationURI>
+ </link>
+ <link>
+ <name>SCFG/PhraseImpl.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/PhraseImpl.cpp</locationURI>
+ </link>
+ <link>
+ <name>SCFG/PhraseImpl.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/PhraseImpl.h</locationURI>
+ </link>
+ <link>
+ <name>SCFG/Sentence.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/Sentence.cpp</locationURI>
+ </link>
+ <link>
+ <name>SCFG/Sentence.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/Sentence.h</locationURI>
+ </link>
+ <link>
+ <name>SCFG/Stack.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/Stack.cpp</locationURI>
+ </link>
+ <link>
+ <name>SCFG/Stack.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/Stack.h</locationURI>
+ </link>
+ <link>
+ <name>SCFG/Stacks.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/Stacks.cpp</locationURI>
+ </link>
+ <link>
+ <name>SCFG/Stacks.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/Stacks.h</locationURI>
+ </link>
+ <link>
+ <name>SCFG/TargetPhraseImpl.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/TargetPhraseImpl.cpp</locationURI>
+ </link>
+ <link>
+ <name>SCFG/TargetPhraseImpl.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/TargetPhraseImpl.h</locationURI>
+ </link>
+ <link>
+ <name>SCFG/TargetPhrases.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/TargetPhrases.cpp</locationURI>
+ </link>
+ <link>
+ <name>SCFG/TargetPhrases.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/TargetPhrases.h</locationURI>
+ </link>
+ <link>
+ <name>SCFG/Word.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/Word.cpp</locationURI>
+ </link>
+ <link>
+ <name>SCFG/Word.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/Word.h</locationURI>
+ </link>
+ <link>
+ <name>SCFG/nbest</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/Memory</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/PhraseTable.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/PhraseTable.cpp</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/PhraseTable.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/PhraseTable.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/ProbingPT.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT.cpp</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/ProbingPT.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/Transliteration.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/Transliteration.cpp</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/Transliteration.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/Transliteration.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UnknownWordPenalty.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/UnknownWordPenalty.cpp</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/UnknownWordPenalty.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/UnknownWordPenalty.h</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningBitmapStack</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningCardinalStack</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningPerBitmap</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningPerMiniStack</name>
+ <type>2</type>
+ <locationURI>virtual:/virtual</locationURI>
+ </link>
+ <link>
+ <name>legacy/Bitmap.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/Bitmap.cpp</locationURI>
+ </link>
+ <link>
+ <name>legacy/Bitmap.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/Bitmap.h</locationURI>
+ </link>
+ <link>
+ <name>legacy/Bitmaps.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/Bitmaps.cpp</locationURI>
+ </link>
+ <link>
+ <name>legacy/Bitmaps.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/Bitmaps.h</locationURI>
+ </link>
+ <link>
+ <name>legacy/Factor.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/Factor.cpp</locationURI>
+ </link>
+ <link>
+ <name>legacy/Factor.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/Factor.h</locationURI>
+ </link>
+ <link>
+ <name>legacy/FactorCollection.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/FactorCollection.cpp</locationURI>
+ </link>
+ <link>
+ <name>legacy/FactorCollection.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/FactorCollection.h</locationURI>
+ </link>
+ <link>
+ <name>legacy/InputFileStream.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/InputFileStream.cpp</locationURI>
+ </link>
+ <link>
+ <name>legacy/InputFileStream.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/InputFileStream.h</locationURI>
+ </link>
+ <link>
+ <name>legacy/Matrix.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/Matrix.cpp</locationURI>
+ </link>
+ <link>
+ <name>legacy/Matrix.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/Matrix.h</locationURI>
+ </link>
+ <link>
+ <name>legacy/OutputCollector.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/OutputCollector.h</locationURI>
+ </link>
+ <link>
+ <name>legacy/OutputFileStream.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/OutputFileStream.cpp</locationURI>
+ </link>
+ <link>
+ <name>legacy/OutputFileStream.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/OutputFileStream.h</locationURI>
+ </link>
+ <link>
+ <name>legacy/Parameter.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/Parameter.cpp</locationURI>
+ </link>
+ <link>
+ <name>legacy/Parameter.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/Parameter.h</locationURI>
+ </link>
+ <link>
+ <name>legacy/Range.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/Range.cpp</locationURI>
+ </link>
+ <link>
+ <name>legacy/Range.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/Range.h</locationURI>
+ </link>
+ <link>
+ <name>legacy/ThreadPool.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/ThreadPool.cpp</locationURI>
+ </link>
+ <link>
+ <name>legacy/ThreadPool.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/ThreadPool.h</locationURI>
+ </link>
+ <link>
+ <name>legacy/Timer.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/Timer.cpp</locationURI>
+ </link>
+ <link>
+ <name>legacy/Timer.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/Timer.h</locationURI>
+ </link>
+ <link>
+ <name>legacy/Util2.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/Util2.cpp</locationURI>
+ </link>
+ <link>
+ <name>legacy/Util2.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/Util2.h</locationURI>
+ </link>
+ <link>
+ <name>legacy/gzfilebuf.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/gzfilebuf.h</locationURI>
+ </link>
+ <link>
+ <name>legacy/xmlrpc-c.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/legacy/xmlrpc-c.h</locationURI>
+ </link>
+ <link>
+ <name>parameters/AllOptions.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/AllOptions.cpp</locationURI>
+ </link>
+ <link>
+ <name>parameters/AllOptions.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/AllOptions.h</locationURI>
+ </link>
+ <link>
+ <name>parameters/BeamSearchOptions.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/BeamSearchOptions.h</locationURI>
+ </link>
+ <link>
+ <name>parameters/BookkeepingOptions.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/BookkeepingOptions.cpp</locationURI>
+ </link>
+ <link>
+ <name>parameters/BookkeepingOptions.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/BookkeepingOptions.h</locationURI>
+ </link>
+ <link>
+ <name>parameters/ContextParameters.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/ContextParameters.cpp</locationURI>
+ </link>
+ <link>
+ <name>parameters/ContextParameters.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/ContextParameters.h</locationURI>
+ </link>
+ <link>
+ <name>parameters/CubePruningOptions.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/CubePruningOptions.cpp</locationURI>
+ </link>
+ <link>
+ <name>parameters/CubePruningOptions.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/CubePruningOptions.h</locationURI>
+ </link>
+ <link>
+ <name>parameters/InputOptions.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/InputOptions.cpp</locationURI>
+ </link>
+ <link>
+ <name>parameters/InputOptions.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/InputOptions.h</locationURI>
+ </link>
+ <link>
+ <name>parameters/LMBR_Options.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/LMBR_Options.cpp</locationURI>
+ </link>
+ <link>
+ <name>parameters/LMBR_Options.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/LMBR_Options.h</locationURI>
+ </link>
+ <link>
+ <name>parameters/LookupOptions.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/LookupOptions.h</locationURI>
+ </link>
+ <link>
+ <name>parameters/MBR_Options.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/MBR_Options.cpp</locationURI>
+ </link>
+ <link>
+ <name>parameters/MBR_Options.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/MBR_Options.h</locationURI>
+ </link>
+ <link>
+ <name>parameters/NBestOptions.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/NBestOptions.cpp</locationURI>
+ </link>
+ <link>
+ <name>parameters/NBestOptions.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/NBestOptions.h</locationURI>
+ </link>
+ <link>
+ <name>parameters/OOVHandlingOptions.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/OOVHandlingOptions.cpp</locationURI>
+ </link>
+ <link>
+ <name>parameters/OOVHandlingOptions.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/OOVHandlingOptions.h</locationURI>
+ </link>
+ <link>
+ <name>parameters/OptionsBaseClass.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/OptionsBaseClass.cpp</locationURI>
+ </link>
+ <link>
+ <name>parameters/OptionsBaseClass.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/OptionsBaseClass.h</locationURI>
+ </link>
+ <link>
+ <name>parameters/ReorderingOptions.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/ReorderingOptions.cpp</locationURI>
+ </link>
+ <link>
+ <name>parameters/ReorderingOptions.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/ReorderingOptions.h</locationURI>
+ </link>
+ <link>
+ <name>parameters/ReportingOptions.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/ReportingOptions.cpp</locationURI>
+ </link>
+ <link>
+ <name>parameters/ReportingOptions.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/ReportingOptions.h</locationURI>
+ </link>
+ <link>
+ <name>parameters/SearchOptions.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/SearchOptions.cpp</locationURI>
+ </link>
+ <link>
+ <name>parameters/SearchOptions.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/SearchOptions.h</locationURI>
+ </link>
+ <link>
+ <name>parameters/ServerOptions.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/ServerOptions.cpp</locationURI>
+ </link>
+ <link>
+ <name>parameters/ServerOptions.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/ServerOptions.h</locationURI>
+ </link>
+ <link>
+ <name>parameters/SyntaxOptions.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/SyntaxOptions.cpp</locationURI>
+ </link>
+ <link>
+ <name>parameters/SyntaxOptions.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/parameters/SyntaxOptions.h</locationURI>
+ </link>
+ <link>
+ <name>server/Server.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/server/Server.cpp</locationURI>
+ </link>
+ <link>
+ <name>server/Server.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/server/Server.h</locationURI>
+ </link>
+ <link>
+ <name>server/TranslationRequest.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/server/TranslationRequest.cpp</locationURI>
+ </link>
+ <link>
+ <name>server/TranslationRequest.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/server/TranslationRequest.h</locationURI>
+ </link>
+ <link>
+ <name>server/Translator.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/server/Translator.cpp</locationURI>
+ </link>
+ <link>
+ <name>server/Translator.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/server/Translator.h</locationURI>
+ </link>
+ <link>
+ <name>FF/LexicalReordering/BidirectionalReorderingState.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/LexicalReordering/BidirectionalReorderingState.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/BidirectionalReorderingState.h</locationURI>
+ </link>
+ <link>
+ <name>FF/LexicalReordering/HReorderingBackwardState.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/HReorderingBackwardState.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/LexicalReordering/HReorderingBackwardState.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/HReorderingBackwardState.h</locationURI>
+ </link>
+ <link>
+ <name>FF/LexicalReordering/HReorderingForwardState.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/HReorderingForwardState.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/LexicalReordering/HReorderingForwardState.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/HReorderingForwardState.h</locationURI>
+ </link>
+ <link>
+ <name>FF/LexicalReordering/LRModel.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/LRModel.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/LexicalReordering/LRModel.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/LRModel.h</locationURI>
+ </link>
+ <link>
+ <name>FF/LexicalReordering/LRState.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/LRState.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/LexicalReordering/LRState.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/LRState.h</locationURI>
+ </link>
+ <link>
+ <name>FF/LexicalReordering/LexicalReordering.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/LexicalReordering.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/LexicalReordering/LexicalReordering.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/LexicalReordering.h</locationURI>
+ </link>
+ <link>
+ <name>FF/LexicalReordering/PhraseBasedReorderingState.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/LexicalReordering/PhraseBasedReorderingState.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/PhraseBasedReorderingState.h</locationURI>
+ </link>
+ <link>
+ <name>FF/LexicalReordering/ReorderingStack.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/ReorderingStack.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/LexicalReordering/ReorderingStack.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/ReorderingStack.h</locationURI>
+ </link>
+ <link>
+ <name>FF/OSM/KenOSM.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/OSM/KenOSM.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/OSM/KenOSM.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/OSM/KenOSM.h</locationURI>
+ </link>
+ <link>
+ <name>FF/OSM/OpSequenceModel.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/OSM/OpSequenceModel.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/OSM/OpSequenceModel.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/OSM/OpSequenceModel.h</locationURI>
+ </link>
+ <link>
+ <name>FF/OSM/osmHyp.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/OSM/osmHyp.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/OSM/osmHyp.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/FF/OSM/osmHyp.h</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/CubePruningMiniStack/Misc.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/CubePruningMiniStack/Misc.cpp</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/CubePruningMiniStack/Misc.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/CubePruningMiniStack/Misc.h</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/CubePruningMiniStack/Search.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/CubePruningMiniStack/Search.cpp</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/CubePruningMiniStack/Search.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/CubePruningMiniStack/Search.h</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/CubePruningMiniStack/Stack.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/CubePruningMiniStack/Stack.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/CubePruningMiniStack/Stack.h</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/Normal/Search.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/Normal/Search.cpp</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/Normal/Search.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/Normal/Search.h</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/Normal/Stack.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/Normal/Stack.cpp</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/Normal/Stack.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/Normal/Stack.h</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/Normal/Stacks.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/Normal/Stacks.cpp</locationURI>
+ </link>
+ <link>
+ <name>PhraseBased/Normal/Stacks.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/PhraseBased/Normal/Stacks.h</locationURI>
+ </link>
+ <link>
+ <name>SCFG/nbest/KBestExtractor.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/nbest/KBestExtractor.cpp</locationURI>
+ </link>
+ <link>
+ <name>SCFG/nbest/KBestExtractor.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/nbest/KBestExtractor.h</locationURI>
+ </link>
+ <link>
+ <name>SCFG/nbest/NBest.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/nbest/NBest.cpp</locationURI>
+ </link>
+ <link>
+ <name>SCFG/nbest/NBest.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/nbest/NBest.h</locationURI>
+ </link>
+ <link>
+ <name>SCFG/nbest/NBestColl.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/nbest/NBestColl.cpp</locationURI>
+ </link>
+ <link>
+ <name>SCFG/nbest/NBestColl.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/nbest/NBestColl.h</locationURI>
+ </link>
+ <link>
+ <name>SCFG/nbest/NBests.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/nbest/NBests.cpp</locationURI>
+ </link>
+ <link>
+ <name>SCFG/nbest/NBests.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/SCFG/nbest/NBests.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/BlockHashIndex.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/BlockHashIndex.cpp</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/BlockHashIndex.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/BlockHashIndex.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/CanonicalHuffman.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/CanonicalHuffman.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/CmphStringVectorAdapter.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/CmphStringVectorAdapter.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/LexicalReorderingTableCompact.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/ListCoders.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/ListCoders.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/MmapAllocator.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/MmapAllocator.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/MonotonicVector.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/MonotonicVector.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/MurmurHash3.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/MurmurHash3.cpp</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/MurmurHash3.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/MurmurHash3.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/PackedArray.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/PackedArray.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/StringVector.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/StringVector.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/TargetPhraseCollectionCache.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/ThrowingFwrite.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/ThrowingFwrite.cpp</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/CompactPT/ThrowingFwrite.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/ThrowingFwrite.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/Memory/Node.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/Memory/Node.h</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/Memory/PhraseTableMemory.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/Memory/PhraseTableMemory.cpp</locationURI>
+ </link>
+ <link>
+ <name>TranslationModel/Memory/PhraseTableMemory.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/TranslationModel/Memory/PhraseTableMemory.h</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningBitmapStack/Misc.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningBitmapStack/Misc.cpp</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningBitmapStack/Misc.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningBitmapStack/Misc.h</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningBitmapStack/Search.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningBitmapStack/Search.cpp</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningBitmapStack/Search.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningBitmapStack/Search.h</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningBitmapStack/Stack.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningBitmapStack/Stack.cpp</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningBitmapStack/Stack.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningBitmapStack/Stack.h</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningCardinalStack/Misc.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningCardinalStack/Misc.cpp</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningCardinalStack/Misc.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningCardinalStack/Misc.h</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningCardinalStack/Search.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningCardinalStack/Search.cpp</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningCardinalStack/Search.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningCardinalStack/Search.h</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningCardinalStack/Stack.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningCardinalStack/Stack.cpp</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningCardinalStack/Stack.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningCardinalStack/Stack.h</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningPerBitmap/Misc.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerBitmap/Misc.cpp</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningPerBitmap/Misc.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerBitmap/Misc.h</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningPerBitmap/Search.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerBitmap/Search.cpp</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningPerBitmap/Search.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerBitmap/Search.h</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningPerBitmap/Stacks.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerBitmap/Stacks.cpp</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningPerBitmap/Stacks.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerBitmap/Stacks.h</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningPerMiniStack/Misc.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerMiniStack/Misc.cpp</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningPerMiniStack/Misc.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerMiniStack/Misc.h</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningPerMiniStack/Search.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerMiniStack/Search.cpp</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningPerMiniStack/Search.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerMiniStack/Search.h</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningPerMiniStack/Stacks.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerMiniStack/Stacks.cpp</locationURI>
+ </link>
+ <link>
+ <name>defer/CubePruningPerMiniStack/Stacks.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerMiniStack/Stacks.h</locationURI>
+ </link>
+ </linkedResources>
+</projectDescription>
diff --git a/contrib/other-builds/probingpt/.cproject b/contrib/other-builds/probingpt/.cproject
new file mode 100644
index 000000000..4605b3cf7
--- /dev/null
+++ b/contrib/other-builds/probingpt/.cproject
@@ -0,0 +1,120 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
+ <storageModule moduleId="org.eclipse.cdt.core.settings">
+ <cconfiguration id="cdt.managedbuild.config.gnu.cross.lib.debug.1390723927">
+ <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.lib.debug.1390723927" moduleId="org.eclipse.cdt.core.settings" name="Debug">
+ <externalSettings>
+ <externalSetting>
+ <entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/probingpt"/>
+ <entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/probingpt/Debug"/>
+ <entry flags="RESOLVED" kind="libraryFile" name="probingpt" srcPrefixMapping="" srcRootPath=""/>
+ </externalSetting>
+ </externalSettings>
+ <extensions>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ </extensions>
+ </storageModule>
+ <storageModule moduleId="cdtBuildSystem" version="4.0.0">
+ <configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.lib.debug.1390723927" name="Debug" parent="cdt.managedbuild.config.gnu.cross.lib.debug">
+ <folderInfo id="cdt.managedbuild.config.gnu.cross.lib.debug.1390723927." name="/" resourcePath="">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.cross.lib.debug.783086312" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.lib.debug">
+ <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.965048061" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
+ <builder buildPath="${workspace_loc:/probingpt}/Debug" id="cdt.managedbuild.builder.gnu.cross.2025185909" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
+ <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.728323531" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.781305932" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.option.debugging.level.1265608945" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.183186094" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1845983140" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
+ <option id="gnu.cpp.compiler.option.optimization.level.961528826" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.debugging.level.754161376" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.1356496512" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../&quot;"/>
+ </option>
+ <option id="gnu.cpp.compiler.option.dialect.std.1330788612" superClass="gnu.cpp.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.cpp.compiler.dialect.c++11" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1147743441" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.423549467" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
+ <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.10489702" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker"/>
+ <tool id="cdt.managedbuild.tool.gnu.cross.archiver.1080671388" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
+ <tool id="cdt.managedbuild.tool.gnu.cross.assembler.686017340" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.599273349" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+ </tool>
+ </toolChain>
+ </folderInfo>
+ </configuration>
+ </storageModule>
+ <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+ </cconfiguration>
+ <cconfiguration id="cdt.managedbuild.config.gnu.cross.lib.release.890240571">
+ <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.lib.release.890240571" moduleId="org.eclipse.cdt.core.settings" name="Release">
+ <externalSettings>
+ <externalSetting>
+ <entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/probingpt"/>
+ <entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/probingpt/Release"/>
+ <entry flags="RESOLVED" kind="libraryFile" name="probingpt" srcPrefixMapping="" srcRootPath=""/>
+ </externalSetting>
+ </externalSettings>
+ <extensions>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ </extensions>
+ </storageModule>
+ <storageModule moduleId="cdtBuildSystem" version="4.0.0">
+ <configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.lib.release.890240571" name="Release" parent="cdt.managedbuild.config.gnu.cross.lib.release">
+ <folderInfo id="cdt.managedbuild.config.gnu.cross.lib.release.890240571." name="/" resourcePath="">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.cross.lib.release.885632145" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.lib.release">
+ <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1366680617" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
+ <builder buildPath="${workspace_loc:/probingpt}/Release" id="cdt.managedbuild.builder.gnu.cross.191907712" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
+ <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.808139639" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.224210565" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.option.debugging.level.53109509" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.474846340" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.825247044" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
+ <option id="gnu.cpp.compiler.option.optimization.level.1484316632" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.debugging.level.1585066416" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.149988472" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1042542443" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
+ <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1342788782" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker"/>
+ <tool id="cdt.managedbuild.tool.gnu.cross.archiver.56708033" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
+ <tool id="cdt.managedbuild.tool.gnu.cross.assembler.1620829860" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.1936527691" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+ </tool>
+ </toolChain>
+ </folderInfo>
+ </configuration>
+ </storageModule>
+ <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+ </cconfiguration>
+ </storageModule>
+ <storageModule moduleId="cdtBuildSystem" version="4.0.0">
+ <project id="probingpt.cdt.managedbuild.target.gnu.cross.lib.1901222999" name="Static Library" projectType="cdt.managedbuild.target.gnu.cross.lib"/>
+ </storageModule>
+ <storageModule moduleId="scannerConfiguration">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.lib.release.890240571;cdt.managedbuild.config.gnu.cross.lib.release.890240571.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.825247044;cdt.managedbuild.tool.gnu.cpp.compiler.input.149988472">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+ </scannerConfigBuildInfo>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.lib.release.890240571;cdt.managedbuild.config.gnu.cross.lib.release.890240571.;cdt.managedbuild.tool.gnu.cross.c.compiler.808139639;cdt.managedbuild.tool.gnu.c.compiler.input.474846340">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+ </scannerConfigBuildInfo>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.lib.debug.1390723927;cdt.managedbuild.config.gnu.cross.lib.debug.1390723927.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1845983140;cdt.managedbuild.tool.gnu.cpp.compiler.input.1147743441">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+ </scannerConfigBuildInfo>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.lib.debug.1390723927;cdt.managedbuild.config.gnu.cross.lib.debug.1390723927.;cdt.managedbuild.tool.gnu.cross.c.compiler.728323531;cdt.managedbuild.tool.gnu.c.compiler.input.183186094">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+ </scannerConfigBuildInfo>
+ </storageModule>
+ <storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
+ <storageModule moduleId="refreshScope"/>
+</cproject>
diff --git a/contrib/moses2/.project b/contrib/other-builds/probingpt/.project
index b17dc477e..023a89212 100644
--- a/contrib/moses2/.project
+++ b/contrib/other-builds/probingpt/.project
@@ -1,10 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
- <name>moses2</name>
+ <name>probingpt</name>
<comment></comment>
<projects>
- <project>moses</project>
- <project>util</project>
</projects>
<buildSpec>
<buildCommand>
@@ -26,4 +24,11 @@
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
+ <linkedResources>
+ <link>
+ <name>probingpt</name>
+ <type>2</type>
+ <locationURI>PARENT-3-PROJECT_LOC/probingpt</locationURI>
+ </link>
+ </linkedResources>
</projectDescription>
diff --git a/contrib/other-builds/score/.cproject b/contrib/other-builds/score/.cproject
index d904122eb..a6d4e1b88 100644
--- a/contrib/other-builds/score/.cproject
+++ b/contrib/other-builds/score/.cproject
@@ -5,7 +5,7 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.852684782" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -21,17 +21,18 @@
<builder buildPath="${workspace_loc:/score}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.1494414913" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1369030665" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1299858559" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
- <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.1103483066" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.11930558" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.1147799314" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.1103483066" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.11930558" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.1147799314" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1638578889" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.2096513387" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.1877980632" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.debug.option.debugging.level.1972289345" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.1877980632" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.debug.option.debugging.level.1972289345" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.c.compiler.option.dialect.std.645941284" superClass="gnu.c.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.c.compiler.dialect.c11" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1767499123" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.9477188" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
@@ -80,7 +81,7 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1878418244" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -96,13 +97,13 @@
<builder buildPath="${workspace_loc:/score}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.1694318208" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1857970512" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.464441024" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
- <option id="gnu.cpp.compiler.exe.release.option.optimization.level.1302447353" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.release.option.debugging.level.143379331" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.optimization.level.1302447353" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.debugging.level.143379331" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.859419943" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.1103707928" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.2144910639" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.release.option.debugging.level.158963791" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.2144910639" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.release.option.debugging.level.158963791" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.558236570" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.1915067544" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
diff --git a/contrib/other-builds/search/.cproject b/contrib/other-builds/search/.cproject
index 44ae0e94e..ad505c569 100644
--- a/contrib/other-builds/search/.cproject
+++ b/contrib/other-builds/search/.cproject
@@ -20,29 +20,30 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.722547278" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
+ <configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.722547278" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.722547278." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1512691763" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform binaryParser="org.eclipse.cdt.core.ELF;org.eclipse.cdt.core.MachO64" id="cdt.managedbuild.target.gnu.platform.exe.debug.633526059" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
<builder buildPath="${workspace_loc:/search/Debug}" id="cdt.managedbuild.target.gnu.builder.exe.debug.164367197" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.854512708" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1096845166" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
- <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.240381177" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.275467568" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.1356228283" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.240381177" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.275467568" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.1356228283" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="/opt/local/include"/>
</option>
- <option id="gnu.cpp.compiler.option.preprocessor.def.207824043" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+ <option id="gnu.cpp.compiler.option.preprocessor.def.207824043" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" useByScannerDiscovery="false" valueType="definedSymbols">
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
<listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1099209487" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.1160060999" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.1646958507" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.debug.option.debugging.level.770533945" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.1646958507" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.debug.option.debugging.level.770533945" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.c.compiler.option.dialect.std.720715523" superClass="gnu.c.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.c.compiler.dialect.c11" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.448849586" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.772519271" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
@@ -75,20 +76,20 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.443554127" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.443554127" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.443554127." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.2087651883" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform binaryParser="org.eclipse.cdt.core.ELF;org.eclipse.cdt.core.MachO64" id="cdt.managedbuild.target.gnu.platform.exe.release.1177425262" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
<builder buildPath="${workspace_loc:/search/Release}" id="cdt.managedbuild.target.gnu.builder.exe.release.1508486313" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1570343986" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.156371039" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
- <option id="gnu.cpp.compiler.exe.release.option.optimization.level.659087940" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.release.option.debugging.level.1733942639" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.optimization.level.659087940" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.debugging.level.1733942639" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1227769637" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.453047218" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.316944308" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.release.option.debugging.level.1549298576" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.316944308" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.release.option.debugging.level.1549298576" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1350942207" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.592800732" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
@@ -136,4 +137,3 @@
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
</cproject>
-
diff --git a/contrib/other-builds/server/.cproject b/contrib/other-builds/server/.cproject
index 9789dbfb7..153b8c8e4 100644
--- a/contrib/other-builds/server/.cproject
+++ b/contrib/other-builds/server/.cproject
@@ -22,14 +22,14 @@
<builder buildPath="${workspace_loc:/server}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.857185882" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.142173353" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1657626940" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
- <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.269939241" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1769920565" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.649991225" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.269939241" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1769920565" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.649991225" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../xmlrpc-c/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost&quot;"/>
</option>
- <option id="gnu.cpp.compiler.option.preprocessor.def.2063944336" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+ <option id="gnu.cpp.compiler.option.preprocessor.def.2063944336" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" useByScannerDiscovery="false" valueType="definedSymbols">
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
<listOptionValue builtIn="false" value="WITH_THREADS"/>
<listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
@@ -37,14 +37,16 @@
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.603240279" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.165185265" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.502789927" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.debug.option.debugging.level.1365428538" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.502789927" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.debug.option.debugging.level.1365428538" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.c.compiler.option.dialect.std.1402545564" superClass="gnu.c.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.c.compiler.dialect.c11" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.836267531" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1867046221" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1443553047" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.paths.1096041402" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../xmlrpc-c/lib&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/probingpt/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/search/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
@@ -80,6 +82,7 @@
<listOptionValue builtIn="false" value="boost_filesystem"/>
<listOptionValue builtIn="false" value="boost_program_options"/>
<listOptionValue builtIn="false" value="z"/>
+ <listOptionValue builtIn="false" value="probingpt"/>
<listOptionValue builtIn="false" value="bz2"/>
<listOptionValue builtIn="false" value="dl"/>
<listOptionValue builtIn="false" value="rt"/>
@@ -119,13 +122,13 @@
<builder buildPath="${workspace_loc:/server}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.24884855" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1561001393" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1260095073" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
- <option id="gnu.cpp.compiler.exe.release.option.optimization.level.824342210" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.release.option.debugging.level.620231073" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.optimization.level.824342210" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.debugging.level.620231073" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.372465520" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.1635883096" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.74859509" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.release.option.debugging.level.1604502606" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.74859509" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.release.option.debugging.level.1604502606" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.624155660" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.727800742" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
diff --git a/contrib/other-builds/util/.cproject b/contrib/other-builds/util/.cproject
index 8c7e4221b..ff268247c 100644
--- a/contrib/other-builds/util/.cproject
+++ b/contrib/other-builds/util/.cproject
@@ -37,14 +37,14 @@
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.1252745601" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1018784824" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
- <option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.623959371" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.892917290" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.1401298824" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.623959371" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.892917290" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.1401298824" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
<listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="/opt/local/include"/>
</option>
- <option id="gnu.cpp.compiler.option.preprocessor.def.1952961175" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+ <option id="gnu.cpp.compiler.option.preprocessor.def.1952961175" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" useByScannerDiscovery="false" valueType="definedSymbols">
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
<listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
<listOptionValue builtIn="false" value="HAVE_ZLIB"/>
@@ -55,8 +55,9 @@
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1420621104" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1724141901" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.36067607" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.460849578" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.36067607" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.460849578" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.c.compiler.option.dialect.std.1558084095" superClass="gnu.c.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.c.compiler.dialect.c11" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.289923594" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
</toolChain>
@@ -82,7 +83,7 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.172239955" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.172239955" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
<folderInfo id="cdt.managedbuild.config.macosx.exe.release.172239955." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.822279811" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
<targetPlatform binaryParser="org.eclipse.cdt.core.MachO64;org.eclipse.cdt.core.ELF" id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.533470822" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
@@ -99,16 +100,16 @@
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.1010248526" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.549134109" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
- <option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.1741196615" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.1171704152" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.883129829" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.1741196615" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.1171704152" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.883129829" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.685540722" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.279247859" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.1371842588" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.1581172024" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.1371842588" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+ <option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.1581172024" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1632081663" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
</toolChain>
diff --git a/mert/ForestRescore.cpp b/mert/ForestRescore.cpp
index 009152e35..8638ebc97 100644
--- a/mert/ForestRescore.cpp
+++ b/mert/ForestRescore.cpp
@@ -346,7 +346,7 @@ static void GetBestHypothesis(size_t vertexId, const Graph& graph, const vector<
void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references , size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu, HgHypothesis* bestHypo)
{
- BackPointer init(NULL,kMinScore);
+ BackPointer init((const Edge*) NULL,kMinScore);
vector<BackPointer> backPointers(graph.VertexSize(),init);
HgBleuScorer bleuScorer(references, graph, sentenceId, backgroundBleu);
vector<FeatureStatsType> winnerStats(kBleuNgramOrder*2+1);
diff --git a/misc/Jamfile b/misc/Jamfile
index 135490a46..9539aaabd 100644
--- a/misc/Jamfile
+++ b/misc/Jamfile
@@ -30,11 +30,6 @@ else {
alias programsMin ;
}
-exe CreateProbingPT : CreateProbingPT.cpp ..//boost_filesystem ../moses//moses ;
-#exe QueryProbingPT : QueryProbingPT.cpp ..//boost_filesystem ../moses//moses ;
-
-alias programsProbing : CreateProbingPT ; #QueryProbingPT
-
exe merge-sorted :
merge-sorted.cc
../moses//moses
@@ -43,6 +38,6 @@ $(TOP)//boost_iostreams
$(TOP)//boost_program_options
;
-alias programs : 1-1-Extraction TMining generateSequences processLexicalTable queryLexicalTable programsMin programsProbing merge-sorted prunePhraseTable pruneGeneration ;
+alias programs : 1-1-Extraction TMining generateSequences processLexicalTable queryLexicalTable programsMin merge-sorted prunePhraseTable pruneGeneration ;
#processPhraseTable queryPhraseTable
diff --git a/misc/misc.xcodeproj/project.pbxproj b/misc/misc.xcodeproj/project.pbxproj
deleted file mode 100644
index 0b52b8170..000000000
--- a/misc/misc.xcodeproj/project.pbxproj
+++ /dev/null
@@ -1,323 +0,0 @@
-// !$*UTF8*$!
-{
- archiveVersion = 1;
- classes = {
- };
- objectVersion = 45;
- objects = {
-
-/* Begin PBXBuildFile section */
- 1EF455C41227C4BB0022403A /* processLexicalTable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EF455991227C4050022403A /* processLexicalTable.cpp */; };
- 1EF455D01227C4F40022403A /* libmoses.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EF455CD1227C4D60022403A /* libmoses.a */; };
- 1EF455D91227C5140022403A /* libOnDiskPt.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EF455D81227C50C0022403A /* libOnDiskPt.a */; };
-/* End PBXBuildFile section */
-
-/* Begin PBXContainerItemProxy section */
- 1EF455CC1227C4D60022403A /* PBXContainerItemProxy */ = {
- isa = PBXContainerItemProxy;
- containerPortal = 1EF455C81227C4D60022403A /* moses.xcodeproj */;
- proxyType = 2;
- remoteGlobalIDString = D2AAC046055464E500DB518D;
- remoteInfo = moses;
- };
- 1EF455D71227C50C0022403A /* PBXContainerItemProxy */ = {
- isa = PBXContainerItemProxy;
- containerPortal = 1EF455D31227C50C0022403A /* OnDiskPt.xcodeproj */;
- proxyType = 2;
- remoteGlobalIDString = D2AAC046055464E500DB518D;
- remoteInfo = OnDiskPt;
- };
- 1EF456211227C8A30022403A /* PBXContainerItemProxy */ = {
- isa = PBXContainerItemProxy;
- containerPortal = 1EF455C81227C4D60022403A /* moses.xcodeproj */;
- proxyType = 1;
- remoteGlobalIDString = D2AAC045055464E500DB518D;
- remoteInfo = moses;
- };
- 1EF456231227C8A80022403A /* PBXContainerItemProxy */ = {
- isa = PBXContainerItemProxy;
- containerPortal = 1EF455D31227C50C0022403A /* OnDiskPt.xcodeproj */;
- proxyType = 1;
- remoteGlobalIDString = D2AAC045055464E500DB518D;
- remoteInfo = OnDiskPt;
- };
-/* End PBXContainerItemProxy section */
-
-/* Begin PBXFileReference section */
- 1EF455991227C4050022403A /* processLexicalTable.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = processLexicalTable.cpp; sourceTree = "<group>"; };
- 1EF455BA1227C4760022403A /* processLexicalTable */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = processLexicalTable; sourceTree = BUILT_PRODUCTS_DIR; };
- 1EF455C81227C4D60022403A /* moses.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = moses.xcodeproj; path = ../moses/moses.xcodeproj; sourceTree = SOURCE_ROOT; };
- 1EF455D31227C50C0022403A /* OnDiskPt.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = OnDiskPt.xcodeproj; path = ../OnDiskPt/OnDiskPt.xcodeproj; sourceTree = SOURCE_ROOT; };
- C6859E8B029090EE04C91782 /* misc.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = misc.1; sourceTree = "<group>"; };
-/* End PBXFileReference section */
-
-/* Begin PBXFrameworksBuildPhase section */
- 1EF455B81227C4760022403A /* Frameworks */ = {
- isa = PBXFrameworksBuildPhase;
- buildActionMask = 2147483647;
- files = (
- 1EF455D91227C5140022403A /* libOnDiskPt.a in Frameworks */,
- 1EF455D01227C4F40022403A /* libmoses.a in Frameworks */,
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
-/* End PBXFrameworksBuildPhase section */
-
-/* Begin PBXGroup section */
- 08FB7794FE84155DC02AAC07 /* misc */ = {
- isa = PBXGroup;
- children = (
- 1EF455D31227C50C0022403A /* OnDiskPt.xcodeproj */,
- 1EF455C81227C4D60022403A /* moses.xcodeproj */,
- 08FB7795FE84155DC02AAC07 /* Source */,
- C6859E8C029090F304C91782 /* Documentation */,
- 1AB674ADFE9D54B511CA2CBB /* Products */,
- );
- name = misc;
- sourceTree = "<group>";
- };
- 08FB7795FE84155DC02AAC07 /* Source */ = {
- isa = PBXGroup;
- children = (
- 1EF455991227C4050022403A /* processLexicalTable.cpp */,
- );
- name = Source;
- sourceTree = "<group>";
- };
- 1AB674ADFE9D54B511CA2CBB /* Products */ = {
- isa = PBXGroup;
- children = (
- 1EF455BA1227C4760022403A /* processLexicalTable */,
- );
- name = Products;
- sourceTree = "<group>";
- };
- 1EF455C91227C4D60022403A /* Products */ = {
- isa = PBXGroup;
- children = (
- 1EF455CD1227C4D60022403A /* libmoses.a */,
- );
- name = Products;
- sourceTree = "<group>";
- };
- 1EF455D41227C50C0022403A /* Products */ = {
- isa = PBXGroup;
- children = (
- 1EF455D81227C50C0022403A /* libOnDiskPt.a */,
- );
- name = Products;
- sourceTree = "<group>";
- };
- C6859E8C029090F304C91782 /* Documentation */ = {
- isa = PBXGroup;
- children = (
- C6859E8B029090EE04C91782 /* misc.1 */,
- );
- name = Documentation;
- sourceTree = "<group>";
- };
-/* End PBXGroup section */
-
-/* Begin PBXNativeTarget section */
- 1EF455B91227C4760022403A /* processLexicalTable */ = {
- isa = PBXNativeTarget;
- buildConfigurationList = 1EF455C11227C4A70022403A /* Build configuration list for PBXNativeTarget "processLexicalTable" */;
- buildPhases = (
- 1EF455B71227C4760022403A /* Sources */,
- 1EF455B81227C4760022403A /* Frameworks */,
- );
- buildRules = (
- );
- dependencies = (
- 1EF456221227C8A30022403A /* PBXTargetDependency */,
- 1EF456241227C8A80022403A /* PBXTargetDependency */,
- );
- name = processLexicalTable;
- productName = processLexicalTable;
- productReference = 1EF455BA1227C4760022403A /* processLexicalTable */;
- productType = "com.apple.product-type.tool";
- };
-/* End PBXNativeTarget section */
-
-/* Begin PBXProject section */
- 08FB7793FE84155DC02AAC07 /* Project object */ = {
- isa = PBXProject;
- buildConfigurationList = 1DEB923508733DC60010E9CD /* Build configuration list for PBXProject "misc" */;
- compatibilityVersion = "Xcode 3.1";
- hasScannedForEncodings = 1;
- mainGroup = 08FB7794FE84155DC02AAC07 /* misc */;
- projectDirPath = "";
- projectReferences = (
- {
- ProductGroup = 1EF455C91227C4D60022403A /* Products */;
- ProjectRef = 1EF455C81227C4D60022403A /* moses.xcodeproj */;
- },
- {
- ProductGroup = 1EF455D41227C50C0022403A /* Products */;
- ProjectRef = 1EF455D31227C50C0022403A /* OnDiskPt.xcodeproj */;
- },
- );
- projectRoot = "";
- targets = (
- 1EF455B91227C4760022403A /* processLexicalTable */,
- );
- };
-/* End PBXProject section */
-
-/* Begin PBXReferenceProxy section */
- 1EF455CD1227C4D60022403A /* libmoses.a */ = {
- isa = PBXReferenceProxy;
- fileType = archive.ar;
- path = libmoses.a;
- remoteRef = 1EF455CC1227C4D60022403A /* PBXContainerItemProxy */;
- sourceTree = BUILT_PRODUCTS_DIR;
- };
- 1EF455D81227C50C0022403A /* libOnDiskPt.a */ = {
- isa = PBXReferenceProxy;
- fileType = archive.ar;
- path = libOnDiskPt.a;
- remoteRef = 1EF455D71227C50C0022403A /* PBXContainerItemProxy */;
- sourceTree = BUILT_PRODUCTS_DIR;
- };
-/* End PBXReferenceProxy section */
-
-/* Begin PBXSourcesBuildPhase section */
- 1EF455B71227C4760022403A /* Sources */ = {
- isa = PBXSourcesBuildPhase;
- buildActionMask = 2147483647;
- files = (
- 1EF455C41227C4BB0022403A /* processLexicalTable.cpp in Sources */,
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
-/* End PBXSourcesBuildPhase section */
-
-/* Begin PBXTargetDependency section */
- 1EF456221227C8A30022403A /* PBXTargetDependency */ = {
- isa = PBXTargetDependency;
- name = moses;
- targetProxy = 1EF456211227C8A30022403A /* PBXContainerItemProxy */;
- };
- 1EF456241227C8A80022403A /* PBXTargetDependency */ = {
- isa = PBXTargetDependency;
- name = OnDiskPt;
- targetProxy = 1EF456231227C8A80022403A /* PBXContainerItemProxy */;
- };
-/* End PBXTargetDependency section */
-
-/* Begin XCBuildConfiguration section */
- 1DEB923608733DC60010E9CD /* Debug */ = {
- isa = XCBuildConfiguration;
- buildSettings = {
- ARCHS = "$(ARCHS_STANDARD_32_64_BIT)";
- GCC_C_LANGUAGE_STANDARD = gnu99;
- GCC_OPTIMIZATION_LEVEL = 0;
- GCC_WARN_ABOUT_RETURN_TYPE = YES;
- GCC_WARN_UNUSED_VARIABLE = YES;
- ONLY_ACTIVE_ARCH = YES;
- PREBINDING = NO;
- SDKROOT = macosx10.6;
- };
- name = Debug;
- };
- 1DEB923708733DC60010E9CD /* Release */ = {
- isa = XCBuildConfiguration;
- buildSettings = {
- ARCHS = "$(ARCHS_STANDARD_32_64_BIT)";
- GCC_C_LANGUAGE_STANDARD = gnu99;
- GCC_WARN_ABOUT_RETURN_TYPE = YES;
- GCC_WARN_UNUSED_VARIABLE = YES;
- PREBINDING = NO;
- SDKROOT = macosx10.6;
- };
- name = Release;
- };
- 1EF455BC1227C4760022403A /* Debug */ = {
- isa = XCBuildConfiguration;
- buildSettings = {
- ALWAYS_SEARCH_USER_PATHS = NO;
- COPY_PHASE_STRIP = NO;
- GCC_DYNAMIC_NO_PIC = NO;
- GCC_ENABLE_FIX_AND_CONTINUE = YES;
- GCC_MODEL_TUNING = G5;
- GCC_OPTIMIZATION_LEVEL = 0;
- HEADER_SEARCH_PATHS = ../moses/src;
- INSTALL_PATH = /usr/local/bin;
- LIBRARY_SEARCH_PATHS = (
- ../irstlm/lib/i386,
- ../srilm/lib/macosx,
- ../kenlm/lm,
- ../randlm/lib,
- );
- OTHER_LDFLAGS = (
- "-lflm",
- "-lmisc",
- "-loolm",
- "-ldstruct",
- "-lz",
- "-lirstlm",
- "-lkenlm",
- "-lrandlm",
- );
- PREBINDING = NO;
- PRODUCT_NAME = processLexicalTable;
- };
- name = Debug;
- };
- 1EF455BD1227C4760022403A /* Release */ = {
- isa = XCBuildConfiguration;
- buildSettings = {
- ALWAYS_SEARCH_USER_PATHS = NO;
- COPY_PHASE_STRIP = YES;
- DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
- GCC_ENABLE_FIX_AND_CONTINUE = NO;
- GCC_MODEL_TUNING = G5;
- HEADER_SEARCH_PATHS = ../moses/src;
- INSTALL_PATH = /usr/local/bin;
- LIBRARY_SEARCH_PATHS = (
- ../irstlm/lib/i386,
- ../srilm/lib/macosx,
- ../kenlm/lm,
- ../randlm/lib,
- );
- OTHER_LDFLAGS = (
- "-lflm",
- "-lmisc",
- "-loolm",
- "-ldstruct",
- "-lz",
- "-lirstlm",
- "-lkenlm",
- "-lrandlm",
- );
- PREBINDING = NO;
- PRODUCT_NAME = processLexicalTable;
- ZERO_LINK = NO;
- };
- name = Release;
- };
-/* End XCBuildConfiguration section */
-
-/* Begin XCConfigurationList section */
- 1DEB923508733DC60010E9CD /* Build configuration list for PBXProject "misc" */ = {
- isa = XCConfigurationList;
- buildConfigurations = (
- 1DEB923608733DC60010E9CD /* Debug */,
- 1DEB923708733DC60010E9CD /* Release */,
- );
- defaultConfigurationIsVisible = 0;
- defaultConfigurationName = Release;
- };
- 1EF455C11227C4A70022403A /* Build configuration list for PBXNativeTarget "processLexicalTable" */ = {
- isa = XCConfigurationList;
- buildConfigurations = (
- 1EF455BC1227C4760022403A /* Debug */,
- 1EF455BD1227C4760022403A /* Release */,
- );
- defaultConfigurationIsVisible = 0;
- defaultConfigurationName = Release;
- };
-/* End XCConfigurationList section */
- };
- rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
-}
diff --git a/misc/processLexicalTable.vcxproj b/misc/processLexicalTable.vcxproj
deleted file mode 100644
index bdb2719fd..000000000
--- a/misc/processLexicalTable.vcxproj
+++ /dev/null
@@ -1,108 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
- <ItemGroup Label="ProjectConfigurations">
- <ProjectConfiguration Include="Debug|Win32">
- <Configuration>Debug</Configuration>
- <Platform>Win32</Platform>
- </ProjectConfiguration>
- <ProjectConfiguration Include="Release|Win32">
- <Configuration>Release</Configuration>
- <Platform>Win32</Platform>
- </ProjectConfiguration>
- </ItemGroup>
- <PropertyGroup Label="Globals">
- <ProjectGuid>{9834EABB-2033-4607-9DAC-36D16E0725B5}</ProjectGuid>
- <RootNamespace>processLexicalTable</RootNamespace>
- <Keyword>Win32Proj</Keyword>
- </PropertyGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <CharacterSet>Unicode</CharacterSet>
- <WholeProgramOptimization>true</WholeProgramOptimization>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <CharacterSet>Unicode</CharacterSet>
- </PropertyGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
- <ImportGroup Label="ExtensionSettings">
- </ImportGroup>
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <PropertyGroup Label="UserMacros" />
- <PropertyGroup>
- <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
- <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
- <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
- <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
- <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
- <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
- <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
- </PropertyGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
- <ClCompile>
- <Optimization>Disabled</Optimization>
- <AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/moses/src;$(SolutionDir)/kenlm;$(SolutionDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- <MinimalRebuild>true</MinimalRebuild>
- <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
- <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
- <PrecompiledHeader>
- </PrecompiledHeader>
- <WarningLevel>Level3</WarningLevel>
- <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
- </ClCompile>
- <Link>
- <AdditionalDependencies>zdll.lib;$(SolutionDir)$(Configuration)\moses.lib;%(AdditionalDependencies)</AdditionalDependencies>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <SubSystem>Console</SubSystem>
- <RandomizedBaseAddress>false</RandomizedBaseAddress>
- <DataExecutionPrevention>
- </DataExecutionPrevention>
- <TargetMachine>MachineX86</TargetMachine>
- </Link>
- </ItemDefinitionGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
- <ClCompile>
- <AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/moses/src;$(SolutionDir)/kenlm;$(SolutionDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
- <PrecompiledHeader>
- </PrecompiledHeader>
- <WarningLevel>Level3</WarningLevel>
- <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
- </ClCompile>
- <Link>
- <AdditionalDependencies>zdll.lib;$(SolutionDir)$(Configuration)\moses.lib;%(AdditionalDependencies)</AdditionalDependencies>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <SubSystem>Console</SubSystem>
- <OptimizeReferences>true</OptimizeReferences>
- <EnableCOMDATFolding>true</EnableCOMDATFolding>
- <RandomizedBaseAddress>false</RandomizedBaseAddress>
- <DataExecutionPrevention>
- </DataExecutionPrevention>
- <TargetMachine>MachineX86</TargetMachine>
- </Link>
- </ItemDefinitionGroup>
- <ItemGroup>
- <ClCompile Include="processLexicalTable.cpp" />
- </ItemGroup>
- <ItemGroup>
- <ProjectReference Include="..\moses\moses.vcxproj">
- <Project>{8122157a-0de5-44ff-8e5b-024ed6ace7af}</Project>
- <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
- </ProjectReference>
- <ProjectReference Include="..\OnDiskPt\OnDiskPt.vcxproj">
- <Project>{8b07671b-cbaf-4514-affd-ce238cd427e9}</Project>
- <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
- </ProjectReference>
- </ItemGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
- <ImportGroup Label="ExtensionTargets">
- </ImportGroup>
-</Project> \ No newline at end of file
diff --git a/misc/processPhraseTable.vcxproj b/misc/processPhraseTable.vcxproj
deleted file mode 100644
index 692eff058..000000000
--- a/misc/processPhraseTable.vcxproj
+++ /dev/null
@@ -1,108 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
- <ItemGroup Label="ProjectConfigurations">
- <ProjectConfiguration Include="Debug|Win32">
- <Configuration>Debug</Configuration>
- <Platform>Win32</Platform>
- </ProjectConfiguration>
- <ProjectConfiguration Include="Release|Win32">
- <Configuration>Release</Configuration>
- <Platform>Win32</Platform>
- </ProjectConfiguration>
- </ItemGroup>
- <PropertyGroup Label="Globals">
- <ProjectGuid>{AA230564-6DF1-4662-9BF9-7AD73DE53B76}</ProjectGuid>
- <RootNamespace>processPhraseTable</RootNamespace>
- <Keyword>Win32Proj</Keyword>
- </PropertyGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <CharacterSet>Unicode</CharacterSet>
- <WholeProgramOptimization>true</WholeProgramOptimization>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <CharacterSet>Unicode</CharacterSet>
- </PropertyGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
- <ImportGroup Label="ExtensionSettings">
- </ImportGroup>
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <PropertyGroup Label="UserMacros" />
- <PropertyGroup>
- <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
- <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
- <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
- <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
- <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
- <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
- <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
- </PropertyGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
- <ClCompile>
- <Optimization>Disabled</Optimization>
- <AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/moses/src;$(SolutionDir)/kenlm;$(SolutionDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- <MinimalRebuild>true</MinimalRebuild>
- <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
- <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
- <PrecompiledHeader>
- </PrecompiledHeader>
- <WarningLevel>Level3</WarningLevel>
- <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
- </ClCompile>
- <Link>
- <AdditionalDependencies>zdll.lib;$(SolutionDir)$(Configuration)\moses.lib;%(AdditionalDependencies)</AdditionalDependencies>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <SubSystem>Console</SubSystem>
- <RandomizedBaseAddress>false</RandomizedBaseAddress>
- <DataExecutionPrevention>
- </DataExecutionPrevention>
- <TargetMachine>MachineX86</TargetMachine>
- </Link>
- </ItemDefinitionGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
- <ClCompile>
- <AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/moses/src;$(SolutionDir)/kenlm;$(SolutionDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
- <PrecompiledHeader>
- </PrecompiledHeader>
- <WarningLevel>Level3</WarningLevel>
- <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
- </ClCompile>
- <Link>
- <AdditionalDependencies>zlib.lib;$(SolutionDir)$(Configuration)\moses.lib;%(AdditionalDependencies)</AdditionalDependencies>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <SubSystem>Console</SubSystem>
- <OptimizeReferences>true</OptimizeReferences>
- <EnableCOMDATFolding>true</EnableCOMDATFolding>
- <RandomizedBaseAddress>false</RandomizedBaseAddress>
- <DataExecutionPrevention>
- </DataExecutionPrevention>
- <TargetMachine>MachineX86</TargetMachine>
- </Link>
- </ItemDefinitionGroup>
- <ItemGroup>
- <ClCompile Include="processPhraseTable.cpp" />
- </ItemGroup>
- <ItemGroup>
- <ProjectReference Include="..\moses\moses.vcxproj">
- <Project>{8122157a-0de5-44ff-8e5b-024ed6ace7af}</Project>
- <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
- </ProjectReference>
- <ProjectReference Include="..\OnDiskPt\OnDiskPt.vcxproj">
- <Project>{8b07671b-cbaf-4514-affd-ce238cd427e9}</Project>
- <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
- </ProjectReference>
- </ItemGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
- <ImportGroup Label="ExtensionTargets">
- </ImportGroup>
-</Project> \ No newline at end of file
diff --git a/moses-cmd/MainVW.cpp b/moses-cmd/MainVW.cpp
index 2f313df01..694dcee8a 100644
--- a/moses-cmd/MainVW.cpp
+++ b/moses-cmd/MainVW.cpp
@@ -51,12 +51,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "hypergraph.pb.h"
#endif
-#ifdef PT_UG
-#include <boost/foreach.hpp>
-#include "moses/TranslationModel/UG/mmsapt.h"
-#include "moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h"
-#endif
-
using namespace std;
using namespace Moses;
@@ -76,6 +70,9 @@ void OutputFeatureWeightsForHypergraph(std::ostream &outputSearchGraphStream)
/** main function of the command line version of the decoder **/
int main(int argc, char const** argv)
{
+ //setting in the Staticdata a link between the thread id of this process and a NULL tasksptr
+ // StaticData::InstanceNonConst().SetTask(); // => moved into StaticData constructor
+
try {
#ifdef HAVE_PROTOBUF
@@ -141,11 +138,13 @@ int main(int argc, char const** argv)
}
#ifdef WITH_THREADS
+#pragma message ("Compiling with Threads.")
ThreadPool pool(staticData.ThreadCount());
#endif
// main loop over set of input sentences
+ boost::shared_ptr<ContextScope> scope(new ContextScope);
boost::shared_ptr<InputType> source;
while ((source = ioWrapper->ReadInput()) != NULL) {
IFVERBOSE(1) {
@@ -154,7 +153,7 @@ int main(int argc, char const** argv)
// set up task of training one sentence
boost::shared_ptr<TrainingTask> task;
- task = TrainingTask::create(source, ioWrapper);
+ task = TrainingTask::create(source, ioWrapper, scope);
// execute task
#ifdef WITH_THREADS
diff --git a/moses/FF/SkeletonStatefulFF.cpp b/moses/FF/ExampleStatefulFF.cpp
index 2acaf2d2e..5a53c4f87 100644
--- a/moses/FF/SkeletonStatefulFF.cpp
+++ b/moses/FF/ExampleStatefulFF.cpp
@@ -1,5 +1,5 @@
#include <vector>
-#include "SkeletonStatefulFF.h"
+#include "ExampleStatefulFF.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/Hypothesis.h"
@@ -9,7 +9,7 @@ namespace Moses
{
////////////////////////////////////////////////////////////////
-SkeletonStatefulFF::SkeletonStatefulFF(const std::string &line)
+ExampleStatefulFF::ExampleStatefulFF(const std::string &line)
:StatefulFeatureFunction(3, line)
{
ReadParameters();
@@ -19,7 +19,7 @@ SkeletonStatefulFF::SkeletonStatefulFF(const std::string &line)
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove it from your
// implementation (and the declaration in the header file to reduce code clutter.
-void SkeletonStatefulFF::EvaluateInIsolation(const Phrase &source
+void ExampleStatefulFF::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const
@@ -28,7 +28,7 @@ void SkeletonStatefulFF::EvaluateInIsolation(const Phrase &source
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove it from your
// implementation (and the declaration in the header file to reduce code clutter.
-void SkeletonStatefulFF::EvaluateWithSourceContext(const InputType &input
+void ExampleStatefulFF::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
@@ -39,11 +39,11 @@ void SkeletonStatefulFF::EvaluateWithSourceContext(const InputType &input
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove it from your
// implementation (and the declaration in the header file to reduce code clutter.
-void SkeletonStatefulFF::EvaluateTranslationOptionListWithSourceContext
+void ExampleStatefulFF::EvaluateTranslationOptionListWithSourceContext
(const InputType &input, const TranslationOptionList &translationOptionList) const
{}
-FFState* SkeletonStatefulFF::EvaluateWhenApplied(
+FFState* ExampleStatefulFF::EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const
@@ -59,18 +59,18 @@ FFState* SkeletonStatefulFF::EvaluateWhenApplied(
accumulator->PlusEquals(this, "sparse-name", 2.4);
// int targetLen = cur_hypo.GetCurrTargetPhrase().GetSize(); // ??? [UG]
- return new SkeletonState(0);
+ return new ExampleState(0);
}
-FFState* SkeletonStatefulFF::EvaluateWhenApplied(
+FFState* ExampleStatefulFF::EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const
{
- return new SkeletonState(0);
+ return new ExampleState(0);
}
-void SkeletonStatefulFF::SetParameter(const std::string& key, const std::string& value)
+void ExampleStatefulFF::SetParameter(const std::string& key, const std::string& value)
{
if (key == "arg") {
// set value here
diff --git a/moses/FF/SkeletonStatefulFF.h b/moses/FF/ExampleStatefulFF.h
index 7544ddd30..d66274295 100644
--- a/moses/FF/SkeletonStatefulFF.h
+++ b/moses/FF/ExampleStatefulFF.h
@@ -7,11 +7,11 @@
namespace Moses
{
-class SkeletonState : public FFState
+class ExampleState : public FFState
{
int m_targetLen;
public:
- SkeletonState(int targetLen)
+ ExampleState(int targetLen)
:m_targetLen(targetLen) {
}
@@ -19,22 +19,22 @@ public:
return (size_t) m_targetLen;
}
virtual bool operator==(const FFState& o) const {
- const SkeletonState& other = static_cast<const SkeletonState&>(o);
+ const ExampleState& other = static_cast<const ExampleState&>(o);
return m_targetLen == other.m_targetLen;
}
};
-class SkeletonStatefulFF : public StatefulFeatureFunction
+class ExampleStatefulFF : public StatefulFeatureFunction
{
public:
- SkeletonStatefulFF(const std::string &line);
+ ExampleStatefulFF(const std::string &line);
bool IsUseable(const FactorMask &mask) const {
return true;
}
virtual const FFState* EmptyHypothesisState(const InputType &input) const {
- return new SkeletonState(0);
+ return new ExampleState(0);
}
// An empty implementation of this function is provided by StatefulFeatureFunction.
diff --git a/moses/FF/SkeletonStatelessFF.cpp b/moses/FF/ExampleStatelessFF.cpp
index 8474efe76..0e62ad0ad 100644
--- a/moses/FF/SkeletonStatelessFF.cpp
+++ b/moses/FF/ExampleStatelessFF.cpp
@@ -1,5 +1,5 @@
#include <vector>
-#include "SkeletonStatelessFF.h"
+#include "ExampleStatelessFF.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/TargetPhrase.h"
@@ -7,13 +7,13 @@ using namespace std;
namespace Moses
{
-SkeletonStatelessFF::SkeletonStatelessFF(const std::string &line)
+ExampleStatelessFF::ExampleStatelessFF(const std::string &line)
:StatelessFeatureFunction(2, line)
{
ReadParameters();
}
-void SkeletonStatelessFF::EvaluateInIsolation(const Phrase &source
+void ExampleStatelessFF::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const
@@ -29,7 +29,7 @@ void SkeletonStatelessFF::EvaluateInIsolation(const Phrase &source
}
-void SkeletonStatelessFF::EvaluateWithSourceContext(const InputType &input
+void ExampleStatelessFF::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
@@ -43,20 +43,20 @@ void SkeletonStatelessFF::EvaluateWithSourceContext(const InputType &input
}
}
-void SkeletonStatelessFF::EvaluateTranslationOptionListWithSourceContext(const InputType &input
+void ExampleStatelessFF::EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const
{}
-void SkeletonStatelessFF::EvaluateWhenApplied(const Hypothesis& hypo,
+void ExampleStatelessFF::EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
-void SkeletonStatelessFF::EvaluateWhenApplied(const ChartHypothesis &hypo,
+void ExampleStatelessFF::EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
-void SkeletonStatelessFF::SetParameter(const std::string& key, const std::string& value)
+void ExampleStatelessFF::SetParameter(const std::string& key, const std::string& value)
{
if (key == "arg") {
// set value here
diff --git a/moses/FF/SkeletonStatelessFF.h b/moses/FF/ExampleStatelessFF.h
index 0dc46e214..e1f007d21 100644
--- a/moses/FF/SkeletonStatelessFF.h
+++ b/moses/FF/ExampleStatelessFF.h
@@ -6,10 +6,10 @@
namespace Moses
{
-class SkeletonStatelessFF : public StatelessFeatureFunction
+class ExampleStatelessFF : public StatelessFeatureFunction
{
public:
- SkeletonStatelessFF(const std::string &line);
+ ExampleStatelessFF(const std::string &line);
bool IsUseable(const FactorMask &mask) const {
return true;
diff --git a/moses/FF/SkeletonTranslationOptionListFeature.h b/moses/FF/ExampleTranslationOptionListFeature.h
index e47e691aa..7686eb3ff 100644
--- a/moses/FF/SkeletonTranslationOptionListFeature.h
+++ b/moses/FF/ExampleTranslationOptionListFeature.h
@@ -6,10 +6,10 @@
namespace Moses
{
-class SkeletonTranslationOptionListFeature : public StatelessFeatureFunction
+class ExampleTranslationOptionListFeature : public StatelessFeatureFunction
{
public:
- SkeletonTranslationOptionListFeature(const std::string &line)
+ ExampleTranslationOptionListFeature(const std::string &line)
:StatelessFeatureFunction(1, line) {
ReadParameters();
}
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index 9ae145504..398d6593c 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -14,7 +14,7 @@
#include "moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h"
-#include "moses/TranslationModel/ProbingPT/ProbingPT.h"
+#include "moses/TranslationModel/ProbingPT.h"
#include "moses/TranslationModel/PhraseDictionaryMemoryPerSentence.h"
#include "moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h"
@@ -65,16 +65,16 @@
#include "SyntaxRHS.h"
#include "DeleteRules.h"
-#include "moses/FF/SkeletonStatelessFF.h"
-#include "moses/FF/SkeletonStatefulFF.h"
-#include "moses/LM/SkeletonLM.h"
-#include "moses/LM/InMemoryPerSentenceOnDemandLM.h"
-#include "moses/FF/SkeletonTranslationOptionListFeature.h"
+#include "moses/FF/ExampleStatelessFF.h"
+#include "moses/FF/ExampleStatefulFF.h"
+#include "moses/LM/ExampleLM.h"
+#include "moses/FF/ExampleTranslationOptionListFeature.h"
#include "moses/LM/BilingualLM.h"
-#include "moses/TranslationModel/SkeletonPT.h"
+#include "moses/TranslationModel/ExamplePT.h"
#include "moses/Syntax/InputWeightFF.h"
#include "moses/Syntax/RuleTableFF.h"
+#include "moses/LM/InMemoryPerSentenceOnDemandLM.h"
#include "moses/FF/EditOps.h"
#include "moses/FF/CorrectionPattern.h"
@@ -297,13 +297,13 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(UnalignedWordCountFeature);
MOSES_FNAME(DeleteRules);
- MOSES_FNAME(SkeletonStatelessFF);
- MOSES_FNAME(SkeletonStatefulFF);
- MOSES_FNAME(SkeletonLM);
- MOSES_FNAME(InMemoryPerSentenceOnDemandLM);
- MOSES_FNAME(SkeletonTranslationOptionListFeature);
- MOSES_FNAME(SkeletonPT);
+ MOSES_FNAME(ExampleStatelessFF);
+ MOSES_FNAME(ExampleStatefulFF);
+ MOSES_FNAME(ExampleLM);
+ MOSES_FNAME(ExampleTranslationOptionListFeature);
+ MOSES_FNAME(ExamplePT);
+ MOSES_FNAME(InMemoryPerSentenceOnDemandLM);
MOSES_FNAME(EditOps);
MOSES_FNAME(CorrectionPattern);
diff --git a/moses/GenerationDictionary.cpp b/moses/GenerationDictionary.cpp
index 29a4fa2b3..35546e62c 100644
--- a/moses/GenerationDictionary.cpp
+++ b/moses/GenerationDictionary.cpp
@@ -120,7 +120,14 @@ const OutputWordCollection *GenerationDictionary::FindWord(const Word &word) con
{
const OutputWordCollection *ret;
- Collection::const_iterator iter = m_collection.find(&word);
+ Word wordInput;
+ const std::vector<FactorType> &inputFactors = GetInput();
+ for (size_t i = 0; i < inputFactors.size(); ++i) {
+ FactorType factorType = inputFactors[i];
+ wordInput[factorType] = word[factorType];
+ }
+
+ Collection::const_iterator iter = m_collection.find(&wordInput);
if (iter == m_collection.end()) {
// can't find source phrase
ret = NULL;
diff --git a/moses/Jamfile b/moses/Jamfile
index 49aab9025..5200029fb 100644
--- a/moses/Jamfile
+++ b/moses/Jamfile
@@ -122,10 +122,10 @@ vwfiles synlm mmlib mserver headers
FF_Factory.o
LM//LM
TranslationModel/CompactPT//CompactPT
-TranslationModel/ProbingPT//ProbingPT
ThreadPool
..//search
../util/double-conversion//double-conversion
+../probingpt//probingpt
..//z
../OnDiskPt//OnDiskPt
$(TOP)//boost_filesystem
@@ -139,5 +139,5 @@ alias headers-to-install : [ glob-tree *.h ] ;
import testing ;
-unit-test moses_test : [ glob *Test.cpp Mock*.cpp FF/*Test.cpp ] ..//boost_filesystem moses headers ..//z ../OnDiskPt//OnDiskPt ..//boost_unit_test_framework ;
+unit-test moses_test : [ glob *Test.cpp Mock*.cpp FF/*Test.cpp ] ..//boost_filesystem moses headers ..//z ../OnDiskPt//OnDiskPt ../probingpt//probingpt ..//boost_unit_test_framework ;
diff --git a/moses/LM/SkeletonLM.cpp b/moses/LM/ExampleLM.cpp
index f944de23a..034afef2e 100644
--- a/moses/LM/SkeletonLM.cpp
+++ b/moses/LM/ExampleLM.cpp
@@ -1,12 +1,12 @@
-#include "SkeletonLM.h"
+#include "ExampleLM.h"
#include "moses/FactorCollection.h"
using namespace std;
namespace Moses
{
-SkeletonLM::SkeletonLM(const std::string &line)
+ExampleLM::ExampleLM(const std::string &line)
:LanguageModelSingleFactor(line)
{
ReadParameters();
@@ -24,11 +24,11 @@ SkeletonLM::SkeletonLM(const std::string &line)
m_sentenceEndWord[m_factorType] = m_sentenceEnd;
}
-SkeletonLM::~SkeletonLM()
+ExampleLM::~ExampleLM()
{
}
-LMResult SkeletonLM::GetValue(const vector<const Word*> &contextFactor, State* finalState) const
+LMResult ExampleLM::GetValue(const vector<const Word*> &contextFactor, State* finalState) const
{
LMResult ret;
ret.score = contextFactor.size();
diff --git a/moses/LM/SkeletonLM.h b/moses/LM/ExampleLM.h
index 988c9def9..292462917 100644
--- a/moses/LM/SkeletonLM.h
+++ b/moses/LM/ExampleLM.h
@@ -7,13 +7,13 @@
namespace Moses
{
-class SkeletonLM : public LanguageModelSingleFactor
+class ExampleLM : public LanguageModelSingleFactor
{
protected:
public:
- SkeletonLM(const std::string &line);
- ~SkeletonLM();
+ ExampleLM(const std::string &line);
+ ~ExampleLM();
virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0) const;
};
diff --git a/moses/LM/Jamfile b/moses/LM/Jamfile
index 4eafbd632..0c152d555 100644
--- a/moses/LM/Jamfile
+++ b/moses/LM/Jamfile
@@ -138,7 +138,7 @@ if $(with-dalm) {
#Top-level LM library. If you've added a file that doesn't depend on external
#libraries, put it here.
-alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp InMemoryPerSentenceOnDemandLM.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp SkeletonLM.cpp
+alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp InMemoryPerSentenceOnDemandLM.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp ExampleLM.cpp
../../lm//kenlm ..//headers $(dependencies) ;
alias macros : : : : <define>$(lmmacros) ;
diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp
index 4d3e96000..b65c22eb4 100644
--- a/moses/StaticData.cpp
+++ b/moses/StaticData.cpp
@@ -423,8 +423,13 @@ LoadDecodeGraphsOld(const vector<string> &mappingVector,
if (m_decodeGraphs.size() < decodeGraphInd + 1) {
DecodeGraph *decodeGraph;
if (is_syntax(m_options->search.algo)) {
- size_t maxChartSpan = (decodeGraphInd < maxChartSpans.size()) ? maxChartSpans[decodeGraphInd] : DEFAULT_MAX_CHART_SPAN;
- VERBOSE(1,"max-chart-span: " << maxChartSpans[decodeGraphInd] << endl);
+ size_t maxChartSpan;
+ if (decodeGraphInd < maxChartSpans.size()) {
+ maxChartSpan = maxChartSpans[decodeGraphInd];
+ VERBOSE(1,"max-chart-span: " << maxChartSpans[decodeGraphInd] << endl);
+ } else {
+ maxChartSpan = DEFAULT_MAX_CHART_SPAN;
+ }
decodeGraph = new DecodeGraph(m_decodeGraphs.size(), maxChartSpan);
} else {
decodeGraph = new DecodeGraph(m_decodeGraphs.size());
diff --git a/moses/TrainingTask.h b/moses/TrainingTask.h
index 4d2152920..83933691d 100644
--- a/moses/TrainingTask.h
+++ b/moses/TrainingTask.h
@@ -39,6 +39,18 @@ public:
boost::shared_ptr<IOWrapper> const& ioWrapper) {
boost::shared_ptr<TrainingTask> ret(new TrainingTask(source, ioWrapper));
ret->m_self = ret;
+ ret->m_scope.reset(new ContextScope);
+ return ret;
+ }
+
+ // factory function
+ static boost::shared_ptr<TrainingTask>
+ create(boost::shared_ptr<InputType> const& source,
+ boost::shared_ptr<IOWrapper> const& ioWrapper,
+ boost::shared_ptr<ContextScope> const& scope) {
+ boost::shared_ptr<TrainingTask> ret(new TrainingTask(source, ioWrapper));
+ ret->m_self = ret;
+ ret->m_scope = scope;
return ret;
}
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.cpp
index ca219f249..6c80e30af 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.cpp
@@ -18,7 +18,7 @@
***********************************************************************/
#include <iostream>
-#include "ChartRuleLookupManagerSkeleton.h"
+#include "ChartRuleLookupManagerExample.h"
#include "DotChartInMemory.h"
#include "moses/Util.h"
@@ -29,29 +29,29 @@
#include "moses/NonTerminal.h"
#include "moses/ChartCellCollection.h"
#include "moses/TranslationModel/PhraseDictionaryMemory.h"
-#include "moses/TranslationModel/SkeletonPT.h"
+#include "moses/TranslationModel/ExamplePT.h"
using namespace std;
namespace Moses
{
-ChartRuleLookupManagerSkeleton::ChartRuleLookupManagerSkeleton(
+ChartRuleLookupManagerExample::ChartRuleLookupManagerExample(
const ChartParser &parser,
const ChartCellCollectionBase &cellColl,
- const SkeletonPT &skeletonPt)
+ const ExamplePT &skeletonPt)
: ChartRuleLookupManager(parser, cellColl)
, m_skeletonPT(skeletonPt)
{
- cerr << "starting ChartRuleLookupManagerSkeleton" << endl;
+ cerr << "starting ChartRuleLookupManagerExample" << endl;
}
-ChartRuleLookupManagerSkeleton::~ChartRuleLookupManagerSkeleton()
+ChartRuleLookupManagerExample::~ChartRuleLookupManagerExample()
{
// RemoveAllInColl(m_tpColl);
}
-void ChartRuleLookupManagerSkeleton::GetChartRuleCollection(
+void ChartRuleLookupManagerExample::GetChartRuleCollection(
const InputPath &inputPath,
size_t last,
ChartParserCallback &outColl)
@@ -74,12 +74,12 @@ void ChartRuleLookupManagerSkeleton::GetChartRuleCollection(
}
TargetPhrase *
-ChartRuleLookupManagerSkeleton::
+ChartRuleLookupManagerExample::
CreateTargetPhrase(const Word &sourceWord) const
{
- // create a target phrase from the 1st word of the source, prefix with 'ChartManagerSkeleton:'
+ // create a target phrase from the 1st word of the source, prefix with 'ChartManagerExample:'
string str = sourceWord.GetFactor(0)->GetString().as_string();
- str = "ChartManagerSkeleton:" + str;
+ str = "ChartManagerExample:" + str;
TargetPhrase *tp = new TargetPhrase(&m_skeletonPT);
Word &word = tp->AddWord();
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.h
index d01f3b9bd..3b3f59ace 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.h
@@ -29,16 +29,16 @@ class TargetPhraseCollection;
class ChartParserCallback;
class DottedRuleColl;
class Range;
-class SkeletonPT;
+class ExamplePT;
-class ChartRuleLookupManagerSkeleton : public ChartRuleLookupManager
+class ChartRuleLookupManagerExample : public ChartRuleLookupManager
{
public:
- ChartRuleLookupManagerSkeleton(const ChartParser &parser,
- const ChartCellCollectionBase &cellColl,
- const SkeletonPT &skeletonPt);
+ ChartRuleLookupManagerExample(const ChartParser &parser,
+ const ChartCellCollectionBase &cellColl,
+ const ExamplePT &skeletonPt);
- ~ChartRuleLookupManagerSkeleton();
+ ~ChartRuleLookupManagerExample();
virtual void GetChartRuleCollection(
const InputPath &inputPath,
@@ -50,7 +50,7 @@ private:
StackVec m_stackVec;
std::vector<TargetPhraseCollection::shared_ptr > m_tpColl;
- const SkeletonPT &m_skeletonPT;
+ const ExamplePT &m_skeletonPT;
};
} // namespace Moses
diff --git a/moses/TranslationModel/SkeletonPT.cpp b/moses/TranslationModel/ExamplePT.cpp
index 6b42212f9..198ce2814 100644
--- a/moses/TranslationModel/SkeletonPT.cpp
+++ b/moses/TranslationModel/ExamplePT.cpp
@@ -1,29 +1,29 @@
// vim:tabstop=2
-#include "SkeletonPT.h"
-#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h"
+#include "ExamplePT.h"
+#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.h"
using namespace std;
namespace Moses
{
-SkeletonPT::SkeletonPT(const std::string &line)
+ExamplePT::ExamplePT(const std::string &line)
: PhraseDictionary(line, true)
{
ReadParameters();
}
-void SkeletonPT::Load(AllOptions::ptr const& opts)
+void ExamplePT::Load(AllOptions::ptr const& opts)
{
m_options = opts;
SetFeaturesToApply();
}
-void SkeletonPT::InitializeForInput(ttasksptr const& ttask)
+void ExamplePT::InitializeForInput(ttasksptr const& ttask)
{
ReduceCache();
}
-void SkeletonPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
+void ExamplePT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
{
CacheColl &cache = GetCache();
@@ -46,14 +46,14 @@ void SkeletonPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQu
}
}
-TargetPhrase *SkeletonPT::CreateTargetPhrase(const Phrase &sourcePhrase) const
+TargetPhrase *ExamplePT::CreateTargetPhrase(const Phrase &sourcePhrase) const
{
- // create a target phrase from the 1st word of the source, prefix with 'SkeletonPT:'
+ // create a target phrase from the 1st word of the source, prefix with 'ExamplePT:'
assert(sourcePhrase.GetSize());
assert(m_output.size() == 1);
string str = sourcePhrase.GetWord(0).GetFactor(0)->GetString().as_string();
- str = "SkeletonPT:" + str;
+ str = "ExamplePT:" + str;
TargetPhrase *tp = new TargetPhrase(this);
Word &word = tp->AddWord();
@@ -69,17 +69,17 @@ TargetPhrase *SkeletonPT::CreateTargetPhrase(const Phrase &sourcePhrase) const
return tp;
}
-ChartRuleLookupManager* SkeletonPT::CreateRuleLookupManager(const ChartParser &parser,
+ChartRuleLookupManager* ExamplePT::CreateRuleLookupManager(const ChartParser &parser,
const ChartCellCollectionBase &cellCollection,
std::size_t /*maxChartSpan*/)
{
- return new ChartRuleLookupManagerSkeleton(parser, cellCollection, *this);
+ return new ChartRuleLookupManagerExample(parser, cellCollection, *this);
}
-TO_STRING_BODY(SkeletonPT);
+TO_STRING_BODY(ExamplePT);
// friend
-ostream& operator<<(ostream& out, const SkeletonPT& phraseDict)
+ostream& operator<<(ostream& out, const ExamplePT& phraseDict)
{
return out;
}
diff --git a/moses/TranslationModel/SkeletonPT.h b/moses/TranslationModel/ExamplePT.h
index 443f1cc8e..6ec7764c9 100644
--- a/moses/TranslationModel/SkeletonPT.h
+++ b/moses/TranslationModel/ExamplePT.h
@@ -9,12 +9,12 @@ class ChartParser;
class ChartCellCollectionBase;
class ChartRuleLookupManager;
-class SkeletonPT : public PhraseDictionary
+class ExamplePT : public PhraseDictionary
{
- friend std::ostream& operator<<(std::ostream&, const SkeletonPT&);
+ friend std::ostream& operator<<(std::ostream&, const ExamplePT&);
public:
- SkeletonPT(const std::string &line);
+ ExamplePT(const std::string &line);
void Load(AllOptions::ptr const& opts);
diff --git a/moses/TranslationModel/PhraseDictionaryMemoryPerSentence.cpp b/moses/TranslationModel/PhraseDictionaryMemoryPerSentence.cpp
index 36a28089b..fc62f0679 100644
--- a/moses/TranslationModel/PhraseDictionaryMemoryPerSentence.cpp
+++ b/moses/TranslationModel/PhraseDictionaryMemoryPerSentence.cpp
@@ -1,6 +1,6 @@
// vim:tabstop=2
#include "PhraseDictionaryMemoryPerSentence.h"
-#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h"
+#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.h"
using namespace std;
diff --git a/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp b/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp
index 072e482de..acf834cbd 100644
--- a/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp
+++ b/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp
@@ -1,6 +1,5 @@
// vim:tabstop=2
#include "PhraseDictionaryMemoryPerSentenceOnDemand.h"
-#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h"
#include <sstream>
using namespace std;
diff --git a/moses/TranslationModel/PhraseDictionaryTransliteration.cpp b/moses/TranslationModel/PhraseDictionaryTransliteration.cpp
index 3d1664822..2ffe880c7 100644
--- a/moses/TranslationModel/PhraseDictionaryTransliteration.cpp
+++ b/moses/TranslationModel/PhraseDictionaryTransliteration.cpp
@@ -2,7 +2,6 @@
#include <cstdlib>
#include "PhraseDictionaryTransliteration.h"
-#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h"
#include "moses/DecodeGraph.h"
#include "moses/DecodeStep.h"
#include "util/tempfile.hh"
diff --git a/moses/TranslationModel/ProbingPT/ProbingPT.cpp b/moses/TranslationModel/ProbingPT.cpp
index 1ae0c67c3..dca7835f5 100644
--- a/moses/TranslationModel/ProbingPT/ProbingPT.cpp
+++ b/moses/TranslationModel/ProbingPT.cpp
@@ -4,8 +4,8 @@
#include "moses/FactorCollection.h"
#include "moses/TargetPhraseCollection.h"
#include "moses/InputFileStream.h"
-#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h"
-#include "querying.hh"
+#include "probingpt/querying.h"
+#include "probingpt/probing_hash_utils.h"
using namespace std;
@@ -14,6 +14,7 @@ namespace Moses
ProbingPT::ProbingPT(const std::string &line)
: PhraseDictionary(line,true)
,m_engine(NULL)
+ ,load_method(util::POPULATE_OR_READ)
{
ReadParameters();
@@ -31,7 +32,7 @@ void ProbingPT::Load(AllOptions::ptr const& opts)
m_options = opts;
SetFeaturesToApply();
- m_engine = new QueryEngine(m_filePath.c_str());
+ m_engine = new probingpt::QueryEngine(m_filePath.c_str(), load_method);
m_unkId = 456456546456;
@@ -116,6 +117,28 @@ void ProbingPT::CreateAlignmentMap(const std::string path)
}
}
+// Handles the ProbingPT-specific "load" key (throws on an unknown value); other keys go to PhraseDictionary.
+void ProbingPT::SetParameter(const std::string& key, const std::string& value)
+{
+  if (key == "load") {
+    if (value == "lazy") {
+      load_method = util::LAZY;
+    } else if (value == "populate_or_lazy") {
+      load_method = util::POPULATE_OR_LAZY;
+    } else if (value == "populate_or_read" || value == "populate") {
+      load_method = util::POPULATE_OR_READ;
+    } else if (value == "read") {
+      load_method = util::READ;
+    } else if (value == "parallel_read") {
+      load_method = util::PARALLEL_READ;
+    } else {
+      UTIL_THROW2("load method not supported: " << value);
+    }
+  } else {
+    PhraseDictionary::SetParameter(key, value);
+  }
+}
+
void ProbingPT::InitializeForInput(ttasksptr const& ttask)
{
@@ -256,12 +279,12 @@ TargetPhraseCollection *ProbingPT::CreateTargetPhrases(
TargetPhrase *ProbingPT::CreateTargetPhrase(
const char *&offset) const
{
- TargetPhraseInfo *tpInfo = (TargetPhraseInfo*) offset;
+ probingpt::TargetPhraseInfo *tpInfo = (probingpt::TargetPhraseInfo*) offset;
size_t numRealWords = tpInfo->numWords / m_output.size();
TargetPhrase *tp = new TargetPhrase(this);
- offset += sizeof(TargetPhraseInfo);
+ offset += sizeof(probingpt::TargetPhraseInfo);
// scores
float *scores = (float*) offset;
diff --git a/moses/TranslationModel/ProbingPT/ProbingPT.h b/moses/TranslationModel/ProbingPT.h
index 953a2dc2f..1c996f5fa 100644
--- a/moses/TranslationModel/ProbingPT/ProbingPT.h
+++ b/moses/TranslationModel/ProbingPT.h
@@ -3,16 +3,20 @@
#include <boost/iostreams/device/mapped_file.hpp>
#include <boost/bimap.hpp>
#include <boost/unordered_map.hpp>
-#include "../PhraseDictionary.h"
+#include "PhraseDictionary.h"
+#include "util/mmap.hh"
+namespace probingpt
+{
+class QueryEngine;
+class target_text;
+}
namespace Moses
{
class ChartParser;
class ChartCellCollectionBase;
class ChartRuleLookupManager;
-class QueryEngine;
-class target_text;
class ProbingPT : public PhraseDictionary
{
@@ -26,6 +30,8 @@ public:
void InitializeForInput(ttasksptr const& ttask);
+ void SetParameter(const std::string& key, const std::string& value);
+
// for phrase-based model
void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
@@ -39,12 +45,13 @@ public:
protected:
- QueryEngine *m_engine;
+ probingpt::QueryEngine *m_engine;
uint64_t m_unkId;
std::vector<uint64_t> m_sourceVocab; // factor id -> pt id
std::vector<const Factor*> m_targetVocab; // pt id -> factor*
std::vector<const AlignmentInfo*> m_aligns;
+ util::LoadMethod load_method;
boost::iostreams::mapped_file_source file;
const char *data;
diff --git a/moses/TranslationModel/ProbingPT/Jamfile b/moses/TranslationModel/ProbingPT/Jamfile
deleted file mode 100644
index 29c6ec41d..000000000
--- a/moses/TranslationModel/ProbingPT/Jamfile
+++ /dev/null
@@ -1,8 +0,0 @@
-local current = "" ;
-local includes = ;
-
-fakelib ProbingPT : [ glob *.cpp ] ../..//headers : $(includes) <dependency>$(PT-LOG) : : $(includes) ;
-
-path-constant PT-LOG : bin/pt.log ;
-update-if-changed $(PT-LOG) $(current) ;
-
diff --git a/moses/TranslationModel/ProbingPT/StoreVocab.cpp b/moses/TranslationModel/ProbingPT/StoreVocab.cpp
deleted file mode 100644
index 6515bac63..000000000
--- a/moses/TranslationModel/ProbingPT/StoreVocab.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- * StoreVocab.cpp
- *
- * Created on: 15 Jun 2016
- * Author: hieu
- */
-#include <fstream>
-#include "StoreVocab.h"
-
-namespace Moses
-{
-
-} /* namespace Moses2 */
diff --git a/moses/TranslationModel/ProbingPT/hash.hh b/moses/TranslationModel/ProbingPT/hash.hh
deleted file mode 100644
index f218ad9da..000000000
--- a/moses/TranslationModel/ProbingPT/hash.hh
+++ /dev/null
@@ -1,17 +0,0 @@
-#pragma once
-
-#include "util/string_piece.hh"
-#include "util/murmur_hash.hh"
-#include "util/string_piece.hh" //Tokenization and work with StringPiece
-#include "util/tokenize_piece.hh"
-#include <vector>
-
-namespace Moses
-{
-
-//Gets the MurmurmurHash for give string
-uint64_t getHash(StringPiece text);
-
-std::vector<uint64_t> getVocabIDs(const StringPiece &textin);
-
-}
diff --git a/moses/TranslationModel/ProbingPT/line_splitter.cpp b/moses/TranslationModel/ProbingPT/line_splitter.cpp
deleted file mode 100644
index cb9e47fec..000000000
--- a/moses/TranslationModel/ProbingPT/line_splitter.cpp
+++ /dev/null
@@ -1,103 +0,0 @@
-#include "line_splitter.hh"
-
-namespace Moses
-{
-
-line_text splitLine(const StringPiece &textin, bool scfg)
-{
- const char delim[] = "|||";
- line_text output;
-
- //Tokenize
- util::TokenIter<util::MultiCharacter> it(textin, util::MultiCharacter(delim));
- //Get source phrase
- output.source_phrase = Trim(*it);
- //std::cerr << "output.source_phrase=" << output.source_phrase << "AAAA" << std::endl;
-
- //Get target_phrase
- it++;
- output.target_phrase = Trim(*it);
- //std::cerr << "output.target_phrase=" << output.target_phrase << "AAAA" << std::endl;
-
- if (scfg) {
- /*
- std::cerr << "output.source_phrase=" << output.source_phrase << std::endl;
- std::cerr << "output.target_phrase=" << output.target_phrase << std::endl;
- reformatSCFG(output);
- std::cerr << "output.source_phrase=" << output.source_phrase << std::endl;
- std::cerr << "output.target_phrase=" << output.target_phrase << std::endl;
- */
- }
-
- //Get probabilities
- it++;
- output.prob = Trim(*it);
- //std::cerr << "output.prob=" << output.prob << "AAAA" << std::endl;
-
- //Get WordAllignment
- it++;
- if (it == util::TokenIter<util::MultiCharacter>::end()) return output;
- output.word_align = Trim(*it);
- //std::cerr << "output.word_align=" << output.word_align << "AAAA" << std::endl;
-
- //Get count
- it++;
- if (it == util::TokenIter<util::MultiCharacter>::end()) return output;
- output.counts = Trim(*it);
- //std::cerr << "output.counts=" << output.counts << "AAAA" << std::endl;
-
- //Get sparse_score
- it++;
- if (it == util::TokenIter<util::MultiCharacter>::end()) return output;
- output.sparse_score = Trim(*it);
- //std::cerr << "output.sparse_score=" << output.sparse_score << "AAAA" << std::endl;
-
- //Get property
- it++;
- if (it == util::TokenIter<util::MultiCharacter>::end()) return output;
- output.property = Trim(*it);
- //std::cerr << "output.property=" << output.property << "AAAA" << std::endl;
-
- return output;
-}
-
-std::vector<unsigned char> splitWordAll1(const StringPiece &textin)
-{
- const char delim[] = " ";
- const char delim2[] = "-";
- std::vector<unsigned char> output;
-
- //Case with no word alignments.
- if (textin.size() == 0) {
- return output;
- }
-
- //Split on space
- util::TokenIter<util::MultiCharacter> it(textin, util::MultiCharacter(delim));
-
- //For each int
- while (it) {
- //Split on dash (-)
- util::TokenIter<util::MultiCharacter> itInner(*it,
- util::MultiCharacter(delim2));
-
- //Insert the two entries in the vector. User will read entry 0 and 1 to get the first,
- //2 and 3 for second etc. Use unsigned char instead of int to save space, as
- //word allignments are all very small numbers that fit in a single byte
- output.push_back((unsigned char) (atoi(itInner->data())));
- itInner++;
- output.push_back((unsigned char) (atoi(itInner->data())));
- it++;
- }
-
- return output;
-
-}
-
-void reformatSCFG(line_text &output)
-{
-
-}
-
-}
-
diff --git a/moses/TranslationModel/ProbingPT/probing_hash_utils.cpp b/moses/TranslationModel/ProbingPT/probing_hash_utils.cpp
deleted file mode 100644
index f23f57d66..000000000
--- a/moses/TranslationModel/ProbingPT/probing_hash_utils.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-#include "probing_hash_utils.hh"
-
-namespace Moses
-{
-
-//Read table from disk, return memory map location
-char * readTable(const char * filename, size_t size)
-{
- //Initial position of the file is the end of the file, thus we know the size
- int fd;
- char * map;
-
- fd = open(filename, O_RDONLY);
- if (fd == -1) {
- perror("Error opening file for reading");
- exit(EXIT_FAILURE);
- }
-
- map = (char *) mmap(0, size, PROT_READ, MAP_SHARED, fd, 0);
-
- if (map == MAP_FAILED) {
- close(fd);
- perror("Error mmapping the file");
- exit(EXIT_FAILURE);
- }
-
- return map;
-}
-
-void serialize_table(char *mem, size_t size, const std::string &filename)
-{
- std::ofstream os(filename.c_str(), std::ios::binary);
- os.write((const char*) &mem[0], size);
- os.close();
-
-}
-
-uint64_t getKey(const uint64_t source_phrase[], size_t size)
-{
- //TOO SLOW
- //uint64_t key = util::MurmurHashNative(&source_phrase[0], source_phrase.size());
- uint64_t key = 0;
- for (size_t i = 0; i < size; i++) {
- key += (source_phrase[i] << i);
- }
- return key;
-}
-
-}
-
diff --git a/moses/TranslationModel/ProbingPT/probing_hash_utils.hh b/moses/TranslationModel/ProbingPT/probing_hash_utils.hh
deleted file mode 100644
index 998686b2e..000000000
--- a/moses/TranslationModel/ProbingPT/probing_hash_utils.hh
+++ /dev/null
@@ -1,51 +0,0 @@
-#pragma once
-
-#include "util/probing_hash_table.hh"
-
-#include <sys/mman.h>
-#include <boost/functional/hash.hpp>
-#include <fcntl.h>
-#include <fstream>
-
-namespace Moses
-{
-
-#define API_VERSION 15
-
-//Hash table entry
-struct Entry {
- typedef uint64_t Key;
- Key key;
-
- Key GetKey() const {
- return key;
- }
-
- void SetKey(Key to) {
- key = to;
- }
-
- uint64_t value;
-};
-
-#define NONE std::numeric_limits<uint64_t>::max()
-
-//Define table
-typedef util::ProbingHashTable<Entry, boost::hash<uint64_t> > Table;
-
-void serialize_table(char *mem, size_t size, const std::string &filename);
-
-char * readTable(const char * filename, size_t size);
-
-uint64_t getKey(const uint64_t source_phrase[], size_t size);
-
-struct TargetPhraseInfo {
- uint32_t alignTerm;
- uint32_t alignNonTerm;
- uint16_t numWords;
- uint16_t propLength;
- uint16_t filler;
-};
-
-}
-
diff --git a/moses/TranslationModel/ProbingPT/querying.cpp b/moses/TranslationModel/ProbingPT/querying.cpp
deleted file mode 100644
index 10c35e361..000000000
--- a/moses/TranslationModel/ProbingPT/querying.cpp
+++ /dev/null
@@ -1,141 +0,0 @@
-#include "querying.hh"
-#include "util/exception.hh"
-
-using namespace std;
-
-namespace Moses
-{
-
-QueryEngine::QueryEngine(const char * filepath)
-{
-
- //Create filepaths
- std::string basepath(filepath);
- std::string path_to_config = basepath + "/config";
- std::string path_to_hashtable = basepath + "/probing_hash.dat";
- std::string path_to_source_vocabid = basepath + "/source_vocabids";
- std::string alignPath = basepath + "/Alignments.dat";
-
- if (!FileExists(path_to_config)) {
- UTIL_THROW2("Binary table doesn't exist is didn't finish binarizing: " << path_to_config);
- }
-
- ///Source phrase vocabids
- read_map(source_vocabids, path_to_source_vocabid.c_str());
-
- // alignments
- read_alignments(alignPath);
-
- //Read config file
- boost::unordered_map<std::string, std::string> keyValue;
-
- std::ifstream config(path_to_config.c_str());
- std::string line;
- while (getline(config, line)) {
- std::vector<std::string> toks = Tokenize(line, "\t");
- UTIL_THROW_IF2(toks.size() != 2, "Wrong config format:" << line);
- keyValue[ toks[0] ] = toks[1];
- }
-
- bool found;
- //Check API version:
- int version;
- found = Get(keyValue, "API_VERSION", version);
- if (!found) {
- std::cerr << "Old or corrupted version of ProbingPT. Please rebinarize your phrase tables." << std::endl;
- } else if (version != API_VERSION) {
- std::cerr << "The ProbingPT API has changed. " << version << "!="
- << API_VERSION << " Please rebinarize your phrase tables." << std::endl;
- exit(EXIT_FAILURE);
- }
-
- //Get tablesize.
- int tablesize;
- found = Get(keyValue, "uniq_entries", tablesize);
- if (!found) {
- std::cerr << "uniq_entries not found" << std::endl;
- exit(EXIT_FAILURE);
- }
-
- //Number of scores
- found = Get(keyValue, "num_scores", num_scores);
- if (!found) {
- std::cerr << "num_scores not found" << std::endl;
- exit(EXIT_FAILURE);
- }
-
- //How may scores from lex reordering models
- found = Get(keyValue, "num_lex_scores", num_lex_scores);
- if (!found) {
- std::cerr << "num_lex_scores not found" << std::endl;
- exit(EXIT_FAILURE);
- }
-
- // have the scores been log() and FloorScore()?
- found = Get(keyValue, "log_prob", logProb);
- if (!found) {
- std::cerr << "logProb not found" << std::endl;
- exit(EXIT_FAILURE);
- }
-
- config.close();
-
- //Read hashtable
- table_filesize = Table::Size(tablesize, 1.2);
- mem = readTable(path_to_hashtable.c_str(), table_filesize);
- Table table_init(mem, table_filesize);
- table = table_init;
-
- std::cerr << "Initialized successfully! " << std::endl;
-}
-
-QueryEngine::~QueryEngine()
-{
- //Clear mmap content from memory.
- munmap(mem, table_filesize);
-
-}
-
-uint64_t QueryEngine::getKey(uint64_t source_phrase[], size_t size) const
-{
- //TOO SLOW
- //uint64_t key = util::MurmurHashNative(&source_phrase[0], source_phrase.size());
- return Moses::getKey(source_phrase, size);
-}
-
-std::pair<bool, uint64_t> QueryEngine::query(uint64_t key)
-{
- std::pair<bool, uint64_t> ret;
-
- const Entry * entry;
- ret.first = table.Find(key, entry);
- if (ret.first) {
- ret.second = entry->value;
- }
- return ret;
-}
-
-void QueryEngine::read_alignments(const std::string &alignPath)
-{
- std::ifstream strm(alignPath.c_str());
-
- string line;
- while (getline(strm, line)) {
- vector<string> toks = Tokenize(line, "\t ");
- UTIL_THROW_IF2(toks.size() == 0, "Corrupt alignment file");
-
- uint32_t alignInd = Scan<uint32_t>(toks[0]);
- if (alignInd >= alignColl.size()) {
- alignColl.resize(alignInd + 1);
- }
-
- Alignments &aligns = alignColl[alignInd];
- for (size_t i = 1; i < toks.size(); ++i) {
- size_t pos = Scan<size_t>(toks[i]);
- aligns.push_back(pos);
- }
- }
-}
-
-}
-
diff --git a/moses/TranslationModel/ProbingPT/querying.hh b/moses/TranslationModel/ProbingPT/querying.hh
deleted file mode 100644
index 915bc4806..000000000
--- a/moses/TranslationModel/ProbingPT/querying.hh
+++ /dev/null
@@ -1,66 +0,0 @@
-#pragma once
-
-#include <boost/unordered_map.hpp>
-#include <sys/stat.h> //For finding size of file
-#include "vocabid.hh"
-#include <algorithm> //toLower
-#include <deque>
-#include "probing_hash_utils.hh"
-#include "hash.hh" //Includes line splitter
-#include "line_splitter.hh"
-#include "moses//Util.h"
-
-namespace Moses
-{
-
-class QueryEngine
-{
- std::map<uint64_t, std::string> source_vocabids;
-
- typedef std::vector<unsigned char> Alignments;
- std::vector<Alignments> alignColl;
-
- Table table;
- char *mem; //Memory for the table, necessary so that we can correctly destroy the object
-
- size_t table_filesize;
- bool is_reordering;
-
- void read_alignments(const std::string &alignPath);
-
-public:
- int num_scores;
- int num_lex_scores;
- bool logProb;
-
- QueryEngine(const char *);
- ~QueryEngine();
-
- std::pair<bool, uint64_t> query(uint64_t key);
-
- const std::map<uint64_t, std::string> &getSourceVocab() const {
- return source_vocabids;
- }
-
- const std::vector<Alignments> &getAlignments() const {
- return alignColl;
- }
-
- uint64_t getKey(uint64_t source_phrase[], size_t size) const;
-
- template<typename T>
- inline bool Get(const boost::unordered_map<std::string, std::string> &keyValue, const std::string &sought, T &found) const {
- boost::unordered_map<std::string, std::string>::const_iterator iter = keyValue.find(sought);
- if (iter == keyValue.end()) {
- return false;
- }
-
- const std::string &foundStr = iter->second;
- found = Scan<T>(foundStr);
- return true;
- }
-
-};
-
-}
-
diff --git a/moses/parameters/AllOptions.cpp b/moses/parameters/AllOptions.cpp
index 868b8e4fb..019e1b0c7 100644
--- a/moses/parameters/AllOptions.cpp
+++ b/moses/parameters/AllOptions.cpp
@@ -101,6 +101,11 @@ namespace Moses
if (!syntax.update(param)) return false;
return sanity_check();
}
+#else
+ bool
+ AllOptions::
+ update(std::map<std::string,xmlrpc_c::value>const& param)
+ { return true; } // #else stub: nothing to update, but a bool function must not fall off its end (UB)
#endif
bool
diff --git a/moses/parameters/CubePruningOptions.cpp b/moses/parameters/CubePruningOptions.cpp
index 793ae2db6..847a421f8 100644
--- a/moses/parameters/CubePruningOptions.cpp
+++ b/moses/parameters/CubePruningOptions.cpp
@@ -72,6 +72,11 @@ namespace Moses
return true;
}
+#else
+ bool
+ CubePruningOptions::
+ update(std::map<std::string,xmlrpc_c::value>const& params)
+ { return true; } // #else stub: nothing to update, but a bool function must not fall off its end (UB)
#endif
diff --git a/moses/parameters/InputOptions.cpp b/moses/parameters/InputOptions.cpp
index dbdb2f60e..55bc49885 100644
--- a/moses/parameters/InputOptions.cpp
+++ b/moses/parameters/InputOptions.cpp
@@ -95,6 +95,11 @@ namespace Moses {
xml_policy = Scan<XmlInputType>(xmlrpc_c::value_string(si->second));
return true;
}
+#else
+ bool
+ InputOptions::
+ update(std::map<std::string,xmlrpc_c::value>const& param)
+ { return true; } // #else stub: nothing to update, but a bool function must not fall off its end (UB)
#endif
}
diff --git a/moses/parameters/NBestOptions.cpp b/moses/parameters/NBestOptions.cpp
index 3000f49df..afae0ed6c 100644
--- a/moses/parameters/NBestOptions.cpp
+++ b/moses/parameters/NBestOptions.cpp
@@ -62,6 +62,11 @@ update(std::map<std::string,xmlrpc_c::value>const& param)
enabled = (nbest_size > 0);
return true;
}
+#else
+bool
+NBestOptions::
+update(std::map<std::string,xmlrpc_c::value>const& param)
+{ return true; } // #else stub: nothing to update, but a bool function must not fall off its end (UB)
#endif
diff --git a/moses/parameters/OOVHandlingOptions.cpp b/moses/parameters/OOVHandlingOptions.cpp
index 154074664..a55026b98 100644
--- a/moses/parameters/OOVHandlingOptions.cpp
+++ b/moses/parameters/OOVHandlingOptions.cpp
@@ -43,6 +43,11 @@ namespace Moses {
// xml_policy = Scan<XmlInputType>(xmlrpc_c::value_string(si->second));
return true;
}
+#else
+ bool
+ OOVHandlingOptions::
+ update(std::map<std::string,xmlrpc_c::value>const& param)
+ { return true; } // #else stub: nothing to update, but a bool function must not fall off its end (UB)
#endif
}
diff --git a/moses/parameters/OptionsBaseClass.cpp b/moses/parameters/OptionsBaseClass.cpp
index a19aaf7a6..324359e93 100644
--- a/moses/parameters/OptionsBaseClass.cpp
+++ b/moses/parameters/OptionsBaseClass.cpp
@@ -24,5 +24,10 @@ namespace Moses {
if (m == param.end()) return dfltval;
return Scan<bool>(xmlrpc_c::value_string(m->second));
}
+#else
+ bool
+ check(std::map<std::string, xmlrpc_c::value> const& param,
+ std::string const key, bool dfltval)
+ { return dfltval; } // #else stub: behaves as if the key were absent instead of falling off the end (UB)
#endif
}
diff --git a/moses/parameters/ReportingOptions.cpp b/moses/parameters/ReportingOptions.cpp
index 210950a3c..fe56a7356 100644
--- a/moses/parameters/ReportingOptions.cpp
+++ b/moses/parameters/ReportingOptions.cpp
@@ -128,11 +128,17 @@ namespace Moses {
for (size_t i = 0; i < MAX_NUM_FACTORS; ++i)
factor_order.push_back(i);
}
+
+ m = param.find("no-ReportSegmentation");
+ if (m == param.end() || !Scan<bool>(xmlrpc_c::value_string(m->second))) {
+
+ // If we are reporting alignment info, turn on ReportSegmentation, unless XML request explicitly says not to
+ m = param.find("align");
+ if (m != param.end() && Scan<bool>(xmlrpc_c::value_string(m->second)))
+ ReportSegmentation = 1;
- m = param.find("align");
- if (m != param.end() && Scan<bool>(xmlrpc_c::value_string(m->second)))
- ReportSegmentation = 1;
-
+ }
+
PrintAlignmentInfo = check(param,"word-align",PrintAlignmentInfo);
m = param.find("factor-delimiter");
diff --git a/moses/parameters/SearchOptions.cpp b/moses/parameters/SearchOptions.cpp
index 958569e94..35028b8fb 100644
--- a/moses/parameters/SearchOptions.cpp
+++ b/moses/parameters/SearchOptions.cpp
@@ -102,6 +102,11 @@ namespace Moses
return true;
}
+#else
+ bool
+ SearchOptions::
+ update(std::map<std::string,xmlrpc_c::value>const& params)
+ { return true; } // #else stub: nothing to update, but a bool function must not fall off its end (UB)
#endif
}
diff --git a/moses/parameters/SyntaxOptions.cpp b/moses/parameters/SyntaxOptions.cpp
index f76c187ec..e977dd4c0 100644
--- a/moses/parameters/SyntaxOptions.cpp
+++ b/moses/parameters/SyntaxOptions.cpp
@@ -77,6 +77,11 @@ namespace Moses {
// xml_policy = Scan<XmlInputType>(xmlrpc_c::value_string(si->second));
return true;
}
+#else
+ bool
+ SyntaxOptions::
+ update(std::map<std::string,xmlrpc_c::value>const& param)
+ { return true; } // #else stub: nothing to update, but a bool function must not fall off its end (UB)
#endif
}
diff --git a/moses/server/TranslationRequest.cpp b/moses/server/TranslationRequest.cpp
index e2580fe2f..767358e5c 100644
--- a/moses/server/TranslationRequest.cpp
+++ b/moses/server/TranslationRequest.cpp
@@ -85,13 +85,14 @@ void
TranslationRequest::
add_phrase_aln_info(Hypothesis const& h, vector<xmlrpc_c::value>& aInfo) const
{
- // if (!m_withAlignInfo) return;
- if (!options()->output.ReportSegmentation) return;
+ if (!m_withAlignInfo) return;
+ // if (!options()->output.ReportSegmentation) return;
Range const& trg = h.GetCurrTargetWordsRange();
Range const& src = h.GetCurrSourceWordsRange();
std::map<std::string, xmlrpc_c::value> pAlnInfo;
pAlnInfo["tgt-start"] = xmlrpc_c::value_int(trg.GetStartPos());
+ pAlnInfo["tgt-end"] = xmlrpc_c::value_int(trg.GetEndPos());
pAlnInfo["src-start"] = xmlrpc_c::value_int(src.GetStartPos());
pAlnInfo["src-end"] = xmlrpc_c::value_int(src.GetEndPos());
aInfo.push_back(xmlrpc_c::value_struct(pAlnInfo));
@@ -356,6 +357,12 @@ parse_request(std::map<std::string, xmlrpc_c::value> const& params)
}
}
+ // Report alignment info if Moses config says to or if XML request says to
+ m_withAlignInfo = options()->output.ReportSegmentation || check(params, "align");
+
+ // Report word alignment info if Moses config says to or if XML request says to
+ m_withWordAlignInfo = options()->output.PrintAlignmentInfo || check(params, "word-align");
+
si = params.find("weights");
if (si != params.end())
{
@@ -465,8 +472,8 @@ pack_hypothesis(const Moses::Manager& manager,
<< std::endl);
dest[key] = xmlrpc_c::value_string(target.str());
- // if (m_withAlignInfo) {
- if (options()->output.ReportSegmentation) {
+ if (m_withAlignInfo) {
+ // if (options()->output.ReportSegmentation) {
// phrase alignment, if requested
vector<xmlrpc_c::value> p_aln;
@@ -475,8 +482,8 @@ pack_hypothesis(const Moses::Manager& manager,
dest["align"] = xmlrpc_c::value_array(p_aln);
}
- // if (m_withWordAlignInfo) {
- if (options()->output.PrintAlignmentInfo) {
+ if (m_withWordAlignInfo) {
+ //if (options()->output.PrintAlignmentInfo) {
// word alignment, if requested
vector<xmlrpc_c::value> w_aln;
BOOST_REVERSE_FOREACH(Hypothesis const* e, edges)
diff --git a/moses/server/TranslationRequest.h b/moses/server/TranslationRequest.h
index 2554e5544..3463c72c8 100644
--- a/moses/server/TranslationRequest.h
+++ b/moses/server/TranslationRequest.h
@@ -38,8 +38,8 @@ TranslationRequest : public virtual Moses::TranslationTask
Translator* m_translator;
std::string m_source_string, m_target_string;
- // bool m_withAlignInfo;
- // bool m_withWordAlignInfo;
+ bool m_withAlignInfo;
+ bool m_withWordAlignInfo;
bool m_withGraphInfo;
bool m_withTopts;
bool m_withScoreBreakdown;
diff --git a/contrib/moses2/AlignmentInfo.cpp b/moses2/AlignmentInfo.cpp
index 2e19fa481..2e19fa481 100644
--- a/contrib/moses2/AlignmentInfo.cpp
+++ b/moses2/AlignmentInfo.cpp
diff --git a/contrib/moses2/AlignmentInfo.h b/moses2/AlignmentInfo.h
index 89b31a1fc..89b31a1fc 100644
--- a/contrib/moses2/AlignmentInfo.h
+++ b/moses2/AlignmentInfo.h
diff --git a/contrib/moses2/AlignmentInfoCollection.cpp b/moses2/AlignmentInfoCollection.cpp
index a6116400c..a6116400c 100644
--- a/contrib/moses2/AlignmentInfoCollection.cpp
+++ b/moses2/AlignmentInfoCollection.cpp
diff --git a/contrib/moses2/AlignmentInfoCollection.h b/moses2/AlignmentInfoCollection.h
index 0d409430d..0d409430d 100644
--- a/contrib/moses2/AlignmentInfoCollection.h
+++ b/moses2/AlignmentInfoCollection.h
diff --git a/moses2/ArcLists.cpp b/moses2/ArcLists.cpp
new file mode 100644
index 000000000..1143024c0
--- /dev/null
+++ b/moses2/ArcLists.cpp
@@ -0,0 +1,127 @@
+/*
+ * ArcLists.cpp
+ *
+ * Created on: 26 Oct 2015
+ * Author: hieu
+ */
+#include <iostream>
+#include <sstream>
+#include <algorithm>
+#include <boost/foreach.hpp>
+#include "ArcLists.h"
+#include "HypothesisBase.h"
+#include "util/exception.hh"
+
+using namespace std;
+
+namespace Moses2
+{
+
+ArcLists::ArcLists()
+{
+ // TODO Auto-generated constructor stub
+
+}
+
+ArcLists::~ArcLists()
+{
+ BOOST_FOREACH(const Coll::value_type &collPair, m_coll) {
+ const ArcList *arcList = collPair.second;
+ delete arcList;
+ }
+}
+
+void ArcLists::AddArc(bool added, const HypothesisBase *currHypo,
+ const HypothesisBase *otherHypo)
+{
+ //cerr << added << " " << currHypo << " " << otherHypo << endl;
+ ArcList *arcList;
+ if (added) {
+ // we're winners!
+ if (otherHypo) {
+ // there was a existing losing hypo
+ arcList = &GetAndDetachArcList(otherHypo);
+ } else {
+ // there was no existing hypo
+ arcList = new ArcList;
+ }
+ m_coll[currHypo] = arcList;
+ } else {
+ // we're losers!
+ // there should be a winner, we're not doing beam pruning
+ UTIL_THROW_IF2(otherHypo == NULL, "There must have been a winning hypo");
+ arcList = &GetArcList(otherHypo);
+ }
+
+ // in any case, add the curr hypo
+ arcList->push_back(currHypo);
+}
+
+ArcList &ArcLists::GetArcList(const HypothesisBase *hypo)
+{
+ Coll::iterator iter = m_coll.find(hypo);
+ UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
+ ArcList &arcList = *iter->second;
+ return arcList;
+}
+
+const ArcList &ArcLists::GetArcList(const HypothesisBase *hypo) const
+{
+ Coll::const_iterator iter = m_coll.find(hypo);
+
+ if (iter == m_coll.end()) {
+ cerr << "looking for:" << hypo << " have " << m_coll.size() << " :";
+ BOOST_FOREACH(const Coll::value_type &collPair, m_coll) {
+ const HypothesisBase *hypo = collPair.first;
+ cerr << hypo << " ";
+ }
+ }
+
+ UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list for " << hypo);
+ ArcList &arcList = *iter->second;
+ return arcList;
+}
+
+ArcList &ArcLists::GetAndDetachArcList(const HypothesisBase *hypo)
+{
+ Coll::iterator iter = m_coll.find(hypo);
+ UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
+ ArcList &arcList = *iter->second;
+
+ m_coll.erase(iter);
+
+ return arcList;
+}
+
+void ArcLists::Sort()
+{
+ BOOST_FOREACH(Coll::value_type &collPair, m_coll) {
+ ArcList &list = *collPair.second;
+ std::sort(list.begin(), list.end(), HypothesisFutureScoreOrderer() );
+ }
+}
+
+void ArcLists::Delete(const HypothesisBase *hypo)
+{
+ //cerr << "hypo=" << hypo->Debug() << endl;
+ //cerr << "m_coll=" << m_coll.size() << endl;
+ Coll::iterator iter = m_coll.find(hypo);
+ UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
+ ArcList *arcList = iter->second;
+
+ m_coll.erase(iter);
+ delete arcList;
+}
+
+std::string ArcLists::Debug(const System &system) const
+{
+ stringstream strm;
+ BOOST_FOREACH(const Coll::value_type &collPair, m_coll) {
+ const ArcList *arcList = collPair.second;
+ strm << arcList << "(" << arcList->size() << ") ";
+ }
+ return strm.str();
+}
+
+}
+
diff --git a/contrib/moses2/ArcLists.h b/moses2/ArcLists.h
index db606401f..742c9d9e2 100644
--- a/contrib/moses2/ArcLists.h
+++ b/moses2/ArcLists.h
@@ -23,7 +23,7 @@ public:
virtual ~ArcLists();
void AddArc(bool added, const HypothesisBase *currHypo,
- const HypothesisBase *otherHypo);
+ const HypothesisBase *otherHypo);
void Sort();
void Delete(const HypothesisBase *hypo);
diff --git a/contrib/moses2/Array.h b/moses2/Array.h
index 59b003135..d9402a704 100644
--- a/contrib/moses2/Array.h
+++ b/moses2/Array.h
@@ -13,26 +13,21 @@ public:
typedef T* iterator;
typedef const T* const_iterator;
//! iterators
- const_iterator begin() const
- {
+ const_iterator begin() const {
return m_arr;
}
- const_iterator end() const
- {
+ const_iterator end() const {
return m_arr + m_size;
}
- iterator begin()
- {
+ iterator begin() {
return m_arr;
}
- iterator end()
- {
+ iterator end() {
return m_arr + m_size;
}
- Array(MemPool &pool, size_t size = 0, const T &val = T())
- {
+ Array(MemPool &pool, size_t size = 0, const T &val = T()) {
m_size = size;
m_maxSize = size;
m_arr = pool.Allocate<T>(size);
@@ -41,26 +36,23 @@ public:
}
}
- size_t size() const
- {
+ size_t size() const {
return m_size;
}
- const T& operator[](size_t ind) const
- {
+ const T& operator[](size_t ind) const {
return m_arr[ind];
}
- T& operator[](size_t ind)
- {
+ T& operator[](size_t ind) {
return m_arr[ind];
}
- T *GetArray()
- { return m_arr; }
+ T *GetArray() {
+ return m_arr;
+ }
- size_t hash() const
- {
+ size_t hash() const {
size_t seed = 0;
for (size_t i = 0; i < m_size; ++i) {
boost::hash_combine(seed, m_arr[i]);
@@ -68,21 +60,18 @@ public:
return seed;
}
- int Compare(const Array &compare) const
- {
+ int Compare(const Array &compare) const {
int cmp = memcmp(m_arr, compare.m_arr, sizeof(T) * m_size);
return cmp;
}
- bool operator==(const Array &compare) const
- {
+ bool operator==(const Array &compare) const {
int cmp = Compare(compare);
return cmp == 0;
}
- void resize(size_t newSize)
- {
+ void resize(size_t newSize) {
assert(m_size < m_maxSize);
m_size = newSize;
}
diff --git a/contrib/moses2/EstimatedScores.cpp b/moses2/EstimatedScores.cpp
index dfe52bb2b..e71647ce5 100644
--- a/contrib/moses2/EstimatedScores.cpp
+++ b/moses2/EstimatedScores.cpp
@@ -99,8 +99,8 @@ float EstimatedScores::CalcEstimatedScore(Bitmap const &bitmap, size_t startPos,
}
// end of a gap?
else if (startGap != notInGap
- && (bitmap.GetValue(currPos) == true
- || (startPos <= currPos && currPos <= endPos))) {
+ && (bitmap.GetValue(currPos) == true
+ || (startPos <= currPos && currPos <= endPos))) {
estimatedScore += GetValue(startGap, currPos - 1);
startGap = notInGap;
}
diff --git a/contrib/moses2/EstimatedScores.h b/moses2/EstimatedScores.h
index eae2e08ab..f85470783 100644
--- a/contrib/moses2/EstimatedScores.h
+++ b/moses2/EstimatedScores.h
@@ -36,8 +36,7 @@ class EstimatedScores: public Matrix<float>
{
public:
EstimatedScores(MemPool &pool, size_t size) :
- Matrix<float>(pool, size, size)
- {
+ Matrix<float>(pool, size, size) {
}
~EstimatedScores(); // not implemented
@@ -45,8 +44,7 @@ public:
float CalcEstimatedScore(Bitmap const&) const;
float CalcEstimatedScore(Bitmap const&, size_t startPos, size_t endPos) const;
- std::ostream &Debug(std::ostream &out, const System &system) const
- {
+ std::ostream &Debug(std::ostream &out, const System &system) const {
for (size_t endPos = 0; endPos < GetSize(); endPos++) {
for (size_t startPos = 0; startPos < GetSize(); startPos++)
out << GetValue(startPos, endPos) << " ";
diff --git a/contrib/moses2/FF/Distortion.cpp b/moses2/FF/Distortion.cpp
index 1d7b7246d..3c0cd8cee 100644
--- a/contrib/moses2/FF/Distortion.cpp
+++ b/moses2/FF/Distortion.cpp
@@ -16,36 +16,30 @@ using namespace std;
namespace Moses2
{
-struct DistortionState_traditional: public FFState
-{
+struct DistortionState_traditional: public FFState {
Range range;
int first_gap;
DistortionState_traditional() :
- range()
- {
+ range() {
// uninitialised
}
- void Set(const Range& wr, int fg)
- {
+ void Set(const Range& wr, int fg) {
range = wr;
first_gap = fg;
}
- size_t hash() const
- {
+ size_t hash() const {
return range.GetEndPos();
}
- virtual bool operator==(const FFState& other) const
- {
+ virtual bool operator==(const FFState& other) const {
const DistortionState_traditional& o =
- static_cast<const DistortionState_traditional&>(other);
+ static_cast<const DistortionState_traditional&>(other);
return range.GetEndPos() == o.range.GetEndPos();
}
- virtual std::string ToString() const
- {
+ virtual std::string ToString() const {
stringstream sb;
sb << first_gap << " " << range;
return sb.str();
@@ -55,7 +49,7 @@ struct DistortionState_traditional: public FFState
///////////////////////////////////////////////////////////////////////
Distortion::Distortion(size_t startInd, const std::string &line) :
- StatefulFeatureFunction(startInd, line)
+ StatefulFeatureFunction(startInd, line)
{
ReadParameters();
}
@@ -71,10 +65,10 @@ FFState* Distortion::BlankState(MemPool &pool, const System &sys) const
}
void Distortion::EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const
+ const InputType &input, const Hypothesis &hypo) const
{
DistortionState_traditional &stateCast =
- static_cast<DistortionState_traditional&>(state);
+ static_cast<DistortionState_traditional&>(state);
// fake previous translated phrase start and end
size_t start = NOT_FOUND;
@@ -92,31 +86,31 @@ void Distortion::EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
}
void Distortion::EvaluateInIsolation(MemPool &pool, const System &system,
- const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
+ const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const
{
}
void Distortion::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const
{
}
void Distortion::EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const
+ const Hypothesis &hypo, const FFState &prevState, Scores &scores,
+ FFState &state) const
{
const DistortionState_traditional &prev =
- static_cast<const DistortionState_traditional&>(prevState);
+ static_cast<const DistortionState_traditional&>(prevState);
SCORE distortionScore = CalculateDistortionScore(prev.range,
- hypo.GetInputPath().range, prev.first_gap);
+ hypo.GetInputPath().range, prev.first_gap);
//cerr << "distortionScore=" << distortionScore << endl;
scores.PlusEquals(mgr.system, *this, distortionScore);
DistortionState_traditional &stateCast =
- static_cast<DistortionState_traditional&>(state);
+ static_cast<DistortionState_traditional&>(state);
stateCast.Set(hypo.GetInputPath().range, hypo.GetBitmap().GetFirstGapPos());
//cerr << "hypo=" << hypo.Debug(mgr.system) << endl;
@@ -128,8 +122,7 @@ SCORE Distortion::CalculateDistortionScore(const Range &prev, const Range &curr,
bool useEarlyDistortionCost = false;
if (!useEarlyDistortionCost) {
return -(SCORE) ComputeDistortionDistance(prev, curr);
- }
- else {
+ } else {
/* Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007
Definitions:
S : current source range
@@ -162,7 +155,7 @@ SCORE Distortion::CalculateDistortionScore(const Range &prev, const Range &curr,
// case4: otherwise => return 2(nbWordBetween(S,S')+length(S))
//IFVERBOSE(4) std::cerr<< "MQ07disto:case4" << std::endl;
return (float) -2
- * ((int) curr.GetNumWordsBetween(prev) + (int) curr.GetNumWordsCovered());
+ * ((int) curr.GetNumWordsBetween(prev) + (int) curr.GetNumWordsCovered());
}
}
@@ -173,16 +166,15 @@ int Distortion::ComputeDistortionDistance(const Range& prev,
int dist = 0;
if (prev.GetNumWordsCovered() == 0) {
dist = current.GetStartPos();
- }
- else {
+ } else {
dist = (int) prev.GetEndPos() - (int) current.GetStartPos() + 1;
}
return abs(dist);
}
void Distortion::EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const
+ const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
+ FFState &state) const
{
UTIL_THROW2("Not implemented");
}
diff --git a/contrib/moses2/FF/Distortion.h b/moses2/FF/Distortion.h
index 45577d1c3..685aa1445 100644
--- a/contrib/moses2/FF/Distortion.h
+++ b/moses2/FF/Distortion.h
@@ -23,33 +23,32 @@ public:
virtual FFState* BlankState(MemPool &pool, const System &sys) const;
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const;
+ const InputType &input, const Hypothesis &hypo) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
+ const TargetPhraseImpl &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const;
- virtual void EvaluateWhenApplied(const std::deque<Hypothesis*> &hypos) const
- {
+ virtual void EvaluateWhenApplied(const std::deque<Hypothesis*> &hypos) const {
}
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const;
+ const Hypothesis &hypo, const FFState &prevState, Scores &scores,
+ FFState &state) const;
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const;
+ const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
+ FFState &state) const;
protected:
SCORE CalculateDistortionScore(const Range &prev, const Range &curr,
- const int FirstGap) const;
+ const int FirstGap) const;
int ComputeDistortionDistance(const Range& prev, const Range& current) const;
diff --git a/contrib/moses2/FF/SkeletonStatefulFF.cpp b/moses2/FF/ExampleStatefulFF.cpp
index d159794f0..86b364f53 100644
--- a/contrib/moses2/FF/SkeletonStatefulFF.cpp
+++ b/moses2/FF/ExampleStatefulFF.cpp
@@ -1,11 +1,11 @@
/*
- * SkeletonStatefulFF.cpp
+ * ExampleStatefulFF.cpp
*
* Created on: 27 Oct 2015
* Author: hieu
*/
#include <sstream>
-#include "SkeletonStatefulFF.h"
+#include "ExampleStatefulFF.h"
#include "../PhraseBased/Manager.h"
#include "../PhraseBased/Hypothesis.h"
@@ -14,28 +14,24 @@ using namespace std;
namespace Moses2
{
-class SkeletonState: public FFState
+class ExampleState: public FFState
{
public:
int targetLen;
- SkeletonState()
- {
+ ExampleState() {
// uninitialised
}
- virtual size_t hash() const
- {
+ virtual size_t hash() const {
return (size_t) targetLen;
}
- virtual bool operator==(const FFState& o) const
- {
- const SkeletonState& other = static_cast<const SkeletonState&>(o);
+ virtual bool operator==(const FFState& o) const {
+ const ExampleState& other = static_cast<const ExampleState&>(o);
return targetLen == other.targetLen;
}
- virtual std::string ToString() const
- {
+ virtual std::string ToString() const {
stringstream sb;
sb << targetLen;
return sb.str();
@@ -44,52 +40,52 @@ public:
};
////////////////////////////////////////////////////////////////////////////////////////
-SkeletonStatefulFF::SkeletonStatefulFF(size_t startInd, const std::string &line) :
- StatefulFeatureFunction(startInd, line)
+ExampleStatefulFF::ExampleStatefulFF(size_t startInd, const std::string &line) :
+ StatefulFeatureFunction(startInd, line)
{
ReadParameters();
}
-SkeletonStatefulFF::~SkeletonStatefulFF()
+ExampleStatefulFF::~ExampleStatefulFF()
{
// TODO Auto-generated destructor stub
}
-FFState* SkeletonStatefulFF::BlankState(MemPool &pool, const System &sys) const
+FFState* ExampleStatefulFF::BlankState(MemPool &pool, const System &sys) const
{
- return new (pool.Allocate<SkeletonState>()) SkeletonState();
+ return new (pool.Allocate<ExampleState>()) ExampleState();
}
-void SkeletonStatefulFF::EmptyHypothesisState(FFState &state,
+void ExampleStatefulFF::EmptyHypothesisState(FFState &state,
const ManagerBase &mgr, const InputType &input,
const Hypothesis &hypo) const
{
- SkeletonState &stateCast = static_cast<SkeletonState&>(state);
+ ExampleState &stateCast = static_cast<ExampleState&>(state);
stateCast.targetLen = 0;
}
-void SkeletonStatefulFF::EvaluateInIsolation(MemPool &pool,
+void ExampleStatefulFF::EvaluateInIsolation(MemPool &pool,
const System &system, const Phrase<Moses2::Word> &source,
const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
}
-void SkeletonStatefulFF::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
+void ExampleStatefulFF::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
}
-void SkeletonStatefulFF::EvaluateWhenApplied(const ManagerBase &mgr,
+void ExampleStatefulFF::EvaluateWhenApplied(const ManagerBase &mgr,
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
FFState &state) const
{
- SkeletonState &stateCast = static_cast<SkeletonState&>(state);
+ ExampleState &stateCast = static_cast<ExampleState&>(state);
stateCast.targetLen = hypo.GetTargetPhrase().GetSize();
}
-void SkeletonStatefulFF::EvaluateWhenApplied(const SCFG::Manager &mgr,
+void ExampleStatefulFF::EvaluateWhenApplied(const SCFG::Manager &mgr,
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
FFState &state) const
{
diff --git a/moses2/FF/ExampleStatefulFF.h b/moses2/FF/ExampleStatefulFF.h
new file mode 100644
index 000000000..437f54515
--- /dev/null
+++ b/moses2/FF/ExampleStatefulFF.h
@@ -0,0 +1,46 @@
+/*
+ * ExampleStatefulFF.h
+ *
+ * Created on: 27 Oct 2015
+ * Author: hieu
+ */
+
+#pragma once
+
+#include "StatefulFeatureFunction.h"
+
+namespace Moses2
+{
+
+// Example stateful feature function. Its per-hypothesis state is the
+// ExampleState class defined in ExampleStatefulFF.cpp, which carries a
+// single targetLen value.
+class ExampleStatefulFF: public StatefulFeatureFunction
+{
+public:
+  // Forwards both arguments to StatefulFeatureFunction, then calls
+  // ReadParameters().
+  ExampleStatefulFF(size_t startInd, const std::string &line);
+  virtual ~ExampleStatefulFF();
+
+  // Placement-constructs an ExampleState in memory allocated from pool.
+  virtual FFState* BlankState(MemPool &pool, const System &sys) const;
+
+  // Sets the state's targetLen to 0.
+  virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
+                                    const InputType &input, const Hypothesis &hypo) const;
+
+  // Phrase-based overload; the implementation has an empty body.
+  virtual void EvaluateInIsolation(MemPool &pool, const System &system,
+                                   const Phrase<Moses2::Word> &source,
+                                   const TargetPhraseImpl &targetPhrase, Scores &scores,
+                                   SCORE &estimatedScore) const;
+
+  // SCFG overload; the implementation has an empty body.
+  virtual void EvaluateInIsolation(MemPool &pool, const System &system,
+                                   const Phrase<SCFG::Word> &source,
+                                   const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
+                                   SCORE &estimatedScore) const;
+
+  // Phrase-based overload: stores the size of the hypothesis' target phrase
+  // in the state's targetLen.
+  virtual void EvaluateWhenApplied(const ManagerBase &mgr,
+                                   const Hypothesis &hypo, const FFState &prevState, Scores &scores,
+                                   FFState &state) const;
+
+  // SCFG overload of EvaluateWhenApplied.
+  virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
+                                   const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
+                                   FFState &state) const;
+
+};
+
+}
+
diff --git a/contrib/moses2/FF/SkeletonStatelessFF.cpp b/moses2/FF/ExampleStatelessFF.cpp
index 981f9dd75..29716aaf8 100644
--- a/contrib/moses2/FF/SkeletonStatelessFF.cpp
+++ b/moses2/FF/ExampleStatelessFF.cpp
@@ -6,31 +6,31 @@
*/
#include "../Scores.h"
-#include "SkeletonStatelessFF.h"
+#include "ExampleStatelessFF.h"
namespace Moses2
{
-SkeletonStatelessFF::SkeletonStatelessFF(size_t startInd,
- const std::string &line) :
- StatelessFeatureFunction(startInd, line)
+ExampleStatelessFF::ExampleStatelessFF(size_t startInd,
+ const std::string &line) :
+ StatelessFeatureFunction(startInd, line)
{
ReadParameters();
}
-SkeletonStatelessFF::~SkeletonStatelessFF()
+ExampleStatelessFF::~ExampleStatelessFF()
{
// TODO Auto-generated destructor stub
}
-void SkeletonStatelessFF::EvaluateInIsolation(MemPool &pool,
+void ExampleStatelessFF::EvaluateInIsolation(MemPool &pool,
const System &system, const Phrase<Moses2::Word> &source,
const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
}
-void SkeletonStatelessFF::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
+void ExampleStatelessFF::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
diff --git a/moses2/FF/ExampleStatelessFF.h b/moses2/FF/ExampleStatelessFF.h
new file mode 100644
index 000000000..20b1acaaf
--- /dev/null
+++ b/moses2/FF/ExampleStatelessFF.h
@@ -0,0 +1,34 @@
+/*
+ * ExampleStatelessFF.h
+ *
+ * Created on: 27 Oct 2015
+ * Author: hieu
+ */
+
+#pragma once
+
+#include "StatelessFeatureFunction.h"
+
+namespace Moses2
+{
+
+// Example stateless feature function.
+class ExampleStatelessFF: public StatelessFeatureFunction
+{
+public:
+  // Forwards both arguments to StatelessFeatureFunction, then calls
+  // ReadParameters().
+  ExampleStatelessFF(size_t startInd, const std::string &line);
+  virtual ~ExampleStatelessFF();
+
+  // Phrase-based overload; the implementation has an empty body.
+  virtual void EvaluateInIsolation(MemPool &pool, const System &system,
+                                   const Phrase<Moses2::Word> &source,
+                                   const TargetPhraseImpl &targetPhrase, Scores &scores,
+                                   SCORE &estimatedScore) const;
+
+  // SCFG overload of EvaluateInIsolation.
+  virtual void EvaluateInIsolation(MemPool &pool, const System &system,
+                                   const Phrase<SCFG::Word> &source,
+                                   const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
+                                   SCORE &estimatedScore) const;
+
+};
+
+}
+
diff --git a/moses2/FF/FFState.cpp b/moses2/FF/FFState.cpp
new file mode 100644
index 000000000..c92b213fa
--- /dev/null
+++ b/moses2/FF/FFState.cpp
@@ -0,0 +1 @@
+#include "FFState.h"
diff --git a/contrib/moses2/FF/FFState.h b/moses2/FF/FFState.h
index 33ef5d1f6..41789b7dc 100644
--- a/contrib/moses2/FF/FFState.h
+++ b/moses2/FF/FFState.h
@@ -10,14 +10,12 @@ namespace Moses2
class FFState
{
public:
- virtual ~FFState()
- {
+ virtual ~FFState() {
}
virtual size_t hash() const = 0;
virtual bool operator==(const FFState& other) const = 0;
- virtual bool operator!=(const FFState& other) const
- {
+ virtual bool operator!=(const FFState& other) const {
return !(*this == other);
}
@@ -35,17 +33,14 @@ inline std::ostream& operator<<(std::ostream& out, const FFState& obj)
class DummyState: public FFState
{
public:
- DummyState()
- {
+ DummyState() {
}
- virtual size_t hash() const
- {
+ virtual size_t hash() const {
return 0;
}
- virtual bool operator==(const FFState& other) const
- {
+ virtual bool operator==(const FFState& other) const {
return true;
}
diff --git a/contrib/moses2/FF/FeatureFunction.cpp b/moses2/FF/FeatureFunction.cpp
index 3326ceaa4..6b4617dc5 100644
--- a/contrib/moses2/FF/FeatureFunction.cpp
+++ b/moses2/FF/FeatureFunction.cpp
@@ -17,10 +17,10 @@ namespace Moses2
{
FeatureFunction::FeatureFunction(size_t startInd, const std::string &line)
-:m_startInd(startInd)
-,m_numScores(1)
-,m_PhraseTableInd(NOT_FOUND)
-,m_tuneable(true)
+ :m_startInd(startInd)
+ ,m_numScores(1)
+ ,m_PhraseTableInd(NOT_FOUND)
+ ,m_tuneable(true)
{
ParseLine(line);
//cerr << GetName() << " " << m_startInd << "-" << (m_startInd + m_numScores - 1) << endl;
@@ -43,18 +43,16 @@ void FeatureFunction::ParseLine(const std::string &line)
for (size_t i = 1; i < toks.size(); ++i) {
vector<string> args = TokenizeFirstOnly(toks[i], "=");
UTIL_THROW_IF2(args.size() != 2,
- "Incorrect format for feature function arg: " << toks[i]);
+ "Incorrect format for feature function arg: " << toks[i]);
pair<set<string>::iterator, bool> ret = keys.insert(args[0]);
UTIL_THROW_IF2(!ret.second, "Duplicate key in line " << line);
if (args[0] == "num-features") {
m_numScores = Scan<size_t>(args[1]);
- }
- else if (args[0] == "name") {
+ } else if (args[0] == "name") {
m_name = args[1];
- }
- else {
+ } else {
m_args.push_back(args);
}
}
@@ -71,12 +69,11 @@ void FeatureFunction::ReadParameters()
}
void FeatureFunction::SetParameter(const std::string& key,
- const std::string& value)
+ const std::string& value)
{
if (key == "tuneable") {
m_tuneable = Scan<bool>(value);
- }
- else {
+ } else {
UTIL_THROW2(GetName() << ": Unknown argument " << key << "=" << value);
}
}
diff --git a/contrib/moses2/FF/FeatureFunction.h b/moses2/FF/FeatureFunction.h
index 1e25fce39..102bda8f1 100644
--- a/contrib/moses2/FF/FeatureFunction.h
+++ b/moses2/FF/FeatureFunction.h
@@ -39,44 +39,35 @@ public:
FeatureFunction(size_t startInd, const std::string &line);
virtual ~FeatureFunction();
- virtual void Load(System &system)
- {
+ virtual void Load(System &system) {
}
- size_t GetStartInd() const
- {
+ size_t GetStartInd() const {
return m_startInd;
}
- size_t GetNumScores() const
- {
+ size_t GetNumScores() const {
return m_numScores;
}
- const std::string &GetName() const
- {
+ const std::string &GetName() const {
return m_name;
}
- void SetName(const std::string &val)
- {
+ void SetName(const std::string &val) {
m_name = val;
}
- virtual size_t HasPhraseTableInd() const
- {
+ virtual size_t HasPhraseTableInd() const {
return false;
}
- void SetPhraseTableInd(size_t ind)
- {
+ void SetPhraseTableInd(size_t ind) {
m_PhraseTableInd = ind;
}
- size_t GetPhraseTableInd() const
- {
+ size_t GetPhraseTableInd() const {
return m_PhraseTableInd;
}
//! if false, then this feature is not displayed in the n-best list.
// use with care
- virtual bool IsTuneable() const
- {
+ virtual bool IsTuneable() const {
return m_tuneable;
}
@@ -85,30 +76,27 @@ public:
// may have more factors than actually need, but not guaranteed.
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const = 0;
+ const TargetPhraseImpl &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const = 0;
// For SCFG decoding, the source can contain non-terminals, NOT the raw
// source from the input sentence
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const = 0;
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const = 0;
// used by lexicalised reordering model to add scores to tp data structures
virtual void EvaluateAfterTablePruning(MemPool &pool,
- const TargetPhrases &tps, const Phrase<Moses2::Word> &sourcePhrase) const
- {
+ const TargetPhrases &tps, const Phrase<Moses2::Word> &sourcePhrase) const {
}
virtual void EvaluateAfterTablePruning(MemPool &pool,
- const SCFG::TargetPhrases &tps, const Phrase<SCFG::Word> &sourcePhrase) const
- {
+ const SCFG::TargetPhrases &tps, const Phrase<SCFG::Word> &sourcePhrase) const {
}
// clean up temporary memory, called after processing each sentence
- virtual void CleanUpAfterSentenceProcessing() const
- {
+ virtual void CleanUpAfterSentenceProcessing() const {
}
protected:
diff --git a/contrib/moses2/FF/FeatureFunctions.cpp b/moses2/FF/FeatureFunctions.cpp
index 5cb0bb1c2..0e61fb0e4 100644
--- a/contrib/moses2/FF/FeatureFunctions.cpp
+++ b/moses2/FF/FeatureFunctions.cpp
@@ -6,6 +6,7 @@
*/
#include <boost/foreach.hpp>
+#include "FeatureRegistry.h"
#include "FeatureFunctions.h"
#include "StatefulFeatureFunction.h"
#include "../System.h"
@@ -24,9 +25,8 @@ using namespace std;
namespace Moses2
{
FeatureFunctions::FeatureFunctions(System &system) :
- m_system(system), m_ffStartInd(0)
+ m_system(system), m_ffStartInd(0)
{
- //m_registry.PrintFF();
}
FeatureFunctions::~FeatureFunctions()
@@ -37,27 +37,26 @@ FeatureFunctions::~FeatureFunctions()
void FeatureFunctions::Load()
{
// load, everything but pts
- BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions){
- FeatureFunction *nonConstFF = const_cast<FeatureFunction*>(ff);
- PhraseTable *pt = dynamic_cast<PhraseTable*>(nonConstFF);
+ BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) {
+ FeatureFunction *nonConstFF = const_cast<FeatureFunction*>(ff);
+ PhraseTable *pt = dynamic_cast<PhraseTable*>(nonConstFF);
- if (pt) {
- // do nothing. load pt last
- }
- else {
- cerr << "Loading " << nonConstFF->GetName() << endl;
- nonConstFF->Load(m_system);
- cerr << "Finished loading " << nonConstFF->GetName() << endl;
+ if (pt) {
+ // do nothing. load pt last
+ } else {
+ cerr << "Loading " << nonConstFF->GetName() << endl;
+ nonConstFF->Load(m_system);
+ cerr << "Finished loading " << nonConstFF->GetName() << endl;
+ }
}
-}
// load pt
-BOOST_FOREACH(const PhraseTable *pt, phraseTables) {
- PhraseTable *nonConstPT = const_cast<PhraseTable*>(pt);
- cerr << "Loading " << nonConstPT->GetName() << endl;
- nonConstPT->Load(m_system);
- cerr << "Finished loading " << nonConstPT->GetName() << endl;
-}
+ BOOST_FOREACH(const PhraseTable *pt, phraseTables) {
+ PhraseTable *nonConstPT = const_cast<PhraseTable*>(pt);
+ cerr << "Loading " << nonConstPT->GetName() << endl;
+ nonConstPT->Load(m_system);
+ cerr << "Finished loading " << nonConstPT->GetName() << endl;
+ }
}
void FeatureFunctions::Create()
@@ -67,7 +66,7 @@ void FeatureFunctions::Create()
const PARAM_VEC *ffParams = params.GetParam("feature");
UTIL_THROW_IF2(ffParams == NULL, "Must have [feature] section");
- BOOST_FOREACH(const std::string &line, *ffParams){
+ BOOST_FOREACH(const std::string &line, *ffParams) {
//cerr << "line=" << line << endl;
FeatureFunction *ff = Create(line);
@@ -112,7 +111,7 @@ FeatureFunction *FeatureFunctions::Create(const std::string &line)
{
vector<string> toks = Tokenize(line);
- FeatureFunction *ff = m_registry.Construct(m_ffStartInd, toks[0], line);
+ FeatureFunction *ff = FeatureRegistry::Instance().Construct(m_ffStartInd, toks[0], line);
UTIL_THROW_IF2(ff == NULL, "Feature function not created");
// name
@@ -129,34 +128,33 @@ std::string FeatureFunctions::GetDefaultName(const std::string &stub)
{
size_t ind;
boost::unordered_map<std::string, size_t>::iterator iter =
- m_defaultNames.find(stub);
+ m_defaultNames.find(stub);
if (iter == m_defaultNames.end()) {
m_defaultNames[stub] = 0;
ind = 0;
- }
- else {
+ } else {
ind = ++(iter->second);
}
return stub + SPrint(ind);
}
const FeatureFunction *FeatureFunctions::FindFeatureFunction(
- const std::string &name) const
+ const std::string &name) const
{
- BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions){
- if (ff->GetName() == name) {
- return ff;
- }
- }
- return NULL;
+ BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) {
+ if (ff->GetName() == name) {
+ return ff;
+ }
+ }
+ return NULL;
}
FeatureFunction *FeatureFunctions::FindFeatureFunction(
- const std::string &name)
+ const std::string &name)
{
- BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions){
+ BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) {
if (ff->GetName() == name) {
- return const_cast<FeatureFunction *>(ff);
+ return const_cast<FeatureFunction *>(ff);
}
}
return NULL;
@@ -184,7 +182,7 @@ void FeatureFunctions::EvaluateInIsolation(MemPool &pool, const System &system,
{
SCORE estimatedScore = 0;
- BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions){
+ BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) {
Scores& scores = targetPhrase.GetScores();
ff->EvaluateInIsolation(pool, system, source, targetPhrase, scores, estimatedScore);
}
@@ -193,14 +191,14 @@ void FeatureFunctions::EvaluateInIsolation(MemPool &pool, const System &system,
}
void FeatureFunctions::EvaluateInIsolation(
- MemPool &pool,
- const System &system,
- const Phrase<SCFG::Word> &source,
- SCFG::TargetPhraseImpl &targetPhrase) const
+ MemPool &pool,
+ const System &system,
+ const Phrase<SCFG::Word> &source,
+ SCFG::TargetPhraseImpl &targetPhrase) const
{
SCORE estimatedScore = 0;
- BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions){
+ BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) {
Scores& scores = targetPhrase.GetScores();
ff->EvaluateInIsolation(pool, system, source, targetPhrase, scores, estimatedScore);
}
@@ -234,7 +232,7 @@ void FeatureFunctions::EvaluateWhenAppliedBatch(const Batch &batch) const
void FeatureFunctions::CleanUpAfterSentenceProcessing() const
{
BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) {
- ff->CleanUpAfterSentenceProcessing();
+ ff->CleanUpAfterSentenceProcessing();
}
}
@@ -274,7 +272,7 @@ void FeatureFunctions::OverrideFeatures()
UTIL_THROW_IF2(keyVal.size() != 2, "Incorrect format for parameter override: " << keyValStr);
cerr << "Override " << ff->GetName() << " "
- << keyVal[0] << "=" << keyVal[1] << endl;
+ << keyVal[0] << "=" << keyVal[1] << endl;
ff->SetParameter(keyVal[0], keyVal[1]);
diff --git a/contrib/moses2/FF/FeatureFunctions.h b/moses2/FF/FeatureFunctions.h
index 271f68c0f..6a3f9bb78 100644
--- a/contrib/moses2/FF/FeatureFunctions.h
+++ b/moses2/FF/FeatureFunctions.h
@@ -7,10 +7,10 @@
#pragma once
+#include <boost/unordered_map.hpp>
#include <vector>
#include <string>
#include "../legacy/Parameter.h"
-#include "FeatureRegistry.h"
#include "../Phrase.h"
namespace Moses2
@@ -47,17 +47,21 @@ public:
FeatureFunctions(System &system);
virtual ~FeatureFunctions();
- const std::vector<const FeatureFunction*> &GetFeatureFunctions() const
- { return m_featureFunctions; }
+ const std::vector<const FeatureFunction*> &GetFeatureFunctions() const {
+ return m_featureFunctions;
+ }
- const std::vector<const StatefulFeatureFunction*> &GetStatefulFeatureFunctions() const
- { return m_statefulFeatureFunctions; }
+ const std::vector<const StatefulFeatureFunction*> &GetStatefulFeatureFunctions() const {
+ return m_statefulFeatureFunctions;
+ }
- const std::vector<const FeatureFunction*> &GetWithPhraseTableInd() const
- { return m_withPhraseTableInd; }
+ const std::vector<const FeatureFunction*> &GetWithPhraseTableInd() const {
+ return m_withPhraseTableInd;
+ }
- size_t GetNumScores() const
- { return m_ffStartInd; }
+ size_t GetNumScores() const {
+ return m_ffStartInd;
+ }
void Create();
void Load();
@@ -65,19 +69,20 @@ public:
const FeatureFunction *FindFeatureFunction(const std::string &name) const;
const PhraseTable *GetPhraseTableExcludeUnknownWordPenalty(size_t ptInd);
- const UnknownWordPenalty *GetUnknownWordPenalty() const
- { return m_unkWP; }
+ const UnknownWordPenalty *GetUnknownWordPenalty() const {
+ return m_unkWP;
+ }
// the pool here must be the system pool if the rule was loaded during load, or the mgr pool if it was loaded on demand
void EvaluateInIsolation(MemPool &pool, const System &system,
- const Phrase<Moses2::Word> &source, TargetPhraseImpl &targetPhrase) const;
+ const Phrase<Moses2::Word> &source, TargetPhraseImpl &targetPhrase) const;
void EvaluateInIsolation(MemPool &pool, const System &system,
- const Phrase<SCFG::Word> &source, SCFG::TargetPhraseImpl &targetPhrase) const;
+ const Phrase<SCFG::Word> &source, SCFG::TargetPhraseImpl &targetPhrase) const;
void EvaluateAfterTablePruning(MemPool &pool, const TargetPhrases &tps,
- const Phrase<Moses2::Word> &sourcePhrase) const;
+ const Phrase<Moses2::Word> &sourcePhrase) const;
void EvaluateAfterTablePruning(MemPool &pool, const SCFG::TargetPhrases &tps,
- const Phrase<SCFG::Word> &sourcePhrase) const;
+ const Phrase<SCFG::Word> &sourcePhrase) const;
void EvaluateWhenAppliedBatch(const Batch &batch) const;
@@ -95,8 +100,6 @@ protected:
System &m_system;
size_t m_ffStartInd;
- FeatureRegistry m_registry;
-
FeatureFunction *Create(const std::string &line);
std::string GetDefaultName(const std::string &stub);
void OverrideFeatures();
diff --git a/contrib/moses2/FF/FeatureRegistry.cpp b/moses2/FF/FeatureRegistry.cpp
index af0af9d0f..3947d58c0 100644
--- a/contrib/moses2/FF/FeatureRegistry.cpp
+++ b/moses2/FF/FeatureRegistry.cpp
@@ -1,8 +1,7 @@
#include "FeatureRegistry.h"
#include "../TranslationModel/Memory/PhraseTableMemory.h"
-#include "../TranslationModel/CompactPT/PhraseTableCompact.h"
-#include "../TranslationModel/ProbingPT/ProbingPT.h"
+#include "../TranslationModel/ProbingPT.h"
#include "../TranslationModel/UnknownWordPenalty.h"
#include "../TranslationModel/Transliteration.h"
@@ -17,19 +16,21 @@
#include "WordPenalty.h"
#include "OSM/OpSequenceModel.h"
-#include "SkeletonStatefulFF.h"
-#include "SkeletonStatelessFF.h"
+#include "ExampleStatefulFF.h"
+#include "ExampleStatelessFF.h"
using namespace std;
+
namespace Moses2
{
+FeatureRegistry FeatureRegistry::s_instance;
+
template<class F>
class DefaultFeatureFactory: public FeatureFactory
{
public:
- FeatureFunction *Create(size_t startInd, const std::string &line)
- {
+ FeatureFunction *Create(size_t startInd, const std::string &line) const {
return new F(startInd, line);
}
};
@@ -38,9 +39,8 @@ public:
class KenFactory: public FeatureFactory
{
public:
- FeatureFunction *Create(size_t startInd, const std::string &line)
- {
- ConstructKenLM(startInd, line);
+ FeatureFunction *Create(size_t startInd, const std::string &line) const {
+ return ConstructKenLM(startInd, line);
}
};
@@ -52,7 +52,6 @@ FeatureRegistry::FeatureRegistry()
// Feature with different name than class.
#define MOSES_FNAME2(name, type) Add(name, new DefaultFeatureFactory< type >());
- MOSES_FNAME2("PhraseDictionaryCompact", PhraseTableCompact);
MOSES_FNAME2("PhraseDictionaryMemory", PhraseTableMemory);
MOSES_FNAME(ProbingPT);
MOSES_FNAME2("PhraseDictionaryTransliteration", Transliteration);
@@ -71,8 +70,8 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(WordPenalty);
MOSES_FNAME(OpSequenceModel);
- MOSES_FNAME(SkeletonStatefulFF);
- MOSES_FNAME(SkeletonStatelessFF);
+ MOSES_FNAME(ExampleStatefulFF);
+ MOSES_FNAME(ExampleStatelessFF);
}
FeatureRegistry::~FeatureRegistry()
@@ -91,9 +90,9 @@ void FeatureRegistry::Add(const std::string &name, FeatureFactory *factory)
}
FeatureFunction *FeatureRegistry::Construct(size_t startInd,
- const std::string &name, const std::string &line)
+ const std::string &name, const std::string &line) const
{
- Map::iterator i = registry_.find(name);
+ Map::const_iterator i = registry_.find(name);
if (i == registry_.end()) {
cerr << "Feature name " << name << " is not registered.";
abort();
diff --git a/contrib/moses2/FF/FeatureRegistry.h b/moses2/FF/FeatureRegistry.h
index 065820ea7..1e6fd399d 100644
--- a/contrib/moses2/FF/FeatureRegistry.h
+++ b/moses2/FF/FeatureRegistry.h
@@ -10,15 +10,13 @@ class FeatureFunction;
class FeatureFactory
{
public:
- virtual ~FeatureFactory()
- {
+ virtual ~FeatureFactory() {
}
- virtual FeatureFunction *Create(size_t startInd, const std::string &line) = 0;
+ virtual FeatureFunction *Create(size_t startInd, const std::string &line) const = 0;
protected:
- FeatureFactory()
- {
+ FeatureFactory() {
}
};
@@ -26,20 +24,26 @@ protected:
class FeatureRegistry
{
public:
- FeatureRegistry();
+ static const FeatureRegistry &Instance() {
+ return s_instance;
+ }
~FeatureRegistry();
FeatureFunction *Construct(size_t startInd, const std::string &name,
- const std::string &line);
+ const std::string &line) const;
void PrintFF() const;
private:
- void Add(const std::string &name, FeatureFactory *factory);
+ static FeatureRegistry s_instance;
typedef boost::unordered_map<std::string, boost::shared_ptr<FeatureFactory> > Map;
-
Map registry_;
+
+ FeatureRegistry();
+
+ void Add(const std::string &name, FeatureFactory *factory);
+
};
////////////////////////////////////////////////////////////////////
diff --git a/contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp b/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp
index 8c1b409c3..36e232f91 100644
--- a/contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp
+++ b/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp
@@ -15,9 +15,9 @@ namespace Moses2
{
BidirectionalReorderingState::BidirectionalReorderingState(
- const LRModel &config, LRState *bw, LRState *fw, size_t offset) :
- LRState(config, LRModel::Bidirectional, offset), m_backward(bw), m_forward(
- fw)
+ const LRModel &config, LRState *bw, LRState *fw, size_t offset) :
+ LRState(config, LRModel::Bidirectional, offset), m_backward(bw), m_forward(
+ fw)
{
}
@@ -27,8 +27,8 @@ BidirectionalReorderingState::~BidirectionalReorderingState()
}
void BidirectionalReorderingState::Init(const LRState *prev,
- const TargetPhrase<Moses2::Word> &topt, const InputPathBase &path, bool first,
- const Bitmap *coverage)
+ const TargetPhrase<Moses2::Word> &topt, const InputPathBase &path, bool first,
+ const Bitmap *coverage)
{
if (m_backward) {
m_backward->Init(prev, topt, path, first, coverage);
@@ -41,7 +41,7 @@ void BidirectionalReorderingState::Init(const LRState *prev,
std::string BidirectionalReorderingState::ToString() const
{
return "BidirectionalReorderingState " + SPrint(this) + " "
- + SPrint(m_backward) + " " + SPrint(m_forward);
+ + SPrint(m_backward) + " " + SPrint(m_forward);
}
size_t BidirectionalReorderingState::hash() const
@@ -57,10 +57,10 @@ bool BidirectionalReorderingState::operator==(const FFState& o) const
if (&o == this) return true;
BidirectionalReorderingState const &other =
- static_cast<BidirectionalReorderingState const&>(o);
+ static_cast<BidirectionalReorderingState const&>(o);
bool ret = (*m_backward == *other.m_backward)
- && (*m_forward == *other.m_forward);
+ && (*m_forward == *other.m_forward);
return ret;
}
@@ -69,11 +69,11 @@ void BidirectionalReorderingState::Expand(const ManagerBase &mgr,
Scores &scores, FFState &state) const
{
BidirectionalReorderingState &stateCast =
- static_cast<BidirectionalReorderingState&>(state);
+ static_cast<BidirectionalReorderingState&>(state);
m_backward->Expand(mgr, ff, hypo, phraseTableInd, scores,
- *stateCast.m_backward);
+ *stateCast.m_backward);
m_forward->Expand(mgr, ff, hypo, phraseTableInd, scores,
- *stateCast.m_forward);
+ *stateCast.m_forward);
}
} /* namespace Moses2 */
diff --git a/contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.h b/moses2/FF/LexicalReordering/BidirectionalReorderingState.h
index 487e84928..289809798 100644
--- a/contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.h
+++ b/moses2/FF/LexicalReordering/BidirectionalReorderingState.h
@@ -14,12 +14,12 @@ class BidirectionalReorderingState: public LRState
{
public:
BidirectionalReorderingState(const LRModel &config, LRState *bw, LRState *fw,
- size_t offset);
+ size_t offset);
virtual ~BidirectionalReorderingState();
void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
- const InputPathBase &path, bool first, const Bitmap *coverage);
+ const InputPathBase &path, bool first, const Bitmap *coverage);
size_t hash() const;
virtual bool operator==(const FFState& other) const;
@@ -27,8 +27,8 @@ public:
virtual std::string ToString() const;
void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
- const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
- FFState &state) const;
+ const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
+ FFState &state) const;
protected:
LRState *m_backward;
diff --git a/contrib/moses2/FF/LexicalReordering/HReorderingBackwardState.cpp b/moses2/FF/LexicalReordering/HReorderingBackwardState.cpp
index 600a208b9..a54cd7fcf 100644
--- a/contrib/moses2/FF/LexicalReordering/HReorderingBackwardState.cpp
+++ b/moses2/FF/LexicalReordering/HReorderingBackwardState.cpp
@@ -14,7 +14,7 @@ namespace Moses2
HReorderingBackwardState::HReorderingBackwardState(MemPool &pool,
const LRModel &config, size_t offset) :
- LRState(config, LRModel::Backward, offset), reoStack(pool)
+ LRState(config, LRModel::Backward, offset), reoStack(pool)
{
// TODO Auto-generated constructor stub
@@ -26,8 +26,8 @@ HReorderingBackwardState::~HReorderingBackwardState()
}
void HReorderingBackwardState::Init(const LRState *prev,
- const TargetPhrase<Moses2::Word> &topt, const InputPathBase &path, bool first,
- const Bitmap *coverage)
+ const TargetPhrase<Moses2::Word> &topt, const InputPathBase &path, bool first,
+ const Bitmap *coverage)
{
prevTP = &topt;
reoStack.Init();
@@ -42,7 +42,7 @@ size_t HReorderingBackwardState::hash() const
bool HReorderingBackwardState::operator==(const FFState& o) const
{
const HReorderingBackwardState& other =
- static_cast<const HReorderingBackwardState&>(o);
+ static_cast<const HReorderingBackwardState&>(o);
bool ret = reoStack == other.reoStack;
return ret;
}
@@ -53,13 +53,13 @@ std::string HReorderingBackwardState::ToString() const
}
void HReorderingBackwardState::Expand(const ManagerBase &mgr,
- const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd,
- Scores &scores, FFState &state) const
+ const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd,
+ Scores &scores, FFState &state) const
{
HReorderingBackwardState &nextState =
- static_cast<HReorderingBackwardState&>(state);
+ static_cast<HReorderingBackwardState&>(state);
nextState.Init(this, hypo.GetTargetPhrase(), hypo.GetInputPath(), false,
- NULL);
+ NULL);
nextState.reoStack = reoStack;
const Range &swrange = hypo.GetInputPath().range;
diff --git a/contrib/moses2/FF/LexicalReordering/HReorderingBackwardState.h b/moses2/FF/LexicalReordering/HReorderingBackwardState.h
index 9977724d3..8cdea5a44 100644
--- a/contrib/moses2/FF/LexicalReordering/HReorderingBackwardState.h
+++ b/moses2/FF/LexicalReordering/HReorderingBackwardState.h
@@ -20,7 +20,7 @@ public:
HReorderingBackwardState(MemPool &pool, const LRModel &config, size_t offset);
virtual void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
- const InputPathBase &path, bool first, const Bitmap *coverage);
+ const InputPathBase &path, bool first, const Bitmap *coverage);
virtual ~HReorderingBackwardState();
@@ -28,8 +28,8 @@ public:
virtual bool operator==(const FFState& other) const;
virtual std::string ToString() const;
void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
- const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
- FFState &state) const;
+ const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
+ FFState &state) const;
};
diff --git a/contrib/moses2/FF/LexicalReordering/HReorderingForwardState.cpp b/moses2/FF/LexicalReordering/HReorderingForwardState.cpp
index c50626106..1041115f7 100644
--- a/contrib/moses2/FF/LexicalReordering/HReorderingForwardState.cpp
+++ b/moses2/FF/LexicalReordering/HReorderingForwardState.cpp
@@ -15,7 +15,7 @@ namespace Moses2
HReorderingForwardState::HReorderingForwardState(const LRModel &config,
size_t offset) :
- LRState(config, LRModel::Forward, offset), m_first(true)
+ LRState(config, LRModel::Forward, offset), m_first(true)
{
prevPath = NULL;
m_coverage = NULL;
@@ -27,8 +27,8 @@ HReorderingForwardState::~HReorderingForwardState()
}
void HReorderingForwardState::Init(const LRState *prev,
- const TargetPhrase<Moses2::Word> &topt, const InputPathBase &path, bool first,
- const Bitmap *coverage)
+ const TargetPhrase<Moses2::Word> &topt, const InputPathBase &path, bool first,
+ const Bitmap *coverage)
{
prevTP = &topt;
prevPath = &path;
@@ -48,12 +48,12 @@ bool HReorderingForwardState::operator==(const FFState& o) const
if (&o == this) return true;
HReorderingForwardState const& other =
- static_cast<HReorderingForwardState const&>(o);
+ static_cast<HReorderingForwardState const&>(o);
int compareScores = (
- (prevPath->range == other.prevPath->range) ?
- ComparePrevScores(other.prevTP) :
- (prevPath->range < other.prevPath->range) ? -1 : 1);
+ (prevPath->range == other.prevPath->range) ?
+ ComparePrevScores(other.prevTP) :
+ (prevPath->range < other.prevPath->range) ? -1 : 1);
return compareScores == 0;
}
@@ -63,8 +63,8 @@ std::string HReorderingForwardState::ToString() const
}
void HReorderingForwardState::Expand(const ManagerBase &mgr,
- const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd,
- Scores &scores, FFState &state) const
+ const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd,
+ Scores &scores, FFState &state) const
{
const Range &cur = hypo.GetInputPath().range;
// keep track of the current coverage ourselves so we don't need the hypothesis
@@ -79,9 +79,9 @@ void HReorderingForwardState::Expand(const ManagerBase &mgr,
}
HReorderingForwardState &stateCast =
- static_cast<HReorderingForwardState&>(state);
+ static_cast<HReorderingForwardState&>(state);
stateCast.Init(this, hypo.GetTargetPhrase(), hypo.GetInputPath(), false,
- &cov);
+ &cov);
}
} /* namespace Moses2 */
diff --git a/contrib/moses2/FF/LexicalReordering/HReorderingForwardState.h b/moses2/FF/LexicalReordering/HReorderingForwardState.h
index 8f9b8bd23..51358daa3 100644
--- a/contrib/moses2/FF/LexicalReordering/HReorderingForwardState.h
+++ b/moses2/FF/LexicalReordering/HReorderingForwardState.h
@@ -20,14 +20,14 @@ public:
virtual ~HReorderingForwardState();
void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
- const InputPathBase &path, bool first, const Bitmap *coverage);
+ const InputPathBase &path, bool first, const Bitmap *coverage);
size_t hash() const;
virtual bool operator==(const FFState& other) const;
virtual std::string ToString() const;
void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
- const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
- FFState &state) const;
+ const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
+ FFState &state) const;
protected:
bool m_first;
diff --git a/contrib/moses2/FF/LexicalReordering/LRModel.cpp b/moses2/FF/LexicalReordering/LRModel.cpp
index 47b711369..c2a914009 100644
--- a/contrib/moses2/FF/LexicalReordering/LRModel.cpp
+++ b/moses2/FF/LexicalReordering/LRModel.cpp
@@ -22,8 +22,8 @@ namespace Moses2
{
bool IsMonotonicStep(Range const& prev, // words range of last source phrase
- Range const& cur, // words range of current source phrase
- Bitmap const& cov) // coverage bitmap
+ Range const& cur, // words range of current source phrase
+ Bitmap const& cov) // coverage bitmap
{
size_t e = prev.GetEndPos() + 1;
size_t s = cur.GetStartPos();
@@ -38,19 +38,17 @@ bool IsSwap(Range const& prev, Range const& cur, Bitmap const& cov)
}
LRModel::LRModel(const std::string &modelType, LexicalReordering &ff) :
- m_modelType(None), m_phraseBased(true), m_collapseScores(false), m_direction(
- Backward), m_scoreProducer(&ff)
+ m_modelType(None), m_phraseBased(true), m_collapseScores(false), m_direction(
+ Backward), m_scoreProducer(&ff)
{
std::vector<std::string> config = Tokenize(modelType, "-");
for (size_t i = 0; i < config.size(); ++i) {
if (config[i] == "hier") {
m_phraseBased = false;
- }
- else if (config[i] == "phrase") {
+ } else if (config[i] == "phrase") {
m_phraseBased = true;
- }
- else if (config[i] == "wbe") {
+ } else if (config[i] == "wbe") {
m_phraseBased = true;
}
// no word-based decoding available, fall-back to phrase-based
@@ -58,45 +56,36 @@ LRModel::LRModel(const std::string &modelType, LexicalReordering &ff) :
else if (config[i] == "msd") {
m_modelType = MSD;
- }
- else if (config[i] == "mslr") {
+ } else if (config[i] == "mslr") {
m_modelType = MSLR;
- }
- else if (config[i] == "monotonicity") {
+ } else if (config[i] == "monotonicity") {
m_modelType = Monotonic;
- }
- else if (config[i] == "leftright") {
+ } else if (config[i] == "leftright") {
m_modelType = LeftRight;
}
// unidirectional is deprecated, use backward instead
else if (config[i] == "unidirectional") {
m_direction = Backward;
- }
- else if (config[i] == "backward") {
+ } else if (config[i] == "backward") {
m_direction = Backward;
- }
- else if (config[i] == "forward") {
+ } else if (config[i] == "forward") {
m_direction = Forward;
- }
- else if (config[i] == "bidirectional") {
+ } else if (config[i] == "bidirectional") {
m_direction = Bidirectional;
}
else if (config[i] == "f") {
m_condition = F;
- }
- else if (config[i] == "fe") {
+ } else if (config[i] == "fe") {
m_condition = FE;
}
else if (config[i] == "collapseff") {
m_collapseScores = true;
- }
- else if (config[i] == "allff") {
+ } else if (config[i] == "allff") {
m_collapseScores = false;
- }
- else {
+ } else {
std::cerr
<< "Illegal part in the lexical reordering configuration string: "
<< config[i] << std::endl;
@@ -106,7 +95,7 @@ LRModel::LRModel(const std::string &modelType, LexicalReordering &ff) :
if (m_modelType == None) {
std::cerr << "You need to specify the type of the reordering model "
- << "(msd, monotonicity,...)" << std::endl;
+ << "(msd, monotonicity,...)" << std::endl;
exit(1);
}
@@ -135,19 +124,19 @@ LRModel::ReorderingType LRModel::GetOrientation(Range const& prev,
{
UTIL_THROW_IF2(m_modelType == None, "No reordering model type specified");
return (
- (m_modelType == LeftRight) ? prev.GetEndPos() <= cur.GetStartPos() ? R : L
- : (cur.GetStartPos() == prev.GetEndPos() + 1) ? M :
- (m_modelType == Monotonic) ? NM :
- (prev.GetStartPos() == cur.GetEndPos() + 1) ? S :
- (m_modelType == MSD) ? D :
- (cur.GetStartPos() > prev.GetEndPos()) ? DR : DL);
+ (m_modelType == LeftRight) ? prev.GetEndPos() <= cur.GetStartPos() ? R : L
+ : (cur.GetStartPos() == prev.GetEndPos() + 1) ? M :
+ (m_modelType == Monotonic) ? NM :
+ (prev.GetStartPos() == cur.GetEndPos() + 1) ? S :
+ (m_modelType == MSD) ? D :
+ (cur.GetStartPos() > prev.GetEndPos()) ? DR : DL);
}
LRModel::ReorderingType LRModel::GetOrientation(int const reoDistance) const
{
// this one is for HierarchicalReorderingBackwardState
return ((m_modelType == LeftRight) ? (reoDistance >= 1) ? R : L
- : (reoDistance == 1) ? M : (m_modelType == Monotonic) ? NM :
+ : (reoDistance == 1) ? M : (m_modelType == Monotonic) ? NM :
(reoDistance == -1) ? S : (m_modelType == MSD) ? D :
(reoDistance > 1) ? DR : DL);
}
@@ -162,28 +151,26 @@ LRState *LRModel::CreateLRState(MemPool &pool) const
case Bidirectional:
if (m_phraseBased) {
bwd =
- new (pool.Allocate<PhraseBasedReorderingState>()) PhraseBasedReorderingState(
- *this, Backward, offset);
+ new (pool.Allocate<PhraseBasedReorderingState>()) PhraseBasedReorderingState(
+ *this, Backward, offset);
//cerr << "bwd=" << bwd << bwd->ToString() << endl;
- }
- else {
+ } else {
bwd =
- new (pool.Allocate<HReorderingBackwardState>()) HReorderingBackwardState(
- pool, *this, offset);
+ new (pool.Allocate<HReorderingBackwardState>()) HReorderingBackwardState(
+ pool, *this, offset);
}
offset += m_collapseScores ? 1 : GetNumberOfTypes();
if (m_direction == Backward) return bwd; // else fall through
case Forward:
if (m_phraseBased) {
fwd =
- new (pool.Allocate<PhraseBasedReorderingState>()) PhraseBasedReorderingState(
- *this, Forward, offset);
+ new (pool.Allocate<PhraseBasedReorderingState>()) PhraseBasedReorderingState(
+ *this, Forward, offset);
//cerr << "fwd=" << fwd << fwd->ToString() << endl;
- }
- else {
+ } else {
fwd =
- new (pool.Allocate<HReorderingForwardState>()) HReorderingForwardState(
- *this, offset);
+ new (pool.Allocate<HReorderingForwardState>()) HReorderingForwardState(
+ *this, offset);
}
offset += m_collapseScores ? 1 : GetNumberOfTypes();
if (m_direction == Forward) return fwd;
@@ -191,8 +178,8 @@ LRState *LRModel::CreateLRState(MemPool &pool) const
//cerr << "LRStates:" << *bwd << endl << *fwd << endl;
BidirectionalReorderingState *ret =
- new (pool.Allocate<BidirectionalReorderingState>()) BidirectionalReorderingState(
- *this, bwd, fwd, 0);
+ new (pool.Allocate<BidirectionalReorderingState>()) BidirectionalReorderingState(
+ *this, bwd, fwd, 0);
return ret;
}
@@ -200,10 +187,10 @@ LRModel::ReorderingType LRModel::GetOrientation(Range const& prev,
Range const& cur, Bitmap const& cov) const
{
return (
- (m_modelType == LeftRight) ? cur.GetStartPos() > prev.GetEndPos() ? R : L
- : IsMonotonicStep(prev, cur, cov) ? M : (m_modelType == Monotonic) ? NM :
- IsSwap(prev, cur, cov) ? S : (m_modelType == MSD) ? D :
- cur.GetStartPos() > prev.GetEndPos() ? DR : DL);
+ (m_modelType == LeftRight) ? cur.GetStartPos() > prev.GetEndPos() ? R : L
+ : IsMonotonicStep(prev, cur, cov) ? M : (m_modelType == Monotonic) ? NM :
+ IsSwap(prev, cur, cov) ? S : (m_modelType == MSD) ? D :
+ cur.GetStartPos() > prev.GetEndPos() ? DR : DL);
}
} /* namespace Moses2 */
diff --git a/contrib/moses2/FF/LexicalReordering/LRModel.h b/moses2/FF/LexicalReordering/LRModel.h
index 2713fa46d..0309d5386 100644
--- a/contrib/moses2/FF/LexicalReordering/LRModel.h
+++ b/moses2/FF/LexicalReordering/LRModel.h
@@ -19,21 +19,17 @@ class LexicalReordering;
class LRModel
{
public:
- enum ModelType
- {
+ enum ModelType {
Monotonic, MSD, MSLR, LeftRight, None
};
- enum Direction
- {
+ enum Direction {
Forward, Backward, Bidirectional
};
- enum Condition
- {
+ enum Condition {
F, E, FE
};
- enum ReorderingType
- {
+ enum ReorderingType {
M = 0, // monotonic
NM = 1, // non-monotonic
S = 1, // swap
@@ -49,34 +45,28 @@ public:
LRModel(const std::string &modelType, LexicalReordering &ff);
virtual ~LRModel();
- ModelType GetModelType() const
- {
+ ModelType GetModelType() const {
return m_modelType;
}
- Direction GetDirection() const
- {
+ Direction GetDirection() const {
return m_direction;
}
- Condition GetCondition() const
- {
+ Condition GetCondition() const {
return m_condition;
}
- bool IsPhraseBased() const
- {
+ bool IsPhraseBased() const {
return m_phraseBased;
}
- bool CollapseScores() const
- {
+ bool CollapseScores() const {
return m_collapseScores;
}
size_t GetNumberOfTypes() const;
LexicalReordering*
- GetScoreProducer() const
- {
+ GetScoreProducer() const {
return m_scoreProducer;
}
diff --git a/contrib/moses2/FF/LexicalReordering/LRState.cpp b/moses2/FF/LexicalReordering/LRState.cpp
index 4e9abd774..a8a3bf6d0 100644
--- a/contrib/moses2/FF/LexicalReordering/LRState.cpp
+++ b/moses2/FF/LexicalReordering/LRState.cpp
@@ -17,7 +17,7 @@ namespace Moses2
class InputType;
LRState::LRState(const LRModel &config, LRModel::Direction dir, size_t offset) :
- m_configuration(config), m_direction(dir), m_offset(offset)
+ m_configuration(config), m_direction(dir), m_offset(offset)
{
}
@@ -43,15 +43,15 @@ int LRState::ComparePrevScores(const TargetPhrase<Moses2::Word> *other) const
}
void LRState::CopyScores(const System &system, Scores &accum,
- const TargetPhrase<Moses2::Word> &topt, ReorderingType reoType) const
+ const TargetPhrase<Moses2::Word> &topt, ReorderingType reoType) const
{
// don't call this on a bidirectional object
UTIL_THROW_IF2(
- m_direction != LRModel::Backward && m_direction != LRModel::Forward,
- "Unknown direction: " << m_direction);
+ m_direction != LRModel::Backward && m_direction != LRModel::Forward,
+ "Unknown direction: " << m_direction);
TargetPhrase<Moses2::Word> const* relevantOpt = (
- (m_direction == LRModel::Backward) ? &topt : prevTP);
+ (m_direction == LRModel::Backward) ? &topt : prevTP);
LexicalReordering* producer = m_configuration.GetScoreProducer();
size_t phraseTableInd = producer->GetPhraseTableInd();
@@ -65,7 +65,7 @@ void LRState::CopyScores(const System &system, Scores &accum,
size_t off_local = m_configuration.CollapseScores() ? m_offset : off_remote;
UTIL_THROW_IF2(off_local >= producer->GetNumScores(),
- "offset out of vector bounds!");
+ "offset out of vector bounds!");
// look up applicable score from vector of scores
//UTIL_THROW_IF2(off_remote >= cached->size(), "offset out of vector bounds!");
diff --git a/contrib/moses2/FF/LexicalReordering/LRState.h b/moses2/FF/LexicalReordering/LRState.h
index 0e906d09a..c53b9de78 100644
--- a/contrib/moses2/FF/LexicalReordering/LRState.h
+++ b/moses2/FF/LexicalReordering/LRState.h
@@ -26,14 +26,14 @@ public:
LRState(const LRModel &config, LRModel::Direction dir, size_t offset);
virtual void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
- const InputPathBase &path, bool first, const Bitmap *coverage) = 0;
+ const InputPathBase &path, bool first, const Bitmap *coverage) = 0;
virtual void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
- const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
- FFState &state) const = 0;
+ const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
+ FFState &state) const = 0;
void CopyScores(const System &system, Scores &accum, const TargetPhrase<Moses2::Word> &topt,
- ReorderingType reoType) const;
+ ReorderingType reoType) const;
protected:
const LRModel& m_configuration;
diff --git a/contrib/moses2/FF/LexicalReordering/LexicalReordering.cpp b/moses2/FF/LexicalReordering/LexicalReordering.cpp
index 97394ce84..6f510574c 100644
--- a/contrib/moses2/FF/LexicalReordering/LexicalReordering.cpp
+++ b/moses2/FF/LexicalReordering/LexicalReordering.cpp
@@ -12,7 +12,6 @@
#include "PhraseBasedReorderingState.h"
#include "BidirectionalReorderingState.h"
#include "../../TranslationModel/PhraseTable.h"
-#include "../../TranslationModel/CompactPT/LexicalReorderingTableCompact.h"
#include "../../System.h"
#include "../../PhraseBased/PhraseImpl.h"
#include "../../PhraseBased/Manager.h"
@@ -22,6 +21,11 @@
#include "../../legacy/InputFileStream.h"
#include "../../legacy/Util2.h"
+#ifdef HAVE_CMPH
+#include "../../TranslationModel/CompactPT/LexicalReorderingTableCompact.h"
+#endif
+
+
using namespace std;
namespace Moses2
@@ -29,9 +33,15 @@ namespace Moses2
///////////////////////////////////////////////////////////////////////
-LexicalReordering::LexicalReordering(size_t startInd, const std::string &line) :
- StatefulFeatureFunction(startInd, line), m_compactModel(NULL), m_blank(
- NULL), m_propertyInd(-1), m_coll(NULL), m_configuration(NULL)
+LexicalReordering::LexicalReordering(size_t startInd, const std::string &line)
+ : StatefulFeatureFunction(startInd, line)
+ , m_blank(NULL)
+ , m_propertyInd(-1)
+ , m_coll(NULL)
+ , m_configuration(NULL)
+#ifdef HAVE_CMPH
+ , m_compactModel(NULL)
+#endif
{
ReadParameters();
assert(m_configuration);
@@ -40,9 +50,11 @@ LexicalReordering::LexicalReordering(size_t startInd, const std::string &line) :
LexicalReordering::~LexicalReordering()
{
- delete m_compactModel;
delete m_coll;
delete m_configuration;
+#ifdef HAVE_CMPH
+ delete m_compactModel;
+#endif
}
void LexicalReordering::Load(System &system)
@@ -51,13 +63,13 @@ void LexicalReordering::Load(System &system)
if (m_propertyInd >= 0) {
// Using integrate Lex RO. No loading needed
- }
- else if (FileExists(m_path + ".minlexr")) {
+#ifdef HAVE_CMPH
+ } else if (FileExists(m_path + ".minlexr")) {
m_compactModel = new LexicalReorderingTableCompact(m_path + ".minlexr",
m_FactorsF, m_FactorsE, m_FactorsC);
m_blank = new (pool.Allocate<PhraseImpl>()) PhraseImpl(pool, 0);
- }
- else {
+#endif
+ } else {
m_coll = new Coll();
InputFileStream file(m_path);
string line;
@@ -71,12 +83,12 @@ void LexicalReordering::Load(System &system)
std::vector<std::string> toks = TokenizeMultiCharSeparator(line, "|||");
assert(toks.size() == 3);
PhraseImpl *source = PhraseImpl::CreateFromString(pool, system.GetVocab(),
- system, toks[0]);
+ system, toks[0]);
PhraseImpl *target = PhraseImpl::CreateFromString(pool, system.GetVocab(),
- system, toks[1]);
+ system, toks[1]);
std::vector<SCORE> scores = Tokenize<SCORE>(toks[2]);
std::transform(scores.begin(), scores.end(), scores.begin(),
- TransformScore);
+ TransformScore);
std::transform(scores.begin(), scores.end(), scores.begin(), FloorScore);
Key key(source, target);
@@ -86,24 +98,19 @@ void LexicalReordering::Load(System &system)
}
void LexicalReordering::SetParameter(const std::string& key,
- const std::string& value)
+ const std::string& value)
{
if (key == "path") {
m_path = value;
- }
- else if (key == "type") {
+ } else if (key == "type") {
m_configuration = new LRModel(value, *this);
- }
- else if (key == "input-factor") {
+ } else if (key == "input-factor") {
m_FactorsF = Tokenize<FactorType>(value);
- }
- else if (key == "output-factor") {
+ } else if (key == "output-factor") {
m_FactorsE = Tokenize<FactorType>(value);
- }
- else if (key == "property-index") {
+ } else if (key == "property-index") {
m_propertyInd = Scan<int>(value);
- }
- else {
+ } else {
StatefulFeatureFunction::SetParameter(key, value);
}
}
@@ -119,9 +126,9 @@ void LexicalReordering::EmptyHypothesisState(FFState &state,
const Hypothesis &hypo) const
{
BidirectionalReorderingState &stateCast =
- static_cast<BidirectionalReorderingState&>(state);
+ static_cast<BidirectionalReorderingState&>(state);
stateCast.Init(NULL, hypo.GetTargetPhrase(), hypo.GetInputPath(), true,
- &hypo.GetBitmap());
+ &hypo.GetBitmap());
}
void LexicalReordering::EvaluateInIsolation(MemPool &pool, const System &system,
@@ -141,9 +148,9 @@ void LexicalReordering::EvaluateInIsolation(MemPool &pool, const System &system,
void LexicalReordering::EvaluateAfterTablePruning(MemPool &pool,
const TargetPhrases &tps, const Phrase<Moses2::Word> &sourcePhrase) const
{
- BOOST_FOREACH(const TargetPhraseImpl *tp, tps){
- EvaluateAfterTablePruning(pool, *tp, sourcePhrase);
-}
+ BOOST_FOREACH(const TargetPhraseImpl *tp, tps) {
+ EvaluateAfterTablePruning(pool, *tp, sourcePhrase);
+ }
}
void LexicalReordering::EvaluateAfterTablePruning(MemPool &pool,
@@ -152,11 +159,11 @@ void LexicalReordering::EvaluateAfterTablePruning(MemPool &pool,
if (m_propertyInd >= 0) {
SCORE *scoreArr = targetPhrase.GetScoresProperty(m_propertyInd);
targetPhrase.ffData[m_PhraseTableInd] = scoreArr;
- }
- else if (m_compactModel) {
+#ifdef HAVE_CMPH
+ } else if (m_compactModel) {
// using external compact binary model
const Values values = m_compactModel->GetScore(sourcePhrase, targetPhrase,
- *m_blank);
+ *m_blank);
if (values.size()) {
assert(values.size() == m_numScores);
@@ -165,12 +172,11 @@ void LexicalReordering::EvaluateAfterTablePruning(MemPool &pool,
scoreArr[i] = values[i];
}
targetPhrase.ffData[m_PhraseTableInd] = scoreArr;
- }
- else {
+ } else {
targetPhrase.ffData[m_PhraseTableInd] = NULL;
}
- }
- else if (m_coll) {
+#endif
+ } else if (m_coll) {
// using external memory model
// cache data in target phrase
@@ -183,8 +189,7 @@ void LexicalReordering::EvaluateAfterTablePruning(MemPool &pool,
scoreArr[i] = (*values)[i];
}
targetPhrase.ffData[m_PhraseTableInd] = scoreArr;
- }
- else {
+ } else {
targetPhrase.ffData[m_PhraseTableInd] = NULL;
}
}
@@ -199,15 +204,14 @@ void LexicalReordering::EvaluateWhenApplied(const ManagerBase &mgr,
}
const LexicalReordering::Values *LexicalReordering::GetValues(
- const Phrase<Moses2::Word> &source, const Phrase<Moses2::Word> &target) const
+ const Phrase<Moses2::Word> &source, const Phrase<Moses2::Word> &target) const
{
Key key(&source, &target);
Coll::const_iterator iter;
iter = m_coll->find(key);
if (iter == m_coll->end()) {
return NULL;
- }
- else {
+ } else {
return &iter->second;
}
}
diff --git a/contrib/moses2/FF/LexicalReordering/LexicalReordering.h b/moses2/FF/LexicalReordering/LexicalReordering.h
index b14517db2..59f63eba2 100644
--- a/contrib/moses2/FF/LexicalReordering/LexicalReordering.h
+++ b/moses2/FF/LexicalReordering/LexicalReordering.h
@@ -30,36 +30,35 @@ public:
virtual void SetParameter(const std::string& key, const std::string& value);
- virtual size_t HasPhraseTableInd() const
- {
+ virtual size_t HasPhraseTableInd() const {
return true;
}
virtual FFState* BlankState(MemPool &pool, const System &sys) const;
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const;
+ const InputType &input, const Hypothesis &hypo) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
+ const TargetPhraseImpl &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const;
virtual void
EvaluateAfterTablePruning(MemPool &pool, const TargetPhrases &tps,
- const Phrase<Moses2::Word> &sourcePhrase) const;
+ const Phrase<Moses2::Word> &sourcePhrase) const;
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const;
+ const Hypothesis &hypo, const FFState &prevState, Scores &scores,
+ FFState &state) const;
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const;
+ const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
+ FFState &state) const;
protected:
std::string m_path;
@@ -71,30 +70,30 @@ protected:
virtual void
EvaluateAfterTablePruning(MemPool &pool, const TargetPhraseImpl &targetPhrase,
- const Phrase<Moses2::Word> &sourcePhrase) const;
+ const Phrase<Moses2::Word> &sourcePhrase) const;
// PROPERTY IN PT
int m_propertyInd;
// COMPACT MODEL
+#ifdef HAVE_CMPH
LexicalReorderingTableCompact *m_compactModel;
+#endif
+
Phrase<Moses2::Word> *m_blank;
// MEMORY MODEL
typedef std::pair<const Phrase<Moses2::Word>*, const Phrase<Moses2::Word>* > Key;
typedef std::vector<SCORE> Values;
- struct KeyComparer
- {
- size_t operator()(const Key &obj) const
- {
+ struct KeyComparer {
+ size_t operator()(const Key &obj) const {
size_t seed = obj.first->hash();
boost::hash_combine(seed, obj.second->hash());
return seed;
}
- bool operator()(const Key& a, const Key& b) const
- {
+ bool operator()(const Key& a, const Key& b) const {
if ((*a.first) != (*b.first)) {
return false;
}
diff --git a/contrib/moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp b/moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp
index c7d4abf03..6b8060021 100644
--- a/contrib/moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp
+++ b/moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp
@@ -18,7 +18,7 @@ namespace Moses2
PhraseBasedReorderingState::PhraseBasedReorderingState(const LRModel &config,
LRModel::Direction dir, size_t offset) :
- LRState(config, dir, offset)
+ LRState(config, dir, offset)
{
// uninitialised
prevPath = NULL;
@@ -26,8 +26,8 @@ PhraseBasedReorderingState::PhraseBasedReorderingState(const LRModel &config,
}
void PhraseBasedReorderingState::Init(const LRState *prev,
- const TargetPhrase<Moses2::Word> &topt, const InputPathBase &path, bool first,
- const Bitmap *coverage)
+ const TargetPhrase<Moses2::Word> &topt, const InputPathBase &path, bool first,
+ const Bitmap *coverage)
{
prevTP = &topt;
prevPath = &path;
@@ -48,39 +48,37 @@ bool PhraseBasedReorderingState::operator==(const FFState& o) const
if (&o == this) return true;
const PhraseBasedReorderingState &other =
- static_cast<const PhraseBasedReorderingState&>(o);
+ static_cast<const PhraseBasedReorderingState&>(o);
if (&prevPath->range == &other.prevPath->range) {
if (m_direction == LRModel::Forward) {
int compareScore = ComparePrevScores(other.prevTP);
return compareScore == 0;
- }
- else {
+ } else {
return true;
}
- }
- else {
+ } else {
return false;
}
}
void PhraseBasedReorderingState::Expand(const ManagerBase &mgr,
- const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd,
- Scores &scores, FFState &state) const
+ const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd,
+ Scores &scores, FFState &state) const
{
if ((m_direction != LRModel::Forward) || !m_first) {
LRModel const& lrmodel = m_configuration;
Range const &cur = hypo.GetInputPath().range;
LRModel::ReorderingType reoType = (
- m_first ?
- lrmodel.GetOrientation(cur) :
- lrmodel.GetOrientation(prevPath->range, cur));
+ m_first ?
+ lrmodel.GetOrientation(cur) :
+ lrmodel.GetOrientation(prevPath->range, cur));
CopyScores(mgr.system, scores, hypo.GetTargetPhrase(), reoType);
}
PhraseBasedReorderingState &stateCast =
- static_cast<PhraseBasedReorderingState&>(state);
+ static_cast<PhraseBasedReorderingState&>(state);
stateCast.Init(this, hypo.GetTargetPhrase(), hypo.GetInputPath(), false,
- NULL);
+ NULL);
}
} /* namespace Moses2 */
diff --git a/contrib/moses2/FF/LexicalReordering/PhraseBasedReorderingState.h b/moses2/FF/LexicalReordering/PhraseBasedReorderingState.h
index e26237cf7..77994e477 100644
--- a/contrib/moses2/FF/LexicalReordering/PhraseBasedReorderingState.h
+++ b/moses2/FF/LexicalReordering/PhraseBasedReorderingState.h
@@ -20,22 +20,21 @@ public:
bool m_first;
PhraseBasedReorderingState(const LRModel &config, LRModel::Direction dir,
- size_t offset);
+ size_t offset);
void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
- const InputPathBase &path, bool first, const Bitmap *coverage);
+ const InputPathBase &path, bool first, const Bitmap *coverage);
size_t hash() const;
virtual bool operator==(const FFState& other) const;
- virtual std::string ToString() const
- {
+ virtual std::string ToString() const {
return "PhraseBasedReorderingState";
}
void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
- const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
- FFState &state) const;
+ const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
+ FFState &state) const;
protected:
diff --git a/contrib/moses2/FF/LexicalReordering/ReorderingStack.cpp b/moses2/FF/LexicalReordering/ReorderingStack.cpp
index 298257fc4..6a4bf3c33 100644
--- a/contrib/moses2/FF/LexicalReordering/ReorderingStack.cpp
+++ b/moses2/FF/LexicalReordering/ReorderingStack.cpp
@@ -11,7 +11,7 @@
namespace Moses2
{
ReorderingStack::ReorderingStack(MemPool &pool) :
- m_stack(pool)
+ m_stack(pool)
{
}
@@ -50,8 +50,7 @@ int ReorderingStack::ShiftReduce(const Range &input_span)
//calculate the distance we are returning
if (input_span.GetStartPos() > prev_span.GetStartPos()) {
distance = input_span.GetStartPos() - prev_span.GetEndPos();
- }
- else {
+ } else {
distance = input_span.GetEndPos() - prev_span.GetStartPos();
}
@@ -59,13 +58,11 @@ int ReorderingStack::ShiftReduce(const Range &input_span)
m_stack.pop_back();
Range new_span(prev_span.GetStartPos(), input_span.GetEndPos());
Reduce(new_span);
- }
- else if (distance == -1) { //swap
+ } else if (distance == -1) { //swap
m_stack.pop_back();
Range new_span(input_span.GetStartPos(), prev_span.GetEndPos());
Reduce(new_span);
- }
- else { // discontinuous
+ } else { // discontinuous
m_stack.push_back(input_span);
}
@@ -85,13 +82,11 @@ void ReorderingStack::Reduce(Range current)
m_stack.pop_back();
Range t(previous.GetStartPos(), current.GetEndPos());
current = t;
- }
- else if (previous.GetStartPos() - current.GetEndPos() == 1) { //swap&merge
+ } else if (previous.GetStartPos() - current.GetEndPos() == 1) { //swap&merge
m_stack.pop_back();
Range t(current.GetStartPos(), previous.GetEndPos());
current = t;
- }
- else { // discontinuous, no more merging
+ } else { // discontinuous, no more merging
cont_loop = false;
}
} // finished reducing, exit
diff --git a/contrib/moses2/FF/LexicalReordering/ReorderingStack.h b/moses2/FF/LexicalReordering/ReorderingStack.h
index fab986bc0..fab986bc0 100644
--- a/contrib/moses2/FF/LexicalReordering/ReorderingStack.h
+++ b/moses2/FF/LexicalReordering/ReorderingStack.h
diff --git a/contrib/moses2/FF/OSM/KenOSM.cpp b/moses2/FF/OSM/KenOSM.cpp
index 6b410fc9e..6b410fc9e 100644
--- a/contrib/moses2/FF/OSM/KenOSM.cpp
+++ b/moses2/FF/OSM/KenOSM.cpp
diff --git a/contrib/moses2/FF/OSM/KenOSM.h b/moses2/FF/OSM/KenOSM.h
index f1275232f..f1275232f 100644
--- a/contrib/moses2/FF/OSM/KenOSM.h
+++ b/moses2/FF/OSM/KenOSM.h
diff --git a/contrib/moses2/FF/OSM/OpSequenceModel.cpp b/moses2/FF/OSM/OpSequenceModel.cpp
index 572065813..093e5d819 100644
--- a/contrib/moses2/FF/OSM/OpSequenceModel.cpp
+++ b/moses2/FF/OSM/OpSequenceModel.cpp
@@ -17,7 +17,7 @@ namespace Moses2
////////////////////////////////////////////////////////////////////////////////////////
OpSequenceModel::OpSequenceModel(size_t startInd, const std::string &line) :
- StatefulFeatureFunction(startInd, line)
+ StatefulFeatureFunction(startInd, line)
{
sFactor = 0;
tFactor = 0;
@@ -96,7 +96,7 @@ void OpSequenceModel::EvaluateInIsolation(MemPool &pool,
obj.populateScores(scoresVec,numFeatures);
SCORE weightedScore = Scores::CalcWeightedScore(system, *this,
- scoresVec.data());
+ scoresVec.data());
estimatedScore += weightedScore;
}
diff --git a/contrib/moses2/FF/OSM/OpSequenceModel.h b/moses2/FF/OSM/OpSequenceModel.h
index d46cc82fb..f8b99e95c 100644
--- a/contrib/moses2/FF/OSM/OpSequenceModel.h
+++ b/moses2/FF/OSM/OpSequenceModel.h
@@ -16,32 +16,32 @@ public:
int tFactor; // Target Factor ...
util::LoadMethod load_method; // method to load model
- OpSequenceModel(size_t startInd, const std::string &line);
+ OpSequenceModel(size_t startInd, const std::string &line);
virtual ~OpSequenceModel();
virtual void Load(System &system);
virtual FFState* BlankState(MemPool &pool, const System &sys) const;
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const;
+ const InputType &input, const Hypothesis &hypo) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
+ const TargetPhraseImpl &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const;
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const;
+ const Hypothesis &hypo, const FFState &prevState, Scores &scores,
+ FFState &state) const;
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const;
+ const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
+ FFState &state) const;
void SetParameter(const std::string& key, const std::string& value);
diff --git a/contrib/moses2/FF/OSM/osmHyp.cpp b/moses2/FF/OSM/osmHyp.cpp
index ede841a80..ede841a80 100644
--- a/contrib/moses2/FF/OSM/osmHyp.cpp
+++ b/moses2/FF/OSM/osmHyp.cpp
diff --git a/contrib/moses2/FF/OSM/osmHyp.h b/moses2/FF/OSM/osmHyp.h
index c2893d366..338b73ec2 100644
--- a/contrib/moses2/FF/OSM/osmHyp.h
+++ b/moses2/FF/OSM/osmHyp.h
@@ -22,8 +22,9 @@ public:
virtual size_t hash() const;
virtual bool operator==(const FFState& other) const;
- virtual std::string ToString() const
- { return "osmState"; }
+ virtual std::string ToString() const {
+ return "osmState";
+ }
void saveState(int jVal, int eVal, std::map <int , std::string> & gapVal);
int getJ()const {
diff --git a/contrib/moses2/FF/PhrasePenalty.cpp b/moses2/FF/PhrasePenalty.cpp
index 2a1764a0e..84087740d 100644
--- a/contrib/moses2/FF/PhrasePenalty.cpp
+++ b/moses2/FF/PhrasePenalty.cpp
@@ -12,7 +12,7 @@ namespace Moses2
{
PhrasePenalty::PhrasePenalty(size_t startInd, const std::string &line) :
- StatelessFeatureFunction(startInd, line)
+ StatelessFeatureFunction(startInd, line)
{
ReadParameters();
}
@@ -23,15 +23,15 @@ PhrasePenalty::~PhrasePenalty()
}
void PhrasePenalty::EvaluateInIsolation(MemPool &pool, const System &system,
- const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
+ const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const
{
scores.PlusEquals(system, *this, 1);
}
void PhrasePenalty::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const
{
scores.PlusEquals(system, *this, 1);
}
diff --git a/contrib/moses2/FF/PhrasePenalty.h b/moses2/FF/PhrasePenalty.h
index c2066356c..855bdbf09 100644
--- a/contrib/moses2/FF/PhrasePenalty.h
+++ b/moses2/FF/PhrasePenalty.h
@@ -20,13 +20,13 @@ public:
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
+ const TargetPhraseImpl &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const;
};
diff --git a/moses2/FF/PointerState.cpp b/moses2/FF/PointerState.cpp
new file mode 100644
index 000000000..facb0a2f9
--- /dev/null
+++ b/moses2/FF/PointerState.cpp
@@ -0,0 +1,6 @@
+#include "PointerState.h"
+
+namespace Moses2
+{
+
+}
diff --git a/contrib/moses2/FF/PointerState.h b/moses2/FF/PointerState.h
index 41e6edf9f..a73b57650 100644
--- a/contrib/moses2/FF/PointerState.h
+++ b/moses2/FF/PointerState.h
@@ -6,31 +6,25 @@
namespace Moses2
{
-struct PointerState: public FFState
-{
+struct PointerState: public FFState {
const void* lmstate;
- explicit PointerState()
- {
+ explicit PointerState() {
// uninitialised
}
- PointerState(const void* lms)
- {
+ PointerState(const void* lms) {
lmstate = lms;
}
- virtual size_t hash() const
- {
+ virtual size_t hash() const {
return (size_t) lmstate;
}
- virtual bool operator==(const FFState& other) const
- {
+ virtual bool operator==(const FFState& other) const {
const PointerState& o = static_cast<const PointerState&>(other);
return lmstate == o.lmstate;
}
- virtual std::string ToString() const
- {
+ virtual std::string ToString() const {
std::stringstream sb;
sb << lmstate;
return sb.str();
diff --git a/contrib/moses2/FF/StatefulFeatureFunction.cpp b/moses2/FF/StatefulFeatureFunction.cpp
index 060338159..6d8045dd3 100644
--- a/contrib/moses2/FF/StatefulFeatureFunction.cpp
+++ b/moses2/FF/StatefulFeatureFunction.cpp
@@ -4,7 +4,9 @@
* Created on: 24 Oct 2015
* Author: hieu
*/
+#ifdef __linux
#include <pthread.h>
+#endif
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
@@ -20,7 +22,7 @@ namespace Moses2
StatefulFeatureFunction::StatefulFeatureFunction(size_t startInd,
const std::string &line) :
- FeatureFunction(startInd, line)
+ FeatureFunction(startInd, line)
{
}
@@ -30,10 +32,10 @@ StatefulFeatureFunction::~StatefulFeatureFunction()
}
void StatefulFeatureFunction::EvaluateWhenAppliedBatch(
- const System &system,
- const Batch &batch) const
+ const System &system,
+ const Batch &batch) const
{
- //cerr << "EvaluateWhenAppliedBatch:" << m_name << endl;
+ //cerr << "EvaluateWhenAppliedBatch:" << m_name << endl;
#ifdef __linux
/*
pthread_t handle;
diff --git a/contrib/moses2/FF/StatefulFeatureFunction.h b/moses2/FF/StatefulFeatureFunction.h
index fffb1eea7..7cb3eaae9 100644
--- a/contrib/moses2/FF/StatefulFeatureFunction.h
+++ b/moses2/FF/StatefulFeatureFunction.h
@@ -30,12 +30,10 @@ public:
StatefulFeatureFunction(size_t startInd, const std::string &line);
virtual ~StatefulFeatureFunction();
- void SetStatefulInd(size_t ind)
- {
+ void SetStatefulInd(size_t ind) {
m_statefulInd = ind;
}
- size_t GetStatefulInd() const
- {
+ size_t GetStatefulInd() const {
return m_statefulInd;
}
@@ -44,19 +42,19 @@ public:
//! return the state associated with the empty hypothesis for a given sentence
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const = 0;
+ const InputType &input, const Hypothesis &hypo) const = 0;
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const = 0;
+ const Hypothesis &hypo, const FFState &prevState, Scores &scores,
+ FFState &state) const = 0;
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const = 0;
+ const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
+ FFState &state) const = 0;
virtual void EvaluateWhenAppliedBatch(
- const System &system,
- const Batch &batch) const;
+ const System &system,
+ const Batch &batch) const;
protected:
size_t m_statefulInd;
diff --git a/contrib/moses2/FF/StatelessFeatureFunction.cpp b/moses2/FF/StatelessFeatureFunction.cpp
index 62fa35d3f..c73d8907c 100644
--- a/contrib/moses2/FF/StatelessFeatureFunction.cpp
+++ b/moses2/FF/StatelessFeatureFunction.cpp
@@ -12,7 +12,7 @@ namespace Moses2
StatelessFeatureFunction::StatelessFeatureFunction(size_t startInd,
const std::string &line) :
- FeatureFunction(startInd, line)
+ FeatureFunction(startInd, line)
{
// TODO Auto-generated constructor stub
diff --git a/contrib/moses2/FF/StatelessFeatureFunction.h b/moses2/FF/StatelessFeatureFunction.h
index 249e4fdfe..249e4fdfe 100644
--- a/contrib/moses2/FF/StatelessFeatureFunction.h
+++ b/moses2/FF/StatelessFeatureFunction.h
diff --git a/contrib/moses2/FF/WordPenalty.cpp b/moses2/FF/WordPenalty.cpp
index e8af47568..576820539 100644
--- a/contrib/moses2/FF/WordPenalty.cpp
+++ b/moses2/FF/WordPenalty.cpp
@@ -17,7 +17,7 @@ namespace Moses2
{
WordPenalty::WordPenalty(size_t startInd, const std::string &line) :
- StatelessFeatureFunction(startInd, line)
+ StatelessFeatureFunction(startInd, line)
{
ReadParameters();
}
@@ -28,16 +28,16 @@ WordPenalty::~WordPenalty()
}
void WordPenalty::EvaluateInIsolation(MemPool &pool, const System &system,
- const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
+ const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const
{
SCORE score = -(SCORE) targetPhrase.GetSize();
scores.PlusEquals(system, *this, score);
}
void WordPenalty::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const
{
size_t count = 0;
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
diff --git a/contrib/moses2/FF/WordPenalty.h b/moses2/FF/WordPenalty.h
index c322a15f7..acd1bb873 100644
--- a/contrib/moses2/FF/WordPenalty.h
+++ b/moses2/FF/WordPenalty.h
@@ -21,13 +21,13 @@ public:
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
+ const TargetPhraseImpl &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const;
};
diff --git a/contrib/moses2/HypothesisBase.cpp b/moses2/HypothesisBase.cpp
index 8b65a0cdf..c124866d1 100644
--- a/contrib/moses2/HypothesisBase.cpp
+++ b/moses2/HypothesisBase.cpp
@@ -29,15 +29,15 @@ HypothesisBase::HypothesisBase(MemPool &pool, const System &system)
// FF states
const std::vector<const StatefulFeatureFunction*> &sfffs =
- system.featureFunctions.GetStatefulFeatureFunctions();
+ system.featureFunctions.GetStatefulFeatureFunctions();
size_t numStatefulFFs = sfffs.size();
m_ffStates = (FFState **) pool.Allocate(sizeof(FFState*) * numStatefulFFs);
- BOOST_FOREACH(const StatefulFeatureFunction *sfff, sfffs){
- size_t statefulInd = sfff->GetStatefulInd();
- FFState *state = sfff->BlankState(pool, system);
- m_ffStates[statefulInd] = state;
-}
+ BOOST_FOREACH(const StatefulFeatureFunction *sfff, sfffs) {
+ size_t statefulInd = sfff->GetStatefulInd();
+ FFState *state = sfff->BlankState(pool, system);
+ m_ffStates[statefulInd] = state;
+ }
}
size_t HypothesisBase::hash() const
@@ -48,7 +48,7 @@ size_t HypothesisBase::hash() const
size_t HypothesisBase::hash(size_t seed) const
{
size_t numStatefulFFs =
- GetManager().system.featureFunctions.GetStatefulFeatureFunctions().size();
+ GetManager().system.featureFunctions.GetStatefulFeatureFunctions().size();
// states
for (size_t i = 0; i < numStatefulFFs; ++i) {
@@ -63,7 +63,7 @@ size_t HypothesisBase::hash(size_t seed) const
bool HypothesisBase::operator==(const HypothesisBase &other) const
{
size_t numStatefulFFs =
- GetManager().system.featureFunctions.GetStatefulFeatureFunctions().size();
+ GetManager().system.featureFunctions.GetStatefulFeatureFunctions().size();
// states
for (size_t i = 0; i < numStatefulFFs; ++i) {
diff --git a/contrib/moses2/HypothesisBase.h b/moses2/HypothesisBase.h
index 6ef4d3891..557479906 100644
--- a/contrib/moses2/HypothesisBase.h
+++ b/moses2/HypothesisBase.h
@@ -20,28 +20,31 @@ class Scores;
class HypothesisBase
{
public:
- virtual ~HypothesisBase()
- {
+ virtual ~HypothesisBase() {
}
- inline ManagerBase &GetManager() const
- {
+ inline ManagerBase &GetManager() const {
return *m_mgr;
}
template<typename T>
- const T &Cast() const
- { return static_cast<const T&>(*this); }
+ const T &Cast() const {
+ return static_cast<const T&>(*this);
+ }
- const Scores &GetScores() const
- { return *m_scores; }
- Scores &GetScores()
- { return *m_scores; }
+ const Scores &GetScores() const {
+ return *m_scores;
+ }
+ Scores &GetScores() {
+ return *m_scores;
+ }
- const FFState *GetState(size_t ind) const
- { return m_ffStates[ind]; }
- FFState *GetState(size_t ind)
- { return m_ffStates[ind]; }
+ const FFState *GetState(size_t ind) const {
+ return m_ffStates[ind];
+ }
+ FFState *GetState(size_t ind) {
+ return m_ffStates[ind];
+ }
virtual size_t hash() const;
virtual size_t hash(size_t seed) const;
@@ -64,8 +67,7 @@ protected:
class HypothesisFutureScoreOrderer
{
public:
- bool operator()(const HypothesisBase* a, const HypothesisBase* b) const
- {
+ bool operator()(const HypothesisBase* a, const HypothesisBase* b) const {
return a->GetFutureScore() > b->GetFutureScore();
}
};
diff --git a/contrib/moses2/HypothesisColl.cpp b/moses2/HypothesisColl.cpp
index a75113d58..18046bd4c 100644
--- a/contrib/moses2/HypothesisColl.cpp
+++ b/moses2/HypothesisColl.cpp
@@ -19,8 +19,8 @@ namespace Moses2
{
HypothesisColl::HypothesisColl(const ManagerBase &mgr)
-:m_coll(MemPoolAllocator<const HypothesisBase*>(mgr.GetPool()))
-,m_sortedHypos(NULL)
+ :m_coll(MemPoolAllocator<const HypothesisBase*>(mgr.GetPool()))
+ ,m_sortedHypos(NULL)
{
m_bestScore = -std::numeric_limits<float>::infinity();
m_worstScore = std::numeric_limits<float>::infinity();
@@ -28,29 +28,29 @@ HypothesisColl::HypothesisColl(const ManagerBase &mgr)
const HypothesisBase *HypothesisColl::GetBestHypo() const
{
- if (GetSize() == 0) {
- return NULL;
- }
- if (m_sortedHypos) {
- return (*m_sortedHypos)[0];
- }
-
- SCORE bestScore = -std::numeric_limits<SCORE>::infinity();
- const HypothesisBase *bestHypo;
- BOOST_FOREACH(const HypothesisBase *hypo, m_coll) {
- if (hypo->GetFutureScore() > bestScore) {
- bestScore = hypo->GetFutureScore();
- bestHypo = hypo;
- }
- }
- return bestHypo;
+ if (GetSize() == 0) {
+ return NULL;
+ }
+ if (m_sortedHypos) {
+ return (*m_sortedHypos)[0];
+ }
+
+ SCORE bestScore = -std::numeric_limits<SCORE>::infinity();
+ const HypothesisBase *bestHypo;
+ BOOST_FOREACH(const HypothesisBase *hypo, m_coll) {
+ if (hypo->GetFutureScore() > bestScore) {
+ bestScore = hypo->GetFutureScore();
+ bestHypo = hypo;
+ }
+ }
+ return bestHypo;
}
void HypothesisColl::Add(
- const ManagerBase &mgr,
- HypothesisBase *hypo,
- Recycler<HypothesisBase*> &hypoRecycle,
- ArcLists &arcLists)
+ const ManagerBase &mgr,
+ HypothesisBase *hypo,
+ Recycler<HypothesisBase*> &hypoRecycle,
+ ArcLists &arcLists)
{
size_t maxStackSize = mgr.system.options.search.stack_size;
@@ -76,105 +76,100 @@ void HypothesisColl::Add(
return;
}
- StackAdd added = Add(hypo);
+ StackAdd added = Add(hypo);
- size_t nbestSize = mgr.system.options.nbest.nbest_size;
- if (nbestSize) {
- arcLists.AddArc(added.added, hypo, added.other);
- }
- else {
- if (added.added) {
+ size_t nbestSize = mgr.system.options.nbest.nbest_size;
+ if (nbestSize) {
+ arcLists.AddArc(added.added, hypo, added.other);
+ } else {
+ if (added.added) {
if (added.other) {
hypoRecycle.Recycle(added.other);
}
- }
- else {
+ } else {
hypoRecycle.Recycle(hypo);
- }
- }
+ }
+ }
// update beam variables
- if (added.added) {
+ if (added.added) {
if (futureScore > m_bestScore) {
m_bestScore = futureScore;
float beamWidth = mgr.system.options.search.beam_width;
if ( m_bestScore + beamWidth > m_worstScore ) {
m_worstScore = m_bestScore + beamWidth;
}
- }
- else if (GetSize() <= maxStackSize && futureScore < m_worstScore) {
+ } else if (GetSize() <= maxStackSize && futureScore < m_worstScore) {
m_worstScore = futureScore;
}
- }
+ }
}
StackAdd HypothesisColl::Add(const HypothesisBase *hypo)
{
- std::pair<_HCType::iterator, bool> addRet = m_coll.insert(hypo);
- //cerr << endl << "new=" << hypo->Debug(hypo->GetManager().system) << endl;
+ std::pair<_HCType::iterator, bool> addRet = m_coll.insert(hypo);
+ //cerr << endl << "new=" << hypo->Debug(hypo->GetManager().system) << endl;
- // CHECK RECOMBINATION
- if (addRet.second) {
- // equiv hypo doesn't exists
+ // CHECK RECOMBINATION
+ if (addRet.second) {
+ // equiv hypo doesn't exists
//cerr << "Added " << hypo << endl;
- return StackAdd(true, NULL);
- }
- else {
- HypothesisBase *hypoExisting = const_cast<HypothesisBase*>(*addRet.first);
- //cerr << "hypoExisting=" << hypoExisting->Debug(hypo->GetManager().system) << endl;
-
- if (hypo->GetFutureScore() > hypoExisting->GetFutureScore()) {
- // incoming hypo is better than the one we have
- const HypothesisBase * const &hypoExisting1 = *addRet.first;
- const HypothesisBase *&hypoExisting2 =
- const_cast<const HypothesisBase *&>(hypoExisting1);
- hypoExisting2 = hypo;
+ return StackAdd(true, NULL);
+ } else {
+ HypothesisBase *hypoExisting = const_cast<HypothesisBase*>(*addRet.first);
+ //cerr << "hypoExisting=" << hypoExisting->Debug(hypo->GetManager().system) << endl;
+
+ if (hypo->GetFutureScore() > hypoExisting->GetFutureScore()) {
+ // incoming hypo is better than the one we have
+ const HypothesisBase * const &hypoExisting1 = *addRet.first;
+ const HypothesisBase *&hypoExisting2 =
+ const_cast<const HypothesisBase *&>(hypoExisting1);
+ hypoExisting2 = hypo;
//cerr << "Added " << hypo << " dicard existing " << hypoExisting2 << endl;
- return StackAdd(true, hypoExisting);
- }
- else {
- // already storing the best hypo. discard incoming hypo
+ return StackAdd(true, hypoExisting);
+ } else {
+ // already storing the best hypo. discard incoming hypo
//cerr << "Keep existing " << hypoExisting << " dicard new " << hypo << endl;
- return StackAdd(false, hypoExisting);
- }
- }
+ return StackAdd(false, hypoExisting);
+ }
+ }
- //assert(false);
+ //assert(false);
}
const Hypotheses &HypothesisColl::GetSortedAndPrunedHypos(
- const ManagerBase &mgr,
- ArcLists &arcLists) const
+ const ManagerBase &mgr,
+ ArcLists &arcLists) const
{
- if (m_sortedHypos == NULL) {
- // create sortedHypos first
- MemPool &pool = mgr.GetPool();
- m_sortedHypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool,
- m_coll.size());
-
- SortHypos(mgr, m_sortedHypos->GetArray());
-
- // prune
- Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycle();
-
- size_t maxStackSize = mgr.system.options.search.stack_size;
- if (maxStackSize && m_sortedHypos->size() > maxStackSize) {
- for (size_t i = maxStackSize; i < m_sortedHypos->size(); ++i) {
- HypothesisBase *hypo = const_cast<HypothesisBase*>((*m_sortedHypos)[i]);
- recycler.Recycle(hypo);
-
- // delete from arclist
- if (mgr.system.options.nbest.nbest_size) {
- arcLists.Delete(hypo);
- }
- }
- m_sortedHypos->resize(maxStackSize);
- }
-
- }
-
- return *m_sortedHypos;
+ if (m_sortedHypos == NULL) {
+ // create sortedHypos first
+ MemPool &pool = mgr.GetPool();
+ m_sortedHypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool,
+ m_coll.size());
+
+ SortHypos(mgr, m_sortedHypos->GetArray());
+
+ // prune
+ Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycle();
+
+ size_t maxStackSize = mgr.system.options.search.stack_size;
+ if (maxStackSize && m_sortedHypos->size() > maxStackSize) {
+ for (size_t i = maxStackSize; i < m_sortedHypos->size(); ++i) {
+ HypothesisBase *hypo = const_cast<HypothesisBase*>((*m_sortedHypos)[i]);
+ recycler.Recycle(hypo);
+
+ // delete from arclist
+ if (mgr.system.options.nbest.nbest_size) {
+ arcLists.Delete(hypo);
+ }
+ }
+ m_sortedHypos->resize(maxStackSize);
+ }
+
+ }
+
+ return *m_sortedHypos;
}
void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists)
@@ -183,7 +178,7 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists)
Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycle();
- const HypothesisBase *sortedHypos[GetSize()];
+ const HypothesisBase **sortedHypos = (const HypothesisBase **) alloca(GetSize() * sizeof(const HypothesisBase *));
SortHypos(mgr, sortedHypos);
// update worse score
@@ -221,7 +216,7 @@ void HypothesisColl::SortHypos(const ManagerBase &mgr, const HypothesisBase **so
cerr << endl;
*/
size_t ind = 0;
- BOOST_FOREACH(const HypothesisBase *hypo, m_coll){
+ BOOST_FOREACH(const HypothesisBase *hypo, m_coll) {
sortedHypos[ind] = hypo;
++ind;
}
@@ -229,11 +224,9 @@ void HypothesisColl::SortHypos(const ManagerBase &mgr, const HypothesisBase **so
size_t indMiddle;
if (maxStackSize == 0) {
indMiddle = GetSize();
- }
- else if (GetSize() > maxStackSize) {
+ } else if (GetSize() > maxStackSize) {
indMiddle = maxStackSize;
- }
- else {
+ } else {
// GetSize() <= maxStackSize
indMiddle = GetSize();
}
@@ -241,10 +234,10 @@ void HypothesisColl::SortHypos(const ManagerBase &mgr, const HypothesisBase **so
const HypothesisBase **iterMiddle = sortedHypos + indMiddle;
std::partial_sort(
- sortedHypos,
- iterMiddle,
- sortedHypos + GetSize(),
- HypothesisFutureScoreOrderer());
+ sortedHypos,
+ iterMiddle,
+ sortedHypos + GetSize(),
+ HypothesisFutureScoreOrderer());
/*
cerr << "sorted hypos: ";
@@ -266,8 +259,8 @@ void HypothesisColl::Delete(const HypothesisBase *hypo)
void HypothesisColl::Clear()
{
- m_sortedHypos = NULL;
- m_coll.clear();
+ m_sortedHypos = NULL;
+ m_coll.clear();
m_bestScore = -std::numeric_limits<float>::infinity();
m_worstScore = std::numeric_limits<float>::infinity();
@@ -275,13 +268,13 @@ void HypothesisColl::Clear()
std::string HypothesisColl::Debug(const System &system) const
{
- stringstream out;
- BOOST_FOREACH (const HypothesisBase *hypo, m_coll) {
- out << hypo->Debug(system);
- out << std::endl << std::endl;
- }
+ stringstream out;
+ BOOST_FOREACH (const HypothesisBase *hypo, m_coll) {
+ out << hypo->Debug(system);
+ out << std::endl << std::endl;
+ }
- return out.str();
+ return out.str();
}
} /* namespace Moses2 */
diff --git a/contrib/moses2/HypothesisColl.h b/moses2/HypothesisColl.h
index 81a3b25c3..63a8551ba 100644
--- a/contrib/moses2/HypothesisColl.h
+++ b/moses2/HypothesisColl.h
@@ -26,24 +26,24 @@ public:
HypothesisColl(const ManagerBase &mgr);
void Add(const ManagerBase &mgr,
- HypothesisBase *hypo,
- Recycler<HypothesisBase*> &hypoRecycle,
- ArcLists &arcLists);
+ HypothesisBase *hypo,
+ Recycler<HypothesisBase*> &hypoRecycle,
+ ArcLists &arcLists);
- size_t GetSize() const
- { return m_coll.size(); }
+ size_t GetSize() const {
+ return m_coll.size();
+ }
void Clear();
const Hypotheses &GetSortedAndPrunedHypos(
- const ManagerBase &mgr,
- ArcLists &arcLists) const;
+ const ManagerBase &mgr,
+ ArcLists &arcLists) const;
const HypothesisBase *GetBestHypo() const;
template<typename T>
- const T *GetBestHypo() const
- {
+ const T *GetBestHypo() const {
const HypothesisBase *hypo = GetBestHypo();
return hypo ? &hypo->Cast<T>() : NULL;
}
@@ -54,8 +54,8 @@ public:
protected:
typedef boost::unordered_set<const HypothesisBase*,
- UnorderedComparer<HypothesisBase>, UnorderedComparer<HypothesisBase>,
- MemPoolAllocator<const HypothesisBase*> > _HCType;
+ UnorderedComparer<HypothesisBase>, UnorderedComparer<HypothesisBase>,
+ MemPoolAllocator<const HypothesisBase*> > _HCType;
_HCType m_coll;
mutable Hypotheses *m_sortedHypos;
diff --git a/contrib/moses2/MorphoTrie/MorphTrie.h b/moses2/InMemoryTrie/InMemoryTrie.h
index 0b013b5bb..ba085f6ad 100644
--- a/contrib/moses2/MorphoTrie/MorphTrie.h
+++ b/moses2/InMemoryTrie/InMemoryTrie.h
@@ -1,5 +1,4 @@
-#ifndef MORPHTRIE_H_
-#define MORPHTRIE_H_
+#pragma once
#include <vector>
#include "Node.h"
@@ -8,27 +7,26 @@ namespace Moses2
{
template<class KeyClass, class ValueClass>
-class MorphTrie
+class InMemoryTrie
{
public:
- MorphTrie()
- {
+ InMemoryTrie() {
}
Node<KeyClass, ValueClass>* insert(const std::vector<KeyClass>& word,
- const ValueClass& value);
+ const ValueClass& value);
const Node<KeyClass, ValueClass>* getNode(
- const std::vector<KeyClass>& words) const;
+ const std::vector<KeyClass>& words) const;
const Node<KeyClass, ValueClass> &getNode(const std::vector<KeyClass>& words,
size_t &stoppedAtInd) const;
std::vector<const Node<KeyClass, ValueClass>*> getNodes(
- const std::vector<KeyClass>& words, size_t &stoppedAtInd) const;
+ const std::vector<KeyClass>& words, size_t &stoppedAtInd) const;
private:
Node<KeyClass, ValueClass> root;
};
template<class KeyClass, class ValueClass>
-Node<KeyClass, ValueClass>* MorphTrie<KeyClass, ValueClass>::insert(
- const std::vector<KeyClass>& word, const ValueClass& value)
+Node<KeyClass, ValueClass>* InMemoryTrie<KeyClass, ValueClass>::insert(
+ const std::vector<KeyClass>& word, const ValueClass& value)
{
Node<KeyClass, ValueClass>* cNode = &root;
for (size_t i = 0; i < word.size(); ++i) {
@@ -40,8 +38,8 @@ Node<KeyClass, ValueClass>* MorphTrie<KeyClass, ValueClass>::insert(
}
template<class KeyClass, class ValueClass>
-const Node<KeyClass, ValueClass>* MorphTrie<KeyClass, ValueClass>::getNode(
- const std::vector<KeyClass>& words) const
+const Node<KeyClass, ValueClass>* InMemoryTrie<KeyClass, ValueClass>::getNode(
+ const std::vector<KeyClass>& words) const
{
size_t stoppedAtInd;
const Node<KeyClass, ValueClass> &ret = getNode(words, stoppedAtInd);
@@ -52,8 +50,8 @@ const Node<KeyClass, ValueClass>* MorphTrie<KeyClass, ValueClass>::getNode(
}
template<class KeyClass, class ValueClass>
-const Node<KeyClass, ValueClass> &MorphTrie<KeyClass, ValueClass>::getNode(
- const std::vector<KeyClass>& words, size_t &stoppedAtInd) const
+const Node<KeyClass, ValueClass> &InMemoryTrie<KeyClass, ValueClass>::getNode(
+ const std::vector<KeyClass>& words, size_t &stoppedAtInd) const
{
const Node<KeyClass, ValueClass> *prevNode = &root, *newNode;
for (size_t i = 0; i < words.size(); ++i) {
@@ -71,8 +69,8 @@ const Node<KeyClass, ValueClass> &MorphTrie<KeyClass, ValueClass>::getNode(
}
template<class KeyClass, class ValueClass>
-std::vector<const Node<KeyClass, ValueClass>*> MorphTrie<KeyClass, ValueClass>::getNodes(
- const std::vector<KeyClass>& words, size_t &stoppedAtInd) const
+std::vector<const Node<KeyClass, ValueClass>*> InMemoryTrie<KeyClass, ValueClass>::getNodes(
+ const std::vector<KeyClass>& words, size_t &stoppedAtInd) const
{
std::vector<const Node<KeyClass, ValueClass>*> ret;
const Node<KeyClass, ValueClass> *prevNode = &root, *newNode;
@@ -84,8 +82,7 @@ std::vector<const Node<KeyClass, ValueClass>*> MorphTrie<KeyClass, ValueClass>::
if (newNode == NULL) {
stoppedAtInd = i;
return ret;
- }
- else {
+ } else {
ret.push_back(newNode);
}
prevNode = newNode;
@@ -97,4 +94,3 @@ std::vector<const Node<KeyClass, ValueClass>*> MorphTrie<KeyClass, ValueClass>::
}
-#endif /* end of include guard: MORPHTRIE_H_ */
diff --git a/contrib/moses2/MorphoTrie/Node.h b/moses2/InMemoryTrie/Node.h
index ca165ef67..39c38e22f 100644
--- a/contrib/moses2/MorphoTrie/Node.h
+++ b/moses2/InMemoryTrie/Node.h
@@ -1,5 +1,4 @@
-#ifndef NODE_H_
-#define NODE_H_
+#pragma once
#include <vector>
#include <boost/unordered_map.hpp>
@@ -12,28 +11,23 @@ template<class KeyClass, class ValueClass>
class Node
{
public:
- Node()
- {
+ Node() {
}
Node(const ValueClass& value) :
- m_value(value)
- {
+ m_value(value) {
}
~Node();
void setKey(const KeyClass& key);
- void setValue(const ValueClass& value)
- {
+ void setValue(const ValueClass& value) {
m_value = value;
}
Node* findSub(const KeyClass& key);
const Node* findSub(const KeyClass& key) const;
- Node *addSubnode(const KeyClass& cKey)
- {
+ Node *addSubnode(const KeyClass& cKey) {
Node *node = findSub(cKey);
if (node) {
return node;
- }
- else {
+ } else {
node = new Node();
subNodes[cKey] = node;
return node;
@@ -41,8 +35,7 @@ public:
}
std::vector<Node*> getSubnodes();
- const ValueClass &getValue() const
- {
+ const ValueClass &getValue() const {
return m_value;
}
@@ -64,7 +57,7 @@ Node<KeyClass, ValueClass>::~Node()
template<class KeyClass, class ValueClass>
const Node<KeyClass, ValueClass>* Node<KeyClass, ValueClass>::findSub(
- const KeyClass& cKey) const
+ const KeyClass& cKey) const
{
typename boost::unordered_map<KeyClass, Node*>::const_iterator iter;
iter = subNodes.find(cKey);
@@ -77,7 +70,7 @@ const Node<KeyClass, ValueClass>* Node<KeyClass, ValueClass>::findSub(
template<class KeyClass, class ValueClass>
Node<KeyClass, ValueClass>* Node<KeyClass, ValueClass>::findSub(
- const KeyClass& cKey)
+ const KeyClass& cKey)
{
typename boost::unordered_map<KeyClass, Node*>::iterator iter;
iter = subNodes.find(cKey);
@@ -90,4 +83,3 @@ Node<KeyClass, ValueClass>* Node<KeyClass, ValueClass>::findSub(
}
-#endif /* end of include guard: NODE_H_ */
diff --git a/contrib/moses2/MorphoTrie/utils.h b/moses2/InMemoryTrie/utils.h
index e6f0aa7d6..eccb95a93 100644
--- a/contrib/moses2/MorphoTrie/utils.h
+++ b/moses2/InMemoryTrie/utils.h
@@ -1,4 +1,6 @@
-#include "MorphTrie.h"
+#pragma once
+
+#include "InMemoryTrie.h"
#include <fstream>
#include <ostream>
#include <string>
diff --git a/contrib/moses2/InputPathBase.cpp b/moses2/InputPathBase.cpp
index 034122cc2..c77033548 100644
--- a/contrib/moses2/InputPathBase.cpp
+++ b/moses2/InputPathBase.cpp
@@ -11,8 +11,8 @@
namespace Moses2
{
InputPathBase::InputPathBase(MemPool &pool,
- const Range &range, size_t numPt, const InputPathBase *prefixPath) :
- range(range), prefixPath(prefixPath)
+ const Range &range, size_t numPt, const InputPathBase *prefixPath) :
+ range(range), prefixPath(prefixPath)
{
}
diff --git a/contrib/moses2/InputPathBase.h b/moses2/InputPathBase.h
index d95d29e35..59fb219e3 100644
--- a/contrib/moses2/InputPathBase.h
+++ b/moses2/InputPathBase.h
@@ -24,7 +24,7 @@ public:
Range range;
InputPathBase(MemPool &pool, const Range &range,
- size_t numPt, const InputPathBase *prefixPath);
+ size_t numPt, const InputPathBase *prefixPath);
};
diff --git a/contrib/moses2/InputPathsBase.cpp b/moses2/InputPathsBase.cpp
index bcc57a7f7..bcc57a7f7 100644
--- a/contrib/moses2/InputPathsBase.cpp
+++ b/moses2/InputPathsBase.cpp
diff --git a/contrib/moses2/InputPathsBase.h b/moses2/InputPathsBase.h
index 861bbf9f7..88e69ea04 100644
--- a/contrib/moses2/InputPathsBase.h
+++ b/moses2/InputPathsBase.h
@@ -22,8 +22,7 @@ class InputPathsBase
{
typedef std::vector<InputPathBase*> Coll;
public:
- InputPathsBase()
- {
+ InputPathsBase() {
}
virtual ~InputPathsBase();
@@ -31,21 +30,17 @@ public:
typedef Coll::iterator iterator;
typedef Coll::const_iterator const_iterator;
- const_iterator begin() const
- {
+ const_iterator begin() const {
return m_inputPaths.begin();
}
- const_iterator end() const
- {
+ const_iterator end() const {
return m_inputPaths.end();
}
- iterator begin()
- {
+ iterator begin() {
return m_inputPaths.begin();
}
- iterator end()
- {
+ iterator end() {
return m_inputPaths.end();
}
diff --git a/contrib/moses2/InputType.cpp b/moses2/InputType.cpp
index 01169c162..60664a85b 100644
--- a/contrib/moses2/InputType.cpp
+++ b/moses2/InputType.cpp
@@ -12,35 +12,35 @@ namespace Moses2
{
//////////////////////////////////////////////////////////////////////////////
InputType::XMLOption::XMLOption(MemPool &pool, const std::string &nodeName, size_t vStartPos)
-:startPos(vStartPos)
-,prob(0)
-,m_entity(NULL)
+ :startPos(vStartPos)
+ ,prob(0)
+ ,m_entity(NULL)
{
- m_nodeName = pool.Allocate<char>(nodeName.size() + 1);
- strcpy(m_nodeName, nodeName.c_str());
+ m_nodeName = pool.Allocate<char>(nodeName.size() + 1);
+ strcpy(m_nodeName, nodeName.c_str());
}
void InputType::XMLOption::SetTranslation(MemPool &pool, const std::string &val)
{
- m_translation = pool.Allocate<char>(val.size() + 1);
- strcpy(m_translation, val.c_str());
+ m_translation = pool.Allocate<char>(val.size() + 1);
+ strcpy(m_translation, val.c_str());
}
void InputType::XMLOption::SetEntity(MemPool &pool, const std::string &val)
{
- m_entity = pool.Allocate<char>(val.size() + 1);
- strcpy(m_entity, val.c_str());
+ m_entity = pool.Allocate<char>(val.size() + 1);
+ strcpy(m_entity, val.c_str());
}
std::string InputType::XMLOption::Debug(const System &system) const
{
std::stringstream out;
out << "[" << startPos << "," << phraseSize << "]="
- << m_nodeName << ","
- << m_translation << ","
- << prob;
+ << m_nodeName << ","
+ << m_translation << ","
+ << prob;
if (m_entity) {
- out << "," << m_entity;
+ out << "," << m_entity;
}
return out.str();
}
@@ -48,9 +48,9 @@ std::string InputType::XMLOption::Debug(const System &system) const
//////////////////////////////////////////////////////////////////////////////
InputType::InputType(MemPool &pool)
-:m_reorderingConstraint(pool)
-,m_xmlOptions(pool)
-,m_xmlCoverageMap(pool)
+ :m_reorderingConstraint(pool)
+ ,m_xmlOptions(pool)
+ ,m_xmlCoverageMap(pool)
{
}
@@ -64,18 +64,18 @@ void InputType::Init(const System &system, size_t size, int max_distortion)
m_reorderingConstraint.InitializeWalls(size, max_distortion);
if (system.options.input.xml_policy != XmlPassThrough) {
- m_xmlCoverageMap.assign(size, false);
+ m_xmlCoverageMap.assign(size, false);
}
}
void InputType::AddXMLOption(const System &system, const XMLOption *xmlOption)
{
- m_xmlOptions.push_back(xmlOption);
+ m_xmlOptions.push_back(xmlOption);
if (system.options.input.xml_policy != XmlPassThrough) {
- for(size_t j = xmlOption->startPos; j < xmlOption->startPos + xmlOption->phraseSize; ++j) {
- m_xmlCoverageMap[j]=true;
- }
+ for(size_t j = xmlOption->startPos; j < xmlOption->startPos + xmlOption->phraseSize; ++j) {
+ m_xmlCoverageMap[j]=true;
+ }
}
}
diff --git a/contrib/moses2/InputType.h b/moses2/InputType.h
index 0a2aebfa1..8813bc484 100644
--- a/contrib/moses2/InputType.h
+++ b/moses2/InputType.h
@@ -20,29 +20,32 @@ public:
class XMLOption
{
public:
- size_t startPos, phraseSize;
+ size_t startPos, phraseSize;
- SCORE prob;
+ SCORE prob;
- XMLOption(MemPool &pool, const std::string &nodeName, size_t vStartPos);
+ XMLOption(MemPool &pool, const std::string &nodeName, size_t vStartPos);
- const char *GetNodeName() const
- { return m_nodeName; }
+ const char *GetNodeName() const {
+ return m_nodeName;
+ }
- const char *GetTranslation() const
- { return m_translation; }
+ const char *GetTranslation() const {
+ return m_translation;
+ }
- const char *GetEntity() const
- { return m_entity; }
+ const char *GetEntity() const {
+ return m_entity;
+ }
- void SetTranslation(MemPool &pool, const std::string &val);
- void SetEntity(MemPool &pool, const std::string &val);
+ void SetTranslation(MemPool &pool, const std::string &val);
+ void SetEntity(MemPool &pool, const std::string &val);
- std::string Debug(const System &system) const;
+ std::string Debug(const System &system) const;
public:
- char *m_nodeName;
- char *m_translation;
- char *m_entity;
+ char *m_nodeName;
+ char *m_translation;
+ char *m_entity;
};
@@ -53,14 +56,17 @@ public:
virtual void Init(const System &system, size_t size, int max_distortion);
- ReorderingConstraint &GetReorderingConstraint()
- { return m_reorderingConstraint; }
+ ReorderingConstraint &GetReorderingConstraint() {
+ return m_reorderingConstraint;
+ }
- const ReorderingConstraint &GetReorderingConstraint() const
- { return m_reorderingConstraint; }
+ const ReorderingConstraint &GetReorderingConstraint() const {
+ return m_reorderingConstraint;
+ }
- const Vector<const XMLOption*> &GetXMLOptions() const
- { return m_xmlOptions; }
+ const Vector<const XMLOption*> &GetXMLOptions() const {
+ return m_xmlOptions;
+ }
void AddXMLOption(const System &system, const XMLOption *xmlOption);
diff --git a/contrib/moses2/Jamfile b/moses2/Jamfile
index 98e1c1e30..42676c065 100644
--- a/contrib/moses2/Jamfile
+++ b/moses2/Jamfile
@@ -1,4 +1,22 @@
-alias deps : ../..//z ../..//boost_iostreams ../..//boost_filesystem ../../moses/TranslationModel/CompactPT//cmph ../../moses//moses ;
+local with-cmph = [ option.get "with-cmph" ] ;
+local includes = ;
+
+if $(with-cmph) {
+ lib cmph : : <search>$(with-cmph)/lib <search>$(with-cmph)/lib64 ;
+ includes += <include>$(with-cmph)/include ;
+}
+else {
+ alias cmph ;
+}
+
+max-factors = [ option.get "max-factors" : 4 : 4 ] ;
+max-factors = <define>MAX_NUM_FACTORS=$(max-factors) <dependency>$(FACTOR-LOG) ;
+
+max-order = [ option.get "max-kenlm-order" : 6 : 6 ] ;
+max-order = <define>KENLM_MAX_ORDER=$(max-order) ;
+
+alias deps : ..//z ..//boost_iostreams ..//boost_filesystem : : : $(max-factors) $(max-order) ;
+
lib moses2_lib :
AlignmentInfo.cpp
@@ -29,8 +47,8 @@ alias deps : ../..//z ../..//boost_iostreams ../..//boost_filesystem ../../mose
FF/FeatureFunctions.cpp
FF/FeatureRegistry.cpp
FF/PhrasePenalty.cpp
- FF/SkeletonStatefulFF.cpp
- FF/SkeletonStatelessFF.cpp
+ FF/ExampleStatefulFF.cpp
+ FF/ExampleStatelessFF.cpp
FF/StatefulFeatureFunction.cpp
FF/StatelessFeatureFunction.cpp
FF/WordPenalty.cpp
@@ -48,36 +66,24 @@ alias deps : ../..//z ../..//boost_iostreams ../..//boost_filesystem ../../mose
FF/OSM/KenOSM.cpp
FF/OSM/osmHyp.cpp
- # LM/LanguageModelDALM.cpp
LM/LanguageModel.cpp
LM/KENLM.cpp
LM/KENLMBatch.cpp
LM/GPULM.cpp
- TranslationModel/PhraseTable.cpp
- TranslationModel/Transliteration.cpp
- TranslationModel/UnknownWordPenalty.cpp
+ TranslationModel/PhraseTable.cpp
+ TranslationModel/ProbingPT.cpp
+ TranslationModel/Transliteration.cpp
+ TranslationModel/UnknownWordPenalty.cpp
TranslationModel/Memory/PhraseTableMemory.cpp
- TranslationModel/CompactPT/PhraseTableCompact.cpp
TranslationModel/CompactPT/BlockHashIndex.cpp
TranslationModel/CompactPT/CmphStringVectorAdapter.cpp
TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp
TranslationModel/CompactPT/MurmurHash3.cpp
- TranslationModel/CompactPT/PhraseDecoder.cpp
TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp
TranslationModel/CompactPT/ThrowingFwrite.cpp
- TranslationModel/ProbingPT/ProbingPT.cpp
- TranslationModel/ProbingPT/hash.cpp
- TranslationModel/ProbingPT/line_splitter.cpp
- TranslationModel/ProbingPT/probing_hash_utils.cpp
- TranslationModel/ProbingPT/querying.cpp
- TranslationModel/ProbingPT/storing.cpp
- TranslationModel/ProbingPT/StoreVocab.cpp
- TranslationModel/ProbingPT/StoreTarget.cpp
- TranslationModel/ProbingPT/vocabid.cpp
-
parameters/AllOptions.cpp
parameters/BookkeepingOptions.cpp
parameters/ContextParameters.cpp
@@ -166,9 +172,13 @@ alias deps : ../..//z ../..//boost_iostreams ../..//boost_filesystem ../../mose
server/Translator.cpp
server/TranslationRequest.cpp
- deps ;
+ deps
+ cmph
+ :
+ $(includes)
+ ;
-exe moses2 : Main.cpp moses2_lib ;
+exe moses2 : Main.cpp moses2_lib ../probingpt//probingpt ../util//kenutil ../lm//kenlm ;
if [ xmlrpc ] {
echo "Building Moses2" ;
diff --git a/contrib/moses2/LM/GPULM.cpp b/moses2/LM/GPULM.cpp
index f2ff7b7e7..714ff8ff2 100644
--- a/contrib/moses2/LM/GPULM.cpp
+++ b/moses2/LM/GPULM.cpp
@@ -8,11 +8,13 @@
#include <sstream>
#include <vector>
+#ifdef __linux  // POSIX-only headers; GCC/Clang predefine __linux (the guard GPULM.h uses) -- _linux is never defined
#include <pthread.h>
+#include <unistd.h>
+#endif  // __linux
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
-#include <unistd.h>
#include "GPULM.h"
#include "../Phrase.h"
@@ -29,28 +31,23 @@ using namespace std;
namespace Moses2
{
-struct GPULMState: public FFState
-{
- virtual std::string ToString() const
- {
+struct GPULMState: public FFState {
+ virtual std::string ToString() const {
return "GPULMState";
}
- virtual size_t hash() const
- {
+ virtual size_t hash() const {
return boost::hash_value(lastWords);
}
- virtual bool operator==(const FFState& other) const
- {
+ virtual bool operator==(const FFState& other) const {
const GPULMState &otherCast = static_cast<const GPULMState&>(other);
bool ret = lastWords == otherCast.lastWords;
return ret;
}
- void SetContext(const Context &context)
- {
+ void SetContext(const Context &context) {
lastWords = context;
if (lastWords.size()) {
lastWords.resize(lastWords.size() - 1);
@@ -63,7 +60,7 @@ struct GPULMState: public FFState
/////////////////////////////////////////////////////////////////
GPULM::GPULM(size_t startInd, const std::string &line)
-:StatefulFeatureFunction(startInd, line)
+ :StatefulFeatureFunction(startInd, line)
{
cerr << "GPULM::GPULM" << endl;
ReadParameters();
@@ -93,15 +90,15 @@ FFState* GPULM::BlankState(MemPool &pool, const System &sys) const
//! return the state associated with the empty hypothesis for a given sentence
void GPULM::EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const
+ const InputType &input, const Hypothesis &hypo) const
{
GPULMState &stateCast = static_cast<GPULMState&>(state);
stateCast.lastWords.push_back(m_bos);
}
void GPULM::EvaluateInIsolation(MemPool &pool, const System &system,
- const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
+ const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const
{
if (targetPhrase.GetSize() == 0) {
return;
@@ -120,8 +117,7 @@ void GPULM::EvaluateInIsolation(MemPool &pool, const System &system,
if (context.size() == m_order) {
//std::pair<SCORE, void*> fromScoring = Score(context);
//score += fromScoring.first;
- }
- else {
+ } else {
//std::pair<SCORE, void*> fromScoring = Score(context);
//nonFullScore += fromScoring.first;
}
@@ -130,33 +126,30 @@ void GPULM::EvaluateInIsolation(MemPool &pool, const System &system,
}
void GPULM::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const
{
UTIL_THROW2("Not implemented");
}
void GPULM::EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const
+ const Hypothesis &hypo, const FFState &prevState, Scores &scores,
+ FFState &state) const
{
UTIL_THROW2("Not implemented");
}
void GPULM::SetParameter(const std::string& key,
- const std::string& value)
+ const std::string& value)
{
//cerr << "key=" << key << " " << value << endl;
if (key == "path") {
m_path = value;
- }
- else if (key == "order") {
+ } else if (key == "order") {
m_order = Scan<size_t>(value);
- }
- else if (key == "factor") {
+ } else if (key == "factor") {
m_factorType = Scan<FactorType>(value);
- }
- else {
+ } else {
StatefulFeatureFunction::SetParameter(key, value);
}
@@ -164,8 +157,8 @@ void GPULM::SetParameter(const std::string& key,
}
void GPULM::EvaluateWhenAppliedBatch(
- const System &system,
- const Batch &batch) const
+ const System &system,
+ const Batch &batch) const
{
// create list of ngrams
std::vector<std::pair<Hypothesis*, Context> > contexts;
@@ -219,7 +212,7 @@ void GPULM::CreateNGram(std::vector<std::pair<Hypothesis*, Context> > &contexts,
}
void GPULM::ShiftOrPush(std::vector<const Factor*> &context,
- const Factor *factor) const
+ const Factor *factor) const
{
if (context.size() < m_order) {
context.resize(context.size() + 1);
@@ -239,8 +232,8 @@ SCORE GPULM::Score(const Context &context) const
}
void GPULM::EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const
+ const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
+ FFState &state) const
{
UTIL_THROW2("Not implemented");
}
diff --git a/contrib/moses2/LM/GPULM.h b/moses2/LM/GPULM.h
index ad236ef95..6a3fb49f7 100644
--- a/contrib/moses2/LM/GPULM.h
+++ b/moses2/LM/GPULM.h
@@ -9,7 +9,9 @@
#include <boost/shared_ptr.hpp>
#include <boost/bind.hpp>
#include <boost/thread.hpp>
+#ifdef __linux
#include <pthread.h>
+#endif
#include "../FF/StatefulFeatureFunction.h"
#include "lm/model.hh"
@@ -33,35 +35,35 @@ public:
virtual void Load(System &system);
void SetParameter(const std::string& key,
- const std::string& value);
+ const std::string& value);
virtual FFState* BlankState(MemPool &pool, const System &sys) const;
//! return the state associated with the empty hypothesis for a given sentence
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const;
+ const InputType &input, const Hypothesis &hypo) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
+ const TargetPhraseImpl &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const;
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const;
+ const Hypothesis &hypo, const FFState &prevState, Scores &scores,
+ FFState &state) const;
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const;
+ const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
+ FFState &state) const;
virtual void EvaluateWhenAppliedBatch(
- const System &system,
- const Batch &batch) const;
+ const System &system,
+ const Batch &batch) const;
protected:
std::string m_path;
@@ -71,8 +73,7 @@ protected:
const Factor *m_eos;
size_t m_order;
- inline lm::WordIndex TranslateID(const Word &word) const
- {
+ inline lm::WordIndex TranslateID(const Word &word) const {
std::size_t factor = word[m_factorType]->GetId();
return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
}
@@ -83,7 +84,7 @@ protected:
void CreateNGram(std::vector<std::pair<Hypothesis*, Context> > &contexts, Hypothesis &hypo) const;
void ShiftOrPush(std::vector<const Factor*> &context,
- const Factor *factor) const;
+ const Factor *factor) const;
SCORE Score(const Context &context) const;
};
diff --git a/contrib/moses2/LM/KENLM.cpp b/moses2/LM/KENLM.cpp
index 3173392cd..689d76b92 100644
--- a/contrib/moses2/LM/KENLM.cpp
+++ b/moses2/LM/KENLM.cpp
@@ -28,23 +28,19 @@ using namespace std;
namespace Moses2
{
-struct KenLMState: public FFState
-{
+struct KenLMState: public FFState {
lm::ngram::State state;
- virtual size_t hash() const
- {
+ virtual size_t hash() const {
size_t ret = hash_value(state);
return ret;
}
- virtual bool operator==(const FFState& o) const
- {
+ virtual bool operator==(const FFState& o) const {
const KenLMState &other = static_cast<const KenLMState &>(o);
bool ret = state == other.state;
return ret;
}
- virtual std::string ToString() const
- {
+ virtual std::string ToString() const {
stringstream ss;
for (size_t i = 0; i < state.Length(); ++i) {
ss << state.words[i] << " ";
@@ -77,9 +73,8 @@ public:
return ret;
}
- virtual std::string ToString() const
- {
- return "LanguageModelChartStateKenLM";
+ virtual std::string ToString() const {
+ return "LanguageModelChartStateKenLM";
}
private:
@@ -91,13 +86,11 @@ class MappingBuilder: public lm::EnumerateVocab
{
public:
MappingBuilder(FactorCollection &factorCollection, System &system,
- std::vector<lm::WordIndex> &mapping) :
- m_factorCollection(factorCollection), m_system(system), m_mapping(mapping)
- {
+ std::vector<lm::WordIndex> &mapping) :
+ m_factorCollection(factorCollection), m_system(system), m_mapping(mapping) {
}
- void Add(lm::WordIndex index, const StringPiece &str)
- {
+ void Add(lm::WordIndex index, const StringPiece &str) {
std::size_t factorId = m_factorCollection.AddFactor(str, m_system, false)->GetId();
if (m_mapping.size() <= factorId) {
// 0 is <unk> :-)
@@ -115,10 +108,10 @@ private:
/////////////////////////////////////////////////////////////////
template<class Model>
KENLM<Model>::KENLM(size_t startInd, const std::string &line,
- const std::string &file, FactorType factorType,
- util::LoadMethod load_method) :
- StatefulFeatureFunction(startInd, line), m_path(file), m_factorType(
- factorType), m_load_method(load_method)
+ const std::string &file, FactorType factorType,
+ util::LoadMethod load_method) :
+ StatefulFeatureFunction(startInd, line), m_path(file), m_factorType(
+ factorType), m_load_method(load_method)
{
ReadParameters();
}
@@ -154,8 +147,7 @@ FFState* KENLM<Model>::BlankState(MemPool &pool, const System &sys) const
FFState *ret;
if (sys.isPb) {
ret = new (pool.Allocate<KenLMState>()) KenLMState();
- }
- else {
+ } else {
ret = new (pool.Allocate<LanguageModelChartStateKenLM>()) LanguageModelChartStateKenLM();
}
return ret;
@@ -164,7 +156,7 @@ FFState* KENLM<Model>::BlankState(MemPool &pool, const System &sys) const
//! return the state associated with the empty hypothesis for a given sentence
template<class Model>
void KENLM<Model>::EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const
+ const InputType &input, const Hypothesis &hypo) const
{
KenLMState &stateCast = static_cast<KenLMState&>(state);
stateCast.state = m_ngram->BeginSentenceState();
@@ -172,8 +164,8 @@ void KENLM<Model>::EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
template<class Model>
void KENLM<Model>::EvaluateInIsolation(MemPool &pool, const System &system,
- const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
+ const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const
{
// contains factors used by this LM
float fullScore, nGramScore;
@@ -193,22 +185,21 @@ void KENLM<Model>::EvaluateInIsolation(MemPool &pool, const System &system,
estimateScoresVec[0] = estimateScore;
estimateScoresVec[1] = 0;
SCORE weightedScore = Scores::CalcWeightedScore(system, *this,
- estimateScoresVec);
+ estimateScoresVec);
estimatedScore += weightedScore;
- }
- else {
+ } else {
scores.PlusEquals(system, *this, nGramScore);
SCORE weightedScore = Scores::CalcWeightedScore(system, *this,
- estimateScore);
+ estimateScore);
estimatedScore += weightedScore;
}
}
template<class Model>
void KENLM<Model>::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const
{
// contains factors used by this LM
float fullScore, nGramScore;
@@ -232,29 +223,28 @@ void KENLM<Model>::EvaluateInIsolation(MemPool &pool, const System &system, cons
estimateScoresVec[0] = estimateScore;
estimateScoresVec[1] = 0;
SCORE weightedScore = Scores::CalcWeightedScore(system, *this,
- estimateScoresVec);
+ estimateScoresVec);
estimatedScore += weightedScore;
- }
- else {
+ } else {
scores.PlusEquals(system, *this, nGramScore);
SCORE weightedScore = Scores::CalcWeightedScore(system, *this,
- estimateScore);
+ estimateScore);
estimatedScore += weightedScore;
}
}
template<class Model>
void KENLM<Model>::EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const
+ const Hypothesis &hypo, const FFState &prevState, Scores &scores,
+ FFState &state) const
{
KenLMState &stateCast = static_cast<KenLMState&>(state);
const System &system = mgr.system;
const lm::ngram::State &in_state =
- static_cast<const KenLMState&>(prevState).state;
+ static_cast<const KenLMState&>(prevState).state;
if (!hypo.GetTargetPhrase().GetSize()) {
stateCast.state = in_state;
@@ -271,11 +261,11 @@ void KENLM<Model>::EvaluateWhenApplied(const ManagerBase &mgr,
typename Model::State *state0 = &stateCast.state, *state1 = &aux_state;
float score = m_ngram->Score(in_state, TranslateID(hypo.GetWord(position)),
- *state0);
+ *state0);
++position;
for (; position < adjust_end; ++position) {
score += m_ngram->Score(*state0, TranslateID(hypo.GetWord(position)),
- *state1);
+ *state1);
std::swap(state0, state1);
}
@@ -284,15 +274,13 @@ void KENLM<Model>::EvaluateWhenApplied(const ManagerBase &mgr,
std::vector<lm::WordIndex> indices(m_ngram->Order() - 1);
const lm::WordIndex *last = LastIDs(hypo, &indices.front());
score += m_ngram->FullScoreForgotState(&indices.front(), last,
- m_ngram->GetVocabulary().EndSentence(), stateCast.state).prob;
- }
- else if (adjust_end < end) {
+ m_ngram->GetVocabulary().EndSentence(), stateCast.state).prob;
+ } else if (adjust_end < end) {
// Get state after adding a long phrase.
std::vector<lm::WordIndex> indices(m_ngram->Order() - 1);
const lm::WordIndex *last = LastIDs(hypo, &indices.front());
m_ngram->GetState(&indices.front(), last, stateCast.state);
- }
- else if (state0 != &stateCast.state) {
+ } else if (state0 != &stateCast.state) {
// Short enough phrase that we can just reuse the state.
stateCast.state = *state0;
}
@@ -305,15 +293,14 @@ void KENLM<Model>::EvaluateWhenApplied(const ManagerBase &mgr,
scoresVec[0] = score;
scoresVec[1] = 0.0;
scores.PlusEquals(system, *this, scoresVec);
- }
- else {
+ } else {
scores.PlusEquals(system, *this, score);
}
}
template<class Model>
void KENLM<Model>::CalcScore(const Phrase<Moses2::Word> &phrase, float &fullScore,
- float &ngramScore, std::size_t &oovCount) const
+ float &ngramScore, std::size_t &oovCount) const
{
fullScore = 0;
ngramScore = 0;
@@ -328,8 +315,7 @@ void KENLM<Model>::CalcScore(const Phrase<Moses2::Word> &phrase, float &fullScor
if (m_bos == phrase[0][m_factorType]) {
scorer.BeginSentence();
position = 1;
- }
- else {
+ } else {
position = 0;
}
@@ -357,7 +343,7 @@ void KENLM<Model>::CalcScore(const Phrase<Moses2::Word> &phrase, float &fullScor
template<class Model>
void KENLM<Model>::CalcScore(const Phrase<SCFG::Word> &phrase, float &fullScore,
- float &ngramScore, std::size_t &oovCount) const
+ float &ngramScore, std::size_t &oovCount) const
{
fullScore = 0;
ngramScore = 0;
@@ -411,7 +397,7 @@ void KENLM<Model>::CalcScore(const Phrase<SCFG::Word> &phrase, float &fullScore,
// Convert last words of hypothesis into vocab ids, returning an end pointer.
template<class Model>
lm::WordIndex *KENLM<Model>::LastIDs(const Hypothesis &hypo,
- lm::WordIndex *indices) const
+ lm::WordIndex *indices) const
{
lm::WordIndex *index = indices;
lm::WordIndex *end = indices + m_ngram->Order() - 1;
@@ -428,8 +414,8 @@ lm::WordIndex *KENLM<Model>::LastIDs(const Hypothesis &hypo,
template<class Model>
void KENLM<Model>::EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const
+ const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
+ FFState &state) const
{
LanguageModelChartStateKenLM &newState = static_cast<LanguageModelChartStateKenLM&>(state);
lm::ngram::RuleScore<Model> ruleScore(*m_ngram, newState.GetChartState());
@@ -511,48 +497,38 @@ FeatureFunction *ConstructKenLM(size_t startInd, const std::string &lineOrig)
for (; argument; ++argument) {
const char *equals = std::find(argument->data(),
- argument->data() + argument->size(), '=');
+ argument->data() + argument->size(), '=');
UTIL_THROW_IF2(equals == argument->data() + argument->size(),
- "Expected = in KenLM argument " << *argument);
+ "Expected = in KenLM argument " << *argument);
StringPiece name(argument->data(), equals - argument->data());
StringPiece value(equals + 1,
- argument->data() + argument->size() - equals - 1);
+ argument->data() + argument->size() - equals - 1);
if (name == "factor") {
factorType = boost::lexical_cast<FactorType>(value);
- }
- else if (name == "order") {
+ } else if (name == "order") {
// Ignored
- }
- else if (name == "path") {
+ } else if (name == "path") {
filePath.assign(value.data(), value.size());
- }
- else if (name == "lazyken") {
+ } else if (name == "lazyken") {
// deprecated: use load instead.
load_method =
- boost::lexical_cast<bool>(value) ?
- util::LAZY : util::POPULATE_OR_READ;
- }
- else if (name == "load") {
+ boost::lexical_cast<bool>(value) ?
+ util::LAZY : util::POPULATE_OR_READ;
+ } else if (name == "load") {
if (value == "lazy") {
load_method = util::LAZY;
- }
- else if (value == "populate_or_lazy") {
+ } else if (value == "populate_or_lazy") {
load_method = util::POPULATE_OR_LAZY;
- }
- else if (value == "populate_or_read" || value == "populate") {
+ } else if (value == "populate_or_read" || value == "populate") {
load_method = util::POPULATE_OR_READ;
- }
- else if (value == "read") {
+ } else if (value == "read") {
load_method = util::READ;
- }
- else if (value == "parallel_read") {
+ } else if (value == "parallel_read") {
load_method = util::PARALLEL_READ;
- }
- else {
+ } else {
UTIL_THROW2("Unknown KenLM load method " << value);
}
- }
- else {
+ } else {
// pass to base class to interpret
line << " " << name << "=" << value;
}
@@ -562,38 +538,37 @@ FeatureFunction *ConstructKenLM(size_t startInd, const std::string &lineOrig)
}
FeatureFunction *ConstructKenLM(size_t startInd, const std::string &line,
- const std::string &file, FactorType factorType,
- util::LoadMethod load_method)
+ const std::string &file, FactorType factorType,
+ util::LoadMethod load_method)
{
lm::ngram::ModelType model_type;
if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
switch (model_type) {
case lm::ngram::PROBING:
return new KENLM<lm::ngram::ProbingModel>(startInd, line, file,
- factorType, load_method);
+ factorType, load_method);
case lm::ngram::REST_PROBING:
return new KENLM<lm::ngram::RestProbingModel>(startInd, line, file,
- factorType, load_method);
+ factorType, load_method);
case lm::ngram::TRIE:
return new KENLM<lm::ngram::TrieModel>(startInd, line, file, factorType,
- load_method);
+ load_method);
case lm::ngram::QUANT_TRIE:
return new KENLM<lm::ngram::QuantTrieModel>(startInd, line, file,
- factorType, load_method);
+ factorType, load_method);
case lm::ngram::ARRAY_TRIE:
return new KENLM<lm::ngram::ArrayTrieModel>(startInd, line, file,
- factorType, load_method);
+ factorType, load_method);
case lm::ngram::QUANT_ARRAY_TRIE:
return new KENLM<lm::ngram::QuantArrayTrieModel>(startInd, line, file,
- factorType, load_method);
+ factorType, load_method);
default:
UTIL_THROW2("Unrecognized kenlm model type " << model_type)
;
}
- }
- else {
+ } else {
return new KENLM<lm::ngram::ProbingModel>(startInd, line, file, factorType,
- load_method);
+ load_method);
}
}
diff --git a/contrib/moses2/LM/KENLM.h b/moses2/LM/KENLM.h
index 703b398d8..3c7839bea 100644
--- a/contrib/moses2/LM/KENLM.h
+++ b/moses2/LM/KENLM.h
@@ -19,15 +19,15 @@ class Word;
FeatureFunction *ConstructKenLM(size_t startInd, const std::string &lineOrig);
FeatureFunction *ConstructKenLM(size_t startInd, const std::string &line,
- const std::string &file, FactorType factorType,
- util::LoadMethod load_method);
+ const std::string &file, FactorType factorType,
+ util::LoadMethod load_method);
template<class Model>
class KENLM: public StatefulFeatureFunction
{
public:
KENLM(size_t startInd, const std::string &line, const std::string &file,
- FactorType factorType, util::LoadMethod load_method);
+ FactorType factorType, util::LoadMethod load_method);
virtual ~KENLM();
@@ -37,25 +37,25 @@ public:
//! return the state associated with the empty hypothesis for a given sentence
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const;
+ const InputType &input, const Hypothesis &hypo) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
+ const TargetPhraseImpl &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const;
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const;
+ const Hypothesis &hypo, const FFState &prevState, Scores &scores,
+ FFState &state) const;
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const;
+ const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
+ FFState &state) const;
protected:
std::string m_path;
@@ -67,13 +67,12 @@ protected:
boost::shared_ptr<Model> m_ngram;
void CalcScore(const Phrase<Moses2::Word> &phrase, float &fullScore, float &ngramScore,
- std::size_t &oovCount) const;
+ std::size_t &oovCount) const;
void CalcScore(const Phrase<SCFG::Word> &phrase, float &fullScore, float &ngramScore,
- std::size_t &oovCount) const;
+ std::size_t &oovCount) const;
- inline lm::WordIndex TranslateID(const Word &word) const
- {
+ inline lm::WordIndex TranslateID(const Word &word) const {
std::size_t factor = word[m_factorType]->GetId();
return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
}
diff --git a/contrib/moses2/LM/KENLMBatch.cpp b/moses2/LM/KENLMBatch.cpp
index 1ed6e7663..d36430961 100644
--- a/contrib/moses2/LM/KENLMBatch.cpp
+++ b/moses2/LM/KENLMBatch.cpp
@@ -8,11 +8,13 @@
#include <sstream>
#include <vector>
+#ifdef __linux
#include <pthread.h>
+#include <unistd.h>
+#endif
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
-#include <unistd.h>
#include "KENLMBatch.h"
#include "../Phrase.h"
@@ -33,23 +35,19 @@ using namespace std;
namespace Moses2
{
-struct KenLMState: public FFState
-{
+struct KenLMState: public FFState {
lm::ngram::State state;
- virtual size_t hash() const
- {
+ virtual size_t hash() const {
size_t ret = hash_value(state);
return ret;
}
- virtual bool operator==(const FFState& o) const
- {
+ virtual bool operator==(const FFState& o) const {
const KenLMState &other = static_cast<const KenLMState &>(o);
bool ret = state == other.state;
return ret;
}
- virtual std::string ToString() const
- {
+ virtual std::string ToString() const {
stringstream ss;
for (size_t i = 0; i < state.Length(); ++i) {
ss << state.words[i] << " ";
@@ -64,13 +62,11 @@ class MappingBuilder: public lm::EnumerateVocab
{
public:
MappingBuilder(FactorCollection &factorCollection, System &system,
- std::vector<lm::WordIndex> &mapping) :
- m_factorCollection(factorCollection), m_system(system), m_mapping(mapping)
- {
+ std::vector<lm::WordIndex> &mapping) :
+ m_factorCollection(factorCollection), m_system(system), m_mapping(mapping) {
}
- void Add(lm::WordIndex index, const StringPiece &str)
- {
+ void Add(lm::WordIndex index, const StringPiece &str) {
std::size_t factorId = m_factorCollection.AddFactor(str, m_system, false)->GetId();
if (m_mapping.size() <= factorId) {
// 0 is <unk> :-)
@@ -87,8 +83,8 @@ private:
/////////////////////////////////////////////////////////////////
KENLMBatch::KENLMBatch(size_t startInd, const std::string &line)
-:StatefulFeatureFunction(startInd, line)
-,m_numHypos(0)
+ :StatefulFeatureFunction(startInd, line)
+ ,m_numHypos(0)
{
cerr << "KENLMBatch::KENLMBatch" << endl;
ReadParameters();
@@ -126,15 +122,15 @@ FFState* KENLMBatch::BlankState(MemPool &pool, const System &sys) const
//! return the state associated with the empty hypothesis for a given sentence
void KENLMBatch::EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const
+ const InputType &input, const Hypothesis &hypo) const
{
KenLMState &stateCast = static_cast<KenLMState&>(state);
stateCast.state = m_ngram->BeginSentenceState();
}
void KENLMBatch::EvaluateInIsolation(MemPool &pool, const System &system,
- const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
+ const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const
{
// contains factors used by this LM
float fullScore, nGramScore;
@@ -154,34 +150,33 @@ void KENLMBatch::EvaluateInIsolation(MemPool &pool, const System &system,
estimateScoresVec[0] = estimateScore;
estimateScoresVec[1] = 0;
SCORE weightedScore = Scores::CalcWeightedScore(system, *this,
- estimateScoresVec);
+ estimateScoresVec);
estimatedScore += weightedScore;
- }
- else {
+ } else {
scores.PlusEquals(system, *this, nGramScore);
SCORE weightedScore = Scores::CalcWeightedScore(system, *this,
- estimateScore);
+ estimateScore);
estimatedScore += weightedScore;
}
}
void KENLMBatch::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const
{
}
void KENLMBatch::EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const
+ const Hypothesis &hypo, const FFState &prevState, Scores &scores,
+ FFState &state) const
{
KenLMState &stateCast = static_cast<KenLMState&>(state);
const System &system = mgr.system;
const lm::ngram::State &in_state =
- static_cast<const KenLMState&>(prevState).state;
+ static_cast<const KenLMState&>(prevState).state;
if (!hypo.GetTargetPhrase().GetSize()) {
stateCast.state = in_state;
@@ -194,15 +189,15 @@ void KENLMBatch::EvaluateWhenApplied(const ManagerBase &mgr,
const std::size_t adjust_end = std::min(end, begin + m_ngram->Order() - 1);
std::size_t position = begin;
- typename Model::State aux_state;
- typename Model::State *state0 = &stateCast.state, *state1 = &aux_state;
+ Model::State aux_state;
+ Model::State *state0 = &stateCast.state, *state1 = &aux_state;
float score = m_ngram->Score(in_state, TranslateID(hypo.GetWord(position)),
- *state0);
+ *state0);
++position;
for (; position < adjust_end; ++position) {
score += m_ngram->Score(*state0, TranslateID(hypo.GetWord(position)),
- *state1);
+ *state1);
std::swap(state0, state1);
}
@@ -211,15 +206,13 @@ void KENLMBatch::EvaluateWhenApplied(const ManagerBase &mgr,
std::vector<lm::WordIndex> indices(m_ngram->Order() - 1);
const lm::WordIndex *last = LastIDs(hypo, &indices.front());
score += m_ngram->FullScoreForgotState(&indices.front(), last,
- m_ngram->GetVocabulary().EndSentence(), stateCast.state).prob;
- }
- else if (adjust_end < end) {
+ m_ngram->GetVocabulary().EndSentence(), stateCast.state).prob;
+ } else if (adjust_end < end) {
// Get state after adding a long phrase.
std::vector<lm::WordIndex> indices(m_ngram->Order() - 1);
const lm::WordIndex *last = LastIDs(hypo, &indices.front());
m_ngram->GetState(&indices.front(), last, stateCast.state);
- }
- else if (state0 != &stateCast.state) {
+ } else if (state0 != &stateCast.state) {
// Short enough phrase that we can just reuse the state.
stateCast.state = *state0;
}
@@ -232,14 +225,13 @@ void KENLMBatch::EvaluateWhenApplied(const ManagerBase &mgr,
scoresVec[0] = score;
scoresVec[1] = 0.0;
scores.PlusEquals(system, *this, scoresVec);
- }
- else {
+ } else {
scores.PlusEquals(system, *this, score);
}
}
void KENLMBatch::CalcScore(const Phrase<Moses2::Word> &phrase, float &fullScore,
- float &ngramScore, std::size_t &oovCount) const
+ float &ngramScore, std::size_t &oovCount) const
{
fullScore = 0;
ngramScore = 0;
@@ -254,8 +246,7 @@ void KENLMBatch::CalcScore(const Phrase<Moses2::Word> &phrase, float &fullScore,
if (m_bos == phrase[0][m_factorType]) {
scorer.BeginSentence();
position = 1;
- }
- else {
+ } else {
position = 0;
}
@@ -283,7 +274,7 @@ void KENLMBatch::CalcScore(const Phrase<Moses2::Word> &phrase, float &fullScore,
// Convert last words of hypothesis into vocab ids, returning an end pointer.
lm::WordIndex *KENLMBatch::LastIDs(const Hypothesis &hypo,
- lm::WordIndex *indices) const
+ lm::WordIndex *indices) const
{
lm::WordIndex *index = indices;
lm::WordIndex *end = indices + m_ngram->Order() - 1;
@@ -299,44 +290,34 @@ lm::WordIndex *KENLMBatch::LastIDs(const Hypothesis &hypo,
}
void KENLMBatch::SetParameter(const std::string& key,
- const std::string& value)
+ const std::string& value)
{
//cerr << "key=" << key << " " << value << endl;
if (key == "path") {
m_path = value;
- }
- else if (key == "order") {
+ } else if (key == "order") {
// ignore
- }
- else if (key == "factor") {
+ } else if (key == "factor") {
m_factorType = Scan<FactorType>(value);
- }
- else if (key == "lazyken") {
+ } else if (key == "lazyken") {
m_load_method =
- boost::lexical_cast<bool>(value) ?
- util::LAZY : util::POPULATE_OR_READ;
- }
- else if (key == "load") {
+ boost::lexical_cast<bool>(value) ?
+ util::LAZY : util::POPULATE_OR_READ;
+ } else if (key == "load") {
if (value == "lazy") {
m_load_method = util::LAZY;
- }
- else if (value == "populate_or_lazy") {
+ } else if (value == "populate_or_lazy") {
m_load_method = util::POPULATE_OR_LAZY;
- }
- else if (value == "populate_or_read" || value == "populate") {
+ } else if (value == "populate_or_read" || value == "populate") {
m_load_method = util::POPULATE_OR_READ;
- }
- else if (value == "read") {
+ } else if (value == "read") {
m_load_method = util::READ;
- }
- else if (value == "parallel_read") {
+ } else if (value == "parallel_read") {
m_load_method = util::PARALLEL_READ;
- }
- else {
+ } else {
UTIL_THROW2("Unknown KenLM load method " << value);
}
- }
- else {
+ } else {
StatefulFeatureFunction::SetParameter(key, value);
}
@@ -344,7 +325,7 @@ void KENLMBatch::SetParameter(const std::string& key,
}
void KENLMBatch::EvaluateWhenAppliedBatch(
- const Batch &batch) const
+ const Batch &batch) const
{
{
// write lock
@@ -362,8 +343,7 @@ void KENLMBatch::EvaluateWhenAppliedBatch(
m_numHypos = 0;
m_threadNeeded.notify_all();
- }
- else {
+ } else {
boost::mutex::scoped_lock lock(m_mutex);
m_threadNeeded.wait(lock);
}
@@ -380,8 +360,8 @@ void KENLMBatch::EvaluateWhenAppliedBatch() const
}
void KENLMBatch::EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const
+ const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
+ FFState &state) const
{
UTIL_THROW2("Not implemented");
}
diff --git a/contrib/moses2/LM/KENLMBatch.h b/moses2/LM/KENLMBatch.h
index 21dc8637c..1510381b5 100644
--- a/contrib/moses2/LM/KENLMBatch.h
+++ b/moses2/LM/KENLMBatch.h
@@ -9,7 +9,9 @@
#include <boost/shared_ptr.hpp>
#include <boost/bind.hpp>
#include <boost/thread.hpp>
+#ifdef __linux
#include <pthread.h>
+#endif
#include "../FF/StatefulFeatureFunction.h"
#include "lm/model.hh"
@@ -33,34 +35,34 @@ public:
virtual void Load(System &system);
void SetParameter(const std::string& key,
- const std::string& value);
+ const std::string& value);
virtual FFState* BlankState(MemPool &pool, const System &sys) const;
//! return the state associated with the empty hypothesis for a given sentence
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const;
+ const InputType &input, const Hypothesis &hypo) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
+ const TargetPhraseImpl &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const;
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const;
+ const Hypothesis &hypo, const FFState &prevState, Scores &scores,
+ FFState &state) const;
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const;
+ const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
+ FFState &state) const;
virtual void EvaluateWhenAppliedBatch(
- const Batch &batch) const;
+ const Batch &batch) const;
protected:
std::string m_path;
@@ -73,10 +75,9 @@ protected:
boost::shared_ptr<Model> m_ngram;
void CalcScore(const Phrase<Moses2::Word> &phrase, float &fullScore, float &ngramScore,
- std::size_t &oovCount) const;
+ std::size_t &oovCount) const;
- inline lm::WordIndex TranslateID(const Word &word) const
- {
+ inline lm::WordIndex TranslateID(const Word &word) const {
std::size_t factor = word[m_factorType]->GetId();
return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
}
diff --git a/contrib/moses2/LM/LanguageModel.cpp b/moses2/LM/LanguageModel.cpp
index 3e0c39d20..a720851ba 100644
--- a/contrib/moses2/LM/LanguageModel.cpp
+++ b/moses2/LM/LanguageModel.cpp
@@ -22,28 +22,24 @@ using namespace std;
namespace Moses2
{
-struct LMState: public PointerState
-{
+struct LMState: public PointerState {
LMState() :
- PointerState()
- {
+ PointerState() {
// uninitialised
}
- void Set(MemPool &pool, void *lms, const std::vector<const Factor*> &context)
- {
+ void Set(MemPool &pool, void *lms, const std::vector<const Factor*> &context) {
lmstate = lms;
numWords = context.size();
lastWords = (const Factor**) pool.Allocate(
- sizeof(const Factor*) * numWords);
+ sizeof(const Factor*) * numWords);
for (size_t i = 0; i < numWords; ++i) {
lastWords[i] = context[i];
}
}
- void Init(MemPool &pool, const Factor *factor)
- {
+ void Init(MemPool &pool, const Factor *factor) {
lmstate = NULL;
numWords = 1;
lastWords = (const Factor**) pool.Allocate(sizeof(const Factor*));
@@ -56,7 +52,7 @@ struct LMState: public PointerState
////////////////////////////////////////////////////////////////////////////////////////
LanguageModel::LanguageModel(size_t startInd, const std::string &line) :
- StatefulFeatureFunction(startInd, line), m_oov(-100)
+ StatefulFeatureFunction(startInd, line), m_oov(-100)
{
ReadParameters();
}
@@ -112,18 +108,15 @@ void LanguageModel::Load(System &system)
}
void LanguageModel::SetParameter(const std::string& key,
- const std::string& value)
+ const std::string& value)
{
if (key == "path") {
m_path = value;
- }
- else if (key == "factor") {
+ } else if (key == "factor") {
m_factorType = Scan<FactorType>(value);
- }
- else if (key == "order") {
+ } else if (key == "order") {
m_order = Scan<size_t>(value);
- }
- else {
+ } else {
StatefulFeatureFunction::SetParameter(key, value);
}
}
@@ -143,8 +136,8 @@ void LanguageModel::EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
}
void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system,
- const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
+ const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const
{
if (targetPhrase.GetSize() == 0) {
return;
@@ -163,8 +156,7 @@ void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system,
if (context.size() == m_order) {
std::pair<SCORE, void*> fromScoring = Score(context);
score += fromScoring.first;
- }
- else {
+ } else {
std::pair<SCORE, void*> fromScoring = Score(context);
nonFullScore += fromScoring.first;
}
@@ -176,14 +168,14 @@ void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system,
}
void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const
{
}
void LanguageModel::EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const
+ const Hypothesis &hypo, const FFState &prevState, Scores &scores,
+ FFState &state) const
{
const LMState &prevLMState = static_cast<const LMState &>(prevState);
size_t numWords = prevLMState.numWords;
@@ -214,8 +206,7 @@ void LanguageModel::EvaluateWhenApplied(const ManagerBase &mgr,
score += fromScoring.first;
fromScoring.second = NULL;
context.clear();
- }
- else {
+ } else {
assert(context.size());
if (context.size() == m_order) {
context.resize(context.size() - 1);
@@ -233,7 +224,7 @@ void LanguageModel::EvaluateWhenApplied(const ManagerBase &mgr,
}
void LanguageModel::ShiftOrPush(std::vector<const Factor*> &context,
- const Factor *factor) const
+ const Factor *factor) const
{
if (context.size() < m_order) {
context.resize(context.size() + 1);
@@ -248,7 +239,7 @@ void LanguageModel::ShiftOrPush(std::vector<const Factor*> &context,
}
std::pair<SCORE, void*> LanguageModel::Score(
- const std::vector<const Factor*> &context) const
+ const std::vector<const Factor*> &context) const
{
//cerr << "context=";
//DebugContext(context);
@@ -260,8 +251,7 @@ std::pair<SCORE, void*> LanguageModel::Score(
if (node) {
ret.first = node->getValue().prob;
ret.second = (void*) node;
- }
- else {
+ } else {
SCORE backoff = 0;
std::vector<const Factor*> backOffContext(context.begin() + 1,
context.end());
@@ -282,7 +272,7 @@ std::pair<SCORE, void*> LanguageModel::Score(
}
SCORE LanguageModel::BackoffScore(
- const std::vector<const Factor*> &context) const
+ const std::vector<const Factor*> &context) const
{
//cerr << "backoff=";
//DebugContext(context);
@@ -295,19 +285,17 @@ SCORE LanguageModel::BackoffScore(
if (stoppedAtInd == context.size()) {
// found entire ngram
ret = node.getValue().backoff;
- }
- else {
+ } else {
if (stoppedAtInd == 0) {
ret = m_oov;
stoppedAtInd = 1;
- }
- else {
+ } else {
ret = node.getValue().backoff;
}
// recursive
std::vector<const Factor*> backoff(context.begin() + stoppedAtInd,
- context.end());
+ context.end());
ret += BackoffScore(backoff);
}
@@ -315,7 +303,7 @@ SCORE LanguageModel::BackoffScore(
}
void LanguageModel::DebugContext(
- const std::vector<const Factor*> &context) const
+ const std::vector<const Factor*> &context) const
{
for (size_t i = 0; i < context.size(); ++i) {
cerr << context[i]->GetString() << " ";
@@ -324,8 +312,8 @@ void LanguageModel::DebugContext(
}
void LanguageModel::EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const
+ const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
+ FFState &state) const
{
UTIL_THROW2("Not implemented");
}
diff --git a/contrib/moses2/LM/LanguageModel.h b/moses2/LM/LanguageModel.h
index d262a8497..12d25809f 100644
--- a/contrib/moses2/LM/LanguageModel.h
+++ b/moses2/LM/LanguageModel.h
@@ -9,7 +9,7 @@
#include "../FF/StatefulFeatureFunction.h"
#include "../TypeDef.h"
-#include "../MorphoTrie/MorphTrie.h"
+#include "../InMemoryTrie/InMemoryTrie.h"
#include "../legacy/Factor.h"
#include "../legacy/Util2.h"
@@ -17,24 +17,19 @@ namespace Moses2
{
////////////////////////////////////////////////////////////////////////////////////////
-struct LMScores
-{
- LMScores()
- {
+struct LMScores {
+ LMScores() {
}
LMScores(const LMScores &copy) :
- prob(copy.prob), backoff(copy.backoff)
- {
+ prob(copy.prob), backoff(copy.backoff) {
}
LMScores(float inProb, float inBackoff) :
- prob(inProb), backoff(inBackoff)
- {
+ prob(inProb), backoff(inBackoff) {
}
- void Debug(std::ostream &out, const System &system) const
- {
+ void Debug(std::ostream &out, const System &system) const {
out << "(" << prob << "," << backoff << ")" << std::flush;
}
@@ -54,40 +49,40 @@ public:
virtual FFState* BlankState(MemPool &pool, const System &sys) const;
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const;
+ const InputType &input, const Hypothesis &hypo) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
+ const TargetPhraseImpl &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const;
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const;
+ const Hypothesis &hypo, const FFState &prevState, Scores &scores,
+ FFState &state) const;
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const;
+ const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
+ FFState &state) const;
protected:
std::string m_path;
FactorType m_factorType;
size_t m_order;
- MorphTrie<const Factor*, LMScores> m_root;
+ InMemoryTrie<const Factor*, LMScores> m_root;
SCORE m_oov;
const Factor *m_bos;
const Factor *m_eos;
void ShiftOrPush(std::vector<const Factor*> &context,
- const Factor *factor) const;
+ const Factor *factor) const;
std::pair<SCORE, void*> Score(
- const std::vector<const Factor*> &context) const;
+ const std::vector<const Factor*> &context) const;
SCORE BackoffScore(const std::vector<const Factor*> &context) const;
void DebugContext(const std::vector<const Factor*> &context) const;
diff --git a/contrib/moses2/Main.cpp b/moses2/Main.cpp
index 0661d1d0e..cf833760a 100644
--- a/contrib/moses2/Main.cpp
+++ b/moses2/Main.cpp
@@ -20,15 +20,15 @@ using namespace std;
int main(int argc, char** argv)
{
- cerr << "Starting..." << endl;
+ cerr << "Starting..." << endl;
Moses2::Timer timer;
timer.start();
- //Temp();
+ //Temp();
Moses2::Parameter params;
if (!params.LoadParam(argc, argv)) {
- return EXIT_FAILURE;
+ return EXIT_FAILURE;
}
Moses2::System system(params);
timer.check("Loaded");
@@ -45,8 +45,7 @@ int main(int argc, char** argv)
if (params.GetParam("server")) {
std::cerr << "RUN SERVER" << std::endl;
run_as_server(system);
- }
- else {
+ } else {
std::cerr << "RUN BATCH" << std::endl;
batch_run(params, system, pool);
}
@@ -71,8 +70,7 @@ istream &GetInputStream(Moses2::Parameter &params)
if (vec && vec->size()) {
Moses2::InputFileStream *stream = new Moses2::InputFileStream(vec->at(0));
return *stream;
- }
- else {
+ } else {
return cin;
}
}
@@ -86,7 +84,7 @@ void batch_run(Moses2::Parameter &params, Moses2::System &system, Moses2::Thread
string line;
while (getline(inStream, line)) {
//cerr << "line=" << line << endl;
- boost::shared_ptr<Moses2::TranslationTask> task(new Moses2::TranslationTask(system, line, translationId));
+ boost::shared_ptr<Moses2::TranslationTask> task(new Moses2::TranslationTask(system, line, translationId));
//cerr << "START pool.Submit()" << endl;
pool.Submit(task);
@@ -106,23 +104,23 @@ void batch_run(Moses2::Parameter &params, Moses2::System &system, Moses2::Thread
////////////////////////////////////////////////////////////////////////////////////////////////
void Temp()
{
- Moses2::MemPool pool;
- Moses2::MemPoolAllocator<int> a(pool);
+ Moses2::MemPool pool;
+ Moses2::MemPoolAllocator<int> a(pool);
- boost::unordered_set<int, boost::hash<int>, std::equal_to<int>, Moses2::MemPoolAllocator<int> > s(a);
- s.insert(3);
- s.insert(4);
- s.insert(3);
- s.erase(3);
+ boost::unordered_set<int, boost::hash<int>, std::equal_to<int>, Moses2::MemPoolAllocator<int> > s(a);
+ s.insert(3);
+ s.insert(4);
+ s.insert(3);
+ s.erase(3);
- boost::pool_allocator<int> alloc;
- std::vector<int, boost::pool_allocator<int> > v(alloc);
- for (int i = 0; i < 1000; ++i)
- v.push_back(i);
+ boost::pool_allocator<int> alloc;
+ std::vector<int, boost::pool_allocator<int> > v(alloc);
+ for (int i = 0; i < 1000; ++i)
+ v.push_back(i);
- v.clear();
- boost::singleton_pool<boost::pool_allocator_tag, sizeof(int)>::
- purge_memory();
+ v.clear();
+ boost::singleton_pool<boost::pool_allocator_tag, sizeof(int)>::
+ purge_memory();
- abort();
+ abort();
}
diff --git a/contrib/moses2/Main.h b/moses2/Main.h
index 41e016130..731d6385b 100644
--- a/contrib/moses2/Main.h
+++ b/moses2/Main.h
@@ -7,7 +7,8 @@
#pragma once
#include <iostream>
-namespace Moses2 {
+namespace Moses2
+{
class Parameter;
class System;
class ThreadPool;
diff --git a/contrib/moses2/ManagerBase.cpp b/moses2/ManagerBase.cpp
index 1e774cc5b..f40aa7b2f 100644
--- a/contrib/moses2/ManagerBase.cpp
+++ b/moses2/ManagerBase.cpp
@@ -21,14 +21,14 @@ using namespace std;
namespace Moses2
{
ManagerBase::ManagerBase(System &sys, const TranslationTask &task,
- const std::string &inputStr, long translationId)
-:system(sys)
-,task(task)
-,m_inputStr(inputStr)
-,m_translationId(translationId)
-,m_pool(NULL)
-,m_systemPool(NULL)
-,m_hypoRecycle(NULL)
+ const std::string &inputStr, long translationId)
+ :system(sys)
+ ,task(task)
+ ,m_inputStr(inputStr)
+ ,m_translationId(translationId)
+ ,m_pool(NULL)
+ ,m_systemPool(NULL)
+ ,m_hypoRecycle(NULL)
{
}
@@ -37,10 +37,10 @@ ManagerBase::~ManagerBase()
system.featureFunctions.CleanUpAfterSentenceProcessing();
if (m_pool) {
- GetPool().Reset();
+ GetPool().Reset();
}
if (m_hypoRecycle) {
- GetHypoRecycle().Clear();
+ GetHypoRecycle().Clear();
}
}
diff --git a/contrib/moses2/ManagerBase.h b/moses2/ManagerBase.h
index 7b4a02ba8..cb8ee019c 100644
--- a/contrib/moses2/ManagerBase.h
+++ b/moses2/ManagerBase.h
@@ -38,27 +38,32 @@ public:
mutable ArcLists arcLists;
ManagerBase(System &sys, const TranslationTask &task,
- const std::string &inputStr, long translationId);
+ const std::string &inputStr, long translationId);
virtual ~ManagerBase();
virtual void Decode() = 0;
virtual std::string OutputBest() const = 0;
virtual std::string OutputNBest() = 0;
virtual std::string OutputTransOpt() = 0;
- MemPool &GetPool() const
- { return *m_pool; }
+ MemPool &GetPool() const {
+ return *m_pool;
+ }
- MemPool &GetSystemPool() const
- { return *m_systemPool; }
+ MemPool &GetSystemPool() const {
+ return *m_systemPool;
+ }
- Recycler<HypothesisBase*> &GetHypoRecycle() const
- { return *m_hypoRecycle; }
+ Recycler<HypothesisBase*> &GetHypoRecycle() const {
+ return *m_hypoRecycle;
+ }
- const InputType &GetInput() const
- { return *m_input; }
+ const InputType &GetInput() const {
+ return *m_input;
+ }
- long GetTranslationId() const
- { return m_translationId; }
+ long GetTranslationId() const {
+ return m_translationId;
+ }
protected:
std::string m_inputStr;
diff --git a/contrib/moses2/MemPool.cpp b/moses2/MemPool.cpp
index 7e159117b..31d684bfc 100644
--- a/contrib/moses2/MemPool.cpp
+++ b/moses2/MemPool.cpp
@@ -16,7 +16,7 @@ namespace Moses2
{
MemPool::Page::Page(std::size_t vSize) :
- size(vSize)
+ size(vSize)
{
mem = (uint8_t*) util::MallocOrThrow(size);
end = mem + size;
@@ -28,7 +28,7 @@ MemPool::Page::~Page()
}
////////////////////////////////////////////////////
MemPool::MemPool(size_t initSize) :
- m_currSize(initSize), m_currPage(0)
+ m_currSize(initSize), m_currPage(0)
{
Page *page = new Page(m_currSize);
m_pages.push_back(page);
@@ -57,16 +57,14 @@ uint8_t *MemPool::More(std::size_t size)
uint8_t *ret = page->mem;
current_ = ret + size;
return ret;
- }
- else {
+ } else {
// use existing page
Page &page = *m_pages[m_currPage];
if (size <= page.size) {
uint8_t *ret = page.mem;
current_ = ret + size;
return ret;
- }
- else {
+ } else {
// recursive call More()
return More(size);
}
diff --git a/contrib/moses2/MemPool.h b/moses2/MemPool.h
index eaa55915e..2e8fccc34 100644
--- a/contrib/moses2/MemPool.h
+++ b/moses2/MemPool.h
@@ -20,14 +20,12 @@ namespace Moses2
class MemPool
{
- struct Page
- {
+ struct Page {
uint8_t *mem;
uint8_t *end;
size_t size;
- Page()
- {
+ Page() {
}
Page(std::size_t size);
~Page();
@@ -38,8 +36,7 @@ public:
~MemPool();
- uint8_t *Allocate(std::size_t size)
- {
+ uint8_t *Allocate(std::size_t size) {
size = (size + 3) & 0xfffffffc;
uint8_t *ret = current_;
@@ -48,8 +45,7 @@ public:
Page &page = *m_pages[m_currPage];
if (current_ <= page.end) {
// return what we got
- }
- else {
+ } else {
ret = More(size);
}
return ret;
@@ -57,15 +53,13 @@ public:
}
template<typename T>
- T *Allocate()
- {
+ T *Allocate() {
uint8_t *ret = Allocate(sizeof(T));
return (T*) ret;
}
template<typename T>
- T *Allocate(size_t num)
- {
+ T *Allocate(size_t num) {
uint8_t *ret = Allocate(sizeof(T) * num);
return (T*) ret;
}
@@ -94,18 +88,15 @@ class ObjectPoolContiguous
public:
ObjectPoolContiguous(std::size_t initSize = 100000) :
- m_size(0), m_actualSize(initSize)
- {
+ m_size(0), m_actualSize(initSize) {
m_vec = (T*) malloc(sizeof(T) * initSize);
}
- ~ObjectPoolContiguous()
- {
+ ~ObjectPoolContiguous() {
free(m_vec);
}
- void Add(T &obj)
- {
+ void Add(T &obj) {
if (m_size >= m_actualSize) {
//std::cerr << std::endl << "MORE " << m_size << std::endl;
m_actualSize *= 2;
@@ -116,46 +107,38 @@ public:
++m_size;
}
- bool IsEmpty() const
- {
+ bool IsEmpty() const {
return m_size == 0;
}
- void Reset()
- {
+ void Reset() {
m_size = 0;
}
// vector op
- size_t GetSize() const
- {
+ size_t GetSize() const {
return m_size;
}
- const T& operator[](size_t ind) const
- {
+ const T& operator[](size_t ind) const {
return m_vec[ind];
}
// stack op
- const T &Get() const
- {
+ const T &Get() const {
return m_vec[m_size - 1];
}
- void Pop()
- {
+ void Pop() {
--m_size;
}
- T *GetData()
- {
+ T *GetData() {
return m_vec;
}
template<typename ORDERER>
- void Sort(const ORDERER &orderer)
- {
+ void Sort(const ORDERER &orderer) {
std::sort(m_vec, m_vec + m_size, orderer);
}
diff --git a/contrib/moses2/MemPoolAllocator.h b/moses2/MemPoolAllocator.h
index 6cc699893..994bb7711 100644
--- a/contrib/moses2/MemPoolAllocator.h
+++ b/moses2/MemPoolAllocator.h
@@ -17,51 +17,42 @@ public:
typedef std::ptrdiff_t difference_type;
template<class U>
- struct rebind
- {
+ struct rebind {
typedef MemPoolAllocator<U> other;
};
MemPoolAllocator(Moses2::MemPool &pool) :
- m_pool(pool)
- {
+ m_pool(pool) {
}
MemPoolAllocator(const MemPoolAllocator &other) :
- m_pool(other.m_pool)
- {
+ m_pool(other.m_pool) {
}
template<class U>
MemPoolAllocator(const MemPoolAllocator<U>& other) :
- m_pool(other.m_pool)
- {
+ m_pool(other.m_pool) {
}
- size_type max_size() const
- {
+ size_type max_size() const {
return std::numeric_limits<size_type>::max();
}
- void deallocate(pointer p, size_type n)
- {
+ void deallocate(pointer p, size_type n) {
//std::cerr << "deallocate " << p << " " << n << std::endl;
}
- pointer allocate(size_type n, std::allocator<void>::const_pointer hint = 0)
- {
+ pointer allocate(size_type n, std::allocator<void>::const_pointer hint = 0) {
//std::cerr << "allocate " << n << " " << hint << std::endl;
pointer ret = m_pool.Allocate<T>(n);
return ret;
}
- void construct(pointer p, const_reference val)
- {
+ void construct(pointer p, const_reference val) {
//std::cerr << "construct " << p << " " << n << std::endl;
new ((void *) p) T(val);
}
- void destroy(pointer p)
- {
+ void destroy(pointer p) {
//std::cerr << "destroy " << p << " " << n << std::endl;
}
@@ -81,6 +72,10 @@ public:
return false;
}
+ MemPoolAllocator<T>& operator=(const MemPoolAllocator<T>& allocator) {
+ return *this;
+ }
+
MemPool &m_pool;
protected:
};
diff --git a/contrib/moses2/Phrase.cpp b/moses2/Phrase.cpp
index dd4abf328..dd4abf328 100644
--- a/contrib/moses2/Phrase.cpp
+++ b/moses2/Phrase.cpp
diff --git a/contrib/moses2/Phrase.h b/moses2/Phrase.h
index 714e65d42..100701483 100644
--- a/contrib/moses2/Phrase.h
+++ b/moses2/Phrase.h
@@ -32,17 +32,16 @@ template<typename WORD>
class Phrase
{
public:
- virtual ~Phrase()
- {
+ virtual ~Phrase() {
}
virtual const WORD& operator[](size_t pos) const = 0;
virtual size_t GetSize() const = 0;
- virtual const WORD& Back() const
- { return (*this)[GetSize() - 1]; }
+ virtual const WORD& Back() const {
+ return (*this)[GetSize() - 1];
+ }
- virtual size_t hash() const
- {
+ virtual size_t hash() const {
size_t seed = 0;
for (size_t i = 0; i < GetSize(); ++i) {
@@ -54,8 +53,7 @@ public:
return seed;
}
- virtual bool operator==(const Phrase &compare) const
- {
+ virtual bool operator==(const Phrase &compare) const {
if (GetSize() != compare.GetSize()) {
return false;
}
@@ -71,13 +69,11 @@ public:
return true;
}
- virtual bool operator!=(const Phrase &compare) const
- {
+ virtual bool operator!=(const Phrase &compare) const {
return !((*this) == compare);
}
- virtual std::string GetString(const FactorList &factorTypes) const
- {
+ virtual std::string GetString(const FactorList &factorTypes) const {
if (GetSize() == 0) {
return "";
}
@@ -95,8 +91,7 @@ public:
virtual SubPhrase<WORD> GetSubPhrase(size_t start, size_t size) const = 0;
- virtual std::string Debug(const System &system) const
- {
+ virtual std::string Debug(const System &system) const {
std::stringstream out;
size_t size = GetSize();
if (size) {
@@ -110,8 +105,7 @@ public:
return out.str();
}
- virtual void OutputToStream(const System &system, std::ostream &out) const
- {
+ virtual void OutputToStream(const System &system, std::ostream &out) const {
size_t size = GetSize();
if (size) {
(*this)[0].OutputToStream(system, out);
@@ -131,8 +125,7 @@ template<typename WORD>
class PhraseOrdererLexical
{
public:
- bool operator()(const Phrase<WORD> &a, const Phrase<WORD> &b) const
- {
+ bool operator()(const Phrase<WORD> &a, const Phrase<WORD> &b) const {
size_t minSize = std::min(a.GetSize(), b.GetSize());
for (size_t i = 0; i < minSize; ++i) {
const Word &aWord = a[i];
diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Misc.cpp b/moses2/PhraseBased/CubePruningMiniStack/Misc.cpp
index 2af2b35f0..7fcd4fa0c 100644
--- a/contrib/moses2/PhraseBased/CubePruningMiniStack/Misc.cpp
+++ b/moses2/PhraseBased/CubePruningMiniStack/Misc.cpp
@@ -22,22 +22,20 @@ namespace NSCubePruningMiniStack
////////////////////////////////////////////////////////////////////////
QueueItem *QueueItem::Create(QueueItem *currItem, Manager &mgr, CubeEdge &edge,
- size_t hypoIndex, size_t tpIndex,
- QueueItemRecycler &queueItemRecycler)
+ size_t hypoIndex, size_t tpIndex,
+ QueueItemRecycler &queueItemRecycler)
{
QueueItem *ret;
if (currItem) {
// reuse incoming queue item to create new item
ret = currItem;
ret->Init(mgr, edge, hypoIndex, tpIndex);
- }
- else if (!queueItemRecycler.empty()) {
+ } else if (!queueItemRecycler.empty()) {
// use item from recycle bin
ret = queueItemRecycler.back();
ret->Init(mgr, edge, hypoIndex, tpIndex);
queueItemRecycler.pop_back();
- }
- else {
+ } else {
// create new item
ret = new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge,
hypoIndex, tpIndex);
@@ -47,14 +45,14 @@ QueueItem *QueueItem::Create(QueueItem *currItem, Manager &mgr, CubeEdge &edge,
}
QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex,
- size_t tpIndex) :
- edge(&edge), hypoIndex(hypoIndex), tpIndex(tpIndex)
+ size_t tpIndex) :
+ edge(&edge), hypoIndex(hypoIndex), tpIndex(tpIndex)
{
CreateHypothesis(mgr);
}
void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex,
- size_t tpIndex)
+ size_t tpIndex)
{
this->edge = &edge;
this->hypoIndex = hypoIndex;
@@ -66,7 +64,7 @@ void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex,
void QueueItem::CreateHypothesis(Manager &mgr)
{
const Hypothesis *prevHypo =
- static_cast<const Hypothesis*>(edge->hypos[hypoIndex]);
+ static_cast<const Hypothesis*>(edge->hypos[hypoIndex]);
const TargetPhraseImpl &tp = edge->tps[tpIndex];
//cerr << "hypoIndex=" << hypoIndex << endl;
@@ -76,7 +74,7 @@ void QueueItem::CreateHypothesis(Manager &mgr)
hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap,
- edge->estimatedScore);
+ edge->estimatedScore);
if (!mgr.system.options.cube.lazy_scoring) {
hypo->EvaluateWhenApplied();
@@ -85,8 +83,8 @@ void QueueItem::CreateHypothesis(Manager &mgr)
////////////////////////////////////////////////////////////////////////
CubeEdge::CubeEdge(Manager &mgr, const Hypotheses &hypos, const InputPath &path,
- const TargetPhrases &tps, const Bitmap &newBitmap) :
- hypos(hypos), path(path), tps(tps), newBitmap(newBitmap)
+ const TargetPhrases &tps, const Bitmap &newBitmap) :
+ hypos(hypos), path(path), tps(tps), newBitmap(newBitmap)
{
estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
}
@@ -99,7 +97,7 @@ std::string CubeEdge::Debug(const System &system) const
}
bool CubeEdge::SetSeenPosition(const size_t x, const size_t y,
- SeenPositions &seenPositions) const
+ SeenPositions &seenPositions) const
{
//UTIL_THROW_IF2(x >= (1<<17), "Error");
//UTIL_THROW_IF2(y >= (1<<17), "Error");
@@ -110,22 +108,22 @@ bool CubeEdge::SetSeenPosition(const size_t x, const size_t y,
}
void CubeEdge::CreateFirst(Manager &mgr, Queue &queue,
- SeenPositions &seenPositions,
- QueueItemRecycler &queueItemRecycler)
+ SeenPositions &seenPositions,
+ QueueItemRecycler &queueItemRecycler)
{
assert(hypos.size());
assert(tps.GetSize());
QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0,
- queueItemRecycler);
+ queueItemRecycler);
queue.push(item);
bool setSeen = SetSeenPosition(0, 0, seenPositions);
assert(setSeen);
}
void CubeEdge::CreateNext(Manager &mgr, QueueItem *item, Queue &queue,
- SeenPositions &seenPositions,
- QueueItemRecycler &queueItemRecycler)
+ SeenPositions &seenPositions,
+ QueueItemRecycler &queueItemRecycler)
{
size_t hypoIndex = item->hypoIndex;
size_t tpIndex = item->tpIndex;
@@ -134,7 +132,7 @@ void CubeEdge::CreateNext(Manager &mgr, QueueItem *item, Queue &queue,
&& SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) {
// reuse incoming queue item to create new item
QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1,
- tpIndex, queueItemRecycler);
+ tpIndex, queueItemRecycler);
assert(newItem == item);
queue.push(newItem);
item = NULL;
@@ -143,7 +141,7 @@ void CubeEdge::CreateNext(Manager &mgr, QueueItem *item, Queue &queue,
if (tpIndex + 1 < tps.GetSize()
&& SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) {
QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex,
- tpIndex + 1, queueItemRecycler);
+ tpIndex + 1, queueItemRecycler);
queue.push(newItem);
item = NULL;
}
diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Misc.h b/moses2/PhraseBased/CubePruningMiniStack/Misc.h
index 535ef6ada..4fc576cba 100644
--- a/contrib/moses2/PhraseBased/CubePruningMiniStack/Misc.h
+++ b/moses2/PhraseBased/CubePruningMiniStack/Misc.h
@@ -38,8 +38,8 @@ class QueueItem
~QueueItem(); // NOT IMPLEMENTED. Use MemPool
public:
static QueueItem *Create(QueueItem *currItem, Manager &mgr, CubeEdge &edge,
- size_t hypoIndex, size_t tpIndex,
- QueueItemRecycler &queueItemRecycler);
+ size_t hypoIndex, size_t tpIndex,
+ QueueItemRecycler &queueItemRecycler);
QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
@@ -56,8 +56,7 @@ protected:
class QueueItemOrderer
{
public:
- bool operator()(QueueItem* itemA, QueueItem* itemB) const
- {
+ bool operator()(QueueItem* itemA, QueueItem* itemB) const {
HypothesisFutureScoreOrderer orderer;
return !orderer(itemA->hypo, itemB->hypo);
}
@@ -68,11 +67,11 @@ class CubeEdge
{
public:
typedef std::priority_queue<QueueItem*,
- std::vector<QueueItem*, MemPoolAllocator<QueueItem*> >, QueueItemOrderer> Queue;
+ std::vector<QueueItem*, MemPoolAllocator<QueueItem*> >, QueueItemOrderer> Queue;
typedef std::pair<const CubeEdge*, int> SeenPositionItem;
typedef boost::unordered_set<SeenPositionItem, boost::hash<SeenPositionItem>,
- std::equal_to<SeenPositionItem>, MemPoolAllocator<SeenPositionItem> > SeenPositions;
+ std::equal_to<SeenPositionItem>, MemPoolAllocator<SeenPositionItem> > SeenPositions;
const Hypotheses &hypos;
const InputPath &path;
@@ -81,16 +80,16 @@ public:
SCORE estimatedScore;
CubeEdge(Manager &mgr, const Hypotheses &hypos, const InputPath &path,
- const TargetPhrases &tps, const Bitmap &newBitmap);
+ const TargetPhrases &tps, const Bitmap &newBitmap);
bool SetSeenPosition(const size_t x, const size_t y,
- SeenPositions &seenPositions) const;
+ SeenPositions &seenPositions) const;
void CreateFirst(Manager &mgr, Queue &queue, SeenPositions &seenPositions,
- QueueItemRecycler &queueItemRecycler);
+ QueueItemRecycler &queueItemRecycler);
void CreateNext(Manager &mgr, QueueItem *item, Queue &queue,
- SeenPositions &seenPositions,
- QueueItemRecycler &queueItemRecycler);
+ SeenPositions &seenPositions,
+ QueueItemRecycler &queueItemRecycler);
std::string Debug(const System &system) const;
diff --git a/moses2/PhraseBased/CubePruningMiniStack/Search.cpp b/moses2/PhraseBased/CubePruningMiniStack/Search.cpp
new file mode 100644
index 000000000..74103d211
--- /dev/null
+++ b/moses2/PhraseBased/CubePruningMiniStack/Search.cpp
@@ -0,0 +1,248 @@
+/*
+ * Search.cpp
+ *
+ * Created on: 16 Nov 2015
+ * Author: hieu
+ */
+#include <boost/foreach.hpp>
+#include "Search.h"
+#include "Stack.h"
+#include "../Manager.h"
+#include "../Hypothesis.h"
+#include "../TrellisPath.h"
+#include "../Sentence.h"
+#include "../../TrellisPaths.h"
+#include "../../InputPathsBase.h"
+#include "../../InputPathBase.h"
+#include "../../System.h"
+#include "../../TranslationTask.h"
+#include "../../legacy/Util2.h"
+#include "../../PhraseBased/TargetPhrases.h"
+
+using namespace std;
+
+namespace Moses2
+{
+
+namespace NSCubePruningMiniStack
+{
+
+////////////////////////////////////////////////////////////////////////
+Search::Search(Manager &mgr) :
+ Moses2::Search(mgr), m_stack(mgr), m_cubeEdgeAlloc(mgr.GetPool())
+
+ , m_queue(QueueItemOrderer(),
+ std::vector<QueueItem*, MemPoolAllocator<QueueItem*> >(
+ MemPoolAllocator<QueueItem*>(mgr.GetPool())))
+
+ , m_seenPositions(
+ MemPoolAllocator<CubeEdge::SeenPositionItem>(mgr.GetPool()))
+
+ , m_queueItemRecycler(MemPoolAllocator<QueueItem*>(mgr.GetPool()))
+
+{
+}
+
+Search::~Search()
+{
+}
+
+void Search::Decode()
+{
+ const Sentence &sentence = static_cast<const Sentence&>(mgr.GetInput());
+
+ // init cue edges
+ m_cubeEdges.resize(sentence.GetSize() + 1);
+ for (size_t i = 0; i < m_cubeEdges.size(); ++i) {
+ m_cubeEdges[i] = new (mgr.GetPool().Allocate<CubeEdges>()) CubeEdges(
+ m_cubeEdgeAlloc);
+ }
+
+ const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
+ Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
+ initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(),
+ initBitmap);
+ initHypo->EmptyHypothesisState(mgr.GetInput());
+ //cerr << "initHypo=" << *initHypo << endl;
+
+ m_stack.Add(initHypo, mgr.GetHypoRecycle(), mgr.arcLists);
+ PostDecode(0);
+
+ for (size_t stackInd = 1; stackInd < sentence.GetSize() + 1;
+ ++stackInd) {
+ //cerr << "stackInd=" << stackInd << endl;
+ m_stack.Clear();
+ Decode(stackInd);
+ PostDecode(stackInd);
+
+ //m_stack.DebugCounts();
+ }
+
+}
+
+void Search::Decode(size_t stackInd)
+{
+ Recycler<HypothesisBase*> &hypoRecycler = mgr.GetHypoRecycle();
+
+ // reuse queue from previous stack. Clear it first
+ std::vector<QueueItem*, MemPoolAllocator<QueueItem*> > &container = Container(
+ m_queue);
+ //cerr << "container=" << container.size() << endl;
+ BOOST_FOREACH(QueueItem *item, container) {
+ // recycle unused hypos from queue
+ Hypothesis *hypo = item->hypo;
+ hypoRecycler.Recycle(hypo);
+
+ // recycle queue item
+ m_queueItemRecycler.push_back(item);
+ }
+ container.clear();
+
+ m_seenPositions.clear();
+
+ // add top hypo from every edge into queue
+ CubeEdges &edges = *m_cubeEdges[stackInd];
+
+ BOOST_FOREACH(CubeEdge *edge, edges) {
+ //cerr << *edge << " ";
+ edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
+ }
+
+ /*
+ cerr << "edges: ";
+ boost::unordered_set<const Bitmap*> uniqueBM;
+ BOOST_FOREACH(CubeEdge *edge, edges) {
+ uniqueBM.insert(&edge->newBitmap);
+ //cerr << *edge << " ";
+ }
+ cerr << edges.size() << " " << uniqueBM.size();
+ cerr << endl;
+ */
+
+ size_t pops = 0;
+ while (!m_queue.empty() && pops < mgr.system.options.cube.pop_limit) {
+ // get best hypo from queue, add to stack
+ //cerr << "queue=" << queue.size() << endl;
+ QueueItem *item = m_queue.top();
+ m_queue.pop();
+
+ CubeEdge *edge = item->edge;
+
+ // add hypo to stack
+ Hypothesis *hypo = item->hypo;
+
+ if (mgr.system.options.cube.lazy_scoring) {
+ hypo->EvaluateWhenApplied();
+ }
+
+ //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
+ m_stack.Add(hypo, hypoRecycler, mgr.arcLists);
+
+ edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
+
+ ++pops;
+ }
+
+ // create hypo from every edge. Increase diversity
+ if (mgr.system.options.cube.diversity) {
+ while (!m_queue.empty()) {
+ QueueItem *item = m_queue.top();
+ m_queue.pop();
+
+ if (item->hypoIndex == 0 && item->tpIndex == 0) {
+ // add hypo to stack
+ Hypothesis *hypo = item->hypo;
+ //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
+ m_stack.Add(hypo, hypoRecycler, mgr.arcLists);
+ }
+ }
+ }
+}
+
+void Search::PostDecode(size_t stackInd)
+{
+ MemPool &pool = mgr.GetPool();
+
+ const InputPaths &paths = mgr.GetInputPaths();
+ const Matrix<InputPath*> &pathMatrix = paths.GetMatrix();
+ size_t inputSize = pathMatrix.GetRows();
+ size_t numPaths = pathMatrix.GetCols();
+
+ BOOST_FOREACH(const Stack::Coll::value_type &val, m_stack.GetColl()) {
+ const Bitmap &hypoBitmap = *val.first.first;
+ size_t firstGap = hypoBitmap.GetFirstGapPos();
+ size_t hypoEndPos = val.first.second;
+
+ Moses2::HypothesisColl &hypos = *val.second;
+
+ //cerr << "key=" << hypoBitmap << " " << firstGap << " " << inputSize << endl;
+
+ // create edges to next hypos from existing hypos
+ for (size_t startPos = firstGap; startPos < inputSize; ++startPos) {
+ for (size_t pathInd = 0; pathInd < numPaths; ++pathInd) {
+ const InputPath *path = pathMatrix.GetValue(startPos, pathInd);
+
+ if (path == NULL) {
+ break;
+ }
+ if (path->GetNumRules() == 0) {
+ continue;
+ }
+
+ const Range &pathRange = path->range;
+ //cerr << "pathRange=" << pathRange << endl;
+ if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
+ continue;
+ }
+
+ const ReorderingConstraint &reorderingConstraint = mgr.GetInput().GetReorderingConstraint();
+ if (!reorderingConstraint.Check(hypoBitmap, startPos, pathRange.GetEndPos())) {
+ continue;
+ }
+
+ const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
+ size_t numWords = newBitmap.GetNumWordsCovered();
+
+ CubeEdges &edges = *m_cubeEdges[numWords];
+
+ // sort hypo for a particular bitmap and hypoEndPos
+ const Hypotheses &sortedHypos = hypos.GetSortedAndPrunedHypos(mgr, mgr.arcLists);
+
+ size_t numPt = mgr.system.mappings.size();
+ for (size_t i = 0; i < numPt; ++i) {
+ const TargetPhrases *tps = path->targetPhrases[i];
+ if (tps && tps->GetSize()) {
+ CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, sortedHypos, *path, *tps, newBitmap);
+ edges.push_back(edge);
+ }
+ }
+ }
+ }
+ }
+}
+
+const Hypothesis *Search::GetBestHypo() const
+{
+ const Hypothesis *bestHypo = m_stack.GetBestHypo();
+ return bestHypo;
+}
+
+void Search::AddInitialTrellisPaths(TrellisPaths<TrellisPath> &paths) const
+{
+ const Stack::Coll &coll = m_stack.GetColl();
+ BOOST_FOREACH(const Stack::Coll::value_type &val, coll) {
+ Moses2::HypothesisColl &hypos = *val.second;
+ const Hypotheses &sortedHypos = hypos.GetSortedAndPrunedHypos(mgr, mgr.arcLists);
+
+ BOOST_FOREACH(const HypothesisBase *hypoBase, sortedHypos) {
+ const Hypothesis *hypo = static_cast<const Hypothesis*>(hypoBase);
+ TrellisPath *path = new TrellisPath(hypo, mgr.arcLists);
+ paths.Add(path);
+ }
+ }
+}
+
+}
+
+}
+
diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.h b/moses2/PhraseBased/CubePruningMiniStack/Search.h
index 0dfe9dfb2..0dfe9dfb2 100644
--- a/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.h
+++ b/moses2/PhraseBased/CubePruningMiniStack/Search.h
diff --git a/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp b/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp
new file mode 100644
index 000000000..0565aa402
--- /dev/null
+++ b/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp
@@ -0,0 +1,123 @@
+/*
+ * Stack.cpp
+ *
+ * Created on: 24 Oct 2015
+ * Author: hieu
+ */
+#include <algorithm>
+#include <boost/foreach.hpp>
+#include "Stack.h"
+#include "../Hypothesis.h"
+#include "../Manager.h"
+#include "../../Scores.h"
+#include "../../System.h"
+
+using namespace std;
+
+namespace Moses2
+{
+
+namespace NSCubePruningMiniStack
+{
+Stack::Stack(const Manager &mgr) :
+ m_mgr(mgr), m_coll(
+ MemPoolAllocator<std::pair<HypoCoverage, Moses2::HypothesisColl*> >(
+ mgr.GetPool())), m_miniStackRecycler(
+ MemPoolAllocator<Moses2::HypothesisColl*>(mgr.GetPool()))
+{
+}
+
+Stack::~Stack()
+{
+ BOOST_FOREACH(const Coll::value_type &val, m_coll) {
+ const Moses2::HypothesisColl *miniStack = val.second;
+ delete miniStack;
+ }
+
+ while (!m_miniStackRecycler.empty()) {
+ Moses2::HypothesisColl *miniStack = m_miniStackRecycler.back();
+ m_miniStackRecycler.pop_back();
+ delete miniStack;
+
+ }
+}
+
+void Stack::Add(Hypothesis *hypo, Recycler<HypothesisBase*> &hypoRecycle,
+ ArcLists &arcLists)
+{
+ HypoCoverage key(&hypo->GetBitmap(), hypo->GetInputPath().range.GetEndPos());
+ Moses2::HypothesisColl &coll = GetMiniStack(key);
+ coll.Add(m_mgr, hypo, hypoRecycle, arcLists);
+}
+
+const Hypothesis *Stack::GetBestHypo() const
+{
+ SCORE bestScore = -std::numeric_limits<SCORE>::infinity();
+ const HypothesisBase *bestHypo = NULL;
+ BOOST_FOREACH(const Coll::value_type &val, m_coll) {
+ const Moses2::HypothesisColl &hypos = *val.second;
+ const Moses2::HypothesisBase *hypo = hypos.GetBestHypo();
+
+ if (hypo && hypo->GetFutureScore() > bestScore) {
+ bestScore = hypo->GetFutureScore();
+ bestHypo = hypo;
+ }
+ }
+ return &bestHypo->Cast<Hypothesis>();
+}
+
+size_t Stack::GetHypoSize() const
+{
+ size_t ret = 0;
+ BOOST_FOREACH(const Coll::value_type &val, m_coll) {
+ const Moses2::HypothesisColl &hypos = *val.second;
+ ret += hypos.GetSize();
+ }
+ return ret;
+}
+
+Moses2::HypothesisColl &Stack::GetMiniStack(const HypoCoverage &key)
+{
+ Moses2::HypothesisColl *ret;
+ Coll::iterator iter = m_coll.find(key);
+ if (iter == m_coll.end()) {
+ if (m_miniStackRecycler.empty()) {
+ ret = new Moses2::HypothesisColl(m_mgr);
+ } else {
+ ret = m_miniStackRecycler.back();
+ ret->Clear();
+ m_miniStackRecycler.pop_back();
+ }
+
+ m_coll[key] = ret;
+ } else {
+ ret = iter->second;
+ }
+ return *ret;
+}
+
+void Stack::Clear()
+{
+ BOOST_FOREACH(const Coll::value_type &val, m_coll) {
+ Moses2::HypothesisColl *miniStack = val.second;
+ m_miniStackRecycler.push_back(miniStack);
+ }
+
+ m_coll.clear();
+}
+
+void Stack::DebugCounts()
+{
+ cerr << "counts=";
+ BOOST_FOREACH(const Coll::value_type &val, GetColl()) {
+ const Moses2::HypothesisColl &miniStack = *val.second;
+ size_t count = miniStack.GetSize();
+ cerr << count << " ";
+ }
+ cerr << endl;
+}
+
+}
+
+}
+
diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.h b/moses2/PhraseBased/CubePruningMiniStack/Stack.h
index 7601f90b2..abd564b3f 100644
--- a/contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.h
+++ b/moses2/PhraseBased/CubePruningMiniStack/Stack.h
@@ -36,25 +36,23 @@ public:
// bitmap and current endPos of hypos
typedef boost::unordered_map<HypoCoverage, Moses2::HypothesisColl*,
- boost::hash<HypoCoverage>, std::equal_to<HypoCoverage>,
- MemPoolAllocator<std::pair<HypoCoverage, Moses2::HypothesisColl*> > > Coll;
+ boost::hash<HypoCoverage>, std::equal_to<HypoCoverage>,
+ MemPoolAllocator<std::pair<HypoCoverage, Moses2::HypothesisColl*> > > Coll;
Stack(const Manager &mgr);
virtual ~Stack();
size_t GetHypoSize() const;
- Coll &GetColl()
- {
+ Coll &GetColl() {
return m_coll;
}
- const Coll &GetColl() const
- {
+ const Coll &GetColl() const {
return m_coll;
}
void Add(Hypothesis *hypo, Recycler<HypothesisBase*> &hypoRecycle,
- ArcLists &arcLists);
+ ArcLists &arcLists);
Moses2::HypothesisColl &GetMiniStack(const HypoCoverage &key);
diff --git a/contrib/moses2/PhraseBased/Hypothesis.cpp b/moses2/PhraseBased/Hypothesis.cpp
index d59efb11a..e907c1a8b 100644
--- a/contrib/moses2/PhraseBased/Hypothesis.cpp
+++ b/moses2/PhraseBased/Hypothesis.cpp
@@ -30,8 +30,7 @@ Hypothesis *Hypothesis::Create(MemPool &pool, Manager &mgr)
ret = static_cast<Hypothesis*>(recycler.Get());
if (ret) {
// got new hypo from recycler. Do nothing
- }
- else {
+ } else {
ret = new (pool.Allocate<Hypothesis>()) Hypothesis(pool, mgr.system);
//cerr << "Hypothesis=" << sizeof(Hypothesis) << " " << ret << endl;
recycler.Keep(ret);
@@ -40,7 +39,7 @@ Hypothesis *Hypothesis::Create(MemPool &pool, Manager &mgr)
}
Hypothesis::Hypothesis(MemPool &pool, const System &system) :
- HypothesisBase(pool, system), m_currTargetWordsRange()
+ HypothesisBase(pool, system), m_currTargetWordsRange()
{
}
@@ -50,7 +49,7 @@ Hypothesis::~Hypothesis()
}
void Hypothesis::Init(Manager &mgr, const InputPathBase &path,
- const TargetPhraseImpl &tp, const Bitmap &bitmap)
+ const TargetPhraseImpl &tp, const Bitmap &bitmap)
{
m_mgr = &mgr;
m_targetPhrase = &tp;
@@ -66,8 +65,8 @@ void Hypothesis::Init(Manager &mgr, const InputPathBase &path,
}
void Hypothesis::Init(Manager &mgr, const Hypothesis &prevHypo,
- const InputPathBase &path, const TargetPhraseImpl &tp, const Bitmap &bitmap,
- SCORE estimatedScore)
+ const InputPathBase &path, const TargetPhraseImpl &tp, const Bitmap &bitmap,
+ SCORE estimatedScore)
{
m_mgr = &mgr;
m_targetPhrase = &tp;
@@ -76,9 +75,9 @@ void Hypothesis::Init(Manager &mgr, const Hypothesis &prevHypo,
m_prevHypo = &prevHypo;
m_currTargetWordsRange.SetStartPos(
- prevHypo.m_currTargetWordsRange.GetEndPos() + 1);
+ prevHypo.m_currTargetWordsRange.GetEndPos() + 1);
m_currTargetWordsRange.SetEndPos(
- prevHypo.m_currTargetWordsRange.GetEndPos() + tp.GetSize());
+ prevHypo.m_currTargetWordsRange.GetEndPos() + tp.GetSize());
m_estimatedScore = estimatedScore;
@@ -116,7 +115,7 @@ std::string Hypothesis::Debug(const System &system) const
// states
const std::vector<const StatefulFeatureFunction*> &sfffs =
- GetManager().system.featureFunctions.GetStatefulFeatureFunctions();
+ GetManager().system.featureFunctions.GetStatefulFeatureFunctions();
size_t numStatefulFFs = sfffs.size();
for (size_t i = 0; i < numStatefulFFs; ++i) {
const FFState &state = *GetState(i);
@@ -152,8 +151,7 @@ void Hypothesis::OutputToStream(std::ostream &out) const
if (m_mgr->system.options.output.ReportSegmentation == 1) {
// just report phrase segmentation
out << "|" << m_path->range.GetStartPos() << "-" << m_path->range.GetEndPos() << "| ";
- }
- else if (m_mgr->system.options.output.ReportSegmentation == 2) {
+ } else if (m_mgr->system.options.output.ReportSegmentation == 2) {
// more detailed info about every segment
out << "|";
@@ -171,19 +169,19 @@ void Hypothesis::OutputToStream(std::ostream &out) const
void Hypothesis::EmptyHypothesisState(const InputType &input)
{
const std::vector<const StatefulFeatureFunction*> &sfffs =
- GetManager().system.featureFunctions.GetStatefulFeatureFunctions();
- BOOST_FOREACH(const StatefulFeatureFunction *sfff, sfffs){
- size_t statefulInd = sfff->GetStatefulInd();
- FFState *state = m_ffStates[statefulInd];
- sfff->EmptyHypothesisState(*state, GetManager(), input, *this);
-}
+ GetManager().system.featureFunctions.GetStatefulFeatureFunctions();
+ BOOST_FOREACH(const StatefulFeatureFunction *sfff, sfffs) {
+ size_t statefulInd = sfff->GetStatefulInd();
+ FFState *state = m_ffStates[statefulInd];
+ sfff->EmptyHypothesisState(*state, GetManager(), input, *this);
+ }
}
void Hypothesis::EvaluateWhenApplied()
{
const std::vector<const StatefulFeatureFunction*> &sfffs =
- GetManager().system.featureFunctions.GetStatefulFeatureFunctions();
- BOOST_FOREACH(const StatefulFeatureFunction *sfff, sfffs){
+ GetManager().system.featureFunctions.GetStatefulFeatureFunctions();
+ BOOST_FOREACH(const StatefulFeatureFunction *sfff, sfffs) {
EvaluateWhenApplied(*sfff);
}
//cerr << *this << endl;
@@ -196,7 +194,7 @@ void Hypothesis::EvaluateWhenApplied(const StatefulFeatureFunction &sfff)
FFState *thisState = m_ffStates[statefulInd];
assert(prevState);
sfff.EvaluateWhenApplied(GetManager(), *this, *prevState, *m_scores,
- *thisState);
+ *thisState);
}
diff --git a/contrib/moses2/PhraseBased/Hypothesis.h b/moses2/PhraseBased/Hypothesis.h
index 7859c1d14..71b95a3e3 100644
--- a/contrib/moses2/PhraseBased/Hypothesis.h
+++ b/moses2/PhraseBased/Hypothesis.h
@@ -35,36 +35,31 @@ public:
// initial, empty hypo
void Init(Manager &mgr, const InputPathBase &path, const TargetPhraseImpl &tp,
- const Bitmap &bitmap);
+ const Bitmap &bitmap);
void Init(Manager &mgr, const Hypothesis &prevHypo, const InputPathBase &path,
- const TargetPhraseImpl &tp, const Bitmap &bitmap, SCORE estimatedScore);
+ const TargetPhraseImpl &tp, const Bitmap &bitmap, SCORE estimatedScore);
size_t hash() const;
bool operator==(const Hypothesis &other) const;
- inline const Bitmap &GetBitmap() const
- {
+ inline const Bitmap &GetBitmap() const {
return *m_sourceCompleted;
}
- inline const InputPathBase &GetInputPath() const
- {
+ inline const InputPathBase &GetInputPath() const {
return *m_path;
}
- inline const Range &GetCurrTargetWordsRange() const
- {
+ inline const Range &GetCurrTargetWordsRange() const {
return m_currTargetWordsRange;
}
- SCORE GetFutureScore() const
- {
+ SCORE GetFutureScore() const {
return GetScores().GetTotalScore() + m_estimatedScore;
}
- const TargetPhrase<Moses2::Word> &GetTargetPhrase() const
- {
+ const TargetPhrase<Moses2::Word> &GetTargetPhrase() const {
return *m_targetPhrase;
}
@@ -77,16 +72,14 @@ public:
void EvaluateWhenApplied();
void EvaluateWhenApplied(const StatefulFeatureFunction &sfff);
- const Hypothesis* GetPrevHypo() const
- {
+ const Hypothesis* GetPrevHypo() const {
return m_prevHypo;
}
/** curr - pos is relative from CURRENT hypothesis's starting index
* (ie, start of sentence would be some negative number, which is
* not allowed- USE WITH CAUTION) */
- inline const Word &GetCurrWord(size_t pos) const
- {
+ inline const Word &GetCurrWord(size_t pos) const {
return GetTargetPhrase()[pos];
}
@@ -108,8 +101,7 @@ protected:
class HypothesisTargetPhraseOrderer
{
public:
- bool operator()(const Hypothesis* a, const Hypothesis* b) const
- {
+ bool operator()(const Hypothesis* a, const Hypothesis* b) const {
PhraseOrdererLexical<Moses2::Word> phraseCmp;
bool ret = phraseCmp(a->GetTargetPhrase(), b->GetTargetPhrase());
/*
diff --git a/contrib/moses2/PhraseBased/InputPath.cpp b/moses2/PhraseBased/InputPath.cpp
index 1a9716380..3761080a4 100644
--- a/contrib/moses2/PhraseBased/InputPath.cpp
+++ b/moses2/PhraseBased/InputPath.cpp
@@ -15,10 +15,10 @@ using namespace std;
namespace Moses2
{
InputPath::InputPath(MemPool &pool, const SubPhrase<Moses2::Word> &subPhrase,
- const Range &range, size_t numPt, const InputPath *prefixPath)
-:InputPathBase(pool, range, numPt, prefixPath)
-,m_numRules(0)
-,subPhrase(subPhrase)
+ const Range &range, size_t numPt, const InputPath *prefixPath)
+ :InputPathBase(pool, range, numPt, prefixPath)
+ ,m_numRules(0)
+ ,subPhrase(subPhrase)
{
targetPhrases = pool.Allocate<const TargetPhrases*>(numPt);
Init<const TargetPhrases*>(targetPhrases, numPt, NULL);
@@ -30,7 +30,7 @@ InputPath::~InputPath()
}
void InputPath::AddTargetPhrases(const PhraseTable &pt,
- const TargetPhrases *tps)
+ const TargetPhrases *tps)
{
size_t ptInd = pt.GetPtInd();
targetPhrases[ptInd] = tps;
diff --git a/contrib/moses2/PhraseBased/InputPath.h b/moses2/PhraseBased/InputPath.h
index 100649155..b29c7f5ec 100644
--- a/contrib/moses2/PhraseBased/InputPath.h
+++ b/moses2/PhraseBased/InputPath.h
@@ -22,14 +22,15 @@ public:
SubPhrase<Moses2::Word> subPhrase;
InputPath(MemPool &pool, const SubPhrase<Moses2::Word> &subPhrase, const Range &range,
- size_t numPt, const InputPath *prefixPath);
+ size_t numPt, const InputPath *prefixPath);
virtual ~InputPath();
void AddTargetPhrases(const PhraseTable &pt, const TargetPhrases *tps);
const TargetPhrases *GetTargetPhrases(const PhraseTable &pt) const;
- size_t GetNumRules() const
- { return m_numRules; }
+ size_t GetNumRules() const {
+ return m_numRules;
+ }
std::string Debug(const System &system) const;
diff --git a/contrib/moses2/PhraseBased/InputPaths.cpp b/moses2/PhraseBased/InputPaths.cpp
index 50c00acbb..50c00acbb 100644
--- a/contrib/moses2/PhraseBased/InputPaths.cpp
+++ b/moses2/PhraseBased/InputPaths.cpp
diff --git a/contrib/moses2/PhraseBased/InputPaths.h b/moses2/PhraseBased/InputPaths.h
index dda374515..9089a7c16 100644
--- a/contrib/moses2/PhraseBased/InputPaths.h
+++ b/moses2/PhraseBased/InputPaths.h
@@ -23,16 +23,17 @@ class InputPaths: public InputPathsBase
public:
void Init(const InputType &input, const ManagerBase &mgr);
- const InputPath &GetBlank() const
- {
+ const InputPath &GetBlank() const {
return *m_blank;
}
- Matrix<InputPath*> &GetMatrix()
- { return *m_matrix; }
+ Matrix<InputPath*> &GetMatrix() {
+ return *m_matrix;
+ }
- const Matrix<InputPath*> &GetMatrix() const
- { return *m_matrix; }
+ const Matrix<InputPath*> &GetMatrix() const {
+ return *m_matrix;
+ }
protected:
InputPath *m_blank;
diff --git a/moses2/PhraseBased/Manager.cpp b/moses2/PhraseBased/Manager.cpp
new file mode 100644
index 000000000..28073d4f6
--- /dev/null
+++ b/moses2/PhraseBased/Manager.cpp
@@ -0,0 +1,278 @@
+/*
+ * Manager.cpp
+ *
+ * Created on: 23 Oct 2015
+ * Author: hieu
+ */
+#include <boost/foreach.hpp>
+#include <boost/functional/hash.hpp>
+#include <boost/unordered_set.hpp>
+#include <vector>
+#include <sstream>
+#include "Manager.h"
+#include "TargetPhraseImpl.h"
+#include "InputPath.h"
+#include "Sentence.h"
+
+#include "Normal/Search.h"
+#include "CubePruningMiniStack/Search.h"
+
+/*
+ #include "CubePruningPerMiniStack/Search.h"
+ #include "CubePruningPerBitmap/Search.h"
+ #include "CubePruningCardinalStack/Search.h"
+ #include "CubePruningBitmapStack/Search.h"
+ */
+#include "../TrellisPaths.h"
+#include "../System.h"
+#include "../Phrase.h"
+#include "../InputPathsBase.h"
+#include "../TranslationModel/PhraseTable.h"
+#include "../TranslationModel/UnknownWordPenalty.h"
+#include "../legacy/Range.h"
+#include "../PhraseBased/TargetPhrases.h"
+
+using namespace std;
+
+namespace Moses2
+{
+/**
+ * Construct the phrase-based manager for one input string.
+ *
+ * All arguments are forwarded to ManagerBase. The search object and the
+ * bitmap store start out as NULL; they are created in Init().
+ */
+Manager::Manager(System &sys,
+                 const TranslationTask &task,
+                 const std::string &inputStr,
+                 long translationId)
+  : ManagerBase(sys, task, inputStr, translationId),
+    m_search(NULL),
+    m_bitmaps(NULL)
+{
+}
+
+/**
+ * Free the search object and the bitmap store created with new in Init().
+ * Either pointer may still be NULL (as set by the constructor); deleting
+ * NULL is a no-op.
+ */
+Manager::~Manager()
+{
+  delete m_search;
+  delete m_bitmaps;
+}
+
+/**
+ * Prepare everything the search needs for the current input string.
+ *
+ * In order: set up the memory pools, parse the input into a Sentence,
+ * create the bitmap store and the zero-length initial target phrase, build
+ * the input paths, let the UnknownWordPenalty feature process XML markup,
+ * query every phrase table in system.mappings, compute the future-score
+ * estimates and finally instantiate the configured search algorithm.
+ *
+ * Throws (through UTIL_THROW*) when no UnknownWordPenalty feature exists or
+ * when the configured search algorithm is not supported.
+ */
+void Manager::Init()
+{
+  InitPools();
+
+  // parse the raw input
+  FactorCollection &factors = system.GetVocab();
+  m_input = Moses2::Sentence::CreateFromString(GetPool(), factors, system, m_inputStr);
+
+  m_bitmaps = new Bitmaps(GetPool());
+
+  // zero-length target phrase, placement-constructed inside the pool
+  const PhraseTable &firstPt = *system.featureFunctions.phraseTables[0];
+  m_initPhrase = new (GetPool().Allocate<TargetPhraseImpl>())
+  TargetPhraseImpl(GetPool(), firstPt, system, 0);
+
+  const Sentence &sentence = static_cast<const Sentence&>(GetInput());
+  m_inputPaths.Init(sentence, *this);
+
+  // XML markup is handled by the unknown-word penalty feature
+  const UnknownWordPenalty *unkWP = system.featureFunctions.GetUnknownWordPenalty();
+  UTIL_THROW_IF2(unkWP == NULL, "There must be a UnknownWordPenalty FF");
+  unkWP->ProcessXML(*this, GetPool(), sentence, m_inputPaths);
+
+  // look up the input paths in every mapped phrase table
+  const std::vector<const PhraseTable*> &pts = system.mappings;
+  BOOST_FOREACH(const PhraseTable *pt, pts) {
+    pt->Lookup(*this, m_inputPaths);
+  }
+
+  CalcFutureScore();
+
+  m_bitmaps->Init(sentence.GetSize(), vector<bool>(0));
+
+  // pick the search implementation; the other cube-pruning variants are
+  // not compiled in and therefore end up in the default branch
+  switch (system.options.search.algo) {
+  case Normal:
+    m_search = new NSNormal::Search(*this);
+    break;
+  case NormalBatch:
+    UTIL_THROW2("Not implemented");
+    break;
+  case CubePruning:
+  case CubePruningMiniStack:
+    m_search = new NSCubePruningMiniStack::Search(*this);
+    break;
+  default:
+    UTIL_THROW2("Unknown search algorithm");
+  }
+}
+
+/** Translate the input: run Init() and then the selected search. */
+void Manager::Decode()
+{
+  Init();
+  m_search->Decode();
+}
+
+/**
+ * Fill m_estimatedScores with an optimistic score for every source span.
+ *
+ * Step 1: every cell is initialised to -infinity.
+ * Step 2: for each input path, the cell of its range is set to the highest
+ *         GetFutureScore() among all target phrases found for that path
+ *         (it stays -infinity when the path has no target phrases).
+ * Step 3: spans of more than one word are improved, shortest first, with
+ *         the best sum of two adjacent sub-spans, as in chart parsing.
+ */
+void Manager::CalcFutureScore()
+{
+  const Sentence &sentence = static_cast<const Sentence&>(GetInput());
+  const size_t size = sentence.GetSize();
+  const SCORE negInf = -numeric_limits<SCORE>::infinity();
+
+  m_estimatedScores =
+    new (GetPool().Allocate<EstimatedScores>()) EstimatedScores(GetPool(), size);
+  m_estimatedScores->InitTriangle(negInf);
+
+  // walk all the translation options and record the best-scoring option for each span
+  const size_t numPt = system.mappings.size();
+  BOOST_FOREACH(const InputPathBase *pathBase, m_inputPaths) {
+    const InputPath *path = static_cast<const InputPath*>(pathBase);
+    const Range &range = path->range;
+    SCORE bestScore = negInf;
+
+    for (size_t i = 0; i < numPt; ++i) {
+      const TargetPhrases *tps = path->targetPhrases[i];
+      if (tps) {
+        BOOST_FOREACH(const TargetPhraseImpl *tp, *tps) {
+          const SCORE score = tp->GetFutureScore();
+          if (score > bestScore) {
+            bestScore = score;
+          }
+        }
+      }
+    }
+    m_estimatedScores->SetValue(range.GetStartPos(), range.GetEndPos(), bestScore);
+  }
+
+  // now fill all the cells in the strictly upper triangle.
+  // The diagonal is not modified here: where no translation option covers a
+  // single-word span, the -infinity written above stays in the matrix.
+  // Like in chart parsing, each cell should hold the highest score of either
+  // the full-span option or the sum of the scores of two joined smaller spans.
+  for (size_t colstart = 1; colstart < size; colstart++) {
+    for (size_t diagshift = 0; diagshift < size - colstart; diagshift++) {
+      const size_t sPos = diagshift;
+      const size_t ePos = colstart + diagshift;
+      for (size_t joinAt = sPos; joinAt < ePos; joinAt++) {
+        // keep the sum in SCORE, the type the matrix stores
+        const SCORE joinedScore = m_estimatedScores->GetValue(sPos, joinAt)
+                                  + m_estimatedScores->GetValue(joinAt + 1, ePos);
+
+        if (joinedScore > m_estimatedScores->GetValue(sPos, ePos)) {
+          m_estimatedScores->SetValue(sPos, ePos, joinedScore);
+        }
+      }
+    }
+  }
+}
+
+/**
+ * Render the best hypothesis found by the search as a string.
+ *
+ * When ReportHypoScore is set the output is prefixed with the total score
+ * and a space, or with "0 " if the search produced no hypothesis. Without a
+ * hypothesis nothing else is written.
+ */
+std::string Manager::OutputBest() const
+{
+  stringstream out;
+  Moses2::FixPrecision(out);
+
+  const Hypothesis *bestHypo = m_search->GetBestHypo();
+
+  if (bestHypo == NULL) {
+    // no translation
+    if (system.options.output.ReportHypoScore) {
+      out << "0 ";
+    }
+    return out.str();
+  }
+
+  if (system.options.output.ReportHypoScore) {
+    out << bestHypo->GetScores().GetTotalScore() << " ";
+  }
+  bestHypo->OutputToStream(out);
+
+  return out.str();
+}
+
+/**
+ * Produce the n-best list for this sentence, one entry per line in the form
+ * "<translationId> ||| <path output>".
+ *
+ * Paths are taken from the contender queue one at a time; each path taken
+ * contributes its deviant paths back to the queue. The loop ends when
+ * nbest_size entries have been written, when the queue is empty, or after
+ * nbest_size * factor paths have been examined.
+ *
+ * With only_distinct set, a path is written only if the hash of its target
+ * string has not been seen before. Only the hash is stored, so two
+ * different strings with the same hash count as duplicates.
+ */
+std::string Manager::OutputNBest()
+{
+  arcLists.Sort();
+
+  boost::unordered_set<size_t> distinctHypos;
+
+  TrellisPaths<TrellisPath> contenders;
+  m_search->AddInitialTrellisPaths(contenders);
+
+  long transId = GetTranslationId();
+
+  // MAIN LOOP
+  stringstream out;
+  //Moses2::FixPrecision(out);
+
+  size_t maxIter = system.options.nbest.nbest_size * system.options.nbest.factor;
+  size_t bestInd = 0;   // number of entries written so far
+  for (size_t i = 0; i < maxIter; ++i) {
+    // stop once nbest_size entries are out; the previous '>' test let
+    // one extra entry through
+    if (bestInd >= system.options.nbest.nbest_size || contenders.empty()) {
+      break;
+    }
+
+    TrellisPath *path = contenders.Get();
+
+    bool ok = false;
+    if (system.options.nbest.only_distinct) {
+      string tgtPhrase = path->OutputTargetPhrase(system);
+      boost::hash<std::string> string_hash;
+      size_t hash = string_hash(tgtPhrase);
+
+      // insert().second is true only for a hash not seen before
+      if (distinctHypos.insert(hash).second) {
+        ok = true;
+      }
+    } else {
+      ok = true;
+    }
+
+    if (ok) {
+      ++bestInd;
+      out << transId << " ||| ";
+      path->OutputToStream(out, system);
+      out << "\n";
+    }
+
+    // create next paths, also for paths skipped as duplicates
+    path->CreateDeviantPaths(contenders, arcLists, GetPool(), system);
+
+    delete path;
+  }
+
+  return out.str();
+}
+
+/** Translation-option output is not produced by this manager: always an empty string. */
+std::string Manager::OutputTransOpt()
+{
+  return std::string();
+}
+
+}
+
diff --git a/contrib/moses2/PhraseBased/Manager.h b/moses2/PhraseBased/Manager.h
index 3f42d6b27..1a348f75f 100644
--- a/contrib/moses2/PhraseBased/Manager.h
+++ b/moses2/PhraseBased/Manager.h
@@ -37,21 +37,25 @@ class Manager: public ManagerBase
{
public:
Manager(System &sys, const TranslationTask &task, const std::string &inputStr,
- long translationId);
+ long translationId);
virtual ~Manager();
- Bitmaps &GetBitmaps()
- { return *m_bitmaps; }
+ Bitmaps &GetBitmaps() {
+ return *m_bitmaps;
+ }
- const EstimatedScores &GetEstimatedScores() const
- { return *m_estimatedScores; }
+ const EstimatedScores &GetEstimatedScores() const {
+ return *m_estimatedScores;
+ }
- const InputPaths &GetInputPaths() const
- { return m_inputPaths; }
+ const InputPaths &GetInputPaths() const {
+ return m_inputPaths;
+ }
- const TargetPhraseImpl &GetInitPhrase() const
- { return *m_initPhrase; }
+ const TargetPhraseImpl &GetInitPhrase() const {
+ return *m_initPhrase;
+ }
void Decode();
std::string OutputBest() const;
diff --git a/moses2/PhraseBased/Normal/Search.cpp b/moses2/PhraseBased/Normal/Search.cpp
new file mode 100644
index 000000000..1c158543d
--- /dev/null
+++ b/moses2/PhraseBased/Normal/Search.cpp
@@ -0,0 +1,161 @@
+/*
+ * SearchNormal.cpp
+ *
+ * Created on: 25 Oct 2015
+ * Author: hieu
+ */
+
+#include "Search.h"
+#include <algorithm>
+#include <boost/foreach.hpp>
+#include "Stack.h"
+#include "../Manager.h"
+#include "../TrellisPath.h"
+#include "../Sentence.h"
+#include "../../TrellisPaths.h"
+#include "../../InputPathsBase.h"
+#include "../../Phrase.h"
+#include "../../System.h"
+#include "../../PhraseBased/TargetPhrases.h"
+
+using namespace std;
+
+namespace Moses2
+{
+namespace NSNormal
+{
+
+/**
+ * Build the normal stack search for the given manager.
+ * The stacks are only sized later, in Decode().
+ */
+Search::Search(Manager &mgr)
+  : Moses2::Search(mgr),
+    m_stacks(mgr)
+{
+}
+
+/** Nothing is released explicitly here. */
+Search::~Search()
+{
+}
+
+/**
+ * Run the stack decoder over the whole sentence.
+ *
+ * Allocates sentence-length + 1 stacks, adds the empty initial hypothesis
+ * and then expands the stacks in index order. Every stack except the last
+ * is deleted right after it has been expanded, to save memory.
+ */
+void Search::Decode()
+{
+  // init stacks
+  const Sentence &sentence = static_cast<const Sentence&>(mgr.GetInput());
+  m_stacks.Init(mgr, sentence.GetSize() + 1);
+
+  // the empty hypothesis everything else is extended from
+  const Bitmap &emptyCoverage = mgr.GetBitmaps().GetInitialBitmap();
+  Hypothesis *seed = Hypothesis::Create(mgr.GetSystemPool(), mgr);
+  seed->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(),
+             emptyCoverage);
+  seed->EmptyHypothesisState(mgr.GetInput());
+
+  m_stacks.Add(seed, mgr.GetHypoRecycle(), mgr.arcLists);
+
+  for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) {
+    Decode(stackInd);
+
+    // the final stack is kept: it holds the finished hypotheses
+    const bool isLastStack = (stackInd + 1 == m_stacks.GetSize());
+    if (!isLastStack) {
+      m_stacks.Delete(stackInd);
+    }
+  }
+}
+
+/**
+ * Expand one stack: try to extend each of its sorted-and-pruned hypotheses
+ * with every input path. The last stack is never expanded.
+ */
+void Search::Decode(size_t stackInd)
+{
+  Stack &stack = m_stacks[stackInd];
+  if (&stack == &m_stacks.Back()) {
+    // last stack. don't do anything
+    return;
+  }
+
+  const Hypotheses &hypos = stack.GetSortedAndPrunedHypos(mgr, mgr.arcLists);
+  const InputPaths &paths = mgr.GetInputPaths();
+
+  BOOST_FOREACH(const InputPathBase *pathBase, paths) {
+    const InputPath *path = static_cast<const InputPath*>(pathBase);
+    BOOST_FOREACH(const HypothesisBase *hypoBase, hypos) {
+      const Hypothesis *hypo = static_cast<const Hypothesis*>(hypoBase);
+      Extend(*hypo, *path);
+    }
+  }
+}
+
+/**
+ * Try to extend a hypothesis over the source range of one input path.
+ *
+ * Nothing happens if CanExtend() rejects the range or if the input's
+ * reordering constraint forbids it. Otherwise the new coverage bitmap and
+ * its estimated score are computed once and reused for the target phrases
+ * of every phrase table that has entries for this path.
+ */
+void Search::Extend(const Hypothesis &hypo, const InputPath &path)
+{
+  const Bitmap &coverage = hypo.GetBitmap();
+  const Range &prevRange = hypo.GetInputPath().range;
+  const Range &nextRange = path.range;
+
+  if (!CanExtend(coverage, prevRange.GetEndPos(), nextRange)) {
+    return;
+  }
+
+  const ReorderingConstraint &constraint = mgr.GetInput().GetReorderingConstraint();
+  if (!constraint.Check(coverage, nextRange.GetStartPos(), nextRange.GetEndPos())) {
+    return;
+  }
+
+  // extend this hypo
+  const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(coverage, nextRange);
+  SCORE estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
+
+  const size_t numPt = mgr.system.mappings.size();
+  const TargetPhrases **tpsAllPt = path.targetPhrases;
+  for (size_t ptInd = 0; ptInd < numPt; ++ptInd) {
+    const TargetPhrases *tps = tpsAllPt[ptInd];
+    if (tps == NULL) {
+      continue;   // this phrase table has nothing for the path
+    }
+    Extend(hypo, *tps, path, newBitmap, estimatedScore);
+  }
+}
+
+/** Extend the hypothesis once with each target phrase in the collection. */
+void Search::Extend(const Hypothesis &hypo, const TargetPhrases &tps,
+                    const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore)
+{
+  BOOST_FOREACH(const TargetPhraseImpl *candidate, tps) {
+    Extend(hypo, *candidate, path, newBitmap, estimatedScore);
+  }
+}
+
+/**
+ * Create the hypothesis that results from applying one target phrase to
+ * the given hypothesis, evaluate it and add it to the stacks.
+ */
+void Search::Extend(const Hypothesis &hypo, const TargetPhraseImpl &tp,
+                    const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore)
+{
+  Hypothesis *extended = Hypothesis::Create(mgr.GetSystemPool(), mgr);
+  extended->Init(mgr, hypo, path, tp, newBitmap, estimatedScore);
+  extended->EvaluateWhenApplied();
+
+  m_stacks.Add(extended, mgr.GetHypoRecycle(), mgr.arcLists);
+}
+
+/** Return whatever the last stack reports as its best hypothesis. */
+const Hypothesis *Search::GetBestHypo() const
+{
+  return m_stacks.Back().GetBestHypo<Hypothesis>();
+}
+
+/**
+ * Add one new TrellisPath to paths for every sorted-and-pruned hypothesis
+ * of the last stack. The paths are allocated with new and handed to the
+ * collection.
+ */
+void Search::AddInitialTrellisPaths(TrellisPaths<TrellisPath> &paths) const
+{
+  const Stack &finalStack = m_stacks.Back();
+  const Hypotheses &finalHypos = finalStack.GetSortedAndPrunedHypos(mgr, mgr.arcLists);
+
+  BOOST_FOREACH(const HypothesisBase *hypoBase, finalHypos) {
+    paths.Add(new TrellisPath(static_cast<const Hypothesis*>(hypoBase), mgr.arcLists));
+  }
+}
+
+} // namespace
+}
+
diff --git a/contrib/moses2/PhraseBased/Normal/Search.h b/moses2/PhraseBased/Normal/Search.h
index cefefa924..0d487e32b 100644
--- a/contrib/moses2/PhraseBased/Normal/Search.h
+++ b/moses2/PhraseBased/Normal/Search.h
@@ -41,9 +41,9 @@ protected:
void Decode(size_t stackInd);
void Extend(const Hypothesis &hypo, const InputPath &path);
void Extend(const Hypothesis &hypo, const TargetPhrases &tps,
- const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore);
+ const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore);
void Extend(const Hypothesis &hypo, const TargetPhraseImpl &tp,
- const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore);
+ const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore);
};
diff --git a/contrib/moses2/PhraseBased/Normal/Stack.cpp b/moses2/PhraseBased/Normal/Stack.cpp
index 782ce5b84..efaa86f2d 100644
--- a/contrib/moses2/PhraseBased/Normal/Stack.cpp
+++ b/moses2/PhraseBased/Normal/Stack.cpp
@@ -20,7 +20,7 @@ namespace NSNormal
{
Stack::Stack(const Manager &mgr) :
- HypothesisColl(mgr)
+ HypothesisColl(mgr)
{
// TODO Auto-generated constructor stub
diff --git a/contrib/moses2/PhraseBased/Normal/Stack.h b/moses2/PhraseBased/Normal/Stack.h
index 4ad707ce4..4ad707ce4 100644
--- a/contrib/moses2/PhraseBased/Normal/Stack.h
+++ b/moses2/PhraseBased/Normal/Stack.h
diff --git a/contrib/moses2/PhraseBased/Normal/Stacks.cpp b/moses2/PhraseBased/Normal/Stacks.cpp
index bb7239cf8..a47709676 100644
--- a/contrib/moses2/PhraseBased/Normal/Stacks.cpp
+++ b/moses2/PhraseBased/Normal/Stacks.cpp
@@ -18,7 +18,7 @@ namespace NSNormal
{
Stacks::Stacks(const Manager &mgr) :
- m_mgr(mgr)
+ m_mgr(mgr)
{
// TODO Auto-generated constructor stub
@@ -46,8 +46,7 @@ std::string Stacks::Debug(const System &system) const
const Stack *stack = m_stacks[i];
if (stack) {
out << stack->GetSize() << " ";
- }
- else {
+ } else {
out << "N ";
}
}
@@ -55,7 +54,7 @@ std::string Stacks::Debug(const System &system) const
}
void Stacks::Add(Hypothesis *hypo, Recycler<HypothesisBase*> &hypoRecycle,
- ArcLists &arcLists)
+ ArcLists &arcLists)
{
size_t numWordsCovered = hypo->GetBitmap().GetNumWordsCovered();
//cerr << "numWordsCovered=" << numWordsCovered << endl;
diff --git a/contrib/moses2/PhraseBased/Normal/Stacks.h b/moses2/PhraseBased/Normal/Stacks.h
index 58626f234..b6da78a4e 100644
--- a/contrib/moses2/PhraseBased/Normal/Stacks.h
+++ b/moses2/PhraseBased/Normal/Stacks.h
@@ -27,29 +27,25 @@ public:
void Init(const Manager &mgr, size_t numStacks);
- size_t GetSize() const
- {
+ size_t GetSize() const {
return m_stacks.size();
}
- const Stack &Back() const
- {
+ const Stack &Back() const {
return *m_stacks.back();
}
- Stack &operator[](size_t ind)
- {
+ Stack &operator[](size_t ind) {
return *m_stacks[ind];
}
- void Delete(size_t ind)
- {
+ void Delete(size_t ind) {
delete m_stacks[ind];
m_stacks[ind] = NULL;
}
void Add(Hypothesis *hypo, Recycler<HypothesisBase*> &hypoRecycle,
- ArcLists &arcLists);
+ ArcLists &arcLists);
std::string Debug(const System &system) const;
diff --git a/contrib/moses2/PhraseBased/PhraseImpl.cpp b/moses2/PhraseBased/PhraseImpl.cpp
index 00f55a35b..d72e36083 100644
--- a/contrib/moses2/PhraseBased/PhraseImpl.cpp
+++ b/moses2/PhraseBased/PhraseImpl.cpp
@@ -11,7 +11,7 @@ using namespace std;
namespace Moses2
{
PhraseImpl *PhraseImpl::CreateFromString(MemPool &pool, FactorCollection &vocab,
- const System &system, const std::string &str)
+ const System &system, const std::string &str)
{
std::vector<std::string> toks = Moses2::Tokenize(str);
size_t size = toks.size();
diff --git a/contrib/moses2/PhraseBased/PhraseImpl.h b/moses2/PhraseBased/PhraseImpl.h
index 787cdf58d..f199e62d4 100644
--- a/contrib/moses2/PhraseBased/PhraseImpl.h
+++ b/moses2/PhraseBased/PhraseImpl.h
@@ -9,11 +9,10 @@ class PhraseImpl: public PhraseImplTemplate<Word>
{
public:
static PhraseImpl *CreateFromString(MemPool &pool, FactorCollection &vocab,
- const System &system, const std::string &str);
+ const System &system, const std::string &str);
PhraseImpl(MemPool &pool, size_t size) :
- PhraseImplTemplate<Word>(pool, size)
- {
+ PhraseImplTemplate<Word>(pool, size) {
}
};
diff --git a/contrib/moses2/PhraseBased/ReorderingConstraint.cpp b/moses2/PhraseBased/ReorderingConstraint.cpp
index cff09cc24..0e84b1f3f 100644
--- a/contrib/moses2/PhraseBased/ReorderingConstraint.cpp
+++ b/moses2/PhraseBased/ReorderingConstraint.cpp
@@ -237,12 +237,12 @@ std::ostream &ReorderingConstraint::Debug(std::ostream &out, const System &syste
out << "Walls:";
for (size_t i = 0; i < m_size; ++i) {
- out << m_wall[i];
+ out << m_wall[i];
}
out << " Local walls:";
for (size_t i = 0; i < m_size; ++i) {
- out << m_localWall[i] << " ";
+ out << m_localWall[i] << " ";
}
return out;
diff --git a/contrib/moses2/PhraseBased/ReorderingConstraint.h b/moses2/PhraseBased/ReorderingConstraint.h
index b8d2461e5..b8d2461e5 100644
--- a/contrib/moses2/PhraseBased/ReorderingConstraint.h
+++ b/moses2/PhraseBased/ReorderingConstraint.h
diff --git a/contrib/moses2/PhraseBased/Search.cpp b/moses2/PhraseBased/Search.cpp
index 1a85e15f5..48f9995ff 100644
--- a/contrib/moses2/PhraseBased/Search.cpp
+++ b/moses2/PhraseBased/Search.cpp
@@ -15,7 +15,7 @@ namespace Moses2
{
Search::Search(Manager &mgr) :
- mgr(mgr)
+ mgr(mgr)
{
// TODO Auto-generated constructor stub
@@ -27,7 +27,7 @@ Search::~Search()
}
bool Search::CanExtend(const Bitmap &hypoBitmap, size_t hypoRangeEndPos,
- const Range &pathRange)
+ const Range &pathRange)
{
const size_t hypoFirstGapPos = hypoBitmap.GetFirstGapPos();
@@ -46,7 +46,7 @@ bool Search::CanExtend(const Bitmap &hypoBitmap, size_t hypoRangeEndPos,
if (mgr.system.options.reordering.max_distortion >= 0) {
// distortion limit
int distortion = ComputeDistortionDistance(hypoRangeEndPos,
- pathRange.GetStartPos());
+ pathRange.GetStartPos());
if (distortion > mgr.system.options.reordering.max_distortion) {
//cerr << " NO" << endl;
return false;
@@ -88,8 +88,7 @@ bool Search::CanExtend(const Bitmap &hypoBitmap, size_t hypoRangeEndPos,
if (isLeftMostEdge) {
// any length extension is okay if starting at left-most edge
- }
- else { // starting somewhere other than left-most edge, use caution
+ } else { // starting somewhere other than left-most edge, use caution
// the basic idea is this: we would like to translate a phrase
// starting from a position further right than the left-most
// open gap. The distortion penalty for the following phrase
@@ -101,7 +100,7 @@ bool Search::CanExtend(const Bitmap &hypoBitmap, size_t hypoRangeEndPos,
Range bestNextExtension(hypoFirstGapPos, hypoFirstGapPos);
if (ComputeDistortionDistance(pathRange.GetEndPos(),
- bestNextExtension.GetStartPos()) > mgr.system.options.reordering.max_distortion) {
+ bestNextExtension.GetStartPos()) > mgr.system.options.reordering.max_distortion) {
//cerr << " NO" << endl;
return false;
}
diff --git a/contrib/moses2/PhraseBased/Search.h b/moses2/PhraseBased/Search.h
index 8e9e9f787..c90856676 100644
--- a/contrib/moses2/PhraseBased/Search.h
+++ b/moses2/PhraseBased/Search.h
@@ -39,16 +39,14 @@ protected:
//ArcLists m_arcLists;
bool CanExtend(const Bitmap &hypoBitmap, size_t hypoRangeEndPos,
- const Range &pathRange);
+ const Range &pathRange);
inline int ComputeDistortionDistance(size_t prevEndPos,
- size_t currStartPos) const
- {
+ size_t currStartPos) const {
int dist = 0;
if (prevEndPos == NOT_FOUND) {
dist = currStartPos;
- }
- else {
+ } else {
dist = (int)prevEndPos - (int)currStartPos + 1;
}
return abs(dist);
diff --git a/moses2/PhraseBased/Sentence.cpp b/moses2/PhraseBased/Sentence.cpp
new file mode 100644
index 000000000..173f00419
--- /dev/null
+++ b/moses2/PhraseBased/Sentence.cpp
@@ -0,0 +1,173 @@
+/*
+ * Sentence.cpp
+ *
+ * Created on: 14 Dec 2015
+ * Author: hieu
+ */
+#include <boost/property_tree/ptree.hpp>
+#include <boost/property_tree/xml_parser.hpp>
+#include "Sentence.h"
+#include "../System.h"
+#include "../parameters/AllOptions.h"
+#include "../legacy/Util2.h"
+
+using namespace std;
+
+namespace Moses2
+{
+
+/**
+ * Build a pool-allocated Sentence from a raw input line.
+ *
+ * With an XML policy set in the input options the line is parsed as XML
+ * markup by CreateFromStringXML(). Otherwise it is tokenized and the tokens
+ * become the words of the sentence.
+ */
+Sentence *Sentence::CreateFromString(MemPool &pool, FactorCollection &vocab,
+                                     const System &system, const std::string &str)
+{
+  if (system.options.input.xml_policy) {
+    // xml
+    return CreateFromStringXML(pool, vocab, system, str);
+  }
+
+  // no xml
+  std::vector<std::string> toks = Tokenize(str);
+  const size_t numWords = toks.size();
+
+  Sentence *sentence = new (pool.Allocate<Sentence>()) Sentence(pool, numWords);
+  sentence->PhraseImplTemplate<Word>::CreateFromString(vocab, system, toks, false);
+  return sentence;
+}
+
+/**
+ * Build a pool-allocated Sentence from an input line that carries XML markup.
+ *
+ * The line is wrapped in an <xml> root element and parsed with pugixml; the
+ * parse result is not inspected. XMLParse() collects the word tokens and one
+ * XMLOption per element. Each option is then applied according to its
+ * element name:
+ *   wall  - reordering wall before startPos (ignored when startPos is 0)
+ *   zone  - reordering zone over the words the element spans
+ *   ne    - stores the entity as the placeholder factor of the single word
+ *   other - kept as a forced-translation XML option of the sentence
+ */
+Sentence *Sentence::CreateFromStringXML(MemPool &pool, FactorCollection &vocab,
+                                        const System &system, const std::string &str)
+{
+  vector<XMLOption*> xmlOptions;
+  pugi::xml_document doc;
+
+  const string wrapped = "<xml>" + str + "</xml>";
+  doc.load(wrapped.c_str(),
+           pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_eol | pugi::parse_comments);
+  pugi::xml_node topNode = doc.child("xml");
+
+  std::vector<std::string> toks;
+  XMLParse(pool, system, 0, topNode, toks, xmlOptions);
+
+  // create words
+  const size_t size = toks.size();
+  Sentence *ret = new (pool.Allocate<Sentence>()) Sentence(pool, size);
+  ret->PhraseImplTemplate<Word>::CreateFromString(vocab, system, toks, false);
+
+  // xml
+  ret->Init(system, size, system.options.reordering.max_distortion);
+
+  ReorderingConstraint &reorderingConstraint = ret->GetReorderingConstraint();
+
+  // set reordering walls, if "-monotone-at-punctuation" is set
+  if (system.options.reordering.monotone_at_punct && ret->GetSize()) {
+    reorderingConstraint.SetMonotoneAtPunctuation(*ret);
+  }
+
+  // apply what was collected from the xml markup
+  for (vector<XMLOption*>::const_iterator iter = xmlOptions.begin();
+       iter != xmlOptions.end(); ++iter) {
+    const XMLOption *xmlOption = *iter;
+    const std::string nodeName(xmlOption->GetNodeName());
+
+    if (nodeName == "wall") {
+      if (xmlOption->startPos) {
+        UTIL_THROW_IF2(xmlOption->startPos > ret->GetSize(), "wall is beyond the sentence"); // no buggy walls, please
+        reorderingConstraint.SetWall(xmlOption->startPos - 1, true);
+      }
+    } else if (nodeName == "zone") {
+      reorderingConstraint.SetZone(xmlOption->startPos,
+                                   xmlOption->startPos + xmlOption->phraseSize - 1);
+    } else if (nodeName == "ne") {
+      FactorType placeholderFactor = system.options.input.placeholder_factor;
+      UTIL_THROW_IF2(placeholderFactor == NOT_FOUND,
+                     "Placeholder XML in input. Must have argument -placeholder-factor [NUM]");
+      UTIL_THROW_IF2(xmlOption->phraseSize != 1,
+                     "Placeholder must only cover 1 word");
+
+      const Factor *factor = vocab.AddFactor(xmlOption->GetEntity(), system, false);
+      (*ret)[xmlOption->startPos][placeholderFactor] = factor;
+    } else {
+      // default - forced translation. Add to class variable
+      ret->AddXMLOption(system, xmlOption);
+    }
+  }
+  reorderingConstraint.FinalizeWalls();
+
+  return ret;
+}
+
+/**
+ * Walk the children of parentNode in document order.
+ *
+ * The value of each child is tokenized and appended to toks. For every
+ * child that has a name, an XMLOption is allocated in the pool, filled from
+ * the optional "translation", "entity" and "prob" attributes and appended
+ * to xmlOptions before the function recurses into that child. startPos is
+ * the token count before the child was processed and phraseSize the number
+ * of tokens added by the child and its descendants.
+ */
+void Sentence::XMLParse(
+  MemPool &pool,
+  const System &system,
+  size_t depth,
+  const pugi::xml_node &parentNode,
+  std::vector<std::string> &toks,
+  vector<XMLOption*> &xmlOptions)
+{
+  // pugixml
+  for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling()) {
+    string nodeName = childNode.name();
+    int startPos = toks.size();
+
+    // text carried by this node, if any
+    string text = childNode.value();
+    if (!text.empty()) {
+      std::vector<std::string> words = Tokenize(text);
+      toks.insert(toks.end(), words.begin(), words.end());
+    }
+
+    if (nodeName.empty()) {
+      // unnamed node: no option to record
+      continue;
+    }
+
+    XMLOption *xmlOption = new (pool.Allocate<XMLOption>()) XMLOption(pool, nodeName, startPos);
+
+    pugi::xml_attribute translationAttr = childNode.attribute("translation");
+    if (!translationAttr.empty()) {
+      xmlOption->SetTranslation(pool, translationAttr.as_string());
+    }
+
+    pugi::xml_attribute entityAttr = childNode.attribute("entity");
+    if (!entityAttr.empty()) {
+      xmlOption->SetEntity(pool, entityAttr.as_string());
+    }
+
+    pugi::xml_attribute probAttr = childNode.attribute("prob");
+    if (!probAttr.empty()) {
+      xmlOption->prob = probAttr.as_float();
+    }
+
+    // registered before recursing, so an element precedes its descendants
+    xmlOptions.push_back(xmlOption);
+
+    // recursively call this function. For proper recursive trees
+    XMLParse(pool, system, depth + 1, childNode, toks, xmlOptions);
+
+    xmlOption->phraseSize = toks.size() - startPos;
+  }
+}
+
+} /* namespace Moses2 */
+
diff --git a/contrib/moses2/PhraseBased/Sentence.h b/moses2/PhraseBased/Sentence.h
index 2e9e834a7..ff7c52138 100644
--- a/contrib/moses2/PhraseBased/Sentence.h
+++ b/moses2/PhraseBased/Sentence.h
@@ -24,11 +24,11 @@ class Sentence: public InputType, public PhraseImpl
public:
static Sentence *CreateFromString(MemPool &pool, FactorCollection &vocab,
- const System &system, const std::string &str);
+ const System &system, const std::string &str);
Sentence(MemPool &pool, size_t size)
- :InputType(pool)
- ,PhraseImpl(pool, size)
+ :InputType(pool)
+ ,PhraseImpl(pool, size)
{}
virtual ~Sentence()
@@ -36,15 +36,15 @@ public:
protected:
static Sentence *CreateFromStringXML(MemPool &pool, FactorCollection &vocab,
- const System &system, const std::string &str);
+ const System &system, const std::string &str);
static void XMLParse(
- MemPool &pool,
- const System &system,
- size_t depth,
- const pugi::xml_node &parentNode,
- std::vector<std::string> &toks,
- std::vector<XMLOption*> &xmlOptions);
+ MemPool &pool,
+ const System &system,
+ size_t depth,
+ const pugi::xml_node &parentNode,
+ std::vector<std::string> &toks,
+ std::vector<XMLOption*> &xmlOptions);
};
diff --git a/contrib/moses2/PhraseBased/TargetPhraseImpl.cpp b/moses2/PhraseBased/TargetPhraseImpl.cpp
index 3768ca278..d9bc766d9 100644
--- a/contrib/moses2/PhraseBased/TargetPhraseImpl.cpp
+++ b/moses2/PhraseBased/TargetPhraseImpl.cpp
@@ -26,16 +26,16 @@ TargetPhraseImpl *TargetPhraseImpl::CreateFromString(MemPool &pool,
vector<string> toks = Tokenize(str);
size_t size = toks.size();
TargetPhraseImpl *ret =
- new (pool.Allocate<TargetPhraseImpl>()) TargetPhraseImpl(pool, pt, system,
- size);
+ new (pool.Allocate<TargetPhraseImpl>()) TargetPhraseImpl(pool, pt, system,
+ size);
ret->PhraseImplTemplate<Word>::CreateFromString(vocab, system, toks);
return ret;
}
TargetPhraseImpl::TargetPhraseImpl(MemPool &pool, const PhraseTable &pt,
- const System &system, size_t size)
-:Moses2::TargetPhrase<Moses2::Word>(pool, pt, system, size)
+ const System &system, size_t size)
+ :Moses2::TargetPhrase<Moses2::Word>(pool, pt, system, size)
{
m_scores = new (pool.Allocate<Scores>()) Scores(system, pool,
system.featureFunctions.GetNumScores());
diff --git a/contrib/moses2/PhraseBased/TargetPhraseImpl.h b/moses2/PhraseBased/TargetPhraseImpl.h
index a3355ffe6..026414b5d 100644
--- a/contrib/moses2/PhraseBased/TargetPhraseImpl.h
+++ b/moses2/PhraseBased/TargetPhraseImpl.h
@@ -31,19 +31,22 @@ public:
static TargetPhraseImpl *CreateFromString(MemPool &pool,
const PhraseTable &pt, const System &system, const std::string &str);
TargetPhraseImpl(MemPool &pool, const PhraseTable &pt, const System &system,
- size_t size);
+ size_t size);
//TargetPhraseImpl(MemPool &pool, const System &system, const TargetPhraseImpl &copy);
virtual ~TargetPhraseImpl();
- SCORE GetFutureScore() const
- { return m_scores->GetTotalScore() + m_estimatedScore; }
+ SCORE GetFutureScore() const {
+ return m_scores->GetTotalScore() + m_estimatedScore;
+ }
- void SetEstimatedScore(const SCORE &value)
- { m_estimatedScore = value; }
+ void SetEstimatedScore(const SCORE &value) {
+ m_estimatedScore = value;
+ }
- virtual SCORE GetScoreForPruning() const
- { return GetFutureScore(); }
+ virtual SCORE GetScoreForPruning() const {
+ return GetFutureScore();
+ }
protected:
SCORE m_estimatedScore;
diff --git a/contrib/moses2/PhraseBased/TargetPhrases.cpp b/moses2/PhraseBased/TargetPhrases.cpp
index a48afefa9..553312753 100644
--- a/contrib/moses2/PhraseBased/TargetPhrases.cpp
+++ b/moses2/PhraseBased/TargetPhrases.cpp
@@ -17,7 +17,7 @@ namespace Moses2
{
TargetPhrases::TargetPhrases(MemPool &pool, size_t size) :
- m_coll(pool, size), m_currInd(0)
+ m_coll(pool, size), m_currInd(0)
{
}
@@ -42,7 +42,7 @@ TargetPhrases::~TargetPhrases()
std::string TargetPhrases::Debug(const System &system) const
{
stringstream out;
- BOOST_FOREACH(const TargetPhraseImpl *tp, *this){
+ BOOST_FOREACH(const TargetPhraseImpl *tp, *this) {
out << tp->Debug(system);
out << endl;
}
@@ -53,11 +53,11 @@ void TargetPhrases::SortAndPrune(size_t tableLimit)
{
iterator iterMiddle;
iterMiddle =
- (tableLimit == 0 || m_coll.size() < tableLimit) ?
- m_coll.end() : m_coll.begin() + tableLimit;
+ (tableLimit == 0 || m_coll.size() < tableLimit) ?
+ m_coll.end() : m_coll.begin() + tableLimit;
std::partial_sort(m_coll.begin(), iterMiddle, m_coll.end(),
- CompareScoreForPruning<TP>());
+ CompareScoreForPruning<TP>());
if (tableLimit && m_coll.size() > tableLimit) {
m_coll.resize(tableLimit);
diff --git a/contrib/moses2/PhraseBased/TargetPhrases.h b/moses2/PhraseBased/TargetPhrases.h
index 2582a7386..79595ab40 100644
--- a/contrib/moses2/PhraseBased/TargetPhrases.h
+++ b/moses2/PhraseBased/TargetPhrases.h
@@ -25,12 +25,10 @@ public:
typedef Coll::iterator iterator;
typedef Coll::const_iterator const_iterator;
//! iterators
- const_iterator begin() const
- {
+ const_iterator begin() const {
return m_coll.begin();
}
- const_iterator end() const
- {
+ const_iterator end() const {
return m_coll.end();
}
@@ -38,18 +36,15 @@ public:
//TargetPhrases(MemPool &pool, const System &system, const TargetPhrases &copy);
virtual ~TargetPhrases();
- void AddTargetPhrase(const TP &targetPhrase)
- {
+ void AddTargetPhrase(const TP &targetPhrase) {
m_coll[m_currInd++] = &targetPhrase;
}
- size_t GetSize() const
- {
+ size_t GetSize() const {
return m_coll.size();
}
- const TP& operator[](size_t ind) const
- {
+ const TP& operator[](size_t ind) const {
return *m_coll[ind];
}
diff --git a/contrib/moses2/PhraseBased/TrellisPath.cpp b/moses2/PhraseBased/TrellisPath.cpp
index a7213fe18..5a1132c60 100644
--- a/contrib/moses2/PhraseBased/TrellisPath.cpp
+++ b/moses2/PhraseBased/TrellisPath.cpp
@@ -27,16 +27,16 @@ std::string TrellisNode::Debug(const System &system) const
/////////////////////////////////////////////////////////////////////////////////
TrellisPath::TrellisPath(const Hypothesis *hypo, const ArcLists &arcLists) :
- prevEdgeChanged(-1)
+ prevEdgeChanged(-1)
{
AddNodes(hypo, arcLists);
m_scores = &hypo->GetScores();
}
TrellisPath::TrellisPath(const TrellisPath &origPath, size_t edgeIndex,
- const TrellisNode &newNode, const ArcLists &arcLists, MemPool &pool,
- const System &system) :
- prevEdgeChanged(edgeIndex)
+ const TrellisNode &newNode, const ArcLists &arcLists, MemPool &pool,
+ const System &system) :
+ prevEdgeChanged(edgeIndex)
{
nodes.reserve(origPath.nodes.size());
for (size_t currEdge = 0; currEdge < edgeIndex; currEdge++) {
@@ -64,7 +64,7 @@ TrellisPath::TrellisPath(const TrellisPath &origPath, size_t edgeIndex,
const HypothesisBase *newHypo = newNode.GetHypo();
CalcScores(origPath.GetScores(), origHypo->GetScores(), newHypo->GetScores(),
- pool, system);
+ pool, system);
}
TrellisPath::~TrellisPath()
@@ -107,7 +107,7 @@ std::string TrellisPath::OutputTargetPhrase(const System &system) const
{
std::stringstream out;
for (int i = nodes.size() - 2; i >= 0; --i) {
- const TrellisNode &node = nodes[i];
+ const TrellisNode &node = nodes[i];
const Hypothesis *hypo = static_cast<const Hypothesis*>(node.GetHypo());
const TargetPhrase<Moses2::Word> &tp = hypo->GetTargetPhrase();
@@ -121,7 +121,7 @@ std::string TrellisPath::OutputTargetPhrase(const System &system) const
}
void TrellisPath::CreateDeviantPaths(TrellisPaths<TrellisPath> &paths,
- const ArcLists &arcLists, MemPool &pool, const System &system) const
+ const ArcLists &arcLists, MemPool &pool, const System &system) const
{
const size_t sizePath = nodes.size();
@@ -145,8 +145,8 @@ void TrellisPath::CreateDeviantPaths(TrellisPaths<TrellisPath> &paths,
}
void TrellisPath::CalcScores(const Scores &origScores,
- const Scores &origHypoScores, const Scores &newHypoScores, MemPool &pool,
- const System &system)
+ const Scores &origHypoScores, const Scores &newHypoScores, MemPool &pool,
+ const System &system)
{
Scores *scores = new (pool.Allocate<Scores>()) Scores(system, pool,
system.featureFunctions.GetNumScores(), origScores);
diff --git a/contrib/moses2/PhraseBased/TrellisPath.h b/moses2/PhraseBased/TrellisPath.h
index c0b989ad9..6852b43ba 100644
--- a/contrib/moses2/PhraseBased/TrellisPath.h
+++ b/moses2/PhraseBased/TrellisPath.h
@@ -27,12 +27,10 @@ public:
size_t ind;
TrellisNode(const ArcList &varcList, size_t vind) :
- arcList(&varcList), ind(vind)
- {
+ arcList(&varcList), ind(vind) {
}
- const HypothesisBase *GetHypo() const
- {
+ const HypothesisBase *GetHypo() const {
return (*arcList)[ind];
}
@@ -55,13 +53,12 @@ public:
* which may change other hypo back from there
*/
TrellisPath(const TrellisPath &origPath, size_t edgeIndex,
- const TrellisNode &newNode, const ArcLists &arcLists, MemPool &pool,
- const System &system);
+ const TrellisNode &newNode, const ArcLists &arcLists, MemPool &pool,
+ const System &system);
virtual ~TrellisPath();
- const Scores &GetScores() const
- {
+ const Scores &GetScores() const {
return *m_scores;
}
SCORE GetFutureScore() const;
@@ -73,14 +70,14 @@ public:
//! create a set of next best paths by wiggling 1 of the node at a time.
void CreateDeviantPaths(TrellisPaths<TrellisPath> &paths, const ArcLists &arcLists,
- MemPool &pool, const System &system) const;
+ MemPool &pool, const System &system) const;
protected:
const Scores *m_scores;
void AddNodes(const Hypothesis *hypo, const ArcLists &arcLists);
void CalcScores(const Scores &origScores, const Scores &origHypoScores,
- const Scores &newHypoScores, MemPool &pool, const System &system);
+ const Scores &newHypoScores, MemPool &pool, const System &system);
};
} /* namespace Moses2 */
diff --git a/contrib/moses2/PhraseImplTemplate.h b/moses2/PhraseImplTemplate.h
index a9d377bb0..a3ef32a7f 100644
--- a/contrib/moses2/PhraseImplTemplate.h
+++ b/moses2/PhraseImplTemplate.h
@@ -21,15 +21,13 @@ class PhraseImplTemplate : public Phrase<WORD>
{
public:
PhraseImplTemplate(MemPool &pool, size_t size) :
- m_size(size)
- {
+ m_size(size) {
m_words = new (pool.Allocate<WORD>(size)) WORD[size];
}
PhraseImplTemplate(MemPool &pool, const PhraseImplTemplate &copy) :
- m_size(copy.GetSize())
- {
+ m_size(copy.GetSize()) {
m_words = new (pool.Allocate<WORD>(m_size)) WORD[m_size];
for (size_t i = 0; i < m_size; ++i) {
const WORD &word = copy[i];
@@ -37,21 +35,22 @@ public:
}
}
- virtual ~PhraseImplTemplate()
- {
+ virtual ~PhraseImplTemplate() {
}
- size_t GetSize() const
- { return m_size; }
+ size_t GetSize() const {
+ return m_size;
+ }
- WORD& operator[](size_t pos)
- { return m_words[pos]; }
+ WORD& operator[](size_t pos) {
+ return m_words[pos];
+ }
- const WORD& operator[](size_t pos) const
- { return m_words[pos]; }
+ const WORD& operator[](size_t pos) const {
+ return m_words[pos];
+ }
- SubPhrase<WORD> GetSubPhrase(size_t start, size_t size) const
- {
+ SubPhrase<WORD> GetSubPhrase(size_t start, size_t size) const {
SubPhrase<WORD> ret(*this, start, size);
return ret;
}
@@ -61,8 +60,7 @@ protected:
WORD *m_words;
void CreateFromString(FactorCollection &vocab, const System &system,
- const std::vector<std::string> &toks, bool addBOSEOS = false)
- {
+ const std::vector<std::string> &toks, bool addBOSEOS = false) {
size_t startPos = 0;
if (addBOSEOS) {
startPos = 1;
diff --git a/contrib/moses2/Recycler.cpp b/moses2/Recycler.cpp
index b7a8fb77d..b7a8fb77d 100644
--- a/contrib/moses2/Recycler.cpp
+++ b/moses2/Recycler.cpp
diff --git a/contrib/moses2/Recycler.h b/moses2/Recycler.h
index 3751a2a93..60bdddf31 100644
--- a/contrib/moses2/Recycler.h
+++ b/moses2/Recycler.h
@@ -18,45 +18,37 @@ class Recycler
{
public:
Recycler() :
- m_currInd(0)
- {
+ m_currInd(0) {
}
- virtual ~Recycler()
- {
+ virtual ~Recycler() {
}
- T Get()
- {
+ T Get() {
if (!m_coll.empty()) {
T &obj = m_coll.back();
m_coll.pop_back();
return obj;
- }
- else if (m_currInd) {
+ } else if (m_currInd) {
--m_currInd;
T &obj = m_all[m_currInd];
return obj;
- }
- else {
+ } else {
return NULL;
}
}
- void Clear()
- {
+ void Clear() {
m_coll.clear();
m_currInd = m_all.size();
}
// call this for new objects when u 1st create it. It is assumed the object will be used right away
- void Keep(const T& val)
- {
+ void Keep(const T& val) {
m_all.push_back(val);
}
// call this for existing object to put back into queue for reuse
- void Recycle(const T& val)
- {
+ void Recycle(const T& val) {
m_coll.push_back(val);
}
diff --git a/contrib/moses2/SCFG/ActiveChart.cpp b/moses2/SCFG/ActiveChart.cpp
index 711767b2f..fb4d84bfb 100644
--- a/contrib/moses2/SCFG/ActiveChart.cpp
+++ b/moses2/SCFG/ActiveChart.cpp
@@ -17,12 +17,12 @@ SymbolBindElement::SymbolBindElement()
}
SymbolBindElement::SymbolBindElement(
- const Moses2::Range &range,
- const SCFG::Word &word,
- const Moses2::Hypotheses *hypos)
-:m_range(&range)
-,word(&word)
-,hypos(hypos)
+ const Moses2::Range &range,
+ const SCFG::Word &word,
+ const Moses2::Hypotheses *hypos)
+ :m_range(&range)
+ ,word(&word)
+ ,hypos(hypos)
{
assert( (word.isNonTerminal && hypos) || (!word.isNonTerminal && hypos == NULL));
}
@@ -37,19 +37,19 @@ size_t hash_value(const SymbolBindElement &obj)
std::string SymbolBindElement::Debug(const System &system) const
{
- stringstream out;
- out << "(";
- out << *m_range;
- out << word->Debug(system);
- out << ")";
+ stringstream out;
+ out << "(";
+ out << *m_range;
+ out << word->Debug(system);
+ out << ")";
return out.str();
}
////////////////////////////////////////////////////////////////////////////
SymbolBind::SymbolBind(MemPool &pool)
-:coll(pool)
-,numNT(0)
+ :coll(pool)
+ ,numNT(0)
{
}
@@ -83,19 +83,19 @@ std::string SymbolBind::Debug(const System &system) const
{
stringstream out;
BOOST_FOREACH(const SymbolBindElement &ele, coll) {
- out << ele.Debug(system) << " ";
+ out << ele.Debug(system) << " ";
}
return out.str();
}
////////////////////////////////////////////////////////////////////////////
ActiveChartEntry::ActiveChartEntry(MemPool &pool)
-:m_symbolBind(pool)
+ :m_symbolBind(pool)
{
}
////////////////////////////////////////////////////////////////////////////
ActiveChart::ActiveChart(MemPool &pool)
-:entries(pool)
+ :entries(pool)
{
}
diff --git a/contrib/moses2/SCFG/ActiveChart.h b/moses2/SCFG/ActiveChart.h
index ed9f35d92..baf3a09dd 100644
--- a/contrib/moses2/SCFG/ActiveChart.h
+++ b/moses2/SCFG/ActiveChart.h
@@ -28,13 +28,13 @@ public:
SymbolBindElement();
SymbolBindElement(const Moses2::Range &range, const SCFG::Word &word, const Moses2::Hypotheses *hypos);
- const Range &GetRange() const
- { return *m_range; }
+ const Range &GetRange() const {
+ return *m_range;
+ }
- bool operator==(const SymbolBindElement &compare) const
- {
+ bool operator==(const SymbolBindElement &compare) const {
bool ret = hypos == compare.hypos
- && word == compare.word;
+ && word == compare.word;
return ret;
}
@@ -58,19 +58,21 @@ public:
SymbolBind(MemPool &pool);
SymbolBind(MemPool &pool, const SymbolBind &copy)
- :coll(copy.coll)
- ,numNT(copy.numNT)
+ :coll(copy.coll)
+ ,numNT(copy.numNT)
{}
- size_t GetSize() const
- { return coll.size(); }
+ size_t GetSize() const {
+ return coll.size();
+ }
std::vector<const SymbolBindElement*> GetNTElements() const;
void Add(const Range &range, const SCFG::Word &word, const Moses2::Hypotheses *hypos);
- bool operator==(const SymbolBind &compare) const
- { return coll == compare.coll; }
+ bool operator==(const SymbolBind &compare) const {
+ return coll == compare.coll;
+ }
std::string Debug(const System &system) const;
@@ -88,20 +90,19 @@ public:
ActiveChartEntry(MemPool &pool);
ActiveChartEntry(MemPool &pool, const ActiveChartEntry &prevEntry)
- :m_symbolBind(pool, prevEntry.GetSymbolBind())
- {
+ :m_symbolBind(pool, prevEntry.GetSymbolBind()) {
//symbolBinds = new (pool.Allocate<SymbolBind>()) SymbolBind(pool, *prevEntry.symbolBinds);
}
- const SymbolBind &GetSymbolBind() const
- { return m_symbolBind; }
+ const SymbolBind &GetSymbolBind() const {
+ return m_symbolBind;
+ }
virtual void AddSymbolBindElement(
- const Range &range,
- const SCFG::Word &word,
- const Moses2::Hypotheses *hypos,
- const PhraseTable &pt)
- {
+ const Range &range,
+ const SCFG::Word &word,
+ const Moses2::Hypotheses *hypos,
+ const PhraseTable &pt) {
m_symbolBind.Add(range, word, hypos);
}
diff --git a/contrib/moses2/SCFG/Hypothesis.cpp b/moses2/SCFG/Hypothesis.cpp
index 28411a43e..c7ae8c798 100644
--- a/contrib/moses2/SCFG/Hypothesis.cpp
+++ b/moses2/SCFG/Hypothesis.cpp
@@ -19,35 +19,34 @@ namespace SCFG
Hypothesis *Hypothesis::Create(MemPool &pool, Manager &mgr)
{
// ++g_numHypos;
- Hypothesis *ret;
- //ret = new (pool.Allocate<Hypothesis>()) Hypothesis(pool, mgr.system);
-
- Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycle();
- ret = static_cast<Hypothesis*>(recycler.Get());
- if (ret) {
- // got new hypo from recycler. Do nothing
- }
- else {
- ret = new (pool.Allocate<Hypothesis>()) Hypothesis(pool, mgr.system);
- //cerr << "Hypothesis=" << sizeof(Hypothesis) << " " << ret << endl;
- recycler.Keep(ret);
- }
- return ret;
+ Hypothesis *ret;
+ //ret = new (pool.Allocate<Hypothesis>()) Hypothesis(pool, mgr.system);
+
+ Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycle();
+ ret = static_cast<Hypothesis*>(recycler.Get());
+ if (ret) {
+ // got new hypo from recycler. Do nothing
+ } else {
+ ret = new (pool.Allocate<Hypothesis>()) Hypothesis(pool, mgr.system);
+ //cerr << "Hypothesis=" << sizeof(Hypothesis) << " " << ret << endl;
+ recycler.Keep(ret);
+ }
+ return ret;
}
Hypothesis::Hypothesis(MemPool &pool,
- const System &system)
-:HypothesisBase(pool, system)
-,m_prevHypos(pool)
+ const System &system)
+ :HypothesisBase(pool, system)
+ ,m_prevHypos(pool)
{
}
void Hypothesis::Init(SCFG::Manager &mgr,
- const SCFG::InputPath &path,
- const SCFG::SymbolBind &symbolBind,
- const SCFG::TargetPhraseImpl &tp,
- const Vector<size_t> &prevHyposIndices)
+ const SCFG::InputPath &path,
+ const SCFG::SymbolBind &symbolBind,
+ const SCFG::TargetPhraseImpl &tp,
+ const Vector<size_t> &prevHyposIndices)
{
m_mgr = &mgr;
m_targetPhrase = &tp;
@@ -91,8 +90,8 @@ SCORE Hypothesis::GetFutureScore() const
void Hypothesis::EvaluateWhenApplied()
{
const std::vector<const StatefulFeatureFunction*> &sfffs =
- GetManager().system.featureFunctions.GetStatefulFeatureFunctions();
- BOOST_FOREACH(const StatefulFeatureFunction *sfff, sfffs){
+ GetManager().system.featureFunctions.GetStatefulFeatureFunctions();
+ BOOST_FOREACH(const StatefulFeatureFunction *sfff, sfffs) {
EvaluateWhenApplied(*sfff);
}
//cerr << *this << endl;
@@ -105,7 +104,7 @@ void Hypothesis::EvaluateWhenApplied(const StatefulFeatureFunction &sfff)
size_t statefulInd = sfff.GetStatefulInd();
FFState *thisState = m_ffStates[statefulInd];
sfff.EvaluateWhenApplied(mgr, *this, statefulInd, GetScores(),
- *thisState);
+ *thisState);
}
@@ -123,8 +122,7 @@ void Hypothesis::OutputToStream(std::ostream &strm) const
size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[targetPos];
const Hypothesis *prevHypo = m_prevHypos[nonTermInd];
prevHypo->OutputToStream(strm);
- }
- else {
+ } else {
word.OutputToStream(*m_mgr, targetPos, *this, strm);
strm << " ";
}
@@ -166,13 +164,13 @@ std::string Hypothesis::Debug(const System &system) const
void Hypothesis::OutputTransOpt(std::ostream &out) const
{
- out << GetInputPath().range << " "
- << "score=" << GetScores().GetTotalScore() << " "
- << GetTargetPhrase().Debug(m_mgr->system) << endl;
+ out << GetInputPath().range << " "
+ << "score=" << GetScores().GetTotalScore() << " "
+ << GetTargetPhrase().Debug(m_mgr->system) << endl;
- BOOST_FOREACH(const Hypothesis *prevHypo, m_prevHypos) {
- prevHypo->OutputTransOpt(out);
- }
+ BOOST_FOREACH(const Hypothesis *prevHypo, m_prevHypos) {
+ prevHypo->OutputTransOpt(out);
+ }
}
} // namespaces
diff --git a/contrib/moses2/SCFG/Hypothesis.h b/moses2/SCFG/Hypothesis.h
index 8ece45bb6..fbbd663aa 100644
--- a/contrib/moses2/SCFG/Hypothesis.h
+++ b/moses2/SCFG/Hypothesis.h
@@ -23,29 +23,34 @@ public:
static Hypothesis *Create(MemPool &pool, Manager &mgr);
void Init(SCFG::Manager &mgr,
- const SCFG::InputPath &path,
- const SCFG::SymbolBind &symbolBind,
- const SCFG::TargetPhraseImpl &tp,
- const Vector<size_t> &prevHyposIndices);
+ const SCFG::InputPath &path,
+ const SCFG::SymbolBind &symbolBind,
+ const SCFG::TargetPhraseImpl &tp,
+ const Vector<size_t> &prevHyposIndices);
virtual SCORE GetFutureScore() const;
virtual void EvaluateWhenApplied();
- const SCFG::TargetPhraseImpl &GetTargetPhrase() const
- { return *m_targetPhrase; }
+ const SCFG::TargetPhraseImpl &GetTargetPhrase() const {
+ return *m_targetPhrase;
+ }
- const SCFG::InputPath &GetInputPath() const
- { return *m_path; }
+ const SCFG::InputPath &GetInputPath() const {
+ return *m_path;
+ }
- const SCFG::SymbolBind &GetSymbolBind() const
- { return *m_symbolBind; }
+ const SCFG::SymbolBind &GetSymbolBind() const {
+ return *m_symbolBind;
+ }
- const Vector<const Hypothesis*> &GetPrevHypos() const
- { return m_prevHypos; }
+ const Vector<const Hypothesis*> &GetPrevHypos() const {
+ return m_prevHypos;
+ }
//! get a particular previous hypos
- const Hypothesis* GetPrevHypo(size_t ind) const
- { return m_prevHypos[ind]; }
+ const Hypothesis* GetPrevHypo(size_t ind) const {
+ return m_prevHypos[ind];
+ }
void OutputToStream(std::ostream &strm) const;
void OutputTransOpt(std::ostream &strm) const;
@@ -60,7 +65,7 @@ protected:
Vector<const Hypothesis*> m_prevHypos; // always sorted by source position?
Hypothesis(MemPool &pool,
- const System &system);
+ const System &system);
void EvaluateWhenApplied(const StatefulFeatureFunction &sfff);
diff --git a/contrib/moses2/SCFG/InputPath.cpp b/moses2/SCFG/InputPath.cpp
index 1ebbbf327..4fcbbb2b0 100644
--- a/contrib/moses2/SCFG/InputPath.cpp
+++ b/moses2/SCFG/InputPath.cpp
@@ -19,10 +19,10 @@ namespace SCFG
{
InputPath::InputPath(MemPool &pool, const SubPhrase<SCFG::Word> &subPhrase,
- const Range &range, size_t numPt, const InputPath *prefixPath)
-:InputPathBase(pool, range, numPt, prefixPath)
-,subPhrase(subPhrase)
-,targetPhrases(MemPoolAllocator<Element>(pool))
+ const Range &range, size_t numPt, const InputPath *prefixPath)
+ :InputPathBase(pool, range, numPt, prefixPath)
+ ,subPhrase(subPhrase)
+ ,targetPhrases(MemPoolAllocator<Element>(pool))
{
m_activeChart = pool.Allocate<ActiveChart>(numPt);
for (size_t i = 0; i < numPt; ++i) {
@@ -68,14 +68,14 @@ std::string InputPath::Debug(const System &system) const
}
void InputPath::AddTargetPhrasesToPath(
- MemPool &pool,
- const System &system,
- const PhraseTable &pt,
- const SCFG::TargetPhrases &tps,
- const SCFG::SymbolBind &symbolBind)
+ MemPool &pool,
+ const System &system,
+ const PhraseTable &pt,
+ const SCFG::TargetPhrases &tps,
+ const SCFG::SymbolBind &symbolBind)
{
targetPhrases.push_back(Element(symbolBind, &tps));
- /*
+ /*
Coll::iterator iterColl;
iterColl = targetPhrases.find(symbolBind);
assert(iterColl == targetPhrases.end());
diff --git a/contrib/moses2/SCFG/InputPath.h b/moses2/SCFG/InputPath.h
index c8a7253c2..bef9e0a79 100644
--- a/contrib/moses2/SCFG/InputPath.h
+++ b/moses2/SCFG/InputPath.h
@@ -35,20 +35,21 @@ public:
SubPhrase<SCFG::Word> subPhrase;
InputPath(MemPool &pool, const SubPhrase<SCFG::Word> &subPhrase, const Range &range,
- size_t numPt, const InputPath *prefixPath);
+ size_t numPt, const InputPath *prefixPath);
virtual ~InputPath();
- const ActiveChart &GetActiveChart(size_t ptInd) const
- { return m_activeChart[ptInd]; }
+ const ActiveChart &GetActiveChart(size_t ptInd) const {
+ return m_activeChart[ptInd];
+ }
void AddActiveChartEntry(size_t ptInd, ActiveChartEntry *chartEntry);
void AddTargetPhrasesToPath(
- MemPool &pool,
- const System &system,
- const PhraseTable &pt,
- const SCFG::TargetPhrases &tps,
- const SCFG::SymbolBind &symbolBind);
+ MemPool &pool,
+ const System &system,
+ const PhraseTable &pt,
+ const SCFG::TargetPhrases &tps,
+ const SCFG::SymbolBind &symbolBind);
size_t GetNumRules() const;
diff --git a/contrib/moses2/SCFG/InputPaths.cpp b/moses2/SCFG/InputPaths.cpp
index e1c3f9d21..77478cd98 100644
--- a/contrib/moses2/SCFG/InputPaths.cpp
+++ b/moses2/SCFG/InputPaths.cpp
@@ -56,7 +56,7 @@ void InputPaths::Init(const InputType &input, const ManagerBase &mgr)
Range range(startPos, endPos);
SCFG::InputPath *path = new (pool.Allocate<SCFG::InputPath>())
- SCFG::InputPath(pool, subPhrase, range, numPt, prefixPath);
+ SCFG::InputPath(pool, subPhrase, range, numPt, prefixPath);
//cerr << "path=" << *path << endl;
m_inputPaths.push_back(path);
diff --git a/contrib/moses2/SCFG/InputPaths.h b/moses2/SCFG/InputPaths.h
index 37e2404cf..57c45414f 100644
--- a/contrib/moses2/SCFG/InputPaths.h
+++ b/moses2/SCFG/InputPaths.h
@@ -26,8 +26,7 @@ class InputPaths: public InputPathsBase
public:
void Init(const InputType &input, const ManagerBase &mgr);
- const Matrix<InputPath*> &GetMatrix() const
- {
+ const Matrix<InputPath*> &GetMatrix() const {
return *m_matrix;
}
diff --git a/contrib/moses2/SCFG/Manager.cpp b/moses2/SCFG/Manager.cpp
index 5db4e2a89..6e10b32ed 100644
--- a/contrib/moses2/SCFG/Manager.cpp
+++ b/moses2/SCFG/Manager.cpp
@@ -28,8 +28,8 @@ namespace SCFG
{
Manager::Manager(System &sys, const TranslationTask &task,
- const std::string &inputStr, long translationId)
-:ManagerBase(sys, task, inputStr, translationId)
+ const std::string &inputStr, long translationId)
+ :ManagerBase(sys, task, inputStr, translationId)
{
}
@@ -48,7 +48,7 @@ void Manager::Decode()
FactorCollection &vocab = system.GetVocab();
m_input = Sentence::CreateFromString(GetPool(), vocab, system, m_inputStr,
- m_translationId);
+ m_translationId);
const SCFG::Sentence &sentence = static_cast<const SCFG::Sentence&>(GetInput());
@@ -100,15 +100,15 @@ void Manager::Decode()
void Manager::InitActiveChart(SCFG::InputPath &path)
{
- size_t numPt = system.mappings.size();
- //cerr << "numPt=" << numPt << endl;
-
- for (size_t i = 0; i < numPt; ++i) {
- const PhraseTable &pt = *system.mappings[i];
- //cerr << "START InitActiveChart" << endl;
- pt.InitActiveChart(GetPool(), *this, path);
- //cerr << "FINISHED InitActiveChart" << endl;
- }
+ size_t numPt = system.mappings.size();
+ //cerr << "numPt=" << numPt << endl;
+
+ for (size_t i = 0; i < numPt; ++i) {
+ const PhraseTable &pt = *system.mappings[i];
+ //cerr << "START InitActiveChart" << endl;
+ pt.InitActiveChart(GetPool(), *this, path);
+ //cerr << "FINISHED InitActiveChart" << endl;
+ }
}
void Manager::Lookup(SCFG::InputPath &path)
@@ -202,9 +202,9 @@ void Manager::Decode(SCFG::InputPath &path, Stack &stack)
}
void Manager::CreateQueue(
- const SCFG::InputPath &path,
- const SymbolBind &symbolBind,
- const SCFG::TargetPhrases &tps)
+ const SCFG::InputPath &path,
+ const SymbolBind &symbolBind,
+ const SCFG::TargetPhrases &tps)
{
MemPool &pool = GetPool();
@@ -255,10 +255,10 @@ void Manager::Decode(SCFG::InputPath &path, Stack &stack)
*/
void Manager::ExpandHypo(
- const SCFG::InputPath &path,
- const SCFG::SymbolBind &symbolBind,
- const SCFG::TargetPhraseImpl &tp,
- Stack &stack)
+ const SCFG::InputPath &path,
+ const SCFG::SymbolBind &symbolBind,
+ const SCFG::TargetPhraseImpl &tp,
+ Stack &stack)
{
Recycler<HypothesisBase*> &hypoRecycler = GetHypoRecycle();
@@ -280,9 +280,9 @@ void Manager::ExpandHypo(
}
bool Manager::IncrPrevHypoIndices(
- Vector<size_t> &prevHyposIndices,
- size_t ind,
- const std::vector<const SymbolBindElement*> ntEles)
+ Vector<size_t> &prevHyposIndices,
+ size_t ind,
+ const std::vector<const SymbolBindElement*> ntEles)
{
if (ntEles.size() == 0) {
// no nt. Do the 1st
@@ -319,8 +319,7 @@ bool Manager::IncrPrevHypoIndices(
if (ind >= numHypos) {
return false;
- }
- else {
+ } else {
return true;
}
}
@@ -345,8 +344,7 @@ std::string Manager::OutputBest() const
if (system.options.output.ReportHypoScore) {
out = SPrint(bestHypo->GetScores().GetTotalScore()) + " " + out;
}
- }
- else {
+ } else {
if (system.options.output.ReportHypoScore) {
out = "0 ";
}
@@ -377,12 +375,11 @@ std::string Manager::OutputTransOpt()
const SCFG::Hypothesis *bestHypo = lastStack.GetBestHypo();
if (bestHypo) {
- stringstream outStrm;
- bestHypo->OutputTransOpt(outStrm);
- return outStrm.str();
- }
- else {
- return "";
+ stringstream outStrm;
+ bestHypo->OutputTransOpt(outStrm);
+ return outStrm.str();
+ } else {
+ return "";
}
}
diff --git a/contrib/moses2/SCFG/Manager.h b/moses2/SCFG/Manager.h
index 6bd53cc89..a9a575896 100644
--- a/contrib/moses2/SCFG/Manager.h
+++ b/moses2/SCFG/Manager.h
@@ -29,7 +29,7 @@ class Manager: public Moses2::ManagerBase
{
public:
Manager(System &sys, const TranslationTask &task, const std::string &inputStr,
- long translationId);
+ long translationId);
virtual ~Manager();
void Decode();
@@ -37,14 +37,17 @@ public:
std::string OutputNBest();
std::string OutputTransOpt();
- const InputPaths &GetInputPaths() const
- { return m_inputPaths; }
+ const InputPaths &GetInputPaths() const {
+ return m_inputPaths;
+ }
- QueueItemRecycler &GetQueueItemRecycler()
- { return m_queueItemRecycler; }
+ QueueItemRecycler &GetQueueItemRecycler() {
+ return m_queueItemRecycler;
+ }
- const Stacks &GetStacks() const
- { return m_stacks; }
+ const Stacks &GetStacks() const {
+ return m_stacks;
+ }
protected:
Stacks m_stacks;
@@ -56,15 +59,15 @@ protected:
void Decode(SCFG::InputPath &path, Stack &stack);
void ExpandHypo(
- const SCFG::InputPath &path,
- const SCFG::SymbolBind &symbolBind,
- const SCFG::TargetPhraseImpl &tp,
- Stack &stack);
+ const SCFG::InputPath &path,
+ const SCFG::SymbolBind &symbolBind,
+ const SCFG::TargetPhraseImpl &tp,
+ Stack &stack);
bool IncrPrevHypoIndices(
- Vector<size_t> &prevHyposIndices,
- size_t ind,
- const std::vector<const SymbolBindElement*> ntEles);
+ Vector<size_t> &prevHyposIndices,
+ size_t ind,
+ const std::vector<const SymbolBindElement*> ntEles);
// cube pruning
Queue m_queue;
@@ -73,9 +76,9 @@ protected:
QueueItemRecycler m_queueItemRecycler;
void CreateQueue(
- const SCFG::InputPath &path,
- const SymbolBind &symbolBind,
- const SCFG::TargetPhrases &tps);
+ const SCFG::InputPath &path,
+ const SymbolBind &symbolBind,
+ const SCFG::TargetPhrases &tps);
};
}
diff --git a/contrib/moses2/SCFG/Misc.cpp b/moses2/SCFG/Misc.cpp
index 1ab053b60..9a340928a 100644
--- a/contrib/moses2/SCFG/Misc.cpp
+++ b/moses2/SCFG/Misc.cpp
@@ -20,25 +20,25 @@ namespace SCFG
////////////////////////////////////////////////////////
SeenPosition::SeenPosition(MemPool &pool,
- const SymbolBind &vSymbolBind,
- const SCFG::TargetPhrases &vtps,
- size_t numNT)
-:symbolBind(vSymbolBind)
-,tps(vtps)
-,tpInd(0)
-,hypoIndColl(pool, numNT, 0)
+ const SymbolBind &vSymbolBind,
+ const SCFG::TargetPhrases &vtps,
+ size_t numNT)
+ :symbolBind(vSymbolBind)
+ ,tps(vtps)
+ ,tpInd(0)
+ ,hypoIndColl(pool, numNT, 0)
{
}
SeenPosition::SeenPosition(MemPool &pool,
- const SymbolBind &vSymbolBind,
- const SCFG::TargetPhrases &vtps,
- size_t vtpInd,
- const Vector<size_t> &vhypoIndColl)
-:symbolBind(vSymbolBind)
-,tps(vtps)
-,tpInd(vtpInd)
-,hypoIndColl(pool, vhypoIndColl.size())
+ const SymbolBind &vSymbolBind,
+ const SCFG::TargetPhrases &vtps,
+ size_t vtpInd,
+ const Vector<size_t> &vhypoIndColl)
+ :symbolBind(vSymbolBind)
+ ,tps(vtps)
+ ,tpInd(vtpInd)
+ ,hypoIndColl(pool, vhypoIndColl.size())
{
for (size_t i = 0; i < hypoIndColl.size(); ++i) {
hypoIndColl[i] = vhypoIndColl[i];
@@ -60,7 +60,7 @@ std::string SeenPosition::Debug(const System &system) const
bool SeenPosition::operator==(const SeenPosition &compare) const
{
if (&symbolBind != &compare.symbolBind) {
- return false;
+ return false;
}
if (&tps != &compare.tps) {
@@ -106,8 +106,7 @@ QueueItem *QueueItem::Create(MemPool &pool, SCFG::Manager &mgr)
// use item from recycle bin
ret = queueItemRecycler.back();
queueItemRecycler.pop_back();
- }
- else {
+ } else {
// create new item
ret = new (pool.Allocate<QueueItem>()) QueueItem(pool);
}
@@ -117,16 +116,16 @@ QueueItem *QueueItem::Create(MemPool &pool, SCFG::Manager &mgr)
}
QueueItem::QueueItem(MemPool &pool)
-:m_hypoIndColl(NULL)
+ :m_hypoIndColl(NULL)
{
}
void QueueItem::Init(
- MemPool &pool,
- const SymbolBind &vSymbolBind,
- const SCFG::TargetPhrases &vTPS,
- const Vector<size_t> &hypoIndColl)
+ MemPool &pool,
+ const SymbolBind &vSymbolBind,
+ const SCFG::TargetPhrases &vTPS,
+ const Vector<size_t> &hypoIndColl)
{
symbolBind = &vSymbolBind;
tps = &vTPS;
@@ -136,11 +135,11 @@ void QueueItem::Init(
}
void QueueItem::Init(
- MemPool &pool,
- const SymbolBind &vSymbolBind,
- const SCFG::TargetPhrases &vTPS,
- size_t vTPInd,
- const Vector<size_t> &hypoIndColl)
+ MemPool &pool,
+ const SymbolBind &vSymbolBind,
+ const SCFG::TargetPhrases &vTPS,
+ size_t vTPInd,
+ const Vector<size_t> &hypoIndColl)
{
symbolBind = &vSymbolBind;
tps = &vTPS;
@@ -155,10 +154,10 @@ void QueueItem::AddHypos(const Moses2::Hypotheses &hypos)
}
void QueueItem::CreateHypo(
- MemPool &systemPool,
- SCFG::Manager &mgr,
- const SCFG::InputPath &path,
- const SCFG::SymbolBind &symbolBind)
+ MemPool &systemPool,
+ SCFG::Manager &mgr,
+ const SCFG::InputPath &path,
+ const SCFG::SymbolBind &symbolBind)
{
const SCFG::TargetPhraseImpl &tp = (*tps)[tpInd];
@@ -168,12 +167,12 @@ void QueueItem::CreateHypo(
}
void QueueItem::CreateNext(
- MemPool &systemPool,
- MemPool &mgrPool,
- SCFG::Manager &mgr,
- SCFG::Queue &queue,
- SeenPositions &seenPositions,
- const SCFG::InputPath &path)
+ MemPool &systemPool,
+ MemPool &mgrPool,
+ SCFG::Manager &mgr,
+ SCFG::Queue &queue,
+ SeenPositions &seenPositions,
+ const SCFG::InputPath &path)
{
//cerr << "tpInd=" << tpInd << " " << tps->GetSize() << endl;
if (tpInd + 1 < tps->GetSize()) {
diff --git a/contrib/moses2/SCFG/Misc.h b/moses2/SCFG/Misc.h
index 0e1c2a015..27b9df79a 100644
--- a/contrib/moses2/SCFG/Misc.h
+++ b/moses2/SCFG/Misc.h
@@ -31,14 +31,14 @@ public:
Vector<size_t> hypoIndColl;
SeenPosition(MemPool &pool,
- const SymbolBind &vSymbolBind,
- const SCFG::TargetPhrases &vtps,
- size_t numNT);
+ const SymbolBind &vSymbolBind,
+ const SCFG::TargetPhrases &vtps,
+ size_t numNT);
SeenPosition(MemPool &pool,
- const SymbolBind &vSymbolBind,
- const SCFG::TargetPhrases &vtps,
- size_t vtpInd,
- const Vector<size_t> &vhypoIndColl);
+ const SymbolBind &vSymbolBind,
+ const SCFG::TargetPhrases &vtps,
+ size_t vtpInd,
+ const Vector<size_t> &vhypoIndColl);
bool operator==(const SeenPosition &compare) const;
size_t hash() const;
@@ -54,13 +54,14 @@ class SeenPositions
public:
bool Add(const SeenPosition *item);
- void clear()
- { m_coll.clear(); }
+ void clear() {
+ m_coll.clear();
+ }
protected:
typedef boost::unordered_set<const SeenPosition*,
- UnorderedComparer<SeenPosition>, UnorderedComparer<SeenPosition> > Coll;
+ UnorderedComparer<SeenPosition>, UnorderedComparer<SeenPosition> > Coll;
Coll m_coll;
};
@@ -73,30 +74,30 @@ public:
static QueueItem *Create(MemPool &pool, SCFG::Manager &mgr);
void Init(
- MemPool &pool,
- const SymbolBind &symbolBind,
- const SCFG::TargetPhrases &tps,
- const Vector<size_t> &hypoIndColl);
+ MemPool &pool,
+ const SymbolBind &symbolBind,
+ const SCFG::TargetPhrases &tps,
+ const Vector<size_t> &hypoIndColl);
void Init(
- MemPool &pool,
- const SymbolBind &symbolBind,
- const SCFG::TargetPhrases &tps,
- size_t vTPInd,
- const Vector<size_t> &hypoIndColl);
+ MemPool &pool,
+ const SymbolBind &symbolBind,
+ const SCFG::TargetPhrases &tps,
+ size_t vTPInd,
+ const Vector<size_t> &hypoIndColl);
void AddHypos(const Moses2::Hypotheses &hypos);
void CreateHypo(
- MemPool &systemPool,
- SCFG::Manager &mgr,
- const SCFG::InputPath &path,
- const SCFG::SymbolBind &symbolBind);
+ MemPool &systemPool,
+ SCFG::Manager &mgr,
+ const SCFG::InputPath &path,
+ const SCFG::SymbolBind &symbolBind);
void CreateNext(
- MemPool &systemPool,
- MemPool &mgrPool,
- SCFG::Manager &mgr,
- SCFG::Queue &queue,
- SeenPositions &seenPositions,
- const SCFG::InputPath &path);
+ MemPool &systemPool,
+ MemPool &mgrPool,
+ SCFG::Manager &mgr,
+ SCFG::Queue &queue,
+ SeenPositions &seenPositions,
+ const SCFG::InputPath &path);
std::string Debug(const System &system) const;
@@ -109,7 +110,7 @@ protected:
size_t tpInd;
const Vector<size_t> *m_hypoIndColl; // pointer to variable in seen position
- // hypos and ind to the 1 we're using
+ // hypos and ind to the 1 we're using
QueueItem(MemPool &pool);
@@ -123,8 +124,7 @@ typedef std::deque<QueueItem*> QueueItemRecycler;
class QueueItemOrderer
{
public:
- bool operator()(QueueItem* itemA, QueueItem* itemB) const
- {
+ bool operator()(QueueItem* itemA, QueueItem* itemB) const {
HypothesisFutureScoreOrderer orderer;
return !orderer(itemA->hypo, itemB->hypo);
}
@@ -132,8 +132,8 @@ public:
///////////////////////////////////////////
class Queue : public std::priority_queue<QueueItem*,
- std::vector<QueueItem*>,
- QueueItemOrderer>
+ std::vector<QueueItem*>,
+ QueueItemOrderer>
{
};
diff --git a/contrib/moses2/SCFG/PhraseImpl.cpp b/moses2/SCFG/PhraseImpl.cpp
index 028ede8b0..398e8e217 100644
--- a/contrib/moses2/SCFG/PhraseImpl.cpp
+++ b/moses2/SCFG/PhraseImpl.cpp
@@ -18,7 +18,7 @@ PhraseImpl *PhraseImpl::CreateFromString(MemPool &pool, FactorCollection &vocab,
std::vector<std::string> toks = Moses2::Tokenize(str);
size_t size = toks.size();
if (skipLastWord) {
- --size;
+ --size;
}
PhraseImpl *ret;
diff --git a/contrib/moses2/SCFG/PhraseImpl.h b/moses2/SCFG/PhraseImpl.h
index f26de313d..f61bf2915 100644
--- a/contrib/moses2/SCFG/PhraseImpl.h
+++ b/moses2/SCFG/PhraseImpl.h
@@ -12,11 +12,10 @@ class PhraseImpl: public PhraseImplTemplate<SCFG::Word>
{
public:
static PhraseImpl *CreateFromString(MemPool &pool, FactorCollection &vocab,
- const System &system, const std::string &str, bool skipLastWord = true);
+ const System &system, const std::string &str, bool skipLastWord = true);
PhraseImpl(MemPool &pool, size_t size) :
- PhraseImplTemplate<Word>(pool, size)
- {
+ PhraseImplTemplate<Word>(pool, size) {
}
};
diff --git a/moses2/SCFG/Sentence.cpp b/moses2/SCFG/Sentence.cpp
new file mode 100644
index 000000000..de82e3ee9
--- /dev/null
+++ b/moses2/SCFG/Sentence.cpp
@@ -0,0 +1,154 @@
+/*
+ * Sentence.cpp
+ *
+ * Created on: 14 Dec 2015
+ * Author: hieu
+ */
+
+#include "Sentence.h"
+#include "../System.h"
+
+using namespace std;
+
+namespace Moses2
+{
+namespace SCFG
+{
+// Build an SCFG input sentence from one line of input text.
+//
+// If system.options.input.xml_policy is set, the line is handed to
+// CreateFromStringXML(). Otherwise it is split with Tokenize() and copied
+// into a pool-allocated Sentence. translationId is not used in this function.
+Sentence *Sentence::CreateFromString(MemPool &pool, FactorCollection &vocab,
+    const System &system, const std::string &str, long translationId)
+{
+  // XML-annotated input has its own construction path.
+  if (system.options.input.xml_policy) {
+    return CreateFromStringXML(pool, vocab, system, str);
+  }
+
+  std::vector<std::string> words = Tokenize(str);
+
+  // Two more slots than tokens - presumably for the sentence-boundary
+  // markers added by PhraseImplTemplate::CreateFromString (TODO confirm).
+  Sentence *sentence =
+    new (pool.Allocate<SCFG::Sentence>()) Sentence(pool, words.size() + 2);
+  sentence->PhraseImplTemplate<SCFG::Word>::CreateFromString(vocab, system, words, true);
+
+  return sentence;
+}
+
+// Build an SCFG input sentence from a line that carries inline XML markup.
+//
+// The line is wrapped in a synthetic <xml> root, parsed with pugixml, and
+// flattened by XMLParse() into plain tokens plus one XMLOption per element.
+// "ne" elements are treated as placeholders: their entity is stored in the
+// placeholder factor of the covered word. Every other element is registered
+// through AddXMLOption().
+//
+// Throws (UTIL_THROW_IF2) when the markup cannot be parsed, when a
+// placeholder is present but no placeholder factor is configured, or when a
+// placeholder covers anything other than exactly one word.
+Sentence *Sentence::CreateFromStringXML(MemPool &pool, FactorCollection &vocab,
+    const System &system, const std::string &str)
+{
+  Sentence *ret;
+
+  vector<XMLOption*> xmlOptions;
+  pugi::xml_document doc;
+
+  // wrap the line in a single root element so it parses as one document
+  string str2 = "<xml>" + str + "</xml>";
+  pugi::xml_parse_result result = doc.load(str2.c_str(),
+                                  pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_eol | pugi::parse_comments);
+
+  // On error pugixml keeps only the nodes parsed before the failure, so
+  // carrying on would silently translate a truncated sentence.
+  UTIL_THROW_IF2(!result,
+                 "Malformed XML in input (" << result.description() << "): " << str);
+
+  pugi::xml_node topNode = doc.child("xml");
+
+  std::vector<std::string> toks;
+  XMLParse(pool, system, 0, topNode, toks, xmlOptions);
+
+  // create words; same construction as the non-XML path in CreateFromString
+  size_t size = toks.size() + 2;
+  ret = new (pool.Allocate<Sentence>()) Sentence(pool, size);
+  ret->PhraseImplTemplate<SCFG::Word>::CreateFromString(vocab, system, toks, true);
+
+  // xml
+  for(size_t i=0; i<xmlOptions.size(); i++) {
+    const XMLOption *xmlOption = xmlOptions[i];
+    if (strcmp(xmlOption->GetNodeName(), "ne") == 0) {
+      // placeholder: only checked when an "ne" element is actually present
+      FactorType placeholderFactor = system.options.input.placeholder_factor;
+      UTIL_THROW_IF2(placeholderFactor == NOT_FOUND,
+                     "Placeholder XML in input. Must have argument -placeholder-factor [NUM]");
+      UTIL_THROW_IF2(xmlOption->phraseSize != 1,
+                     "Placeholder must only cover 1 word");
+
+      const Factor *factor = vocab.AddFactor(xmlOption->GetEntity(), system, false);
+      // +1: presumably skips the sentence-start slot at index 0 (TODO confirm)
+      (*ret)[xmlOption->startPos + 1][placeholderFactor] = factor;
+    } else {
+      // default - forced translation. Add to class variable
+      ret->AddXMLOption(system, xmlOption);
+    }
+  }
+
+  return ret;
+}
+
+// Walk the children of parentNode in document order.
+//
+// Text carried by a child is tokenized and appended to toks. Each child with
+// a non-empty name yields one pool-allocated XMLOption appended to
+// xmlOptions; its "translation", "entity" and "prob" attributes are copied
+// when present. The function then recurses into that child, and afterwards
+// sets the option's phraseSize to the number of tokens added since the
+// option's start position. depth is only passed on (incremented).
+void Sentence::XMLParse(
+  MemPool &pool,
+  const System &system,
+  size_t depth,
+  const pugi::xml_node &parentNode,
+  std::vector<std::string> &toks,
+  vector<XMLOption*> &xmlOptions)
+{
+  pugi::xml_node node = parentNode.first_child();
+  while (node) {
+    string tagName = node.name();
+    string text = node.value();
+
+    // token index at which this node's span begins
+    int startPos = toks.size();
+
+    if (!text.empty()) {
+      std::vector<std::string> words = Tokenize(text);
+      toks.insert(toks.end(), words.begin(), words.end());
+    }
+
+    if (!tagName.empty()) {
+      XMLOption *option = new (pool.Allocate<XMLOption>()) XMLOption(pool, tagName, startPos);
+
+      pugi::xml_attribute translation = node.attribute("translation");
+      if (!translation.empty()) {
+        option->SetTranslation(pool, translation.as_string());
+      }
+
+      pugi::xml_attribute entity = node.attribute("entity");
+      if (!entity.empty()) {
+        option->SetEntity(pool, entity.as_string());
+      }
+
+      pugi::xml_attribute prob = node.attribute("prob");
+      if (!prob.empty()) {
+        option->prob = prob.as_float();
+      }
+
+      // registered before recursing, so an outer element precedes its nested ones
+      xmlOptions.push_back(option);
+
+      // nested elements append their own tokens and options
+      XMLParse(pool, system, depth + 1, node, toks, xmlOptions);
+
+      size_t endPos = toks.size();
+      option->phraseSize = endPos - startPos;
+    }
+
+    node = node.next_sibling();
+  }
+}
+
+}
+} /* namespace Moses2 */
+
diff --git a/contrib/moses2/SCFG/Sentence.h b/moses2/SCFG/Sentence.h
index 7652a677e..1f4378caf 100644
--- a/contrib/moses2/SCFG/Sentence.h
+++ b/moses2/SCFG/Sentence.h
@@ -25,11 +25,11 @@ class Sentence: public InputType, public PhraseImpl
{
public:
static Sentence *CreateFromString(MemPool &pool, FactorCollection &vocab,
- const System &system, const std::string &str, long translationId);
+ const System &system, const std::string &str, long translationId);
Sentence(MemPool &pool, size_t size)
- :InputType(pool)
- ,PhraseImpl(pool, size)
+ :InputType(pool)
+ ,PhraseImpl(pool, size)
{}
virtual ~Sentence()
@@ -37,15 +37,15 @@ public:
protected:
static Sentence *CreateFromStringXML(MemPool &pool, FactorCollection &vocab,
- const System &system, const std::string &str);
+ const System &system, const std::string &str);
static void XMLParse(
- MemPool &pool,
- const System &system,
- size_t depth,
- const pugi::xml_node &parentNode,
- std::vector<std::string> &toks,
- std::vector<XMLOption*> &xmlOptions);
+ MemPool &pool,
+ const System &system,
+ size_t depth,
+ const pugi::xml_node &parentNode,
+ std::vector<std::string> &toks,
+ std::vector<XMLOption*> &xmlOptions);
};
diff --git a/contrib/moses2/SCFG/Stack.cpp b/moses2/SCFG/Stack.cpp
index 163761a49..25517d006 100644
--- a/contrib/moses2/SCFG/Stack.cpp
+++ b/moses2/SCFG/Stack.cpp
@@ -13,7 +13,7 @@ namespace SCFG
{
Stack::Stack(const Manager &mgr)
-:m_mgr(mgr)
+ :m_mgr(mgr)
{
}
@@ -26,7 +26,7 @@ Stack::~Stack()
}
void Stack::Add(SCFG::Hypothesis *hypo, Recycler<HypothesisBase*> &hypoRecycle,
- ArcLists &arcLists)
+ ArcLists &arcLists)
{
const SCFG::TargetPhraseImpl &tp = hypo->GetTargetPhrase();
const SCFG::Word &lhs = tp.lhs;
@@ -52,8 +52,7 @@ const Moses2::HypothesisColl *Stack::GetColl(const SCFG::Word &nt) const
Coll::const_iterator iter = m_coll.find(nt);
if (iter != m_coll.end()) {
return NULL;
- }
- else {
+ } else {
return iter->second;
}
}
@@ -66,8 +65,7 @@ Moses2::HypothesisColl &Stack::GetColl(const SCFG::Word &nt)
if (iter == m_coll.end()) {
ret = new Moses2::HypothesisColl(m_mgr);
m_coll[nt] = ret;
- }
- else {
+ } else {
ret = iter->second;
}
return *ret;
@@ -77,7 +75,7 @@ const Hypothesis *Stack::GetBestHypo() const
{
SCORE bestScore = -std::numeric_limits<SCORE>::infinity();
const HypothesisBase *bestHypo = NULL;
- BOOST_FOREACH(const Coll::value_type &val, m_coll){
+ BOOST_FOREACH(const Coll::value_type &val, m_coll) {
const Moses2::HypothesisColl &hypos = *val.second;
const Moses2::HypothesisBase *hypo = hypos.GetBestHypo();
diff --git a/contrib/moses2/SCFG/Stack.h b/moses2/SCFG/Stack.h
index eb7ce2706..413f0749b 100644
--- a/contrib/moses2/SCFG/Stack.h
+++ b/moses2/SCFG/Stack.h
@@ -22,15 +22,16 @@ public:
Stack(const Manager &mgr);
virtual ~Stack();
- const Coll &GetColl() const
- { return m_coll; }
+ const Coll &GetColl() const {
+ return m_coll;
+ }
const Moses2::HypothesisColl *GetColl(const SCFG::Word &nt) const;
size_t GetSize() const;
void Add(SCFG::Hypothesis *hypo, Recycler<HypothesisBase*> &hypoRecycle,
- ArcLists &arcLists);
+ ArcLists &arcLists);
const Hypothesis *GetBestHypo() const;
diff --git a/contrib/moses2/SCFG/Stacks.cpp b/moses2/SCFG/Stacks.cpp
index 63214c7c3..63214c7c3 100644
--- a/contrib/moses2/SCFG/Stacks.cpp
+++ b/moses2/SCFG/Stacks.cpp
diff --git a/contrib/moses2/SCFG/Stacks.h b/moses2/SCFG/Stacks.h
index 6594d5763..09aedb01e 100644
--- a/contrib/moses2/SCFG/Stacks.h
+++ b/moses2/SCFG/Stacks.h
@@ -17,16 +17,19 @@ public:
void Init(SCFG::Manager &mgr, size_t size);
- const Stack &GetStack(size_t startPos, size_t size) const
- { return *m_cells[startPos][size - 1]; }
+ const Stack &GetStack(size_t startPos, size_t size) const {
+ return *m_cells[startPos][size - 1];
+ }
- Stack &GetStack(size_t startPos, size_t size)
- { return *m_cells[startPos][size - 1]; }
+ Stack &GetStack(size_t startPos, size_t size) {
+ return *m_cells[startPos][size - 1];
+ }
void OutputStacks() const;
- const Stack &GetLastStack() const
- { return GetStack(0, m_cells.size()); }
+ const Stack &GetLastStack() const {
+ return GetStack(0, m_cells.size());
+ }
protected:
std::vector<std::vector<Stack*> > m_cells;
diff --git a/contrib/moses2/SCFG/TargetPhraseImpl.cpp b/moses2/SCFG/TargetPhraseImpl.cpp
index ebea6cef7..e58e057e4 100644
--- a/contrib/moses2/SCFG/TargetPhraseImpl.cpp
+++ b/moses2/SCFG/TargetPhraseImpl.cpp
@@ -30,8 +30,8 @@ TargetPhraseImpl *TargetPhraseImpl::CreateFromString(MemPool &pool,
vector<string> toks = Tokenize(str);
size_t size = toks.size() - 1;
TargetPhraseImpl *ret =
- new (pool.Allocate<TargetPhraseImpl>()) TargetPhraseImpl(pool, pt, system,
- size);
+ new (pool.Allocate<TargetPhraseImpl>()) TargetPhraseImpl(pool, pt, system,
+ size);
for (size_t i = 0; i < size; ++i) {
SCFG::Word &word = (*ret)[i];
@@ -45,11 +45,11 @@ TargetPhraseImpl *TargetPhraseImpl::CreateFromString(MemPool &pool,
}
TargetPhraseImpl::TargetPhraseImpl(MemPool &pool,
- const PhraseTable &pt,
- const System &system,
- size_t size)
-:Moses2::TargetPhrase<SCFG::Word>(pool, pt, system, size)
-,m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
+ const PhraseTable &pt,
+ const System &system,
+ size_t size)
+ :Moses2::TargetPhrase<SCFG::Word>(pool, pt, system, size)
+ ,m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
{
m_scores = new (pool.Allocate<Scores>()) Scores(system, pool,
@@ -111,13 +111,13 @@ void TargetPhraseImpl::SetAlignmentInfo(const std::string &alignString)
size_t TargetPhraseImpl::GetNumNonTerms() const
{
- size_t ret = 0;
- for (size_t i = 0; i < GetSize(); ++i) {
- if ((*this)[i].isNonTerminal) {
- ++ret;
- }
- }
- return ret;
+ size_t ret = 0;
+ for (size_t i = 0; i < GetSize(); ++i) {
+ if ((*this)[i].isNonTerminal) {
+ ++ret;
+ }
+ }
+ return ret;
}
diff --git a/contrib/moses2/SCFG/TargetPhraseImpl.h b/moses2/SCFG/TargetPhraseImpl.h
index f526d02e7..286ce2157 100644
--- a/contrib/moses2/SCFG/TargetPhraseImpl.h
+++ b/moses2/SCFG/TargetPhraseImpl.h
@@ -38,7 +38,7 @@ public:
const PhraseTable &pt, const System &system, const std::string &str);
TargetPhraseImpl(MemPool &pool, const PhraseTable &pt, const System &system,
- size_t size);
+ size_t size);
//TargetPhraseImpl(MemPool &pool, const System &system, const TargetPhraseImpl &copy);
virtual ~TargetPhraseImpl();
@@ -48,19 +48,22 @@ public:
}
void SetAlignNonTerm(const AlignmentInfo &alignInfo) {
- m_alignNonTerm = &alignInfo;
+ m_alignNonTerm = &alignInfo;
}
void SetAlignmentInfo(const std::string &alignString);
- SCORE GetFutureScore() const
- { return m_scores->GetTotalScore() + m_estimatedScore; }
+ SCORE GetFutureScore() const {
+ return m_scores->GetTotalScore() + m_estimatedScore;
+ }
- virtual SCORE GetScoreForPruning() const
- { return GetFutureScore(); }
+ virtual SCORE GetScoreForPruning() const {
+ return GetFutureScore();
+ }
- void SetEstimatedScore(const SCORE &value)
- { m_estimatedScore = value; }
+ void SetEstimatedScore(const SCORE &value) {
+ m_estimatedScore = value;
+ }
std::string Debug(const System &system) const;
diff --git a/contrib/moses2/SCFG/TargetPhrases.cpp b/moses2/SCFG/TargetPhrases.cpp
index f3d4b9790..fbef79e9c 100644
--- a/contrib/moses2/SCFG/TargetPhrases.cpp
+++ b/moses2/SCFG/TargetPhrases.cpp
@@ -18,12 +18,12 @@ namespace Moses2
namespace SCFG
{
TargetPhrases::TargetPhrases(MemPool &pool)
-:m_coll(pool)
+ :m_coll(pool)
{
}
TargetPhrases::TargetPhrases(MemPool &pool, size_t size)
-:m_coll(pool)
+ :m_coll(pool)
{
m_coll.reserve(size);
}
@@ -37,11 +37,11 @@ void TargetPhrases::SortAndPrune(size_t tableLimit)
{
iterator iterMiddle;
iterMiddle =
- (tableLimit == 0 || m_coll.size() < tableLimit) ?
- m_coll.end() : m_coll.begin() + tableLimit;
+ (tableLimit == 0 || m_coll.size() < tableLimit) ?
+ m_coll.end() : m_coll.begin() + tableLimit;
std::partial_sort(m_coll.begin(), iterMiddle, m_coll.end(),
- CompareScoreForPruning<SCFG::TargetPhraseImpl>());
+ CompareScoreForPruning<SCFG::TargetPhraseImpl>());
if (tableLimit && m_coll.size() > tableLimit) {
m_coll.resize(tableLimit);
diff --git a/contrib/moses2/SCFG/TargetPhrases.h b/moses2/SCFG/TargetPhrases.h
index 22502b3ef..8bdea7d09 100644
--- a/contrib/moses2/SCFG/TargetPhrases.h
+++ b/moses2/SCFG/TargetPhrases.h
@@ -27,17 +27,14 @@ public:
typedef Coll::iterator iterator;
typedef Coll::const_iterator const_iterator;
//! iterators
- const_iterator begin() const
- {
+ const_iterator begin() const {
return m_coll.begin();
}
- const_iterator end() const
- {
+ const_iterator end() const {
return m_coll.end();
}
- const SCFG::TargetPhraseImpl& operator[](size_t ind) const
- {
+ const SCFG::TargetPhraseImpl& operator[](size_t ind) const {
return *m_coll[ind];
}
@@ -45,11 +42,11 @@ public:
TargetPhrases(MemPool &pool, size_t size);
virtual ~TargetPhrases();
- size_t GetSize() const
- { return m_coll.size(); }
+ size_t GetSize() const {
+ return m_coll.size();
+ }
- void AddTargetPhrase(const SCFG::TargetPhraseImpl &targetPhrase)
- {
+ void AddTargetPhrase(const SCFG::TargetPhraseImpl &targetPhrase) {
m_coll.push_back(&targetPhrase);
}
diff --git a/contrib/moses2/SCFG/Word.cpp b/moses2/SCFG/Word.cpp
index 1794706da..8f67fb0fa 100644
--- a/contrib/moses2/SCFG/Word.cpp
+++ b/moses2/SCFG/Word.cpp
@@ -22,14 +22,14 @@ namespace Moses2
namespace SCFG
{
Word::Word(const SCFG::Word &copy)
-:Moses2::Word(copy)
-,isNonTerminal(copy.isNonTerminal)
+ :Moses2::Word(copy)
+ ,isNonTerminal(copy.isNonTerminal)
{
}
void Word::CreateFromString(FactorCollection &vocab,
- const System &system,
- const std::string &str)
+ const System &system,
+ const std::string &str)
{
vector<string> toks;
@@ -43,13 +43,11 @@ void Word::CreateFromString(FactorCollection &vocab,
assert(startPos != string::npos);
string str2 = str.substr(startPos + 1, str.size() - startPos - 2);
toks = Tokenize(str2, "|");
- }
- else {
+ } else {
string str2 = str.substr(1, str.size() - 2);
toks = Tokenize(str2, "|");
}
- }
- else {
+ } else {
isNonTerminal = false;
toks = Tokenize(str, "|");
}
@@ -75,9 +73,9 @@ size_t Word::hash(const std::vector<FactorType> &factors) const
{
size_t seed = isNonTerminal;
for (size_t i = 0; i < factors.size(); ++i) {
- FactorType factorType = factors[i];
- const Factor *factor = m_factors[factorType];
- boost::hash_combine(seed, factor);
+ FactorType factorType = factors[i];
+ const Factor *factor = m_factors[factorType];
+ boost::hash_combine(seed, factor);
}
return seed;
}
@@ -89,46 +87,46 @@ void Word::OutputToStream(const System &system, std::ostream &out) const
}
Moses2::Word::OutputToStream(system, out);
if (isNonTerminal) {
- out << "]";
+ out << "]";
}
}
void Word::OutputToStream(
- const ManagerBase &mgr,
- size_t targetPos,
- const SCFG::Hypothesis &hypo,
- std::ostream &out) const
+ const ManagerBase &mgr,
+ size_t targetPos,
+ const SCFG::Hypothesis &hypo,
+ std::ostream &out) const
{
const SCFG::TargetPhraseImpl &tp = hypo.GetTargetPhrase();
const SCFG::SymbolBind &symbolBind = hypo.GetSymbolBind();
- bool outputWord = true;
- if (mgr.system.options.input.placeholder_factor != NOT_FOUND) {
- const AlignmentInfo &alignInfo = tp.GetAlignTerm();
- std::set<size_t> sourceAligns = alignInfo.GetAlignmentsForTarget(targetPos);
- if (sourceAligns.size() == 1) {
- size_t sourcePos = *sourceAligns.begin();
- /*
- cerr << "sourcePos=" << sourcePos << endl;
- cerr << "tp=" << tp.Debug(mgr.system) << endl;
- cerr << "m_symbolBind=" << symbolBind.Debug(mgr.system) << endl;
- */
- assert(sourcePos < symbolBind.GetSize());
- const Range &inputRange = symbolBind.coll[sourcePos].GetRange();
- assert(inputRange.GetNumWordsCovered() == 1);
- const SCFG::Sentence &sentence = static_cast<const SCFG::Sentence &>(mgr.GetInput());
- const SCFG::Word &sourceWord = sentence[inputRange.GetStartPos()];
- const Factor *factor = sourceWord[mgr.system.options.input.placeholder_factor];
- if (factor) {
- out << factor->GetString();
- outputWord = false;
- }
- }
+ bool outputWord = true;
+ if (mgr.system.options.input.placeholder_factor != NOT_FOUND) {
+ const AlignmentInfo &alignInfo = tp.GetAlignTerm();
+ std::set<size_t> sourceAligns = alignInfo.GetAlignmentsForTarget(targetPos);
+ if (sourceAligns.size() == 1) {
+ size_t sourcePos = *sourceAligns.begin();
+ /*
+ cerr << "sourcePos=" << sourcePos << endl;
+ cerr << "tp=" << tp.Debug(mgr.system) << endl;
+ cerr << "m_symbolBind=" << symbolBind.Debug(mgr.system) << endl;
+ */
+ assert(sourcePos < symbolBind.GetSize());
+ const Range &inputRange = symbolBind.coll[sourcePos].GetRange();
+ assert(inputRange.GetNumWordsCovered() == 1);
+ const SCFG::Sentence &sentence = static_cast<const SCFG::Sentence &>(mgr.GetInput());
+ const SCFG::Word &sourceWord = sentence[inputRange.GetStartPos()];
+ const Factor *factor = sourceWord[mgr.system.options.input.placeholder_factor];
+ if (factor) {
+ out << factor->GetString();
+ outputWord = false;
+ }
}
+ }
- if (outputWord){
- OutputToStream(mgr.system, out);
- }
+ if (outputWord) {
+ OutputToStream(mgr.system, out);
+ }
}
std::string Word::Debug(const System &system) const
@@ -139,7 +137,7 @@ std::string Word::Debug(const System &system) const
}
out << Moses2::Word::Debug(system);
if (isNonTerminal) {
- out << "]";
+ out << "]";
}
return out.str();
}
diff --git a/contrib/moses2/SCFG/Word.h b/moses2/SCFG/Word.h
index 0c3aa158a..e039f92e8 100644
--- a/contrib/moses2/SCFG/Word.h
+++ b/moses2/SCFG/Word.h
@@ -26,16 +26,14 @@ public:
explicit Word(const SCFG::Word &copy);
void CreateFromString(FactorCollection &vocab,
- const System &system,
- const std::string &str);
+ const System &system,
+ const std::string &str);
- bool operator==(const SCFG::Word &compare) const
- {
+ bool operator==(const SCFG::Word &compare) const {
int cmp = Moses2::Word::Compare(compare);
if (cmp == 0 && isNonTerminal == compare.isNonTerminal) {
return true;
- }
- else {
+ } else {
return false;
}
}
@@ -45,10 +43,10 @@ public:
virtual void OutputToStream(const System &system, std::ostream &out) const;
virtual void OutputToStream(
- const ManagerBase &mgr,
- size_t targetPos,
- const SCFG::Hypothesis &hypo,
- std::ostream &out) const;
+ const ManagerBase &mgr,
+ size_t targetPos,
+ const SCFG::Hypothesis &hypo,
+ std::ostream &out) const;
virtual std::string Debug(const System &system) const;
@@ -56,7 +54,9 @@ protected:
};
inline size_t hash_value(const SCFG::Word &word)
-{ return word.hash(); }
+{
+ return word.hash();
+}
}
}
diff --git a/moses2/SCFG/nbest/KBestExtractor.cpp b/moses2/SCFG/nbest/KBestExtractor.cpp
new file mode 100644
index 000000000..14d12c4b5
--- /dev/null
+++ b/moses2/SCFG/nbest/KBestExtractor.cpp
@@ -0,0 +1,74 @@
+/*
+ * KBestExtractor.cpp
+ *
+ * Created on: 2 Aug 2016
+ * Author: hieu
+ */
+#include <boost/foreach.hpp>
+#include <sstream>
+#include "KBestExtractor.h"
+#include "../Manager.h"
+#include "../Hypothesis.h"
+#include "../Stacks.h"
+#include "../Stack.h"
+#include "../Sentence.h"
+#include "../../System.h"
+#include "../../Scores.h"
+#include "../../legacy/Util2.h"
+
+using namespace std;
+
+namespace Moses2
+{
+//bool g_debug = false;
+
+namespace SCFG
+{
+/////////////////////////////////////////////////////////////
+// Keep a reference to the manager; nothing is extracted until
+// OutputToStream() is called.
+KBestExtractor::KBestExtractor(const SCFG::Manager &mgr)
+  :m_mgr(mgr)
+{
+}
+
+// Nothing is released explicitly here.
+KBestExtractor::~KBestExtractor() {}
+
+// Write the n-best list for the current sentence to strm, one derivation
+// per line:
+//   <translation id> ||| <string without sentence markers> ||| <score breakdown>||| <total>
+//
+// Throws (UTIL_THROW_IF2) unless the last stack holds exactly one non-NULL
+// hypothesis collection containing exactly one hypothesis.
+void KBestExtractor::OutputToStream(std::stringstream &strm)
+{
+  const Stack &lastStack = m_mgr.GetStacks().GetLastStack();
+  UTIL_THROW_IF2(lastStack.GetColl().size() != 1, "Only suppose to be 1 hypo coll in last stack");
+  UTIL_THROW_IF2(lastStack.GetColl().begin()->second == NULL, "NULL hypo collection");
+
+  const Hypotheses &hypos = lastStack.GetColl().begin()->second->GetSortedAndPrunedHypos(m_mgr, m_mgr.arcLists);
+  UTIL_THROW_IF2(hypos.size() != 1, "Only suppose to be 1 hypo in collection");
+
+  // the n-best list hangs off the arc list of the single top hypothesis
+  const ArcLists &arcLists = m_mgr.arcLists;
+  const ArcList &topArcList = arcLists.GetArcList(hypos[0]);
+  NBests &nbests = m_nbestColl.GetOrCreateNBests(m_mgr, topArcList);
+
+  // Extend() makes entry `ind` available, or reports that none is left
+  for (size_t ind = 0; nbests.Extend(m_mgr, m_nbestColl, ind); ++ind) {
+    const NBest &deriv = nbests.Get(ind);
+
+    strm << m_mgr.GetTranslationId() << " ||| ";
+    strm << deriv.GetStringExclSentenceMarkers();
+    strm << " ||| ";
+    deriv.GetScores().OutputBreakdown(strm, m_mgr.system);
+    strm << "||| ";
+    strm << deriv.GetScores().GetTotalScore();
+
+    strm << endl;
+  }
+}
+
+}
+} /* namespace Moses2 */
diff --git a/contrib/moses2/SCFG/nbest/KBestExtractor.h b/moses2/SCFG/nbest/KBestExtractor.h
index 91b62d60b..91b62d60b 100644
--- a/contrib/moses2/SCFG/nbest/KBestExtractor.h
+++ b/moses2/SCFG/nbest/KBestExtractor.h
diff --git a/moses2/SCFG/nbest/NBest.cpp b/moses2/SCFG/nbest/NBest.cpp
new file mode 100644
index 000000000..1057fa004
--- /dev/null
+++ b/moses2/SCFG/nbest/NBest.cpp
@@ -0,0 +1,192 @@
+/*
+ * NBest.cpp
+ *
+ * Created on: 24 Aug 2016
+ * Author: hieu
+ */
+#include <sstream>
+#include <boost/foreach.hpp>
+#include "util/exception.hh"
+#include "NBest.h"
+#include "NBests.h"
+#include "NBestColl.h"
+#include "../Manager.h"
+#include "../TargetPhraseImpl.h"
+#include "../../System.h"
+
+using namespace std;
+
+namespace Moses2
+{
+namespace SCFG
+{
+
+// Create the derivation rooted at entry vind of varcList, with every child
+// pointing at index 0 of its own n-best list.
+//
+// Scores are copied from the selected hypothesis into a pool-allocated
+// Scores object, and the output string is rendered once and cached in m_str.
+NBest::NBest(
+  const SCFG::Manager &mgr,
+  const ArcList &varcList,
+  size_t vind,
+  NBestColl &nbestColl)
+  :arcList(&varcList)
+  ,arcInd(vind)
+{
+  const SCFG::Hypothesis &hypo = GetHypo();
+
+  // start from the scores of the selected hypothesis
+  MemPool &pool = mgr.GetPool();
+  m_scores = new (pool.Allocate<Scores>())
+  Scores(mgr.system, pool, mgr.system.featureFunctions.GetNumScores(), hypo.GetScores());
+
+  // one child per previous hypothesis, each starting at index 0 of its list
+  const ArcLists &arcLists = mgr.arcLists;
+  const Vector<const Hypothesis*> &prevHypos = hypo.GetPrevHypos();
+  for (size_t i = 0; i < prevHypos.size(); ++i) {
+    const ArcList &childArc = arcLists.GetArcList(prevHypos[i]);
+    NBests &childNBests = nbestColl.GetOrCreateNBests(mgr, childArc);
+    children.push_back(Child(&childNBests, 0));
+  }
+
+  // children must be in place before rendering: non-terminals read them
+  stringstream rendered;
+  OutputToStream(mgr, rendered);
+  m_str = rendered.str();
+}
+
+// Create a deviant of orig: same arc and children, except that child
+// childInd is advanced to the next entry of its n-best list.
+//
+// Throws (UTIL_THROW_IF2) if the advanced index is not available in the
+// child's list. The score is orig's score with the old child's score
+// subtracted and the new child's score added.
+NBest::NBest(const SCFG::Manager &mgr,
+             const NBest &orig,
+             size_t childInd,
+             NBestColl &nbestColl)
+  :arcList(orig.arcList)
+  ,arcInd(orig.arcInd)
+  ,children(orig.children)
+{
+  // step the chosen child on to its next-best entry
+  Child &advanced = children[childInd];
+  ++advanced.second;
+  UTIL_THROW_IF2(advanced.second >= advanced.first->GetSize(),
+                 "out of bound:" << advanced.second << ">=" << advanced.first->GetSize());
+
+  // begin with a copy of the original derivation's scores
+  MemPool &pool = mgr.GetPool();
+  m_scores = new (pool.Allocate<Scores>())
+  Scores(mgr.system,
+         pool,
+         mgr.system.featureFunctions.GetNumScores(),
+         orig.GetScores());
+
+  // swap the replaced child's contribution for the new one
+  const Scores &replacedScores = orig.GetChild(childInd).GetScores();
+  const Scores &replacementScores = GetChild(childInd).GetScores();
+  m_scores->MinusEquals(mgr.system, replacedScores);
+  m_scores->PlusEquals(mgr.system, replacementScores);
+
+  stringstream rendered;
+  OutputToStream(mgr, rendered);
+  m_str = rendered.str();
+}
+
+// The hypothesis at position arcInd of this derivation's arc list,
+// downcast to the SCFG hypothesis type.
+const SCFG::Hypothesis &NBest::GetHypo() const
+{
+  return *static_cast<const SCFG::Hypothesis*>((*arcList)[arcInd]);
+}
+
+// Resolve child number ind: look up the entry it points to inside the
+// n-best list it refers to.
+const NBest &NBest::GetChild(size_t ind) const
+{
+  const Child &link = children[ind];
+  const NBests &owner = *link.first;
+  return owner.Get(link.second);
+}
+
+
+// Push the successors of this derivation onto contenders.
+//
+//  1. If the arc list has a further entry and every child is still at
+//     index 0, a derivation for the next arc is pushed. (The original
+//     author notes being unsure whether this restriction is correct.)
+//  2. For each child whose n-best list can be extended by one entry, a copy
+//     of this derivation with that child advanced is pushed.
+void NBest::CreateDeviants(
+  const SCFG::Manager &mgr,
+  NBestColl &nbestColl,
+  Contenders &contenders) const
+{
+  const size_t numChildren = children.size();
+
+  if (arcInd + 1 < arcList->size()) {
+    // to use next arclist, all children must be 1st
+    bool allChildrenFirst = true;
+    for (size_t i = 0; allChildrenFirst && i < numChildren; ++i) {
+      allChildrenFirst = (children[i].second == 0);
+    }
+
+    if (allChildrenFirst) {
+      contenders.push(new NBest(mgr, *arcList, arcInd + 1, nbestColl));
+    }
+  }
+
+  for (size_t childInd = 0; childInd < numChildren; ++childInd) {
+    const Child &child = children[childInd];
+    // only deviate on this child if its list really has a next entry
+    if (child.first->Extend(mgr, nbestColl, child.second + 1)) {
+      contenders.push(new NBest(mgr, *this, childInd, nbestColl));
+    }
+  }
+}
+
+// Render this derivation's target side into strm.
+//
+// Terminals are written through Word::OutputToStream followed by a single
+// space. A non-terminal is replaced by the cached string of the child it is
+// aligned to. Throws (UTIL_THROW_IF2) if the aligned child index is outside
+// children.
+void NBest::OutputToStream(
+  const SCFG::Manager &mgr,
+  std::stringstream &strm) const
+{
+  const SCFG::Hypothesis &hypo = GetHypo();
+  const SCFG::TargetPhraseImpl &tp = hypo.GetTargetPhrase();
+
+  for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
+    const SCFG::Word &word = tp[targetPos];
+
+    if (!word.isNonTerminal) {
+      // terminal: emit the word itself plus a separator
+      word.OutputToStream(hypo.GetManager(), targetPos, hypo, strm);
+      strm << " ";
+      continue;
+    }
+
+    // non-term. fill out with the string of the aligned child
+    size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[targetPos];
+    UTIL_THROW_IF2(nonTermInd >= children.size(), "Out of bounds:" << nonTermInd << ">=" << children.size());
+
+    strm << GetChild(nonTermInd).GetString();
+  }
+}
+
+// One-line diagnostic dump: total score, arc list address, its size and the
+// selected arc index, then for every child the address of its n-best list,
+// that list's size, the selected index and the selected entry's total
+// score. The system argument is not used.
+std::string NBest::Debug(const System &system) const
+{
+  stringstream out;
+
+  out << GetScores().GetTotalScore() << " ";
+  out << arcList << "(" << arcList->size() << ")[" << arcInd << "] ";
+
+  for (size_t i = 0; i < children.size(); ++i) {
+    const Child &link = children[i];
+    const NBest &selected = link.first->Get(link.second);
+
+    out << link.first << "(" << link.first->GetSize() << ")[" << link.second << "]";
+    out << selected.GetScores().GetTotalScore() << " ";
+  }
+
+  return out.str();
+}
+
+}
+}
diff --git a/moses2/SCFG/nbest/NBest.h b/moses2/SCFG/nbest/NBest.h
new file mode 100644
index 000000000..6b406fa17
--- /dev/null
+++ b/moses2/SCFG/nbest/NBest.h
@@ -0,0 +1,99 @@
+/*
+ * NBest.h
+ *
+ * Created on: 24 Aug 2016
+ * Author: hieu
+ */
+
+#pragma once
+#include <queue>
+#include <vector>
+#include <string>
+#include <stdlib.h>
+#include "../../Scores.h"
+#include "../../ArcLists.h"
+
+namespace Moses2
+{
+class Scores;
+class System;
+
+namespace SCFG
+{
+class NBest;
+class NBests;
+class NBestScoreOrderer;
+class Manager;
+class NBestColl;
+class Hypothesis;
+
+/////////////////////////////////////////////////////////////
+typedef std::priority_queue<NBest*, std::vector<NBest*>, NBestScoreOrderer> Contenders;
+
+/////////////////////////////////////////////////////////////
+// One derivation in an n-best list.
+//
+// A derivation is identified by an entry of an arc list (arcList/arcInd)
+// plus, for each child, a reference to an entry of another n-best list.
+// Its scores and rendered target string are computed by the constructors.
+class NBest
+{
+public:
+  const ArcList *arcList; // arc list this derivation picks its hypothesis from
+  size_t arcInd;          // index of the picked hypothesis within arcList
+
+  typedef std::pair<NBests*, size_t> Child; // key to another NBest
+  typedef std::vector<Child> Children;
+  Children children;
+
+  // Derivation for entry vind of varcList, all children at index 0.
+  NBest(const SCFG::Manager &mgr,
+        const ArcList &varcList,
+        size_t vind,
+        NBestColl &nbestColl);
+
+  // Copy of orig with child childInd advanced by one.
+  NBest(const SCFG::Manager &mgr,
+        const NBest &orig,
+        size_t childInd,
+        NBestColl &nbestColl);
+
+
+  // Push this derivation's successors onto contenders.
+  void CreateDeviants(
+    const SCFG::Manager &mgr,
+    NBestColl &nbestColl,
+    Contenders &contenders) const;
+
+  const Scores &GetScores() const {
+    return *m_scores;
+  }
+
+  const NBest &GetChild(size_t ind) const;
+
+  // Cached target string, as rendered at construction time.
+  const std::string &GetString() const {
+    return m_str;
+  }
+
+  // Cached target string with the first 4 and last 6 characters removed -
+  // presumably the "<s> " prefix and " </s> " suffix (TODO confirm against
+  // the marker strings actually emitted).
+  //
+  // Returns an empty string when m_str is too short to hold both. Without
+  // this guard the unsigned length would underflow, leaking marker text
+  // for 4..9 characters and throwing std::out_of_range below 4.
+  std::string GetStringExclSentenceMarkers() const {
+    const size_t prefixLen = 4;
+    const size_t trimmedLen = 10; // prefix plus the 6-character suffix
+    if (m_str.size() < trimmedLen) {
+      return std::string();
+    }
+    return m_str.substr(prefixLen, m_str.size() - trimmedLen);
+  }
+
+  std::string Debug(const System &system) const;
+
+protected:
+  Scores *m_scores;  // allocated from the manager's pool by the constructors
+  std::string m_str; // rendered target string, set by the constructors
+
+  const SCFG::Hypothesis &GetHypo() const;
+
+  void OutputToStream(
+    const SCFG::Manager &mgr,
+    std::stringstream &strm) const;
+};
+
+/////////////////////////////////////////////////////////////
+// Comparator for the Contenders priority queue: a orders before b when a's
+// total score is lower, so std::priority_queue::top() yields the
+// highest-scoring derivation.
+class NBestScoreOrderer
+{
+public:
+  bool operator()(const NBest* a, const NBest* b) const {
+    return b->GetScores().GetTotalScore() > a->GetScores().GetTotalScore();
+  }
+};
+
+}
+}
+
diff --git a/contrib/moses2/SCFG/nbest/NBestColl.cpp b/moses2/SCFG/nbest/NBestColl.cpp
index 8cd386a08..38a9ac867 100644
--- a/contrib/moses2/SCFG/nbest/NBestColl.cpp
+++ b/moses2/SCFG/nbest/NBestColl.cpp
@@ -21,30 +21,29 @@ namespace SCFG
/////////////////////////////////////////////////////////////
NBestColl::~NBestColl()
{
- BOOST_FOREACH(const Coll::value_type &valPair, m_candidates) {
- NBests *nbests = valPair.second;
- delete nbests;
- }
+ BOOST_FOREACH(const Coll::value_type &valPair, m_candidates) {
+ NBests *nbests = valPair.second;
+ delete nbests;
+ }
}
void NBestColl::Add(const SCFG::Manager &mgr, const ArcList &arcList)
{
- NBests &nbests = GetOrCreateNBests(mgr, arcList);
- //cerr << "nbests for " << &nbests << ":";
+ NBests &nbests = GetOrCreateNBests(mgr, arcList);
+ //cerr << "nbests for " << &nbests << ":";
}
NBests &NBestColl::GetOrCreateNBests(const SCFG::Manager &mgr, const ArcList &arcList)
{
- NBests *ret;
- Coll::iterator iter = m_candidates.find(&arcList);
- if(iter == m_candidates.end()) {
- ret = new NBests(mgr, arcList, *this);
- m_candidates[&arcList] = ret;
- }
- else {
- ret = iter->second;
- }
- return *ret;
+ NBests *ret;
+ Coll::iterator iter = m_candidates.find(&arcList);
+ if(iter == m_candidates.end()) {
+ ret = new NBests(mgr, arcList, *this);
+ m_candidates[&arcList] = ret;
+ } else {
+ ret = iter->second;
+ }
+ return *ret;
}
diff --git a/contrib/moses2/SCFG/nbest/NBestColl.h b/moses2/SCFG/nbest/NBestColl.h
index 1ef8a5698..01e5763e4 100644
--- a/contrib/moses2/SCFG/nbest/NBestColl.h
+++ b/moses2/SCFG/nbest/NBestColl.h
@@ -19,14 +19,14 @@ class Manager;
class NBestColl
{
public:
- virtual ~NBestColl();
+ virtual ~NBestColl();
- void Add(const SCFG::Manager &mgr, const ArcList &arcList);
- NBests &GetOrCreateNBests(const SCFG::Manager &mgr, const ArcList &arcList);
+ void Add(const SCFG::Manager &mgr, const ArcList &arcList);
+ NBests &GetOrCreateNBests(const SCFG::Manager &mgr, const ArcList &arcList);
protected:
- typedef boost::unordered_map<const ArcList*, NBests*> Coll;
- Coll m_candidates;
+ typedef boost::unordered_map<const ArcList*, NBests*> Coll;
+ Coll m_candidates;
};
diff --git a/moses2/SCFG/nbest/NBests.cpp b/moses2/SCFG/nbest/NBests.cpp
new file mode 100644
index 000000000..27376977f
--- /dev/null
+++ b/moses2/SCFG/nbest/NBests.cpp
@@ -0,0 +1,109 @@
+/*
+ * NBests.cpp
+ *
+ * Created on: 24 Aug 2016
+ * Author: hieu
+ */
+
+#include <boost/foreach.hpp>
+#include "NBests.h"
+#include "../Manager.h"
+#include "../../System.h"
+
+using namespace std;
+
+namespace Moses2
+{
+namespace SCFG
+{
+NBests::NBests(const SCFG::Manager &mgr, // constructor: queues one initial contender and extracts entry 0 right away
+ const ArcList &arcList,
+ NBestColl &nbestColl)
+ :indIter(0) // no extraction iterations have been performed yet
+{
+ // best: the initial contender is built from arcList with 0 as its third argument
+ NBest *contender = new NBest(mgr, arcList, 0, nbestColl);
+ contenders.push(contender);
+ bool extended = Extend(mgr, nbestColl, 0); // kept outside the assert so the call still happens when asserts are compiled out
+ assert(extended);
+}
+
+NBests::~NBests() // deletes every NBest still held, both accepted entries and queued contenders
+{
+ BOOST_FOREACH(const NBest *nbest, m_coll) { // entries that were accepted through Add()
+ delete nbest;
+ }
+
+ // delete bad contenders left in queue
+ while (!contenders.empty()) {
+ NBest *contender = contenders.top();
+ contenders.pop();
+ delete contender;
+ }
+}
+
+bool NBests::Extend(const SCFG::Manager &mgr, // returns true if entry `ind` exists in m_coll after the call, false if it could not be produced
+ NBestColl &nbestColl,
+ size_t ind)
+{
+ if (ind < m_coll.size()) {
+ // asking for one we've done already
+ return true;
+ }
+
+ assert(ind == m_coll.size()); // entries are produced strictly in order, one per successful call
+
+ // checks: never go beyond the configured n-best size
+ if (ind >= mgr.system.options.nbest.nbest_size) {
+ return false;
+ }
+
+ size_t maxIter = mgr.system.options.nbest.nbest_size * mgr.system.options.nbest.factor; // upper bound for indIter, which is a member and keeps counting across calls
+
+ // MAIN LOOP, create 1 new deriv.
+ // The loop is for distinct nbest
+ bool ok = false;
+ while (!ok) {
+ ++indIter;
+ if (indIter > maxIter) {
+ return false;
+ }
+
+ if (contenders.empty()) {
+ return false;
+ }
+
+ NBest *contender = contenders.top();
+ contenders.pop();
+
+ contender->CreateDeviants(mgr, nbestColl, contenders); // receives the queue itself; presumably pushes follow-up candidates onto it - TODO confirm in NBest
+
+ if (mgr.system.options.nbest.only_distinct) {
+ const string &tgtPhrase = contender->GetString();
+ //cerr << "tgtPhrase=" << tgtPhrase << endl;
+ boost::hash<std::string> string_hash;
+ size_t hash = string_hash(tgtPhrase);
+
+ if (distinctHypos.insert(hash).second) { // .second is true only when this hash was not in the set; only the hash is compared, not the string
+ ok = true;
+ }
+ } else {
+ ok = true;
+ }
+
+ if (ok) {
+ Add(contender);
+ //cerr << best->GetScores().GetTotalScore() << " ";
+ //cerr << best->Debug(mgr.system) << endl;
+ return true;
+ } else {
+ delete contender; // hash of its string was already recorded: discard it and try the next contender
+ }
+ }
+
+ return false; // not reached: each pass through the loop either returns or leaves ok false and repeats
+}
+
+}
+}
+
diff --git a/moses2/SCFG/nbest/NBests.h b/moses2/SCFG/nbest/NBests.h
new file mode 100644
index 000000000..97fe9a025
--- /dev/null
+++ b/moses2/SCFG/nbest/NBests.h
@@ -0,0 +1,54 @@
+/*
+ * NBests.h
+ *
+ * Created on: 24 Aug 2016
+ * Author: hieu
+ */
+
+#pragma once
+#include <boost/unordered_set.hpp>
+#include "NBest.h"
+
+namespace Moses2
+{
+namespace SCFG
+{
+
+class NBests // holds the NBest entries extracted so far plus the candidates still waiting to be examined
+{
+public:
+ Contenders contenders; // pending candidates, accessed with push/top/pop; leftovers are deleted by the destructor
+ boost::unordered_set<size_t> distinctHypos; // hashes of the output strings accepted so far (filled only in distinct mode)
+
+ NBests(const SCFG::Manager &mgr,
+ const ArcList &arcList,
+ NBestColl &nbestColl);
+
+ virtual ~NBests();
+
+ size_t GetSize() const { // number of entries extracted so far
+ return m_coll.size();
+ }
+
+ const NBest &Get(size_t ind) const { // no bounds check is done here, so ind must be below GetSize()
+ return *m_coll[ind];
+ }
+
+ bool Extend(const SCFG::Manager &mgr,
+ NBestColl &nbestColl,
+ size_t ind);
+
+protected:
+ std::vector<const NBest*> m_coll; // accepted entries in the order they were added; deleted by the destructor
+ size_t indIter; // running count of loop iterations performed by Extend()
+
+ void Add(const NBest *nbest) { // appends nbest to m_coll
+ m_coll.push_back(nbest);
+ }
+
+};
+
+
+}
+}
+
diff --git a/contrib/moses2/Scores.cpp b/moses2/Scores.cpp
index b6e731807..6cf121422 100644
--- a/contrib/moses2/Scores.cpp
+++ b/moses2/Scores.cpp
@@ -22,26 +22,24 @@ namespace Moses2
{
Scores::Scores(const System &system, MemPool &pool, size_t numScores) :
- m_total(0)
+ m_total(0)
{
if (system.options.nbest.nbest_size) {
m_scores = new (pool.Allocate<SCORE>(numScores)) SCORE[numScores];
Init<SCORE>(m_scores, numScores, 0);
- }
- else {
+ } else {
m_scores = NULL;
}
}
Scores::Scores(const System &system, MemPool &pool, size_t numScores,
- const Scores &origScores) :
- m_total(origScores.m_total)
+ const Scores &origScores) :
+ m_total(origScores.m_total)
{
if (system.options.nbest.nbest_size) {
m_scores = new (pool.Allocate<SCORE>(numScores)) SCORE[numScores];
memcpy(m_scores, origScores.m_scores, sizeof(SCORE) * numScores);
- }
- else {
+ } else {
m_scores = NULL;
}
}
@@ -69,7 +67,7 @@ void Scores::Reset(const System &system)
}
void Scores::PlusEquals(const System &system,
- const FeatureFunction &featureFunction, const SCORE &score)
+ const FeatureFunction &featureFunction, const SCORE &score)
{
assert(featureFunction.GetNumScores() == 1);
@@ -84,7 +82,7 @@ void Scores::PlusEquals(const System &system,
}
void Scores::PlusEquals(const System &system,
- const FeatureFunction &featureFunction, const SCORE &score, size_t offset)
+ const FeatureFunction &featureFunction, const SCORE &score, size_t offset)
{
assert(offset < featureFunction.GetNumScores());
@@ -99,7 +97,7 @@ void Scores::PlusEquals(const System &system,
}
void Scores::PlusEquals(const System &system,
- const FeatureFunction &featureFunction, const std::vector<SCORE> &scores)
+ const FeatureFunction &featureFunction, const std::vector<SCORE> &scores)
{
assert(scores.size() == featureFunction.GetNumScores());
@@ -118,7 +116,7 @@ void Scores::PlusEquals(const System &system,
}
void Scores::PlusEquals(const System &system,
- const FeatureFunction &featureFunction, SCORE scores[])
+ const FeatureFunction &featureFunction, SCORE scores[])
{
//assert(scores.size() == featureFunction.GetNumScores());
@@ -159,7 +157,7 @@ void Scores::MinusEquals(const System &system, const Scores &other)
}
void Scores::Assign(const System &system,
- const FeatureFunction &featureFunction, const SCORE &score)
+ const FeatureFunction &featureFunction, const SCORE &score)
{
assert(featureFunction.GetNumScores() == 1);
@@ -177,7 +175,7 @@ void Scores::Assign(const System &system,
}
void Scores::Assign(const System &system,
- const FeatureFunction &featureFunction, const std::vector<SCORE> &scores)
+ const FeatureFunction &featureFunction, const std::vector<SCORE> &scores)
{
assert(scores.size() == featureFunction.GetNumScores());
@@ -198,13 +196,13 @@ void Scores::Assign(const System &system,
}
void Scores::CreateFromString(const std::string &str,
- const FeatureFunction &featureFunction, const System &system,
- bool transformScores)
+ const FeatureFunction &featureFunction, const System &system,
+ bool transformScores)
{
vector<SCORE> scores = Tokenize<SCORE>(str);
if (transformScores) {
std::transform(scores.begin(), scores.end(), scores.begin(),
- TransformScore);
+ TransformScore);
std::transform(scores.begin(), scores.end(), scores.begin(), FloorScore);
}
@@ -223,7 +221,7 @@ std::string Scores::Debug(const System &system) const
if (system.options.nbest.nbest_size) {
out << ", ";
- BOOST_FOREACH(const FeatureFunction *ff, system.featureFunctions.GetFeatureFunctions()){
+ BOOST_FOREACH(const FeatureFunction *ff, system.featureFunctions.GetFeatureFunctions()) {
out << ff->GetName() << "= ";
for (size_t i = ff->GetStartInd(); i < (ff->GetStartInd() + ff->GetNumScores()); ++i) {
out << m_scores[i] << " ";
@@ -237,7 +235,7 @@ std::string Scores::Debug(const System &system) const
void Scores::OutputBreakdown(std::ostream &out, const System &system) const
{
if (system.options.nbest.nbest_size) {
- BOOST_FOREACH(const FeatureFunction *ff, system.featureFunctions.GetFeatureFunctions()){
+ BOOST_FOREACH(const FeatureFunction *ff, system.featureFunctions.GetFeatureFunctions()) {
if (ff->IsTuneable()) {
out << ff->GetName() << "= ";
for (size_t i = ff->GetStartInd(); i < (ff->GetStartInd() + ff->GetNumScores()); ++i) {
@@ -250,7 +248,7 @@ void Scores::OutputBreakdown(std::ostream &out, const System &system) const
// static functions to work out estimated scores
SCORE Scores::CalcWeightedScore(const System &system,
- const FeatureFunction &featureFunction, SCORE scores[])
+ const FeatureFunction &featureFunction, SCORE scores[])
{
SCORE ret = 0;
@@ -269,7 +267,7 @@ SCORE Scores::CalcWeightedScore(const System &system,
}
SCORE Scores::CalcWeightedScore(const System &system,
- const FeatureFunction &featureFunction, SCORE score)
+ const FeatureFunction &featureFunction, SCORE score)
{
const Weights &weights = system.weights;
assert(featureFunction.GetNumScores() == 1);
diff --git a/contrib/moses2/Scores.h b/moses2/Scores.h
index ef4896ad1..5069fda36 100644
--- a/contrib/moses2/Scores.h
+++ b/moses2/Scores.h
@@ -23,42 +23,43 @@ class Scores
public:
Scores(const System &system, MemPool &pool, size_t numScores);
Scores(const System &system, MemPool &pool, size_t numScores,
- const Scores &origScores);
+ const Scores &origScores);
virtual ~Scores();
- SCORE GetTotalScore() const
- { return m_total; }
+ SCORE GetTotalScore() const {
+ return m_total;
+ }
const SCORE *GetScores(const FeatureFunction &featureFunction) const;
void Reset(const System &system);
void CreateFromString(const std::string &str,
- const FeatureFunction &featureFunction, const System &system,
- bool transformScores);
+ const FeatureFunction &featureFunction, const System &system,
+ bool transformScores);
void PlusEquals(const System &system, const FeatureFunction &featureFunction,
- const SCORE &score);
+ const SCORE &score);
void PlusEquals(const System &system, const FeatureFunction &featureFunction,
- const SCORE &score, size_t offset);
+ const SCORE &score, size_t offset);
void PlusEquals(const System &system, const FeatureFunction &featureFunction,
- const std::vector<SCORE> &scores);
+ const std::vector<SCORE> &scores);
void PlusEquals(const System &system, const FeatureFunction &featureFunction,
- SCORE scores[]);
+ SCORE scores[]);
void PlusEquals(const System &system, const Scores &scores);
void MinusEquals(const System &system, const Scores &scores);
void Assign(const System &system, const FeatureFunction &featureFunction,
- const SCORE &score);
+ const SCORE &score);
void Assign(const System &system, const FeatureFunction &featureFunction,
- const std::vector<SCORE> &scores);
+ const std::vector<SCORE> &scores);
std::string Debug(const System &system) const;
@@ -66,10 +67,10 @@ public:
// static functions to work out estimated scores
static SCORE CalcWeightedScore(const System &system,
- const FeatureFunction &featureFunction, SCORE scores[]);
+ const FeatureFunction &featureFunction, SCORE scores[]);
static SCORE CalcWeightedScore(const System &system,
- const FeatureFunction &featureFunction, SCORE score);
+ const FeatureFunction &featureFunction, SCORE score);
protected:
SCORE *m_scores;
diff --git a/contrib/moses2/SubPhrase.cpp b/moses2/SubPhrase.cpp
index 4d3c20f14..4d3c20f14 100644
--- a/contrib/moses2/SubPhrase.cpp
+++ b/moses2/SubPhrase.cpp
diff --git a/contrib/moses2/SubPhrase.h b/moses2/SubPhrase.h
index 893a7ba8f..21b003912 100644
--- a/contrib/moses2/SubPhrase.h
+++ b/moses2/SubPhrase.h
@@ -13,25 +13,25 @@ class SubPhrase: public Phrase<WORD>
{
public:
SubPhrase(const Phrase<WORD> &origPhrase, size_t start, size_t size)
- :m_origPhrase(&origPhrase)
- ,m_start(start)
- ,m_size(size)
+ :m_origPhrase(&origPhrase)
+ ,m_start(start)
+ ,m_size(size)
{}
- virtual const WORD& operator[](size_t pos) const
- { return (*m_origPhrase)[pos + m_start]; }
+ virtual const WORD& operator[](size_t pos) const {
+ return (*m_origPhrase)[pos + m_start];
+ }
- virtual size_t GetSize() const
- { return m_size; }
+ virtual size_t GetSize() const {
+ return m_size;
+ }
- SubPhrase GetSubPhrase(size_t start, size_t size) const
- {
+ SubPhrase GetSubPhrase(size_t start, size_t size) const {
SubPhrase ret(*m_origPhrase, m_start + start, size);
return ret;
}
- virtual std::string Debug(const System &system) const
- {
+ virtual std::string Debug(const System &system) const {
std::stringstream out;
if (GetSize()) {
out << (*this)[0].Debug(system);
diff --git a/contrib/moses2/System.cpp b/moses2/System.cpp
index 4da36690d..63df967fe 100644
--- a/contrib/moses2/System.cpp
+++ b/moses2/System.cpp
@@ -21,7 +21,7 @@ namespace Moses2
{
System::System(const Parameter &paramsArg) :
- params(paramsArg), featureFunctions(*this)
+ params(paramsArg), featureFunctions(*this)
{
options.init(paramsArg);
IsPb();
@@ -39,7 +39,7 @@ System::System(const Parameter &paramsArg) :
}
if (!options.output.detailed_transrep_filepath.empty()) {
- detailedTranslationCollector.reset(new OutputCollector(options.output.detailed_transrep_filepath));
+ detailedTranslationCollector.reset(new OutputCollector(options.output.detailed_transrep_filepath));
}
featureFunctions.Create();
@@ -91,17 +91,30 @@ void System::LoadWeights()
//cerr << "Weights:" << endl;
typedef std::map<std::string, std::vector<float> > WeightMap;
const WeightMap &allWeights = params.GetAllWeights();
+
+ // check all weights are there for all FF
+ const std::vector<const FeatureFunction*> &ffs = featureFunctions.GetFeatureFunctions();
+ BOOST_FOREACH(const FeatureFunction *ff, ffs) {
+ if (ff->IsTuneable()) {
+ const std::string &ffName = ff->GetName();
+ WeightMap::const_iterator iterWeight = allWeights.find(ffName);
+ UTIL_THROW_IF2(iterWeight == allWeights.end(), "Must specify weight for " << ffName);
+ }
+ }
+
+
+ // set weight
BOOST_FOREACH(const WeightMap::value_type &valPair, allWeights) {
- const string &ffName = valPair.first;
- const std::vector<float> &ffWeights = valPair.second;
- /*
- cerr << ffName << "=";
- for (size_t i = 0; i < ffWeights.size(); ++i) {
- cerr << ffWeights[i] << " ";
- }
- cerr << endl;
- */
- weights.SetWeights(featureFunctions, ffName, ffWeights);
+ const string &ffName = valPair.first;
+ const std::vector<float> &ffWeights = valPair.second;
+ /*
+ cerr << ffName << "=";
+ for (size_t i = 0; i < ffWeights.size(); ++i) {
+ cerr << ffWeights[i] << " ";
+ }
+ cerr << endl;
+ */
+ weights.SetWeights(featureFunctions, ffName, ffWeights);
}
}
@@ -110,20 +123,19 @@ void System::LoadMappings()
const PARAM_VEC *vec = params.GetParam("mapping");
UTIL_THROW_IF2(vec == NULL, "Must have [mapping] section");
- BOOST_FOREACH(const std::string &line, *vec){
- vector<string> toks = Tokenize(line);
- assert( (toks.size() == 2 && toks[0] == "T") || (toks.size() == 3 && toks[1] == "T") );
+ BOOST_FOREACH(const std::string &line, *vec) {
+ vector<string> toks = Tokenize(line);
+ assert( (toks.size() == 2 && toks[0] == "T") || (toks.size() == 3 && toks[1] == "T") );
- size_t ptInd;
- if (toks.size() == 2) {
- ptInd = Scan<size_t>(toks[1]);
- }
- else {
- ptInd = Scan<size_t>(toks[2]);
+ size_t ptInd;
+ if (toks.size() == 2) {
+ ptInd = Scan<size_t>(toks[1]);
+ } else {
+ ptInd = Scan<size_t>(toks[2]);
+ }
+ const PhraseTable *pt = featureFunctions.GetPhraseTableExcludeUnknownWordPenalty(ptInd);
+ mappings.push_back(pt);
}
- const PhraseTable *pt = featureFunctions.GetPhraseTableExcludeUnknownWordPenalty(ptInd);
- mappings.push_back(pt);
-}
// unk pt
const UnknownWordPenalty *unkWP = featureFunctions.GetUnknownWordPenalty();
@@ -137,17 +149,15 @@ void System::LoadDecodeGraphBackoff()
const PARAM_VEC *vec = params.GetParam("decoding-graph-backoff");
for (size_t i = 0; i < mappings.size(); ++i) {
- PhraseTable *pt = const_cast<PhraseTable*>(mappings[i]);
-
- if (vec && vec->size() < i) {
- pt->decodeGraphBackoff = Scan<int>((*vec)[i]);
- }
- else if (pt == featureFunctions.GetUnknownWordPenalty()) {
- pt->decodeGraphBackoff = 1;
- }
- else {
- pt->decodeGraphBackoff = 0;
- }
+ PhraseTable *pt = const_cast<PhraseTable*>(mappings[i]);
+
+ if (vec && i < vec->size()) { // use the configured value only when entry i really exists; the old test (size < i) was inverted and indexed past the end
+ pt->decodeGraphBackoff = Scan<int>((*vec)[i]);
+ } else if (pt == featureFunctions.GetUnknownWordPenalty()) {
+ pt->decodeGraphBackoff = 1; // fallback for the unknown-word table
+ } else {
+ pt->decodeGraphBackoff = 0; // fallback for every other table
+ }
}
}
diff --git a/contrib/moses2/System.h b/moses2/System.h
index 1d60e96a0..1d60e96a0 100644
--- a/contrib/moses2/System.h
+++ b/moses2/System.h
diff --git a/contrib/moses2/TargetPhrase.cpp b/moses2/TargetPhrase.cpp
index 600d41ae7..600d41ae7 100644
--- a/contrib/moses2/TargetPhrase.cpp
+++ b/moses2/TargetPhrase.cpp
diff --git a/contrib/moses2/TargetPhrase.h b/moses2/TargetPhrase.h
index 50f66326a..2522f85df 100644
--- a/contrib/moses2/TargetPhrase.h
+++ b/moses2/TargetPhrase.h
@@ -27,25 +27,27 @@ public:
SCORE *scoreProperties;
TargetPhrase(MemPool &pool, const PhraseTable &pt, const System &system, size_t size)
- : PhraseImplTemplate<WORD>(pool, size)
- , pt(pt)
- , scoreProperties(NULL)
- , m_alignTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
- {
+ : PhraseImplTemplate<WORD>(pool, size)
+ , pt(pt)
+ , scoreProperties(NULL)
+ , m_alignTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo()) {
m_scores = new (pool.Allocate<Scores>()) Scores(system, pool,
- system.featureFunctions.GetNumScores());
+ system.featureFunctions.GetNumScores());
}
- Scores &GetScores()
- { return *m_scores; }
+ Scores &GetScores() {
+ return *m_scores;
+ }
- const Scores &GetScores() const
- { return *m_scores; }
+ const Scores &GetScores() const {
+ return *m_scores;
+ }
virtual SCORE GetScoreForPruning() const = 0;
- SCORE *GetScoresProperty(int propertyInd) const
- { return scoreProperties ? scoreProperties + propertyInd : NULL; }
+ SCORE *GetScoresProperty(int propertyInd) const {
+ return scoreProperties ? scoreProperties + propertyInd : NULL;
+ }
const AlignmentInfo &GetAlignTerm() const {
return *m_alignTerm;
@@ -63,8 +65,7 @@ public:
m_alignTerm = AlignmentInfoCollection::Instance().Add(coll);
}
- virtual void SetAlignmentInfo(const std::string &alignString)
- {
+ virtual void SetAlignmentInfo(const std::string &alignString) {
AlignmentInfo::CollType alignTerm;
std::vector<std::string> toks = Tokenize(alignString);
@@ -86,35 +87,32 @@ public:
}
- void OutputToStream(const System &system, const Phrase<WORD> &inputPhrase, std::ostream &out) const
- {
- // get placeholders
- FactorType placeholderFactor = system.options.input.placeholder_factor;
- std::map<size_t, const Factor*> placeholders;
- if (placeholderFactor != NOT_FOUND) {
- // creates map of target position -> factor for placeholders
- placeholders = GetPlaceholders(system, inputPhrase);
- }
-
- size_t size = PhraseImplTemplate<WORD>::GetSize();
- for (size_t i = 0; i < size; ++i) {
- // output placeholder, if any
- std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(i);
- if (iter == placeholders.end()) {
- const WORD &word = (*this)[i];
- word.OutputToStream(system, out);
- }
- else {
- const Factor *factor = iter->second;
- out << *factor;
- }
-
- out << " ";
- }
+ void OutputToStream(const System &system, const Phrase<WORD> &inputPhrase, std::ostream &out) const {
+ // get placeholders
+ FactorType placeholderFactor = system.options.input.placeholder_factor;
+ std::map<size_t, const Factor*> placeholders;
+ if (placeholderFactor != NOT_FOUND) {
+ // creates map of target position -> factor for placeholders
+ placeholders = GetPlaceholders(system, inputPhrase);
+ }
+
+ size_t size = PhraseImplTemplate<WORD>::GetSize();
+ for (size_t i = 0; i < size; ++i) {
+ // output placeholder, if any
+ std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(i);
+ if (iter == placeholders.end()) {
+ const WORD &word = (*this)[i];
+ word.OutputToStream(system, out);
+ } else {
+ const Factor *factor = iter->second;
+ out << *factor;
+ }
+
+ out << " ";
+ }
}
- std::map<size_t, const Factor*> GetPlaceholders(const System &system, const Phrase<WORD> &inputPhrase) const
- {
+ std::map<size_t, const Factor*> GetPlaceholders(const System &system, const Phrase<WORD> &inputPhrase) const {
FactorType placeholderFactor = system.options.input.placeholder_factor;
std::map<size_t, const Factor*> ret;
//std::cerr << "inputPhrase=" << inputPhrase.Debug(system) << std::endl;
@@ -122,8 +120,8 @@ public:
for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) {
const Factor *factor = inputPhrase[sourcePos][placeholderFactor];
if (factor) {
- //std::cerr << "factor=" << *factor << std::endl;
- //std::cerr << "tp=" << Debug(system) << std::endl;
+ //std::cerr << "factor=" << *factor << std::endl;
+ //std::cerr << "tp=" << Debug(system) << std::endl;
std::set<size_t> targetPos = GetAlignTerm().GetAlignmentsForSource(sourcePos);
UTIL_THROW_IF2(targetPos.size() != 1,
"Placeholder should be aligned to 1, and only 1, word:" << targetPos.size() << "!=1");
@@ -134,8 +132,7 @@ public:
return ret;
}
- virtual std::string Debug(const System &system) const
- {
+ virtual std::string Debug(const System &system) const {
std::stringstream out;
out << Phrase<WORD>::Debug(system);
out << " pt=" << pt.GetName() << " ";
@@ -153,15 +150,12 @@ protected:
///////////////////////////////////////////////////////////////////////
template<typename TP>
-struct CompareScoreForPruning
-{
- bool operator()(const TP *a, const TP *b) const
- {
+struct CompareScoreForPruning {
+ bool operator()(const TP *a, const TP *b) const {
return a->GetScoreForPruning() > b->GetScoreForPruning();
}
- bool operator()(const TP &a, const TP &b) const
- {
+ bool operator()(const TP &a, const TP &b) const {
return a.GetScoreForPruning() > b.GetScoreForPruning();
}
};
diff --git a/contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.cpp b/moses2/TranslationModel/CompactPT/BlockHashIndex.cpp
index 338a8e221..47f03626a 100644
--- a/contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.cpp
+++ b/moses2/TranslationModel/CompactPT/BlockHashIndex.cpp
@@ -33,10 +33,10 @@ namespace Moses2
{
#ifdef WITH_THREADS
BlockHashIndex::BlockHashIndex(size_t orderBits, size_t fingerPrintBits,
- size_t threadsNum) :
- m_orderBits(orderBits), m_fingerPrintBits(fingerPrintBits), m_fileHandle(0), m_fileHandleStart(
- 0), m_landmarks(true), m_size(0), m_lastSaved(-1), m_lastDropped(-1), m_numLoadedRanges(
- 0), m_threadPool(threadsNum)
+ size_t threadsNum) :
+ m_orderBits(orderBits), m_fingerPrintBits(fingerPrintBits), m_fileHandle(0), m_fileHandleStart(
+ 0), m_landmarks(true), m_size(0), m_lastSaved(-1), m_lastDropped(-1), m_numLoadedRanges(
+ 0), m_threadPool(threadsNum)
{
#ifndef HAVE_CMPH
std::cerr << "minphr: CMPH support not compiled in." << std::endl;
@@ -45,9 +45,9 @@ BlockHashIndex::BlockHashIndex(size_t orderBits, size_t fingerPrintBits,
}
#else
BlockHashIndex::BlockHashIndex(size_t orderBits, size_t fingerPrintBits)
-: m_orderBits(orderBits), m_fingerPrintBits(fingerPrintBits),
-m_fileHandle(0), m_fileHandleStart(0), m_size(0),
-m_lastSaved(-1), m_lastDropped(-1), m_numLoadedRanges(0)
+ : m_orderBits(orderBits), m_fingerPrintBits(fingerPrintBits),
+ m_fileHandle(0), m_fileHandleStart(0), m_size(0),
+ m_lastSaved(-1), m_lastDropped(-1), m_numLoadedRanges(0)
{
#ifndef HAVE_CMPH
std::cerr << "minphr: CMPH support not compiled in." << std::endl;
@@ -60,11 +60,11 @@ BlockHashIndex::~BlockHashIndex()
{
#ifdef HAVE_CMPH
for (std::vector<void*>::iterator it = m_hashes.begin(); it != m_hashes.end();
- it++)
+ it++)
if (*it != 0) cmph_destroy((cmph_t*) *it);
for (std::vector<PairedPackedArray<>*>::iterator it = m_arrays.begin();
- it != m_arrays.end(); it++)
+ it != m_arrays.end(); it++)
if (*it != 0) delete *it;
#endif
}
@@ -73,7 +73,7 @@ size_t BlockHashIndex::GetHash(const char* key)
{
std::string keyStr(key);
size_t i = std::distance(m_landmarks.begin(),
- std::upper_bound(m_landmarks.begin(), m_landmarks.end(), keyStr)) - 1;
+ std::upper_bound(m_landmarks.begin(), m_landmarks.end(), keyStr)) - 1;
if (i == 0ul - 1) return GetSize();
@@ -99,14 +99,14 @@ size_t BlockHashIndex::GetHash(size_t i, const char* key)
//LoadRange(i);
#ifdef HAVE_CMPH
size_t idx = cmph_search((cmph_t*) m_hashes[i], key,
- (cmph_uint32) strlen(key));
+ (cmph_uint32) strlen(key));
#else
assert(0);
size_t idx = 0;
#endif
std::pair<size_t, size_t> orderPrint = m_arrays[i]->Get(idx, m_orderBits,
- m_fingerPrintBits);
+ m_fingerPrintBits);
m_clocks[i] = clock();
if (GetFprint(key) == orderPrint.second) return orderPrint.first;
@@ -229,7 +229,7 @@ size_t BlockHashIndex::FinalizeSave()
size_t fileHandleStop = std::ftell(m_fileHandle);
return fileHandleStop - m_fileHandleStart + sizeof(m_orderBits)
- + sizeof(m_fingerPrintBits);
+ + sizeof(m_fingerPrintBits);
}
size_t BlockHashIndex::Save(std::FILE * mphf)
@@ -262,7 +262,7 @@ size_t BlockHashIndex::LoadIndex(std::FILE* mphf)
read += std::fread(&seekIndexSize, sizeof(size_t), 1, m_fileHandle);
m_seekIndex.resize(seekIndexSize);
read += std::fread(&m_seekIndex[0], sizeof(size_t), seekIndexSize,
- m_fileHandle);
+ m_fileHandle);
m_hashes.resize(seekIndexSize, 0);
m_clocks.resize(seekIndexSize, 0);
m_arrays.resize(seekIndexSize, 0);
@@ -403,13 +403,13 @@ void* BlockHashIndex::vectorAdapter(std::vector<std::string>& v)
}
void* BlockHashIndex::vectorAdapter(
- StringVector<unsigned, size_t, std::allocator>& sv)
+ StringVector<unsigned, size_t, std::allocator>& sv)
{
return (void*) CmphStringVectorAdapter(sv);
}
void* BlockHashIndex::vectorAdapter(
- StringVector<unsigned, size_t, MmapAllocator>& sv)
+ StringVector<unsigned, size_t, MmapAllocator>& sv)
{
return (void*) CmphStringVectorAdapter(sv);
}
diff --git a/contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h b/moses2/TranslationModel/CompactPT/BlockHashIndex.h
index b91ef8f6c..10c55601e 100644
--- a/contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h
+++ b/moses2/TranslationModel/CompactPT/BlockHashIndex.h
@@ -81,17 +81,14 @@ private:
{
public:
HashTask(int id, BlockHashIndex& hash, Keys& keys) :
- m_id(id), m_hash(hash), m_keys(new Keys(keys))
- {
+ m_id(id), m_hash(hash), m_keys(new Keys(keys)) {
}
- virtual void Run()
- {
+ virtual void Run() {
m_hash.CalcHash(m_id, *m_keys);
}
- virtual ~HashTask()
- {
+ virtual ~HashTask() {
delete m_keys;
}
@@ -108,7 +105,7 @@ private:
public:
#ifdef WITH_THREADS
BlockHashIndex(size_t orderBits, size_t fingerPrintBits,
- size_t threadsNum = 2);
+ size_t threadsNum = 2);
#else
BlockHashIndex(size_t orderBits, size_t fingerPrintBits);
#endif
@@ -147,8 +144,7 @@ public:
void KeepNLastRanges(float ratio = 0.1, float tolerance = 0.1);
template<typename Keys>
- void AddRange(Keys &keys)
- {
+ void AddRange(Keys &keys) {
size_t current = m_landmarks.size();
if (m_landmarks.size() && m_landmarks.back().str() >= keys[0]) {
@@ -171,7 +167,7 @@ public:
#ifdef WITH_THREADS
boost::shared_ptr<HashTask<Keys> > ht(
- new HashTask<Keys>(current, *this, keys));
+ new HashTask<Keys>(current, *this, keys));
m_threadPool.Submit(ht);
#else
CalcHash(current, keys);
@@ -179,8 +175,7 @@ public:
}
template<typename Keys>
- void CalcHash(size_t current, Keys &keys)
- {
+ void CalcHash(size_t current, Keys &keys) {
#ifdef HAVE_CMPH
void* source = vectorAdapter(keys);
CalcHash(current, source);
diff --git a/contrib/moses2/TranslationModel/CompactPT/CanonicalHuffman.h b/moses2/TranslationModel/CompactPT/CanonicalHuffman.h
index ffb6488c0..eb11c730a 100644
--- a/contrib/moses2/TranslationModel/CompactPT/CanonicalHuffman.h
+++ b/moses2/TranslationModel/CompactPT/CanonicalHuffman.h
@@ -43,24 +43,20 @@ private:
typedef boost::unordered_map<Data, boost::dynamic_bitset<> > EncodeMap;
EncodeMap m_encodeMap;
- struct MinHeapSorter
- {
+ struct MinHeapSorter {
std::vector<size_t>& m_vec;
MinHeapSorter(std::vector<size_t>& vec) :
- m_vec(vec)
- {
+ m_vec(vec) {
}
- bool operator()(size_t a, size_t b)
- {
+ bool operator()(size_t a, size_t b) {
return m_vec[a] > m_vec[b];
}
};
template<class Iterator>
- void CalcLengths(Iterator begin, Iterator end, std::vector<size_t>& lengths)
- {
+ void CalcLengths(Iterator begin, Iterator end, std::vector<size_t>& lengths) {
size_t n = std::distance(begin, end);
std::vector<size_t> A(2 * n, 0);
@@ -109,11 +105,10 @@ private:
lengths[i] = A[i + n];
}
- void CalcCodes(std::vector<size_t>& lengths)
- {
+ void CalcCodes(std::vector<size_t>& lengths) {
std::vector<size_t> numLength;
for (std::vector<size_t>::iterator it = lengths.begin();
- it != lengths.end(); it++) {
+ it != lengths.end(); it++) {
size_t length = *it;
if (numLength.size() <= length) numLength.resize(length + 1, 0);
numLength[length]++;
@@ -139,7 +134,7 @@ private:
size_t length = lengths[i];
size_t pos = m_lengthIndex[length]
- + (nextCode[length] - m_firstCodes[length]);
+ + (nextCode[length] - m_firstCodes[length]);
t_symbols[pos] = data;
nextCode[length] = nextCode[length] + 1;
@@ -148,13 +143,12 @@ private:
m_symbols.swap(t_symbols);
}
- void CreateCodeMap()
- {
+ void CreateCodeMap() {
for (size_t l = 1; l < m_lengthIndex.size(); l++) {
size_t intCode = m_firstCodes[l];
size_t num = (
- (l + 1 < m_lengthIndex.size()) ?
- m_lengthIndex[l + 1] : m_symbols.size()) - m_lengthIndex[l];
+ (l + 1 < m_lengthIndex.size()) ?
+ m_lengthIndex[l + 1] : m_symbols.size()) - m_lengthIndex[l];
for (size_t i = 0; i < num; i++) {
Data data = m_symbols[m_lengthIndex[l] + i];
@@ -165,17 +159,15 @@ private:
}
}
- const boost::dynamic_bitset<>& Encode(Data data) const
- {
+ const boost::dynamic_bitset<>& Encode(Data data) const {
typename EncodeMap::const_iterator it = m_encodeMap.find(data);
UTIL_THROW_IF2(it == m_encodeMap.end(),
- "Cannot find symbol in encoding map");
+ "Cannot find symbol in encoding map");
return it->second;
}
template<class BitWrapper>
- void PutCode(BitWrapper& bitWrapper, const boost::dynamic_bitset<>& code)
- {
+ void PutCode(BitWrapper& bitWrapper, const boost::dynamic_bitset<>& code) {
for (int j = code.size() - 1; j >= 0; j--)
bitWrapper.Put(code[j]);
}
@@ -183,8 +175,7 @@ private:
public:
template<class Iterator>
- CanonicalHuffman(Iterator begin, Iterator end, bool forEncoding = true)
- {
+ CanonicalHuffman(Iterator begin, Iterator end, bool forEncoding = true) {
std::vector<size_t> lengths;
CalcLengths(begin, end, lengths);
CalcCodes(lengths);
@@ -192,22 +183,19 @@ public:
if (forEncoding) CreateCodeMap();
}
- CanonicalHuffman(std::FILE* pFile, bool forEncoding = false)
- {
+ CanonicalHuffman(std::FILE* pFile, bool forEncoding = false) {
Load(pFile);
if (forEncoding) CreateCodeMap();
}
template<class BitWrapper>
- void Put(BitWrapper& bitWrapper, Data data)
- {
+ void Put(BitWrapper& bitWrapper, Data data) {
PutCode(bitWrapper, Encode(data));
}
template<class BitWrapper>
- Data Read(BitWrapper& bitWrapper)
- {
+ Data Read(BitWrapper& bitWrapper) {
if (bitWrapper.TellFromEnd()) {
size_t intCode = bitWrapper.Read();
size_t len = 1;
@@ -220,8 +208,7 @@ public:
return Data();
}
- size_t Load(std::FILE* pFile)
- {
+ size_t Load(std::FILE* pFile) {
size_t start = std::ftell(pFile);
size_t read = 0;
@@ -241,8 +228,7 @@ public:
return std::ftell(pFile) - start;
}
- size_t Save(std::FILE* pFile)
- {
+ size_t Save(std::FILE* pFile) {
size_t start = std::ftell(pFile);
size_t size = m_symbols.size();
@@ -277,24 +263,20 @@ private:
public:
BitWrapper(Container &data) :
- m_data(data), m_iterator(m_data.begin()), m_currentValue(0), m_valueBits(
- sizeof(typename Container::value_type) * 8), m_mask(1), m_bitPos(0)
- {
+ m_data(data), m_iterator(m_data.begin()), m_currentValue(0), m_valueBits(
+ sizeof(typename Container::value_type) * 8), m_mask(1), m_bitPos(0) {
}
- bool Read()
- {
+ bool Read() {
if (m_bitPos % m_valueBits == 0) {
if (m_iterator != m_data.end()) m_currentValue = *m_iterator++;
- }
- else m_currentValue = m_currentValue >> 1;
+ } else m_currentValue = m_currentValue >> 1;
m_bitPos++;
return (m_currentValue & m_mask);
}
- void Put(bool bit)
- {
+ void Put(bool bit) {
if (m_bitPos % m_valueBits == 0) m_data.push_back(0);
if (bit) m_data[m_data.size() - 1] |= m_mask << (m_bitPos % m_valueBits);
@@ -302,40 +284,34 @@ public:
m_bitPos++;
}
- size_t Tell()
- {
+ size_t Tell() {
return m_bitPos;
}
- size_t TellFromEnd()
- {
+ size_t TellFromEnd() {
if (m_data.size() * m_valueBits < m_bitPos) return 0;
return m_data.size() * m_valueBits - m_bitPos;
}
- void Seek(size_t bitPos)
- {
+ void Seek(size_t bitPos) {
m_bitPos = bitPos;
m_iterator = m_data.begin() + int((m_bitPos - 1) / m_valueBits);
m_currentValue = (*m_iterator) >> ((m_bitPos - 1) % m_valueBits);
m_iterator++;
}
- void SeekFromEnd(size_t bitPosFromEnd)
- {
+ void SeekFromEnd(size_t bitPosFromEnd) {
size_t bitPos = m_data.size() * m_valueBits - bitPosFromEnd;
Seek(bitPos);
}
- void Reset()
- {
+ void Reset() {
m_iterator = m_data.begin();
m_currentValue = 0;
m_bitPos = 0;
}
- Container& GetContainer()
- {
+ Container& GetContainer() {
return m_data;
}
};
diff --git a/contrib/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp b/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp
index a51dc5a45..8dc3ebde6 100644
--- a/contrib/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp
+++ b/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp
@@ -42,7 +42,7 @@ void CmphStringVectorAdapterRewind(void *data)
cmph_io_adapter_t *CmphVectorAdapterNew(std::vector<std::string>& v)
{
cmph_io_adapter_t * key_source = (cmph_io_adapter_t *) malloc(
- sizeof(cmph_io_adapter_t));
+ sizeof(cmph_io_adapter_t));
cmph_vector_t * cmph_vector = (cmph_vector_t *) malloc(sizeof(cmph_vector_t));
assert(key_source);
assert(cmph_vector);
diff --git a/contrib/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.h b/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.h
index 20d43a80c..8d23b4f41 100644
--- a/contrib/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.h
+++ b/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.h
@@ -33,18 +33,17 @@
namespace Moses2
{
-typedef struct
-{
+typedef struct {
void *vector;
cmph_uint32 position;
} cmph_vector_t;
template<typename ValueT, typename PosT, template<typename > class Allocator>
cmph_io_adapter_t *CmphStringVectorAdapterNew(
- StringVector<ValueT, PosT, Allocator>& sv)
+ StringVector<ValueT, PosT, Allocator>& sv)
{
cmph_io_adapter_t * key_source = (cmph_io_adapter_t *) malloc(
- sizeof(cmph_io_adapter_t));
+ sizeof(cmph_io_adapter_t));
cmph_vector_t * cmph_vector = (cmph_vector_t *) malloc(sizeof(cmph_vector_t));
assert(key_source);
assert(cmph_vector);
@@ -79,7 +78,7 @@ void CmphStringVectorAdapterRewind(void *data);
template<typename ValueT, typename PosT, template<typename > class Allocator>
cmph_io_adapter_t* CmphStringVectorAdapter(
- StringVector<ValueT, PosT, Allocator>& sv)
+ StringVector<ValueT, PosT, Allocator>& sv)
{
cmph_io_adapter_t * key_source = CmphStringVectorAdapterNew(sv);
diff --git a/contrib/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp b/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp
index 1d32b9a6f..051116dec 100644
--- a/contrib/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp
+++ b/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp
@@ -32,23 +32,23 @@ namespace Moses2
bool LexicalReorderingTableCompact::s_inMemoryByDefault = false;
LexicalReorderingTableCompact::LexicalReorderingTableCompact(
- const std::string& filePath, const std::vector<FactorType>& f_factors,
- const std::vector<FactorType>& e_factors,
- const std::vector<FactorType>& c_factors) :
- LexicalReorderingTable(f_factors, e_factors, c_factors), m_inMemory(
- s_inMemoryByDefault), m_numScoreComponent(6), m_multipleScoreTrees(
- true), m_hash(10, 16), m_scoreTrees(1)
+ const std::string& filePath, const std::vector<FactorType>& f_factors,
+ const std::vector<FactorType>& e_factors,
+ const std::vector<FactorType>& c_factors) :
+ LexicalReorderingTable(f_factors, e_factors, c_factors), m_inMemory(
+ s_inMemoryByDefault), m_numScoreComponent(6), m_multipleScoreTrees(
+ true), m_hash(10, 16), m_scoreTrees(1)
{
Load(filePath);
}
LexicalReorderingTableCompact::LexicalReorderingTableCompact(
- const std::vector<FactorType>& f_factors,
- const std::vector<FactorType>& e_factors,
- const std::vector<FactorType>& c_factors) :
- LexicalReorderingTable(f_factors, e_factors, c_factors), m_inMemory(
- s_inMemoryByDefault), m_numScoreComponent(6), m_multipleScoreTrees(
- true), m_hash(10, 16), m_scoreTrees(1)
+ const std::vector<FactorType>& f_factors,
+ const std::vector<FactorType>& e_factors,
+ const std::vector<FactorType>& c_factors) :
+ LexicalReorderingTable(f_factors, e_factors, c_factors), m_inMemory(
+ s_inMemoryByDefault), m_numScoreComponent(6), m_multipleScoreTrees(
+ true), m_hash(10, 16), m_scoreTrees(1)
{
}
@@ -81,7 +81,7 @@ std::vector<float> LexicalReorderingTableCompact::GetScore(const Phrase<Moses2::
BitWrapper<> bitStream(scoresString);
for (size_t i = 0; i < m_numScoreComponent; i++)
scores.push_back(
- m_scoreTrees[m_multipleScoreTrees ? i : 0]->Read(bitStream));
+ m_scoreTrees[m_multipleScoreTrees ? i : 0]->Read(bitStream));
return scores;
}
@@ -93,7 +93,7 @@ std::string LexicalReorderingTableCompact::MakeKey(const Phrase<Moses2::Word>& f
const Phrase<Moses2::Word>& e, const Phrase<Moses2::Word>& c) const
{
return MakeKey(Trim(f.GetString(m_FactorsF)), Trim(e.GetString(m_FactorsE)),
- Trim(c.GetString(m_FactorsC)));
+ Trim(c.GetString(m_FactorsC)));
}
std::string LexicalReorderingTableCompact::MakeKey(const std::string& f,
@@ -126,7 +126,7 @@ LexicalReorderingTableCompact::CheckAndLoad(const std::string& filePath,
//there exists a compact binary version use that
std::cerr << "Using compact lexical reordering table" << std::endl;
return new LexicalReorderingTableCompact(filePath + minlexr, f_factors,
- e_factors, c_factors);
+ e_factors, c_factors);
}
// file name is specified with suffix
if (filePath.substr(filePath.length() - minlexr.length(), minlexr.length())
@@ -134,7 +134,7 @@ LexicalReorderingTableCompact::CheckAndLoad(const std::string& filePath,
//there exists a compact binary version use that
std::cerr << "Using compact lexical reordering table" << std::endl;
return new LexicalReorderingTableCompact(filePath, f_factors, e_factors,
- c_factors);
+ c_factors);
}
#endif
return 0;
@@ -152,16 +152,15 @@ void LexicalReorderingTableCompact::Load(std::string filePath)
size_t read = 0;
read += std::fread(&m_numScoreComponent, sizeof(m_numScoreComponent), 1,
- pFile);
+ pFile);
read += std::fread(&m_multipleScoreTrees, sizeof(m_multipleScoreTrees), 1,
- pFile);
+ pFile);
if (m_multipleScoreTrees) {
m_scoreTrees.resize(m_numScoreComponent);
for (size_t i = 0; i < m_numScoreComponent; i++)
m_scoreTrees[i] = new CanonicalHuffman<float>(pFile);
- }
- else {
+ } else {
m_scoreTrees.resize(1);
m_scoreTrees[0] = new CanonicalHuffman<float>(pFile);
}
diff --git a/contrib/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.h b/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.h
index 90abf4197..cef6ae108 100644
--- a/contrib/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.h
+++ b/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.h
@@ -36,13 +36,11 @@ class LexicalReorderingTable
{
public:
LexicalReorderingTable(const FactorList& f_factors,
- const FactorList& e_factors, const FactorList& c_factors) :
- m_FactorsF(f_factors), m_FactorsE(e_factors), m_FactorsC(c_factors)
- {
+ const FactorList& e_factors, const FactorList& c_factors) :
+ m_FactorsF(f_factors), m_FactorsE(e_factors), m_FactorsC(c_factors) {
}
- virtual ~LexicalReorderingTable()
- {
+ virtual ~LexicalReorderingTable() {
}
public:
@@ -51,33 +49,27 @@ public:
GetScore(const Phrase<Moses2::Word>& f, const Phrase<Moses2::Word>& e, const Phrase<Moses2::Word>& c) = 0;
virtual
- void InitializeForInput()
- {
+ void InitializeForInput() {
/* override for on-demand loading */
}
;
virtual
- void InitializeForInputPhrase(const Phrase<Moses2::Word>&)
- {
+ void InitializeForInputPhrase(const Phrase<Moses2::Word>&) {
}
- const FactorList& GetFFactorMask() const
- {
+ const FactorList& GetFFactorMask() const {
return m_FactorsF;
}
- const FactorList& GetEFactorMask() const
- {
+ const FactorList& GetEFactorMask() const {
return m_FactorsE;
}
- const FactorList& GetCFactorMask() const
- {
+ const FactorList& GetCFactorMask() const {
return m_FactorsC;
}
virtual
- void DbgDump(std::ostream* out) const
- {
+ void DbgDump(std::ostream* out) const {
*out << "Overwrite in subclass...\n";
}
;
@@ -109,17 +101,17 @@ private:
std::string MakeKey(const Phrase<Moses2::Word>& f, const Phrase<Moses2::Word>& e, const Phrase<Moses2::Word>& c) const;
std::string MakeKey(const std::string& f, const std::string& e,
- const std::string& c) const;
+ const std::string& c) const;
public:
LexicalReorderingTableCompact(const std::string& filePath,
- const std::vector<FactorType>& f_factors,
- const std::vector<FactorType>& e_factors,
- const std::vector<FactorType>& c_factors);
+ const std::vector<FactorType>& f_factors,
+ const std::vector<FactorType>& e_factors,
+ const std::vector<FactorType>& c_factors);
LexicalReorderingTableCompact(const std::vector<FactorType>& f_factors,
- const std::vector<FactorType>& e_factors,
- const std::vector<FactorType>& c_factors);
+ const std::vector<FactorType>& e_factors,
+ const std::vector<FactorType>& c_factors);
virtual
~LexicalReorderingTableCompact();
@@ -129,9 +121,9 @@ public:
static LexicalReorderingTable*
CheckAndLoad(const std::string& filePath,
- const std::vector<FactorType>& f_factors,
- const std::vector<FactorType>& e_factors,
- const std::vector<FactorType>& c_factors);
+ const std::vector<FactorType>& f_factors,
+ const std::vector<FactorType>& e_factors,
+ const std::vector<FactorType>& c_factors);
void
Load(std::string filePath);
diff --git a/contrib/moses2/TranslationModel/CompactPT/ListCoders.h b/moses2/TranslationModel/CompactPT/ListCoders.h
index 5a01274d9..540f50a59 100644
--- a/contrib/moses2/TranslationModel/CompactPT/ListCoders.h
+++ b/moses2/TranslationModel/CompactPT/ListCoders.h
@@ -33,8 +33,7 @@ class VarIntType
{
private:
template<typename IntType, typename OutIt>
- static void EncodeSymbol(IntType input, OutIt output)
- {
+ static void EncodeSymbol(IntType input, OutIt output) {
if (input == 0) {
*output = 0;
output++;
@@ -56,8 +55,7 @@ private:
;
template<typename InIt, typename IntType>
- static void DecodeSymbol(InIt &it, InIt end, IntType &output)
- {
+ static void DecodeSymbol(InIt &it, InIt end, IntType &output) {
T msb = 1 << (sizeof(T) * 8 - 1);
IntType shift = (sizeof(T) * 8 - 1);
@@ -81,8 +79,7 @@ private:
public:
template<typename InIt, typename OutIt>
- static void Encode(InIt it, InIt end, OutIt outIt)
- {
+ static void Encode(InIt it, InIt end, OutIt outIt) {
while (it != end) {
EncodeSymbol(*it, outIt);
it++;
@@ -90,8 +87,7 @@ public:
}
template<typename InIt, typename OutIt>
- static void Decode(InIt &it, InIt end, OutIt outIt)
- {
+ static void Decode(InIt &it, InIt end, OutIt outIt) {
while (it != end) {
size_t output;
DecodeSymbol(it, end, output);
@@ -101,8 +97,7 @@ public:
}
template<typename InIt>
- static size_t DecodeAndSum(InIt &it, InIt end, size_t num)
- {
+ static size_t DecodeAndSum(InIt &it, InIt end, size_t num) {
size_t sum = 0;
size_t curr = 0;
@@ -130,8 +125,7 @@ private:
typedef unsigned int uint;
template<typename InIt>
- inline static void EncodeSymbol(uint &output, InIt it, InIt end)
- {
+ inline static void EncodeSymbol(uint &output, InIt it, InIt end) {
uint length = end - it;
uint type = 0;
@@ -182,8 +176,8 @@ private:
uint i = 0;
while (it != end) {
UTIL_THROW_IF2(*it > 268435455,
- "You are trying to encode " << *it
- << " with Simple9. Cannot encode numbers larger than 268435455 (2^28-1)");
+ "You are trying to encode " << *it
+ << " with Simple9. Cannot encode numbers larger than 268435455 (2^28-1)");
uint l = bitlength * (length - i - 1);
output |= *it << l;
@@ -193,8 +187,7 @@ private:
}
template<typename OutIt>
- static inline void DecodeSymbol(uint input, OutIt outIt)
- {
+ static inline void DecodeSymbol(uint input, OutIt outIt) {
uint type = (input >> 28);
uint bitlen = 0;
@@ -258,8 +251,7 @@ private:
outIt++;
}
- static inline size_t DecodeAndSumSymbol(uint input, size_t num, size_t &curr)
- {
+ static inline size_t DecodeAndSumSymbol(uint input, size_t num, size_t &curr) {
uint type = (input >> 28);
uint bitlen = 0;
@@ -327,8 +319,7 @@ private:
public:
template<typename InIt, typename OutIt>
- static void Encode(InIt it, InIt end, OutIt outIt)
- {
+ static void Encode(InIt it, InIt end, OutIt outIt) {
uint parts[] = { 1, 2, 3, 4, 5, 7, 9, 14, 28 };
uint buffer[28];
@@ -367,8 +358,7 @@ public:
}
template<typename InIt, typename OutIt>
- static void Decode(InIt &it, InIt end, OutIt outIt)
- {
+ static void Decode(InIt &it, InIt end, OutIt outIt) {
while (it != end) {
DecodeSymbol(*it, outIt);
it++;
@@ -376,8 +366,7 @@ public:
}
template<typename InIt>
- static size_t DecodeAndSum(InIt &it, InIt end, size_t num)
- {
+ static size_t DecodeAndSum(InIt &it, InIt end, size_t num) {
size_t sum = 0;
size_t curr = 0;
while (it != end && curr < num) {
diff --git a/contrib/moses2/TranslationModel/CompactPT/MmapAllocator.h b/moses2/TranslationModel/CompactPT/MmapAllocator.h
index 1e40d8d41..09ba58d93 100644
--- a/contrib/moses2/TranslationModel/CompactPT/MmapAllocator.h
+++ b/moses2/TranslationModel/CompactPT/MmapAllocator.h
@@ -25,13 +25,14 @@
#include <limits>
#include <iostream>
#include <cstdio>
-#include <unistd.h>
#if defined(_WIN32) || defined(_WIN64)
+#define _WINSOCKAPI_
#include <windows.h>
#include <io.h>
#else
#include <sys/mman.h>
+#include <unistd.h>
#endif
#include "util/mmap.hh"
@@ -63,43 +64,37 @@ public:
typedef std::ptrdiff_t difference_type;
MmapAllocator() throw () :
- m_file_ptr(std::tmpfile()), m_file_desc(fileno(m_file_ptr)), m_page_size(
- util::SizePage()), m_map_size(0), m_data_ptr(0), m_data_offset(0), m_fixed(
- false), m_count(new size_t(0))
- {
+ m_file_ptr(std::tmpfile()), m_file_desc(fileno(m_file_ptr)), m_page_size(
+ util::SizePage()), m_map_size(0), m_data_ptr(0), m_data_offset(0), m_fixed(
+ false), m_count(new size_t(0)) {
}
MmapAllocator(std::FILE* f_ptr) throw () :
- m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)), m_page_size(
- util::SizePage()), m_map_size(0), m_data_ptr(0), m_data_offset(0), m_fixed(
- false), m_count(new size_t(0))
- {
+ m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)), m_page_size(
+ util::SizePage()), m_map_size(0), m_data_ptr(0), m_data_offset(0), m_fixed(
+ false), m_count(new size_t(0)) {
}
MmapAllocator(std::FILE* f_ptr, size_t data_offset) throw () :
- m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)), m_page_size(
- util::SizePage()), m_map_size(0), m_data_ptr(0), m_data_offset(
- data_offset), m_fixed(true), m_count(new size_t(0))
- {
+ m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)), m_page_size(
+ util::SizePage()), m_map_size(0), m_data_ptr(0), m_data_offset(
+ data_offset), m_fixed(true), m_count(new size_t(0)) {
}
MmapAllocator(std::string fileName) throw () :
- m_file_ptr(std::fopen(fileName.c_str(), "wb+")), m_file_desc(
- fileno(m_file_ptr)), m_page_size(util::SizePage()), m_map_size(0), m_data_ptr(
- 0), m_data_offset(0), m_fixed(false), m_count(new size_t(0))
- {
+ m_file_ptr(std::fopen(fileName.c_str(), "wb+")), m_file_desc(
+ fileno(m_file_ptr)), m_page_size(util::SizePage()), m_map_size(0), m_data_ptr(
+ 0), m_data_offset(0), m_fixed(false), m_count(new size_t(0)) {
}
MmapAllocator(const MmapAllocator& c) throw () :
- m_file_ptr(c.m_file_ptr), m_file_desc(c.m_file_desc), m_page_size(
- c.m_page_size), m_map_size(c.m_map_size), m_data_ptr(c.m_data_ptr), m_data_offset(
- c.m_data_offset), m_fixed(c.m_fixed), m_count(c.m_count)
- {
+ m_file_ptr(c.m_file_ptr), m_file_desc(c.m_file_desc), m_page_size(
+ c.m_page_size), m_map_size(c.m_map_size), m_data_ptr(c.m_data_ptr), m_data_offset(
+ c.m_data_offset), m_fixed(c.m_fixed), m_count(c.m_count) {
(*m_count)++;
}
- ~MmapAllocator() throw ()
- {
+ ~MmapAllocator() throw () {
if (m_data_ptr && *m_count == 0) {
util::UnmapOrThrow(m_data_ptr, m_map_size);
if (!m_fixed && std::ftell(m_file_ptr) != -1) std::fclose(m_file_ptr);
@@ -108,28 +103,23 @@ public:
}
template<class U>
- struct rebind
- {
+ struct rebind {
typedef MmapAllocator<U> other;
};
- pointer address(reference value) const
- {
+ pointer address(reference value) const {
return &value;
}
- const_pointer address(const_reference value) const
- {
+ const_pointer address(const_reference value) const {
return &value;
}
- size_type max_size() const throw ()
- {
+ size_type max_size() const throw () {
return std::numeric_limits<size_t>::max() / sizeof(value_type);
}
- pointer allocate(size_type num, const void* = 0)
- {
+ pointer allocate(size_type num, const void* = 0) {
m_map_size = num * sizeof(T);
#if defined(_WIN32) || defined(_WIN64)
@@ -140,59 +130,58 @@ public:
#endif
if (!m_fixed) {
size_t read = 0;
+#ifdef _WIN32
+ read += _chsize_s(m_file_desc, m_map_size);
+#else
read += ftruncate(m_file_desc, m_map_size);
+#endif
m_data_ptr = (char *) util::MapOrThrow(m_map_size, true, map_shared,
- false, m_file_desc, 0);
+ false, m_file_desc, 0);
return (pointer) m_data_ptr;
- }
- else {
+ } else {
const size_t map_offset = (m_data_offset / m_page_size) * m_page_size;
const size_t relative_offset = m_data_offset - map_offset;
const size_t adjusted_map_size = m_map_size + relative_offset;
m_data_ptr = (char *) util::MapOrThrow(adjusted_map_size, false,
- map_shared, false, m_file_desc, map_offset);
+ map_shared, false, m_file_desc, map_offset);
return (pointer) (m_data_ptr + relative_offset);
}
}
- void deallocate(pointer p, size_type num)
- {
+ void deallocate(pointer p, size_type num) {
if (!m_fixed) {
util::UnmapOrThrow(p, num * sizeof(T));
- }
- else {
+ } else {
const size_t map_offset = (m_data_offset / m_page_size) * m_page_size;
const size_t relative_offset = m_data_offset - map_offset;
const size_t adjusted_map_size = m_map_size + relative_offset;
util::UnmapOrThrow((pointer) ((char*) p - relative_offset),
- adjusted_map_size);
+ adjusted_map_size);
}
}
- void construct(pointer p, const T& value)
- {
+ void construct(pointer p, const T& value) {
if (!m_fixed) new (p) value_type(value);
}
- void destroy(pointer p)
- {
+ void destroy(pointer p) {
if (!m_fixed) p->~T();
}
template<class T1, class T2>
friend bool operator==(const MmapAllocator<T1>&,
- const MmapAllocator<T2>&) throw ();
+ const MmapAllocator<T2>&) throw ();
template<class T1, class T2>
friend bool operator!=(const MmapAllocator<T1>&,
- const MmapAllocator<T2>&) throw ();
+ const MmapAllocator<T2>&) throw ();
};
template<class T1, class T2>
bool operator==(const MmapAllocator<T1>& a1,
- const MmapAllocator<T2>& a2) throw ()
+ const MmapAllocator<T2>& a2) throw ()
{
bool equal = true;
equal &= a1.m_file_ptr == a2.m_file_ptr;
@@ -207,7 +196,7 @@ bool operator==(const MmapAllocator<T1>& a1,
template<class T1, class T2>
bool operator!=(const MmapAllocator<T1>& a1,
- const MmapAllocator<T2>& a2) throw ()
+ const MmapAllocator<T2>& a2) throw ()
{
return !(a1 == a2);
}
diff --git a/contrib/moses2/TranslationModel/CompactPT/MonotonicVector.h b/moses2/TranslationModel/CompactPT/MonotonicVector.h
index 586397db8..179354657 100644
--- a/contrib/moses2/TranslationModel/CompactPT/MonotonicVector.h
+++ b/moses2/TranslationModel/CompactPT/MonotonicVector.h
@@ -43,7 +43,7 @@ namespace Moses2
{
template<typename PosT = size_t, typename NumT = size_t, PosT stepSize = 32,
- template<typename > class Allocator = std::allocator>
+ template<typename > class Allocator = std::allocator>
class MonotonicVector
{
private:
@@ -62,17 +62,14 @@ public:
typedef PosT value_type;
MonotonicVector() :
- m_size(0), m_last(0), m_final(false)
- {
+ m_size(0), m_last(0), m_final(false) {
}
- size_t size() const
- {
+ size_t size() const {
return m_size + m_tempDiffs.size();
}
- PosT at(size_t i) const
- {
+ PosT at(size_t i) const {
PosT s = stepSize;
PosT j = m_anchors[i / s];
PosT r = i % s;
@@ -83,23 +80,20 @@ public:
k += VarInt32::DecodeAndSum(it, m_diffs.end(), 1);
if (i < m_size) k += Simple9::DecodeAndSum(it, m_diffs.end(), r);
else if (i < m_size + m_tempDiffs.size()) for (size_t l = 0; l < r; l++)
- k += m_tempDiffs[l];
+ k += m_tempDiffs[l];
return k;
}
- PosT operator[](PosT i) const
- {
+ PosT operator[](PosT i) const {
return at(i);
}
- PosT back() const
- {
+ PosT back() const {
return at(size() - 1);
}
- void push_back(PosT i)
- {
+ void push_back(PosT i) {
assert(m_final != true);
if (m_anchors.size() == 0 && m_tempDiffs.size() == 0) {
@@ -113,14 +107,13 @@ public:
if (m_tempDiffs.size() == stepSize - 1) {
Simple9::Encode(m_tempDiffs.begin(), m_tempDiffs.end(),
- std::back_inserter(m_diffs));
+ std::back_inserter(m_diffs));
m_anchors.push_back(m_diffs.size());
VarInt32::Encode(&i, &i + 1, std::back_inserter(m_diffs));
m_size += m_tempDiffs.size() + 1;
m_tempDiffs.clear();
- }
- else {
+ } else {
PosT last = m_last;
PosT diff = i - last;
m_tempDiffs.push_back(diff);
@@ -128,24 +121,21 @@ public:
m_last = i;
}
- void commit()
- {
+ void commit() {
assert(m_final != true);
Simple9::Encode(m_tempDiffs.begin(), m_tempDiffs.end(),
- std::back_inserter(m_diffs));
+ std::back_inserter(m_diffs));
m_size += m_tempDiffs.size();
m_tempDiffs.clear();
m_final = true;
}
- size_t usage()
- {
+ size_t usage() {
return m_diffs.size() * sizeof(unsigned int)
- + m_anchors.size() * sizeof(NumT);
+ + m_anchors.size() * sizeof(NumT);
}
- size_t load(std::FILE* in, bool map = false)
- {
+ size_t load(std::FILE* in, bool map = false) {
size_t byteSize = 0;
byteSize += fread(&m_final, sizeof(bool), 1, in) * sizeof(bool);
@@ -160,8 +150,7 @@ public:
template<typename ValueT>
size_t loadVector(std::vector<ValueT, std::allocator<ValueT> >& v,
- std::FILE* in, bool map = false)
- {
+ std::FILE* in, bool map = false) {
// Can only be read into memory. Mapping not possible with std:allocator.
assert(map == false);
@@ -178,8 +167,7 @@ public:
template<typename ValueT>
size_t loadVector(std::vector<ValueT, MmapAllocator<ValueT> >& v,
- std::FILE* in, bool map = false)
- {
+ std::FILE* in, bool map = false) {
size_t byteSize = 0;
size_t valSize;
@@ -191,9 +179,8 @@ public:
v.resize(valSize, 0);
byteSize += std::fread(&v[0], sizeof(ValueT), valSize, in)
- * sizeof(ValueT);
- }
- else {
+ * sizeof(ValueT);
+ } else {
// Map it directly on specified region of file "in" starting at valPos
// with length valSize * sizeof(ValueT). Mapped region cannot be resized.
@@ -211,31 +198,29 @@ public:
return byteSize;
}
- size_t save(std::FILE* out)
- {
+ size_t save(std::FILE* out) {
if (!m_final) commit();
bool byteSize = 0;
byteSize += ThrowingFwrite(&m_final, sizeof(bool), 1, out) * sizeof(bool);
byteSize += ThrowingFwrite(&m_size, sizeof(size_t), 1, out)
- * sizeof(size_t);
+ * sizeof(size_t);
byteSize += ThrowingFwrite(&m_last, sizeof(PosT), 1, out) * sizeof(PosT);
size_t size = m_diffs.size();
byteSize += ThrowingFwrite(&size, sizeof(size_t), 1, out) * sizeof(size_t);
byteSize += ThrowingFwrite(&m_diffs[0], sizeof(unsigned int), size, out)
- * sizeof(unsigned int);
+ * sizeof(unsigned int);
size = m_anchors.size();
byteSize += ThrowingFwrite(&size, sizeof(size_t), 1, out) * sizeof(size_t);
byteSize += ThrowingFwrite(&m_anchors[0], sizeof(NumT), size, out)
- * sizeof(NumT);
+ * sizeof(NumT);
return byteSize;
}
- void swap(MonotonicVector<PosT, NumT, stepSize, Allocator> &mv)
- {
+ void swap(MonotonicVector<PosT, NumT, stepSize, Allocator> &mv) {
if (!m_final) commit();
m_diffs.swap(mv.m_diffs);
diff --git a/contrib/moses2/TranslationModel/CompactPT/MurmurHash3.cpp b/moses2/TranslationModel/CompactPT/MurmurHash3.cpp
index c3e567af6..988c1627f 100644
--- a/contrib/moses2/TranslationModel/CompactPT/MurmurHash3.cpp
+++ b/moses2/TranslationModel/CompactPT/MurmurHash3.cpp
@@ -151,7 +151,7 @@ void MurmurHash3_x86_32(const void * key, int len, uint32_t seed, void * out)
//-----------------------------------------------------------------------------
void MurmurHash3_x86_128(const void * key, const int len, uint32_t seed,
- void * out)
+ void * out)
{
const uint8_t * data = (const uint8_t*) key;
const int nblocks = len / 16;
@@ -312,7 +312,7 @@ void MurmurHash3_x86_128(const void * key, const int len, uint32_t seed,
//-----------------------------------------------------------------------------
void MurmurHash3_x64_128(const void * key, const int len, const uint32_t seed,
- void * out)
+ void * out)
{
const uint8_t * data = (const uint8_t*) key;
const int nblocks = len / 16;
diff --git a/contrib/moses2/TranslationModel/CompactPT/MurmurHash3.h b/moses2/TranslationModel/CompactPT/MurmurHash3.h
index f513008cf..ef885a6d4 100644
--- a/contrib/moses2/TranslationModel/CompactPT/MurmurHash3.h
+++ b/moses2/TranslationModel/CompactPT/MurmurHash3.h
@@ -12,9 +12,10 @@
#if defined(_MSC_VER)
-typedef unsigned char uint8_t;
-typedef unsigned long uint32_t;
-typedef unsigned __int64 uint64_t;
+#include <stdint.h>
+//typedef unsigned char uint8_t;
+//typedef unsigned long uint32_t;
+//typedef unsigned __int64 uint64_t;
// Other compilers
diff --git a/contrib/moses2/TranslationModel/CompactPT/PackedArray.h b/moses2/TranslationModel/CompactPT/PackedArray.h
index 409c3cca8..2da59a9f2 100644
--- a/contrib/moses2/TranslationModel/CompactPT/PackedArray.h
+++ b/moses2/TranslationModel/CompactPT/PackedArray.h
@@ -43,22 +43,19 @@ protected:
D* m_storage;
public:
- PackedArray()
- {
+ PackedArray() {
m_size = 0;
m_storageSize = 0;
m_storage = new D[0];
}
PackedArray(size_t size, size_t bits) :
- m_size(size)
- {
+ m_size(size) {
m_storageSize = ceil(float(bits * size) / float(m_dataBits));
m_storage = new D[m_storageSize];
}
- PackedArray(const PackedArray<T, D> &c)
- {
+ PackedArray(const PackedArray<T, D> &c) {
m_size = c.m_size;
m_storageSize = c.m_storageSize;
@@ -67,16 +64,14 @@ public:
std::memcpy(m_storage, c.m_storage, m_storageSize * sizeof(D));
}
- virtual ~PackedArray()
- {
+ virtual ~PackedArray() {
delete[] m_storage;
m_size = 0;
m_storageSize = 0;
m_storage = 0;
}
- T Get(size_t i, size_t bits) const
- {
+ T Get(size_t i, size_t bits) const {
T out = 0;
size_t bitstart = (i * bits);
@@ -97,8 +92,7 @@ public:
return out;
}
- void Set(size_t i, T v, size_t bits)
- {
+ void Set(size_t i, T v, size_t bits) {
size_t bitstart = (i * bits);
size_t bitpos = bitstart;
@@ -116,23 +110,19 @@ public:
}
}
- virtual D*& GetStorage()
- {
+ virtual D*& GetStorage() {
return m_storage;
}
- virtual size_t GetStorageSize() const
- {
+ virtual size_t GetStorageSize() const {
return m_storageSize;
}
- virtual size_t Size() const
- {
+ virtual size_t Size() const {
return m_size;
}
- virtual size_t Load(std::FILE* in)
- {
+ virtual size_t Load(std::FILE* in) {
size_t a1 = std::ftell(in);
size_t read = 0;
@@ -146,8 +136,7 @@ public:
return a2 - a1;
}
- virtual size_t Save(std::FILE* out)
- {
+ virtual size_t Save(std::FILE* out) {
size_t a1 = std::ftell(out);
ThrowingFwrite(&m_size, sizeof(m_size), 1, out);
@@ -170,31 +159,26 @@ class PairedPackedArray: public PackedArray<T, D>
{
public:
PairedPackedArray() :
- PackedArray<T, D>()
- {
+ PackedArray<T, D>() {
}
PairedPackedArray(size_t size, size_t bits1, size_t bits2) :
- PackedArray<T, D>(size, bits1 + bits2)
- {
+ PackedArray<T, D>(size, bits1 + bits2) {
}
- void Set(size_t i, T a, T b, size_t bits1, size_t bits2)
- {
+ void Set(size_t i, T a, T b, size_t bits1, size_t bits2) {
T c = 0;
c = a | (b << bits1);
PackedArray<T, D>::Set(i, c, bits1 + bits2);
}
- void Set(size_t i, std::pair<T, T> p, size_t bits1, size_t bits2)
- {
+ void Set(size_t i, std::pair<T, T> p, size_t bits1, size_t bits2) {
T c = 0;
c = p.second | (p.first << bits1);
PackedArray<T, D>::Set(i, c);
}
- std::pair<T, T> Get(size_t i, size_t bits1, size_t bits2)
- {
+ std::pair<T, T> Get(size_t i, size_t bits1, size_t bits2) {
T v = PackedArray<T, D>::Get(i, bits1 + bits2);
T a = v & ((1 << bits1) - 1);
T b = v >> bits1;
diff --git a/contrib/moses2/TranslationModel/CompactPT/StringVector.h b/moses2/TranslationModel/CompactPT/StringVector.h
index 87d6388bf..0b2aa176f 100644
--- a/contrib/moses2/TranslationModel/CompactPT/StringVector.h
+++ b/moses2/TranslationModel/CompactPT/StringVector.h
@@ -53,13 +53,11 @@ public:
const ValueIteratorT& begin() const;
const ValueIteratorT& end() const;
const std::string str() const;
- operator const std::string()
- {
+ operator const std::string() {
return str();
}
- size_t size()
- {
+ size_t size() {
return std::distance(m_begin, m_end);
}
@@ -75,7 +73,7 @@ public:
// ********** StringVector **********
template<typename ValueT = unsigned char, typename PosT = unsigned int,
- template<typename > class Allocator = std::allocator>
+ template<typename > class Allocator = std::allocator>
class StringVector
{
protected:
@@ -94,7 +92,7 @@ public:
// ********** RangeIterator **********
class RangeIterator: public boost::iterator_facade<RangeIterator, range,
- std::random_access_iterator_tag, range, PosT>
+ std::random_access_iterator_tag, range, PosT>
{
private:
@@ -122,7 +120,7 @@ public:
// ********** StringIterator **********
class StringIterator: public boost::iterator_facade<StringIterator,
- std::string, std::random_access_iterator_tag, const std::string, PosT>
+ std::string, std::random_access_iterator_tag, const std::string, PosT>
{
private:
@@ -152,13 +150,11 @@ public:
StringVector(bool allocate = false);
StringVector(Allocator<ValueT>& alloc);
- virtual ~StringVector()
- {
+ virtual ~StringVector() {
delete m_charArray;
}
- void swap(StringVector<ValueT, PosT, Allocator> &c)
- {
+ void swap(StringVector<ValueT, PosT, Allocator> &c) {
m_positions.commit();
m_positions.swap(c.m_positions);
m_charArray->swap(*c.m_charArray);
@@ -184,8 +180,7 @@ public:
const ValueT* begin(PosT i) const;
const ValueT* end(PosT i) const;
- void clear()
- {
+ void clear() {
m_charArray->clear();
m_sorted = true;
m_positions = MonotonicVector<PosT, unsigned int, 32>();
@@ -203,8 +198,7 @@ public:
PosT find(StringT &s) const;
PosT find(const char* c) const;
- virtual size_t load(std::FILE* in, bool memoryMapped = false)
- {
+ virtual size_t load(std::FILE* in, bool memoryMapped = false) {
size_t size = 0;
m_memoryMapped = memoryMapped;
@@ -216,8 +210,7 @@ public:
}
size_t loadCharArray(std::vector<ValueT, std::allocator<ValueT> >*& c,
- std::FILE* in, bool map = false)
- {
+ std::FILE* in, bool map = false) {
// Can only be read into memory. Mapping not possible with std:allocator.
assert(map == false);
@@ -228,14 +221,13 @@ public:
c = new std::vector<ValueT, std::allocator<ValueT> >(valSize, 0);
byteSize += std::fread(&(*c)[0], sizeof(ValueT), valSize, in)
- * sizeof(ValueT);
+ * sizeof(ValueT);
return byteSize;
}
size_t loadCharArray(std::vector<ValueT, MmapAllocator<ValueT> >*& c,
- std::FILE* in, bool map = false)
- {
+ std::FILE* in, bool map = false) {
size_t byteSize = 0;
size_t valSize;
@@ -246,9 +238,8 @@ public:
// and map memory onto temporary file. Can be resized.
c = new std::vector<ValueT, MmapAllocator<ValueT> >(valSize, 0);
byteSize += std::fread(&(*c)[0], sizeof(ValueT), valSize, in)
- * sizeof(ValueT);
- }
- else {
+ * sizeof(ValueT);
+ } else {
// Map it directly on specified region of file "in" starting at valPos
// with length valSize * sizeof(ValueT). Mapped region cannot be resized.
@@ -263,16 +254,14 @@ public:
return byteSize;
}
- size_t load(std::string filename, bool memoryMapped = false)
- {
+ size_t load(std::string filename, bool memoryMapped = false) {
std::FILE* pFile = fopen(filename.c_str(), "r");
size_t byteSize = load(pFile, memoryMapped);
fclose(pFile);
return byteSize;
}
- size_t save(std::FILE* out)
- {
+ size_t save(std::FILE* out) {
size_t byteSize = 0;
byteSize += ThrowingFwrite(&m_sorted, sizeof(bool), 1, out) * sizeof(bool);
@@ -280,15 +269,14 @@ public:
size_t valSize = size2();
byteSize += ThrowingFwrite(&valSize, sizeof(size_t), 1, out)
- * sizeof(size_t);
+ * sizeof(size_t);
byteSize += ThrowingFwrite(&(*m_charArray)[0], sizeof(ValueT), valSize, out)
- * sizeof(ValueT);
+ * sizeof(ValueT);
return byteSize;
}
- size_t save(std::string filename)
- {
+ size_t save(std::string filename) {
std::FILE* pFile = fopen(filename.c_str(), "w");
size_t byteSize = save(pFile);
fclose(pFile);
@@ -304,7 +292,7 @@ public:
template<typename ValueIteratorT>
ValueIteratorRange<ValueIteratorT>::ValueIteratorRange(ValueIteratorT begin,
ValueIteratorT end) :
- m_begin(begin), m_end(end)
+ m_begin(begin), m_end(end)
{
}
@@ -334,7 +322,7 @@ template<typename StringT>
bool ValueIteratorRange<ValueIteratorT>::operator==(const StringT& o) const
{
if (std::distance(m_begin, m_end) == std::distance(o.begin(), o.end())) return std::equal(
- m_begin, m_end, o.begin());
+ m_begin, m_end, o.begin());
else return false;
}
@@ -349,7 +337,7 @@ template<typename StringT>
bool ValueIteratorRange<ValueIteratorT>::operator<(const StringT &s2) const
{
return std::lexicographical_compare(m_begin, m_end, s2.begin(), s2.end(),
- std::less<typename std::iterator_traits<ValueIteratorT>::value_type>());
+ std::less<typename std::iterator_traits<ValueIteratorT>::value_type>());
}
template<typename ValueIteratorT>
@@ -362,8 +350,8 @@ template<typename StringT, typename ValueIteratorT>
bool operator<(const StringT &s1, const ValueIteratorRange<ValueIteratorT> &s2)
{
return std::lexicographical_compare(s1.begin(), s1.end(), s2.begin(),
- s2.end(),
- std::less<typename std::iterator_traits<ValueIteratorT>::value_type>());
+ s2.end(),
+ std::less<typename std::iterator_traits<ValueIteratorT>::value_type>());
}
template<typename ValueIteratorT>
@@ -371,7 +359,7 @@ bool operator<(const char* c, const ValueIteratorRange<ValueIteratorT> &s2)
{
size_t len = std::char_traits<char>::length(c);
return std::lexicographical_compare(c, c + len, s2.begin(), s2.end(),
- std::less<typename std::iterator_traits<ValueIteratorT>::value_type>());
+ std::less<typename std::iterator_traits<ValueIteratorT>::value_type>());
}
template<typename OStream, typename ValueIteratorT>
@@ -387,15 +375,15 @@ OStream& operator<<(OStream &os, ValueIteratorRange<ValueIteratorT> cr)
template<typename ValueT, typename PosT, template<typename > class Allocator>
StringVector<ValueT, PosT, Allocator>::StringVector(bool allocate) :
- m_sorted(true), m_memoryMapped(false), m_charArray(
- allocate ? new std::vector<ValueT, Allocator<ValueT> >() : 0)
+ m_sorted(true), m_memoryMapped(false), m_charArray(
+ allocate ? new std::vector<ValueT, Allocator<ValueT> >() : 0)
{
}
template<typename ValueT, typename PosT, template<typename > class Allocator>
StringVector<ValueT, PosT, Allocator>::StringVector(Allocator<ValueT> &alloc) :
- m_sorted(true), m_memoryMapped(false), m_charArray(
- new std::vector<ValueT, Allocator<ValueT> >(alloc))
+ m_sorted(true), m_memoryMapped(false), m_charArray(
+ new std::vector<ValueT, Allocator<ValueT> >(alloc))
{
}
@@ -428,12 +416,12 @@ template<typename Iterator>
Iterator StringVector<ValueT, PosT, Allocator>::end() const
{
return Iterator(const_cast<StringVector<ValueT, PosT, Allocator>&>(*this),
- size());
+ size());
}
template<typename ValueT, typename PosT, template<typename > class Allocator>
typename StringVector<ValueT, PosT, Allocator>::iterator StringVector<ValueT,
- PosT, Allocator>::begin() const
+ PosT, Allocator>::begin() const
{
return begin<iterator>();
}
@@ -441,7 +429,7 @@ typename StringVector<ValueT, PosT, Allocator>::iterator StringVector<ValueT,
template<typename ValueT, typename PosT, template<typename > class Allocator>
typename StringVector<ValueT, PosT, Allocator>::iterator StringVector<ValueT,
- PosT, Allocator>::end() const
+ PosT, Allocator>::end() const
{
return end<iterator>();
}
@@ -467,21 +455,21 @@ PosT StringVector<ValueT, PosT, Allocator>::size2() const
template<typename ValueT, typename PosT, template<typename > class Allocator>
typename StringVector<ValueT, PosT, Allocator>::range StringVector<ValueT, PosT,
- Allocator>::at(PosT i) const
+ Allocator>::at(PosT i) const
{
return range(begin(i), end(i));
}
template<typename ValueT, typename PosT, template<typename > class Allocator>
typename StringVector<ValueT, PosT, Allocator>::range StringVector<ValueT, PosT,
- Allocator>::operator[](PosT i) const
+ Allocator>::operator[](PosT i) const
{
return at(i);
}
template<typename ValueT, typename PosT, template<typename > class Allocator>
typename StringVector<ValueT, PosT, Allocator>::range StringVector<ValueT, PosT,
- Allocator>::back() const
+ Allocator>::back() const
{
return at(size() - 1);
}
@@ -520,7 +508,7 @@ template<typename StringT>
PosT StringVector<ValueT, PosT, Allocator>::find(StringT &s) const
{
if (m_sorted) return std::distance(begin(),
- std::lower_bound(begin(), end(), s));
+ std::lower_bound(begin(), end(), s));
return std::distance(begin(), std::find(begin(), end(), s));
}
@@ -535,14 +523,14 @@ PosT StringVector<ValueT, PosT, Allocator>::find(const char* c) const
template<typename ValueT, typename PosT, template<typename > class Allocator>
StringVector<ValueT, PosT, Allocator>::RangeIterator::RangeIterator() :
- m_index(0), m_container(0)
+ m_index(0), m_container(0)
{
}
template<typename ValueT, typename PosT, template<typename > class Allocator>
StringVector<ValueT, PosT, Allocator>::RangeIterator::RangeIterator(
- StringVector<ValueT, PosT, Allocator> &sv, PosT index) :
- m_index(index), m_container(&sv)
+ StringVector<ValueT, PosT, Allocator> &sv, PosT index) :
+ m_index(index), m_container(&sv)
{
}
@@ -554,15 +542,15 @@ PosT StringVector<ValueT, PosT, Allocator>::RangeIterator::get_index()
template<typename ValueT, typename PosT, template<typename > class Allocator>
typename StringVector<ValueT, PosT, Allocator>::range StringVector<ValueT, PosT,
- Allocator>::RangeIterator::dereference() const
+ Allocator>::RangeIterator::dereference() const
{
return typename StringVector<ValueT, PosT, Allocator>::range(
- m_container->begin(m_index), m_container->end(m_index));
+ m_container->begin(m_index), m_container->end(m_index));
}
template<typename ValueT, typename PosT, template<typename > class Allocator>
bool StringVector<ValueT, PosT, Allocator>::RangeIterator::equal(
- StringVector<ValueT, PosT, Allocator>::RangeIterator const& other) const
+ StringVector<ValueT, PosT, Allocator>::RangeIterator const& other) const
{
return m_index == other.m_index && m_container == other.m_container;
}
@@ -587,7 +575,7 @@ void StringVector<ValueT, PosT, Allocator>::RangeIterator::advance(PosT n)
template<typename ValueT, typename PosT, template<typename > class Allocator>
PosT StringVector<ValueT, PosT, Allocator>::RangeIterator::distance_to(
- StringVector<ValueT, PosT, Allocator>::RangeIterator const& other) const
+ StringVector<ValueT, PosT, Allocator>::RangeIterator const& other) const
{
return other.m_index - m_index;
}
@@ -596,14 +584,14 @@ PosT StringVector<ValueT, PosT, Allocator>::RangeIterator::distance_to(
template<typename ValueT, typename PosT, template<typename > class Allocator>
StringVector<ValueT, PosT, Allocator>::StringIterator::StringIterator() :
- m_index(0), m_container(0)
+ m_index(0), m_container(0)
{
}
template<typename ValueT, typename PosT, template<typename > class Allocator>
StringVector<ValueT, PosT, Allocator>::StringIterator::StringIterator(
- StringVector<ValueT, PosT, Allocator> &sv, PosT index) :
- m_index(index), m_container(&sv)
+ StringVector<ValueT, PosT, Allocator> &sv, PosT index) :
+ m_index(index), m_container(&sv)
{
}
@@ -617,12 +605,12 @@ template<typename ValueT, typename PosT, template<typename > class Allocator>
const std::string StringVector<ValueT, PosT, Allocator>::StringIterator::dereference() const
{
return StringVector<ValueT, PosT, Allocator>::range(
- m_container->begin(m_index), m_container->end(m_index)).str();
+ m_container->begin(m_index), m_container->end(m_index)).str();
}
template<typename ValueT, typename PosT, template<typename > class Allocator>
bool StringVector<ValueT, PosT, Allocator>::StringIterator::equal(
- StringVector<ValueT, PosT, Allocator>::StringIterator const& other) const
+ StringVector<ValueT, PosT, Allocator>::StringIterator const& other) const
{
return m_index == other.m_index && m_container == other.m_container;
}
@@ -647,7 +635,7 @@ void StringVector<ValueT, PosT, Allocator>::StringIterator::advance(PosT n)
template<typename ValueT, typename PosT, template<typename > class Allocator>
PosT StringVector<ValueT, PosT, Allocator>::StringIterator::distance_to(
- StringVector<ValueT, PosT, Allocator>::StringIterator const& other) const
+ StringVector<ValueT, PosT, Allocator>::StringIterator const& other) const
{
return other.m_index - m_index;
}
diff --git a/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp b/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp
index 07d0469e0..07d0469e0 100644
--- a/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp
+++ b/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp
diff --git a/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h b/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h
index 3a9e6f170..75ab40c93 100644
--- a/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h
+++ b/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h
@@ -35,14 +35,12 @@ namespace Moses2
{
typedef std::pair<size_t, size_t> AlignPointSizeT;
-struct PhraseCompact : public std::vector<Word>
-{
+struct PhraseCompact : public std::vector<Word> {
public:
PhraseCompact(const Phrase<Word> &copy);
};
-struct TPCompact
-{
+struct TPCompact {
std::vector<Word> words;
std::set<AlignPointSizeT> alignment;
std::vector<float> scores;
diff --git a/contrib/moses2/TranslationModel/CompactPT/ThrowingFwrite.cpp b/moses2/TranslationModel/CompactPT/ThrowingFwrite.cpp
index d9fec5013..d9fec5013 100644
--- a/contrib/moses2/TranslationModel/CompactPT/ThrowingFwrite.cpp
+++ b/moses2/TranslationModel/CompactPT/ThrowingFwrite.cpp
diff --git a/contrib/moses2/TranslationModel/CompactPT/ThrowingFwrite.h b/moses2/TranslationModel/CompactPT/ThrowingFwrite.h
index 2a0c71a27..2a0c71a27 100644
--- a/contrib/moses2/TranslationModel/CompactPT/ThrowingFwrite.h
+++ b/moses2/TranslationModel/CompactPT/ThrowingFwrite.h
diff --git a/contrib/moses2/TranslationModel/Memory/Node.h b/moses2/TranslationModel/Memory/Node.h
index 97fa9618e..d5a6b8795 100644
--- a/contrib/moses2/TranslationModel/Memory/Node.h
+++ b/moses2/TranslationModel/Memory/Node.h
@@ -25,56 +25,50 @@ public:
typedef boost::unordered_map<size_t, Node> Children;
Node()
- :m_targetPhrases(NULL)
- ,m_unsortedTPS(NULL)
+ :m_targetPhrases(NULL)
+ ,m_unsortedTPS(NULL)
{}
~Node()
{}
- void AddRule(const std::vector<FactorType> &factors, SP &source, TP *target)
- {
+ void AddRule(const std::vector<FactorType> &factors, SP &source, TP *target) {
AddRule(factors, source, target, 0);
}
- TPS *Find(const std::vector<FactorType> &factors, const SP &source, size_t pos = 0) const
- {
+ TPS *Find(const std::vector<FactorType> &factors, const SP &source, size_t pos = 0) const {
assert(source.GetSize());
if (pos == source.GetSize()) {
return m_targetPhrases;
- }
- else {
+ } else {
const WORD &word = source[pos];
//cerr << "word=" << word << endl;
typename Children::const_iterator iter = m_children.find(word.hash(factors));
if (iter == m_children.end()) {
return NULL;
- }
- else {
+ } else {
const Node &child = iter->second;
return child.Find(factors, source, pos + 1);
}
}
}
- const Node *Find(const std::vector<FactorType> &factors, const WORD &word) const
- {
+ const Node *Find(const std::vector<FactorType> &factors, const WORD &word) const {
typename Children::const_iterator iter = m_children.find(word.hash(factors));
if (iter == m_children.end()) {
return NULL;
- }
- else {
+ } else {
const Node &child = iter->second;
return &child;
}
}
- const TPS *GetTargetPhrases() const
- { return m_targetPhrases; }
+ const TPS *GetTargetPhrases() const {
+ return m_targetPhrases;
+ }
- void SortAndPrune(size_t tableLimit, MemPool &pool, System &system)
- {
- BOOST_FOREACH(typename Children::value_type &val, m_children){
+ void SortAndPrune(size_t tableLimit, MemPool &pool, System &system) {
+ BOOST_FOREACH(typename Children::value_type &val, m_children) {
Node &child = val.second;
child.SortAndPrune(tableLimit, pool, system);
}
@@ -95,8 +89,9 @@ public:
}
}
- const Children &GetChildren() const
- { return m_children; }
+ const Children &GetChildren() const {
+ return m_children;
+ }
void Debug(std::ostream &out, const System &system) const {
BOOST_FOREACH(const typename Children::value_type &valPair, m_children) {
@@ -110,8 +105,7 @@ protected:
Phrase<WORD> *m_source;
std::vector<TP*> *m_unsortedTPS;
- Node &AddRule(const std::vector<FactorType> &factors, SP &source, TP *target, size_t pos)
- {
+ Node &AddRule(const std::vector<FactorType> &factors, SP &source, TP *target, size_t pos) {
if (pos == source.GetSize()) {
if (m_unsortedTPS == NULL) {
m_unsortedTPS = new std::vector<TP*>();
@@ -120,8 +114,7 @@ protected:
m_unsortedTPS->push_back(target);
return *this;
- }
- else {
+ } else {
const WORD &word = source[pos];
Node &child = m_children[word.hash(factors)];
//std::cerr << "added " << word << " " << &child << " from " << this << std::endl;
diff --git a/contrib/moses2/TranslationModel/Memory/PhraseTableMemory.cpp b/moses2/TranslationModel/Memory/PhraseTableMemory.cpp
index 09eead137..9b231ebd6 100644
--- a/contrib/moses2/TranslationModel/Memory/PhraseTableMemory.cpp
+++ b/moses2/TranslationModel/Memory/PhraseTableMemory.cpp
@@ -37,9 +37,9 @@ namespace Moses2
////////////////////////////////////////////////////////////////////////
PhraseTableMemory::PhraseTableMemory(size_t startInd, const std::string &line)
-:PhraseTable(startInd, line)
-,m_rootPb(NULL)
-,m_rootSCFG(NULL)
+ :PhraseTable(startInd, line)
+ ,m_rootPb(NULL)
+ ,m_rootSCFG(NULL)
{
ReadParameters();
}
@@ -58,8 +58,7 @@ void PhraseTableMemory::Load(System &system)
if (system.isPb) {
m_rootPb = new PBNODE();
- }
- else {
+ } else {
m_rootSCFG = new SCFGNODE();
//cerr << "m_rootSCFG=" << m_rootSCFG << endl;
}
@@ -80,17 +79,17 @@ void PhraseTableMemory::Load(System &system)
if (system.isPb) {
PhraseImpl *source = PhraseImpl::CreateFromString(tmpSourcePool, vocab, system,
- toks[0]);
+ toks[0]);
//cerr << "created soure" << endl;
TargetPhraseImpl *target = TargetPhraseImpl::CreateFromString(systemPool, *this, system,
- toks[1]);
+ toks[1]);
//cerr << "created target" << endl;
target->GetScores().CreateFromString(toks[2], *this, system, true);
//cerr << "created scores:" << *target << endl;
if (toks.size() >= 4) {
- //cerr << "alignstr=" << toks[3] << endl;
- target->SetAlignmentInfo(toks[3]);
+ //cerr << "alignstr=" << toks[3] << endl;
+ target->SetAlignmentInfo(toks[3]);
}
// properties
@@ -105,13 +104,12 @@ void PhraseTableMemory::Load(System &system)
m_rootPb->AddRule(m_input, *source, target);
//cerr << "target=" << target->Debug(system) << endl;
- }
- else {
+ } else {
SCFG::PhraseImpl *source = SCFG::PhraseImpl::CreateFromString(tmpSourcePool, vocab, system,
- toks[0]);
+ toks[0]);
//cerr << "created source:" << *source << endl;
SCFG::TargetPhraseImpl *target = SCFG::TargetPhraseImpl::CreateFromString(systemPool, *this,
- system, toks[1]);
+ system, toks[1]);
//cerr << "created target " << *target << " source=" << *source << endl;
@@ -139,8 +137,7 @@ void PhraseTableMemory::Load(System &system)
if (system.isPb) {
m_rootPb->SortAndPrune(m_tableLimit, systemPool, system);
//cerr << "root=" << &m_rootPb << endl;
- }
- else {
+ } else {
m_rootSCFG->SortAndPrune(m_tableLimit, systemPool, system);
//cerr << "root=" << &m_rootPb << endl;
}
@@ -162,9 +159,9 @@ TargetPhrases* PhraseTableMemory::Lookup(const Manager &mgr, MemPool &pool,
}
void PhraseTableMemory::InitActiveChart(
- MemPool &pool,
- const SCFG::Manager &mgr,
- SCFG::InputPath &path) const
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ SCFG::InputPath &path) const
{
size_t ptInd = GetPtInd();
ActiveChartEntryMem *chartEntry = new (pool.Allocate<ActiveChartEntryMem>()) ActiveChartEntryMem(pool, *m_rootSCFG);
@@ -173,10 +170,10 @@ void PhraseTableMemory::InitActiveChart(
}
void PhraseTableMemory::Lookup(MemPool &pool,
- const SCFG::Manager &mgr,
- size_t maxChartSpan,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const
+ const SCFG::Manager &mgr,
+ size_t maxChartSpan,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &path) const
{
if (path.range.GetNumWordsCovered() > maxChartSpan) {
return;
@@ -213,13 +210,13 @@ void PhraseTableMemory::Lookup(MemPool &pool,
}
void PhraseTableMemory::LookupGivenNode(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::ActiveChartEntry &prevEntry,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const SCFG::ActiveChartEntry &prevEntry,
+ const SCFG::Word &wordSought,
+ const Moses2::Hypotheses *hypos,
+ const Moses2::Range &subPhraseRange,
+ SCFG::InputPath &outPath) const
{
const ActiveChartEntryMem &prevEntryCast = static_cast<const ActiveChartEntryMem&>(prevEntry);
@@ -252,10 +249,10 @@ void PhraseTableMemory::LookupGivenNode(
// there are some rules
/*
cerr << "outPath=" << outPath.range
- << " bind=" << chartEntry->GetSymbolBind().Debug(mgr.system)
- << " pt=" << GetPtInd()
- << " tps=" << tps->Debug(mgr.system) << endl;
- */
+ << " bind=" << chartEntry->GetSymbolBind().Debug(mgr.system)
+ << " pt=" << GetPtInd()
+ << " tps=" << tps->Debug(mgr.system) << endl;
+ */
outPath.AddTargetPhrasesToPath(pool, mgr.system, *this, *tps, chartEntry->GetSymbolBind());
}
diff --git a/contrib/moses2/TranslationModel/Memory/PhraseTableMemory.h b/moses2/TranslationModel/Memory/PhraseTableMemory.h
index 035c7c9c5..07a47c7ff 100644
--- a/contrib/moses2/TranslationModel/Memory/PhraseTableMemory.h
+++ b/moses2/TranslationModel/Memory/PhraseTableMemory.h
@@ -33,16 +33,16 @@ class PhraseTableMemory: public PhraseTable
const PhraseTableMemory::SCFGNODE &node;
ActiveChartEntryMem(MemPool &pool, const PhraseTableMemory::SCFGNODE &vnode)
- :Parent(pool)
- ,node(vnode)
+ :Parent(pool)
+ ,node(vnode)
{}
ActiveChartEntryMem(
- MemPool &pool,
- const PhraseTableMemory::SCFGNODE &vnode,
- const ActiveChartEntry &prevEntry)
- :Parent(prevEntry)
- ,node(vnode)
+ MemPool &pool,
+ const PhraseTableMemory::SCFGNODE &vnode,
+ const ActiveChartEntry &prevEntry)
+ :Parent(prevEntry)
+ ,node(vnode)
{}
};
@@ -53,31 +53,31 @@ public:
virtual void Load(System &system);
virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool,
- InputPath &inputPath) const;
+ InputPath &inputPath) const;
virtual void InitActiveChart(
- MemPool &pool,
- const SCFG::Manager &mgr,
- SCFG::InputPath &path) const;
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ SCFG::InputPath &path) const;
void Lookup(MemPool &pool,
- const SCFG::Manager &mgr,
- size_t maxChartSpan,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const;
+ const SCFG::Manager &mgr,
+ size_t maxChartSpan,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &path) const;
protected:
PBNODE *m_rootPb;
SCFGNODE *m_rootSCFG;
void LookupGivenNode(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::ActiveChartEntry &prevEntry,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const;
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const SCFG::ActiveChartEntry &prevEntry,
+ const SCFG::Word &wordSought,
+ const Moses2::Hypotheses *hypos,
+ const Moses2::Range &subPhraseRange,
+ SCFG::InputPath &outPath) const;
};
diff --git a/contrib/moses2/TranslationModel/PhraseTable.cpp b/moses2/TranslationModel/PhraseTable.cpp
index c790147bb..fef6771d6 100644
--- a/contrib/moses2/TranslationModel/PhraseTable.cpp
+++ b/moses2/TranslationModel/PhraseTable.cpp
@@ -22,8 +22,8 @@ namespace Moses2
////////////////////////////////////////////////////////////////////////////
PhraseTable::PhraseTable(size_t startInd, const std::string &line) :
- StatelessFeatureFunction(startInd, line), m_tableLimit(20) // default
- , m_maxCacheSize(DEFAULT_MAX_TRANS_OPT_CACHE_SIZE)
+ StatelessFeatureFunction(startInd, line), m_tableLimit(20) // default
+ , m_maxCacheSize(DEFAULT_MAX_TRANS_OPT_CACHE_SIZE)
{
m_input.push_back(0);
}
@@ -37,20 +37,15 @@ void PhraseTable::SetParameter(const std::string& key, const std::string& value)
{
if (key == "cache-size") {
m_maxCacheSize = Scan<size_t>(value);
- }
- else if (key == "path") {
+ } else if (key == "path") {
m_path = value;
- }
- else if (key == "input-factor") {
- m_input = Tokenize<FactorType>(value, ",");
- }
- else if (key == "output-factor") {
- m_output = Tokenize<FactorType>(value, ",");
- }
- else if (key == "table-limit") {
+ } else if (key == "input-factor") {
+ m_input = Tokenize<FactorType>(value, ",");
+ } else if (key == "output-factor") {
+ m_output = Tokenize<FactorType>(value, ",");
+ } else if (key == "table-limit") {
m_tableLimit = Scan<size_t>(value);
- }
- else {
+ } else {
StatelessFeatureFunction::SetParameter(key, value);
}
}
@@ -59,29 +54,27 @@ bool PhraseTable::SatisfyBackoff(const Manager &mgr, const InputPath &path) cons
{
const InputType &input = mgr.GetInput();
if ((mgr.system.options.input.xml_policy == XmlExclusive)
- && input.XmlOverlap(path.range.GetStartPos(), path.range.GetEndPos())) {
- return false;
+ && input.XmlOverlap(path.range.GetStartPos(), path.range.GetEndPos())) {
+ return false;
+ }
+
+ //cerr << GetName() << "=" << GetPtInd() << "=" << decodeGraphBackoff << endl;
+ if (decodeGraphBackoff == 0) {
+ // always lookup
+ return true;
+ } else if (decodeGraphBackoff == -1) {
+ // lookup only if there's no existing rules
+ return path.GetNumRules() ? false : true;
+ } else if (path.range.GetNumWordsCovered() <= decodeGraphBackoff) {
+ return path.GetNumRules() ? false : true;
}
- //cerr << GetName() << "=" << GetPtInd() << "=" << decodeGraphBackoff << endl;
- if (decodeGraphBackoff == 0) {
- // always lookup
- return true;
- }
- else if (decodeGraphBackoff == -1) {
- // lookup only if there's no existing rules
- return path.GetNumRules() ? false : true;
- }
- else if (path.range.GetNumWordsCovered() <= decodeGraphBackoff) {
- return path.GetNumRules() ? false : true;
- }
-
- return false;
+ return false;
}
void PhraseTable::Lookup(const Manager &mgr, InputPathsBase &inputPaths) const
{
- BOOST_FOREACH(InputPathBase *pathBase, inputPaths){
+ BOOST_FOREACH(InputPathBase *pathBase, inputPaths) {
InputPath *path = static_cast<InputPath*>(pathBase);
//cerr << "path=" << path->range << " ";
@@ -102,29 +95,29 @@ void PhraseTable::Lookup(const Manager &mgr, InputPathsBase &inputPaths) const
}
TargetPhrases *PhraseTable::Lookup(const Manager &mgr, MemPool &pool,
- InputPath &inputPath) const
+ InputPath &inputPath) const
{
UTIL_THROW2("Not implemented");
}
void PhraseTable::EvaluateInIsolation(MemPool &pool, const System &system,
- const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
+ const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const
{
}
void PhraseTable::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const
{
}
// scfg
void PhraseTable::LookupUnary(MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const
+ const SCFG::Manager &mgr,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &path) const
{
//cerr << "BEFORE LookupUnary" << path.Debug(mgr.system) << endl;
size_t startPos = path.range.GetStartPos();
@@ -134,12 +127,12 @@ void PhraseTable::LookupUnary(MemPool &pool,
}
void PhraseTable::LookupNT(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const Moses2::Range &subPhraseRange,
- const SCFG::InputPath &prevPath,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &outPath) const
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const Moses2::Range &subPhraseRange,
+ const SCFG::InputPath &prevPath,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &outPath) const
{
size_t endPos = outPath.range.GetEndPos();
@@ -161,13 +154,13 @@ void PhraseTable::LookupNT(
}
void PhraseTable::LookupGivenWord(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::InputPath &prevPath,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const SCFG::InputPath &prevPath,
+ const SCFG::Word &wordSought,
+ const Moses2::Hypotheses *hypos,
+ const Moses2::Range &subPhraseRange,
+ SCFG::InputPath &outPath) const
{
size_t ptInd = GetPtInd();
diff --git a/contrib/moses2/TranslationModel/PhraseTable.h b/moses2/TranslationModel/PhraseTable.h
index 9237f5ba6..ef40c06a4 100644
--- a/contrib/moses2/TranslationModel/PhraseTable.h
+++ b/moses2/TranslationModel/PhraseTable.h
@@ -42,43 +42,45 @@ public:
virtual void SetParameter(const std::string& key, const std::string& value);
virtual void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const;
virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool,
- InputPath &inputPath) const;
+ InputPath &inputPath) const;
- void SetPtInd(size_t ind)
- { m_ptInd = ind; }
+ void SetPtInd(size_t ind) {
+ m_ptInd = ind;
+ }
- size_t GetPtInd() const
- { return m_ptInd; }
+ size_t GetPtInd() const {
+ return m_ptInd;
+ }
bool SatisfyBackoff(const Manager &mgr, const InputPath &path) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
+ const TargetPhraseImpl &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const;
// scfg
virtual void InitActiveChart(
- MemPool &pool,
- const SCFG::Manager &mgr,
- SCFG::InputPath &path) const = 0;
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ SCFG::InputPath &path) const = 0;
virtual void Lookup(
- MemPool &pool,
- const SCFG::Manager &mgr,
- size_t maxChartSpan,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const = 0;
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ size_t maxChartSpan,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &path) const = 0;
virtual void LookupUnary(MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const;
+ const SCFG::Manager &mgr,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &path) const;
protected:
std::string m_path;
@@ -89,38 +91,37 @@ protected:
// cache
size_t m_maxCacheSize; // 0 = no caching
- struct CacheCollEntry2
- {
+ struct CacheCollEntry2 {
TargetPhrases *tpsPtr;
clock_t clock;
};
// scfg
virtual void LookupNT(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const Moses2::Range &subPhraseRange,
- const SCFG::InputPath &prevPath,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &outPath) const;
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const Moses2::Range &subPhraseRange,
+ const SCFG::InputPath &prevPath,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &outPath) const;
virtual void LookupGivenWord(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::InputPath &prevPath,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const;
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const SCFG::InputPath &prevPath,
+ const SCFG::Word &wordSought,
+ const Moses2::Hypotheses *hypos,
+ const Moses2::Range &subPhraseRange,
+ SCFG::InputPath &outPath) const;
virtual void LookupGivenNode(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::ActiveChartEntry &prevEntry,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const = 0;
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const SCFG::ActiveChartEntry &prevEntry,
+ const SCFG::Word &wordSought,
+ const Moses2::Hypotheses *hypos,
+ const Moses2::Range &subPhraseRange,
+ SCFG::InputPath &outPath) const = 0;
};
diff --git a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp b/moses2/TranslationModel/ProbingPT.cpp
index 1f22f45be..d99137f05 100644
--- a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp
+++ b/moses2/TranslationModel/ProbingPT.cpp
@@ -6,41 +6,41 @@
*/
#include <boost/foreach.hpp>
#include "ProbingPT.h"
-#include "querying.hh"
-#include "probing_hash_utils.hh"
+#include "probingpt/querying.h"
+#include "probingpt/probing_hash_utils.h"
#include "util/exception.hh"
-#include "../../System.h"
-#include "../../Scores.h"
-#include "../../Phrase.h"
-#include "../../legacy/InputFileStream.h"
-#include "../../legacy/FactorCollection.h"
-#include "../../legacy/Util2.h"
-#include "../../FF/FeatureFunctions.h"
-#include "../../PhraseBased/PhraseImpl.h"
-#include "../../PhraseBased/TargetPhraseImpl.h"
-#include "../../PhraseBased/Manager.h"
-#include "../../PhraseBased/TargetPhrases.h"
-#include "../../SCFG/InputPath.h"
-#include "../../SCFG/Manager.h"
-#include "../../SCFG/TargetPhraseImpl.h"
-#include "../../SCFG/PhraseImpl.h"
+#include "../System.h"
+#include "../Scores.h"
+#include "../Phrase.h"
+#include "../legacy/InputFileStream.h"
+#include "../legacy/FactorCollection.h"
+#include "../legacy/Util2.h"
+#include "../FF/FeatureFunctions.h"
+#include "../PhraseBased/PhraseImpl.h"
+#include "../PhraseBased/TargetPhraseImpl.h"
+#include "../PhraseBased/Manager.h"
+#include "../PhraseBased/TargetPhrases.h"
+#include "../SCFG/InputPath.h"
+#include "../SCFG/Manager.h"
+#include "../SCFG/TargetPhraseImpl.h"
+#include "../SCFG/PhraseImpl.h"
using namespace std;
namespace Moses2
{
ProbingPT::ActiveChartEntryProbing::ActiveChartEntryProbing(
- MemPool &pool,
- const ActiveChartEntryProbing &prevEntry)
-:Parent(prevEntry)
-,m_key(prevEntry.m_key)
+ MemPool &pool,
+ const ActiveChartEntryProbing &prevEntry)
+ :Parent(prevEntry)
+ ,m_key(prevEntry.m_key)
{}
void ProbingPT::ActiveChartEntryProbing::AddSymbolBindElement(
- const Range &range,
- const SCFG::Word &word,
- const Moses2::Hypotheses *hypos,
- const Moses2::PhraseTable &pt)
+ const Range &range,
+ const SCFG::Word &word,
+ const Moses2::Hypotheses *hypos,
+ const Moses2::PhraseTable &pt)
{
const ProbingPT &probingPt = static_cast<const ProbingPT&>(pt);
std::pair<bool, uint64_t> key = GetKey(word, probingPt);
@@ -68,8 +68,8 @@ std::pair<bool, uint64_t> ProbingPT::ActiveChartEntryProbing::GetKey(const SCFG:
////////////////////////////////////////////////////////////////////////////
ProbingPT::ProbingPT(size_t startInd, const std::string &line)
-:PhraseTable(startInd, line)
-,load_method(util::POPULATE_OR_READ)
+ :PhraseTable(startInd, line)
+ ,load_method(util::POPULATE_OR_READ)
{
ReadParameters();
}
@@ -81,7 +81,7 @@ ProbingPT::~ProbingPT()
void ProbingPT::Load(System &system)
{
- m_engine = new QueryEngine(m_path.c_str(), load_method);
+ m_engine = new probingpt::QueryEngine(m_path.c_str(), load_method);
m_unkId = 456456546456;
@@ -89,10 +89,10 @@ void ProbingPT::Load(System &system)
// source vocab
const std::map<uint64_t, std::string> &sourceVocab =
- m_engine->getSourceVocab();
+ m_engine->getSourceVocab();
std::map<uint64_t, std::string>::const_iterator iterSource;
for (iterSource = sourceVocab.begin(); iterSource != sourceVocab.end();
- ++iterSource) {
+ ++iterSource) {
string wordStr = iterSource->second;
bool isNT;
//cerr << "wordStr=" << wordStr << endl;
@@ -142,29 +142,23 @@ void ProbingPT::Load(System &system)
void ProbingPT::SetParameter(const std::string& key, const std::string& value)
{
- if (key == "load") {
+ if (key == "load") {
if (value == "lazy") {
load_method = util::LAZY;
- }
- else if (value == "populate_or_lazy") {
+ } else if (value == "populate_or_lazy") {
load_method = util::POPULATE_OR_LAZY;
- }
- else if (value == "populate_or_read" || value == "populate") {
+ } else if (value == "populate_or_read" || value == "populate") {
load_method = util::POPULATE_OR_READ;
- }
- else if (value == "read") {
+ } else if (value == "read") {
load_method = util::READ;
- }
- else if (value == "parallel_read") {
+ } else if (value == "parallel_read") {
load_method = util::PARALLEL_READ;
+ } else {
+ UTIL_THROW2("load method not supported" << value);
}
- else {
- UTIL_THROW2("load method not supported" << value);
- }
- }
- else {
- PhraseTable::SetParameter(key, value);
- }
+ } else {
+ PhraseTable::SetParameter(key, value);
+ }
}
void ProbingPT::CreateAlignmentMap(System &system, const std::string path)
@@ -191,19 +185,19 @@ void ProbingPT::CreateAlignmentMap(System &system, const std::string path)
void ProbingPT::Lookup(const Manager &mgr, InputPathsBase &inputPaths) const
{
- BOOST_FOREACH(InputPathBase *pathBase, inputPaths){
- InputPath *path = static_cast<InputPath*>(pathBase);
-
- if (SatisfyBackoff(mgr, *path)) {
- TargetPhrases *tpsPtr;
- tpsPtr = Lookup(mgr, mgr.GetPool(), *path);
- path->AddTargetPhrases(*this, tpsPtr);
- }
+ BOOST_FOREACH(InputPathBase *pathBase, inputPaths) {
+ InputPath *path = static_cast<InputPath*>(pathBase);
+
+ if (SatisfyBackoff(mgr, *path)) {
+ TargetPhrases *tpsPtr;
+ tpsPtr = Lookup(mgr, mgr.GetPool(), *path);
+ path->AddTargetPhrases(*this, tpsPtr);
+ }
}
}
TargetPhrases* ProbingPT::Lookup(const Manager &mgr, MemPool &pool,
- InputPath &inputPath) const
+ InputPath &inputPath) const
{
/*
if (inputPath.prefixPath && inputPath.prefixPath->GetTargetPhrases(*this) == NULL) {
@@ -234,7 +228,7 @@ TargetPhrases* ProbingPT::Lookup(const Manager &mgr, MemPool &pool,
// query pt
TargetPhrases *tps = CreateTargetPhrases(pool, mgr.system, sourcePhrase,
- keyStruct.second);
+ keyStruct.second);
return tps;
}
@@ -246,13 +240,12 @@ std::pair<bool, uint64_t> ProbingPT::GetKey(const Phrase<Moses2::Word> &sourcePh
size_t sourceSize = sourcePhrase.GetSize();
assert(sourceSize);
- uint64_t probingSource[sourceSize];
+ uint64_t *probingSource = (uint64_t*) alloca(sourceSize * sizeof(uint64_t));
GetSourceProbingIds(sourcePhrase, ret.first, probingSource);
if (!ret.first) {
// source phrase contains a word unknown in the pt.
// We know immediately there's no translation for it
- }
- else {
+ } else {
ret.second = m_engine->getKey(probingSource, sourceSize);
}
@@ -296,18 +289,18 @@ TargetPhrases *ProbingPT::CreateTargetPhrases(MemPool &pool,
}
TargetPhraseImpl *ProbingPT::CreateTargetPhrase(
- MemPool &pool,
- const System &system,
- const char *&offset) const
+ MemPool &pool,
+ const System &system,
+ const char *&offset) const
{
- TargetPhraseInfo *tpInfo = (TargetPhraseInfo*) offset;
+ probingpt::TargetPhraseInfo *tpInfo = (probingpt::TargetPhraseInfo*) offset;
size_t numRealWords = tpInfo->numWords / m_output.size();
TargetPhraseImpl *tp =
- new (pool.Allocate<TargetPhraseImpl>()) TargetPhraseImpl(pool, *this,
- system, numRealWords);
+ new (pool.Allocate<TargetPhraseImpl>()) TargetPhraseImpl(pool, *this,
+ system, numRealWords);
- offset += sizeof(TargetPhraseInfo);
+ offset += sizeof(probingpt::TargetPhraseInfo);
// scores
SCORE *scores = (SCORE*) offset;
@@ -322,10 +315,9 @@ TargetPhraseImpl *ProbingPT::CreateTargetPhrase(
if (m_engine->num_lex_scores) {
tp->scoreProperties = scores + m_engine->num_scores;
}
- }
- else {
+ } else {
// log score 1st
- SCORE logScores[totalNumScores];
+ SCORE *logScores = (SCORE*) alloca(totalNumScores * sizeof(SCORE));
for (size_t i = 0; i < totalNumScores; ++i) {
logScores[i] = FloorScore(TransformScore(scores[i]));
}
@@ -344,20 +336,20 @@ TargetPhraseImpl *ProbingPT::CreateTargetPhrase(
// words
for (size_t targetPos = 0; targetPos < numRealWords; ++targetPos) {
- for (size_t i = 0; i < m_output.size(); ++i) {
- FactorType factorType = m_output[i];
+ for (size_t i = 0; i < m_output.size(); ++i) {
+ FactorType factorType = m_output[i];
- uint32_t *probingId = (uint32_t*) offset;
+ uint32_t *probingId = (uint32_t*) offset;
- const std::pair<bool, const Factor *> *factorPair = GetTargetFactor(*probingId);
- assert(factorPair);
- assert(!factorPair->first);
+ const std::pair<bool, const Factor *> *factorPair = GetTargetFactor(*probingId);
+ assert(factorPair);
+ assert(!factorPair->first);
- Word &word = (*tp)[targetPos];
- word[factorType] = factorPair->second;
+ Word &word = (*tp)[targetPos];
+ word[factorType] = factorPair->second;
- offset += sizeof(uint32_t);
- }
+ offset += sizeof(uint32_t);
+ }
}
// align
@@ -372,7 +364,7 @@ TargetPhraseImpl *ProbingPT::CreateTargetPhrase(
}
void ProbingPT::GetSourceProbingIds(const Phrase<Moses2::Word> &sourcePhrase,
- bool &ok, uint64_t probingSource[]) const
+ bool &ok, uint64_t probingSource[]) const
{
size_t size = sourcePhrase.GetSize();
@@ -382,8 +374,7 @@ void ProbingPT::GetSourceProbingIds(const Phrase<Moses2::Word> &sourcePhrase,
if (probingId == m_unkId) {
ok = false;
return;
- }
- else {
+ } else {
probingSource[i] = probingId;
}
}
@@ -396,14 +387,14 @@ uint64_t ProbingPT::GetSourceProbingId(const Word &word) const
uint64_t ret = 0;
for (size_t i = 0; i < m_input.size(); ++i) {
- FactorType factorType = m_input[i];
- const Factor *factor = word[factorType];
-
- size_t factorId = factor->GetId();
- if (factorId >= m_sourceVocab.size()) {
- return m_unkId;
- }
- ret += m_sourceVocab[factorId];
+ FactorType factorType = m_input[i];
+ const Factor *factor = word[factorType];
+
+ size_t factorId = factor->GetId();
+ if (factorId >= m_sourceVocab.size()) {
+ return m_unkId;
+ }
+ ret += m_sourceVocab[factorId];
}
return ret;
@@ -435,12 +426,12 @@ void ProbingPT::CreateCache(System &system)
//cerr << "line=" << line << endl;
if (system.isPb) {
- PhraseImpl *sourcePhrase = PhraseImpl::CreateFromString(tmpSourcePool, vocab, system, toks[2]);
+ PhraseImpl *sourcePhrase = PhraseImpl::CreateFromString(tmpSourcePool, vocab, system, toks[2]);
- /*
+ /*
std::pair<bool, uint64_t> retStruct = GetKey(*sourcePhrase);
if (!retStruct.first) {
- UTIL_THROW2("Unknown cache entry");
+ UTIL_THROW2("Unknown cache entry");
}
cerr << "key=" << retStruct.second << " " << key << endl;
*/
@@ -448,9 +439,8 @@ void ProbingPT::CreateCache(System &system)
assert(tps);
m_cachePb[key] = tps;
- }
- else {
- // SCFG
+ } else {
+ // SCFG
SCFG::PhraseImpl *sourcePhrase = SCFG::PhraseImpl::CreateFromString(tmpSourcePool, vocab, system, toks[2], false);
//cerr << "sourcePhrase=" << sourcePhrase->Debug(system) << endl;
@@ -473,8 +463,7 @@ void ProbingPT::ReformatWord(System &system, std::string &wordStr, bool &isNT)
isNT = false;
if (system.isPb) {
return;
- }
- else {
+ } else {
isNT = (wordStr[0] == '[' && wordStr[wordStr.size() - 1] == ']');
//cerr << "nt=" << nt << endl;
@@ -482,8 +471,7 @@ void ProbingPT::ReformatWord(System &system, std::string &wordStr, bool &isNT)
size_t startPos = wordStr.find("][");
if (startPos == string::npos) {
startPos = 1;
- }
- else {
+ } else {
startPos += 2;
}
@@ -494,9 +482,9 @@ void ProbingPT::ReformatWord(System &system, std::string &wordStr, bool &isNT)
}
void ProbingPT::InitActiveChart(
- MemPool &pool,
- const SCFG::Manager &mgr,
- SCFG::InputPath &path) const
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ SCFG::InputPath &path) const
{
//cerr << "InitActiveChart=" << path.Debug(cerr, mgr.system) << endl;
size_t ptInd = GetPtInd();
@@ -505,10 +493,10 @@ void ProbingPT::InitActiveChart(
}
void ProbingPT::Lookup(MemPool &pool,
- const SCFG::Manager &mgr,
- size_t maxChartSpan,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const
+ const SCFG::Manager &mgr,
+ size_t maxChartSpan,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &path) const
{
//cerr << "Lookup=" << endl;
if (path.range.GetNumWordsCovered() > maxChartSpan) {
@@ -546,13 +534,13 @@ void ProbingPT::Lookup(MemPool &pool,
}
void ProbingPT::LookupGivenNode(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::ActiveChartEntry &prevEntry,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const SCFG::ActiveChartEntry &prevEntry,
+ const SCFG::Word &wordSought,
+ const Moses2::Hypotheses *hypos,
+ const Moses2::Range &subPhraseRange,
+ SCFG::InputPath &outPath) const
{
const ActiveChartEntryProbing &prevEntryCast = static_cast<const ActiveChartEntryProbing&>(prevEntry);
@@ -568,7 +556,7 @@ void ProbingPT::LookupGivenNode(
// check in cache
CacheSCFG::const_iterator iter = m_cacheSCFG.find(key.second);
if (iter != m_cacheSCFG.end()) {
- //cerr << "FOUND IN CACHE " << key.second << " " << sourcePhrase.Debug(mgr.system) << endl;
+ //cerr << "FOUND IN CACHE " << key.second << " " << sourcePhrase.Debug(mgr.system) << endl;
SCFG::TargetPhrases *tps = iter->second;
ActiveChartEntryProbing *chartEntry = new (pool.Allocate<ActiveChartEntryProbing>()) ActiveChartEntryProbing(pool, prevEntryCast);
@@ -581,44 +569,43 @@ void ProbingPT::LookupGivenNode(
outPath.AddActiveChartEntry(ptInd, chartEntry);
outPath.AddTargetPhrasesToPath(pool, mgr.system, *this, *tps, chartEntry->GetSymbolBind());
- }
- else {
- // not in cache. Lookup
- std::pair<bool, SCFG::TargetPhrases*> tpsPair = CreateTargetPhrasesSCFG(pool, mgr.system, sourcePhrase, key.second);
- assert(tpsPair.first && tpsPair.second);
-
- if (tpsPair.first) {
- // new entries
- ActiveChartEntryProbing *chartEntry = new (pool.Allocate<ActiveChartEntryProbing>()) ActiveChartEntryProbing(pool, prevEntryCast);
- //cerr << "AFTER chartEntry" << endl;
-
- chartEntry->AddSymbolBindElement(subPhraseRange, wordSought, hypos, *this);
- //cerr << "AFTER AddSymbolBindElement" << endl;
-
- size_t ptInd = GetPtInd();
- outPath.AddActiveChartEntry(ptInd, chartEntry);
- //cerr << "AFTER AddActiveChartEntry" << endl;
-
- if (tpsPair.second) {
- // there are some rules
- //cerr << "symbolbind=" << chartEntry->GetSymbolBind().Debug(mgr.system) << endl;
- outPath.AddTargetPhrasesToPath(pool, mgr.system, *this, *tpsPair.second, chartEntry->GetSymbolBind());
- }
- }
+ } else {
+ // not in cache. Lookup
+ std::pair<bool, SCFG::TargetPhrases*> tpsPair = CreateTargetPhrasesSCFG(pool, mgr.system, sourcePhrase, key.second);
+ assert(tpsPair.first && tpsPair.second);
+
+ if (tpsPair.first) {
+ // new entries
+ ActiveChartEntryProbing *chartEntry = new (pool.Allocate<ActiveChartEntryProbing>()) ActiveChartEntryProbing(pool, prevEntryCast);
+ //cerr << "AFTER chartEntry" << endl;
+
+ chartEntry->AddSymbolBindElement(subPhraseRange, wordSought, hypos, *this);
+ //cerr << "AFTER AddSymbolBindElement" << endl;
+
+ size_t ptInd = GetPtInd();
+ outPath.AddActiveChartEntry(ptInd, chartEntry);
+ //cerr << "AFTER AddActiveChartEntry" << endl;
+
+ if (tpsPair.second) {
+ // there are some rules
+ //cerr << "symbolbind=" << chartEntry->GetSymbolBind().Debug(mgr.system) << endl;
+ outPath.AddTargetPhrasesToPath(pool, mgr.system, *this, *tpsPair.second, chartEntry->GetSymbolBind());
+ }
+ }
}
}
SCFG::TargetPhraseImpl *ProbingPT::CreateTargetPhraseSCFG(
- MemPool &pool,
- const System &system,
- const char *&offset) const
+ MemPool &pool,
+ const System &system,
+ const char *&offset) const
{
- TargetPhraseInfo *tpInfo = (TargetPhraseInfo*) offset;
+ probingpt::TargetPhraseInfo *tpInfo = (probingpt::TargetPhraseInfo*) offset;
SCFG::TargetPhraseImpl *tp =
- new (pool.Allocate<SCFG::TargetPhraseImpl>()) SCFG::TargetPhraseImpl(pool, *this,
- system, tpInfo->numWords - 1);
+ new (pool.Allocate<SCFG::TargetPhraseImpl>()) SCFG::TargetPhraseImpl(pool, *this,
+ system, tpInfo->numWords - 1);
- offset += sizeof(TargetPhraseInfo);
+ offset += sizeof(probingpt::TargetPhraseInfo);
// scores
SCORE *scores = (SCORE*) offset;
@@ -633,10 +620,9 @@ SCFG::TargetPhraseImpl *ProbingPT::CreateTargetPhraseSCFG(
if (m_engine->num_lex_scores) {
tp->scoreProperties = scores + m_engine->num_scores;
}
- }
- else {
+ } else {
// log score 1st
- SCORE logScores[totalNumScores];
+ SCORE *logScores = (SCORE*) alloca(totalNumScores * sizeof(SCORE));
for (size_t i = 0; i < totalNumScores; ++i) {
logScores[i] = FloorScore(TransformScore(scores[i]));
}
@@ -706,47 +692,47 @@ std::pair<bool, SCFG::TargetPhrases*> ProbingPT::CreateTargetPhrasesSCFG(MemPool
/*
if (outPath.range.GetStartPos() == 1 || outPath.range.GetStartPos() == 2) {
- cerr << "range=" << outPath.range
- << " prevEntry=" << prevEntry.GetSymbolBind().Debug(mgr.system) << " " << prevEntryCast.GetKey()
- << " wordSought=" << wordSought.Debug(mgr.system)
- << " key=" << key.first << " " << key.second
- << " query_result=" << query_result.first << " " << (query_result.second == NONE)
- << endl;
+ cerr << "range=" << outPath.range
+ << " prevEntry=" << prevEntry.GetSymbolBind().Debug(mgr.system) << " " << prevEntryCast.GetKey()
+ << " wordSought=" << wordSought.Debug(mgr.system)
+ << " key=" << key.first << " " << key.second
+ << " query_result=" << query_result.first << " " << (query_result.second == NONE)
+ << endl;
}
*/
if (query_result.first) {
ret.first = true;
- size_t ptInd = GetPtInd();
+ size_t ptInd = GetPtInd();
- if (query_result.second != NONE) {
- // there are some rules
- const FeatureFunctions &ffs = system.featureFunctions;
+ if (query_result.second != NONE) {
+ // there are some rules
+ const FeatureFunctions &ffs = system.featureFunctions;
- const char *offset = m_engine->memTPS + query_result.second;
- uint64_t *numTP = (uint64_t*) offset;
- //cerr << "numTP=" << *numTP << endl;
+ const char *offset = m_engine->memTPS + query_result.second;
+ uint64_t *numTP = (uint64_t*) offset;
+ //cerr << "numTP=" << *numTP << endl;
- SCFG::TargetPhrases *tps = new (pool.Allocate<SCFG::TargetPhrases>()) SCFG::TargetPhrases(pool, *numTP);
- ret.second = tps;
+ SCFG::TargetPhrases *tps = new (pool.Allocate<SCFG::TargetPhrases>()) SCFG::TargetPhrases(pool, *numTP);
+ ret.second = tps;
- offset += sizeof(uint64_t);
- for (size_t i = 0; i < *numTP; ++i) {
- SCFG::TargetPhraseImpl *tp = CreateTargetPhraseSCFG(pool, system, offset);
- assert(tp);
- //cerr << "tp=" << tp->Debug(mgr.system) << endl;
+ offset += sizeof(uint64_t);
+ for (size_t i = 0; i < *numTP; ++i) {
+ SCFG::TargetPhraseImpl *tp = CreateTargetPhraseSCFG(pool, system, offset);
+ assert(tp);
+ //cerr << "tp=" << tp->Debug(mgr.system) << endl;
- ffs.EvaluateInIsolation(pool, system, sourcePhrase, *tp);
+ ffs.EvaluateInIsolation(pool, system, sourcePhrase, *tp);
- tps->AddTargetPhrase(*tp);
+ tps->AddTargetPhrase(*tp);
- }
+ }
- tps->SortAndPrune(m_tableLimit);
- ffs.EvaluateAfterTablePruning(pool, *tps, sourcePhrase);
- //cerr << "tps=" << tps->GetSize() << endl;
+ tps->SortAndPrune(m_tableLimit);
+ ffs.EvaluateAfterTablePruning(pool, *tps, sourcePhrase);
+ //cerr << "tps=" << tps->GetSize() << endl;
- }
+ }
}
return ret;
diff --git a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.h b/moses2/TranslationModel/ProbingPT.h
index c5fbefd6f..47d22e1b3 100644
--- a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.h
+++ b/moses2/TranslationModel/ProbingPT.h
@@ -11,17 +11,21 @@
#include <boost/thread/tss.hpp>
#include <boost/bimap.hpp>
#include <deque>
-#include "../PhraseTable.h"
-#include "../../Vector.h"
-#include "../../Phrase.h"
-#include "../../SCFG/ActiveChart.h"
+#include "PhraseTable.h"
+#include "../Vector.h"
+#include "../Phrase.h"
+#include "../SCFG/ActiveChart.h"
#include "util/mmap.hh"
-namespace Moses2
+namespace probingpt
{
-class AlignmentInfo;
class QueryEngine;
class target_text;
+}
+
+namespace Moses2
+{
+class AlignmentInfo;
class MemPool;
class System;
class RecycleData;
@@ -35,35 +39,36 @@ class TargetPhrases;
class ProbingPT: public Moses2::PhraseTable
{
//////////////////////////////////////
- class ActiveChartEntryProbing : public SCFG::ActiveChartEntry
- {
- typedef SCFG::ActiveChartEntry Parent;
- public:
+ class ActiveChartEntryProbing : public SCFG::ActiveChartEntry
+ {
+ typedef SCFG::ActiveChartEntry Parent;
+ public:
- ActiveChartEntryProbing(MemPool &pool)
+ ActiveChartEntryProbing(MemPool &pool)
:Parent(pool)
,m_key(0)
- {}
+ {}
- ActiveChartEntryProbing(
- MemPool &pool,
- const ActiveChartEntryProbing &prevEntry);
+ ActiveChartEntryProbing(
+ MemPool &pool,
+ const ActiveChartEntryProbing &prevEntry);
- uint64_t GetKey() const
- { return m_key; }
+ uint64_t GetKey() const {
+ return m_key;
+ }
- std::pair<bool, uint64_t> GetKey(const SCFG::Word &nextWord, const ProbingPT &pt) const;
+ std::pair<bool, uint64_t> GetKey(const SCFG::Word &nextWord, const ProbingPT &pt) const;
- virtual void AddSymbolBindElement(
- const Range &range,
- const SCFG::Word &word,
- const Moses2::Hypotheses *hypos,
- const Moses2::PhraseTable &pt);
+ virtual void AddSymbolBindElement(
+ const Range &range,
+ const SCFG::Word &word,
+ const Moses2::Hypotheses *hypos,
+ const Moses2::PhraseTable &pt);
- protected:
- uint64_t m_key;
- };
- //////////////////////////////////////
+ protected:
+ uint64_t m_key;
+ };
+ //////////////////////////////////////
public:
ProbingPT(size_t startInd, const std::string &line);
@@ -73,20 +78,21 @@ public:
virtual void SetParameter(const std::string& key, const std::string& value);
void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const;
- uint64_t GetUnk() const
- { return m_unkId; }
+ uint64_t GetUnk() const {
+ return m_unkId;
+ }
// SCFG
void InitActiveChart(
- MemPool &pool,
- const SCFG::Manager &mgr,
- SCFG::InputPath &path) const;
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ SCFG::InputPath &path) const;
virtual void Lookup(MemPool &pool,
- const SCFG::Manager &mgr,
- size_t maxChartSpan,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const;
+ const SCFG::Manager &mgr,
+ size_t maxChartSpan,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &path) const;
protected:
@@ -96,19 +102,18 @@ protected:
util::LoadMethod load_method;
uint64_t m_unkId;
- QueryEngine *m_engine;
+ probingpt::QueryEngine *m_engine;
void CreateAlignmentMap(System &system, const std::string path);
TargetPhrases *Lookup(const Manager &mgr, MemPool &pool,
- InputPath &inputPath) const;
+ InputPath &inputPath) const;
TargetPhrases *CreateTargetPhrases(MemPool &pool, const System &system,
- const Phrase<Moses2::Word> &sourcePhrase, uint64_t key) const;
+ const Phrase<Moses2::Word> &sourcePhrase, uint64_t key) const;
TargetPhraseImpl *CreateTargetPhrase(MemPool &pool, const System &system,
- const char *&offset) const;
+ const char *&offset) const;
- inline const std::pair<bool, const Factor*> *GetTargetFactor(uint32_t probingId) const
- {
+ inline const std::pair<bool, const Factor*> *GetTargetFactor(uint32_t probingId) const {
if (probingId >= m_targetVocab.size()) {
return NULL;
}
@@ -118,7 +123,7 @@ protected:
std::pair<bool, uint64_t> GetKey(const Phrase<Moses2::Word> &sourcePhrase) const;
void GetSourceProbingIds(const Phrase<Moses2::Word> &sourcePhrase, bool &ok,
- uint64_t probingSource[]) const;
+ uint64_t probingSource[]) const;
uint64_t GetSourceProbingId(const Word &word) const;
@@ -135,22 +140,22 @@ protected:
// SCFG
void LookupGivenNode(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::ActiveChartEntry &prevEntry,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const;
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const SCFG::ActiveChartEntry &prevEntry,
+ const SCFG::Word &wordSought,
+ const Moses2::Hypotheses *hypos,
+ const Moses2::Range &subPhraseRange,
+ SCFG::InputPath &outPath) const;
std::pair<bool, SCFG::TargetPhrases*> CreateTargetPhrasesSCFG(MemPool &pool, const System &system,
const Phrase<SCFG::Word> &sourcePhrase, uint64_t key) const;
// return value: 1st = there are actual rules, not just a empty cell for prefix
SCFG::TargetPhraseImpl *CreateTargetPhraseSCFG(
- MemPool &pool,
- const System &system,
- const char *&offset) const;
+ MemPool &pool,
+ const System &system,
+ const char *&offset) const;
};
diff --git a/contrib/moses2/TranslationModel/Transliteration.cpp b/moses2/TranslationModel/Transliteration.cpp
index f92348ee9..13c884508 100644
--- a/contrib/moses2/TranslationModel/Transliteration.cpp
+++ b/moses2/TranslationModel/Transliteration.cpp
@@ -28,7 +28,7 @@ namespace Moses2
{
Transliteration::Transliteration(size_t startInd, const std::string &line) :
- PhraseTable(startInd, line)
+ PhraseTable(startInd, line)
{
ReadParameters();
UTIL_THROW_IF2(m_mosesDir.empty() ||
@@ -63,23 +63,23 @@ SetParameter(const std::string& key, const std::string& value)
}
void Transliteration::Lookup(const Manager &mgr,
- InputPathsBase &inputPaths) const
+ InputPathsBase &inputPaths) const
{
- BOOST_FOREACH(InputPathBase *pathBase, inputPaths){
- InputPath *path = static_cast<InputPath*>(pathBase);
+ BOOST_FOREACH(InputPathBase *pathBase, inputPaths) {
+ InputPath *path = static_cast<InputPath*>(pathBase);
- if (SatisfyBackoff(mgr, *path)) {
- const SubPhrase<Moses2::Word> &phrase = path->subPhrase;
+ if (SatisfyBackoff(mgr, *path)) {
+ const SubPhrase<Moses2::Word> &phrase = path->subPhrase;
- TargetPhrases *tps = Lookup(mgr, mgr.GetPool(), *path);
- path->AddTargetPhrases(*this, tps);
- }
- }
+ TargetPhrases *tps = Lookup(mgr, mgr.GetPool(), *path);
+ path->AddTargetPhrases(*this, tps);
+ }
+ }
}
TargetPhrases *Transliteration::Lookup(const Manager &mgr, MemPool &pool,
- InputPath &inputPath) const
+ InputPath &inputPath) const
{
const SubPhrase<Moses2::Word> &sourcePhrase = inputPath.subPhrase;
size_t hash = sourcePhrase.hash();
@@ -121,10 +121,10 @@ TargetPhrases *Transliteration::Lookup(const Manager &mgr, MemPool &pool,
}
std::vector<TargetPhraseImpl*> Transliteration::CreateTargetPhrases(
- const Manager &mgr,
- MemPool &pool,
- const SubPhrase<Moses2::Word> &sourcePhrase,
- const std::string &outDir) const
+ const Manager &mgr,
+ MemPool &pool,
+ const SubPhrase<Moses2::Word> &sourcePhrase,
+ const std::string &outDir) const
{
std::vector<TargetPhraseImpl*> ret;
@@ -137,7 +137,7 @@ std::vector<TargetPhraseImpl*> Transliteration::CreateTargetPhrases(
UTIL_THROW_IF2(toks.size() != 2, "Error in transliteration output file. Expecting word\tscore");
TargetPhraseImpl *tp =
- new (pool.Allocate<TargetPhraseImpl>()) TargetPhraseImpl(pool, *this, mgr.system, 1);
+ new (pool.Allocate<TargetPhraseImpl>()) TargetPhraseImpl(pool, *this, mgr.system, 1);
Moses2::Word &word = (*tp)[0];
word.CreateFromString(mgr.system.GetVocab(), mgr.system, toks[0]);
@@ -166,61 +166,61 @@ void Transliteration::EvaluateInIsolation(const System &system,
// SCFG ///////////////////////////////////////////////////////////////////////////////////////////
void Transliteration::InitActiveChart(
- MemPool &pool,
- const SCFG::Manager &mgr,
- SCFG::InputPath &path) const
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ SCFG::InputPath &path) const
{
UTIL_THROW2("Not implemented");
}
void Transliteration::Lookup(MemPool &pool,
- const SCFG::Manager &mgr,
- size_t maxChartSpan,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const
+ const SCFG::Manager &mgr,
+ size_t maxChartSpan,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &path) const
{
UTIL_THROW2("Not implemented");
}
void Transliteration::LookupUnary(MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const
+ const SCFG::Manager &mgr,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &path) const
{
UTIL_THROW2("Not implemented");
}
void Transliteration::LookupNT(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const Moses2::Range &subPhraseRange,
- const SCFG::InputPath &prevPath,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &outPath) const
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const Moses2::Range &subPhraseRange,
+ const SCFG::InputPath &prevPath,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &outPath) const
{
UTIL_THROW2("Not implemented");
}
void Transliteration::LookupGivenWord(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::InputPath &prevPath,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const SCFG::InputPath &prevPath,
+ const SCFG::Word &wordSought,
+ const Moses2::Hypotheses *hypos,
+ const Moses2::Range &subPhraseRange,
+ SCFG::InputPath &outPath) const
{
UTIL_THROW2("Not implemented");
}
void Transliteration::LookupGivenNode(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::ActiveChartEntry &prevEntry,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const SCFG::ActiveChartEntry &prevEntry,
+ const SCFG::Word &wordSought,
+ const Moses2::Hypotheses *hypos,
+ const Moses2::Range &subPhraseRange,
+ SCFG::InputPath &outPath) const
{
UTIL_THROW2("Not implemented");
}
diff --git a/moses2/TranslationModel/Transliteration.h b/moses2/TranslationModel/Transliteration.h
new file mode 100644
index 000000000..593677d60
--- /dev/null
+++ b/moses2/TranslationModel/Transliteration.h
@@ -0,0 +1,91 @@
+/*
+ * Transliteration.h
+ *
+ * Created on: 28 Oct 2015
+ * Author: hieu
+ */
+
+#pragma once
+
+#include "PhraseTable.h"
+
+namespace Moses2
+{
+class Sentence;
+class InputPaths;
+class Range;
+
+class Transliteration: public PhraseTable
+{
+public:
+ Transliteration(size_t startInd, const std::string &line);
+ virtual ~Transliteration();
+
+ void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const;
+ virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool,
+ InputPath &inputPath) const;
+
+ virtual void
+ EvaluateInIsolation(const System &system, const Phrase<Moses2::Word> &source,
+ const TargetPhraseImpl &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const;
+
+ virtual void InitActiveChart(
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ SCFG::InputPath &path) const;
+
+ void Lookup(MemPool &pool,
+ const SCFG::Manager &mgr,
+ size_t maxChartSpan,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &path) const;
+
+ void LookupUnary(MemPool &pool,
+ const SCFG::Manager &mgr,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &path) const;
+
+protected:
+ virtual void LookupNT(
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const Moses2::Range &subPhraseRange,
+ const SCFG::InputPath &prevPath,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &outPath) const;
+
+ virtual void LookupGivenWord(
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const SCFG::InputPath &prevPath,
+ const SCFG::Word &wordSought,
+ const Moses2::Hypotheses *hypos,
+ const Moses2::Range &subPhraseRange,
+ SCFG::InputPath &outPath) const;
+
+ virtual void LookupGivenNode(
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const SCFG::ActiveChartEntry &prevEntry,
+ const SCFG::Word &wordSought,
+ const Moses2::Hypotheses *hypos,
+ const Moses2::Range &subPhraseRange,
+ SCFG::InputPath &outPath) const;
+
+ void SetParameter(const std::string& key, const std::string& value);
+
+protected:
+ std::string m_filePath;
+ std::string m_mosesDir, m_scriptDir, m_externalDir, m_inputLang, m_outputLang;
+
+ std::vector<TargetPhraseImpl*> CreateTargetPhrases(
+ const Manager &mgr,
+ MemPool &pool,
+ const SubPhrase<Moses2::Word> &sourcePhrase,
+ const std::string &outDir) const;
+
+};
+
+}
+
diff --git a/contrib/moses2/TranslationModel/UnknownWordPenalty.cpp b/moses2/TranslationModel/UnknownWordPenalty.cpp
index d786b2cff..e165e7e02 100644
--- a/contrib/moses2/TranslationModel/UnknownWordPenalty.cpp
+++ b/moses2/TranslationModel/UnknownWordPenalty.cpp
@@ -26,8 +26,8 @@ namespace Moses2
{
UnknownWordPenalty::UnknownWordPenalty(size_t startInd, const std::string &line)
-:PhraseTable(startInd, line)
-,m_drop(false)
+ :PhraseTable(startInd, line)
+ ,m_drop(false)
{
m_tuneable = false;
ReadParameters();
@@ -42,27 +42,24 @@ void UnknownWordPenalty::SetParameter(const std::string& key, const std::string&
{
if (key == "drop") {
m_drop = Scan<bool>(value);
- }
- else if (key == "prefix") {
+ } else if (key == "prefix") {
m_prefix = value;
- }
- else if (key == "suffix") {
+ } else if (key == "suffix") {
m_suffix = value;
- }
- else {
+ } else {
PhraseTable::SetParameter(key, value);
}
}
void UnknownWordPenalty::ProcessXML(
- const Manager &mgr,
- MemPool &pool,
- const Sentence &sentence,
- InputPaths &inputPaths) const
+ const Manager &mgr,
+ MemPool &pool,
+ const Sentence &sentence,
+ InputPaths &inputPaths) const
{
- const Vector<const InputType::XMLOption*> &xmlOptions = sentence.GetXMLOptions();
- BOOST_FOREACH(const InputType::XMLOption *xmlOption, xmlOptions) {
- TargetPhraseImpl *target = TargetPhraseImpl::CreateFromString(pool, *this, mgr.system, xmlOption->GetTranslation());
+ const Vector<const InputType::XMLOption*> &xmlOptions = sentence.GetXMLOptions();
+ BOOST_FOREACH(const InputType::XMLOption *xmlOption, xmlOptions) {
+ TargetPhraseImpl *target = TargetPhraseImpl::CreateFromString(pool, *this, mgr.system, xmlOption->GetTranslation());
if (xmlOption->prob) {
Scores &scores = target->GetScores();
@@ -80,22 +77,22 @@ void UnknownWordPenalty::ProcessXML(
mgr.system.featureFunctions.EvaluateAfterTablePruning(pool, *tps, source);
path->AddTargetPhrases(*this, tps);
- }
+ }
}
void UnknownWordPenalty::Lookup(const Manager &mgr,
- InputPathsBase &inputPaths) const
+ InputPathsBase &inputPaths) const
{
- BOOST_FOREACH(InputPathBase *pathBase, inputPaths){
- InputPath *path = static_cast<InputPath*>(pathBase);
+ BOOST_FOREACH(InputPathBase *pathBase, inputPaths) {
+ InputPath *path = static_cast<InputPath*>(pathBase);
- if (SatisfyBackoff(mgr, *path)) {
- const SubPhrase<Moses2::Word> &phrase = path->subPhrase;
+ if (SatisfyBackoff(mgr, *path)) {
+ const SubPhrase<Moses2::Word> &phrase = path->subPhrase;
- TargetPhrases *tps = Lookup(mgr, mgr.GetPool(), *path);
- path->AddTargetPhrases(*this, tps);
- }
- }
+ TargetPhrases *tps = Lookup(mgr, mgr.GetPool(), *path);
+ path->AddTargetPhrases(*this, tps);
+ }
+ }
}
@@ -108,7 +105,7 @@ TargetPhrases *UnknownWordPenalty::Lookup(const Manager &mgr, MemPool &pool,
// any other pt translate this?
size_t numPt = mgr.system.mappings.size();
const TargetPhrases **allTPS =
- static_cast<InputPath&>(inputPath).targetPhrases;
+ static_cast<InputPath&>(inputPath).targetPhrases;
for (size_t i = 0; i < numPt; ++i) {
const TargetPhrases *otherTps = allTPS[i];
@@ -126,16 +123,15 @@ TargetPhrases *UnknownWordPenalty::Lookup(const Manager &mgr, MemPool &pool,
size_t numWords = m_drop ? 0 : 1;
TargetPhraseImpl *target =
- new (pool.Allocate<TargetPhraseImpl>()) TargetPhraseImpl(pool, *this,
- system, numWords);
+ new (pool.Allocate<TargetPhraseImpl>()) TargetPhraseImpl(pool, *this,
+ system, numWords);
if (!m_drop) {
Moses2::Word &word = (*target)[0];
if (m_prefix.empty() && m_suffix.empty()) {
word[0] = factor;
- }
- else {
+ } else {
stringstream strm;
if (!m_prefix.empty()) {
strm << m_prefix;
@@ -172,17 +168,17 @@ void UnknownWordPenalty::EvaluateInIsolation(const System &system,
// SCFG ///////////////////////////////////////////////////////////////////////////////////////////
void UnknownWordPenalty::InitActiveChart(
- MemPool &pool,
- const SCFG::Manager &mgr,
- SCFG::InputPath &path) const
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ SCFG::InputPath &path) const
{
}
void UnknownWordPenalty::Lookup(MemPool &pool,
- const SCFG::Manager &mgr,
- size_t maxChartSpan,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const
+ const SCFG::Manager &mgr,
+ size_t maxChartSpan,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &path) const
{
const System &system = mgr.system;
@@ -193,7 +189,7 @@ void UnknownWordPenalty::Lookup(MemPool &pool,
}
if (path.GetNumRules()) {
- // only create rules if no other rules
+ // only create rules if no other rules
return;
}
@@ -240,43 +236,43 @@ void UnknownWordPenalty::Lookup(MemPool &pool,
}
void UnknownWordPenalty::LookupUnary(MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const
+ const SCFG::Manager &mgr,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &path) const
{
}
void UnknownWordPenalty::LookupNT(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const Moses2::Range &subPhraseRange,
- const SCFG::InputPath &prevPath,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &outPath) const
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const Moses2::Range &subPhraseRange,
+ const SCFG::InputPath &prevPath,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &outPath) const
{
UTIL_THROW2("Not implemented");
}
void UnknownWordPenalty::LookupGivenWord(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::InputPath &prevPath,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const SCFG::InputPath &prevPath,
+ const SCFG::Word &wordSought,
+ const Moses2::Hypotheses *hypos,
+ const Moses2::Range &subPhraseRange,
+ SCFG::InputPath &outPath) const
{
UTIL_THROW2("Not implemented");
}
void UnknownWordPenalty::LookupGivenNode(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::ActiveChartEntry &prevEntry,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const SCFG::ActiveChartEntry &prevEntry,
+ const SCFG::Word &wordSought,
+ const Moses2::Hypotheses *hypos,
+ const Moses2::Range &subPhraseRange,
+ SCFG::InputPath &outPath) const
{
UTIL_THROW2("Not implemented");
}
diff --git a/moses2/TranslationModel/UnknownWordPenalty.h b/moses2/TranslationModel/UnknownWordPenalty.h
new file mode 100644
index 000000000..112f0b6cf
--- /dev/null
+++ b/moses2/TranslationModel/UnknownWordPenalty.h
@@ -0,0 +1,89 @@
+/*
+ * UnknownWordPenalty.h
+ *
+ * Created on: 28 Oct 2015
+ * Author: hieu
+ */
+
+#pragma once
+
+#include "PhraseTable.h"
+
+namespace Moses2
+{
+class Sentence;
+class InputPaths;
+class Range;
+
+// PhraseTable subclass declared for both the phrase-based interface
+// (Manager / InputPath) and the SCFG interface (SCFG::Manager / SCFG::InputPath).
+// NOTE(review): judging by the name, this supplies entries for unknown source
+// words -- confirm against UnknownWordPenalty.cpp.
+class UnknownWordPenalty: public PhraseTable
+{
+public:
+ // startInd and line are the usual feature-function construction arguments
+ // (presumably forwarded to PhraseTable -- TODO confirm).
+ UnknownWordPenalty(size_t startInd, const std::string &line);
+ virtual ~UnknownWordPenalty();
+
+ // Handles one key/value configuration pair.
+ virtual void SetParameter(const std::string& key, const std::string& value);
+
+ // Phrase-based lookups: over all input paths, and for a single input path.
+ void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const;
+ virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool,
+ InputPath &inputPath) const;
+
+ void ProcessXML(
+ const Manager &mgr,
+ MemPool &pool,
+ const Sentence &sentence,
+ InputPaths &inputPaths) const;
+
+ virtual void
+ EvaluateInIsolation(const System &system, const Phrase<Moses2::Word> &source,
+ const TargetPhraseImpl &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const;
+
+ // SCFG interface. The definition in UnknownWordPenalty.cpp has an empty body.
+ virtual void InitActiveChart(
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ SCFG::InputPath &path) const;
+
+ // SCFG lookup for one path. The definition returns early when the path
+ // already has rules ("only create rules if no other rules").
+ void Lookup(MemPool &pool,
+ const SCFG::Manager &mgr,
+ size_t maxChartSpan,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &path) const;
+
+ // The definition in UnknownWordPenalty.cpp has an empty body.
+ void LookupUnary(MemPool &pool,
+ const SCFG::Manager &mgr,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &path) const;
+
+protected:
+ // The three hooks below are defined to throw "Not implemented" (UTIL_THROW2).
+ virtual void LookupNT(
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const Moses2::Range &subPhraseRange,
+ const SCFG::InputPath &prevPath,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &outPath) const;
+
+ virtual void LookupGivenWord(
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const SCFG::InputPath &prevPath,
+ const SCFG::Word &wordSought,
+ const Moses2::Hypotheses *hypos,
+ const Moses2::Range &subPhraseRange,
+ SCFG::InputPath &outPath) const;
+
+ virtual void LookupGivenNode(
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const SCFG::ActiveChartEntry &prevEntry,
+ const SCFG::Word &wordSought,
+ const Moses2::Hypotheses *hypos,
+ const Moses2::Range &subPhraseRange,
+ SCFG::InputPath &outPath) const;
+protected:
+ // NOTE(review): presumably configured through SetParameter (drop unknown
+ // words / decorate them with a prefix and suffix) -- confirm in the .cpp.
+ bool m_drop;
+ std::string m_prefix, m_suffix;
+};
+
+}
+
diff --git a/contrib/moses2/TranslationTask.cpp b/moses2/TranslationTask.cpp
index 375e4709b..219d9ffcb 100644
--- a/contrib/moses2/TranslationTask.cpp
+++ b/moses2/TranslationTask.cpp
@@ -10,14 +10,13 @@ namespace Moses2
{
TranslationTask::TranslationTask(System &system,
- const std::string &line,
- long translationId)
+ const std::string &line,
+ long translationId)
{
if (system.isPb) {
- m_mgr = new Manager(system, *this, line, translationId);
- }
- else {
- m_mgr = new SCFG::Manager(system, *this, line, translationId);
+ m_mgr = new Manager(system, *this, line, translationId);
+ } else {
+ m_mgr = new SCFG::Manager(system, *this, line, translationId);
}
}
diff --git a/contrib/moses2/TranslationTask.h b/moses2/TranslationTask.h
index bf2330357..bf2330357 100644
--- a/contrib/moses2/TranslationTask.h
+++ b/moses2/TranslationTask.h
diff --git a/contrib/moses2/TrellisPaths.cpp b/moses2/TrellisPaths.cpp
index 814da4521..814da4521 100644
--- a/contrib/moses2/TrellisPaths.cpp
+++ b/moses2/TrellisPaths.cpp
diff --git a/contrib/moses2/TrellisPaths.h b/moses2/TrellisPaths.h
index 3e2d9ab9a..6a6a59c1a 100644
--- a/contrib/moses2/TrellisPaths.h
+++ b/moses2/TrellisPaths.h
@@ -14,10 +14,8 @@ namespace Moses2
{
template<typename T>
-struct CompareTrellisPath
-{
- bool operator()(const T* pathA, const T* pathB) const
- {
+struct CompareTrellisPath {
+ bool operator()(const T* pathA, const T* pathB) const {
return (pathA->GetFutureScore() < pathB->GetFutureScore());
}
};
@@ -28,27 +26,23 @@ class TrellisPaths
public:
TrellisPaths() {}
- virtual ~TrellisPaths()
- {
+ virtual ~TrellisPaths() {
while (!empty()) {
T *path = Get();
delete path;
}
}
- bool empty() const
- {
+ bool empty() const {
return m_coll.empty();
}
//! add a new entry into collection
- void Add(T *trellisPath)
- {
+ void Add(T *trellisPath) {
m_coll.push(trellisPath);
}
- T *Get()
- {
+ T *Get() {
T *top = m_coll.top();
// Detach
@@ -56,12 +50,13 @@ public:
return top;
}
- size_t GetSize() const
- { return m_coll.size(); }
+ size_t GetSize() const {
+ return m_coll.size();
+ }
protected:
typedef std::priority_queue<T*, std::vector<T*>,
- CompareTrellisPath<T> > CollectionType;
+ CompareTrellisPath<T> > CollectionType;
CollectionType m_coll;
};
diff --git a/contrib/moses2/TypeDef.cpp b/moses2/TypeDef.cpp
index b8b79c59c..b8b79c59c 100644
--- a/contrib/moses2/TypeDef.cpp
+++ b/moses2/TypeDef.cpp
diff --git a/contrib/moses2/TypeDef.h b/moses2/TypeDef.h
index e0a1a93a3..aed39ac11 100644
--- a/contrib/moses2/TypeDef.h
+++ b/moses2/TypeDef.h
@@ -46,8 +46,7 @@ typedef std::vector<FactorType> FactorList;
// Note: StaticData uses SearchAlgorithm to determine whether the translation
// model is phrase-based or syntax-based. If you add a syntax-based search
// algorithm here then you should also update StaticData::IsSyntax().
-enum SearchAlgorithm
-{
+enum SearchAlgorithm {
Normal = 0, CubePruning = 1,
//,CubeGrowing = 2
CYKPlus = 3,
@@ -108,12 +107,10 @@ public:
bool added;
HypothesisBase *other;
- StackAdd()
- {
+ StackAdd() {
}
StackAdd(bool vadded, HypothesisBase *vOther) :
- added(vadded), other(vOther)
- {
+ added(vadded), other(vOther) {
}
};
diff --git a/contrib/moses2/Vector.cpp b/moses2/Vector.cpp
index 46af0f793..46af0f793 100644
--- a/contrib/moses2/Vector.cpp
+++ b/moses2/Vector.cpp
diff --git a/contrib/moses2/Vector.h b/moses2/Vector.h
index f35e71825..404d76dd3 100644
--- a/contrib/moses2/Vector.h
+++ b/moses2/Vector.h
@@ -19,13 +19,11 @@ class Vector: public std::vector<T, MemPoolAllocator<T> >
public:
Vector(MemPool &pool, size_t size = 0, const T &val = T()) :
- Parent(size, val, MemPoolAllocator<T>(pool))
- {
+ Parent(size, val, MemPoolAllocator<T>(pool)) {
}
Vector(const Vector &copy) :
- Parent(copy)
- {
+ Parent(copy) {
}
protected:
diff --git a/contrib/moses2/Weights.cpp b/moses2/Weights.cpp
index 643847eee..e31a0fd3b 100644
--- a/contrib/moses2/Weights.cpp
+++ b/moses2/Weights.cpp
@@ -52,8 +52,8 @@ void Weights::SetWeights(const FeatureFunctions &ffs, const std::string &ffName,
UTIL_THROW_IF2(weights.size() != numScores, "Wrong number of weights. " << weights.size() << "!=" << numScores);
for (size_t i = 0; i < numScores; ++i) {
- SCORE weight = weights[i];
- m_weights[startInd + i] = weight;
+ SCORE weight = weights[i];
+ m_weights[startInd + i] = weight;
}
}
diff --git a/contrib/moses2/Weights.h b/moses2/Weights.h
index c3c2cee62..96fdb5a71 100644
--- a/contrib/moses2/Weights.h
+++ b/moses2/Weights.h
@@ -22,8 +22,7 @@ public:
virtual ~Weights();
void Init(const FeatureFunctions &ffs);
- SCORE operator[](size_t ind) const
- {
+ SCORE operator[](size_t ind) const {
return m_weights[ind];
}
diff --git a/contrib/moses2/Word.cpp b/moses2/Word.cpp
index fe10330e7..f272f7cdc 100644
--- a/contrib/moses2/Word.cpp
+++ b/moses2/Word.cpp
@@ -33,7 +33,7 @@ Word::~Word()
}
void Word::CreateFromString(FactorCollection &vocab, const System &system,
- const std::string &str)
+ const std::string &str)
{
vector<string> toks = Tokenize(str, "|");
for (size_t i = 0; i < toks.size(); ++i) {
@@ -45,7 +45,7 @@ void Word::CreateFromString(FactorCollection &vocab, const System &system,
// null the rest
for (size_t i = toks.size(); i < MAX_NUM_FACTORS; ++i) {
- m_factors[i] = NULL;
+ m_factors[i] = NULL;
}
}
@@ -53,7 +53,7 @@ size_t Word::hash() const
{
uint64_t seed = 0;
size_t ret = util::MurmurHashNative(m_factors,
- sizeof(Factor*) * MAX_NUM_FACTORS, seed);
+ sizeof(Factor*) * MAX_NUM_FACTORS, seed);
return ret;
}
@@ -61,9 +61,9 @@ size_t Word::hash(const std::vector<FactorType> &factors) const
{
size_t seed = 0;
for (size_t i = 0; i < factors.size(); ++i) {
- FactorType factorType = factors[i];
- const Factor *factor = m_factors[factorType];
- boost::hash_combine(seed, factor);
+ FactorType factorType = factors[i];
+ const Factor *factor = m_factors[factorType];
+ boost::hash_combine(seed, factor);
}
return seed;
}
@@ -73,7 +73,7 @@ int Word::Compare(const Word &compare) const
{
int cmp = memcmp(m_factors, compare.m_factors,
- sizeof(Factor*) * MAX_NUM_FACTORS);
+ sizeof(Factor*) * MAX_NUM_FACTORS);
return cmp;
/*
@@ -112,7 +112,7 @@ void Word::OutputToStream(const System &system, std::ostream &out) const
out << *m_factors[ factorTypes[0] ];
for (size_t i = 1; i < factorTypes.size(); ++i) {
- FactorType factorType = factorTypes[i];
+ FactorType factorType = factorTypes[i];
const Factor *factor = m_factors[factorType];
out << "|" << *factor;
diff --git a/contrib/moses2/Word.h b/moses2/Word.h
index 7210c5140..9d742eece 100644
--- a/contrib/moses2/Word.h
+++ b/moses2/Word.h
@@ -24,33 +24,29 @@ public:
virtual ~Word();
void CreateFromString(FactorCollection &vocab, const System &system,
- const std::string &str);
+ const std::string &str);
virtual size_t hash() const;
virtual size_t hash(const std::vector<FactorType> &factors) const;
int Compare(const Word &compare) const;
- virtual bool operator==(const Word &compare) const
- {
+ virtual bool operator==(const Word &compare) const {
int cmp = Compare(compare);
return cmp == 0;
}
- virtual bool operator!=(const Word &compare) const
- {
+ virtual bool operator!=(const Word &compare) const {
return !((*this) == compare);
}
virtual bool operator<(const Word &compare) const;
- const Factor* operator[](size_t ind) const
- {
+ const Factor* operator[](size_t ind) const {
return m_factors[ind];
}
- const Factor*& operator[](size_t ind)
- {
+ const Factor*& operator[](size_t ind) {
return m_factors[ind];
}
diff --git a/moses2/defer/CubePruningBitmapStack/Misc.cpp b/moses2/defer/CubePruningBitmapStack/Misc.cpp
new file mode 100644
index 000000000..9f994ba8b
--- /dev/null
+++ b/moses2/defer/CubePruningBitmapStack/Misc.cpp
@@ -0,0 +1,159 @@
+/*
+ * Misc.cpp
+ *
+ * Created on: 27 Nov 2015
+ * Author: hieu
+ */
+
+#include "Misc.h"
+#include "Stack.h"
+#include "../Manager.h"
+#include "../../MemPool.h"
+#include "../../System.h"
+
+using namespace std;
+
+namespace Moses2
+{
+
+namespace NSCubePruningBitmapStack
+{
+
+////////////////////////////////////////////////////////////////////////
+// Factory for queue items. Tries, in order: re-initialising the caller's own
+// item, re-initialising the most recently recycled item, and finally
+// constructing a new item in the manager's memory pool.
+// Every path builds the item's hypothesis (via Init or the constructor).
+QueueItem *QueueItem::Create(QueueItem *currItem,
+                             Manager &mgr,
+                             CubeEdge &edge,
+                             size_t hypoIndex,
+                             size_t tpIndex,
+                             std::deque<QueueItem*> &queueItemRecycler)
+{
+  // 1. the caller handed us an item it no longer needs: overwrite it in place
+  if (currItem) {
+    currItem->Init(mgr, edge, hypoIndex, tpIndex);
+    return currItem;
+  }
+
+  // 2. take an item out of the recycle bin
+  if (!queueItemRecycler.empty()) {
+    QueueItem *recycled = queueItemRecycler.back();
+    recycled->Init(mgr, edge, hypoIndex, tpIndex);
+    queueItemRecycler.pop_back();
+    return recycled;
+  }
+
+  // 3. nothing to reuse: placement-new into pool memory
+  return new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge, hypoIndex, tpIndex);
+}
+
+// Stores the edge (by address) and the two indexes, then immediately builds
+// the hypothesis for that (hypoIndex, tpIndex) cell via CreateHypothesis().
+QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
+ :edge(&edge)
+ ,hypoIndex(hypoIndex)
+ ,tpIndex(tpIndex)
+{
+ CreateHypothesis(mgr);
+}
+
+// Re-targets an existing item at a new (edge, hypoIndex, tpIndex) cell and
+// builds a fresh hypothesis for it. Mirrors what the constructor does.
+void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
+{
+  // parameters shadow the members, hence the explicit qualification
+  QueueItem::tpIndex = tpIndex;
+  QueueItem::hypoIndex = hypoIndex;
+  QueueItem::edge = &edge;
+
+  CreateHypothesis(mgr);
+}
+
+// Builds the hypothesis for this item's cell: extends edge->hypos[hypoIndex]
+// with edge->tps[tpIndex], then scores it with EvaluateWhenApplied().
+// The result is stored in the member `hypo`.
+void QueueItem::CreateHypothesis(Manager &mgr)
+{
+ const Hypothesis *prevHypo = edge->hypos[hypoIndex];
+ const TargetPhrase &tp = edge->tps[tpIndex];
+
+ //cerr << "hypoIndex=" << hypoIndex << endl;
+ //cerr << "edge.hypos=" << edge.hypos.size() << endl;
+ //cerr << prevHypo << endl;
+ //cerr << *prevHypo << endl;
+
+ // hypothesis memory comes from the manager's system pool
+ hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
+ hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore);
+ hypo->EvaluateWhenApplied();
+}
+
+////////////////////////////////////////////////////////////////////////
+// Binds the edge to its hypotheses, input path, target phrases and resulting
+// bitmap (all held by reference), and caches the estimated score for
+// newBitmap so it is computed once per edge.
+CubeEdge::CubeEdge(
+ Manager &mgr,
+ const Hypotheses &hypos,
+ const InputPath &path,
+ const TargetPhrases &tps,
+ const Bitmap &newBitmap)
+ :hypos(hypos)
+ ,path(path)
+ ,tps(tps)
+ ,newBitmap(newBitmap)
+{
+ estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
+}
+
+// Debug output: prints only the edge's resulting bitmap.
+std::ostream& operator<<(std::ostream &out, const CubeEdge &obj)
+{
+ out << obj.newBitmap;
+ return out;
+}
+
+// Marks cell (x, y) of this edge as visited.
+// Returns true when the cell was not in seenPositions before this call,
+// false when it had already been recorded.
+bool
+CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const
+{
+  // Both indexes are packed into a single number, x in the bits above bit 16.
+  // NOTE(review): no range check is performed on x or y here, so large
+  // indexes could alias each other -- confirm the expected index ranges.
+  const SeenPositionItem cell(this, (x<<16) + y);
+  return seenPositions.insert(cell).second;
+}
+
+// Seeds the queue with this edge's (0, 0) cell: the first hypothesis combined
+// with the first target phrase. Asserts that both lists are non-empty.
+void CubeEdge::CreateFirst(Manager &mgr,
+ Queue &queue,
+ SeenPositions &seenPositions,
+ std::deque<QueueItem*> &queueItemRecycler)
+{
+ assert(hypos.size());
+ assert(tps.GetSize());
+
+ // NULL: there is no existing item to reuse, so Create() recycles or allocates
+ QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler);
+ queue.push(item);
+ // (0, 0) must not have been seen yet for this edge
+ bool setSeen = SetSeenPosition(0, 0, seenPositions);
+ assert(setSeen);
+}
+
+// Expands the two neighbours of a popped item: (hypoIndex + 1, tpIndex) and
+// (hypoIndex, tpIndex + 1). A neighbour is pushed only if it is in range and
+// has not been seen before. The popped item itself is reused for the first
+// neighbour that gets created; if neither is created it goes to the recycler.
+void CubeEdge::CreateNext(Manager &mgr,
+ QueueItem *item,
+ Queue &queue,
+ SeenPositions &seenPositions,
+ std::deque<QueueItem*> &queueItemRecycler)
+{
+ // read the indexes now: Create() may overwrite them when it reuses `item`
+ size_t hypoIndex = item->hypoIndex;
+ size_t tpIndex = item->tpIndex;
+
+ if (hypoIndex + 1 < hypos.size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) {
+ // reuse incoming queue item to create new item
+ QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler);
+ assert(newItem == item);
+ queue.push(newItem);
+ // item is now owned by the queue; do not reuse or recycle it below
+ item = NULL;
+ }
+
+ if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) {
+ // item is NULL here if it was consumed above, in which case Create()
+ // takes an item from the recycler or allocates a new one
+ QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler);
+ queue.push(newItem);
+ item = NULL;
+ }
+
+ if (item) {
+ // recycle unused queue item
+ queueItemRecycler.push_back(item);
+ }
+}
+
+}
+
+}
+
+
diff --git a/moses2/defer/CubePruningBitmapStack/Misc.h b/moses2/defer/CubePruningBitmapStack/Misc.h
new file mode 100644
index 000000000..355f8f4c2
--- /dev/null
+++ b/moses2/defer/CubePruningBitmapStack/Misc.h
@@ -0,0 +1,111 @@
+/*
+ * Misc.h
+ *
+ * Created on: 27 Nov 2015
+ * Author: hieu
+ */
+#pragma once
+#include <boost/pool/pool_alloc.hpp>
+#include <boost/unordered_map.hpp>
+#include <boost/unordered_set.hpp>
+#include <vector>
+#include <queue>
+#include "../../legacy/Range.h"
+#include "../Hypothesis.h"
+#include "../../TypeDef.h"
+#include "../../Vector.h"
+#include "Stack.h"
+
+namespace Moses2
+{
+
+class Manager;
+class InputPath;
+class TargetPhrases;
+class Bitmap;
+
+namespace NSCubePruningBitmapStack
+{
+class CubeEdge;
+
+///////////////////////////////////////////
+// One cell of a cube edge waiting in the priority queue: the edge, the
+// (hypoIndex, tpIndex) position inside it, and the hypothesis built for that
+// position. Instances are obtained through Create(), which reuses items
+// where it can and otherwise allocates from the manager's MemPool.
+class QueueItem
+{
+ ~QueueItem(); // NOT IMPLEMENTED. Use MemPool
+public:
+ // Returns a ready-to-use item: currItem if non-NULL, else an item from
+ // queueItemRecycler, else a newly pool-allocated one.
+ static QueueItem *Create(QueueItem *currItem,
+ Manager &mgr,
+ CubeEdge &edge,
+ size_t hypoIndex,
+ size_t tpIndex,
+ std::deque<QueueItem*> &queueItemRecycler);
+ QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
+
+ // Re-initialises an existing item for a new cell and rebuilds its hypothesis.
+ void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
+
+ CubeEdge *edge;
+ size_t hypoIndex, tpIndex;
+ Hypothesis *hypo;
+
+protected:
+ // Builds `hypo` from edge->hypos[hypoIndex] and edge->tps[tpIndex].
+ void CreateHypothesis(Manager &mgr);
+};
+
+///////////////////////////////////////////
+// Comparator for the priority queue of QueueItems.
+// Yields the logical negation of HypothesisFutureScoreOrderer applied to the
+// two items' hypotheses.
+class QueueItemOrderer
+{
+public:
+  bool operator()(QueueItem* itemA, QueueItem* itemB) const {
+    const bool hypoOrder = HypothesisFutureScoreOrderer()(itemA->hypo, itemB->hypo);
+    return !hypoOrder;
+  }
+};
+
+///////////////////////////////////////////
+// One "cube" to explore: a list of hypotheses crossed with a list of target
+// phrases for one input path, all leading to the same coverage bitmap.
+// Cells are addressed as (hypoIndex, tpIndex).
+class CubeEdge
+{
+ friend std::ostream& operator<<(std::ostream &, const CubeEdge &);
+
+public:
+ // Priority queue of pending cells, ordered by QueueItemOrderer.
+ typedef std::priority_queue<QueueItem*,
+ std::vector<QueueItem*>,
+ QueueItemOrderer> Queue;
+
+ // (edge, packed cell position) pairs that have already been queued.
+ // The position is packed as (x<<16) + y by SetSeenPosition().
+ typedef std::pair<const CubeEdge*, int> SeenPositionItem;
+ typedef boost::unordered_set<SeenPositionItem,
+ boost::hash<SeenPositionItem>,
+ std::equal_to<SeenPositionItem> > SeenPositions;
+
+ // All four are references: the referenced objects must outlive the edge.
+ const Hypotheses &hypos;
+ const InputPath &path;
+ const TargetPhrases &tps;
+ const Bitmap &newBitmap;
+ // Estimated score for newBitmap, computed once in the constructor.
+ SCORE estimatedScore;
+
+ CubeEdge(Manager &mgr,
+ const Hypotheses &hypos,
+ const InputPath &path,
+ const TargetPhrases &tps,
+ const Bitmap &newBitmap);
+
+ // Records cell (x, y) as seen; returns true if it was not seen before.
+ bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const;
+
+ // Pushes the (0, 0) cell onto the queue.
+ void CreateFirst(Manager &mgr,
+ Queue &queue,
+ SeenPositions &seenPositions,
+ std::deque<QueueItem*> &queueItemRecycler);
+ // Pushes the unseen, in-range neighbours of `item` onto the queue.
+ void CreateNext(Manager &mgr,
+ QueueItem *item,
+ Queue &queue,
+ SeenPositions &seenPositions,
+ std::deque<QueueItem*> &queueItemRecycler);
+
+protected:
+
+};
+
+}
+
+}
+
+
diff --git a/moses2/defer/CubePruningBitmapStack/Search.cpp b/moses2/defer/CubePruningBitmapStack/Search.cpp
new file mode 100644
index 000000000..8c06f1340
--- /dev/null
+++ b/moses2/defer/CubePruningBitmapStack/Search.cpp
@@ -0,0 +1,206 @@
+/*
+ * Search.cpp
+ *
+ * Created on: 16 Nov 2015
+ * Author: hieu
+ */
+#include <boost/foreach.hpp>
+#include "Search.h"
+#include "Stack.h"
+#include "../Manager.h"
+#include "../Hypothesis.h"
+#include "../../InputPaths.h"
+#include "../../InputPath.h"
+#include "../../System.h"
+#include "../../Sentence.h"
+#include "../../TranslationTask.h"
+#include "../../legacy/Util2.h"
+
+using namespace std;
+
+namespace Moses2
+{
+
+namespace NSCubePruningBitmapStack
+{
+
+////////////////////////////////////////////////////////////////////////
+// Sets up an empty stack, an empty priority queue (with an explicitly
+// supplied comparator and backing vector) and an empty seen-position set.
+Search::Search(Manager &mgr)
+ :Moses2::Search(mgr)
+ ,m_stack(mgr)
+
+ ,m_queue(QueueItemOrderer(), std::vector<QueueItem*>() )
+
+ ,m_seenPositions()
+{
+}
+
+// Nothing is released here; the cube edges and queue items created during
+// decoding are allocated from the manager's pool.
+Search::~Search()
+{
+}
+
+// Top-level decoding loop.
+//  1. Allocates one (initially empty) list of cube edges per stack index,
+//     0 .. input size inclusive.
+//  2. Puts the initial empty hypothesis on the stack and runs PostDecode(0)
+//     to create the edges leading out of it.
+//  3. For each further stack index: clears the single reused stack, fills it
+//     with Decode(stackInd), then creates outgoing edges with PostDecode().
+void Search::Decode()
+{
+ // init cube edges
+ m_cubeEdges.resize(mgr.GetInput().GetSize() + 1);
+ for (size_t i = 0; i < m_cubeEdges.size(); ++i) {
+ m_cubeEdges[i] = new (mgr.GetPool().Allocate<CubeEdges>()) CubeEdges();
+ }
+
+ // initial hypothesis: initial bitmap, blank input path, init phrase
+ const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
+ Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
+ initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap);
+ initHypo->EmptyHypothesisState(mgr.GetInput());
+
+ m_stack.Add(initHypo, mgr.GetHypoRecycle());
+ PostDecode(0);
+
+ for (size_t stackInd = 1; stackInd < mgr.GetInput().GetSize() + 1; ++stackInd) {
+ //cerr << "stackInd=" << stackInd << endl;
+ // the same Stack object is reused for every stack index
+ m_stack.Clear();
+ Decode(stackInd);
+ PostDecode(stackInd);
+
+ //m_stack.DebugCounts();
+ //cerr << m_stacks << endl;
+ }
+
+}
+
+// Fills m_stack for one stack index using cube pruning.
+//  - Leftovers from the previous call (queue items and their hypotheses that
+//    were never popped) are recycled and the queue and seen set are emptied.
+//  - The (0, 0) cell of every edge in m_cubeEdges[stackInd] is queued.
+//  - Up to mgr.system.popLimit items are popped; each popped hypothesis is
+//    added to the stack and its neighbours are queued via CreateNext().
+void Search::Decode(size_t stackInd)
+{
+ Recycler<Hypothesis*> &hypoRecycler = mgr.GetHypoRecycle();
+
+ // reuse queue from previous stack. Clear it first
+ // Container() gives access to the priority queue's underlying vector
+ std::vector<QueueItem*> &container = Container(m_queue);
+ //cerr << "container=" << container.size() << endl;
+ BOOST_FOREACH(QueueItem *item, container) {
+ // recycle unused hypos from queue
+ Hypothesis *hypo = item->hypo;
+ hypoRecycler.Recycle(hypo);
+
+ // recycle queue item
+ m_queueItemRecycler.push_back(item);
+ }
+ container.clear();
+
+ m_seenPositions.clear();
+
+ // add top hypo from every edge into queue
+ CubeEdges &edges = *m_cubeEdges[stackInd];
+
+ BOOST_FOREACH(CubeEdge *edge, edges) {
+ //cerr << *edge << " ";
+ edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
+ }
+
+ /*
+ cerr << "edges: ";
+ boost::unordered_set<const Bitmap*> uniqueBM;
+ BOOST_FOREACH(CubeEdge *edge, edges) {
+ uniqueBM.insert(&edge->newBitmap);
+ //cerr << *edge << " ";
+ }
+ cerr << edges.size() << " " << uniqueBM.size();
+ cerr << endl;
+ */
+
+ size_t pops = 0;
+ while (!m_queue.empty() && pops < mgr.system.popLimit) {
+ // get best hypo from queue, add to stack
+ //cerr << "queue=" << queue.size() << endl;
+ QueueItem *item = m_queue.top();
+ m_queue.pop();
+
+ // read edge and hypo before CreateNext(), which may re-initialise item
+ CubeEdge *edge = item->edge;
+
+ // add hypo to stack
+ Hypothesis *hypo = item->hypo;
+ //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
+ m_stack.Add(hypo, hypoRecycler);
+
+ edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
+
+ ++pops;
+ }
+
+ /*
+ // create hypo from every edge. Increase diversity
+ while (!m_queue.empty()) {
+ QueueItem *item = m_queue.top();
+ m_queue.pop();
+
+ if (item->hypoIndex == 0 && item->tpIndex == 0) {
+ CubeEdge &edge = item->edge;
+
+ // add hypo to stack
+ Hypothesis *hypo = item->hypo;
+ //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
+ m_stacks.Add(hypo, mgr.GetHypoRecycle());
+ }
+ }
+ */
+}
+
+// Creates the cube edges that lead out of the current stack.
+// The stack's hypotheses are sorted, pruned and grouped by
+// (bitmap, end position). For every group and every used input path the
+// group can be extended with, one CubeEdge per phrase table with a non-empty
+// target-phrase list is created and filed under the number of words covered
+// by the resulting bitmap.
+// stackInd is not used in the body.
+void Search::PostDecode(size_t stackInd)
+{
+ MemPool &pool = mgr.GetPool();
+
+ Stack::SortedHypos sortedHypos = m_stack.GetSortedAndPruneHypos(mgr);
+
+ BOOST_FOREACH(const Stack::SortedHypos::value_type &val, sortedHypos) {
+ const Bitmap &hypoBitmap = *val.first.first;
+ size_t hypoEndPos = val.first.second;
+ //cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl;
+
+ // create edges to next hypos from existing hypos
+ const InputPaths &paths = mgr.GetInputPaths();
+
+ BOOST_FOREACH(const InputPath *path, paths) {
+ const Range &pathRange = path->range;
+ //cerr << "pathRange=" << pathRange << endl;
+
+ if (!path->IsUsed()) {
+ continue;
+ }
+ if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
+ continue;
+ }
+
+ const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
+ size_t numWords = newBitmap.GetNumWordsCovered();
+
+ // edges are bucketed by how many source words the new bitmap covers
+ CubeEdges &edges = *m_cubeEdges[numWords];
+
+ // sort hypo for a particular bitmap and hypoEndPos
+ // note: this local shadows the outer map that is also named sortedHypos
+ Hypotheses &sortedHypos = *val.second;
+
+ size_t numPt = mgr.system.mappings.size();
+ for (size_t i = 0; i < numPt; ++i) {
+ const TargetPhrases *tps = path->targetPhrases[i];
+ if (tps && tps->GetSize()) {
+ CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, sortedHypos, *path, *tps, newBitmap);
+ edges.push_back(edge);
+ }
+ }
+ }
+ }
+
+}
+
+// Returns the best hypothesis currently on the stack, or NULL when the
+// stack holds no hypotheses.
+const Hypothesis *Search::GetBestHypo() const
+{
+  std::vector<const Hypothesis*> topHypos = m_stack.GetBestHypos(1);
+
+  if (topHypos.empty()) {
+    return NULL;
+  }
+  return topHypos.front();
+}
+
+}
+
+}
+
+
diff --git a/moses2/defer/CubePruningBitmapStack/Search.h b/moses2/defer/CubePruningBitmapStack/Search.h
new file mode 100644
index 000000000..1ff0477c6
--- /dev/null
+++ b/moses2/defer/CubePruningBitmapStack/Search.h
@@ -0,0 +1,57 @@
+/*
+ * Search.h
+ *
+ * Created on: 16 Nov 2015
+ * Author: hieu
+ */
+
+#pragma once
+#include <boost/pool/pool_alloc.hpp>
+#include "../Search.h"
+#include "Misc.h"
+#include "Stack.h"
+#include "../../legacy/Range.h"
+
+namespace Moses2
+{
+
+class Bitmap;
+class Hypothesis;
+class InputPath;
+class TargetPhrases;
+
+namespace NSCubePruningBitmapStack
+{
+
+class Search : public Moses2::Search
+{
+public:
+ Search(Manager &mgr);
+ virtual ~Search();
+
+ virtual void Decode();
+ const Hypothesis *GetBestHypo() const;
+
+protected:
+ Stack m_stack;
+
+ CubeEdge::Queue m_queue;
+ CubeEdge::SeenPositions m_seenPositions;
+
+ // CUBE PRUNING VARIABLES
+ // setup
+ typedef std::vector<CubeEdge*> CubeEdges;
+ std::vector<CubeEdges*> m_cubeEdges;
+
+ std::deque<QueueItem*> m_queueItemRecycler;
+
+ // CUBE PRUNING
+ // decoding
+ void Decode(size_t stackInd);
+ void PostDecode(size_t stackInd);
+};
+
+}
+
+}
+
diff --git a/moses2/defer/CubePruningBitmapStack/Stack.cpp b/moses2/defer/CubePruningBitmapStack/Stack.cpp
new file mode 100644
index 000000000..f6abd2038
--- /dev/null
+++ b/moses2/defer/CubePruningBitmapStack/Stack.cpp
@@ -0,0 +1,299 @@
+/*
+ * Stack.cpp
+ *
+ * Created on: 24 Oct 2015
+ * Author: hieu
+ */
+#include <algorithm>
+#include <boost/foreach.hpp>
+#include "Stack.h"
+#include "../Hypothesis.h"
+#include "../Manager.h"
+#include "../../Scores.h"
+#include "../../System.h"
+
+using namespace std;
+
+namespace Moses2
+{
+
+namespace NSCubePruningBitmapStack
+{
+// Starts with an empty hypothesis set and no cached sorted list.
+// The mgr argument is not used in the body.
+MiniStack::MiniStack(const Manager &mgr)
+ :m_coll()
+ ,m_sortedHypos(NULL)
+{}
+
+// Adds a hypothesis, recombining with an equivalent one if present.
+// Result:
+//   (true,  NULL)      hypo was new and has been stored
+//   (true,  previous)  hypo replaced an equivalent, lower-scoring hypothesis
+//   (false, hypo)      an equivalent hypothesis scoring at least as well is
+//                      already stored; hypo was not added
+StackAdd MiniStack::Add(const Hypothesis *hypo)
+{
+  std::pair<_HCType::iterator, bool> inserted = m_coll.insert(hypo);
+
+  // no equivalent hypothesis existed: the insert itself did the job
+  if (inserted.second) {
+    return StackAdd(true, NULL);
+  }
+
+  // CHECK RECOMBINATION
+  const Hypothesis *incumbent = *inserted.first;
+  const bool incomingIsBetter =
+    hypo->GetScores().GetTotalScore() > incumbent->GetScores().GetTotalScore();
+
+  if (!incomingIsBetter) {
+    // the stored hypothesis stays; hand the incoming one back to the caller
+    return StackAdd(false, const_cast<Hypothesis*>(hypo));
+  }
+
+  // Overwrite the stored pointer in place. The set only hands out const
+  // access to its elements, hence the const_cast on the slot.
+  const Hypothesis *&slot = const_cast<const Hypothesis *&>(*inserted.first);
+  slot = hypo;
+
+  return StackAdd(true, const_cast<Hypothesis*>(incumbent));
+}
+
+// Returns this mini-stack's hypotheses as a sorted, pruned vector.
+// The vector is built on first use (pool-allocated, copied from m_coll, then
+// sorted and pruned) and cached in m_sortedHypos until Clear() is called.
+Hypotheses &MiniStack::GetSortedAndPruneHypos(const Manager &mgr) const
+{
+ if (m_sortedHypos == NULL) {
+ // create sortedHypos first
+ MemPool &pool = mgr.GetPool();
+ m_sortedHypos = new (pool.Allocate< Vector<const Hypothesis*> >()) Vector<const Hypothesis*>(pool, m_coll.size());
+
+ // copy every stored hypothesis into the pre-sized vector
+ size_t ind = 0;
+ BOOST_FOREACH(const Hypothesis *hypo, m_coll) {
+ (*m_sortedHypos)[ind] = hypo;
+ ++ind;
+ }
+
+ SortAndPruneHypos(mgr);
+ }
+
+ return *m_sortedHypos;
+}
+
+// Sorts m_sortedHypos with HypothesisFutureScoreOrderer and, when
+// mgr.system.stackSize is non-zero, keeps only the first stackSize entries.
+// Pruned hypotheses are handed to the manager's hypothesis recycler.
+// A stackSize of 0 means "no limit": everything is sorted, nothing pruned.
+void MiniStack::SortAndPruneHypos(const Manager &mgr) const
+{
+ size_t stackSize = mgr.system.stackSize;
+ Recycler<Hypothesis*> &recycler = mgr.GetHypoRecycle();
+
+ /*
+ cerr << "UNSORTED hypos:" << endl;
+ for (size_t i = 0; i < hypos.size(); ++i) {
+ const Hypothesis *hypo = hypos[i];
+ cerr << *hypo << endl;
+ }
+ cerr << endl;
+ */
+ // only the part that survives pruning needs to be in sorted order
+ Hypotheses::iterator iterMiddle;
+ iterMiddle = (stackSize == 0 || m_sortedHypos->size() < stackSize)
+ ? m_sortedHypos->end()
+ : m_sortedHypos->begin() + stackSize;
+
+ std::partial_sort(m_sortedHypos->begin(), iterMiddle, m_sortedHypos->end(),
+ HypothesisFutureScoreOrderer());
+
+ // prune
+ if (stackSize && m_sortedHypos->size() > stackSize) {
+ for (size_t i = stackSize; i < m_sortedHypos->size(); ++i) {
+ Hypothesis *hypo = const_cast<Hypothesis*>((*m_sortedHypos)[i]);
+ recycler.Recycle(hypo);
+ }
+ m_sortedHypos->resize(stackSize);
+ }
+
+ /*
+ cerr << "sorted hypos:" << endl;
+ for (size_t i = 0; i < hypos.size(); ++i) {
+ const Hypothesis *hypo = hypos[i];
+ cerr << hypo << " " << *hypo << endl;
+ }
+ cerr << endl;
+ */
+
+}
+
+// Empties the mini-stack so it can be reused. The cached sorted vector is
+// only forgotten, not freed (it was allocated from a MemPool).
+void MiniStack::Clear()
+{
+ m_sortedHypos = NULL;
+ m_coll.clear();
+}
+
+///////////////////////////////////////////////////////////////
+// Keeps a reference to the manager; starts with no mini-stacks and an empty
+// mini-stack recycler.
+Stack::Stack(const Manager &mgr)
+ :m_mgr(mgr)
+ ,m_coll()
+ ,m_miniStackRecycler()
+{
+}
+
+// Nothing is released here; mini-stacks are allocated from the manager's pool.
+Stack::~Stack()
+{
+ // TODO Auto-generated destructor stub
+}
+
+// Adds a hypothesis to the mini-stack for its coverage bitmap and recycles
+// whichever hypothesis lost the recombination (if any).
+//
+// hypo         hypothesis to add
+// hypoRecycle  receives the discarded hypothesis: either the previously
+//              stored equivalent one, or hypo itself when it was rejected
+void Stack::Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle)
+{
+  HypoCoverageInternal key = &hypo->GetBitmap();
+  StackAdd added = GetMiniStack(key).Add(hypo);
+
+  // StackAdd carries the discarded hypothesis in `other` (NULL when nothing
+  // was discarded). It is typed as HypothesisBase*, but MiniStack::Add only
+  // ever stores Hypothesis pointers in it, so the downcast is safe.
+  if (added.other) {
+    hypoRecycle.Recycle(static_cast<Hypothesis*>(added.other));
+  }
+}
+
+// Collects the hypotheses of every mini-stack into one vector and
+// partially sorts it so that the best `num` entries come first, in order
+// (num == 0, or num larger than the total, sorts everything).
+// The returned vector is not truncated: entries after the first `num` are
+// present but in unspecified order.
+std::vector<const Hypothesis*> Stack::GetBestHypos(size_t num) const
+{
+ std::vector<const Hypothesis*> ret;
+ BOOST_FOREACH(const Coll::value_type &val, m_coll) {
+ const MiniStack::_HCType &hypos = val.second->GetColl();
+ ret.insert(ret.end(), hypos.begin(), hypos.end());
+ }
+
+ std::vector<const Hypothesis*>::iterator iterMiddle;
+ iterMiddle = (num == 0 || ret.size() < num)
+ ? ret.end()
+ : ret.begin()+num;
+
+ std::partial_sort(ret.begin(), iterMiddle, ret.end(),
+ HypothesisFutureScoreOrderer());
+
+ return ret;
+}
+
+// Total number of hypotheses held across all mini-stacks.
+size_t Stack::GetHypoSize() const
+{
+  size_t total = 0;
+  for (Coll::const_iterator it = m_coll.begin(); it != m_coll.end(); ++it) {
+    total += it->second->GetColl().size();
+  }
+  return total;
+}
+
+// Returns the mini-stack registered under `key`, creating it on demand.
+// A new mini-stack is taken from the recycler when one is available
+// (and cleared before use); otherwise it is constructed in pool memory.
+MiniStack &Stack::GetMiniStack(const HypoCoverageInternal &key)
+{
+  Coll::iterator found = m_coll.find(key);
+  if (found != m_coll.end()) {
+    return *found->second;
+  }
+
+  MiniStack *fresh;
+  if (!m_miniStackRecycler.empty()) {
+    fresh = m_miniStackRecycler.back();
+    fresh->Clear();
+    m_miniStackRecycler.pop_back();
+  } else {
+    fresh = new (m_mgr.GetPool().Allocate<MiniStack>()) MiniStack(m_mgr);
+  }
+
+  m_coll[key] = fresh;
+  return *fresh;
+}
+
+// Empties the stack. Every mini-stack is moved to the recycler for later
+// reuse by GetMiniStack(); they are not cleared here.
+void Stack::Clear()
+{
+  for (Coll::const_iterator it = m_coll.begin(); it != m_coll.end(); ++it) {
+    m_miniStackRecycler.push_back(it->second);
+  }
+
+  m_coll.clear();
+}
+
+// Sorts and prunes the whole stack, then groups the survivors.
+//  1. Flattens the hypotheses of all mini-stacks into one pool-allocated
+//     vector.
+//  2. Sorts and prunes that vector as a whole (see SortAndPruneHypos).
+//  3. Splits the survivors into per-(bitmap, end position) vectors, keeping
+//     the sorted order inside each group.
+// The mini-stacks themselves are not modified.
+Stack::SortedHypos Stack::GetSortedAndPruneHypos(const Manager &mgr) const
+{
+ SortedHypos ret;
+
+ MemPool &pool = mgr.GetPool();
+
+ // prune and sort
+ Hypotheses *allHypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool, GetHypoSize());
+ size_t i = 0;
+
+ BOOST_FOREACH(const Coll::value_type &val, m_coll) {
+ const MiniStack *miniStack = val.second;
+ const MiniStack::MiniStack::_HCType &hypos = miniStack->GetColl();
+
+ BOOST_FOREACH(const Hypothesis *hypo, hypos) {
+ (*allHypos)[i++] = hypo;
+ }
+ }
+
+ SortAndPruneHypos(mgr, *allHypos);
+
+ // divide hypos by [bitmap, last end pos]
+ BOOST_FOREACH(const Hypothesis *hypo, *allHypos) {
+ HypoCoverage key(&hypo->GetBitmap(), hypo->GetInputPath().range.GetEndPos());
+
+ // find the group for this key, creating an empty one on first sight
+ Hypotheses *hypos;
+ SortedHypos::iterator iter;
+ iter = ret.find(key);
+ if (iter == ret.end()) {
+ hypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool);
+ ret[key] = hypos;
+ } else {
+ hypos = iter->second;
+ }
+ hypos->push_back(hypo);
+ }
+
+ return ret;
+}
+
+// Sorts `hypos` with HypothesisFutureScoreOrderer and, when
+// mgr.system.stackSize is non-zero, truncates it to stackSize entries.
+// Pruned hypotheses are handed to the manager's hypothesis recycler.
+// A stackSize of 0 means "no limit": everything is sorted, nothing pruned.
+void Stack::SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const
+{
+ size_t stackSize = mgr.system.stackSize;
+ Recycler<Hypothesis*> &recycler = mgr.GetHypoRecycle();
+
+ /*
+ cerr << "UNSORTED hypos:" << endl;
+ for (size_t i = 0; i < hypos.size(); ++i) {
+ const Hypothesis *hypo = hypos[i];
+ cerr << *hypo << endl;
+ }
+ cerr << endl;
+ */
+ // only the part that survives pruning needs to be in sorted order
+ Hypotheses::iterator iterMiddle;
+ iterMiddle = (stackSize == 0 || hypos.size() < stackSize)
+ ? hypos.end()
+ : hypos.begin() + stackSize;
+
+ std::partial_sort(hypos.begin(), iterMiddle, hypos.end(),
+ HypothesisFutureScoreOrderer());
+
+ // prune
+ if (stackSize && hypos.size() > stackSize) {
+ for (size_t i = stackSize; i < hypos.size(); ++i) {
+ Hypothesis *hypo = const_cast<Hypothesis*>(hypos[i]);
+ recycler.Recycle(hypo);
+ }
+ hypos.resize(stackSize);
+ }
+
+ /*
+ cerr << "sorted hypos:" << endl;
+ for (size_t i = 0; i < hypos.size(); ++i) {
+ const Hypothesis *hypo = hypos[i];
+ cerr << hypo << " " << *hypo << endl;
+ }
+ cerr << endl;
+ */
+
+}
+
+
+// Debugging aid that currently does nothing: the body, which printed the
+// number of hypotheses in each mini-stack, is commented out.
+void Stack::DebugCounts()
+{
+ /*
+ cerr << "counts=";
+ BOOST_FOREACH(const Coll::value_type &val, GetColl()) {
+ const NSCubePruning::MiniStack &miniStack = *val.second;
+ size_t count = miniStack.GetColl().size();
+ cerr << count << " ";
+ }
+ cerr << endl;
+ */
+}
+
+}
+
+}
+
diff --git a/moses2/defer/CubePruningBitmapStack/Stack.h b/moses2/defer/CubePruningBitmapStack/Stack.h
new file mode 100644
index 000000000..f052fab42
--- /dev/null
+++ b/moses2/defer/CubePruningBitmapStack/Stack.h
@@ -0,0 +1,114 @@
+/*
+ * Stack.h
+ *
+ * Created on: 24 Oct 2015
+ * Author: hieu
+ */
+#pragma once
+#include <boost/unordered_map.hpp>
+#include <boost/unordered_set.hpp>
+#include <deque>
+#include "../Hypothesis.h"
+#include "../../TypeDef.h"
+#include "../../Vector.h"
+#include "../../MemPool.h"
+#include "../../Recycler.h"
+#include "../../legacy/Util2.h"
+
+namespace Moses2
+{
+
+class Manager;
+
+namespace NSCubePruningBitmapStack
+{
+typedef Vector<const Hypothesis*> Hypotheses;
+
+class MiniStack // A set of hypotheses; Stack::Coll below maps each Bitmap pointer to one MiniStack.
+{
+public:
+ typedef boost::unordered_set<const Hypothesis*,
+ UnorderedComparer<Hypothesis>, // hash functor
+ UnorderedComparer<Hypothesis> // equality predicate
+ > _HCType;
+
+ MiniStack(const Manager &mgr);
+
+ StackAdd Add(const Hypothesis *hypo);
+ // Direct access to the underlying hypothesis set.
+ _HCType &GetColl() {
+ return m_coll;
+ }
+
+ const _HCType &GetColl() const {
+ return m_coll;
+ }
+
+ void Clear();
+ // NOTE(review): the name suggests the result is sorted and pruned to the stack size - confirm in Stack.cpp.
+ Hypotheses &GetSortedAndPruneHypos(const Manager &mgr) const;
+
+protected:
+ _HCType m_coll;
+ mutable Hypotheses *m_sortedHypos; // mutable so const members can set it; presumably a cache of the sorted list - TODO confirm
+
+ void SortAndPruneHypos(const Manager &mgr) const;
+
+};
+
+/////////////////////////////////////////////
+class Stack // Hypothesis container that keeps one MiniStack per coverage bitmap (see Coll).
+{
+protected:
+
+
+public:
+ typedef std::pair<const Bitmap*, size_t> HypoCoverage;
+ // HypoCoverage key: coverage bitmap plus the end position of the hypothesis' input-path range
+ typedef boost::unordered_map<HypoCoverage, Hypotheses*> SortedHypos;
+ // Mini stacks are keyed by the Bitmap pointer alone, hashed and compared by address.
+ typedef const Bitmap* HypoCoverageInternal;
+ typedef boost::unordered_map<HypoCoverageInternal, MiniStack*
+ ,boost::hash<HypoCoverageInternal>
+ ,std::equal_to<HypoCoverageInternal>
+ > Coll;
+
+
+ Stack(const Manager &mgr);
+ virtual ~Stack();
+
+ size_t GetHypoSize() const;
+ // Direct access to the bitmap -> MiniStack map.
+ Coll &GetColl() {
+ return m_coll;
+ }
+ const Coll &GetColl() const {
+ return m_coll;
+ }
+
+ void Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle);
+
+ MiniStack &GetMiniStack(const HypoCoverageInternal &key);
+
+ std::vector<const Hypothesis*> GetBestHypos(size_t num) const;
+ void Clear();
+ // Groups the stack's hypotheses by HypoCoverage after sorting/pruning them with SortAndPruneHypos.
+ SortedHypos GetSortedAndPruneHypos(const Manager &mgr) const;
+ void SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const; // sorts `hypos`; recycles entries beyond mgr.system.stackSize when that is non-zero
+
+ void DebugCounts();
+
+protected:
+ const Manager &m_mgr;
+ Coll m_coll;
+ // NOTE(review): presumably spare MiniStack objects kept for reuse - confirm in Stack.cpp.
+ std::deque<MiniStack*> m_miniStackRecycler;
+
+
+};
+
+}
+
+}
+
+
diff --git a/moses2/defer/CubePruningCardinalStack/Misc.cpp b/moses2/defer/CubePruningCardinalStack/Misc.cpp
new file mode 100644
index 000000000..197dc108a
--- /dev/null
+++ b/moses2/defer/CubePruningCardinalStack/Misc.cpp
@@ -0,0 +1,159 @@
+/*
+ * CubePruning.cpp
+ *
+ * Created on: 27 Nov 2015
+ * Author: hieu
+ */
+
+#include "Misc.h"
+#include "Stack.h"
+#include "../Manager.h"
+#include "../../MemPool.h"
+#include "../../System.h"
+
+using namespace std;
+
+namespace Moses2
+{
+
+namespace NSCubePruningCardinalStack
+{
+
+////////////////////////////////////////////////////////////////////////
+QueueItem *QueueItem::Create(QueueItem *currItem,
+                             Manager &mgr,
+                             CubeEdge &edge,
+                             size_t hypoIndex,
+                             size_t tpIndex,
+                             std::deque<QueueItem*> &queueItemRecycler)
+{
+  // Returns a QueueItem set up for cell (hypoIndex, tpIndex) of `edge`.
+  // Storage comes, in order of preference, from the caller's spent item,
+  // from the back of the recycle bin, or freshly from the manager's pool.
+  if (currItem) {
+    // reuse the incoming queue item
+    currItem->Init(mgr, edge, hypoIndex, tpIndex);
+    return currItem;
+  }
+
+  if (queueItemRecycler.empty()) {
+    // nothing to reuse: construct a new item in pool memory
+    return new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge, hypoIndex, tpIndex);
+  }
+
+  // take the most recently recycled item
+  QueueItem *recycled = queueItemRecycler.back();
+  recycled->Init(mgr, edge, hypoIndex, tpIndex);
+  queueItemRecycler.pop_back();
+  return recycled;
+}
+
+QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
+{
+  // Construction is the same work as re-initialising a recycled item:
+  // record the cell, then build its hypothesis.
+  Init(mgr, edge, hypoIndex, tpIndex);
+}
+
+void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
+{
+  // Points this item at cell (hypoIndex, tpIndex) of `edge`. The parameters
+  // shadow the members, hence the explicit this->.
+  this->tpIndex = tpIndex;
+  this->hypoIndex = hypoIndex;
+  this->edge = &edge;
+
+  // hypo is (re)built from the three fields set above
+  CreateHypothesis(mgr);
+}
+
+void QueueItem::CreateHypothesis(Manager &mgr)
+{
+  // Builds `hypo` by extending edge->hypos[hypoIndex] with
+  // edge->tps[tpIndex] over the edge's path, then scores it.
+  const Hypothesis *parent = edge->hypos[hypoIndex];
+  const TargetPhrase &targetPhrase = edge->tps[tpIndex];
+
+  //cerr << "hypoIndex=" << hypoIndex << endl;
+  //cerr << "edge.hypos=" << edge.hypos.size() << endl;
+  //cerr << parent << endl;
+  //cerr << *parent << endl;
+
+  hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
+  hypo->Init(mgr, *parent, edge->path, targetPhrase,
+             edge->newBitmap, edge->estimatedScore);
+  hypo->EvaluateWhenApplied();
+}
+
+////////////////////////////////////////////////////////////////////////
+CubeEdge::CubeEdge(
+  Manager &mgr,
+  const Hypotheses &hypos,
+  const InputPath &path,
+  const TargetPhrases &tps,
+  const Bitmap &newBitmap)
+  :hypos(hypos)
+  ,path(path)
+  ,tps(tps)
+  ,newBitmap(newBitmap)
+  // estimatedScore is declared last in the class, so initialising it here
+  // keeps the member initialisation order; it depends only on newBitmap.
+  ,estimatedScore(mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap))
+{
+}
+
+std::ostream& operator<<(std::ostream &out, const CubeEdge &obj)
+{ // Debug output: a CubeEdge prints as its target coverage bitmap only.
+ out << obj.newBitmap;
+ return out;
+}
+
+bool
+CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const
+{
+  // Records cell (x, y) of this edge in seenPositions.
+  // Returns true if the cell was newly inserted, false if it was already there.
+  //
+  // Both indices are packed into the single int of SeenPositionItem as
+  // (x<<16)+y, so distinct cells only get distinct keys while each index fits
+  // in 16 bits. The earlier, commented-out checks tested against 1<<17, which
+  // is too loose for a 16-bit shift. Enforce the real bound in debug builds
+  // instead of silently treating an unseen cell as seen.
+  // NOTE(review): keys with x >= 2^15 exceed INT_MAX and rely on the usual
+  // modular size_t -> int conversion of a 32-bit int.
+  assert(x < (static_cast<size_t>(1) << 16));
+  assert(y < (static_cast<size_t>(1) << 16));
+
+  SeenPositionItem val(this, (x<<16) + y);
+  std::pair<SeenPositions::iterator, bool> pairRet = seenPositions.insert(val);
+  return pairRet.second;
+}
+
+void CubeEdge::CreateFirst(Manager &mgr,
+                           Queue &queue,
+                           SeenPositions &seenPositions,
+                           std::deque<QueueItem*> &queueItemRecycler)
+{
+  // Seeds the queue with this edge's first cell (hypo 0, target phrase 0).
+  // Both lists are expected to be non-empty.
+  assert(hypos.size());
+  assert(tps.GetSize());
+
+  queue.push(QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler));
+
+  // the call stays outside assert() so it also runs when asserts are compiled out
+  bool firstVisit = SetSeenPosition(0, 0, seenPositions);
+  assert(firstVisit);
+}
+
+void CubeEdge::CreateNext(Manager &mgr,
+ QueueItem *item,
+ Queue &queue,
+ SeenPositions &seenPositions,
+ std::deque<QueueItem*> &queueItemRecycler)
+{ // Pushes the not-yet-seen neighbours (hypoIndex+1, tpIndex) and (hypoIndex, tpIndex+1) of the popped `item`.
+ size_t hypoIndex = item->hypoIndex;
+ size_t tpIndex = item->tpIndex;
+ // The indices are copied out first because reusing `item` below overwrites its fields.
+ if (hypoIndex + 1 < hypos.size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) {
+ // reuse incoming queue item to create new item
+ QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler);
+ assert(newItem == item);
+ queue.push(newItem);
+ item = NULL; // consumed: the second neighbour must not reuse it
+ }
+ // `item` is NULL here if it was consumed above; Create then takes storage from the recycler or the pool.
+ if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) {
+ QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler);
+ queue.push(newItem);
+ item = NULL;
+ }
+ // Neither neighbour reused the popped item.
+ if (item) {
+ // recycle unused queue item
+ queueItemRecycler.push_back(item);
+ }
+}
+
+}
+
+}
+
+
diff --git a/moses2/defer/CubePruningCardinalStack/Misc.h b/moses2/defer/CubePruningCardinalStack/Misc.h
new file mode 100644
index 000000000..9f5d28f1e
--- /dev/null
+++ b/moses2/defer/CubePruningCardinalStack/Misc.h
@@ -0,0 +1,112 @@
+/*
+ * CubePruning.h
+ *
+ * Created on: 27 Nov 2015
+ * Author: hieu
+ */
+#pragma once
+#include <boost/pool/pool_alloc.hpp>
+#include <boost/unordered_map.hpp>
+#include <boost/unordered_set.hpp>
+#include <vector>
+#include <queue>
+#include "../../legacy/Range.h"
+#include "../Hypothesis.h"
+#include "../../TypeDef.h"
+#include "../../Vector.h"
+#include "Stack.h"
+
+namespace Moses2
+{
+
+class Manager;
+class InputPath;
+class TargetPhrases;
+class Bitmap;
+
+namespace NSCubePruningCardinalStack
+{
+class CubeEdge;
+
+///////////////////////////////////////////
+class QueueItem // One cell (hypoIndex, tpIndex) of a CubeEdge together with the hypothesis built for it.
+{
+ ~QueueItem(); // NOT IMPLEMENTED. Use MemPool
+public:
+ static QueueItem *Create(QueueItem *currItem, // item to reuse, or NULL to take one from the recycler or the pool
+ Manager &mgr,
+ CubeEdge &edge,
+ size_t hypoIndex,
+ size_t tpIndex,
+ std::deque<QueueItem*> &queueItemRecycler);
+ QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
+ // Re-targets an existing item at a new cell and rebuilds its hypothesis.
+ void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
+
+ CubeEdge *edge;
+ size_t hypoIndex, tpIndex; // indices into edge->hypos and edge->tps
+ Hypothesis *hypo; // set by CreateHypothesis()
+
+protected:
+ void CreateHypothesis(Manager &mgr);
+};
+
+///////////////////////////////////////////
+class QueueItemOrderer
+{
+public:
+  // Comparator for CubeEdge::Queue (a std::priority_queue): returns true
+  // when itemB's hypothesis is ordered before itemA's by
+  // HypothesisFutureScoreOrderer, so that hypothesis ends up nearer the top.
+  //
+  // The arguments are swapped instead of negating the result. Negating a
+  // strict weak ordering returns true for equivalent items (and for an item
+  // compared with itself), which breaks the Compare requirements that
+  // std::priority_queue places on its comparator. Only the relative order of
+  // equally-ranked items can differ from the negated form.
+  bool operator()(QueueItem* itemA, QueueItem* itemB) const {
+    HypothesisFutureScoreOrderer orderer;
+    return orderer(itemB->hypo, itemA->hypo);
+  }
+};
+
+///////////////////////////////////////////
+class CubeEdge // A grid of (hypothesis, target phrase) pairs sharing one input path and one resulting coverage bitmap.
+{
+ friend std::ostream& operator<<(std::ostream &, const CubeEdge &);
+
+public:
+ typedef std::priority_queue<QueueItem*,
+ std::vector<QueueItem*>,
+ QueueItemOrderer> Queue;
+ // Seen-cell key: owning edge plus the cell packed as (x<<16)+y by SetSeenPosition().
+ typedef std::pair<const CubeEdge*, int> SeenPositionItem;
+ typedef boost::unordered_set<SeenPositionItem,
+ boost::hash<SeenPositionItem>,
+ std::equal_to<SeenPositionItem>
+ > SeenPositions;
+ // All four are references: the referenced objects must outlive the edge.
+ const Hypotheses &hypos;
+ const InputPath &path;
+ const TargetPhrases &tps;
+ const Bitmap &newBitmap;
+ SCORE estimatedScore; // CalcEstimatedScore(newBitmap), computed in the constructor
+
+ CubeEdge(Manager &mgr,
+ const Hypotheses &hypos,
+ const InputPath &path,
+ const TargetPhrases &tps,
+ const Bitmap &newBitmap);
+ // Returns true if cell (x, y) of this edge was not yet in seenPositions (and inserts it).
+ bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const;
+ // Pushes cell (0, 0) onto the queue.
+ void CreateFirst(Manager &mgr,
+ Queue &queue,
+ SeenPositions &seenPositions,
+ std::deque<QueueItem*> &queueItemRecycler);
+ void CreateNext(Manager &mgr, // pushes the unseen right/down neighbours of `item`
+ QueueItem *item,
+ Queue &queue,
+ SeenPositions &seenPositions,
+ std::deque<QueueItem*> &queueItemRecycler);
+
+protected:
+
+};
+
+}
+
+}
+
+
diff --git a/moses2/defer/CubePruningCardinalStack/Search.cpp b/moses2/defer/CubePruningCardinalStack/Search.cpp
new file mode 100644
index 000000000..23cae74eb
--- /dev/null
+++ b/moses2/defer/CubePruningCardinalStack/Search.cpp
@@ -0,0 +1,206 @@
+/*
+ * Search.cpp
+ *
+ * Created on: 16 Nov 2015
+ * Author: hieu
+ */
+#include <boost/foreach.hpp>
+#include "Search.h"
+#include "Stack.h"
+#include "../Manager.h"
+#include "../Hypothesis.h"
+#include "../../InputPaths.h"
+#include "../../InputPath.h"
+#include "../../System.h"
+#include "../../Sentence.h"
+#include "../../TranslationTask.h"
+#include "../../legacy/Util2.h"
+
+using namespace std;
+
+namespace Moses2
+{
+
+namespace NSCubePruningCardinalStack
+{
+
+////////////////////////////////////////////////////////////////////////
+Search::Search(Manager &mgr)
+ :Moses2::Search(mgr)
+ ,m_stack(mgr)
+ // the queue starts out empty, with a default-constructed orderer and container
+ ,m_queue(QueueItemOrderer(), std::vector<QueueItem* >() )
+
+ ,m_seenPositions()
+{
+}
+
+Search::~Search()
+{
+  // Decode() placement-constructs every CubeEdges (a std::vector) in the
+  // manager's pool, and nothing else ever destroys them. Run the vector
+  // destructors here so the heap buffers grown by push_back in PostDecode()
+  // are released. Entries are NULL if Decode() did not get to fill them.
+  // NOTE(review): assumes the pool memory is still valid while the Search is
+  // being destroyed - confirm against the Manager's teardown order.
+  for (size_t i = 0; i < m_cubeEdges.size(); ++i) {
+    CubeEdges *edges = m_cubeEdges[i];
+    if (edges) {
+      edges->~CubeEdges();
+    }
+  }
+}
+
+void Search::Decode()
+{
+  // init cube edges: one (initially empty) edge list per stack index,
+  // i.e. per number of covered words (see PostDecode)
+  m_cubeEdges.resize(mgr.GetInput().GetSize() + 1);
+  BOOST_FOREACH(CubeEdges *&slot, m_cubeEdges) {
+    slot = new (mgr.GetPool().Allocate<CubeEdges>()) CubeEdges();
+  }
+
+  // seed the search with the empty hypothesis
+  const Bitmap &emptyCoverage = mgr.GetBitmaps().GetInitialBitmap();
+  Hypothesis *seed = Hypothesis::Create(mgr.GetSystemPool(), mgr);
+  seed->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), emptyCoverage);
+  seed->EmptyHypothesisState(mgr.GetInput());
+
+  m_stack.Add(seed, mgr.GetHypoRecycle());
+  PostDecode(0);
+
+  // m_stack is reused for every stack index: clear it, fill it by cube
+  // pruning, then turn its contents into edges for later stacks.
+  size_t stackInd = 1;
+  while (stackInd < mgr.GetInput().GetSize() + 1) {
+    //cerr << "stackInd=" << stackInd << endl;
+    m_stack.Clear();
+    Decode(stackInd);
+    PostDecode(stackInd);
+
+    //m_stack.DebugCounts();
+    //cerr << m_stacks << endl;
+    ++stackInd;
+  }
+}
+
+void Search::Decode(size_t stackInd)
+{ // Fills m_stack for stack index stackInd by popping at most mgr.system.popLimit items from the cube-pruning queue.
+ Recycler<Hypothesis*> &hypoRecycler = mgr.GetHypoRecycle();
+
+ // reuse queue from previous stack. Clear it first
+ std::vector<QueueItem*> &container = Container(m_queue); // NOTE(review): Container() presumably exposes the priority_queue's underlying vector - confirm in legacy/Util2.h
+ //cerr << "container=" << container.size() << endl;
+ BOOST_FOREACH(QueueItem *item, container) {
+ // recycle unused hypos from queue
+ Hypothesis *hypo = item->hypo;
+ hypoRecycler.Recycle(hypo);
+
+ // recycle queue item
+ m_queueItemRecycler.push_back(item);
+ }
+ container.clear();
+ // seen positions are tracked per stack
+ m_seenPositions.clear();
+
+ // add top hypo from every edge into queue
+ CubeEdges &edges = *m_cubeEdges[stackInd];
+
+ BOOST_FOREACH(CubeEdge *edge, edges) {
+ //cerr << *edge << " ";
+ edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
+ }
+
+ /*
+ cerr << "edges: ";
+ boost::unordered_set<const Bitmap*> uniqueBM;
+ BOOST_FOREACH(CubeEdge *edge, edges) {
+ uniqueBM.insert(&edge->newBitmap);
+ //cerr << *edge << " ";
+ }
+ cerr << edges.size() << " " << uniqueBM.size();
+ cerr << endl;
+ */
+
+ size_t pops = 0;
+ while (!m_queue.empty() && pops < mgr.system.popLimit) {
+ // get best hypo from queue, add to stack
+ //cerr << "queue=" << queue.size() << endl;
+ QueueItem *item = m_queue.top();
+ m_queue.pop();
+ // read edge and hypo before CreateNext(), which may re-initialise `item`
+ CubeEdge *edge = item->edge;
+
+ // add hypo to stack
+ Hypothesis *hypo = item->hypo;
+ //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
+ m_stack.Add(hypo, hypoRecycler);
+ // push the popped cell's unseen neighbours; `item` is reused or recycled there
+ edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
+
+ ++pops;
+ }
+ // Items still queued here are recycled at the start of the next call.
+ /*
+ // create hypo from every edge. Increase diversity
+ while (!m_queue.empty()) {
+ QueueItem *item = m_queue.top();
+ m_queue.pop();
+
+ if (item->hypoIndex == 0 && item->tpIndex == 0) {
+ CubeEdge &edge = item->edge;
+
+ // add hypo to stack
+ Hypothesis *hypo = item->hypo;
+ //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
+ m_stacks.Add(hypo, mgr.GetHypoRecycle());
+ }
+ }
+ */
+}
+
+void Search::PostDecode(size_t stackInd)
+{
+  // Turns the hypotheses currently in m_stack into cube edges. Hypotheses
+  // are grouped by (bitmap, end position); for every used input path a group
+  // can extend over, one edge per non-empty target-phrase list is appended
+  // to the edge list of the stack indexed by the new bitmap's covered-word
+  // count. (stackInd itself is not used.)
+  MemPool &pool = mgr.GetPool();
+
+  Stack::SortedHypos groups = m_stack.GetSortedAndPruneHypos(mgr);
+  const InputPaths &paths = mgr.GetInputPaths();
+  const size_t numPt = mgr.system.mappings.size();
+
+  BOOST_FOREACH(const Stack::SortedHypos::value_type &group, groups) {
+    const Bitmap &hypoBitmap = *group.first.first;
+    const size_t hypoEndPos = group.first.second;
+    // hypos sharing this particular bitmap and end position
+    Hypotheses &groupHypos = *group.second;
+    //cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl;
+
+    BOOST_FOREACH(const InputPath *path, paths) {
+      const Range &pathRange = path->range;
+      //cerr << "pathRange=" << pathRange << endl;
+
+      if (!path->IsUsed() || !CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
+        continue;
+      }
+
+      const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
+      CubeEdges &edges = *m_cubeEdges[newBitmap.GetNumWordsCovered()];
+
+      for (size_t ptInd = 0; ptInd != numPt; ++ptInd) {
+        const TargetPhrases *tps = path->targetPhrases[ptInd];
+        if (!tps || !tps->GetSize()) {
+          continue;
+        }
+        edges.push_back(new (pool.Allocate<CubeEdge>())
+                        CubeEdge(mgr, groupHypos, *path, *tps, newBitmap));
+      }
+    }
+  }
+}
+
+const Hypothesis *Search::GetBestHypo() const
+{
+  // First hypothesis of m_stack under GetBestHypos' ordering, or NULL when
+  // the stack holds no hypotheses.
+  const std::vector<const Hypothesis*> ranked = m_stack.GetBestHypos(1);
+  if (ranked.empty()) {
+    return NULL;
+  }
+  return ranked[0];
+}
+
+}
+
+}
+
+
diff --git a/moses2/defer/CubePruningCardinalStack/Search.h b/moses2/defer/CubePruningCardinalStack/Search.h
new file mode 100644
index 000000000..f641c87d7
--- /dev/null
+++ b/moses2/defer/CubePruningCardinalStack/Search.h
@@ -0,0 +1,57 @@
+/*
+ * Search.h
+ *
+ * Created on: 16 Nov 2015
+ * Author: hieu
+ */
+
+#pragma once
+#include <boost/pool/pool_alloc.hpp>
+#include "../Search.h"
+#include "Misc.h"
+#include "Stack.h"
+#include "../../legacy/Range.h"
+
+namespace Moses2
+{
+
+class Bitmap;
+class Hypothesis;
+class InputPath;
+class TargetPhrases;
+
+namespace NSCubePruningCardinalStack
+{
+
+class Search : public Moses2::Search // Cube-pruning search that reuses a single Stack for every stack index.
+{
+public:
+ Search(Manager &mgr);
+ virtual ~Search();
+
+ virtual void Decode();
+ const Hypothesis *GetBestHypo() const;
+
+protected:
+ Stack m_stack; // cleared and refilled for each stack index by Decode()
+
+ CubeEdge::Queue m_queue;
+ CubeEdge::SeenPositions m_seenPositions;
+
+ // CUBE PRUNING VARIABLES
+ // setup
+ typedef std::vector<CubeEdge*> CubeEdges;
+ std::vector<CubeEdges*> m_cubeEdges; // indexed by stack index; each list is constructed in the manager's pool by Decode()
+
+ std::deque<QueueItem*> m_queueItemRecycler;
+
+ // CUBE PRUNING
+ // decoding
+ void Decode(size_t stackInd);
+ void PostDecode(size_t stackInd);
+};
+
+}
+
+}
+
diff --git a/contrib/moses2/defer/CubePruningCardinalStack/Stack.cpp b/moses2/defer/CubePruningCardinalStack/Stack.cpp
index 0c296d8ca..60a3fe1e8 100644
--- a/contrib/moses2/defer/CubePruningCardinalStack/Stack.cpp
+++ b/moses2/defer/CubePruningCardinalStack/Stack.cpp
@@ -22,13 +22,14 @@ namespace NSCubePruningCardinalStack
///////////////////////////////////////////////////////////////
Stack::Stack(const Manager &mgr)
-:m_mgr(mgr)
-,m_coll()
+ :m_mgr(mgr)
+ ,m_coll()
{
}
-Stack::~Stack() {
- // TODO Auto-generated destructor stub
+Stack::~Stack()
+{
+ // TODO Auto-generated destructor stub
}
void Stack::Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle)
@@ -37,24 +38,22 @@ void Stack::Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle)
// CHECK RECOMBINATION
if (addRet.second) {
- // equiv hypo doesn't exists
- }
- else {
- const Hypothesis *hypoExisting = *addRet.first;
- if (hypo->GetScores().GetTotalScore() > hypoExisting->GetScores().GetTotalScore()) {
- // incoming hypo is better than the one we have
- const Hypothesis *const &hypoExisting1 = *addRet.first;
- const Hypothesis *&hypoExisting2 = const_cast<const Hypothesis *&>(hypoExisting1);
- hypoExisting2 = hypo;
-
- Hypothesis *hypoToBeDeleted = const_cast<Hypothesis*>(hypoExisting);
- hypoRecycle.Recycle(hypoToBeDeleted);
- }
- else {
- // already storing the best hypo. discard incoming hypo
- Hypothesis *hypoToBeDeleted = const_cast<Hypothesis*>(hypo);
- hypoRecycle.Recycle(hypoToBeDeleted);
- }
+ // equiv hypo doesn't exists
+ } else {
+ const Hypothesis *hypoExisting = *addRet.first;
+ if (hypo->GetScores().GetTotalScore() > hypoExisting->GetScores().GetTotalScore()) {
+ // incoming hypo is better than the one we have
+ const Hypothesis *const &hypoExisting1 = *addRet.first;
+ const Hypothesis *&hypoExisting2 = const_cast<const Hypothesis *&>(hypoExisting1);
+ hypoExisting2 = hypo;
+
+ Hypothesis *hypoToBeDeleted = const_cast<Hypothesis*>(hypoExisting);
+ hypoRecycle.Recycle(hypoToBeDeleted);
+ } else {
+ // already storing the best hypo. discard incoming hypo
+ Hypothesis *hypoToBeDeleted = const_cast<Hypothesis*>(hypo);
+ hypoRecycle.Recycle(hypoToBeDeleted);
+ }
}
}
@@ -65,24 +64,24 @@ std::vector<const Hypothesis*> Stack::GetBestHypos(size_t num) const
std::vector<const Hypothesis*>::iterator iterMiddle;
iterMiddle = (num == 0 || ret.size() < num)
- ? ret.end()
- : ret.begin()+num;
+ ? ret.end()
+ : ret.begin()+num;
std::partial_sort(ret.begin(), iterMiddle, ret.end(),
- HypothesisFutureScoreOrderer());
+ HypothesisFutureScoreOrderer());
return ret;
}
size_t Stack::GetHypoSize() const
{
- return m_coll.size();
+ return m_coll.size();
}
void Stack::Clear()
{
- m_coll.clear();
+ m_coll.clear();
}
Stack::SortedHypos Stack::GetSortedAndPruneHypos(const Manager &mgr) const
@@ -95,25 +94,24 @@ Stack::SortedHypos Stack::GetSortedAndPruneHypos(const Manager &mgr) const
Hypotheses *allHypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool, GetHypoSize());
size_t i = 0;
BOOST_FOREACH(const Hypothesis *hypo, m_coll) {
- (*allHypos)[i++] = hypo;
+ (*allHypos)[i++] = hypo;
}
SortAndPruneHypos(mgr, *allHypos);
// divide hypos by [bitmap, last end pos]
BOOST_FOREACH(const Hypothesis *hypo, *allHypos) {
- HypoCoverage key(&hypo->GetBitmap(), hypo->GetInputPath().range.GetEndPos());
-
- Hypotheses *hypos;
- SortedHypos::iterator iter;
- iter = ret.find(key);
- if (iter == ret.end()) {
- hypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool);
- ret[key] = hypos;
- }
- else {
- hypos = iter->second;
- }
- hypos->push_back(hypo);
+ HypoCoverage key(&hypo->GetBitmap(), hypo->GetInputPath().range.GetEndPos());
+
+ Hypotheses *hypos;
+ SortedHypos::iterator iter;
+ iter = ret.find(key);
+ if (iter == ret.end()) {
+ hypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool);
+ ret[key] = hypos;
+ } else {
+ hypos = iter->second;
+ }
+ hypos->push_back(hypo);
}
return ret;
@@ -160,33 +158,33 @@ void Stack::SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const
/*
cerr << "UNSORTED hypos:" << endl;
for (size_t i = 0; i < hypos.size(); ++i) {
- const Hypothesis *hypo = hypos[i];
- cerr << *hypo << endl;
+ const Hypothesis *hypo = hypos[i];
+ cerr << *hypo << endl;
}
cerr << endl;
*/
Hypotheses::iterator iterMiddle;
iterMiddle = (stackSize == 0 || hypos.size() < stackSize)
- ? hypos.end()
- : hypos.begin() + stackSize;
+ ? hypos.end()
+ : hypos.begin() + stackSize;
std::partial_sort(hypos.begin(), iterMiddle, hypos.end(),
- HypothesisFutureScoreOrderer());
+ HypothesisFutureScoreOrderer());
// prune
if (stackSize && hypos.size() > stackSize) {
- for (size_t i = stackSize; i < hypos.size(); ++i) {
- Hypothesis *hypo = const_cast<Hypothesis*>(hypos[i]);
- recycler.Recycle(hypo);
- }
- hypos.resize(stackSize);
+ for (size_t i = stackSize; i < hypos.size(); ++i) {
+ Hypothesis *hypo = const_cast<Hypothesis*>(hypos[i]);
+ recycler.Recycle(hypo);
+ }
+ hypos.resize(stackSize);
}
/*
cerr << "sorted hypos:" << endl;
for (size_t i = 0; i < hypos.size(); ++i) {
- const Hypothesis *hypo = hypos[i];
- cerr << hypo << " " << *hypo << endl;
+ const Hypothesis *hypo = hypos[i];
+ cerr << hypo << " " << *hypo << endl;
}
cerr << endl;
*/
diff --git a/moses2/defer/CubePruningCardinalStack/Stack.h b/moses2/defer/CubePruningCardinalStack/Stack.h
new file mode 100644
index 000000000..94e987b7b
--- /dev/null
+++ b/moses2/defer/CubePruningCardinalStack/Stack.h
@@ -0,0 +1,71 @@
+/*
+ * Stack.h
+ *
+ * Created on: 24 Oct 2015
+ * Author: hieu
+ */
+#pragma once
+#include <boost/unordered_map.hpp>
+#include <boost/unordered_set.hpp>
+#include <deque>
+#include "../Hypothesis.h"
+#include "../../TypeDef.h"
+#include "../../Vector.h"
+#include "../../MemPool.h"
+#include "../../Recycler.h"
+#include "../../legacy/Util2.h"
+
+namespace Moses2
+{
+
+class Manager;
+
+namespace NSCubePruningCardinalStack
+{
+typedef Vector<const Hypothesis*> Hypotheses;
+
+
+/////////////////////////////////////////////
+class Stack // A single set of hypotheses with recombination on Add().
+{
+protected:
+ typedef boost::unordered_set<const Hypothesis*,
+ UnorderedComparer<Hypothesis>, // hash functor
+ UnorderedComparer<Hypothesis> // equality predicate
+ > _HCType;
+
+public:
+ typedef std::pair<const Bitmap*, size_t> HypoCoverage; // coverage bitmap plus end position of the hypothesis' input-path range
+ typedef boost::unordered_map<HypoCoverage, Hypotheses*> SortedHypos;
+
+ Stack(const Manager &mgr);
+ virtual ~Stack();
+
+ size_t GetHypoSize() const;
+ // Direct access to the underlying hypothesis set.
+ _HCType &GetColl() {
+ return m_coll;
+ }
+ const _HCType &GetColl() const {
+ return m_coll;
+ }
+ // Inserts hypo; if an equivalent one exists, the higher-scoring one is kept and the other is recycled.
+ void Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle);
+
+ std::vector<const Hypothesis*> GetBestHypos(size_t num) const;
+ void Clear();
+ // Sorts/prunes all hypotheses, then groups them by HypoCoverage.
+ SortedHypos GetSortedAndPruneHypos(const Manager &mgr) const;
+ void SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const;
+
+protected:
+ const Manager &m_mgr;
+ _HCType m_coll;
+
+};
+
+}
+
+}
+
+
diff --git a/moses2/defer/CubePruningPerBitmap/Misc.cpp b/moses2/defer/CubePruningPerBitmap/Misc.cpp
new file mode 100644
index 000000000..8e94dac5d
--- /dev/null
+++ b/moses2/defer/CubePruningPerBitmap/Misc.cpp
@@ -0,0 +1,159 @@
+/*
+ * CubePruning.cpp
+ *
+ * Created on: 27 Nov 2015
+ * Author: hieu
+ */
+
+#include "Misc.h"
+#include "../Manager.h"
+#include "../../MemPool.h"
+#include "../../System.h"
+
+using namespace std;
+
+namespace Moses2
+{
+
+namespace NSCubePruningPerBitmap
+{
+
+////////////////////////////////////////////////////////////////////////
+QueueItem *QueueItem::Create(QueueItem *currItem,
+                             Manager &mgr,
+                             CubeEdge &edge,
+                             size_t hypoIndex,
+                             size_t tpIndex,
+                             std::deque<QueueItem*> &queueItemRecycler)
+{
+  // Returns a QueueItem set up for cell (hypoIndex, tpIndex) of `edge`.
+  // Storage comes, in order of preference, from the caller's spent item,
+  // from the back of the recycle bin, or freshly from the manager's pool.
+  if (currItem) {
+    // reuse the incoming queue item
+    currItem->Init(mgr, edge, hypoIndex, tpIndex);
+    return currItem;
+  }
+
+  if (queueItemRecycler.empty()) {
+    // nothing to reuse: construct a new item in pool memory
+    return new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge, hypoIndex, tpIndex);
+  }
+
+  // take the most recently recycled item
+  QueueItem *recycled = queueItemRecycler.back();
+  recycled->Init(mgr, edge, hypoIndex, tpIndex);
+  queueItemRecycler.pop_back();
+  return recycled;
+}
+
+QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
+{
+  // Construction is the same work as re-initialising a recycled item:
+  // record the cell, then build its hypothesis.
+  Init(mgr, edge, hypoIndex, tpIndex);
+}
+
+void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
+{
+  // Points this item at cell (hypoIndex, tpIndex) of `edge`. The parameters
+  // shadow the members, hence the explicit this->.
+  this->tpIndex = tpIndex;
+  this->hypoIndex = hypoIndex;
+  this->edge = &edge;
+
+  // hypo is (re)built from the three fields set above
+  CreateHypothesis(mgr);
+}
+
+void QueueItem::CreateHypothesis(Manager &mgr)
+{
+  // Builds `hypo` by extending entry hypoIndex of the edge's mini stack (as
+  // returned by GetSortedAndPruneHypos) with edge->tps[tpIndex] over the
+  // edge's path, then scores it.
+  const Hypothesis *parent = edge->miniStack.GetSortedAndPruneHypos(mgr)[hypoIndex];
+  const TargetPhrase &targetPhrase = edge->tps[tpIndex];
+
+  //cerr << "hypoIndex=" << hypoIndex << endl;
+  //cerr << "edge.hypos=" << edge.hypos.size() << endl;
+  //cerr << parent << endl;
+  //cerr << *parent << endl;
+
+  hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
+  hypo->Init(mgr, *parent, edge->path, targetPhrase,
+             edge->newBitmap, edge->estimatedScore);
+  hypo->EvaluateWhenApplied();
+}
+
+////////////////////////////////////////////////////////////////////////
+CubeEdge::CubeEdge(
+  Manager &mgr,
+  const NSCubePruningMiniStack::MiniStack &miniStack,
+  const InputPath &path,
+  const TargetPhrases &tps,
+  const Bitmap &newBitmap)
+  :miniStack(miniStack)
+  ,path(path)
+  ,tps(tps)
+  ,newBitmap(newBitmap)
+  // estimatedScore is declared last in the class, so initialising it here
+  // keeps the member initialisation order; it depends only on newBitmap.
+  ,estimatedScore(mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap))
+{
+}
+
+std::ostream& operator<<(std::ostream &out, const CubeEdge &obj)
+{ // Debug output: a CubeEdge prints as its target coverage bitmap only.
+ out << obj.newBitmap;
+ return out;
+}
+
+bool
+CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const
+{
+  // Records cell (x, y) of this edge in seenPositions.
+  // Returns true if the cell was newly inserted, false if it was already there.
+  //
+  // Both indices are packed into the single int of SeenPositionItem as
+  // (x<<16)+y, so distinct cells only get distinct keys while each index fits
+  // in 16 bits. The earlier, commented-out checks tested against 1<<17, which
+  // is too loose for a 16-bit shift. Enforce the real bound in debug builds
+  // instead of silently treating an unseen cell as seen.
+  // NOTE(review): keys with x >= 2^15 exceed INT_MAX and rely on the usual
+  // modular size_t -> int conversion of a 32-bit int.
+  assert(x < (static_cast<size_t>(1) << 16));
+  assert(y < (static_cast<size_t>(1) << 16));
+
+  SeenPositionItem val(this, (x<<16) + y);
+  std::pair<SeenPositions::iterator, bool> pairRet = seenPositions.insert(val);
+  return pairRet.second;
+}
+
+void CubeEdge::CreateFirst(Manager &mgr,
+                           Queue &queue,
+                           SeenPositions &seenPositions,
+                           std::deque<QueueItem*> &queueItemRecycler)
+{
+  // Seeds the queue with this edge's first cell (hypo 0, target phrase 0).
+  // Does nothing when the mini stack yields no hypotheses.
+  if (miniStack.GetSortedAndPruneHypos(mgr).size() == 0) {
+    return;
+  }
+  assert(tps.GetSize());
+
+  queue.push(QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler));
+
+  // the call stays outside assert() so it also runs when asserts are compiled out
+  bool firstVisit = SetSeenPosition(0, 0, seenPositions);
+  assert(firstVisit);
+}
+
+void CubeEdge::CreateNext(Manager &mgr,
+ QueueItem *item,
+ Queue &queue,
+ SeenPositions &seenPositions,
+ std::deque<QueueItem*> &queueItemRecycler)
+{ // Pushes the not-yet-seen neighbours (hypoIndex+1, tpIndex) and (hypoIndex, tpIndex+1) of the popped `item`.
+ size_t hypoIndex = item->hypoIndex;
+ size_t tpIndex = item->tpIndex;
+ // The indices are copied out first because reusing `item` below overwrites its fields.
+ if (hypoIndex + 1 < miniStack.GetSortedAndPruneHypos(mgr).size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) {
+ // reuse incoming queue item to create new item
+ QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler);
+ assert(newItem == item);
+ queue.push(newItem);
+ item = NULL; // consumed: the second neighbour must not reuse it
+ }
+ // `item` is NULL here if it was consumed above; Create then takes storage from the recycler or the pool.
+ if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) {
+ QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler);
+ queue.push(newItem);
+ item = NULL;
+ }
+ // Neither neighbour reused the popped item.
+ if (item) {
+ // recycle unused queue item
+ queueItemRecycler.push_back(item);
+ }
+}
+
+}
+
+}
+
+
diff --git a/moses2/defer/CubePruningPerBitmap/Misc.h b/moses2/defer/CubePruningPerBitmap/Misc.h
new file mode 100644
index 000000000..3fa22f9a6
--- /dev/null
+++ b/moses2/defer/CubePruningPerBitmap/Misc.h
@@ -0,0 +1,113 @@
+/*
+ * CubePruning.h
+ *
+ * Created on: 27 Nov 2015
+ * Author: hieu
+ */
+#pragma once
+#include <boost/pool/pool_alloc.hpp>
+#include <boost/unordered_map.hpp>
+#include <boost/unordered_set.hpp>
+#include <vector>
+#include <queue>
+#include "../../legacy/Range.h"
+#include "../Hypothesis.h"
+#include "../../TypeDef.h"
+#include "../../Vector.h"
+#include "../CubePruningMiniStack/Stack.h"
+
+namespace Moses2
+{
+
+class Manager;
+class InputPath;
+class TargetPhrases;
+class Bitmap;
+
+namespace NSCubePruningPerBitmap
+{
+class CubeEdge;
+
+///////////////////////////////////////////
+class QueueItem // One cell (hypoIndex, tpIndex) of a CubeEdge together with the hypothesis built for it.
+{
+ ~QueueItem(); // NOT IMPLEMENTED. Use MemPool
+public:
+ static QueueItem *Create(QueueItem *currItem, // item to reuse, or NULL to take one from the recycler or the pool
+ Manager &mgr,
+ CubeEdge &edge,
+ size_t hypoIndex,
+ size_t tpIndex,
+ std::deque<QueueItem*> &queueItemRecycler);
+ QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
+ // Re-targets an existing item at a new cell and rebuilds its hypothesis.
+ void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
+
+ CubeEdge *edge;
+ size_t hypoIndex, tpIndex; // indices into the edge's sorted mini-stack hypotheses and edge->tps
+ Hypothesis *hypo; // set by CreateHypothesis()
+
+protected:
+ void CreateHypothesis(Manager &mgr);
+};
+
+///////////////////////////////////////////
+class QueueItemOrderer
+{
+public:
+  // Comparator for CubeEdge::Queue (a std::priority_queue): returns true
+  // when itemB's hypothesis is ordered before itemA's by
+  // HypothesisFutureScoreOrderer, so that hypothesis ends up nearer the top.
+  //
+  // The arguments are swapped instead of negating the result. Negating a
+  // strict weak ordering returns true for equivalent items (and for an item
+  // compared with itself), which breaks the Compare requirements that
+  // std::priority_queue places on its comparator. Only the relative order of
+  // equally-ranked items can differ from the negated form.
+  bool operator()(QueueItem* itemA, QueueItem* itemB) const {
+    HypothesisFutureScoreOrderer orderer;
+    return orderer(itemB->hypo, itemA->hypo);
+  }
+};
+
+///////////////////////////////////////////
+class CubeEdge // A grid of (mini-stack hypothesis, target phrase) pairs sharing one input path and one resulting coverage bitmap.
+{
+ friend std::ostream& operator<<(std::ostream &, const CubeEdge &);
+
+public:
+ typedef std::priority_queue<QueueItem*,
+ std::vector<QueueItem*>,
+ QueueItemOrderer> Queue;
+ // Seen-cell key: owning edge plus the cell packed as (x<<16)+y by SetSeenPosition().
+ typedef std::pair<const CubeEdge*, int> SeenPositionItem;
+ typedef boost::unordered_set<SeenPositionItem,
+ boost::hash<SeenPositionItem>,
+ std::equal_to<SeenPositionItem>
+ > SeenPositions;
+ // All four are references: the referenced objects must outlive the edge.
+ const NSCubePruningMiniStack::MiniStack &miniStack;
+ const InputPath &path;
+ const TargetPhrases &tps;
+ const Bitmap &newBitmap;
+ SCORE estimatedScore; // CalcEstimatedScore(newBitmap), computed in the constructor
+
+ CubeEdge(Manager &mgr,
+ const NSCubePruningMiniStack::MiniStack &miniStack,
+ const InputPath &path,
+ const TargetPhrases &tps,
+ const Bitmap &newBitmap);
+ // Returns true if cell (x, y) of this edge was not yet in seenPositions (and inserts it).
+ bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const;
+ // Pushes cell (0, 0) onto the queue, unless the mini stack yields no hypotheses.
+ void CreateFirst(Manager &mgr,
+ Queue &queue,
+ SeenPositions &seenPositions,
+ std::deque<QueueItem*> &queueItemRecycler);
+ void CreateNext(Manager &mgr, // pushes the unseen right/down neighbours of `item`
+ QueueItem *item,
+ Queue &queue,
+ SeenPositions &seenPositions,
+ std::deque<QueueItem*> &queueItemRecycler);
+
+
+protected:
+
+};
+
+}
+
+}
+
+
diff --git a/moses2/defer/CubePruningPerBitmap/Search.cpp b/moses2/defer/CubePruningPerBitmap/Search.cpp
new file mode 100644
index 000000000..d07b28a72
--- /dev/null
+++ b/moses2/defer/CubePruningPerBitmap/Search.cpp
@@ -0,0 +1,271 @@
+/*
+ * Search.cpp
+ *
+ * Created on: 16 Nov 2015
+ * Author: hieu
+ */
+#include <boost/foreach.hpp>
+#include "Search.h"
+#include "../Manager.h"
+#include "../Hypothesis.h"
+#include "../../InputPaths.h"
+#include "../../InputPath.h"
+#include "../../System.h"
+#include "../../Sentence.h"
+#include "../../TranslationTask.h"
+#include "../../legacy/Util2.h"
+
+using namespace std;
+
+namespace Moses2
+{
+
+namespace NSCubePruningPerBitmap
+{
+
+////////////////////////////////////////////////////////////////////////
+// Constructs the search for one Manager: the base class and m_stacks both
+// receive the manager, and the priority queue (empty vector, default
+// QueueItemOrderer) and the seen-position set start out empty.
+Search::Search(Manager &mgr)
+ :Moses2::Search(mgr)
+ ,m_stacks(mgr)
+
+ ,m_queue(QueueItemOrderer(),
+ std::vector<QueueItem*>() )
+
+ ,m_seenPositions()
+{
+}
+
+// Empty destructor body: nothing is released explicitly here.
+Search::~Search()
+{
+}
+
+/**
+ * Top-level decoding entry point.
+ *
+ * Sizes the stacks to (input size + 1), seeds them with the initial empty
+ * hypothesis, builds the cube edges for every stack except the last one, and
+ * then decodes stacks 1 .. last in order.
+ */
+void Search::Decode()
+{
+  m_stacks.Init(mgr.GetInput().GetSize() + 1);
+
+  // seed hypothesis: initial bitmap, blank input path, initial phrase
+  const Bitmap &startBitmap = mgr.GetBitmaps().GetInitialBitmap();
+  Hypothesis *seedHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
+  seedHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), startBitmap);
+  seedHypo->EmptyHypothesisState(mgr.GetInput());
+  m_stacks.Add(seedHypo, mgr.GetHypoRecycle());
+
+  // phase 1: create the edges leaving each stack but the last
+  size_t sourceStack = 0;
+  while (sourceStack < m_stacks.GetSize() - 1) {
+    CreateSearchGraph(sourceStack);
+    ++sourceStack;
+  }
+
+  // phase 2: fill stacks 1 .. last
+  size_t targetStack = 1;
+  while (targetStack < m_stacks.GetSize()) {
+    Decode(targetStack);
+    ++targetStack;
+  }
+}
+
+/**
+ * Decodes one stack, one coverage bitmap at a time.
+ *
+ * The mini-stacks of stack `stackInd` are grouped by the Bitmap pointer in
+ * their key; each group is then decoded with its own queue pass.
+ * (Decoding all mini-stacks of the stack in a single pass is the alternative
+ * that was left disabled in this function.)
+ */
+void Search::Decode(size_t stackInd)
+{
+  typedef std::vector<NSCubePruningMiniStack::MiniStack*> MiniStackGroup;
+  typedef boost::unordered_map<const Bitmap*, MiniStackGroup> GroupsByBitmap;
+
+  NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
+
+  // bucket the mini-stacks by coverage bitmap
+  GroupsByBitmap groups;
+  BOOST_FOREACH(NSCubePruningMiniStack::Stack::Coll::value_type &entry, stack.GetColl()) {
+    const Bitmap *coverage = entry.first.first;
+    NSCubePruningMiniStack::MiniStack &member = *entry.second;
+    groups[coverage].push_back(&member);
+  }
+
+  // one decoding pass per bitmap
+  BOOST_FOREACH(const GroupsByBitmap::value_type &group, groups) {
+    Decode(group.second);
+  }
+}
+
+// Runs one cube-pruning pass over a group of mini-stacks:
+//  1. items left in the queue by the previous pass are recycled,
+//  2. every edge registered for the given mini-stacks seeds the queue,
+//  3. up to mgr.system.popLimit items are popped; each popped hypothesis is
+//     added to m_stacks and its edge is asked to create successor items.
+void Search::Decode(const vector<NSCubePruningMiniStack::MiniStack*> &miniStacks)
+{
+ Recycler<Hypothesis*> &hypoRecycler = mgr.GetHypoRecycle();
+
+ // reuse queue from previous stack. Clear it first
+ // Container() is declared elsewhere; here it yields the vector that is
+ // cleared below to empty the queue.
+ std::vector<QueueItem*> &container = Container(m_queue);
+ //cerr << "container=" << container.size() << endl;
+ BOOST_FOREACH(QueueItem *item, container) {
+ // recycle unused hypos from queue
+ Hypothesis *hypo = item->hypo;
+ hypoRecycler.Recycle(hypo);
+
+ // recycle queue item
+ m_queueItemRecycler.push_back(item);
+ }
+ container.clear();
+
+ m_seenPositions.clear();
+
+ BOOST_FOREACH(NSCubePruningMiniStack::MiniStack *miniStack, miniStacks) {
+ // add top hypo from every edge into queue
+ // NOTE(review): operator[] inserts a NULL entry for a mini-stack with no
+ // registered edges and that NULL is dereferenced here -- assumes every
+ // mini-stack was registered by CreateSearchGraph; confirm.
+ CubeEdges &edges = *m_cubeEdges[miniStack];
+
+ BOOST_FOREACH(CubeEdge *edge, edges) {
+ //cerr << "edge=" << *edge << endl;
+ edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
+ }
+ }
+
+ size_t pops = 0;
+ while (!m_queue.empty() && pops < mgr.system.popLimit) {
+ // get best hypo from queue, add to stack
+ //cerr << "queue=" << queue.size() << endl;
+ QueueItem *item = m_queue.top();
+ m_queue.pop();
+
+ // read edge and hypo before CreateNext, which is handed the item itself
+ CubeEdge *edge = item->edge;
+
+ // add hypo to stack
+ Hypothesis *hypo = item->hypo;
+ //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
+ m_stacks.Add(hypo, hypoRecycler);
+
+ edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
+
+ ++pops;
+ }
+
+ /*
+ // create hypo from every edge. Increase diversity
+ while (!m_queue.empty()) {
+ QueueItem *item = m_queue.top();
+ m_queue.pop();
+
+ if (item->hypoIndex == 0 && item->tpIndex == 0) {
+ CubeEdge &edge = item->edge;
+
+ // add hypo to stack
+ Hypothesis *hypo = item->hypo;
+ //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
+ m_stacks.Add(hypo, mgr.GetHypoRecycle());
+ }
+ }
+ */
+}
+
+
+// Builds the cube edges that leave stack `stackInd`.
+// For every mini-stack in that stack and every used input path that
+// CanExtend() accepts, one CubeEdge is allocated per non-empty target-phrase
+// list and filed in m_cubeEdges under the mini-stack the extension lands in.
+void Search::CreateSearchGraph(size_t stackInd)
+{
+ NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
+ MemPool &pool = mgr.GetPool();
+
+ BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
+ // the mini-stack key is a (bitmap pointer, end position) pair
+ const Bitmap &hypoBitmap = *val.first.first;
+ size_t hypoEndPos = val.first.second;
+ //cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl;
+
+ // create edges to next hypos from existing hypos
+ const InputPaths &paths = mgr.GetInputPaths();
+
+ BOOST_FOREACH(const InputPath *path, paths) {
+ const Range &pathRange = path->range;
+ //cerr << "pathRange=" << pathRange << endl;
+
+ if (!path->IsUsed()) {
+ continue;
+ }
+ if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
+ continue;
+ }
+
+ const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
+
+ // sort hypo for a particular bitmap and hypoEndPos
+ const NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
+
+
+ // add cube edge
+ // one target-phrase list per entry of mgr.system.mappings
+ size_t numPt = mgr.system.mappings.size();
+ for (size_t i = 0; i < numPt; ++i) {
+ const TargetPhrases *tps = path->targetPhrases[i];
+ if (tps && tps->GetSize()) {
+ // create next mini stack
+ NSCubePruningMiniStack::MiniStack &nextMiniStack = m_stacks.GetMiniStack(newBitmap, pathRange);
+
+ // edge objects are placement-constructed in the manager's pool
+ CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, miniStack, *path, *tps, newBitmap);
+
+ // find the edge list of the destination mini-stack, creating it on first use
+ CubeEdges *edges;
+ boost::unordered_map<NSCubePruningMiniStack::MiniStack*, CubeEdges*>::iterator iter = m_cubeEdges.find(&nextMiniStack);
+ if (iter == m_cubeEdges.end()) {
+ edges = new (pool.Allocate<CubeEdges>()) CubeEdges();
+ m_cubeEdges[&nextMiniStack] = edges;
+ } else {
+ edges = iter->second;
+ }
+
+ edges->push_back(edge);
+ }
+ }
+ }
+ }
+
+}
+
+
+/**
+ * @return the first hypothesis from GetBestHypos(1) on the last stack, or NULL
+ *         when that list is empty.
+ */
+const Hypothesis *Search::GetBestHypo() const
+{
+  const NSCubePruningMiniStack::Stack &finalStack = m_stacks.Back();
+  std::vector<const Hypothesis*> topOne = finalStack.GetBestHypos(1);
+
+  if (topOne.empty()) {
+    return NULL;
+  }
+  return topOne[0];
+}
+
+/**
+ * Debug aid: prints to stderr a histogram "size=count" giving, for every
+ * mini-stack collection size seen across all stacks, how many mini-stacks
+ * have that size.
+ */
+void Search::DebugCounts()
+{
+  // collection size -> number of mini-stacks with that size
+  std::map<size_t, size_t> counts;
+
+  for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) {
+    //cerr << "stackInd=" << stackInd << endl;
+    const NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
+    BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
+      const NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
+      size_t count = miniStack.GetColl().size();
+
+      // std::map::operator[] value-initialises a missing entry to 0, so a
+      // plain increment also counts the first mini-stack of each size
+      // (it used to be recorded as 0, leaving every bucket one too low).
+      ++counts[count];
+    }
+    //cerr << m_stacks << endl;
+  }
+
+  std::map<size_t, size_t>::const_iterator iter;
+  for (iter = counts.begin(); iter != counts.end(); ++iter) {
+    cerr << iter->first << "=" << iter->second << " ";
+  }
+  cerr << endl;
+}
+
+
+
+}
+
+}
+
+
diff --git a/moses2/defer/CubePruningPerBitmap/Search.h b/moses2/defer/CubePruningPerBitmap/Search.h
new file mode 100644
index 000000000..cb2164074
--- /dev/null
+++ b/moses2/defer/CubePruningPerBitmap/Search.h
@@ -0,0 +1,66 @@
+/*
+ * Search.h
+ *
+ * Created on: 16 Nov 2015
+ * Author: hieu
+ */
+
+#pragma once
+#include <boost/pool/pool_alloc.hpp>
+#include <boost/unordered_map.hpp>
+#include "../Search.h"
+#include "Misc.h"
+#include "Stacks.h"
+#include "../../legacy/Range.h"
+
+namespace Moses2
+{
+
+class Bitmap;
+class Hypothesis;
+class InputPath;
+class TargetPhrases;
+
+namespace NSCubePruningMiniStack
+{
+class MiniStack;
+}
+
+namespace NSCubePruningPerBitmap
+{
+
+// Search implementation in namespace NSCubePruningPerBitmap: a Moses2::Search
+// that keeps its own stacks, one shared priority queue and the cube edges
+// registered per destination mini-stack.
+class Search : public Moses2::Search
+{
+public:
+ Search(Manager &mgr);
+ virtual ~Search();
+
+ // Runs the whole search.
+ virtual void Decode();
+ // Best hypothesis of the last stack, or NULL if there is none.
+ const Hypothesis *GetBestHypo() const;
+
+protected:
+ Stacks m_stacks;
+
+ // Queue and seen-position set are members so they can be reused between
+ // decoding passes.
+ CubeEdge::Queue m_queue;
+ CubeEdge::SeenPositions m_seenPositions;
+
+ // CUBE PRUNING VARIABLES
+ // setup
+ typedef std::vector<CubeEdge*> CubeEdges;
+ // destination mini-stack -> edges that feed it
+ boost::unordered_map<NSCubePruningMiniStack::MiniStack*, CubeEdges*> m_cubeEdges;
+
+ // QueueItem objects available for reuse
+ std::deque<QueueItem*> m_queueItemRecycler;
+
+ // CUBE PRUNING
+ // decoding
+ void CreateSearchGraph(size_t stackInd);
+ void Decode(size_t stackInd);
+ void Decode(const std::vector<NSCubePruningMiniStack::MiniStack*> &miniStacks);
+
+ // Prints a histogram of mini-stack sizes to stderr.
+ void DebugCounts();
+};
+
+}
+
+}
+
diff --git a/moses2/defer/CubePruningPerBitmap/Stacks.cpp b/moses2/defer/CubePruningPerBitmap/Stacks.cpp
new file mode 100644
index 000000000..9930f575e
--- /dev/null
+++ b/moses2/defer/CubePruningPerBitmap/Stacks.cpp
@@ -0,0 +1,72 @@
+/*
+ * Stacks.cpp
+ *
+ * Created on: 6 Nov 2015
+ * Author: hieu
+ */
+
+#include "Stacks.h"
+#include "../../System.h"
+#include "../Manager.h"
+
+using namespace std;
+
+namespace Moses2
+{
+
+namespace NSCubePruningPerBitmap
+{
+
+// Stores a reference to the manager; the stacks themselves are created by Init().
+Stacks::Stacks(const Manager &mgr)
+ :m_mgr(mgr)
+{
+}
+
+// Empty destructor body: the Stack objects pointed to by m_stacks are not
+// destroyed or freed here.
+Stacks::~Stacks()
+{
+}
+
+/**
+ * Creates `numStacks` stacks, each placement-constructed in memory obtained
+ * from the manager's pool.
+ */
+void Stacks::Init(size_t numStacks)
+{
+  typedef NSCubePruningMiniStack::Stack StackType;
+
+  m_stacks.resize(numStacks);
+  for (std::vector<StackType*>::iterator slot = m_stacks.begin(); slot != m_stacks.end(); ++slot) {
+    *slot = new (m_mgr.GetPool().Allocate<StackType>()) StackType(m_mgr);
+  }
+}
+
+
+/** Writes the hypothesis count of every stack, each followed by a space. */
+std::ostream& operator<<(std::ostream &out, const Stacks &obj)
+{
+  size_t pos = 0;
+  while (pos < obj.GetSize()) {
+    const NSCubePruningMiniStack::Stack &current = *obj.m_stacks[pos];
+    out << current.GetHypoSize() << " ";
+    ++pos;
+  }
+  return out;
+}
+
+/**
+ * Adds `hypo` to the stack whose index equals the number of words covered by
+ * the hypothesis' bitmap.
+ */
+void Stacks::Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle)
+{
+  const size_t stackIndex = hypo->GetBitmap().GetNumWordsCovered();
+  NSCubePruningMiniStack::Stack &destination = *m_stacks[stackIndex];
+  destination.Add(hypo, hypoRecycle);
+}
+
+/**
+ * Looks up the mini-stack for an extension.
+ *
+ * The stack is chosen by the number of words covered in `newBitmap`; the
+ * mini-stack inside it is keyed by (bitmap pointer, end position of
+ * `pathRange`).
+ *
+ * @return the mini-stack returned by Stack::GetMiniStack() for that key.
+ *         The call result used to be discarded and control ran off the end
+ *         of this reference-returning function (undefined behaviour).
+ */
+NSCubePruningMiniStack::MiniStack &Stacks::GetMiniStack(const Bitmap &newBitmap, const Range &pathRange)
+{
+  size_t numWordsCovered = newBitmap.GetNumWordsCovered();
+  //cerr << "numWordsCovered=" << numWordsCovered << endl;
+  NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered];
+
+  NSCubePruningMiniStack::Stack::HypoCoverage key(&newBitmap, pathRange.GetEndPos());
+  return stack.GetMiniStack(key);
+}
+
+}
+
+}
+
+
diff --git a/moses2/defer/CubePruningPerBitmap/Stacks.h b/moses2/defer/CubePruningPerBitmap/Stacks.h
new file mode 100644
index 000000000..28d939885
--- /dev/null
+++ b/moses2/defer/CubePruningPerBitmap/Stacks.h
@@ -0,0 +1,55 @@
+/*
+ * Stacks.h
+ *
+ * Created on: 6 Nov 2015
+ * Author: hieu
+ */
+
+#pragma once
+
+#include <vector>
+#include "../CubePruningMiniStack/Stack.h"
+#include "../../Recycler.h"
+
+namespace Moses2
+{
+class Manager;
+
+namespace NSCubePruningPerBitmap
+{
+
+// A vector of NSCubePruningMiniStack::Stack pointers bound to one Manager,
+// with helpers to add hypotheses and look up mini-stacks.
+class Stacks
+{
+ friend std::ostream& operator<<(std::ostream &, const Stacks &);
+public:
+ Stacks(const Manager &mgr);
+ virtual ~Stacks();
+
+ // Creates the stacks; must run before any stack is accessed.
+ void Init(size_t numStacks);
+
+ // Number of stacks.
+ size_t GetSize() const {
+ return m_stacks.size();
+ }
+
+ // Last stack; requires at least one stack to exist.
+ const NSCubePruningMiniStack::Stack &Back() const {
+ return *m_stacks.back();
+ }
+
+ // Unchecked access to stack `ind`.
+ NSCubePruningMiniStack::Stack &operator[](size_t ind) {
+ return *m_stacks[ind];
+ }
+
+ void Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle);
+ NSCubePruningMiniStack::MiniStack &GetMiniStack(const Bitmap &newBitmap, const Range &pathRange);
+
+protected:
+ const Manager &m_mgr;
+ std::vector<NSCubePruningMiniStack::Stack*> m_stacks;
+};
+
+
+}
+
+}
+
+
diff --git a/moses2/defer/CubePruningPerMiniStack/Misc.cpp b/moses2/defer/CubePruningPerMiniStack/Misc.cpp
new file mode 100644
index 000000000..de8971362
--- /dev/null
+++ b/moses2/defer/CubePruningPerMiniStack/Misc.cpp
@@ -0,0 +1,159 @@
+/*
+ * CubePruning.cpp
+ *
+ * Created on: 27 Nov 2015
+ * Author: hieu
+ */
+
+#include "Misc.h"
+#include "../Manager.h"
+#include "../../MemPool.h"
+#include "../../System.h"
+
+using namespace std;
+
+namespace Moses2
+{
+
+namespace NSCubePruningPerMiniStack
+{
+
+////////////////////////////////////////////////////////////////////////
+/**
+ * Obtains a QueueItem for position (hypoIndex, tpIndex) of `edge`.
+ *
+ * Sources, in order of preference: the caller's own item (`currItem`), the
+ * back of `queueItemRecycler`, a fresh allocation from the manager's pool.
+ * Whichever is used gets (re)initialised, which creates its hypothesis.
+ */
+QueueItem *QueueItem::Create(QueueItem *currItem,
+                             Manager &mgr,
+                             CubeEdge &edge,
+                             size_t hypoIndex,
+                             size_t tpIndex,
+                             std::deque<QueueItem*> &queueItemRecycler)
+{
+  if (currItem) {
+    // reuse the incoming item in place
+    currItem->Init(mgr, edge, hypoIndex, tpIndex);
+    return currItem;
+  }
+
+  if (queueItemRecycler.empty()) {
+    // nothing to reuse: construct a new item in pool memory
+    return new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge, hypoIndex, tpIndex);
+  }
+
+  // take the most recently recycled item
+  QueueItem *recycled = queueItemRecycler.back();
+  recycled->Init(mgr, edge, hypoIndex, tpIndex);
+  queueItemRecycler.pop_back();
+  return recycled;
+}
+
+// Stores the edge and grid position, then creates the hypothesis for them.
+// `hypo` is not in the initializer list; CreateHypothesis() assigns it.
+QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
+ :edge(&edge)
+ ,hypoIndex(hypoIndex)
+ ,tpIndex(tpIndex)
+{
+ CreateHypothesis(mgr);
+}
+
+// Re-targets an existing item at a new edge/position and creates a new
+// hypothesis for it, overwriting the previous `hypo` pointer.
+void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
+{
+ // this-> is needed because the parameters shadow the members
+ this->edge = &edge;
+ this->hypoIndex = hypoIndex;
+ this->tpIndex = tpIndex;
+
+ CreateHypothesis(mgr);
+}
+
+/**
+ * Builds `hypo` for the current (hypoIndex, tpIndex): the predecessor is the
+ * hypoIndex-th entry of the edge's sorted-and-pruned mini-stack, extended
+ * with the tpIndex-th target phrase of the edge, and then evaluated.
+ */
+void QueueItem::CreateHypothesis(Manager &mgr)
+{
+  const Hypothesis *predecessor = edge->miniStack.GetSortedAndPruneHypos(mgr)[hypoIndex];
+  const TargetPhrase &phrase = edge->tps[tpIndex];
+
+  hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
+  hypo->Init(mgr, *predecessor, edge->path, phrase, edge->newBitmap, edge->estimatedScore);
+  hypo->EvaluateWhenApplied();
+}
+
+////////////////////////////////////////////////////////////////////////
+/**
+ * Binds the edge to its source mini-stack, input path, target phrases and
+ * resulting coverage bitmap, and computes the estimated score of that bitmap
+ * once, at construction.
+ */
+CubeEdge::CubeEdge(
+  Manager &mgr,
+  const NSCubePruningMiniStack::MiniStack &miniStack,
+  const InputPath &path,
+  const TargetPhrases &tps,
+  const Bitmap &newBitmap)
+  :miniStack(miniStack)
+  ,path(path)
+  ,tps(tps)
+  ,newBitmap(newBitmap)
+  ,estimatedScore(mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap))
+{
+}
+
+// Debug output: an edge is printed as its resulting coverage bitmap only.
+std::ostream& operator<<(std::ostream &out, const CubeEdge &obj)
+{
+ out << obj.newBitmap;
+ return out;
+}
+
+/**
+ * Marks position (x, y) of this edge as visited.
+ *
+ * The position is packed into one integer, x shifted left by 16 bits plus y,
+ * and stored together with this edge's address.
+ * NOTE(review): the packed value lands in an int and the range checks that
+ * once guarded x and y are disabled, so very large indices can alias.
+ *
+ * @return true if the position was not in `seenPositions` before this call.
+ */
+bool
+CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const
+{
+  SeenPositionItem gridCell(this, (x<<16) + y);
+  return seenPositions.insert(gridCell).second;
+}
+
+/**
+ * Seeds `queue` with this edge's item at position (0, 0) and marks that
+ * position as seen. Does nothing when the source mini-stack has no
+ * sorted-and-pruned hypotheses.
+ */
+void CubeEdge::CreateFirst(Manager &mgr,
+                           Queue &queue,
+                           SeenPositions &seenPositions,
+                           std::deque<QueueItem*> &queueItemRecycler)
+{
+  if (!miniStack.GetSortedAndPruneHypos(mgr).size()) {
+    return;
+  }
+  assert(tps.GetSize());
+
+  QueueItem *firstItem = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler);
+  queue.push(firstItem);
+
+  // kept in a local so the call still happens when asserts are compiled out
+  bool firstVisit = SetSeenPosition(0, 0, seenPositions);
+  assert(firstVisit);
+}
+
+// Pushes the unseen neighbours of a popped item onto the queue: the next
+// hypothesis with the same target phrase, and the next target phrase with
+// the same hypothesis. The popped item object is reused for the first
+// neighbour that gets created; if none is created it goes to the recycler.
+void CubeEdge::CreateNext(Manager &mgr,
+ QueueItem *item,
+ Queue &queue,
+ SeenPositions &seenPositions,
+ std::deque<QueueItem*> &queueItemRecycler)
+{
+ // copy the position first: reusing `item` below overwrites its indices
+ size_t hypoIndex = item->hypoIndex;
+ size_t tpIndex = item->tpIndex;
+
+ if (hypoIndex + 1 < miniStack.GetSortedAndPruneHypos(mgr).size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) {
+ // reuse incoming queue item to create new item
+ QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler);
+ assert(newItem == item);
+ queue.push(newItem);
+ // item is now owned by the queue; it must not be reused or recycled below
+ item = NULL;
+ }
+
+ if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) {
+ // item is NULL here if it was consumed above, so Create() falls back to
+ // the recycler or a new allocation
+ QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler);
+ queue.push(newItem);
+ item = NULL;
+ }
+
+ if (item) {
+ // recycle unused queue item
+ queueItemRecycler.push_back(item);
+ }
+}
+
+}
+
+}
+
+
diff --git a/moses2/defer/CubePruningPerMiniStack/Misc.h b/moses2/defer/CubePruningPerMiniStack/Misc.h
new file mode 100644
index 000000000..511fd42f5
--- /dev/null
+++ b/moses2/defer/CubePruningPerMiniStack/Misc.h
@@ -0,0 +1,113 @@
+/*
+ * CubePruning.h
+ *
+ * Created on: 27 Nov 2015
+ * Author: hieu
+ */
+#pragma once
+#include <boost/pool/pool_alloc.hpp>
+#include <boost/unordered_map.hpp>
+#include <boost/unordered_set.hpp>
+#include <vector>
+#include <queue>
+#include "../../legacy/Range.h"
+#include "../Hypothesis.h"
+#include "../../TypeDef.h"
+#include "../../Vector.h"
+#include "../CubePruningMiniStack/Stack.h"
+
+namespace Moses2
+{
+
+class Manager;
+class InputPath;
+class TargetPhrases;
+class Bitmap;
+
+namespace NSCubePruningPerMiniStack
+{
+class CubeEdge;
+
+///////////////////////////////////////////
+// One entry of the cube-pruning queue: an edge, a position on that edge
+// (hypothesis index, target-phrase index) and the hypothesis created for it.
+class QueueItem
+{
+ ~QueueItem(); // NOT IMPLEMENTED. Use MemPool
+public:
+ // Returns an initialised item, reusing currItem or a recycled item when
+ // possible and allocating from the manager's pool otherwise.
+ static QueueItem *Create(QueueItem *currItem,
+ Manager &mgr,
+ CubeEdge &edge,
+ size_t hypoIndex,
+ size_t tpIndex,
+ std::deque<QueueItem*> &queueItemRecycler);
+ QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
+
+ // Re-targets an existing item and creates a new hypothesis for it.
+ void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
+
+ CubeEdge *edge;
+ size_t hypoIndex, tpIndex;
+ Hypothesis *hypo;
+
+protected:
+ // Assigns `hypo` from edge, hypoIndex and tpIndex.
+ void CreateHypothesis(Manager &mgr);
+};
+
+///////////////////////////////////////////
+/**
+ * Comparator for the cube-pruning priority queue (CubeEdge::Queue).
+ *
+ * std::priority_queue needs a strict weak ordering, i.e. comp(x, x) must be
+ * false. Negating HypothesisFutureScoreOrderer gives a relation that is true
+ * in both directions for tied items, so the arguments are swapped instead:
+ * the result is identical for every non-tied pair and false for ties.
+ */
+class QueueItemOrderer
+{
+public:
+  /**
+   * @return true if HypothesisFutureScoreOrderer places itemB's hypothesis
+   *         strictly before itemA's.
+   */
+  bool operator()(QueueItem* itemA, QueueItem* itemB) const {
+    HypothesisFutureScoreOrderer orderer;
+    return orderer(itemB->hypo, itemA->hypo);
+  }
+};
+
+///////////////////////////////////////////
+// One edge of the cube-pruning search graph. It holds references to a source
+// mini-stack, an input path, the target phrases to apply and the coverage
+// bitmap of the resulting hypotheses, plus an estimated score.
+class CubeEdge
+{
+ friend std::ostream& operator<<(std::ostream &, const CubeEdge &);
+
+public:
+ // Priority queue of pending items, ordered by QueueItemOrderer.
+ typedef std::priority_queue<QueueItem*,
+ std::vector<QueueItem*>,
+ QueueItemOrderer> Queue;
+
+ // (edge, packed position) key; the position is stored in a plain int.
+ typedef std::pair<const CubeEdge*, int> SeenPositionItem;
+ typedef boost::unordered_set<SeenPositionItem,
+ boost::hash<SeenPositionItem>,
+ std::equal_to<SeenPositionItem>
+ > SeenPositions;
+
+ // All four are references: the edge does not own these objects.
+ const NSCubePruningMiniStack::MiniStack &miniStack;
+ const InputPath &path;
+ const TargetPhrases &tps;
+ const Bitmap &newBitmap;
+ SCORE estimatedScore;
+
+ CubeEdge(Manager &mgr,
+ const NSCubePruningMiniStack::MiniStack &miniStack,
+ const InputPath &path,
+ const TargetPhrases &tps,
+ const Bitmap &newBitmap);
+
+ // Records position (x, y) of this edge; returns true if it was not seen before.
+ bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const;
+
+ // Pushes the item for position (0, 0) if the source mini-stack is not empty.
+ void CreateFirst(Manager &mgr,
+ Queue &queue,
+ SeenPositions &seenPositions,
+ std::deque<QueueItem*> &queueItemRecycler);
+ // Pushes the unseen neighbours of `item`, reusing or recycling `item`.
+ void CreateNext(Manager &mgr,
+ QueueItem *item,
+ Queue &queue,
+ SeenPositions &seenPositions,
+ std::deque<QueueItem*> &queueItemRecycler);
+
+
+protected:
+
+};
+
+}
+
+}
+
+
diff --git a/moses2/defer/CubePruningPerMiniStack/Search.cpp b/moses2/defer/CubePruningPerMiniStack/Search.cpp
new file mode 100644
index 000000000..1de52cb3d
--- /dev/null
+++ b/moses2/defer/CubePruningPerMiniStack/Search.cpp
@@ -0,0 +1,246 @@
+/*
+ * Search.cpp
+ *
+ * Created on: 16 Nov 2015
+ * Author: hieu
+ */
+#include <boost/foreach.hpp>
+#include "Search.h"
+#include "../Manager.h"
+#include "../Hypothesis.h"
+#include "../../InputPaths.h"
+#include "../../InputPath.h"
+#include "../../System.h"
+#include "../../Sentence.h"
+#include "../../TranslationTask.h"
+#include "../../legacy/Util2.h"
+
+using namespace std;
+
+namespace Moses2
+{
+
+namespace NSCubePruningPerMiniStack
+{
+
+////////////////////////////////////////////////////////////////////////
+// Constructs the search for one Manager: the base class and m_stacks both
+// receive the manager, and the priority queue (empty vector, default
+// QueueItemOrderer) and the seen-position set start out empty.
+Search::Search(Manager &mgr)
+ :Moses2::Search(mgr)
+ ,m_stacks(mgr)
+
+ ,m_queue(QueueItemOrderer(),
+ std::vector<QueueItem*>() )
+
+ ,m_seenPositions()
+{
+}
+
+// Empty destructor body: nothing is released explicitly here.
+Search::~Search()
+{
+}
+
+/**
+ * Top-level decoding entry point.
+ *
+ * Sizes the stacks to (input size + 1), seeds them with the initial empty
+ * hypothesis, builds the cube edges for every stack except the last one, and
+ * then decodes stacks 1 .. last in order.
+ */
+void Search::Decode()
+{
+  m_stacks.Init(mgr.GetInput().GetSize() + 1);
+
+  // seed hypothesis: initial bitmap, blank input path, initial phrase
+  const Bitmap &startBitmap = mgr.GetBitmaps().GetInitialBitmap();
+  Hypothesis *seedHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
+  seedHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), startBitmap);
+  seedHypo->EmptyHypothesisState(mgr.GetInput());
+  m_stacks.Add(seedHypo, mgr.GetHypoRecycle());
+
+  // phase 1: create the edges leaving each stack but the last
+  size_t sourceStack = 0;
+  while (sourceStack < m_stacks.GetSize() - 1) {
+    CreateSearchGraph(sourceStack);
+    ++sourceStack;
+  }
+
+  // phase 2: fill stacks 1 .. last
+  size_t targetStack = 1;
+  while (targetStack < m_stacks.GetSize()) {
+    Decode(targetStack);
+    ++targetStack;
+  }
+}
+
+/** Decodes stack `stackInd` by running one queue pass per mini-stack in it. */
+void Search::Decode(size_t stackInd)
+{
+  typedef NSCubePruningMiniStack::Stack::Coll MiniStackColl;
+
+  NSCubePruningMiniStack::Stack &currentStack = m_stacks[stackInd];
+  BOOST_FOREACH(MiniStackColl::value_type &entry, currentStack.GetColl()) {
+    Decode(*entry.second);
+  }
+}
+
+// Runs one cube-pruning pass for a single mini-stack:
+//  1. items left in the queue by the previous pass are recycled,
+//  2. every edge registered for this mini-stack seeds the queue,
+//  3. up to mgr.system.popLimit items are popped; each popped hypothesis is
+//     added to m_stacks and its edge is asked to create successor items.
+void Search::Decode(NSCubePruningMiniStack::MiniStack &miniStack)
+{
+ Recycler<Hypothesis*> &hypoRecycler = mgr.GetHypoRecycle();
+
+ // reuse queue from previous stack. Clear it first
+ // Container() is declared elsewhere; here it yields the vector that is
+ // cleared below to empty the queue.
+ std::vector<QueueItem*> &container = Container(m_queue);
+ //cerr << "container=" << container.size() << endl;
+ BOOST_FOREACH(QueueItem *item, container) {
+ // recycle unused hypos from queue
+ Hypothesis *hypo = item->hypo;
+ hypoRecycler.Recycle(hypo);
+
+ // recycle queue item
+ m_queueItemRecycler.push_back(item);
+ }
+ container.clear();
+
+ m_seenPositions.clear();
+
+ // add top hypo from every edge into queue
+ // NOTE(review): operator[] inserts a NULL entry for a mini-stack with no
+ // registered edges and that NULL is dereferenced here -- assumes every
+ // mini-stack was registered by CreateSearchGraph; confirm.
+ CubeEdges &edges = *m_cubeEdges[&miniStack];
+
+ BOOST_FOREACH(CubeEdge *edge, edges) {
+ //cerr << "edge=" << *edge << endl;
+ edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
+ }
+
+ size_t pops = 0;
+ while (!m_queue.empty() && pops < mgr.system.popLimit) {
+ // get best hypo from queue, add to stack
+ //cerr << "queue=" << queue.size() << endl;
+ QueueItem *item = m_queue.top();
+ m_queue.pop();
+
+ // read edge and hypo before CreateNext, which is handed the item itself
+ CubeEdge *edge = item->edge;
+
+ // add hypo to stack
+ Hypothesis *hypo = item->hypo;
+ //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
+ m_stacks.Add(hypo, hypoRecycler);
+
+ edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
+
+ ++pops;
+ }
+
+ /*
+ // create hypo from every edge. Increase diversity
+ while (!m_queue.empty()) {
+ QueueItem *item = m_queue.top();
+ m_queue.pop();
+
+ if (item->hypoIndex == 0 && item->tpIndex == 0) {
+ CubeEdge &edge = item->edge;
+
+ // add hypo to stack
+ Hypothesis *hypo = item->hypo;
+ //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
+ m_stacks.Add(hypo, mgr.GetHypoRecycle());
+ }
+ }
+ */
+}
+
+
+// Builds the cube edges that leave stack `stackInd`.
+// For every mini-stack in that stack and every used input path that
+// CanExtend() accepts, one CubeEdge is allocated per non-empty target-phrase
+// list and filed in m_cubeEdges under the mini-stack the extension lands in.
+void Search::CreateSearchGraph(size_t stackInd)
+{
+ NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
+ MemPool &pool = mgr.GetPool();
+
+ BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
+ // the mini-stack key is a (bitmap pointer, end position) pair
+ const Bitmap &hypoBitmap = *val.first.first;
+ size_t hypoEndPos = val.first.second;
+ //cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl;
+
+ // create edges to next hypos from existing hypos
+ const InputPaths &paths = mgr.GetInputPaths();
+
+ BOOST_FOREACH(const InputPath *path, paths) {
+ const Range &pathRange = path->range;
+ //cerr << "pathRange=" << pathRange << endl;
+
+ if (!path->IsUsed()) {
+ continue;
+ }
+ if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
+ continue;
+ }
+
+ const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
+
+ // sort hypo for a particular bitmap and hypoEndPos
+ const NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
+
+
+ // add cube edge
+ // one target-phrase list per entry of mgr.system.mappings
+ size_t numPt = mgr.system.mappings.size();
+ for (size_t i = 0; i < numPt; ++i) {
+ const TargetPhrases *tps = path->targetPhrases[i];
+ if (tps && tps->GetSize()) {
+ // create next mini stack
+ NSCubePruningMiniStack::MiniStack &nextMiniStack = m_stacks.GetMiniStack(newBitmap, pathRange);
+
+ // edge objects are placement-constructed in the manager's pool
+ CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, miniStack, *path, *tps, newBitmap);
+
+ // find the edge list of the destination mini-stack, creating it on first use
+ CubeEdges *edges;
+ boost::unordered_map<NSCubePruningMiniStack::MiniStack*, CubeEdges*>::iterator iter = m_cubeEdges.find(&nextMiniStack);
+ if (iter == m_cubeEdges.end()) {
+ edges = new (pool.Allocate<CubeEdges>()) CubeEdges();
+ m_cubeEdges[&nextMiniStack] = edges;
+ } else {
+ edges = iter->second;
+ }
+
+ edges->push_back(edge);
+ }
+ }
+ }
+ }
+
+}
+
+
+/**
+ * @return the first hypothesis from GetBestHypos(1) on the last stack, or NULL
+ *         when that list is empty.
+ */
+const Hypothesis *Search::GetBestHypo() const
+{
+  const NSCubePruningMiniStack::Stack &finalStack = m_stacks.Back();
+  std::vector<const Hypothesis*> topOne = finalStack.GetBestHypos(1);
+
+  if (topOne.empty()) {
+    return NULL;
+  }
+  return topOne[0];
+}
+
+/**
+ * Debug aid: prints to stderr a histogram "size=count" giving, for every
+ * mini-stack collection size seen across all stacks, how many mini-stacks
+ * have that size.
+ */
+void Search::DebugCounts()
+{
+  // collection size -> number of mini-stacks with that size
+  std::map<size_t, size_t> counts;
+
+  for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) {
+    //cerr << "stackInd=" << stackInd << endl;
+    const NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
+    BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
+      const NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
+      size_t count = miniStack.GetColl().size();
+
+      // std::map::operator[] value-initialises a missing entry to 0, so a
+      // plain increment also counts the first mini-stack of each size
+      // (it used to be recorded as 0, leaving every bucket one too low).
+      ++counts[count];
+    }
+    //cerr << m_stacks << endl;
+  }
+
+  std::map<size_t, size_t>::const_iterator iter;
+  for (iter = counts.begin(); iter != counts.end(); ++iter) {
+    cerr << iter->first << "=" << iter->second << " ";
+  }
+  cerr << endl;
+}
+
+
+
+}
+
+}
+
+
diff --git a/moses2/defer/CubePruningPerMiniStack/Search.h b/moses2/defer/CubePruningPerMiniStack/Search.h
new file mode 100644
index 000000000..2adb9631c
--- /dev/null
+++ b/moses2/defer/CubePruningPerMiniStack/Search.h
@@ -0,0 +1,66 @@
+/*
+ * Search.h
+ *
+ * Created on: 16 Nov 2015
+ * Author: hieu
+ */
+
+#pragma once
+#include <boost/pool/pool_alloc.hpp>
+#include <boost/unordered_map.hpp>
+#include "../Search.h"
+#include "Misc.h"
+#include "Stacks.h"
+#include "../../legacy/Range.h"
+
+namespace Moses2
+{
+
+class Bitmap;
+class Hypothesis;
+class InputPath;
+class TargetPhrases;
+
+namespace NSCubePruningMiniStack
+{
+class MiniStack;
+}
+
+namespace NSCubePruningPerMiniStack
+{
+
+// Search implementation in namespace NSCubePruningPerMiniStack: a
+// Moses2::Search that keeps its own stacks, one shared priority queue and the
+// cube edges registered per destination mini-stack.
+class Search : public Moses2::Search
+{
+public:
+ Search(Manager &mgr);
+ virtual ~Search();
+
+ // Runs the whole search.
+ virtual void Decode();
+ // Best hypothesis of the last stack, or NULL if there is none.
+ const Hypothesis *GetBestHypo() const;
+
+protected:
+ Stacks m_stacks;
+
+ // Queue and seen-position set are members so they can be reused between
+ // decoding passes.
+ CubeEdge::Queue m_queue;
+ CubeEdge::SeenPositions m_seenPositions;
+
+ // CUBE PRUNING VARIABLES
+ // setup
+ typedef std::vector<CubeEdge*> CubeEdges;
+ // destination mini-stack -> edges that feed it
+ boost::unordered_map<NSCubePruningMiniStack::MiniStack*, CubeEdges*> m_cubeEdges;
+
+ // QueueItem objects available for reuse
+ std::deque<QueueItem*> m_queueItemRecycler;
+
+ // CUBE PRUNING
+ // decoding
+ void CreateSearchGraph(size_t stackInd);
+ void Decode(size_t stackInd);
+ void Decode(NSCubePruningMiniStack::MiniStack &miniStack);
+
+ // Prints a histogram of mini-stack sizes to stderr.
+ void DebugCounts();
+};
+
+}
+
+}
+
diff --git a/moses2/defer/CubePruningPerMiniStack/Stacks.cpp b/moses2/defer/CubePruningPerMiniStack/Stacks.cpp
new file mode 100644
index 000000000..4e81e8e48
--- /dev/null
+++ b/moses2/defer/CubePruningPerMiniStack/Stacks.cpp
@@ -0,0 +1,72 @@
+/*
+ * Stacks.cpp
+ *
+ * Created on: 6 Nov 2015
+ * Author: hieu
+ */
+
+#include "Stacks.h"
+#include "../../System.h"
+#include "../Manager.h"
+
+using namespace std;
+
+namespace Moses2
+{
+
+namespace NSCubePruningPerMiniStack
+{
+
+// Stores a reference to the manager; the stacks themselves are created by Init().
+Stacks::Stacks(const Manager &mgr)
+ :m_mgr(mgr)
+{
+}
+
+// Empty destructor body: the Stack objects pointed to by m_stacks are not
+// destroyed or freed here.
+Stacks::~Stacks()
+{
+}
+
+/**
+ * Creates `numStacks` stacks, each placement-constructed in memory obtained
+ * from the manager's pool.
+ */
+void Stacks::Init(size_t numStacks)
+{
+  typedef NSCubePruningMiniStack::Stack StackType;
+
+  m_stacks.resize(numStacks);
+  for (std::vector<StackType*>::iterator slot = m_stacks.begin(); slot != m_stacks.end(); ++slot) {
+    *slot = new (m_mgr.GetPool().Allocate<StackType>()) StackType(m_mgr);
+  }
+}
+
+
+/** Writes the hypothesis count of every stack, each followed by a space. */
+std::ostream& operator<<(std::ostream &out, const Stacks &obj)
+{
+  size_t pos = 0;
+  while (pos < obj.GetSize()) {
+    const NSCubePruningMiniStack::Stack &current = *obj.m_stacks[pos];
+    out << current.GetHypoSize() << " ";
+    ++pos;
+  }
+  return out;
+}
+
+/**
+ * Adds `hypo` to the stack whose index equals the number of words covered by
+ * the hypothesis' bitmap.
+ */
+void Stacks::Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle)
+{
+  const size_t stackIndex = hypo->GetBitmap().GetNumWordsCovered();
+  NSCubePruningMiniStack::Stack &destination = *m_stacks[stackIndex];
+  destination.Add(hypo, hypoRecycle);
+}
+
+/**
+ * Looks up the mini-stack for an extension.
+ *
+ * The stack is chosen by the number of words covered in `newBitmap`; the
+ * mini-stack inside it is keyed by (bitmap pointer, end position of
+ * `pathRange`).
+ *
+ * @return the mini-stack returned by Stack::GetMiniStack() for that key.
+ *         The call result used to be discarded and control ran off the end
+ *         of this reference-returning function (undefined behaviour).
+ */
+NSCubePruningMiniStack::MiniStack &Stacks::GetMiniStack(const Bitmap &newBitmap, const Range &pathRange)
+{
+  size_t numWordsCovered = newBitmap.GetNumWordsCovered();
+  //cerr << "numWordsCovered=" << numWordsCovered << endl;
+  NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered];
+
+  NSCubePruningMiniStack::Stack::HypoCoverage key(&newBitmap, pathRange.GetEndPos());
+  return stack.GetMiniStack(key);
+}
+
+}
+
+}
+
+
diff --git a/moses2/defer/CubePruningPerMiniStack/Stacks.h b/moses2/defer/CubePruningPerMiniStack/Stacks.h
new file mode 100644
index 000000000..74469b767
--- /dev/null
+++ b/moses2/defer/CubePruningPerMiniStack/Stacks.h
@@ -0,0 +1,55 @@
+/*
+ * Stacks.h
+ *
+ * Created on: 6 Nov 2015
+ * Author: hieu
+ */
+
+#pragma once
+
+#include <vector>
+#include "../CubePruningMiniStack/Stack.h"
+#include "../../Recycler.h"
+
+namespace Moses2
+{
+class Manager;
+
+namespace NSCubePruningPerMiniStack
+{
+
+// A vector of NSCubePruningMiniStack::Stack pointers bound to one Manager,
+// with helpers to add hypotheses and look up mini-stacks.
+class Stacks
+{
+ friend std::ostream& operator<<(std::ostream &, const Stacks &);
+public:
+ Stacks(const Manager &mgr);
+ virtual ~Stacks();
+
+ // Creates the stacks; must run before any stack is accessed.
+ void Init(size_t numStacks);
+
+ // Number of stacks.
+ size_t GetSize() const {
+ return m_stacks.size();
+ }
+
+ // Last stack; requires at least one stack to exist.
+ const NSCubePruningMiniStack::Stack &Back() const {
+ return *m_stacks.back();
+ }
+
+ // Unchecked access to stack `ind`.
+ NSCubePruningMiniStack::Stack &operator[](size_t ind) {
+ return *m_stacks[ind];
+ }
+
+ void Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle);
+ NSCubePruningMiniStack::MiniStack &GetMiniStack(const Bitmap &newBitmap, const Range &pathRange);
+
+protected:
+ const Manager &m_mgr;
+ std::vector<NSCubePruningMiniStack::Stack*> m_stacks;
+};
+
+
+}
+
+}
+
+
diff --git a/contrib/moses2/legacy/Bitmap.cpp b/moses2/legacy/Bitmap.cpp
index a8dc7db4d..ed5ccd750 100644
--- a/contrib/moses2/legacy/Bitmap.cpp
+++ b/moses2/legacy/Bitmap.cpp
@@ -26,7 +26,7 @@ namespace Moses2
{
Bitmap::Bitmap(MemPool &pool, size_t size) :
- m_bitmap(pool, size)
+ m_bitmap(pool, size)
{
}
@@ -47,9 +47,9 @@ void Bitmap::Init(const std::vector<bool>& initializer)
// Find the first gap, and cache it.
Array<char>::const_iterator first_gap = std::find(m_bitmap.begin(),
- m_bitmap.end(), false);
+ m_bitmap.end(), false);
m_firstGap = ((first_gap == m_bitmap.end()) ?
- NOT_FOUND: first_gap - m_bitmap.begin());
+ NOT_FOUND: first_gap - m_bitmap.begin());
}
void Bitmap::Init(const Bitmap &copy, const Range &range)
diff --git a/moses2/legacy/Bitmap.h b/moses2/legacy/Bitmap.h
new file mode 100644
index 000000000..3ceb9b01d
--- /dev/null
+++ b/moses2/legacy/Bitmap.h
@@ -0,0 +1,240 @@
+// $Id$
+
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+
+#pragma once
+
+#include <algorithm>
+#include <limits>
+#include <vector>
+#include <iostream>
+#include <cstring>
+#include <cmath>
+#include <cstdlib>
+#include "Range.h"
+#include "../Array.h"
+
+namespace Moses2
+{
+class MemPool;
+
+typedef unsigned long WordsBitmapID;
+
+/** Vector of boolean to represent whether a word has been translated or not.
+ *
+ * Implemented using a vector of char, which is usually the same representation
+ * for the elements that a C array of bool would use. A vector of bool, or a
+ * Boost dynamic_bitset, could be much more efficient in theory. Unfortunately
+ * algorithms like std::find() are not optimized for vector<bool> on gcc or
+ * clang, and dynamic_bitset lacks all the optimized search operations we want.
+ * Only benchmarking will tell what works best. Perhaps dynamic_bitset could
+ * still be a dramatic improvement, if we flip the meaning of the bits around
+ * so we can use its find_first() and find_next() for the most common searches.
+ */
+class Bitmap
+{
+ friend std::ostream& operator<<(std::ostream& out, const Bitmap& bitmap);
+private:
+ Array<char> m_bitmap; //! One entry per source word; a non-zero entry means the word has been covered (translated).
+ size_t m_firstGap; //! Cached position of first gap (first uncovered word), or NOT_FOUND.
+ size_t m_numWordsCovered; //! Cached count of covered positions; updated by SetValue() and SetValueNonOverlap().
+ // Default construction and assignment are declared private so that outside code cannot use them.
+ Bitmap(); // not implemented
+ Bitmap& operator=(const Bitmap& other); // presumably also left unimplemented -- confirm in Bitmap.cpp
+
+ /** Refresh the cached first gap after positions [startPos, endPos] (inclusive) have all been set to value. */
+ void UpdateFirstGap(size_t startPos, size_t endPos, bool value) {
+ if (value) {
+ // positions were covered: the cached gap only changes if it lies inside the newly covered span
+ if (startPos <= m_firstGap && m_firstGap <= endPos) {
+ m_firstGap = NOT_FOUND;
+ for (size_t i = endPos + 1; i < m_bitmap.size(); ++i) { // if the cache was accurate, nothing left of endPos is uncovered any more
+ if (!m_bitmap[i]) {
+ m_firstGap = i;
+ break;
+ }
+ }
+ }
+
+ } else {
+ // positions were uncovered: startPos becomes the first gap if it lies left of the cached one
+ if (startPos < m_firstGap) {
+ m_firstGap = startPos;
+ }
+ }
+ }
+
+ //! Mark every position of range (both ends inclusive) as covered. The count grows by range.GetNumWordsCovered() without checking for positions that were already covered, hence "NonOverlap".
+ void
+ SetValueNonOverlap(Range const& range) {
+ size_t startPos = range.GetStartPos();
+ size_t endPos = range.GetEndPos();
+
+ for(size_t pos = startPos; pos <= endPos; pos++) {
+ m_bitmap[pos] = true;
+ }
+
+ m_numWordsCovered += range.GetNumWordsCovered();
+ UpdateFirstGap(startPos, endPos, true);
+ }
+
+public:
+ //! Create Bitmap of length size with storage taken from pool. The contents and the cached fields are set afterwards by Init().
+ explicit Bitmap(MemPool &pool, size_t size);
+ //! Both Init() overloads are defined in Bitmap.cpp. The (copy, range) form presumably copies and then covers range -- TODO confirm.
+ void Init(const std::vector<bool>& initializer);
+ void Init(const Bitmap &copy, const Range &range);
+
+ //! Count of words translated.
+ size_t GetNumWordsCovered() const {
+ return m_numWordsCovered;
+ }
+
+ //! position of 1st word not yet translated, or NOT_FOUND if everything already translated (returns the cached value, no scan)
+ size_t GetFirstGapPos() const {
+ return m_firstGap;
+ }
+
+ //! position of last word not yet translated, or NOT_FOUND if everything already translated
+ size_t GetLastGapPos() const {
+ for (int pos = int(m_bitmap.size()) - 1; pos >= 0; pos--) { // right-to-left scan; the index is an int, so sizes above INT_MAX are not handled
+ if (!m_bitmap[pos]) {
+ return pos;
+ }
+ }
+ // no uncovered position found
+ return NOT_FOUND;
+ }
+
+ //! position of last translated word, or NOT_FOUND if nothing has been translated yet
+ size_t GetLastPos() const {
+ for (int pos = int(m_bitmap.size()) - 1; pos >= 0; pos--) { // right-to-left scan; the index is an int, so sizes above INT_MAX are not handled
+ if (m_bitmap[pos]) {
+ return pos;
+ }
+ }
+ // no covered position found
+ return NOT_FOUND;
+ }
+
+ //! whether a word has been translated at a particular position
+ bool GetValue(size_t pos) const {
+ return bool(m_bitmap[pos]);
+ }
+ //! set value at a particular position and keep m_firstGap and m_numWordsCovered in step; nothing happens if the value is unchanged
+ void SetValue( size_t pos, bool value ) {
+ bool origValue = m_bitmap[pos];
+ if (origValue == value) {
+ // do nothing
+ } else {
+ m_bitmap[pos] = value;
+ UpdateFirstGap(pos, pos, value);
+ if (value) {
+ ++m_numWordsCovered;
+ } else {
+ --m_numWordsCovered;
+ }
+ }
+ }
+
+ //! whether every word has been translated (compares the cached count with the size)
+ bool IsComplete() const {
+ return GetSize() == GetNumWordsCovered();
+ }
+ //! whether the wordrange overlaps with any translated word in this bitmap; positions are indexed directly, so compare is expected to lie inside the bitmap
+ bool Overlap(const Range &compare) const {
+ for (size_t pos = compare.GetStartPos(); pos <= compare.GetEndPos(); pos++) {
+ if (m_bitmap[pos])
+ return true;
+ }
+ return false;
+ }
+ //! number of elements
+ size_t GetSize() const {
+ return m_bitmap.size();
+ }
+ //! Walk left from l while position l-1 is uncovered; returns the smallest index reached (l itself if l is 0 or l-1 is covered).
+ inline size_t GetEdgeToTheLeftOf(size_t l) const {
+ if (l == 0) return l;
+ while (l && !m_bitmap[l-1]) {
+ --l;
+ }
+ return l;
+ }
+ //! Index just before the first covered position to the right of r, or the last index if there is none. r must be below GetSize(): a larger r would form an iterator past end().
+ inline size_t GetEdgeToTheRightOf(size_t r) const {
+ if (r+1 == m_bitmap.size()) return r;
+ return (
+ std::find(m_bitmap.begin() + r + 1, m_bitmap.end(), true) -
+ m_bitmap.begin()
+ ) - 1;
+ }
+
+ //! converts bitmap into an integer ID made of two parts: the low 16 bits hold the coverage pattern of the positions after the first gap up to and including the last covered word (position firstGap+1 is the lowest bit), and the part above them holds the first gap position (the bitmap size if there is no gap). The asserts enforce a sentence length limit of 65535 and a max distortion of 16.
+ WordsBitmapID GetID() const {
+ assert(m_bitmap.size() < (1<<16)); // NOTE(review): <cassert> is not included directly by this header; presumably it arrives via Range.h or Array.h -- confirm
+
+ size_t start = GetFirstGapPos();
+ if (start == NOT_FOUND) start = m_bitmap.size(); // nothing left
+
+ size_t end = GetLastPos();
+ if (end == NOT_FOUND) end = 0;// nothing translated yet
+
+ assert(end < start || end-start <= 16);
+ WordsBitmapID id = 0;
+ for(size_t pos = end; pos > start; pos--) { // runs from the last covered word down to start+1; skipped entirely when end <= start
+ id = id*2 + (int) GetValue(pos);
+ }
+ return id + (1<<16) * start;
+ }
+
+ //! converts bitmap into an integer ID, with an additional span covered: same encoding as GetID(), computed as if [startPos, endPos] were covered too; the bitmap itself is not changed
+ WordsBitmapID GetIDPlus( size_t startPos, size_t endPos ) const {
+ assert(m_bitmap.size() < (1<<16));
+
+ size_t start = GetFirstGapPos();
+ if (start == NOT_FOUND) start = m_bitmap.size(); // nothing left
+
+ size_t end = GetLastPos();
+ if (end == NOT_FOUND) end = 0;// nothing translated yet
+ // NOTE(review): start only moves to endPos+1, even if that position is already covered, so the result can differ from GetID() taken after really covering the span -- confirm this is intended.
+ if (start == startPos) start = endPos+1;
+ if (end < endPos) end = endPos;
+
+ assert(end < start || end-start <= 16);
+ WordsBitmapID id = 0;
+ for(size_t pos = end; pos > start; pos--) {
+ id = id*2;
+ if (GetValue(pos) || (startPos<=pos && pos<=endPos)) // a bit is set if it is covered already or lies inside the added span
+ id++;
+ }
+ return id + (1<<16) * start;
+ }
+
+ // for unordered_set in stack; hash() and operator== are only declared in this header
+ size_t hash() const;
+ bool operator==(const Bitmap& other) const;
+ bool operator!=(const Bitmap& other) const {
+ return !(*this == other);
+ }
+
+};
+
+}
diff --git a/contrib/moses2/legacy/Bitmaps.cpp b/moses2/legacy/Bitmaps.cpp
index 879ad9d71..b1fee5ea6 100644
--- a/contrib/moses2/legacy/Bitmaps.cpp
+++ b/moses2/legacy/Bitmaps.cpp
@@ -8,7 +8,7 @@ namespace Moses2
{
Bitmaps::Bitmaps(MemPool &pool) :
- m_pool(pool)
+ m_pool(pool)
{
}
@@ -17,7 +17,7 @@ Bitmaps::~Bitmaps()
}
void Bitmaps::Init(size_t inputSize,
- const std::vector<bool> &initSourceCompleted)
+ const std::vector<bool> &initSourceCompleted)
{
m_initBitmap = new (m_pool.Allocate<Bitmap>()) Bitmap(m_pool, inputSize);
m_initBitmap->Init(initSourceCompleted);
@@ -29,8 +29,7 @@ const Bitmap &Bitmaps::GetNextBitmap(const Bitmap &bm, const Range &range)
Bitmap *newBM;
if (m_recycler.empty()) {
newBM = new (m_pool.Allocate<Bitmap>()) Bitmap(m_pool, bm.GetSize());
- }
- else {
+ } else {
newBM = m_recycler.top();
m_recycler.pop();
}
@@ -41,8 +40,7 @@ const Bitmap &Bitmaps::GetNextBitmap(const Bitmap &bm, const Range &range)
if (iter == m_coll.end()) {
m_coll[newBM] = NextBitmaps();
return *newBM;
- }
- else {
+ } else {
m_recycler.push(newBM);
return *iter->first;
@@ -61,8 +59,7 @@ const Bitmap &Bitmaps::GetBitmap(const Bitmap &bm, const Range &range)
// not seen the link yet.
newBM = &GetNextBitmap(bm, range);
next[&range] = newBM;
- }
- else {
+ } else {
// link exist
//std::cerr << "link exists" << endl;
newBM = iterNext->second;
diff --git a/contrib/moses2/legacy/Bitmaps.h b/moses2/legacy/Bitmaps.h
index d8207b59e..aa0ea8f82 100644
--- a/contrib/moses2/legacy/Bitmaps.h
+++ b/moses2/legacy/Bitmaps.h
@@ -15,7 +15,7 @@ class Bitmaps
{
typedef boost::unordered_map<const Range*, const Bitmap*> NextBitmaps;
typedef boost::unordered_map<const Bitmap*, NextBitmaps,
- UnorderedComparer<Bitmap>, UnorderedComparer<Bitmap> > Coll;
+ UnorderedComparer<Bitmap>, UnorderedComparer<Bitmap> > Coll;
//typedef std::set<const Bitmap*, OrderedComparer<Bitmap> > Coll;
Coll m_coll;
Bitmap *m_initBitmap;
@@ -29,8 +29,7 @@ public:
virtual ~Bitmaps();
void Init(size_t inputSize, const std::vector<bool> &initSourceCompleted);
- const Bitmap &GetInitialBitmap() const
- {
+ const Bitmap &GetInitialBitmap() const {
return *m_initBitmap;
}
const Bitmap &GetBitmap(const Bitmap &bm, const Range &range);
diff --git a/contrib/moses2/legacy/Factor.cpp b/moses2/legacy/Factor.cpp
index be9bad2c1..be9bad2c1 100644
--- a/contrib/moses2/legacy/Factor.cpp
+++ b/moses2/legacy/Factor.cpp
diff --git a/contrib/moses2/legacy/Factor.h b/moses2/legacy/Factor.h
index 99d53f4f0..541f2364a 100644
--- a/contrib/moses2/legacy/Factor.h
+++ b/moses2/legacy/Factor.h
@@ -49,14 +49,12 @@ class Factor
size_t m_id;
//! protected constructor. only friend class, FactorCollection, is allowed to create Factor objects
- Factor()
- {
+ Factor() {
}
// Needed for STL containers. They'll delegate through FactorFriend, which is never exposed publicly.
Factor(const Factor &factor) :
- m_string(factor.m_string), m_id(factor.m_id)
- {
+ m_string(factor.m_string), m_id(factor.m_id) {
}
// Not implemented. Shouldn't be called.
@@ -64,13 +62,11 @@ class Factor
public:
//! original string representation of the factor
- StringPiece GetString() const
- {
+ StringPiece GetString() const {
return m_string;
}
//! contiguous ID
- inline size_t GetId() const
- {
+ inline size_t GetId() const {
return m_id;
}
@@ -79,21 +75,18 @@ public:
* +1 = more than
* 0 = same
*/
- inline int Compare(const Factor &compare) const
- {
+ inline int Compare(const Factor &compare) const {
if (this < &compare) return -1;
if (this > &compare) return 1;
return 0;
}
//! transitive comparison used for adding objects into FactorCollection
- inline bool operator<(const Factor &compare) const
- {
+ inline bool operator<(const Factor &compare) const {
return this < &compare;
}
// quick equality comparison. Not used
- inline bool operator==(const Factor &compare) const
- {
+ inline bool operator==(const Factor &compare) const {
return this == &compare;
}
};
diff --git a/contrib/moses2/legacy/FactorCollection.cpp b/moses2/legacy/FactorCollection.cpp
index f8beb9b40..80081bab9 100644
--- a/contrib/moses2/legacy/FactorCollection.cpp
+++ b/moses2/legacy/FactorCollection.cpp
@@ -55,14 +55,13 @@ const Factor *FactorCollection::AddFactor(const StringPiece &factorString,
std::pair<Set::iterator, bool> ret(set.insert(to_ins));
if (ret.second) {
ret.first->in.m_string.set(
- memcpy(m_string_backing.Allocate(factorString.size()),
- factorString.data(), factorString.size()), factorString.size());
+ memcpy(m_string_backing.Allocate(factorString.size()),
+ factorString.data(), factorString.size()), factorString.size());
if (isNonTerminal) {
m_factorIdNonTerminal++;
UTIL_THROW_IF2(m_factorIdNonTerminal >= moses_MaxNumNonterminals,
- "Number of non-terminals exceeds maximum size reserved. Adjust parameter moses_MaxNumNonterminals, then recompile");
- }
- else {
+ "Number of non-terminals exceeds maximum size reserved. Adjust parameter moses_MaxNumNonterminals, then recompile");
+ } else {
m_factorId++;
}
}
@@ -101,7 +100,7 @@ ostream& operator<<(ostream& out, const FactorCollection& factorCollection)
boost::shared_lock<boost::shared_mutex> lock(factorCollection.m_accessLock);
#endif
for (FactorCollection::Set::const_iterator i = factorCollection.m_set.begin();
- i != factorCollection.m_set.end(); ++i) {
+ i != factorCollection.m_set.end(); ++i) {
out << i->in;
}
return out;
diff --git a/contrib/moses2/legacy/FactorCollection.h b/moses2/legacy/FactorCollection.h
index 0430e5cde..1b29dee69 100644
--- a/contrib/moses2/legacy/FactorCollection.h
+++ b/moses2/legacy/FactorCollection.h
@@ -52,8 +52,7 @@ class System;
* FactorFriend's public copy constructor and everybody else sees Factor's
* private copy constructor.
*/
-struct FactorFriend
-{
+struct FactorFriend {
Factor in;
};
@@ -71,19 +70,15 @@ class FactorCollection
friend class System;
struct HashFactor: public std::unary_function<const FactorFriend &,
- std::size_t>
- {
- std::size_t operator()(const FactorFriend &factor) const
- {
+ std::size_t> {
+ std::size_t operator()(const FactorFriend &factor) const {
return util::MurmurHashNative(factor.in.m_string.data(),
- factor.in.m_string.size());
+ factor.in.m_string.size());
}
};
struct EqualsFactor: public std::binary_function<const FactorFriend &,
- const FactorFriend &, bool>
- {
- bool operator()(const FactorFriend &left, const FactorFriend &right) const
- {
+ const FactorFriend &, bool> {
+ bool operator()(const FactorFriend &left, const FactorFriend &right) const {
return left.in.GetString() == right.in.GetString();
}
};
@@ -103,8 +98,7 @@ class FactorCollection
//! constructor. only the 1 static variable can be created
FactorCollection() :
- m_factorIdNonTerminal(0), m_factorId(moses_MaxNumNonterminals)
- {
+ m_factorIdNonTerminal(0), m_factorId(moses_MaxNumNonterminals) {
}
public:
@@ -114,15 +108,14 @@ public:
* If a factor already exist in the collection, return the existing factor, if not create a new 1
*/
const Factor *AddFactor(const StringPiece &factorString, const System &system,
- bool isNonTerminal);
+ bool isNonTerminal);
- size_t GetNumNonTerminals()
- {
+ size_t GetNumNonTerminals() {
return m_factorIdNonTerminal;
}
const Factor *GetFactor(const StringPiece &factorString, bool isNonTerminal =
- false);
+ false);
};
diff --git a/contrib/moses2/legacy/InputFileStream.cpp b/moses2/legacy/InputFileStream.cpp
index a68ea53ef..25bb156fe 100644
--- a/contrib/moses2/legacy/InputFileStream.cpp
+++ b/moses2/legacy/InputFileStream.cpp
@@ -29,12 +29,11 @@ namespace Moses2
{
InputFileStream::InputFileStream(const std::string &filePath) :
- std::istream(NULL), m_streambuf(NULL)
+ std::istream(NULL), m_streambuf(NULL)
{
if (filePath.size() > 3 && filePath.substr(filePath.size() - 3, 3) == ".gz") {
m_streambuf = new gzfilebuf(filePath.c_str());
- }
- else {
+ } else {
std::filebuf* fb = new std::filebuf();
fb = fb->open(filePath.c_str(), std::ios::in);
if (!fb) {
diff --git a/contrib/moses2/legacy/InputFileStream.h b/moses2/legacy/InputFileStream.h
index d8f78848c..d8f78848c 100644
--- a/contrib/moses2/legacy/InputFileStream.h
+++ b/moses2/legacy/InputFileStream.h
diff --git a/contrib/moses2/legacy/Matrix.cpp b/moses2/legacy/Matrix.cpp
index 9d2abc8ab..9d2abc8ab 100644
--- a/contrib/moses2/legacy/Matrix.cpp
+++ b/moses2/legacy/Matrix.cpp
diff --git a/contrib/moses2/legacy/Matrix.h b/moses2/legacy/Matrix.h
index 6c498b53d..e2dbbba2c 100644
--- a/contrib/moses2/legacy/Matrix.h
+++ b/moses2/legacy/Matrix.h
@@ -39,16 +39,14 @@ protected:
public:
Matrix(MemPool &pool, size_t rows, size_t cols) :
- m_rows(rows), m_cols(cols)
- {
+ m_rows(rows), m_cols(cols) {
m_array = pool.Allocate<T>(rows * cols);
}
- ~Matrix(); // not implemented
+ //~Matrix(); // not implemented
// set upper triangle
- void InitTriangle(const T &val)
- {
+ void InitTriangle(const T &val) {
assert(m_rows == m_cols);
for (size_t row = 0; row < m_rows; row++) {
for (size_t col = row; col < m_cols; col++) {
@@ -58,8 +56,7 @@ public:
}
// everything
- void Init(const T &val)
- {
+ void Init(const T &val) {
for (size_t row = 0; row < m_rows; row++) {
for (size_t col = 0; col < m_cols; col++) {
SetValue(row, col, val);
@@ -68,36 +65,30 @@ public:
}
/** Returns length of the square: typically the sentence length */
- inline size_t GetSize() const
- {
+ inline size_t GetSize() const {
assert(m_rows == m_cols);
return m_rows;
}
- inline size_t GetRows() const
- {
+ inline size_t GetRows() const {
return m_rows;
}
- inline size_t GetCols() const
- {
+ inline size_t GetCols() const {
return m_cols;
}
/** Get a future cost score for a span */
- inline const T &GetValue(size_t row, size_t col) const
- {
+ inline const T &GetValue(size_t row, size_t col) const {
return m_array[row * m_cols + col];
}
- inline T &GetValue(size_t row, size_t col)
- {
+ inline T &GetValue(size_t row, size_t col) {
return m_array[row * m_cols + col];
}
/** Set a future cost score for a span */
- inline void SetValue(size_t row, size_t col, const T &value)
- {
+ inline void SetValue(size_t row, size_t col, const T &value) {
m_array[row * m_cols + col] = value;
}
};
diff --git a/contrib/moses2/legacy/OutputCollector.h b/moses2/legacy/OutputCollector.h
index 5504d9add..fdd54c5a2 100644
--- a/contrib/moses2/legacy/OutputCollector.h
+++ b/moses2/legacy/OutputCollector.h
@@ -43,28 +43,24 @@ class OutputCollector
{
public:
OutputCollector(std::ostream* outStream = &std::cout,
- std::ostream* debugStream = &std::cerr) :
- m_nextOutput(0), m_outStream(outStream), m_debugStream(debugStream), m_isHoldingOutputStream(
- false), m_isHoldingDebugStream(false)
- {
+ std::ostream* debugStream = &std::cerr) :
+ m_nextOutput(0), m_outStream(outStream), m_debugStream(debugStream), m_isHoldingOutputStream(
+ false), m_isHoldingDebugStream(false) {
}
OutputCollector(std::string xout, std::string xerr = "") :
- m_nextOutput(0)
- {
+ m_nextOutput(0) {
// TO DO open magic streams instead of regular ofstreams! [UG]
if (xout == "/dev/stderr") {
m_outStream = &std::cerr;
m_isHoldingOutputStream = false;
- }
- else if (xout.size() && xout != "/dev/stdout" && xout != "-") {
+ } else if (xout.size() && xout != "/dev/stdout" && xout != "-") {
m_outStream = new std::ofstream(xout.c_str());
UTIL_THROW_IF2(!m_outStream->good(),
- "Failed to open output file" << xout);
+ "Failed to open output file" << xout);
m_isHoldingOutputStream = true;
- }
- else {
+ } else {
m_outStream = &std::cout;
m_isHoldingOutputStream = false;
}
@@ -72,37 +68,31 @@ public:
if (xerr == "/dev/stdout") {
m_debugStream = &std::cout;
m_isHoldingDebugStream = false;
- }
- else if (xerr.size() && xerr != "/dev/stderr") {
+ } else if (xerr.size() && xerr != "/dev/stderr") {
m_debugStream = new std::ofstream(xerr.c_str());
UTIL_THROW_IF2(!m_debugStream->good(),
- "Failed to open debug stream" << xerr);
+ "Failed to open debug stream" << xerr);
m_isHoldingDebugStream = true;
- }
- else {
+ } else {
m_debugStream = &std::cerr;
m_isHoldingDebugStream = false;
}
}
- ~OutputCollector()
- {
+ ~OutputCollector() {
if (m_isHoldingOutputStream) delete m_outStream;
if (m_isHoldingDebugStream) delete m_debugStream;
}
- void HoldOutputStream()
- {
+ void HoldOutputStream() {
m_isHoldingOutputStream = true;
}
- void HoldDebugStream()
- {
+ void HoldDebugStream() {
m_isHoldingDebugStream = true;
}
- bool OutputIsCout() const
- {
+ bool OutputIsCout() const {
return (m_outStream == &std::cout);
}
@@ -110,8 +100,7 @@ public:
* Write or cache the output, as appropriate.
**/
void Write(int sourceId, const std::string& output, const std::string& debug =
- "")
- {
+ "") {
#ifdef WITH_THREADS
boost::mutex::scoped_lock lock(m_mutex);
#endif
@@ -126,15 +115,14 @@ public:
*m_outStream << iter->second << std::flush;
++m_nextOutput;
std::map<int, std::string>::iterator debugIter = m_debugs.find(
- iter->first);
+ iter->first);
m_outputs.erase(iter);
if (debugIter != m_debugs.end()) {
*m_debugStream << debugIter->second << std::flush;
m_debugs.erase(debugIter);
}
}
- }
- else {
+ } else {
//save for later
m_outputs[sourceId] = output;
m_debugs[sourceId] = debug;
@@ -154,8 +142,7 @@ private:
#endif
public:
- void SetOutputStream(std::ostream* outStream)
- {
+ void SetOutputStream(std::ostream* outStream) {
m_outStream = outStream;
}
diff --git a/contrib/moses2/legacy/OutputFileStream.cpp b/moses2/legacy/OutputFileStream.cpp
index ad46f3a0c..81047ffe1 100644
--- a/contrib/moses2/legacy/OutputFileStream.cpp
+++ b/moses2/legacy/OutputFileStream.cpp
@@ -31,12 +31,12 @@ using namespace boost::algorithm;
namespace Moses2
{
OutputFileStream::OutputFileStream() :
- boost::iostreams::filtering_ostream(), m_outFile(NULL), m_open(false)
+ boost::iostreams::filtering_ostream(), m_outFile(NULL), m_open(false)
{
}
OutputFileStream::OutputFileStream(const std::string &filePath) :
- m_outFile(NULL), m_open(false)
+ m_outFile(NULL), m_open(false)
{
Open(filePath);
}
@@ -52,10 +52,9 @@ bool OutputFileStream::Open(const std::string &filePath)
if (filePath == std::string("-")) {
// Write to standard output. Leave m_outFile null.
this->push(std::cout);
- }
- else {
+ } else {
m_outFile = new ofstream(filePath.c_str(),
- ios_base::out | ios_base::binary);
+ ios_base::out | ios_base::binary);
if (m_outFile->fail()) {
return false;
}
diff --git a/contrib/moses2/legacy/OutputFileStream.h b/moses2/legacy/OutputFileStream.h
index 27c0b4539..27c0b4539 100644
--- a/contrib/moses2/legacy/OutputFileStream.h
+++ b/moses2/legacy/OutputFileStream.h
diff --git a/contrib/moses2/legacy/Parameter.cpp b/moses2/legacy/Parameter.cpp
index 870a49f2a..7376c1099 100644
--- a/contrib/moses2/legacy/Parameter.cpp
+++ b/moses2/legacy/Parameter.cpp
@@ -26,12 +26,14 @@
#include <sstream>
#include <algorithm>
#include <boost/algorithm/string/predicate.hpp>
+#include <boost/program_options.hpp>
+
#include "Parameter.h"
#include "InputFileStream.h"
+#include "../FF/FeatureRegistry.h"
#include "util/string_stream.hh"
#include "util/exception.hh"
#include "util/random.hh"
-#include <boost/program_options.hpp>
using namespace std;
using namespace boost::algorithm;
@@ -48,7 +50,7 @@ Parameter::Parameter()
po::options_description main_opts("Main Options");
AddParam(main_opts, "config", "f", "location of the configuration file");
AddParam(main_opts, "input-file", "i",
- "location of the input file to be translated");
+ "location of the input file to be translated");
AddParam(main_opts, "verbose", "v", "verbosity level of the logging");
AddParam(main_opts, "show-weights", "print feature weights and exit");
@@ -63,7 +65,7 @@ Parameter::Parameter()
// one should be able to specify different factor delimiters for intput and output
AddParam(factor_opts, "mapping", "description of decoding steps"); // whatever that means ...
AddParam(factor_opts, "placeholder-factor",
- "Which source factor to use to store the original text for placeholders. The factor must not be used by a translation or gen model");
+ "Which source factor to use to store the original text for placeholders. The factor must not be used by a translation or gen model");
///////////////////////////////////////////////////////////////////////////////////////
// general search options
@@ -80,11 +82,11 @@ Parameter::Parameter()
desc += "9=forest-to-string";
AddParam(search_opts, "search-algorithm", desc);
AddParam(search_opts, "beam-threshold", "b",
- "threshold for threshold pruning");
+ "threshold for threshold pruning");
//AddParam(search_opts, "early-discarding-threshold", "edt",
// "threshold for constructing hypotheses based on estimate cost");
AddParam(search_opts, "stack", "s",
- "maximum stack size for histogram pruning. 0 = unlimited stack size");
+ "maximum stack size for histogram pruning. 0 = unlimited stack size");
//AddParam(search_opts, "stack-diversity", "sd",
// "minimum number of hypothesis of each coverage in stack (default 0)");
@@ -92,18 +94,18 @@ Parameter::Parameter()
//AddParam(search_opts, "weight-file", "wf",
// "feature weights file. Do *not* put weights for 'core' features in here - they go in moses.ini");
AddParam(search_opts, "weight",
- "weights for ALL models, 1 per line 'WeightName value'. Weight names can be repeated");
+ "weights for ALL models, 1 per line 'WeightName value'. Weight names can be repeated");
AddParam(search_opts, "feature-overwrite",
- "Override arguments in a particular feature function with a particular key. Format: -feature-overwrite \"FeatureName key=value\"");
+ "Override arguments in a particular feature function with a particular key. Format: -feature-overwrite \"FeatureName key=value\"");
po::options_description tune_opts("Options used in tuning.");
AddParam(tune_opts, "weight-overwrite",
- "special parameter for mert. All on 1 line. Overrides weights specified in 'weights' argument");
+ "special parameter for mert. All on 1 line. Overrides weights specified in 'weights' argument");
AddParam(tune_opts, "feature-add",
- "Add a feature function on the command line. Used by mira to add BLEU feature");
+ "Add a feature function on the command line. Used by mira to add BLEU feature");
AddParam(tune_opts, "weight-add",
- "Add weight for FF if it doesn't exist, i.e weights here are added 1st, and can be override by the ini file or on the command line. Used to specify initial weights for FF that was also specified on the copmmand line");
+ "Add weight for FF if it doesn't exist, i.e weights here are added 1st, and can be override by the ini file or on the command line. Used to specify initial weights for FF that was also specified on the copmmand line");
// phrase table limitations:
//AddParam(search_opts, "max-partial-trans-opt",
@@ -111,7 +113,7 @@ Parameter::Parameter()
//AddParam(search_opts, "max-trans-opt-per-coverage",
// "maximum number of translation options per input span (after applying mapping steps)");
AddParam(search_opts, "max-phrase-length",
- "maximum phrase length (default 20)");
+ "maximum phrase length (default 20)");
//AddParam(search_opts, "translation-option-threshold", "tot",
// "threshold for translation options relative to best for input phrase");
@@ -121,14 +123,14 @@ Parameter::Parameter()
//AddParam(search_opts, "phrase-drop-allowed", "da",
// "if present, allow dropping of source words"); //da = drop any (word); see -du for comparison
AddParam(search_opts, "threads", "th",
- "number of threads to use in decoding (defaults to single-threaded)");
+ "number of threads to use in decoding (defaults to single-threaded)");
// distortion options
po::options_description disto_opts("Distortion options");
AddParam(disto_opts, "distortion-limit", "dl",
- "distortion (reordering) limit in maximum number of words (0 = monotone, -1 = unlimited)");
+ "distortion (reordering) limit in maximum number of words (0 = monotone, -1 = unlimited)");
AddParam(disto_opts, "monotone-at-punctuation", "mp",
- "do not reorder over punctuation");
+ "do not reorder over punctuation");
//AddParam(disto_opts, "early-distortion-cost", "edc",
// "include estimate of distortion cost yet to be incurred in the score [Moore & Quirk 2007]. Default is no");
//AddParam(disto_opts, "distortion",
@@ -137,18 +139,18 @@ Parameter::Parameter()
// cube pruning
po::options_description cube_opts("Cube pruning options.");
AddParam(cube_opts, "cube-pruning-pop-limit", "cbp",
- "How many hypotheses should be popped for each stack. (default = 1000)");
+ "How many hypotheses should be popped for each stack. (default = 1000)");
AddParam(cube_opts, "cube-pruning-diversity", "cbd",
- "How many hypotheses should be created for each coverage. (default = 0)");
+ "How many hypotheses should be created for each coverage. (default = 0)");
AddParam(cube_opts, "cube-pruning-lazy-scoring", "cbls",
- "Don't fully score a hypothesis until it is popped");
+ "Don't fully score a hypothesis until it is popped");
//AddParam(cube_opts, "cube-pruning-deterministic-search", "cbds",
// "Break ties deterministically during search");
///////////////////////////////////////////////////////////////////////////////////////
// minimum bayes risk decoding
po::options_description mbr_opts(
- "Minimum Bayes Risk (MBR), Lattice MBR, and Consensus decoding");
+ "Minimum Bayes Risk (MBR), Lattice MBR, and Consensus decoding");
//AddParam(mbr_opts, "minimum-bayes-risk", "mbr",
// "use miminum Bayes risk to determine best translation");
@@ -177,12 +179,12 @@ Parameter::Parameter()
// OOV handling options
po::options_description oov_opts("OOV Handling Options");
AddParam(oov_opts, "drop-unknown", "du",
- "drop unknown words instead of copying them");
+ "drop unknown words instead of copying them");
AddParam(oov_opts, "mark-unknown", "mu", "mark unknown words in output");
AddParam(oov_opts, "unknown-word-prefix",
- "prefix to unknwon word when marked (default: 'UNK')");
+ "prefix to unknwon word when marked (default: 'UNK')");
AddParam(oov_opts, "unknown-word-suffix",
- "suffix to unknwon word when marked (default: '')");
+ "suffix to unknwon word when marked (default: '')");
//AddParam(oov_opts, "lmodel-oov-feature",
// "add language model oov feature, one per model");
//AddParam(oov_opts, "output-unknowns",
@@ -195,9 +197,9 @@ Parameter::Parameter()
po::options_description input_opts("Input Format Options");
AddParam(input_opts, "input-factors", "list of factors in the input");
AddParam(input_opts, "inputtype",
- "text (0), confusion network (1), word lattice (2), tree (3) (default = 0)");
+ "text (0), confusion network (1), word lattice (2), tree (3) (default = 0)");
AddParam(input_opts, "xml-input", "xi",
- "allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'constraint', 'ignore'");
+ "allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'constraint', 'ignore'");
//AddParam(input_opts, "xml-brackets", "xb",
// "specify strings to be used as xml tags opening and closing, e.g. \"{{ }}\" (default \"< >\"). Avoid square brackets because of configuration file format. Valid only with text input mode");
//AddParam(input_opts, "start-translation-id", "Id of 1st input. Default = 0");
@@ -219,10 +221,10 @@ Parameter::Parameter()
//AddParam(output_opts, "print-all-derivations",
// "to print all derivations in search graph");
AddParam(output_opts, "translation-details", "T",
- "for each best hypothesis, report translation details to the given file");
+ "for each best hypothesis, report translation details to the given file");
AddParam(output_opts, "output-hypo-score",
- "Output the hypo score to stdout with the output string. For search error analysis. Default is false");
+ "Output the hypo score to stdout with the output string. For search error analysis. Default is false");
//AddParam(output_opts, "output-word-graph", "owg",
// "Output stack info as word graph. Takes filename, 0=only hypos in stack, 1=stack + nbest hypos");
//AddParam(output_opts, "tree-translation-details", "Ttree",
@@ -234,9 +236,9 @@ Parameter::Parameter()
//AddParam(output_opts, "sort-word-alignment",
// "Sort word alignments for more consistent display. 0=no sort (default), 1=target order");
AddParam(output_opts, "report-segmentation", "t",
- "report phrase segmentation in the output");
+ "report phrase segmentation in the output");
AddParam(output_opts, "report-segmentation-enriched", "tt",
- "report phrase segmentation in the output with additional information");
+ "report phrase segmentation in the output with additional information");
// translation-all-details was introduced in the context of DIMwid: Decoder Inspection for Moses (using Widgets)
// see here: https://ufal.mff.cuni.cz/pbml/100/art-kurtz-seemann-braune-maletti.pdf
@@ -264,7 +266,7 @@ Parameter::Parameter()
// nbest-options
po::options_description nbest_opts("N-best Options");
AddParam(nbest_opts, "n-best-list",
- "file and size of n-best-list to be generated; specify - as the file in order to write to STDOUT");
+ "file and size of n-best-list to be generated; specify - as the file in order to write to STDOUT");
// AddParam(nbest_opts,"n-best-list-file", "file of n-best-list to be generated; specify - as the file in order to write to STDOUT");
// AddParam(nbest_opts,"n-best-list-size", "size of n-best-list to be generated; specify - as the file in order to write to STDOUT");
//AddParam(nbest_opts, "labeled-n-best-list",
@@ -272,7 +274,7 @@ Parameter::Parameter()
//AddParam(nbest_opts, "n-best-trees",
// "Write n-best target-side trees to n-best-list");
AddParam(nbest_opts, "n-best-factor",
- "factor to compute the maximum number of contenders (=factor*nbest-size). value 0 means infinity, i.e. no threshold. default is 0");
+ "factor to compute the maximum number of contenders (=factor*nbest-size). value 0 means infinity, i.e. no threshold. default is 0");
//AddParam(nbest_opts, "report-all-factors-in-n-best",
// "Report all factors in n-best-lists. Default is false");
//AddParam(nbest_opts, "lattice-samples",
@@ -294,7 +296,7 @@ Parameter::Parameter()
// string("Max. number of sessions cached.")
// + "Least recently used session is dumped first.");
AddParam(server_opts, "serial",
- "Run server in serial mode, processing only one request at a time.");
+ "Run server in serial mode, processing only one request at a time.");
AddParam(server_opts,"server-maxconn",
"Max. No of simultaneous HTTP transactions allowed by the server.");
@@ -313,9 +315,9 @@ Parameter::Parameter()
po::options_description chart_opts("Chart Decoding Options");
AddParam(chart_opts, "max-chart-span",
- "maximum num. of source word chart rules can consume (default 10)");
+ "maximum num. of source word chart rules can consume (default 10)");
AddParam(chart_opts, "non-terminals",
- "list of non-term symbols, space separated");
+ "list of non-term symbols, space separated");
//AddParam(chart_opts, "rule-limit",
// "a little like table limit. But for chart decoding rules. Default is DEFAULT_MAX_TRANS_OPT_SIZE");
//AddParam(chart_opts, "source-label-overlap",
@@ -336,7 +338,7 @@ Parameter::Parameter()
//AddParam(o,"continue-partial-translation", "cpt", "start from nonempty hypothesis");
AddParam(misc_opts, "decoding-graph-backoff", "dpb",
- "only use subsequent decoding paths for unknown spans of given length");
+ "only use subsequent decoding paths for unknown spans of given length");
//AddParam(misc_opts, "references",
// "Reference file(s) - used for bleu score feature");
//AddParam(misc_opts, "recover-input-path", "r",
@@ -355,11 +357,11 @@ Parameter::Parameter()
// "Context window (in words) for context-sensitive translation: {+|-|+-}<number>.");
AddParam(misc_opts, "cpu-affinity-offset", "CPU Affinity. Default = -1 (no affinity)");
AddParam(misc_opts, "cpu-affinity-increment",
- "Set to 1 (default) to put each thread on different cores. 0 to run all threads on one core");
+ "Set to 1 (default) to put each thread on different cores. 0 to run all threads on one core");
// Compact phrase table and reordering table.
po::options_description cpt_opts(
- "Options when using compact phrase and reordering tables.");
+ "Options when using compact phrase and reordering tables.");
//AddParam(cpt_opts, "minphr-memory",
// "Load phrase table in minphr format into memory");
//AddParam(cpt_opts, "minlexr-memory",
@@ -374,7 +376,7 @@ Parameter::Parameter()
// DEPRECATED options
po::options_description deprec_opts("Deprecated Options");
AddParam(deprec_opts, "text-type",
- "DEPRECATED. DO NOT USE. should be one of dev/devtest/test, used for domain adaptation features");
+ "DEPRECATED. DO NOT USE. should be one of dev/devtest/test, used for domain adaptation features");
/*
AddParam(deprec_opts, "link-param-count",
@@ -429,11 +431,11 @@ Parameter::Parameter()
"DEPRECATED. DO NOT USE. location and properties of the language models");
AddParam(deprec_opts, "lmodel-dub",
"DEPRECATED. DO NOT USE. dictionary upper bounds of language models");
-#ifdef HAVE_SYNLM
+ #ifdef HAVE_SYNLM
AddParam(deprec_opts,"slmodel-file", "DEPRECATED. DO NOT USE. location of the syntactic language model file(s)");
AddParam(deprec_opts,"slmodel-factor", "DEPRECATED. DO NOT USE. factor to use with syntactic language model");
AddParam(deprec_opts,"slmodel-beam", "DEPRECATED. DO NOT USE. beam width to use with syntactic language model's parser");
-#endif
+ #endif
AddParam(deprec_opts, "ttable-file",
"DEPRECATED. DO NOT USE. location and properties of the translation tables");
AddParam(deprec_opts, "phrase-pair-feature",
@@ -492,8 +494,7 @@ const PARAM_VEC *Parameter::GetParam(const std::string &paramName) const
PARAM_MAP::const_iterator iter = m_setting.find(paramName);
if (iter == m_setting.end()) {
return NULL;
- }
- else {
+ } else {
return &iter->second;
}
@@ -501,7 +502,7 @@ const PARAM_VEC *Parameter::GetParam(const std::string &paramName) const
/** initialize a parameter, sub of constructor */
void Parameter::AddParam(po::options_description& optgroup,
- string const& paramName, string const& description)
+ string const& paramName, string const& description)
{
m_valid[paramName] = true;
m_description[paramName] = description;
@@ -510,8 +511,8 @@ void Parameter::AddParam(po::options_description& optgroup,
/** initialize a parameter (including abbreviation), sub of constructor */
void Parameter::AddParam(po::options_description& optgroup,
- string const& paramName, string const& abbrevName,
- string const& description)
+ string const& paramName, string const& abbrevName,
+ string const& description)
{
m_valid[paramName] = true;
m_valid[abbrevName] = true;
@@ -570,7 +571,8 @@ bool Parameter::LoadParam(int argc, char* xargv[])
{
// legacy parameter handling: all parameters are expected
// to start with a single dash
- char* argv[argc + 1];
+ char **argv = (char**) alloca(argc * sizeof(char*));
+
for (int i = 0; i < argc; ++i) {
argv[i] = xargv[i];
if (strlen(argv[i]) > 2 && argv[i][0] == '-' && argv[i][1] == '-') ++argv[i];
@@ -579,17 +581,16 @@ bool Parameter::LoadParam(int argc, char* xargv[])
// config file (-f) arg mandatory
string configPath;
if ((configPath = FindParam("-f", argc, argv)) == "" && (configPath =
- FindParam("-config", argc, argv)) == "") {
+ FindParam("-config", argc, argv)) == "") {
PrintCredit();
Explain();
- PrintFF();
+ FeatureRegistry::Instance().PrintFF();
cerr << endl;
cerr << "No configuration file was specified. Use -config or -f";
cerr << endl;
return false;
- }
- else {
+ } else {
if (!ReadConfigFile(configPath)) {
std::cerr << "Could not read " << configPath;
return false;
@@ -598,14 +599,14 @@ bool Parameter::LoadParam(int argc, char* xargv[])
// overwrite parameters with values from switches
for (PARAM_STRING::const_iterator iterParam = m_description.begin();
- iterParam != m_description.end(); iterParam++) {
+ iterParam != m_description.end(); iterParam++) {
const string paramName = iterParam->first;
OverwriteParam("-" + paramName, paramName, argc, argv);
}
// ... also shortcuts
for (PARAM_STRING::const_iterator iterParam = m_abbreviation.begin();
- iterParam != m_abbreviation.end(); iterParam++) {
+ iterParam != m_abbreviation.end(); iterParam++) {
const string paramName = iterParam->first;
const string paramShortName = iterParam->second;
OverwriteParam("-" + paramShortName, paramName, argc, argv);
@@ -617,11 +618,11 @@ bool Parameter::LoadParam(int argc, char* xargv[])
int verbose = 1;
if (m_setting.find("verbose") != m_setting.end()
&& m_setting["verbose"].size() > 0) verbose = Scan<int>(
- m_setting["verbose"][0]);
+ m_setting["verbose"][0]);
if (verbose >= 1) { // only if verbose
cerr << "Defined parameters (per moses.ini or switch):" << endl;
for (PARAM_MAP::const_iterator iterParam = m_setting.begin();
- iterParam != m_setting.end(); iterParam++) {
+ iterParam != m_setting.end(); iterParam++) {
cerr << "\t" << iterParam->first << ": ";
for (size_t i = 0; i < iterParam->second.size(); i++)
cerr << iterParam->second[i] << " ";
@@ -715,7 +716,7 @@ void Parameter::SetWeight(const std::string &name, size_t ind, float weight)
}
void Parameter::SetWeight(const std::string &name, size_t ind,
- const vector<float> &weights)
+ const vector<float> &weights)
{
PARAM_VEC &newWeights = m_setting["weight"];
string line = name + SPrint(ind) + "=";
@@ -727,7 +728,7 @@ void Parameter::SetWeight(const std::string &name, size_t ind,
}
void Parameter::AddWeight(const std::string &name, size_t ind,
- const std::vector<float> &weights)
+ const std::vector<float> &weights)
{
PARAM_VEC &newWeights = m_setting["weight"];
@@ -775,13 +776,12 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
PARAM_VEC &numInputScores = m_setting["input-scores"];
if (inputWeights.size() == 1) {
UTIL_THROW_IF2(numInputScores.size() != 0,
- "No [input-scores] section allowed");
+ "No [input-scores] section allowed");
numInputScores.push_back("1");
numInputScores.push_back("0");
- }
- else if (inputWeights.size() == 2) {
+ } else if (inputWeights.size() == 2) {
UTIL_THROW_IF2(numInputScores.size() != 0,
- "No [input-scores] section allowed");
+ "No [input-scores] section allowed");
numInputScores.push_back("1");
numInputScores.push_back("1");
}
@@ -819,15 +819,14 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
if (maxTargetPhrase.size() == 1 && translationVector.size() > 1) {
cerr << "Using uniform ttable-limit of " << maxTargetPhrase[0]
- << " for all translation tables." << endl;
+ << " for all translation tables." << endl;
for (size_t i = 1; i < translationVector.size(); i++)
maxTargetPhrase.push_back(maxTargetPhrase[0]);
- }
- else if (maxTargetPhrase.size() != 1
- && maxTargetPhrase.size() < translationVector.size()) {
+ } else if (maxTargetPhrase.size() != 1
+ && maxTargetPhrase.size() < translationVector.size()) {
std::cerr << "You specified " << translationVector.size()
- << " translation tables, but only " << maxTargetPhrase.size()
- << " ttable-limits.";
+ << " translation tables, but only " << maxTargetPhrase.size()
+ << " ttable-limits.";
return;
}
@@ -846,7 +845,7 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
return;
}
UTIL_THROW_IF2(token.size() < 5,
- "Phrase table must have at least 5 scores");
+ "Phrase table must have at least 5 scores");
int implementation = Scan<int>(token[0]);
@@ -884,8 +883,7 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
if (ptIndices.find(ptType) == ptIndices.end()) {
ptIndices[ptType] = 0;
ptInd = 0;
- }
- else {
+ } else {
ptInd = ++ptIndices[ptType];
}
@@ -896,7 +894,7 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
vector<float> weights(numFF);
for (size_t currFF = 0; currFF < numFF; ++currFF) {
UTIL_THROW_IF2(currOldInd >= oldWeights.size(),
- "Errors converting old phrase-table weights to new weights");
+ "Errors converting old phrase-table weights to new weights");
float weight = Scan<float>(oldWeights[currOldInd]);
weights[currFF] = weight;
@@ -916,7 +914,7 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
//characteristics of the phrase table
vector<FactorType> input = Tokenize<FactorType>(token[1], ","), output =
- Tokenize<FactorType>(token[2], ",");
+ Tokenize<FactorType>(token[2], ",");
size_t numScoreComponent = Scan<size_t>(token[3]);
string filePath = token[4];
@@ -976,7 +974,7 @@ void Parameter::ConvertWeightArgsDistortion()
const PARAM_VEC *lextable = GetParam(oldLexReordingName);
for (size_t indTable = 0; lextable && indTable < lextable->size();
- ++indTable) {
+ ++indTable) {
const string &line = lextable->at(indTable);
vector<string> toks = Tokenize(line);
@@ -985,7 +983,7 @@ void Parameter::ConvertWeightArgsDistortion()
vector<float> weights(numFF);
for (size_t currFF = 0; currFF < numFF; ++currFF) {
UTIL_THROW_IF2(oldWeights && currOldInd >= oldWeights->size(),
- "Errors converting old distortion weights to new weights");
+ "Errors converting old distortion weights to new weights");
float weight = Scan<float>(oldWeights->at(currOldInd));
weights[currFF] = weight;
@@ -998,9 +996,9 @@ void Parameter::ConvertWeightArgsDistortion()
vector<FactorType> factors = Tokenize<FactorType>(toks[0], "-");
UTIL_THROW_IF2(factors.size() != 2,
- "Error in old factor specification for lexicalized reordering model: " << toks[0]);
+ "Error in old factor specification for lexicalized reordering model: " << toks[0]);
strme << "input-factor=" << factors[0] << " output-factor=" << factors[1]
- << " ";
+ << " ";
strme << "num-features=" << toks[2] << " ";
strme << "path=" << toks[3];
@@ -1072,7 +1070,7 @@ void Parameter::ConvertWeightArgsLM()
vector<float> weightsLM(numFF);
for (size_t currFF = 0; currFF < numFF; ++currFF) {
UTIL_THROW_IF2(currOldInd >= weights.size(),
- "Errors converting old LM weights to new weights");
+ "Errors converting old LM weights to new weights");
weightsLM[currFF] = Scan<float>(weights[currOldInd]);
if (isChartDecoding) {
weightsLM[currFF] = UntransformLMScore(weightsLM[currFF]);
@@ -1084,12 +1082,11 @@ void Parameter::ConvertWeightArgsLM()
SetWeight(newFeatureName, lmIndex, weightsLM);
string featureLine = newFeatureName + " " + "factor=" + modelToks[1] + " " // factor
- + "order=" + modelToks[2] + " " // order
- + "num-features=" + SPrint(numFF) + " ";
+ + "order=" + modelToks[2] + " " // order
+ + "num-features=" + SPrint(numFF) + " ";
if (lmType == 9) {
featureLine += "lazyken=1 ";
- }
- else if (lmType == 8) {
+ } else if (lmType == 8) {
featureLine += "lazyken=0 ";
}
@@ -1125,7 +1122,7 @@ void Parameter::ConvertWeightArgsGeneration(const std::string &oldWeightName,
vector<float> weights(numFF);
for (size_t currFF = 0; currFF < numFF; ++currFF) {
UTIL_THROW_IF2(currOldInd >= oldWeights.size(),
- "Errors converting old generation weights to new weights");
+ "Errors converting old generation weights to new weights");
float weight = Scan<float>(oldWeights[currOldInd]);
weights[currFF] = weight;
@@ -1135,8 +1132,8 @@ void Parameter::ConvertWeightArgsGeneration(const std::string &oldWeightName,
util::StringStream strme;
strme << "Generation " << "input-factor=" << modelToks[0] << " "
- << "output-factor=" << modelToks[1] << " " << "num-features="
- << modelToks[2] << " " << "path=" << modelToks[3];
+ << "output-factor=" << modelToks[1] << " " << "num-features="
+ << modelToks[2] << " " << "path=" << modelToks[3];
AddFeature(strme.str());
}
}
@@ -1182,7 +1179,7 @@ void Parameter::ConvertPhrasePenalty()
const PARAM_VEC *params = GetParam(oldWeightName);
if (params) {
UTIL_THROW_IF2(params->size() != 1,
- "There should be only 1 phrase-penalty weight");
+ "There should be only 1 phrase-penalty weight");
float weight = Scan<float>(params->at(0));
AddFeature("PhrasePenalty");
SetWeight("PhrasePenalty", 0, weight);
@@ -1195,7 +1192,7 @@ void Parameter::ConvertWeightArgs()
{
// can't handle discr LM. must do it manually 'cos of bigram/n-gram split
UTIL_THROW_IF2(m_setting.count("weight-dlm") != 0,
- "Can't handle discr LM. must do it manually 'cos of bigram/n-gram split");
+ "Can't handle discr LM. must do it manually 'cos of bigram/n-gram split");
// check that old & new format aren't mixed
if (m_setting.count("weight")
@@ -1287,23 +1284,20 @@ void Parameter::WeightOverwrite()
name = tok.substr(0, tok.size() - 1);
std::map<std::string, std::vector<float> >::const_iterator found =
- m_weights.find(name);
+ m_weights.find(name);
if (found != m_weights.end()) {
oldWeights = &(found->second);
- }
- else {
+ } else {
oldWeights = NULL;
}
cnt = 0;
- }
- else {
+ } else {
// a weight for curr ff
if (toks[i] == "x") {
UTIL_THROW_IF2(!oldWeights || cnt >= oldWeights->size(),
- "Keeping previous weight failed in weight-overwrite");
+ "Keeping previous weight failed in weight-overwrite");
weights.push_back(oldWeights->at(cnt));
- }
- else {
+ } else {
float weight = Scan<float>(toks[i]);
weights.push_back(weight);
}
@@ -1324,7 +1318,7 @@ bool Parameter::Validate()
PARAM_MAP::const_iterator iterParams;
for (iterParams = m_setting.begin(); iterParams != m_setting.end();
- ++iterParams) {
+ ++iterParams) {
const std::string &key = iterParams->first;
if (m_valid.find(key) == m_valid.end()) {
@@ -1336,10 +1330,10 @@ bool Parameter::Validate()
if (m_setting["lmodel-dub"].size() > 0) {
if (m_setting["lmodel-file"].size() != m_setting["lmodel-dub"].size()) {
std::cerr << "Config and parameters specify "
- << static_cast<int>(m_setting["lmodel-file"].size())
- << " language model files (lmodel-file), but "
- << static_cast<int>(m_setting["lmodel-dub"].size())
- << " LM upperbounds (lmodel-dub)" << endl;
+ << static_cast<int>(m_setting["lmodel-file"].size())
+ << " language model files (lmodel-file), but "
+ << static_cast<int>(m_setting["lmodel-dub"].size())
+ << " LM upperbounds (lmodel-dub)" << endl;
noErrorFlag = false;
}
}
@@ -1351,7 +1345,7 @@ bool Parameter::Validate()
noErrorFlag = FileExists(m_setting["input-file"][0]);
if (!noErrorFlag) {
std::cerr << endl << "Input file " << m_setting["input-file"][0]
- << " does not exist";
+ << " does not exist";
}
}
// generation tables
@@ -1379,7 +1373,7 @@ bool Parameter::Validate()
/** check whether a file exists */
bool Parameter::FilesExist(const string &paramName, int fieldNo,
- std::vector<std::string> const& extensions)
+ std::vector<std::string> const& extensions)
{
typedef std::vector<std::string> StringVec;
StringVec::const_iterator iter;
@@ -1399,8 +1393,8 @@ bool Parameter::FilesExist(const string &paramName, int fieldNo,
if (tokenizeIndex >= vec.size()) {
std::cerr << "Expected at least " << (tokenizeIndex + 1)
- << " tokens per entry in '" << paramName << "', but only found "
- << vec.size();
+ << " tokens per entry in '" << paramName << "', but only found "
+ << vec.size();
return false;
}
const string &pathStr = vec[tokenizeIndex];
@@ -1426,8 +1420,7 @@ string Parameter::FindParam(const string &paramSwitch, int argc, char* argv[])
if (string(argv[i]) == paramSwitch) {
if (i + 1 < argc) {
return argv[i + 1];
- }
- else {
+ } else {
std::cerr << "Option " << paramSwitch << " requires a parameter!";
// TODO return some sort of error, not the empty string
}
@@ -1442,7 +1435,7 @@ string Parameter::FindParam(const string &paramSwitch, int argc, char* argv[])
* \param argc number of arguments on command line
* \param argv values of paramters on command line */
void Parameter::OverwriteParam(const string &paramSwitch,
- const string &paramName, int argc, char* argv[])
+ const string &paramName, int argc, char* argv[])
{
int startPos = -1;
for (int i = 0; i < argc; i++) {
@@ -1478,8 +1471,7 @@ bool Parameter::ReadConfigFile(const string &filePath)
if (line.size() == 0) {
// blank line. do nothing.
- }
- else if (line[0] == '[') {
+ } else if (line[0] == '[') {
// new parameter
for (size_t currPos = 0; currPos < line.size(); currPos++) {
if (line[currPos] == ']') {
@@ -1487,8 +1479,7 @@ bool Parameter::ReadConfigFile(const string &filePath)
break;
}
}
- }
- else {
+ } else {
// add value to parameter
m_setting[paramName].push_back(line);
}
@@ -1496,14 +1487,12 @@ bool Parameter::ReadConfigFile(const string &filePath)
return true;
}
-struct Credit
-{
+struct Credit {
string name, contact, currentPursuits, areaResponsibility;
int sortId;
Credit(string name, string contact, string currentPursuits,
- string areaResponsibility)
- {
+ string areaResponsibility) {
this->name = name;
this->contact = contact;
this->currentPursuits = currentPursuits;
@@ -1511,8 +1500,7 @@ struct Credit
this->sortId = util::rand_excl(1000);
}
- bool operator<(const Credit &other) const
- {
+ bool operator<(const Credit &other) const {
/*
if (areaResponsibility.size() != 0 && other.areaResponsibility.size() ==0)
return true;
@@ -1532,7 +1520,7 @@ std::ostream& operator<<(std::ostream &os, const Credit &credit)
if (credit.contact != "") os << "\t contact: " << credit.contact;
if (credit.currentPursuits != "") os << " " << credit.currentPursuits;
if (credit.areaResponsibility != "") os << " I'll answer question on: "
- << credit.areaResponsibility;
+ << credit.areaResponsibility;
return os;
}
@@ -1542,38 +1530,38 @@ void Parameter::PrintCredit()
srand(time(NULL));
everyone.push_back(
- Credit("Nicola Bertoldi", "911", "", "scripts & other stuff"));
+ Credit("Nicola Bertoldi", "911", "", "scripts & other stuff"));
everyone.push_back(Credit("Ondrej Bojar", "", "czech this out!", ""));
everyone.push_back(
- Credit("Chris Callison-Burch", "anytime, anywhere",
- "international playboy", ""));
+ Credit("Chris Callison-Burch", "anytime, anywhere",
+ "international playboy", ""));
everyone.push_back(Credit("Alexandra Constantin", "", "eu sunt varza", ""));
everyone.push_back(
- Credit("Brooke Cowan", "brooke@csail.mit.edu",
- "if you're going to san francisco, be sure to wear a flower in your hair",
- ""));
+ Credit("Brooke Cowan", "brooke@csail.mit.edu",
+ "if you're going to san francisco, be sure to wear a flower in your hair",
+ ""));
everyone.push_back(
- Credit("Chris Dyer", "can't. i'll be out driving my mustang",
- "driving my mustang", ""));
+ Credit("Chris Dyer", "can't. i'll be out driving my mustang",
+ "driving my mustang", ""));
everyone.push_back(
- Credit("Marcello Federico", "federico at itc at it",
- "Researcher at ITC-irst, Trento, Italy", "IRST language model"));
+ Credit("Marcello Federico", "federico at itc at it",
+ "Researcher at ITC-irst, Trento, Italy", "IRST language model"));
everyone.push_back(
- Credit("Evan Herbst", "Small college in upstate New York", "", ""));
+ Credit("Evan Herbst", "Small college in upstate New York", "", ""));
everyone.push_back(
- Credit("Philipp Koehn", "only between 2 and 4am", "",
- "Nothing fazes this dude"));
+ Credit("Philipp Koehn", "only between 2 and 4am", "",
+ "Nothing fazes this dude"));
everyone.push_back(
- Credit("Christine Moran", "weird building at MIT", "", ""));
+ Credit("Christine Moran", "weird building at MIT", "", ""));
everyone.push_back(
- Credit("Wade Shen", "via morse code", "buying another laptop", ""));
+ Credit("Wade Shen", "via morse code", "buying another laptop", ""));
everyone.push_back(
- Credit("Richard Zens", "richard at aachen dot de", "",
- "ambiguous source input, confusion networks, confusing source code"));
+ Credit("Richard Zens", "richard at aachen dot de", "",
+ "ambiguous source input, confusion networks, confusing source code"));
everyone.push_back(
- Credit("Hieu Hoang", "http://www.hoang.co.uk/hieu/",
- "phd student at Edinburgh Uni. Original Moses developer",
- "general queries/ flames on Moses."));
+ Credit("Hieu Hoang", "http://www.hoang.co.uk/hieu/",
+ "phd student at Edinburgh Uni. Original Moses developer",
+ "general queries/ flames on Moses."));
sort(everyone.begin(), everyone.end());
@@ -1620,29 +1608,23 @@ void Parameter::OverwriteParam(const string &paramName, PARAM_VEC values)
m_setting[paramName]; // defines the parameter, important for boolean switches
if (m_setting[paramName].size() > 1) {
cerr << " (the parameter had " << m_setting[paramName].size()
- << " previous values)";
+ << " previous values)";
UTIL_THROW_IF2(m_setting[paramName].size() != values.size(),
- "Number of weight override for " << paramName << " is not the same as the original number of weights");
- }
- else {
+ "Number of weight override for " << paramName << " is not the same as the original number of weights");
+ } else {
cerr << " (the parameter does not have previous values)";
m_setting[paramName].resize(values.size());
}
cerr << " with the following values:";
int i = 0;
for (PARAM_VEC::iterator iter = values.begin(); iter != values.end();
- iter++, i++) {
+ iter++, i++) {
m_setting[paramName][i] = *iter;
cerr << " " << *iter;
}
cerr << std::endl;
}
-void Parameter::PrintFF() const
-{
- //StaticData::Instance().GetFeatureRegistry().PrintFF();
-}
-
std::set<std::string> Parameter::GetWeightNames() const
{
std::set<std::string> ret;
@@ -1661,7 +1643,7 @@ void Parameter::Save(const std::string path)
PARAM_MAP::const_iterator iterOuter;
for (iterOuter = m_setting.begin(); iterOuter != m_setting.end();
- ++iterOuter) {
+ ++iterOuter) {
const std::string &sectionName = iterOuter->first;
file << "[" << sectionName << "]" << endl;
@@ -1681,7 +1663,7 @@ void Parameter::Save(const std::string path)
template<>
void Parameter::SetParameter<bool>(bool &parameter,
- std::string const& parameterName, bool const& defaultValue) const
+ std::string const& parameterName, bool const& defaultValue) const
{
const PARAM_VEC *params = GetParam(parameterName);
diff --git a/contrib/moses2/legacy/Parameter.h b/moses2/legacy/Parameter.h
index 5f5ff393c..501f35e99 100644
--- a/contrib/moses2/legacy/Parameter.h
+++ b/moses2/legacy/Parameter.h
@@ -58,47 +58,46 @@ protected:
std::string FindParam(const std::string &paramSwitch, int argc, char* argv[]);
void OverwriteParam(const std::string &paramSwitch,
- const std::string &paramName, int argc, char* argv[]);
+ const std::string &paramName, int argc, char* argv[]);
bool ReadConfigFile(const std::string &filePath);
bool FilesExist(const std::string &paramName, int fieldNo,
- std::vector<std::string> const& fileExtension = std::vector<std::string>(
- 1, ""));
+ std::vector<std::string> const& fileExtension = std::vector<std::string>(
+ 1, ""));
bool isOption(const char* token);
bool Validate();
void
AddParam(options_description& optgroup, value_semantic const* optvalue,
- std::string const& paramName, std::string const& description);
+ std::string const& paramName, std::string const& description);
void
AddParam(options_description& optgroup, std::string const &paramName,
- std::string const &description);
+ std::string const &description);
void
AddParam(options_description& optgroup, value_semantic const* optvalue,
- std::string const& paramName, std::string const& abbrevName,
- std::string const& description);
+ std::string const& paramName, std::string const& abbrevName,
+ std::string const& description);
void
AddParam(options_description& optgroup, std::string const& paramName,
- std::string const& abbrevName, std::string const& description);
+ std::string const& abbrevName, std::string const& description);
void PrintCredit();
- void PrintFF() const;
void SetWeight(const std::string &name, size_t ind, float weight);
void SetWeight(const std::string &name, size_t ind,
- const std::vector<float> &weights);
+ const std::vector<float> &weights);
void AddWeight(const std::string &name, size_t ind,
- const std::vector<float> &weights);
+ const std::vector<float> &weights);
void ConvertWeightArgs();
void ConvertWeightArgsSingleWeight(const std::string &oldWeightName,
- const std::string &newWeightName);
+ const std::string &newWeightName);
void ConvertWeightArgsPhraseModel(const std::string &oldWeightName);
void ConvertWeightArgsLM();
void ConvertWeightArgsDistortion();
void ConvertWeightArgsGeneration(const std::string &oldWeightName,
- const std::string &newWeightName);
+ const std::string &newWeightName);
void ConvertWeightArgsPhrasePenalty();
void ConvertWeightArgsWordPenalty();
void ConvertPhrasePenalty();
@@ -119,22 +118,19 @@ public:
const PARAM_VEC *GetParam(const std::string &paramName) const;
/** check if parameter is defined (either in moses.ini or as switch) */
- bool isParamSpecified(const std::string &paramName) const
- {
+ bool isParamSpecified(const std::string &paramName) const {
return m_setting.find(paramName) != m_setting.end();
}
void OverwriteParam(const std::string &paramName, PARAM_VEC values);
std::vector<float> GetWeights(const std::string &name);
- const std::map<std::string, std::vector<float> > &GetAllWeights() const
- {
+ const std::map<std::string, std::vector<float> > &GetAllWeights() const {
return m_weights;
}
std::set<std::string> GetWeightNames() const;
- const PARAM_MAP &GetParams() const
- {
+ const PARAM_MAP &GetParams() const {
return m_setting;
}
@@ -142,21 +138,18 @@ public:
template<typename T>
void SetParameter(T &var, const std::string &name,
- const T &defaultValue) const
- {
+ const T &defaultValue) const {
const PARAM_VEC *params = GetParam(name);
if (params && params->size()) {
var = Scan<T>(params->at(0));
- }
- else {
+ } else {
var = defaultValue;
}
}
void SetParameter(bool& var, std::string const& name);
- bool SetBooleanSwitch(bool& val, std::string const name)
- {
+ bool SetBooleanSwitch(bool& val, std::string const name) {
// issues a warning if format is wrong
const PARAM_VEC *params = GetParam(name);
val = (params && params->size());
@@ -171,7 +164,7 @@ public:
template<>
void Parameter::SetParameter<bool>(bool &var, const std::string &name,
- const bool &defaultValue) const;
+ const bool &defaultValue) const;
}
diff --git a/contrib/moses2/legacy/Range.cpp b/moses2/legacy/Range.cpp
index 7186e4265..7186e4265 100644
--- a/contrib/moses2/legacy/Range.cpp
+++ b/moses2/legacy/Range.cpp
diff --git a/contrib/moses2/legacy/Range.h b/moses2/legacy/Range.h
index 76d720bed..9acfba45d 100644
--- a/contrib/moses2/legacy/Range.h
+++ b/moses2/legacy/Range.h
@@ -44,48 +44,40 @@ class Range
// m_endPos is inclusive
size_t m_startPos, m_endPos;
public:
- inline explicit Range()
- {
+ inline explicit Range() {
}
inline Range(size_t startPos, size_t endPos) :
- m_startPos(startPos), m_endPos(endPos)
- {
+ m_startPos(startPos), m_endPos(endPos) {
}
inline Range(const Range &copy) :
- m_startPos(copy.GetStartPos()), m_endPos(copy.GetEndPos())
- {
+ m_startPos(copy.GetStartPos()), m_endPos(copy.GetEndPos()) {
}
- inline size_t GetStartPos() const
- {
+ inline size_t GetStartPos() const {
return m_startPos;
}
- inline size_t GetEndPos() const
- {
+ inline size_t GetEndPos() const {
return m_endPos;
}
- inline void SetStartPos(size_t val)
- {
+ inline void SetStartPos(size_t val) {
m_startPos = val;
}
- inline void SetEndPos(size_t val)
- {
+ inline void SetEndPos(size_t val) {
m_endPos = val;
}
//! count of words translated
- inline size_t GetNumWordsCovered() const
- {
+ inline size_t GetNumWordsCovered() const {
assert(
- (m_startPos == NOT_FOUND && m_endPos == NOT_FOUND) || (m_startPos != NOT_FOUND && m_endPos != NOT_FOUND));
+ (m_startPos == NOT_FOUND && m_endPos == NOT_FOUND) || (m_startPos != NOT_FOUND && m_endPos != NOT_FOUND));
return (m_startPos == NOT_FOUND) ? 0 : m_endPos - m_startPos + 1;
}
//! transitive comparison
inline bool operator<(const Range& x) const {
return (m_startPos<x.m_startPos
- || (m_startPos==x.m_startPos && m_endPos<x.m_endPos));
+ || (m_startPos==x.m_startPos && m_endPos<x.m_endPos));
}
// equality operator
diff --git a/contrib/moses2/legacy/ThreadPool.cpp b/moses2/legacy/ThreadPool.cpp
index 3e159020b..43423e545 100644
--- a/contrib/moses2/legacy/ThreadPool.cpp
+++ b/moses2/legacy/ThreadPool.cpp
@@ -19,12 +19,14 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <stdio.h>
+#ifdef __linux
#include <pthread.h>
-#include <pthread.h>
+#include <unistd.h>
+#endif
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
-#include <unistd.h>
+#include <thread>
#include "ThreadPool.h"
@@ -37,15 +39,21 @@ namespace Moses2
do { errno = en; perror(msg); exit(EXIT_FAILURE); } while (0)
ThreadPool::ThreadPool(size_t numThreads, int cpuAffinityOffset,
- int cpuAffinityIncr) :
- m_stopped(false), m_stopping(false), m_queueLimit(0)
+ int cpuAffinityIncr) :
+ m_stopped(false), m_stopping(false), m_queueLimit(0)
{
+#if defined(_WIN32) || defined(_WIN64)
+ size_t numCPU = std::thread::hardware_concurrency();
+#else
size_t numCPU = sysconf(_SC_NPROCESSORS_ONLN);
+#endif
+ //cerr << "numCPU=" << numCPU << endl;
+
int cpuInd = cpuAffinityOffset % numCPU;
for (size_t i = 0; i < numThreads; ++i) {
boost::thread *thread = m_threads.create_thread(
- boost::bind(&ThreadPool::Execute, this));
+ boost::bind(&ThreadPool::Execute, this));
#ifdef __linux
if (cpuAffinityOffset >= 0) {
@@ -104,8 +112,7 @@ void ThreadPool::Execute()
task->Run();
}
m_threadAvailable.notify_all();
- }
- while (!m_stopped);
+ } while (!m_stopped);
}
void ThreadPool::Submit(boost::shared_ptr<Task> task)
diff --git a/contrib/moses2/legacy/ThreadPool.h b/moses2/legacy/ThreadPool.h
index 62a8f43ad..e2cfac4a8 100644
--- a/contrib/moses2/legacy/ThreadPool.h
+++ b/moses2/legacy/ThreadPool.h
@@ -51,12 +51,10 @@ class Task
{
public:
virtual void Run() = 0;
- virtual bool DeleteAfterExecution()
- {
+ virtual bool DeleteAfterExecution() {
return true;
}
- virtual ~Task()
- {
+ virtual ~Task() {
}
};
@@ -67,10 +65,9 @@ public:
* Construct a thread pool of a fixed size.
**/
explicit ThreadPool(size_t numThreads, int cpuAffinityOffset = -1,
- int cpuAffinityIncr = 1);
+ int cpuAffinityIncr = 1);
- ~ThreadPool()
- {
+ ~ThreadPool() {
Stop();
}
@@ -88,8 +85,7 @@ public:
/**
* Set maximum number of queued threads (otherwise Submit blocks)
**/
- void SetQueueLimit(size_t limit)
- {
+ void SetQueueLimit(size_t limit) {
m_queueLimit = limit;
}
@@ -113,12 +109,10 @@ class TestTask: public Task
{
public:
TestTask(int id) :
- m_id(id)
- {
+ m_id(id) {
}
- virtual void Run()
- {
+ virtual void Run() {
#ifdef BOOST_HAS_PTHREADS
pthread_t tid = pthread_self();
#else
@@ -128,8 +122,7 @@ public:
std::cerr << "Executing " << m_id << " in thread id " << tid << std::endl;
}
- virtual ~TestTask()
- {
+ virtual ~TestTask() {
}
private:
diff --git a/contrib/moses2/legacy/Timer.cpp b/moses2/legacy/Timer.cpp
index b1857ee0d..81858e2fc 100644
--- a/contrib/moses2/legacy/Timer.cpp
+++ b/moses2/legacy/Timer.cpp
@@ -8,7 +8,7 @@ namespace Moses2
{
Timer::Timer() :
- running(false), stopped(false)
+ running(false), stopped(false)
{
start_time = 0;
}
@@ -46,8 +46,7 @@ void Timer::start(const char* msg)
if (stopped) {
start_time = util::WallTime() - (stop_time - start_time);
stopped = false;
- }
- else {
+ } else {
start_time = util::WallTime();
running = true;
}
diff --git a/contrib/moses2/legacy/Timer.h b/moses2/legacy/Timer.h
index 3f44ef4b9..3f44ef4b9 100644
--- a/contrib/moses2/legacy/Timer.h
+++ b/moses2/legacy/Timer.h
diff --git a/contrib/moses2/legacy/Util2.cpp b/moses2/legacy/Util2.cpp
index ffc348090..9b4ff217c 100644
--- a/contrib/moses2/legacy/Util2.cpp
+++ b/moses2/legacy/Util2.cpp
@@ -15,14 +15,15 @@ bool Scan<bool>(const std::string &input)
if (lc == "yes" || lc == "y" || lc == "true" || lc == "1") return true;
if (lc == "no" || lc == "n" || lc == "false" || lc == "0") return false;
UTIL_THROW(BoolValueException,
- "Could not interpret " << input << " as a boolean. After lowercasing, valid values are yes, y, true, 1, no, n, false, and 0.");
+ "Could not interpret " << input << " as a boolean. After lowercasing, valid values are yes, y, true, 1, no, n, false, and 0.");
}
const std::string ToLower(const std::string& str)
{
std::string lc(str);
- std::transform(lc.begin(), lc.end(), lc.begin(), (int (*)(int))std::tolower);return
-lc ;
+ std::transform(lc.begin(), lc.end(), lc.begin(), (int (*)(int))std::tolower);
+ return
+ lc ;
}
}
diff --git a/contrib/moses2/legacy/Util2.h b/moses2/legacy/Util2.h
index eef638f93..c2b8cf58f 100644
--- a/contrib/moses2/legacy/Util2.h
+++ b/moses2/legacy/Util2.h
@@ -17,27 +17,32 @@
namespace Moses2
{
+#ifdef TRACE_ERR
+#undef TRACE_ERR
+#endif
+#ifdef TRACE_ENABLE
+#define TRACE_ERR(str) do { std::cerr << str; } while (false)
+#else
+#define TRACE_ERR(str) do {} while (false)
+#endif
+
template<typename T>
class UnorderedComparer
{
public:
- size_t operator()(const T& obj) const
- {
+ size_t operator()(const T& obj) const {
return obj.hash();
}
- bool operator()(const T& a, const T& b) const
- {
+ bool operator()(const T& a, const T& b) const {
return a == b;
}
- size_t operator()(const T* obj) const
- {
+ size_t operator()(const T* obj) const {
return obj->hash();
}
- bool operator()(const T* a, const T* b) const
- {
+ bool operator()(const T* a, const T* b) const {
return (*a) == (*b);
}
@@ -53,7 +58,7 @@ void Init(T arr[], size_t size, const T &val)
//! delete white spaces at beginning and end of string
inline std::string Trim(const std::string& str, const std::string dropChars =
- " \t\n\r")
+ " \t\n\r")
{
std::string res = str;
res.erase(str.find_last_not_of(dropChars) + 1);
@@ -107,32 +112,32 @@ inline SearchAlgorithm Scan<SearchAlgorithm>(const std::string &input)
}
template<>
- inline XmlInputType Scan<XmlInputType>(const std::string &input)
- {
- XmlInputType ret;
- if (input=="exclusive") ret = XmlExclusive;
- else if (input=="inclusive") ret = XmlInclusive;
- else if (input=="constraint") ret = XmlConstraint;
- else if (input=="ignore") ret = XmlIgnore;
- else if (input=="pass-through") ret = XmlPassThrough;
- else {
- UTIL_THROW2("Unknown XML input type");
- }
-
- return ret;
+inline XmlInputType Scan<XmlInputType>(const std::string &input)
+{
+ XmlInputType ret;
+ if (input=="exclusive") ret = XmlExclusive;
+ else if (input=="inclusive") ret = XmlInclusive;
+ else if (input=="constraint") ret = XmlConstraint;
+ else if (input=="ignore") ret = XmlIgnore;
+ else if (input=="pass-through") ret = XmlPassThrough;
+ else {
+ UTIL_THROW2("Unknown XML input type");
}
+ return ret;
+}
+
template<>
- inline InputTypeEnum Scan<InputTypeEnum>(const std::string &input)
- {
- return (InputTypeEnum) Scan<size_t>(input);
- }
+inline InputTypeEnum Scan<InputTypeEnum>(const std::string &input)
+{
+ return (InputTypeEnum) Scan<size_t>(input);
+}
template<>
- inline WordAlignmentSort Scan<WordAlignmentSort>(const std::string &input)
- {
- return (WordAlignmentSort) Scan<size_t>(input);
- }
+inline WordAlignmentSort Scan<WordAlignmentSort>(const std::string &input)
+{
+ return (WordAlignmentSort) Scan<size_t>(input);
+}
//! convert vectors of string to vectors of type T variables
template<typename T>
@@ -182,7 +187,7 @@ inline std::vector<std::string> Tokenize(const std::string& str,
//! tokenise input string to vector of type T
template<typename T>
inline std::vector<T> Tokenize(const std::string &input,
- const std::string& delimiters = " \t")
+ const std::string& delimiters = " \t")
{
std::vector<std::string> stringVector = Tokenize(input, delimiters);
return Scan<T>(stringVector);
@@ -201,8 +206,7 @@ inline std::vector<std::string> TokenizeFirstOnly(const std::string& str,
// Found a token, add it to the vector.
tokens.push_back(str.substr(0, pos));
tokens.push_back(str.substr(pos + 1, str.size() - pos - 1));
- }
- else {
+ } else {
tokens.push_back(str);
}
@@ -210,7 +214,7 @@ inline std::vector<std::string> TokenizeFirstOnly(const std::string& str,
}
inline std::vector<std::string> TokenizeMultiCharSeparator(
- const std::string& str, const std::string& separator)
+ const std::string& str, const std::string& separator)
{
std::vector<std::string> tokens;
@@ -233,7 +237,7 @@ inline std::vector<std::string> TokenizeMultiCharSeparator(
// speeded up version of above
inline void TokenizeMultiCharSeparator(std::vector<std::string> &output,
- const std::string& str, const std::string& separator)
+ const std::string& str, const std::string& separator)
{
size_t pos = 0;
// Find first "non-delimiter".
@@ -296,7 +300,7 @@ template<class COLL>
void RemoveAllInColl(COLL &coll)
{
for (typename COLL::const_iterator iter = coll.begin(); iter != coll.end();
- ++iter) {
+ ++iter) {
delete (*iter);
}
coll.clear();
@@ -328,10 +332,8 @@ T &GetThreadSpecificObj(boost::thread_specific_ptr<T> &coll)
template<class T, class S, class C>
S& Container(std::priority_queue<T, S, C>& q)
{
- struct HackedQueue: private std::priority_queue<T, S, C>
- {
- static S& Container(std::priority_queue<T, S, C>& q)
- {
+ struct HackedQueue: private std::priority_queue<T, S, C> {
+ static S& Container(std::priority_queue<T, S, C>& q) {
return q.*&HackedQueue::c;
}
};
diff --git a/contrib/moses2/legacy/gzfilebuf.h b/moses2/legacy/gzfilebuf.h
index ea7021757..db5998095 100644
--- a/contrib/moses2/legacy/gzfilebuf.h
+++ b/moses2/legacy/gzfilebuf.h
@@ -15,40 +15,34 @@ namespace Moses2
class gzfilebuf: public std::streambuf
{
public:
- gzfilebuf(const char *filename)
- {
+ gzfilebuf(const char *filename) {
_gzf = gzopen(filename, "rb");
if (!_gzf) throw std::runtime_error(
"Could not open " + std::string(filename) + ".");
setg(_buff + sizeof(int), // beginning of putback area
- _buff + sizeof(int), // read position
- _buff + sizeof(int)); // end position
+ _buff + sizeof(int), // read position
+ _buff + sizeof(int)); // end position
}
- ~gzfilebuf()
- {
+ ~gzfilebuf() {
gzclose(_gzf);
}
protected:
- virtual int_type overflow(int_type /* c */)
- {
+ virtual int_type overflow(int_type /* c */) {
throw;
}
// write multiple characters
- virtual std::streamsize xsputn(const char* /* s */, std::streamsize /* num */)
- {
+ virtual std::streamsize xsputn(const char* /* s */, std::streamsize /* num */) {
throw;
}
virtual std::streampos seekpos(std::streampos /* sp */,
- std::ios_base::openmode /* which = std::ios_base::in | std::ios_base::out */)
- {
+ std::ios_base::openmode /* which = std::ios_base::in | std::ios_base::out */) {
throw;
}
//read one character
- virtual int_type underflow()
- {
+ virtual int_type underflow() {
// is read position before end of _buff?
if (gptr() < egptr()) {
return traits_type::to_int_type(*gptr());
@@ -67,7 +61,7 @@ protected:
* the putback _buff (area of first four characters)
*/
std::memmove(_buff + (sizeof(int) - numPutback), gptr() - numPutback,
- numPutback);
+ numPutback);
// read new characters
int num = gzread(_gzf, _buff + sizeof(int), _buffsize - sizeof(int));
@@ -78,15 +72,14 @@ protected:
// reset _buff pointers
setg(_buff + (sizeof(int) - numPutback), // beginning of putback area
- _buff + sizeof(int), // read position
- _buff + sizeof(int) + num); // end of buffer
+ _buff + sizeof(int), // read position
+ _buff + sizeof(int) + num); // end of buffer
// return next character
return traits_type::to_int_type(*gptr());
}
- std::streamsize xsgetn(char* s, std::streamsize num)
- {
+ std::streamsize xsgetn(char* s, std::streamsize num) {
return gzread(_gzf, s, num);
}
diff --git a/moses2/legacy/xmlrpc-c.h b/moses2/legacy/xmlrpc-c.h
new file mode 100644
index 000000000..1cdccad16
--- /dev/null
+++ b/moses2/legacy/xmlrpc-c.h
@@ -0,0 +1,10 @@
+#pragma once
+
+// When HAVE_XMLRPC_C is defined, pull in the real xmlrpc-c C++ API.
+// Otherwise only forward-declare xmlrpc_c::value, which is enough for
+// declarations that mention the type by reference.
+#ifdef HAVE_XMLRPC_C
+#include <xmlrpc-c/base.hpp>
+#else
+namespace xmlrpc_c
+{
+class value;
+}
+#endif
diff --git a/moses2/parameters/AllOptions.cpp b/moses2/parameters/AllOptions.cpp
new file mode 100644
index 000000000..954d7e8e7
--- /dev/null
+++ b/moses2/parameters/AllOptions.cpp
@@ -0,0 +1,118 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#include "../legacy/Parameter.h"
+#include "../legacy/Util2.h"
+#include "AllOptions.h"
+
+namespace Moses2
+{
+// Default constructor: both top-level flags start out false; the option
+// groups are default-constructed.
+AllOptions::AllOptions()
+  : mira(false), use_legacy_pt(false)
+{
+}
+
+// Construct from parsed parameters.
+// The flags are given defined values before init() runs: init() assigns
+// mira but never assigns use_legacy_pt, so without this initializer list
+// use_legacy_pt would be left indeterminate.
+// The boolean result of init() is not inspected here.
+AllOptions::
+AllOptions(Parameter const& param)
+  : mira(false)
+  , use_legacy_pt(false)
+{
+  init(param);
+}
+
+// Initialise every option group from param, in declaration order, stopping
+// at the first group whose init() reports failure. On success the "mira"
+// flag is read and the result of sanity_check() is returned.
+bool AllOptions::init(Parameter const& param)
+{
+  // && short-circuits, so no later group is touched after a failure.
+  const bool groupsOk = search.init(param)
+                        && cube.init(param)
+                        && nbest.init(param)
+                        && reordering.init(param)
+                        && context.init(param)
+                        && input.init(param)
+                        && mbr.init(param)
+                        && lmbr.init(param)
+                        && output.init(param)
+                        && unk.init(param)
+                        && server.init(param)
+                        && syntax.init(param);
+  if (!groupsOk) {
+    return false;
+  }
+
+  param.SetParameter(mira, "mira", false);
+
+  return sanity_check();
+}
+
+// Cross-check the option groups against each other.
+// Returns false, after printing a message to stderr, when incompatible MBR
+// settings are combined; otherwise adjusts dependent flags and returns true.
+bool AllOptions::sanity_check()
+{
+  // Lattice MBR switches mbr.enabled on itself, so it must not be set yet.
+  if (lmbr.enabled && mbr.enabled) {
+    std::cerr << "Error: Cannot use both n-best mbr and lattice mbr together" << std::endl;
+    return false;
+  }
+  if (lmbr.enabled) {
+    mbr.enabled = true;
+  }
+
+  // Consensus decoding follows the same rule, checked after lattice MBR.
+  if (search.consensus && mbr.enabled) {
+    std::cerr << "Error: Cannot use consensus decoding together with mbr"
+              << std::endl;
+    return false;
+  }
+  if (search.consensus) {
+    mbr.enabled = true;
+  }
+
+  // RecoverPath should only be used with confusion net or word lattice input
+  if (output.RecoverPath && input.input_type == SentenceInput) {
+    TRACE_ERR("--recover-input-path should only be used with confusion net or word lattice input!\n");
+    output.RecoverPath = false;
+  }
+
+  // Switch n-best handling on when any dependent feature is active; an
+  // already enabled nbest stays enabled.
+  const bool searchGraphRequested = !output.SearchGraph.empty()
+                                    || !output.SearchGraphExtended.empty()
+                                    || !output.SearchGraphSLF.empty()
+                                    || !output.SearchGraphHG.empty()
+                                    || !output.SearchGraphPB.empty();
+  const bool nbestNeeded = mira || search.consensus
+                           || nbest.nbest_size > 0
+                           || mbr.enabled || lmbr.enabled
+                           || searchGraphRequested
+                           || output.lattice_sample_size != 0;
+  if (nbestNeeded) {
+    nbest.enabled = true;
+  }
+
+  return true;
+}
+
+#ifdef HAVE_XMLRPC_C
+// Apply per-request overrides from an XML-RPC parameter map to each option
+// group in turn, stopping at the first failure, then re-run sanity_check().
+// The syntax group is deliberately left out, as in the original code.
+bool AllOptions::update(std::map<std::string,xmlrpc_c::value>const& param)
+{
+  const bool groupsOk = search.update(param)
+                        && cube.update(param)
+                        && nbest.update(param)
+                        && reordering.update(param)
+                        && context.update(param)
+                        && input.update(param)
+                        && mbr.update(param)
+                        && lmbr.update(param)
+                        && output.update(param)
+                        && unk.update(param)
+                        && server.update(param);
+  //if (!syntax.update(param)) return false;
+  if (!groupsOk) {
+    return false;
+  }
+  return sanity_check();
+}
+#endif
+
+// Returns true if nbest.only_distinct is set, if MBR or lattice MBR is
+// enabled, if lattice sampling is requested, or if any of the SearchGraph,
+// SearchGraphExtended, SearchGraphSLF or SearchGraphHG outputs is non-empty.
+bool AllOptions::NBestDistinct() const
+{
+  if (nbest.only_distinct || mbr.enabled || lmbr.enabled) {
+    return true;
+  }
+  if (output.lattice_sample_size) {
+    return true;
+  }
+  // De Morgan form of "at least one search-graph output is non-empty".
+  return !(output.SearchGraph.empty()
+           && output.SearchGraphExtended.empty()
+           && output.SearchGraphSLF.empty()
+           && output.SearchGraphHG.empty());
+}
+
+
+}
diff --git a/moses2/parameters/AllOptions.h b/moses2/parameters/AllOptions.h
new file mode 100644
index 000000000..2f09cd385
--- /dev/null
+++ b/moses2/parameters/AllOptions.h
@@ -0,0 +1,50 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#pragma once
+#include <string>
+#include <boost/shared_ptr.hpp>
+#include "OptionsBaseClass.h"
+#include "SearchOptions.h"
+#include "CubePruningOptions.h"
+#include "NBestOptions.h"
+#include "ReorderingOptions.h"
+#include "ContextParameters.h"
+#include "InputOptions.h"
+#include "MBR_Options.h"
+#include "LMBR_Options.h"
+#include "ReportingOptions.h"
+#include "OOVHandlingOptions.h"
+#include "ServerOptions.h"
+#include "SyntaxOptions.h"
+
+namespace Moses2
+{
+// Aggregate of all decoder option groups plus a few top-level flags.
+struct
+ AllOptions : public OptionsBaseClass {
+ typedef boost::shared_ptr<AllOptions const> ptr;
+ // One member per option group; init() initialises them in this order.
+ SearchOptions search;
+ CubePruningOptions cube;
+ NBestOptions nbest;
+ ReorderingOptions reordering;
+ ContextParameters context;
+ InputOptions input;
+ MBR_Options mbr;
+ LMBR_Options lmbr;
+ ReportingOptions output;
+ OOVHandlingOptions unk;
+ ServerOptions server;
+ SyntaxOptions syntax;
+ // Read from the "mira" parameter by init().
+ bool mira;
+ // NOTE(review): init() in AllOptions.cpp never assigns this flag; confirm who sets it.
+ bool use_legacy_pt;
+ // StackOptions stack;
+ // BeamSearchOptions beam;
+ // Initialise all groups from param; returns false as soon as one group's
+ // init() fails, otherwise the result of sanity_check().
+ bool init(Parameter const& param);
+ // Cross-group consistency check; may modify mbr.enabled,
+ // output.RecoverPath and nbest.enabled.
+ bool sanity_check();
+ AllOptions();
+ AllOptions(Parameter const& param);
+
+ // NOTE(review): declared unconditionally, but AllOptions.cpp only defines
+ // it when HAVE_XMLRPC_C is set.
+ bool update(std::map<std::string,xmlrpc_c::value>const& param);
+ // True if nbest.only_distinct is set or an MBR, lattice-sampling or
+ // search-graph option is active.
+ bool NBestDistinct() const;
+
+};
+
+}
diff --git a/moses2/parameters/BeamSearchOptions.h b/moses2/parameters/BeamSearchOptions.h
new file mode 100644
index 000000000..590c7a53f
--- /dev/null
+++ b/moses2/parameters/BeamSearchOptions.h
@@ -0,0 +1,14 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#pragma once
+#include <string>
+#include "OptionsBaseClass.h"
+namespace Moses2
+{
+
+// Option group for beam search; it declares no data members of its own.
+struct
+ BeamSearchOptions : public OptionsBaseClass {
+ // Read the group's settings from param.
+ bool init(Parameter const& param);
+ BeamSearchOptions(Parameter const& param);
+};
+
+}
diff --git a/contrib/moses2/parameters/BookkeepingOptions.cpp b/moses2/parameters/BookkeepingOptions.cpp
index d54f84644..d54f84644 100644
--- a/contrib/moses2/parameters/BookkeepingOptions.cpp
+++ b/moses2/parameters/BookkeepingOptions.cpp
diff --git a/moses2/parameters/BookkeepingOptions.h b/moses2/parameters/BookkeepingOptions.h
new file mode 100644
index 000000000..75a04a2a0
--- /dev/null
+++ b/moses2/parameters/BookkeepingOptions.h
@@ -0,0 +1,17 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#pragma once
+#include "OptionsBaseClass.h"
+
+namespace Moses2
+{
+class Parameter;
+
+// Option group holding bookkeeping flags.
+struct BookkeepingOptions : public OptionsBaseClass {
+ // NOTE(review): presumably "alignment information must be kept" — confirm in BookkeepingOptions.cpp.
+ bool need_alignment_info;
+ // Read the group's settings from param.
+ bool init(Parameter const& param);
+ BookkeepingOptions();
+};
+
+
+
+}
diff --git a/contrib/moses2/parameters/ContextParameters.cpp b/moses2/parameters/ContextParameters.cpp
index 144692399..3c5b894ee 100644
--- a/contrib/moses2/parameters/ContextParameters.cpp
+++ b/moses2/parameters/ContextParameters.cpp
@@ -1,5 +1,4 @@
#include "ContextParameters.h"
-#include "moses/Util.h"
#include "../legacy/Parameter.h"
namespace Moses2
@@ -7,7 +6,7 @@ namespace Moses2
ContextParameters::
ContextParameters()
- : look_ahead(0), look_back(0)
+ : look_ahead(0), look_back(0)
{ }
bool
@@ -21,18 +20,17 @@ init(Parameter const& params)
if (context_window == "")
return true;
-
- if (context_window.substr(0,3) == "all")
- {
- look_back = look_ahead = std::numeric_limits<size_t>::max();
- return true;
- }
-
+
+ if (context_window.substr(0,3) == "all") {
+ look_back = look_ahead = std::numeric_limits<size_t>::max();
+ return true;
+ }
+
size_t p = context_window.find_first_of("0123456789");
if (p == 0)
look_back = look_ahead = atoi(context_window.c_str());
-
- if (p == 1) {
+
+ if (p == 1) {
if (context_window[0] == '-')
look_back = atoi(context_window.substr(1).c_str());
else if (context_window[0] == '+')
diff --git a/contrib/moses2/parameters/ContextParameters.h b/moses2/parameters/ContextParameters.h
index 54923c548..5226e8eca 100644
--- a/contrib/moses2/parameters/ContextParameters.h
+++ b/moses2/parameters/ContextParameters.h
@@ -1,8 +1,6 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
-#include "moses/TypeDef.h"
-#include "moses/Util.h"
#include "OptionsBaseClass.h"
namespace Moses2
diff --git a/moses2/parameters/CubePruningOptions.cpp b/moses2/parameters/CubePruningOptions.cpp
new file mode 100644
index 000000000..0772eaddb
--- /dev/null
+++ b/moses2/parameters/CubePruningOptions.cpp
@@ -0,0 +1,76 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#include "CubePruningOptions.h"
+#include "../TypeDef.h"
+#include "../legacy/Parameter.h"
+
+namespace Moses2
+{
+
+// Default constructor: pop limit and diversity take their compile-time
+// defaults; lazy scoring and deterministic search are off.
+CubePruningOptions::CubePruningOptions()
+  : pop_limit(DEFAULT_CUBE_PRUNING_POP_LIMIT),
+    diversity(DEFAULT_CUBE_PRUNING_DIVERSITY),
+    lazy_scoring(false),
+    deterministic_search(false)
+{
+}
+
+// Read the cube-pruning settings from param. Always returns true.
+// deterministic_search is not read here (the call is disabled), so it keeps
+// whatever value it already had.
+bool CubePruningOptions::init(Parameter const& param)
+{
+  param.SetParameter(pop_limit,
+                     "cube-pruning-pop-limit",
+                     DEFAULT_CUBE_PRUNING_POP_LIMIT);
+  param.SetParameter(diversity,
+                     "cube-pruning-diversity",
+                     DEFAULT_CUBE_PRUNING_DIVERSITY);
+  param.SetParameter(lazy_scoring,
+                     "cube-pruning-lazy-scoring",
+                     false);
+  //param.SetParameter(deterministic_search, "cube-pruning-deterministic-search", false);
+  return true;
+}
+
+#ifdef HAVE_XMLRPC_C
+// Apply per-request overrides from an XML-RPC parameter map.
+//
+// Recognised keys:
+//   cube-pruning-pop-limit, cube-pruning-diversity      (integers)
+//   cube-pruning-lazy-scoring,
+//   cube-pruning-deterministic-search                   (boolean strings)
+// Boolean strings must be one of true/on/1 or false/off/0.
+//
+// Returns true on success. Throws xmlrpc_c::fault with CODE_PARSE when a
+// boolean specification cannot be interpreted. Previously the fault object
+// was constructed and immediately discarded, so malformed values were
+// silently ignored.
+bool
+CubePruningOptions::
+update(std::map<std::string,xmlrpc_c::value>const& params)
+{
+  typedef std::map<std::string, xmlrpc_c::value> params_t;
+
+  params_t::const_iterator si = params.find("cube-pruning-pop-limit");
+  if (si != params.end()) pop_limit = xmlrpc_c::value_int(si->second);
+
+  si = params.find("cube-pruning-diversity");
+  if (si != params.end()) diversity = xmlrpc_c::value_int(si->second);
+
+  // "||" is used instead of the alternative token "or", which some
+  // compilers only accept after including <ciso646>.
+  si = params.find("cube-pruning-lazy-scoring");
+  if (si != params.end()) {
+    std::string spec = xmlrpc_c::value_string(si->second);
+    if (spec == "true" || spec == "on" || spec == "1")
+      lazy_scoring = true;
+    else if (spec == "false" || spec == "off" || spec == "0")
+      lazy_scoring = false;
+    else {
+      char const* msg
+      = "Error parsing specification for cube-pruning-lazy-scoring";
+      throw xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE);
+    }
+  }
+
+  si = params.find("cube-pruning-deterministic-search");
+  if (si != params.end()) {
+    std::string spec = xmlrpc_c::value_string(si->second);
+    if (spec == "true" || spec == "on" || spec == "1")
+      deterministic_search = true;
+    else if (spec == "false" || spec == "off" || spec == "0")
+      deterministic_search = false;
+    else {
+      char const* msg
+      = "Error parsing specification for cube-pruning-deterministic-search";
+      throw xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE);
+    }
+  }
+
+  return true;
+}
+#endif
+
+
+}
diff --git a/moses2/parameters/CubePruningOptions.h b/moses2/parameters/CubePruningOptions.h
new file mode 100644
index 000000000..6fa43b7ec
--- /dev/null
+++ b/moses2/parameters/CubePruningOptions.h
@@ -0,0 +1,24 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#pragma once
+#include <string>
+#include "OptionsBaseClass.h"
+
+namespace Moses2
+{
+
+// Option group controlling cube pruning.
+struct
+ CubePruningOptions : public OptionsBaseClass {
+ size_t pop_limit; // read from "cube-pruning-pop-limit"
+ size_t diversity; // read from "cube-pruning-diversity"
+ bool lazy_scoring; // read from "cube-pruning-lazy-scoring"
+ bool deterministic_search; // only changed by update(); init() leaves it alone
+
+ // Read the settings from param; always returns true.
+ bool init(Parameter const& param);
+ // NOTE(review): no definition of this constructor appears in CubePruningOptions.cpp — confirm it is defined or unused.
+ CubePruningOptions(Parameter const& param);
+ CubePruningOptions();
+
+ // Apply per-request overrides; CubePruningOptions.cpp only defines this
+ // when HAVE_XMLRPC_C is set.
+ bool
+ update(std::map<std::string,xmlrpc_c::value>const& params);
+};
+
+}
diff --git a/moses2/parameters/InputOptions.cpp b/moses2/parameters/InputOptions.cpp
new file mode 100644
index 000000000..7a8c9242c
--- /dev/null
+++ b/moses2/parameters/InputOptions.cpp
@@ -0,0 +1,99 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#include "InputOptions.h"
+#include <vector>
+#include <iostream>
+#include "../legacy/Parameter.h"
+
+namespace Moses2
+{
+
+// Default constructor: plain sentence input, XML markup passed through,
+// no placeholder factor, a single input factor 0, "|" as factor delimiter
+// and "<" / ">" as XML brackets.
+InputOptions::InputOptions()
+  : continue_partial_translation(false),
+    input_type(SentenceInput),
+    xml_policy(XmlPassThrough),
+    placeholder_factor(NOT_FOUND)
+{
+  factor_delimiter = "|";
+  factor_order.assign(1,0);
+  xml_brackets = std::pair<std::string, std::string>("<", ">");
+}
+
+// Read the input-related settings from param.
+// Always returns true; an invalid "xml-brackets" value does not produce a
+// false return but prints a message and terminates the process via exit(1).
+bool
+InputOptions::
+init(Parameter const& param)
+{
+ param.SetParameter(input_type, "inputtype", SentenceInput);
+ // Diagnostic output, compiled out.
+#if 0
+ if (input_type == SentenceInput) {
+ VERBOSE(2, "input type is: text input");
+ } else if (input_type == ConfusionNetworkInput) {
+ VERBOSE(2, "input type is: confusion net");
+ } else if (input_type == WordLatticeInput) {
+ VERBOSE(2, "input type is: word lattice");
+ } else if (input_type == TreeInputType) {
+ VERBOSE(2, "input type is: tree");
+ } else if (input_type == TabbedSentenceInput) {
+ VERBOSE(2, "input type is: tabbed sentence");
+ } else if (input_type == ForestInputType) {
+ VERBOSE(2, "input type is: forest");
+ }
+#endif
+
+
+ param.SetParameter(continue_partial_translation,
+ "continue-partial-translation", false);
+
+ param.SetParameter<XmlInputType>(xml_policy, "xml-input", XmlPassThrough);
+
+ // specify XML tags opening and closing brackets for XML option
+ // Do we really want this to be configurable???? UG
+ const PARAM_VEC *pspec;
+ pspec = param.GetParam("xml-brackets");
+ if (pspec && pspec->size()) {
+ // Only the first value is used; it must tokenize into exactly two strings.
+ std::vector<std::string> brackets = Tokenize(pspec->at(0));
+ if(brackets.size()!=2) {
+ std::cerr << "invalid xml-brackets value, "
+ << "must specify exactly 2 blank-delimited strings "
+ << "for XML tags opening and closing brackets"
+ << std::endl;
+ exit(1);
+ }
+
+ xml_brackets.first= brackets[0];
+ xml_brackets.second=brackets[1];
+
+ // Diagnostic output, compiled out.
+#if 0
+ VERBOSE(1,"XML tags opening and closing brackets for XML input are: "
+ << xml_brackets.first << " and "
+ << xml_brackets.second << std::endl);
+#endif
+ }
+
+ // An absent or empty "input-factors" list falls back to the single factor 0.
+ pspec = param.GetParam("input-factors");
+ if (pspec) factor_order = Scan<FactorType>(*pspec);
+ if (factor_order.empty()) factor_order.assign(1,0);
+ param.SetParameter(placeholder_factor, "placeholder-factor", NOT_FOUND);
+
+ param.SetParameter<std::string>(factor_delimiter, "factor-delimiter", "|");
+ param.SetParameter<std::string>(input_file_path,"input-file","");
+
+ return true;
+}
+
+
+#ifdef HAVE_XMLRPC_C
+// Apply per-request overrides: only "xml-input" is honoured, replacing
+// xml_policy when the key is present. Always returns true.
+bool InputOptions::update(std::map<std::string,xmlrpc_c::value>const& param)
+{
+  std::map<std::string, xmlrpc_c::value>::const_iterator const entry
+    = param.find("xml-input");
+  if (entry == param.end()) {
+    return true;
+  }
+  xml_policy = Scan<XmlInputType>(xmlrpc_c::value_string(entry->second));
+  return true;
+}
+#endif
+
+}
diff --git a/moses2/parameters/InputOptions.h b/moses2/parameters/InputOptions.h
new file mode 100644
index 000000000..6e70e1e1e
--- /dev/null
+++ b/moses2/parameters/InputOptions.h
@@ -0,0 +1,31 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#pragma once
+// Every standard container named below is included directly, so this
+// header does not rely on transitive includes.
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
+#include "OptionsBaseClass.h"
+#include "../TypeDef.h"
+
+namespace Moses2
+{
+// Option group describing how decoder input is read and interpreted.
+struct
+  InputOptions : public OptionsBaseClass {
+  bool continue_partial_translation;
+  InputTypeEnum input_type;
+  XmlInputType xml_policy; // pass through, ignore, exclusive, inclusive
+  std::vector<FactorType> factor_order; // input factor order
+  std::string factor_delimiter;
+  FactorType placeholder_factor; // where to store original text for placeholders
+  std::string input_file_path;
+  std::pair<std::string,std::string> xml_brackets;
+  // strings to use as XML tags' opening and closing brackets.
+  // Default are "<" and ">"
+
+  InputOptions();
+
+  // Read the settings from param.
+  bool init(Parameter const& param);
+  // Apply per-request overrides from an XML-RPC parameter map.
+  bool update(std::map<std::string,xmlrpc_c::value>const& param);
+
+};
+
+}
+
diff --git a/moses2/parameters/LMBR_Options.cpp b/moses2/parameters/LMBR_Options.cpp
new file mode 100644
index 000000000..a65c071b9
--- /dev/null
+++ b/moses2/parameters/LMBR_Options.cpp
@@ -0,0 +1,39 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#include "LMBR_Options.h"
+#include "../legacy/Parameter.h"
+
+namespace Moses2
+{
+
+// Default constructor. Each value matches the default argument that init()
+// passes to SetParameter for the same member.
+LMBR_Options::
+LMBR_Options()
+  : enabled(false)
+  , use_lattice_hyp_set(false)
+  , precision(0.8f)
+  , ratio(0.6f)
+  , map_weight(0.0f) // was 0.8f, which disagreed with init()'s default of 0.0f
+  , pruning_factor(30)
+{ }
+
+// Read the lattice-MBR settings from param. The theta vector is replaced
+// only when "lmbr-thetas" is present. Always returns true.
+bool LMBR_Options::init(Parameter const& param)
+{
+  param.SetParameter(enabled, "lminimum-bayes-risk", false);
+
+  param.SetParameter(ratio, "lmbr-r", 0.6f);
+  param.SetParameter(precision, "lmbr-p", 0.8f);
+  param.SetParameter(map_weight, "lmbr-map-weight", 0.0f);
+  param.SetParameter(pruning_factor, "lmbr-pruning-factor", size_t(30));
+  param.SetParameter(use_lattice_hyp_set, "lattice-hypo-set", false);
+
+  if (PARAM_VEC const* thetaSpec = param.GetParam("lmbr-thetas")) {
+    theta = Scan<float>(*thetaSpec);
+  }
+
+  return true;
+}
+
+
+
+
+}
diff --git a/moses2/parameters/LMBR_Options.h b/moses2/parameters/LMBR_Options.h
new file mode 100644
index 000000000..84e5fd759
--- /dev/null
+++ b/moses2/parameters/LMBR_Options.h
@@ -0,0 +1,25 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#pragma once
+#include <string>
+#include <vector>
+#include "OptionsBaseClass.h"
+
+namespace Moses2
+{
+
+// Options for lattice minimum Bayes risk (LMBR) decoding
+struct
+ LMBR_Options : public OptionsBaseClass {
+ bool enabled;
+ bool use_lattice_hyp_set; //! to use nbest as hypothesis set during lattice MBR
+ float precision; //! unigram precision theta - see Tromble et al 08 for more details
+ float ratio; //! decaying factor for ngram thetas - see Tromble et al 08
+ float map_weight; //! Weight given to the map solution. See Kumar et al 09
+ size_t pruning_factor; //! average number of nodes per word wanted in pruned lattice
+ std::vector<float> theta; //! theta(s) for lattice mbr calculation
+ //! read the settings from param
+ bool init(Parameter const& param);
+ LMBR_Options();
+};
+
+}
+
diff --git a/contrib/moses2/parameters/LookupOptions.h b/moses2/parameters/LookupOptions.h
index 3728d97d1..58aa733cf 100644
--- a/contrib/moses2/parameters/LookupOptions.h
+++ b/moses2/parameters/LookupOptions.h
@@ -6,12 +6,11 @@
namespace Moses2
{
- struct
- LookupOptions : public OptionsBaseClass
- {
- bool init(Parameter const& param);
- LookupOptions() {}
- };
+struct
+ LookupOptions : public OptionsBaseClass {
+ bool init(Parameter const& param);
+ LookupOptions() {}
+};
}
diff --git a/moses2/parameters/MBR_Options.cpp b/moses2/parameters/MBR_Options.cpp
new file mode 100644
index 000000000..0e7abd384
--- /dev/null
+++ b/moses2/parameters/MBR_Options.cpp
@@ -0,0 +1,26 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#include "MBR_Options.h"
+#include "../legacy/Parameter.h"
+
+namespace Moses2
+{
+
+// Default constructor: MBR disabled, 200 candidates, scale 1.0.
+MBR_Options::MBR_Options()
+  : enabled(false), size(200), scale(1.0f)
+{
+}
+
+
+// Read the n-best MBR settings from param. Always returns true.
+bool MBR_Options::init(Parameter const& param)
+{
+  param.SetParameter(enabled,
+                     "minimum-bayes-risk",
+                     false);
+  param.SetParameter<size_t>(size,
+                             "mbr-size",
+                             200);
+  param.SetParameter(scale,
+                     "mbr-scale",
+                     1.0f);
+  return true;
+}
+
+}
diff --git a/moses2/parameters/MBR_Options.h b/moses2/parameters/MBR_Options.h
new file mode 100644
index 000000000..0f8068ca0
--- /dev/null
+++ b/moses2/parameters/MBR_Options.h
@@ -0,0 +1,20 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#pragma once
+#include <string>
+#include "OptionsBaseClass.h"
+namespace Moses2
+{
+
+// Options for minimum Bayes risk (MBR) decoding
+struct
+ MBR_Options : public OptionsBaseClass {
+ bool enabled;
+ size_t size; //! number of translation candidates considered
+ float scale; /*! scaling factor for computing marginal probability
+ * of candidate translation */
+ //! read the settings from param
+ bool init(Parameter const& param);
+ MBR_Options();
+};
+
+}
+
diff --git a/contrib/moses2/parameters/NBestOptions.cpp b/moses2/parameters/NBestOptions.cpp
index 0536793b8..d72c155e2 100644
--- a/contrib/moses2/parameters/NBestOptions.cpp
+++ b/moses2/parameters/NBestOptions.cpp
@@ -5,19 +5,19 @@
namespace Moses2
{
- NBestOptions::
- NBestOptions()
- : nbest_size(0)
- , factor(20)
- , enabled(false)
- , print_trees(false)
- , only_distinct(false)
- , include_alignment_info(false)
- , include_feature_labels(true)
- , include_segmentation(false)
- , include_passthrough(false)
- , include_all_factors(false)
- {}
+NBestOptions::
+NBestOptions()
+ : nbest_size(0)
+ , factor(20)
+ , enabled(false)
+ , print_trees(false)
+ , only_distinct(false)
+ , include_alignment_info(false)
+ , include_feature_labels(true)
+ , include_segmentation(false)
+ , include_passthrough(false)
+ , include_all_factors(false)
+{}
bool
@@ -48,9 +48,9 @@ init(Parameter const& P)
enabled = output_file_path.size();
return true;
}
-
+
#ifdef HAVE_XMLRPC_C
-bool
+bool
NBestOptions::
update(std::map<std::string,xmlrpc_c::value>const& param)
{
diff --git a/contrib/moses2/parameters/NBestOptions.h b/moses2/parameters/NBestOptions.h
index f2e478b84..d3caed425 100644
--- a/contrib/moses2/parameters/NBestOptions.h
+++ b/moses2/parameters/NBestOptions.h
@@ -5,8 +5,7 @@
namespace Moses2
{
-struct NBestOptions : public OptionsBaseClass
-{
+struct NBestOptions : public OptionsBaseClass {
size_t nbest_size;
size_t factor;
bool enabled;
diff --git a/moses2/parameters/OOVHandlingOptions.cpp b/moses2/parameters/OOVHandlingOptions.cpp
new file mode 100644
index 000000000..c7a5e30f2
--- /dev/null
+++ b/moses2/parameters/OOVHandlingOptions.cpp
@@ -0,0 +1,48 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#include "OOVHandlingOptions.h"
+#include <vector>
+#include <iostream>
+#include "../legacy/Parameter.h"
+
+namespace Moses2
+{
+
+// Default constructor: unknown words are neither dropped nor marked, the
+// prefix is "UNK", the suffix is empty, and both remaining flags are off.
+OOVHandlingOptions::OOVHandlingOptions()
+  : drop(false),
+    mark(false),
+    prefix("UNK"),
+    suffix(""),
+    word_deletion_enabled(false),
+    always_create_direct_transopt(false)
+{
+}
+
+// Read the unknown-word handling settings from param. Always returns true.
+bool OOVHandlingOptions::init(Parameter const& param)
+{
+  param.SetParameter(drop,
+                     "drop-unknown", false);
+  param.SetParameter(mark,
+                     "mark-unknown", false);
+  param.SetParameter(word_deletion_enabled,
+                     "phrase-drop-allowed", false);
+  param.SetParameter(always_create_direct_transopt,
+                     "always-create-direct-transopt", false);
+  param.SetParameter<std::string>(prefix,
+                                  "unknown-word-prefix", "UNK");
+  param.SetParameter<std::string>(suffix,
+                                  "unknown-word-suffix", "");
+  return true;
+}
+
+
+#ifdef HAVE_XMLRPC_C
+// Per-request override hook. No key is currently honoured for this group,
+// so param is ignored and the call always succeeds.
+bool OOVHandlingOptions::update(std::map<std::string,xmlrpc_c::value>const& param)
+{
+  return true;
+}
+#endif
+
+}
diff --git a/moses2/parameters/OOVHandlingOptions.h b/moses2/parameters/OOVHandlingOptions.h
new file mode 100644
index 000000000..1b56d6d93
--- /dev/null
+++ b/moses2/parameters/OOVHandlingOptions.h
@@ -0,0 +1,26 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#pragma once
+#include <string>
+#include <string>
+#include "OptionsBaseClass.h"
+
+namespace Moses2
+{
+// Option group for handling out-of-vocabulary (unknown) words.
+struct
+ OOVHandlingOptions : public OptionsBaseClass {
+ bool drop; // read from "drop-unknown"
+ bool mark; // read from "mark-unknown"
+ std::string prefix; // read from "unknown-word-prefix", default "UNK"
+ std::string suffix; // read from "unknown-word-suffix", default ""
+
+ bool word_deletion_enabled; // read from "phrase-drop-allowed"
+ bool always_create_direct_transopt; // read from "always-create-direct-transopt"
+ OOVHandlingOptions();
+
+ // Read the settings from param; always returns true.
+ bool init(Parameter const& param);
+ // Per-request override hook; OOVHandlingOptions.cpp only defines it when
+ // HAVE_XMLRPC_C is set.
+ bool update(std::map<std::string,xmlrpc_c::value>const& param);
+
+};
+
+}
+
diff --git a/moses2/parameters/OptionsBaseClass.cpp b/moses2/parameters/OptionsBaseClass.cpp
new file mode 100644
index 000000000..8ccb0a563
--- /dev/null
+++ b/moses2/parameters/OptionsBaseClass.cpp
@@ -0,0 +1,29 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
+#include "OptionsBaseClass.h"
+#include "../legacy/Parameter.h"
+
+namespace Moses2
+{
+
+#ifdef HAVE_XMLRPC_C
+// Base-class implementation: ignores the parameter map and returns true.
+bool
+OptionsBaseClass::update(std::map<std::string,xmlrpc_c::value>const& /* params */)
+{
+  return true;
+}
+#endif
+
+#ifdef HAVE_XMLRPC_C
+// Boolean lookup in an XML-RPC parameter map: returns DFLTVAL when KEY is
+// missing, otherwise the entry's string value converted with Scan<bool>.
+bool
+OptionsBaseClass::
+check(std::map<std::string, xmlrpc_c::value> const& param,
+      std::string const key, bool dfltval)
+{
+  typedef std::map<std::string, xmlrpc_c::value> params_t;
+
+  params_t::const_iterator const hit = param.find(key);
+  if (hit != param.end()) {
+    return Scan<bool>(xmlrpc_c::value_string(hit->second));
+  }
+  return dfltval;
+}
+#endif
+}
diff --git a/moses2/parameters/OptionsBaseClass.h b/moses2/parameters/OptionsBaseClass.h
new file mode 100644
index 000000000..5265e9b23
--- /dev/null
+++ b/moses2/parameters/OptionsBaseClass.h
@@ -0,0 +1,19 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#pragma once
+#include "../legacy/xmlrpc-c.h"
+#include <string>
+#include <map>
+namespace Moses2
+{
+class Parameter;
+
+struct OptionsBaseClass { // common base of OOVHandlingOptions, ReorderingOptions, ReportingOptions and SearchOptions
+#ifdef HAVE_XMLRPC_C
+ virtual bool
+ update(std::map<std::string,xmlrpc_c::value>const& params); // base version ignores params and returns true
+#endif
+ bool
+ check(std::map<std::string, xmlrpc_c::value> const& param,
+ std::string const key, bool dfltval); // NOTE(review): declared unconditionally, but OptionsBaseClass.cpp defines it only under HAVE_XMLRPC_C
+};
+}
diff --git a/moses2/parameters/ReorderingOptions.cpp b/moses2/parameters/ReorderingOptions.cpp
new file mode 100644
index 000000000..64e777de7
--- /dev/null
+++ b/moses2/parameters/ReorderingOptions.cpp
@@ -0,0 +1,31 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#include "ReorderingOptions.h"
+#include "../legacy/Parameter.h"
+
+namespace Moses2
+{
+
+// Default construction: distortion limit -1, both boolean switches off.
+ReorderingOptions::ReorderingOptions()
+{
+  max_distortion = -1;
+  monotone_at_punct = false;
+  use_early_distortion_cost = false;
+}
+
+
+// Construct directly from parsed parameters; all members are set by init().
+ReorderingOptions::ReorderingOptions(Parameter const& param)
+{
+  this->init(param);
+}
+
+// Read the three reordering options from PARAM. Always returns true.
+bool
+ReorderingOptions::init(Parameter const& param)
+{
+  param.SetParameter(use_early_distortion_cost, "early-distortion-cost", false);
+  param.SetParameter(monotone_at_punct, "monotone-at-punctuation", false);
+  param.SetParameter(max_distortion, "distortion-limit", -1);
+  return true;
+}
+}
diff --git a/moses2/parameters/ReorderingOptions.h b/moses2/parameters/ReorderingOptions.h
new file mode 100644
index 000000000..319124e83
--- /dev/null
+++ b/moses2/parameters/ReorderingOptions.h
@@ -0,0 +1,19 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#pragma once
+#include <string>
+#include "OptionsBaseClass.h"
+namespace Moses2
+{
+
+// Reordering settings. The quoted names are the parameter names read by
+// ReorderingOptions::init().
+struct ReorderingOptions : public OptionsBaseClass {
+  // data
+  int max_distortion;              // "distortion-limit", default -1
+  bool monotone_at_punct;          // "monotone-at-punctuation"
+  bool use_early_distortion_cost;  // "early-distortion-cost"
+
+  // construction
+  ReorderingOptions();
+  ReorderingOptions(Parameter const& param);
+
+  // (re-)initialisation from parsed parameters
+  bool init(Parameter const& param);
+};
+
+}
+
diff --git a/moses2/parameters/ReportingOptions.cpp b/moses2/parameters/ReportingOptions.cpp
new file mode 100644
index 000000000..428cc0566
--- /dev/null
+++ b/moses2/parameters/ReportingOptions.cpp
@@ -0,0 +1,152 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#include "ReportingOptions.h"
+#include "../legacy/Parameter.h"
+
+namespace Moses2
+{
+using namespace std;
+
+// Default construction: every report is switched off, output uses factor 0
+// only, and "|" is the factor delimiter. String members start out empty.
+ReportingOptions::ReportingOptions()
+  : start_translation_id(0)
+  , factor_order(1, FactorType(0))
+  , factor_delimiter("|")
+  , ReportAllFactors(false)
+  , ReportSegmentation(0)
+  , PrintAlignmentInfo(false)
+  , PrintAllDerivations(false)
+  , PrintTranslationOptions(false)
+  , WA_SortOrder(NoSort)
+  , WordGraph(false)
+  , DontPruneSearchGraph(false)
+  , RecoverPath(false)
+  , ReportHypoScore(false)
+  , PrintID(false)
+  , PrintPassThrough(false)
+  , include_lhs_in_search_graph(false)
+  , lattice_sample_size(0)
+{}
+
+// Read all reporting options from the parsed parameters.
+// Returns false (after writing a message to std::cerr) when -lattice-samples
+// is present with anything but exactly two values, and true otherwise.
+bool
+ReportingOptions::
+init(Parameter const& param)
+{
+  param.SetParameter<long>(start_translation_id, "start-translation-id", 0);
+
+  // including factors in the output
+  param.SetParameter(ReportAllFactors, "report-all-factors", false);
+
+  // segmentation reporting: 2 = enriched, 1 = plain, 0 = off
+  if (param.GetParam("report-segmentation-enriched")) {
+    ReportSegmentation = 2;
+  } else if (param.GetParam("report-segmentation")) {
+    ReportSegmentation = 1;
+  } else {
+    ReportSegmentation = 0;
+  }
+
+  // word alignment reporting
+  param.SetParameter(PrintAlignmentInfo, "print-alignment-info", false);
+  param.SetParameter(WA_SortOrder, "sort-word-alignment", NoSort);
+  std::string e; // hack to save us param.SetParameter<string>(...)
+  param.SetParameter(AlignmentOutputFile,"alignment-output-file", e);
+
+
+  param.SetParameter(PrintAllDerivations, "print-all-derivations", false);
+  param.SetParameter(PrintTranslationOptions, "print-translation-option", false);
+
+  // output a word graph (enabled only when the switch carries two values)
+  PARAM_VEC const* params;
+  params = param.GetParam("output-word-graph");
+  WordGraph = (params && params->size() == 2); // what are the two options?
+
+  // dump the search graph
+  param.SetParameter(SearchGraph, "output-search-graph", e);
+  param.SetParameter(SearchGraphExtended, "output-search-graph-extended", e);
+  param.SetParameter(SearchGraphSLF,"output-search-graph-slf", e);
+  param.SetParameter(SearchGraphHG, "output-search-graph-hypergraph", e);
+#ifdef HAVE_PROTOBUF
+  param.SetParameter(SearchGraphPB, "output-search-graph-pb", e);
+#endif
+
+  param.SetParameter(DontPruneSearchGraph, "unpruned-search-graph", false);
+  param.SetParameter(include_lhs_in_search_graph,
+                     "include-lhs-in-search-graph", false );
+
+
+  // miscellaneous
+  param.SetParameter(RecoverPath, "recover-input-path",false);
+  param.SetParameter(ReportHypoScore, "output-hypo-score",false);
+  param.SetParameter(PrintID, "print-id",false);
+  param.SetParameter(PrintPassThrough, "print-passthrough",false);
+  param.SetParameter(detailed_all_transrep_filepath,
+                     "translation-all-details", e);
+  param.SetParameter(detailed_transrep_filepath, "translation-details", e);
+  param.SetParameter(detailed_tree_transrep_filepath,
+                     "tree-translation-details", e);
+
+  // lattice sampling: expects "<file> <size>"
+  params = param.GetParam("lattice-samples");
+  if (params) {
+    if (params->size() ==2 ) {
+      lattice_sample_filepath = params->at(0);
+      lattice_sample_size = Scan<size_t>(params->at(1));
+    } else {
+      // terminate the line so later log output does not run into the message
+      std::cerr <<"wrong format for switch -lattice-samples file size"
+                << std::endl;
+      return false;
+    }
+  }
+
+
+  // which factors to print: all of them, the requested ones, or factor 0
+  if (ReportAllFactors) {
+    factor_order.clear();
+    for (size_t i = 0; i < MAX_NUM_FACTORS; ++i)
+      factor_order.push_back(i);
+  } else {
+    params= param.GetParam("output-factors");
+    if (params) factor_order = Scan<FactorType>(*params);
+    if (factor_order.empty()) factor_order.assign(1,0);
+  }
+
+  // "output-factor-delimiter" takes precedence over "factor-delimiter"
+  param.SetParameter(factor_delimiter, "factor-delimiter", std::string("|"));
+  param.SetParameter(factor_delimiter, "output-factor-delimiter", factor_delimiter);
+
+  return true;
+}
+
+#ifdef HAVE_XMLRPC_C
+// Apply reporting overrides found in a map of XML-RPC values. Keys read:
+// "report-all-factors", "output-factors", "align", "word-align",
+// "factor-delimiter" and "output-factor-delimiter". Always returns true.
+bool
+ReportingOptions::
+update(std::map<std::string, xmlrpc_c::value>const& param)
+{
+  ReportAllFactors = check(param, "report-all-factors", ReportAllFactors);
+
+
+  std::map<std::string, xmlrpc_c::value>::const_iterator m;
+  m = param.find("output-factors");
+  if (m != param.end()) {
+    factor_order=Tokenize<FactorType>(xmlrpc_c::value_string(m->second),",");
+    // same fallback as init(): never leave the output without any factor
+    if (factor_order.empty()) factor_order.assign(1,0);
+  }
+
+  // reporting all factors overrides any explicit factor list
+  if (ReportAllFactors) {
+    factor_order.clear();
+    for (size_t i = 0; i < MAX_NUM_FACTORS; ++i)
+      factor_order.push_back(i);
+  }
+
+  // "align" can only switch segmentation reporting on, never off
+  m = param.find("align");
+  if (m != param.end() && Scan<bool>(xmlrpc_c::value_string(m->second)))
+    ReportSegmentation = 1;
+
+  PrintAlignmentInfo = check(param,"word-align",PrintAlignmentInfo);
+
+  m = param.find("factor-delimiter");
+  if (m != param.end()) {
+    factor_delimiter = Trim(xmlrpc_c::value_string(m->second));
+  }
+
+  // read last, so it wins over "factor-delimiter" when both are present
+  m = param.find("output-factor-delimiter");
+  if (m != param.end()) {
+    factor_delimiter = Trim(xmlrpc_c::value_string(m->second));
+  }
+
+  return true;
+}
+#endif
+}
diff --git a/moses2/parameters/ReportingOptions.h b/moses2/parameters/ReportingOptions.h
new file mode 100644
index 000000000..6b491f3b6
--- /dev/null
+++ b/moses2/parameters/ReportingOptions.h
@@ -0,0 +1,69 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#pragma once
+#include <string>
+#include <vector>
+#include "OptionsBaseClass.h"
+#include "../TypeDef.h"
+
+namespace Moses2
+{
+
+// Everything that controls what the decoder reports and where it writes it.
+// Values are read from the parameters by ReportingOptions::init().
+struct ReportingOptions : public OptionsBaseClass {
+  long start_translation_id;
+
+  // factors in the output
+  std::vector<FactorType> factor_order;
+  std::string factor_delimiter;
+  bool ReportAllFactors; // m_reportAllFactors;
+
+  int ReportSegmentation; // 0: no 1: m_reportSegmentation 2: ..._enriched
+
+  // alignment and derivation output
+  bool PrintAlignmentInfo; // m_PrintAlignmentInfo
+  bool PrintAllDerivations;
+  bool PrintTranslationOptions;
+  WordAlignmentSort WA_SortOrder; // 0: no, 1: target order
+  std::string AlignmentOutputFile;
+
+  bool WordGraph;
+
+  // search graph dumps
+  std::string SearchGraph;
+  std::string SearchGraphExtended;
+  std::string SearchGraphSLF;
+  std::string SearchGraphHG;
+  std::string SearchGraphPB;
+  bool DontPruneSearchGraph;
+
+  bool RecoverPath; // recover input path?
+  bool ReportHypoScore;
+  bool PrintID;
+  bool PrintPassThrough;
+
+  // transrep = translation reporting
+  std::string detailed_transrep_filepath;
+  std::string detailed_tree_transrep_filepath;
+  std::string detailed_all_transrep_filepath;
+  bool include_lhs_in_search_graph;
+
+  // lattice sampling
+  std::string lattice_sample_filepath;
+  size_t lattice_sample_size;
+
+  ReportingOptions();
+
+  bool init(Parameter const& param);
+
+#ifdef HAVE_XMLRPC_C
+  bool update(std::map<std::string, xmlrpc_c::value>const& param);
+#endif
+
+  /// do we need to keep the search graph from decoding?
+  bool NeedSearchGraph() const {
+    return !SearchGraph.empty() || !SearchGraphExtended.empty();
+  }
+};
+
+}
+
diff --git a/moses2/parameters/SearchOptions.cpp b/moses2/parameters/SearchOptions.cpp
new file mode 100644
index 000000000..b3a468896
--- /dev/null
+++ b/moses2/parameters/SearchOptions.cpp
@@ -0,0 +1,106 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#include "SearchOptions.h"
+#include "../legacy/Parameter.h"
+
+namespace Moses2
+{
+
+// Default construction: normal search with the compiled-in DEFAULT_* limits
+// and thresholds; diversity, timeout and the boolean switches are zero/off.
+SearchOptions::SearchOptions()
+{
+  algo = Normal;
+
+  stack_size = DEFAULT_MAX_HYPOSTACK_SIZE;
+  stack_diversity = 0;
+  disable_discarding = false;
+
+  max_phrase_length = DEFAULT_MAX_PHRASE_LENGTH;
+  max_trans_opt_per_cov = DEFAULT_MAX_TRANS_OPT_SIZE;
+  max_partial_trans_opt = DEFAULT_MAX_PART_TRANS_OPT_SIZE;
+
+  beam_width = DEFAULT_BEAM_WIDTH;
+  timeout = 0;
+  consensus = false;
+
+  early_discarding_threshold = DEFAULT_EARLY_DISCARDING_THRESHOLD;
+  trans_opt_threshold = DEFAULT_TRANSLATION_OPTION_THRESHOLD;
+}
+
+// Construct directly from parsed parameters by delegating to init().
+SearchOptions::SearchOptions(Parameter const& param)
+{
+  stack_diversity = 0;
+  this->init(param);
+}
+
+// Read all search options from PARAM. The three thresholds are passed
+// through TransformScore() after reading. Always returns true.
+bool
+SearchOptions::init(Parameter const& param)
+{
+  // search strategy and stack handling
+  param.SetParameter(algo, "search-algorithm", Normal);
+  param.SetParameter(stack_size, "stack", DEFAULT_MAX_HYPOSTACK_SIZE);
+  param.SetParameter(stack_diversity, "stack-diversity", size_t(0));
+  param.SetParameter(disable_discarding, "disable-discarding", false);
+  param.SetParameter(consensus, "consensus-decoding", false);
+  param.SetParameter(timeout, "time-out", 0);
+
+  // limits on phrases and translation options
+  param.SetParameter(max_phrase_length, "max-phrase-length",
+                     DEFAULT_MAX_PHRASE_LENGTH);
+  param.SetParameter(max_trans_opt_per_cov, "max-trans-opt-per-coverage",
+                     DEFAULT_MAX_TRANS_OPT_SIZE);
+  param.SetParameter(max_partial_trans_opt, "max-partial-trans-opt",
+                     DEFAULT_MAX_PART_TRANS_OPT_SIZE);
+
+  // thresholds: read first, then transformed to log
+  param.SetParameter(beam_width, "beam-threshold", DEFAULT_BEAM_WIDTH);
+  beam_width = TransformScore(beam_width);
+
+  param.SetParameter(trans_opt_threshold, "translation-option-threshold",
+                     DEFAULT_TRANSLATION_OPTION_THRESHOLD);
+  trans_opt_threshold = TransformScore(trans_opt_threshold);
+
+  param.SetParameter(early_discarding_threshold, "early-discarding-threshold",
+                     DEFAULT_EARLY_DISCARDING_THRESHOLD);
+  early_discarding_threshold = TransformScore(early_discarding_threshold);
+
+  return true;
+}
+
+// True for the six syntax-based search algorithms listed below, false for
+// every other value of ALGO.
+bool
+is_syntax(SearchAlgorithm algo)
+{
+  switch (algo) {
+  case CYKPlus:
+  case ChartIncremental:
+  case SyntaxS2T:
+  case SyntaxT2S:
+  case SyntaxF2S:
+  case SyntaxT2S_SCFG:
+    return true;
+  default:
+    return false;
+  }
+}
+
+#ifdef HAVE_XMLRPC_C
+// Apply search overrides found in a map of XML-RPC values. Keys read:
+// "search-algorithm" ("normal"/"0" or "cube"/"1"), "stack",
+// "stack-diversity", "beam-threshold", "time-out" and "max-phrase-length".
+// Throws xmlrpc_c::fault for any other search algorithm; returns true
+// otherwise.
+bool
+SearchOptions::
+update(std::map<std::string,xmlrpc_c::value>const& params)
+{
+  typedef std::map<std::string, xmlrpc_c::value> params_t;
+
+  params_t::const_iterator si = params.find("search-algorithm");
+  if (si != params.end()) {
+    // use named parameters
+    std::string spec = xmlrpc_c::value_string(si->second);
+    if (spec == "normal" || spec == "0") algo = Normal;
+    else if (spec == "cube" || spec == "1") algo = CubePruning;
+    else throw xmlrpc_c::fault("Unsupported search algorithm",
+                               xmlrpc_c::fault::CODE_PARSE);
+  }
+
+  si = params.find("stack");
+  if (si != params.end()) stack_size = xmlrpc_c::value_int(si->second);
+
+  si = params.find("stack-diversity");
+  if (si != params.end()) stack_diversity = xmlrpc_c::value_int(si->second);
+
+  si = params.find("beam-threshold");
+  if (si != params.end()) {
+    // init() stores beam_width after TransformScore(), so a value supplied
+    // here must go through the same transformation to stay comparable.
+    double const raw = xmlrpc_c::value_double(si->second);
+    beam_width = TransformScore(raw);
+  }
+
+  si = params.find("time-out");
+  if (si != params.end()) timeout = xmlrpc_c::value_int(si->second);
+
+  si = params.find("max-phrase-length");
+  if (si != params.end()) max_phrase_length = xmlrpc_c::value_int(si->second);
+
+  return true;
+}
+#endif
+
+}
diff --git a/moses2/parameters/SearchOptions.h b/moses2/parameters/SearchOptions.h
new file mode 100644
index 000000000..31e364d14
--- /dev/null
+++ b/moses2/parameters/SearchOptions.h
@@ -0,0 +1,53 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#pragma once
+#include <string>
+#include <limits>
+#include "OptionsBaseClass.h"
+#include "../TypeDef.h"
+
+namespace Moses2
+{
+
+bool is_syntax(SearchAlgorithm algo);
+
+struct
+ SearchOptions : public OptionsBaseClass {
+ SearchAlgorithm algo;
+
+ // stack decoding
+ size_t stack_size; // maxHypoStackSize;
+ size_t stack_diversity; // minHypoStackDiversity;
+ bool disable_discarding;
+ // Disable discarding of bad hypotheses from HypothesisStackNormal
+ size_t max_phrase_length;
+ size_t max_trans_opt_per_cov;
+ size_t max_partial_trans_opt;
+ // beam search
+ float beam_width;
+
+ int timeout;
+
+ bool consensus; //! Use Consensus decoding (DeNero et al 2009)
+
+ // reordering options
+ // bool reorderingConstraint; //! use additional reordering constraints
+ // bool useEarlyDistortionCost;
+
+ float early_discarding_threshold;
+ float trans_opt_threshold;
+
+ bool init(Parameter const& param);
+ SearchOptions(Parameter const& param);
+ SearchOptions();
+
+ bool
+ UseEarlyDiscarding() const {
+ return early_discarding_threshold != -std::numeric_limits<float>::infinity();
+ }
+
+ bool
+ update(std::map<std::string,xmlrpc_c::value>const& params);
+
+};
+
+}
diff --git a/contrib/moses2/parameters/ServerOptions.cpp b/moses2/parameters/ServerOptions.cpp
index d8942c5c7..3a21c1891 100644
--- a/contrib/moses2/parameters/ServerOptions.cpp
+++ b/moses2/parameters/ServerOptions.cpp
@@ -14,27 +14,23 @@ namespace Moses2
// If none of 'dhms' is given, it is assumed that it's seconds.
// Specs can be combined, e.g. 2h30m, although it's probably nonsense
// to be so specific.
-size_t
+size_t
parse_timespec(std::string const& spec)
{
size_t t = 0, timeout = 0;
- BOOST_FOREACH(char const& c, spec)
- {
- if (c >= '0' && c <= '9')
- {
- t = t * 10 + c - '0';
- }
- else
- {
- if (c == 'd') timeout = t * 24 * 3600;
- else if (c == 'h') timeout += t * 3600;
- else if (c == 'm') timeout += t * 60;
- else if (c == 's') timeout += t;
- else UTIL_THROW2("Can't parse specification '" << spec
- << " at " << HERE);
- t = 0;
- }
+ BOOST_FOREACH(char const& c, spec) {
+ if (c >= '0' && c <= '9') {
+ t = t * 10 + c - '0';
+ } else {
+ if (c == 'd') timeout = t * 24 * 3600;
+ else if (c == 'h') timeout += t * 3600;
+ else if (c == 'm') timeout += t * 60;
+ else if (c == 's') timeout += t;
+ else UTIL_THROW2("Can't parse specification '" << spec
+ << " at " << HERE);
+ t = 0;
}
+ }
return timeout;
}
@@ -54,7 +50,7 @@ ServerOptions()
ServerOptions::
ServerOptions(Parameter const& P)
-{
+{
init(P);
}
diff --git a/moses2/parameters/ServerOptions.h b/moses2/parameters/ServerOptions.h
new file mode 100644
index 000000000..377b4d31b
--- /dev/null
+++ b/moses2/parameters/ServerOptions.h
@@ -0,0 +1,41 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#pragma once
+#include <string>
+#include <map>
+#include <stdint.h>
+#include <xmlrpc-c/base.hpp>
+#include <xmlrpc-c/registry.hpp>
+#include <xmlrpc-c/server_abyss.hpp>
+
+namespace Moses2
+{
+class Parameter;
+
+// Settings used when the decoder runs as a server.
+struct ServerOptions {
+  bool is_serial;
+  uint32_t numThreads; // might not be used any more, actually
+
+  // related to Moses translation sessions
+  size_t sessionTimeout;
+  size_t sessionCacheSize;
+
+  // for the abyss server
+  int port;
+  std::string logfile;
+  int maxConn;
+  int maxConnBacklog;
+  int keepaliveTimeout;
+  int keepaliveMaxConn;
+  int timeout;
+
+  ServerOptions();
+  ServerOptions(Parameter const& param);
+
+  bool init(Parameter const& param);
+
+  // Ignores the parameter map and returns true.
+  bool update(std::map<std::string,xmlrpc_c::value>const& /* params */) {
+    return true;
+  }
+};
+
+}
diff --git a/contrib/moses2/parameters/SyntaxOptions.cpp b/moses2/parameters/SyntaxOptions.cpp
index 0d4b0c7eb..4c6730615 100644
--- a/contrib/moses2/parameters/SyntaxOptions.cpp
+++ b/moses2/parameters/SyntaxOptions.cpp
@@ -11,10 +11,10 @@
namespace Moses2
{
SyntaxOptions::SyntaxOptions()
-: s2t_parsing_algo(RecursiveCYKPlus)
-, default_non_term_only_for_empty_range(false)
-, source_label_overlap(SourceLabelOverlapAdd)
-, rule_limit(DEFAULT_MAX_TRANS_OPT_SIZE)
+ : s2t_parsing_algo(RecursiveCYKPlus)
+ , default_non_term_only_for_empty_range(false)
+ , source_label_overlap(SourceLabelOverlapAdd)
+ , rule_limit(DEFAULT_MAX_TRANS_OPT_SIZE)
{}
bool SyntaxOptions::init(Parameter const& param)
diff --git a/contrib/moses2/parameters/SyntaxOptions.h b/moses2/parameters/SyntaxOptions.h
index 133a1d9ed..c32a0c85c 100644
--- a/contrib/moses2/parameters/SyntaxOptions.h
+++ b/moses2/parameters/SyntaxOptions.h
@@ -19,8 +19,7 @@ typedef std::pair<std::string, float> UnknownLHSEntry;
typedef std::vector<UnknownLHSEntry> UnknownLHSList;
struct
-SyntaxOptions : public OptionsBaseClass
-{
+ SyntaxOptions : public OptionsBaseClass {
S2TParsingAlgorithm s2t_parsing_algo;
SCFG::Word input_default_non_terminal;
SCFG::Word output_default_non_terminal;
diff --git a/contrib/moses2/pugiconfig.hpp b/moses2/pugiconfig.hpp
index e50b580bf..1e3bdd1f3 100644
--- a/contrib/moses2/pugiconfig.hpp
+++ b/moses2/pugiconfig.hpp
@@ -62,7 +62,7 @@
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
diff --git a/moses2/pugixml.cpp b/moses2/pugixml.cpp
new file mode 100644
index 000000000..a39f25880
--- /dev/null
+++ b/moses2/pugixml.cpp
@@ -0,0 +1,11456 @@
+/**
+ * pugixml parser - version 1.7
+ * --------------------------------------------------------
+ * Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
+ * Report bugs and download new versions at http://pugixml.org/
+ *
+ * This library is distributed under the MIT License. See notice at the end
+ * of this file.
+ *
+ * This work is based on the pugxml parser, which is:
+ * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
+ */
+
+#ifndef SOURCE_PUGIXML_CPP
+#define SOURCE_PUGIXML_CPP
+
+#include "pugixml.hpp"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <limits.h>
+
+#ifdef PUGIXML_WCHAR_MODE
+# include <wchar.h>
+#endif
+
+#ifndef PUGIXML_NO_XPATH
+# include <math.h>
+# include <float.h>
+# ifdef PUGIXML_NO_EXCEPTIONS
+# include <setjmp.h>
+# endif
+#endif
+
+#ifndef PUGIXML_NO_STL
+# include <istream>
+# include <ostream>
+# include <string>
+#endif
+
+// For placement new
+#include <new>
+
+#ifdef _MSC_VER
+# pragma warning(push)
+# pragma warning(disable: 4127) // conditional expression is constant
+# pragma warning(disable: 4324) // structure was padded due to __declspec(align())
+# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
+# pragma warning(disable: 4702) // unreachable code
+# pragma warning(disable: 4996) // this function or variable may be unsafe
+# pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged
+#endif
+
+#ifdef __INTEL_COMPILER
+# pragma warning(disable: 177) // function was declared but never referenced
+# pragma warning(disable: 279) // controlling expression is constant
+# pragma warning(disable: 1478 1786) // function was declared "deprecated"
+# pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
+#endif
+
+#if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
+# pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
+#endif
+
+#ifdef __BORLANDC__
+# pragma option push
+# pragma warn -8008 // condition is always false
+# pragma warn -8066 // unreachable code
+#endif
+
+#ifdef __SNC__
+// Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
+# pragma diag_suppress=178 // function was declared but never referenced
+# pragma diag_suppress=237 // controlling expression is constant
+#endif
+
+// Inlining controls
+#if defined(_MSC_VER) && _MSC_VER >= 1300
+# define PUGI__NO_INLINE __declspec(noinline)
+#elif defined(__GNUC__)
+# define PUGI__NO_INLINE __attribute__((noinline))
+#else
+# define PUGI__NO_INLINE
+#endif
+
+// Branch weight controls
+#if defined(__GNUC__)
+# define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
+#else
+# define PUGI__UNLIKELY(cond) (cond)
+#endif
+
+// Simple static assertion
+#define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
+
+// Digital Mars C++ bug workaround for passing char loaded from memory via stack
+#ifdef __DMC__
+# define PUGI__DMC_VOLATILE volatile
+#else
+# define PUGI__DMC_VOLATILE
+#endif
+
+// Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
+#if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
+using std::memcpy;
+using std::memmove;
+using std::memset;
+#endif
+
+// In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
+#if defined(_MSC_VER) && !defined(__S3E__)
+# define PUGI__MSVC_CRT_VERSION _MSC_VER
+#endif
+
+#ifdef PUGIXML_HEADER_ONLY
+# define PUGI__NS_BEGIN namespace pugi { namespace impl {
+# define PUGI__NS_END } }
+# define PUGI__FN inline
+# define PUGI__FN_NO_INLINE inline
+#else
+# if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
+# define PUGI__NS_BEGIN namespace pugi { namespace impl {
+# define PUGI__NS_END } }
+# else
+# define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
+# define PUGI__NS_END } } }
+# endif
+# define PUGI__FN
+# define PUGI__FN_NO_INLINE PUGI__NO_INLINE
+#endif
+
+// uintptr_t
+#if !defined(_MSC_VER) || _MSC_VER >= 1600
+# include <stdint.h>
+#else
+namespace pugi
+{
+# ifndef _UINTPTR_T_DEFINED
+typedef size_t uintptr_t;
+# endif
+
+typedef unsigned __int8 uint8_t;
+typedef unsigned __int16 uint16_t;
+typedef unsigned __int32 uint32_t;
+}
+#endif
+
+// Memory allocation
+PUGI__NS_BEGIN
+// Default allocation function: forwards to malloc() and returns its result.
+PUGI__FN void* default_allocate(size_t size)
+{
+  void* const block = malloc(size);
+  return block;
+}
+
+// Default deallocation function: forwards to free().
+PUGI__FN void default_deallocate(void* ptr)
+{
+  free(ptr);
+  return;
+}
+
+template <typename T>
+struct xml_memory_management_function_storage { // holder for the two memory function pointers currently in use
+ static allocation_function allocate;
+ static deallocation_function deallocate;
+};
+
+// Global allocation functions are stored in class statics so that in header mode linker deduplicates them
+// Without a template<> we'll get multiple definitions of the same static
+template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate; // starts out as the malloc() wrapper
+template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate; // starts out as the free() wrapper
+
+typedef xml_memory_management_function_storage<int> xml_memory; // the <int> instantiation is the one this file goes through (xml_memory::allocate / xml_memory::deallocate)
+PUGI__NS_END
+
+// String utilities
+PUGI__NS_BEGIN
+/* Get string length.
+ * Returns the result of wcslen() in PUGIXML_WCHAR_MODE and of strlen()
+ * otherwise. s must not be null (asserted). */
+PUGI__FN size_t strlength(const char_t* s)
+{
+ assert(s);
+
+#ifdef PUGIXML_WCHAR_MODE
+ return wcslen(s);
+#else
+ return strlen(s);
+#endif
+}
+
+/* Compare two strings.
+ * Returns true when wcscmp() (PUGIXML_WCHAR_MODE) or strcmp() (otherwise)
+ * reports the strings as equal. Neither pointer may be null (asserted). */
+PUGI__FN bool strequal(const char_t* src, const char_t* dst)
+{
+ assert(src && dst);
+
+#ifdef PUGIXML_WCHAR_MODE
+ return wcscmp(src, dst) == 0;
+#else
+ return strcmp(src, dst) == 0;
+#endif
+}
+
+// Compare zero-terminated lhs with the first count characters of rhs:
+// true only if those characters all match and lhs ends right after them.
+PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
+{
+  size_t matched = 0;
+
+  while (matched < count && lhs[matched] == rhs[matched])
+    ++matched;
+
+  // a mismatch inside the range fails; otherwise lhs must terminate here
+  return matched == count && lhs[count] == 0;
+}
+
+// Length of a wide string; counts by hand when not in wchar mode so that it
+// works even if the CRT lacks wide character support
+PUGI__FN size_t strlength_wide(const wchar_t* s)
+{
+  assert(s);
+
+#ifdef PUGIXML_WCHAR_MODE
+  return wcslen(s);
+#else
+  size_t length = 0;
+  while (s[length] != 0)
+    ++length;
+  return length;
+#endif
+}
+PUGI__NS_END
+
+/* auto_ptr-like object for exception recovery:
+ * holds a pointer together with a deleter and calls deleter(data) from its
+ * destructor, unless release() has been called first. */
+PUGI__NS_BEGIN
+template <typename T, typename D = void(*)(T*)> struct auto_deleter {
+ T* data;
+ D deleter;
+
+ auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_) {
+ }
+
+ ~auto_deleter() {
+ if (data) deleter(data); // nothing to do for a null or released pointer
+ }
+
+ T* release() { // hand the pointer back to the caller; the destructor will then leave it alone
+ T* result = data;
+ data = 0;
+ return result;
+ }
+};
+PUGI__NS_END
+
+#ifdef PUGIXML_COMPACT
+PUGI__NS_BEGIN
+class compact_hash_table // open-addressing table mapping pointer keys to pointer values; a null key marks an empty bucket
+{
+public:
+ compact_hash_table(): _items(0), _capacity(0), _count(0) { // starts empty, without storage
+ }
+
+ void clear() { // free the bucket array (if any) and return to the empty state
+ if (_items) {
+ xml_memory::deallocate(_items);
+ _items = 0;
+ _capacity = 0;
+ _count = 0;
+ }
+ }
+
+ void** find(const void* key) { // returns the address of the value stored for key, or 0 if key is not present
+ assert(key);
+
+ if (_capacity == 0) return 0;
+
+ size_t hashmod = _capacity - 1; // used as a bit mask: rehash() only produces capacities of 32 * 2^k
+ size_t bucket = hash(key) & hashmod;
+
+ for (size_t probe = 0; probe <= hashmod; ++probe) {
+ item_t& probe_item = _items[bucket];
+
+ if (probe_item.key == key)
+ return &probe_item.value;
+
+ if (probe_item.key == 0) // reached an empty bucket: key was never inserted
+ return 0;
+
+ // hash collision, quadratic probing
+ bucket = (bucket + probe + 1) & hashmod;
+ }
+
+ assert(!"Hash table is full");
+ return 0;
+ }
+
+ void** insert(const void* key) { // returns the value slot for key, claiming the first empty bucket on its probe path if key is new
+ assert(key);
+ assert(_capacity != 0 && _count < _capacity - _capacity / 4); // requires storage and a fill level below 3/4 of capacity
+
+ size_t hashmod = _capacity - 1;
+ size_t bucket = hash(key) & hashmod;
+
+ for (size_t probe = 0; probe <= hashmod; ++probe) {
+ item_t& probe_item = _items[bucket];
+
+ if (probe_item.key == 0) {
+ probe_item.key = key;
+ _count++;
+ return &probe_item.value;
+ }
+
+ if (probe_item.key == key) // already present: hand out the existing slot
+ return &probe_item.value;
+
+ // hash collision, quadratic probing
+ bucket = (bucket + probe + 1) & hashmod;
+ }
+
+ assert(!"Hash table is full");
+ return 0;
+ }
+
+ bool reserve() { // grow via rehash() once 16 more items would reach the 3/4 limit; false only if rehash() fails
+ if (_count + 16 >= _capacity - _capacity / 4)
+ return rehash();
+
+ return true;
+ }
+
+private:
+ struct item_t {
+ const void* key;
+ void* value;
+ };
+
+ item_t* _items;
+ size_t _capacity; // number of buckets in _items
+
+ size_t _count; // number of buckets whose key is set
+
+ bool rehash();
+
+ static unsigned int hash(const void* key) { // mixes the low 32 bits of the pointer value
+ unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));
+
+ // MurmurHash3 32-bit finalizer
+ h ^= h >> 16;
+ h *= 0x85ebca6bu;
+ h ^= h >> 13;
+ h *= 0xc2b2ae35u;
+ h ^= h >> 16;
+
+ return h;
+ }
+};
+
+// Move all items into a bucket array of twice the capacity (32 buckets for
+// a table without storage). Returns false, leaving the table untouched, when
+// the new array cannot be allocated.
+PUGI__FN_NO_INLINE bool compact_hash_table::rehash()
+{
+  compact_hash_table grown;
+
+  if (_capacity == 0)
+    grown._capacity = 32;
+  else
+    grown._capacity = _capacity * 2;
+
+  const size_t bytes = sizeof(item_t) * grown._capacity;
+
+  grown._items = static_cast<item_t*>(xml_memory::allocate(bytes));
+  if (grown._items == 0)
+    return false;
+
+  // all-zero buckets are empty buckets
+  memset(grown._items, 0, bytes);
+
+  for (size_t slot = 0; slot < _capacity; ++slot) {
+    const item_t& old_item = _items[slot];
+
+    if (!old_item.key)
+      continue;
+
+    void** target = grown.insert(old_item.key);
+    *target = old_item.value;
+  }
+
+  if (_items)
+    xml_memory::deallocate(_items);
+
+  // take over the new storage; _count itself does not change
+  _items = grown._items;
+  _capacity = grown._capacity;
+
+  assert(_count == grown._count);
+
+  return true;
+}
+
+PUGI__NS_END
+#endif
+
+PUGI__NS_BEGIN
+static const size_t xml_memory_page_size =
+#ifdef PUGIXML_MEMORY_PAGE_SIZE
+ PUGIXML_MEMORY_PAGE_SIZE
+#else
+ 32768
+#endif
+ ;
+
+#ifdef PUGIXML_COMPACT
+static const uintptr_t xml_memory_block_alignment = 4;
+
+static const uintptr_t xml_memory_page_alignment = sizeof(void*);
+#else
+static const uintptr_t xml_memory_block_alignment = sizeof(void*);
+
+static const uintptr_t xml_memory_page_alignment = 64;
+static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1);
+#endif
+
+// extra metadata bits
+static const uintptr_t xml_memory_page_contents_shared_mask = 32;
+static const uintptr_t xml_memory_page_name_allocated_mask = 16;
+static const uintptr_t xml_memory_page_value_allocated_mask = 8;
+static const uintptr_t xml_memory_page_type_mask = 7;
+
+// combined masks for string uniqueness
+static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
+static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;
+
+#ifdef PUGIXML_COMPACT
+#define PUGI__GETPAGE_IMPL(header) (header).get_page()
+#else
+#define PUGI__GETPAGE_IMPL(header) reinterpret_cast<impl::xml_memory_page*>((header) & impl::xml_memory_page_pointer_mask)
+#endif
+
+#define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
+#define PUGI__NODETYPE(n) static_cast<xml_node_type>(((n)->header & impl::xml_memory_page_type_mask) + 1)
+
+struct xml_allocator;
+
+struct xml_memory_page { // bookkeeping header placed at the start of each page; xml_allocator hands out the bytes following it
+ static xml_memory_page* construct(void* memory) { // treat raw memory as a page header and zero every field
+ xml_memory_page* result = static_cast<xml_memory_page*>(memory);
+
+ result->allocator = 0;
+ result->prev = 0;
+ result->next = 0;
+ result->busy_size = 0;
+ result->freed_size = 0;
+
+#ifdef PUGIXML_COMPACT
+ result->compact_string_base = 0;
+ result->compact_shared_parent = 0;
+ result->compact_page_marker = 0;
+#endif
+
+ return result;
+ }
+
+ xml_allocator* allocator;
+
+ xml_memory_page* prev; // pages are linked in both directions
+ xml_memory_page* next;
+
+ size_t busy_size; // bytes handed out from this page so far
+ size_t freed_size; // bytes given back; the page counts as unused once this equals busy_size
+
+#ifdef PUGIXML_COMPACT
+ char_t* compact_string_base;
+ void* compact_shared_parent;
+ uint32_t* compact_page_marker; // most recent marker written by xml_allocator::allocate_object
+#endif
+};
+
+struct xml_memory_string_header { // NOTE(review): presumably stored in front of page-allocated strings; its users are outside this excerpt, confirm there
+ uint16_t page_offset; // offset from page->data
+ uint16_t full_size; // 0 if string occupies whole page
+};
+
+// Page-based allocator for nodes, attributes and strings.
+// Memory is carved linearly out of the current page (_root); a page is only released or reset once
+// deallocate_memory has been told about every byte that was handed out from it.
+struct xml_allocator {
+ // root: page that allocations are initially carved from; _busy_size starts as a copy of its busy_size
+ xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size) {
+#ifdef PUGIXML_COMPACT
+ _hash = 0;
+#endif
+ }
+
+ // Allocates and initializes a page with room for data_size bytes after the page header.
+ // Returns 0 if the underlying allocation fails.
+ xml_memory_page* allocate_page(size_t data_size) {
+ size_t size = sizeof(xml_memory_page) + data_size;
+
+ // allocate block with some alignment, leaving memory for worst-case padding
+ void* memory = xml_memory::allocate(size + xml_memory_page_alignment);
+ if (!memory) return 0;
+
+ // align to next page boundary (note: this guarantees at least 1 usable byte before the page)
+ char* page_memory = reinterpret_cast<char*>((reinterpret_cast<uintptr_t>(memory) + xml_memory_page_alignment) & ~(xml_memory_page_alignment - 1));
+
+ // prepare page structure
+ xml_memory_page* page = xml_memory_page::construct(page_memory);
+ assert(page);
+
+ // every page shares the allocator pointer stored in the current root page
+ page->allocator = _root->allocator;
+
+ // record the offset for freeing the memory block
+ // (stored in the single byte just before the page, hence the <= 127 limit)
+ assert(page_memory > memory && page_memory - static_cast<char*>(memory) <= 127);
+ page_memory[-1] = static_cast<char>(page_memory - static_cast<char*>(memory));
+
+ return page;
+ }
+
+ // Releases a page obtained from allocate_page, using the offset byte stored in front of it
+ // to recover the pointer originally returned by xml_memory::allocate.
+ static void deallocate_page(xml_memory_page* page) {
+ char* page_memory = reinterpret_cast<char*>(page);
+
+ xml_memory::deallocate(page_memory - page_memory[-1]);
+ }
+
+ // Slow path of allocate_memory, defined out of line.
+ void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
+
+ // Hands out size bytes and reports the page they live on through out_page.
+ // Requests that fit in the current page are served by bumping _busy_size; everything else
+ // is forwarded to allocate_memory_oob (which may return 0).
+ void* allocate_memory(size_t size, xml_memory_page*& out_page) {
+ if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
+ return allocate_memory_oob(size, out_page);
+
+ void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;
+
+ _busy_size += size;
+
+ out_page = _root;
+
+ return buf;
+ }
+
+#ifdef PUGIXML_COMPACT
+ // Compact mode: allocates size bytes plus room for a uint32_t page marker.
+ // If the result is 256 alignment units or more away from the page's current marker, the extra
+ // uint32_t becomes a new marker (holding the offset back to the page start) and the object is
+ // placed right after it; otherwise the extra bytes are returned to the page.
+ void* allocate_object(size_t size, xml_memory_page*& out_page) {
+ void* result = allocate_memory(size + sizeof(uint32_t), out_page);
+ if (!result) return 0;
+
+ // adjust for marker
+ ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);
+
+ if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment)) {
+ // insert new marker
+ uint32_t* marker = static_cast<uint32_t*>(result);
+
+ *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page));
+ out_page->compact_page_marker = marker;
+
+ // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
+ // this will make sure deallocate_memory correctly tracks the size
+ out_page->freed_size += sizeof(uint32_t);
+
+ return marker + 1;
+ } else {
+ // roll back uint32_t part
+ _busy_size -= sizeof(uint32_t);
+
+ return result;
+ }
+ }
+#else
+ // Non-compact mode: objects need no marker, so this is a plain allocate_memory call.
+ void* allocate_object(size_t size, xml_memory_page*& out_page) {
+ return allocate_memory(size, out_page);
+ }
+#endif
+
+ // Records that size bytes at ptr, located on page, are no longer in use.
+ // Once a page is completely freed it is either reset for reuse (when it is the last page in
+ // the list) or unlinked from the list and released.
+ void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) {
+ // the current page's busy_size is only tracked in _busy_size, so sync it before checking
+ if (page == _root) page->busy_size = _busy_size;
+
+ assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
+ // ptr is otherwise only read by the assert above; presumably this avoids an unused-parameter warning when asserts are disabled
+ (void)!ptr;
+
+ page->freed_size += size;
+ assert(page->freed_size <= page->busy_size);
+
+ if (page->freed_size == page->busy_size) {
+ if (page->next == 0) {
+ assert(_root == page);
+
+ // top page freed, just reset sizes
+ page->busy_size = 0;
+ page->freed_size = 0;
+
+#ifdef PUGIXML_COMPACT
+ // reset compact state to maximize efficiency
+ page->compact_string_base = 0;
+ page->compact_shared_parent = 0;
+ page->compact_page_marker = 0;
+#endif
+
+ _busy_size = 0;
+ } else {
+ assert(_root != page);
+ assert(page->prev);
+
+ // remove from the list
+ page->prev->next = page->next;
+ page->next->prev = page->prev;
+
+ // deallocate
+ deallocate_page(page);
+ }
+ }
+ }
+
+ // Allocates storage for length char_t elements preceded by an xml_memory_string_header.
+ // Returns a pointer to the character storage (just past the header), or 0 on allocation failure.
+ char_t* allocate_string(size_t length) {
+ static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;
+
+ PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
+
+ // allocate memory for string and header block
+ size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
+
+ // round size up to block alignment boundary
+ size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);
+
+ xml_memory_page* page;
+ xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
+
+ if (!header) return 0;
+
+ // setup header
+ ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);
+
+ assert(page_offset % xml_memory_block_alignment == 0);
+ assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
+ header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);
+
+ // full_size == 0 for large strings that occupy the whole page
+ assert(full_size % xml_memory_block_alignment == 0);
+ assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
+ header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);
+
+ // round-trip through void* to avoid 'cast increases required alignment of target type' warning
+ // header is guaranteed a pointer-sized alignment, which should be enough for char_t
+ return static_cast<char_t*>(static_cast<void*>(header + 1));
+ }
+
+ // Frees a string previously returned by allocate_string; the owning page and the block size
+ // are recovered from the header stored in front of the characters.
+ void deallocate_string(char_t* string) {
+ // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
+ // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
+
+ // get header
+ xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
+ assert(header);
+
+ // deallocate
+ size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
+ xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
+
+ // if full_size == 0 then this string occupies the whole page
+ size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;
+
+ deallocate_memory(header, full_size, page);
+ }
+
+ // Compact mode: forwards to the hash table's reserve(). Otherwise there is nothing to reserve and the call always succeeds.
+ bool reserve() {
+#ifdef PUGIXML_COMPACT
+ return _hash->reserve();
+#else
+ return true;
+#endif
+ }
+
+ // page that new allocations are carved from
+ xml_memory_page* _root;
+ // bytes already handed out from _root; written back to _root->busy_size when needed
+ size_t _busy_size;
+
+#ifdef PUGIXML_COMPACT
+ // hash table used by the compact pointer/string classes for values they cannot encode inline
+ compact_hash_table* _hash;
+#endif
+};
+
+// Slow path of allocate_memory, taken when the request does not fit into the current page.
+// Requests of at most a quarter page get a fresh standard-size page that becomes the new _root;
+// larger requests get a dedicated page of exactly 'size' bytes linked in just before _root.
+// out_page receives the new page (0 on failure); returns the start of the page's data area, or 0.
+PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
+{
+ const size_t large_allocation_threshold = xml_memory_page_size / 4;
+
+ xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
+ out_page = page;
+
+ if (!page) return 0;
+
+ if (size <= large_allocation_threshold) {
+ // store the running counter back into the old root before switching pages
+ _root->busy_size = _busy_size;
+
+ // insert page at the end of linked list
+ page->prev = _root;
+ _root->next = page;
+ _root = page;
+
+ _busy_size = size;
+ } else {
+ // insert page before the end of linked list, so that it is deleted as soon as possible
+ // the last page is not deleted even if it's empty (see deallocate_memory)
+ assert(_root->prev);
+
+ page->prev = _root->prev;
+ page->next = _root;
+
+ _root->prev->next = page;
+ _root->prev = page;
+
+ // the dedicated page is never the current page, so its busy_size is set directly
+ page->busy_size = size;
+ }
+
+ return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
+}
+PUGI__NS_END
+
+#ifdef PUGIXML_COMPACT
+PUGI__NS_BEGIN
+// Granularity, in bytes, of the offsets stored by the compact encodings below (1 << compact_alignment_log2).
+// compact_header statically asserts that it equals xml_memory_block_alignment.
+static const uintptr_t compact_alignment_log2 = 2;
+static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
+
+// Two-byte header used as the first member of compact-mode nodes and attributes.
+// _page stores the distance from this header back to the owning page's current marker, in
+// compact_alignment units; _flags stores the flag bits passed at construction.
+class compact_header
+{
+public:
+ // page: page the object lives on; its compact_page_marker must be less than 256 alignment units before this header (asserted)
+ // flags: initial flag bits (truncated to unsigned char)
+ compact_header(xml_memory_page* page, unsigned int flags) {
+ PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
+
+ ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker));
+ assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment);
+
+ _page = static_cast<unsigned char>(offset >> compact_alignment_log2);
+ _flags = static_cast<unsigned char>(flags);
+ }
+
+ // The three operators below only touch _flags, so the header can be masked like a plain integer.
+ void operator&=(uintptr_t mod) {
+ _flags &= static_cast<unsigned char>(mod);
+ }
+
+ void operator|=(uintptr_t mod) {
+ _flags |= static_cast<unsigned char>(mod);
+ }
+
+ uintptr_t operator&(uintptr_t mod) const {
+ return _flags & mod;
+ }
+
+ // Recovers the owning page: steps back to the page marker, then subtracts the uint32_t
+ // stored in the marker (the marker's offset from the start of the page).
+ xml_memory_page* get_page() const {
+ // round-trip through void* to silence 'cast increases required alignment of target type' warnings
+ const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
+ const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker));
+
+ return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
+ }
+
+private:
+ unsigned char _page;
+ unsigned char _flags;
+};
+
+// Finds the page that owns 'object', given that a compact_header sits header_offset bytes before it.
+PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset)
+{
+  // step back from the member to the header of the structure that contains it
+  const char* header_address = static_cast<const char*>(object) - header_offset;
+
+  return reinterpret_cast<const compact_header*>(header_address)->get_page();
+}
+
+// Looks up the full pointer stored for 'object' in the hash table of the allocator that owns its page.
+template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object)
+{
+  xml_memory_page* page = compact_get_page(object, header_offset);
+
+  // the table is keyed by the address of the compact member itself
+  return static_cast<T*>(*page->allocator->_hash->find(object));
+}
+
+// Stores the full pointer 'value' for 'object' in the hash table of the allocator that owns its page.
+template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value)
+{
+  xml_memory_page* page = compact_get_page(object, header_offset);
+
+  // insert() yields the slot for this key; the slot is then overwritten with the pointer
+  *page->allocator->_hash->insert(object) = value;
+}
+
+// One-byte pointer to T, stored relative to the address of the pointer member itself.
+// _data == 0 is null; 1..254 encode an offset of (_data - 1 + start) compact_alignment units from
+// 'this' rounded down to compact_alignment; 255 means the pointer is kept in the allocator's hash table.
+// header_offset is the byte distance from this member back to the owner's compact_header.
+template <typename T, int header_offset, int start = -126> class compact_pointer
+{
+public:
+ compact_pointer(): _data(0) {
+ }
+
+ // 'rhs + 0' converts rhs to a plain T* first, so the value is re-encoded relative to this object
+ void operator=(const compact_pointer& rhs) {
+ *this = rhs + 0;
+ }
+
+ // Encodes value inline when the offset fits, otherwise stores it through compact_set_value.
+ void operator=(T* value) {
+ if (value) {
+ // value is guaranteed to be compact-aligned; 'this' is not
+ // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
+ // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
+ // compensate for arithmetic shift rounding for negative values
+ ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
+ ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
+
+ // the unsigned comparison also rejects negative offsets
+ if (static_cast<uintptr_t>(offset) <= 253)
+ _data = static_cast<unsigned char>(offset + 1);
+ else {
+ compact_set_value<header_offset>(this, value);
+
+ _data = 255;
+ }
+ } else
+ _data = 0;
+ }
+
+ // Decodes the stored value back into a T* (0 when null).
+ operator T*() const {
+ if (_data) {
+ if (_data < 255) {
+ uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
+
+ return reinterpret_cast<T*>(base + ((_data - 1 + start) << compact_alignment_log2));
+ } else
+ return compact_get_value<header_offset, T>(this);
+ } else
+ return 0;
+ }
+
+ T* operator->() const {
+ return *this;
+ }
+
+private:
+ unsigned char _data;
+};
+
+// Two-byte pointer to T used for parent links.
+// _data == 0 is null; values below 65534 encode an offset of (_data - 1 - 65533) compact_alignment
+// units from 'this' rounded down to compact_alignment; 65534 refers to the page's
+// compact_shared_parent; 65535 means the pointer is kept in the allocator's hash table.
+// header_offset is the byte distance from this member back to the owner's compact_header.
+template <typename T, int header_offset> class compact_pointer_parent
+{
+public:
+ compact_pointer_parent(): _data(0) {
+ }
+
+ // 'rhs + 0' converts rhs to a plain T* first, so the value is re-encoded relative to this object
+ void operator=(const compact_pointer_parent& rhs) {
+ *this = rhs + 0;
+ }
+
+ // Encodes value inline when the offset fits; otherwise tries the page-wide shared parent slot
+ // and finally falls back to the hash table.
+ void operator=(T* value) {
+ if (value) {
+ // value is guaranteed to be compact-aligned; 'this' is not
+ // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
+ // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
+ // compensate for arithmetic shift behavior for negative values
+ ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
+ ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
+
+ // NOTE(review): offset == 65533 would be stored as 65534, which operator T* reads as the
+ // shared-parent sentinel; presumably unreachable because it requires value to lie within
+ // the last few bytes before 'this' - confirm
+ if (static_cast<uintptr_t>(offset) <= 65533) {
+ _data = static_cast<unsigned short>(offset + 1);
+ } else {
+ xml_memory_page* page = compact_get_page(this, header_offset);
+
+ // the first out-of-range parent seen on a page becomes that page's shared parent
+ if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
+ page->compact_shared_parent = value;
+
+ if (page->compact_shared_parent == value) {
+ _data = 65534;
+ } else {
+ compact_set_value<header_offset>(this, value);
+
+ _data = 65535;
+ }
+ }
+ } else {
+ _data = 0;
+ }
+ }
+
+ // Decodes the stored value back into a T* (0 when null).
+ operator T*() const {
+ if (_data) {
+ if (_data < 65534) {
+ uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
+
+ return reinterpret_cast<T*>(base + ((_data - 1 - 65533) << compact_alignment_log2));
+ } else if (_data == 65534)
+ return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
+ else
+ return compact_get_value<header_offset, T>(this);
+ } else
+ return 0;
+ }
+
+ T* operator->() const {
+ return *this;
+ }
+
+private:
+ uint16_t _data;
+};
+
+// One-byte string pointer encoded relative to the owning page's compact_string_base.
+// The owner provides a shared uint16_t located base_offset bytes before this member; it stores the
+// coarse part of the offset (offset >> 7, plus 1) while _data stores the remainder (plus 1).
+// _data == 0 is null and 255 means the pointer is kept in the allocator's hash table.
+// header_offset is the byte distance from this member back to the owner's compact_header.
+template <int header_offset, int base_offset> class compact_string
+{
+public:
+ compact_string(): _data(0) {
+ }
+
+ // 'rhs + 0' converts rhs to a plain char_t* first, so the value is re-encoded for this object
+ void operator=(const compact_string& rhs) {
+ *this = rhs + 0;
+ }
+
+ // Encodes value inline when possible, otherwise stores it through compact_set_value.
+ void operator=(char_t* value) {
+ if (value) {
+ xml_memory_page* page = compact_get_page(this, header_offset);
+
+ // the first string assigned on a page becomes the reference point for that page
+ if (PUGI__UNLIKELY(page->compact_string_base == 0))
+ page->compact_string_base = value;
+
+ // offset is measured in char_t elements; the unsigned comparison below also rejects negative offsets
+ ptrdiff_t offset = value - page->compact_string_base;
+
+ if (static_cast<uintptr_t>(offset) < (65535 << 7)) {
+ // round-trip through void* to silence 'cast increases required alignment of target type' warnings
+ uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));
+
+ if (*base == 0) {
+ // shared base not set yet: choose it so that this string fits
+ *base = static_cast<uint16_t>((offset >> 7) + 1);
+ _data = static_cast<unsigned char>((offset & 127) + 1);
+ } else {
+ // shared base already set: the string must lie within 0..253 elements after it
+ ptrdiff_t remainder = offset - ((*base - 1) << 7);
+
+ if (static_cast<uintptr_t>(remainder) <= 253) {
+ _data = static_cast<unsigned char>(remainder + 1);
+ } else {
+ compact_set_value<header_offset>(this, value);
+
+ _data = 255;
+ }
+ }
+ } else {
+ compact_set_value<header_offset>(this, value);
+
+ _data = 255;
+ }
+ } else {
+ _data = 0;
+ }
+ }
+
+ // Decodes the stored value back into a char_t* (0 when null).
+ operator char_t*() const {
+ if (_data) {
+ if (_data < 255) {
+ xml_memory_page* page = compact_get_page(this, header_offset);
+
+ // round-trip through void* to silence 'cast increases required alignment of target type' warnings
+ const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
+ assert(*base);
+
+ ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
+
+ return page->compact_string_base + offset;
+ } else {
+ return compact_get_value<header_offset, char_t>(this);
+ }
+ } else
+ return 0;
+ }
+
+private:
+ unsigned char _data;
+};
+PUGI__NS_END
+#endif
+
+#ifdef PUGIXML_COMPACT
+namespace pugi
+{
+// Compact-mode attribute record; statically asserted to occupy exactly 8 bytes.
+// The integer template arguments of the members below are byte distances from each member back
+// to 'header' (first argument) and, for the strings, back to 'namevalue_base' (second argument).
+struct xml_attribute_struct {
+ xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0) {
+ PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
+ }
+
+ impl::compact_header header;
+
+ // coarse string offset shared by name and value (see compact_string)
+ uint16_t namevalue_base;
+
+ impl::compact_string<4, 2> name;
+ impl::compact_string<5, 3> value;
+
+ impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c;
+ impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute;
+};
+
+// Compact-mode node record; statically asserted to occupy exactly 12 bytes.
+// The header flags are initialized to type - 1. The integer template arguments of the members
+// below are byte distances from each member back to 'header' (first argument) and, for the
+// strings, back to 'namevalue_base' (second argument).
+struct xml_node_struct {
+ xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type - 1), namevalue_base(0) {
+ PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
+ }
+
+ impl::compact_header header;
+
+ // coarse string offset shared by name and value (see compact_string)
+ uint16_t namevalue_base;
+
+ impl::compact_string<4, 2> name;
+ impl::compact_string<5, 3> value;
+
+ impl::compact_pointer_parent<xml_node_struct, 6> parent;
+
+ impl::compact_pointer<xml_node_struct, 8, 0> first_child;
+
+ impl::compact_pointer<xml_node_struct, 9> prev_sibling_c;
+ impl::compact_pointer<xml_node_struct, 10, 0> next_sibling;
+
+ impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute;
+};
+}
+#else
+namespace pugi
+{
+// Attribute record (non-compact build): a header word plus plain pointers.
+struct xml_attribute_struct {
+  // The header starts out as the address of the owning page; all links and strings start null.
+  xml_attribute_struct(impl::xml_memory_page* page)
+    : header(reinterpret_cast<uintptr_t>(page)),
+      name(0),
+      value(0),
+      prev_attribute_c(0),
+      next_attribute(0)
+  {
+  }
+
+  uintptr_t header;
+
+  char_t* name;
+  char_t* value;
+
+  xml_attribute_struct* prev_attribute_c;
+  xml_attribute_struct* next_attribute;
+};
+
+// Node record (non-compact build): a header word plus plain pointers.
+struct xml_node_struct {
+  // The header combines the owning page address with (type - 1); all links and strings start null.
+  xml_node_struct(impl::xml_memory_page* page, xml_node_type type)
+    : header(reinterpret_cast<uintptr_t>(page) | (type - 1)),
+      name(0),
+      value(0),
+      parent(0),
+      first_child(0),
+      prev_sibling_c(0),
+      next_sibling(0),
+      first_attribute(0)
+  {
+  }
+
+  uintptr_t header;
+
+  char_t* name;
+  char_t* value;
+
+  xml_node_struct* parent;
+
+  xml_node_struct* first_child;
+
+  xml_node_struct* prev_sibling_c;
+  xml_node_struct* next_sibling;
+
+  xml_attribute_struct* first_attribute;
+};
+}
+#endif
+
+PUGI__NS_BEGIN
+// Node of a singly linked list of character buffers (see xml_document_struct::extra_buffers).
+struct xml_extra_buffer {
+ char_t* buffer;
+ xml_extra_buffer* next;
+};
+
+// Document root: a node of type node_document that is also the allocator for the whole tree.
+// get_document relies on this inheritance to turn a page's allocator pointer back into the document.
+struct xml_document_struct: public xml_node_struct, public xml_allocator {
+ // page: page the document node lives on; it also becomes the allocator's initial root page
+ xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0) {
+#ifdef PUGIXML_COMPACT
+ // point the allocator at the hash table embedded in this document
+ _hash = &hash;
+#endif
+ }
+
+ const char_t* buffer;
+
+ // head of a linked list of xml_extra_buffer entries
+ xml_extra_buffer* extra_buffers;
+
+#ifdef PUGIXML_COMPACT
+ compact_hash_table hash;
+#endif
+};
+
+// Returns the allocator that owns the page on which 'object' (a node or attribute) lives.
+template <typename Object> inline xml_allocator& get_allocator(const Object* object)
+{
+  assert(object);
+
+  // every page carries a pointer to its allocator
+  const xml_memory_page* page = PUGI__GETPAGE(object);
+
+  return *page->allocator;
+}
+
+// Returns the document that owns 'object' by downcasting the allocator stored in the object's page.
+template <typename Object> inline xml_document_struct& get_document(const Object* object)
+{
+  assert(object);
+
+  const xml_memory_page* page = PUGI__GETPAGE(object);
+
+  // xml_document_struct derives from xml_allocator, so the page's allocator pointer is cast down
+  return *static_cast<xml_document_struct*>(page->allocator);
+}
+PUGI__NS_END
+
+// Low-level DOM operations
+PUGI__NS_BEGIN
+// Allocates and constructs an unlinked attribute; returns 0 if the allocator is out of memory.
+inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
+{
+  xml_memory_page* owner_page;
+  void* storage = alloc.allocate_object(sizeof(xml_attribute_struct), owner_page);
+
+  // construct in place only when storage was actually obtained
+  return storage ? new (storage) xml_attribute_struct(owner_page) : 0;
+}
+
+// Allocates and constructs an unlinked node of the given type; returns 0 if the allocator is out of memory.
+inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
+{
+  xml_memory_page* owner_page;
+  void* storage = alloc.allocate_object(sizeof(xml_node_struct), owner_page);
+
+  // construct in place only when storage was actually obtained
+  return storage ? new (storage) xml_node_struct(owner_page, type) : 0;
+}
+
+// Releases an attribute: first any name/value strings flagged as allocator-owned, then the record itself.
+inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
+{
+  if (a->header & impl::xml_memory_page_name_allocated_mask) {
+    alloc.deallocate_string(a->name);
+  }
+
+  if (a->header & impl::xml_memory_page_value_allocated_mask) {
+    alloc.deallocate_string(a->value);
+  }
+
+  // the record is returned to the page it was carved from
+  xml_memory_page* owner_page = PUGI__GETPAGE(a);
+
+  alloc.deallocate_memory(a, sizeof(xml_attribute_struct), owner_page);
+}
+
+// Releases a node together with its owned strings, all of its attributes and, recursively, all of its children.
+inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
+{
+  if (n->header & impl::xml_memory_page_name_allocated_mask) {
+    alloc.deallocate_string(n->name);
+  }
+
+  if (n->header & impl::xml_memory_page_value_allocated_mask) {
+    alloc.deallocate_string(n->value);
+  }
+
+  // the successor is fetched before each element is destroyed, since destruction releases its memory
+  xml_attribute_struct* attr = n->first_attribute;
+
+  while (attr) {
+    xml_attribute_struct* following = attr->next_attribute;
+
+    destroy_attribute(attr, alloc);
+
+    attr = following;
+  }
+
+  xml_node_struct* child = n->first_child;
+
+  while (child) {
+    xml_node_struct* following = child->next_sibling;
+
+    destroy_node(child, alloc);
+
+    child = following;
+  }
+
+  xml_memory_page* owner_page = PUGI__GETPAGE(n);
+
+  alloc.deallocate_memory(n, sizeof(xml_node_struct), owner_page);
+}
+
+// Links child as the last child of node.
+// The head's prev_sibling_c doubles as the link to the tail of the sibling list.
+inline void append_node(xml_node_struct* child, xml_node_struct* node)
+{
+  child->parent = node;
+
+  xml_node_struct* first = node->first_child;
+
+  if (!first) {
+    // empty list: child becomes the head and, being alone, its own tail
+    node->first_child = child;
+    child->prev_sibling_c = child;
+    return;
+  }
+
+  xml_node_struct* last = first->prev_sibling_c;
+
+  last->next_sibling = child;
+  child->prev_sibling_c = last;
+  first->prev_sibling_c = child;
+}
+
+// Links child as the first child of node.
+inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
+{
+  child->parent = node;
+
+  xml_node_struct* old_first = node->first_child;
+
+  if (!old_first) {
+    // child is the only element, so it is its own tail
+    child->prev_sibling_c = child;
+  } else {
+    // the new head takes over the tail link from the old head
+    child->prev_sibling_c = old_first->prev_sibling_c;
+    old_first->prev_sibling_c = child;
+  }
+
+  child->next_sibling = old_first;
+  node->first_child = child;
+}
+
+// Links child into node's sibling list directly after node.
+inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
+{
+  xml_node_struct* parent = node->parent;
+  xml_node_struct* following = node->next_sibling;
+
+  child->parent = parent;
+
+  if (following) {
+    following->prev_sibling_c = child;
+  } else {
+    // node was the tail, so the head's tail link must now point at child
+    parent->first_child->prev_sibling_c = child;
+  }
+
+  child->next_sibling = following;
+  child->prev_sibling_c = node;
+
+  node->next_sibling = child;
+}
+
+// Links child into node's sibling list directly before node.
+inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
+{
+  xml_node_struct* parent = node->parent;
+  xml_node_struct* preceding = node->prev_sibling_c;
+
+  child->parent = parent;
+
+  if (preceding->next_sibling) {
+    preceding->next_sibling = child;
+  } else {
+    // prev_sibling_c of the head is the tail, whose next_sibling is null: node was the head
+    parent->first_child = child;
+  }
+
+  child->prev_sibling_c = preceding;
+  child->next_sibling = node;
+
+  node->prev_sibling_c = child;
+}
+
+// Unlinks node from its parent's child list and clears its parent and sibling links.
+inline void remove_node(xml_node_struct* node)
+{
+  xml_node_struct* parent = node->parent;
+  xml_node_struct* following = node->next_sibling;
+  xml_node_struct* preceding = node->prev_sibling_c;
+
+  if (following) {
+    following->prev_sibling_c = preceding;
+  } else {
+    // node was the tail: the head's tail link moves back by one
+    parent->first_child->prev_sibling_c = preceding;
+  }
+
+  if (preceding->next_sibling) {
+    preceding->next_sibling = following;
+  } else {
+    // node was the head (its prev_sibling_c is the tail, whose next_sibling is null)
+    parent->first_child = following;
+  }
+
+  node->parent = 0;
+  node->prev_sibling_c = 0;
+  node->next_sibling = 0;
+}
+
+// Links attr as the last attribute of node.
+// The head's prev_attribute_c doubles as the link to the tail of the attribute list.
+inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
+{
+  xml_attribute_struct* first = node->first_attribute;
+
+  if (!first) {
+    // empty list: attr becomes the head and, being alone, its own tail
+    node->first_attribute = attr;
+    attr->prev_attribute_c = attr;
+    return;
+  }
+
+  xml_attribute_struct* last = first->prev_attribute_c;
+
+  last->next_attribute = attr;
+  attr->prev_attribute_c = last;
+  first->prev_attribute_c = attr;
+}
+
+// Links attr as the first attribute of node.
+inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
+{
+  xml_attribute_struct* old_first = node->first_attribute;
+
+  if (!old_first) {
+    // attr is the only element, so it is its own tail
+    attr->prev_attribute_c = attr;
+  } else {
+    // the new head takes over the tail link from the old head
+    attr->prev_attribute_c = old_first->prev_attribute_c;
+    old_first->prev_attribute_c = attr;
+  }
+
+  attr->next_attribute = old_first;
+  node->first_attribute = attr;
+}
+
+// Links attr into node's attribute list directly after place.
+inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
+{
+  xml_attribute_struct* following = place->next_attribute;
+
+  if (following) {
+    following->prev_attribute_c = attr;
+  } else {
+    // place was the tail, so the head's tail link must now point at attr
+    node->first_attribute->prev_attribute_c = attr;
+  }
+
+  attr->next_attribute = following;
+  attr->prev_attribute_c = place;
+  place->next_attribute = attr;
+}
+
+// Links attr into node's attribute list directly before place.
+inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
+{
+  xml_attribute_struct* preceding = place->prev_attribute_c;
+
+  if (preceding->next_attribute) {
+    preceding->next_attribute = attr;
+  } else {
+    // prev_attribute_c of the head is the tail, whose next_attribute is null: place was the head
+    node->first_attribute = attr;
+  }
+
+  attr->prev_attribute_c = preceding;
+  attr->next_attribute = place;
+  place->prev_attribute_c = attr;
+}
+
+// Unlinks attr from node's attribute list and clears its links.
+inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
+{
+  xml_attribute_struct* following = attr->next_attribute;
+  xml_attribute_struct* preceding = attr->prev_attribute_c;
+
+  if (following) {
+    following->prev_attribute_c = preceding;
+  } else {
+    // attr was the tail: the head's tail link moves back by one
+    node->first_attribute->prev_attribute_c = preceding;
+  }
+
+  if (preceding->next_attribute) {
+    preceding->next_attribute = following;
+  } else {
+    // attr was the head (its prev_attribute_c is the tail, whose next_attribute is null)
+    node->first_attribute = following;
+  }
+
+  attr->prev_attribute_c = 0;
+  attr->next_attribute = 0;
+}
+
+// Allocates a node of the given type and appends it to node's children.
+// Returns the new child, or 0 if reserving or allocating memory fails.
+PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
+{
+  // allocation is only attempted once the allocator has reserved what it needs
+  xml_node_struct* child = alloc.reserve() ? allocate_node(alloc, type) : 0;
+
+  if (child) {
+    append_node(child, node);
+  }
+
+  return child;
+}
+
+// Allocates an attribute and appends it to node's attribute list.
+// Returns the new attribute, or 0 if reserving or allocating memory fails.
+PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
+{
+  // allocation is only attempted once the allocator has reserved what it needs
+  xml_attribute_struct* attr = alloc.reserve() ? allocate_attribute(alloc) : 0;
+
+  if (attr) {
+    append_attribute(attr, node);
+  }
+
+  return attr;
+}
+PUGI__NS_END
+
+// Helper classes for code generation
+PUGI__NS_BEGIN
+// Compile-time 'false' tag; used as a template argument and read through ::value (e.g. opt_swap::value in the decoders).
+struct opt_false {
+ enum { value = 0 };
+};
+
+// Compile-time 'true' tag; used as a template argument and read through ::value (e.g. opt_swap::value in the decoders).
+struct opt_true {
+ enum { value = 1 };
+};
+PUGI__NS_END
+
+// Unicode utilities
+PUGI__NS_BEGIN
+// Returns value with its two bytes exchanged.
+inline uint16_t endian_swap(uint16_t value)
+{
+  const unsigned int low_byte = value & 0xff;
+  const unsigned int high_byte = value >> 8;
+
+  return static_cast<uint16_t>((low_byte << 8) | high_byte);
+}
+
+// Returns value with the order of its four bytes reversed.
+inline uint32_t endian_swap(uint32_t value)
+{
+  const uint32_t byte0 = value & 0xff;
+  const uint32_t byte1 = (value >> 8) & 0xff;
+  const uint32_t byte2 = (value >> 16) & 0xff;
+  const uint32_t byte3 = value >> 24;
+
+  return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
+}
+
+// Output traits that count how many UTF-8 bytes the decoded code points would occupy.
+struct utf8_counter {
+  typedef size_t value_type;
+
+  // Code points below U+10000: 1 byte up to U+007F, 2 bytes up to U+07FF, 3 bytes otherwise.
+  static value_type low(value_type result, uint32_t ch) {
+    const value_type length = (ch < 0x80) ? 1 : (ch < 0x800) ? 2 : 3;
+
+    return result + length;
+  }
+
+  // Code points U+10000..U+10FFFF always take 4 bytes.
+  static value_type high(value_type result, uint32_t) {
+    return result + 4;
+  }
+};
+
+// Output traits that encode code points as UTF-8 into a byte buffer.
+// Every function returns the position just past the last byte written.
+struct utf8_writer {
+  typedef uint8_t* value_type;
+
+  // Encodes a code point below U+10000 using 1 to 3 bytes.
+  static value_type low(value_type result, uint32_t ch) {
+    if (ch < 0x80) {
+      // U+0000..U+007F
+      *result++ = static_cast<uint8_t>(ch);
+    } else if (ch < 0x800) {
+      // U+0080..U+07FF
+      *result++ = static_cast<uint8_t>(0xC0 | (ch >> 6));
+      *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
+    } else {
+      // U+0800..U+FFFF
+      *result++ = static_cast<uint8_t>(0xE0 | (ch >> 12));
+      *result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
+      *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
+    }
+
+    return result;
+  }
+
+  // Encodes a code point in U+10000..U+10FFFF using 4 bytes.
+  static value_type high(value_type result, uint32_t ch) {
+    *result++ = static_cast<uint8_t>(0xF0 | (ch >> 18));
+    *result++ = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
+    *result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
+    *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
+
+    return result;
+  }
+
+  // Dispatches to low() or high() depending on the code point's range.
+  static value_type any(value_type result, uint32_t ch) {
+    if (ch < 0x10000) {
+      return low(result, ch);
+    }
+
+    return high(result, ch);
+  }
+};
+
+// Output traits that count how many UTF-16 code units the decoded code points would occupy.
+struct utf16_counter {
+  typedef size_t value_type;
+
+  // Code points below U+10000 take a single code unit.
+  static value_type low(value_type result, uint32_t) {
+    return ++result;
+  }
+
+  // Code points from U+10000 up take a surrogate pair, i.e. two code units.
+  static value_type high(value_type result, uint32_t) {
+    result += 2;
+
+    return result;
+  }
+};
+
+// Output traits that encode code points as UTF-16 into a buffer of 16-bit units.
+// Every function returns the position just past the last unit written.
+struct utf16_writer {
+  typedef uint16_t* value_type;
+
+  // Writes a code point below U+10000 as one code unit.
+  static value_type low(value_type result, uint32_t ch) {
+    *result++ = static_cast<uint16_t>(ch);
+
+    return result;
+  }
+
+  // Writes a code point from U+10000 up as a surrogate pair (lead unit first).
+  static value_type high(value_type result, uint32_t ch) {
+    const uint32_t offset = static_cast<uint32_t>(ch - 0x10000);
+
+    *result++ = static_cast<uint16_t>(0xD800 + (offset >> 10));
+    *result++ = static_cast<uint16_t>(0xDC00 + (offset & 0x3ff));
+
+    return result;
+  }
+
+  // Dispatches to low() or high() depending on the code point's range.
+  static value_type any(value_type result, uint32_t ch) {
+    if (ch < 0x10000) {
+      return low(result, ch);
+    }
+
+    return high(result, ch);
+  }
+};
+
+// Output traits that count UTF-32 code units: every code point takes exactly one.
+struct utf32_counter {
+  typedef size_t value_type;
+
+  static value_type low(value_type result, uint32_t) {
+    return ++result;
+  }
+
+  static value_type high(value_type result, uint32_t) {
+    return ++result;
+  }
+};
+
+// Output traits that store code points unchanged into a buffer of 32-bit units.
+// Every function writes one unit and returns the position just past it.
+struct utf32_writer {
+  typedef uint32_t* value_type;
+
+  static value_type low(value_type result, uint32_t ch) {
+    *result++ = ch;
+
+    return result;
+  }
+
+  static value_type high(value_type result, uint32_t ch) {
+    *result++ = ch;
+
+    return result;
+  }
+
+  static value_type any(value_type result, uint32_t ch) {
+    *result++ = ch;
+
+    return result;
+  }
+};
+
+// Output traits that write Latin-1 bytes; code points above 255 are replaced by '?'.
+// Every function writes one byte and returns the position just past it.
+struct latin1_writer {
+  typedef uint8_t* value_type;
+
+  static value_type low(value_type result, uint32_t ch) {
+    if (ch > 255) {
+      *result = '?';
+    } else {
+      *result = static_cast<uint8_t>(ch);
+    }
+
+    return result + 1;
+  }
+
+  // Code points from U+10000 up can never be represented, so the argument is ignored.
+  static value_type high(value_type result, uint32_t ch) {
+    (void)ch;
+
+    *result++ = '?';
+
+    return result;
+  }
+};
+
+// Decodes UTF-8 input and feeds each code point to Traits::low (below U+10000) or Traits::high.
+struct utf8_decoder {
+ typedef uint8_t type;
+
+ // data/size: input bytes; result: initial accumulator, threaded through every Traits call and returned at the end.
+ // A byte that does not start a well-formed sequence of the expected length (bad lead byte, wrong
+ // continuation byte, or truncated input) is skipped one byte at a time without producing output.
+ template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) {
+ const uint8_t utf8_byte_mask = 0x3f;
+
+ while (size) {
+ uint8_t lead = *data;
+
+ // 0xxxxxxx -> U+0000..U+007F
+ if (lead < 0x80) {
+ result = Traits::low(result, lead);
+ data += 1;
+ size -= 1;
+
+ // process aligned single-byte (ascii) blocks
+ if ((reinterpret_cast<uintptr_t>(data) & 3) == 0) {
+ // round-trip through void* to silence 'cast increases required alignment of target type' warnings
+ // the 0x80808080 mask tests the high bit of four bytes at once
+ while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0) {
+ result = Traits::low(result, data[0]);
+ result = Traits::low(result, data[1]);
+ result = Traits::low(result, data[2]);
+ result = Traits::low(result, data[3]);
+ data += 4;
+ size -= 4;
+ }
+ }
+ }
+ // 110xxxxx -> U+0080..U+07FF
+ else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80) {
+ result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
+ data += 2;
+ size -= 2;
+ }
+ // 1110xxxx -> U+0800-U+FFFF
+ else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80) {
+ result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
+ data += 3;
+ size -= 3;
+ }
+ // 11110xxx -> U+10000..U+10FFFF
+ else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80) {
+ result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
+ data += 4;
+ size -= 4;
+ }
+ // 10xxxxxx or 11111xxx -> invalid
+ else {
+ data += 1;
+ size -= 1;
+ }
+ }
+
+ return result;
+ }
+};
+
+// Decodes UTF-16 input and feeds each code point to Traits::low (below U+10000) or Traits::high.
+// When opt_swap::value is non-zero every code unit is byte-swapped before it is interpreted.
+template <typename opt_swap> struct utf16_decoder {
+ typedef uint16_t type;
+
+ // data/size: input code units; result: initial accumulator, threaded through every Traits call and returned at the end.
+ // Unpaired surrogates (a lead without a following trail unit, or a trail on its own) are skipped without producing output.
+ template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits) {
+ while (size) {
+ uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
+
+ // U+0000..U+D7FF
+ if (lead < 0xD800) {
+ result = Traits::low(result, lead);
+ data += 1;
+ size -= 1;
+ }
+ // U+E000..U+FFFF
+ else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000) {
+ result = Traits::low(result, lead);
+ data += 1;
+ size -= 1;
+ }
+ // surrogate pair lead
+ else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2) {
+ uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
+
+ if (static_cast<unsigned int>(next - 0xDC00) < 0x400) {
+ result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
+ data += 2;
+ size -= 2;
+ } else {
+ // lead not followed by a trail unit: drop only the lead and re-examine 'next' on the following iteration
+ data += 1;
+ size -= 1;
+ }
+ } else {
+ // lone trail surrogate, or a lead surrogate in the last position
+ data += 1;
+ size -= 1;
+ }
+ }
+
+ return result;
+ }
+};
+
+// Decodes UTF-32 input and feeds each code point to Traits::low (below U+10000) or Traits::high.
+// When opt_swap::value is non-zero every unit is byte-swapped before it is interpreted.
+template <typename opt_swap> struct utf32_decoder {
+  typedef uint32_t type;
+
+  // data/size: input units; result: initial accumulator, threaded through every Traits call and returned at the end.
+  template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits) {
+    for (; size; ++data, --size) {
+      const uint32_t code_point = opt_swap::value ? endian_swap(*data) : *data;
+
+      if (code_point < 0x10000) {
+        // U+0000..U+FFFF
+        result = Traits::low(result, code_point);
+      } else {
+        // U+10000..U+10FFFF
+        result = Traits::high(result, code_point);
+      }
+    }
+
+    return result;
+  }
+};
+
+// Decodes Latin-1 input: every byte is a code point of its own and is passed to Traits::low.
+struct latin1_decoder {
+  typedef uint8_t type;
+
+  // data/size: input bytes; result: initial accumulator, threaded through every Traits call and returned at the end.
+  template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) {
+    const uint8_t* const end = data + size;
+
+    for (const uint8_t* cursor = data; cursor != end; ++cursor) {
+      result = Traits::low(result, *cursor);
+    }
+
+    return result;
+  }
+};
+
+// Maps sizeof(wchar_t) to the matching fixed-width code unit type and its counter/writer/decoder.
+// Only the sizes 2 and 4 are specialized; any other size leaves the template undefined.
+template <size_t size> struct wchar_selector;
+
+// 2-byte wchar_t: handled as UTF-16 (decoder without byte swapping)
+template <> struct wchar_selector<2> {
+ typedef uint16_t type;
+ typedef utf16_counter counter;
+ typedef utf16_writer writer;
+ typedef utf16_decoder<opt_false> decoder;
+};
+
+// 4-byte wchar_t: handled as UTF-32 (decoder without byte swapping)
+template <> struct wchar_selector<4> {
+ typedef uint32_t type;
+ typedef utf32_counter counter;
+ typedef utf32_writer writer;
+ typedef utf32_decoder<opt_false> decoder;
+};
+
+// counter and writer selected for this platform's wchar_t
+typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
+typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
+
+// Decodes wchar_t input by delegating to the decoder that wchar_selector picks for sizeof(wchar_t).
+struct wchar_decoder {
+  typedef wchar_t type;
+
+  // data/size: input units; result: initial accumulator; traits is handed through to the selected decoder.
+  template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits) {
+    typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
+
+    // view the wide characters as the decoder's own unit type
+    const typename decoder::type* units = reinterpret_cast<const typename decoder::type*>(data);
+
+    return decoder::process(units, size, result, traits);
+  }
+};
+
+#ifdef PUGIXML_WCHAR_MODE
+// Copies length wide characters from data to result, reversing the byte order of each one.
+PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
+{
+  // fixed-width integer type of the same size as wchar_t
+  typedef wchar_selector<sizeof(wchar_t)>::type unit_type;
+
+  for (size_t i = 0; i < length; ++i) {
+    const unit_type swapped = endian_swap(static_cast<unit_type>(data[i]));
+
+    result[i] = static_cast<wchar_t>(swapped);
+  }
+}
+#endif
+PUGI__NS_END
+
+PUGI__NS_BEGIN
+// Bit flags stored per character in chartype_table; each flag lists the characters that carry it.
+enum chartype_t {
+ ct_parse_pcdata = 1, // \0, &, \r, <
+ ct_parse_attr = 2, // \0, &, \r, ', "
+ ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab
+ ct_space = 8, // \r, \n, space, tab
+ ct_parse_cdata = 16, // \0, ], >, \r
+ ct_parse_comment = 32, // \0, -, >, \r
+ ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
+ ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, :
+};
+
+// Lookup table indexed by character code (0-255); each entry is a combination of chartype_t flags.
+// For example entry 13 ('\r') is 63 = all six parse/space flags, and entry 45 ('-') is 96 = ct_parse_comment | ct_symbol.
+static const unsigned char chartype_table[256] = {
+ 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
+ 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63
+ 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95
+ 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127
+
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
+};
+
+// Bit flags stored in chartypex_table (the second, "extended" classification table).
+// The trailing comments list the characters that carry each flag.
+enum chartypex_t {
+ ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
+ ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, "
+ ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
+ ctx_digit = 8, // 0-9
+ ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
+};
+
+// Lookup table indexed by character code; every entry is an OR of chartypex_t flags.
+// Example: digits are 24 = ctx_digit | ctx_symbol, letters are 20 = ctx_start_symbol | ctx_symbol.
+// All entries from 128 upwards are 20 as well.
+static const unsigned char chartypex_table[256] = {
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
+ 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
+
+ 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
+ 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
+
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
+};
+
+// Character classification helpers: yield a non-zero value when character `c`
+// carries any of the flags in `ct` according to `table`.
+// In wchar_t mode only values below 128 index the table directly; every other
+// value is classified using entry 128. In char mode the character is converted
+// to unsigned char and used as the index, so all 256 entries are reachable.
+#ifdef PUGIXML_WCHAR_MODE
+#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
+#else
+#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
+#endif
+
+// Lookups against chartype_table (chartype_t flags) and chartypex_table (chartypex_t flags).
+#define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
+#define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
+
+// Returns true when the lowest-addressed byte of an unsigned int holding 1 is 1,
+// i.e. when the least significant byte is stored first.
+PUGI__FN bool is_little_endian()
+{
+    unsigned int probe = 1;
+    const unsigned char* first_byte = reinterpret_cast<unsigned char*>(&probe);
+
+    return *first_byte == 1;
+}
+
+// Returns the concrete UTF-16/UTF-32 encoding (with byte order) that matches
+// the size of wchar_t and the byte order of the machine.
+PUGI__FN xml_encoding get_wchar_encoding()
+{
+    PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
+
+    const bool little = is_little_endian();
+
+    if (sizeof(wchar_t) == 2)
+        return little ? encoding_utf16_le : encoding_utf16_be;
+
+    return little ? encoding_utf32_le : encoding_utf32_be;
+}
+
+// Guesses the encoding of a document from its first four bytes.
+// Recognized patterns, grouped by the first byte:
+//   00 00 FE FF (BOM), 00 00 00 3C ('<')          -> UTF-32 BE
+//   00 3C ..                                      -> UTF-16 BE ('<' without BOM)
+//   FF FE 00 00 (BOM)                             -> UTF-32 LE
+//   FF FE ..    (BOM)                             -> UTF-16 LE
+//   FE FF ..    (BOM)                             -> UTF-16 BE
+//   3C 00 00 00 ('<')                             -> UTF-32 LE
+//   3C 00 ..                                      -> UTF-16 LE ('<' without BOM)
+// Everything else, including the UTF-8 BOM (EF BB BF) and "<?xm", is reported as UTF-8.
+PUGI__FN xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3)
+{
+    switch (d0) {
+    case 0x00:
+        if (d1 == 0) {
+            const bool bom = (d2 == 0xfe && d3 == 0xff);
+            const bool open_tag = (d2 == 0 && d3 == 0x3c);
+
+            if (bom || open_tag) return encoding_utf32_be;
+        } else if (d1 == 0x3c) {
+            // utf16 '<' followed by anything (this may fail, but is better than utf8 since it's zero terminated so early)
+            return encoding_utf16_be;
+        }
+        break;
+
+    case 0xff:
+        if (d1 == 0xfe)
+            return (d2 == 0 && d3 == 0) ? encoding_utf32_le : encoding_utf16_le;
+        break;
+
+    case 0xfe:
+        if (d1 == 0xff) return encoding_utf16_be;
+        break;
+
+    case 0x3c:
+        if (d1 == 0)
+            return (d2 == 0 && d3 == 0) ? encoding_utf32_le : encoding_utf16_le;
+        break;
+
+    default:
+        break;
+    }
+
+    // nothing recognizable as UTF-16/UTF-32 was found, assume utf8
+    return encoding_utf8;
+}
+
+// Resolves a requested encoding to a concrete one:
+//  - encoding_wchar / encoding_utf16 / encoding_utf32 become the variant with the machine's byte order;
+//  - encoding_auto is detected from the first four bytes of `contents` (UTF-8 if fewer than 4 bytes);
+//  - any other value is returned unchanged.
+PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
+{
+    switch (encoding) {
+    case encoding_wchar:
+        return get_wchar_encoding();
+
+    case encoding_utf16:
+        return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+    case encoding_utf32:
+        return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+    case encoding_auto:
+        // autodetection is performed below
+        break;
+
+    default:
+        // an explicit encoding was requested, keep it
+        return encoding;
+    }
+
+    // not enough data to inspect
+    if (size < 4) return encoding_utf8;
+
+    // guess from the leading bytes (based on XML specification, Appendix F.1)
+    const uint8_t* data = static_cast<const uint8_t*>(contents);
+
+    PUGI__DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
+
+    return guess_buffer_encoding(d0, d1, d2, d3);
+}
+
+// Produces a writable char_t buffer for input that is already in the native encoding.
+// Mutable input is used in place (out_length = number of char_t units).
+// Immutable input is copied into a newly allocated, zero-terminated buffer
+// (out_length = number of units + 1). Returns false if the allocation fails.
+PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+{
+    size_t length = size / sizeof(char_t);
+
+    if (is_mutable) {
+        // caller allows modification: hand the same memory back
+        out_length = length;
+        out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
+
+        return true;
+    }
+
+    // one extra unit for the terminator
+    char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
+    if (!buffer) return false;
+
+    if (!contents)
+        assert(length == 0);
+    else
+        memcpy(buffer, contents, length * sizeof(char_t));
+
+    buffer[length] = 0;
+
+    out_length = length + 1;
+    out_buffer = buffer;
+
+    return true;
+}
+
+#ifdef PUGIXML_WCHAR_MODE
+// True when `le` and `re` are the same UTF width (16 or 32) but opposite byte orders,
+// so converting between them only requires swapping bytes.
+PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
+{
+    const bool utf16_opposite =
+        (le == encoding_utf16_be && re == encoding_utf16_le) ||
+        (le == encoding_utf16_le && re == encoding_utf16_be);
+
+    const bool utf32_opposite =
+        (le == encoding_utf32_be && re == encoding_utf32_le) ||
+        (le == encoding_utf32_le && re == encoding_utf32_be);
+
+    return utf16_opposite || utf32_opposite;
+}
+
+// Converts input that differs from the native wchar_t encoding only in byte order.
+// Mutable input is swapped in place (out_length = number of units); immutable input
+// is swapped into a newly allocated, zero-terminated buffer (out_length = units + 1).
+// Returns false if the allocation fails.
+PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+{
+    const char_t* source = static_cast<const char_t*>(contents);
+    const size_t unit_count = size / sizeof(char_t);
+
+    if (is_mutable) {
+        // swap in place, reusing the caller's memory
+        char_t* in_place = const_cast<char_t*>(source);
+
+        convert_wchar_endian_swap(in_place, source, unit_count);
+
+        out_length = unit_count;
+        out_buffer = in_place;
+
+        return true;
+    }
+
+    // one extra unit for the terminator
+    char_t* copy = static_cast<char_t*>(xml_memory::allocate((unit_count + 1) * sizeof(char_t)));
+    if (!copy) return false;
+
+    convert_wchar_endian_swap(copy, source, unit_count);
+    copy[unit_count] = 0;
+
+    out_length = unit_count + 1;
+    out_buffer = copy;
+
+    return true;
+}
+
+// Transcodes `contents` (in the encoding handled by decoder D) into a newly allocated,
+// zero-terminated wchar_t buffer using two decoder passes: one to measure, one to write.
+// out_length includes the terminator. Returns false if the allocation fails.
+template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
+{
+    typedef typename D::type source_unit;
+
+    const source_unit* data = static_cast<const source_unit*>(contents);
+    const size_t data_length = size / sizeof(source_unit);
+
+    // pass 1: count the wchar_t units the output needs
+    size_t length = D::process(data, data_length, 0, wchar_counter());
+
+    // room for the output plus terminator
+    char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
+    if (!buffer) return false;
+
+    // pass 2: decode the input again, this time writing wchar_t units
+    wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
+    wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
+
+    assert(oend == obegin + length);
+    *oend = 0;
+
+    out_length = length + 1;
+    out_buffer = buffer;
+
+    return true;
+}
+
+// wchar_t build: converts `contents` from `encoding` into a wchar_t buffer.
+// Input already in the native wchar_t encoding is reused or copied, input in the
+// opposite byte order is byte-swapped, and everything else is transcoded.
+// Returns false on allocation failure or (after asserting) on an unknown encoding.
+PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
+{
+    const xml_encoding wchar_encoding = get_wchar_encoding();
+
+    // already native: nothing to convert
+    if (encoding == wchar_encoding)
+        return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+
+    // same width, opposite byte order
+    if (need_endian_swap_utf(encoding, wchar_encoding))
+        return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
+
+    // utf8 input
+    if (encoding == encoding_utf8)
+        return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
+
+    // utf16 input: opt_true is used when the input is not in machine byte order
+    if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) {
+        const xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+        if (native_encoding == encoding)
+            return convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>());
+
+        return convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
+    }
+
+    // utf32 input: same scheme as utf16
+    if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) {
+        const xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+        if (native_encoding == encoding)
+            return convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>());
+
+        return convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
+    }
+
+    // latin1 input
+    if (encoding == encoding_latin1)
+        return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
+
+    assert(!"Invalid encoding");
+    return false;
+}
+#else
+// Transcodes `contents` (in the encoding handled by decoder D) into a newly allocated,
+// zero-terminated UTF-8 buffer using two decoder passes: one to measure, one to write.
+// out_length includes the terminator. Returns false if the allocation fails.
+template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
+{
+    typedef typename D::type source_unit;
+
+    const source_unit* data = static_cast<const source_unit*>(contents);
+    const size_t data_length = size / sizeof(source_unit);
+
+    // pass 1: count the utf8 bytes the output needs
+    size_t length = D::process(data, data_length, 0, utf8_counter());
+
+    // room for the output plus terminator
+    char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
+    if (!buffer) return false;
+
+    // pass 2: decode the input again, this time writing utf8
+    uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
+    uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
+
+    assert(oend == obegin + length);
+    *oend = 0;
+
+    out_length = length + 1;
+    out_buffer = buffer;
+
+    return true;
+}
+
+// Returns the number of leading bytes of `data` that are <= 127;
+// equals `size` when every byte is 7-bit.
+PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
+{
+    size_t prefix = 0;
+
+    while (prefix < size && data[prefix] <= 127)
+        ++prefix;
+
+    return prefix;
+}
+
+// Converts latin1 input to UTF-8. A leading run of 7-bit bytes is copied verbatim and only
+// the remainder goes through latin1_decoder; purely 7-bit input is delegated to
+// get_mutable_buffer. Returns false if an allocation fails.
+PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+{
+    const uint8_t* data = static_cast<const uint8_t*>(contents);
+    size_t data_length = size;
+
+    // split the input into a 7-bit head and a tail that needs conversion
+    size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
+    assert(prefix_length <= data_length);
+
+    const uint8_t* tail = data + prefix_length;
+    const size_t tail_length = data_length - prefix_length;
+
+    // nothing beyond 7-bit: the bytes are already valid utf8
+    if (tail_length == 0)
+        return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+
+    // pass 1: total utf8 size = head as-is + converted tail
+    size_t length = prefix_length + latin1_decoder::process(tail, tail_length, 0, utf8_counter());
+
+    // room for the output plus terminator
+    char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
+    if (!buffer) return false;
+
+    // pass 2: copy the head, then convert the tail right behind it
+    memcpy(buffer, data, prefix_length);
+
+    uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
+    uint8_t* oend = latin1_decoder::process(tail, tail_length, obegin + prefix_length, utf8_writer());
+
+    assert(oend == obegin + length);
+    *oend = 0;
+
+    out_length = length + 1;
+    out_buffer = buffer;
+
+    return true;
+}
+
+// char build: converts `contents` from `encoding` into a UTF-8 buffer.
+// UTF-8 input is reused or copied, UTF-16/UTF-32/latin1 input is transcoded.
+// Returns false on allocation failure or (after asserting) on an unknown encoding.
+PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
+{
+    // already utf8: nothing to convert
+    if (encoding == encoding_utf8)
+        return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+
+    // utf16 input: opt_true is used when the input is not in machine byte order
+    if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) {
+        const xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+        if (native_encoding == encoding)
+            return convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>());
+
+        return convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
+    }
+
+    // utf32 input: same scheme as utf16
+    if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) {
+        const xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+        if (native_encoding == encoding)
+            return convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>());
+
+        return convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
+    }
+
+    // latin1 input
+    if (encoding == encoding_latin1)
+        return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
+
+    assert(!"Invalid encoding");
+    return false;
+}
+#endif
+
+// First half of a wide -> utf8 conversion: returns the number of utf8 bytes
+// needed to encode `length` wchar_t units starting at `str`.
+PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
+{
+    const size_t utf8_size = wchar_decoder::process(str, length, 0, utf8_counter());
+
+    return utf8_size;
+}
+
+// Second half of a wide -> utf8 conversion: encodes `str` into `buffer`, which must hold
+// exactly `size` bytes (the value reported by as_utf8_begin). No terminator is written.
+PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
+{
+    uint8_t* const begin = reinterpret_cast<uint8_t*>(buffer);
+
+    // write utf8 and remember where the output stopped
+    uint8_t* const end = wchar_decoder::process(str, length, begin, utf8_writer());
+
+    assert(begin + size == end);
+
+    // keep the names "used" when assert is compiled out
+    (void)!end;
+    (void)!size;
+}
+
+#ifndef PUGIXML_NO_STL
+// Converts `length` wchar_t units at `str` into a utf8 std::string (measure, then write).
+PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
+{
+    // pass 1: how many utf8 bytes are needed
+    const size_t size = as_utf8_begin(str, length);
+
+    // string of that size, zero-filled until pass 2 overwrites it
+    std::string result(size, '\0');
+
+    if (size == 0) return result;
+
+    // pass 2: encode straight into the string's storage
+    as_utf8_end(&result[0], size, str, length);
+
+    return result;
+}
+
+// Converts `size` bytes of utf8 at `str` into a wchar_t string (measure, then write).
+PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
+{
+    const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
+
+    // pass 1: how many wchar_t units are needed
+    size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
+
+    // string of that length, zero-filled until pass 2 overwrites it
+    std::basic_string<wchar_t> result(length, L'\0');
+
+    if (length == 0) return result;
+
+    // pass 2: decode straight into the string's storage
+    wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
+    wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
+
+    assert(begin + length == end);
+    (void)!end;
+
+    return result;
+}
+#endif
+
+// Decides whether a string of `length` units may be written over the existing string `target`.
+//  - never when the header carries xml_memory_page_contents_shared_mask;
+//  - when the header_mask bit is clear: whenever the new string fits;
+//  - when it is set: only if the new string fits and the old string is shorter than
+//    32 units or less than half of it would be left unused.
+template <typename Header>
+inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
+{
+    if (header & xml_memory_page_contents_shared_mask) return false;
+
+    const size_t capacity = strlength(target);
+    const bool mask_clear = (header & header_mask) == 0;
+
+    // every remaining path requires the new string to fit
+    if (capacity < length) return false;
+
+    // always reuse document buffer memory if possible
+    if (mask_clear) return true;
+
+    // reuse heap memory if waste is not too great
+    const size_t reuse_threshold = 32;
+    const size_t wasted = capacity - length;
+
+    return capacity < reuse_threshold || wasted < capacity / 2;
+}
+
+// Assigns `source` (source_length units, not necessarily zero-terminated) to the string slot `dest`.
+// `header` / `header_mask` track whether `dest` currently owns allocator memory.
+//  - empty source: the old allocation (if any) is released and dest becomes null;
+//  - source fits into the existing storage (see strcpy_insitu_allow): it is written in place;
+//  - otherwise: a new string is allocated, filled, and only then the old one is released.
+// Returns false if reserving or allocating memory fails; dest is left untouched in that case.
+template <typename String, typename Header>
+PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
+{
+    if (source_length == 0) {
+        // empty string and null pointer are equivalent, so just deallocate old memory
+        xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
+
+        if (header & header_mask) alloc->deallocate_string(dest);
+
+        // mark the string as not allocated
+        dest = 0;
+        header &= ~header_mask;
+
+        return true;
+    } else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest)) {
+        // we can reuse old buffer, so just copy the new data (including zero terminator).
+        // source may alias dest (e.g. a string assigned to itself or to its own suffix);
+        // memcpy is undefined for overlapping ranges, memmove is not.
+        memmove(dest, source, source_length * sizeof(char_t));
+        dest[source_length] = 0;
+
+        return true;
+    } else {
+        xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
+
+        if (!alloc->reserve()) return false;
+
+        // allocate new buffer
+        char_t* buf = alloc->allocate_string(source_length + 1);
+        if (!buf) return false;
+
+        // copy the string (including zero terminator); buf is fresh memory, so it cannot overlap source
+        memcpy(buf, source, source_length * sizeof(char_t));
+        buf[source_length] = 0;
+
+        // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
+        if (header & header_mask) alloc->deallocate_string(dest);
+
+        // the string is now allocated, so set the flag
+        dest = buf;
+        header |= header_mask;
+
+        return true;
+    }
+}
+
+// Tracks a "hole" left in a buffer that is being shortened in place.
+// `end` points just past the hole and `size` is its accumulated width in characters;
+// text that follows a hole is moved down over it lazily, when the next hole is
+// pushed or when flush() is called.
+struct gap {
+    char_t* end;
+    size_t size;
+
+    gap(): end(0), size(0) {
+    }
+
+    // Registers a new hole of `count` characters starting at s and advances s past it.
+    // A previously recorded hole is closed first, so the holes merge into one.
+    void push(char_t*& s, size_t count) {
+        if (end) close_pending(s);
+
+        s += count; // end of current gap
+
+        end = s;
+        size += count;
+    }
+
+    // Closes the recorded hole (if any) and returns the new past-the-end pointer
+    // of the text that ended at s.
+    char_t* flush(char_t* s) {
+        if (!end) return s;
+
+        close_pending(s);
+
+        return s - size;
+    }
+
+    // Moves [end, s) down by `size` characters, to [end - size, ...).
+    void close_pending(char_t* s) {
+        assert(s >= end);
+        memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
+    }
+};
+
+// Decodes one character/entity reference in place.
+// s is expected to point at the '&' (the visible callers invoke this only when *s == '&').
+// Recognized forms: &#NNN; &#xHHH; &amp; &apos; &gt; &lt; &quot;
+// On success the decoded character(s) are written starting at s, the rest of the
+// reference text is registered with g as a gap, and the pointer just past ';' is returned.
+// On anything else the buffer is left unmodified and the position where matching
+// stopped is returned, so the caller simply continues scanning from there.
+PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
+{
+ // stre is the read cursor; s stays at '&' until something is actually decoded
+ char_t* stre = s + 1;
+
+ switch (*stre) {
+ case '#': { // &#...
+ // accumulated code point; not range-checked (unsigned arithmetic wraps on overflow)
+ unsigned int ucsc = 0;
+
+ if (stre[1] == 'x') { // &#x... (hex code)
+ stre += 2;
+
+ char_t ch = *stre;
+
+ // "&#x;" has no digits: leave it as is
+ if (ch == ';') return stre;
+
+ for (;;) {
+ if (static_cast<unsigned int>(ch - '0') <= 9)
+ ucsc = 16 * ucsc + (ch - '0');
+ // (ch | ' ') folds 'A'-'F' onto 'a'-'f'
+ else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
+ ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
+ else if (ch == ';')
+ break;
+ else // cancel
+ return stre;
+
+ ch = *++stre;
+ }
+
+ ++stre;
+ } else { // &#... (dec code)
+ char_t ch = *++stre;
+
+ // "&#;" has no digits: leave it as is
+ if (ch == ';') return stre;
+
+ for (;;) {
+ if (static_cast<unsigned int>(static_cast<unsigned int>(ch) - '0') <= 9)
+ ucsc = 10 * ucsc + (ch - '0');
+ else if (ch == ';')
+ break;
+ else // cancel
+ return stre;
+
+ ch = *++stre;
+ }
+
+ ++stre;
+ }
+
+ // write the code point at s in the native encoding; the writer's return value becomes the
+ // new s (presumably the position just past the encoded output - confirm in the writer's any())
+#ifdef PUGIXML_WCHAR_MODE
+ s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
+#else
+ s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
+#endif
+
+ // everything between the written output and the end of the reference is now a gap
+ g.push(s, stre - s);
+ return stre;
+ }
+
+ case 'a': { // &a
+ ++stre;
+
+ if (*stre == 'm') { // &am
+ if (*++stre == 'p' && *++stre == ';') { // &amp;
+ *s++ = '&';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ } else if (*stre == 'p') { // &ap
+ if (*++stre == 'o' && *++stre == 's' && *++stre == ';') { // &apos;
+ *s++ = '\'';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ }
+ break;
+ }
+
+ case 'g': { // &g
+ if (*++stre == 't' && *++stre == ';') { // &gt;
+ *s++ = '>';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ break;
+ }
+
+ case 'l': { // &l
+ if (*++stre == 't' && *++stre == ';') { // &lt;
+ *s++ = '<';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ break;
+ }
+
+ case 'q': { // &q
+ if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') { // &quot;
+ *s++ = '"';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ // not a recognized reference: text is untouched, resume after the characters examined
+ return stre;
+}
+
+// Parser utilities
+// These macros rely on names from the expanding scope: `s` (current position), `endch`,
+// `optmsk`, `cursor`, `alloc`, `ch`, `error_offset` and `error_status`.
+// ENDSWITH: c equals e, or c is the terminating zero and endch equals e
+// (NOTE(review): endch is presumably the character that the terminator replaced at the end of the buffer - confirm at the call site)
+#define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
+// SKIPWS: advance s past ct_space characters
+#define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
+// OPTSET: non-zero if any bit of OPT is set in optmsk
+#define PUGI__OPTSET(OPT) ( optmsk & (OPT) )
+// PUSHNODE: append a new node of TYPE under cursor and descend into it; fails with status_out_of_memory
+#define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
+// POPNODE: move cursor back to its parent
+#define PUGI__POPNODE() { cursor = cursor->parent; }
+// SCANFOR: advance s until X holds or the terminator is reached
+#define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
+// SCANWHILE: advance s while X holds (X itself must stop at the terminator)
+#define PUGI__SCANWHILE(X) { while (X) ++s; }
+// SCANWHILE_UNROLL: like SCANWHILE, four characters per iteration; X must test the local `ss`, not *s
+#define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
+// ENDSEG: remember the current character in ch, overwrite it with a terminator, step past it
+#define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; }
+// THROW_ERROR: record position and status, then return a null char_t* from the enclosing function
+#define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0)
+// CHECK_ERROR: fail with err if s has reached the terminator
+#define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
+
+// Normalizes line endings inside a comment body in place and zero-terminates it.
+// s points at the first character of the comment text. Returns the position just past the
+// closing "-->" (or past "--" when the '>' is only implied by endch), or 0 if the
+// terminator is reached before the comment is closed.
+PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
+{
+ gap g;
+
+ while (true) {
+ // skip ahead to the next character that may need handling (\0, -, >, \r)
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
+
+ if (*s == '\r') { // Either a single 0x0d or 0x0d 0x0a pair
+ *s++ = '\n'; // replace first one with 0x0a
+
+ // drop the 0x0a of a 0x0d 0x0a pair by turning it into a gap
+ if (*s == '\n') g.push(s, 1);
+ } else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) { // comment ends here
+ // close pending gaps and terminate the (possibly shortened) text
+ *g.flush(s) = 0;
+
+ return s + (s[2] == '>' ? 3 : 2);
+ } else if (*s == 0) {
+ return 0;
+ } else ++s;
+ }
+}
+
+// Normalizes line endings inside a CDATA section in place and zero-terminates it.
+// s points at the first character of the section contents. On success returns the position
+// one past the first ']' of the closing "]]>" (the caller steps over the rest);
+// returns 0 if the terminator is reached before the section is closed.
+PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
+{
+ gap g;
+
+ while (true) {
+ // skip ahead to the next character that may need handling (\0, ], >, \r)
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
+
+ if (*s == '\r') { // Either a single 0x0d or 0x0d 0x0a pair
+ *s++ = '\n'; // replace first one with 0x0a
+
+ // drop the 0x0a of a 0x0d 0x0a pair by turning it into a gap
+ if (*s == '\n') g.push(s, 1);
+ } else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) { // CDATA ends here
+ // close pending gaps and terminate the (possibly shortened) text
+ *g.flush(s) = 0;
+
+ return s + 1;
+ } else if (*s == 0) {
+ return 0;
+ } else ++s;
+ }
+}
+
+// Signature of a PCDATA converter: processes text in place starting at the given
+// position and returns the position from which parsing continues.
+typedef char_t* (*strconv_pcdata_t)(char_t*);
+
+// PCDATA converter parameterized at compile time:
+//  opt_trim   - strip trailing whitespace from the resulting text
+//  opt_eol    - turn \r and \r\n into \n
+//  opt_escape - decode character/entity references via strconv_escape
+template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl {
+ // Converts text starting at s in place and zero-terminates it.
+ // Returns the position just past the '<' that ended the text, or the position of the
+ // terminator if the input ended first.
+ static char_t* parse(char_t* s) {
+ gap g;
+
+ // start of the text, lower bound for trailing-whitespace trimming
+ char_t* begin = s;
+
+ while (true) {
+ // skip ahead to the next character that may need handling (\0, &, \r, <)
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata));
+
+ if (*s == '<') { // PCDATA ends here
+ char_t* end = g.flush(s);
+
+ if (opt_trim::value)
+ while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
+ --end;
+
+ *end = 0;
+
+ return s + 1;
+ } else if (opt_eol::value && *s == '\r') { // Either a single 0x0d or 0x0d 0x0a pair
+ *s++ = '\n'; // replace first one with 0x0a
+
+ // drop the 0x0a of a 0x0d 0x0a pair by turning it into a gap
+ if (*s == '\n') g.push(s, 1);
+ } else if (opt_escape::value && *s == '&') {
+ s = strconv_escape(s, g);
+ } else if (*s == 0) {
+ // input ended without '<': finish the text the same way, but stay on the terminator
+ char_t* end = g.flush(s);
+
+ if (opt_trim::value)
+ while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
+ --end;
+
+ *end = 0;
+
+ return s;
+ } else ++s;
+ }
+ }
+};
+
+// Selects the PCDATA converter instantiation that matches the parse options in optmask
+// (parse_escapes, parse_eol, parse_trim_pcdata).
+PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
+{
+    PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
+
+    // index bits: 0 = escapes, 1 = eol, 2 = trim; template arguments are <trim, eol, escapes>
+    static const strconv_pcdata_t converters[8] = {
+        strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse,
+        strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse,
+        strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse,
+        strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse,
+        strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse,
+        strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse,
+        strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse,
+        strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse
+    };
+
+    // bits 4-5 of optmask give escapes/eol, bit 11 gives trim; the result is always in 0..7
+    const unsigned int index = ((optmask >> 4) & 3) | ((optmask >> 9) & 4);
+
+    return converters[index];
+}
+
+// Signature of an attribute value converter: processes the value in place starting at the
+// given position, up to the given closing quote character.
+typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
+
+// Attribute value converters. opt_escape selects whether character/entity references are
+// decoded via strconv_escape. Each function rewrites the value in place, zero-terminates it
+// and returns the position just past the closing quote, or 0 if the terminator is
+// reached before the closing quote.
+template <typename opt_escape> struct strconv_attribute_impl {
+ // Whitespace normalization: leading and trailing whitespace is removed and every
+ // inner run of whitespace is collapsed into a single space.
+ static char_t* parse_wnorm(char_t* s, char_t end_quote) {
+ gap g;
+
+ // trim leading whitespaces
+ if (PUGI__IS_CHARTYPE(*s, ct_space)) {
+ char_t* str = s;
+
+ do ++str;
+ while (PUGI__IS_CHARTYPE(*str, ct_space));
+
+ g.push(s, str - s);
+ }
+
+ while (true) {
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));
+
+ if (*s == end_quote) {
+ char_t* str = g.flush(s);
+
+ // terminate the value, then walk backwards zeroing trailing whitespace
+ do *str-- = 0;
+ while (PUGI__IS_CHARTYPE(*str, ct_space));
+
+ return s + 1;
+ } else if (PUGI__IS_CHARTYPE(*s, ct_space)) {
+ // first whitespace of a run becomes a plain space...
+ *s++ = ' ';
+
+ // ...and the rest of the run is removed
+ if (PUGI__IS_CHARTYPE(*s, ct_space)) {
+ char_t* str = s + 1;
+ while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
+
+ g.push(s, str - s);
+ }
+ } else if (opt_escape::value && *s == '&') {
+ s = strconv_escape(s, g);
+ } else if (!*s) {
+ return 0;
+ } else ++s;
+ }
+ }
+
+ // Whitespace conversion: every whitespace character becomes a space; a \r\n pair
+ // becomes a single space.
+ static char_t* parse_wconv(char_t* s, char_t end_quote) {
+ gap g;
+
+ while (true) {
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));
+
+ if (*s == end_quote) {
+ *g.flush(s) = 0;
+
+ return s + 1;
+ } else if (PUGI__IS_CHARTYPE(*s, ct_space)) {
+ if (*s == '\r') {
+ *s++ = ' ';
+
+ // drop the \n of a \r\n pair
+ if (*s == '\n') g.push(s, 1);
+ } else *s++ = ' ';
+ } else if (opt_escape::value && *s == '&') {
+ s = strconv_escape(s, g);
+ } else if (!*s) {
+ return 0;
+ } else ++s;
+ }
+ }
+
+ // End-of-line normalization only: \r and \r\n become \n.
+ static char_t* parse_eol(char_t* s, char_t end_quote) {
+ gap g;
+
+ while (true) {
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
+
+ if (*s == end_quote) {
+ *g.flush(s) = 0;
+
+ return s + 1;
+ } else if (*s == '\r') {
+ *s++ = '\n';
+
+ // drop the \n of a \r\n pair
+ if (*s == '\n') g.push(s, 1);
+ } else if (opt_escape::value && *s == '&') {
+ s = strconv_escape(s, g);
+ } else if (!*s) {
+ return 0;
+ } else ++s;
+ }
+ }
+
+ // No whitespace or end-of-line processing; only (optional) reference decoding.
+ static char_t* parse_simple(char_t* s, char_t end_quote) {
+ gap g;
+
+ while (true) {
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
+
+ if (*s == end_quote) {
+ *g.flush(s) = 0;
+
+ return s + 1;
+ } else if (opt_escape::value && *s == '&') {
+ s = strconv_escape(s, g);
+ } else if (!*s) {
+ return 0;
+ } else ++s;
+ }
+ }
+};
+
+// Selects the attribute value converter that matches the parse options in optmask.
+// Precedence: parse_wnorm_attribute, then parse_wconv_attribute, then parse_eol, then the
+// plain converter; parse_escapes independently selects the reference-decoding variant.
+PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
+{
+    PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
+
+    // bits 4..7 of optmask: escapes, eol, wconv, wnorm
+    const unsigned int flags = (optmask >> 4) & 15;
+    const bool escapes = (flags & 1) != 0;
+
+    if (flags & 8) {
+        if (escapes) return strconv_attribute_impl<opt_true>::parse_wnorm;
+
+        return strconv_attribute_impl<opt_false>::parse_wnorm;
+    }
+
+    if (flags & 4) {
+        if (escapes) return strconv_attribute_impl<opt_true>::parse_wconv;
+
+        return strconv_attribute_impl<opt_false>::parse_wconv;
+    }
+
+    if (flags & 2) {
+        if (escapes) return strconv_attribute_impl<opt_true>::parse_eol;
+
+        return strconv_attribute_impl<opt_false>::parse_eol;
+    }
+
+    if (escapes) return strconv_attribute_impl<opt_true>::parse_simple;
+
+    return strconv_attribute_impl<opt_false>::parse_simple;
+}
+
+// Builds an xml_parse_result carrying the given status and offset (offset defaults to 0);
+// all other members keep their default-constructed values.
+inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
+{
+    xml_parse_result outcome;
+
+    outcome.offset = offset;
+    outcome.status = status;
+
+    return outcome;
+}
+
+struct xml_parser {
+ xml_allocator alloc;
+ xml_allocator* alloc_state;
+ char_t* error_offset;
+ xml_parse_status error_status;
+
+ // Works on a private copy of the caller's allocator state (alloc) and remembers where
+ // that state came from (alloc_state) so the destructor can write it back.
+ // Starts with no error recorded.
+ xml_parser(xml_allocator* alloc_): alloc(*alloc_), alloc_state(alloc_), error_offset(0), error_status(status_ok) {
+ }
+
+ // Publishes the parser's local allocator state back to the allocator it was copied from.
+ ~xml_parser() {
+ *alloc_state = alloc;
+ }
+
+ // DOCTYPE consists of nested sections of the following possible types:
+ // <!-- ... -->, <? ... ?>, "...", '...'
+ // <![...]]>
+ // <!...>
+ // First group can not contain nested groups
+ // Second group can contain nested groups of the same type
+ // Third group can contain all other groups
+ // Skips one non-nesting DOCTYPE element starting at s: a quoted string, a <? ... ?>
+ // section or a <!-- ... --> comment. Returns the position just past it.
+ // Fails with status_bad_doctype (returning 0 via PUGI__THROW_ERROR) when s starts
+ // none of these or when the element is not closed before the terminator.
+ char_t* parse_doctype_primitive(char_t* s) {
+ if (*s == '"' || *s == '\'') {
+ // quoted string
+ char_t ch = *s++;
+ PUGI__SCANFOR(*s == ch);
+ if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+ s++;
+ } else if (s[0] == '<' && s[1] == '?') {
+ // <? ... ?>
+ s += 2;
+ PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
+ if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+ s += 2;
+ } else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-') {
+ // <!-- ... -->
+ s += 4;
+ PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
+ if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+ s += 3;
+ } else PUGI__THROW_ERROR(status_bad_doctype, s);
+
+ return s;
+ }
+
+ // Skips a <![ ... ]]> section, including sections of the same kind nested inside it.
+ // s must point at the opening "<![". Returns the position just past the matching "]]>";
+ // fails with status_bad_doctype if the terminator is reached first.
+ char_t* parse_doctype_ignore(char_t* s) {
+ // number of nested sections currently open (the outermost one is not counted)
+ size_t depth = 0;
+
+ assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
+ s += 3;
+
+ while (*s) {
+ if (s[0] == '<' && s[1] == '!' && s[2] == '[') {
+ // nested ignore section
+ s += 3;
+ depth++;
+ } else if (s[0] == ']' && s[1] == ']' && s[2] == '>') {
+ // ignore section end
+ s += 3;
+
+ if (depth == 0)
+ return s;
+
+ depth--;
+ } else s++;
+ }
+
+ PUGI__THROW_ERROR(status_bad_doctype, s);
+ }
+
+ // Skips a <! ... > group, handling nested <!...> groups, <![...]]> sections and primitives.
+ // s must point at the group's first character (asserted: '<' or 0, followed by '!').
+ // Returns the position OF the closing '>' (not past it). If the terminator is reached
+ // instead, that position is returned only when no nested group is open and endch is '>';
+ // otherwise the call fails with status_bad_doctype. Returns 0 on any nested failure.
+ char_t* parse_doctype_group(char_t* s, char_t endch) {
+ // number of nested <!...> groups currently open
+ size_t depth = 0;
+
+ assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
+ s += 2;
+
+ while (*s) {
+ if (s[0] == '<' && s[1] == '!' && s[2] != '-') {
+ if (s[2] == '[') {
+ // ignore
+ s = parse_doctype_ignore(s);
+ if (!s) return s;
+ } else {
+ // some control group
+ s += 2;
+ depth++;
+ }
+ } else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') {
+ // unknown tag (forbidden), or some primitive group
+ s = parse_doctype_primitive(s);
+ if (!s) return s;
+ } else if (*s == '>') {
+ // closes either the outermost group (done) or a nested one
+ if (depth == 0)
+ return s;
+
+ depth--;
+ s++;
+ } else s++;
+ }
+
+ if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
+
+ return s;
+ }
+
+ // Parses a construct that begins with "<!": a comment, a CDATA section or a DOCTYPE.
+ // s points at the '!' on entry. Depending on optmsk a node is appended under cursor and its
+ // value points into the buffer, which is zero-terminated in place. cursor is passed by
+ // value, so the node pushed here does not change the caller's cursor.
+ // Returns the position just past the construct, or 0 (with error_status / error_offset set)
+ // on failure.
+ char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch) {
+ // parse node contents, starting with exclamation mark
+ ++s;
+
+ if (*s == '-') { // '<!-...'
+ ++s;
+
+ if (*s == '-') { // '<!--...'
+ ++s;
+
+ if (PUGI__OPTSET(parse_comments)) {
+ PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
+ cursor->value = s; // Save the offset.
+ }
+
+ // line-ending normalization is only needed when the comment text is actually kept
+ if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments)) {
+ s = strconv_comment(s, endch);
+
+ if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
+ } else {
+ // Scan for terminating '-->'.
+ PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
+ PUGI__CHECK_ERROR(status_bad_comment, s);
+
+ if (PUGI__OPTSET(parse_comments))
+ *s = 0; // Zero-terminate this segment at the first terminating '-'.
+
+ s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
+ }
+ } else PUGI__THROW_ERROR(status_bad_comment, s);
+ } else if (*s == '[') {
+ // '<![CDATA[...'
+ if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[') {
+ ++s;
+
+ if (PUGI__OPTSET(parse_cdata)) {
+ PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
+ cursor->value = s; // Save the offset.
+
+ if (PUGI__OPTSET(parse_eol)) {
+ s = strconv_cdata(s, endch);
+
+ if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
+ } else {
+ // Scan for terminating ']]>'.
+ PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
+ PUGI__CHECK_ERROR(status_bad_cdata, s);
+
+ *s++ = 0; // Zero-terminate this segment.
+ }
+ } else { // Flagged for discard, but we still have to scan for the terminator.
+ // Scan for terminating ']]>'.
+ PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
+ PUGI__CHECK_ERROR(status_bad_cdata, s);
+
+ ++s;
+ }
+
+ // on every path above s now points at the second ']' of the terminator
+ s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
+ } else PUGI__THROW_ERROR(status_bad_cdata, s);
+ } else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E')) {
+ // step back from 'D' to the '<' that opened the declaration
+ s -= 2;
+
+ // DOCTYPE is rejected when the current node has a parent
+ if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+ // first character after "<!DOCTYPE" (9 characters)
+ char_t* mark = s + 9;
+
+ s = parse_doctype_group(s, endch);
+ if (!s) return s;
+
+ // terminate the doctype text at its closing '>' (unless the buffer already ends here)
+ assert((*s == 0 && endch == '>') || *s == '>');
+ if (*s) *s++ = 0;
+
+ if (PUGI__OPTSET(parse_doctype)) {
+ while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
+
+ PUGI__PUSHNODE(node_doctype);
+
+ cursor->value = mark;
+ }
+ } else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
+ else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
+ else PUGI__THROW_ERROR(status_unrecognized_tag, s);
+
+ return s;
+ }
+
+ // Parses a '<?...' construct: either the XML declaration ('<?xml ...?>') or a processing instruction.
+ // s: points at the '?' on entry (it is stepped over first).
+ // ref_cursor: current node, passed by reference; it is copied into a local and written back on
+ // the normal exit path. For a declaration with content the function returns with the cursor left
+ // on the new node_declaration so that parse_tree continues at its attribute parsing code.
+ // optmsk: parse option bits, tested through PUGI__OPTSET.
+ // endch: the character that originally ended the buffer (parse() overwrites it with 0).
+ // Returns the position to continue parsing from.
+ // NOTE(review): the PUGI__* macros are defined outside this chunk; callers test the result with
+ // `if (!s) return s;`, so the error macros presumably return 0 - confirm.
+ char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch) {
+ // load into registers
+ xml_node_struct* cursor = ref_cursor;
+ char_t ch = 0;
+
+ // parse node contents, starting with question mark
+ ++s;
+
+ // read PI target
+ char_t* target = s;
+
+ if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
+
+ PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
+ PUGI__CHECK_ERROR(status_bad_pi, s);
+
+ // determine node type; stricmp / strcasecmp is not portable
+ // OR-ing with ' ' (0x20) lower-cases ASCII letters, so this matches "xml" in any letter case;
+ // target + 3 == s requires the target to be exactly three characters long
+ bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
+
+ // A node is only created when the option matching the node kind is enabled
+ if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi)) {
+ if (declaration) {
+ // disallow non top-level declarations
+ if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
+
+ PUGI__PUSHNODE(node_declaration);
+ } else {
+ PUGI__PUSHNODE(node_pi);
+ }
+
+ cursor->name = target;
+
+ // NOTE(review): per the comments in parse_tree, PUGI__ENDSEG saves the current character in
+ // 'ch', zero-terminates the segment and steps over it - macro body is not visible here
+ PUGI__ENDSEG();
+
+ // parse value/attributes
+ if (ch == '?') {
+ // empty node
+ if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
+ // Step over '>' only if it is physically present in the buffer
+ s += (*s == '>');
+
+ PUGI__POPNODE();
+ } else if (PUGI__IS_CHARTYPE(ch, ct_space)) {
+ PUGI__SKIPWS();
+
+ // scan for tag end
+ char_t* value = s;
+
+ PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
+ PUGI__CHECK_ERROR(status_bad_pi, s);
+
+ if (declaration) {
+ // replace ending ? with / so that 'element' terminates properly
+ *s = '/';
+
+ // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
+ s = value;
+ } else {
+ // store value and step over >
+ cursor->value = value;
+
+ PUGI__POPNODE();
+
+ PUGI__ENDSEG();
+
+ s += (*s == '>');
+ }
+ } else PUGI__THROW_ERROR(status_bad_pi, s);
+ } else {
+ // The node is not wanted: just skip to the end of the construct without creating anything
+ // scan for tag end
+ PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
+ PUGI__CHECK_ERROR(status_bad_pi, s);
+
+ // Skip a single character when the closing '>' was the removed last character of the buffer
+ s += (s[1] == '>' ? 2 : 1);
+ }
+
+ // store from registers
+ ref_cursor = cursor;
+
+ return s;
+ }
+
+ // Main parsing loop: scans the zero-terminated buffer starting at s and builds elements,
+ // attributes and text nodes below root.
+ // s: start of the buffer to parse (after any BOM, see parse()).
+ // root: node that receives the parsed top-level nodes; cursor starts here and must be back at
+ // root when the input ends, otherwise status_end_element_mismatch is raised.
+ // optmsk: parse option bits, tested through PUGI__OPTSET; also selects the attribute/PCDATA
+ // conversion routines.
+ // endch: the character that originally ended the buffer (parse() overwrites it with 0), so
+ // several branches accept "*s == 0 && endch == '>'" in place of a real '>'.
+ // Returns the position where scanning stopped.
+ // NOTE(review): the PUGI__* macros are defined outside this chunk. The existing comments state
+ // that PUGI__PUSHNODE appends a node, PUGI__POPNODE pops and PUGI__ENDSEG saves the current
+ // character in 'ch' and terminates the segment; callers of the sibling parse functions test for
+ // a null return, so the error macros presumably return 0 - confirm against the definitions.
+ char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch) {
+ strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
+ strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
+
+ char_t ch = 0;
+ xml_node_struct* cursor = root;
+ char_t* mark = s;
+
+ while (*s != 0) {
+ if (*s == '<') {
+ ++s;
+
+ // Entered either by falling through from the '<' test above or by the goto at the end of the
+ // PCDATA branch; in both cases s points just past a '<'
+LOC_TAG:
+ if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) { // '<#...'
+ PUGI__PUSHNODE(node_element); // Append a new node to the tree.
+
+ cursor->name = s;
+
+ PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
+ PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
+
+ if (ch == '>') {
+ // end of tag
+ } else if (PUGI__IS_CHARTYPE(ch, ct_space)) {
+ // Also entered by goto after parse_question leaves the cursor on a node_declaration, so
+ // that the declaration's attributes are parsed by the same code as element attributes
+LOC_ATTRIBUTES:
+ while (true) {
+ PUGI__SKIPWS(); // Eat any whitespace.
+
+ if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) { // <... #...
+ xml_attribute_struct* a = append_new_attribute(cursor, alloc); // Make space for this attribute.
+ if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
+
+ a->name = s; // Save the offset.
+
+ PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
+ PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
+
+ // Whitespace between the attribute name and '=' is allowed: skip it and re-read 'ch'
+ if (PUGI__IS_CHARTYPE(ch, ct_space)) {
+ PUGI__SKIPWS(); // Eat any whitespace.
+
+ ch = *s;
+ ++s;
+ }
+
+ if (ch == '=') { // '<... #=...'
+ PUGI__SKIPWS(); // Eat any whitespace.
+
+ if (*s == '"' || *s == '\'') { // '<... #="...'
+ ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
+ ++s; // Step over the quote.
+ a->value = s; // Save the offset.
+
+ s = strconv_attribute(s, ch);
+
+ if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
+
+ // After this line the loop continues from the start;
+ // Whitespaces, / and > are ok, symbols and EOF are wrong,
+ // everything else will be detected
+ if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
+ } else PUGI__THROW_ERROR(status_bad_attribute, s);
+ } else PUGI__THROW_ERROR(status_bad_attribute, s);
+ } else if (*s == '/') {
+ ++s;
+
+ // '/>' closes the node immediately; the '>' may be the removed last character
+ if (*s == '>') {
+ PUGI__POPNODE();
+ s++;
+ break;
+ } else if (*s == 0 && endch == '>') {
+ PUGI__POPNODE();
+ break;
+ } else PUGI__THROW_ERROR(status_bad_start_element, s);
+ } else if (*s == '>') {
+ ++s;
+
+ break;
+ } else if (*s == 0 && endch == '>') {
+ break;
+ } else PUGI__THROW_ERROR(status_bad_start_element, s);
+ }
+
+ // !!!
+ } else if (ch == '/') { // '<#.../'
+ if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
+
+ PUGI__POPNODE(); // Pop.
+
+ // Step over '>' only if it is physically present in the buffer
+ s += (*s == '>');
+ } else if (ch == 0) {
+ // we stepped over null terminator, backtrack & handle closing tag
+ --s;
+
+ if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
+ } else PUGI__THROW_ERROR(status_bad_start_element, s);
+ } else if (*s == '/') {
+ ++s;
+
+ // Closing tag: its name must match the name of the currently open node character by character
+ char_t* name = cursor->name;
+ if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s);
+
+ while (PUGI__IS_CHARTYPE(*s, ct_symbol)) {
+ if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s);
+ }
+
+ // Leftover characters in the open node's name mean the closing tag name was shorter
+ if (*name) {
+ // Special case: the only missing character is the removed last character of the buffer
+ if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
+ else PUGI__THROW_ERROR(status_end_element_mismatch, s);
+ }
+
+ PUGI__POPNODE(); // Pop.
+
+ PUGI__SKIPWS();
+
+ if (*s == 0) {
+ if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
+ } else {
+ if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
+ ++s;
+ }
+ } else if (*s == '?') { // '<?...'
+ s = parse_question(s, cursor, optmsk, endch);
+ if (!s) return s;
+
+ assert(cursor);
+ if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
+ } else if (*s == '!') { // '<!...'
+ s = parse_exclamation(s, cursor, optmsk, endch);
+ if (!s) return s;
+ } else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
+ else PUGI__THROW_ERROR(status_unrecognized_tag, s);
+ } else {
+ mark = s; // Save this offset while searching for a terminator.
+
+ PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
+
+ if (*s == '<' || !*s) {
+ // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
+ assert(mark != s);
+
+ // Whitespace-only text is dropped unless one of the whitespace-preserving options asks for it
+ if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata)) {
+ continue;
+ } else if (PUGI__OPTSET(parse_ws_pcdata_single)) {
+ // Kept only when followed by a closing tag and the current node has no children yet
+ if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
+ }
+ }
+
+ // Without trimming, the text node starts at the original position including leading whitespace
+ if (!PUGI__OPTSET(parse_trim_pcdata))
+ s = mark;
+
+ // Text directly under a parentless node is only stored when parse_fragment is set
+ if (cursor->parent || PUGI__OPTSET(parse_fragment)) {
+ PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
+ cursor->value = s; // Save the offset.
+
+ s = strconv_pcdata(s);
+
+ PUGI__POPNODE(); // Pop since this is a standalone.
+
+ if (!*s) break;
+ } else {
+ PUGI__SCANFOR(*s == '<'); // '...<'
+ if (!*s) break;
+
+ ++s;
+ }
+
+ // We're after '<'
+ goto LOC_TAG;
+ }
+ }
+
+ // check that last tag is closed
+ if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
+
+ return s;
+ }
+
+#ifdef PUGIXML_WCHAR_MODE
+ // Returns s advanced by one character if the buffer starts with the byte order mark U+FEFF,
+ // otherwise returns s unchanged.
+ static char_t* parse_skip_bom(char_t* s) {
+     unsigned int bom = 0xfeff;
+
+     if (s[0] == static_cast<wchar_t>(bom))
+         return s + 1;
+
+     return s;
+ }
+#else
+ // Returns s advanced by three characters if the buffer starts with the UTF-8 byte order mark
+ // (EF BB BF), otherwise returns s unchanged.
+ static char_t* parse_skip_bom(char_t* s) {
+     bool starts_with_bom = s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf';
+
+     if (starts_with_bom)
+         return s + 3;
+
+     return s;
+ }
+#endif
+
+ // Returns true if node or any node reachable from it through next_sibling is an element node.
+ // A null node yields false.
+ static bool has_element_node_siblings(xml_node_struct* node) {
+     for (; node; node = node->next_sibling) {
+         if (PUGI__NODETYPE(node) == node_element)
+             return true;
+     }
+
+     return false;
+ }
+
+ // Entry point of the parser: parses `length` characters starting at `buffer` into nodes below root.
+ // buffer: mutable character buffer; its last character is overwritten with 0 before parsing.
+ // length: number of characters in buffer; 0 is handled as an empty document.
+ // xmldoc: document structure, passed to the parser as its xml_allocator.
+ // root: node that receives the parsed nodes; it may already have children.
+ // optmsk: parse option bits, tested through PUGI__OPTSET.
+ // Returns a parse result built from the parser's error status and error offset, adjusted for
+ // the removed last character as described in the comments below.
+ static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk) {
+ // early-out for empty documents
+ if (length == 0)
+ return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
+
+ // get last child of the root before parsing
+ // NOTE(review): this relies on first_child->prev_sibling_c referring to the last child - the
+ // list layout is defined outside this chunk, confirm
+ xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
+
+ // create parser on stack
+ xml_parser parser(static_cast<xml_allocator*>(xmldoc));
+
+ // save last character and make buffer zero-terminated (speeds up parsing)
+ char_t endch = buffer[length - 1];
+ buffer[length - 1] = 0;
+
+ // skip BOM to make sure it does not end up as part of parse output
+ char_t* buffer_data = parse_skip_bom(buffer);
+
+ // perform actual parsing
+ // the return value is ignored; the outcome is read from parser.error_status / error_offset
+ parser.parse_tree(buffer_data, root, optmsk, endch);
+
+ xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
+ assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
+
+ if (result) {
+ // since we removed last character, we have to handle the only possible false positive (stray <)
+ if (endch == '<')
+ return make_parse_result(status_unrecognized_tag, length - 1);
+
+ // check if there are any element nodes parsed
+ // only nodes added by this call are inspected: start after the previously last child, if any
+ xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0;
+
+ if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
+ return make_parse_result(status_no_document_element, length - 1);
+ } else {
+ // roll back offset if it occurs on a null terminator in the source buffer
+ if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
+ result.offset--;
+ }
+
+ return result;
+ }
+};
+
+// Output facilities
+// Returns the encoding in which char_t data is held in memory: the wchar_t encoding in
+// PUGIXML_WCHAR_MODE builds, UTF-8 otherwise.
+PUGI__FN xml_encoding get_write_native_encoding()
+{
+#ifndef PUGIXML_WCHAR_MODE
+    return encoding_utf8;
+#else
+    return get_wchar_encoding();
+#endif
+}
+
+// Resolves a requested output encoding to a concrete one:
+// - encoding_wchar becomes the actual wchar_t encoding;
+// - encoding_utf16 / encoding_utf32 become the variant matching the host byte order;
+// - encoding_auto becomes UTF-8 (no autodetection is possible for output);
+// - every other value is returned unchanged.
+PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
+{
+    switch (encoding) {
+    case encoding_wchar:
+        return get_wchar_encoding();
+
+    case encoding_utf16:
+        return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+    case encoding_utf32:
+        return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+    case encoding_auto:
+        return encoding_utf8;
+
+    default:
+        // an explicit encoding was requested, keep it
+        return encoding;
+    }
+}
+
+// Converts `length` char_t units from `data` with decoder D and writer T, storing the output at `dest`.
+// The D and T arguments are tag values used only to select the template parameters.
+// Returns the size of the produced output in bytes.
+template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
+{
+    PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
+
+    const typename D::type* input = reinterpret_cast<const typename D::type*>(data);
+    typename T::value_type out_end = D::process(input, length, dest, T());
+
+    size_t units_written = static_cast<size_t>(out_end - dest);
+
+    return units_written * sizeof(*dest);
+}
+
+// Converts `length` char_t units from `data` with decoder D and writer T, storing the output at `dest`,
+// and byte-swaps every produced unit in place when opt_swap is true.
+// The D and T arguments are tag values used only to select the template parameters.
+// Returns the size of the produced output in bytes.
+template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
+{
+    PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
+
+    const typename D::type* input = reinterpret_cast<const typename D::type*>(data);
+    typename T::value_type out_end = D::process(input, length, dest, T());
+
+    if (opt_swap) {
+        typename T::value_type unit = dest;
+
+        while (unit != out_end) {
+            *unit = endian_swap(*unit);
+            ++unit;
+        }
+    }
+
+    size_t units_written = static_cast<size_t>(out_end - dest);
+
+    return units_written * sizeof(*dest);
+}
+
+#ifdef PUGIXML_WCHAR_MODE
+// Wide-character variant: returns how many of the first `length` characters of `data` can be
+// converted without splitting a surrogate pair. Returns 0 for an empty range.
+PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
+{
+    if (length < 1) return 0;
+
+    // with a 16-bit wchar_t a trailing value in [0xD800, 0xDC00) is the lead of a surrogate pair
+    // whose second half is not part of this chunk, so it is left out
+    bool ends_with_lead_surrogate = sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400;
+
+    if (ends_with_lead_surrogate)
+        return length - 1;
+
+    return length;
+}
+
+// Wide-character variant: converts `length` characters from `data` to `encoding`, writing into the
+// output pointer that matches the target unit size (r_char, r_u8, r_u16 or r_u32).
+// Returns the number of bytes produced; asserts and returns 0 for an unsupported encoding.
+PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
+{
+    // same encoding family as wchar_t but opposite byte order: swapping bytes is all that is needed
+    if (need_endian_swap_utf(encoding, get_wchar_encoding())) {
+        convert_wchar_endian_swap(r_char, data, length);
+
+        return length * sizeof(char_t);
+    }
+
+    switch (encoding) {
+    case encoding_utf8:
+        return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
+
+    case encoding_utf16_be:
+    case encoding_utf16_le: {
+        // the writer produces host-order units; swap when the other byte order was requested
+        xml_encoding host_utf16 = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+        return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), host_utf16 != encoding);
+    }
+
+    case encoding_utf32_be:
+    case encoding_utf32_le: {
+        xml_encoding host_utf32 = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+        return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), host_utf32 != encoding);
+    }
+
+    case encoding_latin1:
+        return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
+
+    default:
+        assert(!"Invalid encoding");
+        return 0;
+    }
+}
+#else
+// UTF-8 variant: returns a prefix length of `data` (at most `length`) that does not end in the
+// middle of a multi-byte sequence. Ranges shorter than five characters yield 0.
+PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
+{
+    if (length < 5) return 0;
+
+    const char_t* tail = data + length;
+
+    // walk back over at most four bytes looking for one that is not a continuation byte (10xxxxxx);
+    // that byte is either standalone or starts a sequence, so the chunk is cut right before it
+    for (size_t back = 1; back < 5; ++back) {
+        uint8_t byte = static_cast<uint8_t>(*(tail - back));
+        bool is_continuation = (byte & 0xc0) == 0x80;
+
+        if (!is_continuation)
+            return length - back;
+    }
+
+    // four continuation bytes in a row: the tail is malformed anyway, so process the whole chunk
+    return length;
+}
+
+// UTF-8 variant: converts `length` characters from `data` to `encoding`, writing into the output
+// pointer that matches the target unit size (r_u8, r_u16 or r_u32); the char_t output is unused.
+// Returns the number of bytes produced; asserts and returns 0 for an unsupported encoding.
+PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
+{
+    switch (encoding) {
+    case encoding_utf16_be:
+    case encoding_utf16_le: {
+        // the writer produces host-order units; swap when the other byte order was requested
+        xml_encoding host_utf16 = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+        return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), host_utf16 != encoding);
+    }
+
+    case encoding_utf32_be:
+    case encoding_utf32_le: {
+        xml_encoding host_utf32 = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+        return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), host_utf32 != encoding);
+    }
+
+    case encoding_latin1:
+        return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
+
+    default:
+        assert(!"Invalid encoding");
+        return 0;
+    }
+}
+#endif
+
+// Buffers char_t output in a fixed-size member array and forwards it to an xml_writer, converting
+// to the requested output encoding through a scratch area when that encoding differs from the
+// native one. Nothing in this class flushes on destruction; pending data is only written when
+// flush() is called or when the buffer fills up.
+class xml_buffered_writer
+{
+ // copying is disabled: declared but not defined
+ xml_buffered_writer(const xml_buffered_writer&);
+ xml_buffered_writer& operator=(const xml_buffered_writer&);
+
+public:
+ // writer_: destination for the encoded bytes; held by reference.
+ // user_encoding: requested output encoding, resolved to a concrete one by get_write_encoding.
+ xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding)) {
+ PUGI__STATIC_ASSERT(bufcapacity >= 8);
+ }
+
+ // Writes out the buffered characters and empties the buffer.
+ // Always returns 0, which lets the write() overloads reset their offset with `offset = flush()`.
+ size_t flush() {
+ flush(buffer, bufsize);
+ bufsize = 0;
+ return 0;
+ }
+
+ // Sends `size` characters starting at `data` to the underlying writer, converting them to the
+ // output encoding first unless it is the native one. Does not touch the internal buffer state.
+ void flush(const char_t* data, size_t size) {
+ if (size == 0) return;
+
+ // fast path, just write data
+ if (encoding == get_write_native_encoding())
+ writer.write(data, size * sizeof(char_t));
+ else {
+ // convert chunk
+ size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
+ assert(result <= sizeof(scratch));
+
+ // write data
+ writer.write(scratch.data_u8, result);
+ }
+ }
+
+ // Flushes the buffer, then writes `length` characters from `data`: large inputs go to the
+ // writer directly (in codepoint-aligned chunks when conversion is needed), and whatever is
+ // left (at most bufcapacity characters) is copied into the now-empty buffer.
+ void write_direct(const char_t* data, size_t length) {
+ // flush the remaining buffer contents
+ flush();
+
+ // handle large chunks
+ if (length > bufcapacity) {
+ if (encoding == get_write_native_encoding()) {
+ // fast path, can just write data chunk
+ writer.write(data, length * sizeof(char_t));
+ return;
+ }
+
+ // need to convert in suitable chunks
+ while (length > bufcapacity) {
+ // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
+ // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
+ size_t chunk_size = get_valid_length(data, bufcapacity);
+ assert(chunk_size);
+
+ // convert chunk and write
+ flush(data, chunk_size);
+
+ // iterate
+ data += chunk_size;
+ length -= chunk_size;
+ }
+
+ // small tail is copied below
+ bufsize = 0;
+ }
+
+ memcpy(buffer + bufsize, data, length * sizeof(char_t));
+ bufsize += length;
+ }
+
+ // Appends `length` characters from `data`: copied into the buffer when they fit in the
+ // remaining space, otherwise handed to write_direct.
+ void write_buffer(const char_t* data, size_t length) {
+ size_t offset = bufsize;
+
+ if (offset + length <= bufcapacity) {
+ memcpy(buffer + offset, data, length * sizeof(char_t));
+ bufsize = offset + length;
+ } else {
+ write_direct(data, length);
+ }
+ }
+
+ // Appends a zero-terminated string. Characters are copied one by one until the string ends or
+ // the buffer is full; in the latter case the remainder is passed to write_direct.
+ void write_string(const char_t* data) {
+ // write the part of the string that fits in the buffer
+ size_t offset = bufsize;
+
+ while (*data && offset < bufcapacity)
+ buffer[offset++] = *data++;
+
+ // write the rest
+ if (offset < bufcapacity) {
+ bufsize = offset;
+ } else {
+ // backtrack a bit if we have split the codepoint
+ // length: characters copied by the loop above; extra: trailing characters of that copy
+ // that belong to an incomplete codepoint and are therefore re-sent with the remainder
+ size_t length = offset - bufsize;
+ size_t extra = length - get_valid_length(data - length, length);
+
+ bufsize = offset - extra;
+
+ write_direct(data - extra, strlength(data) + extra);
+ }
+ }
+
+ // The write() overloads below append one to six characters, flushing first when fewer than
+ // that many free slots remain in the buffer.
+ void write(char_t d0) {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 1) offset = flush();
+
+ buffer[offset + 0] = d0;
+ bufsize = offset + 1;
+ }
+
+ void write(char_t d0, char_t d1) {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 2) offset = flush();
+
+ buffer[offset + 0] = d0;
+ buffer[offset + 1] = d1;
+ bufsize = offset + 2;
+ }
+
+ void write(char_t d0, char_t d1, char_t d2) {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 3) offset = flush();
+
+ buffer[offset + 0] = d0;
+ buffer[offset + 1] = d1;
+ buffer[offset + 2] = d2;
+ bufsize = offset + 3;
+ }
+
+ void write(char_t d0, char_t d1, char_t d2, char_t d3) {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 4) offset = flush();
+
+ buffer[offset + 0] = d0;
+ buffer[offset + 1] = d1;
+ buffer[offset + 2] = d2;
+ buffer[offset + 3] = d3;
+ bufsize = offset + 4;
+ }
+
+ void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 5) offset = flush();
+
+ buffer[offset + 0] = d0;
+ buffer[offset + 1] = d1;
+ buffer[offset + 2] = d2;
+ buffer[offset + 3] = d3;
+ buffer[offset + 4] = d4;
+ bufsize = offset + 5;
+ }
+
+ void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 6) offset = flush();
+
+ buffer[offset + 0] = d0;
+ buffer[offset + 1] = d1;
+ buffer[offset + 2] = d2;
+ buffer[offset + 3] = d3;
+ buffer[offset + 4] = d4;
+ buffer[offset + 5] = d5;
+ bufsize = offset + 6;
+ }
+
+ // utf8 maximum expansion: x4 (-> utf32)
+ // utf16 maximum expansion: x2 (-> utf32)
+ // utf32 maximum expansion: x1
+ // bufcapacitybytes is the byte budget shared by `buffer` and `scratch`: each buffered
+ // character costs sizeof(char_t) bytes in `buffer` plus up to 4 bytes in `scratch`.
+ enum {
+ bufcapacitybytes =
+#ifdef PUGIXML_MEMORY_OUTPUT_STACK
+ PUGIXML_MEMORY_OUTPUT_STACK
+#else
+ 10240
+#endif
+ ,
+ bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
+ };
+
+ // pending output in native encoding; the first bufsize entries are valid
+ char_t buffer[bufcapacity];
+
+ // conversion target; the member matching the output unit size is filled by convert_buffer_output
+ union {
+ uint8_t data_u8[4 * bufcapacity];
+ uint16_t data_u16[2 * bufcapacity];
+ uint32_t data_u32[bufcapacity];
+ char_t data_char[bufcapacity];
+ } scratch;
+
+ xml_writer& writer;
+ size_t bufsize;
+ xml_encoding encoding;
+};
+
+// Writes the zero-terminated string s to writer, replacing the characters selected by `type`
+// with XML escapes: '&', '<', '>' and '"' become named entities, any other selected character
+// (asserted to be below 32) becomes a two-digit decimal character reference.
+PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
+{
+    while (*s) {
+        const char_t* run_start = s;
+
+        // Move s past the run of characters that are not flagged by `type`
+        PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));
+
+        writer.write_buffer(run_start, static_cast<size_t>(s - run_start));
+
+        if (*s == 0) {
+            // end of input reached, the loop condition ends the function
+        } else if (*s == '&') {
+            writer.write('&', 'a', 'm', 'p', ';');
+            ++s;
+        } else if (*s == '<') {
+            writer.write('&', 'l', 't', ';');
+            ++s;
+        } else if (*s == '>') {
+            writer.write('&', 'g', 't', ';');
+            ++s;
+        } else if (*s == '"') {
+            writer.write('&', 'q', 'u', 'o', 't', ';');
+            ++s;
+        } else {
+            // remaining flagged characters are written as &#NN;
+            unsigned int ch = static_cast<unsigned int>(*s++);
+            assert(ch < 32);
+
+            char_t tens = static_cast<char_t>((ch / 10) + '0');
+            char_t ones = static_cast<char_t>((ch % 10) + '0');
+
+            writer.write('&', '#', tens, ones, ';');
+        }
+    }
+}
+
+// Writes s to writer: verbatim when format_no_escapes is set in flags, otherwise with the
+// characters selected by `type` escaped.
+PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
+{
+    if ((flags & format_no_escapes) == 0) {
+        text_output_escaped(writer, s, type);
+        return;
+    }
+
+    writer.write_string(s);
+}
+
+// Writes s as one or more CDATA sections. Because ']]>' cannot appear inside a section, the text
+// is split after the ']]' of every such sequence and the '>' starts the next section.
+// An empty string still produces one empty section.
+PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
+{
+    for (;;) {
+        writer.write('<', '!', '[', 'C', 'D');
+        writer.write('A', 'T', 'A', '[');
+
+        const char_t* section_start = s;
+
+        // stop at the end of the string or at the start of a ]]> sequence
+        while (*s) {
+            if (s[0] == ']' && s[1] == ']' && s[2] == '>') break;
+
+            ++s;
+        }
+
+        // keep ]] in this section when we stopped at ]]>
+        if (*s) s += 2;
+
+        writer.write_buffer(section_start, static_cast<size_t>(s - section_start));
+
+        writer.write(']', ']', '>');
+
+        if (!*s) break;
+    }
+}
+
+// Writes the indentation string `depth` times. Indent strings of one to four characters use the
+// fixed-arity write() overloads; any other length goes through write_buffer.
+PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
+{
+    if (indent_length == 1) {
+        for (unsigned int level = 0; level < depth; ++level)
+            writer.write(indent[0]);
+    } else if (indent_length == 2) {
+        for (unsigned int level = 0; level < depth; ++level)
+            writer.write(indent[0], indent[1]);
+    } else if (indent_length == 3) {
+        for (unsigned int level = 0; level < depth; ++level)
+            writer.write(indent[0], indent[1], indent[2]);
+    } else if (indent_length == 4) {
+        for (unsigned int level = 0; level < depth; ++level)
+            writer.write(indent[0], indent[1], indent[2], indent[3]);
+    } else {
+        for (unsigned int level = 0; level < depth; ++level)
+            writer.write_buffer(indent, indent_length);
+    }
+}
+
+// Writes s as an XML comment. A '-' that is followed by another '-' or by the end of the text is
+// written as "- " so that the body never contains "--" and never ends in '-'.
+PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
+{
+    writer.write('<', '!', '-', '-');
+
+    while (*s) {
+        const char_t* piece = s;
+
+        // advance to the next offending '-' or to the end of the text
+        for (; *s; ++s) {
+            if (s[0] == '-' && (s[1] == '-' || s[1] == 0)) break;
+        }
+
+        writer.write_buffer(piece, static_cast<size_t>(s - piece));
+
+        if (*s == 0) break;
+
+        assert(*s == '-');
+
+        writer.write('-', ' ');
+        ++s;
+    }
+
+    writer.write('-', '-', '>');
+}
+
+// Writes the value of a processing instruction. Every "?>" inside the value is written as "? >"
+// because the unmodified sequence would end the instruction.
+PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
+{
+    while (*s) {
+        const char_t* piece = s;
+
+        // advance to the next ?> sequence or to the end of the text
+        for (; *s; ++s) {
+            if (s[0] == '?' && s[1] == '>') break;
+        }
+
+        writer.write_buffer(piece, static_cast<size_t>(s - piece));
+
+        if (*s == 0) break;
+
+        assert(s[0] == '?' && s[1] == '>');
+
+        writer.write('?', ' ', '>');
+        s += 2;
+    }
+}
+
+// Writes all attributes of node as name="value" pairs. Each pair is preceded by a newline plus
+// indentation of depth + 1 when format_indent_attributes is set and format_raw is not, otherwise
+// by a single space. Attributes without a name are written as ":anonymous"; a missing value
+// produces empty quotes.
+PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
+{
+    const char_t* fallback_name = PUGIXML_TEXT(":anonymous");
+    const bool one_per_line = (flags & (format_indent_attributes | format_raw)) == format_indent_attributes;
+
+    xml_attribute_struct* attr = node->first_attribute;
+
+    while (attr) {
+        if (one_per_line) {
+            writer.write('\n');
+
+            text_output_indent(writer, indent, indent_length, depth + 1);
+        } else {
+            writer.write(' ');
+        }
+
+        const char_t* attr_name = attr->name ? attr->name + 0 : fallback_name;
+
+        writer.write_string(attr_name);
+        writer.write('=', '"');
+
+        if (attr->value)
+            text_output(writer, attr->value, ctx_special_attr, flags);
+
+        writer.write('"');
+
+        attr = attr->next_attribute;
+    }
+}
+
+// Writes the opening tag of an element, including its attributes. An element without children is
+// written self-closed (" />").
+// Returns true when the element has children, i.e. when content and a closing tag must follow;
+// false when the tag was self-closed.
+PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
+{
+    const char_t* fallback_name = PUGIXML_TEXT(":anonymous");
+    const char_t* tag = node->name ? node->name + 0 : fallback_name;
+
+    writer.write('<');
+    writer.write_string(tag);
+
+    if (node->first_attribute)
+        node_output_attributes(writer, node, indent, indent_length, flags, depth);
+
+    if (node->first_child) {
+        writer.write('>');
+
+        return true;
+    }
+
+    writer.write(' ', '/', '>');
+
+    return false;
+}
+
+// Writes the closing tag of an element; a node without a name is closed as ":anonymous".
+PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
+{
+    const char_t* fallback_name = PUGIXML_TEXT(":anonymous");
+    const char_t* tag = node->name ? node->name + 0 : fallback_name;
+
+    writer.write('<', '/');
+    writer.write_string(tag);
+    writer.write('>');
+}
+
+// Writes a node that has no child content of its own: PCDATA, CDATA, comment, processing
+// instruction, declaration or doctype. Any other node type triggers an assertion.
+// Missing text values are written as empty strings; missing names as ":anonymous".
+PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
+{
+    const char_t* fallback_name = PUGIXML_TEXT(":anonymous");
+    const xml_node_type kind = PUGI__NODETYPE(node);
+
+    if (kind == node_pcdata) {
+        text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
+    } else if (kind == node_cdata) {
+        text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
+    } else if (kind == node_comment) {
+        node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
+    } else if (kind == node_pi) {
+        writer.write('<', '?');
+        writer.write_string(node->name ? node->name + 0 : fallback_name);
+
+        // the value is optional and separated from the target by one space
+        if (node->value) {
+            writer.write(' ');
+            node_output_pi_value(writer, node->value);
+        }
+
+        writer.write('?', '>');
+    } else if (kind == node_declaration) {
+        writer.write('<', '?');
+        writer.write_string(node->name ? node->name + 0 : fallback_name);
+
+        // attributes of the declaration always stay on one line (format_raw is forced)
+        node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
+        writer.write('?', '>');
+    } else if (kind == node_doctype) {
+        writer.write('<', '!', 'D', 'O', 'C');
+        writer.write('T', 'Y', 'P', 'E');
+
+        if (node->value) {
+            writer.write(' ');
+            writer.write_string(node->value);
+        }
+
+        writer.write('>');
+    } else {
+        assert(!"Invalid node type");
+    }
+}
+
+// Bit flags that node_output carries from one written node to the next.
+enum indent_flags_t {
+    indent_newline = 1 << 0, // write a line break before the next node or closing tag
+    indent_indent = 1 << 1   // write indentation before the next node or closing tag
+};
+
+// Writes the subtree rooted at `root` to writer without recursion.
+// indent: indentation string, used once per depth level; it is only measured (and therefore only
+// used) when format_indent or format_indent_attributes is set and format_raw is not.
+// flags: format_* option bits.
+// depth: indentation level of root.
+// indent_flags remembers, from the node written last, whether the next node or closing tag must
+// be preceded by a line break and/or indentation; text nodes (PCDATA/CDATA) clear both so that
+// nothing is inserted next to text.
+PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
+{
+ size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
+ unsigned int indent_flags = indent_indent;
+
+ xml_node_struct* node = root;
+
+ do {
+ assert(node);
+
+ // begin writing current node
+ if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata) {
+ node_output_simple(writer, node, flags);
+
+ indent_flags = 0;
+ } else {
+ if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
+ writer.write('\n');
+
+ if ((indent_flags & indent_indent) && indent_length)
+ text_output_indent(writer, indent, indent_length, depth);
+
+ if (PUGI__NODETYPE(node) == node_element) {
+ indent_flags = indent_newline | indent_indent;
+
+ // node_output_start returns true when the element has children: descend into them
+ if (node_output_start(writer, node, indent, indent_length, flags, depth)) {
+ node = node->first_child;
+ depth++;
+ continue;
+ }
+ } else if (PUGI__NODETYPE(node) == node_document) {
+ indent_flags = indent_indent;
+
+ // the document node itself produces no output and does not increase the depth
+ if (node->first_child) {
+ node = node->first_child;
+ continue;
+ }
+ } else {
+ node_output_simple(writer, node, flags);
+
+ indent_flags = indent_newline | indent_indent;
+ }
+ }
+
+ // continue to the next node
+ // climb towards root until a node with a next sibling is found, closing each element left behind
+ while (node != root) {
+ if (node->next_sibling) {
+ node = node->next_sibling;
+ break;
+ }
+
+ node = node->parent;
+
+ // write closing node
+ if (PUGI__NODETYPE(node) == node_element) {
+ depth--;
+
+ if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
+ writer.write('\n');
+
+ if ((indent_flags & indent_indent) && indent_length)
+ text_output_indent(writer, indent, indent_length, depth);
+
+ node_output_end(writer, node);
+
+ indent_flags = indent_newline | indent_indent;
+ }
+ }
+ } while (node != root);
+
+ // final line break after the last written node, unless it was text or raw output is requested
+ if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
+ writer.write('\n');
+}
+
+// Returns true if a declaration node appears among the children of node before the first element
+// child; returns false once an element is met or when the children run out.
+PUGI__FN bool has_declaration(xml_node_struct* node)
+{
+    xml_node_struct* child = node->first_child;
+
+    while (child) {
+        switch (PUGI__NODETYPE(child)) {
+        case node_declaration:
+            return true;
+
+        case node_element:
+            return false;
+
+        default:
+            break;
+        }
+
+        child = child->next_sibling;
+    }
+
+    return false;
+}
+
+// Returns true if attr is one of the attributes in node's attribute list (compared by address).
+PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
+{
+    xml_attribute_struct* candidate = node->first_attribute;
+
+    while (candidate) {
+        if (candidate == attr)
+            return true;
+
+        candidate = candidate->next_attribute;
+    }
+
+    return false;
+}
+
+// Returns true if a node of type `parent` may carry attributes: only elements and declarations.
+PUGI__FN bool allow_insert_attribute(xml_node_type parent)
+{
+    switch (parent) {
+    case node_element:
+    case node_declaration:
+        return true;
+
+    default:
+        return false;
+    }
+}
+
+// Returns true if a node of type `child` may be inserted below a node of type `parent`:
+// - only documents and elements can have children;
+// - document and null nodes can never be children;
+// - declarations and doctypes are only allowed directly below the document.
+PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
+{
+    const bool parent_is_document = parent == node_document;
+
+    if (!parent_is_document && parent != node_element)
+        return false;
+
+    switch (child) {
+    case node_document:
+    case node_null:
+        return false;
+
+    case node_declaration:
+    case node_doctype:
+        return parent_is_document;
+
+    default:
+        return true;
+    }
+}
+
+// Returns true if `child` may be moved below `parent`: the node types must be compatible, both
+// nodes must have the same root, and parent must be neither child itself nor one of its descendants.
+PUGI__FN bool allow_move(xml_node parent, xml_node child)
+{
+    // type compatibility
+    if (!allow_insert_child(parent.type(), child.type()))
+        return false;
+
+    // no moves between documents
+    if (parent.root() != child.root())
+        return false;
+
+    // walking up from parent must never reach child, otherwise parent lies in child's subtree
+    for (xml_node ancestor = parent; ancestor; ancestor = ancestor.parent()) {
+        if (ancestor == child)
+            return false;
+    }
+
+    return true;
+}
+
+// Copies the string `source` into the empty destination `dest`.
+// When `alloc` is non-null and the source string is not marked as allocated (its header has no
+// header_mask bit), the pointer is shared instead of copied and both headers are flagged with
+// xml_memory_page_contents_shared_mask. Otherwise the text is duplicated with strcpy_insitu.
+// A null source leaves dest untouched.
+template <typename String, typename Header>
+PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
+{
+    assert(!dest && (header & header_mask) == 0);
+
+    if (!source) return;
+
+    const bool can_share = alloc && (source_header & header_mask) == 0;
+
+    if (!can_share) {
+        strcpy_insitu(dest, header, header_mask, source, strlength(source));
+        return;
+    }
+
+    dest = source;
+
+    // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
+    header |= xml_memory_page_contents_shared_mask;
+    source_header |= xml_memory_page_contents_shared_mask;
+}
+
+// Copies name, value and all attributes of the source node sn into the destination node dn.
+// shared_alloc is forwarded to node_copy_string and decides whether strings may be shared.
+// An attribute whose allocation fails is silently skipped.
+PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
+{
+    node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
+    node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
+
+    xml_attribute_struct* sa = sn->first_attribute;
+
+    while (sa) {
+        xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));
+
+        if (da) {
+            node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
+            node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
+        }
+
+        sa = sa->next_attribute;
+    }
+}
+
+// Copies the contents of sn and its whole subtree into dn without recursion.
+// dn: destination node; it receives sn's name, value and attributes, and copies of sn's descendants.
+// sn: source node.
+// Strings may be shared instead of duplicated only when both nodes use the same allocator.
+PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
+{
+ xml_allocator& alloc = get_allocator(dn);
+ xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;
+
+ node_copy_contents(dn, sn, shared_alloc);
+
+ // dit is the destination node under which the copy of sit is appended
+ xml_node_struct* dit = dn;
+ xml_node_struct* sit = sn->first_child;
+
+ while (sit && sit != sn) {
+ // the destination node itself is not copied when the traversal reaches it (its subtree is skipped)
+ if (sit != dn) {
+ xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));
+
+ // if the allocation fails, this node and its subtree are skipped
+ if (copy) {
+ node_copy_contents(copy, sit, shared_alloc);
+
+ if (sit->first_child) {
+ dit = copy;
+ sit = sit->first_child;
+ continue;
+ }
+ }
+ }
+
+ // continue to the next node
+ // source and destination climb in lockstep until a source node with a next sibling is found
+ do {
+ if (sit->next_sibling) {
+ sit = sit->next_sibling;
+ break;
+ }
+
+ sit = sit->parent;
+ dit = dit->parent;
+ } while (sit != sn);
+ }
+}
+
+// Copies the name and value of attribute sa into attribute da.
+// node_copy_string receives da's allocator when sa uses the very same allocator, and null otherwise.
+PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
+{
+  xml_allocator& dst_alloc = get_allocator(da);
+
+  xml_allocator* shared_alloc = 0;
+  if (&dst_alloc == &get_allocator(sa)) shared_alloc = &dst_alloc;
+
+  node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
+  node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
+}
+
+// Returns true when the node type is node_pcdata or node_cdata.
+inline bool is_text_node(xml_node_struct* node)
+{
+  switch (PUGI__NODETYPE(node)) {
+  case node_pcdata:
+  case node_cdata:
+    return true;
+
+  default:
+    return false;
+  }
+}
+
+/* get value with conversion functions
+ *
+ * string_to_integer parses an optionally signed decimal or 0x/0X-prefixed hexadecimal number from
+ * value into the unsigned type U. Leading characters classified as ct_space are skipped and parsing
+ * stops at the first character that is not a digit of the selected base.
+ * minneg is the largest magnitude accepted after a '-' sign, maxpos the largest value accepted otherwise.
+ * Out-of-range input is clamped: 0 - minneg is returned for negative input, maxpos for everything else.
+ */
+template <typename U> U string_to_integer(const char_t* value, U minneg, U maxpos)
+{
+ U result = 0;
+ const char_t* s = value;
+ // skip leading characters classified as ct_space
+ while (PUGI__IS_CHARTYPE(*s, ct_space))
+ s++;
+ // only '-' marks the value as negative; a '+' is accepted but changes nothing
+ bool negative = (*s == '-');
+ // the comparison yields 0 or 1, so the pointer advances only when a sign character is present
+ s += (*s == '+' || *s == '-');
+
+ bool overflow = false;
+ // or-ing with ' ' lets both 'x' and 'X' match the prefix check (ASCII case folding)
+ if (s[0] == '0' && (s[1] | ' ') == 'x') {
+ s += 2;
+
+ // since overflow detection relies on length of the sequence skip leading zeros
+ while (*s == '0')
+ s++;
+
+ const char_t* start = s;
+ // accumulate hex digits; the unsigned casts make characters below '0' / 'a' wrap to large values, so one comparison checks the whole range
+ for (;;) {
+ if (static_cast<unsigned>(*s - '0') < 10)
+ result = result * 16 + (*s - '0');
+ else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
+ result = result * 16 + ((*s | ' ') - 'a' + 10);
+ else
+ break;
+
+ s++;
+ }
+
+ size_t digits = static_cast<size_t>(s - start);
+ // every hex digit carries 4 bits, so U can hold at most sizeof(U) * 2 significant digits
+ overflow = digits > sizeof(U) * 2;
+ } else {
+ // since overflow detection relies on length of the sequence skip leading zeros
+ while (*s == '0')
+ s++;
+
+ const char_t* start = s;
+ // accumulate decimal digits; result may wrap around here, which the check below detects
+ for (;;) {
+ if (static_cast<unsigned>(*s - '0') < 10)
+ result = result * 10 + (*s - '0');
+ else
+ break;
+
+ s++;
+ }
+
+ size_t digits = static_cast<size_t>(s - start);
+
+ PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);
+ // max_digits10 is the digit count of the largest U value, max_lead the leading digit of that value
+ const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
+ const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
+ const size_t high_bit = sizeof(U) * 8 - 1;
+ // fewer digits than max_digits10 always fit; with exactly max_digits10 digits the value fits if the leading digit is smaller than max_lead, or equal to it while the top bit of result is still set (a wrapped result has it clear)
+ overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
+ }
+ // clamp out-of-range input to the limit that matches the sign
+ if (negative)
+ return (overflow || result > minneg) ? 0 - minneg : 0 - result;
+ else
+ return (overflow || result > maxpos) ? maxpos : result;
+}
+
+// Parses value as an int; string_to_integer clamps the result to the magnitude of INT_MIN / to INT_MAX.
+PUGI__FN int get_value_int(const char_t* value)
+{
+  const unsigned int min_magnitude = 0 - static_cast<unsigned int>(INT_MIN);
+  const unsigned int max_value = INT_MAX;
+
+  return string_to_integer<unsigned int>(value, min_magnitude, max_value);
+}
+
+// Parses value as an unsigned int; negative magnitudes are limited to 0 and positive values to UINT_MAX.
+PUGI__FN unsigned int get_value_uint(const char_t* value)
+{
+  const unsigned int min_magnitude = 0;
+  const unsigned int max_value = UINT_MAX;
+
+  return string_to_integer<unsigned int>(value, min_magnitude, max_value);
+}
+
+// Converts value to double with wcstod (wide-character builds) or strtod; no end pointer is requested.
+PUGI__FN double get_value_double(const char_t* value)
+{
+  double parsed;
+
+#ifdef PUGIXML_WCHAR_MODE
+  parsed = wcstod(value, 0);
+#else
+  parsed = strtod(value, 0);
+#endif
+
+  return parsed;
+}
+
+// Converts value to double exactly like get_value_double and narrows the result to float.
+PUGI__FN float get_value_float(const char_t* value)
+{
+  double parsed;
+
+#ifdef PUGIXML_WCHAR_MODE
+  parsed = wcstod(value, 0);
+#else
+  parsed = strtod(value, 0);
+#endif
+
+  return static_cast<float>(parsed);
+}
+
+// Interprets value as a boolean by looking at its first character only.
+PUGI__FN bool get_value_bool(const char_t* value)
+{
+  switch (*value) {
+  // 1*, t* (true), T* (True), y* (yes), Y* (YES)
+  case '1':
+  case 't':
+  case 'T':
+  case 'y':
+  case 'Y':
+    return true;
+
+  default:
+    return false;
+  }
+}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+// Parses value as a long long; string_to_integer clamps the result to the magnitude of LLONG_MIN / to LLONG_MAX.
+PUGI__FN long long get_value_llong(const char_t* value)
+{
+  const unsigned long long min_magnitude = 0 - static_cast<unsigned long long>(LLONG_MIN);
+  const unsigned long long max_value = LLONG_MAX;
+
+  return string_to_integer<unsigned long long>(value, min_magnitude, max_value);
+}
+
+// Parses value as an unsigned long long; negative magnitudes are limited to 0 and positive values to ULLONG_MAX.
+PUGI__FN unsigned long long get_value_ullong(const char_t* value)
+{
+  const unsigned long long min_magnitude = 0;
+  const unsigned long long max_value = ULLONG_MAX;
+
+  return string_to_integer<unsigned long long>(value, min_magnitude, max_value);
+}
+#endif
+
+// Writes the decimal digits of value backwards, ending at end - 1, and returns a pointer to the first
+// character of the text. When negative is true the digits of 0 - value are written and the returned
+// text starts with '-'. No terminator is written; the text ends at end.
+template <typename U>
+PUGI__FN char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative)
+{
+  char_t* cursor = end - 1;
+
+  U remaining = value;
+  if (negative) remaining = 0 - value;
+
+  do {
+    *cursor = static_cast<char_t>('0' + (remaining % 10));
+    --cursor;
+    remaining /= 10;
+  } while (remaining);
+
+  assert(cursor >= begin);
+  (void)begin;
+
+  // the sign slot is always written; it is only part of the result for negative values
+  *cursor = '-';
+
+  return negative ? cursor : cursor + 1;
+}
+
+// set value with conversion functions
+
+// Stores the zero-terminated narrow string buf in dest through strcpy_insitu.
+// In wide-character builds the characters are first widened one by one into a 128-element scratch buffer.
+template <typename String, typename Header>
+PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
+{
+#ifdef PUGIXML_WCHAR_MODE
+  char_t wide[128];
+  assert(strlen(buf) < sizeof(wide) / sizeof(wide[0]));
+
+  size_t count = 0;
+
+  while (buf[count]) {
+    wide[count] = buf[count];
+    ++count;
+  }
+
+  return strcpy_insitu(dest, header, header_mask, wide, count);
+#else
+  const size_t count = strlen(buf);
+
+  return strcpy_insitu(dest, header, header_mask, buf, count);
+#endif
+}
+
+// Formats a signed int as decimal text and stores it in dest through strcpy_insitu.
+template <typename String, typename Header>
+PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, int value)
+{
+  char_t scratch[64];
+  char_t* scratch_end = scratch + sizeof(scratch) / sizeof(scratch[0]);
+
+  const bool is_negative = value < 0;
+  char_t* text = integer_to_string<unsigned int>(scratch, scratch_end, value, is_negative);
+
+  return strcpy_insitu(dest, header, header_mask, text, scratch_end - text);
+}
+
+// Formats an unsigned int as decimal text and stores it in dest through strcpy_insitu.
+template <typename String, typename Header>
+PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned int value)
+{
+  char_t scratch[64];
+  char_t* scratch_end = scratch + sizeof(scratch) / sizeof(scratch[0]);
+
+  char_t* text = integer_to_string<unsigned int>(scratch, scratch_end, value, false);
+
+  return strcpy_insitu(dest, header, header_mask, text, scratch_end - text);
+}
+
+// Formats a float with "%.9g" and stores the text in dest via set_value_ascii.
+template <typename String, typename Header>
+PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value)
+{
+  char text[128];
+
+  sprintf(text, "%.9g", value);
+
+  return set_value_ascii(dest, header, header_mask, text);
+}
+
+// Formats a double with "%.17g" and stores the text in dest via set_value_ascii.
+template <typename String, typename Header>
+PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value)
+{
+  char text[128];
+
+  sprintf(text, "%.17g", value);
+
+  return set_value_ascii(dest, header, header_mask, text);
+}
+
+// Stores the text "true" or "false" in dest, depending on value.
+template <typename String, typename Header>
+PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, bool value)
+{
+  if (value)
+    return strcpy_insitu(dest, header, header_mask, PUGIXML_TEXT("true"), 4);
+
+  return strcpy_insitu(dest, header, header_mask, PUGIXML_TEXT("false"), 5);
+}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+// Formats a long long as decimal text and stores it in dest through strcpy_insitu.
+template <typename String, typename Header>
+PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, long long value)
+{
+  char_t scratch[64];
+  char_t* scratch_end = scratch + sizeof(scratch) / sizeof(scratch[0]);
+
+  const bool is_negative = value < 0;
+  char_t* text = integer_to_string<unsigned long long>(scratch, scratch_end, value, is_negative);
+
+  return strcpy_insitu(dest, header, header_mask, text, scratch_end - text);
+}
+
+// Formats an unsigned long long as decimal text and stores it in dest through strcpy_insitu.
+template <typename String, typename Header>
+PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned long long value)
+{
+  char_t scratch[64];
+  char_t* scratch_end = scratch + sizeof(scratch) / sizeof(scratch[0]);
+
+  char_t* text = integer_to_string<unsigned long long>(scratch, scratch_end, value, false);
+
+  return strcpy_insitu(dest, header, header_mask, text, scratch_end - text);
+}
+#endif
+
+// Parses size bytes at contents into doc, attaching the parsed nodes under root.
+//   encoding   - requested encoding; the actual one is determined by get_buffer_encoding
+//   is_mutable - forwarded to convert_buffer
+//   own        - true when the caller hands ownership of contents to this function
+//   out_buffer - receives the buffer that has to be kept alive (and later freed) for the document;
+//                it is left untouched when the caller keeps ownership and no conversion took place
+// Returns the parser result with its encoding field set to the detected encoding, status_io_error for a
+// null buffer with a non-zero size, or status_out_of_memory when the conversion fails.
+PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
+{
+  // check input buffer
+  if (!contents && size) return make_parse_result(status_io_error);
+
+  // get actual encoding
+  xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
+
+  // get private buffer
+  char_t* buffer = 0;
+  size_t length = 0;
+
+  if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) {
+    // ownership of contents was transferred to us, so it must be released on this error path as well;
+    // previously the buffer leaked whenever the conversion failed
+    if (own && contents) impl::xml_memory::deallocate(contents);
+
+    return impl::make_parse_result(status_out_of_memory);
+  }
+
+  // delete original buffer if we performed a conversion
+  if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
+
+  // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
+  if (own || buffer != contents) *out_buffer = buffer;
+
+  // store buffer for offset_debug
+  doc->buffer = buffer;
+
+  // parse
+  xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
+
+  // remember encoding
+  res.encoding = buffer_encoding;
+
+  return res;
+}
+
+// The whole file is loaded into memory, so its length is needed up front; the only (relatively) sane
+// way to obtain it is to seek to the end, ask for the position and seek back to the start.
+// Stores the length in out_result and returns status_ok, status_io_error when the position query
+// fails, or status_out_of_memory when the length does not fit into size_t.
+PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
+{
+#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
+  // there are 64-bit versions of fseek/ftell, let's use them
+  typedef __int64 offset_type;
+
+  _fseeki64(file, 0, SEEK_END);
+  const offset_type end_offset = _ftelli64(file);
+  _fseeki64(file, 0, SEEK_SET);
+#elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
+  // there are 64-bit versions of fseek/ftell, let's use them
+  typedef off64_t offset_type;
+
+  fseeko64(file, 0, SEEK_END);
+  const offset_type end_offset = ftello64(file);
+  fseeko64(file, 0, SEEK_SET);
+#else
+  // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
+  typedef long offset_type;
+
+  fseek(file, 0, SEEK_END);
+  const offset_type end_offset = ftell(file);
+  fseek(file, 0, SEEK_SET);
+#endif
+
+  // a negative position means the tell call reported an error
+  if (end_offset < 0) return status_io_error;
+
+  // the offset has to survive a round trip through size_t, otherwise it cannot be used as a buffer size
+  const size_t byte_count = static_cast<size_t>(end_offset);
+
+  if (static_cast<offset_type>(byte_count) != end_offset) return status_out_of_memory;
+
+  out_result = byte_count;
+
+  return status_ok;
+}
+
+// This function assumes that buffer has extra sizeof(char_t) writable bytes after size.
+// It appends a zero character when the encoding is one for which the conversion step does not
+// terminate the data itself, and returns the resulting size in bytes (size is returned unchanged otherwise).
+PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
+{
+#ifdef PUGIXML_WCHAR_MODE
+  const xml_encoding wchar_encoding = get_wchar_encoding();
+  const bool is_wchar_sized = encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding);
+
+  if (is_wchar_sized) {
+    char_t* text = static_cast<char_t*>(buffer);
+    const size_t length = size / sizeof(char_t);
+
+    text[length] = 0;
+
+    return (length + 1) * sizeof(char_t);
+  }
+#else
+  if (encoding == encoding_utf8) {
+    char* text = static_cast<char*>(buffer);
+
+    text[size] = 0;
+
+    return size + 1;
+  }
+#endif
+
+  return size;
+}
+
+// Reads the whole file into a freshly allocated buffer and parses it into doc via load_buffer_impl.
+// Returns status_file_not_found for a null file, the status reported by get_file_size if that fails,
+// status_out_of_memory if the buffer cannot be allocated and status_io_error on a short read.
+PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
+{
+  if (!file) return make_parse_result(status_file_not_found);
+
+  // get file size (can result in I/O errors)
+  size_t file_size = 0;
+  const xml_parse_status size_status = get_file_size(file, file_size);
+
+  if (size_status != status_ok) return make_parse_result(size_status);
+
+  // room for the terminator that zero_terminate_buffer may append
+  const size_t max_suffix_size = sizeof(char_t);
+
+  // allocate buffer for the whole file
+  char* contents = static_cast<char*>(xml_memory::allocate(file_size + max_suffix_size));
+
+  if (!contents) return make_parse_result(status_out_of_memory);
+
+  // read file in memory
+  const size_t bytes_read = fread(contents, 1, file_size, file);
+
+  if (bytes_read != file_size) {
+    xml_memory::deallocate(contents);
+
+    return make_parse_result(status_io_error);
+  }
+
+  const xml_encoding real_encoding = get_buffer_encoding(encoding, contents, file_size);
+  const size_t parse_size = zero_terminate_buffer(contents, file_size, real_encoding);
+
+  return load_buffer_impl(doc, doc, contents, parse_size, options, real_encoding, true, true, out_buffer);
+}
+
+#ifndef PUGIXML_NO_STL
+// One link of the singly linked chunk list that load_stream_data_noseek fills while reading a stream.
+// size is set by that function to the number of bytes stored in data.
+template <typename T> struct xml_stream_chunk {
+  xml_stream_chunk(): next(0), size(0) {
+  }
+
+  // Allocates a chunk through xml_memory and constructs it in place; returns null if the allocation fails.
+  static xml_stream_chunk* create() {
+    void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
+
+    return memory ? new (memory) xml_stream_chunk() : 0;
+  }
+
+  // Deallocates chunk and every chunk reachable through next.
+  static void destroy(xml_stream_chunk* chunk) {
+    // free chunk chain
+    for (xml_stream_chunk* current = chunk; current; ) {
+      xml_stream_chunk* following = current->next;
+
+      xml_memory::deallocate(current);
+
+      current = following;
+    }
+  }
+
+  xml_stream_chunk* next;
+  size_t size;
+
+  T data[xml_memory_page_size / sizeof(T)];
+};
+
+// Reads a stream until end-of-file without seeking and returns its data in one contiguous buffer.
+// The data is gathered into a list of xml_stream_chunk objects and then copied into a single
+// allocation that has sizeof(char_t) spare bytes after the data.
+// On success *out_buffer receives the allocation and *out_size the number of data bytes.
+template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
+{
+  typedef xml_stream_chunk<T> chunk_type;
+
+  // holder for the chunk list (presumably it runs destroy on the list when it goes out of scope)
+  auto_deleter<chunk_type> chunks(0, chunk_type::destroy);
+
+  // read file to a chunk list
+  size_t total_bytes = 0;
+  chunk_type* tail = 0;
+
+  while (!stream.eof()) {
+    // allocate new chunk
+    chunk_type* fresh = chunk_type::create();
+
+    if (!fresh) return status_out_of_memory;
+
+    // append chunk to list
+    if (tail) tail->next = fresh;
+    else chunks.data = fresh;
+
+    tail = fresh;
+
+    // read data to chunk
+    const std::streamsize capacity = static_cast<std::streamsize>(sizeof(fresh->data) / sizeof(T));
+
+    stream.read(fresh->data, capacity);
+    fresh->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
+
+    // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
+    if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
+
+    // guard against huge files (chunk size is small enough to make this overflow check work)
+    if (total_bytes + fresh->size < total_bytes) return status_out_of_memory;
+
+    total_bytes += fresh->size;
+  }
+
+  const size_t max_suffix_size = sizeof(char_t);
+
+  // copy chunk list to a contiguous buffer
+  char* merged = static_cast<char*>(xml_memory::allocate(total_bytes + max_suffix_size));
+
+  if (!merged) return status_out_of_memory;
+
+  char* out = merged;
+  chunk_type* piece = chunks.data;
+
+  while (piece) {
+    assert(out + piece->size <= merged + total_bytes);
+    memcpy(out, piece->data, piece->size);
+    out += piece->size;
+
+    piece = piece->next;
+  }
+
+  assert(out == merged + total_bytes);
+
+  // return buffer
+  *out_buffer = merged;
+  *out_size = total_bytes;
+
+  return status_ok;
+}
+
+// Reads the remaining data of a seekable stream with a single read call.
+// The remaining length is measured by seeking to the end and back to the current position.
+// On success *out_buffer receives a buffer with sizeof(char_t) spare bytes after the data and
+// *out_size the number of bytes that were actually read.
+template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
+{
+  typedef typename std::basic_istream<T>::pos_type position_type;
+
+  // get length of remaining data in stream
+  position_type start_pos = stream.tellg();
+  stream.seekg(0, std::ios::end);
+  std::streamoff remaining = stream.tellg() - start_pos;
+  stream.seekg(start_pos);
+
+  if (stream.fail() || start_pos < 0) return status_io_error;
+
+  // guard against huge files
+  const size_t expected_length = static_cast<size_t>(remaining);
+
+  if (static_cast<std::streamsize>(expected_length) != remaining || remaining < 0) return status_out_of_memory;
+
+  const size_t max_suffix_size = sizeof(char_t);
+
+  // read stream data into memory (guard against stream exceptions with buffer holder)
+  auto_deleter<void> holder(xml_memory::allocate(expected_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
+
+  if (!holder.data) return status_out_of_memory;
+
+  stream.read(static_cast<T*>(holder.data), static_cast<std::streamsize>(expected_length));
+
+  // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
+  if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
+
+  // return buffer
+  const size_t received_length = static_cast<size_t>(stream.gcount());
+  assert(received_length <= expected_length);
+
+  *out_buffer = holder.release();
+  *out_size = received_length * sizeof(T);
+
+  return status_ok;
+}
+
+// Loads the rest of stream into memory and parses it into doc via load_buffer_impl.
+// Returns status_io_error right away if the stream is already in a failed state.
+template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
+{
+  // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
+  if (stream.fail()) return make_parse_result(status_io_error);
+
+  void* data = 0;
+  size_t data_size = 0;
+  xml_parse_status status = status_ok;
+
+  // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
+  const bool seekable = !(stream.tellg() < 0);
+
+  if (seekable) {
+    status = load_stream_data_seek(stream, &data, &data_size);
+  } else {
+    stream.clear(); // clear error flags that could be set by a failing tellg
+    status = load_stream_data_noseek(stream, &data, &data_size);
+  }
+
+  if (status != status_ok) return make_parse_result(status);
+
+  const xml_encoding real_encoding = get_buffer_encoding(encoding, data, data_size);
+  const size_t parse_size = zero_terminate_buffer(data, data_size, real_encoding);
+
+  return load_buffer_impl(doc, doc, data, parse_size, options, real_encoding, true, true, out_buffer);
+}
+#endif
+
+#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
+// Opens a file given by a wide-character path by delegating to _wfopen.
+PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
+{
+  FILE* handle = _wfopen(path, mode);
+
+  return handle;
+}
+#else
+// Converts a wide-character path to a zero-terminated UTF-8 string allocated through xml_memory.
+// Returns null when the allocation fails; otherwise the caller has to deallocate the result.
+PUGI__FN char* convert_path_heap(const wchar_t* str)
+{
+  assert(str);
+
+  // first pass: get length in utf8 characters
+  const size_t wide_length = strlength_wide(str);
+  const size_t utf8_size = as_utf8_begin(str, wide_length);
+
+  // allocate resulting string (one extra byte for the terminator)
+  char* utf8 = static_cast<char*>(xml_memory::allocate(utf8_size + 1));
+
+  if (!utf8) return 0;
+
+  // second pass: convert to utf8
+  as_utf8_end(utf8, utf8_size, str, wide_length);
+
+  // zero-terminate
+  utf8[utf8_size] = 0;
+
+  return utf8;
+}
+
+// Opens a file given by a wide-character path on platforms without _wfopen.
+// The path is converted to UTF-8 and the mode to a narrow string before calling fopen.
+// Returns null when the mode string has more than 3 characters, when the path conversion fails
+// or when fopen fails.
+PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
+{
+  // convert mode to ASCII (we mirror _wfopen interface)
+  char mode_ascii[4] = {0};
+
+  for (size_t i = 0; mode[i]; ++i) {
+    // the last element is reserved for the terminator; a longer mode used to overflow the buffer,
+    // so refuse it instead (done before the path is allocated, so nothing has to be released)
+    if (i + 1 >= sizeof(mode_ascii)) return 0;
+
+    mode_ascii[i] = static_cast<char>(mode[i]);
+  }
+
+  // there is no standard function to open wide paths, so our best bet is to try utf8 path
+  char* path_utf8 = convert_path_heap(path);
+  if (!path_utf8) return 0;
+
+  // try to open the utf8 path
+  FILE* result = fopen(path_utf8, mode_ascii);
+
+  // free dummy buffer
+  xml_memory::deallocate(path_utf8);
+
+  return result;
+}
+#endif
+
+// Saves doc to file through an xml_writer_file.
+// Returns false for a null file or when the stream's error indicator is set after saving.
+PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
+{
+  if (!file) return false;
+
+  xml_writer_file file_writer(file);
+  doc.save(file_writer, indent, flags, encoding);
+
+  const bool had_error = ferror(file) != 0;
+
+  return !had_error;
+}
+
+// Scope guard that clears a node's name for its own lifetime:
+// the constructor remembers the current name and sets it to null, the destructor puts it back.
+struct name_null_sentry {
+  xml_node_struct* node;
+  char_t* name;
+
+  name_null_sentry(xml_node_struct* target): node(target), name(target->name) {
+    node->name = 0;
+  }
+
+  ~name_null_sentry() {
+    node->name = name;
+  }
+};
+PUGI__NS_END
+
+namespace pugi
+{
+// Stores the file handle (passed as void*) that write() will use.
+PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
+{
+}
+
+// Writes size bytes from data to the stored FILE*; the number of bytes written is not checked.
+PUGI__FN void xml_writer_file::write(const void* data, size_t size)
+{
+  FILE* target = static_cast<FILE*>(file);
+
+  size_t written = fwrite(data, 1, size, target);
+  (void)!written; // unfortunately we can't do proper error handling here
+}
+
+#ifndef PUGIXML_NO_STL
+// Writer bound to a narrow output stream; the wide stream pointer stays null.
+PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
+{
+}
+
+// Writer bound to a wide output stream; the narrow stream pointer stays null.
+PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
+{
+}
+
+// Forwards size bytes from data to whichever stream was supplied at construction.
+// For the wide stream, size is converted to a count of wchar_t elements.
+PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
+{
+  if (narrow_stream) {
+    assert(!wide_stream);
+
+    const char* bytes = reinterpret_cast<const char*>(data);
+    narrow_stream->write(bytes, static_cast<std::streamsize>(size));
+    return;
+  }
+
+  assert(wide_stream);
+  assert(size % sizeof(wchar_t) == 0);
+
+  const wchar_t* wide_chars = reinterpret_cast<const wchar_t*>(data);
+  wide_stream->write(wide_chars, static_cast<std::streamsize>(size / sizeof(wchar_t)));
+}
+#endif
+
+// Starts with a depth of 0.
+PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
+{
+}
+
+// Nothing to release.
+PUGI__FN xml_tree_walker::~xml_tree_walker()
+{
+}
+
+// Returns the stored depth value.
+PUGI__FN int xml_tree_walker::depth() const
+{
+  const int current_depth = _depth;
+
+  return current_depth;
+}
+
+// Default implementation: ignores the node and returns true.
+PUGI__FN bool xml_tree_walker::begin(xml_node&)
+{
+  return true;
+}
+
+// Default implementation: ignores the node and returns true.
+PUGI__FN bool xml_tree_walker::end(xml_node&)
+{
+  return true;
+}
+
+// Creates an empty handle (null attribute pointer).
+PUGI__FN xml_attribute::xml_attribute(): _attr(0)
+{
+}
+
+// Wraps an existing attribute structure.
+PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
+{
+}
+
+// Dummy function whose address serves as the "true" value of the unspecified-bool conversion.
+PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
+{
+}
+
+// Converts to a non-null function pointer when the handle is not empty, and to null otherwise.
+PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
+{
+  if (!_attr) return 0;
+
+  return unspecified_bool_xml_attribute;
+}
+
+// True when the handle is empty.
+PUGI__FN bool xml_attribute::operator!() const
+{
+  return _attr == 0;
+}
+
+// All comparison operators compare the wrapped attribute pointers, not the attribute contents.
+
+PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
+{
+  return _attr == r._attr;
+}
+
+PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
+{
+  return _attr != r._attr;
+}
+
+PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
+{
+  return _attr < r._attr;
+}
+
+PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
+{
+  return _attr > r._attr;
+}
+
+PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
+{
+  return _attr <= r._attr;
+}
+
+PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
+{
+  return _attr >= r._attr;
+}
+
+// Returns a handle to the following attribute, or an empty handle when this handle is empty.
+PUGI__FN xml_attribute xml_attribute::next_attribute() const
+{
+  if (_attr) return xml_attribute(_attr->next_attribute);
+
+  return xml_attribute();
+}
+
+// Returns a handle to the preceding attribute. An empty handle is returned when this handle is empty
+// or when the entry reached through prev_attribute_c has no next_attribute
+// (presumably prev_attribute_c of the first attribute wraps around to the last one - confirm).
+PUGI__FN xml_attribute xml_attribute::previous_attribute() const
+{
+  if (_attr && _attr->prev_attribute_c->next_attribute)
+    return xml_attribute(_attr->prev_attribute_c);
+
+  return xml_attribute();
+}
+
+// Each accessor returns def when the handle is empty or the attribute has no value;
+// otherwise the value string is returned directly or converted by the matching impl::get_value_* helper.
+
+PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
+{
+  if (_attr && _attr->value) return _attr->value + 0;
+
+  return def;
+}
+
+PUGI__FN int xml_attribute::as_int(int def) const
+{
+  if (_attr && _attr->value) return impl::get_value_int(_attr->value);
+
+  return def;
+}
+
+PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
+{
+  if (_attr && _attr->value) return impl::get_value_uint(_attr->value);
+
+  return def;
+}
+
+PUGI__FN double xml_attribute::as_double(double def) const
+{
+  if (_attr && _attr->value) return impl::get_value_double(_attr->value);
+
+  return def;
+}
+
+PUGI__FN float xml_attribute::as_float(float def) const
+{
+  if (_attr && _attr->value) return impl::get_value_float(_attr->value);
+
+  return def;
+}
+
+PUGI__FN bool xml_attribute::as_bool(bool def) const
+{
+  if (_attr && _attr->value) return impl::get_value_bool(_attr->value);
+
+  return def;
+}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+// Returns the value converted by impl::get_value_llong, or def for an empty handle / missing value.
+PUGI__FN long long xml_attribute::as_llong(long long def) const
+{
+  if (_attr && _attr->value) return impl::get_value_llong(_attr->value);
+
+  return def;
+}
+
+// Returns the value converted by impl::get_value_ullong, or def for an empty handle / missing value.
+PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
+{
+  if (_attr && _attr->value) return impl::get_value_ullong(_attr->value);
+
+  return def;
+}
+#endif
+
+// True when the handle does not refer to an attribute.
+PUGI__FN bool xml_attribute::empty() const
+{
+  return _attr == 0;
+}
+
+// Returns the attribute name, or an empty string for an empty handle / missing name.
+PUGI__FN const char_t* xml_attribute::name() const
+{
+  if (_attr && _attr->name) return _attr->name + 0;
+
+  return PUGIXML_TEXT("");
+}
+
+// Returns the attribute value, or an empty string for an empty handle / missing value.
+PUGI__FN const char_t* xml_attribute::value() const
+{
+  if (_attr && _attr->value) return _attr->value + 0;
+
+  return PUGIXML_TEXT("");
+}
+
+// Hash derived from the attribute pointer: its address divided by the size of xml_attribute_struct.
+PUGI__FN size_t xml_attribute::hash_value() const
+{
+  const uintptr_t address = reinterpret_cast<uintptr_t>(_attr);
+
+  return static_cast<size_t>(address / sizeof(xml_attribute_struct));
+}
+
+// Gives access to the wrapped attribute structure.
+PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
+{
+  xml_attribute_struct* wrapped = _attr;
+
+  return wrapped;
+}
+
+// Assignment from a value forwards to the matching set_value overload.
+// The result of set_value is not reported; the handle itself is returned.
+
+PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
+{
+  this->set_value(rhs);
+
+  return *this;
+}
+
+PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
+{
+  this->set_value(rhs);
+
+  return *this;
+}
+
+PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
+{
+  this->set_value(rhs);
+
+  return *this;
+}
+
+PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
+{
+  this->set_value(rhs);
+
+  return *this;
+}
+
+PUGI__FN xml_attribute& xml_attribute::operator=(float rhs)
+{
+  this->set_value(rhs);
+
+  return *this;
+}
+
+PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
+{
+  this->set_value(rhs);
+
+  return *this;
+}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+// Assigns a long long value through set_value; the result of set_value is not reported.
+PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
+{
+  this->set_value(rhs);
+
+  return *this;
+}
+
+// Assigns an unsigned long long value through set_value; the result of set_value is not reported.
+PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
+{
+  this->set_value(rhs);
+
+  return *this;
+}
+#endif
+
+// Replaces the attribute name with a copy of rhs; returns false for an empty handle,
+// otherwise the result of impl::strcpy_insitu.
+PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
+{
+  if (_attr == 0) return false;
+
+  const size_t length = impl::strlength(rhs);
+
+  return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, length);
+}
+
+// Replaces the attribute value with a copy of rhs; returns false for an empty handle,
+// otherwise the result of impl::strcpy_insitu.
+PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
+{
+  if (_attr == 0) return false;
+
+  const size_t length = impl::strlength(rhs);
+
+  return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, length);
+}
+
+// The remaining overloads convert rhs to text with impl::set_value_convert and store it as the value;
+// each returns false for an empty handle.
+
+PUGI__FN bool xml_attribute::set_value(int rhs)
+{
+  if (_attr == 0) return false;
+
+  return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
+{
+  if (_attr == 0) return false;
+
+  return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+PUGI__FN bool xml_attribute::set_value(double rhs)
+{
+  if (_attr == 0) return false;
+
+  return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+PUGI__FN bool xml_attribute::set_value(float rhs)
+{
+  if (_attr == 0) return false;
+
+  return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+PUGI__FN bool xml_attribute::set_value(bool rhs)
+{
+  if (_attr == 0) return false;
+
+  return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+// Stores a long long as the attribute value via impl::set_value_convert; false for an empty handle.
+PUGI__FN bool xml_attribute::set_value(long long rhs)
+{
+  if (_attr == 0) return false;
+
+  return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+// Stores an unsigned long long as the attribute value via impl::set_value_convert; false for an empty handle.
+PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
+{
+  if (_attr == 0) return false;
+
+  return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+#endif
+
+#ifdef __BORLANDC__
+// Borland-only helper: logical AND of an attribute handle (converted to bool) with a bool.
+PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
+{
+  const bool lhs_set = (bool)lhs;
+
+  return lhs_set && rhs;
+}
+
+// Borland-only helper: logical OR of an attribute handle (converted to bool) with a bool.
+PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
+{
+  const bool lhs_set = (bool)lhs;
+
+  return lhs_set || rhs;
+}
+#endif
+
+// Creates an empty handle (null node pointer).
+PUGI__FN xml_node::xml_node(): _root(0)
+{
+}
+
+// Wraps an existing node structure.
+PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
+{
+}
+
+// Dummy function whose address serves as the "true" value of the unspecified-bool conversion.
+PUGI__FN static void unspecified_bool_xml_node(xml_node***)
+{
+}
+
+// Converts to a non-null function pointer when the handle is not empty, and to null otherwise.
+PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
+{
+  if (!_root) return 0;
+
+  return unspecified_bool_xml_node;
+}
+
+// True when the handle is empty.
+PUGI__FN bool xml_node::operator!() const
+{
+  return _root == 0;
+}
+
+// Iterator positioned at the first child (null position for an empty handle).
+PUGI__FN xml_node::iterator xml_node::begin() const
+{
+  xml_node_struct* first = _root ? _root->first_child + 0 : 0;
+
+  return iterator(first, _root);
+}
+
+// Iterator with a null position for this node; marks the end of the child range.
+PUGI__FN xml_node::iterator xml_node::end() const
+{
+  return iterator(0, _root);
+}
+
+// Iterator positioned at the first attribute (null position for an empty handle).
+PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
+{
+  xml_attribute_struct* first = _root ? _root->first_attribute + 0 : 0;
+
+  return attribute_iterator(first, _root);
+}
+
+// Iterator with a null position for this node; marks the end of the attribute range.
+PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
+{
+  return attribute_iterator(0, _root);
+}
+
+// Range over all children, from begin() to end().
+PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
+{
+  xml_node_iterator first = begin();
+  xml_node_iterator last = end();
+
+  return xml_object_range<xml_node_iterator>(first, last);
+}
+
+// Range that starts at the first child named name_ and ends at a null position.
+PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
+{
+  xml_named_node_iterator first(child(name_)._root, _root, name_);
+  xml_named_node_iterator last(0, _root, name_);
+
+  return xml_object_range<xml_named_node_iterator>(first, last);
+}
+
+// Range over all attributes, from attributes_begin() to attributes_end().
+PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
+{
+  xml_attribute_iterator first = attributes_begin();
+  xml_attribute_iterator last = attributes_end();
+
+  return xml_object_range<xml_attribute_iterator>(first, last);
+}
+
+// All comparison operators compare the wrapped node pointers, not the node contents.
+
+PUGI__FN bool xml_node::operator==(const xml_node& r) const
+{
+  return _root == r._root;
+}
+
+PUGI__FN bool xml_node::operator!=(const xml_node& r) const
+{
+  return _root != r._root;
+}
+
+PUGI__FN bool xml_node::operator<(const xml_node& r) const
+{
+  return _root < r._root;
+}
+
+PUGI__FN bool xml_node::operator>(const xml_node& r) const
+{
+  return _root > r._root;
+}
+
+PUGI__FN bool xml_node::operator<=(const xml_node& r) const
+{
+  return _root <= r._root;
+}
+
+PUGI__FN bool xml_node::operator>=(const xml_node& r) const
+{
+  return _root >= r._root;
+}
+
+// True when the handle does not refer to a node.
+PUGI__FN bool xml_node::empty() const
+{
+  return _root == 0;
+}
+
+// Returns the node name, or an empty string for an empty handle / missing name.
+PUGI__FN const char_t* xml_node::name() const
+{
+  if (_root && _root->name) return _root->name + 0;
+
+  return PUGIXML_TEXT("");
+}
+
+// Returns the node type, or node_null for an empty handle.
+PUGI__FN xml_node_type xml_node::type() const
+{
+  if (!_root) return node_null;
+
+  return PUGI__NODETYPE(_root);
+}
+
+// Returns the node value, or an empty string for an empty handle / missing value.
+PUGI__FN const char_t* xml_node::value() const
+{
+  if (_root && _root->value) return _root->value + 0;
+
+  return PUGIXML_TEXT("");
+}
+
+// Returns the first child whose name equals name_, or an empty handle if there is none.
+PUGI__FN xml_node xml_node::child(const char_t* name_) const
+{
+  if (!_root) return xml_node();
+
+  xml_node_struct* current = _root->first_child;
+
+  while (current) {
+    if (current->name && impl::strequal(name_, current->name)) return xml_node(current);
+
+    current = current->next_sibling;
+  }
+
+  return xml_node();
+}
+
+// Returns the first attribute whose name equals name_, or an empty handle if there is none.
+PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
+{
+  if (!_root) return xml_attribute();
+
+  xml_attribute_struct* current = _root->first_attribute;
+
+  while (current) {
+    if (current->name && impl::strequal(name_, current->name)) return xml_attribute(current);
+
+    current = current->next_attribute;
+  }
+
+  return xml_attribute();
+}
+
+// Returns the closest following sibling whose name equals name_, or an empty handle if there is none.
+PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
+{
+  if (!_root) return xml_node();
+
+  xml_node_struct* current = _root->next_sibling;
+
+  while (current) {
+    if (current->name && impl::strequal(name_, current->name)) return xml_node(current);
+
+    current = current->next_sibling;
+  }
+
+  return xml_node();
+}
+
+// Returns a handle to the following sibling, or an empty handle when this handle is empty.
+PUGI__FN xml_node xml_node::next_sibling() const
+{
+  if (_root) return xml_node(_root->next_sibling);
+
+  return xml_node();
+}
+
+// Returns the closest preceding sibling whose name equals name_, or an empty handle if there is none.
+// The walk follows prev_sibling_c and stops at the first entry that has no next_sibling.
+PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
+{
+  if (!_root) return xml_node();
+
+  xml_node_struct* current = _root->prev_sibling_c;
+
+  while (current->next_sibling) {
+    if (current->name && impl::strequal(name_, current->name)) return xml_node(current);
+
+    current = current->prev_sibling_c;
+  }
+
+  return xml_node();
+}
+
+// Finds the attribute named name_, starting the search at hint_ and wrapping around to the first attribute.
+// On a match hint_ is updated to the attribute that follows the match, which speeds up lookups of
+// consecutive attributes. Returns an empty handle when nothing matches or this handle is empty.
+PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const
+{
+  xml_attribute_struct* hint = hint_._attr;
+
+  // if hint is not an attribute of node, behavior is not defined
+  assert(!hint || (_root && impl::is_attribute_of(hint, _root)));
+
+  if (!_root) return xml_attribute();
+
+  // optimistically search from hint up until the end
+  xml_attribute_struct* forward = hint;
+
+  while (forward) {
+    if (forward->name && impl::strequal(name_, forward->name)) {
+      // update hint to maximize efficiency of searching for consecutive attributes
+      hint_._attr = forward->next_attribute;
+
+      return xml_attribute(forward);
+    }
+
+    forward = forward->next_attribute;
+  }
+
+  // wrap around and search from the first attribute until the hint
+  // the null pointer check is technically redundant, but it prevents a crash in case the assertion above fails
+  xml_attribute_struct* wrapped = _root->first_attribute;
+
+  while (wrapped && wrapped != hint) {
+    if (wrapped->name && impl::strequal(name_, wrapped->name)) {
+      // update hint to maximize efficiency of searching for consecutive attributes
+      hint_._attr = wrapped->next_attribute;
+
+      return xml_attribute(wrapped);
+    }
+
+    wrapped = wrapped->next_attribute;
+  }
+
+  return xml_attribute();
+}
+
+// Returns a handle to the preceding sibling. An empty handle is returned when this handle is empty
+// or when the entry reached through prev_sibling_c has no next_sibling.
+PUGI__FN xml_node xml_node::previous_sibling() const
+{
+  if (_root && _root->prev_sibling_c->next_sibling)
+    return xml_node(_root->prev_sibling_c);
+
+  return xml_node();
+}
+
+// Returns a handle to the parent node, or an empty handle when this handle is empty.
+PUGI__FN xml_node xml_node::parent() const
+{
+  if (_root) return xml_node(_root->parent);
+
+  return xml_node();
+}
+
+// Returns a handle to the document obtained from impl::get_document, or an empty handle when this handle is empty.
+PUGI__FN xml_node xml_node::root() const
+{
+  if (_root) return xml_node(&impl::get_document(_root));
+
+  return xml_node();
+}
+
+// Returns an xml_text object constructed from this node.
+PUGI__FN xml_text xml_node::text() const
+{
+  return xml_text(_root);
+}
+
+// Returns the value of the first child that is a text node (pcdata or cdata) and has a value,
+// or an empty string if there is no such child or this handle is empty.
+PUGI__FN const char_t* xml_node::child_value() const
+{
+  if (!_root) return PUGIXML_TEXT("");
+
+  xml_node_struct* current = _root->first_child;
+
+  while (current) {
+    if (impl::is_text_node(current) && current->value)
+      return current->value;
+
+    current = current->next_sibling;
+  }
+
+  return PUGIXML_TEXT("");
+}
+
+// Returns child_value() of the first child named name_.
+PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
+{
+  const xml_node named_child = child(name_);
+
+  return named_child.child_value();
+}
+
+// Returns a handle to the first attribute, or an empty handle when this handle is empty.
+PUGI__FN xml_attribute xml_node::first_attribute() const
+{
+  if (_root) return xml_attribute(_root->first_attribute);
+
+  return xml_attribute();
+}
+
+// Returns a handle to the last attribute, read from prev_attribute_c of the first attribute;
+// an empty handle is returned when there are no attributes or this handle is empty.
+PUGI__FN xml_attribute xml_node::last_attribute() const
+{
+  if (_root && _root->first_attribute)
+    return xml_attribute(_root->first_attribute->prev_attribute_c);
+
+  return xml_attribute();
+}
+
+// Returns a handle to the first child, or an empty handle when this handle is empty.
+PUGI__FN xml_node xml_node::first_child() const
+{
+  if (_root) return xml_node(_root->first_child);
+
+  return xml_node();
+}
+
+// Returns a handle to the last child, read from prev_sibling_c of the first child;
+// an empty handle is returned when there are no children or this handle is empty.
+PUGI__FN xml_node xml_node::last_child() const
+{
+  if (_root && _root->first_child)
+    return xml_node(_root->first_child->prev_sibling_c);
+
+  return xml_node();
+}
+
+// Replaces the node name with a copy of `rhs`. Only element, PI and
+// declaration nodes accept a name; any other node (including a null one)
+// returns false. Otherwise returns the result of impl::strcpy_insitu.
+PUGI__FN bool xml_node::set_name(const char_t* rhs)
+{
+  if (!_root) return false;
+
+  switch (PUGI__NODETYPE(_root)) {
+  case node_element:
+  case node_pi:
+  case node_declaration:
+    return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
+
+  default:
+    return false;
+  }
+}
+
+// Replaces the node value with a copy of `rhs`. Only PCDATA, CDATA, comment,
+// PI and doctype nodes accept a value; any other node (including a null one)
+// returns false. Otherwise returns the result of impl::strcpy_insitu.
+PUGI__FN bool xml_node::set_value(const char_t* rhs)
+{
+  if (!_root) return false;
+
+  switch (PUGI__NODETYPE(_root)) {
+  case node_pcdata:
+  case node_cdata:
+  case node_comment:
+  case node_pi:
+  case node_doctype:
+    return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
+
+  default:
+    return false;
+  }
+}
+
+// Creates an attribute named `name_` and links it with impl::append_attribute.
+// Returns an empty handle when this node type cannot hold attributes, when the
+// allocator cannot reserve, or when allocation fails. The result of set_name
+// is not checked.
+PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
+{
+  if (!impl::allow_insert_attribute(type())) return xml_attribute();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_attribute();
+
+  xml_attribute created(impl::allocate_attribute(allocator));
+
+  if (created) {
+    impl::append_attribute(created._attr, _root);
+    created.set_name(name_);
+  }
+
+  return created;
+}
+
+// Creates an attribute named `name_` and links it with impl::prepend_attribute.
+// Returns an empty handle when this node type cannot hold attributes, when the
+// allocator cannot reserve, or when allocation fails. The result of set_name
+// is not checked.
+PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
+{
+  if (!impl::allow_insert_attribute(type())) return xml_attribute();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_attribute();
+
+  xml_attribute created(impl::allocate_attribute(allocator));
+
+  if (created) {
+    impl::prepend_attribute(created._attr, _root);
+    created.set_name(name_);
+  }
+
+  return created;
+}
+
+// Creates an attribute named `name_` and links it right after `attr`.
+// Returns an empty handle when this node type cannot hold attributes, when
+// `attr` is empty or does not belong to this node, when the allocator cannot
+// reserve, or when allocation fails. The result of set_name is not checked.
+PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
+{
+  if (!impl::allow_insert_attribute(type())) return xml_attribute();
+
+  const bool anchor_is_ours = attr && impl::is_attribute_of(attr._attr, _root);
+  if (!anchor_is_ours) return xml_attribute();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_attribute();
+
+  xml_attribute created(impl::allocate_attribute(allocator));
+
+  if (created) {
+    impl::insert_attribute_after(created._attr, attr._attr, _root);
+    created.set_name(name_);
+  }
+
+  return created;
+}
+
+// Creates an attribute named `name_` and links it right before `attr`.
+// Returns an empty handle when this node type cannot hold attributes, when
+// `attr` is empty or does not belong to this node, when the allocator cannot
+// reserve, or when allocation fails. The result of set_name is not checked.
+PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
+{
+  if (!impl::allow_insert_attribute(type())) return xml_attribute();
+
+  const bool anchor_is_ours = attr && impl::is_attribute_of(attr._attr, _root);
+  if (!anchor_is_ours) return xml_attribute();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_attribute();
+
+  xml_attribute created(impl::allocate_attribute(allocator));
+
+  if (created) {
+    impl::insert_attribute_before(created._attr, attr._attr, _root);
+    created.set_name(name_);
+  }
+
+  return created;
+}
+
+// Appends a new attribute and fills it from `proto` via
+// impl::node_copy_attribute. Returns an empty handle when `proto` is empty,
+// when this node type cannot hold attributes, when the allocator cannot
+// reserve, or when allocation fails.
+PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
+{
+  if (!proto) return xml_attribute();
+  if (!impl::allow_insert_attribute(type())) return xml_attribute();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_attribute();
+
+  xml_attribute duplicate(impl::allocate_attribute(allocator));
+
+  if (duplicate) {
+    impl::append_attribute(duplicate._attr, _root);
+    impl::node_copy_attribute(duplicate._attr, proto._attr);
+  }
+
+  return duplicate;
+}
+
+// Prepends a new attribute and fills it from `proto` via
+// impl::node_copy_attribute. Returns an empty handle when `proto` is empty,
+// when this node type cannot hold attributes, when the allocator cannot
+// reserve, or when allocation fails.
+PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
+{
+  if (!proto) return xml_attribute();
+  if (!impl::allow_insert_attribute(type())) return xml_attribute();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_attribute();
+
+  xml_attribute duplicate(impl::allocate_attribute(allocator));
+
+  if (duplicate) {
+    impl::prepend_attribute(duplicate._attr, _root);
+    impl::node_copy_attribute(duplicate._attr, proto._attr);
+  }
+
+  return duplicate;
+}
+
+// Inserts a copy of `proto` right after `attr`. Returns an empty handle when
+// `proto` is empty, when this node type cannot hold attributes, when `attr`
+// is empty or does not belong to this node, when the allocator cannot
+// reserve, or when allocation fails.
+PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
+{
+  if (!proto) return xml_attribute();
+  if (!impl::allow_insert_attribute(type())) return xml_attribute();
+
+  const bool anchor_is_ours = attr && impl::is_attribute_of(attr._attr, _root);
+  if (!anchor_is_ours) return xml_attribute();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_attribute();
+
+  xml_attribute duplicate(impl::allocate_attribute(allocator));
+
+  if (duplicate) {
+    impl::insert_attribute_after(duplicate._attr, attr._attr, _root);
+    impl::node_copy_attribute(duplicate._attr, proto._attr);
+  }
+
+  return duplicate;
+}
+
+// Inserts a copy of `proto` right before `attr`. Returns an empty handle when
+// `proto` is empty, when this node type cannot hold attributes, when `attr`
+// is empty or does not belong to this node, when the allocator cannot
+// reserve, or when allocation fails.
+PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
+{
+  if (!proto) return xml_attribute();
+  if (!impl::allow_insert_attribute(type())) return xml_attribute();
+
+  const bool anchor_is_ours = attr && impl::is_attribute_of(attr._attr, _root);
+  if (!anchor_is_ours) return xml_attribute();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_attribute();
+
+  xml_attribute duplicate(impl::allocate_attribute(allocator));
+
+  if (duplicate) {
+    impl::insert_attribute_before(duplicate._attr, attr._attr, _root);
+    impl::node_copy_attribute(duplicate._attr, proto._attr);
+  }
+
+  return duplicate;
+}
+
+// Creates a child of kind `type_` and links it with impl::append_node.
+// Returns an empty handle when impl::allow_insert_child rejects the
+// combination, when the allocator cannot reserve, or when allocation fails.
+PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
+{
+  if (!impl::allow_insert_child(type(), type_)) return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  xml_node created(impl::allocate_node(allocator, type_));
+
+  if (created) {
+    impl::append_node(created._root, _root);
+
+    // declaration nodes are named "xml" on creation
+    if (type_ == node_declaration) created.set_name(PUGIXML_TEXT("xml"));
+  }
+
+  return created;
+}
+
+// Creates a child of kind `type_` and links it with impl::prepend_node.
+// Returns an empty handle when impl::allow_insert_child rejects the
+// combination, when the allocator cannot reserve, or when allocation fails.
+PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
+{
+  if (!impl::allow_insert_child(type(), type_)) return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  xml_node created(impl::allocate_node(allocator, type_));
+
+  if (created) {
+    impl::prepend_node(created._root, _root);
+
+    // declaration nodes are named "xml" on creation
+    if (type_ == node_declaration) created.set_name(PUGIXML_TEXT("xml"));
+  }
+
+  return created;
+}
+
+// Creates a child of kind `type_` and links it right before `node`.
+// Returns an empty handle when impl::allow_insert_child rejects the
+// combination, when `node` is empty or not a child of this node, when the
+// allocator cannot reserve, or when allocation fails.
+PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
+{
+  if (!impl::allow_insert_child(type(), type_)) return xml_node();
+
+  xml_node_struct* anchor = node._root;
+  if (!anchor || anchor->parent != _root) return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  xml_node created(impl::allocate_node(allocator, type_));
+
+  if (created) {
+    impl::insert_node_before(created._root, anchor);
+
+    // declaration nodes are named "xml" on creation
+    if (type_ == node_declaration) created.set_name(PUGIXML_TEXT("xml"));
+  }
+
+  return created;
+}
+
+// Creates a child of kind `type_` and links it right after `node`.
+// Returns an empty handle when impl::allow_insert_child rejects the
+// combination, when `node` is empty or not a child of this node, when the
+// allocator cannot reserve, or when allocation fails.
+PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
+{
+  if (!impl::allow_insert_child(type(), type_)) return xml_node();
+
+  xml_node_struct* anchor = node._root;
+  if (!anchor || anchor->parent != _root) return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  xml_node created(impl::allocate_node(allocator, type_));
+
+  if (created) {
+    impl::insert_node_after(created._root, anchor);
+
+    // declaration nodes are named "xml" on creation
+    if (type_ == node_declaration) created.set_name(PUGIXML_TEXT("xml"));
+  }
+
+  return created;
+}
+
+// Appends an element child and names it `name_`. The handle returned by
+// append_child(node_element) is passed back unchanged; set_name's result is
+// not checked.
+PUGI__FN xml_node xml_node::append_child(const char_t* name_)
+{
+  xml_node element = append_child(node_element);
+  element.set_name(name_);
+
+  return element;
+}
+
+// Prepends an element child and names it `name_`. The handle returned by
+// prepend_child(node_element) is passed back unchanged; set_name's result is
+// not checked.
+PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
+{
+  xml_node element = prepend_child(node_element);
+  element.set_name(name_);
+
+  return element;
+}
+
+// Inserts an element child after `node` and names it `name_`. The handle
+// returned by insert_child_after(node_element, node) is passed back
+// unchanged; set_name's result is not checked.
+PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
+{
+  xml_node element = insert_child_after(node_element, node);
+  element.set_name(name_);
+
+  return element;
+}
+
+// Inserts an element child before `node` and names it `name_`. The handle
+// returned by insert_child_before(node_element, node) is passed back
+// unchanged; set_name's result is not checked.
+PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
+{
+  xml_node element = insert_child_before(node_element, node);
+  element.set_name(name_);
+
+  return element;
+}
+
+// Appends a new child of the same kind as `proto` and fills it with
+// impl::node_copy_tree. Returns an empty handle when impl::allow_insert_child
+// rejects the kind, when the allocator cannot reserve, or when allocation
+// fails.
+PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
+{
+  const xml_node_type proto_kind = proto.type();
+  if (!impl::allow_insert_child(type(), proto_kind)) return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  xml_node duplicate(impl::allocate_node(allocator, proto_kind));
+
+  if (duplicate) {
+    impl::append_node(duplicate._root, _root);
+    impl::node_copy_tree(duplicate._root, proto._root);
+  }
+
+  return duplicate;
+}
+
+// Prepends a new child of the same kind as `proto` and fills it with
+// impl::node_copy_tree. Returns an empty handle when impl::allow_insert_child
+// rejects the kind, when the allocator cannot reserve, or when allocation
+// fails.
+PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
+{
+  const xml_node_type proto_kind = proto.type();
+  if (!impl::allow_insert_child(type(), proto_kind)) return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  xml_node duplicate(impl::allocate_node(allocator, proto_kind));
+
+  if (duplicate) {
+    impl::prepend_node(duplicate._root, _root);
+    impl::node_copy_tree(duplicate._root, proto._root);
+  }
+
+  return duplicate;
+}
+
+// Inserts a copy of `proto` right after `node`. Returns an empty handle when
+// impl::allow_insert_child rejects the kind, when `node` is empty or not a
+// child of this node, when the allocator cannot reserve, or when allocation
+// fails.
+PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
+{
+  const xml_node_type proto_kind = proto.type();
+  if (!impl::allow_insert_child(type(), proto_kind)) return xml_node();
+
+  xml_node_struct* anchor = node._root;
+  if (!anchor || anchor->parent != _root) return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  xml_node duplicate(impl::allocate_node(allocator, proto_kind));
+
+  if (duplicate) {
+    impl::insert_node_after(duplicate._root, anchor);
+    impl::node_copy_tree(duplicate._root, proto._root);
+  }
+
+  return duplicate;
+}
+
+// Inserts a copy of `proto` right before `node`. Returns an empty handle when
+// impl::allow_insert_child rejects the kind, when `node` is empty or not a
+// child of this node, when the allocator cannot reserve, or when allocation
+// fails.
+PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
+{
+  const xml_node_type proto_kind = proto.type();
+  if (!impl::allow_insert_child(type(), proto_kind)) return xml_node();
+
+  xml_node_struct* anchor = node._root;
+  if (!anchor || anchor->parent != _root) return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  xml_node duplicate(impl::allocate_node(allocator, proto_kind));
+
+  if (duplicate) {
+    impl::insert_node_before(duplicate._root, anchor);
+    impl::node_copy_tree(duplicate._root, proto._root);
+  }
+
+  return duplicate;
+}
+
+// Unlinks `moved` from its current position and re-links it as the last child
+// of this node. Returns `moved`, or an empty handle when impl::allow_move
+// rejects the move or the allocator cannot reserve.
+PUGI__FN xml_node xml_node::append_move(const xml_node& moved)
+{
+  if (!impl::allow_move(*this, moved)) return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  // moving nodes around changes document order without changing buffer
+  // pointers, so the document_buffer_order optimization has to be disabled
+  impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
+
+  xml_node_struct* subject = moved._root;
+
+  impl::remove_node(subject);
+  impl::append_node(subject, _root);
+
+  return moved;
+}
+
+// Unlinks `moved` from its current position and re-links it as the first
+// child of this node. Returns `moved`, or an empty handle when
+// impl::allow_move rejects the move or the allocator cannot reserve.
+PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved)
+{
+  if (!impl::allow_move(*this, moved)) return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  // moving nodes around changes document order without changing buffer
+  // pointers, so the document_buffer_order optimization has to be disabled
+  impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
+
+  xml_node_struct* subject = moved._root;
+
+  impl::remove_node(subject);
+  impl::prepend_node(subject, _root);
+
+  return moved;
+}
+
+// Unlinks `moved` and re-links it right after `node`. Returns `moved`, or an
+// empty handle when impl::allow_move rejects the move, when `node` is empty
+// or not a child of this node, when `moved` and `node` are the same node, or
+// when the allocator cannot reserve.
+PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
+{
+  if (!impl::allow_move(*this, moved)) return xml_node();
+
+  xml_node_struct* anchor = node._root;
+  xml_node_struct* subject = moved._root;
+
+  if (!anchor || anchor->parent != _root) return xml_node();
+  if (subject == anchor) return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  // moving nodes around changes document order without changing buffer
+  // pointers, so the document_buffer_order optimization has to be disabled
+  impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
+
+  impl::remove_node(subject);
+  impl::insert_node_after(subject, anchor);
+
+  return moved;
+}
+
+// Unlinks `moved` and re-links it right before `node`. Returns `moved`, or an
+// empty handle when impl::allow_move rejects the move, when `node` is empty
+// or not a child of this node, when `moved` and `node` are the same node, or
+// when the allocator cannot reserve.
+PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
+{
+  if (!impl::allow_move(*this, moved)) return xml_node();
+
+  xml_node_struct* anchor = node._root;
+  xml_node_struct* subject = moved._root;
+
+  if (!anchor || anchor->parent != _root) return xml_node();
+  if (subject == anchor) return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  // moving nodes around changes document order without changing buffer
+  // pointers, so the document_buffer_order optimization has to be disabled
+  impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
+
+  impl::remove_node(subject);
+  impl::insert_node_before(subject, anchor);
+
+  return moved;
+}
+
+// Looks the attribute up by name and forwards to the handle-based overload.
+PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
+{
+  xml_attribute target = attribute(name_);
+
+  return remove_attribute(target);
+}
+
+// Unlinks attribute `a` from this node and destroys it. Returns false when
+// either handle is null, when `a` does not belong to this node, or when the
+// allocator cannot reserve; true once the attribute is gone.
+PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
+{
+  xml_attribute_struct* victim = a._attr;
+
+  if (!_root || !victim) return false;
+  if (!impl::is_attribute_of(victim, _root)) return false;
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return false;
+
+  impl::remove_attribute(victim, _root);
+  impl::destroy_attribute(victim, allocator);
+
+  return true;
+}
+
+// Looks the child up by name and forwards to the handle-based overload.
+PUGI__FN bool xml_node::remove_child(const char_t* name_)
+{
+  xml_node target = child(name_);
+
+  return remove_child(target);
+}
+
+// Unlinks child `n` from this node and destroys it. Returns false when either
+// handle is null, when `n` is not a direct child of this node, or when the
+// allocator cannot reserve; true once the child is gone.
+PUGI__FN bool xml_node::remove_child(const xml_node& n)
+{
+  xml_node_struct* victim = n._root;
+
+  if (!_root || !victim || victim->parent != _root) return false;
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return false;
+
+  impl::remove_node(victim);
+  impl::destroy_node(victim, allocator);
+
+  return true;
+}
+
+PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
+{
+ // append_buffer is only valid for elements/documents
+ if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);
+
+ // get document node
+ impl::xml_document_struct* doc = &impl::get_document(_root);
+
+ // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense
+ doc->header |= impl::xml_memory_page_contents_shared_mask;
+
+ // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
+ impl::xml_memory_page* page = 0;
+ impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer), page));
+ (void)page;
+
+ if (!extra) return impl::make_parse_result(status_out_of_memory);
+
+ // add extra buffer to the list
+ extra->buffer = 0;
+ extra->next = doc->extra_buffers;
+ doc->extra_buffers = extra;
+
+ // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
+ impl::name_null_sentry sentry(_root);
+
+ return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer);
+}
+
+// Returns the first child named `name_` that carries an attribute `attr_name`
+// whose value equals `attr_value`; an attribute without a value compares as
+// "". Returns an empty handle when nothing matches or this node is null.
+PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
+{
+  if (!_root) return xml_node();
+
+  for (xml_node_struct* kid = _root->first_child; kid; kid = kid->next_sibling) {
+    if (!(kid->name && impl::strequal(name_, kid->name))) continue;
+
+    for (xml_attribute_struct* attr = kid->first_attribute; attr; attr = attr->next_attribute) {
+      if (!(attr->name && impl::strequal(attr_name, attr->name))) continue;
+
+      if (impl::strequal(attr_value, attr->value ? attr->value + 0 : PUGIXML_TEXT("")))
+        return xml_node(kid);
+    }
+  }
+
+  return xml_node();
+}
+
+// Returns the first child (of any name) that carries an attribute `attr_name`
+// whose value equals `attr_value`; an attribute without a value compares as
+// "". Returns an empty handle when nothing matches or this node is null.
+PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
+{
+  if (!_root) return xml_node();
+
+  for (xml_node_struct* kid = _root->first_child; kid; kid = kid->next_sibling) {
+    for (xml_attribute_struct* attr = kid->first_attribute; attr; attr = attr->next_attribute) {
+      if (!(attr->name && impl::strequal(attr_name, attr->name))) continue;
+
+      if (impl::strequal(attr_value, attr->value ? attr->value + 0 : PUGIXML_TEXT("")))
+        return xml_node(kid);
+    }
+  }
+
+  return xml_node();
+}
+
+#ifndef PUGIXML_NO_STL
+// Builds the names of this node and all its ancestors joined by `delimiter`,
+// outermost ancestor first. A null node yields an empty string.
+PUGI__FN string_t xml_node::path(char_t delimiter) const
+{
+  if (!_root) return string_t();
+
+  // pass 1: measure - one delimiter per ancestor plus every name length
+  size_t total = 0;
+
+  for (xml_node_struct* up = _root; up; up = up->parent) {
+    if (up != _root) ++total;
+
+    total += up->name ? impl::strlength(up->name) : 0;
+  }
+
+  string_t result;
+  result.resize(total);
+
+  // pass 2: fill the string back to front while walking towards the root
+  size_t cursor = total;
+
+  for (xml_node_struct* up = _root; up; up = up->parent) {
+    if (up != _root) {
+      --cursor;
+      result[cursor] = delimiter;
+    }
+
+    if (up->name && *up->name) {
+      const size_t name_length = impl::strlength(up->name);
+
+      cursor -= name_length;
+      memcpy(&result[cursor], up->name, name_length * sizeof(char_t));
+    }
+  }
+
+  assert(cursor == 0);
+
+  return result;
+}
+#endif
+
+// Resolves `path_` (segments separated by `delimiter`) relative to this node,
+// or relative to root() when the path starts with the delimiter. "." keeps
+// the current node and ".." steps to the parent. Children are tried in order
+// and the search recurses per segment, so a later same-named child is tried
+// when an earlier one leads nowhere. Returns an empty handle on no match; a
+// null node or an empty/null path returns this node unchanged.
+PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
+{
+  xml_node context = *this; // Current search context.
+
+  if (!_root || !path_ || !path_[0]) return context;
+
+  if (path_[0] == delimiter) {
+    // Absolute path; e.g. '/foo/bar'
+    context = context.root();
+    ++path_;
+  }
+
+  // skip redundant delimiters, then find the extent of the first segment
+  const char_t* seg_begin = path_;
+  while (*seg_begin == delimiter) ++seg_begin;
+
+  const char_t* seg_end = seg_begin;
+  while (*seg_end && *seg_end != delimiter) ++seg_end;
+
+  if (seg_begin == seg_end) return context;
+
+  const char_t* rest = seg_end;
+  while (*rest == delimiter) ++rest;
+
+  const size_t seg_length = static_cast<size_t>(seg_end - seg_begin);
+
+  if (seg_length == 1 && seg_begin[0] == '.')
+    return context.first_element_by_path(rest, delimiter);
+
+  if (seg_length == 2 && seg_begin[0] == '.' && seg_begin[1] == '.')
+    return context.parent().first_element_by_path(rest, delimiter);
+
+  for (xml_node_struct* kid = context._root->first_child; kid; kid = kid->next_sibling) {
+    if (!(kid->name && impl::strequalrange(kid->name, seg_begin, seg_length))) continue;
+
+    xml_node deeper = xml_node(kid).first_element_by_path(rest, delimiter);
+
+    if (deeper) return deeper;
+  }
+
+  return xml_node();
+}
+
+// Pre-order walk over the subtree below this node. Calls walker.begin() with
+// this node, walker.for_each() with each descendant, then walker.end() with
+// this node; walker._depth tracks the nesting level of the visited node.
+// Returns false as soon as begin() or for_each() returns false, otherwise the
+// result of end().
+PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
+{
+  walker._depth = -1;
+
+  // every callback gets its own copy of the handle it is called with
+  xml_node begin_arg = *this;
+  if (!walker.begin(begin_arg)) return false;
+
+  xml_node it = first_child();
+
+  if (it) {
+    ++walker._depth;
+
+    do {
+      xml_node visit_arg = it;
+      if (!walker.for_each(visit_arg))
+        return false;
+
+      // children are looked up only after the callback has run
+      xml_node below = it.first_child();
+
+      if (below) {
+        ++walker._depth;
+        it = below;
+        continue;
+      }
+
+      xml_node beside = it.next_sibling();
+
+      if (beside) {
+        it = beside;
+        continue;
+      }
+
+      // dead end: climb until an ancestor has a next sibling or we are back at the start node
+      // Borland C++ workaround
+      while (!it.next_sibling() && it != *this && !it.parent().empty()) {
+        --walker._depth;
+        it = it.parent();
+      }
+
+      if (it != *this)
+        it = it.next_sibling();
+    } while (it && it != *this);
+  }
+
+  assert(walker._depth == -1);
+
+  xml_node end_arg = *this;
+  return walker.end(end_arg);
+}
+
+// Hash of the handle: the node address divided by sizeof(xml_node_struct).
+PUGI__FN size_t xml_node::hash_value() const
+{
+  const uintptr_t address = reinterpret_cast<uintptr_t>(_root);
+
+  return static_cast<size_t>(address / sizeof(xml_node_struct));
+}
+
+PUGI__FN xml_node_struct* xml_node::internal_object() const // exposes the raw node pointer held by this handle (may be null)
+{
+ return _root;
+}
+
+// Serializes this node through `writer` using impl::node_output with the given
+// indent string, formatting flags, encoding and starting depth. Does nothing
+// for a null node.
+PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
+{
+  if (_root) {
+    impl::xml_buffered_writer buffered(writer, encoding);
+
+    impl::node_output(buffered, _root, indent, flags, depth);
+
+    // push out whatever is still sitting in the buffer
+    buffered.flush();
+  }
+}
+
+#ifndef PUGIXML_NO_STL
+// Narrow-stream convenience overload: wraps `stream` in an xml_writer_stream
+// and forwards to the xml_writer overload with the caller's encoding.
+PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
+{
+  xml_writer_stream stream_writer(stream);
+
+  print(stream_writer, indent, flags, encoding, depth);
+}
+
+// Wide-stream convenience overload: wraps `stream` in an xml_writer_stream and
+// forwards to the xml_writer overload, always using encoding_wchar.
+PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
+{
+  xml_writer_stream stream_writer(stream);
+
+  print(stream_writer, indent, flags, encoding_wchar, depth);
+}
+#endif
+
+// Returns the offset of this node's name (element/declaration/PI) or value
+// (PCDATA/CDATA/comment/doctype) relative to the document's parse buffer, 0
+// for the document node, and -1 whenever the offset cannot be determined.
+PUGI__FN ptrdiff_t xml_node::offset_debug() const
+{
+  if (!_root) return -1;
+
+  impl::xml_document_struct& doc = impl::get_document(_root);
+
+  // we can determine the offset reliably only if there is exactly one parse buffer
+  if (!doc.buffer || doc.extra_buffers) return -1;
+
+  switch (type()) {
+  case node_document:
+    return 0;
+
+  case node_element:
+  case node_declaration:
+  case node_pi: {
+    // presumably a name flagged as allocated/shared no longer points into doc.buffer
+    const bool name_in_buffer = _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0;
+
+    if (!name_in_buffer) return -1;
+    return _root->name - doc.buffer;
+  }
+
+  case node_pcdata:
+  case node_cdata:
+  case node_comment:
+  case node_doctype: {
+    // same test for the value string
+    const bool value_in_buffer = _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0;
+
+    if (!value_in_buffer) return -1;
+    return _root->value - doc.buffer;
+  }
+
+  default:
+    return -1;
+  }
+}
+
+#ifdef __BORLANDC__
+// Borland-only helper: logical AND of a node handle's truth value and a bool.
+PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
+{
+  const bool lhs_set = (bool)lhs;
+
+  return lhs_set && rhs;
+}
+
+// Borland-only helper: logical OR of a node handle's truth value and a bool.
+PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
+{
+  const bool lhs_set = (bool)lhs;
+
+  return lhs_set || rhs;
+}
+#endif
+
+PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root) // wraps `root` as is; no validation or copying happens here
+{
+}
+
+// Locates the node that holds the text: _root itself when it is null or a
+// text node (per impl::is_text_node), otherwise its first text-node child.
+// Returns 0 when no such child exists.
+PUGI__FN xml_node_struct* xml_text::_data() const
+{
+  if (!_root || impl::is_text_node(_root)) return _root;
+
+  xml_node_struct* kid = _root->first_child;
+
+  while (kid && !impl::is_text_node(kid))
+    kid = kid->next_sibling;
+
+  // kid is either the first text child or null after running off the list
+  return kid ? kid : 0;
+}
+
+// Like _data(), but appends a fresh PCDATA child to _root when no text node
+// exists yet. The result is null when that append yields an empty handle.
+PUGI__FN xml_node_struct* xml_text::_data_new()
+{
+  xml_node_struct* existing = _data();
+
+  if (!existing) {
+    xml_node owner(_root);
+
+    return owner.append_child(node_pcdata).internal_object();
+  }
+
+  return existing;
+}
+
+PUGI__FN xml_text::xml_text(): _root(0) // default-constructed accessor has a null _root
+{
+}
+
+PUGI__FN static void unspecified_bool_xml_text(xml_text***) // never does anything; its address is what the bool-like conversion of xml_text returns for "true"
+{
+}
+
+// Bool-like conversion: yields a non-null function pointer when a text node
+// can be located via _data(), and a null one otherwise.
+PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
+{
+  if (_data()) return unspecified_bool_xml_text;
+
+  return 0;
+}
+
+// True when no text node can be located via _data().
+PUGI__FN bool xml_text::operator!() const
+{
+  return _data() == 0;
+}
+
+// True when no text node can be located via _data(); the text content itself
+// is not inspected.
+PUGI__FN bool xml_text::empty() const
+{
+  return !_data();
+}
+
+// Returns the text value, or "" when there is no text node or it has no value.
+PUGI__FN const char_t* xml_text::get() const
+{
+  xml_node_struct* holder = _data();
+
+  if (holder && holder->value) return holder->value + 0;
+
+  return PUGIXML_TEXT("");
+}
+
+// Returns the text value, or `def` when there is no text node or it has no
+// value.
+PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
+{
+  xml_node_struct* holder = _data();
+
+  if (holder && holder->value) return holder->value + 0;
+
+  return def;
+}
+
+// Converts the text value with impl::get_value_int; returns `def` when there
+// is no text node or it has no value.
+PUGI__FN int xml_text::as_int(int def) const
+{
+  xml_node_struct* holder = _data();
+
+  if (!holder || !holder->value) return def;
+
+  return impl::get_value_int(holder->value);
+}
+
+// Converts the text value with impl::get_value_uint; returns `def` when there
+// is no text node or it has no value.
+PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
+{
+  xml_node_struct* holder = _data();
+
+  if (!holder || !holder->value) return def;
+
+  return impl::get_value_uint(holder->value);
+}
+
+// Converts the text value with impl::get_value_double; returns `def` when
+// there is no text node or it has no value.
+PUGI__FN double xml_text::as_double(double def) const
+{
+  xml_node_struct* holder = _data();
+
+  if (!holder || !holder->value) return def;
+
+  return impl::get_value_double(holder->value);
+}
+
+// Converts the text value with impl::get_value_float; returns `def` when
+// there is no text node or it has no value.
+PUGI__FN float xml_text::as_float(float def) const
+{
+  xml_node_struct* holder = _data();
+
+  if (!holder || !holder->value) return def;
+
+  return impl::get_value_float(holder->value);
+}
+
+// Converts the text value with impl::get_value_bool; returns `def` when there
+// is no text node or it has no value.
+PUGI__FN bool xml_text::as_bool(bool def) const
+{
+  xml_node_struct* holder = _data();
+
+  if (!holder || !holder->value) return def;
+
+  return impl::get_value_bool(holder->value);
+}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+// Converts the text value with impl::get_value_llong; returns `def` when
+// there is no text node or it has no value.
+PUGI__FN long long xml_text::as_llong(long long def) const
+{
+  xml_node_struct* holder = _data();
+
+  if (!holder || !holder->value) return def;
+
+  return impl::get_value_llong(holder->value);
+}
+
+// Converts the text value with impl::get_value_ullong; returns `def` when
+// there is no text node or it has no value.
+PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
+{
+  xml_node_struct* holder = _data();
+
+  if (!holder || !holder->value) return def;
+
+  return impl::get_value_ullong(holder->value);
+}
+#endif
+
+// Stores a copy of `rhs` as the text value, creating the text node through
+// _data_new() when needed. Returns false when no text node is available,
+// otherwise the result of impl::strcpy_insitu.
+PUGI__FN bool xml_text::set(const char_t* rhs)
+{
+  xml_node_struct* target = _data_new();
+
+  if (!target) return false;
+
+  return impl::strcpy_insitu(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
+}
+
+// Stores `rhs` as text via impl::set_value_convert, creating the text node
+// through _data_new() when needed. Returns false when no text node is
+// available.
+PUGI__FN bool xml_text::set(int rhs)
+{
+  xml_node_struct* target = _data_new();
+
+  if (!target) return false;
+
+  return impl::set_value_convert(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+// Stores `rhs` as text via impl::set_value_convert, creating the text node
+// through _data_new() when needed. Returns false when no text node is
+// available.
+PUGI__FN bool xml_text::set(unsigned int rhs)
+{
+  xml_node_struct* target = _data_new();
+
+  if (!target) return false;
+
+  return impl::set_value_convert(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+// Stores `rhs` as text via impl::set_value_convert, creating the text node
+// through _data_new() when needed. Returns false when no text node is
+// available.
+PUGI__FN bool xml_text::set(float rhs)
+{
+  xml_node_struct* target = _data_new();
+
+  if (!target) return false;
+
+  return impl::set_value_convert(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+// Stores `rhs` as text via impl::set_value_convert, creating the text node
+// through _data_new() when needed. Returns false when no text node is
+// available.
+PUGI__FN bool xml_text::set(double rhs)
+{
+  xml_node_struct* target = _data_new();
+
+  if (!target) return false;
+
+  return impl::set_value_convert(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+// Stores `rhs` as text via impl::set_value_convert, creating the text node
+// through _data_new() when needed. Returns false when no text node is
+// available.
+PUGI__FN bool xml_text::set(bool rhs)
+{
+  xml_node_struct* target = _data_new();
+
+  if (!target) return false;
+
+  return impl::set_value_convert(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+// Stores `rhs` as text via impl::set_value_convert, creating the text node
+// through _data_new() when needed. Returns false when no text node is
+// available.
+PUGI__FN bool xml_text::set(long long rhs)
+{
+  xml_node_struct* target = _data_new();
+
+  if (!target) return false;
+
+  return impl::set_value_convert(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+// Stores `rhs` as text via impl::set_value_convert, creating the text node
+// through _data_new() when needed. Returns false when no text node is
+// available.
+PUGI__FN bool xml_text::set(unsigned long long rhs)
+{
+  xml_node_struct* target = _data_new();
+
+  if (!target) return false;
+
+  return impl::set_value_convert(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+#endif
+
+// Assignment sugar for set(rhs); set()'s bool result is not propagated.
+PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
+{
+  xml_text& self = *this;
+
+  self.set(rhs);
+  return self;
+}
+
+// Assignment sugar for set(rhs); set()'s bool result is not propagated.
+PUGI__FN xml_text& xml_text::operator=(int rhs)
+{
+  xml_text& self = *this;
+
+  self.set(rhs);
+  return self;
+}
+
+// Assignment sugar for set(rhs); set()'s bool result is not propagated.
+PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
+{
+  xml_text& self = *this;
+
+  self.set(rhs);
+  return self;
+}
+
+// Assignment sugar for set(rhs); set()'s bool result is not propagated.
+PUGI__FN xml_text& xml_text::operator=(double rhs)
+{
+  xml_text& self = *this;
+
+  self.set(rhs);
+  return self;
+}
+
+// Assignment sugar for set(rhs); set()'s bool result is not propagated.
+PUGI__FN xml_text& xml_text::operator=(float rhs)
+{
+  xml_text& self = *this;
+
+  self.set(rhs);
+  return self;
+}
+
+// Assignment sugar for set(rhs); set()'s bool result is not propagated.
+PUGI__FN xml_text& xml_text::operator=(bool rhs)
+{
+  xml_text& self = *this;
+
+  self.set(rhs);
+  return self;
+}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+// Assignment sugar for set(rhs); set()'s bool result is not propagated.
+PUGI__FN xml_text& xml_text::operator=(long long rhs)
+{
+  xml_text& self = *this;
+
+  self.set(rhs);
+  return self;
+}
+
+// Assignment sugar for set(rhs); set()'s bool result is not propagated.
+PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
+{
+  xml_text& self = *this;
+
+  self.set(rhs);
+  return self;
+}
+#endif
+
+// Returns a node handle for the text node located by _data(); the handle is
+// empty when there is none.
+PUGI__FN xml_node xml_text::data() const
+{
+  xml_node_struct* holder = _data();
+
+  return xml_node(holder);
+}
+
+#ifdef __BORLANDC__
+// Borland-only helper: logical AND of a text accessor's truth value and a bool.
+PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
+{
+  const bool lhs_set = (bool)lhs;
+
+  return lhs_set && rhs;
+}
+
+// Borland-only helper: logical OR of a text accessor's truth value and a bool.
+PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
+{
+  const bool lhs_set = (bool)lhs;
+
+  return lhs_set || rhs;
+}
+#endif
+
+PUGI__FN xml_node_iterator::xml_node_iterator() // both _wrap and _parent are default-constructed
+{
+}
+
+PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent()) // iterator positioned at `node`; the parent is remembered for operator--
+{
+}
+
+PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) // raw-pointer form; `ref` and `parent` are stored as given
+{
+}
+
+// Iterators are equal when both the current node and the parent match.
+PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
+{
+  if (_wrap._root != rhs._wrap._root) return false;
+
+  return _parent._root == rhs._parent._root;
+}
+
+// Iterators differ when either the current node or the parent differs.
+PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
+{
+  if (_wrap._root != rhs._wrap._root) return true;
+
+  return _parent._root != rhs._parent._root;
+}
+
+// Dereference: returns the wrapped node handle; asserts that the iterator
+// points at a node.
+PUGI__FN xml_node& xml_node_iterator::operator*() const
+{
+  xml_node& current = _wrap;
+
+  assert(current._root);
+  return current;
+}
+
+// Member access: returns the address of the wrapped node handle; asserts that
+// the iterator points at a node.
+PUGI__FN xml_node* xml_node_iterator::operator->() const
+{
+  assert(_wrap._root);
+
+  xml_node* current = const_cast<xml_node*>(&_wrap); // BCC32 workaround
+  return current;
+}
+
+// Pre-increment: steps to next_sibling; asserts that the iterator currently
+// points at a node.
+PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()
+{
+  assert(_wrap._root);
+
+  xml_node_struct* following = _wrap._root->next_sibling;
+  _wrap._root = following;
+
+  return *this;
+}
+
+// Post-increment: advances and returns the iterator's previous position.
+PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
+{
+  xml_node_iterator before(*this);
+
+  operator++();
+  return before;
+}
+
+// Pre-decrement: steps to the previous sibling, or - when the iterator holds
+// no node - to the last child of the remembered parent.
+PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()
+{
+  if (_wrap._root)
+    _wrap = _wrap.previous_sibling();
+  else
+    _wrap = _parent.last_child();
+
+  return *this;
+}
+
+// Post-decrement: steps back and returns the iterator's previous position.
+PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
+{
+  xml_node_iterator before(*this);
+
+  operator--();
+  return before;
+}
+
+PUGI__FN xml_attribute_iterator::xml_attribute_iterator() // both _wrap and _parent are default-constructed
+{
+}
+
+PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent) // iterator positioned at `attr`; the owning node is remembered for operator--
+{
+}
+
+PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) // raw-pointer form; `ref` and `parent` are stored as given
+{
+}
+
+// Iterators are equal when both the current attribute and the parent match.
+PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
+{
+  if (_wrap._attr != rhs._wrap._attr) return false;
+
+  return _parent._root == rhs._parent._root;
+}
+
+// Iterators differ when either the current attribute or the parent differs.
+PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
+{
+  if (_wrap._attr != rhs._wrap._attr) return true;
+
+  return _parent._root != rhs._parent._root;
+}
+
+// Dereference: returns the wrapped attribute handle; asserts that the
+// iterator points at an attribute.
+PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
+{
+  xml_attribute& current = _wrap;
+
+  assert(current._attr);
+  return current;
+}
+
+// Member access: returns the address of the wrapped attribute handle; asserts
+// that the iterator points at an attribute.
+PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
+{
+  assert(_wrap._attr);
+
+  xml_attribute* current = const_cast<xml_attribute*>(&_wrap); // BCC32 workaround
+  return current;
+}
+
+// Pre-increment: steps to next_attribute; asserts that the iterator currently
+// points at an attribute.
+PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
+{
+  assert(_wrap._attr);
+
+  xml_attribute_struct* following = _wrap._attr->next_attribute;
+  _wrap._attr = following;
+
+  return *this;
+}
+
+// Post-increment: advances and returns the iterator's previous position.
+PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
+{
+  xml_attribute_iterator before(*this);
+
+  operator++();
+  return before;
+}
+
+// Pre-decrement: steps to the previous attribute, or - when the iterator
+// holds no attribute - to the last attribute of the remembered parent.
+PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
+{
+  if (_wrap._attr)
+    _wrap = _wrap.previous_attribute();
+  else
+    _wrap = _parent.last_attribute();
+
+  return *this;
+}
+
+// Post-decrement: steps back and returns the iterator's previous position.
+PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
+{
+  xml_attribute_iterator before(*this);
+
+  operator--();
+  return before;
+}
+
+PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0) // default iterator: no node, no parent, null name filter
+{
+}
+
+PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name) // positioned at `node`; the `name` pointer is stored, not copied
+{
+}
+
+PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name) // raw-pointer form; the `name` pointer is stored, not copied
+{
+}
+
+// Iterators are equal when both the current node and the parent match; the
+// name filter is not compared.
+PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
+{
+  if (_wrap._root != rhs._wrap._root) return false;
+
+  return _parent._root == rhs._parent._root;
+}
+
+// Iterators differ when either the current node or the parent differs; the
+// name filter is not compared.
+PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
+{
+  if (_wrap._root != rhs._wrap._root) return true;
+
+  return _parent._root != rhs._parent._root;
+}
+
+// Dereference: returns the wrapped node handle; asserts that the iterator
+// points at a node.
+PUGI__FN xml_node& xml_named_node_iterator::operator*() const
+{
+  xml_node& current = _wrap;
+
+  assert(current._root);
+  return current;
+}
+
+// Member access: returns the address of the wrapped node handle; asserts that
+// the iterator points at a node.
+PUGI__FN xml_node* xml_named_node_iterator::operator->() const
+{
+  assert(_wrap._root);
+
+  xml_node* current = const_cast<xml_node*>(&_wrap); // BCC32 workaround
+  return current;
+}
+
+// Pre-increment: steps to the next sibling named _name; asserts that the
+// iterator currently points at a node.
+PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
+{
+  assert(_wrap._root);
+
+  xml_node following = _wrap.next_sibling(_name);
+  _wrap = following;
+
+  return *this;
+}
+
+// Post-increment: advances and returns the iterator's previous position.
+PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
+{
+  xml_named_node_iterator before(*this);
+
+  operator++();
+  return before;
+}
+
+// Pre-decrement: steps to the previous sibling named _name. When the iterator
+// holds no node it starts from the parent's last child, which is kept as the
+// result if its name already matches.
+PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--()
+{
+  if (!_wrap._root) {
+    _wrap = _parent.last_child();
+
+    // the last child itself may be the match we are looking for
+    if (impl::strequal(_wrap.name(), _name)) return *this;
+  }
+
+  _wrap = _wrap.previous_sibling(_name);
+
+  return *this;
+}
+
+// Post-decrement: steps back and returns the iterator's previous position.
+PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
+{
+  xml_named_node_iterator before(*this);
+
+  operator--();
+  return before;
+}
+
+PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto) // a default result reports status_internal_error until a parse overwrites it
+{
+}
+
+// A parse result converts to true only when status is status_ok.
+PUGI__FN xml_parse_result::operator bool() const
+{
+  const bool succeeded = (status == status_ok);
+
+  return succeeded;
+}
+
+// Maps `status` to a fixed English message; any status without a case below
+// yields "Unknown error". The returned pointer refers to a string literal.
+PUGI__FN const char* xml_parse_result::description() const
+{
+  const char* message = "Unknown error";
+
+  switch (status) {
+  case status_ok:
+    message = "No error";
+    break;
+
+  // I/O and resource failures
+  case status_file_not_found:
+    message = "File was not found";
+    break;
+  case status_io_error:
+    message = "Error reading from file/stream";
+    break;
+  case status_out_of_memory:
+    message = "Could not allocate memory";
+    break;
+  case status_internal_error:
+    message = "Internal error occurred";
+    break;
+
+  // parsing failures
+  case status_unrecognized_tag:
+    message = "Could not determine tag type";
+    break;
+  case status_bad_pi:
+    message = "Error parsing document declaration/processing instruction";
+    break;
+  case status_bad_comment:
+    message = "Error parsing comment";
+    break;
+  case status_bad_cdata:
+    message = "Error parsing CDATA section";
+    break;
+  case status_bad_doctype:
+    message = "Error parsing document type declaration";
+    break;
+  case status_bad_pcdata:
+    message = "Error parsing PCDATA section";
+    break;
+  case status_bad_start_element:
+    message = "Error parsing start element tag";
+    break;
+  case status_bad_attribute:
+    message = "Error parsing element attribute";
+    break;
+  case status_bad_end_element:
+    message = "Error parsing end element tag";
+    break;
+  case status_end_element_mismatch:
+    message = "Start-end tags mismatch";
+    break;
+
+  case status_append_invalid_root:
+    message = "Unable to append nodes: root is not an element or document";
+    break;
+
+  case status_no_document_element:
+    message = "No document element found";
+    break;
+
+  default:
+    break;
+  }
+
+  return message;
+}
+
+PUGI__FN xml_document::xml_document(): _buffer(0) // starts with no parse buffer, then builds the empty document root
+{
+ create();
+}
+
+PUGI__FN xml_document::~xml_document() // releases every buffer and memory page via destroy()
+{
+ destroy();
+}
+
+PUGI__FN void xml_document::reset() // tears the document down and immediately re-creates an empty root
+{
+ destroy();
+ create();
+}
+
+// Replaces the contents of this document with copies of proto's top-level nodes
+// (each one is appended with append_copy, in sibling order).
+//
+// Passing the document itself is treated as a no-op: without the guard, the
+// reset() call below would destroy the very tree we are about to copy from and
+// the document would silently end up empty.
+PUGI__FN void xml_document::reset(const xml_document& proto)
+{
+    if (this == &proto) return;
+
+    reset();
+
+    for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
+        append_copy(cur);
+}
+
+PUGI__FN void xml_document::create() // builds the sentinel memory page inside the embedded _memory array and constructs the document root in it; _root must be null on entry
+{
+ assert(!_root);
+
+#ifdef PUGIXML_COMPACT
+ const size_t page_offset = sizeof(uint32_t); // room for the compact page marker that sits right after the page header
+#else
+ const size_t page_offset = 0;
+#endif
+
+ // compile-time check: page header + document struct + worst-case alignment padding (+ marker) must fit in _memory
+ PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + impl::xml_memory_page_alignment - sizeof(void*) + page_offset <= sizeof(_memory));
+
+ // align the start of _memory upwards to the page alignment boundary
+ void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(_memory) + (impl::xml_memory_page_alignment - 1)) & ~(impl::xml_memory_page_alignment - 1));
+
+ // prepare page structure
+ impl::xml_memory_page* page = impl::xml_memory_page::construct(page_memory);
+ assert(page);
+
+ page->busy_size = impl::xml_memory_page_size; // NOTE(review): presumably marks the sentinel page as fully used so nothing else is allocated from it - confirm against xml_allocator
+
+ // setup first page marker
+#ifdef PUGIXML_COMPACT
+ // round-trip through void* to avoid 'cast increases required alignment of target type' warning
+ page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
+ *page->compact_page_marker = sizeof(impl::xml_memory_page);
+#endif
+
+ // placement-construct the document root right after the page header (and marker, if any)
+ _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page);
+ _root->prev_sibling_c = _root; // the root starts out as its own previous sibling
+
+ // the document struct doubles as the allocator of the sentinel page
+ page->allocator = static_cast<impl::xml_document_struct*>(_root);
+
+ // verify the document struct ends inside _memory
+ assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
+}
+
+PUGI__FN void xml_document::destroy() // frees the parse buffer, the extra buffers and every page except the embedded sentinel, then nulls _root
+{
+ assert(_root);
+
+ // free the buffer recorded in _buffer (the load functions pass &_buffer to the loader implementations)
+ if (_buffer) {
+ impl::xml_memory::deallocate(_buffer);
+ _buffer = 0;
+ }
+
+ // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
+ for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next) {
+ if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
+ }
+
+ // destroy dynamic storage, leave sentinel page (it lives inside the _memory member)
+ impl::xml_memory_page* root_page = PUGI__GETPAGE(_root);
+ assert(root_page && !root_page->prev); // the root's page must be the first page of the list
+ assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
+
+ for (impl::xml_memory_page* page = root_page->next; page; ) {
+ impl::xml_memory_page* next = page->next; // read the link before the page is freed
+
+ impl::xml_allocator::deallocate_page(page);
+
+ page = next;
+ }
+
+#ifdef PUGIXML_COMPACT
+ // destroy hash table
+ static_cast<impl::xml_document_struct*>(_root)->hash.clear();
+#endif
+
+ _root = 0;
+}
+
+#ifndef PUGIXML_NO_STL
+// Discards the current contents and parses XML from a narrow-character stream
+// into this document, using the given parse options and source encoding.
+PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
+{
+    reset();
+
+    impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
+
+    return impl::load_stream_impl(doc, stream, options, encoding, &_buffer);
+}
+
+// Discards the current contents and parses XML from a wide-character stream
+// into this document; the source encoding is always encoding_wchar.
+PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
+{
+    reset();
+
+    impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
+
+    return impl::load_stream_impl(doc, stream, options, encoding_wchar, &_buffer);
+}
+#endif
+
+PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options) // parses a zero-terminated char_t string by forwarding to load_buffer
+{
+ // Force native encoding (skip autodetection): wchar in PUGIXML_WCHAR_MODE builds, UTF-8 otherwise
+#ifdef PUGIXML_WCHAR_MODE
+ xml_encoding encoding = encoding_wchar;
+#else
+ xml_encoding encoding = encoding_utf8;
+#endif
+
+ return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding); // size is passed in bytes, terminator excluded
+}
+
+PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options) // plain forwarder to load_string
+{
+ return load_string(contents, options);
+}
+
+PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding) // discards current contents, then parses the file at path_ (opened in binary read mode)
+{
+ reset();
+
+ using impl::auto_deleter; // MSVC7 workaround
+ auto_deleter<FILE, int(*)(FILE*)> file(fopen(path_, "rb"), fclose); // fclose is registered as the deleter for the opened handle
+
+ return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer); // NOTE(review): a failed fopen reaches load_file_impl as a null FILE* - presumably reported as a parse status there; confirm
+}
+
+PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding) // wide-path variant: discards current contents, then parses the file opened through impl::open_file_wide
+{
+ reset();
+
+ using impl::auto_deleter; // MSVC7 workaround
+ auto_deleter<FILE, int(*)(FILE*)> file(impl::open_file_wide(path_, L"rb"), fclose); // fclose is registered as the deleter for the opened handle
+
+ return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
+}
+
+PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) // discards current contents, then parses size bytes of read-only caller memory
+{
+ reset();
+
+ return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer); // NOTE(review): the two false flags are presumably "is_mutable" and "own" - confirm against load_buffer_impl
+}
+
+PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding) // discards current contents, then parses size bytes of caller memory passed as non-const
+{
+ reset();
+
+ return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer); // NOTE(review): flags true/false presumably mean "mutable buffer, not owned by the document" - confirm against load_buffer_impl
+}
+
+PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding) // same as load_buffer_inplace but with the last flag set as well
+{
+ reset();
+
+ return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer); // NOTE(review): flags true/true presumably mean "mutable buffer, ownership transferred to the document" - confirm against load_buffer_impl
+}
+
+PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const // serializes the document to writer: optional BOM, optional default declaration, then the node tree
+{
+ impl::xml_buffered_writer buffered_writer(writer, encoding);
+
+ if ((flags & format_write_bom) && encoding != encoding_latin1) { // a BOM is never written for latin1 output
+ // BOM always represents the codepoint U+FEFF, so just write it in native encoding
+#ifdef PUGIXML_WCHAR_MODE
+ unsigned int bom = 0xfeff;
+ buffered_writer.write(static_cast<wchar_t>(bom));
+#else
+ buffered_writer.write('\xef', '\xbb', '\xbf'); // UTF-8 encoding of U+FEFF
+#endif
+ }
+
+ if (!(flags & format_no_declaration) && !impl::has_declaration(_root)) { // emit a default declaration only when allowed and has_declaration reports none
+ buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
+ if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
+ buffered_writer.write('?', '>');
+ if (!(flags & format_raw)) buffered_writer.write('\n'); // raw output gets no line break after the declaration
+ }
+
+ impl::node_output(buffered_writer, _root, indent, flags, 0);
+
+ buffered_writer.flush();
+}
+
+#ifndef PUGIXML_NO_STL
+PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const // wraps the narrow stream in an xml_writer_stream and delegates to save(xml_writer&, ...)
+{
+ xml_writer_stream writer(stream);
+
+ save(writer, indent, flags, encoding);
+}
+
+PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const // wraps the wide stream in an xml_writer_stream and saves with encoding_wchar
+{
+ xml_writer_stream writer(stream);
+
+ save(writer, indent, flags, encoding_wchar);
+}
+#endif
+
+// Writes the document to the file at path_. The file is opened in text mode
+// ("w") when format_save_file_text is set in flags and in binary mode ("wb")
+// otherwise; the result of impl::save_file_impl is returned unchanged.
+PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
+{
+    using impl::auto_deleter; // MSVC7 workaround
+
+    const char* mode = (flags & format_save_file_text) ? "w" : "wb";
+
+    auto_deleter<FILE, int(*)(FILE*)> file(fopen(path_, mode), fclose);
+
+    return impl::save_file_impl(*this, file.data, indent, flags, encoding);
+}
+
+// Wide-path variant of save_file. The file is opened through
+// impl::open_file_wide in text mode (L"w") when format_save_file_text is set in
+// flags and in binary mode (L"wb") otherwise.
+PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
+{
+    using impl::auto_deleter; // MSVC7 workaround
+
+    const wchar_t* mode = (flags & format_save_file_text) ? L"w" : L"wb";
+
+    auto_deleter<FILE, int(*)(FILE*)> file(impl::open_file_wide(path_, mode), fclose);
+
+    return impl::save_file_impl(*this, file.data, indent, flags, encoding);
+}
+
+// Returns the first child of the document root whose type is node_element,
+// or an empty xml_node when the root has no element child.
+PUGI__FN xml_node xml_document::document_element() const
+{
+    assert(_root);
+
+    xml_node_struct* child = _root->first_child;
+
+    // skip over leading non-element children
+    while (child && PUGI__NODETYPE(child) != node_element)
+        child = child->next_sibling;
+
+    return child ? xml_node(child) : xml_node();
+}
+
+#ifndef PUGIXML_NO_STL
+PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str) // converts a zero-terminated wide string through impl::as_utf8_impl; str must not be null
+{
+ assert(str);
+
+ return impl::as_utf8_impl(str, impl::strlength_wide(str));
+}
+
+PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str) // std::wstring overload; the length comes from str.size() rather than a terminator scan
+{
+ return impl::as_utf8_impl(str.c_str(), str.size());
+}
+
+PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str) // converts a zero-terminated narrow string through impl::as_wide_impl; str must not be null
+{
+ assert(str);
+
+ return impl::as_wide_impl(str, strlen(str));
+}
+
+PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str) // std::string overload; the length comes from str.size() rather than a terminator scan
+{
+ return impl::as_wide_impl(str.c_str(), str.size());
+}
+#endif
+
+PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate) // installs the allocate/deallocate pair stored in impl::xml_memory; the pointers are stored as given, with no null check
+{
+ impl::xml_memory::allocate = allocate;
+ impl::xml_memory::deallocate = deallocate;
+}
+
+PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function() // returns the allocation function currently stored in impl::xml_memory
+{
+ return impl::xml_memory::allocate;
+}
+
+PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function() // returns the deallocation function currently stored in impl::xml_memory
+{
+ return impl::xml_memory::deallocate;
+}
+}
+
+#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
+namespace std
+{
+// Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier):
+// each _Iter_cat overload reports the pugi iterator type as bidirectional
+PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
+{
+ return std::bidirectional_iterator_tag();
+}
+
+PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
+{
+ return std::bidirectional_iterator_tag();
+}
+
+PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
+{
+ return std::bidirectional_iterator_tag();
+}
+}
+#endif
+
+#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
+namespace std
+{
+// Workarounds for (non-standard) iterator category detection under __SUNPRO_CC:
+// each __iterator_category overload reports the pugi iterator type as bidirectional
+PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
+{
+ return std::bidirectional_iterator_tag();
+}
+
+PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
+{
+ return std::bidirectional_iterator_tag();
+}
+
+PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
+{
+ return std::bidirectional_iterator_tag();
+}
+}
+#endif
+
+#ifndef PUGIXML_NO_XPATH
+// STL replacements
+PUGI__NS_BEGIN
+struct equal_to { // function object wrapping operator== for any type T
+ template <typename T> bool operator()(const T& lhs, const T& rhs) const {
+ return lhs == rhs;
+ }
+};
+
+struct not_equal_to { // function object wrapping operator!= for any type T
+ template <typename T> bool operator()(const T& lhs, const T& rhs) const {
+ return lhs != rhs;
+ }
+};
+
+struct less { // function object wrapping operator< for any type T
+ template <typename T> bool operator()(const T& lhs, const T& rhs) const {
+ return lhs < rhs;
+ }
+};
+
+struct less_equal { // function object wrapping operator<= for any type T
+ template <typename T> bool operator()(const T& lhs, const T& rhs) const {
+ return lhs <= rhs;
+ }
+};
+
+template <typename T> void swap(T& lhs, T& rhs) // exchanges two values through a temporary copy; T must be copy-constructible and assignable
+{
+ T temp = lhs;
+ lhs = rhs;
+ rhs = temp;
+}
+
+// Returns an iterator to the first element of [begin, end) that no other element
+// precedes under pred (a strict "less than" predicate taking two elements).
+//
+// Mirrors std::min_element for the empty range: end is returned untouched. The
+// scan used to start at begin + 1 unconditionally, which for begin == end steps
+// past the end of the range and never meets the loop's termination condition.
+template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
+{
+    if (begin == end) return end;
+
+    I result = begin;
+
+    for (I it = begin + 1; it != end; ++it)
+        if (pred(*it, *result))
+            result = it;
+
+    return result;
+}
+
+template <typename I> void reverse(I begin, I end) // reverses [begin, end) in place by swapping the outermost pair and moving inwards
+{
+ while (end - begin > 1) swap(*begin++, *--end); // stops once fewer than two elements remain between the cursors
+}
+
+template <typename I> I unique(I begin, I end) // collapses runs of adjacent equal elements in place; returns the new past-the-end iterator
+{
+ // fast skip head: nothing needs to move while neighbours already differ
+ while (end - begin > 1 && *begin != *(begin + 1)) begin++;
+
+ if (begin == end) return begin; // empty range
+
+ // last written element
+ I write = begin++;
+
+ // merge unique elements
+ while (begin != end) {
+ if (*begin != *write)
+ *++write = *begin++; // new value: store it right after the last kept element
+ else
+ begin++; // duplicate of the last kept element: drop it
+ }
+
+ // past-the-end (write points to live element)
+ return write + 1;
+}
+
+// Copies [begin, end) so that the copy ends just before target, walking from the
+// last element to the first. Because elements are written back to front, the
+// destination may overlap the source as long as it lies further to the right.
+template <typename I> void copy_backwards(I begin, I end, I target)
+{
+    while (end != begin) {
+        --end;
+        --target;
+
+        *target = *end;
+    }
+}
+
+template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*) // sorts the non-empty range [begin, end) by pred; the unnamed T* argument only serves to deduce the element type
+{
+ assert(begin != end);
+
+ for (I it = begin + 1; it != end; ++it) {
+ T val = *it;
+
+ if (pred(val, *begin)) {
+ // move to front: shift [begin, it) one slot to the right
+ copy_backwards(begin, it, it + 1);
+ *begin = val;
+ } else {
+ I hole = it;
+
+ // move hole backwards (no bounds check needed: val is not before *begin, so the scan stops at begin at the latest)
+ while (pred(val, *(hole - 1))) {
+ *hole = *(hole - 1);
+ hole--;
+ }
+
+ // fill hole with element
+ *hole = val;
+ }
+ }
+}
+
+// std variant for elements with ==: three-way partition around *middle; on return [*out_eqbeg, *out_eqend) is the run of elements equal to the pivot
+template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
+{
+ I eqbeg = middle, eqend = middle + 1; // the equal range starts out as just the pivot
+
+ // expand equal range over neighbours that already equal the pivot
+ while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg;
+ while (eqend != end && *eqend == *eqbeg) ++eqend;
+
+ // process outer elements
+ I ltend = eqbeg, gtbeg = eqend; // [ltend, eqbeg) and [eqend, gtbeg) are the already scanned parts on each side
+
+ for (;;) {
+ // find the element from the right side that belongs to the left one
+ for (; gtbeg != end; ++gtbeg)
+ if (!pred(*eqbeg, *gtbeg)) {
+ if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++); // equal to pivot: absorb into the equal range
+ else break;
+ }
+
+ // find the element from the left side that belongs to the right one
+ for (; ltend != begin; --ltend)
+ if (!pred(*(ltend - 1), *eqbeg)) {
+ if (*eqbeg == *(ltend - 1)) swap(*(ltend - 1), *--eqbeg); // equal to pivot: absorb into the equal range
+ else break;
+ }
+
+ // scanned all elements
+ if (gtbeg == end && ltend == begin) {
+ *out_eqbeg = eqbeg;
+ *out_eqend = eqend;
+ return;
+ }
+
+ // make room for elements by moving equal area
+ if (gtbeg == end) {
+ if (--ltend != --eqbeg) swap(*ltend, *eqbeg); // right side exhausted: shift the equal range one slot left
+ swap(*eqbeg, *--eqend);
+ } else if (ltend == begin) {
+ if (eqend != gtbeg) swap(*eqbeg, *eqend); // left side exhausted: shift the equal range one slot right
+ ++eqend;
+ swap(*gtbeg++, *eqbeg++);
+ } else swap(*gtbeg++, *--ltend); // one misplaced element on each side: exchange them
+ }
+}
+
+template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred) // orders the three referenced elements by pred, leaving their median in *middle
+{
+ if (pred(*middle, *first)) swap(*middle, *first);
+ if (pred(*last, *middle)) swap(*last, *middle);
+ if (pred(*middle, *first)) swap(*middle, *first); // second check is needed because the previous swap may have moved a smaller value into *middle
+}
+
+template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred) // moves a pivot estimate into *middle; last refers to the final element (inclusive), not past-the-end
+{
+ if (last - first <= 40) {
+ // median of three for small chunks
+ median3(first, middle, last, pred);
+ } else {
+ // median of nine
+ size_t step = (last - first + 1) / 8;
+
+ median3(first, first + step, first + 2 * step, pred); // medians of three samples near the front, the middle and the back
+ median3(middle - step, middle, middle + step, pred);
+ median3(last - 2 * step, last - step, last, pred);
+ median3(first + step, middle, last - step, pred); // median of the three medians ends up in *middle
+ }
+}
+
+template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred) // sorts [begin, end) by pred: three-way partitioning for ranges above 32 elements, insertion sort for the rest
+{
+ // sort large chunks
+ while (end - begin > 32) {
+ // find median element
+ I middle = begin + (end - begin) / 2;
+ median(begin, middle, end - 1, pred);
+
+ // partition in three chunks (< = >)
+ I eqbeg, eqend;
+ partition(begin, middle, end, pred, &eqbeg, &eqend);
+
+ // recurse into the smaller side and keep looping on the larger one
+ if (eqbeg - begin > end - eqend) {
+ sort(eqend, end, pred);
+ end = eqbeg;
+ } else {
+ sort(begin, eqbeg, pred);
+ begin = eqend;
+ }
+ }
+
+ // insertion sort small chunk
+ if (begin != end) insertion_sort(begin, end, pred, &*begin); // &*begin only supplies the element type to insertion_sort
+}
+PUGI__NS_END
+
+// Allocator used for AST and evaluation stacks
+PUGI__NS_BEGIN
+static const size_t xpath_memory_page_size = // size of the data array in each xpath_memory_block; PUGIXML_MEMORY_XPATH_PAGE_SIZE overrides the 4096 default
+#ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
+ PUGIXML_MEMORY_XPATH_PAGE_SIZE
+#else
+ 4096
+#endif
+ ;
+
+static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*); // allocation granularity: the larger of sizeof(double) and sizeof(void*)
+
+struct xpath_memory_block { // one link in the chain of blocks used by xpath_allocator
+ xpath_memory_block* next; // block that was current before this one was pushed (0 for the last block in the chain)
+ size_t capacity; // usable bytes in data; may exceed sizeof(data) for blocks sized to fit a large request
+
+ union {
+ char data[xpath_memory_page_size];
+ double alignment; // never read in the visible code; presumably present only to give data the alignment of double
+ };
+};
+
+class xpath_allocator // bump allocator over a chain of xpath_memory_block; individual objects are never freed, only whole blocks
+{
+ xpath_memory_block* _root; // block currently being allocated from (head of the chain)
+ size_t _root_size; // bytes already handed out from _root
+
+public:
+#ifdef PUGIXML_NO_EXCEPTIONS
+ jmp_buf* error_handler; // longjmp target used by allocate() when memory runs out
+#endif
+
+ xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size) {
+#ifdef PUGIXML_NO_EXCEPTIONS
+ error_handler = 0;
+#endif
+ }
+
+ void* allocate_nothrow(size_t size) { // returns size bytes (rounded up to the block alignment) or 0 when a new block cannot be obtained
+ // round size up to block alignment boundary
+ size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
+
+ if (_root_size + size <= _root->capacity) { // fits into the current block: bump the offset
+ void* buf = &_root->data[0] + _root_size;
+ _root_size += size;
+ return buf;
+ } else {
+ // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
+ size_t block_capacity_base = sizeof(_root->data);
+ size_t block_capacity_req = size + block_capacity_base / 4;
+ size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;
+
+ size_t block_size = block_capacity + offsetof(xpath_memory_block, data); // header fields plus payload
+
+ xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
+ if (!block) return 0;
+
+ block->next = _root; // push the new block in front of the chain
+ block->capacity = block_capacity;
+
+ _root = block;
+ _root_size = size;
+
+ return block->data;
+ }
+ }
+
+ void* allocate(size_t size) { // like allocate_nothrow, but failure is reported by longjmp (PUGIXML_NO_EXCEPTIONS) or std::bad_alloc
+ void* result = allocate_nothrow(size);
+
+ if (!result) {
+#ifdef PUGIXML_NO_EXCEPTIONS
+ assert(error_handler);
+ longjmp(*error_handler, 1);
+#else
+ throw std::bad_alloc();
+#endif
+ }
+
+ return result;
+ }
+
+ void* reallocate(void* ptr, size_t old_size, size_t new_size) { // grows the most recent allocation (or allocates fresh when ptr is 0); the first old_size bytes are preserved
+ // round size up to block alignment boundary
+ old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
+ new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
+
+ // we can only reallocate the last object
+ assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);
+
+ // adjust root size so that we have not allocated the object at all
+ bool only_object = (_root_size == old_size); // must be evaluated before _root_size is rolled back
+
+ if (ptr) _root_size -= old_size;
+
+ // allocate a new version (this will obviously reuse the memory if possible)
+ void* result = allocate(new_size);
+ assert(result);
+
+ // we have a new block
+ if (result != ptr && ptr) {
+ // copy old data
+ assert(new_size >= old_size);
+ memcpy(result, ptr, old_size);
+
+ // free the previous page if it had no other objects
+ if (only_object) {
+ assert(_root->data == result);
+ assert(_root->next);
+
+ xpath_memory_block* next = _root->next->next;
+
+ if (next) {
+ // deallocate the whole page, unless it was the first one
+ xml_memory::deallocate(_root->next);
+ _root->next = next;
+ }
+ }
+ }
+
+ return result;
+ }
+
+ void revert(const xpath_allocator& state) { // frees every block pushed after state was captured and restores its root and offset
+ // free all new pages
+ xpath_memory_block* cur = _root;
+
+ while (cur != state._root) {
+ xpath_memory_block* next = cur->next; // read the link before the block is freed
+
+ xml_memory::deallocate(cur);
+
+ cur = next;
+ }
+
+ // restore state
+ _root = state._root;
+ _root_size = state._root_size;
+ }
+
+ void release() { // frees every block except the last one in the chain; _root itself is left untouched
+ xpath_memory_block* cur = _root;
+ assert(cur);
+
+ while (cur->next) { // the block with next == 0 is skipped (xpath_stack_data stores those blocks as members)
+ xpath_memory_block* next = cur->next;
+
+ xml_memory::deallocate(cur);
+
+ cur = next;
+ }
+ }
+};
+
+struct xpath_allocator_capture { // scope guard: snapshots an allocator on construction and reverts it to that snapshot on destruction
+ xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc) {
+ }
+
+ ~xpath_allocator_capture() {
+ _target->revert(_state);
+ }
+
+ xpath_allocator* _target; // allocator to roll back
+ xpath_allocator _state; // copy of the allocator taken at construction time
+};
+
+struct xpath_stack { // pair of allocators passed around together (see xpath_stack_data, which owns both)
+ xpath_allocator* result; // NOTE(review): presumably for values that outlive the current evaluation step - confirm at call sites
+ xpath_allocator* temp; // NOTE(review): presumably for scratch values - confirm at call sites
+};
+
+struct xpath_stack_data { // owns two embedded first blocks, the two allocators built on them and the xpath_stack pointing at those allocators
+ xpath_memory_block blocks[2]; // blocks[0] backs result, blocks[1] backs temp
+ xpath_allocator result;
+ xpath_allocator temp;
+ xpath_stack stack;
+
+#ifdef PUGIXML_NO_EXCEPTIONS
+ jmp_buf error_handler; // shared longjmp target for both allocators
+#endif
+
+ xpath_stack_data(): result(blocks + 0), temp(blocks + 1) {
+ blocks[0].next = blocks[1].next = 0; // each embedded block terminates its chain
+ blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
+
+ stack.result = &result;
+ stack.temp = &temp;
+
+#ifdef PUGIXML_NO_EXCEPTIONS
+ result.error_handler = temp.error_handler = &error_handler;
+#endif
+ }
+
+ ~xpath_stack_data() { // frees the blocks both allocators obtained beyond the embedded ones
+ result.release();
+ temp.release();
+ }
+};
+PUGI__NS_END
+
+// String class
+PUGI__NS_BEGIN
+class xpath_string // zero-terminated string that either borrows a constant buffer or points into xpath_allocator memory
+{
+ const char_t* _buffer; // never null: the default constructor points it at an empty literal
+ bool _uses_heap; // true when _buffer was obtained from an xpath_allocator
+ size_t _length_heap; // cached length, meaningful only when _uses_heap is true
+
+ static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc) { // copies length characters into allocator memory and zero-terminates the copy
+ char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
+ assert(result);
+
+ memcpy(result, string, length * sizeof(char_t));
+ result[length] = 0;
+
+ return result;
+ }
+
+ xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap) {
+ }
+
+public:
+ static xpath_string from_const(const char_t* str) { // borrows str without copying; the caller keeps it alive
+ return xpath_string(str, false, 0);
+ }
+
+ static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end) { // adopts an already allocated, zero-terminated buffer [begin, end]
+ assert(begin <= end && *end == 0);
+
+ return xpath_string(begin, true, static_cast<size_t>(end - begin));
+ }
+
+ static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc) { // copies [begin, end) into allocator memory; an empty range yields the default empty string
+ assert(begin <= end);
+
+ size_t length = static_cast<size_t>(end - begin);
+
+ return length == 0 ? xpath_string() : xpath_string(duplicate_string(begin, length, alloc), true, length);
+ }
+
+ xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0) {
+ }
+
+ void append(const xpath_string& o, xpath_allocator* alloc) { // appends o to this string, allocating from alloc when a copy is required
+ // skip empty sources
+ if (!*o._buffer) return;
+
+ // fast append for constant empty target and constant source
+ if (!*_buffer && !_uses_heap && !o._uses_heap) {
+ _buffer = o._buffer;
+ } else {
+ // need to make heap copy
+ size_t target_length = length();
+ size_t source_length = o.length();
+ size_t result_length = target_length + source_length;
+
+ // allocate new buffer (a heap target is grown through reallocate; a constant target gets a fresh allocation)
+ char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
+ assert(result);
+
+ // a constant target was not carried over by reallocate, so copy it into the new buffer here
+ if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
+
+ // append second string to the new buffer
+ memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
+ result[result_length] = 0;
+
+ // finalize
+ _buffer = result;
+ _uses_heap = true;
+ _length_heap = result_length;
+ }
+ }
+
+ const char_t* c_str() const {
+ return _buffer;
+ }
+
+ size_t length() const { // cached length for heap strings, terminator scan for constant ones
+ return _uses_heap ? _length_heap : strlength(_buffer);
+ }
+
+ char_t* data(xpath_allocator* alloc) { // returns a writable buffer, first copying a constant string into allocator memory
+ // make private heap copy
+ if (!_uses_heap) {
+ size_t length_ = strlength(_buffer);
+
+ _buffer = duplicate_string(_buffer, length_, alloc);
+ _uses_heap = true;
+ _length_heap = length_;
+ }
+
+ return const_cast<char_t*>(_buffer);
+ }
+
+ bool empty() const {
+ return *_buffer == 0;
+ }
+
+ bool operator==(const xpath_string& o) const { // compares contents via strequal, not buffer addresses
+ return strequal(_buffer, o._buffer);
+ }
+
+ bool operator!=(const xpath_string& o) const {
+ return !strequal(_buffer, o._buffer);
+ }
+
+ bool uses_heap() const {
+ return _uses_heap;
+ }
+};
+PUGI__NS_END
+
+PUGI__NS_BEGIN
+// Returns true when pattern is a prefix of string; an empty pattern matches any
+// string. Both arguments must be zero-terminated.
+PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
+{
+    for (; *pattern; ++string, ++pattern)
+        if (*string != *pattern)
+            return false;
+
+    return true;
+}
+
+PUGI__FN const char_t* find_char(const char_t* s, char_t c) // char_t-aware wrapper: wcschr in PUGIXML_WCHAR_MODE builds, strchr otherwise
+{
+#ifdef PUGIXML_WCHAR_MODE
+ return wcschr(s, c);
+#else
+ return strchr(s, c);
+#endif
+}
+
+PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p) // char_t-aware wrapper: wcsstr in PUGIXML_WCHAR_MODE builds, strstr otherwise
+{
+#ifdef PUGIXML_WCHAR_MODE
+ // MSVC6 wcsstr bug workaround (if s is empty it always returns 0): an empty pattern is answered with s directly
+ return (*p == 0) ? s : wcsstr(s, p);
+#else
+ return strstr(s, p);
+#endif
+}
+
+// Converts an ASCII upper-case letter to lower case; every other character is returned unchanged
+PUGI__FN char_t tolower_ascii(char_t ch)
+{
+ return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch; // the unsigned compare covers 'A'..'Z' in one test; OR-ing with ' ' (0x20) sets the lower-case bit
+}
+
+PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc) // string value of an XPath node: attribute/leaf value, or the concatenated pcdata/cdata of all descendants for documents and elements
+{
+ if (na.attribute())
+ return xpath_string::from_const(na.attribute().value()); // borrowed, not copied
+ else {
+ xml_node n = na.node();
+
+ switch (n.type()) {
+ case node_pcdata:
+ case node_cdata:
+ case node_comment:
+ case node_pi:
+ return xpath_string::from_const(n.value());
+
+ case node_document:
+ case node_element: {
+ xpath_string result;
+
+ xml_node cur = n.first_child();
+
+ while (cur && cur != n) { // iterative depth-first walk over the subtree below n
+ if (cur.type() == node_pcdata || cur.type() == node_cdata)
+ result.append(xpath_string::from_const(cur.value()), alloc);
+
+ if (cur.first_child())
+ cur = cur.first_child();
+ else if (cur.next_sibling())
+ cur = cur.next_sibling();
+ else {
+ while (!cur.next_sibling() && cur != n) // climb until an ancestor has a next sibling or we are back at n
+ cur = cur.parent();
+
+ if (cur != n) cur = cur.next_sibling();
+ }
+ }
+
+ return result;
+ }
+
+ default: // any other node type has an empty string value
+ return xpath_string();
+ }
+ }
+}
+
+PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn) // for two nodes sharing a parent, tells whether ln comes before rn in sibling order
+{
+ assert(ln->parent == rn->parent);
+
+ // there is no common ancestor (the shared parent is null), nodes are from different documents; fall back to comparing addresses
+ if (!ln->parent) return ln < rn;
+
+ // determine sibling order by walking forward from both nodes in lockstep
+ xml_node_struct* ls = ln;
+ xml_node_struct* rs = rn;
+
+ while (ls && rs) {
+ if (ls == rn) return true; // reached rn starting from ln: ln is first
+ if (rs == ln) return false; // reached ln starting from rn: rn is first
+
+ ls = ls->next_sibling;
+ rs = rs->next_sibling;
+ }
+
+ // if rn sibling chain ended ln must be before rn
+ return !rs;
+}
+
+PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn) // tells whether ln comes before rn in document order; an ancestor counts as before its descendants
+{
+ // find common ancestor at the same depth, if any
+ xml_node_struct* lp = ln;
+ xml_node_struct* rp = rn;
+
+ while (lp && rp && lp->parent != rp->parent) {
+ lp = lp->parent;
+ rp = rp->parent;
+ }
+
+ // parents are the same!
+ if (lp && rp) return node_is_before_sibling(lp, rp);
+
+ // nodes are at different depths, need to normalize heights
+ bool left_higher = !lp; // lp ran out first, so ln sits closer to the root
+
+ while (lp) { // lift the deeper node by the depth difference (only one of these two loops runs)
+ lp = lp->parent;
+ ln = ln->parent;
+ }
+
+ while (rp) {
+ rp = rp->parent;
+ rn = rn->parent;
+ }
+
+ // one node is the ancestor of the other
+ if (ln == rn) return left_higher;
+
+ // find common ancestor... again
+ while (ln->parent != rn->parent) {
+ ln = ln->parent;
+ rn = rn->parent;
+ }
+
+ return node_is_before_sibling(ln, rn);
+}
+
+// Returns true when parent is node itself or one of node's ancestors.
+// A null parent or a null node always yields false.
+PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node)
+{
+    for (xml_node_struct* cur = node; cur; cur = cur->parent)
+        if (cur == parent)
+            return true;
+
+    return false;
+}
+
+PUGI__FN const void* document_buffer_order(const xpath_node& xnode) // returns a name/value pointer usable as a document-order key, or 0 when no such pointer is available
+{
+ xml_node_struct* node = xnode.node().internal_object();
+
+ if (node) {
+ if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0) { // skipped entirely when the document's contents-shared bit is set
+ if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name; // only strings that are neither allocated nor shared qualify (presumably those still pointing into the parsed buffer)
+ if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value;
+ }
+
+ return 0;
+ }
+
+ xml_attribute_struct* attr = xnode.attribute().internal_object();
+
+ if (attr) {
+ if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0) {
+ if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name; // unlike the node case, attr->name is not null-checked here
+ if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value;
+ }
+
+ return 0;
+ }
+
+ return 0;
+}
+
+struct document_order_comparator { // strict ordering of xpath nodes by document order; attributes sort after their owning element
+ bool operator()(const xpath_node& lhs, const xpath_node& rhs) const {
+ // optimized document order based check: usable only when both sides yield a buffer pointer
+ const void* lo = document_buffer_order(lhs);
+ const void* ro = document_buffer_order(rhs);
+
+ if (lo && ro) return lo < ro;
+
+ // slow comparison
+ xml_node ln = lhs.node(), rn = rhs.node();
+
+ // compare attributes
+ if (lhs.attribute() && rhs.attribute()) {
+ // shared parent
+ if (lhs.parent() == rhs.parent()) {
+ // determine sibling order: rhs is reachable by walking forward from lhs (also true when both are the same attribute)
+ for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
+ if (a == rhs.attribute())
+ return true;
+
+ return false;
+ }
+
+ // compare attribute parents
+ ln = lhs.parent();
+ rn = rhs.parent();
+ } else if (lhs.attribute()) {
+ // attributes go after the parent element
+ if (lhs.parent() == rhs.node()) return false;
+
+ ln = lhs.parent();
+ } else if (rhs.attribute()) {
+ // attributes go after the parent element
+ if (rhs.parent() == lhs.node()) return true;
+
+ rn = rhs.parent();
+ }
+
+ if (ln == rn) return false;
+
+ if (!ln || !rn) return ln < rn; // at least one side is an empty node: fall back to xml_node's operator<
+
+ return node_is_before(ln.internal_object(), rn.internal_object());
+ }
+};
+
+// Ordering used to bring identical xpath nodes next to each other: every
+// attribute node sorts before every non-attribute node; within each group the
+// handles are compared with their own operator<.
+struct duplicate_comparator {
+    bool operator()(const xpath_node& lhs, const xpath_node& rhs) const {
+        if (lhs.attribute()) {
+            // attribute vs. node: the attribute goes first
+            if (!rhs.attribute()) return true;
+
+            return lhs.attribute() < rhs.attribute();
+        }
+
+        // node vs. attribute: the attribute goes first
+        if (rhs.attribute()) return false;
+
+        return lhs.node() < rhs.node();
+    }
+};
+
+PUGI__FN double gen_nan() // produces a NaN value without relying on a NaN constant from the standard library
+{
+#if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
+ union {
+ float f;
+ uint32_t i;
+ } u[sizeof(float) == sizeof(uint32_t) ? 1 : -1]; // negative array size turns a float/uint32_t size mismatch into a compile error
+ u[0].i = 0x7fc00000; // IEEE 754 single-precision quiet NaN bit pattern
+ return u[0].f;
+#else
+ // fallback: 0/0 computed at run time (volatile keeps the compiler from folding the division)
+ const volatile double zero = 0.0;
+ return zero / zero;
+#endif
+}
+
+PUGI__FN bool is_nan(double value) // NaN test using whichever facility the toolchain provides
+{
+#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
+ return !!_isnan(value);
+#elif defined(fpclassify) && defined(FP_NAN)
+ return fpclassify(value) == FP_NAN;
+#else
+ // fallback: NaN is the only value that compares unequal to itself (volatile guards against the comparison being optimized away)
+ const volatile double v = value;
+ return v != v;
+#endif
+}
+
+PUGI__FN const char_t* convert_number_to_string_special(double value) // returns a literal for zero, NaN and the infinities; returns 0 for every other value
+{
+#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
+ if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
+ if (_isnan(value)) return PUGIXML_TEXT("NaN");
+ return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
+#elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
+ switch (fpclassify(value)) {
+ case FP_NAN:
+ return PUGIXML_TEXT("NaN");
+
+ case FP_INFINITE:
+ return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
+
+ case FP_ZERO:
+ return PUGIXML_TEXT("0");
+
+ default: // ordinary finite number: the caller formats it
+ return 0;
+ }
+#else
+ // fallback: classify with plain comparisons
+ const volatile double v = value;
+
+ if (v == 0) return PUGIXML_TEXT("0");
+ if (v != v) return PUGIXML_TEXT("NaN");
+ if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); // with zero already handled, doubling leaves only an infinity unchanged
+ return 0;
+#endif
+}
+
+PUGI__FN bool convert_number_to_boolean(double value) // a number is true unless it is zero or NaN
+{
+ return (value != 0 && !is_nan(value));
+}
+
+// Strips trailing '0' characters from [begin, end) by writing a terminating zero
+// right after the last character that is kept. The terminator is written at end
+// itself when nothing is stripped, so end must be a writable position.
+PUGI__FN void truncate_zeros(char* begin, char* end)
+{
+    char* last = end;
+
+    while (last != begin && *(last - 1) == '0')
+        --last;
+
+    *last = 0;
+}
+
+// gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
+#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
+PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent) // _ecvt_s-based variant: writes the digits into buffer and reports where they start plus the decimal exponent
+{
+ // get base values
+ int sign, exponent;
+ _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign); // sign is received but not used afterwards
+
+ // truncate redundant zeros
+ truncate_zeros(buffer, buffer + strlen(buffer));
+
+ // fill results
+ *out_mantissa = buffer;
+ *out_exponent = exponent;
+}
+#else
+PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent) // sprintf-based variant: formats value in scientific notation and rewrites the text in place into "digits + exponent" form
+{
+ // get a scientific notation value with IEEE DBL_DIG decimals
+ sprintf(buffer, "%.*e", DBL_DIG, value);
+ assert(strlen(buffer) < buffer_size); // checked only after the write, so the caller's buffer must be large enough up front
+ (void)!buffer_size; // keeps buffer_size referenced when the assert above is compiled out
+
+ // get the exponent (possibly negative)
+ char* exponent_string = strchr(buffer, 'e');
+ assert(exponent_string);
+
+ int exponent = atoi(exponent_string + 1);
+
+ // extract mantissa string: skip sign
+ char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
+ assert(mantissa[0] != '0' && mantissa[1] == '.');
+
+ // divide mantissa by 10 to eliminate integer part: the leading digit overwrites the '.', and the exponent grows by one to compensate
+ mantissa[1] = mantissa[0];
+ mantissa++;
+ exponent++;
+
+ // remove extra mantissa digits and zero-terminate mantissa (this also cuts off the "e..." suffix)
+ truncate_zeros(mantissa, exponent_string);
+
+ // fill results
+ *out_mantissa = mantissa;
+ *out_exponent = exponent;
+}
+#endif
+
+// Formats a number as a string in plain decimal notation (no exponent).
+// Values recognized by convert_number_to_string_special use its constant
+// spelling; everything else is assembled from the mantissa/exponent pair
+// into memory obtained from `alloc`.
+PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
+{
+  // special values have constant spellings
+  if (const char_t* special = convert_number_to_string_special(value))
+    return xpath_string::from_const(special);
+
+  // split the number into decimal digits and a decimal exponent
+  char digits_buffer[32];
+
+  char* digits;
+  int exp10;
+  convert_number_to_mantissa_exponent(value, digits_buffer, sizeof(digits_buffer), &digits, &exp10);
+
+  // size the output: digits + |exponent| padding zeros + 4 extra characters
+  size_t padding = static_cast<size_t>(exp10 > 0 ? exp10 : -exp10);
+  size_t capacity = strlen(digits_buffer) + padding + 4;
+
+  char_t* out_begin = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * capacity));
+  assert(out_begin);
+
+  char_t* out = out_begin;
+
+  // sign
+  if (value < 0) *out++ = '-';
+
+  // integer part: a single zero, or `exp10` digits padded with zeros once the mantissa runs out
+  if (exp10 <= 0) {
+    *out++ = '0';
+  } else {
+    for (; exp10 > 0; --exp10) {
+      assert(*digits == 0 || static_cast<unsigned int>(static_cast<unsigned int>(*digits) - '0') <= 9);
+
+      if (*digits)
+        *out++ = *digits++;
+      else
+        *out++ = '0';
+    }
+  }
+
+  // fractional part exists only if mantissa digits remain
+  if (*digits) {
+    *out++ = '.';
+
+    // zeros between the decimal point and the first mantissa digit
+    for (; exp10 < 0; ++exp10) *out++ = '0';
+
+    // remaining mantissa digits
+    for (; *digits; ++digits) {
+      assert(static_cast<unsigned int>(*digits - '0') <= 9);
+      *out++ = *digits;
+    }
+  }
+
+  // zero-terminate
+  assert(out < out_begin + capacity);
+  *out = 0;
+
+  return xpath_string::from_heap_preallocated(out_begin, out);
+}
+
+// Returns true when `string` consists of: optional whitespace, an optional
+// '-', digits with an optional fractional part (or a '.' followed by at
+// least one digit), optional whitespace, and nothing else.
+PUGI__FN bool check_string_to_number_format(const char_t* string)
+{
+  const char_t* p = string;
+
+  // leading whitespace
+  while (PUGI__IS_CHARTYPE(*p, ct_space)) ++p;
+
+  // optional sign
+  if (*p == '-') ++p;
+
+  if (*p == 0) return false;
+
+  // the number must start with a digit, or with '.' immediately followed by a digit
+  bool starts_with_digit = PUGI__IS_CHARTYPEX(p[0], ctx_digit);
+  bool starts_with_fraction = p[0] == '.' && PUGI__IS_CHARTYPEX(p[1], ctx_digit);
+
+  if (!starts_with_digit && !starts_with_fraction) return false;
+
+  // integer digits
+  while (PUGI__IS_CHARTYPEX(*p, ctx_digit)) ++p;
+
+  // fractional digits
+  if (*p == '.') {
+    ++p;
+
+    while (PUGI__IS_CHARTYPEX(*p, ctx_digit)) ++p;
+  }
+
+  // trailing whitespace
+  while (PUGI__IS_CHARTYPE(*p, ct_space)) ++p;
+
+  // anything left over makes the string invalid
+  return *p == 0;
+}
+
+// Converts a string to a number: strings rejected by
+// check_string_to_number_format yield gen_nan(); accepted strings are
+// parsed with wcstod or strtod depending on PUGIXML_WCHAR_MODE.
+PUGI__FN double convert_string_to_number(const char_t* string)
+{
+  bool well_formed = check_string_to_number_format(string);
+
+  if (well_formed) {
+#ifdef PUGIXML_WCHAR_MODE
+    return wcstod(string, 0);
+#else
+    return strtod(string, 0);
+#endif
+  }
+
+  return gen_nan();
+}
+
+// Converts the character range [begin, end) to a number. The range is first
+// copied into a zero-terminated buffer: `buffer` when it fits (terminator
+// included), otherwise a temporary heap block. Returns false only when that
+// heap allocation fails; the numeric result goes to *out_result.
+PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
+{
+  const size_t buffer_capacity = sizeof(buffer) / sizeof(buffer[0]);
+  const size_t length = static_cast<size_t>(end - begin);
+
+  // ranges too long for the caller's buffer need a dummy on-heap copy
+  const bool on_heap = length >= buffer_capacity;
+
+  char_t* copy = buffer;
+
+  if (on_heap) {
+    copy = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
+    if (!copy) return false;
+  }
+
+  // build the zero-terminated copy and convert it
+  memcpy(copy, begin, length * sizeof(char_t));
+  copy[length] = 0;
+
+  *out_result = convert_string_to_number(copy);
+
+  // release the dummy buffer
+  if (on_heap) xml_memory::deallocate(copy);
+
+  return true;
+}
+
+// Rounds to the nearest integral value by taking floor(value + 0.5).
+PUGI__FN double round_nearest(double value)
+{
+  double shifted = value + 0.5;
+
+  return floor(shifted);
+}
+
+// Same as round_nearest, except that inputs in [-0.5, -0] give -0.
+PUGI__FN double round_nearest_nzero(double value)
+{
+  // inside [-0.5, 0] ceil() is used so that +0 and -0 stay distinguishable:
+  // -0 is returned for [-0.5, -0] and +0 for +0
+  bool rounds_to_zero = value >= -0.5 && value <= 0;
+
+  if (rounds_to_zero) return ceil(value);
+
+  return floor(value + 0.5);
+}
+
+// Name of the attribute when the xpath node refers to one, otherwise the
+// name of the underlying xml node.
+PUGI__FN const char_t* qualified_name(const xpath_node& node)
+{
+  if (node.attribute())
+    return node.attribute().name();
+
+  return node.node().name();
+}
+
+// Part of the qualified name that follows the first ':'; the whole name
+// when it contains no ':'.
+PUGI__FN const char_t* local_name(const xpath_node& node)
+{
+  const char_t* qname = qualified_name(node);
+  const char_t* colon = find_char(qname, ':');
+
+  if (!colon) return qname;
+
+  return colon + 1;
+}
+
+// Attribute predicate that recognizes the namespace declaration matching
+// the prefix of a qualified name: "xmlns:<prefix>" when the name contains
+// a ':', plain "xmlns" otherwise.
+struct namespace_uri_predicate {
+  const char_t* prefix;   // start of the qualified name, or 0 when it has no prefix
+  size_t prefix_length;   // number of characters before the ':'
+
+  namespace_uri_predicate(const char_t* name) {
+    const char_t* colon = find_char(name, ':');
+
+    if (colon) {
+      prefix = name;
+      prefix_length = static_cast<size_t>(colon - name);
+    } else {
+      prefix = 0;
+      prefix_length = 0;
+    }
+  }
+
+  bool operator()(xml_attribute a) const {
+    const char_t* attr_name = a.name();
+
+    if (!starts_with(attr_name, PUGIXML_TEXT("xmlns"))) return false;
+
+    // the character right after "xmlns" separates default from prefixed declarations
+    if (!prefix) return attr_name[5] == 0;
+
+    return attr_name[5] == ':' && strequalrange(attr_name + 6, prefix, prefix_length);
+  }
+};
+
+// Namespace URI for a node: walks from `node` up through its ancestors and
+// returns the value of the first attribute accepted by the predicate built
+// from the node's name, or an empty string when none is found.
+PUGI__FN const char_t* namespace_uri(xml_node node)
+{
+  namespace_uri_predicate pred = node.name();
+
+  for (xml_node cur = node; cur; cur = cur.parent()) {
+    xml_attribute decl = cur.find_attribute(pred);
+
+    if (decl) return decl.value();
+  }
+
+  return PUGIXML_TEXT("");
+}
+
+// Namespace URI for an attribute: unprefixed attributes get an empty
+// string; otherwise the search starts at `parent` and walks up the
+// ancestors for the first matching declaration.
+PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent)
+{
+  namespace_uri_predicate pred = attr.name();
+
+  // Default namespace does not apply to attributes
+  if (!pred.prefix) return PUGIXML_TEXT("");
+
+  for (xml_node cur = parent; cur; cur = cur.parent()) {
+    xml_attribute decl = cur.find_attribute(pred);
+
+    if (decl) return decl.value();
+  }
+
+  return PUGIXML_TEXT("");
+}
+
+// Dispatches to the attribute or node overload depending on what the
+// xpath node refers to.
+PUGI__FN const char_t* namespace_uri(const xpath_node& node)
+{
+  if (node.attribute())
+    return namespace_uri(node.attribute(), node.parent());
+
+  return namespace_uri(node.node());
+}
+
+// Normalizes whitespace in place: leading and trailing whitespace is
+// removed and every inner whitespace run becomes a single ' '. Returns a
+// pointer to the new terminating zero.
+PUGI__FN char_t* normalize_space(char_t* buffer)
+{
+  char_t* out = buffer;
+  char_t* in = buffer;
+
+  while (*in) {
+    char_t ch = *in++;
+
+    if (!PUGI__IS_CHARTYPE(ch, ct_space)) {
+      *out++ = ch;
+      continue;
+    }
+
+    // swallow the rest of the whitespace run
+    while (PUGI__IS_CHARTYPE(*in, ct_space)) in++;
+
+    // a run at the very start produces no output
+    if (out != buffer) *out++ = ' ';
+  }
+
+  // a run at the very end left one space behind; drop it
+  if (out != buffer && PUGI__IS_CHARTYPE(out[-1], ct_space)) out--;
+
+  // zero-terminate
+  *out = 0;
+
+  return out;
+}
+
+// In-place character translation: a character found in `from` at index i is
+// replaced by to[i] when i < to_length and removed otherwise; characters
+// not present in `from` are kept. Returns a pointer to the new terminator.
+PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
+{
+  char_t* out = buffer;
+
+  for (const char_t* in = buffer; *in; ) {
+    PUGI__DMC_VOLATILE char_t ch = *in++;
+
+    const char_t* match = find_char(from, ch);
+
+    if (match) {
+      // replace when `to` has a counterpart, otherwise drop the character
+      if (static_cast<size_t>(match - from) < to_length)
+        *out++ = to[match - from];
+    } else {
+      *out++ = ch; // do not process
+    }
+  }
+
+  // zero-terminate
+  *out = 0;
+
+  return out;
+}
+
+// Builds a 128-entry lookup table equivalent to translate(from, to) for
+// characters below 128. Returns 0 when either string contains a character
+// >= 128 or when the allocation fails. Entry value 128 means "remove".
+PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
+{
+  unsigned char mapping[128] = {0};
+
+  for (; *from; ++from) {
+    unsigned int src = static_cast<unsigned int>(*from);
+    unsigned int dst = static_cast<unsigned int>(*to);
+
+    if (src >= 128 || dst >= 128)
+      return 0;
+
+    // the first mapping for a character wins; code=128 means "skip character"
+    if (mapping[src] == 0)
+      mapping[src] = static_cast<unsigned char>(dst != 0 ? dst : 128);
+
+    // `to` stops advancing once it is exhausted
+    if (dst != 0) ++to;
+  }
+
+  // characters without a mapping translate to themselves
+  for (int i = 0; i < 128; ++i)
+    if (mapping[i] == 0)
+      mapping[i] = static_cast<unsigned char>(i);
+
+  void* storage = alloc->allocate_nothrow(sizeof(mapping));
+  if (!storage) return 0;
+
+  memcpy(storage, mapping, sizeof(mapping));
+
+  return static_cast<unsigned char*>(storage);
+}
+
+// Applies a table built by translate_table_generate to `buffer` in place.
+// Characters >= 128 are copied unchanged. Returns a pointer to the new
+// terminating zero.
+PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table)
+{
+  char_t* out = buffer;
+
+  for (const char_t* in = buffer; *in; ++in) {
+    char_t ch = *in;
+    unsigned int index = static_cast<unsigned int>(ch);
+
+    if (index >= 128) {
+      *out++ = ch;
+      continue;
+    }
+
+    unsigned char code = table[index];
+
+    // code=128 means "skip character" (table size is 128 so 128 can be a special value)
+    // the store always happens; the write position only advances when bit 7 is clear
+    *out = static_cast<char_t>(code);
+    out += 1 - (code >> 7);
+  }
+
+  // zero-terminate
+  *out = 0;
+
+  return out;
+}
+
+// False for attributes named exactly "xmlns" or starting with "xmlns:";
+// true for every other attribute name.
+inline bool is_xpath_attribute(const char_t* name)
+{
+  if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return true;
+
+  return name[5] != 0 && name[5] != ':';
+}
+
+struct xpath_variable_boolean: xpath_variable { // boolean-typed variable: base part, value, then the inline name
+ xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false) {
+ }
+
+ bool value;
+ char_t name[1]; // start of the name; new_xpath_variable allocates extra characters behind the struct and copies the full name here
+};
+
+struct xpath_variable_number: xpath_variable { // number-typed variable: base part, value, then the inline name
+ xpath_variable_number(): xpath_variable(xpath_type_number), value(0) {
+ }
+
+ double value;
+ char_t name[1]; // start of the name; new_xpath_variable allocates extra characters behind the struct and copies the full name here
+};
+
+struct xpath_variable_string: xpath_variable { // string-typed variable: base part, value pointer, then the inline name
+ xpath_variable_string(): xpath_variable(xpath_type_string), value(0) {
+ }
+
+ ~xpath_variable_string() { // releases the stored string through xml_memory::deallocate
+ if (value) xml_memory::deallocate(value);
+ }
+
+ char_t* value; // 0 until a string is stored; freed by the destructor
+ char_t name[1]; // start of the name; new_xpath_variable allocates extra characters behind the struct and copies the full name here
+};
+
+struct xpath_variable_node_set: xpath_variable { // node-set-typed variable: base part, value, then the inline name
+ xpath_variable_node_set(): xpath_variable(xpath_type_node_set) {
+ }
+
+ xpath_node_set value; // default-constructed by the constructor above
+ char_t name[1]; // start of the name; new_xpath_variable allocates extra characters behind the struct and copies the full name here
+};
+
+static const xpath_node_set dummy_node_set;
+
+// Hashes a zero-terminated string to an unsigned int.
+PUGI__FN unsigned int hash_string(const char_t* str)
+{
+  // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
+  unsigned int hash = 0;
+
+  // mixing step, once per character
+  for (const char_t* p = str; *p; ++p) {
+    hash += static_cast<unsigned int>(*p);
+    hash += hash << 10;
+    hash ^= hash >> 6;
+  }
+
+  // final scrambling
+  hash += hash << 3;
+  hash ^= hash >> 11;
+  hash += hash << 15;
+
+  return hash;
+}
+
+// Allocates and constructs a variable of type T with the name stored inline
+// behind the object. Returns 0 for an empty name or when the allocation
+// fails.
+template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
+{
+  const size_t name_length = strlength(name);
+
+  // empty variable names are invalid
+  if (!name_length) return 0;
+
+  // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
+  // (T::name already provides one character, which leaves room for the terminator)
+  const size_t total_size = sizeof(T) + name_length * sizeof(char_t);
+
+  void* storage = xml_memory::allocate(total_size);
+  if (!storage) return 0;
+
+  T* var = new (storage) T();
+
+  // copy the name together with its terminating zero
+  memcpy(var->name, name, (name_length + 1) * sizeof(char_t));
+
+  return var;
+}
+
+// Creates a variable of the concrete struct matching `type`; returns 0 for
+// any other type value (and whenever the typed factory returns 0).
+PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
+{
+  if (type == xpath_type_node_set)
+    return new_xpath_variable<xpath_variable_node_set>(name);
+
+  if (type == xpath_type_number)
+    return new_xpath_variable<xpath_variable_number>(name);
+
+  if (type == xpath_type_string)
+    return new_xpath_variable<xpath_variable_string>(name);
+
+  if (type == xpath_type_boolean)
+    return new_xpath_variable<xpath_variable_boolean>(name);
+
+  return 0;
+}
+
+// Destroys a variable created by new_xpath_variable<T>: the object was
+// built with placement new, so the destructor is invoked explicitly before
+// the raw memory is handed back to xml_memory.
+template <typename T> PUGI__FN void delete_xpath_variable(T* var)
+{
+  var->~T();
+
+  void* storage = var;
+  xml_memory::deallocate(storage);
+}
+
+// Destroys `var` as the concrete struct selected by `type`; any other type
+// value trips an assertion.
+PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
+{
+  if (type == xpath_type_node_set)
+    delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
+  else if (type == xpath_type_number)
+    delete_xpath_variable(static_cast<xpath_variable_number*>(var));
+  else if (type == xpath_type_string)
+    delete_xpath_variable(static_cast<xpath_variable_string*>(var));
+  else if (type == xpath_type_boolean)
+    delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
+  else {
+    assert(!"Invalid variable type");
+  }
+}
+
+// Copies the value of `rhs` into `lhs` through lhs->set(), selecting the
+// concrete struct by rhs->type(). Returns whatever set() returns; an
+// unknown type asserts and yields false.
+PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs)
+{
+  const xpath_value_type type = rhs->type();
+
+  if (type == xpath_type_node_set)
+    return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
+
+  if (type == xpath_type_number)
+    return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
+
+  if (type == xpath_type_string)
+    return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
+
+  if (type == xpath_type_boolean)
+    return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
+
+  assert(!"Invalid variable type");
+  return false;
+}
+
+// Looks up the variable named by the character range [begin, end) in `set`.
+// The name is first copied into a zero-terminated buffer: `buffer` when it
+// fits (terminator included), otherwise a temporary heap block. Returns
+// false only when that heap allocation fails; the lookup result (whatever
+// set->get returns) goes to *out_result.
+PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result)
+{
+  const size_t buffer_capacity = sizeof(buffer) / sizeof(buffer[0]);
+  const size_t length = static_cast<size_t>(end - begin);
+
+  // names too long for the caller's buffer need a dummy on-heap copy
+  const bool on_heap = length >= buffer_capacity;
+
+  char_t* copy = buffer;
+
+  if (on_heap) {
+    copy = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
+    if (!copy) return false;
+  }
+
+  // build the zero-terminated copy and perform the lookup
+  memcpy(copy, begin, length * sizeof(char_t));
+  copy[length] = 0;
+
+  *out_result = set->get(copy);
+
+  // release the dummy buffer
+  if (on_heap) xml_memory::deallocate(copy);
+
+  return true;
+}
+PUGI__NS_END
+
+// Internal node set class
+PUGI__NS_BEGIN
+// Classifies the range [begin, end) using document_order_comparator on
+// adjacent pairs: type_sorted when every pair compares like the first one
+// and that result is true, type_sorted_reverse when it is consistently
+// false, type_unsorted when the pairs disagree. Ranges with fewer than two
+// nodes count as sorted.
+PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
+{
+  if (end - begin < 2)
+    return xpath_node_set::type_sorted;
+
+  document_order_comparator precedes;
+
+  const bool ascending = precedes(begin[0], begin[1]);
+
+  // every following adjacent pair has to agree with the first one
+  const xpath_node* it = begin + 1;
+
+  while (it + 1 < end) {
+    if (precedes(it[0], it[1]) != ascending)
+      return xpath_node_set::type_unsorted;
+
+    ++it;
+  }
+
+  if (ascending) return xpath_node_set::type_sorted;
+
+  return xpath_node_set::type_sorted_reverse;
+}
+
+// Brings [begin, end) into document order (rev == false) or reverse
+// document order (rev == true), given its current ordering `type`, and
+// returns the resulting ordering.
+PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
+{
+  xpath_node_set::type_t current = type;
+
+  if (current == xpath_node_set::type_unsorted) {
+    // the range may happen to be ordered already; check before sorting
+    current = xpath_get_order(begin, end);
+
+    if (current == xpath_node_set::type_unsorted) {
+      sort(begin, end, document_order_comparator());
+
+      current = xpath_node_set::type_sorted;
+    }
+  }
+
+  const xpath_node_set::type_t wanted = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
+
+  // at this point the range is ordered one way or the other; flip if needed
+  if (current != wanted) reverse(begin, end);
+
+  return wanted;
+}
+
+// Returns the node that comes first according to the ordering `type`:
+// the front element for a sorted range, the back element for a reverse
+// sorted range, and the minimum under document_order_comparator for an
+// unsorted one. An empty range gives a default-constructed xpath_node.
+PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
+{
+  if (begin == end) return xpath_node();
+
+  if (type == xpath_node_set::type_sorted)
+    return *begin;
+
+  if (type == xpath_node_set::type_sorted_reverse)
+    return *(end - 1);
+
+  if (type == xpath_node_set::type_unsorted)
+    return *min_element(begin, end, document_order_comparator());
+
+  assert(!"Invalid node set type");
+  return xpath_node();
+}
+
+// Growable array of xpath_node whose storage comes from an xpath_allocator.
+// It also records the ordering (xpath_node_set::type_t) of its contents.
+// Memory is never released by this class itself.
+class xpath_node_set_raw
+{
+  xpath_node_set::type_t _type;
+
+  xpath_node* _begin; // first element
+  xpath_node* _end;   // one past the last element
+  xpath_node* _eos;   // one past the end of the allocated storage
+
+public:
+  xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0) {
+  }
+
+  xpath_node* begin() const {
+    return _begin;
+  }
+
+  xpath_node* end() const {
+    return _end;
+  }
+
+  bool empty() const {
+    return _end == _begin;
+  }
+
+  size_t size() const {
+    return static_cast<size_t>(_end - _begin);
+  }
+
+  // first node according to the recorded ordering (see xpath_first)
+  xpath_node first() const {
+    return xpath_first(_begin, _end, _type);
+  }
+
+  // slow path of push_back: grows the storage, then appends
+  void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
+
+  // appends one node, growing the storage only when it is full
+  void push_back(const xpath_node& node, xpath_allocator* alloc) {
+    if (_end == _eos) {
+      push_back_grow(node, alloc);
+      return;
+    }
+
+    *_end++ = node;
+  }
+
+  // appends the range [begin_, end_); storage grows to exactly the required size when too small
+  void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc) {
+    if (begin_ == end_) return;
+
+    const size_t old_size = static_cast<size_t>(_end - _begin);
+    const size_t old_capacity = static_cast<size_t>(_eos - _begin);
+    const size_t added = static_cast<size_t>(end_ - begin_);
+    const size_t new_size = old_size + added;
+
+    if (new_size > old_capacity) {
+      // reallocate the old array or allocate a new one
+      void* memory = alloc->reallocate(_begin, old_capacity * sizeof(xpath_node), new_size * sizeof(xpath_node));
+      xpath_node* data = static_cast<xpath_node*>(memory);
+      assert(data);
+
+      // finalize
+      _begin = data;
+      _end = data + old_size;
+      _eos = data + new_size;
+    }
+
+    memcpy(_end, begin_, added * sizeof(xpath_node));
+    _end += added;
+  }
+
+  // puts the contents into document order and records the new ordering
+  void sort_do() {
+    _type = xpath_sort(_begin, _end, _type, false);
+  }
+
+  // drops every element from `pos` on; `pos` must lie inside [begin, end]
+  void truncate(xpath_node* pos) {
+    assert(_begin <= pos && pos <= _end);
+
+    _end = pos;
+  }
+
+  // removes adjacent equal nodes; an unsorted set is first ordered with
+  // duplicate_comparator (the recorded type is left unchanged)
+  void remove_duplicates() {
+    if (_type == xpath_node_set::type_unsorted)
+      sort(_begin, _end, duplicate_comparator());
+
+    _end = unique(_begin, _end);
+  }
+
+  xpath_node_set::type_t type() const {
+    return _type;
+  }
+
+  void set_type(xpath_node_set::type_t value) {
+    _type = value;
+  }
+};
+
+// Slow path of push_back, used when _end == _eos: enlarges the storage and
+// appends `node` right behind the previous contents.
+PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc)
+{
+  const size_t old_capacity = static_cast<size_t>(_eos - _begin);
+
+  // get new capacity (1.5x rule); the +1 guarantees growth from an empty array
+  const size_t new_capacity = old_capacity + old_capacity / 2 + 1;
+
+  // reallocate the old array or allocate a new one
+  void* memory = alloc->reallocate(_begin, old_capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node));
+  xpath_node* data = static_cast<xpath_node*>(memory);
+  assert(data);
+
+  // finalize
+  _begin = data;
+  _eos = data + new_capacity;
+
+  // push: the new node lands in the first free slot
+  data[old_capacity] = node;
+  _end = data + old_capacity + 1;
+}
+PUGI__NS_END
+
+PUGI__NS_BEGIN
+struct xpath_context { // evaluation context passed to the eval_* calls: a node plus its position and the context size
+ xpath_node n;
+ size_t position, size; // predicate application passes a 1-based position and the number of candidate nodes
+
+ xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_) {
+ }
+};
+
+enum lexeme_t { // lexeme kinds produced by xpath_lexer::next()
+ lex_none = 0, // no valid lexeme at the current position (unknown character, lone '!' or ':', '$' without a name, unterminated quoted string)
+ lex_equal, // =
+ lex_not_equal, // !=
+ lex_less, // <
+ lex_greater, // >
+ lex_less_or_equal, // <=
+ lex_greater_or_equal, // >=
+ lex_plus, // +
+ lex_minus, // -
+ lex_multiply, // *
+ lex_union, // |
+ lex_var_ref, // $name or $prefix:name; contents exclude the '$'
+ lex_open_brace, // (
+ lex_close_brace, // )
+ lex_quoted_string, // "..." or '...'; contents exclude the quotes
+ lex_number, // digits with optional fraction, or '.' followed by digits
+ lex_slash, // /
+ lex_double_slash, // //
+ lex_open_square_brace, // [
+ lex_close_square_brace, // ]
+ lex_string, // name, prefix:name or prefix:*
+ lex_comma, // ,
+ lex_axis_attribute, // @
+ lex_dot, // .
+ lex_double_dot, // ..
+ lex_double_colon, // ::
+ lex_eof // end of the query string
+};
+
+// Non-owning view of a character range [begin, end) inside the query text.
+struct xpath_lexer_string {
+  const char_t* begin;
+  const char_t* end;
+
+  xpath_lexer_string(): begin(0), end(0) {
+  }
+
+  // compares the range against a zero-terminated string via strequalrange
+  bool operator==(const char_t* other) const {
+    return strequalrange(other, begin, static_cast<size_t>(end - begin));
+  }
+};
+
+class xpath_lexer // tokenizer for a query string; holds one current lexeme at a time, advanced by next()
+{
+ const char_t* _cur; // position where the next call to next() resumes scanning
+ const char_t* _cur_lexeme_pos; // start of the current lexeme, after skipped whitespace
+ xpath_lexer_string _cur_lexeme_contents; // text of the current lexeme; only updated for var refs, numbers, names and quoted strings
+
+ lexeme_t _cur_lexeme; // kind of the current lexeme
+
+public:
+ explicit xpath_lexer(const char_t* query): _cur(query) { // the first lexeme is read immediately
+ next();
+ }
+
+ const char_t* state() const { // current read position
+ return _cur;
+ }
+
+ void next() { // scans one lexeme starting at _cur and stores its kind, position and (where applicable) contents
+ const char_t* cur = _cur;
+
+ while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
+
+ // save lexeme position for error reporting
+ _cur_lexeme_pos = cur;
+
+ switch (*cur) {
+ case 0:
+ _cur_lexeme = lex_eof;
+ break;
+
+ case '>':
+ if (*(cur+1) == '=') {
+ cur += 2;
+ _cur_lexeme = lex_greater_or_equal;
+ } else {
+ cur += 1;
+ _cur_lexeme = lex_greater;
+ }
+ break;
+
+ case '<':
+ if (*(cur+1) == '=') {
+ cur += 2;
+ _cur_lexeme = lex_less_or_equal;
+ } else {
+ cur += 1;
+ _cur_lexeme = lex_less;
+ }
+ break;
+
+ case '!':
+ if (*(cur+1) == '=') {
+ cur += 2;
+ _cur_lexeme = lex_not_equal;
+ } else {
+ _cur_lexeme = lex_none; // a lone '!' is not a lexeme; the position is not advanced
+ }
+ break;
+
+ case '=':
+ cur += 1;
+ _cur_lexeme = lex_equal;
+
+ break;
+
+ case '+':
+ cur += 1;
+ _cur_lexeme = lex_plus;
+
+ break;
+
+ case '-':
+ cur += 1;
+ _cur_lexeme = lex_minus;
+
+ break;
+
+ case '*':
+ cur += 1;
+ _cur_lexeme = lex_multiply;
+
+ break;
+
+ case '|':
+ cur += 1;
+ _cur_lexeme = lex_union;
+
+ break;
+
+ case '$':
+ cur += 1;
+
+ if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) {
+ _cur_lexeme_contents.begin = cur; // contents start after the '$'
+
+ while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
+
+ if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) { // qname
+ cur++; // :
+
+ while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
+ }
+
+ _cur_lexeme_contents.end = cur;
+
+ _cur_lexeme = lex_var_ref;
+ } else {
+ _cur_lexeme = lex_none; // '$' must be followed by a name start character
+ }
+
+ break;
+
+ case '(':
+ cur += 1;
+ _cur_lexeme = lex_open_brace;
+
+ break;
+
+ case ')':
+ cur += 1;
+ _cur_lexeme = lex_close_brace;
+
+ break;
+
+ case '[':
+ cur += 1;
+ _cur_lexeme = lex_open_square_brace;
+
+ break;
+
+ case ']':
+ cur += 1;
+ _cur_lexeme = lex_close_square_brace;
+
+ break;
+
+ case ',':
+ cur += 1;
+ _cur_lexeme = lex_comma;
+
+ break;
+
+ case '/':
+ if (*(cur+1) == '/') {
+ cur += 2;
+ _cur_lexeme = lex_double_slash;
+ } else {
+ cur += 1;
+ _cur_lexeme = lex_slash;
+ }
+ break;
+
+ case '.':
+ if (*(cur+1) == '.') {
+ cur += 2;
+ _cur_lexeme = lex_double_dot;
+ } else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit)) { // number without an integer part, e.g. ".5"
+ _cur_lexeme_contents.begin = cur; // .
+
+ ++cur;
+
+ while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
+
+ _cur_lexeme_contents.end = cur;
+
+ _cur_lexeme = lex_number;
+ } else {
+ cur += 1;
+ _cur_lexeme = lex_dot;
+ }
+ break;
+
+ case '@':
+ cur += 1;
+ _cur_lexeme = lex_axis_attribute;
+
+ break;
+
+ case '"':
+ case '\'': {
+ char_t terminator = *cur; // the string ends at the next occurrence of the opening quote character
+
+ ++cur;
+
+ _cur_lexeme_contents.begin = cur;
+ while (*cur && *cur != terminator) cur++;
+ _cur_lexeme_contents.end = cur;
+
+ if (!*cur)
+ _cur_lexeme = lex_none; // end of input reached before the closing quote
+ else {
+ cur += 1;
+ _cur_lexeme = lex_quoted_string;
+ }
+
+ break;
+ }
+
+ case ':':
+ if (*(cur+1) == ':') {
+ cur += 2;
+ _cur_lexeme = lex_double_colon;
+ } else {
+ _cur_lexeme = lex_none; // a lone ':' is not a lexeme; the position is not advanced
+ }
+ break;
+
+ default:
+ if (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) { // number: digits with an optional '.' and more digits
+ _cur_lexeme_contents.begin = cur;
+
+ while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
+
+ if (*cur == '.') {
+ cur++;
+
+ while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
+ }
+
+ _cur_lexeme_contents.end = cur;
+
+ _cur_lexeme = lex_number;
+ } else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) { // name, optionally followed by ":*" or ":name"
+ _cur_lexeme_contents.begin = cur;
+
+ while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
+
+ if (cur[0] == ':') {
+ if (cur[1] == '*') { // namespace test ncname:*
+ cur += 2; // :*
+ } else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) { // namespace test qname
+ cur++; // :
+
+ while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
+ }
+ }
+
+ _cur_lexeme_contents.end = cur;
+
+ _cur_lexeme = lex_string;
+ } else {
+ _cur_lexeme = lex_none; // character that cannot start any lexeme
+ }
+ }
+
+ _cur = cur;
+ }
+
+ lexeme_t current() const { // kind of the current lexeme
+ return _cur_lexeme;
+ }
+
+ const char_t* current_pos() const { // start of the current lexeme in the query string
+ return _cur_lexeme_pos;
+ }
+
+ const xpath_lexer_string& contents() const { // text of the current lexeme; only valid for the four kinds asserted below
+ assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
+
+ return _cur_lexeme_contents;
+ }
+};
+
+enum ast_type_t { // node kinds of the query syntax tree; comments show the corresponding XPath syntax in terms of child nodes
+ ast_unknown,
+ ast_op_or, // left or right
+ ast_op_and, // left and right
+ ast_op_equal, // left = right
+ ast_op_not_equal, // left != right
+ ast_op_less, // left < right
+ ast_op_greater, // left > right
+ ast_op_less_or_equal, // left <= right
+ ast_op_greater_or_equal, // left >= right
+ ast_op_add, // left + right
+ ast_op_subtract, // left - right
+ ast_op_multiply, // left * right
+ ast_op_divide, // left div right
+ ast_op_mod, // left mod right
+ ast_op_negate, // - left (unary minus)
+ ast_op_union, // left | right
+ ast_predicate, // apply predicate to set; next points to next predicate
+ ast_filter, // select * from left where right
+ ast_string_constant, // string constant
+ ast_number_constant, // number constant
+ ast_variable, // variable
+ ast_func_last, // last()
+ ast_func_position, // position()
+ ast_func_count, // count(left)
+ ast_func_id, // id(left)
+ ast_func_local_name_0, // local-name()
+ ast_func_local_name_1, // local-name(left)
+ ast_func_namespace_uri_0, // namespace-uri()
+ ast_func_namespace_uri_1, // namespace-uri(left)
+ ast_func_name_0, // name()
+ ast_func_name_1, // name(left)
+ ast_func_string_0, // string()
+ ast_func_string_1, // string(left)
+ ast_func_concat, // concat(left, right, siblings)
+ ast_func_starts_with, // starts-with(left, right)
+ ast_func_contains, // contains(left, right)
+ ast_func_substring_before, // substring-before(left, right)
+ ast_func_substring_after, // substring-after(left, right)
+ ast_func_substring_2, // substring(left, right)
+ ast_func_substring_3, // substring(left, right, third)
+ ast_func_string_length_0, // string-length()
+ ast_func_string_length_1, // string-length(left)
+ ast_func_normalize_space_0, // normalize-space()
+ ast_func_normalize_space_1, // normalize-space(left)
+ ast_func_translate, // translate(left, right, third)
+ ast_func_boolean, // boolean(left)
+ ast_func_not, // not(left)
+ ast_func_true, // true()
+ ast_func_false, // false()
+ ast_func_lang, // lang(left)
+ ast_func_number_0, // number()
+ ast_func_number_1, // number(left)
+ ast_func_sum, // sum(left)
+ ast_func_floor, // floor(left)
+ ast_func_ceiling, // ceiling(left)
+ ast_func_round, // round(left)
+ ast_step, // process set left with step
+ ast_step_root, // select root node
+
+ ast_opt_translate_table, // translate(left, right, third) where right/third are constants
+ ast_opt_compare_attribute // @name = 'string'
+};
+
+enum axis_t { // one enumerator per XPath axis name; presumably what xpath_ast_node::_axis holds for ast_step nodes (confirm at the use sites)
+ axis_ancestor,
+ axis_ancestor_or_self,
+ axis_attribute,
+ axis_child,
+ axis_descendant,
+ axis_descendant_or_self,
+ axis_following,
+ axis_following_sibling,
+ axis_namespace,
+ axis_parent,
+ axis_preceding,
+ axis_preceding_sibling,
+ axis_self
+};
+
+enum nodetest_t { // node test kinds checked by xpath_ast_node::step_push through _test
+ nodetest_none,
+ nodetest_name, // exact name match against _data.nodetest
+ nodetest_type_node, // accepts any node; for attributes, any that passes is_xpath_attribute
+ nodetest_type_comment,
+ nodetest_type_pi,
+ nodetest_type_text,
+ nodetest_pi,
+ nodetest_all, // for attributes: any attribute that passes is_xpath_attribute
+ nodetest_all_in_namespace // for attributes: name must start with _data.nodetest
+};
+
+enum predicate_t { // predicate classification stored in _test of ast_filter/ast_predicate nodes; selects the evaluation strategy in apply_predicate
+ predicate_default,
+ predicate_posinv,
+ predicate_constant, // handled by apply_predicate_number_const
+ predicate_constant_one // handled by apply_predicate_number_const
+};
+
+enum nodeset_eval_t { // how much of a node set the caller needs; eval_once uses it to decide whether evaluation may stop at the first match
+ nodeset_eval_all, // never allows stopping early
+ nodeset_eval_any, // always allows stopping early
+ nodeset_eval_first // allows stopping early only when the set is type_sorted
+};
+
+template <axis_t N> struct axis_to_type { // wraps an axis value in a distinct type per axis
+ static const axis_t axis; // equals the template argument N (defined below)
+};
+
+template <axis_t N> const axis_t axis_to_type<N>::axis = N; // out-of-class definition of the static member
+
+class xpath_ast_node
+{
+private:
+ // node type
+ char _type;
+ char _rettype;
+
+ // for ast_step
+ char _axis;
+
+ // for ast_step/ast_predicate/ast_filter
+ char _test;
+
+ // tree node structure
+ xpath_ast_node* _left;
+ xpath_ast_node* _right;
+ xpath_ast_node* _next;
+
+ union {
+ // value for ast_string_constant
+ const char_t* string;
+ // value for ast_number_constant
+ double number;
+ // variable for ast_variable
+ xpath_variable* variable;
+ // node test for ast_step (node name/namespace/node type/pi target)
+ const char_t* nodetest;
+ // table for ast_opt_translate_table
+ const unsigned char* table;
+ } _data;
+
+ xpath_ast_node(const xpath_ast_node&);
+ xpath_ast_node& operator=(const xpath_ast_node&);
+
+ template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) { // equality-style comparison of two expressions; operand types decide the conversion, comp performs the actual comparison
+ xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
+
+ if (lt != xpath_type_node_set && rt != xpath_type_node_set) { // neither side is a node set: compare as boolean, else number, else string (in that priority)
+ if (lt == xpath_type_boolean || rt == xpath_type_boolean)
+ return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
+ else if (lt == xpath_type_number || rt == xpath_type_number)
+ return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
+ else if (lt == xpath_type_string || rt == xpath_type_string) {
+ xpath_allocator_capture cr(stack.result); // presumably scopes temporaries allocated from stack.result to this block (confirm in xpath_allocator_capture)
+
+ xpath_string ls = lhs->eval_string(c, stack);
+ xpath_string rs = rhs->eval_string(c, stack);
+
+ return comp(ls, rs);
+ }
+ } else if (lt == xpath_type_node_set && rt == xpath_type_node_set) { // both sides are node sets: true if any pair of string values satisfies comp
+ xpath_allocator_capture cr(stack.result);
+
+ xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
+
+ for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
+ for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) {
+ xpath_allocator_capture cri(stack.result); // per-pair capture for the two temporary string values
+
+ if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
+ return true;
+ }
+
+ return false;
+ } else { // exactly one side is a node set
+ if (lt == xpath_type_node_set) { // make lhs the non-node-set operand; comp then receives the operands in swapped order
+ swap(lhs, rhs);
+ swap(lt, rt);
+ }
+
+ if (lt == xpath_type_boolean)
+ return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
+ else if (lt == xpath_type_number) { // true if any node's string value, converted to a number, satisfies comp
+ xpath_allocator_capture cr(stack.result);
+
+ double l = lhs->eval_number(c, stack);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
+
+ for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) {
+ xpath_allocator_capture cri(stack.result);
+
+ if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
+ return true;
+ }
+
+ return false;
+ } else if (lt == xpath_type_string) { // true if any node's string value satisfies comp
+ xpath_allocator_capture cr(stack.result);
+
+ xpath_string l = lhs->eval_string(c, stack);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
+
+ for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) {
+ xpath_allocator_capture cri(stack.result);
+
+ if (comp(l, string_value(*ri, stack.result)))
+ return true;
+ }
+
+ return false;
+ }
+ }
+
+ assert(!"Wrong types"); // reached only when no branch above matched the operand types
+ return false;
+ }
+
+  // Tells whether evaluation may stop after the first match: for a set of
+  // type_sorted every mode except nodeset_eval_all qualifies, for any other
+  // ordering only nodeset_eval_any does.
+  static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval) {
+    if (type == xpath_node_set::type_sorted)
+      return eval != nodeset_eval_all;
+
+    return eval == nodeset_eval_any;
+  }
+
+ template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) { // relational comparison: operands are always compared as numbers; node sets match if any node satisfies comp
+ xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
+
+ if (lt != xpath_type_node_set && rt != xpath_type_node_set) // two scalar operands
+ return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
+ else if (lt == xpath_type_node_set && rt == xpath_type_node_set) { // two node sets: true if any pair of converted values satisfies comp
+ xpath_allocator_capture cr(stack.result); // presumably scopes temporaries allocated from stack.result to this block (confirm in xpath_allocator_capture)
+
+ xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
+
+ for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) {
+ xpath_allocator_capture cri(stack.result);
+
+ double l = convert_string_to_number(string_value(*li, stack.result).c_str()); // left value is converted once per outer iteration
+
+ for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) {
+ xpath_allocator_capture crii(stack.result);
+
+ if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
+ return true;
+ }
+ }
+
+ return false;
+ } else if (lt != xpath_type_node_set && rt == xpath_type_node_set) { // scalar on the left, node set on the right
+ xpath_allocator_capture cr(stack.result);
+
+ double l = lhs->eval_number(c, stack);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
+
+ for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) {
+ xpath_allocator_capture cri(stack.result);
+
+ if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
+ return true;
+ }
+
+ return false;
+ } else if (lt == xpath_type_node_set && rt != xpath_type_node_set) { // node set on the left, scalar on the right; operand order is kept for comp
+ xpath_allocator_capture cr(stack.result);
+
+ xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
+ double r = rhs->eval_number(c, stack);
+
+ for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) {
+ xpath_allocator_capture cri(stack.result);
+
+ if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
+ return true;
+ }
+
+ return false;
+ } else { // the four cases above are exhaustive
+ assert(!"Wrong types");
+ return false;
+ }
+ }
+
+  // Filters the nodes of `ns` from index `first` on, keeping those for which
+  // `expr` evaluates to true. Each node is evaluated with its 1-based
+  // position and the number of candidates as context. With `once` set the
+  // scan stops after the first kept node.
+  static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) {
+    assert(ns.size() >= first);
+    assert(expr->rettype() != xpath_type_number);
+
+    const size_t context_size = ns.size() - first;
+
+    // kept nodes are compacted towards the start of the filtered range
+    xpath_node* keep = ns.begin() + first;
+    xpath_node* cur = keep;
+    size_t position = 1;
+
+    while (cur != ns.end()) {
+      xpath_context c(*cur, position, context_size);
+
+      if (expr->eval_boolean(c, stack)) {
+        *keep++ = *cur;
+
+        if (once) break;
+      }
+
+      ++cur;
+      ++position;
+    }
+
+    ns.truncate(keep);
+  }
+
+  // Filters the nodes of `ns` from index `first` on, keeping those for which
+  // the numeric value of `expr` equals the node's 1-based position. With
+  // `once` set the scan stops after the first kept node.
+  static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) {
+    assert(ns.size() >= first);
+    assert(expr->rettype() == xpath_type_number);
+
+    const size_t context_size = ns.size() - first;
+
+    // kept nodes are compacted towards the start of the filtered range
+    xpath_node* keep = ns.begin() + first;
+    xpath_node* cur = keep;
+    size_t position = 1;
+
+    while (cur != ns.end()) {
+      xpath_context c(*cur, position, context_size);
+
+      if (expr->eval_number(c, stack) == position) {
+        *keep++ = *cur;
+
+        if (once) break;
+      }
+
+      ++cur;
+      ++position;
+    }
+
+    ns.truncate(keep);
+  }
+
+  // Numeric predicate evaluated a single time against a context without a
+  // node (the caller uses this for predicates flagged as constant). The
+  // filtered range shrinks to the one node at that position, or to nothing
+  // when the value is not an integer within [1, size].
+  static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack) {
+    assert(ns.size() >= first);
+    assert(expr->rettype() == xpath_type_number);
+
+    const size_t context_size = ns.size() - first;
+
+    xpath_node* keep = ns.begin() + first;
+
+    xpath_context c(xpath_node(), 1, context_size);
+
+    const double wanted = expr->eval_number(c, stack);
+
+    if (wanted >= 1.0 && wanted <= context_size) {
+      const size_t index = static_cast<size_t>(wanted);
+
+      // fractional positions select nothing
+      if (wanted == index) {
+        xpath_node picked = keep[index - 1];
+
+        *keep++ = picked;
+      }
+    }
+
+    ns.truncate(keep);
+  }
+
+  // Applies this predicate/filter node to the nodes of `ns` from index
+  // `first` on, choosing the strategy from _test and from the return type
+  // of the predicate expression in _right.
+  void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once) {
+    if (ns.size() == first) return;
+
+    assert(_type == ast_filter || _type == ast_predicate);
+
+    const bool is_constant = _test == predicate_constant || _test == predicate_constant_one;
+
+    if (is_constant) {
+      apply_predicate_number_const(ns, first, _right, stack);
+      return;
+    }
+
+    if (_right->rettype() == xpath_type_number) {
+      apply_predicate_number(ns, first, _right, stack, once);
+      return;
+    }
+
+    apply_predicate_boolean(ns, first, _right, stack, once);
+  }
+
+ // Runs every predicate chained from _right (linked through _next) over the nodes
+ // of ns starting at index first. Only the final predicate in the chain may be
+ // told to stop after its first match, and only if eval_once allows it.
+ void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval) {
+     if (ns.size() == first) return;
+
+     const bool last_once = eval_once(ns.type(), eval);
+
+     xpath_ast_node* pred = _right;
+
+     while (pred) {
+         const bool is_final = !pred->_next;
+
+         pred->apply_predicate(ns, first, stack, is_final && last_once);
+
+         pred = pred->_next;
+     }
+ }
+
+ // Appends attribute a (owned by parent) to ns when it satisfies this step's node
+ // test. Returns true if the attribute was appended, false otherwise. Node tests
+ // that are not handled below never match an attribute.
+ bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc) {
+     assert(a);
+
+     const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");
+
+     bool accepted = false;
+
+     switch (_test) {
+     case nodetest_name:
+         accepted = strequal(name, _data.nodetest) && is_xpath_attribute(name);
+         break;
+
+     case nodetest_type_node:
+     case nodetest_all:
+         accepted = is_xpath_attribute(name);
+         break;
+
+     case nodetest_all_in_namespace:
+         accepted = starts_with(name, _data.nodetest) && is_xpath_attribute(name);
+         break;
+
+     default:
+         break;
+     }
+
+     if (accepted)
+         ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
+
+     return accepted;
+ }
+
+ // Appends node n to ns when it satisfies this step's node test (_test).
+ // Returns true if the node was appended, false otherwise.
+ bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc) {
+     assert(n);
+
+     const xml_node_type type = PUGI__NODETYPE(n);
+
+     bool matches = false;
+
+     switch (_test) {
+     case nodetest_name:
+         // element with exactly the requested name
+         matches = type == node_element && n->name && strequal(n->name, _data.nodetest);
+         break;
+
+     case nodetest_type_node:
+         // node() accepts every node
+         matches = true;
+         break;
+
+     case nodetest_type_comment:
+         matches = type == node_comment;
+         break;
+
+     case nodetest_type_text:
+         matches = type == node_pcdata || type == node_cdata;
+         break;
+
+     case nodetest_type_pi:
+         matches = type == node_pi;
+         break;
+
+     case nodetest_pi:
+         // processing instruction with exactly the requested target name
+         matches = type == node_pi && n->name && strequal(n->name, _data.nodetest);
+         break;
+
+     case nodetest_all:
+         matches = type == node_element;
+         break;
+
+     case nodetest_all_in_namespace:
+         // element whose name starts with the stored prefix
+         matches = type == node_element && n->name && starts_with(n->name, _data.nodetest);
+         break;
+
+     default:
+         // the switch is over node tests (not axes), so report it as such
+         assert(!"Unknown node test");
+     }
+
+     if (matches)
+         ns.push_back(xml_node(n), alloc);
+
+     return matches;
+ }
+
+ // Collects into ns the nodes reachable from node n along the axis selected at
+ // compile time by T::axis. Every candidate is offered to step_push, which appends
+ // it only if it passes this step's node test.
+ //
+ // ns    - destination node set (appended to)
+ // n     - context node
+ // alloc - allocator handed to step_push for growing ns
+ // once  - when true, traversal stops right after the first node that was appended
+ //         (ignored for the self and parent axes, which offer at most one node)
+ //
+ // `step_push(...) & once` uses bitwise & on two bools, so step_push is always
+ // called; the early return fires only when it appended a node and once is set.
+ template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T) {
+ const axis_t axis = T::axis;
+
+ switch (axis) {
+ case axis_attribute: {
+ // walk the attribute list of n in order
+ for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
+ if (step_push(ns, a, n, alloc) & once)
+ return;
+
+ break;
+ }
+
+ case axis_child: {
+ // direct children of n, first to last
+ for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
+ if (step_push(ns, c, alloc) & once)
+ return;
+
+ break;
+ }
+
+ case axis_descendant:
+ case axis_descendant_or_self: {
+ if (axis == axis_descendant_or_self)
+ if (step_push(ns, n, alloc) & once)
+ return;
+
+ // iterative pre-order walk of the subtree below n
+ xml_node_struct* cur = n->first_child;
+
+ while (cur) {
+ if (step_push(ns, cur, alloc) & once)
+ return;
+
+ if (cur->first_child)
+ cur = cur->first_child;
+ else {
+ // no children: climb until a node with a next sibling is found;
+ // climbing back up to n means the whole subtree has been visited
+ while (!cur->next_sibling) {
+ cur = cur->parent;
+
+ if (cur == n) return;
+ }
+
+ cur = cur->next_sibling;
+ }
+ }
+
+ break;
+ }
+
+ case axis_following_sibling: {
+ for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
+ if (step_push(ns, c, alloc) & once)
+ return;
+
+ break;
+ }
+
+ case axis_preceding_sibling: {
+ // NOTE(review): prev_sibling_c appears to be a cyclic link (the first sibling's
+ // points at the last one), so reaching a node without next_sibling means the
+ // walk wrapped around - confirm against xml_node_struct
+ for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
+ if (step_push(ns, c, alloc) & once)
+ return;
+
+ break;
+ }
+
+ case axis_following: {
+ xml_node_struct* cur = n;
+
+ // exit from this node so that we don't include descendants
+ while (!cur->next_sibling) {
+ cur = cur->parent;
+
+ if (!cur) return;
+ }
+
+ cur = cur->next_sibling;
+
+ // pre-order walk over everything after n; ends when climbing runs past the topmost node
+ while (cur) {
+ if (step_push(ns, cur, alloc) & once)
+ return;
+
+ if (cur->first_child)
+ cur = cur->first_child;
+ else {
+ while (!cur->next_sibling) {
+ cur = cur->parent;
+
+ if (!cur) return;
+ }
+
+ cur = cur->next_sibling;
+ }
+ }
+
+ break;
+ }
+
+ case axis_preceding: {
+ xml_node_struct* cur = n;
+
+ // exit from this node so that we don't include descendants
+ while (!cur->prev_sibling_c->next_sibling) {
+ cur = cur->parent;
+
+ if (!cur) return;
+ }
+
+ cur = cur->prev_sibling_c;
+
+ // backwards walk: descend to the last child first, push leaves, and push
+ // inner nodes on the way back up unless they are ancestors of n
+ while (cur) {
+ if (cur->first_child)
+ cur = cur->first_child->prev_sibling_c;
+ else {
+ // leaf node, can't be ancestor
+ if (step_push(ns, cur, alloc) & once)
+ return;
+
+ while (!cur->prev_sibling_c->next_sibling) {
+ cur = cur->parent;
+
+ if (!cur) return;
+
+ if (!node_is_ancestor(cur, n))
+ if (step_push(ns, cur, alloc) & once)
+ return;
+ }
+
+ cur = cur->prev_sibling_c;
+ }
+ }
+
+ break;
+ }
+
+ case axis_ancestor:
+ case axis_ancestor_or_self: {
+ if (axis == axis_ancestor_or_self)
+ if (step_push(ns, n, alloc) & once)
+ return;
+
+ // follow parent links up to the topmost node
+ xml_node_struct* cur = n->parent;
+
+ while (cur) {
+ if (step_push(ns, cur, alloc) & once)
+ return;
+
+ cur = cur->parent;
+ }
+
+ break;
+ }
+
+ case axis_self: {
+ step_push(ns, n, alloc);
+
+ break;
+ }
+
+ case axis_parent: {
+ if (n->parent)
+ step_push(ns, n->parent, alloc);
+
+ break;
+ }
+
+ default:
+ assert(!"Unimplemented axis");
+ }
+ }
+
+ // Collects into ns the nodes reachable along axis T::axis when the context item
+ // is attribute a of element p. Only the axes listed below are handled here; any
+ // other axis trips the assertion in the default branch.
+ //
+ // ns    - destination node set (appended to)
+ // a, p  - context attribute and the node that owns it
+ // alloc - allocator handed to step_push for growing ns
+ // once  - when true, traversal stops right after the first node that was appended
+ // v     - axis tag object, forwarded when delegating to the node overload
+ template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v) {
+ const axis_t axis = T::axis;
+
+ switch (axis) {
+ case axis_ancestor:
+ case axis_ancestor_or_self: {
+ if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
+ if (step_push(ns, a, p, alloc) & once)
+ return;
+
+ // the owning element and all of its ancestors
+ xml_node_struct* cur = p;
+
+ while (cur) {
+ if (step_push(ns, cur, alloc) & once)
+ return;
+
+ cur = cur->parent;
+ }
+
+ break;
+ }
+
+ case axis_descendant_or_self:
+ case axis_self: {
+ // only the attribute itself is offered on these axes
+ if (_test == nodetest_type_node) // reject attributes based on principal node type test
+ step_push(ns, a, p, alloc);
+
+ break;
+ }
+
+ case axis_following: {
+ xml_node_struct* cur = p;
+
+ // pre-order walk that starts at p but advances before pushing, so p itself
+ // is skipped while its descendants and everything after it are offered
+ while (cur) {
+ if (cur->first_child)
+ cur = cur->first_child;
+ else {
+ while (!cur->next_sibling) {
+ cur = cur->parent;
+
+ if (!cur) return;
+ }
+
+ cur = cur->next_sibling;
+ }
+
+ if (step_push(ns, cur, alloc) & once)
+ return;
+ }
+
+ break;
+ }
+
+ case axis_parent: {
+ step_push(ns, p, alloc);
+
+ break;
+ }
+
+ case axis_preceding: {
+ // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
+ step_fill(ns, p, alloc, once, v);
+ break;
+ }
+
+ default:
+ assert(!"Unimplemented axis");
+ }
+ }
+
+ // Dispatches to the node or the attribute overload of step_fill depending on
+ // what the context item xn refers to. Attribute context items are only
+ // processed for the axes the attribute overload implements.
+ template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v) {
+     const axis_t axis = T::axis;
+
+     if (xn.node()) {
+         step_fill(ns, xn.node().internal_object(), alloc, once, v);
+         return;
+     }
+
+     // axes that can yield results when starting from an attribute
+     const bool attribute_capable =
+         axis == axis_ancestor ||
+         axis == axis_ancestor_or_self ||
+         axis == axis_descendant_or_self ||
+         axis == axis_following ||
+         axis == axis_parent ||
+         axis == axis_preceding ||
+         axis == axis_self;
+
+     if (!attribute_capable) return;
+
+     if (xn.attribute() && xn.parent())
+         step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v);
+ }
+
+ // Evaluates this location step for axis T::axis and returns the resulting node set.
+ // If the step has a left-hand expression (_left), the axis is applied to every node
+ // that expression yields; otherwise it is applied to the context node c.n. Predicates
+ // chained from _right are applied to the nodes contributed by each context item.
+ //
+ // c     - evaluation context
+ // stack - allocator pair; results are allocated from stack.result
+ // eval  - how much of the set the caller needs (passed on to eval_once/apply_predicates)
+ // v     - axis tag object forwarded to step_fill
+ template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v) {
+ const axis_t axis = T::axis;
+ // reverse axes produce their nodes in reverse document order
+ const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
+ const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
+
+ // step_fill may stop after the first pushed node when: a named attribute is requested,
+ // or there are no predicates and eval_once permits it, or the only predicate is [1]
+ bool once =
+ (axis == axis_attribute && _test == nodetest_name) ||
+ (!_right && eval_once(axis_type, eval)) ||
+ (_right && !_right->_next && _right->_test == predicate_constant_one);
+
+ xpath_node_set_raw ns;
+ ns.set_type(axis_type);
+
+ if (_left) {
+ xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);
+
+ // self axis preserves the original order
+ if (axis == axis_self) ns.set_type(s.type());
+
+ for (const xpath_node* it = s.begin(); it != s.end(); ++it) {
+ // remember where this context item's nodes start so predicates only see them
+ size_t size = ns.size();
+
+ // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
+ if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
+
+ step_fill(ns, *it, stack.result, once, v);
+ if (_right) apply_predicates(ns, size, stack, eval);
+ }
+ } else {
+ step_fill(ns, c.n, stack.result, once, v);
+ if (_right) apply_predicates(ns, 0, stack, eval);
+ }
+
+ // child, attribute and self axes always generate unique set of nodes
+ // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
+ if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
+ ns.remove_duplicates();
+
+ return ns;
+ }
+
+public:
+ // Constructors. Each stores the node type and return type narrowed to char, and
+ // zero-initializes the axis, test and link fields that it does not set explicitly.
+
+ // String constant node: value is stored as-is (the pointer is not copied).
+ xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
+ _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) {
+ assert(type == ast_string_constant);
+ _data.string = value;
+ }
+
+ // Number constant node.
+ xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
+ _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) {
+ assert(type == ast_number_constant);
+ _data.number = value;
+ }
+
+ // Variable reference node.
+ xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
+ _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) {
+ assert(type == ast_variable);
+ _data.variable = value;
+ }
+
+ // Generic node with up to two operands (operators, function calls, ...).
+ xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
+ _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0) {
+ }
+
+ // Location step node: always returns a node set; contents is the name used by the node test.
+ xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
+ _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0) {
+ assert(type == ast_step);
+ _data.nodetest = contents;
+ }
+
+ // Filter or predicate node: always returns a node set; test is the predicate classification.
+ xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test):
+ _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0) {
+ assert(type == ast_filter || type == ast_predicate);
+ }
+
+ // Sets the _next link, which chains this node to the following one in a list
+ // (predicate chains and extra function arguments are walked through _next).
+ void set_next(xpath_ast_node* value) {
+ _next = value;
+ }
+
+ // Replaces the right-hand child (_right) of this node.
+ void set_right(xpath_ast_node* value) {
+ _right = value;
+ }
+
+ // Evaluates this expression in context c and returns its value as a boolean.
+ // Node types that produce a boolean directly are handled case by case; any other
+ // node is evaluated according to its return type (_rettype) and converted:
+ // numbers through convert_number_to_boolean, strings and node sets by non-emptiness.
+ bool eval_boolean(const xpath_context& c, const xpath_stack& stack) {
+ switch (_type) {
+ case ast_op_or:
+ return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
+
+ case ast_op_and:
+ return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
+
+ case ast_op_equal:
+ return compare_eq(_left, _right, c, stack, equal_to());
+
+ case ast_op_not_equal:
+ return compare_eq(_left, _right, c, stack, not_equal_to());
+
+ case ast_op_less:
+ return compare_rel(_left, _right, c, stack, less());
+
+ // a > b is evaluated as b < a by swapping the operands
+ case ast_op_greater:
+ return compare_rel(_right, _left, c, stack, less());
+
+ case ast_op_less_or_equal:
+ return compare_rel(_left, _right, c, stack, less_equal());
+
+ // a >= b is evaluated as b <= a by swapping the operands
+ case ast_op_greater_or_equal:
+ return compare_rel(_right, _left, c, stack, less_equal());
+
+ case ast_func_starts_with: {
+ xpath_allocator_capture cr(stack.result);
+
+ xpath_string lr = _left->eval_string(c, stack);
+ xpath_string rr = _right->eval_string(c, stack);
+
+ return starts_with(lr.c_str(), rr.c_str());
+ }
+
+ case ast_func_contains: {
+ xpath_allocator_capture cr(stack.result);
+
+ xpath_string lr = _left->eval_string(c, stack);
+ xpath_string rr = _right->eval_string(c, stack);
+
+ return find_substring(lr.c_str(), rr.c_str()) != 0;
+ }
+
+ case ast_func_boolean:
+ return _left->eval_boolean(c, stack);
+
+ case ast_func_not:
+ return !_left->eval_boolean(c, stack);
+
+ case ast_func_true:
+ return true;
+
+ case ast_func_false:
+ return false;
+
+ case ast_func_lang: {
+ // lang() is always false for an attribute context item
+ if (c.n.attribute()) return false;
+
+ xpath_allocator_capture cr(stack.result);
+
+ xpath_string lang = _left->eval_string(c, stack);
+
+ // the nearest xml:lang attribute on the context node or its ancestors decides
+ for (xml_node n = c.n.node(); n; n = n.parent()) {
+ xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
+
+ if (a) {
+ const char_t* value = a.value();
+
+ // strnicmp / strncasecmp is not portable
+ for (const char_t* lit = lang.c_str(); *lit; ++lit) {
+ if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
+ ++value;
+ }
+
+ // the requested language must match the whole value or a prefix that ends at '-'
+ return *value == 0 || *value == '-';
+ }
+ }
+
+ return false;
+ }
+
+ case ast_opt_compare_attribute: {
+ // optimized form of @attr = 'value' / @attr = $value produced by optimize_self
+ const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();
+
+ xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);
+
+ return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
+ }
+
+ case ast_variable: {
+ assert(_rettype == _data.variable->type());
+
+ if (_rettype == xpath_type_boolean)
+ return _data.variable->get_boolean();
+
+ // fallthrough to type conversion
+ }
+
+ default: {
+ switch (_rettype) {
+ case xpath_type_number:
+ return convert_number_to_boolean(eval_number(c, stack));
+
+ case xpath_type_string: {
+ xpath_allocator_capture cr(stack.result);
+
+ return !eval_string(c, stack).empty();
+ }
+
+ case xpath_type_node_set: {
+ xpath_allocator_capture cr(stack.result);
+
+ // any single node is enough to decide, hence nodeset_eval_any
+ return !eval_node_set(c, stack, nodeset_eval_any).empty();
+ }
+
+ default:
+ assert(!"Wrong expression for return type boolean");
+ return false;
+ }
+ }
+ }
+ }
+
+ // Evaluates this expression in context c and returns its value as a number.
+ // Arithmetic operators and numeric functions are handled case by case; any other
+ // node is evaluated according to its return type (_rettype) and converted:
+ // booleans to 1/0, strings and node sets through convert_string_to_number.
+ double eval_number(const xpath_context& c, const xpath_stack& stack) {
+ switch (_type) {
+ case ast_op_add:
+ return _left->eval_number(c, stack) + _right->eval_number(c, stack);
+
+ case ast_op_subtract:
+ return _left->eval_number(c, stack) - _right->eval_number(c, stack);
+
+ case ast_op_multiply:
+ return _left->eval_number(c, stack) * _right->eval_number(c, stack);
+
+ case ast_op_divide:
+ return _left->eval_number(c, stack) / _right->eval_number(c, stack);
+
+ case ast_op_mod:
+ return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
+
+ case ast_op_negate:
+ return -_left->eval_number(c, stack);
+
+ case ast_number_constant:
+ return _data.number;
+
+ // last() and position() read the context size and position directly
+ case ast_func_last:
+ return static_cast<double>(c.size);
+
+ case ast_func_position:
+ return static_cast<double>(c.position);
+
+ case ast_func_count: {
+ // NOTE(review): xpath_allocator_capture presumably rolls stack.result back when it
+ // goes out of scope, releasing the temporary node set - confirm against its definition
+ xpath_allocator_capture cr(stack.result);
+
+ return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
+ }
+
+ case ast_func_string_length_0: {
+ xpath_allocator_capture cr(stack.result);
+
+ return static_cast<double>(string_value(c.n, stack.result).length());
+ }
+
+ case ast_func_string_length_1: {
+ xpath_allocator_capture cr(stack.result);
+
+ return static_cast<double>(_left->eval_string(c, stack).length());
+ }
+
+ case ast_func_number_0: {
+ xpath_allocator_capture cr(stack.result);
+
+ return convert_string_to_number(string_value(c.n, stack.result).c_str());
+ }
+
+ case ast_func_number_1:
+ return _left->eval_number(c, stack);
+
+ case ast_func_sum: {
+ xpath_allocator_capture cr(stack.result);
+
+ double r = 0;
+
+ xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);
+
+ // add up the numeric value of each node's string value
+ for (const xpath_node* it = ns.begin(); it != ns.end(); ++it) {
+ xpath_allocator_capture cri(stack.result);
+
+ r += convert_string_to_number(string_value(*it, stack.result).c_str());
+ }
+
+ return r;
+ }
+
+ case ast_func_floor: {
+ double r = _left->eval_number(c, stack);
+
+ // r == r is false only for NaN, which is returned unchanged
+ return r == r ? floor(r) : r;
+ }
+
+ case ast_func_ceiling: {
+ double r = _left->eval_number(c, stack);
+
+ // r == r is false only for NaN, which is returned unchanged
+ return r == r ? ceil(r) : r;
+ }
+
+ case ast_func_round:
+ return round_nearest_nzero(_left->eval_number(c, stack));
+
+ case ast_variable: {
+ assert(_rettype == _data.variable->type());
+
+ if (_rettype == xpath_type_number)
+ return _data.variable->get_number();
+
+ // fallthrough to type conversion
+ }
+
+ default: {
+ switch (_rettype) {
+ case xpath_type_boolean:
+ return eval_boolean(c, stack) ? 1 : 0;
+
+ case xpath_type_string: {
+ xpath_allocator_capture cr(stack.result);
+
+ return convert_string_to_number(eval_string(c, stack).c_str());
+ }
+
+ // node sets go through eval_string first, exactly like strings
+ case xpath_type_node_set: {
+ xpath_allocator_capture cr(stack.result);
+
+ return convert_string_to_number(eval_string(c, stack).c_str());
+ }
+
+ default:
+ assert(!"Wrong expression for return type number");
+ return 0;
+ }
+
+ }
+ }
+ }
+
+ // Implements concat(): evaluates _left and every argument chained from _right
+ // (through _next) as strings and returns their concatenation. The arguments are
+ // evaluated with the allocator pair swapped; the final string is allocated from
+ // stack.result.
+ xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack) {
+     assert(_type == ast_func_concat);
+
+     xpath_allocator_capture ct(stack.temp);
+
+     // number of arguments: _left plus everything chained from _right
+     size_t count = 1;
+     for (xpath_ast_node* arg = _right; arg; arg = arg->_next) ++count;
+
+     // small argument lists use a local array, larger ones take storage from stack.temp
+     xpath_string static_buffer[4];
+     xpath_string* buffer = static_buffer;
+
+     if (count > sizeof(static_buffer) / sizeof(static_buffer[0])) {
+         buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
+         assert(buffer);
+     }
+
+     // evaluate every argument with result/temp swapped
+     xpath_stack swapped_stack = {stack.temp, stack.result};
+
+     buffer[0] = _left->eval_string(c, swapped_stack);
+
+     size_t filled = 1;
+     xpath_ast_node* arg = _right;
+
+     while (arg) {
+         buffer[filled] = arg->eval_string(c, swapped_stack);
+
+         ++filled;
+         arg = arg->_next;
+     }
+
+     assert(filled == count);
+
+     // total number of characters, excluding the terminator
+     size_t length = 0;
+
+     for (size_t i = 0; i < count; ++i)
+         length += buffer[i].length();
+
+     // build the final string in the result allocator
+     char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
+     assert(result);
+
+     char_t* out = result;
+
+     for (size_t j = 0; j < count; ++j) {
+         const char_t* src = buffer[j].c_str();
+
+         while (*src) {
+             *out = *src;
+             ++out;
+             ++src;
+         }
+     }
+
+     *out = 0;
+
+     return xpath_string::from_heap_preallocated(result, out);
+ }
+
+ // Evaluates this expression in context c and returns its value as a string.
+ // String-producing node types are handled case by case; any other node is
+ // evaluated according to its return type (_rettype) and converted: booleans to
+ // "true"/"false", numbers through convert_number_to_string, and node sets to the
+ // string value of their first node (empty string for an empty set).
+ //
+ // Several cases evaluate operands with the allocator pair swapped (swapped_stack)
+ // and build only the returned string from stack.result.
+ xpath_string eval_string(const xpath_context& c, const xpath_stack& stack) {
+ switch (_type) {
+ case ast_string_constant:
+ return xpath_string::from_const(_data.string);
+
+ // the *_0 variants operate on the context node, the *_1 variants on the
+ // first node of the node set given as argument
+ case ast_func_local_name_0: {
+ xpath_node na = c.n;
+
+ return xpath_string::from_const(local_name(na));
+ }
+
+ case ast_func_local_name_1: {
+ xpath_allocator_capture cr(stack.result);
+
+ xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
+ xpath_node na = ns.first();
+
+ return xpath_string::from_const(local_name(na));
+ }
+
+ case ast_func_name_0: {
+ xpath_node na = c.n;
+
+ return xpath_string::from_const(qualified_name(na));
+ }
+
+ case ast_func_name_1: {
+ xpath_allocator_capture cr(stack.result);
+
+ xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
+ xpath_node na = ns.first();
+
+ return xpath_string::from_const(qualified_name(na));
+ }
+
+ case ast_func_namespace_uri_0: {
+ xpath_node na = c.n;
+
+ return xpath_string::from_const(namespace_uri(na));
+ }
+
+ case ast_func_namespace_uri_1: {
+ xpath_allocator_capture cr(stack.result);
+
+ xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
+ xpath_node na = ns.first();
+
+ return xpath_string::from_const(namespace_uri(na));
+ }
+
+ case ast_func_string_0:
+ return string_value(c.n, stack.result);
+
+ case ast_func_string_1:
+ return _left->eval_string(c, stack);
+
+ case ast_func_concat:
+ return eval_string_concat(c, stack);
+
+ case ast_func_substring_before: {
+ xpath_allocator_capture cr(stack.temp);
+
+ xpath_stack swapped_stack = {stack.temp, stack.result};
+
+ xpath_string s = _left->eval_string(c, swapped_stack);
+ xpath_string p = _right->eval_string(c, swapped_stack);
+
+ const char_t* pos = find_substring(s.c_str(), p.c_str());
+
+ // everything before the first occurrence of p, or an empty string if p does not occur
+ return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string();
+ }
+
+ case ast_func_substring_after: {
+ xpath_allocator_capture cr(stack.temp);
+
+ xpath_stack swapped_stack = {stack.temp, stack.result};
+
+ xpath_string s = _left->eval_string(c, swapped_stack);
+ xpath_string p = _right->eval_string(c, swapped_stack);
+
+ const char_t* pos = find_substring(s.c_str(), p.c_str());
+ if (!pos) return xpath_string();
+
+ const char_t* rbegin = pos + p.length();
+ const char_t* rend = s.c_str() + s.length();
+
+ // heap-backed sources are copied into stack.result; otherwise the tail is referenced in place
+ return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
+ }
+
+ case ast_func_substring_2: {
+ xpath_allocator_capture cr(stack.temp);
+
+ xpath_stack swapped_stack = {stack.temp, stack.result};
+
+ xpath_string s = _left->eval_string(c, swapped_stack);
+ size_t s_length = s.length();
+
+ // 1-based start position, rounded to the nearest integer
+ double first = round_nearest(_right->eval_number(c, stack));
+
+ if (is_nan(first)) return xpath_string(); // NaN
+ else if (first >= s_length + 1) return xpath_string();
+
+ // start positions before the string are clamped to its first character
+ size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
+ assert(1 <= pos && pos <= s_length + 1);
+
+ const char_t* rbegin = s.c_str() + (pos - 1);
+ const char_t* rend = s.c_str() + s.length();
+
+ return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
+ }
+
+ case ast_func_substring_3: {
+ xpath_allocator_capture cr(stack.temp);
+
+ xpath_stack swapped_stack = {stack.temp, stack.result};
+
+ xpath_string s = _left->eval_string(c, swapped_stack);
+ size_t s_length = s.length();
+
+ // first is the 1-based start, last the 1-based position one past the end
+ double first = round_nearest(_right->eval_number(c, stack));
+ double last = first + round_nearest(_right->_next->eval_number(c, stack));
+
+ if (is_nan(first) || is_nan(last)) return xpath_string();
+ else if (first >= s_length + 1) return xpath_string();
+ else if (first >= last) return xpath_string();
+ else if (last < 1) return xpath_string();
+
+ // clamp both ends to the string
+ size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
+ size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last);
+
+ assert(1 <= pos && pos <= end && end <= s_length + 1);
+ const char_t* rbegin = s.c_str() + (pos - 1);
+ const char_t* rend = s.c_str() + (end - 1);
+
+ // a tail of a non-heap string can be referenced in place; anything else is copied
+ return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result);
+ }
+
+ case ast_func_normalize_space_0: {
+ xpath_string s = string_value(c.n, stack.result);
+
+ // obtain a writable buffer and normalize it in place
+ char_t* begin = s.data(stack.result);
+ char_t* end = normalize_space(begin);
+
+ return xpath_string::from_heap_preallocated(begin, end);
+ }
+
+ case ast_func_normalize_space_1: {
+ xpath_string s = _left->eval_string(c, stack);
+
+ char_t* begin = s.data(stack.result);
+ char_t* end = normalize_space(begin);
+
+ return xpath_string::from_heap_preallocated(begin, end);
+ }
+
+ case ast_func_translate: {
+ xpath_allocator_capture cr(stack.temp);
+
+ xpath_stack swapped_stack = {stack.temp, stack.result};
+
+ // the source string is evaluated with the normal stack because it is edited in
+ // place and returned; the two mapping strings use the swapped stack
+ xpath_string s = _left->eval_string(c, stack);
+ xpath_string from = _right->eval_string(c, swapped_stack);
+ xpath_string to = _right->_next->eval_string(c, swapped_stack);
+
+ char_t* begin = s.data(stack.result);
+ char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());
+
+ return xpath_string::from_heap_preallocated(begin, end);
+ }
+
+ case ast_opt_translate_table: {
+ // translate() with constant arguments, rewritten by optimize_self to use a lookup table
+ xpath_string s = _left->eval_string(c, stack);
+
+ char_t* begin = s.data(stack.result);
+ char_t* end = translate_table(begin, _data.table);
+
+ return xpath_string::from_heap_preallocated(begin, end);
+ }
+
+ case ast_variable: {
+ assert(_rettype == _data.variable->type());
+
+ if (_rettype == xpath_type_string)
+ return xpath_string::from_const(_data.variable->get_string());
+
+ // fallthrough to type conversion
+ }
+
+ default: {
+ switch (_rettype) {
+ case xpath_type_boolean:
+ return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
+
+ case xpath_type_number:
+ return convert_number_to_string(eval_number(c, stack), stack.result);
+
+ case xpath_type_node_set: {
+ xpath_allocator_capture cr(stack.temp);
+
+ xpath_stack swapped_stack = {stack.temp, stack.result};
+
+ // only the first node matters, so ask for nodeset_eval_first
+ xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
+ return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
+ }
+
+ default:
+ assert(!"Wrong expression for return type string");
+ return xpath_string();
+ }
+ }
+ }
+ }
+
+ // Evaluates this expression in context c and returns the resulting node set.
+ // Handles unions, filter expressions, location steps (dispatched per axis to
+ // step_do), the root step and node-set variables. id() and the namespace axis
+ // always yield an empty set here. Any other node type trips the assertion in the
+ // default branch.
+ //
+ // eval tells the callee how much of the set the caller needs (all nodes, the
+ // first one, or any one).
+ xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval) {
+ switch (_type) {
+ case ast_op_union: {
+ xpath_allocator_capture cr(stack.temp);
+
+ xpath_stack swapped_stack = {stack.temp, stack.result};
+
+ // the left set is evaluated with the swapped stack, the right one with the
+ // normal stack; the left nodes are then appended to the right set
+ xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval);
+ xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval);
+
+ // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
+ rs.set_type(xpath_node_set::type_unsorted);
+
+ rs.append(ls.begin(), ls.end(), stack.result);
+ rs.remove_duplicates();
+
+ return rs;
+ }
+
+ case ast_filter: {
+ // for a [1] predicate only the first node of the filtered expression is requested
+ xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all);
+
+ // either expression is a number or it contains position() call; sort by document order
+ if (_test != predicate_posinv) set.sort_do();
+
+ bool once = eval_once(set.type(), eval);
+
+ apply_predicate(set, 0, stack, once);
+
+ return set;
+ }
+
+ // id() always produces an empty node set
+ case ast_func_id:
+ return xpath_node_set_raw();
+
+ case ast_step: {
+ // turn the runtime axis value into a compile-time tag for step_do
+ switch (_axis) {
+ case axis_ancestor:
+ return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
+
+ case axis_ancestor_or_self:
+ return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());
+
+ case axis_attribute:
+ return step_do(c, stack, eval, axis_to_type<axis_attribute>());
+
+ case axis_child:
+ return step_do(c, stack, eval, axis_to_type<axis_child>());
+
+ case axis_descendant:
+ return step_do(c, stack, eval, axis_to_type<axis_descendant>());
+
+ case axis_descendant_or_self:
+ return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());
+
+ case axis_following:
+ return step_do(c, stack, eval, axis_to_type<axis_following>());
+
+ case axis_following_sibling:
+ return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());
+
+ case axis_namespace:
+ // namespaced axis is not supported
+ return xpath_node_set_raw();
+
+ case axis_parent:
+ return step_do(c, stack, eval, axis_to_type<axis_parent>());
+
+ case axis_preceding:
+ return step_do(c, stack, eval, axis_to_type<axis_preceding>());
+
+ case axis_preceding_sibling:
+ return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());
+
+ case axis_self:
+ return step_do(c, stack, eval, axis_to_type<axis_self>());
+
+ default:
+ assert(!"Unknown axis");
+ return xpath_node_set_raw();
+ }
+ }
+
+ case ast_step_root: {
+ assert(!_right); // root step can't have any predicates
+
+ xpath_node_set_raw ns;
+
+ ns.set_type(xpath_node_set::type_sorted);
+
+ // the root is taken from the context node, or from the owner of a context attribute
+ if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
+ else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
+
+ return ns;
+ }
+
+ case ast_variable: {
+ assert(_rettype == _data.variable->type());
+
+ if (_rettype == xpath_type_node_set) {
+ const xpath_node_set& s = _data.variable->get_node_set();
+
+ // copy the variable's nodes into a fresh raw set, keeping its ordering type
+ xpath_node_set_raw ns;
+
+ ns.set_type(s.type());
+ ns.append(s.begin(), s.end(), stack.result);
+
+ return ns;
+ }
+
+ // fallthrough to type conversion
+ }
+
+ default:
+ assert(!"Wrong expression for return type node set");
+ return xpath_node_set_raw();
+ }
+ }
+
+ // Optimizes the whole expression tree rooted at this node: the nodes linked
+ // through _left, _right and _next are optimized first (in that order), then the
+ // node itself.
+ void optimize(xpath_allocator* alloc) {
+     xpath_ast_node* const linked[] = {_left, _right, _next};
+
+     for (size_t i = 0; i < sizeof(linked) / sizeof(linked[0]); ++i) {
+         if (linked[i])
+             linked[i]->optimize(alloc);
+     }
+
+     optimize_self(alloc);
+ }
+
+ // Applies local rewrites to this node only (optimize calls it after the linked
+ // nodes have been optimized). The rewrites, in order: simplify
+ // [position()=expr], classify filter/predicate nodes into _test, collapse
+ // //-style step pairs into a single descendant step, switch translate() with
+ // constant arguments to a lookup table, and switch @attr = value comparisons to
+ // a dedicated node type.
+ //
+ // alloc - allocator used for the translate() lookup table
+ void optimize_self(xpath_allocator* alloc) {
+ // Rewrite [position()=expr] with [expr]
+ // Note that this step has to go before classification to recognize [position()=1]
+ if ((_type == ast_filter || _type == ast_predicate) &&
+ _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number) {
+ _right = _right->_right;
+ }
+
+ // Classify filter/predicate ops to perform various optimizations during evaluation
+ if (_type == ast_filter || _type == ast_predicate) {
+ assert(_test == predicate_default);
+
+ // [1] gets its own class; other numeric constants, variables and last() are "constant";
+ // non-numeric expressions that never read position()/last() are position invariant
+ if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
+ _test = predicate_constant_one;
+ else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
+ _test = predicate_constant;
+ else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
+ _test = predicate_posinv;
+ }
+
+ // Rewrite descendant-or-self::node()/child::foo with descendant::foo
+ // The former is a full form of //foo, the latter is much faster since it executes the node test immediately
+ // Do a similar kind of rewrite for self/descendant/descendant-or-self axes
+ // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
+ if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && _left &&
+ _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
+ is_posinv_step()) {
+ if (_axis == axis_child || _axis == axis_descendant)
+ _axis = axis_descendant;
+ else
+ _axis = axis_descendant_or_self;
+
+ // drop the intermediate descendant-or-self::node() step
+ _left = _left->_left;
+ }
+
+ // Use optimized lookup table implementation for translate() with constant arguments
+ if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant) {
+ unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
+
+ // if no table was produced the node stays a regular translate() call
+ if (table) {
+ _type = ast_opt_translate_table;
+ _data.table = table;
+ }
+ }
+
+ // Use optimized path for @attr = 'value' or @attr = $value
+ if (_type == ast_op_equal &&
+ _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
+ (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string))) {
+ _type = ast_opt_compare_attribute;
+ }
+ }
+
+ // Returns true when this expression is position invariant, i.e. neither it nor
+ // any operand reachable through _left or the _right/_next chain is a position()
+ // or last() call. Constants, variables, steps, filters and predicates are treated
+ // as invariant without looking at their operands.
+ bool is_posinv_expr() const {
+     // the two functions that read the context position/size
+     if (_type == ast_func_position || _type == ast_func_last)
+         return false;
+
+     switch (_type) {
+     case ast_string_constant:
+     case ast_number_constant:
+     case ast_variable:
+     case ast_step:
+     case ast_step_root:
+     case ast_predicate:
+     case ast_filter:
+         return true;
+
+     default:
+         break;
+     }
+
+     // everything else is invariant only if all of its operands are
+     if (_left && !_left->is_posinv_expr())
+         return false;
+
+     xpath_ast_node* operand = _right;
+
+     while (operand) {
+         if (!operand->is_posinv_expr())
+             return false;
+
+         operand = operand->_next;
+     }
+
+     return true;
+ }
+
+ // Returns true when every predicate attached to this step (the chain starting
+ // at _right) has been classified as predicate_posinv; a step without predicates
+ // qualifies as well.
+ bool is_posinv_step() const {
+     assert(_type == ast_step);
+
+     xpath_ast_node* pred = _right;
+
+     while (pred) {
+         assert(pred->_type == ast_predicate);
+
+         if (pred->_test != predicate_posinv)
+             return false;
+
+         pred = pred->_next;
+     }
+
+     return true;
+ }
+
+ // Returns the value type this expression evaluates to (_rettype is stored as a
+ // char and converted back to the enum here).
+ xpath_value_type rettype() const {
+ return static_cast<xpath_value_type>(_rettype);
+ }
+};
+
+struct xpath_parser {
+ xpath_allocator* _alloc;
+ xpath_lexer _lexer;
+
+ const char_t* _query;
+ xpath_variable_set* _variables;
+
+ xpath_parse_result* _result;
+
+ char_t _scratch[32];
+
+#ifdef PUGIXML_NO_EXCEPTIONS
+ jmp_buf _error_handler;
+#endif
+
+ // Records `message` and the current lexer offset (relative to the query start) in the
+ // parse result, then aborts parsing: by exception normally, by longjmp to
+ // _error_handler when PUGIXML_NO_EXCEPTIONS is defined. Does not return.
+ void throw_error(const char* message) {
+     _result->offset = _lexer.current_pos() - _query;
+     _result->error = message;
+
+#ifndef PUGIXML_NO_EXCEPTIONS
+     throw xpath_exception(*_result);
+#else
+     longjmp(_error_handler, 1);
+#endif
+ }
+
+ // Signals allocation failure: std::bad_alloc when exceptions are available,
+ // otherwise an "Out of memory" parse error routed through throw_error().
+ void throw_error_oom() {
+#ifndef PUGIXML_NO_EXCEPTIONS
+     throw std::bad_alloc();
+#else
+     throw_error("Out of memory");
+#endif
+ }
+
+ // Reserves raw storage for one xpath_ast_node from the parser's allocator;
+ // reports out-of-memory through throw_error_oom() instead of returning null.
+ void* alloc_node() {
+     void* memory = _alloc->allocate_nothrow(sizeof(xpath_ast_node));
+
+     if (memory == 0) throw_error_oom();
+
+     return memory;
+ }
+
+ // Copies the [begin, end) range of a lexer string into allocator-owned,
+ // zero-terminated storage. A lexer string with a null begin yields a null pointer.
+ const char_t* alloc_string(const xpath_lexer_string& value) {
+     if (!value.begin) return 0;
+
+     const size_t length = static_cast<size_t>(value.end - value.begin);
+
+     // one extra character for the terminator
+     char_t* copy = static_cast<char_t*>(_alloc->allocate_nothrow((length + 1) * sizeof(char_t)));
+     if (!copy) throw_error_oom();
+     assert(copy); // workaround for clang static analysis
+
+     memcpy(copy, value.begin, length * sizeof(char_t));
+     copy[length] = 0;
+
+     return copy;
+ }
+
+ // Shared builder for string-returning functions that take zero or one node-set argument
+ // (used for name(), local-name() and namespace-uri()): type0 is the node type for the
+ // zero-argument form, type1 for the one-argument form.
+ xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2]) {
+     assert(argc <= 1);
+
+     // an explicit argument must be a node set
+     if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
+
+     const ast_type_t type = (argc != 0) ? type1 : type0;
+
+     return new (alloc_node()) xpath_ast_node(type, xpath_type_string, args[0]);
+ }
+
+ // Maps a function name plus argument count onto the matching AST node.
+ // args[0] and args[1] hold the first two parsed arguments (null when absent).
+ // A name/arity combination that matches nothing is reported through throw_error().
+ xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2]) {
+     // switch on the first character so only a handful of names are compared per call;
+     // every successful match returns, so the tests in each case are independent
+     switch (name.begin[0]) {
+     case 'b':
+         if (name == PUGIXML_TEXT("boolean") && argc == 1)
+             return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);
+
+         break;
+
+     case 'c':
+         if (name == PUGIXML_TEXT("count") && argc == 1) {
+             if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
+
+             return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]);
+         }
+
+         if (name == PUGIXML_TEXT("contains") && argc == 2)
+             return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
+
+         if (name == PUGIXML_TEXT("concat") && argc >= 2)
+             return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);
+
+         if (name == PUGIXML_TEXT("ceiling") && argc == 1)
+             return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
+
+         break;
+
+     case 'f':
+         if (name == PUGIXML_TEXT("false") && argc == 0)
+             return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean);
+
+         if (name == PUGIXML_TEXT("floor") && argc == 1)
+             return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);
+
+         break;
+
+     case 'i':
+         if (name == PUGIXML_TEXT("id") && argc == 1)
+             return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);
+
+         break;
+
+     case 'l':
+         if (name == PUGIXML_TEXT("last") && argc == 0)
+             return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number);
+
+         if (name == PUGIXML_TEXT("lang") && argc == 1)
+             return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);
+
+         if (name == PUGIXML_TEXT("local-name") && argc <= 1)
+             return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args);
+
+         break;
+
+     case 'n':
+         if (name == PUGIXML_TEXT("name") && argc <= 1)
+             return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args);
+
+         if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
+             return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);
+
+         if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
+             return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
+
+         if (name == PUGIXML_TEXT("not") && argc == 1)
+             return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);
+
+         if (name == PUGIXML_TEXT("number") && argc <= 1)
+             return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
+
+         break;
+
+     case 'p':
+         if (name == PUGIXML_TEXT("position") && argc == 0)
+             return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number);
+
+         break;
+
+     case 'r':
+         if (name == PUGIXML_TEXT("round") && argc == 1)
+             return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]);
+
+         break;
+
+     case 's':
+         if (name == PUGIXML_TEXT("string") && argc <= 1)
+             return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
+
+         if (name == PUGIXML_TEXT("string-length") && argc <= 1)
+             return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
+
+         if (name == PUGIXML_TEXT("starts-with") && argc == 2)
+             return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
+
+         if (name == PUGIXML_TEXT("substring-before") && argc == 2)
+             return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
+
+         if (name == PUGIXML_TEXT("substring-after") && argc == 2)
+             return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
+
+         if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
+             return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
+
+         if (name == PUGIXML_TEXT("sum") && argc == 1) {
+             if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
+
+             return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);
+         }
+
+         break;
+
+     case 't':
+         if (name == PUGIXML_TEXT("translate") && argc == 3)
+             return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);
+
+         if (name == PUGIXML_TEXT("true") && argc == 0)
+             return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean);
+
+         break;
+
+     default:
+         break;
+     }
+
+     // nothing matched the name/arity pair
+     throw_error("Unrecognized function or wrong parameter count");
+
+     return 0;
+ }
+
+ // Translates an axis name into its axis_t value. `specified` is set to true when the
+ // name is a known axis; otherwise it is set to false and axis_child is returned.
+ axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) {
+     // optimistic: every early return below is a successful match
+     specified = true;
+
+     switch (name.begin[0]) {
+     case 'a':
+         if (name == PUGIXML_TEXT("ancestor")) return axis_ancestor;
+         if (name == PUGIXML_TEXT("ancestor-or-self")) return axis_ancestor_or_self;
+         if (name == PUGIXML_TEXT("attribute")) return axis_attribute;
+         break;
+
+     case 'c':
+         if (name == PUGIXML_TEXT("child")) return axis_child;
+         break;
+
+     case 'd':
+         if (name == PUGIXML_TEXT("descendant")) return axis_descendant;
+         if (name == PUGIXML_TEXT("descendant-or-self")) return axis_descendant_or_self;
+         break;
+
+     case 'f':
+         if (name == PUGIXML_TEXT("following")) return axis_following;
+         if (name == PUGIXML_TEXT("following-sibling")) return axis_following_sibling;
+         break;
+
+     case 'n':
+         if (name == PUGIXML_TEXT("namespace")) return axis_namespace;
+         break;
+
+     case 'p':
+         if (name == PUGIXML_TEXT("parent")) return axis_parent;
+         if (name == PUGIXML_TEXT("preceding")) return axis_preceding;
+         if (name == PUGIXML_TEXT("preceding-sibling")) return axis_preceding_sibling;
+         break;
+
+     case 's':
+         if (name == PUGIXML_TEXT("self")) return axis_self;
+         break;
+
+     default:
+         break;
+     }
+
+     // unknown name: report failure and hand back the default axis
+     specified = false;
+     return axis_child;
+ }
+
+ // Recognizes the node-type test names comment, node, processing-instruction and text.
+ // Returns nodetest_none for any other name.
+ nodetest_t parse_node_test_type(const xpath_lexer_string& name) {
+     const char_t first = name.begin[0];
+
+     if (first == 'c') {
+         if (name == PUGIXML_TEXT("comment"))
+             return nodetest_type_comment;
+     } else if (first == 'n') {
+         if (name == PUGIXML_TEXT("node"))
+             return nodetest_type_node;
+     } else if (first == 'p') {
+         if (name == PUGIXML_TEXT("processing-instruction"))
+             return nodetest_type_pi;
+     } else if (first == 't') {
+         if (name == PUGIXML_TEXT("text"))
+             return nodetest_type_text;
+     }
+
+     return nodetest_none;
+ }
+
+ // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
+ // Parses one primary expression starting at the current lexeme: a variable reference,
+ // a parenthesized expression, a quoted string, a number or a function call.
+ // Any other lexeme is reported as an error through throw_error().
+ xpath_ast_node* parse_primary_expression() {
+     switch (_lexer.current()) {
+     case lex_var_ref: {
+         xpath_lexer_string var_name = _lexer.contents();
+
+         if (!_variables)
+             throw_error("Unknown variable: variable set is not provided");
+
+         // the lookup helper returns false on allocation failure and leaves
+         // `variable` null when the name is not in the set
+         xpath_variable* variable = 0;
+         if (!get_variable_scratch(_scratch, _variables, var_name.begin, var_name.end, &variable))
+             throw_error_oom();
+
+         if (!variable)
+             throw_error("Unknown variable: variable set does not contain the given name");
+
+         _lexer.next();
+
+         return new (alloc_node()) xpath_ast_node(ast_variable, variable->type(), variable);
+     }
+
+     case lex_open_brace: {
+         _lexer.next();
+
+         xpath_ast_node* inner = parse_expression();
+
+         if (_lexer.current() != lex_close_brace)
+             throw_error("Unmatched braces");
+
+         _lexer.next();
+
+         return inner;
+     }
+
+     case lex_quoted_string: {
+         // copy the literal before advancing the lexer
+         const char_t* text = alloc_string(_lexer.contents());
+
+         xpath_ast_node* literal = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, text);
+         _lexer.next();
+
+         return literal;
+     }
+
+     case lex_number: {
+         double number = 0;
+
+         if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &number))
+             throw_error_oom();
+
+         xpath_ast_node* literal = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, number);
+         _lexer.next();
+
+         return literal;
+     }
+
+     case lex_string: {
+         // function call: name '(' [expr (',' expr)*] ')'
+         xpath_lexer_string function = _lexer.contents();
+         _lexer.next();
+
+         if (_lexer.current() != lex_open_brace)
+             throw_error("Unrecognized function call");
+         _lexer.next();
+
+         xpath_ast_node* args[2] = {0};
+         size_t argc = 0;
+
+         // most recently parsed argument after the first; arguments past the
+         // second are linked onto it instead of being stored in args[]
+         xpath_ast_node* tail = 0;
+
+         if (_lexer.current() != lex_close_brace)
+             args[argc++] = parse_expression();
+
+         while (_lexer.current() != lex_close_brace) {
+             if (_lexer.current() != lex_comma)
+                 throw_error("No comma between function arguments");
+             _lexer.next();
+
+             xpath_ast_node* arg = parse_expression();
+
+             if (argc < 2)
+                 args[argc] = arg;
+             else
+                 tail->set_next(arg);
+
+             ++argc;
+             tail = arg;
+         }
+
+         _lexer.next();
+
+         return parse_function(function, argc, args);
+     }
+
+     default:
+         throw_error("Unrecognizable primary expression");
+
+         return 0;
+     }
+ }
+
+ // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
+ // Predicate ::= '[' PredicateExpr ']'
+ // PredicateExpr ::= Expr
+ // Parses a primary expression followed by any number of '[' Expr ']' predicates,
+ // wrapping the result in one ast_filter node per predicate.
+ xpath_ast_node* parse_filter_expression() {
+     xpath_ast_node* result = parse_primary_expression();
+
+     for (;;) {
+         if (_lexer.current() != lex_open_square_brace)
+             break;
+
+         _lexer.next();
+
+         xpath_ast_node* condition = parse_expression();
+
+         // a predicate only makes sense on a node set
+         if (result->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set");
+
+         result = new (alloc_node()) xpath_ast_node(ast_filter, result, condition, predicate_default);
+
+         if (_lexer.current() != lex_close_square_brace)
+             throw_error("Unmatched square brace");
+
+         _lexer.next();
+     }
+
+     return result;
+ }
+
+ // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
+ // AxisSpecifier ::= AxisName '::' | '@'?
+ // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
+ // NameTest ::= '*' | NCName ':' '*' | QName
+ // AbbreviatedStep ::= '.' | '..'
+ // Parses a single location step applied to `set` (null when there is no preceding
+ // expression): optional axis, node test, then zero or more predicates.
+ // The abbreviations '.' and '..' return immediately as self / parent steps.
+ xpath_ast_node* parse_step(xpath_ast_node* set) {
+     if (set && set->rettype() != xpath_type_node_set)
+         throw_error("Step has to be applied to node set");
+
+     bool axis_specified = false;
+     axis_t axis = axis_child; // implied child axis
+
+     switch (_lexer.current()) {
+     case lex_axis_attribute:
+         axis = axis_attribute;
+         axis_specified = true;
+
+         _lexer.next();
+         break;
+
+     case lex_dot:
+         _lexer.next();
+
+         return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0);
+
+     case lex_double_dot:
+         _lexer.next();
+
+         return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0);
+
+     default:
+         break;
+     }
+
+     nodetest_t test = nodetest_none;
+     xpath_lexer_string test_name;
+
+     if (_lexer.current() == lex_string) {
+         // tentatively a node name test
+         test_name = _lexer.contents();
+         _lexer.next();
+
+         // a following '::' means the string was an axis name instead
+         if (_lexer.current() == lex_double_colon) {
+             if (axis_specified) throw_error("Two axis specifiers in one step");
+
+             axis = parse_axis_name(test_name, axis_specified);
+
+             if (!axis_specified) throw_error("Unknown axis");
+
+             // now read the actual node test
+             _lexer.next();
+
+             if (_lexer.current() == lex_multiply) {
+                 test = nodetest_all;
+                 test_name = xpath_lexer_string();
+                 _lexer.next();
+             } else if (_lexer.current() == lex_string) {
+                 test_name = _lexer.contents();
+                 _lexer.next();
+             } else {
+                 throw_error("Unrecognized node test");
+             }
+         }
+
+         if (test == nodetest_none) {
+             if (_lexer.current() == lex_open_brace) {
+                 // node type test or processing-instruction('literal')
+                 _lexer.next();
+
+                 if (_lexer.current() == lex_close_brace) {
+                     _lexer.next();
+
+                     test = parse_node_test_type(test_name);
+
+                     if (test == nodetest_none) throw_error("Unrecognized node type");
+
+                     test_name = xpath_lexer_string();
+                 } else if (test_name == PUGIXML_TEXT("processing-instruction")) {
+                     if (_lexer.current() != lex_quoted_string)
+                         throw_error("Only literals are allowed as arguments to processing-instruction()");
+
+                     test = nodetest_pi;
+                     test_name = _lexer.contents();
+                     _lexer.next();
+
+                     if (_lexer.current() != lex_close_brace)
+                         throw_error("Unmatched brace near processing-instruction()");
+                     _lexer.next();
+                 } else {
+                     throw_error("Unmatched brace near node type test");
+                 }
+             } else {
+                 // QName or NCName:*
+                 const bool namespace_wildcard = test_name.end - test_name.begin > 2 && test_name.end[-2] == ':' && test_name.end[-1] == '*';
+
+                 if (namespace_wildcard) {
+                     test_name.end--; // erase *
+
+                     test = nodetest_all_in_namespace;
+                 } else {
+                     test = nodetest_name;
+                 }
+             }
+         }
+     } else if (_lexer.current() == lex_multiply) {
+         test = nodetest_all;
+         _lexer.next();
+     } else {
+         throw_error("Unrecognized node test");
+     }
+
+     xpath_ast_node* step = new (alloc_node()) xpath_ast_node(ast_step, set, axis, test, alloc_string(test_name));
+
+     // predicates form a singly linked list: the first hangs off the step's right
+     // slot, each later one off its predecessor's next pointer
+     xpath_ast_node* tail = 0;
+
+     while (_lexer.current() == lex_open_square_brace) {
+         _lexer.next();
+
+         xpath_ast_node* condition = parse_expression();
+
+         xpath_ast_node* predicate = new (alloc_node()) xpath_ast_node(ast_predicate, 0, condition, predicate_default);
+
+         if (_lexer.current() != lex_close_square_brace)
+             throw_error("Unmatched square brace");
+         _lexer.next();
+
+         if (tail)
+             tail->set_next(predicate);
+         else
+             step->set_right(predicate);
+
+         tail = predicate;
+     }
+
+     return step;
+ }
+
+ // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
+ // Parses Step ('/' Step | '//' Step)* starting from `set`. Each '//' inserts an
+ // intermediate descendant-or-self::node() step before the step that follows it.
+ xpath_ast_node* parse_relative_location_path(xpath_ast_node* set) {
+     xpath_ast_node* path = parse_step(set);
+
+     for (;;) {
+         const lexeme_t separator = _lexer.current();
+
+         if (separator != lex_slash && separator != lex_double_slash)
+             break;
+
+         _lexer.next();
+
+         if (separator == lex_double_slash)
+             path = new (alloc_node()) xpath_ast_node(ast_step, path, axis_descendant_or_self, nodetest_type_node, 0);
+
+         path = parse_step(path);
+     }
+
+     return path;
+ }
+
+ // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
+ // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
+ // Parses an absolute ('/...' or '//...') or relative location path.
+ // A lone '/' not followed by the start of a step yields just the root node.
+ xpath_ast_node* parse_location_path() {
+     const lexeme_t lead = _lexer.current();
+
+     if (lead == lex_slash) {
+         _lexer.next();
+
+         xpath_ast_node* root = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
+
+         // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
+         switch (_lexer.current()) {
+         case lex_string:
+         case lex_axis_attribute:
+         case lex_dot:
+         case lex_double_dot:
+         case lex_multiply:
+             return parse_relative_location_path(root);
+
+         default:
+             return root;
+         }
+     }
+
+     if (lead == lex_double_slash) {
+         _lexer.next();
+
+         xpath_ast_node* root = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
+         xpath_ast_node* descendants = new (alloc_node()) xpath_ast_node(ast_step, root, axis_descendant_or_self, nodetest_type_node, 0);
+
+         return parse_relative_location_path(descendants);
+     }
+
+     // no leading slash: plain relative path with no starting set
+     return parse_relative_location_path(0);
+ }
+
+ // PathExpr ::= LocationPath
+ // | FilterExpr
+ // | FilterExpr '/' RelativeLocationPath
+ // | FilterExpr '//' RelativeLocationPath
+ // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
+ // UnaryExpr ::= UnionExpr | '-' UnaryExpr
+ // Parses a path expression or a unary minus, choosing the branch from the current lexeme:
+ //  - '-' negates the union-level expression that follows;
+ //  - '$', '(', a quoted string, a number or a function call starts a filter expression,
+ //    optionally continued with '/' or '//' and a relative location path;
+ //  - anything else is parsed as a location path.
+ xpath_ast_node* parse_path_or_unary_expression() {
+     const lexeme_t first = _lexer.current();
+
+     if (first == lex_minus) {
+         _lexer.next();
+
+         // precedence 7+ - only parses union expressions
+         xpath_ast_node* operand = parse_expression_rec(parse_path_or_unary_expression(), 7);
+
+         return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, operand);
+     }
+
+     const bool starts_primary = first == lex_var_ref || first == lex_open_brace ||
+         first == lex_quoted_string || first == lex_number || first == lex_string;
+
+     if (!starts_primary)
+         return parse_location_path();
+
+     if (first == lex_string) {
+         // A bare name is a function call only when the next non-space character is '('
+         const char_t* lookahead = _lexer.state();
+
+         while (PUGI__IS_CHARTYPE(*lookahead, ct_space)) ++lookahead;
+
+         if (*lookahead != '(') return parse_location_path();
+
+         // name( can still be a node type test such as text() - that belongs to a location path
+         if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path();
+     }
+
+     xpath_ast_node* filter = parse_filter_expression();
+
+     const lexeme_t separator = _lexer.current();
+
+     if (separator != lex_slash && separator != lex_double_slash)
+         return filter;
+
+     _lexer.next();
+
+     if (separator == lex_double_slash) {
+         if (filter->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set");
+
+         filter = new (alloc_node()) xpath_ast_node(ast_step, filter, axis_descendant_or_self, nodetest_type_node, 0);
+     }
+
+     // select from location path
+     return parse_relative_location_path(filter);
+ }
+
+ // Describes a binary operator: the AST node type it produces, the value type of the
+ // result and its precedence (larger binds tighter). A default-constructed value
+ // (asttype == ast_unknown) means "not a binary operator".
+ struct binary_op_t {
+     ast_type_t asttype;
+     xpath_value_type rettype;
+     int precedence;
+
+     binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0) {
+     }
+
+     binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_) {
+     }
+
+     // Classifies the lexer's current lexeme without consuming it.
+     static binary_op_t parse(xpath_lexer& lexer) {
+         switch (lexer.current()) {
+         case lex_string:
+             // keyword operators arrive as plain strings
+             if (lexer.contents() == PUGIXML_TEXT("or")) return binary_op_t(ast_op_or, xpath_type_boolean, 1);
+             if (lexer.contents() == PUGIXML_TEXT("and")) return binary_op_t(ast_op_and, xpath_type_boolean, 2);
+             if (lexer.contents() == PUGIXML_TEXT("div")) return binary_op_t(ast_op_divide, xpath_type_number, 6);
+             if (lexer.contents() == PUGIXML_TEXT("mod")) return binary_op_t(ast_op_mod, xpath_type_number, 6);
+
+             return binary_op_t();
+
+         // equality
+         case lex_equal: return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
+         case lex_not_equal: return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
+
+         // relational
+         case lex_less: return binary_op_t(ast_op_less, xpath_type_boolean, 4);
+         case lex_greater: return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
+         case lex_less_or_equal: return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
+         case lex_greater_or_equal: return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
+
+         // additive
+         case lex_plus: return binary_op_t(ast_op_add, xpath_type_number, 5);
+         case lex_minus: return binary_op_t(ast_op_subtract, xpath_type_number, 5);
+
+         // multiplicative
+         case lex_multiply: return binary_op_t(ast_op_multiply, xpath_type_number, 6);
+
+         // union
+         case lex_union: return binary_op_t(ast_op_union, xpath_type_node_set, 7);
+
+         default:
+             return binary_op_t();
+         }
+     }
+ };
+
+ // Precedence-climbing loop for binary operators. `lhs` is the already parsed left
+ // operand; operators with precedence below `limit` are left for the caller.
+ // Operators of equal precedence associate to the left.
+ xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit) {
+     binary_op_t current = binary_op_t::parse(_lexer);
+
+     while (current.asttype != ast_unknown && current.precedence >= limit) {
+         _lexer.next();
+
+         xpath_ast_node* rhs = parse_path_or_unary_expression();
+
+         // let tighter-binding operators absorb the right operand first
+         binary_op_t lookahead = binary_op_t::parse(_lexer);
+
+         while (lookahead.asttype != ast_unknown && lookahead.precedence > current.precedence) {
+             rhs = parse_expression_rec(rhs, lookahead.precedence);
+
+             lookahead = binary_op_t::parse(_lexer);
+         }
+
+         if (current.asttype == ast_op_union) {
+             const bool both_node_sets = lhs->rettype() == xpath_type_node_set && rhs->rettype() == xpath_type_node_set;
+
+             if (!both_node_sets)
+                 throw_error("Union operator has to be applied to node sets");
+         }
+
+         lhs = new (alloc_node()) xpath_ast_node(current.asttype, current.rettype, lhs, rhs);
+
+         current = binary_op_t::parse(_lexer);
+     }
+
+     return lhs;
+ }
+
+ // Expr ::= OrExpr
+ // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
+ // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
+ // EqualityExpr ::= RelationalExpr
+ // | EqualityExpr '=' RelationalExpr
+ // | EqualityExpr '!=' RelationalExpr
+ // RelationalExpr ::= AdditiveExpr
+ // | RelationalExpr '<' AdditiveExpr
+ // | RelationalExpr '>' AdditiveExpr
+ // | RelationalExpr '<=' AdditiveExpr
+ // | RelationalExpr '>=' AdditiveExpr
+ // AdditiveExpr ::= MultiplicativeExpr
+ // | AdditiveExpr '+' MultiplicativeExpr
+ // | AdditiveExpr '-' MultiplicativeExpr
+ // MultiplicativeExpr ::= UnaryExpr
+ // | MultiplicativeExpr '*' UnaryExpr
+ // | MultiplicativeExpr 'div' UnaryExpr
+ // | MultiplicativeExpr 'mod' UnaryExpr
+ // Parses a full expression: one operand followed by binary operators of any precedence.
+ xpath_ast_node* parse_expression() {
+     xpath_ast_node* first_operand = parse_path_or_unary_expression();
+
+     return parse_expression_rec(first_operand, 0);
+ }
+
+ // Stores the collaborators and positions the lexer at the start of `query`.
+ // Nothing is parsed until parse() is called.
+ xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result):
+     _alloc(alloc),
+     _lexer(query),
+     _query(query),
+     _variables(variables),
+     _result(result) {
+ }
+
+ // Parses the whole query and requires that the expression consumes every token.
+ xpath_ast_node* parse() {
+     xpath_ast_node* root = parse_expression();
+
+     // leftover tokens after a complete expression mean the query is malformed
+     if (_lexer.current() != lex_eof)
+         throw_error("Incorrect query");
+
+     return root;
+ }
+
+ // Convenience entry point: builds a parser for `query` and runs it.
+ // With PUGIXML_NO_EXCEPTIONS a parse error comes back as a null pointer
+ // (via longjmp into the handler armed here); otherwise errors propagate as exceptions.
+ static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result) {
+     xpath_parser parser(query, variables, alloc, result);
+
+#ifdef PUGIXML_NO_EXCEPTIONS
+     // non-zero means we arrived here through longjmp from throw_error()
+     if (setjmp(parser._error_handler) != 0)
+         return 0;
+#endif
+
+     return parser.parse();
+ }
+};
+
+// Backing storage for a compiled query: the AST root, the allocator that owns the AST
+// and the allocator's first memory block, which lives inside this object.
+struct xpath_query_impl {
+    // Allocates and constructs an instance through xml_memory; returns null on allocation failure.
+    static xpath_query_impl* create() {
+        void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
+
+        return memory ? new (memory) xpath_query_impl() : 0;
+    }
+
+    // Releases the allocator's pages, then the object's own memory.
+    static void destroy(xpath_query_impl* impl) {
+        // free all allocated pages
+        impl->alloc.release();
+
+        // free allocator memory (with the first page)
+        xml_memory::deallocate(impl);
+    }
+
+    xpath_query_impl(): root(0), alloc(&block) {
+        block.capacity = sizeof(block.data);
+        block.next = 0;
+    }
+
+    xpath_ast_node* root;
+    xpath_allocator alloc;
+    xpath_memory_block block;
+};
+
+// Evaluates the compiled query as a string with `n` as the context node
+// (context position 1 of 1). Returns an empty xpath_string when there is no query
+// or, with PUGIXML_NO_EXCEPTIONS, when evaluation jumps back to the error handler.
+PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd)
+{
+    if (impl == 0) return xpath_string();
+
+#ifdef PUGIXML_NO_EXCEPTIONS
+    if (setjmp(sd.error_handler)) return xpath_string();
+#endif
+
+    xpath_context context(n, 1, 1);
+
+    return impl->root->eval_string(context, sd.stack);
+}
+
+// Returns the query's root node if the query produces a node set.
+// A null `impl` yields null. A query of another type yields null with
+// PUGIXML_NO_EXCEPTIONS and throws xpath_exception otherwise.
+PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl)
+{
+    if (impl == 0) return 0;
+
+    if (impl->root->rettype() == xpath_type_node_set)
+        return impl->root;
+
+#ifdef PUGIXML_NO_EXCEPTIONS
+    return 0;
+#else
+    xpath_parse_result failure;
+    failure.error = "Expression does not evaluate to node set";
+
+    throw xpath_exception(failure);
+#endif
+}
+PUGI__NS_END
+
+namespace pugi
+{
+#ifndef PUGIXML_NO_EXCEPTIONS
+PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
+{
+ assert(_result.error);
+}
+
+PUGI__FN const char* xpath_exception::what() const throw()
+{
+ return _result.error;
+}
+
+// Gives read-only access to the wrapped parse result (message and offset).
+PUGI__FN const xpath_parse_result& xpath_exception::result() const
+{
+    const xpath_parse_result& stored = _result;
+
+    return stored;
+}
+#endif
+
+// Default constructor: both members are default-constructed.
+PUGI__FN xpath_node::xpath_node()
+{
+    // intentionally empty
+}
+
+// Constructs an XPath node that refers to a document node; the attribute part stays default.
+PUGI__FN xpath_node::xpath_node(const xml_node& node_)
+    : _node(node_)
+{
+}
+
+// Constructs an XPath node that refers to an attribute. The parent is stored only when
+// the attribute handle is non-empty; otherwise the node part is left empty as well.
+PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_)
+    : _node(attribute_ ? parent_ : xml_node()),
+      _attribute(attribute_)
+{
+}
+
+// Returns the document node, or an empty handle when this XPath node is an attribute.
+PUGI__FN xml_node xpath_node::node() const
+{
+    if (_attribute) return xml_node();
+
+    return _node;
+}
+
+// Returns the stored attribute handle (empty when this XPath node is not an attribute).
+PUGI__FN xml_attribute xpath_node::attribute() const
+{
+    xml_attribute handle = _attribute;
+
+    return handle;
+}
+
+// Returns the owning element for an attribute node (held in _node),
+// or the parent of the document node otherwise.
+PUGI__FN xml_node xpath_node::parent() const
+{
+    if (_attribute) return _node;
+
+    return _node.parent();
+}
+
+// Never called: its address is the non-null value returned by
+// xpath_node's unspecified_bool_type conversion.
+PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
+{
+    // intentionally empty
+}
+
+// Boolean-context conversion: non-null when either the node or the attribute part is set.
+PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
+{
+    if (_node || _attribute)
+        return unspecified_bool_xpath_node;
+
+    return 0;
+}
+
+// True when neither the node nor the attribute part is set.
+PUGI__FN bool xpath_node::operator!() const
+{
+    if (_node || _attribute)
+        return false;
+
+    return true;
+}
+
+// Two XPath nodes are equal when both their node and attribute parts compare equal.
+PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
+{
+    if (!(_node == n._node)) return false;
+
+    return _attribute == n._attribute;
+}
+
+// Two XPath nodes differ when either their node or their attribute parts differ.
+PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
+{
+    if (_node != n._node) return true;
+
+    return _attribute != n._attribute;
+}
+
+#ifdef __BORLANDC__
+// Borland-only overload so that `xpath_node && bool` compiles: converts lhs to bool first.
+PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
+{
+    const bool left = (bool)lhs;
+
+    return left && rhs;
+}
+
+// Borland-only overload so that `xpath_node || bool` compiles: converts lhs to bool first.
+PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
+{
+    const bool left = (bool)lhs;
+
+    return left || rhs;
+}
+#endif
+
+// Replaces the set's contents with a copy of [begin_, end_) and records type_.
+// Zero or one element is kept in the inline _storage member; larger ranges get a heap
+// buffer. If that allocation fails the set is left untouched and the function either
+// returns silently (PUGIXML_NO_EXCEPTIONS) or throws std::bad_alloc.
+PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_)
+{
+    assert(begin_ <= end_);
+
+    const size_t count = static_cast<size_t>(end_ - begin_);
+
+    if (count > 1) {
+        // allocate the new buffer before touching the old one
+        xpath_node* buffer = static_cast<xpath_node*>(impl::xml_memory::allocate(count * sizeof(xpath_node)));
+
+        if (!buffer) {
+#ifdef PUGIXML_NO_EXCEPTIONS
+            return;
+#else
+            throw std::bad_alloc();
+#endif
+        }
+
+        memcpy(buffer, begin_, count * sizeof(xpath_node));
+
+        // the old heap buffer (if any) is no longer needed
+        if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
+
+        _begin = buffer;
+        _end = buffer + count;
+        _type = type_;
+
+        return;
+    }
+
+    // small case: drop any heap buffer and fall back to the inline slot
+    if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
+
+    if (begin_ != end_) _storage = *begin_;
+
+    _begin = &_storage;
+    _end = &_storage + count;
+    _type = type_;
+}
+
+#if __cplusplus >= 201103
+// Takes over rhs's contents and leaves rhs as an empty, unsorted set. A heap buffer is
+// adopted as is; an element held in rhs's inline slot is copied into this object's slot.
+// Does not free any buffer this object currently owns.
+PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs)
+{
+    const bool rhs_inline = rhs._begin == &rhs._storage;
+    const size_t count = static_cast<size_t>(rhs._end - rhs._begin);
+
+    _type = rhs._type;
+    _storage = rhs._storage;
+    _begin = rhs_inline ? &_storage : rhs._begin;
+    _end = _begin + count;
+
+    // reset the source to the empty state
+    rhs._type = type_unsorted;
+    rhs._begin = &rhs._storage;
+    rhs._end = rhs._begin;
+}
+#endif
+
+// Creates an empty, unsorted set whose range points at the inline storage slot.
+PUGI__FN xpath_node_set::xpath_node_set()
+    : _type(type_unsorted),
+      _begin(&_storage),
+      _end(&_storage)
+{
+}
+
+// Creates a set holding a copy of [begin_, end_) tagged with type_.
+PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_)
+    : _type(type_unsorted),
+      _begin(&_storage),
+      _end(&_storage)
+{
+    // start empty, then let _assign pick inline or heap storage
+    _assign(begin_, end_, type_);
+}
+
+// Frees the heap buffer, if one is in use; the inline slot needs no cleanup.
+PUGI__FN xpath_node_set::~xpath_node_set()
+{
+    const bool uses_heap = _begin != &_storage;
+
+    if (uses_heap)
+        impl::xml_memory::deallocate(_begin);
+}
+
+// Copy constructor: starts empty and copies ns's elements and type.
+PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns)
+    : _type(type_unsorted),
+      _begin(&_storage),
+      _end(&_storage)
+{
+    _assign(ns._begin, ns._end, ns._type);
+}
+
+// Copy assignment; assigning a set to itself is a no-op.
+PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
+{
+    if (this != &ns)
+        _assign(ns._begin, ns._end, ns._type);
+
+    return *this;
+}
+
+#if __cplusplus >= 201103
+// Move constructor: starts empty, then takes over rhs's contents.
+PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs)
+    : _type(type_unsorted),
+      _begin(&_storage),
+      _end(&_storage)
+{
+    _move(rhs);
+}
+
+// Move assignment: frees this set's heap buffer (if any) and takes over rhs's contents.
+// Moving a set onto itself is a no-op.
+PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs)
+{
+    if (this != &rhs) {
+        const bool uses_heap = _begin != &_storage;
+
+        if (uses_heap)
+            impl::xml_memory::deallocate(_begin);
+
+        _move(rhs);
+    }
+
+    return *this;
+}
+#endif
+
+// Returns the ordering tag currently recorded for the set.
+PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
+{
+    const type_t current = _type;
+
+    return current;
+}
+
+// Number of nodes in the set (distance between the begin and end pointers).
+PUGI__FN size_t xpath_node_set::size() const
+{
+    // made explicit: the pointer difference is signed, the result is not
+    return static_cast<size_t>(_end - _begin);
+}
+
+// True when the set holds no nodes.
+PUGI__FN bool xpath_node_set::empty() const
+{
+    return !(_begin != _end);
+}
+
+// Unchecked element access; `index` must be below size() (asserted in debug builds).
+PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
+{
+    assert(index < size());
+
+    return *(_begin + index);
+}
+
+// Iterator to the first node of the set.
+PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
+{
+    const_iterator first_node = _begin;
+
+    return first_node;
+}
+
+// Iterator one past the last node of the set.
+PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
+{
+    const_iterator past_last = _end;
+
+    return past_last;
+}
+
+// Sorts the set in place through impl::xpath_sort and stores the ordering tag it reports.
+PUGI__FN void xpath_node_set::sort(bool reverse)
+{
+    const type_t sorted_as = impl::xpath_sort(_begin, _end, _type, reverse);
+
+    _type = sorted_as;
+}
+
+// Returns the node that impl::xpath_first selects for the set's current ordering tag.
+PUGI__FN xpath_node xpath_node_set::first() const
+{
+    xpath_node selected = impl::xpath_first(_begin, _end, _type);
+
+    return selected;
+}
+
+PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
+{
+}
+
+PUGI__FN xpath_parse_result::operator bool() const
+{
+ return error == 0;
+}
+
+PUGI__FN const char* xpath_parse_result::description() const
+{
+ return error ? error : "No error";
+}
+
+// Records the variable's value type; the variable starts with no successor in its chain.
+PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_)
+    : _type(type_),
+      _next(0)
+{
+}
+
+// Returns the variable's name. The name lives in the type-specific derived object,
+// so the lookup downcasts according to _type. An unexpected type asserts and yields null.
+PUGI__FN const char_t* xpath_variable::name() const
+{
+    if (_type == xpath_type_node_set)
+        return static_cast<const impl::xpath_variable_node_set*>(this)->name;
+
+    if (_type == xpath_type_number)
+        return static_cast<const impl::xpath_variable_number*>(this)->name;
+
+    if (_type == xpath_type_string)
+        return static_cast<const impl::xpath_variable_string*>(this)->name;
+
+    if (_type == xpath_type_boolean)
+        return static_cast<const impl::xpath_variable_boolean*>(this)->name;
+
+    assert(!"Invalid variable type");
+    return 0;
+}
+
+// Returns the value type fixed at construction.
+PUGI__FN xpath_value_type xpath_variable::type() const
+{
+    const xpath_value_type value_type = _type;
+
+    return value_type;
+}
+
+// Returns the stored boolean, or false when the variable is not of boolean type.
+PUGI__FN bool xpath_variable::get_boolean() const
+{
+    if (_type != xpath_type_boolean) return false;
+
+    return static_cast<const impl::xpath_variable_boolean*>(this)->value;
+}
+
+// Returns the stored number, or the value of impl::gen_nan() when the variable is not numeric.
+PUGI__FN double xpath_variable::get_number() const
+{
+    if (_type != xpath_type_number) return impl::gen_nan();
+
+    return static_cast<const impl::xpath_variable_number*>(this)->value;
+}
+
+// Returns the stored string. Never returns null: a non-string variable or an unset
+// string value yields the empty string.
+PUGI__FN const char_t* xpath_variable::get_string() const
+{
+    if (_type != xpath_type_string) return PUGIXML_TEXT("");
+
+    const char_t* stored = static_cast<const impl::xpath_variable_string*>(this)->value;
+
+    return stored ? stored : PUGIXML_TEXT("");
+}
+
+// Returns the stored node set, or the shared impl::dummy_node_set when the variable
+// is not of node-set type.
+PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
+{
+    if (_type != xpath_type_node_set) return impl::dummy_node_set;
+
+    return static_cast<const impl::xpath_variable_node_set*>(this)->value;
+}
+
+// Stores a boolean value. Returns false (leaving the variable unchanged) on type mismatch.
+PUGI__FN bool xpath_variable::set(bool value)
+{
+    const bool type_matches = _type == xpath_type_boolean;
+
+    if (type_matches)
+        static_cast<impl::xpath_variable_boolean*>(this)->value = value;
+
+    return type_matches;
+}
+
+// Stores a numeric value. Returns false (leaving the variable unchanged) on type mismatch.
+PUGI__FN bool xpath_variable::set(double value)
+{
+    const bool type_matches = _type == xpath_type_number;
+
+    if (type_matches)
+        static_cast<impl::xpath_variable_number*>(this)->value = value;
+
+    return type_matches;
+}
+
+// Stores a copy of `value` in a string variable.
+// Returns false, leaving the previous value intact, when the variable is not of string
+// type, when `value` is null, or when the copy cannot be allocated.
+PUGI__FN bool xpath_variable::set(const char_t* value)
+{
+    if (_type != xpath_type_string) return false;
+
+    // a null pointer would otherwise be passed to strlength/memcpy below
+    if (!value) return false;
+
+    impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
+
+    // duplicate string (size is in bytes and includes the terminator)
+    size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
+
+    char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
+    if (!copy) return false;
+
+    memcpy(copy, value, size);
+
+    // replace old string only after the new copy is ready
+    if (var->value) impl::xml_memory::deallocate(var->value);
+    var->value = copy;
+
+    return true;
+}
+
+// Stores a copy of a node set. Returns false (leaving the variable unchanged) on type mismatch.
+PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
+{
+    const bool type_matches = _type == xpath_type_node_set;
+
+    if (type_matches)
+        static_cast<impl::xpath_variable_node_set*>(this)->value = value;
+
+    return type_matches;
+}
+
+// Creates an empty set: every hash bucket starts as a null chain.
+PUGI__FN xpath_variable_set::xpath_variable_set()
+{
+    const size_t bucket_count = sizeof(_data) / sizeof(_data[0]);
+
+    for (size_t bucket = 0; bucket < bucket_count; ++bucket)
+        _data[bucket] = 0;
+}
+
+// Destroys the variable chain of every bucket.
+PUGI__FN xpath_variable_set::~xpath_variable_set()
+{
+    const size_t bucket_count = sizeof(_data) / sizeof(_data[0]);
+
+    for (size_t bucket = 0; bucket < bucket_count; ++bucket)
+        _destroy(_data[bucket]);
+}
+
+// Copy constructor: starts from an empty set, then takes over a clone of rhs via _assign.
+PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs)
+{
+  const size_t bucket_count = sizeof(_data) / sizeof(_data[0]);
+
+  // _assign swaps the current contents out, so they must be valid (null) first
+  for (size_t bucket = 0; bucket != bucket_count; ++bucket)
+    _data[bucket] = 0;
+
+  _assign(rhs);
+}
+
+// Copy assignment: replaces the contents with a clone of rhs; self-assignment is a no-op.
+PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs)
+{
+  if (this != &rhs)
+    _assign(rhs);
+
+  return *this;
+}
+
+#if __cplusplus >= 201103
+// Move constructor: steals every chain from rhs and leaves rhs empty.
+PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs)
+{
+  const size_t bucket_count = sizeof(_data) / sizeof(_data[0]);
+
+  for (size_t bucket = 0; bucket != bucket_count; ++bucket) {
+    xpath_variable*& source = rhs._data[bucket];
+
+    _data[bucket] = source;
+    source = 0;
+  }
+}
+
+// Move assignment: destroys the variables currently held, then steals every chain
+// from rhs, leaving rhs empty. Returns *this.
+PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs)
+{
+  // Self-move must be a no-op: without this guard the loop below would destroy
+  // our own chains and then null them out, silently dropping every variable.
+  if (this == &rhs) return *this;
+
+  for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) {
+    // release what we own before taking over the source chain
+    _destroy(_data[i]);
+
+    _data[i] = rhs._data[i];
+    rhs._data[i] = 0;
+  }
+
+  return *this;
+}
+#endif
+
+// Replaces the contents of this set with a clone of rhs.
+// The clone is built in a scratch set first; if any chain fails to clone,
+// this set is left unchanged.
+PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs)
+{
+  xpath_variable_set scratch;
+
+  const size_t bucket_count = sizeof(_data) / sizeof(_data[0]);
+
+  for (size_t bucket = 0; bucket != bucket_count; ++bucket) {
+    xpath_variable* source = rhs._data[bucket];
+    if (!source) continue;
+
+    // on failure, scratch's destructor releases whatever was cloned so far
+    if (!_clone(source, &scratch._data[bucket])) return;
+  }
+
+  // scratch leaves scope holding the old contents, which its destructor releases
+  _swap(scratch);
+}
+
+// Exchanges the chain heads of this set and rhs, slot by slot.
+PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs)
+{
+  const size_t bucket_count = sizeof(_data) / sizeof(_data[0]);
+
+  for (size_t bucket = 0; bucket != bucket_count; ++bucket) {
+    xpath_variable*& ours = _data[bucket];
+    xpath_variable*& theirs = rhs._data[bucket];
+
+    xpath_variable* saved = ours;
+    ours = theirs;
+    theirs = saved;
+  }
+}
+
+// Looks up a variable by name. Returns the variable, or null if the set has none with that name.
+PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const
+{
+  // the name's hash selects which chain to search
+  const size_t bucket_count = sizeof(_data) / sizeof(_data[0]);
+  const size_t bucket = impl::hash_string(name) % bucket_count;
+
+  xpath_variable* candidate = _data[bucket];
+
+  // walk the chain until the name matches or the chain ends
+  while (candidate && !impl::strequal(candidate->name(), name))
+    candidate = candidate->_next;
+
+  return candidate;
+}
+
+// Clones the chain starting at var, storing the head of the copy in *out_result.
+// Returns false if allocating a variable or copying its value fails; the clones made
+// so far stay reachable from *out_result so the caller can release them.
+PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result)
+{
+  // slot that receives the next clone: the output head first, then each clone's _next
+  xpath_variable** link = out_result;
+
+  for (; var; var = var->_next) {
+    xpath_variable* copy = impl::new_xpath_variable(var->_type, var->name());
+    if (!copy) return false;
+
+    // attach before copying the value, so a failed copy does not orphan the node
+    *link = copy;
+    link = &copy->_next;
+
+    // copying the value can fail due to out-of-memory conditions
+    if (!impl::copy_xpath_variable(copy, var)) return false;
+  }
+
+  return true;
+}
+
+// Deletes every variable in the chain starting at var.
+PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var)
+{
+  for (xpath_variable* following = 0; var; var = following) {
+    // read the link before the node is deleted
+    following = var->_next;
+
+    impl::delete_xpath_variable(var->_type, var);
+  }
+}
+
+// Returns the variable called name with the requested type, creating it if needed.
+// Returns null if a variable of that name exists with a different type, or if
+// a new variable cannot be allocated.
+PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
+{
+  // an existing variable is reused only when its type agrees
+  if (xpath_variable* existing = _find(name))
+    return existing->type() == type ? existing : 0;
+
+  xpath_variable* created = impl::new_xpath_variable(type, name);
+  if (!created) return 0;
+
+  // push the new variable onto the front of its chain
+  const size_t bucket = impl::hash_string(name) % (sizeof(_data) / sizeof(_data[0]));
+
+  created->_next = _data[bucket];
+  _data[bucket] = created;
+
+  return created;
+}
+
+// Sets (creating if necessary) the boolean variable called name.
+// Returns false if the variable cannot be added with this type or the value cannot be stored.
+PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
+{
+  xpath_variable* target = add(name, xpath_type_boolean);
+  if (!target) return false;
+
+  return target->set(value);
+}
+
+// Sets (creating if necessary) the number variable called name.
+// Returns false if the variable cannot be added with this type or the value cannot be stored.
+PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
+{
+  xpath_variable* target = add(name, xpath_type_number);
+  if (!target) return false;
+
+  return target->set(value);
+}
+
+// Sets (creating if necessary) the string variable called name.
+// Returns false if the variable cannot be added with this type or the value cannot be stored.
+PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
+{
+  xpath_variable* target = add(name, xpath_type_string);
+  if (!target) return false;
+
+  return target->set(value);
+}
+
+// Sets (creating if necessary) the node-set variable called name.
+// Returns false if the variable cannot be added with this type or the value cannot be stored.
+PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
+{
+  xpath_variable* target = add(name, xpath_type_node_set);
+  if (!target) return false;
+
+  return target->set(value);
+}
+
+// Looks up a variable by name; returns null if no variable with that name exists.
+PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
+{
+ return _find(name);
+}
+
+// Const overload of get(): same lookup, but the variable is returned as read-only.
+PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
+{
+ return _find(name);
+}
+
+// Compiles an XPath expression into a query object.
+// On success _impl holds the compiled query and _result.error is cleared; if parsing
+// yields no root, _impl stays null and _result is left as the parser filled it in.
+// If the query implementation cannot be created, this throws std::bad_alloc, or records
+// "Out of memory" in _result when PUGIXML_NO_EXCEPTIONS is defined.
+PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
+{
+ impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
+
+ if (!qimpl) {
+#ifdef PUGIXML_NO_EXCEPTIONS
+ _result.error = "Out of memory";
+#else
+ throw std::bad_alloc();
+#endif
+ } else {
+ using impl::auto_deleter; // MSVC7 workaround
+ // NOTE: this local is named 'impl' like the namespace; the qualified impl:: names below still refer to the namespace.
+ // The guard presumably destroys qimpl on scope exit unless release() is called - confirm against auto_deleter.
+ auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy);
+
+ qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
+
+ if (qimpl->root) {
+ qimpl->root->optimize(&qimpl->alloc);
+
+ // parsing succeeded: take the query implementation over from the guard
+ _impl = impl.release();
+ _result.error = 0;
+ }
+ }
+}
+
+// Constructs an empty query: there is no compiled implementation, so the query tests as false.
+PUGI__FN xpath_query::xpath_query(): _impl(0)
+{
+}
+
+// Releases the compiled query, if there is one.
+PUGI__FN xpath_query::~xpath_query()
+{
+  if (!_impl) return;
+
+  impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
+}
+
+#if __cplusplus >= 201103
+// Move constructor: takes over the compiled query and parse result of rhs,
+// leaving rhs as an empty query with a default parse result.
+PUGI__FN xpath_query::xpath_query(xpath_query&& rhs)
+{
+  // adopt the source state
+  _result = rhs._result;
+  _impl = rhs._impl;
+
+  // reset the source so its destructor has nothing to release
+  rhs._result = xpath_parse_result();
+  rhs._impl = 0;
+}
+
+// Move assignment: releases the current compiled query, then takes over the state of rhs,
+// leaving rhs as an empty query. Self-assignment is a no-op.
+PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs)
+{
+  if (this != &rhs) {
+    // drop what we currently own
+    if (_impl)
+      impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
+
+    // adopt the source state
+    _result = rhs._result;
+    _impl = rhs._impl;
+
+    // reset the source
+    rhs._result = xpath_parse_result();
+    rhs._impl = 0;
+  }
+
+  return *this;
+}
+#endif
+
+// Returns the result type of the compiled expression, or xpath_type_none for an empty query.
+PUGI__FN xpath_value_type xpath_query::return_type() const
+{
+  if (_impl) {
+    impl::xpath_query_impl* compiled = static_cast<impl::xpath_query_impl*>(_impl);
+
+    return compiled->root->rettype();
+  }
+
+  return xpath_type_none;
+}
+
+// Evaluates the query with n as the context node and returns the result as a boolean.
+// An empty query evaluates to false.
+PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
+{
+ if (!_impl) return false;
+
+ // the two 1s are presumably context position and size - confirm against impl::xpath_context
+ impl::xpath_context c(n, 1, 1);
+ impl::xpath_stack_data sd;
+
+#ifdef PUGIXML_NO_EXCEPTIONS
+ // a non-zero setjmp result means control came back through sd.error_handler; report that as false
+ if (setjmp(sd.error_handler)) return false;
+#endif
+
+ return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
+}
+
+// Evaluates the query with n as the context node and returns the result as a number.
+// An empty query evaluates to NaN.
+PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
+{
+ if (!_impl) return impl::gen_nan();
+
+ // the two 1s are presumably context position and size - confirm against impl::xpath_context
+ impl::xpath_context c(n, 1, 1);
+ impl::xpath_stack_data sd;
+
+#ifdef PUGIXML_NO_EXCEPTIONS
+ // a non-zero setjmp result means control came back through sd.error_handler; report that as NaN
+ if (setjmp(sd.error_handler)) return impl::gen_nan();
+#endif
+
+ return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
+}
+
+#ifndef PUGIXML_NO_STL
+// Evaluates the query with n as the context node and returns the result as an STL string.
+PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
+{
+  impl::xpath_stack_data sd;
+
+  impl::xpath_query_impl* compiled = static_cast<impl::xpath_query_impl*>(_impl);
+  impl::xpath_string evaluated = impl::evaluate_string_impl(compiled, n, sd);
+
+  // copy the characters out while the evaluation stack (sd) is still alive
+  return string_t(evaluated.c_str(), evaluated.length());
+}
+#endif
+
+// Evaluates the query with n as the context node and writes the string result into buffer.
+// At most capacity characters are written, including the terminating zero, so the
+// result is truncated when it does not fit. With capacity == 0 the buffer is not touched.
+// Returns the full size of the result in characters, terminator included.
+PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
+{
+  impl::xpath_stack_data sd;
+
+  impl::xpath_query_impl* compiled = static_cast<impl::xpath_query_impl*>(_impl);
+  impl::xpath_string evaluated = impl::evaluate_string_impl(compiled, n, sd);
+
+  const size_t full_size = evaluated.length() + 1;
+
+  if (capacity == 0) return full_size;
+
+  // number of characters to emit, terminator included
+  size_t emitted = capacity;
+  if (full_size < capacity) emitted = full_size;
+  assert(emitted > 0);
+
+  memcpy(buffer, evaluated.c_str(), (emitted - 1) * sizeof(char_t));
+  buffer[emitted - 1] = 0;
+
+  return full_size;
+}
+
+// Evaluates the query with n as the context node and returns the resulting node set.
+// Returns an empty node set when evaluate_node_set_prepare yields no root.
+PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
+{
+ impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
+ if (!root) return xpath_node_set();
+
+ // the two 1s are presumably context position and size - confirm against impl::xpath_context
+ impl::xpath_context c(n, 1, 1);
+ impl::xpath_stack_data sd;
+
+#ifdef PUGIXML_NO_EXCEPTIONS
+ // a non-zero setjmp result means control came back through sd.error_handler; report an empty set
+ if (setjmp(sd.error_handler)) return xpath_node_set();
+#endif
+
+ impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
+
+ // copy the raw result into an owning node set, keeping its type
+ return xpath_node_set(r.begin(), r.end(), r.type());
+}
+
+// Evaluates the query with n as the context node and returns r.first() of the result.
+// Returns an empty xpath_node when evaluate_node_set_prepare yields no root.
+PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const
+{
+ impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
+ if (!root) return xpath_node();
+
+ // the two 1s are presumably context position and size - confirm against impl::xpath_context
+ impl::xpath_context c(n, 1, 1);
+ impl::xpath_stack_data sd;
+
+#ifdef PUGIXML_NO_EXCEPTIONS
+ // a non-zero setjmp result means control came back through sd.error_handler; report an empty node
+ if (setjmp(sd.error_handler)) return xpath_node();
+#endif
+
+ // nodeset_eval_first (rather than nodeset_eval_all) is requested since only r.first() is used
+ impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
+
+ return r.first();
+}
+
+// Returns the parse result recorded for this query (its error field is cleared on a successful parse).
+PUGI__FN const xpath_parse_result& xpath_query::result() const
+{
+ return _result;
+}
+
+// Intentionally empty: only this function's address is used, as the non-null
+// value returned by xpath_query's safe bool conversion operator.
+PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
+{
+}
+
+// Safe bool conversion: yields a non-null function pointer when a compiled query is held, null otherwise.
+PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
+{
+  if (!_impl) return 0;
+
+  return unspecified_bool_xpath_query;
+}
+
+// Returns true when the query holds no compiled implementation.
+PUGI__FN bool xpath_query::operator!() const
+{
+  const bool has_query = (_impl != 0);
+
+  return !has_query;
+}
+
+// Compiles the XPath expression and evaluates it with this node as context,
+// returning the single node produced by xpath_query::evaluate_node.
+PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const
+{
+  xpath_query compiled(query, variables);
+
+  return compiled.evaluate_node(*this);
+}
+
+// Evaluates a precompiled query with this node as context,
+// returning the single node produced by xpath_query::evaluate_node.
+PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const
+{
+  const xpath_node context(*this);
+
+  return query.evaluate_node(context);
+}
+
+// Compiles the XPath expression and evaluates it with this node as context,
+// returning the resulting node set.
+PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
+{
+  xpath_query compiled(query, variables);
+
+  return compiled.evaluate_node_set(*this);
+}
+
+// Evaluates a precompiled query with this node as context, returning the resulting node set.
+PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
+{
+  const xpath_node context(*this);
+
+  return query.evaluate_node_set(context);
+}
+
+// Same behavior as select_node(query, variables): compiles the expression and
+// returns the single node produced by xpath_query::evaluate_node.
+PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
+{
+  xpath_query compiled(query, variables);
+
+  return compiled.evaluate_node(*this);
+}
+
+// Same behavior as select_node(query): evaluates a precompiled query with this node as context.
+PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
+{
+  const xpath_node context(*this);
+
+  return query.evaluate_node(context);
+}
+}
+
+#endif
+
+#ifdef __BORLANDC__
+# pragma option pop
+#endif
+
+// Intel C++ does not properly keep warning state for function templates,
+// so popping warning state at the end of translation unit leads to warnings in the middle.
+#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
+# pragma warning(pop)
+#endif
+
+// Undefine all local macros (makes sure we're not leaking macros in header-only mode)
+#undef PUGI__NO_INLINE
+#undef PUGI__UNLIKELY
+#undef PUGI__STATIC_ASSERT
+#undef PUGI__DMC_VOLATILE
+#undef PUGI__MSVC_CRT_VERSION
+#undef PUGI__NS_BEGIN
+#undef PUGI__NS_END
+#undef PUGI__FN
+#undef PUGI__FN_NO_INLINE
+#undef PUGI__GETPAGE_IMPL
+#undef PUGI__GETPAGE
+#undef PUGI__NODETYPE
+#undef PUGI__IS_CHARTYPE_IMPL
+#undef PUGI__IS_CHARTYPE
+#undef PUGI__IS_CHARTYPEX
+#undef PUGI__ENDSWITH
+#undef PUGI__SKIPWS
+#undef PUGI__OPTSET
+#undef PUGI__PUSHNODE
+#undef PUGI__POPNODE
+#undef PUGI__SCANFOR
+#undef PUGI__SCANWHILE
+#undef PUGI__SCANWHILE_UNROLL
+#undef PUGI__ENDSEG
+#undef PUGI__THROW_ERROR
+#undef PUGI__CHECK_ERROR
+
+#endif
+
+/**
+ * Copyright (c) 2006-2015 Arseny Kapoulkine
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
diff --git a/moses2/pugixml.hpp b/moses2/pugixml.hpp
new file mode 100644
index 000000000..13bf7917b
--- /dev/null
+++ b/moses2/pugixml.hpp
@@ -0,0 +1,1391 @@
+/**
+ * pugixml parser - version 1.7
+ * --------------------------------------------------------
+ * Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
+ * Report bugs and download new versions at http://pugixml.org/
+ *
+ * This library is distributed under the MIT License. See notice at the end
+ * of this file.
+ *
+ * This work is based on the pugxml parser, which is:
+ * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
+ */
+
+#ifndef PUGIXML_VERSION
+// Define version macro; evaluates to major * 100 + minor so that it's safe to use in less-than comparisons
+# define PUGIXML_VERSION 170
+#endif
+
+// Include user configuration file (this can define various configuration macros)
+#include "pugiconfig.hpp"
+
+#ifndef HEADER_PUGIXML_HPP
+#define HEADER_PUGIXML_HPP
+
+// Include stddef.h for size_t and ptrdiff_t
+#include <stddef.h>
+
+// Include exception header for XPath
+#if !defined(PUGIXML_NO_XPATH) && !defined(PUGIXML_NO_EXCEPTIONS)
+# include <exception>
+#endif
+
+// Include STL headers
+#ifndef PUGIXML_NO_STL
+# include <iterator>
+# include <iosfwd>
+# include <string>
+#endif
+
+// Macro for deprecated features
+#ifndef PUGIXML_DEPRECATED
+# if defined(__GNUC__)
+# define PUGIXML_DEPRECATED __attribute__((deprecated))
+# elif defined(_MSC_VER) && _MSC_VER >= 1300
+# define PUGIXML_DEPRECATED __declspec(deprecated)
+# else
+# define PUGIXML_DEPRECATED
+# endif
+#endif
+
+// If no API is defined, assume default
+#ifndef PUGIXML_API
+# define PUGIXML_API
+#endif
+
+// If no API for classes is defined, assume default
+#ifndef PUGIXML_CLASS
+# define PUGIXML_CLASS PUGIXML_API
+#endif
+
+// If no API for functions is defined, assume default
+#ifndef PUGIXML_FUNCTION
+# define PUGIXML_FUNCTION PUGIXML_API
+#endif
+
+// If the platform is known to have long long support, enable long long functions
+#ifndef PUGIXML_HAS_LONG_LONG
+# if __cplusplus >= 201103
+# define PUGIXML_HAS_LONG_LONG
+# elif defined(_MSC_VER) && _MSC_VER >= 1400
+# define PUGIXML_HAS_LONG_LONG
+# endif
+#endif
+
+// Character interface macros
+#ifdef PUGIXML_WCHAR_MODE
+# define PUGIXML_TEXT(t) L ## t
+# define PUGIXML_CHAR wchar_t
+#else
+# define PUGIXML_TEXT(t) t
+# define PUGIXML_CHAR char
+#endif
+
+namespace pugi
+{
+// Character type used for all internal storage and operations; depends on PUGIXML_WCHAR_MODE
+typedef PUGIXML_CHAR char_t;
+
+#ifndef PUGIXML_NO_STL
+// String type used for operations that work with STL string; depends on PUGIXML_WCHAR_MODE
+typedef std::basic_string<PUGIXML_CHAR, std::char_traits<PUGIXML_CHAR>, std::allocator<PUGIXML_CHAR> > string_t;
+#endif
+}
+
+// The PugiXML namespace
+namespace pugi
+{
+// Tree node types
+enum xml_node_type {
+ node_null, // Empty (null) node handle
+ node_document, // A document tree's absolute root
+ node_element, // Element tag, i.e. '<node/>'
+ node_pcdata, // Plain character data, i.e. 'text'
+ node_cdata, // Character data, i.e. '<![CDATA[text]]>'
+ node_comment, // Comment tag, i.e. '<!-- text -->'
+ node_pi, // Processing instruction, i.e. '<?name?>'
+ node_declaration, // Document declaration, i.e. '<?xml version="1.0"?>'
+ node_doctype // Document type declaration, i.e. '<!DOCTYPE doc>'
+};
+
+// Parsing options
+
+// Minimal parsing mode (equivalent to turning all other flags off).
+// Only elements and PCDATA sections are added to the DOM tree, no text conversions are performed.
+const unsigned int parse_minimal = 0x0000;
+
+// This flag determines if processing instructions (node_pi) are added to the DOM tree. This flag is off by default.
+const unsigned int parse_pi = 0x0001;
+
+// This flag determines if comments (node_comment) are added to the DOM tree. This flag is off by default.
+const unsigned int parse_comments = 0x0002;
+
+// This flag determines if CDATA sections (node_cdata) are added to the DOM tree. This flag is on by default.
+const unsigned int parse_cdata = 0x0004;
+
+// This flag determines if plain character data (node_pcdata) that consist only of whitespace are added to the DOM tree.
+// This flag is off by default; turning it on usually results in slower parsing and more memory consumption.
+const unsigned int parse_ws_pcdata = 0x0008;
+
+// This flag determines if character and entity references are expanded during parsing. This flag is on by default.
+const unsigned int parse_escapes = 0x0010;
+
+// This flag determines if EOL characters are normalized (converted to #xA) during parsing. This flag is on by default.
+const unsigned int parse_eol = 0x0020;
+
+// This flag determines if attribute values are normalized using CDATA normalization rules during parsing. This flag is on by default.
+const unsigned int parse_wconv_attribute = 0x0040;
+
+// This flag determines if attribute values are normalized using NMTOKENS normalization rules during parsing. This flag is off by default.
+const unsigned int parse_wnorm_attribute = 0x0080;
+
+// This flag determines if document declaration (node_declaration) is added to the DOM tree. This flag is off by default.
+const unsigned int parse_declaration = 0x0100;
+
+// This flag determines if document type declaration (node_doctype) is added to the DOM tree. This flag is off by default.
+const unsigned int parse_doctype = 0x0200;
+
+// This flag determines if plain character data (node_pcdata) that is the only child of the parent node and that consists only
+// of whitespace is added to the DOM tree.
+// This flag is off by default; turning it on may result in slower parsing and more memory consumption.
+const unsigned int parse_ws_pcdata_single = 0x0400;
+
+// This flag determines if leading and trailing whitespace is to be removed from plain character data. This flag is off by default.
+const unsigned int parse_trim_pcdata = 0x0800;
+
+// This flag determines if plain character data that does not have a parent node is added to the DOM tree, and if an empty document
+// is a valid document. This flag is off by default.
+const unsigned int parse_fragment = 0x1000;
+
+// The default parsing mode.
+// Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,
+// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
+const unsigned int parse_default = parse_cdata | parse_escapes | parse_wconv_attribute | parse_eol;
+
+// The full parsing mode.
+// Nodes of all types are added to the DOM tree, character/reference entities are expanded,
+// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
+const unsigned int parse_full = parse_default | parse_pi | parse_comments | parse_declaration | parse_doctype;
+
+// These flags determine the encoding of input data for XML document
+enum xml_encoding {
+ encoding_auto, // Auto-detect input encoding using BOM or < / <? detection; use UTF8 if BOM is not found
+ encoding_utf8, // UTF8 encoding
+ encoding_utf16_le, // Little-endian UTF16
+ encoding_utf16_be, // Big-endian UTF16
+ encoding_utf16, // UTF16 with native endianness
+ encoding_utf32_le, // Little-endian UTF32
+ encoding_utf32_be, // Big-endian UTF32
+ encoding_utf32, // UTF32 with native endianness
+ encoding_wchar, // The same encoding wchar_t has (either UTF16 or UTF32)
+ encoding_latin1 // Presumably ISO-8859-1 (Latin-1), going by the name - TODO confirm
+};
+
+// Formatting flags
+
+// Indent each node written to the output stream with one indentation string per level of its depth in the DOM tree. This flag is on by default.
+const unsigned int format_indent = 0x01;
+
+// Write encoding-specific BOM to the output stream. This flag is off by default.
+const unsigned int format_write_bom = 0x02;
+
+// Use raw output mode (no indentation and no line breaks are written). This flag is off by default.
+const unsigned int format_raw = 0x04;
+
+// Omit default XML declaration even if there is no declaration in the document. This flag is off by default.
+const unsigned int format_no_declaration = 0x08;
+
+// Don't escape attribute values and PCDATA contents. This flag is off by default.
+const unsigned int format_no_escapes = 0x10;
+
+// Open file using text mode in xml_document::save_file. This enables special character (i.e. new-line) conversions on some systems. This flag is off by default.
+const unsigned int format_save_file_text = 0x20;
+
+// Write every attribute on a new line with appropriate indentation. This flag is off by default.
+const unsigned int format_indent_attributes = 0x40;
+
+// The default set of formatting flags.
+// Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none.
+const unsigned int format_default = format_indent;
+
+// Forward declarations
+struct xml_attribute_struct;
+struct xml_node_struct;
+
+class xml_node_iterator;
+class xml_attribute_iterator;
+class xml_named_node_iterator;
+
+class xml_tree_walker;
+
+struct xml_parse_result;
+
+class xml_node;
+
+class xml_text;
+
+#ifndef PUGIXML_NO_XPATH
+class xpath_node;
+class xpath_node_set;
+class xpath_query;
+class xpath_variable_set;
+#endif
+
+// Range-based for loop support: a lightweight pair of iterators exposing begin()/end().
+template <typename It> class xml_object_range
+{
+public:
+  typedef It iterator;
+  typedef It const_iterator;
+
+  // Wraps the half-open iterator range [b, e)
+  xml_object_range(It b, It e) : _begin(b), _end(e) {}
+
+  // First iterator of the range
+  It begin() const { return _begin; }
+
+  // Past-the-end iterator of the range
+  It end() const { return _end; }
+
+private:
+  It _begin;
+  It _end;
+};
+
+// Writer interface for node printing (see xml_node::print)
+// Abstract: implementations override write() to receive the output in chunks.
+class PUGIXML_CLASS xml_writer
+{
+public:
+ // Virtual destructor so implementations can be destroyed through an xml_writer pointer
+ virtual ~xml_writer() {}
+
+ // Write memory chunk into stream/file/whatever
+ // data points at the chunk, size is its length (presumably in bytes - confirm)
+ virtual void write(const void* data, size_t size) = 0;
+};
+
+// xml_writer implementation for FILE*
+class PUGIXML_CLASS xml_writer_file: public xml_writer
+{
+public:
+ // Construct writer from a FILE* object; void* is used to avoid header dependencies on stdio
+ xml_writer_file(void* file);
+
+ // xml_writer override (defined out of line)
+ virtual void write(const void* data, size_t size);
+
+private:
+ // Type-erased FILE* handle; presumably the constructor argument - confirm in pugixml.cpp
+ void* file;
+};
+
+#ifndef PUGIXML_NO_STL
+// xml_writer implementation for streams
+class PUGIXML_CLASS xml_writer_stream: public xml_writer
+{
+public:
+ // Construct writer from an output stream object
+ // Two overloads: one for narrow (char) streams, one for wide (wchar_t) streams
+ xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream);
+ xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream);
+
+ // xml_writer override (defined out of line)
+ virtual void write(const void* data, size_t size);
+
+private:
+ // Target streams; presumably only the one matching the constructor used is non-null - confirm in pugixml.cpp
+ std::basic_ostream<char, std::char_traits<char> >* narrow_stream;
+ std::basic_ostream<wchar_t, std::char_traits<wchar_t> >* wide_stream;
+};
+#endif
+
+// A light-weight handle for manipulating attributes in DOM tree
+// The handle wraps a single xml_attribute_struct pointer (_attr).
+class PUGIXML_CLASS xml_attribute
+{
+ friend class xml_attribute_iterator;
+ friend class xml_node;
+
+private:
+ // Wrapped internal attribute pointer
+ xml_attribute_struct* _attr;
+
+ // Function-pointer type returned by the safe bool conversion operator below
+ typedef void (*unspecified_bool_type)(xml_attribute***);
+
+public:
+ // Default constructor. Constructs an empty attribute.
+ xml_attribute();
+
+ // Constructs attribute from internal pointer
+ explicit xml_attribute(xml_attribute_struct* attr);
+
+ // Safe bool conversion operator
+ operator unspecified_bool_type() const;
+
+ // Borland C++ workaround
+ bool operator!() const;
+
+ // Comparison operators (compares wrapped attribute pointers)
+ bool operator==(const xml_attribute& r) const;
+ bool operator!=(const xml_attribute& r) const;
+ bool operator<(const xml_attribute& r) const;
+ bool operator>(const xml_attribute& r) const;
+ bool operator<=(const xml_attribute& r) const;
+ bool operator>=(const xml_attribute& r) const;
+
+ // Check if attribute is empty
+ bool empty() const;
+
+ // Get attribute name/value, or "" if attribute is empty
+ const char_t* name() const;
+ const char_t* value() const;
+
+ // Get attribute value, or the default value if attribute is empty
+ const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const;
+
+ // Get attribute value as a number, or the default value if conversion did not succeed or attribute is empty
+ int as_int(int def = 0) const;
+ unsigned int as_uint(unsigned int def = 0) const;
+ double as_double(double def = 0) const;
+ float as_float(float def = 0) const;
+
+ // 64-bit variants, only declared when PUGIXML_HAS_LONG_LONG is defined
+#ifdef PUGIXML_HAS_LONG_LONG
+ long long as_llong(long long def = 0) const;
+ unsigned long long as_ullong(unsigned long long def = 0) const;
+#endif
+
+ // Get attribute value as bool (returns true if first character is in '1tTyY' set), or the default value if attribute is empty
+ bool as_bool(bool def = false) const;
+
+ // Set attribute name/value (returns false if attribute is empty or there is not enough memory)
+ bool set_name(const char_t* rhs);
+ bool set_value(const char_t* rhs);
+
+ // Set attribute value with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
+ bool set_value(int rhs);
+ bool set_value(unsigned int rhs);
+ bool set_value(double rhs);
+ bool set_value(float rhs);
+ bool set_value(bool rhs);
+
+ // 64-bit variants, only declared when PUGIXML_HAS_LONG_LONG is defined
+#ifdef PUGIXML_HAS_LONG_LONG
+ bool set_value(long long rhs);
+ bool set_value(unsigned long long rhs);
+#endif
+
+ // Set attribute value (equivalent to set_value without error checking)
+ xml_attribute& operator=(const char_t* rhs);
+ xml_attribute& operator=(int rhs);
+ xml_attribute& operator=(unsigned int rhs);
+ xml_attribute& operator=(double rhs);
+ xml_attribute& operator=(float rhs);
+ xml_attribute& operator=(bool rhs);
+
+ // 64-bit variants, only declared when PUGIXML_HAS_LONG_LONG is defined
+#ifdef PUGIXML_HAS_LONG_LONG
+ xml_attribute& operator=(long long rhs);
+ xml_attribute& operator=(unsigned long long rhs);
+#endif
+
+ // Get next/previous attribute in the attribute list of the parent node
+ xml_attribute next_attribute() const;
+ xml_attribute previous_attribute() const;
+
+ // Get hash value (unique for handles to the same object)
+ size_t hash_value() const;
+
+ // Get internal pointer
+ xml_attribute_struct* internal_object() const;
+};
+
+#ifdef __BORLANDC__
+// Borland C++ workaround
+bool PUGIXML_FUNCTION operator&&(const xml_attribute& lhs, bool rhs);
+bool PUGIXML_FUNCTION operator||(const xml_attribute& lhs, bool rhs);
+#endif
+
+// A light-weight handle for manipulating nodes in DOM tree
+class PUGIXML_CLASS xml_node
+{
+ friend class xml_attribute_iterator;
+ friend class xml_node_iterator;
+ friend class xml_named_node_iterator;
+
+protected:
+ xml_node_struct* _root;
+
+ typedef void (*unspecified_bool_type)(xml_node***);
+
+public:
+ // Default constructor. Constructs an empty node.
+ xml_node();
+
+ // Constructs node from internal pointer
+ explicit xml_node(xml_node_struct* p);
+
+ // Safe bool conversion operator
+ operator unspecified_bool_type() const;
+
+ // Borland C++ workaround
+ bool operator!() const;
+
+ // Comparison operators (compares wrapped node pointers)
+ bool operator==(const xml_node& r) const;
+ bool operator!=(const xml_node& r) const;
+ bool operator<(const xml_node& r) const;
+ bool operator>(const xml_node& r) const;
+ bool operator<=(const xml_node& r) const;
+ bool operator>=(const xml_node& r) const;
+
+ // Check if node is empty.
+ bool empty() const;
+
+ // Get node type
+ xml_node_type type() const;
+
+ // Get node name, or "" if node is empty or it has no name
+ const char_t* name() const;
+
+ // Get node value, or "" if node is empty or it has no value
+ // Note: For <node>text</node> node.value() does not return "text"! Use child_value() or text() methods to access text inside nodes.
+ const char_t* value() const;
+
+ // Get attribute list
+ xml_attribute first_attribute() const;
+ xml_attribute last_attribute() const;
+
+ // Get children list
+ xml_node first_child() const;
+ xml_node last_child() const;
+
+ // Get next/previous sibling in the children list of the parent node
+ xml_node next_sibling() const;
+ xml_node previous_sibling() const;
+
+ // Get parent node
+ xml_node parent() const;
+
+ // Get root of DOM tree this node belongs to
+ xml_node root() const;
+
+ // Get text object for the current node
+ xml_text text() const;
+
+ // Get child, attribute or next/previous sibling with the specified name
+ xml_node child(const char_t* name) const;
+ xml_attribute attribute(const char_t* name) const;
+ xml_node next_sibling(const char_t* name) const;
+ xml_node previous_sibling(const char_t* name) const;
+
+ // Get attribute, starting the search from a hint (and updating hint so that searching for a sequence of attributes is fast)
+ xml_attribute attribute(const char_t* name, xml_attribute& hint) const;
+
+ // Get child value of current node; that is, value of the first child node of type PCDATA/CDATA
+ const char_t* child_value() const;
+
+ // Get child value of child with specified name. Equivalent to child(name).child_value().
+ const char_t* child_value(const char_t* name) const;
+
+ // Set node name/value (returns false if node is empty, there is not enough memory, or node can not have name/value)
+ bool set_name(const char_t* rhs);
+ bool set_value(const char_t* rhs);
+
+ // Add attribute with specified name. Returns added attribute, or empty attribute on errors.
+ xml_attribute append_attribute(const char_t* name);
+ xml_attribute prepend_attribute(const char_t* name);
+ xml_attribute insert_attribute_after(const char_t* name, const xml_attribute& attr);
+ xml_attribute insert_attribute_before(const char_t* name, const xml_attribute& attr);
+
+ // Add a copy of the specified attribute. Returns added attribute, or empty attribute on errors.
+ xml_attribute append_copy(const xml_attribute& proto);
+ xml_attribute prepend_copy(const xml_attribute& proto);
+ xml_attribute insert_copy_after(const xml_attribute& proto, const xml_attribute& attr);
+ xml_attribute insert_copy_before(const xml_attribute& proto, const xml_attribute& attr);
+
+ // Add child node with specified type. Returns added node, or empty node on errors.
+ xml_node append_child(xml_node_type type = node_element);
+ xml_node prepend_child(xml_node_type type = node_element);
+ xml_node insert_child_after(xml_node_type type, const xml_node& node);
+ xml_node insert_child_before(xml_node_type type, const xml_node& node);
+
+ // Add child element with specified name. Returns added node, or empty node on errors.
+ xml_node append_child(const char_t* name);
+ xml_node prepend_child(const char_t* name);
+ xml_node insert_child_after(const char_t* name, const xml_node& node);
+ xml_node insert_child_before(const char_t* name, const xml_node& node);
+
+ // Add a copy of the specified node as a child. Returns added node, or empty node on errors.
+ xml_node append_copy(const xml_node& proto);
+ xml_node prepend_copy(const xml_node& proto);
+ xml_node insert_copy_after(const xml_node& proto, const xml_node& node);
+ xml_node insert_copy_before(const xml_node& proto, const xml_node& node);
+
+ // Move the specified node to become a child of this node. Returns moved node, or empty node on errors.
+ xml_node append_move(const xml_node& moved);
+ xml_node prepend_move(const xml_node& moved);
+ xml_node insert_move_after(const xml_node& moved, const xml_node& node);
+ xml_node insert_move_before(const xml_node& moved, const xml_node& node);
+
+ // Remove specified attribute
+ bool remove_attribute(const xml_attribute& a);
+ bool remove_attribute(const char_t* name);
+
+ // Remove specified child
+ bool remove_child(const xml_node& n);
+ bool remove_child(const char_t* name);
+
+ // Parses buffer as an XML document fragment and appends all nodes as children of the current node.
+ // Copies/converts the buffer, so it may be deleted or changed after the function returns.
+ // Note: append_buffer allocates memory that has the lifetime of the owning document; removing the appended nodes does not immediately reclaim that memory.
+ xml_parse_result append_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+ // Find attribute using predicate. Returns the first attribute for which the predicate
+ // returned true, or an empty attribute if there is none (or if this node is empty).
+ template <typename Predicate> xml_attribute find_attribute(Predicate pred) const {
+   if (_root) {
+     xml_attribute candidate = first_attribute();
+
+     // walk the attribute list in order until the predicate accepts one
+     while (candidate) {
+       if (pred(candidate)) return candidate;
+
+       candidate = candidate.next_attribute();
+     }
+   }
+
+   return xml_attribute();
+ }
+
+ // Find child node using predicate. Returns the first child for which the predicate
+ // returned true, or an empty node if there is none (or if this node is empty).
+ template <typename Predicate> xml_node find_child(Predicate pred) const {
+   if (_root) {
+     xml_node candidate = first_child();
+
+     // walk the direct children in order until the predicate accepts one
+     while (candidate) {
+       if (pred(candidate)) return candidate;
+
+       candidate = candidate.next_sibling();
+     }
+   }
+
+   return xml_node();
+ }
+
+ // Find node from subtree using predicate. Returns first node from subtree (depth-first), for which predicate returned true, or an empty node if none matches or this handle is empty. The walk starts at the first child, so this node itself is never passed to pred.
+ template <typename Predicate> xml_node find_node(Predicate pred) const {
+ if (!_root) return xml_node();
+
+ xml_node cur = first_child();
+
+ while (cur._root && cur._root != _root) { // stop when there is nothing left to visit or the walk has climbed back to this node
+ if (pred(cur)) return cur;
+
+ if (cur.first_child()) cur = cur.first_child(); // go down first
+ else if (cur.next_sibling()) cur = cur.next_sibling(); // otherwise go sideways
+ else {
+ while (!cur.next_sibling() && cur._root != _root) cur = cur.parent(); // climb until some ancestor has a next sibling, or we are back at this node
+
+ if (cur._root != _root) cur = cur.next_sibling(); // continue with that ancestor's sibling; if we reached this node the outer loop ends
+ }
+ }
+
+ return xml_node();
+ }
+
+ // Find child node by attribute name/value
+ xml_node find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const;
+ xml_node find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const;
+
+#ifndef PUGIXML_NO_STL
+ // Get the absolute node path from root as a text string.
+ string_t path(char_t delimiter = '/') const;
+#endif
+
+ // Search for a node by path consisting of node names and . or .. elements.
+ xml_node first_element_by_path(const char_t* path, char_t delimiter = '/') const;
+
+ // Recursively traverse subtree with xml_tree_walker
+ bool traverse(xml_tree_walker& walker);
+
+#ifndef PUGIXML_NO_XPATH
+ // Select single node by evaluating XPath query. Returns first node from the resulting node set.
+ xpath_node select_node(const char_t* query, xpath_variable_set* variables = 0) const;
+ xpath_node select_node(const xpath_query& query) const;
+
+ // Select node set by evaluating XPath query
+ xpath_node_set select_nodes(const char_t* query, xpath_variable_set* variables = 0) const;
+ xpath_node_set select_nodes(const xpath_query& query) const;
+
+ // (deprecated: use select_node instead) Select single node by evaluating XPath query.
+ xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const;
+ xpath_node select_single_node(const xpath_query& query) const;
+
+#endif
+
+ // Print subtree using a writer object
+ void print(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
+
+#ifndef PUGIXML_NO_STL
+ // Print subtree to stream
+ void print(std::basic_ostream<char, std::char_traits<char> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
+ void print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, unsigned int depth = 0) const;
+#endif
+
+ // Child nodes iterators
+ typedef xml_node_iterator iterator;
+
+ iterator begin() const;
+ iterator end() const;
+
+ // Attribute iterators
+ typedef xml_attribute_iterator attribute_iterator;
+
+ attribute_iterator attributes_begin() const;
+ attribute_iterator attributes_end() const;
+
+ // Range-based for support
+ xml_object_range<xml_node_iterator> children() const;
+ xml_object_range<xml_named_node_iterator> children(const char_t* name) const;
+ xml_object_range<xml_attribute_iterator> attributes() const;
+
+ // Get node offset in parsed file/string (in char_t units) for debugging purposes
+ ptrdiff_t offset_debug() const;
+
+ // Get hash value (unique for handles to the same object)
+ size_t hash_value() const;
+
+ // Get internal pointer
+ xml_node_struct* internal_object() const;
+};
+
+#ifdef __BORLANDC__
+// Borland C++ workaround
+bool PUGIXML_FUNCTION operator&&(const xml_node& lhs, bool rhs);
+bool PUGIXML_FUNCTION operator||(const xml_node& lhs, bool rhs);
+#endif
+
+// A helper handle for reading and writing the text of a node; per data() below, the text lives in a node_pcdata or node_cdata node
+class PUGIXML_CLASS xml_text
+{
+ friend class xml_node; // gives xml_node access to the private constructor below
+
+ xml_node_struct* _root; // raw node pointer wrapped by this handle
+
+ typedef void (*unspecified_bool_type)(xml_text***); // target type of the safe bool conversion declared below
+
+ explicit xml_text(xml_node_struct* root);
+
+ xml_node_struct* _data_new(); // NOTE(review): implementation not visible here; presumably returns the data node, creating it if missing - confirm in pugixml.cpp
+ xml_node_struct* _data() const; // NOTE(review): presumably looks up the existing data node without creating one - confirm in pugixml.cpp
+
+public:
+ // Default constructor. Constructs an empty object.
+ xml_text();
+
+ // Safe bool conversion operator (lets the handle be tested in a condition)
+ operator unspecified_bool_type() const;
+
+ // Borland C++ workaround
+ bool operator!() const;
+
+ // Check if text object is empty
+ bool empty() const;
+
+ // Get text, or "" if object is empty
+ const char_t* get() const;
+
+ // Get text, or the default value if object is empty
+ const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const;
+
+ // Get text as a number, or the default value if conversion did not succeed or object is empty
+ int as_int(int def = 0) const;
+ unsigned int as_uint(unsigned int def = 0) const;
+ double as_double(double def = 0) const;
+ float as_float(float def = 0) const;
+
+#ifdef PUGIXML_HAS_LONG_LONG
+ long long as_llong(long long def = 0) const;
+ unsigned long long as_ullong(unsigned long long def = 0) const;
+#endif
+
+ // Get text as bool (returns true if first character is in '1tTyY' set), or the default value if object is empty
+ bool as_bool(bool def = false) const;
+
+ // Set text (returns false if object is empty or there is not enough memory)
+ bool set(const char_t* rhs);
+
+ // Set text with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
+ bool set(int rhs);
+ bool set(unsigned int rhs);
+ bool set(double rhs);
+ bool set(float rhs);
+ bool set(bool rhs);
+
+#ifdef PUGIXML_HAS_LONG_LONG
+ bool set(long long rhs);
+ bool set(unsigned long long rhs);
+#endif
+
+ // Set text (equivalent to set without error checking); each overload returns a reference to an xml_text so assignments can be chained
+ xml_text& operator=(const char_t* rhs);
+ xml_text& operator=(int rhs);
+ xml_text& operator=(unsigned int rhs);
+ xml_text& operator=(double rhs);
+ xml_text& operator=(float rhs);
+ xml_text& operator=(bool rhs);
+
+#ifdef PUGIXML_HAS_LONG_LONG
+ xml_text& operator=(long long rhs);
+ xml_text& operator=(unsigned long long rhs);
+#endif
+
+ // Get the data node (node_pcdata or node_cdata) for this object
+ xml_node data() const;
+};
+
+#ifdef __BORLANDC__
+// Borland C++ workaround
+bool PUGIXML_FUNCTION operator&&(const xml_text& lhs, bool rhs);
+bool PUGIXML_FUNCTION operator||(const xml_text& lhs, bool rhs);
+#endif
+
+// Child node iterator (a bidirectional iterator over a collection of xml_node); exposed as xml_node::iterator
+class PUGIXML_CLASS xml_node_iterator
+{
+ friend class xml_node; // gives xml_node access to the private (ref, parent) constructor
+
+private:
+ mutable xml_node _wrap; // mutable: presumably the object handed out by the const operator*/operator-> as non-const - confirm in pugixml.cpp
+ xml_node _parent;
+
+ xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent);
+
+public:
+ // Iterator traits
+ typedef ptrdiff_t difference_type;
+ typedef xml_node value_type;
+ typedef xml_node* pointer;
+ typedef xml_node& reference;
+
+#ifndef PUGIXML_NO_STL
+ typedef std::bidirectional_iterator_tag iterator_category;
+#endif
+
+ // Default constructor
+ xml_node_iterator();
+
+ // Construct an iterator which points to the specified node
+ xml_node_iterator(const xml_node& node);
+
+ // Iterator operators
+ bool operator==(const xml_node_iterator& rhs) const;
+ bool operator!=(const xml_node_iterator& rhs) const;
+
+ xml_node& operator*() const;
+ xml_node* operator->() const;
+
+ const xml_node_iterator& operator++(); // pre-increment; note the const reference return type
+ xml_node_iterator operator++(int); // post-increment; returns an iterator by value
+
+ const xml_node_iterator& operator--(); // pre-decrement; note the const reference return type
+ xml_node_iterator operator--(int); // post-decrement; returns an iterator by value
+};
+
+// Attribute iterator (a bidirectional iterator over a collection of xml_attribute); exposed as xml_node::attribute_iterator
+class PUGIXML_CLASS xml_attribute_iterator
+{
+ friend class xml_node; // gives xml_node access to the private (ref, parent) constructor
+
+private:
+ mutable xml_attribute _wrap; // mutable: presumably the object handed out by the const operator*/operator-> as non-const - confirm in pugixml.cpp
+ xml_node _parent; // node owning the attributes being iterated
+
+ xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent);
+
+public:
+ // Iterator traits
+ typedef ptrdiff_t difference_type;
+ typedef xml_attribute value_type;
+ typedef xml_attribute* pointer;
+ typedef xml_attribute& reference;
+
+#ifndef PUGIXML_NO_STL
+ typedef std::bidirectional_iterator_tag iterator_category;
+#endif
+
+ // Default constructor
+ xml_attribute_iterator();
+
+ // Construct an iterator which points to the specified attribute; unlike xml_node_iterator, the parent node must be passed explicitly
+ xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent);
+
+ // Iterator operators
+ bool operator==(const xml_attribute_iterator& rhs) const;
+ bool operator!=(const xml_attribute_iterator& rhs) const;
+
+ xml_attribute& operator*() const;
+ xml_attribute* operator->() const;
+
+ const xml_attribute_iterator& operator++(); // pre-increment; note the const reference return type
+ xml_attribute_iterator operator++(int); // post-increment; returns an iterator by value
+
+ const xml_attribute_iterator& operator--(); // pre-decrement; note the const reference return type
+ xml_attribute_iterator operator--(int); // post-decrement; returns an iterator by value
+};
+
+// Named node range helper: the iterator type of the range returned by xml_node::children(const char_t* name)
+class PUGIXML_CLASS xml_named_node_iterator
+{
+ friend class xml_node; // gives xml_node access to the private (ref, parent, name) constructor
+
+public:
+ // Iterator traits
+ typedef ptrdiff_t difference_type;
+ typedef xml_node value_type;
+ typedef xml_node* pointer;
+ typedef xml_node& reference;
+
+#ifndef PUGIXML_NO_STL
+ typedef std::bidirectional_iterator_tag iterator_category;
+#endif
+
+ // Default constructor
+ xml_named_node_iterator();
+
+ // Construct an iterator which points to the specified node
+ xml_named_node_iterator(const xml_node& node, const char_t* name);
+
+ // Iterator operators
+ bool operator==(const xml_named_node_iterator& rhs) const;
+ bool operator!=(const xml_named_node_iterator& rhs) const;
+
+ xml_node& operator*() const;
+ xml_node* operator->() const;
+
+ const xml_named_node_iterator& operator++(); // pre-increment; note the const reference return type
+ xml_named_node_iterator operator++(int); // post-increment; returns an iterator by value
+
+ const xml_named_node_iterator& operator--(); // pre-decrement; note the const reference return type
+ xml_named_node_iterator operator--(int); // post-decrement; returns an iterator by value
+
+private:
+ mutable xml_node _wrap; // mutable: presumably the object handed out by the const operator*/operator-> as non-const - confirm in pugixml.cpp
+ xml_node _parent;
+ const char_t* _name; // raw pointer, not an owned copy; NOTE(review): presumably the name string must outlive the iterator - confirm
+
+ xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name);
+};
+
+// Abstract tree walker class (see xml_node::traverse); subclasses must implement for_each, while begin/end are virtual but not pure
+class PUGIXML_CLASS xml_tree_walker
+{
+ friend class xml_node; // xml_node::traverse drives the walker; NOTE(review): presumably it also updates _depth - confirm in pugixml.cpp
+
+private:
+ int _depth;
+
+protected:
+ // Get current traversal depth (accessible to subclasses only)
+ int depth() const;
+
+public:
+ xml_tree_walker();
+ virtual ~xml_tree_walker();
+
+ // Callback that is called when traversal begins
+ virtual bool begin(xml_node& node);
+
+ // Callback that is called for each node traversed (pure virtual)
+ virtual bool for_each(xml_node& node) = 0;
+
+ // Callback that is called when traversal ends
+ virtual bool end(xml_node& node);
+};
+
+// Parsing status, returned as part of xml_parse_result object; status_ok is 0 and the remaining enumerators follow in declaration order
+enum xml_parse_status {
+ status_ok = 0, // No error
+
+ status_file_not_found, // File was not found during load_file()
+ status_io_error, // Error reading from file/stream
+ status_out_of_memory, // Could not allocate memory
+ status_internal_error, // Internal error occurred
+
+ status_unrecognized_tag, // Parser could not determine tag type
+
+ status_bad_pi, // Parsing error occurred while parsing document declaration/processing instruction
+ status_bad_comment, // Parsing error occurred while parsing comment
+ status_bad_cdata, // Parsing error occurred while parsing CDATA section
+ status_bad_doctype, // Parsing error occurred while parsing document type declaration
+ status_bad_pcdata, // Parsing error occurred while parsing PCDATA section
+ status_bad_start_element, // Parsing error occurred while parsing start element tag
+ status_bad_attribute, // Parsing error occurred while parsing element attribute
+ status_bad_end_element, // Parsing error occurred while parsing end element tag
+ status_end_element_mismatch,// Start and end tags did not match (closing tag had incorrect name, some tag was not closed, or there was an excess closing tag)
+
+ status_append_invalid_root, // Unable to append nodes since root type is not node_element or node_document (exclusive to xml_node::append_buffer)
+
+ status_no_document_element // Parsing resulted in a document without element nodes
+};
+
+// Parsing result: returned by the xml_document load functions and by xml_node::append_buffer
+struct PUGIXML_CLASS xml_parse_result {
+ // Parsing status (see xml_parse_status)
+ xml_parse_status status;
+
+ // Last parsed offset (in char_t units from start of input data)
+ ptrdiff_t offset;
+
+ // Source document encoding
+ xml_encoding encoding;
+
+ // Default constructor, initializes object to failed state
+ xml_parse_result();
+
+ // Cast to bool operator; NOTE(review): presumably true only when status is status_ok - confirm in pugixml.cpp
+ operator bool() const;
+
+ // Get error description
+ const char* description() const;
+};
+
+// Document class (DOM tree root); derives from xml_node, so the node API is available on the document itself
+class PUGIXML_CLASS xml_document: public xml_node
+{
+private:
+ char_t* _buffer; // NOTE(review): presumably the parse buffer owned by the document (see load_buffer_inplace_own) - confirm in pugixml.cpp
+
+ char _memory[192]; // NOTE(review): fixed in-object storage; presumably backs the document's initial allocation - confirm in pugixml.cpp
+
+ // Non-copyable semantics: copy constructor and copy assignment are declared private
+ xml_document(const xml_document&);
+ xml_document& operator=(const xml_document&);
+
+ void create();
+ void destroy();
+
+public:
+ // Default constructor, makes empty document
+ xml_document();
+
+ // Destructor, invalidates all node/attribute handles to this document
+ ~xml_document();
+
+ // Removes all nodes, leaving the empty document
+ void reset();
+
+ // Removes all nodes, then copies the entire contents of the specified document
+ void reset(const xml_document& proto);
+
+#ifndef PUGIXML_NO_STL
+ // Load document from stream. The wchar_t stream overload takes no encoding argument.
+ xml_parse_result load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+ xml_parse_result load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options = parse_default);
+#endif
+
+ // (deprecated: use load_string instead) Load document from zero-terminated string. No encoding conversions are applied.
+ xml_parse_result load(const char_t* contents, unsigned int options = parse_default);
+
+ // Load document from zero-terminated string. No encoding conversions are applied.
+ xml_parse_result load_string(const char_t* contents, unsigned int options = parse_default);
+
+ // Load document from file (narrow and wide path overloads)
+ xml_parse_result load_file(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+ xml_parse_result load_file(const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+ // Load document from buffer. Copies/converts the buffer, so it may be deleted or changed after the function returns.
+ xml_parse_result load_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+ // Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data).
+ // You should ensure that buffer data will persist throughout the document's lifetime, and free the buffer memory manually once document is destroyed.
+ xml_parse_result load_buffer_inplace(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+ // Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data).
+ // You should allocate the buffer with pugixml allocation function; document will free the buffer when it is no longer needed (you can't use it anymore).
+ xml_parse_result load_buffer_inplace_own(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+ // Save XML document to writer (semantics is slightly different from xml_node::print, see documentation for details).
+ void save(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
+
+#ifndef PUGIXML_NO_STL
+ // Save XML document to stream (semantics is slightly different from xml_node::print, see documentation for details). The wchar_t stream overload takes no encoding argument.
+ void save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
+ void save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default) const;
+#endif
+
+ // Save XML to file (narrow and wide path overloads); returns bool, unlike the void save() overloads above
+ bool save_file(const char* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
+ bool save_file(const wchar_t* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
+
+ // Get document element
+ xml_node document_element() const;
+};
+
+#ifndef PUGIXML_NO_XPATH
+// XPath query return type; also used as the type tag of xpath_variable
+enum xpath_value_type {
+ xpath_type_none, // Unknown type (query failed to compile)
+ xpath_type_node_set, // Node set (xpath_node_set)
+ xpath_type_number, // Number
+ xpath_type_string, // String
+ xpath_type_boolean // Boolean
+};
+
+// XPath parsing result (available from xpath_query::result() and xpath_exception::result())
+struct PUGIXML_CLASS xpath_parse_result {
+ // Error message (0 if no error)
+ const char* error;
+
+ // Last parsed offset (in char_t units from string start)
+ ptrdiff_t offset;
+
+ // Default constructor, initializes object to failed state
+ xpath_parse_result();
+
+ // Cast to bool operator; NOTE(review): presumably true when error is 0 - confirm in pugixml.cpp
+ operator bool() const;
+
+ // Get error description
+ const char* description() const;
+};
+
+// A single XPath variable; the constructor is protected, and xpath_variable_set (a friend) exposes add() to obtain one
+class PUGIXML_CLASS xpath_variable
+{
+ friend class xpath_variable_set;
+
+protected:
+ xpath_value_type _type; // type tag reported by type()
+ xpath_variable* _next; // NOTE(review): presumably links variables stored in the same xpath_variable_set slot - confirm in pugixml.cpp
+
+ xpath_variable(xpath_value_type type);
+
+ // Non-copyable semantics: copy constructor and copy assignment are not public
+ xpath_variable(const xpath_variable&);
+ xpath_variable& operator=(const xpath_variable&);
+
+public:
+ // Get variable name
+ const char_t* name() const;
+
+ // Get variable type
+ xpath_value_type type() const;
+
+ // Get variable value; no type conversion is performed, default value (false, NaN, empty string, empty node set) is returned on type mismatch error
+ bool get_boolean() const;
+ double get_number() const;
+ const char_t* get_string() const;
+ const xpath_node_set& get_node_set() const;
+
+ // Set variable value; no type conversion is performed, false is returned on type mismatch error
+ bool set(bool value);
+ bool set(double value);
+ bool set(const char_t* value);
+ bool set(const xpath_node_set& value);
+};
+
+// A set of XPath variables, looked up by name; copyable, and movable when compiled as C++11 or later
+class PUGIXML_CLASS xpath_variable_set
+{
+private:
+ xpath_variable* _data[64]; // NOTE(review): fixed array of 64 variable pointers; presumably hash buckets chained through xpath_variable::_next - confirm in pugixml.cpp
+
+ void _assign(const xpath_variable_set& rhs);
+ void _swap(xpath_variable_set& rhs);
+
+ xpath_variable* _find(const char_t* name) const;
+
+ static bool _clone(xpath_variable* var, xpath_variable** out_result);
+ static void _destroy(xpath_variable* var);
+
+public:
+ // Default constructor/destructor
+ xpath_variable_set();
+ ~xpath_variable_set();
+
+ // Copy constructor/assignment operator
+ xpath_variable_set(const xpath_variable_set& rhs);
+ xpath_variable_set& operator=(const xpath_variable_set& rhs);
+
+#if __cplusplus >= 201103
+ // Move semantics support (only declared when __cplusplus reports C++11 or later)
+ xpath_variable_set(xpath_variable_set&& rhs);
+ xpath_variable_set& operator=(xpath_variable_set&& rhs);
+#endif
+
+ // Add a new variable or get the existing one, if the types match
+ xpath_variable* add(const char_t* name, xpath_value_type type);
+
+ // Set value of an existing variable; no type conversion is performed, false is returned if there is no such variable or if types mismatch
+ bool set(const char_t* name, bool value);
+ bool set(const char_t* name, double value);
+ bool set(const char_t* name, const char_t* value);
+ bool set(const char_t* name, const xpath_node_set& value);
+
+ // Get existing variable by name (const and non-const overloads)
+ xpath_variable* get(const char_t* name);
+ const xpath_variable* get(const char_t* name) const;
+};
+
+// A compiled XPath query object; non-copyable, and movable when compiled as C++11 or later
+class PUGIXML_CLASS xpath_query
+{
+private:
+ void* _impl; // opaque pointer to the compiled query representation
+ xpath_parse_result _result; // compilation outcome, exposed through result()
+
+ typedef void (*unspecified_bool_type)(xpath_query***); // target type of the safe bool conversion declared below
+
+ // Non-copyable semantics: copy constructor and copy assignment are declared private
+ xpath_query(const xpath_query&);
+ xpath_query& operator=(const xpath_query&);
+
+public:
+ // Construct a compiled object from XPath expression.
+ // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on compilation errors.
+ explicit xpath_query(const char_t* query, xpath_variable_set* variables = 0);
+
+ // Default constructor
+ xpath_query();
+
+ // Destructor
+ ~xpath_query();
+
+#if __cplusplus >= 201103
+ // Move semantics support (only declared when __cplusplus reports C++11 or later)
+ xpath_query(xpath_query&& rhs);
+ xpath_query& operator=(xpath_query&& rhs);
+#endif
+
+ // Get query expression return type
+ xpath_value_type return_type() const;
+
+ // Evaluate expression as boolean value in the specified context; performs type conversion if necessary.
+ // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
+ bool evaluate_boolean(const xpath_node& n) const;
+
+ // Evaluate expression as double value in the specified context; performs type conversion if necessary.
+ // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
+ double evaluate_number(const xpath_node& n) const;
+
+#ifndef PUGIXML_NO_STL
+ // Evaluate expression as string value in the specified context; performs type conversion if necessary.
+ // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
+ string_t evaluate_string(const xpath_node& n) const;
+#endif
+
+ // Evaluate expression as string value in the specified context; performs type conversion if necessary.
+ // At most capacity characters are written to the destination buffer, full result size is returned (includes terminating zero).
+ // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
+ // If PUGIXML_NO_EXCEPTIONS is defined, the result is an empty string instead (the comment used to say "empty set", which does not fit this string overload; NOTE(review): confirm in pugixml.cpp).
+ size_t evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const;
+
+ // Evaluate expression as node set in the specified context.
+ // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on type mismatch and std::bad_alloc on out of memory errors.
+ // If PUGIXML_NO_EXCEPTIONS is defined, returns empty node set instead.
+ xpath_node_set evaluate_node_set(const xpath_node& n) const;
+
+ // Evaluate expression as node set in the specified context.
+ // Return first node in document order, or empty node if node set is empty.
+ // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on type mismatch and std::bad_alloc on out of memory errors.
+ // If PUGIXML_NO_EXCEPTIONS is defined, returns empty node instead.
+ xpath_node evaluate_node(const xpath_node& n) const;
+
+ // Get parsing result (used to get compilation errors in PUGIXML_NO_EXCEPTIONS mode)
+ const xpath_parse_result& result() const;
+
+ // Safe bool conversion operator
+ operator unspecified_bool_type() const;
+
+ // Borland C++ workaround
+ bool operator!() const;
+};
+
+#ifndef PUGIXML_NO_EXCEPTIONS
+// XPath exception class (only declared when PUGIXML_NO_EXCEPTIONS is not defined); carries the xpath_parse_result describing the failure
+class PUGIXML_CLASS xpath_exception: public std::exception
+{
+private:
+ xpath_parse_result _result; // copy of the parse result supplied at construction
+
+public:
+ // Construct exception from parse result
+ explicit xpath_exception(const xpath_parse_result& result);
+
+ // Get error message (overrides std::exception::what)
+ virtual const char* what() const throw();
+
+ // Get parse result
+ const xpath_parse_result& result() const;
+};
+#endif
+
+// XPath node class (either xml_node or xml_attribute); stores both a node handle and an attribute handle
+class PUGIXML_CLASS xpath_node
+{
+private:
+ xml_node _node;
+ xml_attribute _attribute;
+
+ typedef void (*unspecified_bool_type)(xpath_node***); // target type of the safe bool conversion declared below
+
+public:
+ // Default constructor; constructs empty XPath node
+ xpath_node();
+
+ // Construct XPath node from XML node/attribute; the attribute form also requires the attribute's parent node
+ xpath_node(const xml_node& node);
+ xpath_node(const xml_attribute& attribute, const xml_node& parent);
+
+ // Get node/attribute, if any
+ xml_node node() const;
+ xml_attribute attribute() const;
+
+ // Get parent of contained node/attribute
+ xml_node parent() const;
+
+ // Safe bool conversion operator
+ operator unspecified_bool_type() const;
+
+ // Borland C++ workaround
+ bool operator!() const;
+
+ // Comparison operators
+ bool operator==(const xpath_node& n) const;
+ bool operator!=(const xpath_node& n) const;
+};
+
+#ifdef __BORLANDC__
+// Borland C++ workaround
+bool PUGIXML_FUNCTION operator&&(const xpath_node& lhs, bool rhs);
+bool PUGIXML_FUNCTION operator||(const xpath_node& lhs, bool rhs);
+#endif
+
+// A fixed-size collection of XPath nodes; elements are only exposed through const iterators and a const indexing operator
+class PUGIXML_CLASS xpath_node_set
+{
+public:
+ // Collection type (ordering of the stored nodes)
+ enum type_t {
+ type_unsorted, // Not ordered
+ type_sorted, // Sorted by document order (ascending)
+ type_sorted_reverse // Sorted by document order (descending)
+ };
+
+ // Constant iterator type
+ typedef const xpath_node* const_iterator;
+
+ // We define non-constant iterator to be the same as constant iterator so that various generic algorithms (e.g. boost foreach) work
+ typedef const xpath_node* iterator;
+
+ // Default constructor. Constructs empty set.
+ xpath_node_set();
+
+ // Constructs a set from iterator range; data is not checked for duplicates and is not sorted according to provided type, so be careful
+ xpath_node_set(const_iterator begin, const_iterator end, type_t type = type_unsorted);
+
+ // Destructor
+ ~xpath_node_set();
+
+ // Copy constructor/assignment operator
+ xpath_node_set(const xpath_node_set& ns);
+ xpath_node_set& operator=(const xpath_node_set& ns);
+
+#if __cplusplus >= 201103
+ // Move semantics support (only declared when __cplusplus reports C++11 or later)
+ xpath_node_set(xpath_node_set&& rhs);
+ xpath_node_set& operator=(xpath_node_set&& rhs);
+#endif
+
+ // Get collection type
+ type_t type() const;
+
+ // Get collection size
+ size_t size() const;
+
+ // Indexing operator
+ const xpath_node& operator[](size_t index) const;
+
+ // Collection iterators
+ const_iterator begin() const;
+ const_iterator end() const;
+
+ // Sort the collection in ascending/descending order by document order
+ void sort(bool reverse = false);
+
+ // Get first node in the collection by document order
+ xpath_node first() const;
+
+ // Check if collection is empty
+ bool empty() const;
+
+private:
+ type_t _type;
+
+ xpath_node _storage; // NOTE(review): a single in-object element; presumably avoids a heap allocation for one-node sets - confirm in pugixml.cpp
+
+ xpath_node* _begin;
+ xpath_node* _end;
+
+ void _assign(const_iterator begin, const_iterator end, type_t type);
+ void _move(xpath_node_set& rhs);
+};
+#endif
+
+#ifndef PUGIXML_NO_STL
+// Convert wide string to UTF8 (overloads for a wchar_t pointer and for a std::basic_string<wchar_t>); returns a std::basic_string<char>
+std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const wchar_t* str);
+std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >& str);
+
+// Convert UTF8 to wide string (overloads for a char pointer and for a std::basic_string<char>); returns a std::basic_string<wchar_t>
+std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const char* str);
+std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const std::basic_string<char, std::char_traits<char>, std::allocator<char> >& str);
+#endif
+
+// Memory allocation function interface; takes the requested size and returns pointer to allocated memory or NULL on failure
+typedef void* (*allocation_function)(size_t size);
+
+// Memory deallocation function interface; takes the pointer to release
+typedef void (*deallocation_function)(void* ptr);
+
+// Override default memory management functions. All subsequent allocations/deallocations will be performed via supplied functions.
+void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate);
+
+// Get current memory management functions (allocation and deallocation are queried separately)
+allocation_function PUGIXML_FUNCTION get_memory_allocation_function();
+deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function();
+}
+
+#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
+namespace std
+{
+// Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
+std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_node_iterator&);
+std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_attribute_iterator&);
+std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_named_node_iterator&);
+}
+#endif
+
+#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
+namespace std
+{
+// Workarounds for (non-standard) iterator category detection
+std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_node_iterator&);
+std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_attribute_iterator&);
+std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_named_node_iterator&);
+}
+#endif
+
+#endif
+
+// Make sure implementation is included in header-only mode
+// Use macro expansion in #include to work around QMake (QTBUG-11923)
+#if defined(PUGIXML_HEADER_ONLY) && !defined(PUGIXML_SOURCE)
+# define PUGIXML_SOURCE "pugixml.cpp"
+# include PUGIXML_SOURCE
+#endif
+
+/**
+ * Copyright (c) 2006-2015 Arseny Kapoulkine
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
diff --git a/contrib/moses2/server/Server.cpp b/moses2/server/Server.cpp
index 4befff98e..57218c374 100644
--- a/contrib/moses2/server/Server.cpp
+++ b/moses2/server/Server.cpp
@@ -16,8 +16,8 @@ namespace Moses2
{
Server::Server(ServerOptions &server_options, System &system)
-:m_server_options(server_options)
-,m_translator(new Translator(*this, system))
+ :m_server_options(server_options)
+ ,m_translator(new Translator(*this, system))
{
m_registry.addMethod("translate", m_translator);
}
@@ -30,30 +30,34 @@ Server::~Server()
void Server::run(System &system)
{
xmlrpc_c::serverAbyss myAbyssServer
- (xmlrpc_c::serverAbyss::constrOpt()
- .registryP(&m_registry)
- .portNumber(m_server_options.port) // TCP port on which to listen
- .logFileName(m_server_options.logfile)
- .allowOrigin("*")
- .maxConn(m_server_options.maxConn)
- .maxConnBacklog(m_server_options.maxConnBacklog)
- .keepaliveTimeout(m_server_options.keepaliveTimeout)
- .keepaliveMaxConn(m_server_options.keepaliveMaxConn)
- .timeout(m_server_options.timeout)
- );
+ (xmlrpc_c::serverAbyss::constrOpt()
+ .registryP(&m_registry)
+ .portNumber(m_server_options.port) // TCP port on which to listen
+ .logFileName(m_server_options.logfile)
+ .allowOrigin("*")
+ .maxConn(m_server_options.maxConn)
+ .maxConnBacklog(m_server_options.maxConnBacklog)
+ .keepaliveTimeout(m_server_options.keepaliveTimeout)
+ .keepaliveMaxConn(m_server_options.keepaliveMaxConn)
+ .timeout(m_server_options.timeout)
+ );
std::ostringstream pidfilename;
pidfilename << "/tmp/moses-server." << m_server_options.port << ".pid";
m_pidfile = pidfilename.str();
std::ofstream pidfile(m_pidfile.c_str());
- pidfile << getpid() << std::endl;
+
+#ifdef _WIN32
+ int thePid = GetCurrentProcessId();
+#else
+ int thePid = getpid();
+#endif
+ pidfile << thePid << std::endl;
pidfile.close();
cerr << "Listening on port " << m_server_options.port << std::endl;
- if (m_server_options.is_serial)
- {
- cerr << "Running server in serial mode." << std::endl;
- while(true) myAbyssServer.runOnce();
- }
- else myAbyssServer.run();
+ if (m_server_options.is_serial) {
+ cerr << "Running server in serial mode." << std::endl;
+ while(true) myAbyssServer.runOnce();
+ } else myAbyssServer.run();
std::cerr << "xmlrpc_c::serverAbyss.run() returned but it should not."
<< std::endl;
diff --git a/contrib/moses2/server/Server.h b/moses2/server/Server.h
index d19ef75d2..d19ef75d2 100644
--- a/contrib/moses2/server/Server.h
+++ b/moses2/server/Server.h
diff --git a/contrib/moses2/server/TranslationRequest.cpp b/moses2/server/TranslationRequest.cpp
index dd37d621c..2d50835a6 100644
--- a/contrib/moses2/server/TranslationRequest.cpp
+++ b/moses2/server/TranslationRequest.cpp
@@ -14,10 +14,10 @@ TranslationRequest(xmlrpc_c::paramList const& paramList,
System &system,
const std::string &line,
long translationId)
-:TranslationTask(system, line, translationId)
-,m_cond(cond)
-,m_mutex(mut)
-,m_done(false)
+ :TranslationTask(system, line, translationId)
+ ,m_cond(cond)
+ ,m_mutex(mut)
+ ,m_done(false)
{
}
@@ -25,12 +25,12 @@ TranslationRequest(xmlrpc_c::paramList const& paramList,
boost::shared_ptr<TranslationRequest>
TranslationRequest::
create(Translator* translator,
- xmlrpc_c::paramList const& paramList,
- boost::condition_variable& cond,
- boost::mutex& mut,
- System &system,
- const std::string &line,
- long translationId)
+ xmlrpc_c::paramList const& paramList,
+ boost::condition_variable& cond,
+ boost::mutex& mut,
+ System &system,
+ const std::string &line,
+ long translationId)
{
boost::shared_ptr<TranslationRequest> ret;
TranslationRequest *request = new TranslationRequest(paramList, cond, mut, system, line, translationId);
@@ -38,7 +38,7 @@ create(Translator* translator,
ret->m_translator = translator;
return ret;
}
-
+
void
TranslationRequest::
Run()
@@ -60,7 +60,7 @@ Run()
void TranslationRequest::pack_hypothesis(const Manager& manager, Hypothesis const* h,
std::string const& key,
- std::map<std::string, xmlrpc_c::value> & dest) const
+ std::map<std::string, xmlrpc_c::value> & dest) const
{
}
diff --git a/contrib/moses2/server/TranslationRequest.h b/moses2/server/TranslationRequest.h
index 0f63bc57a..822cde153 100644
--- a/contrib/moses2/server/TranslationRequest.h
+++ b/moses2/server/TranslationRequest.h
@@ -22,7 +22,7 @@ class System;
class Manager;
class
-TranslationRequest : public virtual TranslationTask
+ TranslationRequest : public virtual TranslationTask
{
protected:
std::map<std::string, xmlrpc_c::value> m_retData;
@@ -41,7 +41,7 @@ protected:
void
pack_hypothesis(const Manager& manager, Hypothesis const* h,
- std::string const& key,
+ std::string const& key,
std::map<std::string, xmlrpc_c::value> & dest) const;
public:
@@ -49,12 +49,12 @@ public:
static
boost::shared_ptr<TranslationRequest>
create(Translator* translator,
- xmlrpc_c::paramList const& paramList,
+ xmlrpc_c::paramList const& paramList,
boost::condition_variable& cond,
boost::mutex& mut,
System &system,
- const std::string &line,
- long translationId);
+ const std::string &line,
+ long translationId);
virtual bool
diff --git a/contrib/moses2/server/Translator.cpp b/moses2/server/Translator.cpp
index fd855c136..6f6212323 100644
--- a/contrib/moses2/server/Translator.cpp
+++ b/moses2/server/Translator.cpp
@@ -16,10 +16,10 @@ namespace Moses2
{
Translator::Translator(Server& server, System &system)
-: m_server(server),
- m_threadPool(server.options().numThreads),
- m_system(system),
- m_translationId(0)
+ : m_server(server),
+ m_threadPool(server.options().numThreads),
+ m_system(system),
+ m_translationId(0)
{
// signature and help strings are documentation -- the client
// can query this information with a system.methodSignature and
@@ -34,7 +34,7 @@ Translator::~Translator()
}
void Translator::execute(xmlrpc_c::paramList const& paramList,
- xmlrpc_c::value *const retvalP)
+ xmlrpc_c::value *const retvalP)
{
typedef std::map<std::string,xmlrpc_c::value> param_t;
param_t const& params = paramList.getStruct(0);
diff --git a/contrib/moses2/server/Translator.h b/moses2/server/Translator.h
index ba2c68ceb..bb84c70b1 100644
--- a/contrib/moses2/server/Translator.h
+++ b/moses2/server/Translator.h
@@ -25,7 +25,7 @@ public:
virtual ~Translator();
void execute(xmlrpc_c::paramList const& paramList,
- xmlrpc_c::value * const retvalP);
+ xmlrpc_c::value * const retvalP);
protected:
Server& m_server;
diff --git a/phrase-extract/consolidate-direct.vcxproj b/phrase-extract/consolidate-direct.vcxproj
deleted file mode 100644
index a84d2cce6..000000000
--- a/phrase-extract/consolidate-direct.vcxproj
+++ /dev/null
@@ -1,98 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
- <ItemGroup Label="ProjectConfigurations">
- <ProjectConfiguration Include="Debug|Win32">
- <Configuration>Debug</Configuration>
- <Platform>Win32</Platform>
- </ProjectConfiguration>
- <ProjectConfiguration Include="Release|Win32">
- <Configuration>Release</Configuration>
- <Platform>Win32</Platform>
- </ProjectConfiguration>
- </ItemGroup>
- <PropertyGroup Label="Globals">
- <ProjectGuid>{33775109-60CF-4C1C-A869-5450B3DD88B3}</ProjectGuid>
- <RootNamespace>consolidatedirect</RootNamespace>
- <Keyword>Win32Proj</Keyword>
- </PropertyGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <CharacterSet>Unicode</CharacterSet>
- <WholeProgramOptimization>true</WholeProgramOptimization>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <CharacterSet>Unicode</CharacterSet>
- </PropertyGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
- <ImportGroup Label="ExtensionSettings">
- </ImportGroup>
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <PropertyGroup Label="UserMacros" />
- <PropertyGroup>
- <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
- <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
- <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
- <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
- <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
- <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
- <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
- </PropertyGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
- <ClCompile>
- <Optimization>Disabled</Optimization>
- <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- <MinimalRebuild>true</MinimalRebuild>
- <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
- <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
- <PrecompiledHeader>
- </PrecompiledHeader>
- <WarningLevel>Level3</WarningLevel>
- <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
- </ClCompile>
- <Link>
- <AdditionalDependencies>zdll.lib;%(AdditionalDependencies)</AdditionalDependencies>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <SubSystem>Console</SubSystem>
- <TargetMachine>MachineX86</TargetMachine>
- </Link>
- </ItemDefinitionGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
- <ClCompile>
- <Optimization>MaxSpeed</Optimization>
- <IntrinsicFunctions>true</IntrinsicFunctions>
- <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
- <FunctionLevelLinking>true</FunctionLevelLinking>
- <PrecompiledHeader>
- </PrecompiledHeader>
- <WarningLevel>Level3</WarningLevel>
- <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
- </ClCompile>
- <Link>
- <AdditionalDependencies>zdll.lib;%(AdditionalDependencies)</AdditionalDependencies>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <SubSystem>Console</SubSystem>
- <OptimizeReferences>true</OptimizeReferences>
- <EnableCOMDATFolding>true</EnableCOMDATFolding>
- <TargetMachine>MachineX86</TargetMachine>
- </Link>
- </ItemDefinitionGroup>
- <ItemGroup>
- <ClCompile Include="consolidate-direct.cpp" />
- <ClCompile Include="InputFileStream.cpp" />
- </ItemGroup>
- <ItemGroup>
- <ClInclude Include="InputFileStream.h" />
- <ClInclude Include="SafeGetline.h" />
- </ItemGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
- <ImportGroup Label="ExtensionTargets">
- </ImportGroup>
-</Project> \ No newline at end of file
diff --git a/phrase-extract/consolidate.vcxproj b/phrase-extract/consolidate.vcxproj
deleted file mode 100644
index 1e77a90f0..000000000
--- a/phrase-extract/consolidate.vcxproj
+++ /dev/null
@@ -1,100 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
- <ItemGroup Label="ProjectConfigurations">
- <ProjectConfiguration Include="Debug|Win32">
- <Configuration>Debug</Configuration>
- <Platform>Win32</Platform>
- </ProjectConfiguration>
- <ProjectConfiguration Include="Release|Win32">
- <Configuration>Release</Configuration>
- <Platform>Win32</Platform>
- </ProjectConfiguration>
- </ItemGroup>
- <PropertyGroup Label="Globals">
- <ProjectGuid>{45410EFE-65C0-4078-82E4-D636258F9225}</ProjectGuid>
- <RootNamespace>consolidate</RootNamespace>
- <Keyword>Win32Proj</Keyword>
- </PropertyGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <CharacterSet>Unicode</CharacterSet>
- <WholeProgramOptimization>true</WholeProgramOptimization>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <CharacterSet>Unicode</CharacterSet>
- </PropertyGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
- <ImportGroup Label="ExtensionSettings">
- </ImportGroup>
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <PropertyGroup Label="UserMacros" />
- <PropertyGroup>
- <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
- <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
- <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
- <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
- <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
- <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
- <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
- </PropertyGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
- <ClCompile>
- <Optimization>Disabled</Optimization>
- <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- <MinimalRebuild>true</MinimalRebuild>
- <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
- <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
- <PrecompiledHeader>
- </PrecompiledHeader>
- <WarningLevel>Level3</WarningLevel>
- <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
- </ClCompile>
- <Link>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <SubSystem>Console</SubSystem>
- <TargetMachine>MachineX86</TargetMachine>
- <AdditionalDependencies>zdll.lib;%(AdditionalDependencies)</AdditionalDependencies>
- </Link>
- </ItemDefinitionGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
- <ClCompile>
- <Optimization>MaxSpeed</Optimization>
- <IntrinsicFunctions>true</IntrinsicFunctions>
- <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
- <FunctionLevelLinking>true</FunctionLevelLinking>
- <PrecompiledHeader>
- </PrecompiledHeader>
- <WarningLevel>Level3</WarningLevel>
- <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
- </ClCompile>
- <Link>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <SubSystem>Console</SubSystem>
- <OptimizeReferences>true</OptimizeReferences>
- <EnableCOMDATFolding>true</EnableCOMDATFolding>
- <TargetMachine>MachineX86</TargetMachine>
- <AdditionalDependencies>zdll.lib;%(AdditionalDependencies)</AdditionalDependencies>
- </Link>
- </ItemDefinitionGroup>
- <ItemGroup>
- <ClCompile Include="consolidate.cpp" />
- <ClCompile Include="InputFileStream.cpp" />
- <ClCompile Include="tables-core.cpp" />
- </ItemGroup>
- <ItemGroup>
- <ClInclude Include="InputFileStream.h" />
- <ClInclude Include="SafeGetline.h" />
- <ClInclude Include="tables-core.h" />
- </ItemGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
- <ImportGroup Label="ExtensionTargets">
- </ImportGroup>
-</Project> \ No newline at end of file
diff --git a/phrase-extract/extract-lex.vcxproj b/phrase-extract/extract-lex.vcxproj
deleted file mode 100644
index a291d51aa..000000000
--- a/phrase-extract/extract-lex.vcxproj
+++ /dev/null
@@ -1,89 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
- <ItemGroup Label="ProjectConfigurations">
- <ProjectConfiguration Include="Debug|Win32">
- <Configuration>Debug</Configuration>
- <Platform>Win32</Platform>
- </ProjectConfiguration>
- <ProjectConfiguration Include="Release|Win32">
- <Configuration>Release</Configuration>
- <Platform>Win32</Platform>
- </ProjectConfiguration>
- </ItemGroup>
- <ItemGroup>
- <ClCompile Include="extract-lex.cpp" />
- <ClCompile Include="InputFileStream.cpp" />
- </ItemGroup>
- <ItemGroup>
- <ClInclude Include="extract-lex.h" />
- <ClInclude Include="InputFileStream.h" />
- </ItemGroup>
- <PropertyGroup Label="Globals">
- <ProjectGuid>{808BF985-CA18-4E55-8AAC-70E04DC25117}</ProjectGuid>
- <Keyword>Win32Proj</Keyword>
- <RootNamespace>extractlex</RootNamespace>
- </PropertyGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <UseDebugLibraries>true</UseDebugLibraries>
- <CharacterSet>Unicode</CharacterSet>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <UseDebugLibraries>false</UseDebugLibraries>
- <WholeProgramOptimization>true</WholeProgramOptimization>
- <CharacterSet>Unicode</CharacterSet>
- </PropertyGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
- <ImportGroup Label="ExtensionSettings">
- </ImportGroup>
- <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <PropertyGroup Label="UserMacros" />
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
- <LinkIncremental>true</LinkIncremental>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
- <LinkIncremental>false</LinkIncremental>
- </PropertyGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
- <ClCompile>
- <PrecompiledHeader>
- </PrecompiledHeader>
- <WarningLevel>Level3</WarningLevel>
- <Optimization>Disabled</Optimization>
- <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- </ClCompile>
- <Link>
- <SubSystem>Console</SubSystem>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <AdditionalDependencies>zdll.lib;%(AdditionalDependencies)</AdditionalDependencies>
- </Link>
- </ItemDefinitionGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
- <ClCompile>
- <WarningLevel>Level3</WarningLevel>
- <PrecompiledHeader>
- </PrecompiledHeader>
- <Optimization>MaxSpeed</Optimization>
- <FunctionLevelLinking>true</FunctionLevelLinking>
- <IntrinsicFunctions>true</IntrinsicFunctions>
- <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- </ClCompile>
- <Link>
- <SubSystem>Console</SubSystem>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <EnableCOMDATFolding>true</EnableCOMDATFolding>
- <OptimizeReferences>true</OptimizeReferences>
- <AdditionalDependencies>zdll.lib;%(AdditionalDependencies)</AdditionalDependencies>
- </Link>
- </ItemDefinitionGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
- <ImportGroup Label="ExtensionTargets">
- </ImportGroup>
-</Project> \ No newline at end of file
diff --git a/phrase-extract/extract-rules.vcxproj b/phrase-extract/extract-rules.vcxproj
deleted file mode 100644
index ecd36fe50..000000000
--- a/phrase-extract/extract-rules.vcxproj
+++ /dev/null
@@ -1,111 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
- <ItemGroup Label="ProjectConfigurations">
- <ProjectConfiguration Include="Debug|Win32">
- <Configuration>Debug</Configuration>
- <Platform>Win32</Platform>
- </ProjectConfiguration>
- <ProjectConfiguration Include="Release|Win32">
- <Configuration>Release</Configuration>
- <Platform>Win32</Platform>
- </ProjectConfiguration>
- </ItemGroup>
- <PropertyGroup Label="Globals">
- <ProjectGuid>{3C5CFAEC-6830-4491-9008-1C9E8C381C50}</ProjectGuid>
- <RootNamespace>extractrules</RootNamespace>
- <Keyword>Win32Proj</Keyword>
- </PropertyGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <CharacterSet>Unicode</CharacterSet>
- <WholeProgramOptimization>true</WholeProgramOptimization>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <CharacterSet>Unicode</CharacterSet>
- </PropertyGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
- <ImportGroup Label="ExtensionSettings">
- </ImportGroup>
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <PropertyGroup Label="UserMacros" />
- <PropertyGroup>
- <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
- <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
- <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
- <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
- <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
- <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
- <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
- </PropertyGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
- <ClCompile>
- <Optimization>Disabled</Optimization>
- <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- <MinimalRebuild>true</MinimalRebuild>
- <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
- <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
- <PrecompiledHeader>
- </PrecompiledHeader>
- <WarningLevel>Level3</WarningLevel>
- <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
- </ClCompile>
- <Link>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <SubSystem>Console</SubSystem>
- <TargetMachine>MachineX86</TargetMachine>
- <AdditionalDependencies>zdll.lib;%(AdditionalDependencies)</AdditionalDependencies>
- </Link>
- </ItemDefinitionGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
- <ClCompile>
- <Optimization>MaxSpeed</Optimization>
- <IntrinsicFunctions>true</IntrinsicFunctions>
- <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
- <FunctionLevelLinking>true</FunctionLevelLinking>
- <PrecompiledHeader>
- </PrecompiledHeader>
- <WarningLevel>Level3</WarningLevel>
- <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
- </ClCompile>
- <Link>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <SubSystem>Console</SubSystem>
- <OptimizeReferences>true</OptimizeReferences>
- <EnableCOMDATFolding>true</EnableCOMDATFolding>
- <TargetMachine>MachineX86</TargetMachine>
- <AdditionalDependencies>zdll.lib;%(AdditionalDependencies)</AdditionalDependencies>
- </Link>
- </ItemDefinitionGroup>
- <ItemGroup>
- <ClCompile Include="extract-rules.cpp" />
- <ClCompile Include="ExtractedRule.cpp" />
- <ClCompile Include="HoleCollection.cpp" />
- <ClCompile Include="InputFileStream.cpp" />
- <ClCompile Include="SentenceAlignment.cpp" />
- <ClCompile Include="SentenceAlignmentWithSyntax.cpp" />
- <ClCompile Include="SyntaxTree.cpp" />
- <ClCompile Include="tables-core.cpp" />
- <ClCompile Include="XmlTree.cpp" />
- </ItemGroup>
- <ItemGroup>
- <ClInclude Include="ExtractedRule.h" />
- <ClInclude Include="Hole.h" />
- <ClInclude Include="HoleCollection.h" />
- <ClInclude Include="SentenceAlignment.h" />
- <ClInclude Include="SentenceAlignmentWithSyntax.h" />
- <ClInclude Include="SyntaxTree.h" />
- <ClInclude Include="tables-core.h" />
- <ClInclude Include="XmlTree.h" />
- </ItemGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
- <ImportGroup Label="ExtensionTargets">
- </ImportGroup>
-</Project> \ No newline at end of file
diff --git a/phrase-extract/extract.vcxproj b/phrase-extract/extract.vcxproj
deleted file mode 100644
index 60a1128eb..000000000
--- a/phrase-extract/extract.vcxproj
+++ /dev/null
@@ -1,103 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
- <ItemGroup Label="ProjectConfigurations">
- <ProjectConfiguration Include="Debug|Win32">
- <Configuration>Debug</Configuration>
- <Platform>Win32</Platform>
- </ProjectConfiguration>
- <ProjectConfiguration Include="Release|Win32">
- <Configuration>Release</Configuration>
- <Platform>Win32</Platform>
- </ProjectConfiguration>
- </ItemGroup>
- <PropertyGroup Label="Globals">
- <ProjectGuid>{2475F8E8-A5C9-4785-8B09-5F4E120FC518}</ProjectGuid>
- <RootNamespace>extract</RootNamespace>
- <Keyword>Win32Proj</Keyword>
- </PropertyGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <CharacterSet>Unicode</CharacterSet>
- <WholeProgramOptimization>true</WholeProgramOptimization>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <CharacterSet>Unicode</CharacterSet>
- </PropertyGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
- <ImportGroup Label="ExtensionSettings">
- </ImportGroup>
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <PropertyGroup Label="UserMacros" />
- <PropertyGroup>
- <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
- <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</OutDir>
- <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
- <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
- <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</OutDir>
- <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
- <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
- </PropertyGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
- <ClCompile>
- <Optimization>Disabled</Optimization>
- <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- <MinimalRebuild>true</MinimalRebuild>
- <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
- <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
- <PrecompiledHeader>
- </PrecompiledHeader>
- <WarningLevel>Level3</WarningLevel>
- <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
- </ClCompile>
- <Link>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <SubSystem>Console</SubSystem>
- <RandomizedBaseAddress>false</RandomizedBaseAddress>
- <DataExecutionPrevention>
- </DataExecutionPrevention>
- <TargetMachine>MachineX86</TargetMachine>
- <AdditionalDependencies>zdll.lib;%(AdditionalDependencies)</AdditionalDependencies>
- </Link>
- </ItemDefinitionGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
- <ClCompile>
- <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
- <PrecompiledHeader>
- </PrecompiledHeader>
- <WarningLevel>Level3</WarningLevel>
- <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
- </ClCompile>
- <Link>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <SubSystem>Console</SubSystem>
- <OptimizeReferences>true</OptimizeReferences>
- <EnableCOMDATFolding>true</EnableCOMDATFolding>
- <RandomizedBaseAddress>false</RandomizedBaseAddress>
- <DataExecutionPrevention>
- </DataExecutionPrevention>
- <TargetMachine>MachineX86</TargetMachine>
- <AdditionalDependencies>zdll.lib;%(AdditionalDependencies)</AdditionalDependencies>
- </Link>
- </ItemDefinitionGroup>
- <ItemGroup>
- <ClCompile Include="extract.cpp" />
- <ClCompile Include="InputFileStream.cpp" />
- <ClCompile Include="SentenceAlignment.cpp" />
- <ClCompile Include="tables-core.cpp" />
- </ItemGroup>
- <ItemGroup>
- <ClInclude Include="SentenceAlignment.h" />
- <ClInclude Include="tables-core.h" />
- </ItemGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
- <ImportGroup Label="ExtensionTargets">
- </ImportGroup>
-</Project> \ No newline at end of file
diff --git a/phrase-extract/phrase-extract.sln b/phrase-extract/phrase-extract.sln
deleted file mode 100644
index 800c26192..000000000
--- a/phrase-extract/phrase-extract.sln
+++ /dev/null
@@ -1,56 +0,0 @@
-
-Microsoft Visual Studio Solution File, Format Version 11.00
-# Visual C++ Express 2010
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "score", "score.vcxproj", "{34AC84C7-62A1-4BBE-BCA9-4E95B2341039}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "extract", "extract.vcxproj", "{2475F8E8-A5C9-4785-8B09-5F4E120FC518}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "symal", "..\symal\symal.vcxproj", "{6716FB26-8298-47A3-A915-958AF0AC80F8}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "extract-rules", "extract-rules.vcxproj", "{3C5CFAEC-6830-4491-9008-1C9E8C381C50}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "consolidate", "consolidate.vcxproj", "{45410EFE-65C0-4078-82E4-D636258F9225}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "consolidate-direct", "consolidate-direct.vcxproj", "{33775109-60CF-4C1C-A869-5450B3DD88B3}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "extract-lex", "extract-lex.vcxproj", "{808BF985-CA18-4E55-8AAC-70E04DC25117}"
-EndProject
-Global
- GlobalSection(SolutionConfigurationPlatforms) = preSolution
- Debug|Win32 = Debug|Win32
- Release|Win32 = Release|Win32
- EndGlobalSection
- GlobalSection(ProjectConfigurationPlatforms) = postSolution
- {34AC84C7-62A1-4BBE-BCA9-4E95B2341039}.Debug|Win32.ActiveCfg = Debug|Win32
- {34AC84C7-62A1-4BBE-BCA9-4E95B2341039}.Debug|Win32.Build.0 = Debug|Win32
- {34AC84C7-62A1-4BBE-BCA9-4E95B2341039}.Release|Win32.ActiveCfg = Release|Win32
- {34AC84C7-62A1-4BBE-BCA9-4E95B2341039}.Release|Win32.Build.0 = Release|Win32
- {2475F8E8-A5C9-4785-8B09-5F4E120FC518}.Debug|Win32.ActiveCfg = Debug|Win32
- {2475F8E8-A5C9-4785-8B09-5F4E120FC518}.Debug|Win32.Build.0 = Debug|Win32
- {2475F8E8-A5C9-4785-8B09-5F4E120FC518}.Release|Win32.ActiveCfg = Release|Win32
- {2475F8E8-A5C9-4785-8B09-5F4E120FC518}.Release|Win32.Build.0 = Release|Win32
- {6716FB26-8298-47A3-A915-958AF0AC80F8}.Debug|Win32.ActiveCfg = Debug|Win32
- {6716FB26-8298-47A3-A915-958AF0AC80F8}.Debug|Win32.Build.0 = Debug|Win32
- {6716FB26-8298-47A3-A915-958AF0AC80F8}.Release|Win32.ActiveCfg = Release|Win32
- {6716FB26-8298-47A3-A915-958AF0AC80F8}.Release|Win32.Build.0 = Release|Win32
- {3C5CFAEC-6830-4491-9008-1C9E8C381C50}.Debug|Win32.ActiveCfg = Debug|Win32
- {3C5CFAEC-6830-4491-9008-1C9E8C381C50}.Debug|Win32.Build.0 = Debug|Win32
- {3C5CFAEC-6830-4491-9008-1C9E8C381C50}.Release|Win32.ActiveCfg = Release|Win32
- {3C5CFAEC-6830-4491-9008-1C9E8C381C50}.Release|Win32.Build.0 = Release|Win32
- {45410EFE-65C0-4078-82E4-D636258F9225}.Debug|Win32.ActiveCfg = Debug|Win32
- {45410EFE-65C0-4078-82E4-D636258F9225}.Debug|Win32.Build.0 = Debug|Win32
- {45410EFE-65C0-4078-82E4-D636258F9225}.Release|Win32.ActiveCfg = Release|Win32
- {45410EFE-65C0-4078-82E4-D636258F9225}.Release|Win32.Build.0 = Release|Win32
- {33775109-60CF-4C1C-A869-5450B3DD88B3}.Debug|Win32.ActiveCfg = Debug|Win32
- {33775109-60CF-4C1C-A869-5450B3DD88B3}.Debug|Win32.Build.0 = Debug|Win32
- {33775109-60CF-4C1C-A869-5450B3DD88B3}.Release|Win32.ActiveCfg = Release|Win32
- {33775109-60CF-4C1C-A869-5450B3DD88B3}.Release|Win32.Build.0 = Release|Win32
- {808BF985-CA18-4E55-8AAC-70E04DC25117}.Debug|Win32.ActiveCfg = Debug|Win32
- {808BF985-CA18-4E55-8AAC-70E04DC25117}.Debug|Win32.Build.0 = Debug|Win32
- {808BF985-CA18-4E55-8AAC-70E04DC25117}.Release|Win32.ActiveCfg = Release|Win32
- {808BF985-CA18-4E55-8AAC-70E04DC25117}.Release|Win32.Build.0 = Release|Win32
- EndGlobalSection
- GlobalSection(SolutionProperties) = preSolution
- HideSolutionNode = FALSE
- EndGlobalSection
-EndGlobal
diff --git a/phrase-extract/score.vcxproj b/phrase-extract/score.vcxproj
deleted file mode 100644
index c2c26f513..000000000
--- a/phrase-extract/score.vcxproj
+++ /dev/null
@@ -1,106 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
- <ItemGroup Label="ProjectConfigurations">
- <ProjectConfiguration Include="Debug|Win32">
- <Configuration>Debug</Configuration>
- <Platform>Win32</Platform>
- </ProjectConfiguration>
- <ProjectConfiguration Include="Release|Win32">
- <Configuration>Release</Configuration>
- <Platform>Win32</Platform>
- </ProjectConfiguration>
- </ItemGroup>
- <PropertyGroup Label="Globals">
- <ProjectGuid>{34AC84C7-62A1-4BBE-BCA9-4E95B2341039}</ProjectGuid>
- <RootNamespace>score</RootNamespace>
- <Keyword>Win32Proj</Keyword>
- </PropertyGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <CharacterSet>Unicode</CharacterSet>
- <WholeProgramOptimization>true</WholeProgramOptimization>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <CharacterSet>Unicode</CharacterSet>
- </PropertyGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
- <ImportGroup Label="ExtensionSettings">
- </ImportGroup>
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <PropertyGroup Label="UserMacros" />
- <PropertyGroup>
- <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
- <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</OutDir>
- <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
- <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
- <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</OutDir>
- <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
- <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
- </PropertyGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
- <ClCompile>
- <Optimization>Disabled</Optimization>
- <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- <MinimalRebuild>true</MinimalRebuild>
- <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
- <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
- <PrecompiledHeader>
- </PrecompiledHeader>
- <WarningLevel>Level3</WarningLevel>
- <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
- </ClCompile>
- <Link>
- <AdditionalDependencies>zdll.lib;%(AdditionalDependencies)</AdditionalDependencies>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <SubSystem>Console</SubSystem>
- <RandomizedBaseAddress>false</RandomizedBaseAddress>
- <DataExecutionPrevention>
- </DataExecutionPrevention>
- <TargetMachine>MachineX86</TargetMachine>
- </Link>
- </ItemDefinitionGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
- <ClCompile>
- <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
- <PrecompiledHeader>
- </PrecompiledHeader>
- <WarningLevel>Level3</WarningLevel>
- <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
- </ClCompile>
- <Link>
- <AdditionalDependencies>zdll.lib;%(AdditionalDependencies)</AdditionalDependencies>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <SubSystem>Console</SubSystem>
- <OptimizeReferences>true</OptimizeReferences>
- <EnableCOMDATFolding>true</EnableCOMDATFolding>
- <RandomizedBaseAddress>false</RandomizedBaseAddress>
- <DataExecutionPrevention>
- </DataExecutionPrevention>
- <TargetMachine>MachineX86</TargetMachine>
- </Link>
- </ItemDefinitionGroup>
- <ItemGroup>
- <ClCompile Include="AlignmentPhrase.cpp" />
- <ClCompile Include="InputFileStream.cpp" />
- <ClCompile Include="ExtractionPhrasePair.cpp" />
- <ClCompile Include="score.cpp" />
- <ClCompile Include="tables-core.cpp" />
- </ItemGroup>
- <ItemGroup>
- <ClInclude Include="AlignmentPhrase.h" />
- <ClInclude Include="InputFileStream.h" />
- <ClInclude Include="ExtractionPhrasePair.h" />
- <ClInclude Include="tables-core.h" />
- </ItemGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
- <ImportGroup Label="ExtensionTargets">
- </ImportGroup>
-</Project>
diff --git a/misc/CreateProbingPT.cpp b/probingpt/CreateProbingPT.cpp
index 2cf6627ef..6be0b402f 100644
--- a/misc/CreateProbingPT.cpp
+++ b/probingpt/CreateProbingPT.cpp
@@ -1,9 +1,9 @@
#include <string>
#include <boost/program_options.hpp>
#include "util/usage.hh"
-#include "moses/TranslationModel/ProbingPT/storing.hh"
-#include "moses/InputFileStream.h"
-#include "moses/OutputFileStream.h"
+#include "storing.h"
+#include "InputFileStream.h"
+#include "OutputFileStream.h"
#include "moses/Util.h"
using namespace std;
@@ -66,7 +66,7 @@ int main(int argc, char* argv[])
inPath = ReformatSCFGFile(inPath);
}
- Moses::createProbingPT(inPath, outPath, num_scores, num_lex_scores, log_prob, max_cache_size, scfg);
+ probingpt::createProbingPT(inPath, outPath, num_scores, num_lex_scores, log_prob, max_cache_size, scfg);
//util::PrintUsage(std::cout);
return 0;
@@ -74,9 +74,9 @@ int main(int argc, char* argv[])
std::string ReformatSCFGFile(const std::string &path)
{
- Moses::InputFileStream inFile(path);
+ probingpt::InputFileStream inFile(path);
string reformattedPath = path + ".reformat.gz";
- Moses::OutputFileStream outFile(reformattedPath);
+ probingpt::OutputFileStream outFile(reformattedPath);
string line;
while (getline(inFile, line)) {
diff --git a/probingpt/InputFileStream.cpp b/probingpt/InputFileStream.cpp
new file mode 100644
index 000000000..586d4bddb
--- /dev/null
+++ b/probingpt/InputFileStream.cpp
@@ -0,0 +1,59 @@
+// $Id$
+
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+
+#include "InputFileStream.h"
+#include "gzfilebuf.h"
+#include <iostream>
+
+using namespace std;
+
+namespace probingpt
+{
+
+InputFileStream::InputFileStream(const std::string &filePath) :
+ std::istream(NULL), m_streambuf(NULL)
+{
+ if (filePath.size() > 3 && filePath.substr(filePath.size() - 3, 3) == ".gz") {
+ m_streambuf = new gzfilebuf(filePath.c_str());
+ } else {
+ std::filebuf* fb = new std::filebuf();
+ fb = fb->open(filePath.c_str(), std::ios::in);
+ if (!fb) {
+ cerr << "Can't read " << filePath.c_str() << endl;
+ exit(1);
+ }
+ m_streambuf = fb;
+ }
+ this->init(m_streambuf);
+}
+
+InputFileStream::~InputFileStream()
+{
+ delete m_streambuf;
+ m_streambuf = NULL;
+}
+
+void InputFileStream::Close()
+{
+}
+
+}
+
diff --git a/probingpt/InputFileStream.h b/probingpt/InputFileStream.h
new file mode 100644
index 000000000..99933c093
--- /dev/null
+++ b/probingpt/InputFileStream.h
@@ -0,0 +1,46 @@
+// $Id$
+
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+
+#pragma once
+
+#include <cstdlib>
+#include <fstream>
+#include <string>
+
+namespace probingpt
+{
+
+/** Used in place of std::istream, can read zipped files if it ends in .gz
+ */
+class InputFileStream: public std::istream
+{
+protected:
+ std::streambuf *m_streambuf;
+public:
+
+ explicit InputFileStream(const std::string &filePath);
+ ~InputFileStream();
+
+ void Close();
+};
+
+}
+
diff --git a/probingpt/Jamfile b/probingpt/Jamfile
new file mode 100644
index 000000000..c1d41e835
--- /dev/null
+++ b/probingpt/Jamfile
@@ -0,0 +1,32 @@
+alias deps : ..//z ..//boost_iostreams ..//boost_filesystem ;
+
+lib probingpt :
+ StoreTarget.cpp
+ StoreVocab.cpp
+ hash.cpp
+ line_splitter.cpp
+ probing_hash_utils.cpp
+ querying.cpp
+ storing.cpp
+ vocabid.cpp
+ OutputFileStream.cpp
+ InputFileStream.cpp
+ util.cpp
+
+# ../util/string_piece.cc
+# ../util/exception.cc
+# ../util/file.cc
+# ../util/file_piece.cc
+# ../util/murmur_hash.cc
+# ../util/mmap.cc
+# ../util/read_compressed.cc
+# ../util/parallel_read.cc
+# ../util/ersatz_progress.cc
+
+
+ deps
+ ;
+
+exe CreateProbingPT : CreateProbingPT.cpp probingpt ../util//kenutil ;
+
+alias programs : CreateProbingPT ;
diff --git a/probingpt/OutputFileStream.cpp b/probingpt/OutputFileStream.cpp
new file mode 100644
index 000000000..56647dc9e
--- /dev/null
+++ b/probingpt/OutputFileStream.cpp
@@ -0,0 +1,87 @@
+// $Id: OutputFileStream.cpp 2780 2010-01-29 17:11:17Z bojar $
+
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+
+#include <iostream>
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/iostreams/filter/gzip.hpp>
+#include "OutputFileStream.h"
+#include "gzfilebuf.h"
+
+using namespace std;
+using namespace boost::algorithm;
+
+namespace probingpt
+{
+OutputFileStream::OutputFileStream() :
+ boost::iostreams::filtering_ostream(), m_outFile(NULL), m_open(false)
+{
+}
+
+OutputFileStream::OutputFileStream(const std::string &filePath) :
+ m_outFile(NULL), m_open(false)
+{
+ Open(filePath);
+}
+
+OutputFileStream::~OutputFileStream()
+{
+ Close();
+}
+
+bool OutputFileStream::Open(const std::string &filePath)
+{
+ assert(!m_open);
+ if (filePath == std::string("-")) {
+ // Write to standard output. Leave m_outFile null.
+ this->push(std::cout);
+ } else {
+ m_outFile = new ofstream(filePath.c_str(),
+ ios_base::out | ios_base::binary);
+ if (m_outFile->fail()) {
+ return false;
+ }
+
+ if (ends_with(filePath, ".gz")) {
+ this->push(boost::iostreams::gzip_compressor());
+ }
+ this->push(*m_outFile);
+ }
+
+ m_open = true;
+ return true;
+}
+
+void OutputFileStream::Close()
+{
+ if (!m_open) return;
+ this->flush();
+ if (m_outFile) {
+ this->pop(); // file
+
+ m_outFile->close();
+ delete m_outFile;
+ m_outFile = NULL;
+ }
+ m_open = false;
+}
+
+}
+
diff --git a/probingpt/OutputFileStream.h b/probingpt/OutputFileStream.h
new file mode 100644
index 000000000..0b2fc3251
--- /dev/null
+++ b/probingpt/OutputFileStream.h
@@ -0,0 +1,81 @@
+// $Id: InputFileStream.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+
+#pragma once
+
+#include <cstdlib>
+#include <fstream>
+#include <string>
+#include <iostream>
+#include <boost/iostreams/filtering_stream.hpp>
+
+namespace probingpt
+{
+
+/** Version of std::ostream with transparent compression.
+ *
+ * Transparently compresses output when writing to a file whose name ends in
+ * ".gz". Or, writes to stdout instead of a file when given a filename
+ * consisting of just a dash ("-").
+ */
+class OutputFileStream: public boost::iostreams::filtering_ostream
+{
+private:
+ /** File that needs flushing & closing when we close this stream.
+ *
+ * Is NULL when no file is opened, e.g. when writing to standard output.
+ */
+ std::ofstream *m_outFile;
+
+ /// Is this stream open?
+ bool m_open;
+
+public:
+ /** Create an unopened OutputFileStream.
+ *
+ * Until it's been opened, nothing can be done with this stream.
+ */
+ OutputFileStream();
+
+ /// Create an OutputFileStream, and open it by calling Open().
+ OutputFileStream(const std::string &filePath);
+ virtual ~OutputFileStream();
+
+ // TODO: Can we please just always throw an exception when this fails?
+ /** Open stream.
+ *
+ * If filePath is "-" (just a dash), this opens the stream for writing to
+ * standard output. Otherwise, it opens the given file. If the filename
+ * has the ".gz" suffix, output will be transparently compressed.
+ *
+ * Call Close() to close the file.
+ *
+ * Returns whether opening the file was successful. It may also throw an
+ * exception on failure.
+ */
+ bool Open(const std::string &filePath);
+
+ /// Flush and close stream. After this, the stream can be opened again.
+ void Close();
+};
+
+}
+
diff --git a/moses/TranslationModel/ProbingPT/StoreTarget.cpp b/probingpt/StoreTarget.cpp
index f586a26b9..8c33b96a2 100644
--- a/moses/TranslationModel/ProbingPT/StoreTarget.cpp
+++ b/probingpt/StoreTarget.cpp
@@ -6,14 +6,14 @@
*/
#include <boost/foreach.hpp>
#include "StoreTarget.h"
-#include "line_splitter.hh"
-#include "probing_hash_utils.hh"
-#include "moses/OutputFileStream.h"
-#include "moses/Util.h"
+#include "line_splitter.h"
+#include "probing_hash_utils.h"
+#include "OutputFileStream.h"
+#include "moses2/legacy/Util2.h"
using namespace std;
-namespace Moses
+namespace probingpt
{
StoreTarget::StoreTarget(const std::string &basepath)
@@ -51,7 +51,7 @@ uint64_t StoreTarget::Save()
}
// clear coll
- RemoveAllInColl(m_coll);
+ Moses2::RemoveAllInColl(m_coll);
m_coll.clear();
// starting position of coll
@@ -89,7 +89,7 @@ void StoreTarget::Save(const target_text &rule)
void StoreTarget::SaveAlignment()
{
std::string path = m_basePath + "/Alignments.dat";
- OutputFileStream file(path);
+ probingpt::OutputFileStream file(path);
BOOST_FOREACH(Alignments::value_type &valPair, m_aligns) {
file << valPair.second << "\t";
@@ -149,10 +149,10 @@ void StoreTarget::Append(const line_text &line, bool log_prob, bool scfg)
util::SingleCharacter(' '));
while (it) {
string tok = it->as_string();
- float prob = Scan<float>(tok);
+ float prob = Moses2::Scan<float>(tok);
if (log_prob) {
- prob = FloorScore(log(prob));
+ prob = Moses2::FloorScore(log(prob));
if (prob == 0.0f) prob = 0.0000000001;
}
@@ -172,12 +172,12 @@ void StoreTarget::Append(const line_text &line, bool log_prob, bool scfg)
it = util::TokenIter<util::SingleCharacter>(line.word_align,
util::SingleCharacter(' '));
while (it) {
- string tokPair = Trim(it->as_string());
+ string tokPair = Moses2::Trim(it->as_string());
if (tokPair.empty()) {
break;
}
- vector<size_t> alignPair = Tokenize<size_t>(tokPair, "-");
+ vector<size_t> alignPair = Moses2::Tokenize<size_t>(tokPair, "-");
assert(alignPair.size() == 2);
bool nonTerm = false;
@@ -241,11 +241,11 @@ void StoreTarget::AppendLexRO(std::string &prop, std::vector<float> &retvector,
//cerr << "lexProb=" << lexProb << endl;
// append lex probs to pt probs
- vector<float> scores = Tokenize<float>(lexProb);
+ vector<float> scores = Moses2::Tokenize<float>(lexProb);
if (log_prob) {
for (size_t i = 0; i < scores.size(); ++i) {
- scores[i] = FloorScore(log(scores[i]));
+ scores[i] = Moses2::FloorScore(log(scores[i]));
if (scores[i] == 0.0f) scores[i] = 0.0000000001;
}
}
diff --git a/moses/TranslationModel/ProbingPT/StoreTarget.h b/probingpt/StoreTarget.h
index 331c197b3..7e5564ef1 100644
--- a/moses/TranslationModel/ProbingPT/StoreTarget.h
+++ b/probingpt/StoreTarget.h
@@ -13,7 +13,7 @@
#include <boost/unordered_set.hpp>
#include "StoreVocab.h"
-namespace Moses
+namespace probingpt
{
class line_text;
diff --git a/contrib/moses2/TranslationModel/ProbingPT/StoreVocab.cpp b/probingpt/StoreVocab.cpp
index e0b5b0b08..970249534 100644
--- a/contrib/moses2/TranslationModel/ProbingPT/StoreVocab.cpp
+++ b/probingpt/StoreVocab.cpp
@@ -7,7 +7,7 @@
#include <fstream>
#include "StoreVocab.h"
-namespace Moses2
+namespace probingpt
{
} /* namespace Moses2 */
diff --git a/moses/TranslationModel/ProbingPT/StoreVocab.h b/probingpt/StoreVocab.h
index 806dcebf4..cd0b16384 100644
--- a/moses/TranslationModel/ProbingPT/StoreVocab.h
+++ b/probingpt/StoreVocab.h
@@ -7,10 +7,10 @@
#pragma once
#include <string>
#include <boost/unordered_map.hpp>
-#include "moses/OutputFileStream.h"
-#include "moses/Util.h"
+#include "OutputFileStream.h"
+#include "moses2/legacy/Util2.h"
-namespace Moses
+namespace probingpt
{
template<typename VOCABID>
diff --git a/probingpt/gzfilebuf.h b/probingpt/gzfilebuf.h
new file mode 100644
index 000000000..a33b19d99
--- /dev/null
+++ b/probingpt/gzfilebuf.h
@@ -0,0 +1,94 @@
+#ifndef moses_gzfile_buf_h
+#define moses_gzfile_buf_h
+
+#include <stdexcept>
+#include <streambuf>
+#include <zlib.h>
+#include <cstring>
+
+namespace probingpt
+{
+
+/** wrapper around gzip input stream. Unknown parentage
+ * @todo replace with boost version - output stream already uses it
+ */
+class gzfilebuf: public std::streambuf
+{
+public:
+ gzfilebuf(const char *filename) {
+ _gzf = gzopen(filename, "rb");
+ if (!_gzf) throw std::runtime_error(
+ "Could not open " + std::string(filename) + ".");
+ setg(_buff + sizeof(int), // beginning of putback area
+ _buff + sizeof(int), // read position
+ _buff + sizeof(int)); // end position
+ }
+ ~gzfilebuf() {
+ gzclose(_gzf);
+ }
+protected:
+ virtual int_type overflow(int_type /* c */) {
+ throw;
+ }
+
+ // write multiple characters
+ virtual std::streamsize xsputn(const char* /* s */, std::streamsize /* num */) {
+ throw;
+ }
+
+ virtual std::streampos seekpos(std::streampos /* sp */,
+ std::ios_base::openmode /* which = std::ios_base::in | std::ios_base::out */) {
+ throw;
+ }
+
+ //read one character
+ virtual int_type underflow() {
+ // is read position before end of _buff?
+ if (gptr() < egptr()) {
+ return traits_type::to_int_type(*gptr());
+ }
+
+ /* process size of putback area
+ * - use number of characters read
+ * - but at most four
+ */
+ unsigned int numPutback = gptr() - eback();
+ if (numPutback > sizeof(int)) {
+ numPutback = sizeof(int);
+ }
+
+ /* copy up to four characters previously read into
+ * the putback _buff (area of first four characters)
+ */
+ std::memmove(_buff + (sizeof(int) - numPutback), gptr() - numPutback,
+ numPutback);
+
+ // read new characters
+ int num = gzread(_gzf, _buff + sizeof(int), _buffsize - sizeof(int));
+ if (num <= 0) {
+ // ERROR or EOF
+ return EOF;
+ }
+
+ // reset _buff pointers
+ setg(_buff + (sizeof(int) - numPutback), // beginning of putback area
+ _buff + sizeof(int), // read position
+ _buff + sizeof(int) + num); // end of buffer
+
+ // return next character
+ return traits_type::to_int_type(*gptr());
+ }
+
+ std::streamsize xsgetn(char* s, std::streamsize num) {
+ return gzread(_gzf, s, num);
+ }
+
+private:
+ gzFile _gzf;
+ static const unsigned int _buffsize = 1024;
+ char _buff[_buffsize];
+};
+
+}
+
+#endif
diff --git a/moses/TranslationModel/ProbingPT/hash.cpp b/probingpt/hash.cpp
index 47242e25d..6ecbae909 100644
--- a/moses/TranslationModel/ProbingPT/hash.cpp
+++ b/probingpt/hash.cpp
@@ -1,9 +1,9 @@
#include <iostream>
-#include "hash.hh"
+#include "hash.h"
using namespace std;
-namespace Moses
+namespace probingpt
{
uint64_t getHash(StringPiece text)
diff --git a/contrib/moses2/TranslationModel/ProbingPT/hash.hh b/probingpt/hash.h
index 78cc27999..9d14e73d6 100644
--- a/contrib/moses2/TranslationModel/ProbingPT/hash.hh
+++ b/probingpt/hash.h
@@ -6,7 +6,7 @@
#include "util/tokenize_piece.hh"
#include <vector>
-namespace Moses2
+namespace probingpt
{
//Gets the MurmurmurHash for give string
diff --git a/contrib/moses2/TranslationModel/ProbingPT/line_splitter.cpp b/probingpt/line_splitter.cpp
index e4b5e2694..c47f7bdd8 100644
--- a/contrib/moses2/TranslationModel/ProbingPT/line_splitter.cpp
+++ b/probingpt/line_splitter.cpp
@@ -1,6 +1,6 @@
-#include "line_splitter.hh"
+#include "line_splitter.h"
-namespace Moses2
+namespace probingpt
{
line_text splitLine(const StringPiece &textin, bool scfg)
diff --git a/moses/TranslationModel/ProbingPT/line_splitter.hh b/probingpt/line_splitter.h
index 01b86fc9b..e8302151b 100644
--- a/moses/TranslationModel/ProbingPT/line_splitter.hh
+++ b/probingpt/line_splitter.h
@@ -1,15 +1,14 @@
#pragma once
+#include <vector>
+#include <cstdlib> //atof
#include "util/string_piece.hh"
#include "util/tokenize_piece.hh"
#include "util/file_piece.hh"
-#include <vector>
-#include <cstdlib> //atof
#include "util/string_piece.hh" //Tokenization and work with StringPiece
#include "util/tokenize_piece.hh"
-#include <vector>
-namespace Moses
+namespace probingpt
{
//Struct for holding processed line
diff --git a/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp b/probingpt/probing_hash_utils.cpp
index 96c317b65..e64da72c4 100644
--- a/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp
+++ b/probingpt/probing_hash_utils.cpp
@@ -1,8 +1,8 @@
#include <iostream>
-#include "probing_hash_utils.hh"
+#include "probing_hash_utils.h"
#include "util/file.hh"
-namespace Moses2
+namespace probingpt
{
//Read table from disk, return memory map location
diff --git a/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh b/probingpt/probing_hash_utils.h
index 368147807..a21236a08 100644
--- a/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh
+++ b/probingpt/probing_hash_utils.h
@@ -2,29 +2,30 @@
#include "util/probing_hash_table.hh"
+#if defined(_WIN32) || defined(_WIN64)
+#include <mman.h>
+#else
#include <sys/mman.h>
+#endif
#include <boost/functional/hash.hpp>
#include <fcntl.h>
#include <fstream>
-namespace Moses2
+namespace probingpt
{
#define API_VERSION 15
//Hash table entry
-struct Entry
-{
+struct Entry {
typedef uint64_t Key;
Key key;
- Key GetKey() const
- {
+ Key GetKey() const {
return key;
}
- void SetKey(Key to)
- {
+ void SetKey(Key to) {
key = to;
}
@@ -42,8 +43,7 @@ char * readTable(const char * filename, util::LoadMethod load_method, util::scop
uint64_t getKey(const uint64_t source_phrase[], size_t size);
-struct TargetPhraseInfo
-{
+struct TargetPhraseInfo {
uint32_t alignTerm;
uint32_t alignNonTerm;
uint16_t numWords;
diff --git a/contrib/moses2/TranslationModel/ProbingPT/querying.cpp b/probingpt/querying.cpp
index 9ea2d8cb6..e71c79a98 100644
--- a/contrib/moses2/TranslationModel/ProbingPT/querying.cpp
+++ b/probingpt/querying.cpp
@@ -1,10 +1,10 @@
-#include "querying.hh"
+#include "querying.h"
#include "util/exception.hh"
-#include "../../legacy/Util2.h"
+#include "moses2/legacy/Util2.h"
using namespace std;
-namespace Moses2
+namespace probingpt
{
QueryEngine::QueryEngine(const char * filepath, util::LoadMethod load_method)
@@ -46,10 +46,9 @@ QueryEngine::QueryEngine(const char * filepath, util::LoadMethod load_method)
found = Get(keyValue, "API_VERSION", version);
if (!found) {
std::cerr << "Old or corrupted version of ProbingPT. Please rebinarize your phrase tables." << std::endl;
- }
- else if (version != API_VERSION) {
+ } else if (version != API_VERSION) {
std::cerr << "The ProbingPT API has changed. " << version << "!="
- << API_VERSION << " Please rebinarize your phrase tables." << std::endl;
+ << API_VERSION << " Please rebinarize your phrase tables." << std::endl;
exit(EXIT_FAILURE);
}
@@ -104,7 +103,7 @@ uint64_t QueryEngine::getKey(uint64_t source_phrase[], size_t size) const
{
//TOO SLOW
//uint64_t key = util::MurmurHashNative(&source_phrase[0], source_phrase.size());
- return Moses2::getKey(source_phrase, size);
+ return probingpt::getKey(source_phrase, size);
}
std::pair<bool, uint64_t> QueryEngine::query(uint64_t key)
@@ -128,14 +127,14 @@ void QueryEngine::read_alignments(const std::string &alignPath)
vector<string> toks = Moses2::Tokenize(line, "\t ");
UTIL_THROW_IF2(toks.size() == 0, "Corrupt alignment file");
- uint32_t alignInd = Scan<uint32_t>(toks[0]);
+ uint32_t alignInd = Moses2::Scan<uint32_t>(toks[0]);
if (alignInd >= alignColl.size()) {
alignColl.resize(alignInd + 1);
}
Alignments &aligns = alignColl[alignInd];
for (size_t i = 1; i < toks.size(); ++i) {
- size_t pos = Scan<size_t>(toks[i]);
+ size_t pos = Moses2::Scan<size_t>(toks[i]);
aligns.push_back(pos);
}
}
@@ -143,37 +142,37 @@ void QueryEngine::read_alignments(const std::string &alignPath)
void QueryEngine::file_exits(const std::string &basePath)
{
- if (!FileExists(basePath + "/Alignments.dat")) {
- UTIL_THROW2("Require file does not exist in: " << basePath << "/Alignments.dat");
- }
- if (!FileExists(basePath + "/TargetColl.dat")) {
- UTIL_THROW2("Require file does not exist in: " << basePath << "/TargetColl.dat");
- }
- if (!FileExists(basePath + "/TargetVocab.dat")) {
- UTIL_THROW2("Require file does not exist in: " << basePath << "/TargetVocab.dat");
- }
- if (!FileExists(basePath + "/cache")) {
- UTIL_THROW2("Require file does not exist in: " << basePath << "/cache");
- }
- if (!FileExists(basePath + "/config")) {
- UTIL_THROW2("Require file does not exist in: " << basePath << "/config");
- }
- if (!FileExists(basePath + "/probing_hash.dat")) {
- UTIL_THROW2("Require file does not exist in: " << basePath << "/probing_hash.dat");
- }
- if (!FileExists(basePath + "/source_vocabids")) {
- UTIL_THROW2("Require file does not exist in: " << basePath << "/source_vocabids");
- }
-
- /*
-
- if (!FileExists(path_to_config) || !FileExists(path_to_hashtable) ||
- !FileExists(path_to_source_vocabid) || !FileExists(basepath + alignPath) ||
- !FileExists(basepath + "/TargetColl.dat") || !FileExists(basepath + "/TargetVocab.dat") ||
- !FileExists(basepath + "/cache")) {
- UTIL_THROW2("A required table doesn't exist in: " << basepath);
- }
- */
+ if (!Moses2::FileExists(basePath + "/Alignments.dat")) {
+ UTIL_THROW2("Require file does not exist in: " << basePath << "/Alignments.dat");
+ }
+ if (!Moses2::FileExists(basePath + "/TargetColl.dat")) {
+ UTIL_THROW2("Require file does not exist in: " << basePath << "/TargetColl.dat");
+ }
+ if (!Moses2::FileExists(basePath + "/TargetVocab.dat")) {
+ UTIL_THROW2("Require file does not exist in: " << basePath << "/TargetVocab.dat");
+ }
+ if (!Moses2::FileExists(basePath + "/cache")) {
+ UTIL_THROW2("Require file does not exist in: " << basePath << "/cache");
+ }
+ if (!Moses2::FileExists(basePath + "/config")) {
+ UTIL_THROW2("Require file does not exist in: " << basePath << "/config");
+ }
+ if (!Moses2::FileExists(basePath + "/probing_hash.dat")) {
+ UTIL_THROW2("Require file does not exist in: " << basePath << "/probing_hash.dat");
+ }
+ if (!Moses2::FileExists(basePath + "/source_vocabids")) {
+ UTIL_THROW2("Require file does not exist in: " << basePath << "/source_vocabids");
+ }
+
+ /*
+
+ if (!FileExists(path_to_config) || !FileExists(path_to_hashtable) ||
+ !FileExists(path_to_source_vocabid) || !FileExists(basepath + alignPath) ||
+ !FileExists(basepath + "/TargetColl.dat") || !FileExists(basepath + "/TargetVocab.dat") ||
+ !FileExists(basepath + "/cache")) {
+ UTIL_THROW2("A required table doesn't exist in: " << basepath);
+ }
+ */
}
}
diff --git a/contrib/moses2/TranslationModel/ProbingPT/querying.hh b/probingpt/querying.h
index dcdd2a75a..bdae9e89c 100644
--- a/contrib/moses2/TranslationModel/ProbingPT/querying.hh
+++ b/probingpt/querying.h
@@ -3,15 +3,16 @@
#include <boost/iostreams/device/mapped_file.hpp>
#include <boost/unordered_map.hpp>
#include <sys/stat.h> //For finding size of file
-#include "vocabid.hh"
#include <algorithm> //toLower
#include <deque>
-#include "probing_hash_utils.hh"
-#include "hash.hh" //Includes line splitter
-#include "line_splitter.hh"
-#include "../../legacy/Util2.h"
-
-namespace Moses2
+#include "vocabid.h"
+#include "probing_hash_utils.h"
+#include "hash.h" //Includes line splitter
+#include "line_splitter.h"
+#include "util.h"
+#include "moses2/legacy/Util2.h"
+
+namespace probingpt
{
class QueryEngine
@@ -50,17 +51,18 @@ public:
std::pair<bool, uint64_t> query(uint64_t key);
- const std::map<uint64_t, std::string> &getSourceVocab() const
- { return source_vocabids; }
+ const std::map<uint64_t, std::string> &getSourceVocab() const {
+ return source_vocabids;
+ }
- const std::vector<Alignments> &getAlignments() const
- { return alignColl; }
+ const std::vector<Alignments> &getAlignments() const {
+ return alignColl;
+ }
uint64_t getKey(uint64_t source_phrase[], size_t size) const;
template<typename T>
- inline bool Get(const boost::unordered_map<std::string, std::string> &keyValue, const std::string &sought, T &found) const
- {
+ inline bool Get(const boost::unordered_map<std::string, std::string> &keyValue, const std::string &sought, T &found) const {
boost::unordered_map<std::string, std::string>::const_iterator iter = keyValue.find(sought);
if (iter == keyValue.end()) {
return false;
diff --git a/moses/TranslationModel/ProbingPT/storing.cpp b/probingpt/storing.cpp
index baf6ae91e..72e61c904 100644
--- a/moses/TranslationModel/ProbingPT/storing.cpp
+++ b/probingpt/storing.cpp
@@ -1,15 +1,15 @@
#include <sys/stat.h>
#include <boost/foreach.hpp>
-#include "line_splitter.hh"
-#include "storing.hh"
+#include "line_splitter.h"
+#include "storing.h"
#include "StoreTarget.h"
#include "StoreVocab.h"
-#include "moses/Util.h"
-#include "moses/InputFileStream.h"
+#include "moses2/legacy/Util2.h"
+#include "InputFileStream.h"
using namespace std;
-namespace Moses
+namespace probingpt
{
///////////////////////////////////////////////////////////////////////
@@ -67,6 +67,9 @@ void createProbingPT(const std::string &phrasetable_path,
const std::string &basepath, int num_scores, int num_lex_scores,
bool log_prob, int max_cache_size, bool scfg)
{
+#if defined(_WIN32) || defined(_WIN64)
+ std::cerr << "Create not implemented for Windows" << std::endl;
+#else
std::cerr << "Starting..." << std::endl;
//Get basepath and create directory if missing
@@ -158,9 +161,9 @@ void createProbingPT(const std::string &phrasetable_path,
// update cache - CURRENT source phrase, not prev
if (max_cache_size) {
std::string countStr = line.counts.as_string();
- countStr = Trim(countStr);
+ countStr = Moses2::Trim(countStr);
if (!countStr.empty()) {
- std::vector<float> toks = Tokenize<float>(countStr);
+ std::vector<float> toks = Moses2::Tokenize<float>(countStr);
//cerr << "CACHE:" << line.source_phrase << " " << countStr << " " << toks[1] << endl;
if (toks.size() >= 2) {
@@ -171,7 +174,7 @@ void createProbingPT(const std::string &phrasetable_path,
uint64_t currKey = getKey(currVocabidSource);
CacheItem *item = new CacheItem(
- Trim(line.source_phrase.as_string()),
+ Moses2::Trim(line.source_phrase.as_string()),
currKey,
toks[1]);
cache.push(item);
@@ -231,6 +234,7 @@ void createProbingPT(const std::string &phrasetable_path,
configfile << "num_lex_scores\t" << num_lex_scores << '\n';
configfile << "log_prob\t" << log_prob << '\n';
configfile.close();
+#endif
}
size_t countUniqueSource(const std::string &path)
@@ -240,7 +244,7 @@ size_t countUniqueSource(const std::string &path)
std::string line, prevSource;
while (std::getline(strme, line)) {
- std::vector<std::string> toks = TokenizeMultiCharSeparator(line, "|||");
+ std::vector<std::string> toks = Moses2::TokenizeMultiCharSeparator(line, "|||");
assert(toks.size() != 0);
if (prevSource != toks[0]) {
@@ -280,7 +284,7 @@ void serialize_cache(
uint64_t getKey(const std::vector<uint64_t> &vocabid_source)
{
- return getKey(vocabid_source.data(), vocabid_source.size());
+ return probingpt::getKey(vocabid_source.data(), vocabid_source.size());
}
std::vector<uint64_t> CreatePrefix(const std::vector<uint64_t> &vocabid_source, size_t endPos)
diff --git a/moses/TranslationModel/ProbingPT/storing.hh b/probingpt/storing.h
index 994067515..1243a6df8 100644
--- a/moses/TranslationModel/ProbingPT/storing.hh
+++ b/probingpt/storing.h
@@ -10,14 +10,14 @@
#include <queue>
#include <sys/stat.h> //mkdir
-#include "hash.hh" //Includes line_splitter
-#include "probing_hash_utils.hh"
+#include "hash.h" //Includes line_splitter
+#include "probing_hash_utils.h"
+#include "vocabid.h"
#include "util/file_piece.hh"
#include "util/file.hh"
-#include "vocabid.hh"
-namespace Moses
+namespace probingpt
{
typedef std::vector<uint64_t> SourcePhrase;
diff --git a/probingpt/util.cpp b/probingpt/util.cpp
new file mode 100644
index 000000000..cf4edb81a
--- /dev/null
+++ b/probingpt/util.cpp
@@ -0,0 +1,24 @@
+#include "util.h"
+#include <cctype>
+#include "util/exception.hh"
+
+namespace probingpt
+{
+
+//! Specialisation of Scan for bool.
+//! The input is lower-cased first; "yes", "y", "true" and "1" give true,
+//! "no", "n", "false" and "0" give false. Any other input is reported through
+//! UTIL_THROW2 (from util/exception.hh), so no value is returned in that case.
+template<>
+bool Scan<bool>(const std::string &input)
+{
+  std::string lc = ToLower(input);
+  if (lc == "yes" || lc == "y" || lc == "true" || lc == "1") return true;
+  if (lc == "no" || lc == "n" || lc == "false" || lc == "0") return false;
+  UTIL_THROW2("Could not interpret " << input << " as a boolean. After lowercasing, valid values are yes, y, true, 1, no, n, false, and 0.");
+}
+
+//! Return a lower-cased copy of str; the argument itself is not modified.
+//! Conversion is done byte by byte with std::tolower.
+const std::string ToLower(const std::string& str)
+{
+  std::string lc(str);
+  for (std::string::iterator it = lc.begin(); it != lc.end(); ++it) {
+    // std::tolower is only defined for EOF and values representable as
+    // unsigned char. Where plain char is signed, bytes >= 0x80 are negative,
+    // so go through unsigned char before widening to int.
+    *it = static_cast<char>(std::tolower(static_cast<unsigned char>(*it)));
+  }
+  return lc;
+}
+
+}
diff --git a/probingpt/util.h b/probingpt/util.h
new file mode 100644
index 000000000..b1e2ad0b9
--- /dev/null
+++ b/probingpt/util.h
@@ -0,0 +1,24 @@
+#pragma once
+#include <string>
+#include <sstream>
+
+namespace probingpt
+{
+
+//! Convert a string to a variable of type T. Used for reading floats, ints
+//! etc. from files.
+//! The value is read with operator>> from a stringstream, so leading
+//! whitespace is skipped and parsing stops at the first character that does
+//! not belong to a T. If nothing can be extracted (e.g. empty input) the
+//! result is a value-initialised T (0 for arithmetic types).
+template<typename T>
+inline T Scan(const std::string &input)
+{
+  std::stringstream stream(input);
+  // Value-initialise: a failed extraction may leave ret untouched, and
+  // returning an uninitialised built-in would be undefined behaviour.
+  T ret = T();
+  stream >> ret;
+  return ret;
+}
+
+//! Specialisation to understand yes/no y/n true/false 0/1
+//! (declared here, defined out of line).
+template<>
+bool Scan<bool>(const std::string &input);
+
+//! Return a lower-cased copy of str.
+const std::string ToLower(const std::string& str);
+
+}
diff --git a/moses/TranslationModel/ProbingPT/vocabid.cpp b/probingpt/vocabid.cpp
index d6f442323..81ca261de 100644
--- a/moses/TranslationModel/ProbingPT/vocabid.cpp
+++ b/probingpt/vocabid.cpp
@@ -1,9 +1,9 @@
#include <boost/foreach.hpp>
-#include "vocabid.hh"
+#include "vocabid.h"
#include "StoreVocab.h"
-#include "moses/Util.h"
+#include "moses2/legacy/Util2.h"
-namespace Moses
+namespace probingpt
{
void add_to_map(StoreVocab<uint64_t> &sourceVocab,
@@ -45,9 +45,9 @@ void read_map(std::map<uint64_t, std::string> &karta, const char* filename)
std::string line;
while (getline(is, line)) {
- std::vector<std::string> toks = Tokenize(line, "\t");
+ std::vector<std::string> toks = Moses2::Tokenize(line, "\t");
assert(toks.size() == 2);
- uint64_t ind = Scan<uint64_t>(toks[1]);
+ uint64_t ind = Moses2::Scan<uint64_t>(toks[1]);
karta[ind] = toks[0];
}
diff --git a/moses/TranslationModel/ProbingPT/vocabid.hh b/probingpt/vocabid.h
index 7e1390874..e04a9180a 100644
--- a/moses/TranslationModel/ProbingPT/vocabid.hh
+++ b/probingpt/vocabid.h
@@ -8,12 +8,12 @@
#include <vector>
#include <map> //Container
-#include "hash.hh" //Hash of elements
+#include "hash.h" //Hash of elements
#include "util/string_piece.hh" //Tokenization and work with StringPiece
#include "util/tokenize_piece.hh"
-namespace Moses
+namespace probingpt
{
template<typename VOCABID>
class StoreVocab;
diff --git a/regression-testing/Jamfile b/regression-testing/Jamfile
index 17e399e43..e72470c12 100644
--- a/regression-testing/Jamfile
+++ b/regression-testing/Jamfile
@@ -41,7 +41,7 @@ if $(with-regtest) {
} else {
reg_test phrase : [ glob $(test-dir)/phrase.* : $(test-dir)/*withDALM ] : ../moses-cmd//moses : @reg_test_decode ;
reg_test chart : [ glob $(test-dir)/chart.* : $(test-dir)/*withDALM ] : ../moses-cmd//moses : @reg_test_decode ;
- reg_test moses2 : [ glob $(test-dir)/moses2.* : $(test-dir)/*withDALM ] : ../contrib/moses2//moses2 : @reg_test_decode ;
+ reg_test moses2 : [ glob $(test-dir)/moses2.* : $(test-dir)/*withDALM ] : ../moses2//moses2 : @reg_test_decode ;
}
if [ option.get "with-dalm" : : "yes" ] {
diff --git a/scripts/docker/Dockerfile.ubuntu.basic b/scripts/docker/Dockerfile.ubuntu.basic
new file mode 100644
index 000000000..adf204a52
--- /dev/null
+++ b/scripts/docker/Dockerfile.ubuntu.basic
@@ -0,0 +1,26 @@
+# Basic Moses image: installs the build toolchain and Boost on Ubuntu, clones
+# mosesdecoder and compiles it in /mosesdecoder.
+# NOTE(review): ubuntu:latest is a moving tag; the package names used below are
+# resolved against whatever release it points to at build time.
+FROM ubuntu:latest
+
+# NOTE(review): MAINTAINER is deprecated in current Docker releases in favour of
+# a LABEL; the address also looks like a placeholder -- confirm.
+MAINTAINER Momo <mo@mo.com>
+LABEL description="Basic Moses docker container for Ubuntu"
+
+# Update Ubuntu: refresh the package lists, switch debconf to its
+# non-interactive frontend so later installs do not prompt, then upgrade.
+RUN apt-get update
+RUN apt-get install -y apt-utils debconf-utils
+RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections
+RUN apt-get update && apt-get -y upgrade
+
+# Install some necessary tools.
+RUN apt-get install -y nano perl
+
+# Install Moses dependencies.
+RUN apt-get install -y libboost-all-dev
+RUN apt-get install -y build-essential git-core pkg-config automake libtool wget zlib1g-dev python-dev libbz2-dev cmake
+
+# Clone the repos we need. The steps below expect the checkout at /mosesdecoder.
+RUN git clone https://github.com/moses-smt/mosesdecoder.git
+
+# Install Moses: run the repository's install-dependencies makefile, then
+# compile.sh with the KenLM order and factor limits given on its command line.
+WORKDIR /mosesdecoder
+RUN make -f /mosesdecoder/contrib/Makefiles/install-dependencies.gmake
+RUN /mosesdecoder/compile.sh --max-kenlm-order=20 --max-factors=1000
+# Leave the root directory as the working directory of the final image.
+WORKDIR /
diff --git a/scripts/docker/Dockerfile.ubuntu.fastlightpbmt b/scripts/docker/Dockerfile.ubuntu.fastlightpbmt
new file mode 100644
index 000000000..8a0479724
--- /dev/null
+++ b/scripts/docker/Dockerfile.ubuntu.fastlightpbmt
@@ -0,0 +1,47 @@
+# Moses image for the "Faster and Lighter Phrase-based Machine Translation
+# Baseline" setup: builds mosesdecoder, MGIZA++ and clustercat, then gathers
+# the word-alignment/clustering binaries in /moses-training-tools.
+# NOTE(review): ubuntu:latest is a moving tag; the package names used below are
+# resolved against whatever release it points to at build time.
+FROM ubuntu:latest
+
+# NOTE(review): MAINTAINER is deprecated in current Docker releases in favour of
+# a LABEL; the address also looks like a placeholder -- confirm.
+MAINTAINER Momo <mo@mo.com>
+LABEL description="Moses docker container for 'Faster and Lighter Phrase-based Machine Translation Baseline' (aka vanilla-moses)"
+
+# Update Ubuntu: refresh the package lists, switch debconf to its
+# non-interactive frontend so later installs do not prompt, then upgrade.
+RUN apt-get update
+RUN apt-get install -y apt-utils debconf-utils
+RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections
+RUN apt-get update && apt-get -y upgrade
+
+# Install some necessary tools.
+RUN apt-get install -y sudo nano perl python-dev python3-dev python-pip python3-pip curl wget tar dtrx
+
+# Install Moses dependencies.
+RUN apt-get install -y libboost-all-dev
+RUN apt-get install -y build-essential git-core pkg-config automake libtool wget zlib1g-dev python-dev libbz2-dev cmake
+
+# Clone the repos we need. The steps below expect the checkouts at
+# /mosesdecoder, /mgiza and /clustercat.
+RUN git clone https://github.com/moses-smt/mosesdecoder.git
+RUN git clone https://github.com/moses-smt/mgiza.git
+RUN git clone https://github.com/jonsafari/clustercat.git
+
+# Install Moses: run the repository's install-dependencies makefile, then
+# compile.sh with the KenLM order and factor limits given on its command line.
+WORKDIR /mosesdecoder
+RUN make -f /mosesdecoder/contrib/Makefiles/install-dependencies.gmake
+RUN /mosesdecoder/compile.sh --max-kenlm-order=20 --max-factors=1000
+WORKDIR /
+
+# Install MGIZA++ (CMake build run in the source tree), then put
+# merge_alignment.py next to the built binaries so it is collected with them.
+WORKDIR /mgiza/mgizapp
+RUN cmake . && make && make install
+RUN cp /mgiza/mgizapp/scripts/merge_alignment.py /mgiza/mgizapp/bin/
+WORKDIR /
+
+# Install clustercat.
+WORKDIR /clustercat
+RUN make -j 4
+WORKDIR /
+
+# Collect the training tools in /moses-training-tools (nothing is removed here).
+# The MGIZA++ bin directory is copied first; its mkcls is then renamed to
+# mkcls-original and clustercat's mkcls is installed both as mkcls-clustercat
+# and as the default mkcls.
+RUN mkdir moses-training-tools
+RUN cp /mgiza/mgizapp/bin/* /moses-training-tools/
+RUN cp /clustercat/bin/clustercat /moses-training-tools/
+RUN cp /clustercat/bin/mkcls /moses-training-tools/mkcls-clustercat
+RUN mv /moses-training-tools/mkcls /moses-training-tools/mkcls-original
+RUN cp /moses-training-tools/mkcls-clustercat /moses-training-tools/mkcls
diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta
index d6e6dc133..4c0a9794e 100644
--- a/scripts/ems/experiment.meta
+++ b/scripts/ems/experiment.meta
@@ -969,21 +969,6 @@ parse-input-devtest
pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval
ignore-unless: use-mira
template: $input-parser < IN > OUT
-parse-relax-input
- in: split-input
- out: input
- default-name: tuning/input.parse-relaxed
- pass-unless: input-parse-relaxer
- pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval
- template: $input-parse-relaxer < IN > OUT
-parse-relax-input-devtest
- in: split-input-devtest
- out: input-devtest
- default-name: tuning/input.devtest.parse-relaxed
- pass-unless: input-parse-relaxer
- pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval
- ignore-unless: use-mira
- template: $input-parse-relaxer < IN > OUT
factorize-input
in: parsed-input
out: factorized-input
@@ -1059,6 +1044,21 @@ split-input-devtest
pass-unless: input-splitter
ignore-unless: use-mira
template: $input-splitter -model IN1.$input-extension < IN > OUT
+parse-relax-input
+ in: split-input
+ out: input
+ default-name: tuning/input.parse-relaxed
+ pass-unless: input-parse-relaxer
+ pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval
+ template: $input-parse-relaxer < IN > OUT
+parse-relax-input-devtest
+ in: split-input-devtest
+ out: input-devtest
+ default-name: tuning/input.devtest.parse-relaxed
+ pass-unless: input-parse-relaxer
+ pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval
+ ignore-unless: use-mira
+ template: $input-parse-relaxer < IN > OUT
reference-from-sgm
in: reference-sgm input-sgm
out: raw-reference
@@ -1252,20 +1252,6 @@ mock-parse-input
default-name: evaluation/input.mock-parsed
pass-unless: mock-input-parser-devtesteval
template: $mock-input-parser-devtesteval < IN > OUT
-parse-input
- in: mock-parsed-input
- out: parsed-input
- default-name: evaluation/input.parsed
- pass-unless: input-parser
- pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval
- template: $input-parser < IN > OUT
-parse-relax-input
- in: split-input
- out: input
- default-name: evaluation/input.parse-relaxed
- pass-unless: input-parse-relaxer
- pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval
- template: $input-parse-relaxer < IN > OUT
factorize-input
in: parsed-input
out: factorized-input
@@ -1303,6 +1289,20 @@ split-input
default-name: evaluation/input.split
pass-unless: input-splitter
template: $input-splitter -model IN1.$input-extension < IN > OUT
+parse-input
+ in: mock-parsed-input
+ out: parsed-input
+ default-name: evaluation/input.parsed
+ pass-unless: input-parser
+ pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval
+ template: $input-parser < IN > OUT
+parse-relax-input
+ in: split-input
+ out: input
+ default-name: evaluation/input.parse-relaxed
+ pass-unless: input-parse-relaxer
+ pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval
+ template: $input-parse-relaxer < IN > OUT
filter
in: input TRAINING:sigtest-filter-phrase-translation-table TRAINING:sigtest-filter-reordering-table TRAINING:corpus-mml-prefilter=OR=TRAINING:corpus-mml-postfilter=OR=TRAINING:domains TRAINING:transliteration-table
out: filtered-dir
diff --git a/scripts/ems/support/reference-from-sgm.perl b/scripts/ems/support/reference-from-sgm.perl
index b8e1d108d..b892fcda3 100755
--- a/scripts/ems/support/reference-from-sgm.perl
+++ b/scripts/ems/support/reference-from-sgm.perl
@@ -60,7 +60,7 @@ foreach my $system (keys %DOC) {
}
open(TXT,">$outfile") || die($outfile);
foreach my $doc (@ORDER) {
- die("can't find '$doc' for ref '$system'") unless defined @{$DOC{$system}{$doc}};
+ die("can't find '$doc' for ref '$system'") unless defined $DOC{$system}{$doc};
foreach my $line (@{$DOC{$system}{$doc}}) {
print TXT $line."\n";
}
diff --git a/scripts/generic/binarize4moses2.perl b/scripts/generic/binarize4moses2.perl
index a703cc241..46e30f43d 100755
--- a/scripts/generic/binarize4moses2.perl
+++ b/scripts/generic/binarize4moses2.perl
@@ -31,6 +31,9 @@ die("ERROR: please set --phrase-table") unless defined($ptPath);
#die("ERROR: please set --lex-ro") unless defined($lexRoPath);
die("ERROR: please set --output-dir") unless defined($outPath);
#die("ERROR: please set --num-lex-scores") unless defined($numLexScores);
+die("ERROR: compile contrib/sigtest-filter") if (!-X "$mosesDir/contrib/sigtest-filter/filter-pt");
+die("ERROR: compile with bjam --with-cmph") if (!-X "$mosesDir/bin/processLexicalTableMin");
+die("ERROR: compile with bjam --with-xmlrpc-c") if (!-X "$mosesDir/bin/CreateProbingPT2");
my $cmd;
diff --git a/scripts/generic/bsbleu.py b/scripts/generic/bsbleu.py
index f3c99747f..d40a28e6e 100755
--- a/scripts/generic/bsbleu.py
+++ b/scripts/generic/bsbleu.py
@@ -64,22 +64,29 @@ class BleuScore:
self.lower = None
self.upper = None
self.median = None
- self.bootstrap = [
- self.score([randint(0, len(hyp.snt) - 1) for s in hyp.snt])
- for i in xrange(1000)]
- self.bootstrap.sort()
self.actual = self.score([i for i in xrange(len(hyp.snt))])
+ if bootstrap:
+ self.bootstrap = [self.score([randint(0, len(hyp.snt) - 1)
+ for s in hyp.snt])
+ for i in xrange(bootstrap)]
+ self.bootstrap.sort()
+ else:
+ self.bootstrap = [self.actual]
+ pass
def score(self, sample):
hits = [0 for i in xrange(self.max_n)]
self.hyplen = 0
self.reflen = 0
+ self.total = [0 for i in hits]
for i in sample:
self.hyplen += len(self.hyp.snt[i])
self.reflen += len(self.ref.snt[i])
for n in xrange(self.max_n):
hits[n] += self.hits[i][n]
- self.prec = [float(hits[n]) / (self.hyplen - n * len(sample))
+ self.total[n] += max(len(self.hyp.snt[i]) - n, 0)
+ pass
+ self.prec = [float(hits[n]) / self.total[n]
for n in xrange(self.max_n)]
ret = sum([math.log(x) for x in self.prec]) / self.max_n
self.BP = min(
diff --git a/scripts/generic/mteval-v12.pl b/scripts/generic/mteval-v12.pl
index b4dfbf83a..2666c8012 100755
--- a/scripts/generic/mteval-v12.pl
+++ b/scripts/generic/mteval-v12.pl
@@ -1,7 +1,4 @@
#!/usr/bin/env perl
-#
-# This file is part of moses. Its use is licensed under the GNU Lesser General
-# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;
diff --git a/scripts/generic/mteval-v13a.pl b/scripts/generic/mteval-v13a.pl
index 2e5d29ad5..92afcbd71 100755
--- a/scripts/generic/mteval-v13a.pl
+++ b/scripts/generic/mteval-v13a.pl
@@ -1,7 +1,4 @@
#!/usr/bin/env perl
-#
-# This file is part of moses. Its use is licensed under the GNU Lesser General
-# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;
diff --git a/scripts/generic/mteval-v14.pl b/scripts/generic/mteval-v14.pl
new file mode 100644
index 000000000..84a7549ac
--- /dev/null
+++ b/scripts/generic/mteval-v14.pl
@@ -0,0 +1,1179 @@
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+use utf8;
+use Encode;
+use XML::Twig;
+use Sort::Naturally;
+
+binmode STDOUT, ":utf8";
+binmode STDERR, ":utf8";
+
+
+#################################
+# History:
+#
+# version 14
+# (2016-03-29 lukas.diduch@nist.gov)
+# * Fixed warning message in case seg-id is a string, by sorting in correct order using Sort::Naturally.
+#
+# version 13b
+# * Fixed die 'bug' in case seg->id = 0
+#
+# version 13a
+# * modified the scoring functions to prevent division-by-zero errors when a system segment is empty
+# * affected methods: 'bleu_score' and 'bleu_score_smoothing'
+#
+# version 13
+# * Uses a XML parser to read data (only when extension is .xml)
+# * Smoothing of the segment-level BLEU scores, done by default
+# * smoothing method similar to that of bleu-1.04.pl (IBM)
+# * see comments above the 'bleu_score' method for more details on how the smoothing is computed
+# * added a '--no-smoothing' option to simulate old scripts behavior
+# * Introduction of the 'brevity-penalty' option, taking one of two values:
+# * 'closest' (default) : act as IBM BLEU (taking the closest reference translation length)
+# * in case two reference translations are at the same distance, will take the shortest one
+# * for more details regarding how the BP is computed, see comments of the 'brevity_penalty_closest' function
+# * 'shortest' : act as previous versions of the script (taking shortest reference translation length)
+# * Introduction of the 'international-tokenization' option, boolean, disabled by default
+# by default (when the option is not provided), uses 11b's tokenization function
+# when option specified, uses v12's tokenization function
+# * Introduction of a 'Metrics MATR output' flag (option '--metricsMATR')
+# when used, creates three files for both BLEU score and NIST score:
+# * BLEU-seg.scr and NIST-seg.scr: contain segment-level scores
+# * BLEU-doc.scr and NIST-doc.scr: contain document-level scores
+# * BLEU-sys.scr and NIST-sys.scr: contain system-level scores
+# * SGML parsing
+# * script will halt if source, reference and test files don't share the same setid attribute value (used for metricsMATR output)
+# * correct segment IDs extracted from the files (was previously using an array, and using the index as a segID for output)
+# * detailed output flag (-d) can now be used when running both BLEU and NIST
+#
+# version 12
+# * Text normalization changes:
+# * convert entity references (only the entities declared in the DTD)
+# * now uses unicode categories
+# * tokenize punctuation unless followed AND preceded by digits
+# * tokenize symbols
+# * UTF-8 handling:
+# * files are now read using utf8 mode
+# * Added the '-e' command-line option to enclose non-ASCII characters between spaces
+#
+# version 11b -- text normalization modified:
+# * take out the join digit line because it joins digits
+# when it shouldn't have
+# $norm_text =~ s/(\d)\s+(?=\d)/$1/g; #join digits
+#
+# version 11a -- corrected output of individual n-gram precision values
+#
+# version 11 -- bug fixes:
+# * make filehandle operate in binary mode to prevent Perl from operating
+# (by default in Red Hat 9) in UTF-8
+# * fix failure on joining digits
+# version 10 -- updated output to include more details of n-gram scoring.
+# Defaults to generate both NIST and BLEU scores. Use -b for BLEU
+# only, use -n for NIST only
+#
+# version 09d -- bug fix (for BLEU scoring, ngrams were fixed at 4
+# being the max, regardless what was entered on the command line.)
+#
+# version 09c -- bug fix (During the calculation of ngram information,
+# each ngram was being counted only once for each segment. This has
+# been fixed so that each ngram is counted correctly in each segment.)
+#
+# version 09b -- text normalization modified:
+# * option flag added to preserve upper case
+# * non-ASCII characters left in place.
+#
+# version 09a -- text normalization modified:
+# * &quot; and &amp; converted to "" and &, respectively
+# * non-ASCII characters kept together (bug fix)
+#
+# version 09 -- modified to accommodate sgml tag and attribute
+# names revised to conform to default SGML conventions.
+#
+# version 08 -- modifies the NIST metric in accordance with the
+# findings on the 2001 Chinese-English dry run corpus. Also
+# incorporates the BLEU metric as an option and supports the
+# output of ngram detail.
+#
+# version 07 -- in response to the MT meeting on 28 Jan 2002 at ISI
+# Keep strings of non-ASCII characters together as one word
+# (rather than splitting them into one-character words).
+# Change length penalty so that translations that are longer than
+# the average reference translation are not penalized.
+#
+# version 06
+# Prevent divide-by-zero when a segment has no evaluation N-grams.
+# Correct segment index for level 3 debug output.
+#
+# version 05
+# improve diagnostic error messages
+#
+# version 04
+# tag segments
+#
+# version 03
+# add detailed output option (intermediate document and segment scores)
+#
+# version 02
+# accommodation of modified sgml tags and attributes
+#
+# version 01
+# same as bleu version 15, but modified to provide formal score output.
+#
+# original IBM version
+# Author: Kishore Papineni
+# Date: 06/10/2001
+#################################
+
+######
+# Intro
+# Print a start banner and echo the command line exactly as it was invoked.
+# date_time_stamp() is defined outside this section of the script.
+my ($date, $time) = date_time_stamp();
+print "MT evaluation scorer began on $date at $time\n";
+print "\ncommand line: ", $0, " ", join(" ", @ARGV), "\n";
+# Help text. It is what the script dies with when -h/--help is given, and it is
+# appended to the command-line error messages further down.
+# NOTE(review): the text says -h prints to STDOUT, but the help path uses die,
+# which writes to STDERR -- confirm which one is intended.
+my $usage = "\n\nUsage: $0 -r <ref_file> -s <src_file> -t <tst_file>\n\n".
+ "Description: This Perl script evaluates MT system performance.\n".
+ "\n".
+ "Required arguments:\n".
+ " -r <ref_file> is a file containing the reference translations for\n".
+ " the documents to be evaluated.\n".
+ " -s <src_file> is a file containing the source documents for which\n".
+ " translations are to be evaluated\n".
+ " -t <tst_file> is a file containing the translations to be evaluated\n".
+ "\n".
+ "Optional arguments:\n".
+ " -h prints this help message to STDOUT\n".
+ " -c preserves upper-case alphabetic characters\n".
+ " -b generate BLEU scores only\n".
+ " -n generate NIST scores only\n".
+ " -d detailed output flag:\n".
+ " 0 (default) for system-level score only\n".
+ " 1 to include document-level scores\n".
+ " 2 to include segment-level scores\n".
+ " 3 to include ngram-level scores\n".
+ " -e enclose non-ASCII characters between spaces\n".
+ " --brevity-penalty ( closest | shortest )\n" .
+ " closest (default) : acts as IBM BLEU (takes the closest reference translation length)\n" .
+ " shortest : acts as previous versions of the script (takes the shortest reference translation length)\n" .
+ " --international-tokenization\n" .
+ " when specified, uses Unicode-based (only) tokenization rules\n" .
+ " when not specified (default), uses default tokenization (some language-dependant rules)\n" .
+ " --metricsMATR : create three files for both BLEU scores and NIST scores:\n" .
+ " BLEU-seg.scr and NIST-seg.scr : segment-level scores\n" .
+ " BLEU-doc.scr and NIST-doc.scr : document-level scores\n" .
+ " BLEU-sys.scr and NIST-sys.scr : system-level scores\n" .
+ " --no-smoothing : disable smoothing on BLEU scores\n" .
+ "\n";
+
+# Command-line options.
+# 'b', 'n' and 'x' are given to GetOptions without a destination, so
+# Getopt::Long stores them in the package variables $opt_b, $opt_n and $opt_x.
+use vars qw ($opt_r $opt_s $opt_t $opt_d $opt_h $opt_b $opt_n $opt_c $opt_x $opt_e);
+use Getopt::Long;
+my $ref_file = '';
+my $src_file = '';
+my $tst_file = '';
+my $detail = 0;
+my $help = '';
+my $preserve_case = '';
+my $split_non_ASCII = '';
+my $brevity_penalty = 'closest';
+my $international_tokenization;
+my $metricsMATR_output = '';
+my $no_smoothing = '';
+our $opt_x = '';
+our $opt_b = '';
+our $opt_n = '';
+# NOTE(review): the return value of GetOptions is not checked, so an
+# unrecognised option is only reported as a warning and the run continues.
+# The 'x' option is accepted but not described in the usage text.
+GetOptions(
+    'r=s' => \$ref_file,
+    's=s' => \$src_file,
+    't=s' => \$tst_file,
+    'd:i' => \$detail,
+    'h|help' => \$help,
+    'b',
+    'n',
+    'c' => \$preserve_case,
+    'x:s',
+    'e' => \$split_non_ASCII,
+    'brevity-penalty:s' => \$brevity_penalty,
+    'international-tokenization' => \$international_tokenization,
+    # The usage text documents '--metricsMATR'. Accept that spelling by name
+    # (it previously matched only as an abbreviation) and keep the longer
+    # '--metricsMATR-output' form as an alias.
+    'metricsMATR|metricsMATR-output' => \$metricsMATR_output,
+    'no-smoothing' => \$no_smoothing
+);
+die $usage if $help;
+
+# All three input files are mandatory.
+die "Error in command line: ref_file not defined$usage" unless ( $ref_file );
+die "Error in command line: src_file not defined$usage" unless ( $src_file );
+die "Error in command line: tst_file not defined$usage" unless ( $tst_file );
+
+# Select the brevity-penalty implementation by name; anything other than
+# 'closest' or 'shortest' (including an empty value) is a usage error.
+my $BLEU_BP;
+if ( $brevity_penalty eq 'closest' )
+{
+    $BLEU_BP = \&brevity_penalty_closest;
+}
+elsif ( $brevity_penalty eq 'shortest' )
+{
+    $BLEU_BP = \&brevity_penalty_shortest;
+}
+else
+{
+    die "Incorrect value supplied for 'brevity_penalty'$usage";
+}
+
+# Tokenizer and BLEU scorer are picked once here and called through these
+# code references afterwards.
+my $TOKENIZATION = \&tokenization;
+$TOKENIZATION = \&tokenization_international if ( $international_tokenization );
+
+my $BLEU_SCORE = \&bleu_score;
+$BLEU_SCORE = \&bleu_score_nosmoothing if ( $no_smoothing );
+
+# Highest n-gram order for which score tables are kept.
+my $max_Ngram = 9;
+
+# Which metrics to compute: both by default, -b restricts to BLEU and -n to
+# NIST (if both flags are given, -n wins because it is tested last).
+my $METHOD = "BOTH";
+if ( $opt_b ) { $METHOD = "BLEU"; }
+if ( $opt_n ) { $METHOD = "NIST"; }
+# Metric currently being scored; assigned before each score_system call.
+my $method;
+
+######
+# Global variables
+# These file-level lexicals are shared with the subroutines of this script;
+# $src_lang and $tgt_lang are filled in by the data readers.
+my ($src_lang, $tgt_lang, @tst_sys, @ref_sys); # evaluation parameters
+my (%tst_data, %ref_data); # the data -- with structure: {system}{document}{segments}
+my ($src_id, $ref_id, $tst_id); # unique identifiers for ref and tst translation sets
+my %eval_docs; # document information for the evaluation data set
+my %ngram_info; # the information obtained from (the last word in) the ngram
+
+######
+# Get source document ID's
+# (this also fills %eval_docs and sets $src_lang)
+($src_id) = get_source_info ($src_file);
+
+######
+# Get reference translations
+($ref_id) = get_MT_data (\%ref_data, "RefSet", $ref_file);
+
+# Called once the references are loaded and before the test set is read.
+# NOTE(review): presumably this fills %ngram_info from the reference data --
+# the sub is not visible here, confirm.
+compute_ngram_info ();
+
+######
+# Get translations to evaluate
+($tst_id) = get_MT_data (\%tst_data, "TstSet", $tst_file);
+
+######
+# Check data for completeness and correctness
+check_MT_data ();
+
+######
+# Score tables, passed by reference to score_system below.
+# The *mt tables are indexed {n}{system}{cum|ind}.
+my %NISTmt;
+my %NISTOverall;
+my %BLEUmt;
+my %BLEUOverall;
+
+######
+# Evaluate
+print "\nEvaluation of $src_lang-to-$tgt_lang translation using:\n";
+# Total number of source segments over all documents, for the summary line.
+my $cum_seg = 0;
+foreach my $doc (sort keys %eval_docs)
+{
+ $cum_seg += scalar( keys( %{$eval_docs{$doc}{SEGS}} ) );
+}
+print " src set \"$src_id\" (", scalar keys %eval_docs, " docs, $cum_seg segs)\n";
+print " ref set \"$ref_id\" (", scalar keys %ref_data, " refs)\n";
+print " tst set \"$tst_id\" (", scalar keys %tst_data, " systems)\n\n";
+
+# Score every test system. The cumulative and individual entries are zeroed
+# for each n-gram order first, then the system is scored with NIST and/or
+# BLEU depending on $METHOD.
+foreach my $sys (sort @tst_sys)
+{
+ for (my $n=1; $n<=$max_Ngram; $n++)
+ {
+ $NISTmt{$n}{$sys}{cum} = 0;
+ $NISTmt{$n}{$sys}{ind} = 0;
+ $BLEUmt{$n}{$sys}{cum} = 0;
+ $BLEUmt{$n}{$sys}{ind} = 0;
+ }
+ if ( ($METHOD eq "BOTH") || ($METHOD eq "NIST") )
+ {
+ # $method is a file-level variable, set before the call rather than passed in.
+ $method="NIST";
+ score_system ($sys, \%NISTmt, \%NISTOverall);
+ }
+ if ( ($METHOD eq "BOTH") || ($METHOD eq "BLEU") )
+ {
+ $method="BLEU";
+ score_system ($sys, \%BLEUmt, \%BLEUOverall);
+ }
+}
+
+######
+# Report the results; with the metricsMATR option also write the score files.
+printout_report ();
+if ( $metricsMATR_output )
+{
+ # The overall hashes are passed by value here, i.e. flattened into the
+ # argument list after the metric name.
+ outputMetricsMATR( 'NIST', %NISTOverall ) if ( ( $METHOD eq 'BOTH' ) || ( $METHOD eq 'NIST' ) );
+ outputMetricsMATR( 'BLEU', %BLEUOverall ) if ( ( $METHOD eq 'BOTH' ) || ( $METHOD eq 'BLEU' ) );
+}
+
+($date, $time) = date_time_stamp();
+print "\nMT evaluation scorer ended on $date at $time\n";
+
+exit 0;
+
+#################################
+
+# Read the source set from $file and return its set id.
+#
+# Side effects on file-level variables:
+#   * $src_lang is set to the source language of the set; the sub dies if it
+#     was already defined with a different value.
+#   * $eval_docs{<docid>}{SEGS}{<segid>} receives the tokenized text of every
+#     segment (the first value returned by the selected tokenizer).
+#
+# Files whose name ends in .xml (case-insensitive) are parsed with XML::Twig;
+# everything else goes through the simple SGML helper functions.
+# Dies on missing mandatory elements or attributes. The SGML branch also
+# rejects duplicate document ids, documents without segments and files
+# without documents.
+sub get_source_info
+{
+    my ($file) = @_;
+    my ($name, $id, $src, $doc, $seg);
+    my ($data, $tag, $span);
+
+    # Extension of the file determines the parser used:
+    #   .xml      : XML::Twig
+    #   otherwise : simple SGML parsing functions
+    if ( $file =~ /\.xml$/i )
+    {
+        my $twig = XML::Twig->new();
+        $twig->parsefile( $file );
+        my $root = $twig->root;
+        my $currentSet = $root->first_child( 'srcset' );
+        die "Source XML file '$file' does not contain the 'srcset' element" if ( not $currentSet );
+        $id = $currentSet->{ 'att' }->{ 'setid' } or die "No 'setid' attribute value in '$file'";
+        $src = $currentSet->{ 'att' }->{ 'srclang' } or die "No srcset 'srclang' attribute value in '$file'";
+        die "Not the same srclang attribute values across sets" unless ( not defined $src_lang or $src eq $src_lang );
+        $src_lang = $src;
+        foreach my $currentDoc ( $currentSet->get_xpath( './/doc' ) )
+        {
+            my $docID = $currentDoc->{ 'att' }->{ 'docid' } or die "No document 'docid' attribute value in '$file'";
+            foreach my $currentSeg ( $currentDoc->get_xpath( './/seg' ) )
+            {
+                # Test with defined, not truth: a segment id of 0 is valid.
+                my $segID = $currentSeg->{ 'att' }->{ 'id' };
+                die "No segment 'id' attribute value in '$file'" if (! defined $segID);
+                my $segData = $currentSeg->text;
+                ($eval_docs{$docID}{SEGS}{$segID}) = &{ $TOKENIZATION }( $segData );
+            }
+        }
+    }
+    else
+    {
+        #read data from file
+        # Three-argument open with a lexical handle: the file name comes from
+        # the command line and must never be interpreted as a mode or a pipe.
+        open (my $fh, '<', $file) or die "\nUnable to open translation data file '$file'", $usage;
+        binmode $fh, ":utf8";
+        $data .= $_ while <$fh>;
+        close ($fh);
+
+        #get source set info
+        # In the checks below $name is assigned inside the call so that the
+        # error message can name the attribute being looked up.
+        die "\n\nFATAL INPUT ERROR: no 'src_set' tag in src_file '$file'\n\n"
+            unless ($tag, $span, $data) = extract_sgml_tag_and_span ("SrcSet", $data);
+        die "\n\nFATAL INPUT ERROR: no tag attribute '$name' in file '$file'\n\n"
+            unless ($id) = extract_sgml_tag_attribute ($name="SetID", $tag);
+        die "\n\nFATAL INPUT ERROR: no tag attribute '$name' in file '$file'\n\n"
+            unless ($src) = extract_sgml_tag_attribute ($name="SrcLang", $tag);
+        die "\n\nFATAL INPUT ERROR: $name ('$src') in file '$file' inconsistent\n"
+            ." with $name in previous input data ('$src_lang')\n\n"
+            unless (not defined $src_lang or $src eq $src_lang);
+        $src_lang = $src;
+
+        #get doc info -- ID and # of segs
+        $data = $span;
+        while (($tag, $span, $data) = extract_sgml_tag_and_span ("Doc", $data))
+        {
+            die "\n\nFATAL INPUT ERROR: no tag attribute '$name' in file '$file'\n\n"
+                unless ($doc) = extract_sgml_tag_attribute ($name="DocID", $tag);
+            die "\n\nFATAL INPUT ERROR: duplicate '$name' in file '$file'\n\n"
+                if defined $eval_docs{$doc};
+            $span =~ s/[\s\n\r]+/ /g; # concatenate records
+            my $nseg = 0;
+            my $seg_data = $span;
+            while (($tag, $span, $seg_data) = extract_sgml_tag_and_span ("Seg", $seg_data))
+            {
+                die "\n\nFATAL INPUT ERROR: no attribute '$name' in file '$file'\n\n"
+                    unless ($seg) = extract_sgml_tag_attribute( $name='id', $tag );
+                ($eval_docs{$doc}{SEGS}{$seg}) = &{ $TOKENIZATION }( $span );
+                $nseg++;
+            }
+            die "\n\nFATAL INPUT ERROR: no segments in document '$doc' in file '$file'\n\n"
+                if $nseg == 0;
+        }
+        die "\n\nFATAL INPUT ERROR: no documents in file '$file'\n\n"
+            unless keys %eval_docs > 0;
+    }
+    return $id;
+}
+
+#################################
+
+sub get_MT_data
+{
+    # Load translation data from $file into the hash referenced by $docs:
+    #   $docs->{SYSTEM}{DOCUMENT}{FILE}          - file the document was read from
+    #   $docs->{SYSTEM}{DOCUMENT}{SEGS}{SEGMENT} - segment text after $TOKENIZATION
+    # Files whose name ends in ".xml" are parsed with XML::Twig (both 'refset'
+    # and 'tstset' elements are read); any other file is scanned with the SGML
+    # helper functions for <$set_tag> elements.
+    # Returns the set id of the last set read.  Dies on missing attributes and
+    # on a source/target language mismatch; the SGML path also dies on
+    # duplicate documents and on documents without segments.
+    # Side effect: stores the target language in the file-level $tgt_lang.
+    my ($docs, $set_tag, $file) = @_;
+    my ($name, $id, $src, $tgt, $doc, $seg);
+    my ($tag, $span);
+    my $data = '';
+
+    # Extension of the file determines the parser used:
+    #   .xml      : XML::Twig
+    #   otherwise : simple SGML parsing functions
+    if ( $file =~ /\.xml$/i )
+    {
+        my $twig = XML::Twig->new();
+        $twig->parsefile( $file );
+        my $root = $twig->root;
+        foreach my $currentSet ( $root->get_xpath( 'refset' ), $root->get_xpath( 'tstset' ) )
+        {
+            $id = $currentSet->{ 'att' }->{ 'setid' } or die "No 'setid' attribute value in '$file'";
+            $src = $currentSet->{ 'att' }->{ 'srclang' } or die "No 'srclang' attribute value in '$file'";
+            $tgt = $currentSet->{ 'att' }->{ 'trglang' } or die "No 'trglang' attribute value in '$file'";
+            die "Not the same 'srclang' attribute value across sets" unless ( $src eq $src_lang );
+            # compare (eq), do not assign: an assignment here would accept any target language
+            die "Not the same 'trglang' attribute value across sets" unless ( ( not defined $tgt_lang ) or ( $tgt eq $tgt_lang ) );
+            $tgt_lang = $tgt;
+            my $sys;
+            if ( $currentSet->name eq 'tstset' )
+            {
+                $sys = $currentSet->{ 'att' }->{ 'sysid' } or die "No 'sysid' attribute value in '$file'";
+            }
+            else
+            {
+                $sys = $currentSet->{ 'att' }->{ 'refid' } or die "No 'refid' attribute value in '$file'";
+            }
+            foreach my $currentDoc ( $currentSet->get_xpath( './/doc' ) )
+            {
+                my $docID = $currentDoc->{ 'att' }->{ 'docid' } or die "No document 'docid' attribute value in '$file'";
+                $docs->{ $sys }{ $docID }{ FILE } = $file;
+                foreach my $currentSeg ( $currentDoc->get_xpath( './/seg' ) )
+                {
+                    my $segID = $currentSeg->{ 'att' }->{ 'id' };
+                    die "No segment 'id' attribute value in '$file'" if (! defined $segID);
+                    my $segData = $currentSeg->text;
+                    ($docs->{$sys}{$docID}{SEGS}{$segID}) = &{ $TOKENIZATION }( $segData );
+                }
+            }
+        }
+    }
+    else
+    {
+        #read data from file (3-arg open: the file name is never interpreted as a mode or command)
+        open (my $fh, '<', $file) or die "\nUnable to open translation data file '$file'", $usage;
+        binmode $fh, ":utf8";
+        $data .= $_ while <$fh>;
+        close ($fh);
+
+        #get tag info
+        while (($tag, $span, $data) = extract_sgml_tag_and_span ($set_tag, $data))
+        {
+            die "\n\nFATAL INPUT ERROR: no tag attribute '$name' in file '$file'\n\n"
+                unless ($id) = extract_sgml_tag_attribute ($name="SetID", $tag);
+            die "\n\nFATAL INPUT ERROR: no tag attribute '$name' in file '$file'\n\n"
+                unless ($src) = extract_sgml_tag_attribute ($name="SrcLang", $tag);
+            die "\n\nFATAL INPUT ERROR: $name ('$src') in file '$file' inconsistent\n"
+                ." with $name of source ('$src_lang')\n\n"
+                unless $src eq $src_lang;
+            die "\n\nFATAL INPUT ERROR: no tag attribute '$name' in file '$file'\n\n"
+                unless ($tgt) = extract_sgml_tag_attribute ($name="TrgLang", $tag);
+            die "\n\nFATAL INPUT ERROR: $name ('$tgt') in file '$file' inconsistent\n"
+                ." with $name of the evaluation ('$tgt_lang')\n\n"
+                unless (not defined $tgt_lang or $tgt eq $tgt_lang);
+            $tgt_lang = $tgt;
+
+            my $mtdata = $span;
+            while (($tag, $span, $mtdata) = extract_sgml_tag_and_span ("Doc", $mtdata))
+            {
+                my $sys;
+                # List assignments are tested by element count, so an attribute
+                # value of "0" is accepted; only a missing attribute is fatal.
+                die "\n\nFATAL INPUT ERROR: no tag attribute '$name' in file '$file'\n\n"
+                    unless ($sys) = extract_sgml_tag_attribute ($name="SysID", $tag);
+                die "\n\nFATAL INPUT ERROR: no tag attribute '$name' in file '$file'\n\n"
+                    unless ($doc) = extract_sgml_tag_attribute ($name="DocID", $tag);
+                die "\n\nFATAL INPUT ERROR: document '$doc' for system '$sys' in file '$file'\n"
+                    ." previously loaded from file '$docs->{$sys}{$doc}{FILE}'\n\n"
+                    unless (not defined $docs->{$sys}{$doc});
+
+                $span =~ s/[\s\n\r]+/ /g; # concatenate records
+                my $nseg = 0;
+                my $seg_data = $span;
+                while (($tag, $span, $seg_data) = extract_sgml_tag_and_span ("Seg", $seg_data))
+                {
+                    die "\n\nFATAL INPUT ERROR: no tag attribute '$name' in file '$file'\n\n"
+                        unless ($seg) = extract_sgml_tag_attribute( $name="id", $tag );
+                    ($docs->{$sys}{$doc}{SEGS}{$seg}) = &{ $TOKENIZATION }( $span );
+                    $nseg++;
+                }
+                die "\n\nFATAL INPUT ERROR: no segments in document '$doc' in file '$file'\n\n" if $nseg == 0;
+                $docs->{$sys}{$doc}{FILE} = $file;
+            }
+        }
+    }
+    return $id;
+}
+
+#################################
+
+sub check_MT_data
+{
+    # Consistency check over the loaded data (file-level %eval_docs, %tst_data
+    # and %ref_data).  Fills the file-level @tst_sys and @ref_sys with the
+    # sorted system / reference ids, then dies unless
+    #   - source, test and reference files carry the same set id, and
+    #   - every evaluation document exists for every system and every
+    #     reference with the same number of segments as the source document.
+    @tst_sys = sort keys %tst_data;
+    @ref_sys = sort keys %ref_data;
+
+    die "Not the same 'setid' attribute values across files" unless ( ( $src_id eq $tst_id ) && ( $src_id eq $ref_id ) );
+
+    # each entry: label used in error messages, data hash, ids to check
+    my @groups = (
+        [ 'system',    \%tst_data, \@tst_sys ],
+        [ 'reference', \%ref_data, \@ref_sys ],
+    );
+
+    #every evaluation document must be represented for every system and every reference
+    foreach my $doc (sort keys %eval_docs)
+    {
+        my $nseg_source = scalar( keys( %{$eval_docs{$doc}{SEGS}} ) );
+        foreach my $group (@groups)
+        {
+            my ($kind, $data, $ids) = @{$group};
+            foreach my $sys (@{$ids})
+            {
+                die "\n\nFATAL ERROR: no document '$doc' for $kind '$sys'\n\n" unless defined $data->{$sys}{$doc};
+                my $nseg = scalar( keys( %{$data->{$sys}{$doc}{SEGS}} ) );
+                die "\n\nFATAL ERROR: translated documents must contain the same # of segments as the source, but\n"
+                    ." document '$doc' for $kind '$sys' contains $nseg segments, while\n"
+                    ." the source document contains $nseg_source segments.\n\n"
+                    unless $nseg == $nseg_source;
+            }
+        }
+    }
+}
+
+#################################
+
+sub compute_ngram_info
+{
+    # Fill the file-level %ngram_info with an information weight for every
+    # n-gram found in the reference data (%ref_data):
+    #   info(w1..wn) = -log2( count(w1..wn) / count(w1..w(n-1)) )
+    # and, for single words, -log2( count(w1) / total reference words ).
+    # When $opt_x is "ngram info", one diagnostic line is printed per n-gram.
+    my $tot_wrds = 0;
+    my %ngram_count;
+
+    # count every n-gram over all references, documents and segments
+    foreach my $ref (keys %ref_data)
+    {
+        foreach my $doc (keys %{$ref_data{$ref}})
+        {
+            foreach my $seg (keys %{$ref_data{$ref}{$doc}{SEGS}})
+            {
+                my @wrds = split /\s+/, $ref_data{ $ref }{ $doc }{ SEGS }{ $seg };
+                $tot_wrds += @wrds;
+                my $ngrams = Words2Ngrams (@wrds);
+                foreach my $ngram (keys %{$ngrams})
+                {
+                    $ngram_count{$ngram} += $ngrams->{$ngram};
+                }
+            }
+        }
+    }
+
+    foreach my $ngram (keys %ngram_count)
+    {
+        my @wrds = split / /, $ngram;
+        my $order = scalar @wrds;
+        pop @wrds;                      # @wrds now holds the (n-1)-gram context
+        my $mgram = join " ", @wrds;
+        # Decide by the number of context words, not by the truth value of the
+        # joined string: a context consisting of the token "0" is false in
+        # Perl and would otherwise be treated as "no context".
+        my $context_count = @wrds ? $ngram_count{$mgram} : $tot_wrds;
+        $ngram_info{$ngram} = - log ($ngram_count{$ngram}/$context_count) / log 2;
+        if (defined $opt_x and $opt_x eq "ngram info")
+        {
+            printf "ngram info:%9.4f%6d%6d%8d%3d %s\n", $ngram_info{$ngram}, $ngram_count{$ngram},
+                $context_count, $tot_wrds, $order, $ngram;
+        }
+    }
+}
+
+#################################
+
+sub score_system
+{
+    # Score system $sys over all evaluation documents (%eval_docs).
+    #   $SCOREmt      - hash ref handed to the scoring function for the
+    #                   system-level per-order scores
+    #   $overallScore - hash ref receiving $overallScore->{$sys}{score} and
+    #                   $overallScore->{$sys}{documents}{$doc}{score}
+    # The metric is chosen by the file-level $method ("BLEU" or "NIST").
+    my ($sys, $SCOREmt, $overallScore) = @_;
+
+    # running totals over all documents, indexed by n-gram order (1-based)
+    my $total_ref_length = 0;
+    my (@total_match, @total_tst_cnt, @total_ref_cnt, @total_tst_info, @total_ref_info);
+    foreach my $order (1 .. $max_Ngram)
+    {
+        $total_match[$order]    = 0;
+        $total_tst_cnt[$order]  = 0;
+        $total_ref_cnt[$order]  = 0;
+        $total_tst_info[$order] = 0;
+        $total_ref_info[$order] = 0;
+    }
+
+    foreach my $doc (sort keys %eval_docs)
+    {
+        my ($ref_length, $match_cnt, $tst_cnt, $ref_cnt, $tst_info, $ref_info) = score_document ($sys, $doc, $overallScore);
+
+        # document-level score; the per-order details go to a throw-away hash
+        if ( $method eq "NIST" )
+        {
+            my %DOCmt = ();
+            my $docScore = nist_score( scalar( @ref_sys ), $match_cnt, $tst_cnt, $ref_cnt, $tst_info, $ref_info, $sys, \%DOCmt );
+            $overallScore->{ $sys }{ 'documents' }{ $doc }{ 'score' } = $docScore;
+            printf "$method score using 5-grams = %.4f for system \"$sys\" on document \"$doc\" (%d segments, %d words)\n",
+                $docScore, scalar keys %{$tst_data{$sys}{$doc}{SEGS}}, $tst_cnt->[1]
+                if ( $detail >= 1 );
+        }
+
+        if ( $method eq "BLEU" )
+        {
+            my %DOCmt = ();
+            my $docScore = &{$BLEU_SCORE}( $ref_length, $match_cnt, $tst_cnt, $sys, \%DOCmt );
+            $overallScore->{ $sys }{ 'documents' }{ $doc }{ 'score' } = $docScore;
+            printf "$method score using 4-grams = %.4f for system \"$sys\" on document \"$doc\" (%d segments, %d words)\n",
+                $docScore, scalar keys %{$tst_data{$sys}{$doc}{SEGS}}, $tst_cnt->[1]
+                if ( $detail >= 1 );
+        }
+
+        # fold this document into the system totals
+        $total_ref_length += $ref_length;
+        foreach my $order (1 .. $max_Ngram)
+        {
+            $total_match[$order]    += $match_cnt->[$order];
+            $total_tst_cnt[$order]  += $tst_cnt->[$order];
+            $total_ref_cnt[$order]  += $ref_cnt->[$order];
+            $total_tst_info[$order] += $tst_info->[$order];
+            $total_ref_info[$order] += $ref_info->[$order];
+            if (defined $opt_x and $opt_x eq "document info")
+            {
+                printf "document info: $sys $doc %d-gram %d %d %d %9.4f %9.4f\n", $order, $match_cnt->[$order],
+                    $tst_cnt->[$order], $ref_cnt->[$order], $tst_info->[$order], $ref_info->[$order];
+            }
+        }
+    }
+
+    # system-level score from the accumulated statistics
+    if ($method eq "BLEU")
+    {
+        $overallScore->{ $sys }{ 'score' } = &{$BLEU_SCORE}($total_ref_length, \@total_match, \@total_tst_cnt, $sys, $SCOREmt);
+    }
+    if ($method eq "NIST")
+    {
+        $overallScore->{ $sys }{ 'score' } = nist_score (scalar @ref_sys, \@total_match, \@total_tst_cnt, \@total_ref_cnt, \@total_tst_info, \@total_ref_info, $sys, $SCOREmt);
+    }
+}
+
+#################################
+
+sub score_document
+{
+    # Score every segment of document $doc for system $sys against all
+    # references (@ref_sys / %ref_data), store each segment score in
+    #   $overallScore->{$sys}{documents}{$doc}{segments}{$seg}{score}
+    # and return the statistics summed over the document:
+    #   ($ref_length, \@match, \@tst_cnt, \@ref_cnt, \@tst_info, \@ref_info)
+    # where the arrays are indexed by n-gram order (1-based).
+    my ($sys, $doc, $overallScore) = @_;
+    my ($ref_length, $match_cnt, $tst_cnt, $ref_cnt, $tst_info, $ref_info);
+    my ($cum_ref_length, @cum_match, @cum_tst_cnt, @cum_ref_cnt, @cum_tst_info, @cum_ref_info);
+
+    $cum_ref_length = 0;
+    for (my $j=1; $j<=$max_Ngram; $j++)
+    {
+        $cum_match[$j] = $cum_tst_cnt[$j] = $cum_ref_cnt[$j] = $cum_tst_info[$j] = $cum_ref_info[$j] = 0;
+    }
+
+    # score each segment
+    foreach my $seg ( nsort keys( %{$tst_data{$sys}{$doc}{SEGS}} ) )
+    {
+        # collect the matching segment from every reference
+        my @ref_segments = ();
+        foreach my $ref (@ref_sys)
+        {
+            push @ref_segments, $ref_data{$ref}{$doc}{SEGS}{$seg};
+            if ( $detail >= 3 )
+            {
+                printf "ref '$ref', seg $seg: %s\n", $ref_data{$ref}{$doc}{SEGS}{$seg}
+            }
+        }
+
+        printf "sys '$sys', seg $seg: %s\n", $tst_data{$sys}{$doc}{SEGS}{$seg} if ( $detail >= 3 );
+        ($ref_length, $match_cnt, $tst_cnt, $ref_cnt, $tst_info, $ref_info) = score_segment ($tst_data{$sys}{$doc}{SEGS}{$seg}, @ref_segments);
+
+        # The scoring functions take a hash REFERENCE for the per-order
+        # details; passing the (empty) hash itself would flatten to no
+        # argument at all.
+        if ( $method eq "BLEU" )
+        {
+            my %DOCmt = ();
+            my $segScore = &{$BLEU_SCORE}($ref_length, $match_cnt, $tst_cnt, $sys, \%DOCmt);
+            $overallScore->{ $sys }{ 'documents' }{ $doc }{ 'segments' }{ $seg }{ 'score' } = $segScore;
+            if ( $detail >= 2 )
+            {
+                printf " $method score using 4-grams = %.4f for system \"$sys\" on segment $seg of document \"$doc\" (%d words)\n", $segScore, $tst_cnt->[1]
+            }
+        }
+        if ( $method eq "NIST" )
+        {
+            my %DOCmt = ();
+            my $segScore = nist_score (scalar @ref_sys, $match_cnt, $tst_cnt, $ref_cnt, $tst_info, $ref_info, $sys, \%DOCmt);
+            $overallScore->{ $sys }{ 'documents' }{ $doc }{ 'segments' }{ $seg }{ 'score' } = $segScore;
+            if ( $detail >= 2 )
+            {
+                printf " $method score using 5-grams = %.4f for system \"$sys\" on segment $seg of document \"$doc\" (%d words)\n", $segScore, $tst_cnt->[1];
+            }
+        }
+
+        # accumulate document-level statistics
+        $cum_ref_length += $ref_length;
+        for (my $j=1; $j<=$max_Ngram; $j++)
+        {
+            $cum_match[$j] += $match_cnt->[$j];
+            $cum_tst_cnt[$j] += $tst_cnt->[$j];
+            $cum_ref_cnt[$j] += $ref_cnt->[$j];
+            $cum_tst_info[$j] += $tst_info->[$j];
+            $cum_ref_info[$j] += $ref_info->[$j];
+        }
+    }
+    return ($cum_ref_length, [@cum_match], [@cum_tst_cnt], [@cum_ref_cnt], [@cum_tst_info], [@cum_ref_info]);
+}
+
+###############################################################################################################################
+# Pick the shorter of the reference length chosen so far and a new reference sentence length.
+# Arguments:
+#  - currentLength : the shortest reference length seen so far
+#  - referenceSentenceLength : the length of the reference sentence being considered
+#  - candidateSentenceLength : the length of the candidate sentence (accepted but unused)
+# Returns referenceSentenceLength when it is strictly smaller, currentLength otherwise.
+###############################################################################################################################
+sub brevity_penalty_shortest
+{
+    my ( $currentLength, $referenceSentenceLength, $candidateSentenceLength ) = @_;
+    return $referenceSentenceLength if $referenceSentenceLength < $currentLength;
+    return $currentLength;
+}
+
+###############################################################################################################################
+# Pick the reference length closest to the candidate sentence length.
+# Arguments:
+#  - currentLength : the closest reference length found so far
+#  - referenceSentenceLength : the length of the reference sentence being considered
+#  - candidateSentenceLength : the length of the candidate sentence
+# When both lengths are equally far from the candidate length, the shorter one is returned.
+# Example with a candidate of 7 tokens and references of 11, 8, 6 and 7 tokens, taken in that order:
+#  - currentLength is set to 11 (outside of this function)
+#  - brevity_penalty_closest( 11, 8, 7 ) returns 8, since abs( 8 - 7 ) < abs( 11 - 7 )
+#  - brevity_penalty_closest( 8, 6, 7 ) returns 6, since abs( 8 - 7 ) == abs( 6 - 7 ) AND 6 < 8
+#  - brevity_penalty_closest( 6, 7, 7 ) returns 7, since abs( 7 - 7 ) < abs( 6 - 7 )
+###############################################################################################################################
+sub brevity_penalty_closest
+{
+    my ( $currentLength, $referenceSentenceLength, $candidateSentenceLength ) = @_;
+    my $referenceDistance = abs( $candidateSentenceLength - $referenceSentenceLength );
+    my $currentDistance = abs( $candidateSentenceLength - $currentLength );
+
+    # strictly closer reference wins outright
+    return $referenceSentenceLength if $referenceDistance < $currentDistance;
+    # on a tie, prefer the shorter of the two lengths
+    return $referenceSentenceLength
+        if ( $referenceDistance == $currentDistance ) && ( $currentLength > $referenceSentenceLength );
+    return $currentLength;
+}
+
+#################################
+
+sub score_segment
+{
+    # Collect the n-gram statistics of one test segment against its reference
+    # segments.  Returns
+    #   ($ref_length, \@match_count, \@tst_count, \@ref_count, \@tst_info, \@ref_info)
+    # with the arrays indexed by n-gram order (1-based).  $ref_length is the
+    # length of the first reference, refined through $BLEU_BP for each further
+    # reference; it stays undefined when no reference is given.
+    my ($tst_seg, @ref_segs) = @_;
+    my (@match_count, @tst_count, @ref_count, @tst_info, @ref_info);
+    my (%ref_ngrams_max, $ref_length);
+
+    foreach my $order (1 .. $max_Ngram)
+    {
+        $match_count[$order] = 0;
+        $tst_count[$order]   = 0;
+        $ref_count[$order]   = 0;
+        $tst_info[$order]    = 0;
+        $ref_info[$order]    = 0;
+    }
+
+    # n-gram counts for the test segment: a segment of W words holds W-n+1 n-grams
+    my @tst_wrds = split /\s+/, $tst_seg;
+    my %tst_ngrams = %{Words2Ngrams (@tst_wrds)};
+    foreach my $order (1 .. $max_Ngram)
+    {
+        $tst_count[$order] = @tst_wrds - $order + 1 if $order <= @tst_wrds;
+    }
+
+    # n-gram counts for the reference segments
+    foreach my $ref_seg (@ref_segs)
+    {
+        my @ref_wrds = split /\s+/, $ref_seg;
+        my %ref_ngrams = %{Words2Ngrams (@ref_wrds)};
+        foreach my $ngram (keys %ref_ngrams)
+        {
+            my @wrds = split / /, $ngram;
+            $ref_info[@wrds] += $ngram_info{$ngram};
+            # remember the largest count of this n-gram in any single reference
+            if ( defined $ref_ngrams_max{$ngram} )
+            {
+                $ref_ngrams_max{$ngram} = max ($ref_ngrams_max{$ngram}, $ref_ngrams{$ngram});
+            }
+            else
+            {
+                $ref_ngrams_max{$ngram} = $ref_ngrams{$ngram};
+            }
+        }
+        foreach my $order (1 .. $max_Ngram)
+        {
+            $ref_count[$order] += @ref_wrds - $order + 1 if $order <= @ref_wrds;
+        }
+        $ref_length = defined( $ref_length )
+            ? &{$BLEU_BP}( $ref_length, scalar( @ref_wrds ), scalar( @tst_wrds ) )
+            : scalar( @ref_wrds );
+    }
+
+    # accumulate scoring stats for tst_seg ngrams that match ref_seg ngrams
+    foreach my $ngram (keys %tst_ngrams)
+    {
+        next unless defined $ref_ngrams_max{$ngram};
+        my @wrds = split / /, $ngram;
+        # matches are clipped to the maximum reference count
+        my $count = min($tst_ngrams{$ngram},$ref_ngrams_max{$ngram});
+        $tst_info[@wrds] += $ngram_info{$ngram} * $count;
+        $match_count[@wrds] += $count;
+        if ($detail >= 3)
+        {
+            printf "%.2f info for each of $count %d-grams = '%s'\n", $ngram_info{$ngram}, scalar @wrds, $ngram;
+        }
+    }
+
+    return ($ref_length, [@match_count], [@tst_count], [@ref_count], [@tst_info], [@ref_info]);
+}
+
+#################################
+
+sub bleu_score_nosmoothing
+{
+    # BLEU without smoothing.
+    #   $ref_length      - reference length used for the brevity penalty
+    #   $matching_ngrams - array ref, matched n-gram counts by order (1-based)
+    #   $tst_ngrams      - array ref, test n-gram counts by order (1-based)
+    #   $sys             - system id used as key in $SCOREmt
+    #   $SCOREmt         - hash ref receiving $SCOREmt->{$n}{$sys}{cum} and {ind}
+    # Returns the cumulative 4-gram score, $SCOREmt->{4}{$sys}{cum}.
+    my ($ref_length, $matching_ngrams, $tst_ngrams, $sys, $SCOREmt) = @_;
+    my $score = 0;
+
+    for ( my $j = 1; $j <= $max_Ngram; ++$j )
+    {
+        if ($matching_ngrams->[ $j ] == 0)
+        {
+            # no match at this order: both scores are zero (the individual
+            # score is set explicitly so the report never reads an undefined
+            # value)
+            $SCOREmt->{ $j }{ $sys }{ cum } = 0;
+            $SCOREmt->{ $j }{ $sys }{ ind } = 0;
+        }
+        else
+        {
+            # computed here, not before the loop: a non-zero match count
+            # guarantees the divisors below are non-zero
+            my $len_score = min (0, 1-$ref_length/$tst_ngrams->[1]);
+            # Individual N-Gram score
+            my $iscore = log( $matching_ngrams->[ $j ] / $tst_ngrams->[ $j ] );
+            $SCOREmt->{ $j }{ $sys }{ ind } = exp( $iscore );
+            # Cumulative N-Gram score
+            $score += $iscore;
+            $SCOREmt->{ $j }{ $sys }{ cum } = exp( $score / $j + $len_score );
+        }
+    }
+    return $SCOREmt->{ 4 }{ $sys }{ cum };
+}
+
+###############################################################################################################################
+# Default method used to compute the BLEU score, using smoothing.
+# Note that the method used can be overridden using the '--no-smoothing' command-line argument
+# The smoothing is computed by taking 1 / ( 2^k ), instead of 0, for each precision score whose matching n-gram count is null
+# k is 1 for the first 'n' value for which the n-gram match count is null
+# For example, if the text contains:
+# - one 2-gram match
+# - and (consequently) two 1-gram matches
+# the n-gram count for each individual precision score would be:
+# - n=1 => prec_count = 2 (two unigrams)
+# - n=2 => prec_count = 1 (one bigram)
+# - n=3 => prec_count = 1/2 (no trigram, taking 'smoothed' value of 1 / ( 2^k ), with k=1)
+# - n=4 => prec_count = 1/4 (no fourgram, taking 'smoothed' value of 1 / ( 2^k ), with k=2)
+###############################################################################################################################
+sub bleu_score
+{
+    # Smoothed BLEU.  Fills $SCOREmt->{$n}{$sys}{ind} (individual) and
+    # $SCOREmt->{$n}{$sys}{cum} (cumulative) for every order up to $max_Ngram
+    # and returns the cumulative 4-gram score.
+    my ($ref_length, $matching_ngrams, $tst_ngrams, $sys, $SCOREmt) = @_;
+    my $log_precision_sum = 0;
+    my $smoothing_divisor = 1;
+
+    # brevity penalty factor; stays 0 for an empty test side
+    my $brevity_factor = 0;
+    if ( $tst_ngrams->[ 1 ] > 0 )
+    {
+        $brevity_factor = exp( min (0, 1 - $ref_length / $tst_ngrams->[ 1 ] ) );
+    }
+
+    foreach my $order ( 1 .. $max_Ngram )
+    {
+        my $log_precision = 0;      # value used when there are no test n-grams of this order
+        if ( $tst_ngrams->[ $order ] != 0 )
+        {
+            if ( $matching_ngrams->[ $order ] == 0 )
+            {
+                # no match: use 1 / 2^k in place of the zero match count
+                $smoothing_divisor *= 2;
+                $log_precision = log( 1 / ( $smoothing_divisor * $tst_ngrams->[ $order ] ) );
+            }
+            else
+            {
+                $log_precision = log( $matching_ngrams->[ $order ] / $tst_ngrams->[ $order ] );
+            }
+        }
+        $SCOREmt->{ $order }{ $sys }{ ind } = exp( $log_precision );
+        $log_precision_sum += $log_precision;
+        $SCOREmt->{ $order }{ $sys }{ cum } = exp( $log_precision_sum / $order ) * $brevity_factor;
+    }
+    return $SCOREmt->{ 4 }{ $sys }{ cum };
+}
+
+#################################
+
+sub nist_score
+{
+    # NIST score from accumulated statistics.
+    #   $nsys       - number of references
+    #   $tst_ngrams - array ref, test n-gram counts by order (1-based)
+    #   $ref_ngrams - array ref, reference n-gram counts by order, summed
+    #                 over all references
+    #   $tst_info   - array ref, information of matched n-grams by order
+    #   $sys        - system id used as key in $SCOREmt
+    #   $SCOREmt    - hash ref receiving $SCOREmt->{$n}{$sys}{cum} and {ind}
+    # $matching_ngrams and $ref_info are accepted but not used.
+    # Returns the cumulative 5-gram score, $SCOREmt->{5}{$sys}{cum}.
+    my ($nsys, $matching_ngrams, $tst_ngrams, $ref_ngrams, $tst_info, $ref_info, $sys, $SCOREmt) = @_;
+    my $score = 0;
+
+    # The length penalty depends only on the word counts, so compute it once.
+    # With no reference words (or no references) the ratio is undefined; the
+    # output cannot be shorter than an empty reference, so no penalty is
+    # applied instead of dying on a division by zero.
+    my $penalty = 1;
+    if ( $nsys and $ref_ngrams->[1] )
+    {
+        $penalty = nist_length_penalty($tst_ngrams->[1]/($ref_ngrams->[1]/$nsys));
+    }
+
+    for (my $n=1; $n<=$max_Ngram; $n++)
+    {
+        # average information per test n-gram of this order
+        my $iscore = $tst_info->[$n]/max($tst_ngrams->[$n],1);
+        $score += $iscore;
+        $SCOREmt->{$n}{$sys}{cum} = $score * $penalty;
+        $SCOREmt->{$n}{$sys}{ind} = $iscore * $penalty;
+    }
+    return $SCOREmt->{5}{$sys}{cum};
+}
+
+#################################
+
+sub Words2Ngrams
+{
+    # Convert a list of words into a reference to a hash mapping every n-gram
+    # (words joined by a single space, up to $max_Ngram words long) to its
+    # number of occurrences.
+    my @words = @_;
+    my %count = ();
+
+    foreach my $start (0 .. $#words)
+    {
+        my $ngram;
+        for (my $len = 0; $len < $max_Ngram; $len++)
+        {
+            # reading past the end yields undef, which ends this start position
+            my $word = $words[$start + $len];
+            last unless defined $word;
+            $ngram = defined $ngram ? "$ngram $word" : $word;
+            $count{$ngram}++;
+        }
+    }
+    return \%count;
+}
+
+#################################
+
+# Normalize and tokenize one segment of text (ASCII-oriented rules).
+# Takes the raw segment text and returns it with entities decoded, tokens
+# separated by single spaces and no leading/trailing space.  Text is
+# lower-cased (A-Z only) unless the file-level $preserve_case is true.
+# The substitutions are order-dependent; do not reorder them.
+sub tokenization
+{
+ my ($norm_text) = @_;
+
+# language-independent part:
+ $norm_text =~ s/<skipped>//g; # strip "skipped" tags
+ $norm_text =~ s/-\n//g; # strip end-of-line hyphenation and join lines
+ $norm_text =~ s/\n/ /g; # join lines
+ $norm_text =~ s/&quot;/"/g; # convert SGML tag for quote to "
+ $norm_text =~ s/&amp;/&/g; # convert SGML tag for ampersand to &
+ $norm_text =~ s/&lt;/</g; # convert SGML tag for less-than to <
+ $norm_text =~ s/&gt;/>/g; # convert SGML tag for greater-than to >
+
+# language-dependent part (assuming Western languages):
+ $norm_text = " $norm_text "; # pad so the rules below also apply at both ends
+ $norm_text =~ tr/[A-Z]/[a-z]/ unless $preserve_case; # lowercase A-Z (the brackets are literal in tr and map to themselves)
+ $norm_text =~ s/([\{-\~\[-\` -\&\(-\+\:-\@\/])/ $1 /g; # tokenize punctuation
+ $norm_text =~ s/([^0-9])([\.,])/$1 $2 /g; # tokenize period and comma unless preceded by a digit
+ $norm_text =~ s/([\.,])([^0-9])/ $1 $2/g; # tokenize period and comma unless followed by a digit
+ $norm_text =~ s/([0-9])(-)/$1 $2 /g; # tokenize dash when preceded by a digit
+ $norm_text =~ s/\s+/ /g; # one space only between words
+ $norm_text =~ s/^\s+//; # no leading space
+ $norm_text =~ s/\s+$//; # no trailing space
+
+ return $norm_text;
+}
+
+
+# Normalize and tokenize one segment of text using Unicode character
+# properties instead of ASCII ranges.  Takes the raw segment text and returns
+# it with entities decoded, tokens separated by single spaces and no
+# leading/trailing separator.  Text is lower-cased with lc() unless the
+# file-level $preserve_case is true; when the file-level $split_non_ASCII is
+# true every non-ASCII character becomes a token of its own.
+# The substitutions are order-dependent; do not reorder them.
+sub tokenization_international
+{
+ my ($norm_text) = @_;
+
+ $norm_text =~ s/<skipped>//g; # strip "skipped" tags
+ #$norm_text =~ s/\p{Hyphen}\p{Zl}//g; # strip end-of-line hyphenation and join lines
+ $norm_text =~ s/\p{Zl}/ /g; # join lines
+
+ # replace entities
+ $norm_text =~ s/&quot;/\"/g; # quote to "
+ $norm_text =~ s/&amp;/&/g; # ampersand to &
+ $norm_text =~ s/&lt;/</g; # less-than to <
+ $norm_text =~ s/&gt;/>/g; # greater-than to >
+ $norm_text =~ s/&apos;/\'/g; # apostrophe to '
+
+ $norm_text = lc( $norm_text ) unless $preserve_case; # lowercasing if needed
+ $norm_text =~ s/([^[:ascii:]])/ $1 /g if ( $split_non_ASCII ); # isolate each non-ASCII character
+
+ # punctuation: tokenize any punctuation unless followed AND preceded by a digit
+ $norm_text =~ s/(\P{N})(\p{P})/$1 $2 /g; # punctuation after a non-number character
+ $norm_text =~ s/(\p{P})(\P{N})/ $1 $2/g; # punctuation before a non-number character
+
+ $norm_text =~ s/(\p{S})/ $1 /g; # tokenize symbols
+
+ $norm_text =~ s/\p{Z}+/ /g; # one space only between words
+ $norm_text =~ s/^\p{Z}+//; # no leading space
+ $norm_text =~ s/\p{Z}+$//; # no trailing space
+
+ return $norm_text;
+}
+
+#################################
+
+sub nist_length_penalty
+{
+    # Length penalty as a function of $ratio: 1 for ratios of 1 or more,
+    # 0 for ratios of 0 or less, and exp( -beta * ln(ratio)^2 ) in between.
+    # beta is derived from the constants 1.5 and 0.5 below.
+    my ($ratio) = @_;
+    if ($ratio >= 1)
+    {
+        return 1;
+    }
+    if ($ratio <= 0)
+    {
+        return 0;
+    }
+    my ($anchor_ratio, $anchor_score) = (1.5, 0.5);
+    my $log_anchor = log($anchor_ratio);
+    my $log_ratio = log($ratio);
+    my $beta = -log($anchor_score)/$log_anchor/$log_anchor;
+    return exp (-$beta*$log_ratio*$log_ratio);
+}
+
+#################################
+
+sub date_time_stamp
+{
+    # Return the current local date and time as two strings:
+    #   ("YYYY Mon D", "HH:MM:SS")
+    my @now = localtime();
+    my ($sec, $min, $hour, $mday, $mon, $year) = @now[0 .. 5];
+    my @months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
+    my $time = sprintf "%2.2d:%2.2d:%2.2d", $hour, $min, $sec;
+    # localtime() counts years from 1900 and months from 0
+    my $date = sprintf "%4.4s %3.3s %s", 1900+$year, $months[$mon], $mday;
+    return ($date, $time);
+}
+
+#################################
+
+sub extract_sgml_tag_and_span
+{
+    # Find the first <$name ...>...</$name> element in $data (case-insensitive,
+    # may span lines).  Returns (attribute text, element content, text after
+    # the closing tag), or the empty list when there is no such element.
+    my ($name, $data) = @_;
+    if ($data =~ m|<$name\s*([^>]*)>(.*?)</$name\s*>(.*)|si)
+    {
+        return ($1, $2, $3);
+    }
+    return ();
+}
+
+#################################
+
+sub extract_sgml_tag_attribute
+{
+    # Return the double-quoted value of attribute $name found in the tag text
+    # $data (case-insensitive), as a one-element list, or the empty list when
+    # the attribute is absent.
+    my ($name, $data) = @_;
+    # \b keeps a short name such as "id" from matching the tail of a longer
+    # attribute name such as "docid" or "sysid"
+    if ($data =~ m|\b$name\s*=\s*\"([^\"]*)\"|si)
+    {
+        return ($1);
+    }
+    return ();
+}
+
+#################################
+
+sub max
+{
+    # Numeric maximum of the arguments.  Arguments are examined from last to
+    # first and examination stops at the first undefined one; returns nothing
+    # when the last argument is undefined or no argument is given.
+    my @candidates = reverse @_;
+    my $max = shift @candidates;
+    return unless defined $max;
+
+    foreach my $next (@candidates)
+    {
+        last unless defined $next;
+        $max = $next if $next > $max;
+    }
+    return $max;
+}
+
+#################################
+
+sub min
+{
+    # Numeric minimum of the arguments.  Arguments are examined from last to
+    # first and examination stops at the first undefined one; returns nothing
+    # when the last argument is undefined or no argument is given.
+    my @candidates = reverse @_;
+    my $min = shift @candidates;
+    return unless defined $min;
+
+    foreach my $next (@candidates)
+    {
+        last unless defined $next;
+        $min = $next if $next < $min;
+    }
+    return $min;
+}
+
+#################################
+
+sub printout_report
+{
+    # Print the final report to the currently selected output handle: one
+    # headline score line per system, then the individual and the cumulative
+    # per-order score tables.  Which metrics appear is controlled by the
+    # file-level $METHOD ("BOTH", "NIST" or "BLEU"); scores are read from the
+    # file-level %NISTmt and %BLEUmt.
+    my @systems = sort @tst_sys;
+    my $with_nist = ( $METHOD eq "BOTH" ) || ( $METHOD eq "NIST" );
+    my $with_bleu = ( $METHOD eq "BOTH" ) || ( $METHOD eq "BLEU" );
+
+    # prints one table row per system: label, one score per order, system id
+    my $print_rows = sub
+    {
+        my ($label, $scores, $field) = @_;
+        foreach my $sys (@systems)
+        {
+            printf( $label );
+            foreach my $order (1 .. $max_Ngram)
+            {
+                printf " %2.4f ",$scores->{$order}{$sys}{$field};
+            }
+            printf " \"$sys\"\n";
+        }
+    };
+
+    # headline scores
+    foreach my $sys (@systems)
+    {
+        if ( $METHOD eq "BOTH" )
+        {
+            printf "NIST score = %2.4f BLEU score = %.4f for system \"$sys\"\n",$NISTmt{5}{$sys}{cum},$BLEUmt{4}{$sys}{cum};
+        }
+        elsif ($METHOD eq "NIST" )
+        {
+            printf "NIST score = %2.4f for system \"$sys\"\n",$NISTmt{5}{$sys}{cum};
+        }
+        elsif ($METHOD eq "BLEU" )
+        {
+            printf "\nBLEU score = %.4f for system \"$sys\"\n",$BLEUmt{4}{$sys}{cum};
+        }
+    }
+
+    # individual scores
+    printf "\n# ------------------------------------------------------------------------\n\n";
+    printf "Individual N-gram scoring\n";
+    printf " 1-gram 2-gram 3-gram 4-gram 5-gram 6-gram 7-gram 8-gram 9-gram\n";
+    printf " ------ ------ ------ ------ ------ ------ ------ ------ ------\n";
+    if ( $with_nist )
+    {
+        $print_rows->( " NIST:", \%NISTmt, 'ind' );
+        printf "\n";
+    }
+    if ( $with_bleu )
+    {
+        $print_rows->( " BLEU:", \%BLEUmt, 'ind' );
+    }
+
+    # cumulative scores
+    printf "\n# ------------------------------------------------------------------------\n";
+    printf "\nCumulative N-gram scoring\n";
+    printf " 1-gram 2-gram 3-gram 4-gram 5-gram 6-gram 7-gram 8-gram 9-gram\n";
+    printf " ------ ------ ------ ------ ------ ------ ------ ------ ------\n";
+    if ( $with_nist )
+    {
+        $print_rows->( " NIST:", \%NISTmt, 'cum' );
+    }
+    # this separator is printed whether or not the NIST rows were
+    printf "\n";
+    if ( $with_bleu )
+    {
+        $print_rows->( " BLEU:", \%BLEUmt, 'cum' );
+    }
+}
+
+###############################################################################################################################
+# Create three tab-separated score files, by using:
+# - $prefix : the prefix used for the output file names ("$prefix-sys.scr", "$prefix-doc.scr", "$prefix-seg.scr")
+# - %overall : a hash containing seg/doc/sys-level scores:
+#     - $overall{ $SYSTEM_ID }{ 'score' } => system-level score
+#     - $overall{ $SYSTEM_ID }{ 'documents' }{ $DOCUMENT_ID }{ 'score' } => document-level score
+#     - $overall{ $SYSTEM_ID }{ 'documents' }{ $DOCUMENT_ID }{ 'segments' }{ $SEGMENT_ID }{ 'score' } => segment-level score
+# Every line starts with the file-level $tst_id. Dies when a file cannot be opened or closed.
+###############################################################################################################################
+sub outputMetricsMATR
+{
+    my ( $prefix, %overall ) = @_;
+    my $fileNameSys = $prefix . '-sys.scr';
+    my $fileNameDoc = $prefix . '-doc.scr';
+    my $fileNameSeg = $prefix . '-seg.scr';
+    # lexical handles: no package-global barewords, closed automatically if we die
+    open( my $outSys, '>', $fileNameSys ) or die "Could not open file: ${fileNameSys}: $!";
+    open( my $outDoc, '>', $fileNameDoc ) or die "Could not open file: ${fileNameDoc}: $!";
+    open( my $outSeg, '>', $fileNameSeg ) or die "Could not open file: ${fileNameSeg}: $!";
+    foreach my $sys ( sort( keys( %overall ) ) )
+    {
+        my $scoreSys = $overall{ $sys }{ 'score' };
+        print {$outSys} "${tst_id}\t${sys}\t${scoreSys}\n";
+        foreach my $doc ( sort( keys( %{$overall{ $sys }{ 'documents' }} ) ) )
+        {
+            my $scoreDoc = $overall{ $sys }{ 'documents' }{ $doc }{ 'score' };
+            print {$outDoc} "${tst_id}\t${sys}\t${doc}\t${scoreDoc}\n";
+            foreach my $seg ( nsort keys( %{$overall{ $sys }{ 'documents' }{ $doc }{ 'segments' }} ) )
+            {
+                my $scoreSeg = $overall{ $sys }{ 'documents' }{ $doc }{ 'segments' }{ $seg }{ 'score' };
+                print {$outSeg} "${tst_id}\t${sys}\t${doc}\t${seg}\t${scoreSeg}\n";
+            }
+        }
+    }
+    # buffered write errors only surface at close time, so check it
+    close( $outSeg ) or die "Could not close file: ${fileNameSeg}: $!";
+    close( $outDoc ) or die "Could not close file: ${fileNameDoc}: $!";
+    close( $outSys ) or die "Could not close file: ${fileNameSys}: $!";
+}
+
diff --git a/scripts/recaser/train-truecaser.perl b/scripts/recaser/train-truecaser.perl
index 4f600a640..94ddbf2fa 100755
--- a/scripts/recaser/train-truecaser.perl
+++ b/scripts/recaser/train-truecaser.perl
@@ -44,6 +44,12 @@ while(<CORPUS>) {
$firstWordOfSentence = 1;
}
+ if ($currentWord !~ /[\p{Ll}\p{Lu}\p{Lt}]/) {
+ # skip words with nothing to case
+ $firstWordOfSentence = 0;
+ next;
+ }
+
my $currentWordWeight = 0;
if (! $firstWordOfSentence) {
$currentWordWeight = 1;
diff --git a/scripts/training/rdlm/train_rdlm.py b/scripts/training/rdlm/train_rdlm.py
index 289ab405c..7915e454c 100755
--- a/scripts/training/rdlm/train_rdlm.py
+++ b/scripts/training/rdlm/train_rdlm.py
@@ -102,6 +102,11 @@ parser.add_argument(
parser.add_argument(
"--mmap", dest="mmap", action="store_true",
help="Use memory-mapped file (for lower memory consumption).")
+parser.add_argument(
+ "--train-host", dest="train_host",
+ help="Execute nplm training on this host, via ssh")
+parser.add_argument("--extra-settings", dest="extra_settings",
+ help="Extra settings to be passed to NPLM")
parser.set_defaults(
diff --git a/util/tempfile.hh b/util/tempfile.hh
index 9c28346fc..f4fb1860c 100644
--- a/util/tempfile.hh
+++ b/util/tempfile.hh
@@ -27,7 +27,7 @@ std::string temp_location()
{
#if defined(_WIN32) || defined(_WIN64)
char dir_buffer[1000];
- if (GetTempPath(1000, dir_buffer) == 0)
+ if (GetTempPathA(1000, dir_buffer) == 0)
throw std::runtime_error("Could not read temporary directory.");
return std::string(dir_buffer);
#else
@@ -51,7 +51,7 @@ std::string windows_tmpnam()
{
const std::string tmp = temp_location();
char output_buffer[MAX_PATH];
- if (GetTempFileName(tmp.c_str(), "tmp", 0, output_buffer) == 0)
+ if (GetTempFileNameA(tmp.c_str(), "tmp", 0, output_buffer) == 0)
throw std::runtime_error("Could not create temporary file name.");
return output_buffer;
}