Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHieu Hoang <hieuhoang@gmail.com>2017-02-01 01:21:59 +0300
committerHieu Hoang <hieuhoang@gmail.com>2017-02-01 01:21:59 +0300
commita8a5b43f2dc32bd1b45006fd43989dc71e74ba0e (patch)
treee84a78fa005e29ec78076d6e525371240871122c /contrib
parent7206d592751ee9afeb1fa4753b7e19272e2585bc (diff)
move moses2 to root
Diffstat (limited to 'contrib')
-rw-r--r--contrib/moses2-cmd/.cproject174
-rw-r--r--contrib/moses2-cmd/.project43
-rw-r--r--contrib/moses2/.cproject180
-rw-r--r--contrib/moses2/.project29
-rw-r--r--contrib/moses2/AlignmentInfo.cpp176
-rw-r--r--contrib/moses2/AlignmentInfo.h148
-rw-r--r--contrib/moses2/AlignmentInfoCollection.cpp62
-rw-r--r--contrib/moses2/AlignmentInfoCollection.h81
-rw-r--r--contrib/moses2/ArcLists.cpp129
-rw-r--r--contrib/moses2/ArcLists.h43
-rw-r--r--contrib/moses2/Array.h94
-rw-r--r--contrib/moses2/EstimatedScores.cpp117
-rw-r--r--contrib/moses2/EstimatedScores.h61
-rw-r--r--contrib/moses2/FF/Distortion.cpp190
-rw-r--r--contrib/moses2/FF/Distortion.h60
-rw-r--r--contrib/moses2/FF/FFState.cpp0
-rw-r--r--contrib/moses2/FF/FFState.h55
-rw-r--r--contrib/moses2/FF/FeatureFunction.cpp85
-rw-r--r--contrib/moses2/FF/FeatureFunction.h127
-rw-r--r--contrib/moses2/FF/FeatureFunctions.cpp287
-rw-r--r--contrib/moses2/FF/FeatureFunctions.h106
-rw-r--r--contrib/moses2/FF/FeatureRegistry.cpp130
-rw-r--r--contrib/moses2/FF/FeatureRegistry.h54
-rw-r--r--contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp79
-rw-r--r--contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.h40
-rw-r--r--contrib/moses2/FF/LexicalReordering/HReorderingBackwardState.cpp71
-rw-r--r--contrib/moses2/FF/LexicalReordering/HReorderingBackwardState.h37
-rw-r--r--contrib/moses2/FF/LexicalReordering/HReorderingForwardState.cpp87
-rw-r--r--contrib/moses2/FF/LexicalReordering/HReorderingForwardState.h41
-rw-r--r--contrib/moses2/FF/LexicalReordering/LRModel.cpp209
-rw-r--r--contrib/moses2/FF/LexicalReordering/LRModel.h109
-rw-r--r--contrib/moses2/FF/LexicalReordering/LRState.cpp93
-rw-r--r--contrib/moses2/FF/LexicalReordering/LRState.h48
-rw-r--r--contrib/moses2/FF/LexicalReordering/LexicalReordering.cpp222
-rw-r--r--contrib/moses2/FF/LexicalReordering/LexicalReordering.h116
-rw-r--r--contrib/moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp86
-rw-r--r--contrib/moses2/FF/LexicalReordering/PhraseBasedReorderingState.h45
-rw-r--r--contrib/moses2/FF/LexicalReordering/ReorderingStack.cpp104
-rw-r--r--contrib/moses2/FF/LexicalReordering/ReorderingStack.h41
-rw-r--r--contrib/moses2/FF/OSM/KenOSM.cpp33
-rw-r--r--contrib/moses2/FF/OSM/KenOSM.h53
-rw-r--r--contrib/moses2/FF/OSM/OpSequenceModel.cpp248
-rw-r--r--contrib/moses2/FF/OSM/OpSequenceModel.h57
-rw-r--r--contrib/moses2/FF/OSM/osmHyp.cpp601
-rw-r--r--contrib/moses2/FF/OSM/osmHyp.h111
-rw-r--r--contrib/moses2/FF/PhrasePenalty.cpp40
-rw-r--r--contrib/moses2/FF/PhrasePenalty.h34
-rw-r--r--contrib/moses2/FF/PointerState.cpp0
-rw-r--r--contrib/moses2/FF/PointerState.h42
-rw-r--r--contrib/moses2/FF/SkeletonStatefulFF.cpp100
-rw-r--r--contrib/moses2/FF/SkeletonStatefulFF.h48
-rw-r--r--contrib/moses2/FF/SkeletonStatelessFF.cpp40
-rw-r--r--contrib/moses2/FF/SkeletonStatelessFF.h34
-rw-r--r--contrib/moses2/FF/StatefulFeatureFunction.cpp67
-rw-r--r--contrib/moses2/FF/StatefulFeatureFunction.h68
-rw-r--r--contrib/moses2/FF/StatelessFeatureFunction.cpp27
-rw-r--r--contrib/moses2/FF/StatelessFeatureFunction.h25
-rw-r--r--contrib/moses2/FF/WordPenalty.cpp53
-rw-r--r--contrib/moses2/FF/WordPenalty.h37
-rw-r--r--contrib/moses2/HypothesisBase.cpp81
-rw-r--r--contrib/moses2/HypothesisBase.h74
-rw-r--r--contrib/moses2/HypothesisColl.cpp287
-rw-r--r--contrib/moses2/HypothesisColl.h74
-rw-r--r--contrib/moses2/InputPathBase.cpp21
-rw-r--r--contrib/moses2/InputPathBase.h32
-rw-r--r--contrib/moses2/InputPathsBase.cpp20
-rw-r--r--contrib/moses2/InputPathsBase.h59
-rw-r--r--contrib/moses2/InputType.cpp92
-rw-r--r--contrib/moses2/InputType.h78
-rw-r--r--contrib/moses2/Jamfile181
-rw-r--r--contrib/moses2/LM/GPULM.cpp249
-rw-r--r--contrib/moses2/LM/GPULM.h91
-rw-r--r--contrib/moses2/LM/KENLM.cpp601
-rw-r--r--contrib/moses2/LM/KENLM.h88
-rw-r--r--contrib/moses2/LM/KENLMBatch.cpp390
-rw-r--r--contrib/moses2/LM/KENLMBatch.h101
-rw-r--r--contrib/moses2/LM/LanguageModel.cpp334
-rw-r--r--contrib/moses2/LM/LanguageModel.h97
-rw-r--r--contrib/moses2/LM/LanguageModelDALM.cpp246
-rw-r--r--contrib/moses2/LM/LanguageModelDALM.h75
-rw-r--r--contrib/moses2/Main.cpp128
-rw-r--r--contrib/moses2/Main.h22
-rw-r--r--contrib/moses2/ManagerBase.cpp55
-rw-r--r--contrib/moses2/ManagerBase.h76
-rw-r--r--contrib/moses2/MemPool.cpp83
-rw-r--r--contrib/moses2/MemPool.h175
-rw-r--r--contrib/moses2/MemPoolAllocator.h90
-rw-r--r--contrib/moses2/MorphoTrie/MorphTrie.h100
-rw-r--r--contrib/moses2/MorphoTrie/Node.h93
-rw-r--r--contrib/moses2/MorphoTrie/utils.h30
-rw-r--r--contrib/moses2/Phrase.cpp23
-rw-r--r--contrib/moses2/Phrase.h151
-rw-r--r--contrib/moses2/PhraseBased/CubePruningMiniStack/Misc.cpp160
-rw-r--r--contrib/moses2/PhraseBased/CubePruningMiniStack/Misc.h104
-rw-r--r--contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp248
-rw-r--r--contrib/moses2/PhraseBased/CubePruningMiniStack/Search.h62
-rw-r--r--contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp125
-rw-r--r--contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.h77
-rw-r--r--contrib/moses2/PhraseBased/Hypothesis.cpp228
-rw-r--r--contrib/moses2/PhraseBased/Hypothesis.h125
-rw-r--r--contrib/moses2/PhraseBased/InputPath.cpp59
-rw-r--r--contrib/moses2/PhraseBased/InputPath.h41
-rw-r--r--contrib/moses2/PhraseBased/InputPaths.cpp65
-rw-r--r--contrib/moses2/PhraseBased/InputPaths.h44
-rw-r--r--contrib/moses2/PhraseBased/Manager.cpp280
-rw-r--r--contrib/moses2/PhraseBased/Manager.h77
-rw-r--r--contrib/moses2/PhraseBased/Normal/Search.cpp161
-rw-r--r--contrib/moses2/PhraseBased/Normal/Search.h51
-rw-r--r--contrib/moses2/PhraseBased/Normal/Stack.cpp35
-rw-r--r--contrib/moses2/PhraseBased/Normal/Stack.h32
-rw-r--r--contrib/moses2/PhraseBased/Normal/Stacks.cpp67
-rw-r--r--contrib/moses2/PhraseBased/Normal/Stacks.h62
-rw-r--r--contrib/moses2/PhraseBased/PhraseImpl.cpp27
-rw-r--r--contrib/moses2/PhraseBased/PhraseImpl.h21
-rw-r--r--contrib/moses2/PhraseBased/ReorderingConstraint.cpp252
-rw-r--r--contrib/moses2/PhraseBased/ReorderingConstraint.h88
-rw-r--r--contrib/moses2/PhraseBased/Search.cpp116
-rw-r--r--contrib/moses2/PhraseBased/Search.h60
-rw-r--r--contrib/moses2/PhraseBased/Sentence.cpp174
-rw-r--r--contrib/moses2/PhraseBased/Sentence.h52
-rw-r--r--contrib/moses2/PhraseBased/TargetPhraseImpl.cpp52
-rw-r--r--contrib/moses2/PhraseBased/TargetPhraseImpl.h54
-rw-r--r--contrib/moses2/PhraseBased/TargetPhrases.cpp78
-rw-r--r--contrib/moses2/PhraseBased/TargetPhrases.h66
-rw-r--r--contrib/moses2/PhraseBased/TrellisPath.cpp175
-rw-r--r--contrib/moses2/PhraseBased/TrellisPath.h87
-rw-r--r--contrib/moses2/PhraseImplTemplate.h83
-rw-r--r--contrib/moses2/Recycler.cpp13
-rw-r--r--contrib/moses2/Recycler.h76
-rw-r--r--contrib/moses2/SCFG/ActiveChart.cpp109
-rw-r--r--contrib/moses2/SCFG/ActiveChart.h125
-rw-r--r--contrib/moses2/SCFG/Hypothesis.cpp180
-rw-r--r--contrib/moses2/SCFG/Hypothesis.h71
-rw-r--r--contrib/moses2/SCFG/InputPath.cpp120
-rw-r--r--contrib/moses2/SCFG/InputPath.h63
-rw-r--r--contrib/moses2/SCFG/InputPaths.cpp88
-rw-r--r--contrib/moses2/SCFG/InputPaths.h43
-rw-r--r--contrib/moses2/SCFG/Manager.cpp391
-rw-r--r--contrib/moses2/SCFG/Manager.h83
-rw-r--r--contrib/moses2/SCFG/Misc.cpp231
-rw-r--r--contrib/moses2/SCFG/Misc.h146
-rw-r--r--contrib/moses2/SCFG/PhraseImpl.cpp37
-rw-r--r--contrib/moses2/SCFG/PhraseImpl.h26
-rw-r--r--contrib/moses2/SCFG/Sentence.cpp155
-rw-r--r--contrib/moses2/SCFG/Sentence.h54
-rw-r--r--contrib/moses2/SCFG/Stack.cpp108
-rw-r--r--contrib/moses2/SCFG/Stack.h50
-rw-r--r--contrib/moses2/SCFG/Stacks.cpp56
-rw-r--r--contrib/moses2/SCFG/Stacks.h39
-rw-r--r--contrib/moses2/SCFG/TargetPhraseImpl.cpp125
-rw-r--r--contrib/moses2/SCFG/TargetPhraseImpl.h88
-rw-r--r--contrib/moses2/SCFG/TargetPhrases.cpp66
-rw-r--r--contrib/moses2/SCFG/TargetPhrases.h67
-rw-r--r--contrib/moses2/SCFG/Word.cpp149
-rw-r--r--contrib/moses2/SCFG/Word.h63
-rw-r--r--contrib/moses2/SCFG/nbest/KBestExtractor.cpp74
-rw-r--r--contrib/moses2/SCFG/nbest/KBestExtractor.h40
-rw-r--r--contrib/moses2/SCFG/nbest/NBest.cpp193
-rw-r--r--contrib/moses2/SCFG/nbest/NBest.h99
-rw-r--r--contrib/moses2/SCFG/nbest/NBestColl.cpp53
-rw-r--r--contrib/moses2/SCFG/nbest/NBestColl.h36
-rw-r--r--contrib/moses2/SCFG/nbest/NBests.cpp111
-rw-r--r--contrib/moses2/SCFG/nbest/NBests.h53
-rw-r--r--contrib/moses2/Scores.cpp285
-rw-r--r--contrib/moses2/Scores.h80
-rw-r--r--contrib/moses2/SubPhrase.cpp17
-rw-r--r--contrib/moses2/SubPhrase.h54
-rw-r--r--contrib/moses2/System.cpp223
-rw-r--r--contrib/moses2/System.h84
-rw-r--r--contrib/moses2/TargetPhrase.cpp15
-rw-r--r--contrib/moses2/TargetPhrase.h170
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.cpp418
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h200
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/CanonicalHuffman.h345
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp95
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.h108
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp173
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.h143
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/ListCoders.h394
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/MmapAllocator.h217
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/MonotonicVector.h247
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/MurmurHash3.cpp424
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/MurmurHash3.h37
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/PackedArray.h207
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp466
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.h142
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp222
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/PhraseTableCompact.h68
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/StringVector.h662
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp39
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h176
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/ThrowingFwrite.cpp30
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/ThrowingFwrite.h31
-rw-r--r--contrib/moses2/TranslationModel/Memory/Node.h138
-rw-r--r--contrib/moses2/TranslationModel/Memory/PhraseTableMemory.cpp268
-rw-r--r--contrib/moses2/TranslationModel/Memory/PhraseTableMemory.h85
-rw-r--r--contrib/moses2/TranslationModel/PhraseTable.cpp183
-rw-r--r--contrib/moses2/TranslationModel/PhraseTable.h128
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp756
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/ProbingPT.h159
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/StoreTarget.cpp266
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/StoreTarget.h51
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/StoreVocab.cpp13
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/StoreVocab.h64
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/hash.cpp44
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/hash.hh17
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/line_splitter.cpp103
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/line_splitter.hh59
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp40
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh55
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/querying.cpp180
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/querying.hh77
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/storing.cpp303
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/storing.hh95
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/vocabid.cpp59
-rw-r--r--contrib/moses2/TranslationModel/ProbingPT/vocabid.hh29
-rw-r--r--contrib/moses2/TranslationModel/Transliteration.cpp229
-rw-r--r--contrib/moses2/TranslationModel/Transliteration.h91
-rw-r--r--contrib/moses2/TranslationModel/UnknownWordPenalty.cpp285
-rw-r--r--contrib/moses2/TranslationModel/UnknownWordPenalty.h89
-rw-r--r--contrib/moses2/TranslationTask.cpp52
-rw-r--r--contrib/moses2/TranslationTask.h25
-rw-r--r--contrib/moses2/TrellisPaths.cpp14
-rw-r--r--contrib/moses2/TrellisPaths.h69
-rw-r--r--contrib/moses2/TypeDef.cpp11
-rw-r--r--contrib/moses2/TypeDef.h127
-rw-r--r--contrib/moses2/Vector.cpp14
-rw-r--r--contrib/moses2/Vector.h36
-rw-r--r--contrib/moses2/Weights.cpp61
-rw-r--r--contrib/moses2/Weights.h39
-rw-r--r--contrib/moses2/Word.cpp136
-rw-r--r--contrib/moses2/Word.h67
-rw-r--r--contrib/moses2/defer/CubePruningBitmapStack/Misc.cpp161
-rw-r--r--contrib/moses2/defer/CubePruningBitmapStack/Misc.h111
-rw-r--r--contrib/moses2/defer/CubePruningBitmapStack/Search.cpp206
-rw-r--r--contrib/moses2/defer/CubePruningBitmapStack/Search.h57
-rw-r--r--contrib/moses2/defer/CubePruningBitmapStack/Stack.cpp303
-rw-r--r--contrib/moses2/defer/CubePruningBitmapStack/Stack.h109
-rw-r--r--contrib/moses2/defer/CubePruningCardinalStack/Misc.cpp161
-rw-r--r--contrib/moses2/defer/CubePruningCardinalStack/Misc.h112
-rw-r--r--contrib/moses2/defer/CubePruningCardinalStack/Search.cpp206
-rw-r--r--contrib/moses2/defer/CubePruningCardinalStack/Search.h57
-rw-r--r--contrib/moses2/defer/CubePruningCardinalStack/Stack.cpp200
-rw-r--r--contrib/moses2/defer/CubePruningCardinalStack/Stack.h68
-rw-r--r--contrib/moses2/defer/CubePruningPerBitmap/Misc.cpp161
-rw-r--r--contrib/moses2/defer/CubePruningPerBitmap/Misc.h113
-rw-r--r--contrib/moses2/defer/CubePruningPerBitmap/Search.cpp273
-rw-r--r--contrib/moses2/defer/CubePruningPerBitmap/Search.h66
-rw-r--r--contrib/moses2/defer/CubePruningPerBitmap/Stacks.cpp72
-rw-r--r--contrib/moses2/defer/CubePruningPerBitmap/Stacks.h51
-rw-r--r--contrib/moses2/defer/CubePruningPerMiniStack/Misc.cpp161
-rw-r--r--contrib/moses2/defer/CubePruningPerMiniStack/Misc.h113
-rw-r--r--contrib/moses2/defer/CubePruningPerMiniStack/Search.cpp248
-rw-r--r--contrib/moses2/defer/CubePruningPerMiniStack/Search.h66
-rw-r--r--contrib/moses2/defer/CubePruningPerMiniStack/Stacks.cpp72
-rw-r--r--contrib/moses2/defer/CubePruningPerMiniStack/Stacks.h51
-rw-r--r--contrib/moses2/legacy/Bitmap.cpp87
-rw-r--r--contrib/moses2/legacy/Bitmap.h244
-rw-r--r--contrib/moses2/legacy/Bitmaps.cpp74
-rw-r--r--contrib/moses2/legacy/Bitmaps.h40
-rw-r--r--contrib/moses2/legacy/Factor.cpp45
-rw-r--r--contrib/moses2/legacy/Factor.h104
-rw-r--r--contrib/moses2/legacy/FactorCollection.cpp111
-rw-r--r--contrib/moses2/legacy/FactorCollection.h130
-rw-r--r--contrib/moses2/legacy/InputFileStream.cpp60
-rw-r--r--contrib/moses2/legacy/InputFileStream.h46
-rw-r--r--contrib/moses2/legacy/Matrix.cpp34
-rw-r--r--contrib/moses2/legacy/Matrix.h106
-rw-r--r--contrib/moses2/legacy/OutputCollector.h165
-rw-r--r--contrib/moses2/legacy/OutputFileStream.cpp88
-rw-r--r--contrib/moses2/legacy/OutputFileStream.h81
-rw-r--r--contrib/moses2/legacy/Parameter.cpp1707
-rw-r--r--contrib/moses2/legacy/Parameter.h176
-rw-r--r--contrib/moses2/legacy/Range.cpp32
-rw-r--r--contrib/moses2/legacy/Range.h123
-rw-r--r--contrib/moses2/legacy/ThreadPool.cpp150
-rw-r--r--contrib/moses2/legacy/ThreadPool.h140
-rw-r--r--contrib/moses2/legacy/Timer.cpp104
-rw-r--r--contrib/moses2/legacy/Timer.h39
-rw-r--r--contrib/moses2/legacy/Util2.cpp29
-rw-r--r--contrib/moses2/legacy/Util2.h351
-rw-r--r--contrib/moses2/legacy/gzfilebuf.h101
-rw-r--r--contrib/moses2/parameters/AllOptions.cpp123
-rw-r--r--contrib/moses2/parameters/AllOptions.h51
-rw-r--r--contrib/moses2/parameters/BeamSearchOptions.h15
-rw-r--r--contrib/moses2/parameters/BookkeepingOptions.cpp26
-rw-r--r--contrib/moses2/parameters/BookkeepingOptions.h18
-rw-r--r--contrib/moses2/parameters/ContextParameters.cpp53
-rw-r--r--contrib/moses2/parameters/ContextParameters.h21
-rw-r--r--contrib/moses2/parameters/CubePruningOptions.cpp80
-rw-r--r--contrib/moses2/parameters/CubePruningOptions.h25
-rw-r--r--contrib/moses2/parameters/InputOptions.cpp102
-rw-r--r--contrib/moses2/parameters/InputOptions.h32
-rw-r--r--contrib/moses2/parameters/LMBR_Options.cpp39
-rw-r--r--contrib/moses2/parameters/LMBR_Options.h26
-rw-r--r--contrib/moses2/parameters/LookupOptions.h17
-rw-r--r--contrib/moses2/parameters/MBR_Options.cpp26
-rw-r--r--contrib/moses2/parameters/MBR_Options.h21
-rw-r--r--contrib/moses2/parameters/NBestOptions.cpp68
-rw-r--r--contrib/moses2/parameters/NBestOptions.h32
-rw-r--r--contrib/moses2/parameters/OOVHandlingOptions.cpp50
-rw-r--r--contrib/moses2/parameters/OOVHandlingOptions.h27
-rw-r--r--contrib/moses2/parameters/OptionsBaseClass.cpp30
-rw-r--r--contrib/moses2/parameters/OptionsBaseClass.h20
-rw-r--r--contrib/moses2/parameters/ReorderingOptions.cpp31
-rw-r--r--contrib/moses2/parameters/ReorderingOptions.h20
-rw-r--r--contrib/moses2/parameters/ReportingOptions.cpp152
-rw-r--r--contrib/moses2/parameters/ReportingOptions.h70
-rw-r--r--contrib/moses2/parameters/SearchOptions.cpp107
-rw-r--r--contrib/moses2/parameters/SearchOptions.h54
-rw-r--r--contrib/moses2/parameters/ServerOptions.cpp87
-rw-r--r--contrib/moses2/parameters/ServerOptions.h43
-rw-r--r--contrib/moses2/parameters/SyntaxOptions.cpp47
-rw-r--r--contrib/moses2/parameters/SyntaxOptions.h40
-rw-r--r--contrib/moses2/pugiconfig.hpp74
-rw-r--r--contrib/moses2/pugixml.cpp12444
-rw-r--r--contrib/moses2/pugixml.hpp1400
-rw-r--r--contrib/moses2/server/Server.cpp68
-rw-r--r--contrib/moses2/server/Server.h39
-rw-r--r--contrib/moses2/server/TranslationRequest.cpp68
-rw-r--r--contrib/moses2/server/TranslationRequest.h81
-rw-r--r--contrib/moses2/server/Translator.cpp68
-rw-r--r--contrib/moses2/server/Translator.h40
323 files changed, 0 insertions, 50532 deletions
diff --git a/contrib/moses2-cmd/.cproject b/contrib/moses2-cmd/.cproject
deleted file mode 100644
index 9f4548c68..000000000
--- a/contrib/moses2-cmd/.cproject
+++ /dev/null
@@ -1,174 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
- <storageModule moduleId="org.eclipse.cdt.core.settings">
- <cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.597260676">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.597260676" moduleId="org.eclipse.cdt.core.settings" name="Debug">
- <externalSettings/>
- <extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
- <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- </extensions>
- </storageModule>
- <storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.597260676" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
- <folderInfo id="cdt.managedbuild.config.gnu.exe.debug.597260676." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1894543739" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
- <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.607512381" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
- <builder buildPath="${workspace_loc:/moses2-cmd}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.219597164" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
- <tool id="cdt.managedbuild.tool.gnu.archiver.base.2087910158" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
- <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1546967275" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
- <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.826148068" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1303802900" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.368826329" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
- <listOptionValue builtIn="false" value="/opt/local/include/"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/include&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../xmlrpc-c/include&quot;"/>
- </option>
- <option id="gnu.cpp.compiler.option.preprocessor.def.758438174" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
- <listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
- </option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.123491630" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.848723608" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.1977842293" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.debug.option.debugging.level.322285470" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1011859741" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1706155110" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
- <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.24079646" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
- <option id="gnu.cpp.link.option.libs.587418382" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
- <listOptionValue builtIn="false" value="moses2"/>
- <listOptionValue builtIn="false" value="xmlrpc_xmltok"/>
- <listOptionValue builtIn="false" value="xmlrpc_xmlparse"/>
- <listOptionValue builtIn="false" value="xmlrpc_util++"/>
- <listOptionValue builtIn="false" value="xmlrpc_util"/>
- <listOptionValue builtIn="false" value="xmlrpc_server_abyss++"/>
- <listOptionValue builtIn="false" value="xmlrpc_server_abyss"/>
- <listOptionValue builtIn="false" value="xmlrpc_server++"/>
- <listOptionValue builtIn="false" value="xmlrpc_server"/>
- <listOptionValue builtIn="false" value="xmlrpc_abyss"/>
- <listOptionValue builtIn="false" value="xmlrpc++"/>
- <listOptionValue builtIn="false" value="xmlrpc"/>
- <listOptionValue builtIn="false" value="cmph"/>
- <listOptionValue builtIn="false" value="search"/>
- <listOptionValue builtIn="false" value="OnDiskPt"/>
- <listOptionValue builtIn="false" value="lm"/>
- <listOptionValue builtIn="false" value="util"/>
- <listOptionValue builtIn="false" value="boost_iostreams"/>
- <listOptionValue builtIn="false" value="boost_system"/>
- <listOptionValue builtIn="false" value="boost_thread"/>
- <listOptionValue builtIn="false" value="boost_filesystem"/>
- <listOptionValue builtIn="false" value="boost_program_options"/>
- <listOptionValue builtIn="false" value="pthread"/>
- <listOptionValue builtIn="false" value="z"/>
- <listOptionValue builtIn="false" value="bz2"/>
- <listOptionValue builtIn="false" value="dl"/>
- <listOptionValue builtIn="false" value="rt"/>
- </option>
- <option id="gnu.cpp.link.option.paths.1920945405" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/lib&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../xmlrpc-c/lib&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../moses2/Debug&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/lm/Debug&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/OnDiskPt/Debug&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/search/Debug&quot;"/>
- <listOptionValue builtIn="false" value="/opt/local/lib"/>
- </option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1508244207" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
- <additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
- <additionalInput kind="additionalinput" paths="$(LIBS)"/>
- </inputType>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.994919684" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.2015973846" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
- </tool>
- </toolChain>
- </folderInfo>
- </configuration>
- </storageModule>
- <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
- </cconfiguration>
- <cconfiguration id="cdt.managedbuild.config.gnu.exe.release.347900682">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.347900682" moduleId="org.eclipse.cdt.core.settings" name="Release">
- <externalSettings/>
- <extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
- <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- </extensions>
- </storageModule>
- <storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.347900682" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
- <folderInfo id="cdt.managedbuild.config.gnu.exe.release.347900682." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.19950210" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
- <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.201761026" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
- <builder buildPath="${workspace_loc:/moses2-cmd}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.249336616" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
- <tool id="cdt.managedbuild.tool.gnu.archiver.base.475854190" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
- <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1047605391" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
- <option id="gnu.cpp.compiler.exe.release.option.optimization.level.881009789" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.exe.release.option.debugging.level.695719104" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2077834205" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.534514015" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.301062410" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.exe.release.option.debugging.level.1891262877" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.176623232" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.1762742642" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
- <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.563722476" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
- <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1771116495" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
- <additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
- <additionalInput kind="additionalinput" paths="$(LIBS)"/>
- </inputType>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.167166289" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.659838834" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
- </tool>
- </toolChain>
- </folderInfo>
- </configuration>
- </storageModule>
- <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
- </cconfiguration>
- </storageModule>
- <storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <project id="moses2-cmd.cdt.managedbuild.target.gnu.exe.1380079855" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
- </storageModule>
- <storageModule moduleId="scannerConfiguration">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.597260676;cdt.managedbuild.config.gnu.exe.debug.597260676.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1546967275;cdt.managedbuild.tool.gnu.cpp.compiler.input.123491630">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
- </scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.597260676;cdt.managedbuild.config.gnu.exe.debug.597260676.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.848723608;cdt.managedbuild.tool.gnu.c.compiler.input.1011859741">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
- </scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.347900682;cdt.managedbuild.config.gnu.exe.release.347900682.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.534514015;cdt.managedbuild.tool.gnu.c.compiler.input.176623232">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
- </scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.347900682;cdt.managedbuild.config.gnu.exe.release.347900682.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1047605391;cdt.managedbuild.tool.gnu.cpp.compiler.input.2077834205">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
- </scannerConfigBuildInfo>
- </storageModule>
- <storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
- <storageModule moduleId="refreshScope" versionNumber="2">
- <configuration configurationName="Release">
- <resource resourceType="PROJECT" workspacePath="/moses2-cmd"/>
- </configuration>
- <configuration configurationName="Debug">
- <resource resourceType="PROJECT" workspacePath="/moses2-cmd"/>
- </configuration>
- </storageModule>
- <storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
-</cproject>
diff --git a/contrib/moses2-cmd/.project b/contrib/moses2-cmd/.project
deleted file mode 100644
index 5e0e0e2b1..000000000
--- a/contrib/moses2-cmd/.project
+++ /dev/null
@@ -1,43 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<projectDescription>
- <name>moses2-cmd</name>
- <comment></comment>
- <projects>
- <project>lm</project>
- <project>moses</project>
- <project>moses2</project>
- <project>util</project>
- </projects>
- <buildSpec>
- <buildCommand>
- <name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
- <triggers>clean,full,incremental,</triggers>
- <arguments>
- </arguments>
- </buildCommand>
- <buildCommand>
- <name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
- <triggers>full,incremental,</triggers>
- <arguments>
- </arguments>
- </buildCommand>
- </buildSpec>
- <natures>
- <nature>org.eclipse.cdt.core.cnature</nature>
- <nature>org.eclipse.cdt.core.ccnature</nature>
- <nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
- <nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
- </natures>
- <linkedResources>
- <link>
- <name>Main.cpp</name>
- <type>1</type>
- <locationURI>PARENT-1-PROJECT_LOC/moses2/Main.cpp</locationURI>
- </link>
- <link>
- <name>Main.h</name>
- <type>1</type>
- <locationURI>PARENT-1-PROJECT_LOC/moses2/Main.h</locationURI>
- </link>
- </linkedResources>
-</projectDescription>
diff --git a/contrib/moses2/.cproject b/contrib/moses2/.cproject
deleted file mode 100644
index 82b82d591..000000000
--- a/contrib/moses2/.cproject
+++ /dev/null
@@ -1,180 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
- <storageModule moduleId="org.eclipse.cdt.core.settings">
- <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1097293041">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1097293041" moduleId="org.eclipse.cdt.core.settings" name="Debug">
- <externalSettings>
- <externalSetting>
- <entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/moses2"/>
- <entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/moses2/Debug"/>
- <entry flags="RESOLVED" kind="libraryFile" name="moses2" srcPrefixMapping="" srcRootPath=""/>
- </externalSetting>
- </externalSettings>
- <extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
- <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
- <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- </extensions>
- </storageModule>
- <storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1097293041" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
- <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1097293041." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.329828208" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
- <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF;org.eclipse.cdt.core.MachO64" id="cdt.managedbuild.targetPlatform.gnu.cross.389137927" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
- <builder buildPath="${workspace_loc:/moses2}/Debug" id="cdt.managedbuild.builder.gnu.cross.2144359329" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1430831084" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.354944414" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
- <option id="gnu.c.compiler.option.debugging.level.639588389" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.c.compiler.option.include.paths.7696150" name="Include paths (-I)" superClass="gnu.c.compiler.option.include.paths" useByScannerDiscovery="false"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1538601099" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1686613508" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
- <option id="gnu.cpp.compiler.option.optimization.level.299605809" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.debugging.level.769854045" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.1502531988" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../xmlrpc-c/include&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/include&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../DALM/include&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../DALM/darts-clone&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../&quot;"/>
- </option>
- <option id="gnu.cpp.compiler.option.preprocessor.def.1025143565" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" useByScannerDiscovery="false" valueType="definedSymbols">
- <listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
- <listOptionValue builtIn="false" value="HAVE_CMPH"/>
- <listOptionValue builtIn="false" value="HAVE_PROBINGPT"/>
- <listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
- <listOptionValue builtIn="false" value="WITH_THREADS"/>
- </option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2101942464" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1439481930" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.351063004" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
- <option id="gnu.cpp.link.option.paths.1260140770" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../DALM/lib&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/lib&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/lm/Debug&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/OnDiskPt/Debug&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>
- <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/search/Debug&quot;"/>
- <listOptionValue builtIn="false" value="/opt/local/lib"/>
- </option>
- <option id="gnu.cpp.link.option.libs.1671854463" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
- <listOptionValue builtIn="false" value="cmph"/>
- <listOptionValue builtIn="false" value="dalm"/>
- <listOptionValue builtIn="false" value="search"/>
- <listOptionValue builtIn="false" value="OnDiskPt"/>
- <listOptionValue builtIn="false" value="lm"/>
- <listOptionValue builtIn="false" value="util"/>
- <listOptionValue builtIn="false" value="boost_iostreams"/>
- <listOptionValue builtIn="false" value="boost_serialization"/>
- <listOptionValue builtIn="false" value="boost_system"/>
- <listOptionValue builtIn="false" value="boost_thread"/>
- <listOptionValue builtIn="false" value="boost_filesystem"/>
- <listOptionValue builtIn="false" value="boost_program_options"/>
- <listOptionValue builtIn="false" value="pthread"/>
- <listOptionValue builtIn="false" value="z"/>
- <listOptionValue builtIn="false" value="bz2"/>
- <listOptionValue builtIn="false" value="dl"/>
- <listOptionValue builtIn="false" value="rt"/>
- </option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1955045545" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
- <additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
- <additionalInput kind="additionalinput" paths="$(LIBS)"/>
- </inputType>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.archiver.1028669671" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.assembler.917359146" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.254745364" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
- </tool>
- </toolChain>
- </folderInfo>
- <fileInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1097293041.1123771618" name="HypothesisColl.h" rcbsApplicability="disable" resourcePath="HypothesisColl.h" toolsToInvoke=""/>
- <sourceEntries>
- <entry excluding="LM/LanguageModelDALM.cpp|defer|Main.cpp|CreateProbingPT2.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
- </sourceEntries>
- </configuration>
- </storageModule>
- <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
- </cconfiguration>
- <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.1445209421">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.1445209421" moduleId="org.eclipse.cdt.core.settings" name="Release">
- <externalSettings/>
- <extensions>
- <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
- <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
- <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
- <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
- </extensions>
- </storageModule>
- <storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.1445209421" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
- <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.1445209421." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.662721996" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
- <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF;org.eclipse.cdt.core.MachO64" id="cdt.managedbuild.targetPlatform.gnu.cross.895874625" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
- <builder buildPath="${workspace_loc:/moses2}/Release" id="cdt.managedbuild.builder.gnu.cross.468799862" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1943249236" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1011693969" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
- <option id="gnu.c.compiler.option.debugging.level.1339551360" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1175448562" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.2103617063" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
- <option id="gnu.cpp.compiler.option.optimization.level.13836904" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.debugging.level.763147930" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.946001537" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1462232829" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1359778241" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
- <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.89443491" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
- <additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
- <additionalInput kind="additionalinput" paths="$(LIBS)"/>
- </inputType>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.cross.archiver.762494367" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
- <tool id="cdt.managedbuild.tool.gnu.cross.assembler.140795725" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.95131148" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
- </tool>
- </toolChain>
- </folderInfo>
- </configuration>
- </storageModule>
- <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
- </cconfiguration>
- </storageModule>
- <storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <project id="moses2.cdt.managedbuild.target.gnu.cross.exe.1741914059" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
- </storageModule>
- <storageModule moduleId="scannerConfiguration">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1445209421;cdt.managedbuild.config.gnu.cross.exe.release.1445209421.;cdt.managedbuild.tool.gnu.cross.c.compiler.1943249236;cdt.managedbuild.tool.gnu.c.compiler.input.1175448562">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
- </scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1445209421;cdt.managedbuild.config.gnu.cross.exe.release.1445209421.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.2103617063;cdt.managedbuild.tool.gnu.cpp.compiler.input.946001537">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
- </scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1097293041;cdt.managedbuild.config.gnu.cross.exe.debug.1097293041.;cdt.managedbuild.tool.gnu.cross.c.compiler.1430831084;cdt.managedbuild.tool.gnu.c.compiler.input.1538601099">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
- </scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1097293041;cdt.managedbuild.config.gnu.cross.exe.debug.1097293041.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1686613508;cdt.managedbuild.tool.gnu.cpp.compiler.input.2101942464">
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
- </scannerConfigBuildInfo>
- </storageModule>
- <storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
- <storageModule moduleId="refreshScope" versionNumber="2">
- <configuration configurationName="Debug">
- <resource resourceType="PROJECT" workspacePath="/moses2"/>
- </configuration>
- <configuration configurationName="Release">
- <resource resourceType="PROJECT" workspacePath="/moses2"/>
- </configuration>
- </storageModule>
- <storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
-</cproject>
diff --git a/contrib/moses2/.project b/contrib/moses2/.project
deleted file mode 100644
index b17dc477e..000000000
--- a/contrib/moses2/.project
+++ /dev/null
@@ -1,29 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<projectDescription>
- <name>moses2</name>
- <comment></comment>
- <projects>
- <project>moses</project>
- <project>util</project>
- </projects>
- <buildSpec>
- <buildCommand>
- <name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
- <triggers>clean,full,incremental,</triggers>
- <arguments>
- </arguments>
- </buildCommand>
- <buildCommand>
- <name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
- <triggers>full,incremental,</triggers>
- <arguments>
- </arguments>
- </buildCommand>
- </buildSpec>
- <natures>
- <nature>org.eclipse.cdt.core.cnature</nature>
- <nature>org.eclipse.cdt.core.ccnature</nature>
- <nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
- <nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
- </natures>
-</projectDescription>
diff --git a/contrib/moses2/AlignmentInfo.cpp b/contrib/moses2/AlignmentInfo.cpp
deleted file mode 100644
index 2e19fa481..000000000
--- a/contrib/moses2/AlignmentInfo.cpp
+++ /dev/null
@@ -1,176 +0,0 @@
-/***********************************************************************
- Moses - statistical machine translation system
- Copyright (C) 2006-2011 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-#include <algorithm>
-#include <set>
-#include <sstream>
-#include "AlignmentInfo.h"
-#include "legacy/Util2.h"
-#include "util/exception.hh"
-
-namespace Moses2
-{
-
-AlignmentInfo::AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs)
- : m_collection(pairs)
-{
- BuildNonTermIndexMaps();
-}
-
-AlignmentInfo::AlignmentInfo(const std::vector<unsigned char> &aln)
-{
- assert(aln.size()%2==0);
- for (size_t i = 0; i < aln.size(); i+= 2)
- m_collection.insert(std::make_pair(size_t(aln[i]),size_t(aln[i+1])));
- BuildNonTermIndexMaps();
-}
-
-AlignmentInfo::AlignmentInfo(const std::string &str)
-{
- std::vector<std::string> points = Tokenize(str, " ");
- std::vector<std::string>::const_iterator iter;
- for (iter = points.begin(); iter != points.end(); iter++) {
- std::vector<size_t> point = Tokenize<size_t>(*iter, "-");
- UTIL_THROW_IF2(point.size() != 2, "Bad format of word alignment point: " << *iter);
- Add(point[0], point[1]);
- }
-}
-
-void AlignmentInfo::BuildNonTermIndexMaps()
-{
- if (m_collection.empty()) {
- return;
- }
- const_iterator p = begin();
- size_t maxIndex = p->second;
- for (++p; p != end(); ++p) {
- if (p->second > maxIndex) {
- maxIndex = p->second;
- }
- }
- m_nonTermIndexMap.resize(maxIndex+1, NOT_FOUND);
- m_nonTermIndexMap2.resize(maxIndex+1, NOT_FOUND);
- size_t i = 0;
- for (p = begin(); p != end(); ++p) {
- if (m_nonTermIndexMap[p->second] != NOT_FOUND) {
- // 1-to-many. Definitely a set of terminals. Don't bother storing 1-to-1 index map
- m_nonTermIndexMap.clear();
- m_nonTermIndexMap2.clear();
- return;
- }
- m_nonTermIndexMap[p->second] = i++;
- m_nonTermIndexMap2[p->second] = p->first;
- }
-}
-
-std::set<size_t> AlignmentInfo::GetAlignmentsForSource(size_t sourcePos) const
-{
- std::set<size_t> ret;
- CollType::const_iterator iter;
- for (iter = begin(); iter != end(); ++iter) {
- // const std::pair<size_t,size_t> &align = *iter;
- if (iter->first == sourcePos) {
- ret.insert(iter->second);
- }
- }
- return ret;
-}
-
-std::set<size_t> AlignmentInfo::GetAlignmentsForTarget(size_t targetPos) const
-{
- std::set<size_t> ret;
- CollType::const_iterator iter;
- for (iter = begin(); iter != end(); ++iter) {
- // const std::pair<size_t,size_t> &align = *iter;
- if (iter->second == targetPos) {
- ret.insert(iter->first);
- }
- }
- return ret;
-}
-
-
-bool
-compare_target(std::pair<size_t,size_t> const* a,
- std::pair<size_t,size_t> const* b)
-{
- if(a->second < b->second) return true;
- if(a->second == b->second) return (a->first < b->first);
- return false;
-}
-
-
-std::vector< const std::pair<size_t,size_t>* >
-AlignmentInfo::
-GetSortedAlignments(WordAlignmentSort SortOrder) const
-{
- std::vector< const std::pair<size_t,size_t>* > ret;
-
- CollType::const_iterator iter;
- for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
- const std::pair<size_t,size_t> &alignPair = *iter;
- ret.push_back(&alignPair);
- }
-
- switch (SortOrder) {
- case NoSort:
- break;
-
- case TargetOrder:
- std::sort(ret.begin(), ret.end(), compare_target);
- break;
-
- default:
- UTIL_THROW(util::Exception, "Unknown word alignment sort option: "
- << SortOrder);
- }
-
- return ret;
-
-}
-
-std::vector<size_t> AlignmentInfo::GetSourceIndex2PosMap() const
-{
- std::set<size_t> sourcePoses;
-
- CollType::const_iterator iter;
- for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
- size_t sourcePos = iter->first;
- sourcePoses.insert(sourcePos);
- }
- std::vector<size_t> ret(sourcePoses.begin(), sourcePoses.end());
- return ret;
-}
-
-std::string AlignmentInfo::Debug(const System &system) const
-{
- std::stringstream out;
- out << *this;
- return out.str();
-}
-
-std::ostream& operator<<(std::ostream& out, const AlignmentInfo& obj)
-{
- AlignmentInfo::const_iterator iter;
- for (iter = obj.begin(); iter != obj.end(); ++iter) {
- out << iter->first << "-" << iter->second << " ";
- }
- return out;
-}
-
-}
diff --git a/contrib/moses2/AlignmentInfo.h b/contrib/moses2/AlignmentInfo.h
deleted file mode 100644
index 89b31a1fc..000000000
--- a/contrib/moses2/AlignmentInfo.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/***********************************************************************
- Moses - statistical machine translation system
- Copyright (C) 2006-2011 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-
-#pragma once
-
-#include <iostream>
-#include <ostream>
-#include <set>
-#include <vector>
-#include <cstdlib>
-
-#include <boost/functional/hash.hpp>
-#include "TypeDef.h"
-
-namespace Moses2
-{
-
-class AlignmentInfoCollection;
-class System;
-
-/** Collection of non-terminal alignment pairs, ordered by source index.
- * Usually held by a TargetPhrase to map non-terms in hierarchical/syntax models
- */
-class AlignmentInfo
-{
- friend struct AlignmentInfoOrderer;
- friend struct AlignmentInfoHasher;
- friend class AlignmentInfoCollection;
- friend class VW;
-
- friend std::ostream& operator<<(std::ostream& out, const AlignmentInfo& obj);
-
-public:
- typedef std::set<std::pair<size_t,size_t> > CollType;
- typedef std::vector<size_t> NonTermIndexMap;
- typedef CollType::const_iterator const_iterator;
-
- const_iterator begin() const {
- return m_collection.begin();
- }
- const_iterator end() const {
- return m_collection.end();
- }
-
- void Add(size_t sourcePos, size_t targetPos) {
- m_collection.insert(std::pair<size_t, size_t>(sourcePos, targetPos));
- }
- /** Provides a map from target-side to source-side non-terminal indices.
- * The target-side index should be the rule symbol index (COUNTING terminals).
- * The index returned is the rule non-terminal index (IGNORING terminals).
- */
- const NonTermIndexMap &GetNonTermIndexMap() const {
- return m_nonTermIndexMap;
- }
-
- /** Like GetNonTermIndexMap but the return value is the symbol index (i.e.
- * the index counting both terminals and non-terminals) */
- const NonTermIndexMap &GetNonTermIndexMap2() const {
- return m_nonTermIndexMap2;
- }
-
- const CollType &GetAlignments() const {
- return m_collection;
- }
-
- std::set<size_t> GetAlignmentsForSource(size_t sourcePos) const;
- std::set<size_t> GetAlignmentsForTarget(size_t targetPos) const;
-
- size_t GetSize() const {
- return m_collection.size();
- }
-
- std::vector< const std::pair<size_t,size_t>* >
- GetSortedAlignments(Moses2::WordAlignmentSort SortOrder) const;
-
- std::vector<size_t> GetSourceIndex2PosMap() const;
-
- bool operator==(const AlignmentInfo& rhs) const {
- return m_collection == rhs.m_collection &&
- m_nonTermIndexMap == rhs.m_nonTermIndexMap;
- }
-
- std::string Debug(const System &system) const;
-
-private:
- //! AlignmentInfo objects should only be created by an AlignmentInfoCollection
- explicit AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs);
- explicit AlignmentInfo(const std::vector<unsigned char> &aln);
-
- // used only by VW to load word alignment between sentences
- explicit AlignmentInfo(const std::string &str);
-
- void BuildNonTermIndexMaps();
-
- CollType m_collection;
- NonTermIndexMap m_nonTermIndexMap;
- NonTermIndexMap m_nonTermIndexMap2;
-};
-
-/** Define an arbitrary strict weak ordering between AlignmentInfo objects
- * for use by AlignmentInfoCollection.
- */
-struct AlignmentInfoOrderer {
- bool operator()(const AlignmentInfo &a, const AlignmentInfo &b) const {
- if (a.m_collection == b.m_collection) {
- return a.m_nonTermIndexMap < b.m_nonTermIndexMap;
- } else {
- return a.m_collection < b.m_collection;
- }
- }
-};
-
-/**
- * Hashing functoid
- **/
-struct AlignmentInfoHasher {
- size_t operator()(const AlignmentInfo& a) const {
- size_t seed = 0;
- boost::hash_combine(seed,a.m_collection);
- boost::hash_combine(seed,a.m_nonTermIndexMap);
- return seed;
- }
-
-};
-
-inline size_t hash_value(const AlignmentInfo& a)
-{
- static AlignmentInfoHasher hasher;
- return hasher(a);
-}
-
-}
diff --git a/contrib/moses2/AlignmentInfoCollection.cpp b/contrib/moses2/AlignmentInfoCollection.cpp
deleted file mode 100644
index a6116400c..000000000
--- a/contrib/moses2/AlignmentInfoCollection.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
-/***********************************************************************
- Moses - statistical machine translation system
- Copyright (C) 2006-2011 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-
-#include "AlignmentInfoCollection.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-AlignmentInfoCollection AlignmentInfoCollection::s_instance;
-
-AlignmentInfoCollection::AlignmentInfoCollection()
-{
- std::set<std::pair<size_t,size_t> > pairs;
- m_emptyAlignmentInfo = Add(pairs);
-}
-
-AlignmentInfoCollection::~AlignmentInfoCollection()
-{}
-
-const AlignmentInfo &AlignmentInfoCollection::GetEmptyAlignmentInfo() const
-{
- return *m_emptyAlignmentInfo;
-}
-
-AlignmentInfo const *
-AlignmentInfoCollection::
-Add(AlignmentInfo const& ainfo)
-{
-#ifdef WITH_THREADS
- {
- boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
- AlignmentInfoSet::const_iterator i = m_collection.find(ainfo);
- if (i != m_collection.end())
- return &*i;
- }
- boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
-#endif
- std::pair<AlignmentInfoSet::iterator, bool> ret = m_collection.insert(ainfo);
- return &(*ret.first);
-}
-
-
-
-}
diff --git a/contrib/moses2/AlignmentInfoCollection.h b/contrib/moses2/AlignmentInfoCollection.h
deleted file mode 100644
index 0d409430d..000000000
--- a/contrib/moses2/AlignmentInfoCollection.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/***********************************************************************
- Moses - statistical machine translation system
- Copyright (C) 2006-2011 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-
-#pragma once
-
-#include "AlignmentInfo.h"
-
-#include <set>
-
-#ifdef WITH_THREADS
-#include <boost/thread/shared_mutex.hpp>
-#include <boost/thread/locks.hpp>
-#endif
-
-namespace Moses2
-{
-
-/** Singleton collection of all AlignmentInfo objects.
- * Used as a cache of all alignment info to save space.
- */
-class AlignmentInfoCollection
-{
-public:
- //! Access to the single static instance.
- static AlignmentInfoCollection &Instance() {
- return s_instance;
- }
-
-private:
- //! Worker behind the public Add(): looks ainfo up in m_collection and
- //! inserts it when absent (defined in the .cpp file).
- const AlignmentInfo* Add(AlignmentInfo const& ainfo);
-
-public:
- /** Returns a pointer to an AlignmentInfo object with the same source-target
- * alignment pairs as given in the argument. If the collection already
- * contains such an object then returns a pointer to it; otherwise a new
- * one is inserted.
- *
- * aln is first wrapped in a temporary AlignmentInfo, so ALNREP must be a
- * type AlignmentInfo can be constructed from.
- */
- template<typename ALNREP>
- AlignmentInfo const *
- Add(ALNREP const & aln) {
- return this->Add(AlignmentInfo(aln));
- }
-
- //! Returns a reference to the empty AlignmentInfo object.
- const AlignmentInfo &GetEmptyAlignmentInfo() const;
-
-private:
- typedef std::set<AlignmentInfo, AlignmentInfoOrderer> AlignmentInfoSet;
-
-
- //! Only a single static variable should be created.
- AlignmentInfoCollection();
- ~AlignmentInfoCollection();
-
- static AlignmentInfoCollection s_instance;
-
-#ifdef WITH_THREADS
- //reader-writer lock
- mutable boost::shared_mutex m_accessLock;
-#endif
-
- //! Every AlignmentInfo handed out by Add() lives in this set.
- AlignmentInfoSet m_collection;
- //! Set once by the constructor.
- const AlignmentInfo *m_emptyAlignmentInfo;
-};
-
-}
diff --git a/contrib/moses2/ArcLists.cpp b/contrib/moses2/ArcLists.cpp
deleted file mode 100644
index edc985465..000000000
--- a/contrib/moses2/ArcLists.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * ArcList.cpp
- *
- * Created on: 26 Oct 2015
- * Author: hieu
- */
-#include <iostream>
-#include <sstream>
-#include <algorithm>
-#include <boost/foreach.hpp>
-#include "ArcLists.h"
-#include "HypothesisBase.h"
-#include "util/exception.hh"
-
-using namespace std;
-
-namespace Moses2
-{
-
-ArcLists::ArcLists()
-{
- // Nothing to set up: m_coll is default-constructed.
-
-}
-
-// Frees every ArcList that is still registered in the map.
-ArcLists::~ArcLists()
-{
-  for (Coll::const_iterator iter = m_coll.begin(); iter != m_coll.end(); ++iter) {
-    delete iter->second;
-  }
-}
-
-// Records currHypo in an arc list.
-//  - added == false: currHypo lost, so it joins the list of otherHypo (which
-//    must exist).
-//  - added == true: currHypo won; it takes over the list of the hypothesis it
-//    displaced (otherHypo) or starts a new list when nothing was displaced.
-void ArcLists::AddArc(bool added, const HypothesisBase *currHypo,
-    const HypothesisBase *otherHypo)
-{
-  ArcList *arcList;
-  if (!added) {
-    // we're losers!
-    // there should be a winner, we're not doing beam pruning
-    UTIL_THROW_IF2(otherHypo == NULL, "There must have been a winning hypo");
-    arcList = &GetArcList(otherHypo);
-  }
-  else {
-    // we're winners! Re-home the loser's list, or start a fresh one.
-    arcList = otherHypo ? &GetAndDetachArcList(otherHypo) : new ArcList;
-    m_coll[currHypo] = arcList;
-  }
-
-  // in any case, add the curr hypo
-  arcList->push_back(currHypo);
-}
-
-// Mutable lookup of the arc list keyed by hypo; throws when there is none.
-ArcList &ArcLists::GetArcList(const HypothesisBase *hypo)
-{
-  Coll::iterator iter = m_coll.find(hypo);
-  UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
-  return *iter->second;
-}
-
-// Read-only lookup of the arc list keyed by hypo. On a miss, every key that
-// is present is written to cerr before the exception is thrown.
-const ArcList &ArcLists::GetArcList(const HypothesisBase *hypo) const
-{
-  Coll::const_iterator iter = m_coll.find(hypo);
-
-  if (iter == m_coll.end()) {
-    cerr << "looking for:" << hypo << " have " << m_coll.size() << " :";
-    for (Coll::const_iterator known = m_coll.begin(); known != m_coll.end(); ++known) {
-      cerr << known->first << " ";
-    }
-  }
-
-  UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list for " << hypo);
-  return *iter->second;
-}
-
-// Removes hypo's entry from the map and returns the list it pointed to.
-// The list itself is not freed here.
-ArcList &ArcLists::GetAndDetachArcList(const HypothesisBase *hypo)
-{
-  Coll::iterator iter = m_coll.find(hypo);
-  UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
-
-  // Read the pointer before the entry goes away.
-  ArcList *detached = iter->second;
-  m_coll.erase(iter);
-  return *detached;
-}
-
-// Sorts every registered arc list with HypothesisFutureScoreOrderer.
-void ArcLists::Sort()
-{
-  for (Coll::iterator iter = m_coll.begin(); iter != m_coll.end(); ++iter) {
-    ArcList &arcs = *iter->second;
-    std::sort(arcs.begin(), arcs.end(), HypothesisFutureScoreOrderer());
-  }
-}
-
-// Drops hypo's entry from the map and frees its arc list; throws when hypo
-// has no entry.
-void ArcLists::Delete(const HypothesisBase *hypo)
-{
-  Coll::iterator iter = m_coll.find(hypo);
-  UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
-
-  // Read the pointer before the entry goes away, then free the list.
-  ArcList *doomed = iter->second;
-  m_coll.erase(iter);
-  delete doomed;
-}
-
-// Renders each registered list as "<address>(<number of arcs>) ".
-// The system argument is not used.
-std::string ArcLists::Debug(const System &system) const
-{
-  stringstream out;
-  for (Coll::const_iterator iter = m_coll.begin(); iter != m_coll.end(); ++iter) {
-    const ArcList *arcs = iter->second;
-    out << arcs << "(" << arcs->size() << ") ";
-  }
-  return out.str();
-}
-
-}
-
diff --git a/contrib/moses2/ArcLists.h b/contrib/moses2/ArcLists.h
deleted file mode 100644
index db606401f..000000000
--- a/contrib/moses2/ArcLists.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * ArcList.h
- *
- * Created on: 26 Oct 2015
- * Author: hieu
- */
-#pragma once
-#include <vector>
-#include <boost/unordered_map.hpp>
-
-namespace Moses2
-{
-class System;
-
-class HypothesisBase;
-
-typedef std::vector<const HypothesisBase*> ArcList;
-
-// Maps a hypothesis to the list of hypotheses recorded against it (see
-// AddArc). The lists are heap-allocated and freed by Delete() or by the
-// destructor.
-class ArcLists
-{
-public:
- ArcLists();
- virtual ~ArcLists();
-
- // added tells whether currHypo won (true) or lost (false) against
- // otherHypo; otherHypo may be NULL only when added is true.
- void AddArc(bool added, const HypothesisBase *currHypo,
- const HypothesisBase *otherHypo);
- // Sorts every list with HypothesisFutureScoreOrderer.
- void Sort();
- // Removes and frees the list keyed by hypo; throws if there is none.
- void Delete(const HypothesisBase *hypo);
-
- // Throws if hypo has no list.
- const ArcList &GetArcList(const HypothesisBase *hypo) const;
-
- std::string Debug(const System &system) const;
-protected:
- typedef boost::unordered_map<const HypothesisBase*, ArcList*> Coll;
- Coll m_coll;
-
- ArcList &GetArcList(const HypothesisBase *hypo);
- // Unregisters hypo but keeps its list alive for the caller.
- ArcList &GetAndDetachArcList(const HypothesisBase *hypo);
-
-};
-
-}
-
diff --git a/contrib/moses2/Array.h b/contrib/moses2/Array.h
deleted file mode 100644
index 59b003135..000000000
--- a/contrib/moses2/Array.h
+++ /dev/null
@@ -1,94 +0,0 @@
-#pragma once
-#include <cassert>
-#include <boost/functional/hash.hpp>
-#include "MemPool.h"
-
-namespace Moses2
-{
-
-/**
- * Array whose storage is obtained once from a MemPool in the constructor.
- * The capacity (m_maxSize) is fixed at construction; resize() only changes
- * the logical size within that capacity. Array never frees its storage.
- *
- * Compare() and operator== look at the raw bytes of the elements (memcmp).
- */
-template<typename T>
-class Array
-{
-public:
-  typedef T* iterator;
-  typedef const T* const_iterator;
-
-  //! iterators
-  const_iterator begin() const
-  {
-    return m_arr;
-  }
-  const_iterator end() const
-  {
-    return m_arr + m_size;
-  }
-
-  iterator begin()
-  {
-    return m_arr;
-  }
-  iterator end()
-  {
-    return m_arr + m_size;
-  }
-
-  //! Allocates room for size elements from pool and assigns val to each.
-  Array(MemPool &pool, size_t size = 0, const T &val = T())
-  {
-    m_size = size;
-    m_maxSize = size;
-    m_arr = pool.Allocate<T>(size);
-    for (size_t i = 0; i < size; ++i) {
-      m_arr[i] = val;
-    }
-  }
-
-  size_t size() const
-  {
-    return m_size;
-  }
-
-  //! Unchecked element access.
-  const T& operator[](size_t ind) const
-  {
-    return m_arr[ind];
-  }
-
-  T& operator[](size_t ind)
-  {
-    return m_arr[ind];
-  }
-
-  T *GetArray()
-  { return m_arr; }
-
-  //! Combines the hashes of the first size() elements.
-  size_t hash() const
-  {
-    size_t seed = 0;
-    for (size_t i = 0; i < m_size; ++i) {
-      boost::hash_combine(seed, m_arr[i]);
-    }
-    return seed;
-  }
-
-  /** Byte-wise three-way comparison.
-   *  Only the elements both arrays actually hold are passed to memcmp, so a
-   *  shorter argument is never read past its end. If that common part is
-   *  identical, the shorter array orders first. For arrays of equal size the
-   *  result is exactly the memcmp result.
-   */
-  int Compare(const Array &compare) const
-  {
-    const size_t common = (m_size < compare.m_size) ? m_size : compare.m_size;
-    const int cmp = memcmp(m_arr, compare.m_arr, sizeof(T) * common);
-    if (cmp != 0 || m_size == compare.m_size) {
-      return cmp;
-    }
-    return (m_size < compare.m_size) ? -1 : 1;
-  }
-
-  bool operator==(const Array &compare) const
-  {
-    int cmp = Compare(compare);
-    return cmp == 0;
-  }
-
-  //! Changes the logical size; newSize must not exceed the capacity fixed
-  //! at construction.
-  void resize(size_t newSize)
-  {
-    // Check the requested size, not the current one: the storage holds
-    // m_maxSize elements and cannot grow.
-    assert(newSize <= m_maxSize);
-    m_size = newSize;
-  }
-protected:
-  size_t m_size, m_maxSize;
-  T *m_arr;
-};
-
-}
diff --git a/contrib/moses2/EstimatedScores.cpp b/contrib/moses2/EstimatedScores.cpp
deleted file mode 100644
index dfe52bb2b..000000000
--- a/contrib/moses2/EstimatedScores.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-// $Id$
-// vim:tabstop=2
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#include <string>
-#include <iostream>
-#include "EstimatedScores.h"
-
-using namespace std;
-
-namespace Moses2
-{
-/**
- * Calculate future score estimate for a given coverage bitmap
- *
- * \param bitmap coverage bitmap
- */
-
-// Walks the bitmap once and adds GetValue(first, last) for every maximal run
-// of uncovered positions.
-float EstimatedScores::CalcEstimatedScore(Bitmap const &bitmap) const
-{
-  const size_t notInGap = numeric_limits<size_t>::max();
-  const size_t numPositions = bitmap.GetSize();
-  size_t gapBegin = notInGap;
-  float total = 0.0f;
-
-  for (size_t pos = 0; pos < numPositions; ++pos) {
-    const bool covered = bitmap.GetValue(pos);
-    const bool insideGap = (gapBegin != notInGap);
-    if (!covered && !insideGap) {
-      // a gap opens here
-      gapBegin = pos;
-    }
-    else if (covered && insideGap) {
-      // the gap closed on the previous position
-      total += GetValue(gapBegin, pos - 1);
-      gapBegin = notInGap;
-    }
-  }
-
-  // a gap that runs up to the last position
-  if (gapBegin != notInGap) {
-    total += GetValue(gapBegin, numPositions - 1);
-  }
-
-  return total;
-}
-
-/**
- * Calculate future score estimate for a given coverage bitmap
- * and an additional span that is also covered. This function is used
- * to compute future score estimates for hypotheses that we may want
- * to build, but first want to check.
- *
- * Note: this function is implemented in a more complex way than
- * the basic one (w/o additional phrase) for speed reasons,
- * which is probably overkill.
- *
- * \param bitmap coverage bitmap
- * \param startPos start of the span that is added to the coverage
- * \param endPos end of the span that is added to the coverage
- */
-
-float EstimatedScores::CalcEstimatedScore(Bitmap const &bitmap, size_t startPos,
- size_t endPos) const
-{
- // sentinel for "no gap currently open"
- const size_t notInGap = numeric_limits<size_t>::max();
- float estimatedScore = 0.0f;
- size_t startGap = bitmap.GetFirstGapPos();
- if (startGap == NOT_FOUND) return estimatedScore; // everything filled
-
- // start loop at first gap
- size_t startLoop = startGap + 1;
- if (startPos == startGap) { // unless covered by phrase
- startGap = notInGap;
- startLoop = endPos + 1; // -> postpone start
- }
-
- // The loop only needs to run up to the last position that counts as
- // covered: either the bitmap's own last position or the end of the span.
- size_t lastCovered = bitmap.GetLastPos();
- if (endPos > lastCovered || lastCovered == NOT_FOUND) lastCovered = endPos;
-
- for (size_t currPos = startLoop; currPos <= lastCovered; currPos++) {
- // start of a new gap?
- // (positions inside [startPos, endPos] are treated as covered)
- if (startGap == notInGap && bitmap.GetValue(currPos) == false
- && (currPos < startPos || currPos > endPos)) {
- startGap = currPos;
- }
- // end of a gap?
- else if (startGap != notInGap
- && (bitmap.GetValue(currPos) == true
- || (startPos <= currPos && currPos <= endPos))) {
- estimatedScore += GetValue(startGap, currPos - 1);
- startGap = notInGap;
- }
- }
- // coverage ending with gap?
- // everything after lastCovered is scored as one trailing gap
- if (lastCovered != bitmap.GetSize() - 1) {
- estimatedScore += GetValue(lastCovered + 1, bitmap.GetSize() - 1);
- }
-
- return estimatedScore;
-}
-
-}
-
diff --git a/contrib/moses2/EstimatedScores.h b/contrib/moses2/EstimatedScores.h
deleted file mode 100644
index eae2e08ab..000000000
--- a/contrib/moses2/EstimatedScores.h
+++ /dev/null
@@ -1,61 +0,0 @@
-// $Id$
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#pragma once
-
-#include <iostream>
-#include "legacy/Util2.h"
-#include "legacy/Bitmap.h"
-#include "legacy/Matrix.h"
-
-namespace Moses2
-{
-class MemPool;
-class System;
-
-//! A square array of floats to store future costs in the phrase-based decoder
-class EstimatedScores: public Matrix<float>
-{
-public:
-  // Forwards pool and a size x size dimension to Matrix<float>.
-  EstimatedScores(MemPool &pool, size_t size) :
-    Matrix<float>(pool, size, size)
-  {
-  }
-
-  ~EstimatedScores(); // not implemented
-
-  // Estimate for everything the bitmap leaves uncovered.
-  float CalcEstimatedScore(Bitmap const&) const;
-  // Same, treating [startPos, endPos] as covered as well.
-  float CalcEstimatedScore(Bitmap const&, size_t startPos, size_t endPos) const;
-
-  // Writes one output line per end position, each listing the value for
-  // every start position followed by a space. The system argument is unused.
-  std::ostream &Debug(std::ostream &out, const System &system) const
-  {
-    const size_t dim = GetSize();
-    for (size_t endPos = 0; endPos < dim; ++endPos) {
-      for (size_t startPos = 0; startPos < dim; ++startPos) {
-        out << GetValue(startPos, endPos) << " ";
-      }
-      out << std::endl;
-    }
-    return out;
-  }
-
-};
-
-}
-
diff --git a/contrib/moses2/FF/Distortion.cpp b/contrib/moses2/FF/Distortion.cpp
deleted file mode 100644
index 1d7b7246d..000000000
--- a/contrib/moses2/FF/Distortion.cpp
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Distortion.cpp
- *
- * Created on: 28 Oct 2015
- * Author: hieu
- */
-#include <sstream>
-#include "Distortion.h"
-#include "../PhraseBased/Hypothesis.h"
-#include "../PhraseBased/Manager.h"
-#include "../legacy/Range.h"
-#include "../legacy/Bitmap.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-// State kept by the Distortion feature: the source range of the last applied
-// phrase and the position of the first gap in the coverage.
-// Only the end position of the range takes part in hash() and operator==.
-struct DistortionState_traditional: public FFState
-{
-  Range range;
-  int first_gap;
-
-  // first_gap gets the same "no gap" marker that EmptyHypothesisState()
-  // assigns, so ToString() never reads an indeterminate value on a state
-  // that has not been filled in yet.
-  DistortionState_traditional() :
-    range(), first_gap(NOT_FOUND)
-  {
-  }
-
-  // Overwrites both fields.
-  void Set(const Range& wr, int fg)
-  {
-    range = wr;
-    first_gap = fg;
-  }
-
-  size_t hash() const
-  {
-    return range.GetEndPos();
-  }
-
-  // other is assumed to be a DistortionState_traditional (unchecked cast).
-  virtual bool operator==(const FFState& other) const
-  {
-    const DistortionState_traditional& o =
-      static_cast<const DistortionState_traditional&>(other);
-    return range.GetEndPos() == o.range.GetEndPos();
-  }
-
-  // "<first_gap> <range>"
-  virtual std::string ToString() const
-  {
-    stringstream sb;
-    sb << first_gap << " " << range;
-    return sb.str();
-  }
-
-};
-
-///////////////////////////////////////////////////////////////////////
-// Hands startInd and line to the StatefulFeatureFunction base, then calls
-// ReadParameters().
-Distortion::Distortion(size_t startInd, const std::string &line) :
- StatefulFeatureFunction(startInd, line)
-{
- ReadParameters();
-}
-
-Distortion::~Distortion()
-{
- // Nothing to do here.
-}
-
-// Constructs a DistortionState_traditional in memory obtained from pool.
-// The sys argument is not used.
-FFState* Distortion::BlankState(MemPool &pool, const System &sys) const
-{
-  void *mem = pool.Allocate<DistortionState_traditional>();
-  return new (mem) DistortionState_traditional();
-}
-
-// Fills in the state of the empty hypothesis: no previous phrase and no
-// first gap, both expressed with NOT_FOUND.
-void Distortion::EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
-    const InputType &input, const Hypothesis &hypo) const
-{
-  // fake previous translated phrase start and end
-  // (a disabled variant used input.m_frontSpanCoveredLength here, for
-  //  --continue-partial-translation)
-  const size_t fakeStart = NOT_FOUND;
-  const size_t fakeEnd = NOT_FOUND;
-
-  DistortionState_traditional &blank =
-    static_cast<DistortionState_traditional&>(state);
-  blank.range = Range(fakeStart, fakeEnd);
-  blank.first_gap = NOT_FOUND;
-}
-
-// Phrase-based overload: intentionally empty, scores and estimatedScore are
-// left untouched.
-void Distortion::EvaluateInIsolation(MemPool &pool, const System &system,
- const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
-{
-}
-
-// SCFG overload: intentionally empty, scores and estimatedScore are left
-// untouched.
-void Distortion::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
-{
-}
-
-// Scores the jump from the previous state's range to the range of hypo's
-// input path, adds that score for this feature, and stores the new range and
-// first-gap position in state.
-void Distortion::EvaluateWhenApplied(const ManagerBase &mgr,
-    const Hypothesis &hypo, const FFState &prevState, Scores &scores,
-    FFState &state) const
-{
-  const DistortionState_traditional &before =
-    static_cast<const DistortionState_traditional&>(prevState);
-  const Range &currRange = hypo.GetInputPath().range;
-
-  const SCORE jumpScore = CalculateDistortionScore(before.range, currRange,
-      before.first_gap);
-  scores.PlusEquals(mgr.system, *this, jumpScore);
-
-  DistortionState_traditional &after =
-    static_cast<DistortionState_traditional&>(state);
-  after.Set(hypo.GetInputPath().range, hypo.GetBitmap().GetFirstGapPos());
-}
-
-// Returns the (non-positive) distortion score for moving from prev to curr.
-// NOTE(review): useEarlyDistortionCost is hard-coded to false, so only the
-// first branch can run; the Moore and Quirk variant below is kept but is
-// currently unreachable, and FirstGap is only read inside it.
-SCORE Distortion::CalculateDistortionScore(const Range &prev, const Range &curr,
- const int FirstGap) const
-{
- bool useEarlyDistortionCost = false;
- if (!useEarlyDistortionCost) {
- // plain variant: negated jump distance
- return -(SCORE) ComputeDistortionDistance(prev, curr);
- }
- else {
- /* Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007
- Definitions:
- S : current source range
- S' : last translated source phrase range
- S'' : longest fully-translated initial segment
- */
-
- int prefixEndPos = (int) FirstGap - 1;
- if ((int) FirstGap == -1) prefixEndPos = -1;
-
- // case1: S is adjacent to S'' => return 0
- if ((int) curr.GetStartPos() == prefixEndPos + 1) {
- //IFVERBOSE(4) std::cerr<< "MQ07disto:case1" << std::endl;
- return 0;
- }
-
- // case2: S is to the left of S' => return 2(length(S))
- if ((int) curr.GetEndPos() < (int) prev.GetEndPos()) {
- //IFVERBOSE(4) std::cerr<< "MQ07disto:case2" << std::endl;
- return (float) -2 * (int) curr.GetNumWordsCovered();
- }
-
- // case3: S' is a subsequence of S'' => return 2(nbWordBetween(S,S'')+length(S))
- if ((int) prev.GetEndPos() <= prefixEndPos) {
- //IFVERBOSE(4) std::cerr<< "MQ07disto:case3" << std::endl;
- int z = (int) curr.GetStartPos() - prefixEndPos - 1;
- return (float) -2 * (z + (int) curr.GetNumWordsCovered());
- }
-
- // case4: otherwise => return 2(nbWordBetween(S,S')+length(S))
- //IFVERBOSE(4) std::cerr<< "MQ07disto:case4" << std::endl;
- return (float) -2
- * ((int) curr.GetNumWordsBetween(prev) + (int) curr.GetNumWordsCovered());
-
- }
-}
-
-// Absolute jump distance between prev and current. When prev covers no
-// words, the distance is simply current's start position.
-int Distortion::ComputeDistortionDistance(const Range& prev,
-    const Range& current) const
-{
-  if (prev.GetNumWordsCovered() == 0) {
-    const int fromStart = current.GetStartPos();
-    return abs(fromStart);
-  }
-
-  const int jump = (int) prev.GetEndPos() - (int) current.GetStartPos() + 1;
-  return abs(jump);
-}
-
-// SCFG overload: not supported, always throws.
-void Distortion::EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-}
diff --git a/contrib/moses2/FF/Distortion.h b/contrib/moses2/FF/Distortion.h
deleted file mode 100644
index 45577d1c3..000000000
--- a/contrib/moses2/FF/Distortion.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Distortion.h
- *
- * Created on: 28 Oct 2015
- * Author: hieu
- */
-
-#ifndef DISTORTION_H_
-#define DISTORTION_H_
-
-#include "StatefulFeatureFunction.h"
-#include "../legacy/Range.h"
-#include "../TypeDef.h"
-
-namespace Moses2
-{
-
-// Stateful feature that scores the jump between consecutively applied
-// source ranges.
-class Distortion: public StatefulFeatureFunction
-{
-public:
- Distortion(size_t startInd, const std::string &line);
- virtual ~Distortion();
-
- // Creates an unfilled state object in memory taken from pool.
- virtual FFState* BlankState(MemPool &pool, const System &sys) const;
- // Fills state for the empty hypothesis.
- virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const;
-
- // Both EvaluateInIsolation overloads are implemented as no-ops.
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- // No-op.
- virtual void EvaluateWhenApplied(const std::deque<Hypothesis*> &hypos) const
- {
- }
-
- // Adds the distortion score for hypo and writes the follow-up state.
- virtual void EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const;
-
- // Throws "Not implemented".
- virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const;
-
-protected:
- SCORE CalculateDistortionScore(const Range &prev, const Range &curr,
- const int FirstGap) const;
-
- int ComputeDistortionDistance(const Range& prev, const Range& current) const;
-
-};
-
-}
-
-#endif /* DISTORTION_H_ */
diff --git a/contrib/moses2/FF/FFState.cpp b/contrib/moses2/FF/FFState.cpp
deleted file mode 100644
index e69de29bb..000000000
--- a/contrib/moses2/FF/FFState.cpp
+++ /dev/null
diff --git a/contrib/moses2/FF/FFState.h b/contrib/moses2/FF/FFState.h
deleted file mode 100644
index 33ef5d1f6..000000000
--- a/contrib/moses2/FF/FFState.h
+++ /dev/null
@@ -1,55 +0,0 @@
-#pragma once
-
-#include <vector>
-#include <stddef.h>
-#include "util/exception.hh"
-
-namespace Moses2
-{
-
-// Abstract base for feature-function state objects. Concrete states supply
-// hashing, equality and a printable form.
-class FFState
-{
-public:
- virtual ~FFState()
- {
- }
- virtual size_t hash() const = 0;
- virtual bool operator==(const FFState& other) const = 0;
-
- // Defined as the negation of operator==.
- virtual bool operator!=(const FFState& other) const
- {
- return !(*this == other);
- }
-
- // Text written by operator<< for FFState.
- virtual std::string ToString() const = 0;
-};
-
-////////////////////////////////////////////////////////////////////////////////////////
-// Streams the state's ToString() text.
-inline std::ostream& operator<<(std::ostream& out, const FFState& obj)
-{
-  return out << obj.ToString();
-}
-
-////////////////////////////////////////////////////////////////////////////////////////
-// State that carries no information: every DummyState hashes to 0 and
-// compares equal to any other state.
-class DummyState: public FFState
-{
-public:
-  DummyState()
-  {
-  }
-
-  virtual size_t hash() const
-  {
-    return 0;
-  }
-
-  virtual bool operator==(const FFState& other) const
-  {
-    return true;
-  }
-
-  // FFState declares ToString() pure virtual; without this override the
-  // class stays abstract and cannot be instantiated.
-  virtual std::string ToString() const
-  {
-    return "DummyState";
-  }
-
-};
-
-}
-
diff --git a/contrib/moses2/FF/FeatureFunction.cpp b/contrib/moses2/FF/FeatureFunction.cpp
deleted file mode 100644
index 3326ceaa4..000000000
--- a/contrib/moses2/FF/FeatureFunction.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * FeatureFunction.cpp
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-#include <string>
-#include <vector>
-#include "FeatureFunction.h"
-#include "../System.h"
-#include "../legacy/Util2.h"
-#include "util/exception.hh"
-
-using namespace std;
-
-namespace Moses2
-{
-
-// Stores the score offset, applies the defaults (one score, no phrase-table
-// index, tuneable) and then parses the configuration line.
-FeatureFunction::FeatureFunction(size_t startInd, const std::string &line)
-:m_startInd(startInd)
-,m_numScores(1)
-,m_PhraseTableInd(NOT_FOUND)
-,m_tuneable(true)
-{
- ParseLine(line);
- //cerr << GetName() << " " << m_startInd << "-" << (m_startInd + m_numScores - 1) << endl;
-}
-
-FeatureFunction::~FeatureFunction()
-{
- // Nothing to do here.
-}
-
-// Splits a feature line into tokens. The first token is skipped here; every
-// later token must be a key=value pair, and no key may appear twice.
-//   num-features -> m_numScores
-//   name         -> m_name
-//   anything else is queued in m_args for ReadParameters().
-// Throws on an empty line, a malformed argument or a repeated key.
-void FeatureFunction::ParseLine(const std::string &line)
-{
-  vector<string> toks = Tokenize(line);
-  UTIL_THROW_IF2(toks.empty(), "Empty line");
-
-  // keys seen so far, for duplicate detection
-  set<string> keys;
-
-  for (size_t i = 1; i < toks.size(); ++i) {
-    // split on the first '=' only, so the value may itself contain '='
-    vector<string> args = TokenizeFirstOnly(toks[i], "=");
-    UTIL_THROW_IF2(args.size() != 2,
-        "Incorrect format for feature function arg: " << toks[i]);
-
-    pair<set<string>::iterator, bool> ret = keys.insert(args[0]);
-    UTIL_THROW_IF2(!ret.second, "Duplicate key in line " << line);
-
-    if (args[0] == "num-features") {
-      m_numScores = Scan<size_t>(args[1]);
-    }
-    else if (args[0] == "name") {
-      m_name = args[1];
-    }
-    else {
-      m_args.push_back(args);
-    }
-  }
-}
-
-// Feeds the queued key/value pairs to SetParameter() front to back; a pair
-// is dropped from m_args only after SetParameter() has returned for it.
-void FeatureFunction::ReadParameters()
-{
-  for (; !m_args.empty(); m_args.erase(m_args.begin())) {
-    const vector<string> &keyValue = m_args.front();
-    SetParameter(keyValue[0], keyValue[1]);
-  }
-}
-
-// Base-class handler: understands only "tuneable" and throws for any other
-// key.
-void FeatureFunction::SetParameter(const std::string& key,
-    const std::string& value)
-{
-  if (key != "tuneable") {
-    UTIL_THROW2(GetName() << ": Unknown argument " << key << "=" << value);
-  }
-  m_tuneable = Scan<bool>(value);
-}
-
-}
-
diff --git a/contrib/moses2/FF/FeatureFunction.h b/contrib/moses2/FF/FeatureFunction.h
deleted file mode 100644
index 1e25fce39..000000000
--- a/contrib/moses2/FF/FeatureFunction.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * FeatureFunction.h
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include <cstddef>
-#include <string>
-#include <vector>
-#include "../TypeDef.h"
-#include "../Phrase.h"
-
-namespace Moses2
-{
-template<typename WORD>
-class TargetPhrase;
-
-class System;
-class PhraseImpl;
-class TargetPhrases;
-class TargetPhraseImpl;
-class Scores;
-class ManagerBase;
-class MemPool;
-
-namespace SCFG
-{
-class TargetPhrase;
-class TargetPhrases;
-class Word;
-}
-
-// Base class of all feature functions. It owns the settings parsed from the
-// feature's configuration line and declares the scoring hooks.
-class FeatureFunction
-{
-public:
-
- // startInd: offset of this feature's first score; line: its config line.
- FeatureFunction(size_t startInd, const std::string &line);
- virtual ~FeatureFunction();
- // Loading hook; does nothing by default.
- virtual void Load(System &system)
- {
- }
-
- size_t GetStartInd() const
- {
- return m_startInd;
- }
- // 1 unless the config line sets num-features.
- size_t GetNumScores() const
- {
- return m_numScores;
- }
- const std::string &GetName() const
- {
- return m_name;
- }
- void SetName(const std::string &val)
- {
- m_name = val;
- }
-
- // NOTE(review): used as a yes/no flag (returns false here) although the
- // declared return type is size_t.
- virtual size_t HasPhraseTableInd() const
- {
- return false;
- }
- void SetPhraseTableInd(size_t ind)
- {
- m_PhraseTableInd = ind;
- }
- // NOT_FOUND until SetPhraseTableInd() is called.
- size_t GetPhraseTableInd() const
- {
- return m_PhraseTableInd;
- }
-
- //! if false, then this feature is not displayed in the n-best list.
- // use with care
- virtual bool IsTuneable() const
- {
- return m_tuneable;
- }
-
- // The base implementation handles "tuneable" and throws for other keys.
- virtual void SetParameter(const std::string& key, const std::string& value);
-
- // may have more factors than actually need, but not guaranteed.
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const = 0;
-
- // For SCFG decoding, the source can contain non-terminals, NOT the raw
- // source from the input sentence
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const = 0;
-
- // used by lexicalised reordering model to add scores to tp data structures
- virtual void EvaluateAfterTablePruning(MemPool &pool,
- const TargetPhrases &tps, const Phrase<Moses2::Word> &sourcePhrase) const
- {
- }
-
- virtual void EvaluateAfterTablePruning(MemPool &pool,
- const SCFG::TargetPhrases &tps, const Phrase<SCFG::Word> &sourcePhrase) const
- {
- }
-
- // clean up temporary memory, called after processing each sentence
- virtual void CleanUpAfterSentenceProcessing() const
- {
- }
-
-protected:
- size_t m_startInd;
- size_t m_numScores;
- size_t m_PhraseTableInd;
- std::string m_name;
- // key/value pairs from the config line that ParseLine() did not handle
- std::vector<std::vector<std::string> > m_args;
- bool m_tuneable;
-
- // Passes every queued pair in m_args to SetParameter().
- virtual void ReadParameters();
- void ParseLine(const std::string &line);
-};
-
-}
-
diff --git a/contrib/moses2/FF/FeatureFunctions.cpp b/contrib/moses2/FF/FeatureFunctions.cpp
deleted file mode 100644
index 4e4f5b54c..000000000
--- a/contrib/moses2/FF/FeatureFunctions.cpp
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- * FeatureFunctions.cpp
- *
- * Created on: 27 Oct 2015
- * Author: hieu
- */
-
-#include <boost/foreach.hpp>
-#include "FeatureRegistry.h"
-#include "FeatureFunctions.h"
-#include "StatefulFeatureFunction.h"
-#include "../System.h"
-#include "../Scores.h"
-#include "../MemPool.h"
-
-#include "../TranslationModel/PhraseTable.h"
-#include "../TranslationModel/UnknownWordPenalty.h"
-#include "../SCFG/TargetPhraseImpl.h"
-#include "../SCFG/Word.h"
-#include "../PhraseBased/TargetPhraseImpl.h"
-#include "util/exception.hh"
-
-using namespace std;
-
-namespace Moses2
-{
-// Starts with no feature functions and a score offset of 0.
-FeatureFunctions::FeatureFunctions(System &system) :
-  m_system(system), m_ffStartInd(0)
-{
-  // m_unkWP is only assigned in Create() when an UnknownWordPenalty feature
-  // is configured, yet it is read by GetUnknownWordPenalty() and
-  // GetPhraseTableExcludeUnknownWordPenalty() regardless. Give it a defined
-  // value. It is assigned here rather than in the initialiser list because
-  // the member declaration order is not visible from this file.
-  m_unkWP = NULL;
-}
-
-// Hands the collection of feature functions to RemoveAllInColl().
-// NOTE(review): presumably this deletes each element - confirm against the
-// helper's definition.
-FeatureFunctions::~FeatureFunctions()
-{
- RemoveAllInColl(m_featureFunctions);
-}
-
-// Loads all feature functions in two passes: first everything that is not a
-// PhraseTable, then the phrase tables. Progress is reported on cerr.
-void FeatureFunctions::Load()
-{
-  // load, everything but pts
-  for (std::vector<const FeatureFunction*>::const_iterator iter = m_featureFunctions.begin();
-      iter != m_featureFunctions.end(); ++iter) {
-    FeatureFunction *mutableFF = const_cast<FeatureFunction*>(*iter);
-    if (dynamic_cast<PhraseTable*>(mutableFF)) {
-      // do nothing. load pt last
-      continue;
-    }
-
-    cerr << "Loading " << mutableFF->GetName() << endl;
-    mutableFF->Load(m_system);
-    cerr << "Finished loading " << mutableFF->GetName() << endl;
-  }
-
-  // load pt
-  for (std::vector<const PhraseTable*>::const_iterator iter = phraseTables.begin();
-      iter != phraseTables.end(); ++iter) {
-    PhraseTable *mutablePT = const_cast<PhraseTable*>(*iter);
-    cerr << "Loading " << mutablePT->GetName() << endl;
-    mutablePT->Load(m_system);
-    cerr << "Finished loading " << mutablePT->GetName() << endl;
-  }
-}
-
-// Builds one feature function per line of the [feature] section and files it
-// under every category it belongs to (stateful, with phrase-table index,
-// phrase table, unknown-word penalty). Overrides are applied at the end.
-void FeatureFunctions::Create()
-{
-  const PARAM_VEC *ffParams = m_system.params.GetParam("feature");
-  UTIL_THROW_IF2(ffParams == NULL, "Must have [feature] section");
-
-  for (size_t lineInd = 0; lineInd < ffParams->size(); ++lineInd) {
-    const std::string &line = ffParams->at(lineInd);
-
-    FeatureFunction *ff = Create(line);
-    m_featureFunctions.push_back(ff);
-
-    if (StatefulFeatureFunction *stateful = dynamic_cast<StatefulFeatureFunction*>(ff)) {
-      stateful->SetStatefulInd(m_statefulFeatureFunctions.size());
-      m_statefulFeatureFunctions.push_back(stateful);
-    }
-
-    if (ff->HasPhraseTableInd()) {
-      ff->SetPhraseTableInd(m_withPhraseTableInd.size());
-      m_withPhraseTableInd.push_back(ff);
-    }
-
-    PhraseTable *pt = dynamic_cast<PhraseTable*>(ff);
-    if (pt) {
-      pt->SetPtInd(phraseTables.size());
-      phraseTables.push_back(pt);
-    }
-
-    // The unknown-word penalty is looked for among the phrase tables only.
-    UnknownWordPenalty *unkWP = dynamic_cast<UnknownWordPenalty *>(pt);
-    if (unkWP == NULL) {
-      continue;
-    }
-    m_unkWP = unkWP;
-
-    // legacy support
-    if (m_system.options.unk.drop) {
-      unkWP->SetParameter("drop", "true");
-    }
-    if (m_system.options.unk.mark) {
-      unkWP->SetParameter("prefix", m_system.options.unk.prefix);
-      unkWP->SetParameter("suffix", m_system.options.unk.suffix);
-    }
-  }
-
-  OverrideFeatures();
-}
-
-// Constructs the feature function described by line through the registry,
-// gives it a default name when the line did not name it, and advances the
-// running score offset by the number of scores it uses.
-// Throws when the line holds no tokens or the registry returns NULL.
-FeatureFunction *FeatureFunctions::Create(const std::string &line)
-{
-  vector<string> toks = Tokenize(line);
-  // toks[0] is read below, so an empty token list must be rejected first.
-  UTIL_THROW_IF2(toks.empty(), "Empty feature function line");
-
-  FeatureFunction *ff = FeatureRegistry::Instance().Construct(m_ffStartInd, toks[0], line);
-  UTIL_THROW_IF2(ff == NULL, "Feature function not created");
-
-  // name
-  if (ff->GetName().empty()) {
-    ff->SetName(GetDefaultName(toks[0]));
-  }
-
-  m_ffStartInd += ff->GetNumScores();
-
-  return ff;
-}
-
-// Produces stub followed by a running number: 0 the first time a stub is
-// seen, then 1, 2, ... on later calls with the same stub.
-std::string FeatureFunctions::GetDefaultName(const std::string &stub)
-{
-  size_t counter = 0;
-  boost::unordered_map<std::string, size_t>::iterator known =
-    m_defaultNames.find(stub);
-  if (known != m_defaultNames.end()) {
-    counter = ++(known->second);
-  }
-  else {
-    m_defaultNames[stub] = 0;
-  }
-  return stub + SPrint(counter);
-}
-
-// Returns the first feature function whose name equals name, or NULL.
-const FeatureFunction *FeatureFunctions::FindFeatureFunction(
-    const std::string &name) const
-{
-  for (std::vector<const FeatureFunction*>::const_iterator iter = m_featureFunctions.begin();
-      iter != m_featureFunctions.end(); ++iter) {
-    const FeatureFunction *candidate = *iter;
-    if (candidate->GetName() == name) {
-      return candidate;
-    }
-  }
-  return NULL;
-}
-
-// Non-const counterpart: same search, but the match is returned as a
-// mutable pointer.
-FeatureFunction *FeatureFunctions::FindFeatureFunction(
-    const std::string &name)
-{
-  for (std::vector<const FeatureFunction*>::const_iterator iter = m_featureFunctions.begin();
-      iter != m_featureFunctions.end(); ++iter) {
-    const FeatureFunction *candidate = *iter;
-    if (candidate->GetName() == name) {
-      return const_cast<FeatureFunction *>(candidate);
-    }
-  }
-  return NULL;
-}
-
-// Returns the ptInd-th phrase table, counting as if the unknown-word penalty
-// were not in the list. Walks phraseTables in place instead of copying it,
-// and throws when ptInd is past the end instead of reading out of bounds.
-const PhraseTable *FeatureFunctions::GetPhraseTableExcludeUnknownWordPenalty(size_t ptInd)
-{
-  // assume only 1 unk wp
-  const PhraseTable *found = NULL;
-  bool skippedUnkWP = false;
-  size_t toSkip = ptInd;
-
-  for (std::vector<const PhraseTable*>::const_iterator iter = phraseTables.begin();
-      iter != phraseTables.end(); ++iter) {
-    const PhraseTable *pt = *iter;
-    if (!skippedUnkWP && pt == m_unkWP) {
-      // only the first occurrence is left out
-      skippedUnkWP = true;
-      continue;
-    }
-    if (toSkip == 0) {
-      found = pt;
-      break;
-    }
-    --toSkip;
-  }
-
-  UTIL_THROW_IF2(found == NULL, "Phrase table index out of range: " << ptInd);
-  return found;
-}
-
-// Lets every feature function score targetPhrase in isolation, then stores
-// the accumulated estimate on the phrase.
-void FeatureFunctions::EvaluateInIsolation(MemPool &pool, const System &system,
-    const Phrase<Moses2::Word> &source, TargetPhraseImpl &targetPhrase) const
-{
-  SCORE estimate = 0;
-
-  for (std::vector<const FeatureFunction*>::const_iterator iter = m_featureFunctions.begin();
-      iter != m_featureFunctions.end(); ++iter) {
-    (*iter)->EvaluateInIsolation(pool, system, source, targetPhrase,
-        targetPhrase.GetScores(), estimate);
-  }
-
-  targetPhrase.SetEstimatedScore(estimate);
-}
-
-// SCFG variant: lets every feature function score targetPhrase in isolation,
-// then stores the accumulated estimate on the phrase.
-void FeatureFunctions::EvaluateInIsolation(
-    MemPool &pool,
-    const System &system,
-    const Phrase<SCFG::Word> &source,
-    SCFG::TargetPhraseImpl &targetPhrase) const
-{
-  SCORE estimate = 0;
-
-  for (std::vector<const FeatureFunction*>::const_iterator iter = m_featureFunctions.begin();
-      iter != m_featureFunctions.end(); ++iter) {
-    (*iter)->EvaluateInIsolation(pool, system, source, targetPhrase,
-        targetPhrase.GetScores(), estimate);
-  }
-
-  targetPhrase.SetEstimatedScore(estimate);
-}
-
-// Forwards the call to every feature function, in registration order.
-void FeatureFunctions::EvaluateAfterTablePruning(MemPool &pool,
-    const TargetPhrases &tps, const Phrase<Moses2::Word> &sourcePhrase) const
-{
-  for (std::vector<const FeatureFunction*>::const_iterator iter = m_featureFunctions.begin();
-      iter != m_featureFunctions.end(); ++iter) {
-    (*iter)->EvaluateAfterTablePruning(pool, tps, sourcePhrase);
-  }
-}
-
-// SCFG variant: forwards the call to every feature function, in registration
-// order.
-void FeatureFunctions::EvaluateAfterTablePruning(MemPool &pool, const SCFG::TargetPhrases &tps,
-    const Phrase<SCFG::Word> &sourcePhrase) const
-{
-  for (std::vector<const FeatureFunction*>::const_iterator iter = m_featureFunctions.begin();
-      iter != m_featureFunctions.end(); ++iter) {
-    (*iter)->EvaluateAfterTablePruning(pool, tps, sourcePhrase);
-  }
-}
-
-// Runs the batch through every stateful feature function.
-void FeatureFunctions::EvaluateWhenAppliedBatch(const Batch &batch) const
-{
-  for (std::vector<const StatefulFeatureFunction*>::const_iterator iter =
-      m_statefulFeatureFunctions.begin();
-      iter != m_statefulFeatureFunctions.end(); ++iter) {
-    (*iter)->EvaluateWhenAppliedBatch(m_system, batch);
-  }
-}
-
-// Gives every feature function its per-sentence clean-up call.
-void FeatureFunctions::CleanUpAfterSentenceProcessing() const
-{
-  for (std::vector<const FeatureFunction*>::const_iterator iter = m_featureFunctions.begin();
-      iter != m_featureFunctions.end(); ++iter) {
-    (*iter)->CleanUpAfterSentenceProcessing();
-  }
-}
-
-// Prints one line per feature function on cout: "<name>= w1 w2 ..." when the
-// feature is tuneable, "<name> UNTUNEABLE" otherwise.
-void FeatureFunctions::ShowWeights(const Weights &allWeights)
-{
-  for (std::vector<const FeatureFunction*>::const_iterator iter = m_featureFunctions.begin();
-      iter != m_featureFunctions.end(); ++iter) {
-    const FeatureFunction *ff = *iter;
-    cout << ff->GetName();
-
-    if (!ff->IsTuneable()) {
-      cout << " UNTUNEABLE" << endl;
-      continue;
-    }
-
-    cout << "=";
-    const vector<SCORE> weights = allWeights.GetWeights(*ff);
-    for (vector<SCORE>::const_iterator w = weights.begin(); w != weights.end(); ++w) {
-      cout << " " << *w;
-    }
-    cout << endl;
-  }
-}
-
-// Applies the optional "feature-overwrite" entries. Each entry has the form
-//   <feature name> key=value [key=value ...]
-// and every pair is passed to SetParameter() of the named feature.
-// Throws when an entry has no pairs, names an unknown feature, or holds a
-// malformed pair.
-void FeatureFunctions::OverrideFeatures()
-{
-  const Parameter &parameter = m_system.params;
-
-  const PARAM_VEC *params = parameter.GetParam("feature-overwrite");
-  if (params == NULL) {
-    // no overrides configured
-    return;
-  }
-
-  for (size_t i = 0; i < params->size(); ++i) {
-    const string &str = params->at(i);
-    vector<string> toks = Tokenize(str);
-    UTIL_THROW_IF2(toks.size() <= 1, "Incorrect format for feature override: " << str);
-
-    FeatureFunction *ff = FindFeatureFunction(toks[0]);
-    UTIL_THROW_IF2(ff == NULL, "Feature function not found: " << toks[0]);
-
-    for (size_t j = 1; j < toks.size(); ++j) {
-      const string &keyValStr = toks[j];
-      // Split on the first '=' only, as FeatureFunction::ParseLine does for
-      // the same key=value syntax, so a value may itself contain '='.
-      vector<string> keyVal = TokenizeFirstOnly(keyValStr, "=");
-      UTIL_THROW_IF2(keyVal.size() != 2, "Incorrect format for parameter override: " << keyValStr);
-
-      cerr << "Override " << ff->GetName() << " "
-           << keyVal[0] << "=" << keyVal[1] << endl;
-
-      ff->SetParameter(keyVal[0], keyVal[1]);
-    }
-  }
-}
-
-}
-
diff --git a/contrib/moses2/FF/FeatureFunctions.h b/contrib/moses2/FF/FeatureFunctions.h
deleted file mode 100644
index 110ebf736..000000000
--- a/contrib/moses2/FF/FeatureFunctions.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * FeatureFunctions.h
- *
- * Created on: 27 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include <boost/unordered_map.hpp>
-#include <vector>
-#include <string>
-#include "../legacy/Parameter.h"
-#include "../Phrase.h"
-
-namespace Moses2
-{
-template<typename WORD>
-class TargetPhrase;
-
-class System;
-class FeatureFunction;
-class StatefulFeatureFunction;
-class PhraseTable;
-class Manager;
-class MemPool;
-class PhraseImpl;
-class TargetPhrases;
-class TargetPhraseImpl;
-class Scores;
-class Hypothesis;
-class UnknownWordPenalty;
-class Weights;
-
-namespace SCFG
-{
-class TargetPhraseImpl;
-class TargetPhrases;
-class Word;
-}
-
-class FeatureFunctions
-{
-public:
- std::vector<const PhraseTable*> phraseTables;
-
- FeatureFunctions(System &system);
- virtual ~FeatureFunctions();
-
- const std::vector<const FeatureFunction*> &GetFeatureFunctions() const
- { return m_featureFunctions; }
-
- const std::vector<const StatefulFeatureFunction*> &GetStatefulFeatureFunctions() const
- { return m_statefulFeatureFunctions; }
-
- const std::vector<const FeatureFunction*> &GetWithPhraseTableInd() const
- { return m_withPhraseTableInd; }
-
- size_t GetNumScores() const
- { return m_ffStartInd; }
-
- void Create();
- void Load();
-
- const FeatureFunction *FindFeatureFunction(const std::string &name) const;
-
- const PhraseTable *GetPhraseTableExcludeUnknownWordPenalty(size_t ptInd);
- const UnknownWordPenalty *GetUnknownWordPenalty() const
- { return m_unkWP; }
-
- // the pool here must be the system pool if the rule was loaded during load, or the mgr pool if it was loaded on demand
- void EvaluateInIsolation(MemPool &pool, const System &system,
- const Phrase<Moses2::Word> &source, TargetPhraseImpl &targetPhrase) const;
- void EvaluateInIsolation(MemPool &pool, const System &system,
- const Phrase<SCFG::Word> &source, SCFG::TargetPhraseImpl &targetPhrase) const;
-
- void EvaluateAfterTablePruning(MemPool &pool, const TargetPhrases &tps,
- const Phrase<Moses2::Word> &sourcePhrase) const;
- void EvaluateAfterTablePruning(MemPool &pool, const SCFG::TargetPhrases &tps,
- const Phrase<SCFG::Word> &sourcePhrase) const;
-
- void EvaluateWhenAppliedBatch(const Batch &batch) const;
-
- void CleanUpAfterSentenceProcessing() const;
-
- void ShowWeights(const Weights &allWeights);
-
-protected:
- std::vector<const FeatureFunction*> m_featureFunctions;
- std::vector<const StatefulFeatureFunction*> m_statefulFeatureFunctions;
- std::vector<const FeatureFunction*> m_withPhraseTableInd;
- const UnknownWordPenalty *m_unkWP;
-
- boost::unordered_map<std::string, size_t> m_defaultNames;
- System &m_system;
- size_t m_ffStartInd;
-
- FeatureFunction *Create(const std::string &line);
- std::string GetDefaultName(const std::string &stub);
- void OverrideFeatures();
- FeatureFunction *FindFeatureFunction(const std::string &name);
-
-};
-
-}
-
diff --git a/contrib/moses2/FF/FeatureRegistry.cpp b/contrib/moses2/FF/FeatureRegistry.cpp
deleted file mode 100644
index 3ec8706e5..000000000
--- a/contrib/moses2/FF/FeatureRegistry.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-#include "FeatureRegistry.h"
-
-#include "../TranslationModel/Memory/PhraseTableMemory.h"
-#include "../TranslationModel/CompactPT/PhraseTableCompact.h"
-#include "../TranslationModel/ProbingPT/ProbingPT.h"
-#include "../TranslationModel/UnknownWordPenalty.h"
-#include "../TranslationModel/Transliteration.h"
-
-#include "../LM/KENLM.h"
-#include "../LM/KENLMBatch.h"
-#include "../LM/LanguageModel.h"
-#include "../LM/GPULM.h"
-
-#include "Distortion.h"
-#include "LexicalReordering/LexicalReordering.h"
-#include "PhrasePenalty.h"
-#include "WordPenalty.h"
-#include "OSM/OpSequenceModel.h"
-
-#include "SkeletonStatefulFF.h"
-#include "SkeletonStatelessFF.h"
-
-using namespace std;
-
-
-namespace Moses2
-{
-FeatureRegistry FeatureRegistry::s_instance;
-
-template<class F>
-class DefaultFeatureFactory: public FeatureFactory
-{
-public:
- FeatureFunction *Create(size_t startInd, const std::string &line) const
- {
- return new F(startInd, line);
- }
-};
-
-////////////////////////////////////////////////////////////////////
-class KenFactory: public FeatureFactory
-{
-public:
- FeatureFunction *Create(size_t startInd, const std::string &line) const
- {
- ConstructKenLM(startInd, line);
- }
-};
-
-////////////////////////////////////////////////////////////////////
-FeatureRegistry::FeatureRegistry()
-{
- // Feature with same name as class
-#define MOSES_FNAME(name) Add(#name, new DefaultFeatureFactory< name >());
- // Feature with different name than class.
-#define MOSES_FNAME2(name, type) Add(name, new DefaultFeatureFactory< type >());
-
- MOSES_FNAME2("PhraseDictionaryCompact", PhraseTableCompact);
- MOSES_FNAME2("PhraseDictionaryMemory", PhraseTableMemory);
- MOSES_FNAME(ProbingPT);
- MOSES_FNAME2("PhraseDictionaryTransliteration", Transliteration);
- MOSES_FNAME(UnknownWordPenalty);
-
- Add("KENLM", new KenFactory());
-
- MOSES_FNAME(KENLMBatch);
- MOSES_FNAME(GPULM);
-
- MOSES_FNAME(LanguageModel);
-
- MOSES_FNAME(Distortion);
- MOSES_FNAME(LexicalReordering);
- MOSES_FNAME(PhrasePenalty);
- MOSES_FNAME(WordPenalty);
- MOSES_FNAME(OpSequenceModel);
-
- MOSES_FNAME(SkeletonStatefulFF);
- MOSES_FNAME(SkeletonStatelessFF);
-}
-
-FeatureRegistry::~FeatureRegistry()
-{
-
-}
-
-void FeatureRegistry::Add(const std::string &name, FeatureFactory *factory)
-{
- std::pair<std::string, boost::shared_ptr<FeatureFactory> > to_ins(name,
- boost::shared_ptr<FeatureFactory>(factory));
- if (!registry_.insert(to_ins).second) {
- cerr << "Duplicate feature name " << name << endl;
- abort();
- }
-}
-
-FeatureFunction *FeatureRegistry::Construct(size_t startInd,
- const std::string &name, const std::string &line) const
-{
- Map::const_iterator i = registry_.find(name);
- if (i == registry_.end()) {
- cerr << "Feature name " << name << " is not registered.";
- abort();
- }
- FeatureFactory *fact = i->second.get();
- FeatureFunction *ff = fact->Create(startInd, line);
- return ff;
-}
-
-void FeatureRegistry::PrintFF() const
-{
- std::vector<std::string> ffs;
- std::cerr << "Available feature functions:" << std::endl;
- Map::const_iterator iter;
- for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
- const std::string &ffName = iter->first;
- ffs.push_back(ffName);
- }
-
- std::vector<std::string>::const_iterator iterVec;
- std::sort(ffs.begin(), ffs.end());
- for (iterVec = ffs.begin(); iterVec != ffs.end(); ++iterVec) {
- const std::string &ffName = *iterVec;
- std::cerr << ffName << " ";
- }
-
- std::cerr << std::endl;
-}
-
-}
-
diff --git a/contrib/moses2/FF/FeatureRegistry.h b/contrib/moses2/FF/FeatureRegistry.h
deleted file mode 100644
index 63e78aae0..000000000
--- a/contrib/moses2/FF/FeatureRegistry.h
+++ /dev/null
@@ -1,54 +0,0 @@
-#pragma once
-#include <boost/unordered_map.hpp>
-#include <boost/shared_ptr.hpp>
-
-namespace Moses2
-{
-class FeatureFunction;
-
-////////////////////////////////////////////////////////////////////
-class FeatureFactory
-{
-public:
- virtual ~FeatureFactory()
- {
- }
-
- virtual FeatureFunction *Create(size_t startInd, const std::string &line) const = 0;
-
-protected:
- FeatureFactory()
- {
- }
-};
-
-////////////////////////////////////////////////////////////////////
-class FeatureRegistry
-{
-public:
- static const FeatureRegistry &Instance() {
- return s_instance;
- }
-
- ~FeatureRegistry();
-
- FeatureFunction *Construct(size_t startInd, const std::string &name,
- const std::string &line) const;
- void PrintFF() const;
-
-private:
- static FeatureRegistry s_instance;
-
- typedef boost::unordered_map<std::string, boost::shared_ptr<FeatureFactory> > Map;
- Map registry_;
-
- FeatureRegistry();
-
- void Add(const std::string &name, FeatureFactory *factory);
-
-};
-
-////////////////////////////////////////////////////////////////////
-
-}
-
diff --git a/contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp b/contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp
deleted file mode 100644
index 8c1b409c3..000000000
--- a/contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * BidirectionalReorderingState.cpp
- *
- * Created on: 22 Mar 2016
- * Author: hieu
- */
-#include <boost/functional/hash_fwd.hpp>
-#include "BidirectionalReorderingState.h"
-#include "../../legacy/Util2.h"
-#include "../../PhraseBased/Manager.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-BidirectionalReorderingState::BidirectionalReorderingState(
- const LRModel &config, LRState *bw, LRState *fw, size_t offset) :
- LRState(config, LRModel::Bidirectional, offset), m_backward(bw), m_forward(
- fw)
-{
-}
-
-BidirectionalReorderingState::~BidirectionalReorderingState()
-{
- // TODO Auto-generated destructor stub
-}
-
-void BidirectionalReorderingState::Init(const LRState *prev,
- const TargetPhrase<Moses2::Word> &topt, const InputPathBase &path, bool first,
- const Bitmap *coverage)
-{
- if (m_backward) {
- m_backward->Init(prev, topt, path, first, coverage);
- }
- if (m_forward) {
- m_forward->Init(prev, topt, path, first, coverage);
- }
-}
-
-std::string BidirectionalReorderingState::ToString() const
-{
- return "BidirectionalReorderingState " + SPrint(this) + " "
- + SPrint(m_backward) + " " + SPrint(m_forward);
-}
-
-size_t BidirectionalReorderingState::hash() const
-{
- size_t ret = m_backward->hash();
- boost::hash_combine(ret, m_forward->hash());
-
- return ret;
-}
-
-bool BidirectionalReorderingState::operator==(const FFState& o) const
-{
- if (&o == this) return true;
-
- BidirectionalReorderingState const &other =
- static_cast<BidirectionalReorderingState const&>(o);
-
- bool ret = (*m_backward == *other.m_backward)
- && (*m_forward == *other.m_forward);
- return ret;
-}
-
-void BidirectionalReorderingState::Expand(const ManagerBase &mgr,
- const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd,
- Scores &scores, FFState &state) const
-{
- BidirectionalReorderingState &stateCast =
- static_cast<BidirectionalReorderingState&>(state);
- m_backward->Expand(mgr, ff, hypo, phraseTableInd, scores,
- *stateCast.m_backward);
- m_forward->Expand(mgr, ff, hypo, phraseTableInd, scores,
- *stateCast.m_forward);
-}
-
-} /* namespace Moses2 */
diff --git a/contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.h b/contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.h
deleted file mode 100644
index 487e84928..000000000
--- a/contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * BidirectionalReorderingState.h
- *
- * Created on: 22 Mar 2016
- * Author: hieu
- */
-#pragma once
-#include "LRState.h"
-
-namespace Moses2
-{
-
-class BidirectionalReorderingState: public LRState
-{
-public:
- BidirectionalReorderingState(const LRModel &config, LRState *bw, LRState *fw,
- size_t offset);
-
- virtual ~BidirectionalReorderingState();
-
- void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
- const InputPathBase &path, bool first, const Bitmap *coverage);
-
- size_t hash() const;
- virtual bool operator==(const FFState& other) const;
-
- virtual std::string ToString() const;
-
- void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
- const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
- FFState &state) const;
-
-protected:
- LRState *m_backward;
- LRState *m_forward;
-
-};
-
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/FF/LexicalReordering/HReorderingBackwardState.cpp b/contrib/moses2/FF/LexicalReordering/HReorderingBackwardState.cpp
deleted file mode 100644
index 600a208b9..000000000
--- a/contrib/moses2/FF/LexicalReordering/HReorderingBackwardState.cpp
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * HReorderingBackwardState.cpp
- *
- * Created on: 22 Mar 2016
- * Author: hieu
- */
-
-#include "HReorderingBackwardState.h"
-#include "../../PhraseBased/Hypothesis.h"
-#include "../../PhraseBased/Manager.h"
-
-namespace Moses2
-{
-
-HReorderingBackwardState::HReorderingBackwardState(MemPool &pool,
- const LRModel &config, size_t offset) :
- LRState(config, LRModel::Backward, offset), reoStack(pool)
-{
- // TODO Auto-generated constructor stub
-
-}
-
-HReorderingBackwardState::~HReorderingBackwardState()
-{
- // TODO Auto-generated destructor stub
-}
-
-void HReorderingBackwardState::Init(const LRState *prev,
- const TargetPhrase<Moses2::Word> &topt, const InputPathBase &path, bool first,
- const Bitmap *coverage)
-{
- prevTP = &topt;
- reoStack.Init();
-}
-
-size_t HReorderingBackwardState::hash() const
-{
- size_t ret = reoStack.hash();
- return ret;
-}
-
-bool HReorderingBackwardState::operator==(const FFState& o) const
-{
- const HReorderingBackwardState& other =
- static_cast<const HReorderingBackwardState&>(o);
- bool ret = reoStack == other.reoStack;
- return ret;
-}
-
-std::string HReorderingBackwardState::ToString() const
-{
- return "HReorderingBackwardState " + SPrint(m_offset);
-}
-
-void HReorderingBackwardState::Expand(const ManagerBase &mgr,
- const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd,
- Scores &scores, FFState &state) const
-{
- HReorderingBackwardState &nextState =
- static_cast<HReorderingBackwardState&>(state);
- nextState.Init(this, hypo.GetTargetPhrase(), hypo.GetInputPath(), false,
- NULL);
- nextState.reoStack = reoStack;
-
- const Range &swrange = hypo.GetInputPath().range;
- int reoDistance = nextState.reoStack.ShiftReduce(swrange);
- ReorderingType reoType = m_configuration.GetOrientation(reoDistance);
- CopyScores(mgr.system, scores, hypo.GetTargetPhrase(), reoType);
-}
-
-} /* namespace Moses2 */
diff --git a/contrib/moses2/FF/LexicalReordering/HReorderingBackwardState.h b/contrib/moses2/FF/LexicalReordering/HReorderingBackwardState.h
deleted file mode 100644
index 9977724d3..000000000
--- a/contrib/moses2/FF/LexicalReordering/HReorderingBackwardState.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * HReorderingBackwardState.h
- *
- * Created on: 22 Mar 2016
- * Author: hieu
- */
-#pragma once
-#include "LRState.h"
-#include "ReorderingStack.h"
-
-namespace Moses2
-{
-
-class HReorderingBackwardState: public LRState
-{
-private:
- ReorderingStack reoStack;
-
-public:
- HReorderingBackwardState(MemPool &pool, const LRModel &config, size_t offset);
-
- virtual void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
- const InputPathBase &path, bool first, const Bitmap *coverage);
-
- virtual ~HReorderingBackwardState();
-
- size_t hash() const;
- virtual bool operator==(const FFState& other) const;
- virtual std::string ToString() const;
- void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
- const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
- FFState &state) const;
-
-};
-
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/FF/LexicalReordering/HReorderingForwardState.cpp b/contrib/moses2/FF/LexicalReordering/HReorderingForwardState.cpp
deleted file mode 100644
index c50626106..000000000
--- a/contrib/moses2/FF/LexicalReordering/HReorderingForwardState.cpp
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * HReorderingForwardState.cpp
- *
- * Created on: 22 Mar 2016
- * Author: hieu
- */
-
-#include "HReorderingForwardState.h"
-#include "../../InputPathBase.h"
-#include "../../PhraseBased/Manager.h"
-#include "../../PhraseBased/Hypothesis.h"
-
-namespace Moses2
-{
-
-HReorderingForwardState::HReorderingForwardState(const LRModel &config,
- size_t offset) :
- LRState(config, LRModel::Forward, offset), m_first(true)
-{
- prevPath = NULL;
- m_coverage = NULL;
-}
-
-HReorderingForwardState::~HReorderingForwardState()
-{
- // TODO Auto-generated destructor stub
-}
-
-void HReorderingForwardState::Init(const LRState *prev,
- const TargetPhrase<Moses2::Word> &topt, const InputPathBase &path, bool first,
- const Bitmap *coverage)
-{
- prevTP = &topt;
- prevPath = &path;
- m_first = first;
- m_coverage = coverage;
-}
-
-size_t HReorderingForwardState::hash() const
-{
- size_t ret;
- ret = hash_value(prevPath->range);
- return ret;
-}
-
-bool HReorderingForwardState::operator==(const FFState& o) const
-{
- if (&o == this) return true;
-
- HReorderingForwardState const& other =
- static_cast<HReorderingForwardState const&>(o);
-
- int compareScores = (
- (prevPath->range == other.prevPath->range) ?
- ComparePrevScores(other.prevTP) :
- (prevPath->range < other.prevPath->range) ? -1 : 1);
- return compareScores == 0;
-}
-
-std::string HReorderingForwardState::ToString() const
-{
- return "HReorderingForwardState " + SPrint(m_offset);
-}
-
-void HReorderingForwardState::Expand(const ManagerBase &mgr,
- const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd,
- Scores &scores, FFState &state) const
-{
- const Range &cur = hypo.GetInputPath().range;
- // keep track of the current coverage ourselves so we don't need the hypothesis
- Manager &mgrCast = const_cast<Manager&>(static_cast<const Manager&>(mgr));
- Bitmaps &bms = mgrCast.GetBitmaps();
- const Bitmap &cov = bms.GetBitmap(*m_coverage, cur);
-
- if (!m_first) {
- LRModel::ReorderingType reoType;
- reoType = m_configuration.GetOrientation(prevPath->range, cur, cov);
- CopyScores(mgr.system, scores, hypo.GetTargetPhrase(), reoType);
- }
-
- HReorderingForwardState &stateCast =
- static_cast<HReorderingForwardState&>(state);
- stateCast.Init(this, hypo.GetTargetPhrase(), hypo.GetInputPath(), false,
- &cov);
-}
-
-} /* namespace Moses2 */
diff --git a/contrib/moses2/FF/LexicalReordering/HReorderingForwardState.h b/contrib/moses2/FF/LexicalReordering/HReorderingForwardState.h
deleted file mode 100644
index 8f9b8bd23..000000000
--- a/contrib/moses2/FF/LexicalReordering/HReorderingForwardState.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * HReorderingForwardState.h
- *
- * Created on: 22 Mar 2016
- * Author: hieu
- */
-#pragma once
-#include "LRState.h"
-
-namespace Moses2
-{
-class Range;
-class Bitmap;
-class InputPathBase;
-
-class HReorderingForwardState: public LRState
-{
-public:
- HReorderingForwardState(const LRModel &config, size_t offset);
- virtual ~HReorderingForwardState();
-
- void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
- const InputPathBase &path, bool first, const Bitmap *coverage);
-
- size_t hash() const;
- virtual bool operator==(const FFState& other) const;
- virtual std::string ToString() const;
- void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
- const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
- FFState &state) const;
-
-protected:
- bool m_first;
- //const Range &m_prevRange;
- const InputPathBase *prevPath;
- const Bitmap *m_coverage;
-
-};
-
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/FF/LexicalReordering/LRModel.cpp b/contrib/moses2/FF/LexicalReordering/LRModel.cpp
deleted file mode 100644
index 47b711369..000000000
--- a/contrib/moses2/FF/LexicalReordering/LRModel.cpp
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * LRModel.cpp
- *
- * Created on: 23 Mar 2016
- * Author: hieu
- */
-
-#include "LRModel.h"
-#include "../../legacy/Util2.h"
-#include "../../legacy/Range.h"
-#include "../../legacy/Bitmap.h"
-#include "../../MemPool.h"
-#include "util/exception.hh"
-#include "PhraseBasedReorderingState.h"
-#include "BidirectionalReorderingState.h"
-#include "HReorderingBackwardState.h"
-#include "HReorderingForwardState.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-bool IsMonotonicStep(Range const& prev, // words range of last source phrase
- Range const& cur, // words range of current source phrase
- Bitmap const& cov) // coverage bitmap
-{
- size_t e = prev.GetEndPos() + 1;
- size_t s = cur.GetStartPos();
- return (s == e || (s >= e && !cov.GetValue(e)));
-}
-
-bool IsSwap(Range const& prev, Range const& cur, Bitmap const& cov)
-{
- size_t s = prev.GetStartPos();
- size_t e = cur.GetEndPos();
- return (e + 1 == s || (e < s && !cov.GetValue(s - 1)));
-}
-
-LRModel::LRModel(const std::string &modelType, LexicalReordering &ff) :
- m_modelType(None), m_phraseBased(true), m_collapseScores(false), m_direction(
- Backward), m_scoreProducer(&ff)
-{
- std::vector<std::string> config = Tokenize(modelType, "-");
-
- for (size_t i = 0; i < config.size(); ++i) {
- if (config[i] == "hier") {
- m_phraseBased = false;
- }
- else if (config[i] == "phrase") {
- m_phraseBased = true;
- }
- else if (config[i] == "wbe") {
- m_phraseBased = true;
- }
- // no word-based decoding available, fall-back to phrase-based
- // This is the old lexical reordering model combination of moses
-
- else if (config[i] == "msd") {
- m_modelType = MSD;
- }
- else if (config[i] == "mslr") {
- m_modelType = MSLR;
- }
- else if (config[i] == "monotonicity") {
- m_modelType = Monotonic;
- }
- else if (config[i] == "leftright") {
- m_modelType = LeftRight;
- }
-
- // unidirectional is deprecated, use backward instead
- else if (config[i] == "unidirectional") {
- m_direction = Backward;
- }
- else if (config[i] == "backward") {
- m_direction = Backward;
- }
- else if (config[i] == "forward") {
- m_direction = Forward;
- }
- else if (config[i] == "bidirectional") {
- m_direction = Bidirectional;
- }
-
- else if (config[i] == "f") {
- m_condition = F;
- }
- else if (config[i] == "fe") {
- m_condition = FE;
- }
-
- else if (config[i] == "collapseff") {
- m_collapseScores = true;
- }
- else if (config[i] == "allff") {
- m_collapseScores = false;
- }
- else {
- std::cerr
- << "Illegal part in the lexical reordering configuration string: "
- << config[i] << std::endl;
- exit(1);
- }
- }
-
- if (m_modelType == None) {
- std::cerr << "You need to specify the type of the reordering model "
- << "(msd, monotonicity,...)" << std::endl;
- exit(1);
- }
-
-}
-
-LRModel::~LRModel()
-{
- // TODO Auto-generated destructor stub
-}
-
-size_t LRModel::GetNumberOfTypes() const
-{
- return ((m_modelType == MSD) ? 3 : (m_modelType == MSLR) ? 4 : 2);
-}
-
-/// return orientation for the first phrase
-LRModel::ReorderingType LRModel::GetOrientation(Range const& cur) const
-{
- UTIL_THROW_IF2(m_modelType == None, "Reordering Model Type is None");
- return ((m_modelType == LeftRight) ? R : (cur.GetStartPos() == 0) ? M :
- (m_modelType == MSD) ? D : (m_modelType == MSLR) ? DR : NM);
-}
-
-LRModel::ReorderingType LRModel::GetOrientation(Range const& prev,
- Range const& cur) const
-{
- UTIL_THROW_IF2(m_modelType == None, "No reordering model type specified");
- return (
- (m_modelType == LeftRight) ? prev.GetEndPos() <= cur.GetStartPos() ? R : L
- : (cur.GetStartPos() == prev.GetEndPos() + 1) ? M :
- (m_modelType == Monotonic) ? NM :
- (prev.GetStartPos() == cur.GetEndPos() + 1) ? S :
- (m_modelType == MSD) ? D :
- (cur.GetStartPos() > prev.GetEndPos()) ? DR : DL);
-}
-
-LRModel::ReorderingType LRModel::GetOrientation(int const reoDistance) const
-{
- // this one is for HierarchicalReorderingBackwardState
- return ((m_modelType == LeftRight) ? (reoDistance >= 1) ? R : L
- : (reoDistance == 1) ? M : (m_modelType == Monotonic) ? NM :
- (reoDistance == -1) ? S : (m_modelType == MSD) ? D :
- (reoDistance > 1) ? DR : DL);
-}
-
-LRState *LRModel::CreateLRState(MemPool &pool) const
-{
- LRState *bwd = NULL, *fwd = NULL;
- size_t offset = 0;
-
- switch (m_direction) {
- case Backward:
- case Bidirectional:
- if (m_phraseBased) {
- bwd =
- new (pool.Allocate<PhraseBasedReorderingState>()) PhraseBasedReorderingState(
- *this, Backward, offset);
- //cerr << "bwd=" << bwd << bwd->ToString() << endl;
- }
- else {
- bwd =
- new (pool.Allocate<HReorderingBackwardState>()) HReorderingBackwardState(
- pool, *this, offset);
- }
- offset += m_collapseScores ? 1 : GetNumberOfTypes();
- if (m_direction == Backward) return bwd; // else fall through
- case Forward:
- if (m_phraseBased) {
- fwd =
- new (pool.Allocate<PhraseBasedReorderingState>()) PhraseBasedReorderingState(
- *this, Forward, offset);
- //cerr << "fwd=" << fwd << fwd->ToString() << endl;
- }
- else {
- fwd =
- new (pool.Allocate<HReorderingForwardState>()) HReorderingForwardState(
- *this, offset);
- }
- offset += m_collapseScores ? 1 : GetNumberOfTypes();
- if (m_direction == Forward) return fwd;
- }
-
- //cerr << "LRStates:" << *bwd << endl << *fwd << endl;
- BidirectionalReorderingState *ret =
- new (pool.Allocate<BidirectionalReorderingState>()) BidirectionalReorderingState(
- *this, bwd, fwd, 0);
- return ret;
-}
-
-LRModel::ReorderingType LRModel::GetOrientation(Range const& prev,
- Range const& cur, Bitmap const& cov) const
-{
- return (
- (m_modelType == LeftRight) ? cur.GetStartPos() > prev.GetEndPos() ? R : L
- : IsMonotonicStep(prev, cur, cov) ? M : (m_modelType == Monotonic) ? NM :
- IsSwap(prev, cur, cov) ? S : (m_modelType == MSD) ? D :
- cur.GetStartPos() > prev.GetEndPos() ? DR : DL);
-}
-
-} /* namespace Moses2 */
diff --git a/contrib/moses2/FF/LexicalReordering/LRModel.h b/contrib/moses2/FF/LexicalReordering/LRModel.h
deleted file mode 100644
index 2713fa46d..000000000
--- a/contrib/moses2/FF/LexicalReordering/LRModel.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * LRModel.h
- *
- * Created on: 23 Mar 2016
- * Author: hieu
- */
-#pragma once
-#include <string>
-
-namespace Moses2
-{
-
-class MemPool;
-class Range;
-class Bitmap;
-class LRState;
-class LexicalReordering;
-
-class LRModel
-{
-public:
- enum ModelType
- {
- Monotonic, MSD, MSLR, LeftRight, None
- };
- enum Direction
- {
- Forward, Backward, Bidirectional
- };
- enum Condition
- {
- F, E, FE
- };
-
- enum ReorderingType
- {
- M = 0, // monotonic
- NM = 1, // non-monotonic
- S = 1, // swap
- D = 2, // discontinuous
- DL = 2, // discontinuous, left
- DR = 3, // discontinuous, right
- R = 0, // right
- L = 1, // left
- MAX = 3, // largest possible
- NONE = 4 // largest possible
- };
-
- LRModel(const std::string &modelType, LexicalReordering &ff);
- virtual ~LRModel();
-
- ModelType GetModelType() const
- {
- return m_modelType;
- }
- Direction GetDirection() const
- {
- return m_direction;
- }
- Condition GetCondition() const
- {
- return m_condition;
- }
-
- bool IsPhraseBased() const
- {
- return m_phraseBased;
- }
-
- bool CollapseScores() const
- {
- return m_collapseScores;
- }
-
- size_t GetNumberOfTypes() const;
-
- LexicalReordering*
- GetScoreProducer() const
- {
- return m_scoreProducer;
- }
-
- LRState *CreateLRState(MemPool &pool) const;
-
- ReorderingType // for first phrase in phrase-based
- GetOrientation(Range const& cur) const;
-
- ReorderingType // for non-first phrases in phrase-based
- GetOrientation(Range const& prev, Range const& cur) const;
-
- ReorderingType // for HReorderingForwardState
- GetOrientation(Range const& prev, Range const& cur, Bitmap const& cov) const;
-
- ReorderingType // for HReorderingBackwarddState
- GetOrientation(int const reoDistance) const;
-
-protected:
-
- ModelType m_modelType;
- bool m_phraseBased;
- bool m_collapseScores;
- Direction m_direction;
- Condition m_condition;
- LexicalReordering *m_scoreProducer;
-
-};
-
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/FF/LexicalReordering/LRState.cpp b/contrib/moses2/FF/LexicalReordering/LRState.cpp
deleted file mode 100644
index 4e9abd774..000000000
--- a/contrib/moses2/FF/LexicalReordering/LRState.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * LRState.cpp
- *
- * Created on: 22 Mar 2016
- * Author: hieu
- */
-#include "LRState.h"
-#include "LexicalReordering.h"
-#include "../../Scores.h"
-#include "../../TargetPhrase.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-class InputType;
-
-LRState::LRState(const LRModel &config, LRModel::Direction dir, size_t offset) :
- m_configuration(config), m_direction(dir), m_offset(offset)
-{
-}
-
-int LRState::ComparePrevScores(const TargetPhrase<Moses2::Word> *other) const
-{
- LexicalReordering* producer = m_configuration.GetScoreProducer();
- size_t phraseTableInd = producer->GetPhraseTableInd();
- const SCORE *myScores = (const SCORE*) prevTP->ffData[phraseTableInd]; //producer->
- const SCORE *yrScores = (const SCORE*) other->ffData[phraseTableInd]; //producer->
-
- if (myScores == yrScores) return 0;
-
- // The pointers are NULL if a phrase pair isn't found in the reordering table.
- if (yrScores == NULL) return -1;
- if (myScores == NULL) return 1;
-
- size_t stop = m_offset + m_configuration.GetNumberOfTypes();
- for (size_t i = m_offset; i < stop; i++) {
- if ((myScores)[i] < (yrScores)[i]) return -1;
- if ((myScores)[i] > (yrScores)[i]) return 1;
- }
- return 0;
-}
-
-void LRState::CopyScores(const System &system, Scores &accum,
- const TargetPhrase<Moses2::Word> &topt, ReorderingType reoType) const
-{
- // don't call this on a bidirectional object
- UTIL_THROW_IF2(
- m_direction != LRModel::Backward && m_direction != LRModel::Forward,
- "Unknown direction: " << m_direction);
-
- TargetPhrase<Moses2::Word> const* relevantOpt = (
- (m_direction == LRModel::Backward) ? &topt : prevTP);
-
- LexicalReordering* producer = m_configuration.GetScoreProducer();
- size_t phraseTableInd = producer->GetPhraseTableInd();
- const SCORE *cached = (const SCORE*) relevantOpt->ffData[phraseTableInd]; //producer->
-
- if (cached == NULL) {
- return;
- }
-
- size_t off_remote = m_offset + reoType;
- size_t off_local = m_configuration.CollapseScores() ? m_offset : off_remote;
-
- UTIL_THROW_IF2(off_local >= producer->GetNumScores(),
- "offset out of vector bounds!");
-
- // look up applicable score from vector of scores
- //UTIL_THROW_IF2(off_remote >= cached->size(), "offset out of vector bounds!");
- //Scores scores(producer->GetNumScoreComponents(),0);
- SCORE score = cached[off_remote];
- accum.PlusEquals(system, *producer, score, off_local);
-
- // else: use default scores (if specified)
- /*
- else if (producer->GetHaveDefaultScores()) {
- Scores scores(producer->GetNumScoreComponents(),0);
- scores[off_local] = producer->GetDefaultScore(off_remote);
- accum->PlusEquals(m_configuration.GetScoreProducer(), scores);
- }
- */
- // note: if no default score, no cost
- /*
- const SparseReordering* sparse = m_configuration.GetSparseReordering();
- if (sparse) sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType,
- m_direction, accum);
- */
-}
-
-}
-
diff --git a/contrib/moses2/FF/LexicalReordering/LRState.h b/contrib/moses2/FF/LexicalReordering/LRState.h
deleted file mode 100644
index 0e906d09a..000000000
--- a/contrib/moses2/FF/LexicalReordering/LRState.h
+++ /dev/null
@@ -1,48 +0,0 @@
-#pragma once
-#include "../FFState.h"
-#include "LRModel.h"
-
-namespace Moses2
-{
-template<typename WORD>
-class TargetPhrase;
-
-class LexicalReordering;
-class Hypothesis;
-class System;
-class Scores;
-class Bitmap;
-class ManagerBase;
-class InputType;
-class InputPathBase;
-class Word;
-
-class LRState: public FFState
-{
-public:
- typedef LRModel::ReorderingType ReorderingType;
- const TargetPhrase<Moses2::Word> *prevTP;
-
- LRState(const LRModel &config, LRModel::Direction dir, size_t offset);
-
- virtual void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
- const InputPathBase &path, bool first, const Bitmap *coverage) = 0;
-
- virtual void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
- const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
- FFState &state) const = 0;
-
- void CopyScores(const System &system, Scores &accum, const TargetPhrase<Moses2::Word> &topt,
- ReorderingType reoType) const;
-
-protected:
- const LRModel& m_configuration;
- LRModel::Direction m_direction;
- size_t m_offset;
-
- int
- ComparePrevScores(const TargetPhrase<Moses2::Word> *other) const;
-
-};
-
-}
diff --git a/contrib/moses2/FF/LexicalReordering/LexicalReordering.cpp b/contrib/moses2/FF/LexicalReordering/LexicalReordering.cpp
deleted file mode 100644
index 97394ce84..000000000
--- a/contrib/moses2/FF/LexicalReordering/LexicalReordering.cpp
+++ /dev/null
@@ -1,222 +0,0 @@
-/*
- * LexicalReordering.cpp
- *
- * Created on: 15 Dec 2015
- * Author: hieu
- */
-
-#include <boost/foreach.hpp>
-#include "util/exception.hh"
-#include "LexicalReordering.h"
-#include "LRModel.h"
-#include "PhraseBasedReorderingState.h"
-#include "BidirectionalReorderingState.h"
-#include "../../TranslationModel/PhraseTable.h"
-#include "../../TranslationModel/CompactPT/LexicalReorderingTableCompact.h"
-#include "../../System.h"
-#include "../../PhraseBased/PhraseImpl.h"
-#include "../../PhraseBased/Manager.h"
-#include "../../PhraseBased/Hypothesis.h"
-#include "../../PhraseBased/TargetPhrases.h"
-#include "../../PhraseBased/TargetPhraseImpl.h"
-#include "../../legacy/InputFileStream.h"
-#include "../../legacy/Util2.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-///////////////////////////////////////////////////////////////////////
-
-// Constructs the feature from its configuration line. All optional back-ends
-// start out unset; SetParameter() creates m_configuration when it sees the
-// "type" key, and the assert documents that this must have happened.
-LexicalReordering::LexicalReordering(size_t startInd, const std::string &line)
-  : StatefulFeatureFunction(startInd, line)
-  , m_configuration(NULL)
-  , m_propertyInd(-1)
-  , m_compactModel(NULL)
-  , m_blank(NULL)
-  , m_coll(NULL)
-{
-  ReadParameters();
-  assert(m_configuration);
-  //assert(m_numScores == 6);
-}
-
-// Releases the heap objects created with plain `new` by this class.
-// m_blank is not deleted here: Load() builds it with placement new inside
-// the system memory pool.
-LexicalReordering::~LexicalReordering()
-{
-  delete m_configuration;  // created in SetParameter("type", ...)
-  delete m_coll;           // in-memory table, created in Load()
-  delete m_compactModel;   // compact binary table, created in Load()
-}
-
-// Loads the reordering table, choosing the back-end in this order:
-//  1. m_propertyInd >= 0: scores come from a phrase-table property, so there
-//     is nothing to load;
-//  2. "<path>.minlexr" exists: open it as a compact binary table;
-//  3. otherwise: read the text table at m_path into the in-memory map m_coll.
-// Throws (via UTIL_THROW_IF2) if a text-table line does not have exactly
-// three "|||"-separated fields.
-void LexicalReordering::Load(System &system)
-{
-  MemPool &pool = system.GetSystemPool();
-
-  if (m_propertyInd >= 0) {
-    // Using integrated lexical reordering scores. No loading needed
-  }
-  else if (FileExists(m_path + ".minlexr")) {
-    m_compactModel = new LexicalReorderingTableCompact(m_path + ".minlexr",
-        m_FactorsF, m_FactorsE, m_FactorsC);
-    // Empty phrase handed to the compact model on every lookup; lives in the pool.
-    m_blank = new (pool.Allocate<PhraseImpl>()) PhraseImpl(pool, 0);
-  }
-  else {
-    m_coll = new Coll();
-    InputFileStream file(m_path);
-    string line;
-    size_t lineNum = 0;
-
-    while (getline(file, line)) {
-      // progress indicator for large tables
-      if (++lineNum % 1000000 == 0) {
-        cerr << lineNum << " ";
-      }
-
-      std::vector<std::string> toks = TokenizeMultiCharSeparator(line, "|||");
-      // A runtime check rather than assert(): with NDEBUG an assert vanishes
-      // and a malformed line would index past the end of toks below.
-      UTIL_THROW_IF2(toks.size() != 3,
-                     "Malformed lexical reordering table " << m_path
-                     << ", line " << lineNum << ": expected 3 fields, got "
-                     << toks.size());
-
-      PhraseImpl *source = PhraseImpl::CreateFromString(pool, system.GetVocab(),
-                           system, toks[0]);
-      PhraseImpl *target = PhraseImpl::CreateFromString(pool, system.GetVocab(),
-                           system, toks[1]);
-      std::vector<SCORE> scores = Tokenize<SCORE>(toks[2]);
-      std::transform(scores.begin(), scores.end(), scores.begin(),
-                     TransformScore);
-      std::transform(scores.begin(), scores.end(), scores.begin(), FloorScore);
-
-      // Later duplicates of the same (source, target) pair overwrite earlier ones.
-      Key key(source, target);
-      (*m_coll)[key] = scores;
-    }
-  }
-}
-
-// Handles the keys specific to this feature ("path", "type", "input-factor",
-// "output-factor", "property-index"); any other key is passed on to
-// StatefulFeatureFunction::SetParameter().
-void LexicalReordering::SetParameter(const std::string& key,
-                                     const std::string& value)
-{
-  if (key == "path") {
-    m_path = value;
-    return;
-  }
-  if (key == "type") {
-    m_configuration = new LRModel(value, *this);
-    return;
-  }
-  if (key == "input-factor") {
-    m_FactorsF = Tokenize<FactorType>(value);
-    return;
-  }
-  if (key == "output-factor") {
-    m_FactorsE = Tokenize<FactorType>(value);
-    return;
-  }
-  if (key == "property-index") {
-    m_propertyInd = Scan<int>(value);
-    return;
-  }
-
-  StatefulFeatureFunction::SetParameter(key, value);
-}
-
-// Returns a fresh state object created by the reordering model configuration
-// in the given pool. The `sys` argument is not used.
-FFState* LexicalReordering::BlankState(MemPool &pool, const System &sys) const
-{
-  return m_configuration->CreateLRState(pool);
-}
-
-// Initialises the state of the empty (initial) hypothesis: no previous state,
-// `first` set to true, and the hypothesis' own target phrase, input path and
-// coverage bitmap. `mgr` and `input` are not used.
-void LexicalReordering::EmptyHypothesisState(FFState &state,
-    const ManagerBase &mgr, const InputType &input,
-    const Hypothesis &hypo) const
-{
-  // Downcast only as far as LRState: Init() is virtual there, so this is
-  // valid for every state type CreateLRState() can hand out, not just the
-  // bidirectional one. EvaluateWhenApplied() casts the same way.
-  LRState &stateCast = static_cast<LRState&>(state);
-  stateCast.Init(NULL, hypo.GetTargetPhrase(), hypo.GetInputPath(), true,
-                 &hypo.GetBitmap());
-}
-
-// Phrase-based overload: intentionally does nothing. This feature contributes
-// no score at this stage; its per-phrase data is attached in
-// EvaluateAfterTablePruning().
-void LexicalReordering::EvaluateInIsolation(MemPool &pool, const System &system,
-    const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase,
-    Scores &scores, SCORE &estimatedScore) const
-{
-}
-
-// SCFG overload: this feature is phrase-based only, so calling it always throws.
-void LexicalReordering::EvaluateInIsolation(MemPool &pool, const System &system,
-    const Phrase<SCFG::Word> &source,
-    const TargetPhrase<SCFG::Word> &targetPhrase,
-    Scores &scores, SCORE &estimatedScore) const
-{
-  UTIL_THROW2("Don't use with SCFG models");
-}
-
-
-// Collection overload: applies the single-phrase overload to every target
-// phrase in `tps`, all paired with the same source phrase.
-void LexicalReordering::EvaluateAfterTablePruning(MemPool &pool,
-    const TargetPhrases &tps, const Phrase<Moses2::Word> &sourcePhrase) const
-{
-  BOOST_FOREACH(const TargetPhraseImpl *candidate, tps) {
-    EvaluateAfterTablePruning(pool, *candidate, sourcePhrase);
-  }
-}
-
-// Single-phrase overload: looks up the reordering scores for
-// (sourcePhrase, targetPhrase) in whichever back-end is active and caches a
-// pointer to them in targetPhrase.ffData[m_PhraseTableInd]. For the two
-// external tables the slot is set to NULL when the pair has no entry.
-void LexicalReordering::EvaluateAfterTablePruning(MemPool &pool,
-    const TargetPhraseImpl &targetPhrase, const Phrase<Moses2::Word> &sourcePhrase) const
-{
-  if (m_propertyInd >= 0) {
-    // scores are stored as a property inside the phrase table entry
-    SCORE *scoreArr = targetPhrase.GetScoresProperty(m_propertyInd);
-    targetPhrase.ffData[m_PhraseTableInd] = scoreArr;
-  }
-  else if (m_compactModel) {
-    // using external compact binary model
-    const Values values = m_compactModel->GetScore(sourcePhrase, targetPhrase,
-                          *m_blank);
-    if (values.size()) {
-      assert(values.size() == m_numScores);
-
-      SCORE *scoreArr = pool.Allocate<SCORE>(m_numScores);
-      for (size_t i = 0; i < m_numScores; ++i) {
-        scoreArr[i] = values[i];
-      }
-      targetPhrase.ffData[m_PhraseTableInd] = scoreArr;
-    }
-    else {
-      targetPhrase.ffData[m_PhraseTableInd] = NULL;
-    }
-  }
-  else if (m_coll) {
-    // using external memory model
-
-    // cache data in target phrase
-    const Values *values = GetValues(sourcePhrase, targetPhrase);
-
-    if (values) {
-      // The size check must come after the NULL test: GetValues() returns
-      // NULL for unknown pairs and must not be dereferenced in that case.
-      assert(values->size() == m_numScores);
-
-      SCORE *scoreArr = pool.Allocate<SCORE>(m_numScores);
-      for (size_t i = 0; i < m_numScores; ++i) {
-        scoreArr[i] = (*values)[i];
-      }
-      targetPhrase.ffData[m_PhraseTableInd] = scoreArr;
-    }
-    else {
-      targetPhrase.ffData[m_PhraseTableInd] = NULL;
-    }
-  }
-}
-
-// Phrase-based overload: delegates scoring and the construction of the new
-// state to the previous hypothesis' reordering state.
-void LexicalReordering::EvaluateWhenApplied(const ManagerBase &mgr,
-    const Hypothesis &hypo, const FFState &prevState, Scores &scores,
-    FFState &state) const
-{
-  static_cast<const LRState&>(prevState).Expand(mgr, *this, hypo,
-      m_PhraseTableInd, scores, state);
-}
-
-// Looks up (source, target) in the in-memory table.
-// Returns a pointer to the stored score vector, or NULL if the pair is absent.
-// Requires m_coll to be non-NULL (it is dereferenced unconditionally).
-const LexicalReordering::Values *LexicalReordering::GetValues(
-  const Phrase<Moses2::Word> &source, const Phrase<Moses2::Word> &target) const
-{
-  const Key lookup(&source, &target);
-  Coll::const_iterator found = m_coll->find(lookup);
-  if (found != m_coll->end()) {
-    return &found->second;
-  }
-  return NULL;
-}
-
-// SCFG overload: not supported by this feature; always throws.
-void LexicalReordering::EvaluateWhenApplied(const SCFG::Manager &mgr,
-    const SCFG::Hypothesis &hypo, int featureID,
-    Scores &scores, FFState &state) const
-{
-  UTIL_THROW2("Not implemented");
-}
-
-} /* namespace Moses2 */
diff --git a/contrib/moses2/FF/LexicalReordering/LexicalReordering.h b/contrib/moses2/FF/LexicalReordering/LexicalReordering.h
deleted file mode 100644
index b14517db2..000000000
--- a/contrib/moses2/FF/LexicalReordering/LexicalReordering.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * LexicalReordering.h
- *
- * Created on: 15 Dec 2015
- * Author: hieu
- */
-
-#pragma once
-#include <vector>
-#include <boost/unordered_map.hpp>
-#include "../StatefulFeatureFunction.h"
-#include "../../TypeDef.h"
-#include "../../Phrase.h"
-#include "../../legacy/Range.h"
-
-namespace Moses2
-{
-
-class LexicalReorderingTableCompact;
-class LRModel;
-class TargetPhraseImpl;
-
-// Stateful feature function for lexicalised reordering. The scores can come
-// from one of three places (see the member groups below): a property stored
-// in the phrase table, a compact binary table, or a table held in memory.
-class LexicalReordering: public StatefulFeatureFunction
-{
-public:
- LexicalReordering(size_t startInd, const std::string &line);
- virtual ~LexicalReordering();
-
- virtual void Load(System &system);
-
- virtual void SetParameter(const std::string& key, const std::string& value);
-
- // Always returns true (converted to size_t by the declared return type).
- virtual size_t HasPhraseTableInd() const
- {
- return true;
- }
-
- virtual FFState* BlankState(MemPool &pool, const System &sys) const;
- virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const;
-
- // Phrase-based overload.
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- // SCFG overload.
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- // Processes a whole collection of target phrases for one source phrase.
- virtual void
- EvaluateAfterTablePruning(MemPool &pool, const TargetPhrases &tps,
- const Phrase<Moses2::Word> &sourcePhrase) const;
-
- // Phrase-based overload.
- virtual void EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const;
-
- // SCFG overload.
- virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const;
-
-protected:
- // Table location and factor lists.
- std::string m_path;
- FactorList m_FactorsF;
- FactorList m_FactorsE;
- FactorList m_FactorsC;
-
- // Reordering model configuration; a raw pointer to an LRModel.
- LRModel *m_configuration;
-
- // Per-target-phrase worker behind the collection overload above.
- virtual void
- EvaluateAfterTablePruning(MemPool &pool, const TargetPhraseImpl &targetPhrase,
- const Phrase<Moses2::Word> &sourcePhrase) const;
-
- // PROPERTY IN PT
- int m_propertyInd;
-
- // COMPACT MODEL
- LexicalReorderingTableCompact *m_compactModel;
- Phrase<Moses2::Word> *m_blank;
-
- // MEMORY MODEL
- // Keys are pairs of phrase POINTERS, but hashing and equality below look at
- // the pointed-to phrases, so lookups work with any equal phrase objects.
- typedef std::pair<const Phrase<Moses2::Word>*, const Phrase<Moses2::Word>* > Key;
- typedef std::vector<SCORE> Values;
-
- // Serves as both the hash functor and the equality predicate of Coll.
- struct KeyComparer
- {
- // Hash: combines the hashes of the two phrases.
- size_t operator()(const Key &obj) const
- {
- size_t seed = obj.first->hash();
- boost::hash_combine(seed, obj.second->hash());
- return seed;
- }
-
- // Equality: true only if both phrases compare equal.
- bool operator()(const Key& a, const Key& b) const
- {
- if ((*a.first) != (*b.first)) {
- return false;
- }
- if ((*a.second) != (*b.second)) {
- return false;
- }
- return true;
- }
-
- };
-
- typedef boost::unordered_map<Key, Values, KeyComparer, KeyComparer> Coll;
- Coll *m_coll;
-
- // Returns the stored scores for (source, target), or a null pointer.
- const Values *GetValues(const Phrase<Moses2::Word> &source, const Phrase<Moses2::Word> &target) const;
-};
-
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp b/contrib/moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp
deleted file mode 100644
index c7d4abf03..000000000
--- a/contrib/moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * PhraseBasedReorderingState.cpp
- *
- * Created on: 22 Mar 2016
- * Author: hieu
- */
-
-#include "PhraseBasedReorderingState.h"
-#include "LexicalReordering.h"
-#include "../../PhraseBased/Hypothesis.h"
-#include "../../InputPathBase.h"
-#include "../../PhraseBased/Manager.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-// Creates a state with no previous phrase or path recorded yet.
-// m_first is deliberately left unset here; Init() assigns it.
-PhraseBasedReorderingState::PhraseBasedReorderingState(const LRModel &config,
-    LRModel::Direction dir, size_t offset)
-  : LRState(config, dir, offset)
-{
-  // uninitialised
-  prevTP = NULL;
-  prevPath = NULL;
-}
-
-// Records the target phrase, input path and `first` flag of the hypothesis
-// this state belongs to. `prev` and `coverage` are ignored by this state type.
-void PhraseBasedReorderingState::Init(const LRState *prev,
-    const TargetPhrase<Moses2::Word> &topt,
-    const InputPathBase &path, bool first,
-    const Bitmap *coverage)
-{
-  m_first = first;
-  prevPath = &path;
-  prevTP = &topt;
-}
-
-// Hash built from the ADDRESS of the previous path's range combined with the
-// direction, matching the address comparison done in operator==.
-size_t PhraseBasedReorderingState::hash() const
-{
-  size_t seed = (size_t) &prevPath->range;
-  boost::hash_combine(seed, m_direction);
-  return seed;
-}
-
-// Two states are equal when they refer to the same range object (compared by
-// address) and, for the forward direction only, when ComparePrevScores()
-// reports no difference for the other state's previous target phrase.
-bool PhraseBasedReorderingState::operator==(const FFState& o) const
-{
-  if (&o == this) return true;
-
-  const PhraseBasedReorderingState &other =
-    static_cast<const PhraseBasedReorderingState&>(o);
-
-  if (&prevPath->range != &other.prevPath->range) {
-    return false;
-  }
-  if (m_direction != LRModel::Forward) {
-    return true;
-  }
-  return ComparePrevScores(other.prevTP) == 0;
-}
-
-// Scores the step from this (previous) state to `hypo` and initialises
-// `state` for the new hypothesis. Nothing is scored when the direction is
-// Forward and this is the first state. `ff` and `phraseTableInd` are unused.
-void PhraseBasedReorderingState::Expand(const ManagerBase &mgr,
-    const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd,
-    Scores &scores, FFState &state) const
-{
-  const bool skipScoring = (m_direction == LRModel::Forward) && m_first;
-  if (!skipScoring) {
-    const LRModel &model = m_configuration;
-    const Range &curRange = hypo.GetInputPath().range;
-
-    LRModel::ReorderingType reoType;
-    if (m_first) {
-      // no previous range to compare against
-      reoType = model.GetOrientation(curRange);
-    }
-    else {
-      reoType = model.GetOrientation(prevPath->range, curRange);
-    }
-    CopyScores(mgr.system, scores, hypo.GetTargetPhrase(), reoType);
-  }
-
-  // The new state remembers the phrase and path just applied; it is never "first".
-  static_cast<PhraseBasedReorderingState&>(state).Init(this,
-      hypo.GetTargetPhrase(), hypo.GetInputPath(), false, NULL);
-}
-
-} /* namespace Moses2 */
diff --git a/contrib/moses2/FF/LexicalReordering/PhraseBasedReorderingState.h b/contrib/moses2/FF/LexicalReordering/PhraseBasedReorderingState.h
deleted file mode 100644
index e26237cf7..000000000
--- a/contrib/moses2/FF/LexicalReordering/PhraseBasedReorderingState.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * PhraseBasedReorderingState.h
- *
- * Created on: 22 Mar 2016
- * Author: hieu
- */
-
-#pragma once
-#include "LRState.h"
-
-namespace Moses2
-{
-
-class InputPathBase;
-
-// Concrete LRState that remembers the input path and target phrase of the
-// hypothesis it belongs to, plus a flag saying whether it is the first state.
-class PhraseBasedReorderingState: public LRState
-{
-public:
- // Input path recorded by Init(); raw pointer, NULL until Init() is called.
- const InputPathBase *prevPath;
- // Copy of the `first` argument passed to Init(); not set by the constructor.
- bool m_first;
-
- PhraseBasedReorderingState(const LRModel &config, LRModel::Direction dir,
- size_t offset);
-
- // Stores topt, path and first; prev and coverage are not used.
- void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
- const InputPathBase &path, bool first, const Bitmap *coverage);
-
- // hash() and operator== are both based on the address of prevPath->range.
- size_t hash() const;
- virtual bool operator==(const FFState& other) const;
-
- // Returns the fixed class name; no state is printed.
- virtual std::string ToString() const
- {
- return "PhraseBasedReorderingState";
- }
-
- // Scores the transition to hypo and initialises `state` for it.
- void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
- const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
- FFState &state) const;
-
-protected:
-
-};
-
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/FF/LexicalReordering/ReorderingStack.cpp b/contrib/moses2/FF/LexicalReordering/ReorderingStack.cpp
deleted file mode 100644
index 298257fc4..000000000
--- a/contrib/moses2/FF/LexicalReordering/ReorderingStack.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * ReorderingStack.cpp
- ** Author: Ankit K. Srivastava
- ** Date: Jan 26, 2010
- */
-
-#include <vector>
-#include "ReorderingStack.h"
-#include "../../MemPool.h"
-
-namespace Moses2
-{
-// Builds an empty stack whose storage is constructed from the given pool.
-ReorderingStack::ReorderingStack(MemPool &pool)
-  : m_stack(pool)
-{
-}
-
-// Resets the stack to empty so the object can be reused.
-void ReorderingStack::Init()
-{
-  m_stack.clear();
-}
-
-// Hash over all ranges currently on the stack, in order.
-size_t ReorderingStack::hash() const
-{
-  return boost::hash_range(m_stack.begin(), m_stack.end());
-}
-
-// Stacks are equal when their underlying range vectors compare equal.
-bool ReorderingStack::operator==(const ReorderingStack& o) const
-{
-  return m_stack == o.m_stack;
-}
-
-// Shifts input_span onto the stack, merging it with the top element when the
-// two are adjacent. Returns the distance between input_span and the span that
-// was on top before the call (start position + 1 when the stack was empty).
-int ReorderingStack::ShiftReduce(const Range &input_span)
-{
-  // Empty stack: nothing to merge with.
-  if (m_stack.empty()) {
-    m_stack.push_back(input_span);
-    return input_span.GetStartPos() + 1; // - (-1)
-  }
-
-  // Copy the top element: it may be popped below while still being needed.
-  Range top = m_stack.back();
-
-  // Distance measured from the near edge of the previous span.
-  int distance;
-  if (input_span.GetStartPos() > top.GetStartPos()) {
-    distance = input_span.GetStartPos() - top.GetEndPos();
-  }
-  else {
-    distance = input_span.GetEndPos() - top.GetStartPos();
-  }
-
-  switch (distance) {
-  case 1: { // monotone: new span directly follows the top one
-    m_stack.pop_back();
-    Range merged(top.GetStartPos(), input_span.GetEndPos());
-    Reduce(merged);
-    break;
-  }
-  case -1: { // swap: new span directly precedes the top one
-    m_stack.pop_back();
-    Range merged(input_span.GetStartPos(), top.GetEndPos());
-    Reduce(merged);
-    break;
-  }
-  default: // discontinuous: keep both spans on the stack
-    m_stack.push_back(input_span);
-    break;
-  }
-
-  return distance;
-}
-
-// Repeatedly merges `current` with the top of the stack while the two spans
-// are adjacent (in either order), then pushes the resulting span.
-void ReorderingStack::Reduce(Range current)
-{
-  while (m_stack.size() > 0) {
-    Range top = m_stack.back();
-
-    if (current.GetStartPos() - top.GetEndPos() == 1) {
-      // mono&merge: current directly follows top
-      m_stack.pop_back();
-      current = Range(top.GetStartPos(), current.GetEndPos());
-    }
-    else if (top.GetStartPos() - current.GetEndPos() == 1) {
-      // swap&merge: current directly precedes top
-      m_stack.pop_back();
-      current = Range(current.GetStartPos(), top.GetEndPos());
-    }
-    else {
-      // discontinuous, no more merging
-      break;
-    }
-  }
-
-  // add the (possibly merged) span to the stack
-  m_stack.push_back(current);
-}
-
-}
-
diff --git a/contrib/moses2/FF/LexicalReordering/ReorderingStack.h b/contrib/moses2/FF/LexicalReordering/ReorderingStack.h
deleted file mode 100644
index fab986bc0..000000000
--- a/contrib/moses2/FF/LexicalReordering/ReorderingStack.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * ReorderingStack.h
- ** Author: Ankit K. Srivastava
- ** Date: Jan 26, 2010
- */
-
-#pragma once
-
-//#include <string>
-#include <vector>
-//#include "Factor.h"
-//#include "Phrase.h"
-//#include "TypeDef.h"
-//#include "Util.h"
-#include "../../legacy/Range.h"
-#include "../../Vector.h"
-
-namespace Moses2
-{
-class MemPool;
-
-// Stack of source ranges maintained shift-reduce style: adjacent ranges are
-// merged as they are pushed (see ShiftReduce() / Reduce()).
-class ReorderingStack
-{
-private:
-
- // The stack itself; constructed from the MemPool passed to the constructor.
- Vector<Range> m_stack;
-
-public:
- ReorderingStack(MemPool &pool);
-
- // Hash and equality are defined over the stack contents.
- size_t hash() const;
- bool operator==(const ReorderingStack& other) const;
-
- // Empties the stack.
- void Init();
- // Pushes input_span (merging where possible) and returns its distance to
- // the span previously on top.
- int ShiftReduce(const Range &input_span);
-
-private:
- // Merges input_span with adjacent spans on top of the stack, then pushes it.
- void Reduce(Range input_span);
-};
-
-}
diff --git a/contrib/moses2/FF/OSM/KenOSM.cpp b/contrib/moses2/FF/OSM/KenOSM.cpp
deleted file mode 100644
index 6b410fc9e..000000000
--- a/contrib/moses2/FF/OSM/KenOSM.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-#include "KenOSM.h"
-
-namespace Moses2
-{
-
-// Factory for the OSM language model wrapper. If `file` is recognised as a
-// KenLM binary, the wrapper matching its stored model type is created (an
-// unknown type throws); any other file is loaded as a probing model.
-// The result is allocated with `new`.
-OSMLM* ConstructOSMLM(const char *file, util::LoadMethod load_method)
-{
-  lm::ngram::Config config;
-  config.load_method = load_method;
-
-  lm::ngram::ModelType model_type;
-  if (!lm::ngram::RecognizeBinary(file, model_type)) {
-    // not a KenLM binary: fall back to the probing model
-    return new KenOSM<lm::ngram::ProbingModel>(file, config);
-  }
-
-  switch (model_type) {
-  case lm::ngram::PROBING:
-    return new KenOSM<lm::ngram::ProbingModel>(file, config);
-  case lm::ngram::REST_PROBING:
-    return new KenOSM<lm::ngram::RestProbingModel>(file, config);
-  case lm::ngram::TRIE:
-    return new KenOSM<lm::ngram::TrieModel>(file, config);
-  case lm::ngram::QUANT_TRIE:
-    return new KenOSM<lm::ngram::QuantTrieModel>(file, config);
-  case lm::ngram::ARRAY_TRIE:
-    return new KenOSM<lm::ngram::ArrayTrieModel>(file, config);
-  case lm::ngram::QUANT_ARRAY_TRIE:
-    return new KenOSM<lm::ngram::QuantArrayTrieModel>(file, config);
-  default:
-    UTIL_THROW2("Unrecognized kenlm model type " << model_type);
-  }
-}
-
-} // namespace
diff --git a/contrib/moses2/FF/OSM/KenOSM.h b/contrib/moses2/FF/OSM/KenOSM.h
deleted file mode 100644
index f1275232f..000000000
--- a/contrib/moses2/FF/OSM/KenOSM.h
+++ /dev/null
@@ -1,53 +0,0 @@
-#pragma once
-
-#include <string>
-#include "lm/model.hh"
-
-namespace Moses2
-{
-
-// Abstract interface over a KenLM model, independent of the concrete model
-// type. The virtual destructor allows deletion through a base pointer.
-class KenOSMBase
-{
-public:
- virtual ~KenOSMBase() {}
-
- // Scores one word given an input LM state and writes the resulting state to
- // the third argument.
- virtual float Score(const lm::ngram::State&, StringPiece,
- lm::ngram::State&) const = 0;
-
- virtual const lm::ngram::State &BeginSentenceState() const = 0;
-
- virtual const lm::ngram::State &NullContextState() const = 0;
-};
-
-// KenOSMBase implementation that owns a KenLM model of type KenModel (held by
-// value) and forwards every call to it.
-template <class KenModel>
-class KenOSM : public KenOSMBase
-{
-public:
- // Constructs the underlying model from `file` using `config`.
- KenOSM(const char *file, const lm::ngram::Config &config)
- : m_kenlm(file, config) {}
-
- // Maps `word` to its vocabulary index and scores it in context in_state;
- // out_state receives the state after the word.
- float Score(const lm::ngram::State &in_state,
- StringPiece word,
- lm::ngram::State &out_state) const {
- return m_kenlm.Score(in_state, m_kenlm.GetVocabulary().Index(word),
- out_state);
- }
-
- const lm::ngram::State &BeginSentenceState() const {
- return m_kenlm.BeginSentenceState();
- }
-
- const lm::ngram::State &NullContextState() const {
- return m_kenlm.NullContextState();
- }
-
-private:
- KenModel m_kenlm;
-};
-
-typedef KenOSMBase OSMLM;
-
-OSMLM* ConstructOSMLM(const char *file, util::LoadMethod load_method);
-
-
-} // namespace
diff --git a/contrib/moses2/FF/OSM/OpSequenceModel.cpp b/contrib/moses2/FF/OSM/OpSequenceModel.cpp
deleted file mode 100644
index 572065813..000000000
--- a/contrib/moses2/FF/OSM/OpSequenceModel.cpp
+++ /dev/null
@@ -1,248 +0,0 @@
-#include <sstream>
-#include "OpSequenceModel.h"
-#include "osmHyp.h"
-#include "lm/state.hh"
-#include "../../PhraseBased/Manager.h"
-#include "../../PhraseBased/Hypothesis.h"
-#include "../../PhraseBased/TargetPhraseImpl.h"
-#include "../../PhraseBased/Sentence.h"
-#include "../../TranslationModel/UnknownWordPenalty.h"
-#include "../../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-////////////////////////////////////////////////////////////////////////////////////////
-
-// Constructs the feature from its configuration line. Defaults: factor 0 on
-// both sides, 5 features, load method util::READ; ReadParameters() may then
-// override them. The language model itself is not loaded until Load().
-OpSequenceModel::OpSequenceModel(size_t startInd, const std::string &line) :
-  StatefulFeatureFunction(startInd, line), OSM(NULL), unkOpProb(0)
-{
-  // OSM and unkOpProb are initialised above so they never hold
-  // indeterminate values, even if Load() is never called.
-  sFactor = 0;
-  tFactor = 0;
-  numFeatures = 5;
-  load_method = util::READ;
-
-  ReadParameters();
-}
-
-// Frees the language model created with `new` by ConstructOSMLM() in
-// readLanguageModel(). Deleting NULL is a no-op, so this is safe when the
-// model was never loaded.
-OpSequenceModel::~OpSequenceModel()
-{
-  delete OSM;
-}
-
-// Loads the language model from the configured path. `system` is not used.
-void OpSequenceModel::Load(System &system)
-{
-  const char *lmFile = m_lmPath.c_str();
-  readLanguageModel(lmFile);
-}
-
-// Allocates an osmState in the pool and default-constructs it in place.
-// `sys` is not used.
-FFState* OpSequenceModel::BlankState(MemPool &pool, const System &sys) const
-{
-  osmState *slot = pool.Allocate<osmState>();
-  return new (slot) osmState();
-}
-
-// Initialises the state of the empty hypothesis with the language model's
-// begin-of-sentence state. `mgr`, `input` and `hypo` are not used.
-void OpSequenceModel::EmptyHypothesisState(FFState &state,
-    const ManagerBase &mgr, const InputType &input,
-    const Hypothesis &hypo) const
-{
-  osmState &osm = static_cast<osmState&>(state);
-  osm.setState(OSM->BeginSentenceState());
-}
-
-// Phrase-based overload: computes the OSM scores of a phrase pair on its own,
-// starting from the null-context LM state, and adds their weighted sum to
-// estimatedScore. `scores` is not touched.
-void OpSequenceModel::EvaluateInIsolation(MemPool &pool,
-    const System &system, const Phrase<Moses2::Word> &source,
-    const TargetPhraseImpl &targetPhrase, Scores &scores,
-    SCORE &estimatedScore) const
-{
-  osmHypothesis obj;
-  obj.setState(OSM->NullContextState());
-
-  // Fresh bitmap sized to the source phrase
-  Bitmap myBitmap(pool, source.GetSize());
-  myBitmap.Init(std::vector<bool>());
-
-  const int startIndex = 0;
-  const int endIndex = source.GetSize();
-
-  // Flatten the word alignment into (first, second, first, second, ...)
-  vector<int> alignments;
-  const AlignmentInfo &align = targetPhrase.GetAlignTerm();
-  for (AlignmentInfo::const_iterator it = align.begin(); it != align.end(); ++it) {
-    alignments.push_back(it->first);
-    alignments.push_back(it->second);
-  }
-
-  // Words of phrases produced by the unknown-word penalty table are replaced
-  // by a placeholder (only when both factors are 0).
-  const bool useUnkPlaceholder =
-    &targetPhrase.pt == system.featureFunctions.GetUnknownWordPenalty()
-    && sFactor == 0 && tFactor == 0;
-
-  vector<string> myTargetPhrase;
-  for (size_t pos = 0; pos < targetPhrase.GetSize(); ++pos) {
-    if (useUnkPlaceholder) {
-      myTargetPhrase.push_back("_TRANS_SLF_");
-    }
-    else {
-      myTargetPhrase.push_back(targetPhrase[pos][tFactor]->GetString().as_string());
-    }
-  }
-
-  vector<string> mySourcePhrase;
-  for (size_t pos = 0; pos < source.GetSize(); ++pos) {
-    mySourcePhrase.push_back(source[pos][sFactor]->GetString().as_string());
-  }
-
-  obj.setPhrases(mySourcePhrase, myTargetPhrase);
-  obj.constructCepts(alignments, startIndex, endIndex - 1, targetPhrase.GetSize());
-  obj.computeOSMFeature(startIndex, myBitmap);
-  obj.calculateOSMProb(*OSM);
-
-  vector<float> scoresVec;
-  obj.populateScores(scoresVec, numFeatures);
-
-  estimatedScore += Scores::CalcWeightedScore(system, *this, scoresVec.data());
-}
-
-// SCFG overload: not supported by this feature; always throws.
-void OpSequenceModel::EvaluateInIsolation(MemPool &pool, const System &system,
-    const Phrase<SCFG::Word> &source,
-    const TargetPhrase<SCFG::Word> &targetPhrase,
-    Scores &scores, SCORE &estimatedScore) const
-{
-  UTIL_THROW2("Not implemented");
-}
-
-// Phrase-based overload: scores the phrase pair applied by `hypo`, continuing
-// from prevState, adds the scores to `scores` and stores the resulting OSM
-// state in `state`.
-void OpSequenceModel::EvaluateWhenApplied(const ManagerBase &mgr,
-    const Hypothesis &hypo, const FFState &prevState, Scores &scores,
-    FFState &state) const
-{
-  const TargetPhrase<Moses2::Word> &target = hypo.GetTargetPhrase();
-  const Sentence &sourceSentence =
-    static_cast<const Sentence&>(hypo.GetManager().GetInput());
-
-  // Working copy of the coverage bitmap; the hypothesis' own one is not modified.
-  Bitmap myBitmap(hypo.GetBitmap());
-
-  // Source span translated by this hypothesis (both ends are visited below).
-  const Range &sourceRange = hypo.GetInputPath().range;
-  const int startIndex = sourceRange.GetStartPos();
-  const int endIndex = sourceRange.GetEndPos();
-
-  // Flatten the word alignment into (first, second, first, second, ...)
-  vector<int> alignments;
-  const AlignmentInfo &align = target.GetAlignTerm();
-  for (AlignmentInfo::const_iterator it = align.begin(); it != align.end(); ++it) {
-    alignments.push_back(it->first);
-    alignments.push_back(it->second);
-  }
-
-  vector<string> mySourcePhrase;
-  for (int pos = startIndex; pos <= endIndex; ++pos) {
-    myBitmap.SetValue(pos, 0); // resetting coverage of this phrase ...
-    mySourcePhrase.push_back(sourceSentence[pos][sFactor]->GetString().as_string());
-  }
-
-  // Words of phrases produced by the unknown-word penalty table are replaced
-  // by a placeholder (only when both factors are 0).
-  const bool useUnkPlaceholder =
-    &target.pt == mgr.system.featureFunctions.GetUnknownWordPenalty()
-    && sFactor == 0 && tFactor == 0;
-
-  vector<string> myTargetPhrase;
-  for (size_t pos = 0; pos < target.GetSize(); ++pos) {
-    if (useUnkPlaceholder) {
-      myTargetPhrase.push_back("_TRANS_SLF_");
-    }
-    else {
-      myTargetPhrase.push_back(target[pos][tFactor]->GetString().as_string());
-    }
-  }
-
-  osmHypothesis obj;
-  obj.setState(&prevState);
-  obj.constructCepts(alignments, startIndex, endIndex, target.GetSize());
-  obj.setPhrases(mySourcePhrase, myTargetPhrase);
-  obj.computeOSMFeature(startIndex, myBitmap);
-  obj.calculateOSMProb(*OSM);
-
-  vector<float> scoresVec;
-  obj.populateScores(scoresVec, numFeatures);
-  //obj.print();
-
-  scores.PlusEquals(mgr.system, *this, scoresVec);
-
-  // Hand the updated OSM state over to the new hypothesis.
-  obj.saveState(static_cast<osmState&>(state));
-}
-
-// SCFG overload: not supported by this feature; always throws.
-void OpSequenceModel::EvaluateWhenApplied(const SCFG::Manager &mgr,
-    const SCFG::Hypothesis &hypo, int featureID,
-    Scores &scores, FFState &state) const
-{
-  UTIL_THROW2("Not implemented");
-}
-
-// Handles the keys specific to this feature; anything else goes to
-// StatefulFeatureFunction::SetParameter().
-//  path              language model file
-//  support-features  "no" -> 1 feature, any other value -> 5 features
-//  input-factor / output-factor   factor indices
-//  load              KenLM load method; an unknown value throws
-void OpSequenceModel::SetParameter(const std::string& key, const std::string& value)
-{
-  if (key == "path") {
-    m_lmPath = value;
-    return;
-  }
-  if (key == "support-features") {
-    numFeatures = (value == "no") ? 1 : 5;
-    return;
-  }
-  if (key == "input-factor") {
-    sFactor = Scan<int>(value);
-    return;
-  }
-  if (key == "output-factor") {
-    tFactor = Scan<int>(value);
-    return;
-  }
-  if (key != "load") {
-    StatefulFeatureFunction::SetParameter(key, value);
-    return;
-  }
-
-  // key == "load"
-  if (value == "lazy") {
-    load_method = util::LAZY;
-  }
-  else if (value == "populate_or_lazy") {
-    load_method = util::POPULATE_OR_LAZY;
-  }
-  else if (value == "populate_or_read" || value == "populate") {
-    load_method = util::POPULATE_OR_READ;
-  }
-  else if (value == "read") {
-    load_method = util::READ;
-  }
-  else if (value == "parallel_read") {
-    load_method = util::PARALLEL_READ;
-  }
-  else {
-    UTIL_THROW2("Unknown KenLM load method " << value);
-  }
-}
-
-// Loads the OSM language model from lmFile using the configured load method
-// and caches in unkOpProb the score of the "_TRANS_SLF_" operation in the
-// null context.
-void OpSequenceModel :: readLanguageModel(const char *lmFile)
-{
-  // Use the path the caller passed in; it was previously ignored in favour
-  // of m_lmPath (the only visible caller passes m_lmPath.c_str()).
-  OSM = ConstructOSMLM(lmFile, load_method);
-
-  const string unkOp = "_TRANS_SLF_";
-  lm::ngram::State startState = OSM->NullContextState();
-  lm::ngram::State endState;
-  unkOpProb = OSM->Score(startState, unkOp, endState);
-}
-
-}
diff --git a/contrib/moses2/FF/OSM/OpSequenceModel.h b/contrib/moses2/FF/OSM/OpSequenceModel.h
deleted file mode 100644
index d46cc82fb..000000000
--- a/contrib/moses2/FF/OSM/OpSequenceModel.h
+++ /dev/null
@@ -1,57 +0,0 @@
-#pragma once
-
-#include <string>
-#include "../StatefulFeatureFunction.h"
-#include "util/mmap.hh"
-#include "KenOSM.h"
-
-namespace Moses2
-{
-
-
-// Operation sequence model (OSM) feature function. Scores phrase pairs with
-// a KenLM model accessed through the OSMLM interface. Only the phrase-based
-// overloads are implemented; the SCFG overloads throw.
-class OpSequenceModel : public StatefulFeatureFunction
-{
-public:
-  OSMLM* OSM; // language model, created by readLanguageModel()
-  float unkOpProb; // score of the "_TRANS_SLF_" operation, set by readLanguageModel()
-  int numFeatures; // Number of features used ...
-  int sFactor; // Source Factor ...
-  int tFactor; // Target Factor ...
-  util::LoadMethod load_method; // method to load model
-
-  OpSequenceModel(size_t startInd, const std::string &line);
-  virtual ~OpSequenceModel();
-
-  // Loads the language model from m_lmPath.
-  virtual void Load(System &system);
-
-  virtual FFState* BlankState(MemPool &pool, const System &sys) const;
-  virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
-                                    const InputType &input, const Hypothesis &hypo) const;
-
-  // Phrase-based overload: adds a weighted score to estimatedScore.
-  virtual void
-  EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
-                      const TargetPhraseImpl &targetPhrase, Scores &scores,
-                      SCORE &estimatedScore) const;
-
-  // SCFG overload: throws.
-  virtual void
-  EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
-                      const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
-                      SCORE &estimatedScore) const;
-
-  // Phrase-based overload.
-  virtual void EvaluateWhenApplied(const ManagerBase &mgr,
-                                   const Hypothesis &hypo, const FFState &prevState, Scores &scores,
-                                   FFState &state) const;
-
-  // SCFG overload: throws.
-  virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
-                                   const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
-                                   FFState &state) const;
-
-  // Recognised keys: path, support-features, input-factor, output-factor, load.
-  void SetParameter(const std::string& key, const std::string& value);
-
-protected:
-  std::string m_lmPath; // value of the "path" parameter
-
-  void readLanguageModel(const char *);
-
-};
-
-}
-
-
diff --git a/contrib/moses2/FF/OSM/osmHyp.cpp b/contrib/moses2/FF/OSM/osmHyp.cpp
deleted file mode 100644
index ede841a80..000000000
--- a/contrib/moses2/FF/OSM/osmHyp.cpp
+++ /dev/null
@@ -1,601 +0,0 @@
-#include "osmHyp.h"
-#include <sstream>
-
-using namespace std;
-using namespace lm::ngram;
-
-namespace Moses2
-{
-// Store val as the LM state and zero both source positions (j and E).
-void osmState::setState(const lm::ngram::State & val)
-{
-  lmState = val;
-  j = E = 0;
-}
-
-// Overwrite the two source positions and replace the gap map with a copy of gapVal.
-void osmState::saveState(int jVal, int eVal, map <int , string> & gapVal)
-{
-  j = jVal;
-  E = eVal;
-
-  gap.clear();
-  gap = gapVal;
-}
-
-// Hash over j, E, the gap map and the LM state *length* only
-// (the same fields operator== compares).
-size_t osmState::hash() const
-{
-  size_t seed = j;
-  boost::hash_combine(seed, E);
-  boost::hash_combine(seed, gap);
-  boost::hash_combine(seed, lmState.length);
-  return seed;
-}
-
-// Two states are equal when j, E, the gap map and the LM state length all match.
-// otherBase is cast without a check, so it must really be an osmState.
-bool osmState::operator==(const FFState& otherBase) const
-{
-  const osmState &rhs = static_cast<const osmState&>(otherBase);
-
-  return j == rhs.j
-         && E == rhs.E
-         && gap == rhs.gap
-         && lmState.length == rhs.lmState.length;
-}
-
-// Always returns the fixed label "done".
-std::string osmState :: getName() const
-{
-  return std::string("done");
-}
-
-//////////////////////////////////////////////////
-
-// All counters and positions start at zero; operations and gap start empty
-// (default-constructed). Initialisers follow the member declaration order.
-osmHypothesis :: osmHypothesis()
-  : j(0)
-  , E(0)
-  , gapCount(0)
-  , deletionCount(0)
-  , openGapCount(0)
-  , gapWidth(0)
-  , opProb(0)
-{
-}
-
-// Copy j, E, the gap map and the LM state out of a previous osmState.
-// A NULL prev_state leaves this hypothesis untouched.
-void osmHypothesis :: setState(const FFState* prev_state)
-{
-  if (prev_state == NULL)
-    return;
-
-  const osmState *prev = static_cast <const osmState *> (prev_state);
-  j = prev->getJ();
-  E = prev->getE();
-  gap = prev->getGap();
-  lmState = prev->getLMState();
-}
-
-// Write this hypothesis' LM state, positions and gap map into state.
-void osmHypothesis :: saveState(osmState &state)
-{
-  // osmState::setState() zeroes j and E, so it has to run before
-  // osmState::saveState() stores the real values.
-  state.setState(lmState);
-  state.saveState(j, E, gap);
-}
-
-// Returns 0 when operations[x] contains one of the reordering markers
-// (_JMP_BCK_, _JMP_FWD_, _CONT_CEPT_, _INS_GAP_) and 1 otherwise.
-// x is not range-checked; callers must pass a valid index into operations.
-int osmHypothesis :: isTranslationOperation(int x)
-{
-  static const char *const kReorderingMarkers[] = {
-    "_JMP_BCK_", "_JMP_FWD_", "_CONT_CEPT_", "_INS_GAP_"
-  };
-  const size_t numMarkers = sizeof(kReorderingMarkers) / sizeof(kReorderingMarkers[0]);
-
-  const std::string &op = operations[x];
-
-  for (size_t i = 0; i < numMarkers; ++i) {
-    // find() returns size_t; compare against npos rather than a signed -1.
-    if (op.find(kReorderingMarkers[i]) != std::string::npos)
-      return 0;
-  }
-
-  return 1;
-}
-
-// Zero the four gap/deletion counters and keep only those entries of
-// operations for which isTranslationOperation() returns 1 (order preserved).
-void osmHypothesis :: removeReorderingOperations()
-{
-  gapCount = deletionCount = openGapCount = gapWidth = 0;
-
-  std::vector <std::string> kept;
-
-  for (size_t idx = 0; idx < operations.size(); ++idx) {
-    if (isTranslationOperation(static_cast<int>(idx)) != 1)
-      continue;
-    kept.push_back(operations[idx]);
-  }
-
-  operations.swap(kept);
-}
-
-// Score every entry of operations with ptrOp, threading the LM state through
-// the sequence. The sum goes to opProb and the final state to lmState.
-void osmHypothesis :: calculateOSMProb(OSMLM& ptrOp)
-{
-  opProb = 0;
-  State outState = lmState;
-
-  for (size_t i = 0; i < operations.size(); ++i) {
-    State inState = outState;   // Score() reads inState and writes outState
-    opProb += ptrOp.Score(inState, operations[i], outState);
-  }
-
-  lmState = outState;
-}
-
-
-// Index of the first 0 entry in coverageVector, or -1 when there is none.
-int osmHypothesis :: firstOpenGap(vector <int> & coverageVector)
-{
-  for (size_t pos = 0; pos < coverageVector.size(); ++pos) {
-    if (coverageVector[pos] == 0)
-      return static_cast<int>(pos);
-  }
-
-  return -1;
-}
-
-// Thin wrapper: formats num through SPrint().
-string osmHypothesis :: intToString(int num)
-{
-  string text = SPrint(num);
-  return text;
-}
-
-// Append to `operations` the strings needed to move the current source
-// position j to j1 and to emit one operation there, then mark j1 as covered.
-//
-// startIndex       only read here (index base for currF); passed on unchanged.
-// j1               source position to generate next.
-// contFlag         0: emit "_TRANS_<english>_TO_<german>" (or "_TRANS_SLF_");
-//                  2: emit "_INS_<german>" and count a deletion;
-//                  anything else: emit "_CONT_CEPT_".
-// coverageVector   queried at j and updated (SetValue(j,1)) by this call.
-// targetNullWords, currF
-//                  NOTE: these parameters have the same names as class members
-//                  and shadow them inside this function.
-//
-// Side effects on members: operations, gap, j, E, gapWidth, gapCount,
-// openGapCount, deletionCount.
-void osmHypothesis :: generateOperations(int & startIndex , int j1 , int contFlag , Bitmap & coverageVector , string english , string german , set <int> & targetNullWords , vector <string> & currF)
-{
-
- int gFlag = 0;
- int gp = 0;
- int ans;
-
-
- // Step 1: we are left of the target position.
- if ( j < j1) { // j1 is the index of the source word we are about to generate ...
- //if(coverageVector[j]==0) // if source word at j is not generated yet ...
- if(coverageVector.GetValue(j)==0) { // if source word at j is not generated yet ...
- operations.push_back("_INS_GAP_");
- gFlag++;
- gap[j]="Unfilled";
- }
- if (j == E) {
- j = j1;
- } else {
- operations.push_back("_JMP_FWD_");
- j=E;
- }
- }
-
- // Step 2: we are right of the target position, so jump back to a gap.
- if (j1 < j) {
- // if(j < E && coverageVector[j]==0)
- if(j < E && coverageVector.GetValue(j)==0) {
- operations.push_back("_INS_GAP_");
- gFlag++;
- gap[j]="Unfilled";
- }
-
- // closestGap() returns the position to jump to and sets gp, which is
- // appended to the jump-back operation name.
- j=closestGap(gap,j1,gp);
- operations.push_back("_JMP_BCK_"+ intToString(gp));
-
- //cout<<"I am j "<<j<<endl;
- //cout<<"I am j1 "<<j1<<endl;
-
- if(j==j1)
- gap[j]="Filled";
- }
-
- // Step 3: still left of the target after steps 1/2, so open a gap and move to j1.
- if (j < j1) {
- operations.push_back("_INS_GAP_");
- gap[j] = "Unfilled";
- gFlag++;
- j=j1;
- }
-
- if(contFlag == 0) { // First words of the multi-word cept ...
-
- if(english == "_TRANS_SLF_") { // Unknown word ...
- operations.push_back("_TRANS_SLF_");
- } else {
- operations.push_back("_TRANS_" + english + "_TO_" + german);
- }
-
- //ans = firstOpenGap(coverageVector);
- ans = coverageVector.GetFirstGapPos();
-
- if (ans != -1)
- gapWidth += j - ans;
-
- } else if (contFlag == 2) {
-
- operations.push_back("_INS_" + german);
- ans = coverageVector.GetFirstGapPos();
-
- if (ans != -1)
- gapWidth += j - ans;
- deletionCount++;
- } else {
- operations.push_back("_CONT_CEPT_");
- }
-
- // Mark the generated position as covered and step past it.
- //coverageVector[j]=1;
- coverageVector.SetValue(j,1);
- j+=1;
-
- // E tracks the largest value j has reached.
- if(E<j)
- E=j;
-
- // gapCount goes up by at most one per call, however many gaps were inserted above.
- if (gFlag > 0)
- gapCount++;
-
- openGapCount += getOpenGaps();
-
- // If the next source position is uncovered and listed in targetNullWords,
- // generate it right away via a recursive call with contFlag 2.
- //if (coverageVector[j] == 0 && targetNullWords.find(j) != targetNullWords.end())
- if (j < coverageVector.GetSize()) {
- if (coverageVector.GetValue(j) == 0 && targetNullWords.find(j) != targetNullWords.end()) {
- j1 = j;
- german = currF[j1-startIndex];
- english = "_INS_";
- generateOperations(startIndex, j1, 2 , coverageVector , english , german , targetNullWords , currF);
- }
- }
-
-}
-
-// Dump the operation list and the five feature values to stderr.
-void osmHypothesis :: print()
-{
-  for (size_t k = 0; k < operations.size(); ++k) {
-    cerr << operations[k] << " ";
-  }
-
-  cerr << endl << endl;
-
-  cerr << "Operation Probability " << opProb << endl;
-  cerr << "Gap Count " << gapCount << endl;
-  cerr << "Open Gap Count " << openGapCount << endl;
-  cerr << "Gap Width " << gapWidth << endl;
-  cerr << "Deletion Count " << deletionCount << endl;
-
-  cerr << "_______________" << endl;
-}
-
-// Walk the gap map from the highest key down, looking for "Unfilled" entries.
-//
-// Returns j1 when an unfilled gap sits exactly at j1. Otherwise returns the
-// key of the nearest unfilled gap to the left of j1, or -1 when there is none.
-// gp receives the 1-based rank (counted from the right, unfilled gaps only)
-// of the returned gap, or 0 when nothing was found.
-//
-// Note that gap is taken by value (the header declares it that way), so the
-// parameter shadows the member of the same name.
-int osmHypothesis :: closestGap(map <int,string> gap, int j1, int & gp)
-{
-  gp = 0;
-  int value = -1;
-
-  // An empty map has begin() == end(). The reverse walk below would
-  // decrement begin(), which is undefined behaviour.
-  if (gap.empty())
-    return value;
-
-  int dist = 1172;  // initial bound; gaps at this distance or further are never selected
-  int opGap = 0;    // number of unfilled gaps seen so far
-
-  map <int,string> :: iterator iter = gap.end();
-
-  do {
-    --iter;
-
-    if (iter->second == "Unfilled") {
-      ++opGap;
-
-      if (iter->first == j1) {
-        gp = opGap;
-        return j1;
-      }
-
-      // Only gaps strictly left of j1 are candidates.
-      if (iter->first < j1 && j1 - iter->first < dist) {
-        dist = j1 - iter->first;
-        value = iter->first;
-        gp = opGap;
-      }
-    }
-  } while (iter != gap.begin());
-
-  return value;
-}
-
-
-
-// Number of entries in the gap map whose value is "Unfilled".
-int osmHypothesis :: getOpenGaps()
-{
-  int open = 0;
-
-  for (map <int,string> :: iterator it = gap.begin(); it != gap.end(); ++it) {
-    open += (it->second == "Unfilled") ? 1 : 0;
-  }
-
-  return open;
-}
-
-// Emit "_DEL_<english>" and keep emitting delete operations while the next
-// target index (skipping indexes in doneTargetIndexes) is listed in
-// sourceNullWords.
-void osmHypothesis :: generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes)
-{
-  for (;;) {
-    operations.push_back("_DEL_" + english);
-
-    // Advance to the next target index that has not been handled already.
-    ++currTargetIndex;
-    while (doneTargetIndexes.find(currTargetIndex) != doneTargetIndexes.end()) {
-      ++currTargetIndex;
-    }
-
-    if (sourceNullWords.find(currTargetIndex) == sourceNullWords.end())
-      break;
-
-    english = currE[currTargetIndex];
-  }
-}
-
-// Turn the cepts stored in ceptsInPhrase into the operation sequence for this
-// phrase (appended to `operations` through generateOperations() and
-// generateDeleteOperations()).
-//
-// startIndex       offset added to the phrase-relative source indexes in each cept.
-// coverageVector   passed through to generateOperations(), which updates it.
-//
-// Reads the members currE, currF, ceptsInPhrase, targetNullWords and
-// sourceNullWords.
-void osmHypothesis :: computeOSMFeature(int startIndex , Bitmap & coverageVector)
-{
-
- set <int> doneTargetIndexes;
- set <int> eSide;
- set <int> fSide;
- set <int> :: iterator iter;
- string english;
- string source;
- int j1;
- int targetIndex = 0;
- doneTargetIndexes.clear();
-
-
- if (targetNullWords.size() != 0) { // Source words to be deleted in the start of this phrase ...
- iter = targetNullWords.begin();
-
- // Only the smallest entry is checked here; generateOperations() itself
- // chains on to following entries.
- if (*iter == startIndex) {
-
- j1 = startIndex;
- source = currF[j1-startIndex];
- english = "_INS_";
- generateOperations(startIndex, j1, 2 , coverageVector , english , source , targetNullWords , currF);
- }
- }
-
- if (sourceNullWords.find(targetIndex) != sourceNullWords.end()) { // first word has to be deleted ...
- english = currE[targetIndex];
- generateDeleteOperations(english,targetIndex, doneTargetIndexes);
- }
-
-
- for (size_t i = 0; i < ceptsInPhrase.size(); i++) {
- source = "";
- english = "";
-
- fSide = ceptsInPhrase[i].first;
- eSide = ceptsInPhrase[i].second;
-
- // Join the target words of the cept with "^_^". Target indexes that do not
- // directly follow the previous one are recorded in doneTargetIndexes.
- iter = eSide.begin();
- targetIndex = *iter;
- english += currE[*iter];
- iter++;
-
- for (; iter != eSide.end(); iter++) {
- if(*iter == targetIndex+1)
- targetIndex++;
- else
- doneTargetIndexes.insert(*iter);
-
- english += "^_^";
- english += currE[*iter];
- }
-
- // Join the source words of the cept the same way.
- iter = fSide.begin();
- source += currF[*iter];
- iter++;
-
- for (; iter != fSide.end(); iter++) {
- source += "^_^";
- source += currF[*iter];
- }
-
- // The first source word of the cept is generated with contFlag 0,
- // every further source word with contFlag 1.
- iter = fSide.begin();
- j1 = *iter + startIndex;
- iter++;
-
- generateOperations(startIndex, j1, 0 , coverageVector , english , source , targetNullWords , currF);
-
-
- for (; iter != fSide.end(); iter++) {
- j1 = *iter + startIndex;
- generateOperations(startIndex, j1, 1 , coverageVector , english , source , targetNullWords , currF);
- }
-
- targetIndex++; // Check whether the next target word is unaligned ...
-
- while(doneTargetIndexes.find(targetIndex) != doneTargetIndexes.end()) {
- targetIndex++;
- }
-
- if(sourceNullWords.find(targetIndex) != sourceNullWords.end()) {
- english = currE[targetIndex];
- generateDeleteOperations(english,targetIndex, doneTargetIndexes);
- }
- }
-
- //removeReorderingOperations();
-
- //print();
-
-}
-
-// Grow eSide / fSide to the full set of positions linked to each other through
-// the alignment maps: tS maps a target position to its source positions, and
-// sT maps a source position to its target positions. Repeats until eSide
-// stops growing.
-//
-// Lookups use operator[], so every position visited ends up with an entry
-// (possibly empty) in tS / sT.
-void osmHypothesis :: getMeCepts ( set <int> & eSide , set <int> & fSide , map <int , vector <int> > & tS , map <int , vector <int> > & sT)
-{
-  size_t sizeBefore;
-
-  do {
-    sizeBefore = eSide.size();
-
-    for (set <int> :: iterator e = eSide.begin(); e != eSide.end(); ++e) {
-      const vector <int> &sources = tS[*e];
-      for (size_t k = 0; k < sources.size(); ++k) {
-        fSide.insert(sources[k]);
-      }
-    }
-
-    for (set <int> :: iterator f = fSide.begin(); f != fSide.end(); ++f) {
-      const vector <int> &targets = sT[*f];
-      for (size_t k = 0; k < targets.size(); ++k) {
-        eSide.insert(targets[k]);
-      }
-    }
-  } while (eSide.size() > sizeBefore);
-}
-
-// Build the cepts (groups of mutually aligned source/target positions) for
-// one phrase pair and record its unaligned words.
-//
-// align               flat list of (source, target) index pairs, both relative
-//                     to the phrase; a trailing unpaired element is ignored.
-// startIndex/endIndex inclusive range of source positions walked for the
-//                     unaligned-source check.
-// targetPhraseLength  number of target words in the phrase.
-//
-// Results are written to the members ceptsInPhrase (pairs of
-// <source set, target set>), targetNullWords (unaligned source positions,
-// offset by startIndex) and sourceNullWords (unaligned target positions).
-void osmHypothesis :: constructCepts(vector <int> & align , int startIndex , int endIndex, int targetPhraseLength)
-{
-
-  std::map <int , vector <int> > sT;  // source position -> aligned target positions
-  std::map <int , vector <int> > tS;  // target position -> aligned source positions
-  std::set <int> eSide;
-  std::set <int> fSide;
-  std::set <int> :: iterator iter;
-
-  // Stop before a trailing unpaired entry so align[i+1] is never read past the end.
-  for (size_t i = 0; i + 1 < align.size(); i += 2) {
-    const int src = align[i];
-    const int tgt = align[i+1];
-    tS[tgt].push_back(src);
-    sT[src].push_back(tgt);
-  }
-
-  for (int i = startIndex; i <= endIndex; i++) { // What are unaligned source words in this phrase ...
-    if (sT.find(i-startIndex) == sT.end()) {
-      targetNullWords.insert(i);
-    }
-  }
-
-  for (int i = 0; i < targetPhraseLength; i++) { // What are unaligned target words in this phrase ...
-    if (tS.find(i) == tS.end()) {
-      sourceNullWords.insert(i);
-    }
-  }
-
-  // Peel off one cept at a time: seed with the smallest remaining target
-  // position, expand it with getMeCepts(), then drop its members from both maps.
-  while (!tS.empty() && !sT.empty()) {
-
-    eSide.clear();
-    fSide.clear();
-    eSide.insert (tS.begin()->first);
-
-    getMeCepts(eSide, fSide, tS , sT);
-
-    // Erase by key: a missing key is a no-op, not an erase(end()).
-    for (iter = eSide.begin(); iter != eSide.end(); iter++) {
-      tS.erase(*iter);
-    }
-
-    for (iter = fSide.begin(); iter != fSide.end(); iter++) {
-      sT.erase(*iter);
-    }
-
-    ceptsInPhrase.push_back(make_pair (fSide , eSide));
-  }
-
-
-
-  /*
-
-  cerr<<"Extracted Cepts "<<endl;
-  for (int i = 0; i < ceptsInPhrase.size(); i++)
-  {
-
-  fSide = ceptsInPhrase[i].first;
-  eSide = ceptsInPhrase[i].second;
-
-  for (iter = eSide.begin(); iter != eSide.end(); iter++)
-  {
-  cerr<<*iter<<" ";
-  }
-  cerr<<"<---> ";
-
-  for (iter = fSide.begin(); iter != fSide.end(); iter++)
-  {
-  cerr<<*iter<<" ";
-  }
-
-  cerr<<endl;
-  }
-  cerr<<endl;
-
-  cerr<<"Unaligned Target Words"<<endl;
-
-  for (iter = sourceNullWords.begin(); iter != sourceNullWords.end(); iter++)
-  cerr<<*iter<<"<--->"<<endl;
-
-  cerr<<"Unaligned Source Words"<<endl;
-
-  for (iter = targetNullWords.begin(); iter != targetNullWords.end(); iter++)
-  cerr<<*iter<<"<--->"<<endl;
-
-  */
-
-}
-
-// Replace the contents of scores with this hypothesis' feature values:
-// opProb alone when numFeatures == 1, otherwise opProb followed by gapWidth,
-// gapCount, openGapCount and deletionCount.
-void osmHypothesis :: populateScores(vector <float> & scores , const int numFeatures)
-{
-  scores.clear();
-  scores.push_back(opProb);
-
-  if (numFeatures != 1) {
-    scores.push_back(gapWidth);
-    scores.push_back(gapCount);
-    scores.push_back(openGapCount);
-    scores.push_back(deletionCount);
-  }
-}
-
-
-} // namespace
-
diff --git a/contrib/moses2/FF/OSM/osmHyp.h b/contrib/moses2/FF/OSM/osmHyp.h
deleted file mode 100644
index c2893d366..000000000
--- a/contrib/moses2/FF/OSM/osmHyp.h
+++ /dev/null
@@ -1,111 +0,0 @@
-#pragma once
-
-# include <set>
-# include <map>
-# include <string>
-# include <vector>
-#include "KenOSM.h"
-# include "../FFState.h"
-# include "../../legacy/Bitmap.h"
-
-namespace Moses2
-{
-
-/**
- * Feature-function state for the operation sequence model: the two source
- * positions j and E, the gap map and the KenLM state.
- */
-class osmState : public FFState
-{
-public:
-  // Zero the positions so that hash() / operator== never read indeterminate
-  // ints on a freshly constructed state. lmState is filled in by setState().
-  osmState()
-    : j(0), E(0)
-  {}
-
-  //! store val as LM state and reset j and E to 0
-  void setState(const lm::ngram::State & val);
-
-  virtual size_t hash() const;
-  virtual bool operator==(const FFState& other) const;
-
-  virtual std::string ToString() const
-  { return "osmState"; }
-
-  //! overwrite j, E and the gap map
-  void saveState(int jVal, int eVal, std::map <int , std::string> & gapVal);
-  int getJ()const {
-    return j;
-  }
-  int getE()const {
-    return E;
-  }
-  //! returns a copy of the gap map
-  std::map <int , std::string> getGap() const {
-    return gap;
-  }
-
-  //! returns a copy of the LM state
-  lm::ngram::State getLMState() const {
-    return lmState;
-  }
-
-  // NOTE(review): no definition of print() appears in osmHyp.cpp - confirm it
-  // exists elsewhere before calling it.
-  void print() const;
-  std::string getName() const;
-
-protected:
-  int j, E;
-  std::map <int,std::string> gap;
-  lm::ngram::State lmState;
-};
-
-// Scratch object that builds the operation sequence for one phrase
-// application and derives the feature values from it. State is loaded from /
-// stored to an osmState through setState() / saveState().
-class osmHypothesis
-{
-
-private:
-
-
- std::vector <std::string> operations; // List of operations required to generate this hyp ...
- std::map <int,std::string> gap; // Maintains gap history ...
- int j; // Position after the last source word generated ...
- int E; // Position after the right most source word so far generated ...
- lm::ngram::State lmState; // KenLM's Model State ...
-
- int gapCount; // Number of gaps inserted ...
- int deletionCount;
- int openGapCount;
- int gapWidth;
- double opProb;
-
- // Words of the current phrase pair, set through setPhrases():
- // currF receives val1, currE receives val2.
- std::vector <std::string> currE;
- std::vector <std::string> currF;
- // One entry per cept; constructCepts() stores pairs as <fSide, eSide>.
- std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase;
- // Filled by constructCepts(): targetNullWords holds unaligned *source*
- // positions, sourceNullWords holds unaligned *target* positions.
- std::set <int> targetNullWords;
- std::set <int> sourceNullWords;
-
- // NOTE: takes the gap map by value (a copy is made on every call).
- int closestGap(std::map <int,std::string> gap,int j1, int & gp);
- int firstOpenGap(std::vector <int> & coverageVector);
- std::string intToString(int);
- int getOpenGaps();
- int isTranslationOperation(int j);
- void removeReorderingOperations();
-
- void getMeCepts ( std::set <int> & eSide , std::set <int> & fSide , std::map <int , std::vector <int> > & tS , std::map <int , std::vector <int> > & sT);
-
-public:
-
- osmHypothesis();
- ~osmHypothesis() {};
- void generateOperations(int & startIndex, int j1 , int contFlag , Bitmap & coverageVector , std::string english , std::string german , std::set <int> & targetNullWords , std::vector <std::string> & currF);
- void generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes);
- void calculateOSMProb(OSMLM& ptrOp);
- void computeOSMFeature(int startIndex , Bitmap & coverageVector);
- void constructCepts(std::vector <int> & align , int startIndex , int endIndex, int targetPhraseLength);
- void setPhrases(std::vector <std::string> & val1 , std::vector <std::string> & val2) {
- currF = val1;
- currE = val2;
- }
- // Load j, E, gap and lmState from a previous osmState (no-op for NULL).
- void setState(const FFState* prev_state);
- // Store lmState, j, E and gap into state.
- void saveState(osmState &state);
- void print();
- void populateScores(std::vector <float> & scores , const int numFeatures);
- // Overload that replaces only the LM state.
- void setState(const lm::ngram::State & val) {
- lmState = val;
- }
-
-};
-
-} // namespace
-
-
-
diff --git a/contrib/moses2/FF/PhrasePenalty.cpp b/contrib/moses2/FF/PhrasePenalty.cpp
deleted file mode 100644
index 2a1764a0e..000000000
--- a/contrib/moses2/FF/PhrasePenalty.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * PhrasePenalty.cpp
- *
- * Created on: 27 Oct 2015
- * Author: hieu
- */
-
-#include "PhrasePenalty.h"
-#include "../Scores.h"
-
-namespace Moses2
-{
-
-// Forwards both arguments to the base class, then calls ReadParameters().
-PhrasePenalty::PhrasePenalty(size_t startInd, const std::string &line)
-  : StatelessFeatureFunction(startInd, line)
-{
-  ReadParameters();
-}
-
-// Nothing to release.
-PhrasePenalty::~PhrasePenalty()
-{
-}
-
-// Phrase-based variant: every target phrase contributes a constant 1.
-void PhrasePenalty::EvaluateInIsolation(
-  MemPool &pool,
-  const System &system,
-  const Phrase<Moses2::Word> &source,
-  const TargetPhraseImpl &targetPhrase,
-  Scores &scores,
-  SCORE &estimatedScore) const
-{
-  scores.PlusEquals(system, *this, 1);
-}
-
-// SCFG variant: same constant contribution of 1.
-void PhrasePenalty::EvaluateInIsolation(
-  MemPool &pool,
-  const System &system,
-  const Phrase<SCFG::Word> &source,
-  const TargetPhrase<SCFG::Word> &targetPhrase,
-  Scores &scores,
-  SCORE &estimatedScore) const
-{
-  scores.PlusEquals(system, *this, 1);
-}
-
-}
-
diff --git a/contrib/moses2/FF/PhrasePenalty.h b/contrib/moses2/FF/PhrasePenalty.h
deleted file mode 100644
index c2066356c..000000000
--- a/contrib/moses2/FF/PhrasePenalty.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * PhrasePenalty.h
- *
- * Created on: 27 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include "StatelessFeatureFunction.h"
-
-namespace Moses2
-{
-
-// Stateless feature; the implementation in PhrasePenalty.cpp adds a constant
-// score of 1 for every target phrase it evaluates.
-class PhrasePenalty: public StatelessFeatureFunction
-{
-public:
- PhrasePenalty(size_t startInd, const std::string &line);
- virtual ~PhrasePenalty();
-
- // Phrase-based variant.
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- // SCFG variant.
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
-};
-
-}
-
diff --git a/contrib/moses2/FF/PointerState.cpp b/contrib/moses2/FF/PointerState.cpp
deleted file mode 100644
index e69de29bb..000000000
--- a/contrib/moses2/FF/PointerState.cpp
+++ /dev/null
diff --git a/contrib/moses2/FF/PointerState.h b/contrib/moses2/FF/PointerState.h
deleted file mode 100644
index 41e6edf9f..000000000
--- a/contrib/moses2/FF/PointerState.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#pragma once
-
-#include <sstream>
-#include "FFState.h"
-
-namespace Moses2
-{
-
-// FFState that wraps a single opaque pointer. Hashing, equality and
-// ToString() all work on the pointer value itself, not on what it points to.
-struct PointerState: public FFState
-{
-  const void* lmstate;
-
-  // Start from a null pointer so hash(), operator== and ToString() are
-  // well-defined on a freshly created state.
-  explicit PointerState()
-    : lmstate(0)
-  {
-  }
-
-  // Intentionally not explicit: existing callers may rely on the implicit
-  // conversion from const void*.
-  PointerState(const void* lms)
-    : lmstate(lms)
-  {
-  }
-
-  virtual size_t hash() const
-  {
-    return reinterpret_cast<size_t>(lmstate);
-  }
-
-  // other is cast without a check, so it must really be a PointerState.
-  virtual bool operator==(const FFState& other) const
-  {
-    const PointerState& o = static_cast<const PointerState&>(other);
-    return lmstate == o.lmstate;
-  }
-
-  // Streams the pointer value.
-  virtual std::string ToString() const
-  {
-    std::stringstream sb;
-    sb << lmstate;
-    return sb.str();
-  }
-
-};
-
-}
-
diff --git a/contrib/moses2/FF/SkeletonStatefulFF.cpp b/contrib/moses2/FF/SkeletonStatefulFF.cpp
deleted file mode 100644
index d159794f0..000000000
--- a/contrib/moses2/FF/SkeletonStatefulFF.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * SkeletonStatefulFF.cpp
- *
- * Created on: 27 Oct 2015
- * Author: hieu
- */
-#include <sstream>
-#include "SkeletonStatefulFF.h"
-#include "../PhraseBased/Manager.h"
-#include "../PhraseBased/Hypothesis.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-// State used by the skeleton feature: one integer.
-class SkeletonState: public FFState
-{
-public:
-  int targetLen;
-
-  // targetLen is left unset here; EmptyHypothesisState() and
-  // EvaluateWhenApplied() assign it.
-  SkeletonState()
-  {
-  }
-
-  virtual size_t hash() const
-  {
-    return static_cast<size_t>(targetLen);
-  }
-
-  // o is cast without a check, so it must really be a SkeletonState.
-  virtual bool operator==(const FFState& o) const
-  {
-    return targetLen == static_cast<const SkeletonState&>(o).targetLen;
-  }
-
-  virtual std::string ToString() const
-  {
-    stringstream out;
-    out << targetLen;
-    return out.str();
-  }
-
-};
-
-////////////////////////////////////////////////////////////////////////////////////////
-// Forwards both arguments to the base class, then calls ReadParameters().
-SkeletonStatefulFF::SkeletonStatefulFF(size_t startInd, const std::string &line)
-  : StatefulFeatureFunction(startInd, line)
-{
-  ReadParameters();
-}
-
-// Nothing to release.
-SkeletonStatefulFF::~SkeletonStatefulFF()
-{
-}
-
-// Placement-construct a SkeletonState in memory obtained from pool.
-FFState* SkeletonStatefulFF::BlankState(MemPool &pool, const System &sys) const
-{
-  return new (pool.Allocate<SkeletonState>()) SkeletonState();
-}
-
-// The empty hypothesis starts with targetLen 0.
-void SkeletonStatefulFF::EmptyHypothesisState(
-  FFState &state,
-  const ManagerBase &mgr,
-  const InputType &input,
-  const Hypothesis &hypo) const
-{
-  static_cast<SkeletonState&>(state).targetLen = 0;
-}
-
-// Phrase-based variant: contributes nothing.
-void SkeletonStatefulFF::EvaluateInIsolation(
-  MemPool &pool,
-  const System &system,
-  const Phrase<Moses2::Word> &source,
-  const TargetPhraseImpl &targetPhrase,
-  Scores &scores,
-  SCORE &estimatedScore) const
-{
-}
-
-// SCFG variant: contributes nothing.
-void SkeletonStatefulFF::EvaluateInIsolation(
-  MemPool &pool,
-  const System &system,
-  const Phrase<SCFG::Word> &source,
-  const TargetPhrase<SCFG::Word> &targetPhrase,
-  Scores &scores,
-  SCORE &estimatedScore) const
-{
-}
-
-// Record the size of this hypothesis' target phrase in the new state.
-// prevState and scores are not used.
-void SkeletonStatefulFF::EvaluateWhenApplied(
-  const ManagerBase &mgr,
-  const Hypothesis &hypo,
-  const FFState &prevState,
-  Scores &scores,
-  FFState &state) const
-{
-  SkeletonState &next = static_cast<SkeletonState&>(state);
-  next.targetLen = hypo.GetTargetPhrase().GetSize();
-}
-
-// SCFG decoding is not supported by this skeleton.
-void SkeletonStatefulFF::EvaluateWhenApplied(
-  const SCFG::Manager &mgr,
-  const SCFG::Hypothesis &hypo,
-  int featureID,
-  Scores &scores,
-  FFState &state) const
-{
-  UTIL_THROW2("Not implemented");
-}
-
-}
-
diff --git a/contrib/moses2/FF/SkeletonStatefulFF.h b/contrib/moses2/FF/SkeletonStatefulFF.h
deleted file mode 100644
index bfa3ad870..000000000
--- a/contrib/moses2/FF/SkeletonStatefulFF.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * SkeletonStatefulFF.h
- *
- * Created on: 27 Oct 2015
- * Author: hieu
- */
-
-#ifndef SKELETONSTATEFULFF_H_
-#define SKELETONSTATEFULFF_H_
-
-#include "StatefulFeatureFunction.h"
-
-namespace Moses2
-{
-
-// Template for writing a new stateful feature function. The implementation in
-// SkeletonStatefulFF.cpp keeps the target phrase length as its state and adds
-// no scores.
-class SkeletonStatefulFF: public StatefulFeatureFunction
-{
-public:
- SkeletonStatefulFF(size_t startInd, const std::string &line);
- virtual ~SkeletonStatefulFF();
-
- // Allocate a state object from pool; its contents are not initialised.
- virtual FFState* BlankState(MemPool &pool, const System &sys) const;
- // Fill in the state for the empty hypothesis.
- virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const;
-
- // Phrase-based variant.
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- // SCFG variant.
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- // Phrase-based variant: writes the new state into state.
- virtual void EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const;
-
- // SCFG variant: the implementation throws "Not implemented".
- virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const;
-
-};
-
-}
-
-#endif /* SKELETONSTATEFULFF_H_ */
diff --git a/contrib/moses2/FF/SkeletonStatelessFF.cpp b/contrib/moses2/FF/SkeletonStatelessFF.cpp
deleted file mode 100644
index 981f9dd75..000000000
--- a/contrib/moses2/FF/SkeletonStatelessFF.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * SkeletonStatelessFF.cpp
- *
- * Created on: 27 Oct 2015
- * Author: hieu
- */
-#include "../Scores.h"
-
-#include "SkeletonStatelessFF.h"
-
-namespace Moses2
-{
-
-// Forwards both arguments to the base class, then calls ReadParameters().
-SkeletonStatelessFF::SkeletonStatelessFF(size_t startInd, const std::string &line)
-  : StatelessFeatureFunction(startInd, line)
-{
-  ReadParameters();
-}
-
-// Nothing to release.
-SkeletonStatelessFF::~SkeletonStatelessFF()
-{
-}
-
-// Phrase-based variant: contributes nothing.
-void SkeletonStatelessFF::EvaluateInIsolation(
-  MemPool &pool,
-  const System &system,
-  const Phrase<Moses2::Word> &source,
-  const TargetPhraseImpl &targetPhrase,
-  Scores &scores,
-  SCORE &estimatedScore) const
-{
-}
-
-// SCFG variant: contributes nothing.
-void SkeletonStatelessFF::EvaluateInIsolation(
-  MemPool &pool,
-  const System &system,
-  const Phrase<SCFG::Word> &source,
-  const TargetPhrase<SCFG::Word> &targetPhrase,
-  Scores &scores,
-  SCORE &estimatedScore) const
-{
-}
-
-}
-
diff --git a/contrib/moses2/FF/SkeletonStatelessFF.h b/contrib/moses2/FF/SkeletonStatelessFF.h
deleted file mode 100644
index 9be14bffe..000000000
--- a/contrib/moses2/FF/SkeletonStatelessFF.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * SkeletonStatelessFF.h
- *
- * Created on: 27 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include "StatelessFeatureFunction.h"
-
-namespace Moses2
-{
-
-// Template for writing a new stateless feature function. Both
-// EvaluateInIsolation() overloads are implemented as empty functions in
-// SkeletonStatelessFF.cpp.
-class SkeletonStatelessFF: public StatelessFeatureFunction
-{
-public:
- SkeletonStatelessFF(size_t startInd, const std::string &line);
- virtual ~SkeletonStatelessFF();
-
- // Phrase-based variant.
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- // SCFG variant.
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
-};
-
-}
-
diff --git a/contrib/moses2/FF/StatefulFeatureFunction.cpp b/contrib/moses2/FF/StatefulFeatureFunction.cpp
deleted file mode 100644
index 060338159..000000000
--- a/contrib/moses2/FF/StatefulFeatureFunction.cpp
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * StatefulFeatureFunction.cpp
- *
- * Created on: 24 Oct 2015
- * Author: hieu
- */
-#include <pthread.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-
-#include <boost/foreach.hpp>
-#include "StatefulFeatureFunction.h"
-#include "../PhraseBased/Hypothesis.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-// Forwards both arguments to FeatureFunction. m_statefulInd is not set here;
-// it is assigned through SetStatefulInd().
-StatefulFeatureFunction::StatefulFeatureFunction(size_t startInd, const std::string &line)
-  : FeatureFunction(startInd, line)
-{
-}
-
-// Nothing to release.
-StatefulFeatureFunction::~StatefulFeatureFunction()
-{
-}
-
-// Default batch implementation: evaluate this feature on each hypothesis of
-// the batch in order. system is not used here.
-void StatefulFeatureFunction::EvaluateWhenAppliedBatch(
-  const System &system,
-  const Batch &batch) const
-{
-  //cerr << "EvaluateWhenAppliedBatch:" << m_name << endl;
-#ifdef __linux
-  /*
-  Disabled experiment: pin the calling thread to one core while the batch runs.
-
-  pthread_t handle;
-  handle = pthread_self();
-
-  int s;
-  cpu_set_t cpusetOrig, cpuset;
-  s = pthread_getaffinity_np(handle, sizeof(cpu_set_t), &cpusetOrig);
-
-  CPU_ZERO(&cpuset);
-
-  int core = handle % 8;
-  core += 24;
-  CPU_SET(core, &cpuset);
-
-  s = pthread_setaffinity_np(handle, sizeof(cpu_set_t), &cpuset);
-  */
-#endif
-
-  for (size_t idx = 0; idx < batch.size(); ++idx) {
-    Hypothesis *current = batch[idx];
-    current->EvaluateWhenApplied(*this);
-  }
-
-#ifdef __linux
-  // s = pthread_setaffinity_np(handle, sizeof(cpu_set_t), &cpusetOrig);
-#endif
-}
-
-}
-
diff --git a/contrib/moses2/FF/StatefulFeatureFunction.h b/contrib/moses2/FF/StatefulFeatureFunction.h
deleted file mode 100644
index fffb1eea7..000000000
--- a/contrib/moses2/FF/StatefulFeatureFunction.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * StatefulFeatureFunction.h
- *
- * Created on: 24 Oct 2015
- * Author: hieu
- */
-
-#ifndef STATEFULFEATUREFUNCTION_H_
-#define STATEFULFEATUREFUNCTION_H_
-
-#include "FeatureFunction.h"
-#include "FFState.h"
-#include "../MemPool.h"
-
-namespace Moses2
-{
-
-class Hypothesis;
-class InputType;
-
-namespace SCFG
-{
-class Hypothesis;
-class Manager;
-}
-
-// Base class for feature functions that carry per-hypothesis state (FFState).
-// Concrete features implement state creation and both EvaluateWhenApplied()
-// overloads; EvaluateWhenAppliedBatch() has a default implementation.
-class StatefulFeatureFunction: public FeatureFunction
-{
-public:
- StatefulFeatureFunction(size_t startInd, const std::string &line);
- virtual ~StatefulFeatureFunction();
-
- // Index of this feature's slot in a hypothesis' array of FFState pointers
- // (HypothesisBase stores the blank state at m_ffStates[GetStatefulInd()]).
- // NOTE(review): the constructor in StatefulFeatureFunction.cpp does not
- // initialise m_statefulInd, so SetStatefulInd() must be called before
- // GetStatefulInd().
- void SetStatefulInd(size_t ind)
- {
- m_statefulInd = ind;
- }
- size_t GetStatefulInd() const
- {
- return m_statefulInd;
- }
-
- //! return an uninitialised state
- virtual FFState* BlankState(MemPool &pool, const System &sys) const = 0;
-
- //! return the state associated with the empty hypothesis for a given sentence
- virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const = 0;
-
- //! phrase-based variant: prevState is the input state, state receives the new one
- virtual void EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const = 0;
-
- //! SCFG variant
- virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const = 0;
-
- //! default: calls EvaluateWhenApplied(*this) on every hypothesis in batch
- virtual void EvaluateWhenAppliedBatch(
- const System &system,
- const Batch &batch) const;
-
-protected:
- size_t m_statefulInd;
-
-};
-
-}
-
-#endif /* STATEFULFEATUREFUNCTION_H_ */
diff --git a/contrib/moses2/FF/StatelessFeatureFunction.cpp b/contrib/moses2/FF/StatelessFeatureFunction.cpp
deleted file mode 100644
index 62fa35d3f..000000000
--- a/contrib/moses2/FF/StatelessFeatureFunction.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * StatelessFeatureFunction.cpp
- *
- * Created on: 24 Oct 2015
- * Author: hieu
- */
-
-#include "StatelessFeatureFunction.h"
-
-namespace Moses2
-{
-
-// Forwards both arguments to FeatureFunction; no further setup.
-StatelessFeatureFunction::StatelessFeatureFunction(size_t startInd, const std::string &line)
-  : FeatureFunction(startInd, line)
-{
-}
-
-// Nothing to release.
-StatelessFeatureFunction::~StatelessFeatureFunction()
-{
-}
-
-}
-
diff --git a/contrib/moses2/FF/StatelessFeatureFunction.h b/contrib/moses2/FF/StatelessFeatureFunction.h
deleted file mode 100644
index 249e4fdfe..000000000
--- a/contrib/moses2/FF/StatelessFeatureFunction.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * StatelessFeatureFunction.h
- *
- * Created on: 24 Oct 2015
- * Author: hieu
- */
-
-#ifndef STATELESSFEATUREFUNCTION_H_
-#define STATELESSFEATUREFUNCTION_H_
-
-#include "FeatureFunction.h"
-
-namespace Moses2
-{
-
-// Base class for feature functions that keep no per-hypothesis state.
-// Adds nothing to FeatureFunction beyond its own constructor and destructor.
-class StatelessFeatureFunction: public FeatureFunction
-{
-public:
- StatelessFeatureFunction(size_t startInd, const std::string &line);
- virtual ~StatelessFeatureFunction();
-};
-
-}
-
-#endif /* STATELESSFEATUREFUNCTION_H_ */
diff --git a/contrib/moses2/FF/WordPenalty.cpp b/contrib/moses2/FF/WordPenalty.cpp
deleted file mode 100644
index e8af47568..000000000
--- a/contrib/moses2/FF/WordPenalty.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * WordPenalty.cpp
- *
- * Created on: 28 Oct 2015
- * Author: hieu
- */
-
-#include "WordPenalty.h"
-#include "../TypeDef.h"
-#include "../Scores.h"
-#include "../Phrase.h"
-#include "../TargetPhrase.h"
-#include "../SCFG/Word.h"
-#include "../PhraseBased/TargetPhraseImpl.h"
-
-namespace Moses2
-{
-
-// Forwards both arguments to the base class, then calls ReadParameters().
-WordPenalty::WordPenalty(size_t startInd, const std::string &line)
-  : StatelessFeatureFunction(startInd, line)
-{
-  ReadParameters();
-}
-
-// Nothing to release.
-WordPenalty::~WordPenalty()
-{
-}
-
-// Phrase-based variant: the score is minus the number of target words.
-void WordPenalty::EvaluateInIsolation(
-  MemPool &pool,
-  const System &system,
-  const Phrase<Moses2::Word> &source,
-  const TargetPhraseImpl &targetPhrase,
-  Scores &scores,
-  SCORE &estimatedScore) const
-{
-  const SCORE penalty = -static_cast<SCORE>(targetPhrase.GetSize());
-  scores.PlusEquals(system, *this, penalty);
-}
-
-// SCFG variant: only terminal words count towards the penalty.
-void WordPenalty::EvaluateInIsolation(
-  MemPool &pool,
-  const System &system,
-  const Phrase<SCFG::Word> &source,
-  const TargetPhrase<SCFG::Word> &targetPhrase,
-  Scores &scores,
-  SCORE &estimatedScore) const
-{
-  size_t numTerminals = 0;
-  for (size_t pos = 0; pos < targetPhrase.GetSize(); ++pos) {
-    const SCFG::Word &word = targetPhrase[pos];
-    if (word.isNonTerminal)
-      continue;
-    ++numTerminals;
-  }
-
-  scores.PlusEquals(system, *this, -static_cast<SCORE>(numTerminals));
-}
-
-}
-
diff --git a/contrib/moses2/FF/WordPenalty.h b/contrib/moses2/FF/WordPenalty.h
deleted file mode 100644
index c322a15f7..000000000
--- a/contrib/moses2/FF/WordPenalty.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * WordPenalty.h
- *
- * Created on: 28 Oct 2015
- * Author: hieu
- */
-
-#ifndef WORDPENALTY_H_
-#define WORDPENALTY_H_
-
-#include "StatelessFeatureFunction.h"
-
-namespace Moses2
-{
-
-// Stateless feature that scores a target phrase with minus its word count
-// (see WordPenalty.cpp; the SCFG overload skips non-terminals).
-class WordPenalty: public StatelessFeatureFunction
-{
-public:
- WordPenalty(size_t startInd, const std::string &line);
- virtual ~WordPenalty();
-
- // Phrase-based variant.
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- // SCFG variant.
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
-};
-
-}
-
-#endif /* WORDPENALTY_H_ */
-
diff --git a/contrib/moses2/HypothesisBase.cpp b/contrib/moses2/HypothesisBase.cpp
deleted file mode 100644
index 8b65a0cdf..000000000
--- a/contrib/moses2/HypothesisBase.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Hypothesis.cpp
- *
- * Created on: 24 Oct 2015
- * Author: hieu
- */
-
-#include <boost/foreach.hpp>
-#include <stdlib.h>
-#include <deque>
-#include "HypothesisBase.h"
-#include "System.h"
-#include "Scores.h"
-#include "ManagerBase.h"
-#include "MemPool.h"
-#include "FF/StatefulFeatureFunction.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-//size_t g_numHypos = 0;
-
-HypothesisBase::HypothesisBase(MemPool &pool, const System &system)
-{
- m_scores = new (pool.Allocate<Scores>()) Scores(system, pool,
- system.featureFunctions.GetNumScores());
-
- // FF states
- const std::vector<const StatefulFeatureFunction*> &sfffs =
- system.featureFunctions.GetStatefulFeatureFunctions();
- size_t numStatefulFFs = sfffs.size();
- m_ffStates = (FFState **) pool.Allocate(sizeof(FFState*) * numStatefulFFs);
-
- BOOST_FOREACH(const StatefulFeatureFunction *sfff, sfffs){
- size_t statefulInd = sfff->GetStatefulInd();
- FFState *state = sfff->BlankState(pool, system);
- m_ffStates[statefulInd] = state;
-}
-}
-
-size_t HypothesisBase::hash() const
-{
- return hash(0);
-}
-
-size_t HypothesisBase::hash(size_t seed) const
-{
- size_t numStatefulFFs =
- GetManager().system.featureFunctions.GetStatefulFeatureFunctions().size();
-
- // states
- for (size_t i = 0; i < numStatefulFFs; ++i) {
- const FFState *state = m_ffStates[i];
- size_t hash = state->hash();
- boost::hash_combine(seed, hash);
- }
- return seed;
-
-}
-
-bool HypothesisBase::operator==(const HypothesisBase &other) const
-{
- size_t numStatefulFFs =
- GetManager().system.featureFunctions.GetStatefulFeatureFunctions().size();
-
- // states
- for (size_t i = 0; i < numStatefulFFs; ++i) {
- const FFState &thisState = *m_ffStates[i];
- const FFState &otherState = *other.m_ffStates[i];
- if (thisState != otherState) {
- return false;
- }
- }
- return true;
-
-}
-
-}
-
diff --git a/contrib/moses2/HypothesisBase.h b/contrib/moses2/HypothesisBase.h
deleted file mode 100644
index 6ef4d3891..000000000
--- a/contrib/moses2/HypothesisBase.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Hypothesis.h
- *
- * Created on: 24 Oct 2015
- * Author: hieu
- */
-#pragma once
-
-#include <iostream>
-#include <cstddef>
-#include "FF/FFState.h"
-#include "Scores.h"
-
-namespace Moses2
-{
-
-class ManagerBase;
-class Scores;
-
-class HypothesisBase
-{
-public:
- virtual ~HypothesisBase()
- {
- }
-
- inline ManagerBase &GetManager() const
- {
- return *m_mgr;
- }
-
- template<typename T>
- const T &Cast() const
- { return static_cast<const T&>(*this); }
-
- const Scores &GetScores() const
- { return *m_scores; }
- Scores &GetScores()
- { return *m_scores; }
-
- const FFState *GetState(size_t ind) const
- { return m_ffStates[ind]; }
- FFState *GetState(size_t ind)
- { return m_ffStates[ind]; }
-
- virtual size_t hash() const;
- virtual size_t hash(size_t seed) const;
- virtual bool operator==(const HypothesisBase &other) const;
-
- virtual SCORE GetFutureScore() const = 0;
- virtual void EvaluateWhenApplied() = 0;
-
- virtual std::string Debug(const System &system) const = 0;
-
-protected:
- ManagerBase *m_mgr;
- Scores *m_scores;
- FFState **m_ffStates;
-
- HypothesisBase(MemPool &pool, const System &system);
-};
-
-////////////////////////////////////////////////////////////////////////////////////
-class HypothesisFutureScoreOrderer
-{
-public:
- bool operator()(const HypothesisBase* a, const HypothesisBase* b) const
- {
- return a->GetFutureScore() > b->GetFutureScore();
- }
-};
-
-}
-
diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp
deleted file mode 100644
index a75113d58..000000000
--- a/contrib/moses2/HypothesisColl.cpp
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- * HypothesisColl.cpp
- *
- * Created on: 26 Feb 2016
- * Author: hieu
- */
-#include <iostream>
-#include <sstream>
-#include <algorithm>
-#include <boost/foreach.hpp>
-#include "HypothesisColl.h"
-#include "ManagerBase.h"
-#include "System.h"
-#include "MemPoolAllocator.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-HypothesisColl::HypothesisColl(const ManagerBase &mgr)
-:m_coll(MemPoolAllocator<const HypothesisBase*>(mgr.GetPool()))
-,m_sortedHypos(NULL)
-{
- m_bestScore = -std::numeric_limits<float>::infinity();
- m_worstScore = std::numeric_limits<float>::infinity();
-}
-
-const HypothesisBase *HypothesisColl::GetBestHypo() const
-{
- if (GetSize() == 0) {
- return NULL;
- }
- if (m_sortedHypos) {
- return (*m_sortedHypos)[0];
- }
-
- SCORE bestScore = -std::numeric_limits<SCORE>::infinity();
- const HypothesisBase *bestHypo;
- BOOST_FOREACH(const HypothesisBase *hypo, m_coll) {
- if (hypo->GetFutureScore() > bestScore) {
- bestScore = hypo->GetFutureScore();
- bestHypo = hypo;
- }
- }
- return bestHypo;
-}
-
-void HypothesisColl::Add(
- const ManagerBase &mgr,
- HypothesisBase *hypo,
- Recycler<HypothesisBase*> &hypoRecycle,
- ArcLists &arcLists)
-{
- size_t maxStackSize = mgr.system.options.search.stack_size;
-
- if (GetSize() > maxStackSize * 2) {
- //cerr << "maxStackSize=" << maxStackSize << " " << GetSize() << endl;
- PruneHypos(mgr, mgr.arcLists);
- }
-
- SCORE futureScore = hypo->GetFutureScore();
-
- /*
- cerr << "scores:"
- << futureScore << " "
- << m_bestScore << " "
- << GetSize() << " "
- << endl;
- */
- if (GetSize() >= maxStackSize && futureScore < m_worstScore) {
- // beam threshold or really bad hypo that won't make the pruning cut
- // as more hypos are added, the m_worstScore stat gets out of date and isn't the optimum cut-off point
- //cerr << "Discard, really bad score:" << hypo->Debug(mgr.system) << endl;
- hypoRecycle.Recycle(hypo);
- return;
- }
-
- StackAdd added = Add(hypo);
-
- size_t nbestSize = mgr.system.options.nbest.nbest_size;
- if (nbestSize) {
- arcLists.AddArc(added.added, hypo, added.other);
- }
- else {
- if (added.added) {
- if (added.other) {
- hypoRecycle.Recycle(added.other);
- }
- }
- else {
- hypoRecycle.Recycle(hypo);
- }
- }
-
- // update beam variables
- if (added.added) {
- if (futureScore > m_bestScore) {
- m_bestScore = futureScore;
- float beamWidth = mgr.system.options.search.beam_width;
- if ( m_bestScore + beamWidth > m_worstScore ) {
- m_worstScore = m_bestScore + beamWidth;
- }
- }
- else if (GetSize() <= maxStackSize && futureScore < m_worstScore) {
- m_worstScore = futureScore;
- }
- }
-}
-
-StackAdd HypothesisColl::Add(const HypothesisBase *hypo)
-{
- std::pair<_HCType::iterator, bool> addRet = m_coll.insert(hypo);
- //cerr << endl << "new=" << hypo->Debug(hypo->GetManager().system) << endl;
-
- // CHECK RECOMBINATION
- if (addRet.second) {
- // equiv hypo doesn't exists
- //cerr << "Added " << hypo << endl;
- return StackAdd(true, NULL);
- }
- else {
- HypothesisBase *hypoExisting = const_cast<HypothesisBase*>(*addRet.first);
- //cerr << "hypoExisting=" << hypoExisting->Debug(hypo->GetManager().system) << endl;
-
- if (hypo->GetFutureScore() > hypoExisting->GetFutureScore()) {
- // incoming hypo is better than the one we have
- const HypothesisBase * const &hypoExisting1 = *addRet.first;
- const HypothesisBase *&hypoExisting2 =
- const_cast<const HypothesisBase *&>(hypoExisting1);
- hypoExisting2 = hypo;
-
- //cerr << "Added " << hypo << " dicard existing " << hypoExisting2 << endl;
- return StackAdd(true, hypoExisting);
- }
- else {
- // already storing the best hypo. discard incoming hypo
- //cerr << "Keep existing " << hypoExisting << " dicard new " << hypo << endl;
- return StackAdd(false, hypoExisting);
- }
- }
-
- //assert(false);
-}
-
-const Hypotheses &HypothesisColl::GetSortedAndPrunedHypos(
- const ManagerBase &mgr,
- ArcLists &arcLists) const
-{
- if (m_sortedHypos == NULL) {
- // create sortedHypos first
- MemPool &pool = mgr.GetPool();
- m_sortedHypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool,
- m_coll.size());
-
- SortHypos(mgr, m_sortedHypos->GetArray());
-
- // prune
- Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycle();
-
- size_t maxStackSize = mgr.system.options.search.stack_size;
- if (maxStackSize && m_sortedHypos->size() > maxStackSize) {
- for (size_t i = maxStackSize; i < m_sortedHypos->size(); ++i) {
- HypothesisBase *hypo = const_cast<HypothesisBase*>((*m_sortedHypos)[i]);
- recycler.Recycle(hypo);
-
- // delete from arclist
- if (mgr.system.options.nbest.nbest_size) {
- arcLists.Delete(hypo);
- }
- }
- m_sortedHypos->resize(maxStackSize);
- }
-
- }
-
- return *m_sortedHypos;
-}
-
-void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists)
-{
- size_t maxStackSize = mgr.system.options.search.stack_size;
-
- Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycle();
-
- const HypothesisBase *sortedHypos[GetSize()];
- SortHypos(mgr, sortedHypos);
-
- // update worse score
- m_worstScore = sortedHypos[maxStackSize - 1]->GetFutureScore();
-
- // prune
- for (size_t i = maxStackSize; i < GetSize(); ++i) {
- HypothesisBase *hypo = const_cast<HypothesisBase*>(sortedHypos[i]);
-
- // delete from arclist
- if (mgr.system.options.nbest.nbest_size) {
- arcLists.Delete(hypo);
- }
-
- // delete from collection
- Delete(hypo);
-
- recycler.Recycle(hypo);
- }
-
-}
-
-void HypothesisColl::SortHypos(const ManagerBase &mgr, const HypothesisBase **sortedHypos) const
-{
- size_t maxStackSize = mgr.system.options.search.stack_size;
- //assert(maxStackSize); // can't do stack=0 - unlimited stack size. No-one ever uses that
- //assert(GetSize() > maxStackSize);
- //assert(sortedHypos.size() == GetSize());
-
- /*
- cerr << "UNSORTED hypos: ";
- BOOST_FOREACH(const HypothesisBase *hypo, m_coll) {
- cerr << hypo << "(" << hypo->GetFutureScore() << ")" << " ";
- }
- cerr << endl;
- */
- size_t ind = 0;
- BOOST_FOREACH(const HypothesisBase *hypo, m_coll){
- sortedHypos[ind] = hypo;
- ++ind;
- }
-
- size_t indMiddle;
- if (maxStackSize == 0) {
- indMiddle = GetSize();
- }
- else if (GetSize() > maxStackSize) {
- indMiddle = maxStackSize;
- }
- else {
- // GetSize() <= maxStackSize
- indMiddle = GetSize();
- }
-
- const HypothesisBase **iterMiddle = sortedHypos + indMiddle;
-
- std::partial_sort(
- sortedHypos,
- iterMiddle,
- sortedHypos + GetSize(),
- HypothesisFutureScoreOrderer());
-
- /*
- cerr << "sorted hypos: ";
- for (size_t i = 0; i < sortedHypos.size(); ++i) {
- const HypothesisBase *hypo = sortedHypos[i];
- cerr << hypo << " ";
- }
- cerr << endl;
- */
-}
-
-void HypothesisColl::Delete(const HypothesisBase *hypo)
-{
- //cerr << "hypo=" << hypo << " " << m_coll.size() << endl;
-
- size_t erased = m_coll.erase(hypo);
- UTIL_THROW_IF2(erased != 1, "couldn't erase hypo " << hypo);
-}
-
-void HypothesisColl::Clear()
-{
- m_sortedHypos = NULL;
- m_coll.clear();
-
- m_bestScore = -std::numeric_limits<float>::infinity();
- m_worstScore = std::numeric_limits<float>::infinity();
-}
-
-std::string HypothesisColl::Debug(const System &system) const
-{
- stringstream out;
- BOOST_FOREACH (const HypothesisBase *hypo, m_coll) {
- out << hypo->Debug(system);
- out << std::endl << std::endl;
- }
-
- return out.str();
-}
-
-} /* namespace Moses2 */
diff --git a/contrib/moses2/HypothesisColl.h b/contrib/moses2/HypothesisColl.h
deleted file mode 100644
index 81a3b25c3..000000000
--- a/contrib/moses2/HypothesisColl.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * HypothesisColl.h
- *
- * Created on: 26 Feb 2016
- * Author: hieu
- */
-#pragma once
-#include <boost/unordered_set.hpp>
-#include "HypothesisBase.h"
-#include "MemPoolAllocator.h"
-#include "Recycler.h"
-#include "Array.h"
-#include "legacy/Util2.h"
-
-namespace Moses2
-{
-
-class ManagerBase;
-class ArcLists;
-
-typedef Array<const HypothesisBase*> Hypotheses;
-
-class HypothesisColl
-{
-public:
- HypothesisColl(const ManagerBase &mgr);
-
- void Add(const ManagerBase &mgr,
- HypothesisBase *hypo,
- Recycler<HypothesisBase*> &hypoRecycle,
- ArcLists &arcLists);
-
- size_t GetSize() const
- { return m_coll.size(); }
-
- void Clear();
-
- const Hypotheses &GetSortedAndPrunedHypos(
- const ManagerBase &mgr,
- ArcLists &arcLists) const;
-
- const HypothesisBase *GetBestHypo() const;
-
- template<typename T>
- const T *GetBestHypo() const
- {
- const HypothesisBase *hypo = GetBestHypo();
- return hypo ? &hypo->Cast<T>() : NULL;
- }
-
- void Delete(const HypothesisBase *hypo);
-
- std::string Debug(const System &system) const;
-
-protected:
- typedef boost::unordered_set<const HypothesisBase*,
- UnorderedComparer<HypothesisBase>, UnorderedComparer<HypothesisBase>,
- MemPoolAllocator<const HypothesisBase*> > _HCType;
-
- _HCType m_coll;
- mutable Hypotheses *m_sortedHypos;
-
- SCORE m_bestScore;
- SCORE m_worstScore;
-
- StackAdd Add(const HypothesisBase *hypo);
-
- void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists);
- void SortHypos(const ManagerBase &mgr, const HypothesisBase **sortedHypos) const;
-
-};
-
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/InputPathBase.cpp b/contrib/moses2/InputPathBase.cpp
deleted file mode 100644
index 034122cc2..000000000
--- a/contrib/moses2/InputPathBase.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * InputPath.cpp
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include "InputPathBase.h"
-#include "TranslationModel/PhraseTable.h"
-
-namespace Moses2
-{
-InputPathBase::InputPathBase(MemPool &pool,
- const Range &range, size_t numPt, const InputPathBase *prefixPath) :
- range(range), prefixPath(prefixPath)
-{
-
-}
-
-}
-
diff --git a/contrib/moses2/InputPathBase.h b/contrib/moses2/InputPathBase.h
deleted file mode 100644
index d95d29e35..000000000
--- a/contrib/moses2/InputPathBase.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * InputPath.h
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include <iostream>
-#include <vector>
-#include "SubPhrase.h"
-#include "legacy/Range.h"
-
-namespace Moses2
-{
-
-class PhraseTable;
-
-class InputPathBase
-{
-public:
- const InputPathBase *prefixPath;
- Range range;
-
- InputPathBase(MemPool &pool, const Range &range,
- size_t numPt, const InputPathBase *prefixPath);
-
-};
-
-}
-
diff --git a/contrib/moses2/InputPathsBase.cpp b/contrib/moses2/InputPathsBase.cpp
deleted file mode 100644
index bcc57a7f7..000000000
--- a/contrib/moses2/InputPathsBase.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * InputPaths.cpp
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-#include <iostream>
-#include "InputPathsBase.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-InputPathsBase::~InputPathsBase()
-{
-}
-
-}
-
diff --git a/contrib/moses2/InputPathsBase.h b/contrib/moses2/InputPathsBase.h
deleted file mode 100644
index 861bbf9f7..000000000
--- a/contrib/moses2/InputPathsBase.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * InputPaths.h
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include <vector>
-#include "MemPool.h"
-
-namespace Moses2
-{
-
-class InputType;
-class System;
-class ManagerBase;
-class InputPathBase;
-
-class InputPathsBase
-{
- typedef std::vector<InputPathBase*> Coll;
-public:
- InputPathsBase()
- {
- }
- virtual ~InputPathsBase();
-
- //! iterators
- typedef Coll::iterator iterator;
- typedef Coll::const_iterator const_iterator;
-
- const_iterator begin() const
- {
- return m_inputPaths.begin();
- }
- const_iterator end() const
- {
- return m_inputPaths.end();
- }
-
- iterator begin()
- {
- return m_inputPaths.begin();
- }
- iterator end()
- {
- return m_inputPaths.end();
- }
-
- virtual void Init(const InputType &input, const ManagerBase &mgr) = 0;
-
-protected:
- Coll m_inputPaths;
-};
-
-}
-
diff --git a/contrib/moses2/InputType.cpp b/contrib/moses2/InputType.cpp
deleted file mode 100644
index 01169c162..000000000
--- a/contrib/moses2/InputType.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * InputType.cpp
- *
- * Created on: 14 Dec 2015
- * Author: hieu
- */
-
-#include "InputType.h"
-#include "System.h"
-
-namespace Moses2
-{
-//////////////////////////////////////////////////////////////////////////////
-InputType::XMLOption::XMLOption(MemPool &pool, const std::string &nodeName, size_t vStartPos)
-:startPos(vStartPos)
-,prob(0)
-,m_entity(NULL)
-{
- m_nodeName = pool.Allocate<char>(nodeName.size() + 1);
- strcpy(m_nodeName, nodeName.c_str());
-}
-
-void InputType::XMLOption::SetTranslation(MemPool &pool, const std::string &val)
-{
- m_translation = pool.Allocate<char>(val.size() + 1);
- strcpy(m_translation, val.c_str());
-}
-
-void InputType::XMLOption::SetEntity(MemPool &pool, const std::string &val)
-{
- m_entity = pool.Allocate<char>(val.size() + 1);
- strcpy(m_entity, val.c_str());
-}
-
-std::string InputType::XMLOption::Debug(const System &system) const
-{
- std::stringstream out;
- out << "[" << startPos << "," << phraseSize << "]="
- << m_nodeName << ","
- << m_translation << ","
- << prob;
- if (m_entity) {
- out << "," << m_entity;
- }
- return out.str();
-}
-
-//////////////////////////////////////////////////////////////////////////////
-
-InputType::InputType(MemPool &pool)
-:m_reorderingConstraint(pool)
-,m_xmlOptions(pool)
-,m_xmlCoverageMap(pool)
-{
-}
-
-InputType::~InputType()
-{
- // TODO Auto-generated destructor stub
-}
-
-void InputType::Init(const System &system, size_t size, int max_distortion)
-{
- m_reorderingConstraint.InitializeWalls(size, max_distortion);
-
- if (system.options.input.xml_policy != XmlPassThrough) {
- m_xmlCoverageMap.assign(size, false);
- }
-}
-
-void InputType::AddXMLOption(const System &system, const XMLOption *xmlOption)
-{
- m_xmlOptions.push_back(xmlOption);
-
- if (system.options.input.xml_policy != XmlPassThrough) {
- for(size_t j = xmlOption->startPos; j < xmlOption->startPos + xmlOption->phraseSize; ++j) {
- m_xmlCoverageMap[j]=true;
- }
- }
-}
-
-bool InputType::XmlOverlap(size_t startPos, size_t endPos) const
-{
- for (size_t pos = startPos; pos <= endPos ; pos++) {
- if (pos < m_xmlCoverageMap.size() && m_xmlCoverageMap[pos]) {
- return true;
- }
- }
- return false;
-}
-
-} /* namespace Moses2 */
diff --git a/contrib/moses2/InputType.h b/contrib/moses2/InputType.h
deleted file mode 100644
index 0a2aebfa1..000000000
--- a/contrib/moses2/InputType.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * InputType.h
- *
- * Created on: 14 Dec 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include "PhraseBased/ReorderingConstraint.h"
-#include "TypeDef.h"
-
-namespace Moses2
-{
-
-class InputType
-{
-public:
- //////////////////////////////////////////////////////////////////////////////
- class XMLOption
- {
- public:
- size_t startPos, phraseSize;
-
- SCORE prob;
-
- XMLOption(MemPool &pool, const std::string &nodeName, size_t vStartPos);
-
- const char *GetNodeName() const
- { return m_nodeName; }
-
- const char *GetTranslation() const
- { return m_translation; }
-
- const char *GetEntity() const
- { return m_entity; }
-
- void SetTranslation(MemPool &pool, const std::string &val);
- void SetEntity(MemPool &pool, const std::string &val);
-
- std::string Debug(const System &system) const;
- public:
- char *m_nodeName;
- char *m_translation;
- char *m_entity;
-
- };
-
- //////////////////////////////////////////////////////////////////////////////
-
- InputType(MemPool &pool);
- virtual ~InputType();
-
- virtual void Init(const System &system, size_t size, int max_distortion);
-
- ReorderingConstraint &GetReorderingConstraint()
- { return m_reorderingConstraint; }
-
- const ReorderingConstraint &GetReorderingConstraint() const
- { return m_reorderingConstraint; }
-
- const Vector<const XMLOption*> &GetXMLOptions() const
- { return m_xmlOptions; }
-
- void AddXMLOption(const System &system, const XMLOption *xmlOption);
-
- //! Returns true if there were any XML tags parsed that at least partially covered the range passed
- bool XmlOverlap(size_t startPos, size_t endPos) const;
-
-protected:
- ReorderingConstraint m_reorderingConstraint; /**< limits on reordering specified either by "-mp" switch or xml tags */
- Vector<const XMLOption*> m_xmlOptions;
- Vector<bool> m_xmlCoverageMap;
-
-};
-
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/Jamfile b/contrib/moses2/Jamfile
deleted file mode 100644
index 98e1c1e30..000000000
--- a/contrib/moses2/Jamfile
+++ /dev/null
@@ -1,181 +0,0 @@
-alias deps : ../..//z ../..//boost_iostreams ../..//boost_filesystem ../../moses/TranslationModel/CompactPT//cmph ../../moses//moses ;
-
- lib moses2_lib :
- AlignmentInfo.cpp
- AlignmentInfoCollection.cpp
- ArcLists.cpp
- EstimatedScores.cpp
- HypothesisBase.cpp
- HypothesisColl.cpp
- InputPathBase.cpp
- InputPathsBase.cpp
- InputType.cpp
- ManagerBase.cpp
- MemPool.cpp
- Phrase.cpp
- pugixml.cpp
- Scores.cpp
- SubPhrase.cpp
- System.cpp
- TargetPhrase.cpp
- TranslationTask.cpp
- TrellisPaths.cpp
- TypeDef.cpp
- Vector.cpp
- Weights.cpp
- Word.cpp
- FF/Distortion.cpp
- FF/FeatureFunction.cpp
- FF/FeatureFunctions.cpp
- FF/FeatureRegistry.cpp
- FF/PhrasePenalty.cpp
- FF/SkeletonStatefulFF.cpp
- FF/SkeletonStatelessFF.cpp
- FF/StatefulFeatureFunction.cpp
- FF/StatelessFeatureFunction.cpp
- FF/WordPenalty.cpp
-
- FF/LexicalReordering/BidirectionalReorderingState.cpp
- FF/LexicalReordering/HReorderingBackwardState.cpp
- FF/LexicalReordering/HReorderingForwardState.cpp
- FF/LexicalReordering/LexicalReordering.cpp
- FF/LexicalReordering/LRModel.cpp
- FF/LexicalReordering/LRState.cpp
- FF/LexicalReordering/PhraseBasedReorderingState.cpp
- FF/LexicalReordering/ReorderingStack.cpp
-
- FF/OSM/OpSequenceModel.cpp
- FF/OSM/KenOSM.cpp
- FF/OSM/osmHyp.cpp
-
- # LM/LanguageModelDALM.cpp
- LM/LanguageModel.cpp
- LM/KENLM.cpp
- LM/KENLMBatch.cpp
- LM/GPULM.cpp
-
- TranslationModel/PhraseTable.cpp
- TranslationModel/Transliteration.cpp
- TranslationModel/UnknownWordPenalty.cpp
- TranslationModel/Memory/PhraseTableMemory.cpp
-
- TranslationModel/CompactPT/PhraseTableCompact.cpp
- TranslationModel/CompactPT/BlockHashIndex.cpp
- TranslationModel/CompactPT/CmphStringVectorAdapter.cpp
- TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp
- TranslationModel/CompactPT/MurmurHash3.cpp
- TranslationModel/CompactPT/PhraseDecoder.cpp
- TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp
- TranslationModel/CompactPT/ThrowingFwrite.cpp
-
- TranslationModel/ProbingPT/ProbingPT.cpp
- TranslationModel/ProbingPT/hash.cpp
- TranslationModel/ProbingPT/line_splitter.cpp
- TranslationModel/ProbingPT/probing_hash_utils.cpp
- TranslationModel/ProbingPT/querying.cpp
- TranslationModel/ProbingPT/storing.cpp
- TranslationModel/ProbingPT/StoreVocab.cpp
- TranslationModel/ProbingPT/StoreTarget.cpp
- TranslationModel/ProbingPT/vocabid.cpp
-
- parameters/AllOptions.cpp
- parameters/BookkeepingOptions.cpp
- parameters/ContextParameters.cpp
- parameters/CubePruningOptions.cpp
- parameters/InputOptions.cpp
- parameters/LMBR_Options.cpp
- parameters/MBR_Options.cpp
- parameters/NBestOptions.cpp
- parameters/OOVHandlingOptions.cpp
- parameters/OptionsBaseClass.cpp
- parameters/ReorderingOptions.cpp
- parameters/ReportingOptions.cpp
- parameters/SearchOptions.cpp
- parameters/ServerOptions.cpp
- parameters/SyntaxOptions.cpp
-
- PhraseBased/Hypothesis.cpp
- PhraseBased/InputPath.cpp
- PhraseBased/InputPaths.cpp
- PhraseBased/Manager.cpp
- PhraseBased/PhraseImpl.cpp
- PhraseBased/ReorderingConstraint.cpp
- PhraseBased/TargetPhrases.cpp
- PhraseBased/Search.cpp
- PhraseBased/Sentence.cpp
- PhraseBased/TargetPhraseImpl.cpp
- PhraseBased/TrellisPath.cpp
-
- PhraseBased/Normal/Search.cpp
- PhraseBased/Normal/Stack.cpp
- PhraseBased/Normal/Stacks.cpp
-
- PhraseBased/CubePruningMiniStack/Misc.cpp
- PhraseBased/CubePruningMiniStack/Search.cpp
- PhraseBased/CubePruningMiniStack/Stack.cpp
-
-# PhraseBased/CubePruningCardinalStack/Misc.cpp
-# PhraseBased/CubePruningCardinalStack/Search.cpp
-# PhraseBased/CubePruningCardinalStack/Stack.cpp
-
-# PhraseBased/CubePruningBitmapStack/Misc.cpp
-# PhraseBased/CubePruningBitmapStack/Search.cpp
-# PhraseBased/CubePruningBitmapStack/Stack.cpp
-
-# PhraseBased/CubePruningPerBitmap/Misc.cpp
-# PhraseBased/CubePruningPerBitmap/Search.cpp
-# PhraseBased/CubePruningPerBitmap/Stacks.cpp
-
-# PhraseBased/CubePruningPerMiniStack/Misc.cpp
-# PhraseBased/CubePruningPerMiniStack/Search.cpp
-# PhraseBased/CubePruningPerMiniStack/Stacks.cpp
-
- legacy/Bitmap.cpp
- legacy/Bitmaps.cpp
- legacy/Factor.cpp
- legacy/FactorCollection.cpp
- legacy/InputFileStream.cpp
- legacy/Matrix.cpp
- legacy/OutputFileStream.cpp
- legacy/Parameter.cpp
- legacy/Range.cpp
- legacy/Range.cpp
- legacy/ThreadPool.cpp
- legacy/Timer.cpp
- legacy/Util2.cpp
-
- SCFG/ActiveChart.cpp
- SCFG/Hypothesis.cpp
- SCFG/InputPath.cpp
- SCFG/InputPaths.cpp
- SCFG/Manager.cpp
- SCFG/Misc.cpp
- SCFG/PhraseImpl.cpp
- SCFG/Sentence.cpp
- SCFG/Stack.cpp
- SCFG/Stacks.cpp
- SCFG/TargetPhraseImpl.cpp
- SCFG/TargetPhrases.cpp
- SCFG/Word.cpp
- SCFG/nbest/KBestExtractor.cpp
- SCFG/nbest/NBest.cpp
- SCFG/nbest/NBests.cpp
- SCFG/nbest/NBestColl.cpp
-
- server/Server.cpp
- server/Translator.cpp
- server/TranslationRequest.cpp
-
- deps ;
-
-exe moses2 : Main.cpp moses2_lib ;
-
-if [ xmlrpc ] {
- echo "Building Moses2" ;
- alias programs : moses2 ;
-}
-else {
- echo "Not building Moses2" ;
- alias programs : ;
-}
-
diff --git a/contrib/moses2/LM/GPULM.cpp b/contrib/moses2/LM/GPULM.cpp
deleted file mode 100644
index f2ff7b7e7..000000000
--- a/contrib/moses2/LM/GPULM.cpp
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * GPULM.cpp
- *
- * Created on: 4 Nov 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include <sstream>
-#include <vector>
-
-#include <pthread.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <unistd.h>
-
-#include "GPULM.h"
-#include "../Phrase.h"
-#include "../Scores.h"
-#include "../System.h"
-#include "../PhraseBased/Hypothesis.h"
-#include "../PhraseBased/Manager.h"
-#include "../PhraseBased/TargetPhraseImpl.h"
-#include "util/exception.hh"
-#include "../legacy/FactorCollection.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-struct GPULMState: public FFState
-{
- virtual std::string ToString() const
- {
- return "GPULMState";
- }
-
- virtual size_t hash() const
- {
- return boost::hash_value(lastWords);
- }
-
- virtual bool operator==(const FFState& other) const
- {
- const GPULMState &otherCast = static_cast<const GPULMState&>(other);
- bool ret = lastWords == otherCast.lastWords;
-
- return ret;
- }
-
- void SetContext(const Context &context)
- {
- lastWords = context;
- if (lastWords.size()) {
- lastWords.resize(lastWords.size() - 1);
- }
- }
-
- Context lastWords;
-};
-
-
-/////////////////////////////////////////////////////////////////
-GPULM::GPULM(size_t startInd, const std::string &line)
-:StatefulFeatureFunction(startInd, line)
-{
- cerr << "GPULM::GPULM" << endl;
- ReadParameters();
-}
-
-GPULM::~GPULM()
-{
- // TODO Auto-generated destructor stub
-}
-
-void GPULM::Load(System &system)
-{
- cerr << "GPULM::Load" << endl;
- FactorCollection &fc = system.GetVocab();
-
- m_bos = fc.AddFactor(BOS_, system, false);
- m_eos = fc.AddFactor(EOS_, system, false);
-
- FactorCollection &collection = system.GetVocab();
-}
-
-FFState* GPULM::BlankState(MemPool &pool, const System &sys) const
-{
- GPULMState *ret = new (pool.Allocate<GPULMState>()) GPULMState();
- return ret;
-}
-
-//! return the state associated with the empty hypothesis for a given sentence
-void GPULM::EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const
-{
- GPULMState &stateCast = static_cast<GPULMState&>(state);
- stateCast.lastWords.push_back(m_bos);
-}
-
-void GPULM::EvaluateInIsolation(MemPool &pool, const System &system,
- const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
-{
- if (targetPhrase.GetSize() == 0) {
- return;
- }
-
- SCORE score = 0;
- SCORE nonFullScore = 0;
- Context context;
-// context.push_back(m_bos);
-
- context.reserve(m_order);
- for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
- const Factor *factor = targetPhrase[i][m_factorType];
- ShiftOrPush(context, factor);
-
- if (context.size() == m_order) {
- //std::pair<SCORE, void*> fromScoring = Score(context);
- //score += fromScoring.first;
- }
- else {
- //std::pair<SCORE, void*> fromScoring = Score(context);
- //nonFullScore += fromScoring.first;
- }
- }
-
-}
-
-void GPULM::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-void GPULM::EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-void GPULM::SetParameter(const std::string& key,
- const std::string& value)
-{
- //cerr << "key=" << key << " " << value << endl;
- if (key == "path") {
- m_path = value;
- }
- else if (key == "order") {
- m_order = Scan<size_t>(value);
- }
- else if (key == "factor") {
- m_factorType = Scan<FactorType>(value);
- }
- else {
- StatefulFeatureFunction::SetParameter(key, value);
- }
-
- //cerr << "SetParameter done" << endl;
-}
-
-void GPULM::EvaluateWhenAppliedBatch(
- const System &system,
- const Batch &batch) const
-{
- // create list of ngrams
- std::vector<std::pair<Hypothesis*, Context> > contexts;
-
- for (size_t i = 0; i < batch.size(); ++i) {
- Hypothesis *hypo = batch[i];
- CreateNGram(contexts, *hypo);
- }
-
- // score ngrams
- for (size_t i = 0; i < contexts.size(); ++i) {
- const Context &context = contexts[i].second;
- Hypothesis *hypo = contexts[i].first;
- SCORE score = Score(context);
- Scores &scores = hypo->GetScores();
- scores.PlusEquals(system, *this, score);
- }
-
-
-}
-
-void GPULM::CreateNGram(std::vector<std::pair<Hypothesis*, Context> > &contexts, Hypothesis &hypo) const
-{
- const TargetPhrase<Moses2::Word> &tp = hypo.GetTargetPhrase();
-
- if (tp.GetSize() == 0) {
- return;
- }
-
- const Hypothesis *prevHypo = hypo.GetPrevHypo();
- assert(prevHypo);
- const FFState *prevState = prevHypo->GetState(GetStatefulInd());
- assert(prevState);
- const GPULMState &prevStateCast = static_cast<const GPULMState&>(*prevState);
-
- Context context = prevStateCast.lastWords;
- context.reserve(m_order);
-
- for (size_t i = 0; i < tp.GetSize(); ++i) {
- const Word &word = tp[i];
- const Factor *factor = word[m_factorType];
- ShiftOrPush(context, factor);
-
- std::pair<Hypothesis*, Context> ele(&hypo, context);
- contexts.push_back(ele);
- }
-
- FFState *state = hypo.GetState(GetStatefulInd());
- GPULMState &stateCast = static_cast<GPULMState&>(*state);
- stateCast.SetContext(context);
-}
-
-void GPULM::ShiftOrPush(std::vector<const Factor*> &context,
- const Factor *factor) const
-{
- if (context.size() < m_order) {
- context.resize(context.size() + 1);
- }
- assert(context.size());
-
- for (size_t i = context.size() - 1; i > 0; --i) {
- context[i] = context[i - 1];
- }
-
- context[0] = factor;
-}
-
-SCORE GPULM::Score(const Context &context) const
-{
- return 444;
-}
-
-void GPULM::EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-}
-
diff --git a/contrib/moses2/LM/GPULM.h b/contrib/moses2/LM/GPULM.h
deleted file mode 100644
index ad236ef95..000000000
--- a/contrib/moses2/LM/GPULM.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * KENLM.h
- *
- * Created on: 4 Nov 2015
- * Author: hieu
- */
-#pragma once
-
-#include <boost/shared_ptr.hpp>
-#include <boost/bind.hpp>
-#include <boost/thread.hpp>
-#include <pthread.h>
-
-#include "../FF/StatefulFeatureFunction.h"
-#include "lm/model.hh"
-#include "../legacy/Factor.h"
-#include "../legacy/Util2.h"
-#include "../Word.h"
-#include "../TypeDef.h"
-
-namespace Moses2
-{
-
-class Word;
-
-class GPULM: public StatefulFeatureFunction
-{
-public:
- GPULM(size_t startInd, const std::string &line);
-
- virtual ~GPULM();
-
- virtual void Load(System &system);
-
- void SetParameter(const std::string& key,
- const std::string& value);
-
- virtual FFState* BlankState(MemPool &pool, const System &sys) const;
-
- //! return the state associated with the empty hypothesis for a given sentence
- virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const;
-
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- virtual void EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const;
-
- virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const;
-
- virtual void EvaluateWhenAppliedBatch(
- const System &system,
- const Batch &batch) const;
-
-protected:
- std::string m_path;
- FactorType m_factorType;
- util::LoadMethod m_load_method;
- const Factor *m_bos;
- const Factor *m_eos;
- size_t m_order;
-
- inline lm::WordIndex TranslateID(const Word &word) const
- {
- std::size_t factor = word[m_factorType]->GetId();
- return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
- }
-
- std::vector<lm::WordIndex> m_lmIdLookup;
-
- // batch
- void CreateNGram(std::vector<std::pair<Hypothesis*, Context> > &contexts, Hypothesis &hypo) const;
-
- void ShiftOrPush(std::vector<const Factor*> &context,
- const Factor *factor) const;
-
- SCORE Score(const Context &context) const;
-};
-
-}
diff --git a/contrib/moses2/LM/KENLM.cpp b/contrib/moses2/LM/KENLM.cpp
deleted file mode 100644
index 3173392cd..000000000
--- a/contrib/moses2/LM/KENLM.cpp
+++ /dev/null
@@ -1,601 +0,0 @@
-/*
- * KENLM.cpp
- *
- * Created on: 4 Nov 2015
- * Author: hieu
- */
-#include <sstream>
-#include <vector>
-#include "KENLM.h"
-#include "../Phrase.h"
-#include "../Scores.h"
-#include "../System.h"
-#include "../PhraseBased/Hypothesis.h"
-#include "../PhraseBased/Manager.h"
-#include "../PhraseBased/TargetPhraseImpl.h"
-#include "lm/state.hh"
-#include "lm/left.hh"
-#include "util/exception.hh"
-#include "util/tokenize_piece.hh"
-#include "util/string_stream.hh"
-#include "../legacy/FactorCollection.h"
-#include "../SCFG/TargetPhraseImpl.h"
-#include "../SCFG/Hypothesis.h"
-#include "../SCFG/Manager.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-struct KenLMState: public FFState
-{
- lm::ngram::State state;
- virtual size_t hash() const
- {
- size_t ret = hash_value(state);
- return ret;
- }
- virtual bool operator==(const FFState& o) const
- {
- const KenLMState &other = static_cast<const KenLMState &>(o);
- bool ret = state == other.state;
- return ret;
- }
-
- virtual std::string ToString() const
- {
- stringstream ss;
- for (size_t i = 0; i < state.Length(); ++i) {
- ss << state.words[i] << " ";
- }
- return ss.str();
- }
-
-};
-
-/////////////////////////////////////////////////////////////////
-class LanguageModelChartStateKenLM : public FFState
-{
-public:
- LanguageModelChartStateKenLM() {}
-
- const lm::ngram::ChartState &GetChartState() const {
- return m_state;
- }
- lm::ngram::ChartState &GetChartState() {
- return m_state;
- }
-
- size_t hash() const {
- size_t ret = hash_value(m_state);
- return ret;
- }
- virtual bool operator==(const FFState& o) const {
- const LanguageModelChartStateKenLM &other = static_cast<const LanguageModelChartStateKenLM &>(o);
- bool ret = m_state == other.m_state;
- return ret;
- }
-
- virtual std::string ToString() const
- {
- return "LanguageModelChartStateKenLM";
- }
-
-private:
- lm::ngram::ChartState m_state;
-};
-
-/////////////////////////////////////////////////////////////////
-class MappingBuilder: public lm::EnumerateVocab
-{
-public:
- MappingBuilder(FactorCollection &factorCollection, System &system,
- std::vector<lm::WordIndex> &mapping) :
- m_factorCollection(factorCollection), m_system(system), m_mapping(mapping)
- {
- }
-
- void Add(lm::WordIndex index, const StringPiece &str)
- {
- std::size_t factorId = m_factorCollection.AddFactor(str, m_system, false)->GetId();
- if (m_mapping.size() <= factorId) {
- // 0 is <unk> :-)
- m_mapping.resize(factorId + 1);
- }
- m_mapping[factorId] = index;
- }
-
-private:
- FactorCollection &m_factorCollection;
- std::vector<lm::WordIndex> &m_mapping;
- System &m_system;
-};
-
-/////////////////////////////////////////////////////////////////
-template<class Model>
-KENLM<Model>::KENLM(size_t startInd, const std::string &line,
- const std::string &file, FactorType factorType,
- util::LoadMethod load_method) :
- StatefulFeatureFunction(startInd, line), m_path(file), m_factorType(
- factorType), m_load_method(load_method)
-{
- ReadParameters();
-}
-
-template<class Model>
-KENLM<Model>::~KENLM()
-{
- // TODO Auto-generated destructor stub
-}
-
-template<class Model>
-void KENLM<Model>::Load(System &system)
-{
- FactorCollection &fc = system.GetVocab();
-
- m_bos = fc.AddFactor(BOS_, system, false);
- m_eos = fc.AddFactor(EOS_, system, false);
-
- lm::ngram::Config config;
- config.messages = NULL;
-
- FactorCollection &collection = system.GetVocab();
- MappingBuilder builder(collection, system, m_lmIdLookup);
- config.enumerate_vocab = &builder;
- config.load_method = m_load_method;
-
- m_ngram.reset(new Model(m_path.c_str(), config));
-}
-
-template<class Model>
-FFState* KENLM<Model>::BlankState(MemPool &pool, const System &sys) const
-{
- FFState *ret;
- if (sys.isPb) {
- ret = new (pool.Allocate<KenLMState>()) KenLMState();
- }
- else {
- ret = new (pool.Allocate<LanguageModelChartStateKenLM>()) LanguageModelChartStateKenLM();
- }
- return ret;
-}
-
-//! return the state associated with the empty hypothesis for a given sentence
-template<class Model>
-void KENLM<Model>::EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const
-{
- KenLMState &stateCast = static_cast<KenLMState&>(state);
- stateCast.state = m_ngram->BeginSentenceState();
-}
-
-template<class Model>
-void KENLM<Model>::EvaluateInIsolation(MemPool &pool, const System &system,
- const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
-{
- // contains factors used by this LM
- float fullScore, nGramScore;
- size_t oovCount;
-
- CalcScore(targetPhrase, fullScore, nGramScore, oovCount);
-
- float estimateScore = fullScore - nGramScore;
-
- bool GetLMEnableOOVFeature = false;
- if (GetLMEnableOOVFeature) {
- float scoresVec[2], estimateScoresVec[2];
- scoresVec[0] = nGramScore;
- scoresVec[1] = oovCount;
- scores.PlusEquals(system, *this, scoresVec);
-
- estimateScoresVec[0] = estimateScore;
- estimateScoresVec[1] = 0;
- SCORE weightedScore = Scores::CalcWeightedScore(system, *this,
- estimateScoresVec);
- estimatedScore += weightedScore;
- }
- else {
- scores.PlusEquals(system, *this, nGramScore);
-
- SCORE weightedScore = Scores::CalcWeightedScore(system, *this,
- estimateScore);
- estimatedScore += weightedScore;
- }
-}
-
-template<class Model>
-void KENLM<Model>::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
-{
- // contains factors used by this LM
- float fullScore, nGramScore;
- size_t oovCount;
-
- CalcScore(targetPhrase, fullScore, nGramScore, oovCount);
-
- //float estimateScore = fullScore - nGramScore;
-
- // all LM scores are estimated
- float estimateScore = fullScore;
- nGramScore = 0;
-
- bool GetLMEnableOOVFeature = false;
- if (GetLMEnableOOVFeature) {
- float scoresVec[2], estimateScoresVec[2];
- scoresVec[0] = nGramScore;
- scoresVec[1] = oovCount;
- scores.PlusEquals(system, *this, scoresVec);
-
- estimateScoresVec[0] = estimateScore;
- estimateScoresVec[1] = 0;
- SCORE weightedScore = Scores::CalcWeightedScore(system, *this,
- estimateScoresVec);
- estimatedScore += weightedScore;
- }
- else {
- scores.PlusEquals(system, *this, nGramScore);
-
- SCORE weightedScore = Scores::CalcWeightedScore(system, *this,
- estimateScore);
- estimatedScore += weightedScore;
- }
-}
-
-template<class Model>
-void KENLM<Model>::EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const
-{
- KenLMState &stateCast = static_cast<KenLMState&>(state);
-
- const System &system = mgr.system;
-
- const lm::ngram::State &in_state =
- static_cast<const KenLMState&>(prevState).state;
-
- if (!hypo.GetTargetPhrase().GetSize()) {
- stateCast.state = in_state;
- return;
- }
-
- const std::size_t begin = hypo.GetCurrTargetWordsRange().GetStartPos();
- //[begin, end) in STL-like fashion.
- const std::size_t end = hypo.GetCurrTargetWordsRange().GetEndPos() + 1;
- const std::size_t adjust_end = std::min(end, begin + m_ngram->Order() - 1);
-
- std::size_t position = begin;
- typename Model::State aux_state;
- typename Model::State *state0 = &stateCast.state, *state1 = &aux_state;
-
- float score = m_ngram->Score(in_state, TranslateID(hypo.GetWord(position)),
- *state0);
- ++position;
- for (; position < adjust_end; ++position) {
- score += m_ngram->Score(*state0, TranslateID(hypo.GetWord(position)),
- *state1);
- std::swap(state0, state1);
- }
-
- if (hypo.GetBitmap().IsComplete()) {
- // Score end of sentence.
- std::vector<lm::WordIndex> indices(m_ngram->Order() - 1);
- const lm::WordIndex *last = LastIDs(hypo, &indices.front());
- score += m_ngram->FullScoreForgotState(&indices.front(), last,
- m_ngram->GetVocabulary().EndSentence(), stateCast.state).prob;
- }
- else if (adjust_end < end) {
- // Get state after adding a long phrase.
- std::vector<lm::WordIndex> indices(m_ngram->Order() - 1);
- const lm::WordIndex *last = LastIDs(hypo, &indices.front());
- m_ngram->GetState(&indices.front(), last, stateCast.state);
- }
- else if (state0 != &stateCast.state) {
- // Short enough phrase that we can just reuse the state.
- stateCast.state = *state0;
- }
-
- score = TransformLMScore(score);
-
- bool OOVFeatureEnabled = false;
- if (OOVFeatureEnabled) {
- std::vector<float> scoresVec(2);
- scoresVec[0] = score;
- scoresVec[1] = 0.0;
- scores.PlusEquals(system, *this, scoresVec);
- }
- else {
- scores.PlusEquals(system, *this, score);
- }
-}
-
-template<class Model>
-void KENLM<Model>::CalcScore(const Phrase<Moses2::Word> &phrase, float &fullScore,
- float &ngramScore, std::size_t &oovCount) const
-{
- fullScore = 0;
- ngramScore = 0;
- oovCount = 0;
-
- if (!phrase.GetSize()) return;
-
- lm::ngram::ChartState discarded_sadly;
- lm::ngram::RuleScore<Model> scorer(*m_ngram, discarded_sadly);
-
- size_t position;
- if (m_bos == phrase[0][m_factorType]) {
- scorer.BeginSentence();
- position = 1;
- }
- else {
- position = 0;
- }
-
- size_t ngramBoundary = m_ngram->Order() - 1;
-
- size_t end_loop = std::min(ngramBoundary, phrase.GetSize());
- for (; position < end_loop; ++position) {
- const Word &word = phrase[position];
- lm::WordIndex index = TranslateID(word);
- scorer.Terminal(index);
- if (!index) ++oovCount;
- }
- float before_boundary = fullScore + scorer.Finish();
- for (; position < phrase.GetSize(); ++position) {
- const Word &word = phrase[position];
- lm::WordIndex index = TranslateID(word);
- scorer.Terminal(index);
- if (!index) ++oovCount;
- }
- fullScore += scorer.Finish();
-
- ngramScore = TransformLMScore(fullScore - before_boundary);
- fullScore = TransformLMScore(fullScore);
-}
-
-template<class Model>
-void KENLM<Model>::CalcScore(const Phrase<SCFG::Word> &phrase, float &fullScore,
- float &ngramScore, std::size_t &oovCount) const
-{
- fullScore = 0;
- ngramScore = 0;
- oovCount = 0;
-
- if (!phrase.GetSize()) return;
-
- lm::ngram::ChartState discarded_sadly;
- lm::ngram::RuleScore<Model> scorer(*m_ngram, discarded_sadly);
-
- size_t position;
- if (m_bos == phrase[0][m_factorType]) {
- scorer.BeginSentence();
- position = 1;
- } else {
- position = 0;
- }
-
- size_t ngramBoundary = m_ngram->Order() - 1;
-
- size_t end_loop = std::min(ngramBoundary, phrase.GetSize());
- for (; position < end_loop; ++position) {
- const SCFG::Word &word = phrase[position];
- if (word.isNonTerminal) {
- fullScore += scorer.Finish();
- scorer.Reset();
- } else {
- lm::WordIndex index = TranslateID(word);
- scorer.Terminal(index);
- if (!index) ++oovCount;
- }
- }
- float before_boundary = fullScore + scorer.Finish();
- for (; position < phrase.GetSize(); ++position) {
- const SCFG::Word &word = phrase[position];
- if (word.isNonTerminal) {
- fullScore += scorer.Finish();
- scorer.Reset();
- } else {
- lm::WordIndex index = TranslateID(word);
- scorer.Terminal(index);
- if (!index) ++oovCount;
- }
- }
- fullScore += scorer.Finish();
-
- ngramScore = TransformLMScore(fullScore - before_boundary);
- fullScore = TransformLMScore(fullScore);
-}
-
-// Convert last words of hypothesis into vocab ids, returning an end pointer.
-template<class Model>
-lm::WordIndex *KENLM<Model>::LastIDs(const Hypothesis &hypo,
- lm::WordIndex *indices) const
-{
- lm::WordIndex *index = indices;
- lm::WordIndex *end = indices + m_ngram->Order() - 1;
- int position = hypo.GetCurrTargetWordsRange().GetEndPos();
- for (;; ++index, --position) {
- if (index == end) return index;
- if (position == -1) {
- *index = m_ngram->GetVocabulary().BeginSentence();
- return index + 1;
- }
- *index = TranslateID(hypo.GetWord(position));
- }
-}
-
-template<class Model>
-void KENLM<Model>::EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const
-{
- LanguageModelChartStateKenLM &newState = static_cast<LanguageModelChartStateKenLM&>(state);
- lm::ngram::RuleScore<Model> ruleScore(*m_ngram, newState.GetChartState());
- const SCFG::TargetPhraseImpl &target = hypo.GetTargetPhrase();
- const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
- target.GetAlignNonTerm().GetNonTermIndexMap();
-
- const size_t size = target.GetSize();
- size_t phrasePos = 0;
- // Special cases for first word.
- if (size) {
- const SCFG::Word &word = target[0];
- if (word[m_factorType] == m_bos) {
- // Begin of sentence
- ruleScore.BeginSentence();
- phrasePos++;
- } else if (word.isNonTerminal) {
- // Non-terminal is first so we can copy instead of rescoring.
- const SCFG::Hypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndexMap[phrasePos]);
- const lm::ngram::ChartState &prevState = static_cast<const LanguageModelChartStateKenLM*>(prevHypo->GetState(featureID))->GetChartState();
- ruleScore.BeginNonTerminal(prevState);
- phrasePos++;
- }
- }
-
- for (; phrasePos < size; phrasePos++) {
- const SCFG::Word &word = target[phrasePos];
- if (word.isNonTerminal) {
- const SCFG::Hypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndexMap[phrasePos]);
- const lm::ngram::ChartState &prevState = static_cast<const LanguageModelChartStateKenLM*>(prevHypo->GetState(featureID))->GetChartState();
- ruleScore.NonTerminal(prevState);
- } else {
- ruleScore.Terminal(TranslateID(word));
- }
- }
-
- float score = ruleScore.Finish();
- score = TransformLMScore(score);
-
- // take out score from loading. This needs reworking
- //score -= target.GetScores().GetScores(*this)[0];
-
- bool OOVFeatureEnabled = false;
- if (OOVFeatureEnabled) {
- std::vector<float> scoresVec(2);
- scoresVec[0] = score;
- scoresVec[1] = 0.0;
- scores.PlusEquals(mgr.system, *this, scoresVec);
- } else {
- scores.PlusEquals(mgr.system, *this, score);
- }
-}
-
-///////////////////////////////////////////////////////////////////////////
-
-/* Instantiate LanguageModelKen here. Tells the compiler to generate code
- * for the instantiations' non-inline member functions in this file.
- * Otherwise, depending on the compiler, those functions may not be present
- * at link time.
- */
-template class KENLM<lm::ngram::ProbingModel> ;
-template class KENLM<lm::ngram::RestProbingModel> ;
-template class KENLM<lm::ngram::TrieModel> ;
-template class KENLM<lm::ngram::ArrayTrieModel> ;
-template class KENLM<lm::ngram::QuantTrieModel> ;
-template class KENLM<lm::ngram::QuantArrayTrieModel> ;
-
-FeatureFunction *ConstructKenLM(size_t startInd, const std::string &lineOrig)
-{
- FactorType factorType = 0;
- string filePath;
- util::LoadMethod load_method = util::POPULATE_OR_READ;
-
- util::TokenIter<util::SingleCharacter, true> argument(lineOrig, ' ');
- ++argument; // KENLM
-
- util::StringStream line;
- line << "KENLM";
-
- for (; argument; ++argument) {
- const char *equals = std::find(argument->data(),
- argument->data() + argument->size(), '=');
- UTIL_THROW_IF2(equals == argument->data() + argument->size(),
- "Expected = in KenLM argument " << *argument);
- StringPiece name(argument->data(), equals - argument->data());
- StringPiece value(equals + 1,
- argument->data() + argument->size() - equals - 1);
- if (name == "factor") {
- factorType = boost::lexical_cast<FactorType>(value);
- }
- else if (name == "order") {
- // Ignored
- }
- else if (name == "path") {
- filePath.assign(value.data(), value.size());
- }
- else if (name == "lazyken") {
- // deprecated: use load instead.
- load_method =
- boost::lexical_cast<bool>(value) ?
- util::LAZY : util::POPULATE_OR_READ;
- }
- else if (name == "load") {
- if (value == "lazy") {
- load_method = util::LAZY;
- }
- else if (value == "populate_or_lazy") {
- load_method = util::POPULATE_OR_LAZY;
- }
- else if (value == "populate_or_read" || value == "populate") {
- load_method = util::POPULATE_OR_READ;
- }
- else if (value == "read") {
- load_method = util::READ;
- }
- else if (value == "parallel_read") {
- load_method = util::PARALLEL_READ;
- }
- else {
- UTIL_THROW2("Unknown KenLM load method " << value);
- }
- }
- else {
- // pass to base class to interpret
- line << " " << name << "=" << value;
- }
- }
-
- return ConstructKenLM(startInd, line.str(), filePath, factorType, load_method);
-}
-
-FeatureFunction *ConstructKenLM(size_t startInd, const std::string &line,
- const std::string &file, FactorType factorType,
- util::LoadMethod load_method)
-{
- lm::ngram::ModelType model_type;
- if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
- switch (model_type) {
- case lm::ngram::PROBING:
- return new KENLM<lm::ngram::ProbingModel>(startInd, line, file,
- factorType, load_method);
- case lm::ngram::REST_PROBING:
- return new KENLM<lm::ngram::RestProbingModel>(startInd, line, file,
- factorType, load_method);
- case lm::ngram::TRIE:
- return new KENLM<lm::ngram::TrieModel>(startInd, line, file, factorType,
- load_method);
- case lm::ngram::QUANT_TRIE:
- return new KENLM<lm::ngram::QuantTrieModel>(startInd, line, file,
- factorType, load_method);
- case lm::ngram::ARRAY_TRIE:
- return new KENLM<lm::ngram::ArrayTrieModel>(startInd, line, file,
- factorType, load_method);
- case lm::ngram::QUANT_ARRAY_TRIE:
- return new KENLM<lm::ngram::QuantArrayTrieModel>(startInd, line, file,
- factorType, load_method);
- default:
- UTIL_THROW2("Unrecognized kenlm model type " << model_type)
- ;
- }
- }
- else {
- return new KENLM<lm::ngram::ProbingModel>(startInd, line, file, factorType,
- load_method);
- }
-}
-
-}
-
diff --git a/contrib/moses2/LM/KENLM.h b/contrib/moses2/LM/KENLM.h
deleted file mode 100644
index 703b398d8..000000000
--- a/contrib/moses2/LM/KENLM.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * KENLM.h
- *
- * Created on: 4 Nov 2015
- * Author: hieu
- */
-#pragma once
-#include <boost/shared_ptr.hpp>
-#include "../FF/StatefulFeatureFunction.h"
-#include "lm/model.hh"
-#include "../legacy/Factor.h"
-#include "../legacy/Util2.h"
-#include "../Word.h"
-
-namespace Moses2
-{
-
-class Word;
-
-FeatureFunction *ConstructKenLM(size_t startInd, const std::string &lineOrig);
-FeatureFunction *ConstructKenLM(size_t startInd, const std::string &line,
- const std::string &file, FactorType factorType,
- util::LoadMethod load_method);
-
-template<class Model>
-class KENLM: public StatefulFeatureFunction
-{
-public:
- KENLM(size_t startInd, const std::string &line, const std::string &file,
- FactorType factorType, util::LoadMethod load_method);
-
- virtual ~KENLM();
-
- virtual void Load(System &system);
-
- virtual FFState* BlankState(MemPool &pool, const System &sys) const;
-
- //! return the state associated with the empty hypothesis for a given sentence
- virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const;
-
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- virtual void EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const;
-
- virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const;
-
-protected:
- std::string m_path;
- FactorType m_factorType;
- util::LoadMethod m_load_method;
- const Factor *m_bos;
- const Factor *m_eos;
-
- boost::shared_ptr<Model> m_ngram;
-
- void CalcScore(const Phrase<Moses2::Word> &phrase, float &fullScore, float &ngramScore,
- std::size_t &oovCount) const;
-
- void CalcScore(const Phrase<SCFG::Word> &phrase, float &fullScore, float &ngramScore,
- std::size_t &oovCount) const;
-
- inline lm::WordIndex TranslateID(const Word &word) const
- {
- std::size_t factor = word[m_factorType]->GetId();
- return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
- }
- // Convert last words of hypothesis into vocab ids, returning an end pointer.
- lm::WordIndex *LastIDs(const Hypothesis &hypo, lm::WordIndex *indices) const;
-
- std::vector<lm::WordIndex> m_lmIdLookup;
-
-};
-
-}
-
diff --git a/contrib/moses2/LM/KENLMBatch.cpp b/contrib/moses2/LM/KENLMBatch.cpp
deleted file mode 100644
index 1ed6e7663..000000000
--- a/contrib/moses2/LM/KENLMBatch.cpp
+++ /dev/null
@@ -1,390 +0,0 @@
-/*
- * KENLMBatch.cpp
- *
- * Created on: 4 Nov 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include <sstream>
-#include <vector>
-
-#include <pthread.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <unistd.h>
-
-#include "KENLMBatch.h"
-#include "../Phrase.h"
-#include "../Scores.h"
-#include "../System.h"
-#include "../PhraseBased/Hypothesis.h"
-#include "../PhraseBased/Manager.h"
-#include "../PhraseBased/TargetPhraseImpl.h"
-#include "lm/state.hh"
-#include "lm/left.hh"
-#include "util/exception.hh"
-#include "util/tokenize_piece.hh"
-#include "util/string_stream.hh"
-#include "../legacy/FactorCollection.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-struct KenLMState: public FFState
-{
- lm::ngram::State state;
- virtual size_t hash() const
- {
- size_t ret = hash_value(state);
- return ret;
- }
- virtual bool operator==(const FFState& o) const
- {
- const KenLMState &other = static_cast<const KenLMState &>(o);
- bool ret = state == other.state;
- return ret;
- }
-
- virtual std::string ToString() const
- {
- stringstream ss;
- for (size_t i = 0; i < state.Length(); ++i) {
- ss << state.words[i] << " ";
- }
- return ss.str();
- }
-
-};
-
-/////////////////////////////////////////////////////////////////
-class MappingBuilder: public lm::EnumerateVocab
-{
-public:
- MappingBuilder(FactorCollection &factorCollection, System &system,
- std::vector<lm::WordIndex> &mapping) :
- m_factorCollection(factorCollection), m_system(system), m_mapping(mapping)
- {
- }
-
- void Add(lm::WordIndex index, const StringPiece &str)
- {
- std::size_t factorId = m_factorCollection.AddFactor(str, m_system, false)->GetId();
- if (m_mapping.size() <= factorId) {
- // 0 is <unk> :-)
- m_mapping.resize(factorId + 1);
- }
- m_mapping[factorId] = index;
- }
-
-private:
- FactorCollection &m_factorCollection;
- std::vector<lm::WordIndex> &m_mapping;
- System &m_system;
-};
-
-/////////////////////////////////////////////////////////////////
-KENLMBatch::KENLMBatch(size_t startInd, const std::string &line)
-:StatefulFeatureFunction(startInd, line)
-,m_numHypos(0)
-{
- cerr << "KENLMBatch::KENLMBatch" << endl;
- ReadParameters();
-}
-
-KENLMBatch::~KENLMBatch()
-{
- // TODO Auto-generated destructor stub
-}
-
-void KENLMBatch::Load(System &system)
-{
- cerr << "KENLMBatch::Load" << endl;
- FactorCollection &fc = system.GetVocab();
-
- m_bos = fc.AddFactor(BOS_, system, false);
- m_eos = fc.AddFactor(EOS_, system, false);
-
- lm::ngram::Config config;
- config.messages = NULL;
-
- FactorCollection &collection = system.GetVocab();
- MappingBuilder builder(collection, system, m_lmIdLookup);
- config.enumerate_vocab = &builder;
- config.load_method = m_load_method;
-
- m_ngram.reset(new Model(m_path.c_str(), config));
-}
-
-FFState* KENLMBatch::BlankState(MemPool &pool, const System &sys) const
-{
- KenLMState *ret = new (pool.Allocate<KenLMState>()) KenLMState();
- return ret;
-}
-
-//! return the state associated with the empty hypothesis for a given sentence
-void KENLMBatch::EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const
-{
- KenLMState &stateCast = static_cast<KenLMState&>(state);
- stateCast.state = m_ngram->BeginSentenceState();
-}
-
-void KENLMBatch::EvaluateInIsolation(MemPool &pool, const System &system,
- const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
-{
- // contains factors used by this LM
- float fullScore, nGramScore;
- size_t oovCount;
-
- CalcScore(targetPhrase, fullScore, nGramScore, oovCount);
-
- float estimateScore = fullScore - nGramScore;
-
- bool GetLMEnableOOVFeature = false;
- if (GetLMEnableOOVFeature) {
- float scoresVec[2], estimateScoresVec[2];
- scoresVec[0] = nGramScore;
- scoresVec[1] = oovCount;
- scores.PlusEquals(system, *this, scoresVec);
-
- estimateScoresVec[0] = estimateScore;
- estimateScoresVec[1] = 0;
- SCORE weightedScore = Scores::CalcWeightedScore(system, *this,
- estimateScoresVec);
- estimatedScore += weightedScore;
- }
- else {
- scores.PlusEquals(system, *this, nGramScore);
-
- SCORE weightedScore = Scores::CalcWeightedScore(system, *this,
- estimateScore);
- estimatedScore += weightedScore;
- }
-}
-
-void KENLMBatch::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
-{
-}
-
-void KENLMBatch::EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const
-{
- KenLMState &stateCast = static_cast<KenLMState&>(state);
-
- const System &system = mgr.system;
-
- const lm::ngram::State &in_state =
- static_cast<const KenLMState&>(prevState).state;
-
- if (!hypo.GetTargetPhrase().GetSize()) {
- stateCast.state = in_state;
- return;
- }
-
- const std::size_t begin = hypo.GetCurrTargetWordsRange().GetStartPos();
- //[begin, end) in STL-like fashion.
- const std::size_t end = hypo.GetCurrTargetWordsRange().GetEndPos() + 1;
- const std::size_t adjust_end = std::min(end, begin + m_ngram->Order() - 1);
-
- std::size_t position = begin;
- typename Model::State aux_state;
- typename Model::State *state0 = &stateCast.state, *state1 = &aux_state;
-
- float score = m_ngram->Score(in_state, TranslateID(hypo.GetWord(position)),
- *state0);
- ++position;
- for (; position < adjust_end; ++position) {
- score += m_ngram->Score(*state0, TranslateID(hypo.GetWord(position)),
- *state1);
- std::swap(state0, state1);
- }
-
- if (hypo.GetBitmap().IsComplete()) {
- // Score end of sentence.
- std::vector<lm::WordIndex> indices(m_ngram->Order() - 1);
- const lm::WordIndex *last = LastIDs(hypo, &indices.front());
- score += m_ngram->FullScoreForgotState(&indices.front(), last,
- m_ngram->GetVocabulary().EndSentence(), stateCast.state).prob;
- }
- else if (adjust_end < end) {
- // Get state after adding a long phrase.
- std::vector<lm::WordIndex> indices(m_ngram->Order() - 1);
- const lm::WordIndex *last = LastIDs(hypo, &indices.front());
- m_ngram->GetState(&indices.front(), last, stateCast.state);
- }
- else if (state0 != &stateCast.state) {
- // Short enough phrase that we can just reuse the state.
- stateCast.state = *state0;
- }
-
- score = TransformLMScore(score);
-
- bool OOVFeatureEnabled = false;
- if (OOVFeatureEnabled) {
- std::vector<float> scoresVec(2);
- scoresVec[0] = score;
- scoresVec[1] = 0.0;
- scores.PlusEquals(system, *this, scoresVec);
- }
- else {
- scores.PlusEquals(system, *this, score);
- }
-}
-
-void KENLMBatch::CalcScore(const Phrase<Moses2::Word> &phrase, float &fullScore,
- float &ngramScore, std::size_t &oovCount) const
-{
- fullScore = 0;
- ngramScore = 0;
- oovCount = 0;
-
- if (!phrase.GetSize()) return;
-
- lm::ngram::ChartState discarded_sadly;
- lm::ngram::RuleScore<Model> scorer(*m_ngram, discarded_sadly);
-
- size_t position;
- if (m_bos == phrase[0][m_factorType]) {
- scorer.BeginSentence();
- position = 1;
- }
- else {
- position = 0;
- }
-
- size_t ngramBoundary = m_ngram->Order() - 1;
-
- size_t end_loop = std::min(ngramBoundary, phrase.GetSize());
- for (; position < end_loop; ++position) {
- const Word &word = phrase[position];
- lm::WordIndex index = TranslateID(word);
- scorer.Terminal(index);
- if (!index) ++oovCount;
- }
- float before_boundary = fullScore + scorer.Finish();
- for (; position < phrase.GetSize(); ++position) {
- const Word &word = phrase[position];
- lm::WordIndex index = TranslateID(word);
- scorer.Terminal(index);
- if (!index) ++oovCount;
- }
- fullScore += scorer.Finish();
-
- ngramScore = TransformLMScore(fullScore - before_boundary);
- fullScore = TransformLMScore(fullScore);
-}
-
-// Convert last words of hypothesis into vocab ids, returning an end pointer.
-lm::WordIndex *KENLMBatch::LastIDs(const Hypothesis &hypo,
- lm::WordIndex *indices) const
-{
- lm::WordIndex *index = indices;
- lm::WordIndex *end = indices + m_ngram->Order() - 1;
- int position = hypo.GetCurrTargetWordsRange().GetEndPos();
- for (;; ++index, --position) {
- if (index == end) return index;
- if (position == -1) {
- *index = m_ngram->GetVocabulary().BeginSentence();
- return index + 1;
- }
- *index = TranslateID(hypo.GetWord(position));
- }
-}
-
-void KENLMBatch::SetParameter(const std::string& key,
- const std::string& value)
-{
- //cerr << "key=" << key << " " << value << endl;
- if (key == "path") {
- m_path = value;
- }
- else if (key == "order") {
- // ignore
- }
- else if (key == "factor") {
- m_factorType = Scan<FactorType>(value);
- }
- else if (key == "lazyken") {
- m_load_method =
- boost::lexical_cast<bool>(value) ?
- util::LAZY : util::POPULATE_OR_READ;
- }
- else if (key == "load") {
- if (value == "lazy") {
- m_load_method = util::LAZY;
- }
- else if (value == "populate_or_lazy") {
- m_load_method = util::POPULATE_OR_LAZY;
- }
- else if (value == "populate_or_read" || value == "populate") {
- m_load_method = util::POPULATE_OR_READ;
- }
- else if (value == "read") {
- m_load_method = util::READ;
- }
- else if (value == "parallel_read") {
- m_load_method = util::PARALLEL_READ;
- }
- else {
- UTIL_THROW2("Unknown KenLM load method " << value);
- }
- }
- else {
- StatefulFeatureFunction::SetParameter(key, value);
- }
-
- //cerr << "SetParameter done" << endl;
-}
-
-void KENLMBatch::EvaluateWhenAppliedBatch(
- const Batch &batch) const
-{
- {
- // write lock
- boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
- m_batches.push_back(&batch);
- m_numHypos += batch.size();
- }
- //cerr << "m_numHypos=" << m_numHypos << endl;
-
- if (m_numHypos > 0) {
- // process batch
- EvaluateWhenAppliedBatch();
-
- m_batches.clear();
- m_numHypos = 0;
-
- m_threadNeeded.notify_all();
- }
- else {
- boost::mutex::scoped_lock lock(m_mutex);
- m_threadNeeded.wait(lock);
- }
-}
-
-void KENLMBatch::EvaluateWhenAppliedBatch() const
-{
- BOOST_FOREACH(const Batch *batch, m_batches) {
- //cerr << "batch=" << batch->size() << endl;
- BOOST_FOREACH(Hypothesis *hypo, *batch) {
- hypo->EvaluateWhenApplied(*this);
- }
- }
-}
-
-void KENLMBatch::EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-}
-
diff --git a/contrib/moses2/LM/KENLMBatch.h b/contrib/moses2/LM/KENLMBatch.h
deleted file mode 100644
index 21dc8637c..000000000
--- a/contrib/moses2/LM/KENLMBatch.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * KENLM.h
- *
- * Created on: 4 Nov 2015
- * Author: hieu
- */
-#pragma once
-
-#include <boost/shared_ptr.hpp>
-#include <boost/bind.hpp>
-#include <boost/thread.hpp>
-#include <pthread.h>
-
-#include "../FF/StatefulFeatureFunction.h"
-#include "lm/model.hh"
-#include "../legacy/Factor.h"
-#include "../legacy/Util2.h"
-#include "../Word.h"
-#include "../TypeDef.h"
-
-namespace Moses2
-{
-
-class Word;
-
-class KENLMBatch: public StatefulFeatureFunction
-{
-public:
- KENLMBatch(size_t startInd, const std::string &line);
-
- virtual ~KENLMBatch();
-
- virtual void Load(System &system);
-
- void SetParameter(const std::string& key,
- const std::string& value);
-
- virtual FFState* BlankState(MemPool &pool, const System &sys) const;
-
- //! return the state associated with the empty hypothesis for a given sentence
- virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const;
-
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- virtual void EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const;
-
- virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const;
-
- virtual void EvaluateWhenAppliedBatch(
- const Batch &batch) const;
-
-protected:
- std::string m_path;
- FactorType m_factorType;
- util::LoadMethod m_load_method;
- const Factor *m_bos;
- const Factor *m_eos;
-
- typedef lm::ngram::ProbingModel Model;
- boost::shared_ptr<Model> m_ngram;
-
- void CalcScore(const Phrase<Moses2::Word> &phrase, float &fullScore, float &ngramScore,
- std::size_t &oovCount) const;
-
- inline lm::WordIndex TranslateID(const Word &word) const
- {
- std::size_t factor = word[m_factorType]->GetId();
- return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
- }
- // Convert last words of hypothesis into vocab ids, returning an end pointer.
- lm::WordIndex *LastIDs(const Hypothesis &hypo, lm::WordIndex *indices) const;
-
- std::vector<lm::WordIndex> m_lmIdLookup;
-
- // batch
- mutable std::vector<const Batch*> m_batches;
- mutable size_t m_numHypos;
-
- mutable boost::shared_mutex m_accessLock;
-
- mutable boost::mutex m_mutex;
- mutable boost::condition_variable m_threadNeeded;
-
- void EvaluateWhenAppliedBatch() const;
-
-};
-
-}
diff --git a/contrib/moses2/LM/LanguageModel.cpp b/contrib/moses2/LM/LanguageModel.cpp
deleted file mode 100644
index 3e0c39d20..000000000
--- a/contrib/moses2/LM/LanguageModel.cpp
+++ /dev/null
@@ -1,334 +0,0 @@
-/*
- * LanguageModel.cpp
- *
- * Created on: 29 Oct 2015
- * Author: hieu
- */
-#include <vector>
-#include "LanguageModel.h"
-#include "../Phrase.h"
-#include "../System.h"
-#include "../PhraseBased/Manager.h"
-#include "../PhraseBased/Hypothesis.h"
-#include "../PhraseBased/TargetPhraseImpl.h"
-#include "../FF/PointerState.h"
-#include "../legacy/Util2.h"
-#include "../legacy/InputFileStream.h"
-#include "../legacy/Bitmap.h"
-#include "../legacy/Util2.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-struct LMState: public PointerState
-{
- LMState() :
- PointerState()
- {
- // uninitialised
- }
-
- void Set(MemPool &pool, void *lms, const std::vector<const Factor*> &context)
- {
- lmstate = lms;
-
- numWords = context.size();
- lastWords = (const Factor**) pool.Allocate(
- sizeof(const Factor*) * numWords);
- for (size_t i = 0; i < numWords; ++i) {
- lastWords[i] = context[i];
- }
- }
-
- void Init(MemPool &pool, const Factor *factor)
- {
- lmstate = NULL;
- numWords = 1;
- lastWords = (const Factor**) pool.Allocate(sizeof(const Factor*));
- lastWords[0] = factor;
- }
-
- size_t numWords;
- const Factor** lastWords;
-};
-
-////////////////////////////////////////////////////////////////////////////////////////
-LanguageModel::LanguageModel(size_t startInd, const std::string &line) :
- StatefulFeatureFunction(startInd, line), m_oov(-100)
-{
- ReadParameters();
-}
-
-LanguageModel::~LanguageModel()
-{
- // TODO Auto-generated destructor stub
-}
-
-void LanguageModel::Load(System &system)
-{
- FactorCollection &fc = system.GetVocab();
-
- m_bos = fc.AddFactor(BOS_, system, false);
- m_eos = fc.AddFactor(EOS_, system, false);
-
- InputFileStream infile(m_path);
- size_t lineNum = 0;
- string line;
- while (getline(infile, line)) {
- if (++lineNum % 100000 == 0) {
- cerr << lineNum << " ";
- }
-
- vector<string> substrings = Tokenize(line, "\t");
-
- if (substrings.size() < 2) continue;
-
- assert(substrings.size() == 2 || substrings.size() == 3);
-
- SCORE prob = TransformLMScore(Scan<SCORE>(substrings[0]));
- if (substrings[1] == "<unk>") {
- m_oov = prob;
- continue;
- }
-
- SCORE backoff = 0.f;
- if (substrings.size() == 3) {
- backoff = TransformLMScore(Scan<SCORE>(substrings[2]));
- }
-
- // ngram
- vector<string> key = Tokenize(substrings[1], " ");
-
- vector<const Factor*> factorKey(key.size());
- for (size_t i = 0; i < key.size(); ++i) {
- factorKey[factorKey.size() - i - 1] = fc.AddFactor(key[i], system, false);
- }
-
- m_root.insert(factorKey, LMScores(prob, backoff));
- }
-
-}
-
-void LanguageModel::SetParameter(const std::string& key,
- const std::string& value)
-{
- if (key == "path") {
- m_path = value;
- }
- else if (key == "factor") {
- m_factorType = Scan<FactorType>(value);
- }
- else if (key == "order") {
- m_order = Scan<size_t>(value);
- }
- else {
- StatefulFeatureFunction::SetParameter(key, value);
- }
-}
-
-FFState* LanguageModel::BlankState(MemPool &pool, const System &sys) const
-{
- return new (pool.Allocate<LMState>()) LMState();
-}
-
-void LanguageModel::EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const
-{
- LMState &stateCast = static_cast<LMState&>(state);
-
- MemPool &pool = mgr.GetPool();
- stateCast.Init(pool, m_bos);
-}
-
-void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system,
- const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
-{
- if (targetPhrase.GetSize() == 0) {
- return;
- }
-
- SCORE score = 0;
- SCORE nonFullScore = 0;
- vector<const Factor*> context;
-// context.push_back(m_bos);
-
- context.reserve(m_order);
- for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
- const Factor *factor = targetPhrase[i][m_factorType];
- ShiftOrPush(context, factor);
-
- if (context.size() == m_order) {
- std::pair<SCORE, void*> fromScoring = Score(context);
- score += fromScoring.first;
- }
- else {
- std::pair<SCORE, void*> fromScoring = Score(context);
- nonFullScore += fromScoring.first;
- }
- }
-
- scores.PlusEquals(system, *this, score);
- SCORE weightedScore = Scores::CalcWeightedScore(system, *this, nonFullScore);
- estimatedScore += weightedScore;
-}
-
-void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
-{
-}
-
-void LanguageModel::EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const
-{
- const LMState &prevLMState = static_cast<const LMState &>(prevState);
- size_t numWords = prevLMState.numWords;
-
- // context is held backwards
- vector<const Factor*> context(numWords);
- for (size_t i = 0; i < numWords; ++i) {
- context[i] = prevLMState.lastWords[i];
- }
- //DebugContext(context);
-
- SCORE score = 0;
- std::pair<SCORE, void*> fromScoring;
- const TargetPhrase<Moses2::Word> &tp = hypo.GetTargetPhrase();
- for (size_t i = 0; i < tp.GetSize(); ++i) {
- const Word &word = tp[i];
- const Factor *factor = word[m_factorType];
- ShiftOrPush(context, factor);
- fromScoring = Score(context);
- score += fromScoring.first;
- }
-
- const Bitmap &bm = hypo.GetBitmap();
- if (bm.IsComplete()) {
- // everything translated
- ShiftOrPush(context, m_eos);
- fromScoring = Score(context);
- score += fromScoring.first;
- fromScoring.second = NULL;
- context.clear();
- }
- else {
- assert(context.size());
- if (context.size() == m_order) {
- context.resize(context.size() - 1);
- }
- }
-
- scores.PlusEquals(mgr.system, *this, score);
-
- // return state
- //DebugContext(context);
-
- LMState &stateCast = static_cast<LMState&>(state);
- MemPool &pool = mgr.GetPool();
- stateCast.Set(pool, fromScoring.second, context);
-}
-
-void LanguageModel::ShiftOrPush(std::vector<const Factor*> &context,
- const Factor *factor) const
-{
- if (context.size() < m_order) {
- context.resize(context.size() + 1);
- }
- assert(context.size());
-
- for (size_t i = context.size() - 1; i > 0; --i) {
- context[i] = context[i - 1];
- }
-
- context[0] = factor;
-}
-
-std::pair<SCORE, void*> LanguageModel::Score(
- const std::vector<const Factor*> &context) const
-{
- //cerr << "context=";
- //DebugContext(context);
-
- std::pair<SCORE, void*> ret;
-
- typedef Node<const Factor*, LMScores> LMNode;
- const LMNode *node = m_root.getNode(context);
- if (node) {
- ret.first = node->getValue().prob;
- ret.second = (void*) node;
- }
- else {
- SCORE backoff = 0;
- std::vector<const Factor*> backOffContext(context.begin() + 1,
- context.end());
- node = m_root.getNode(backOffContext);
- if (node) {
- backoff = node->getValue().backoff;
- }
-
- std::vector<const Factor*> newContext(context.begin(), context.end() - 1);
- std::pair<SCORE, void*> newRet = Score(newContext);
-
- ret.first = backoff + newRet.first;
- ret.second = newRet.second;
- }
-
- //cerr << "score=" << ret.first << endl;
- return ret;
-}
-
-SCORE LanguageModel::BackoffScore(
- const std::vector<const Factor*> &context) const
-{
- //cerr << "backoff=";
- //DebugContext(context);
-
- SCORE ret;
- size_t stoppedAtInd;
- const Node<const Factor*, LMScores> &node = m_root.getNode(context,
- stoppedAtInd);
-
- if (stoppedAtInd == context.size()) {
- // found entire ngram
- ret = node.getValue().backoff;
- }
- else {
- if (stoppedAtInd == 0) {
- ret = m_oov;
- stoppedAtInd = 1;
- }
- else {
- ret = node.getValue().backoff;
- }
-
- // recursive
- std::vector<const Factor*> backoff(context.begin() + stoppedAtInd,
- context.end());
- ret += BackoffScore(backoff);
- }
-
- return ret;
-}
-
-void LanguageModel::DebugContext(
- const std::vector<const Factor*> &context) const
-{
- for (size_t i = 0; i < context.size(); ++i) {
- cerr << context[i]->GetString() << " ";
- }
- cerr << endl;
-}
-
-void LanguageModel::EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-}
-
diff --git a/contrib/moses2/LM/LanguageModel.h b/contrib/moses2/LM/LanguageModel.h
deleted file mode 100644
index d262a8497..000000000
--- a/contrib/moses2/LM/LanguageModel.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * LanguageModel.h
- *
- * Created on: 29 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include "../FF/StatefulFeatureFunction.h"
-#include "../TypeDef.h"
-#include "../MorphoTrie/MorphTrie.h"
-#include "../legacy/Factor.h"
-#include "../legacy/Util2.h"
-
-namespace Moses2
-{
-
-////////////////////////////////////////////////////////////////////////////////////////
-struct LMScores
-{
- LMScores()
- {
- }
-
- LMScores(const LMScores &copy) :
- prob(copy.prob), backoff(copy.backoff)
- {
- }
-
- LMScores(float inProb, float inBackoff) :
- prob(inProb), backoff(inBackoff)
- {
- }
-
- void Debug(std::ostream &out, const System &system) const
- {
- out << "(" << prob << "," << backoff << ")" << std::flush;
- }
-
- float prob, backoff;
-};
-
-////////////////////////////////////////////////////////////////////////////////////////
-class LanguageModel: public StatefulFeatureFunction
-{
-public:
- LanguageModel(size_t startInd, const std::string &line);
- virtual ~LanguageModel();
-
- virtual void Load(System &system);
-
- virtual void SetParameter(const std::string& key, const std::string& value);
-
- virtual FFState* BlankState(MemPool &pool, const System &sys) const;
- virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const;
-
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- virtual void EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const;
-
- virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const;
-
-protected:
- std::string m_path;
- FactorType m_factorType;
- size_t m_order;
-
- MorphTrie<const Factor*, LMScores> m_root;
- SCORE m_oov;
- const Factor *m_bos;
- const Factor *m_eos;
-
- void ShiftOrPush(std::vector<const Factor*> &context,
- const Factor *factor) const;
- std::pair<SCORE, void*> Score(
- const std::vector<const Factor*> &context) const;
- SCORE BackoffScore(const std::vector<const Factor*> &context) const;
-
- void DebugContext(const std::vector<const Factor*> &context) const;
-};
-
-}
-
diff --git a/contrib/moses2/LM/LanguageModelDALM.cpp b/contrib/moses2/LM/LanguageModelDALM.cpp
deleted file mode 100644
index 7d3e8242b..000000000
--- a/contrib/moses2/LM/LanguageModelDALM.cpp
+++ /dev/null
@@ -1,246 +0,0 @@
-/*
- * LanguageModelDALM.cpp
- *
- * Created on: 5 Dec 2015
- * Author: hieu
- */
-
-#include "LanguageModelDALM.h"
-#include "../TypeDef.h"
-#include "../System.h"
-#include "dalm.h"
-#include "util/exception.hh"
-#include "../legacy/InputFileStream.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-//////////////////////////////////////////////////////////////////////////////////////////
-class Murmur: public DALM::State::HashFunction
-{
-public:
- Murmur(std::size_t seed=0): seed(seed) {
- }
- virtual std::size_t operator()(const DALM::VocabId *words, std::size_t size) const {
- return util::MurmurHashNative(words, sizeof(DALM::VocabId) * size, seed);
- }
-private:
- std::size_t seed;
-};
-
-//////////////////////////////////////////////////////////////////////////////////////////
-class DALMState : public FFState
-{
-private:
- DALM::State state;
-
-public:
- DALMState() {
- }
-
- DALMState(const DALMState &from) {
- state = from.state;
- }
-
- virtual ~DALMState() {
- }
-
- void reset(const DALMState &from) {
- state = from.state;
- }
-
- virtual int Compare(const FFState& other) const {
- const DALMState &o = static_cast<const DALMState &>(other);
- if(state.get_count() < o.state.get_count()) return -1;
- else if(state.get_count() > o.state.get_count()) return 1;
- else return state.compare(o.state);
- }
-
- virtual size_t hash() const {
- // imitate KenLM
- return state.hash(Murmur());
- }
-
- virtual bool operator==(const FFState& other) const {
- const DALMState &o = static_cast<const DALMState &>(other);
- return state.compare(o.state) == 0;
- }
-
- DALM::State &get_state() {
- return state;
- }
-
- void refresh() {
- state.refresh();
- }
-
- virtual std::string ToString() const
- { return "DALM state"; }
-
-};
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-inline void read_ini(const char *inifile, string &model, string &words, string &wordstxt)
-{
- ifstream ifs(inifile);
- string line;
-
- getline(ifs, line);
- while(ifs) {
- unsigned int pos = line.find("=");
- string key = line.substr(0, pos);
- string value = line.substr(pos+1, line.size()-pos);
- if(key=="MODEL") {
- model = value;
- } else if(key=="WORDS") {
- words = value;
- } else if(key=="WORDSTXT") {
- wordstxt = value;
- }
- getline(ifs, line);
- }
-}
-/////////////////////////
-
-LanguageModelDALM::LanguageModelDALM(size_t startInd, const std::string &line)
-:StatefulFeatureFunction(startInd, line)
-{
- ReadParameters();
-}
-
-LanguageModelDALM::~LanguageModelDALM() {
- // TODO Auto-generated destructor stub
-}
-
-void LanguageModelDALM::Load(System &system)
-{
- /////////////////////
- // READING INIFILE //
- /////////////////////
- string inifile= m_filePath + "/dalm.ini";
-
- string model; // Path to the double-array file.
- string words; // Path to the vocabulary file.
- string wordstxt; //Path to the vocabulary file in text format.
- read_ini(inifile.c_str(), model, words, wordstxt);
-
- model = m_filePath + "/" + model;
- words = m_filePath + "/" + words;
- wordstxt = m_filePath + "/" + wordstxt;
-
- UTIL_THROW_IF(model.empty() || words.empty() || wordstxt.empty(),
- util::FileOpenException,
- "Failed to read DALM ini file " << m_filePath << ". Probably doesn't exist");
-
- ////////////////
- // LOADING LM //
- ////////////////
-
- // Preparing a logger object.
- m_logger = new DALM::Logger(stderr);
- m_logger->setLevel(DALM::LOGGER_INFO);
-
- // Load the vocabulary file.
- m_vocab = new DALM::Vocabulary(words, *m_logger);
-
- // Load the language model.
- m_lm = new DALM::LM(model, *m_vocab, m_nGramOrder, *m_logger);
-
- wid_start = m_vocab->lookup(BOS_);
- wid_end = m_vocab->lookup(EOS_);
-
- // vocab mapping
- CreateVocabMapping(wordstxt, system);
-
- m_beginSentenceFactor = system.GetVocab().AddFactor(BOS_, system);
-}
-
-void LanguageModelDALM::CreateVocabMapping(const std::string &wordstxt, const System &system)
-{
- InputFileStream vocabStrm(wordstxt);
-
- std::vector< std::pair<std::size_t, DALM::VocabId> > vlist;
- string line;
- std::size_t max_fid = 0;
- while(getline(vocabStrm, line)) {
- const Factor *factor = system.GetVocab().AddFactor(line, system);
- std::size_t fid = factor->GetId();
- DALM::VocabId wid = m_vocab->lookup(line.c_str());
-
- vlist.push_back(std::pair<std::size_t, DALM::VocabId>(fid, wid));
- if(max_fid < fid) max_fid = fid;
- }
-
- for(std::size_t i = 0; i < m_vocabMap.size(); i++) {
- m_vocabMap[i] = m_vocab->unk();
- }
-
- m_vocabMap.resize(max_fid+1, m_vocab->unk());
- std::vector< std::pair<std::size_t, DALM::VocabId> >::iterator it = vlist.begin();
- while(it != vlist.end()) {
- std::pair<std::size_t, DALM::VocabId> &entry = *it;
- m_vocabMap[entry.first] = entry.second;
-
- ++it;
- }
-}
-
-void LanguageModelDALM::SetParameter(const std::string& key, const std::string& value)
-{
- if (key == "factor") {
- m_factorType = Scan<FactorType>(value);
- } else if (key == "order") {
- m_nGramOrder = Scan<size_t>(value);
- } else if (key == "path") {
- m_filePath = value;
- } else {
- StatefulFeatureFunction::SetParameter(key, value);
- }
- m_ContextSize = m_nGramOrder-1;
-}
-
-FFState* LanguageModelDALM::BlankState(MemPool &pool, const System &sys) const
-{
- DALMState *state = new DALMState();
- return state;
-}
-
-void LanguageModelDALM::EmptyHypothesisState(FFState &state,
- const ManagerBase &mgr,
- const InputType &input,
- const Hypothesis &hypo) const
-{
- DALMState &dalmState = static_cast<DALMState&>(state);
- m_lm->init_state(dalmState.get_state());
-}
-
- void LanguageModelDALM::EvaluateInIsolation(MemPool &pool,
- const System &system,
- const Phrase &source,
- const TargetPhraseImpl &targetPhrase,
- Scores &scores,
- SCORE &estimatedScore) const
- {
-
- }
-
-void LanguageModelDALM::EvaluateWhenApplied(const ManagerBase &mgr,
-const Hypothesis &hypo,
-const FFState &prevState,
-Scores &scores,
-FFState &state) const
-{
-
-}
-
-void LanguageModelDALM::EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-}
-
diff --git a/contrib/moses2/LM/LanguageModelDALM.h b/contrib/moses2/LM/LanguageModelDALM.h
deleted file mode 100644
index cbbeca97d..000000000
--- a/contrib/moses2/LM/LanguageModelDALM.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * LanguageModelDALM.h
- *
- * Created on: 5 Dec 2015
- * Author: hieu
- */
-
-#pragma once
-#include "../FF/StatefulFeatureFunction.h"
-#include "../legacy/Util2.h"
-#include "../legacy/Factor.h"
-
-namespace DALM
-{
-class Logger;
-class Vocabulary;
-class State;
-class LM;
-union Fragment;
-class Gap;
-
-typedef unsigned int VocabId;
-}
-
-namespace Moses2
-{
-
-class LanguageModelDALM: public StatefulFeatureFunction
-{
-public:
- LanguageModelDALM(size_t startInd, const std::string &line);
- virtual ~LanguageModelDALM();
-
- virtual void Load(System &system);
- virtual void SetParameter(const std::string& key, const std::string& value);
-
- virtual FFState* BlankState(MemPool &pool, const System &sys) const;
- virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
- const InputType &input, const Hypothesis &hypo) const;
-
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- virtual void EvaluateWhenApplied(const ManagerBase &mgr,
- const Hypothesis &hypo, const FFState &prevState, Scores &scores,
- FFState &state) const;
-
- virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
- const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
- FFState &state) const;
-
-protected:
- FactorType m_factorType;
-
- std::string m_filePath;
- size_t m_nGramOrder; //! max n-gram length contained in this LM
- size_t m_ContextSize;
-
- DALM::Logger *m_logger;
- DALM::Vocabulary *m_vocab;
- DALM::LM *m_lm;
- DALM::VocabId wid_start, wid_end;
-
- const Factor *m_beginSentenceFactor;
-
- mutable std::vector<DALM::VocabId> m_vocabMap;
-
- void CreateVocabMapping(const std::string &wordstxt, const System &system);
-
-};
-
-}
-
diff --git a/contrib/moses2/Main.cpp b/contrib/moses2/Main.cpp
deleted file mode 100644
index 0661d1d0e..000000000
--- a/contrib/moses2/Main.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-#include <iostream>
-#include <memory>
-#include <boost/pool/pool_alloc.hpp>
-#include "Main.h"
-#include "System.h"
-#include "Phrase.h"
-#include "TranslationTask.h"
-#include "MemPoolAllocator.h"
-#include "server/Server.h"
-#include "legacy/InputFileStream.h"
-#include "legacy/Parameter.h"
-#include "legacy/ThreadPool.h"
-#include "legacy/Timer.h"
-#include "legacy/Util2.h"
-#include "util/usage.hh"
-
-using namespace std;
-
-//extern size_t g_numHypos;
-
-int main(int argc, char** argv)
-{
- cerr << "Starting..." << endl;
-
- Moses2::Timer timer;
- timer.start();
- //Temp();
-
- Moses2::Parameter params;
- if (!params.LoadParam(argc, argv)) {
- return EXIT_FAILURE;
- }
- Moses2::System system(params);
- timer.check("Loaded");
-
- if (params.GetParam("show-weights")) {
- return EXIT_SUCCESS;
- }
-
- //cerr << "system.numThreads=" << system.options.server.numThreads << endl;
-
- Moses2::ThreadPool pool(system.options.server.numThreads, system.cpuAffinityOffset, system.cpuAffinityOffsetIncr);
- //cerr << "CREATED POOL" << endl;
-
- if (params.GetParam("server")) {
- std::cerr << "RUN SERVER" << std::endl;
- run_as_server(system);
- }
- else {
- std::cerr << "RUN BATCH" << std::endl;
- batch_run(params, system, pool);
- }
-
- cerr << "Decoding took " << timer.get_elapsed_time() << endl;
- // cerr << "g_numHypos=" << g_numHypos << endl;
- cerr << "Finished" << endl;
- return EXIT_SUCCESS;
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////
-void run_as_server(Moses2::System &system)
-{
- Moses2::Server server(system.options.server, system);
- server.run(system); // actually: don't return. see Server::run()
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////
-istream &GetInputStream(Moses2::Parameter &params)
-{
- const Moses2::PARAM_VEC *vec = params.GetParam("input-file");
- if (vec && vec->size()) {
- Moses2::InputFileStream *stream = new Moses2::InputFileStream(vec->at(0));
- return *stream;
- }
- else {
- return cin;
- }
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////
-void batch_run(Moses2::Parameter &params, Moses2::System &system, Moses2::ThreadPool &pool)
-{
- istream &inStream = GetInputStream(params);
-
- long translationId = 0;
- string line;
- while (getline(inStream, line)) {
- //cerr << "line=" << line << endl;
- boost::shared_ptr<Moses2::TranslationTask> task(new Moses2::TranslationTask(system, line, translationId));
-
- //cerr << "START pool.Submit()" << endl;
- pool.Submit(task);
- //task->Run();
- ++translationId;
- }
-
- pool.Stop(true);
-
- if (&inStream != &cin) {
- delete &inStream;
- }
-
- //util::PrintUsage(std::cerr);
-
-}
-////////////////////////////////////////////////////////////////////////////////////////////////
-void Temp()
-{
- Moses2::MemPool pool;
- Moses2::MemPoolAllocator<int> a(pool);
-
- boost::unordered_set<int, boost::hash<int>, std::equal_to<int>, Moses2::MemPoolAllocator<int> > s(a);
- s.insert(3);
- s.insert(4);
- s.insert(3);
- s.erase(3);
-
- boost::pool_allocator<int> alloc;
- std::vector<int, boost::pool_allocator<int> > v(alloc);
- for (int i = 0; i < 1000; ++i)
- v.push_back(i);
-
- v.clear();
- boost::singleton_pool<boost::pool_allocator_tag, sizeof(int)>::
- purge_memory();
-
- abort();
-}
diff --git a/contrib/moses2/Main.h b/contrib/moses2/Main.h
deleted file mode 100644
index 41e016130..000000000
--- a/contrib/moses2/Main.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Main.h
- *
- * Created on: 1 Apr 2016
- * Author: hieu
- */
-#pragma once
-#include <iostream>
-
-namespace Moses2 {
-class Parameter;
-class System;
-class ThreadPool;
-}
-
-std::istream &GetInputStream(Moses2::Parameter &params);
-void batch_run(Moses2::Parameter &params, Moses2::System &system, Moses2::ThreadPool &pool);
-void run_as_server(Moses2::System &system);
-
-void Temp();
-
-
diff --git a/contrib/moses2/ManagerBase.cpp b/contrib/moses2/ManagerBase.cpp
deleted file mode 100644
index 1e774cc5b..000000000
--- a/contrib/moses2/ManagerBase.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Manager.cpp
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include <vector>
-#include <sstream>
-#include "System.h"
-#include "ManagerBase.h"
-#include "Phrase.h"
-#include "InputPathsBase.h"
-#include "InputPathBase.h"
-#include "TranslationModel/PhraseTable.h"
-#include "legacy/Range.h"
-#include "PhraseBased/Sentence.h"
-
-using namespace std;
-
-namespace Moses2
-{
-ManagerBase::ManagerBase(System &sys, const TranslationTask &task,
- const std::string &inputStr, long translationId)
-:system(sys)
-,task(task)
-,m_inputStr(inputStr)
-,m_translationId(translationId)
-,m_pool(NULL)
-,m_systemPool(NULL)
-,m_hypoRecycle(NULL)
-{
-}
-
-ManagerBase::~ManagerBase()
-{
- system.featureFunctions.CleanUpAfterSentenceProcessing();
-
- if (m_pool) {
- GetPool().Reset();
- }
- if (m_hypoRecycle) {
- GetHypoRecycle().Clear();
- }
-}
-
-void ManagerBase::InitPools()
-{
- m_pool = &system.GetManagerPool();
- m_systemPool = &system.GetSystemPool();
- m_hypoRecycle = &system.GetHypoRecycler();
-}
-
-}
-
diff --git a/contrib/moses2/ManagerBase.h b/contrib/moses2/ManagerBase.h
deleted file mode 100644
index 7b4a02ba8..000000000
--- a/contrib/moses2/ManagerBase.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Manager.h
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include <queue>
-#include <cstddef>
-#include <string>
-#include <deque>
-#include "Phrase.h"
-#include "MemPool.h"
-#include "Recycler.h"
-#include "EstimatedScores.h"
-#include "ArcLists.h"
-#include "legacy/Bitmaps.h"
-
-namespace Moses2
-{
-
-class System;
-class TranslationTask;
-class PhraseImpl;
-class SearchNormal;
-class Search;
-class InputType;
-class OutputCollector;
-class HypothesisBase;
-
-class ManagerBase
-{
-public:
- const System &system;
- const TranslationTask &task;
- mutable ArcLists arcLists;
-
- ManagerBase(System &sys, const TranslationTask &task,
- const std::string &inputStr, long translationId);
- virtual ~ManagerBase();
- virtual void Decode() = 0;
- virtual std::string OutputBest() const = 0;
- virtual std::string OutputNBest() = 0;
- virtual std::string OutputTransOpt() = 0;
-
- MemPool &GetPool() const
- { return *m_pool; }
-
- MemPool &GetSystemPool() const
- { return *m_systemPool; }
-
- Recycler<HypothesisBase*> &GetHypoRecycle() const
- { return *m_hypoRecycle; }
-
- const InputType &GetInput() const
- { return *m_input; }
-
- long GetTranslationId() const
- { return m_translationId; }
-
-protected:
- std::string m_inputStr;
- long m_translationId;
- InputType *m_input;
-
- mutable MemPool *m_pool, *m_systemPool;
- mutable Recycler<HypothesisBase*> *m_hypoRecycle;
-
- void InitPools();
-
-};
-
-}
-
diff --git a/contrib/moses2/MemPool.cpp b/contrib/moses2/MemPool.cpp
deleted file mode 100644
index 7e159117b..000000000
--- a/contrib/moses2/MemPool.cpp
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * MemPool.cpp
- *
- * Created on: 28 Oct 2015
- * Author: hieu
- */
-
-#include <boost/foreach.hpp>
-#include "MemPool.h"
-#include "util/scoped.hh"
-#include "legacy/Util2.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-MemPool::Page::Page(std::size_t vSize) :
- size(vSize)
-{
- mem = (uint8_t*) util::MallocOrThrow(size);
- end = mem + size;
-}
-
-MemPool::Page::~Page()
-{
- free(mem);
-}
-////////////////////////////////////////////////////
-MemPool::MemPool(size_t initSize) :
- m_currSize(initSize), m_currPage(0)
-{
- Page *page = new Page(m_currSize);
- m_pages.push_back(page);
-
- current_ = page->mem;
- //cerr << "new memory pool";
-}
-
-MemPool::~MemPool()
-{
- //cerr << "delete memory pool" << endl;
- RemoveAllInColl(m_pages);
-}
-
-uint8_t *MemPool::More(std::size_t size)
-{
- ++m_currPage;
- if (m_currPage >= m_pages.size()) {
- // add new page
- m_currSize <<= 1;
- std::size_t amount = std::max(m_currSize, size);
-
- Page *page = new Page(amount);
- m_pages.push_back(page);
-
- uint8_t *ret = page->mem;
- current_ = ret + size;
- return ret;
- }
- else {
- // use existing page
- Page &page = *m_pages[m_currPage];
- if (size <= page.size) {
- uint8_t *ret = page.mem;
- current_ = ret + size;
- return ret;
- }
- else {
- // recursive call More()
- return More(size);
- }
- }
-}
-
-void MemPool::Reset()
-{
- m_currPage = 0;
- current_ = m_pages[0]->mem;
-}
-
-}
-
diff --git a/contrib/moses2/MemPool.h b/contrib/moses2/MemPool.h
deleted file mode 100644
index eaa55915e..000000000
--- a/contrib/moses2/MemPool.h
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * MemPool.h
- *
- * Created on: 28 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include <algorithm>
-#include <iostream>
-#include <vector>
-#include <stdint.h>
-#include <stdlib.h>
-#include <limits>
-#include <iostream>
-
-namespace Moses2
-{
-
-class MemPool
-{
- struct Page
- {
- uint8_t *mem;
- uint8_t *end;
- size_t size;
-
- Page()
- {
- }
- Page(std::size_t size);
- ~Page();
- };
-
-public:
- MemPool(std::size_t initSize = 10000);
-
- ~MemPool();
-
- uint8_t *Allocate(std::size_t size)
- {
- size = (size + 3) & 0xfffffffc;
-
- uint8_t *ret = current_;
- current_ += size;
-
- Page &page = *m_pages[m_currPage];
- if (current_ <= page.end) {
- // return what we got
- }
- else {
- ret = More(size);
- }
- return ret;
-
- }
-
- template<typename T>
- T *Allocate()
- {
- uint8_t *ret = Allocate(sizeof(T));
- return (T*) ret;
- }
-
- template<typename T>
- T *Allocate(size_t num)
- {
- uint8_t *ret = Allocate(sizeof(T) * num);
- return (T*) ret;
- }
-
- // re-use pool
- void Reset();
-
-private:
- uint8_t *More(std::size_t size);
-
- std::vector<Page*> m_pages;
-
- size_t m_currSize;
- size_t m_currPage;
- uint8_t *current_;
-
- // no copying
- MemPool(const MemPool &);
- MemPool &operator=(const MemPool &);
-};
-
-////////////////////////////////////////////////////////////////////////////////////////////////
-template<typename T>
-class ObjectPoolContiguous
-{
-
-public:
- ObjectPoolContiguous(std::size_t initSize = 100000) :
- m_size(0), m_actualSize(initSize)
- {
- m_vec = (T*) malloc(sizeof(T) * initSize);
- }
-
- ~ObjectPoolContiguous()
- {
- free(m_vec);
- }
-
- void Add(T &obj)
- {
- if (m_size >= m_actualSize) {
- //std::cerr << std::endl << "MORE " << m_size << std::endl;
- m_actualSize *= 2;
- m_vec = (T*) realloc(m_vec, sizeof(T) * m_actualSize);
-
- }
- m_vec[m_size] = obj;
- ++m_size;
- }
-
- bool IsEmpty() const
- {
- return m_size == 0;
- }
-
- void Reset()
- {
- m_size = 0;
- }
-
- // vector op
- size_t GetSize() const
- {
- return m_size;
- }
-
- const T& operator[](size_t ind) const
- {
- return m_vec[ind];
- }
-
- // stack op
- const T &Get() const
- {
- return m_vec[m_size - 1];
- }
-
- void Pop()
- {
- --m_size;
- }
-
- T *GetData()
- {
- return m_vec;
- }
-
- template<typename ORDERER>
- void Sort(const ORDERER &orderer)
- {
- std::sort(m_vec, m_vec + m_size, orderer);
- }
-
-private:
- T *m_vec;
- size_t m_size, m_actualSize;
-
- // no copying
- ObjectPoolContiguous(const ObjectPoolContiguous &);
- ObjectPoolContiguous &operator=(const ObjectPoolContiguous &);
-};
-
-//////////////////////////////////////////////////////////////////////////////////////////
-
-
-}
-
diff --git a/contrib/moses2/MemPoolAllocator.h b/contrib/moses2/MemPoolAllocator.h
deleted file mode 100644
index 6cc699893..000000000
--- a/contrib/moses2/MemPoolAllocator.h
+++ /dev/null
@@ -1,90 +0,0 @@
-#pragma once
-#include "MemPool.h"
-
-namespace Moses2
-{
-
-template<typename T>
-class MemPoolAllocator
-{
-public:
- typedef T value_type;
- typedef T* pointer;
- typedef const T* const_pointer;
- typedef T& reference;
- typedef const T& const_reference;
- typedef std::size_t size_type;
- typedef std::ptrdiff_t difference_type;
-
- template<class U>
- struct rebind
- {
- typedef MemPoolAllocator<U> other;
- };
-
- MemPoolAllocator(Moses2::MemPool &pool) :
- m_pool(pool)
- {
- }
- MemPoolAllocator(const MemPoolAllocator &other) :
- m_pool(other.m_pool)
- {
- }
-
- template<class U>
- MemPoolAllocator(const MemPoolAllocator<U>& other) :
- m_pool(other.m_pool)
- {
- }
-
- size_type max_size() const
- {
- return std::numeric_limits<size_type>::max();
- }
-
- void deallocate(pointer p, size_type n)
- {
- //std::cerr << "deallocate " << p << " " << n << std::endl;
- }
-
- pointer allocate(size_type n, std::allocator<void>::const_pointer hint = 0)
- {
- //std::cerr << "allocate " << n << " " << hint << std::endl;
- pointer ret = m_pool.Allocate<T>(n);
- return ret;
- }
-
- void construct(pointer p, const_reference val)
- {
- //std::cerr << "construct " << p << " " << n << std::endl;
- new ((void *) p) T(val);
- }
-
- void destroy(pointer p)
- {
- //std::cerr << "destroy " << p << " " << n << std::endl;
- }
-
- // return address of values
- pointer address (reference value) const {
- return &value;
- }
- const_pointer address (const_reference value) const {
- return &value;
- }
-
- bool operator==(const MemPoolAllocator<T> &allocator) const {
- return true;
- }
-
- bool operator!=(const MemPoolAllocator<T> &allocator) const {
- return false;
- }
-
- MemPool &m_pool;
-protected:
-};
-
-}
-
-
diff --git a/contrib/moses2/MorphoTrie/MorphTrie.h b/contrib/moses2/MorphoTrie/MorphTrie.h
deleted file mode 100644
index 0b013b5bb..000000000
--- a/contrib/moses2/MorphoTrie/MorphTrie.h
+++ /dev/null
@@ -1,100 +0,0 @@
-#ifndef MORPHTRIE_H_
-#define MORPHTRIE_H_
-
-#include <vector>
-#include "Node.h"
-
-namespace Moses2
-{
-
-template<class KeyClass, class ValueClass>
-class MorphTrie
-{
-public:
- MorphTrie()
- {
- }
- Node<KeyClass, ValueClass>* insert(const std::vector<KeyClass>& word,
- const ValueClass& value);
- const Node<KeyClass, ValueClass>* getNode(
- const std::vector<KeyClass>& words) const;
- const Node<KeyClass, ValueClass> &getNode(const std::vector<KeyClass>& words,
- size_t &stoppedAtInd) const;
- std::vector<const Node<KeyClass, ValueClass>*> getNodes(
- const std::vector<KeyClass>& words, size_t &stoppedAtInd) const;
-private:
- Node<KeyClass, ValueClass> root;
-};
-
-template<class KeyClass, class ValueClass>
-Node<KeyClass, ValueClass>* MorphTrie<KeyClass, ValueClass>::insert(
- const std::vector<KeyClass>& word, const ValueClass& value)
-{
- Node<KeyClass, ValueClass>* cNode = &root;
- for (size_t i = 0; i < word.size(); ++i) {
- KeyClass cKey = word[i];
- cNode = cNode->addSubnode(cKey);
- }
- cNode->setValue(value);
- return cNode;
-}
-
-template<class KeyClass, class ValueClass>
-const Node<KeyClass, ValueClass>* MorphTrie<KeyClass, ValueClass>::getNode(
- const std::vector<KeyClass>& words) const
-{
- size_t stoppedAtInd;
- const Node<KeyClass, ValueClass> &ret = getNode(words, stoppedAtInd);
- if (stoppedAtInd < words.size()) {
- return NULL;
- }
- return &ret;
-}
-
-template<class KeyClass, class ValueClass>
-const Node<KeyClass, ValueClass> &MorphTrie<KeyClass, ValueClass>::getNode(
- const std::vector<KeyClass>& words, size_t &stoppedAtInd) const
-{
- const Node<KeyClass, ValueClass> *prevNode = &root, *newNode;
- for (size_t i = 0; i < words.size(); ++i) {
- const KeyClass &cKey = words[i];
- newNode = prevNode->findSub(cKey);
- if (newNode == NULL) {
- stoppedAtInd = i;
- return *prevNode;
- }
- prevNode = newNode;
- }
-
- stoppedAtInd = words.size();
- return *newNode;
-}
-
-template<class KeyClass, class ValueClass>
-std::vector<const Node<KeyClass, ValueClass>*> MorphTrie<KeyClass, ValueClass>::getNodes(
- const std::vector<KeyClass>& words, size_t &stoppedAtInd) const
-{
- std::vector<const Node<KeyClass, ValueClass>*> ret;
- const Node<KeyClass, ValueClass> *prevNode = &root, *newNode;
- ret.push_back(prevNode);
-
- for (size_t i = 0; i < words.size(); ++i) {
- const KeyClass &cKey = words[i];
- newNode = prevNode->findSub(cKey);
- if (newNode == NULL) {
- stoppedAtInd = i;
- return ret;
- }
- else {
- ret.push_back(newNode);
- }
- prevNode = newNode;
- }
-
- stoppedAtInd = words.size();
- return ret;
-}
-
-}
-
-#endif /* end of include guard: MORPHTRIE_H_ */
diff --git a/contrib/moses2/MorphoTrie/Node.h b/contrib/moses2/MorphoTrie/Node.h
deleted file mode 100644
index ca165ef67..000000000
--- a/contrib/moses2/MorphoTrie/Node.h
+++ /dev/null
@@ -1,93 +0,0 @@
-#ifndef NODE_H_
-#define NODE_H_
-
-#include <vector>
-#include <boost/unordered_map.hpp>
-#include <boost/foreach.hpp>
-
-namespace Moses2
-{
-
-template<class KeyClass, class ValueClass>
-class Node
-{
-public:
- Node()
- {
- }
- Node(const ValueClass& value) :
- m_value(value)
- {
- }
- ~Node();
- void setKey(const KeyClass& key);
- void setValue(const ValueClass& value)
- {
- m_value = value;
- }
- Node* findSub(const KeyClass& key);
- const Node* findSub(const KeyClass& key) const;
- Node *addSubnode(const KeyClass& cKey)
- {
- Node *node = findSub(cKey);
- if (node) {
- return node;
- }
- else {
- node = new Node();
- subNodes[cKey] = node;
- return node;
- }
- }
-
- std::vector<Node*> getSubnodes();
- const ValueClass &getValue() const
- {
- return m_value;
- }
-
-private:
- boost::unordered_map<KeyClass, Node*> subNodes;
- ValueClass m_value;
-
-};
-
-template<class KeyClass, class ValueClass>
-Node<KeyClass, ValueClass>::~Node()
-{
- typename boost::unordered_map<KeyClass, Node*>::iterator iter;
- for (iter = subNodes.begin(); iter != subNodes.end(); ++iter) {
- Node *node = iter->second;
- delete node;
- }
-}
-
-template<class KeyClass, class ValueClass>
-const Node<KeyClass, ValueClass>* Node<KeyClass, ValueClass>::findSub(
- const KeyClass& cKey) const
-{
- typename boost::unordered_map<KeyClass, Node*>::const_iterator iter;
- iter = subNodes.find(cKey);
- if (iter != subNodes.end()) {
- Node *node = iter->second;
- return node;
- }
- return NULL;
-}
-
-template<class KeyClass, class ValueClass>
-Node<KeyClass, ValueClass>* Node<KeyClass, ValueClass>::findSub(
- const KeyClass& cKey)
-{
- typename boost::unordered_map<KeyClass, Node*>::iterator iter;
- iter = subNodes.find(cKey);
- if (iter != subNodes.end()) {
- Node *node = iter->second;
- return node;
- }
- return NULL;
-}
-
-}
-
-#endif /* end of include guard: NODE_H_ */
diff --git a/contrib/moses2/MorphoTrie/utils.h b/contrib/moses2/MorphoTrie/utils.h
deleted file mode 100644
index e6f0aa7d6..000000000
--- a/contrib/moses2/MorphoTrie/utils.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#include "MorphTrie.h"
-#include <fstream>
-#include <ostream>
-#include <string>
-#include <vector>
-#include "legacy/Util2.h"
-#include "../legacy/Factor.h"
-#include "../legacy/InputFileStream.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-inline void ParseLineByChar(string& line, char c, vector<string>& substrings)
-{
- size_t i = 0;
- size_t j = line.find(c);
-
- while (j != string::npos) {
- substrings.push_back(line.substr(i, j - i));
- i = ++j;
- j = line.find(c, j);
-
- if (j == string::npos) substrings.push_back(line.substr(i, line.length()));
- }
-}
-
-}
-
diff --git a/contrib/moses2/Phrase.cpp b/contrib/moses2/Phrase.cpp
deleted file mode 100644
index dd4abf328..000000000
--- a/contrib/moses2/Phrase.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * PhraseImpl.cpp
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-#include <boost/functional/hash.hpp>
-#include "Phrase.h"
-#include "Word.h"
-#include "MemPool.h"
-#include "Scores.h"
-#include "System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-
-
-
-} // namespace
-
diff --git a/contrib/moses2/Phrase.h b/contrib/moses2/Phrase.h
deleted file mode 100644
index 714e65d42..000000000
--- a/contrib/moses2/Phrase.h
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * PhraseImpl.h
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include <cstddef>
-#include <string>
-#include <sstream>
-#include <iostream>
-#include "Word.h"
-#include "MemPool.h"
-#include "TypeDef.h"
-#include "legacy/FactorCollection.h"
-#include "SCFG/Word.h"
-
-namespace Moses2
-{
-
-template<typename WORD>
-class SubPhrase;
-
-class Scores;
-class PhraseTable;
-class MemPool;
-class System;
-
-template<typename WORD>
-class Phrase
-{
-public:
- virtual ~Phrase()
- {
- }
- virtual const WORD& operator[](size_t pos) const = 0;
- virtual size_t GetSize() const = 0;
-
- virtual const WORD& Back() const
- { return (*this)[GetSize() - 1]; }
-
- virtual size_t hash() const
- {
- size_t seed = 0;
-
- for (size_t i = 0; i < GetSize(); ++i) {
- const WORD &word = (*this)[i];
- size_t wordHash = word.hash();
- boost::hash_combine(seed, wordHash);
- }
-
- return seed;
- }
-
- virtual bool operator==(const Phrase &compare) const
- {
- if (GetSize() != compare.GetSize()) {
- return false;
- }
-
- for (size_t i = 0; i < GetSize(); ++i) {
- const WORD &word = (*this)[i];
- const WORD &otherWord = compare[i];
- if (word != otherWord) {
- return false;
- }
- }
-
- return true;
- }
-
- virtual bool operator!=(const Phrase &compare) const
- {
- return !((*this) == compare);
- }
-
- virtual std::string GetString(const FactorList &factorTypes) const
- {
- if (GetSize() == 0) {
- return "";
- }
-
- std::stringstream ret;
-
- const WORD &word = (*this)[0];
- ret << word.GetString(factorTypes);
- for (size_t i = 1; i < GetSize(); ++i) {
- const WORD &word = (*this)[i];
- ret << " " << word.GetString(factorTypes);
- }
- return ret.str();
- }
-
- virtual SubPhrase<WORD> GetSubPhrase(size_t start, size_t size) const = 0;
-
- virtual std::string Debug(const System &system) const
- {
- std::stringstream out;
- size_t size = GetSize();
- if (size) {
- out << (*this)[0].Debug(system);
- for (size_t i = 1; i < size; ++i) {
- const WORD &word = (*this)[i];
- out << " " << word.Debug(system);
- }
- }
-
- return out.str();
- }
-
- virtual void OutputToStream(const System &system, std::ostream &out) const
- {
- size_t size = GetSize();
- if (size) {
- (*this)[0].OutputToStream(system, out);
- for (size_t i = 1; i < size; ++i) {
- const WORD &word = (*this)[i];
- out << " ";
- word.OutputToStream(system, out);
- }
- }
- }
-
-
-};
-
-////////////////////////////////////////////////////////////////////////
-template<typename WORD>
-class PhraseOrdererLexical
-{
-public:
- bool operator()(const Phrase<WORD> &a, const Phrase<WORD> &b) const
- {
- size_t minSize = std::min(a.GetSize(), b.GetSize());
- for (size_t i = 0; i < minSize; ++i) {
- const Word &aWord = a[i];
- const Word &bWord = b[i];
- int cmp = aWord.Compare(bWord);
- //std::cerr << "WORD: " << aWord << " ||| " << bWord << " ||| " << lessThan << std::endl;
- if (cmp) {
- return (cmp < 0);
- }
- }
- return a.GetSize() < b.GetSize();
- }
-};
-
-}
-
diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Misc.cpp b/contrib/moses2/PhraseBased/CubePruningMiniStack/Misc.cpp
deleted file mode 100644
index 2af2b35f0..000000000
--- a/contrib/moses2/PhraseBased/CubePruningMiniStack/Misc.cpp
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * CubePruning.cpp
- *
- * Created on: 27 Nov 2015
- * Author: hieu
- */
-
-#include "Misc.h"
-#include "Stack.h"
-#include "../Manager.h"
-#include "../../MemPool.h"
-#include "../../System.h"
-#include "../../PhraseBased/TargetPhrases.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningMiniStack
-{
-
-////////////////////////////////////////////////////////////////////////
-QueueItem *QueueItem::Create(QueueItem *currItem, Manager &mgr, CubeEdge &edge,
- size_t hypoIndex, size_t tpIndex,
- QueueItemRecycler &queueItemRecycler)
-{
- QueueItem *ret;
- if (currItem) {
- // reuse incoming queue item to create new item
- ret = currItem;
- ret->Init(mgr, edge, hypoIndex, tpIndex);
- }
- else if (!queueItemRecycler.empty()) {
- // use item from recycle bin
- ret = queueItemRecycler.back();
- ret->Init(mgr, edge, hypoIndex, tpIndex);
- queueItemRecycler.pop_back();
- }
- else {
- // create new item
- ret = new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge,
- hypoIndex, tpIndex);
- }
-
- return ret;
-}
-
-QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex,
- size_t tpIndex) :
- edge(&edge), hypoIndex(hypoIndex), tpIndex(tpIndex)
-{
- CreateHypothesis(mgr);
-}
-
-void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex,
- size_t tpIndex)
-{
- this->edge = &edge;
- this->hypoIndex = hypoIndex;
- this->tpIndex = tpIndex;
-
- CreateHypothesis(mgr);
-}
-
-void QueueItem::CreateHypothesis(Manager &mgr)
-{
- const Hypothesis *prevHypo =
- static_cast<const Hypothesis*>(edge->hypos[hypoIndex]);
- const TargetPhraseImpl &tp = edge->tps[tpIndex];
-
- //cerr << "hypoIndex=" << hypoIndex << endl;
- //cerr << "edge.hypos=" << edge.hypos.size() << endl;
- //cerr << prevHypo << endl;
- //cerr << *prevHypo << endl;
-
- hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
- hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap,
- edge->estimatedScore);
-
- if (!mgr.system.options.cube.lazy_scoring) {
- hypo->EvaluateWhenApplied();
- }
-}
-
-////////////////////////////////////////////////////////////////////////
-CubeEdge::CubeEdge(Manager &mgr, const Hypotheses &hypos, const InputPath &path,
- const TargetPhrases &tps, const Bitmap &newBitmap) :
- hypos(hypos), path(path), tps(tps), newBitmap(newBitmap)
-{
- estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
-}
-
-std::string CubeEdge::Debug(const System &system) const
-{
- stringstream out;
- out << newBitmap;
- return out.str();
-}
-
-bool CubeEdge::SetSeenPosition(const size_t x, const size_t y,
- SeenPositions &seenPositions) const
-{
- //UTIL_THROW_IF2(x >= (1<<17), "Error");
- //UTIL_THROW_IF2(y >= (1<<17), "Error");
-
- SeenPositionItem val(this, (x << 16) + y);
- std::pair<SeenPositions::iterator, bool> pairRet = seenPositions.insert(val);
- return pairRet.second;
-}
-
-void CubeEdge::CreateFirst(Manager &mgr, Queue &queue,
- SeenPositions &seenPositions,
- QueueItemRecycler &queueItemRecycler)
-{
- assert(hypos.size());
- assert(tps.GetSize());
-
- QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0,
- queueItemRecycler);
- queue.push(item);
- bool setSeen = SetSeenPosition(0, 0, seenPositions);
- assert(setSeen);
-}
-
-void CubeEdge::CreateNext(Manager &mgr, QueueItem *item, Queue &queue,
- SeenPositions &seenPositions,
- QueueItemRecycler &queueItemRecycler)
-{
- size_t hypoIndex = item->hypoIndex;
- size_t tpIndex = item->tpIndex;
-
- if (hypoIndex + 1 < hypos.size()
- && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) {
- // reuse incoming queue item to create new item
- QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1,
- tpIndex, queueItemRecycler);
- assert(newItem == item);
- queue.push(newItem);
- item = NULL;
- }
-
- if (tpIndex + 1 < tps.GetSize()
- && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) {
- QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex,
- tpIndex + 1, queueItemRecycler);
- queue.push(newItem);
- item = NULL;
- }
-
- if (item) {
- // recycle unused queue item
- queueItemRecycler.push_back(item);
- }
-}
-
-}
-
-}
-
diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Misc.h b/contrib/moses2/PhraseBased/CubePruningMiniStack/Misc.h
deleted file mode 100644
index 535ef6ada..000000000
--- a/contrib/moses2/PhraseBased/CubePruningMiniStack/Misc.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * CubePruning.h
- *
- * Created on: 27 Nov 2015
- * Author: hieu
- */
-#pragma once
-#include <boost/pool/pool_alloc.hpp>
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
-#include <vector>
-#include <queue>
-#include "../../legacy/Range.h"
-#include "../Hypothesis.h"
-#include "../../TypeDef.h"
-#include "../../Vector.h"
-#include "../../MemPoolAllocator.h"
-#include "Stack.h"
-
-namespace Moses2
-{
-
-class Manager;
-class InputPath;
-class TargetPhrases;
-class Bitmap;
-
-namespace NSCubePruningMiniStack
-{
-class CubeEdge;
-
-class QueueItem;
-typedef std::deque<QueueItem*, MemPoolAllocator<QueueItem*> > QueueItemRecycler;
-
-///////////////////////////////////////////
-class QueueItem
-{
- ~QueueItem(); // NOT IMPLEMENTED. Use MemPool
-public:
- static QueueItem *Create(QueueItem *currItem, Manager &mgr, CubeEdge &edge,
- size_t hypoIndex, size_t tpIndex,
- QueueItemRecycler &queueItemRecycler);
- QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
-
- void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
-
- CubeEdge *edge;
- size_t hypoIndex, tpIndex;
- Hypothesis *hypo;
-
-protected:
- void CreateHypothesis(Manager &mgr);
-};
-
-///////////////////////////////////////////
-class QueueItemOrderer
-{
-public:
- bool operator()(QueueItem* itemA, QueueItem* itemB) const
- {
- HypothesisFutureScoreOrderer orderer;
- return !orderer(itemA->hypo, itemB->hypo);
- }
-};
-
-///////////////////////////////////////////
-class CubeEdge
-{
-public:
- typedef std::priority_queue<QueueItem*,
- std::vector<QueueItem*, MemPoolAllocator<QueueItem*> >, QueueItemOrderer> Queue;
-
- typedef std::pair<const CubeEdge*, int> SeenPositionItem;
- typedef boost::unordered_set<SeenPositionItem, boost::hash<SeenPositionItem>,
- std::equal_to<SeenPositionItem>, MemPoolAllocator<SeenPositionItem> > SeenPositions;
-
- const Hypotheses &hypos;
- const InputPath &path;
- const TargetPhrases &tps;
- const Bitmap &newBitmap;
- SCORE estimatedScore;
-
- CubeEdge(Manager &mgr, const Hypotheses &hypos, const InputPath &path,
- const TargetPhrases &tps, const Bitmap &newBitmap);
-
- bool SetSeenPosition(const size_t x, const size_t y,
- SeenPositions &seenPositions) const;
-
- void CreateFirst(Manager &mgr, Queue &queue, SeenPositions &seenPositions,
- QueueItemRecycler &queueItemRecycler);
- void CreateNext(Manager &mgr, QueueItem *item, Queue &queue,
- SeenPositions &seenPositions,
- QueueItemRecycler &queueItemRecycler);
-
- std::string Debug(const System &system) const;
-
-protected:
-
-};
-
-}
-
-}
-
diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp b/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp
deleted file mode 100644
index 94baafeb9..000000000
--- a/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * Search.cpp
- *
- * Created on: 16 Nov 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include "Search.h"
-#include "Stack.h"
-#include "../Manager.h"
-#include "../Hypothesis.h"
-#include "../TrellisPath.h"
-#include "../Sentence.h"
-#include "../../TrellisPaths.h"
-#include "../../InputPathsBase.h"
-#include "../../InputPathBase.h"
-#include "../../System.h"
-#include "../../TranslationTask.h"
-#include "../../legacy/Util2.h"
-#include "../../PhraseBased/TargetPhrases.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningMiniStack
-{
-
-////////////////////////////////////////////////////////////////////////
-Search::Search(Manager &mgr) :
- Moses2::Search(mgr), m_stack(mgr), m_cubeEdgeAlloc(mgr.GetPool())
-
-, m_queue(QueueItemOrderer(),
- std::vector<QueueItem*, MemPoolAllocator<QueueItem*> >(
- MemPoolAllocator<QueueItem*>(mgr.GetPool())))
-
-, m_seenPositions(
- MemPoolAllocator<CubeEdge::SeenPositionItem>(mgr.GetPool()))
-
-, m_queueItemRecycler(MemPoolAllocator<QueueItem*>(mgr.GetPool()))
-
-{
-}
-
-Search::~Search()
-{
-}
-
-void Search::Decode()
-{
- const Sentence &sentence = static_cast<const Sentence&>(mgr.GetInput());
-
- // init cue edges
- m_cubeEdges.resize(sentence.GetSize() + 1);
- for (size_t i = 0; i < m_cubeEdges.size(); ++i) {
- m_cubeEdges[i] = new (mgr.GetPool().Allocate<CubeEdges>()) CubeEdges(
- m_cubeEdgeAlloc);
- }
-
- const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
- Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
- initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(),
- initBitmap);
- initHypo->EmptyHypothesisState(mgr.GetInput());
- //cerr << "initHypo=" << *initHypo << endl;
-
- m_stack.Add(initHypo, mgr.GetHypoRecycle(), mgr.arcLists);
- PostDecode(0);
-
- for (size_t stackInd = 1; stackInd < sentence.GetSize() + 1;
- ++stackInd) {
- //cerr << "stackInd=" << stackInd << endl;
- m_stack.Clear();
- Decode(stackInd);
- PostDecode(stackInd);
-
- //m_stack.DebugCounts();
- }
-
-}
-
-void Search::Decode(size_t stackInd)
-{
- Recycler<HypothesisBase*> &hypoRecycler = mgr.GetHypoRecycle();
-
- // reuse queue from previous stack. Clear it first
- std::vector<QueueItem*, MemPoolAllocator<QueueItem*> > &container = Container(
- m_queue);
- //cerr << "container=" << container.size() << endl;
- BOOST_FOREACH(QueueItem *item, container){
- // recycle unused hypos from queue
- Hypothesis *hypo = item->hypo;
- hypoRecycler.Recycle(hypo);
-
- // recycle queue item
- m_queueItemRecycler.push_back(item);
- }
- container.clear();
-
- m_seenPositions.clear();
-
- // add top hypo from every edge into queue
- CubeEdges &edges = *m_cubeEdges[stackInd];
-
- BOOST_FOREACH(CubeEdge *edge, edges){
- //cerr << *edge << " ";
- edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
- }
-
- /*
- cerr << "edges: ";
- boost::unordered_set<const Bitmap*> uniqueBM;
- BOOST_FOREACH(CubeEdge *edge, edges) {
- uniqueBM.insert(&edge->newBitmap);
- //cerr << *edge << " ";
- }
- cerr << edges.size() << " " << uniqueBM.size();
- cerr << endl;
- */
-
- size_t pops = 0;
- while (!m_queue.empty() && pops < mgr.system.options.cube.pop_limit) {
- // get best hypo from queue, add to stack
- //cerr << "queue=" << queue.size() << endl;
- QueueItem *item = m_queue.top();
- m_queue.pop();
-
- CubeEdge *edge = item->edge;
-
- // add hypo to stack
- Hypothesis *hypo = item->hypo;
-
- if (mgr.system.options.cube.lazy_scoring) {
- hypo->EvaluateWhenApplied();
- }
-
- //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
- m_stack.Add(hypo, hypoRecycler, mgr.arcLists);
-
- edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
-
- ++pops;
- }
-
- // create hypo from every edge. Increase diversity
- if (mgr.system.options.cube.diversity) {
- while (!m_queue.empty()) {
- QueueItem *item = m_queue.top();
- m_queue.pop();
-
- if (item->hypoIndex == 0 && item->tpIndex == 0) {
- // add hypo to stack
- Hypothesis *hypo = item->hypo;
- //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
- m_stack.Add(hypo, hypoRecycler, mgr.arcLists);
- }
- }
- }
-}
-
-void Search::PostDecode(size_t stackInd)
-{
- MemPool &pool = mgr.GetPool();
-
- const InputPaths &paths = mgr.GetInputPaths();
- const Matrix<InputPath*> &pathMatrix = paths.GetMatrix();
- size_t inputSize = pathMatrix.GetRows();
- size_t numPaths = pathMatrix.GetCols();
-
- BOOST_FOREACH(const Stack::Coll::value_type &val, m_stack.GetColl()){
- const Bitmap &hypoBitmap = *val.first.first;
- size_t firstGap = hypoBitmap.GetFirstGapPos();
- size_t hypoEndPos = val.first.second;
-
- Moses2::HypothesisColl &hypos = *val.second;
-
- //cerr << "key=" << hypoBitmap << " " << firstGap << " " << inputSize << endl;
-
- // create edges to next hypos from existing hypos
- for (size_t startPos = firstGap; startPos < inputSize; ++startPos) {
- for (size_t pathInd = 0; pathInd < numPaths; ++pathInd) {
- const InputPath *path = pathMatrix.GetValue(startPos, pathInd);
-
- if (path == NULL) {
- break;
- }
- if (path->GetNumRules() == 0) {
- continue;
- }
-
- const Range &pathRange = path->range;
- //cerr << "pathRange=" << pathRange << endl;
- if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
- continue;
- }
-
- const ReorderingConstraint &reorderingConstraint = mgr.GetInput().GetReorderingConstraint();
- if (!reorderingConstraint.Check(hypoBitmap, startPos, pathRange.GetEndPos())) {
- continue;
- }
-
- const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
- size_t numWords = newBitmap.GetNumWordsCovered();
-
- CubeEdges &edges = *m_cubeEdges[numWords];
-
- // sort hypo for a particular bitmap and hypoEndPos
- const Hypotheses &sortedHypos = hypos.GetSortedAndPrunedHypos(mgr, mgr.arcLists);
-
- size_t numPt = mgr.system.mappings.size();
- for (size_t i = 0; i < numPt; ++i) {
- const TargetPhrases *tps = path->targetPhrases[i];
- if (tps && tps->GetSize()) {
- CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, sortedHypos, *path, *tps, newBitmap);
- edges.push_back(edge);
- }
- }
- }
- }
- }
-}
-
-const Hypothesis *Search::GetBestHypo() const
-{
- const Hypothesis *bestHypo = m_stack.GetBestHypo();
- return bestHypo;
-}
-
-void Search::AddInitialTrellisPaths(TrellisPaths<TrellisPath> &paths) const
-{
- const Stack::Coll &coll = m_stack.GetColl();
- BOOST_FOREACH(const Stack::Coll::value_type &val, coll){
- Moses2::HypothesisColl &hypos = *val.second;
- const Hypotheses &sortedHypos = hypos.GetSortedAndPrunedHypos(mgr, mgr.arcLists);
-
- BOOST_FOREACH(const HypothesisBase *hypoBase, sortedHypos) {
- const Hypothesis *hypo = static_cast<const Hypothesis*>(hypoBase);
- TrellisPath *path = new TrellisPath(hypo, mgr.arcLists);
- paths.Add(path);
- }
- }
-}
-
-}
-
-}
-
diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.h b/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.h
deleted file mode 100644
index 0dfe9dfb2..000000000
--- a/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Search.h
- *
- * Created on: 16 Nov 2015
- * Author: hieu
- */
-
-#pragma once
-#include <boost/pool/pool_alloc.hpp>
-#include "../Search.h"
-#include "Misc.h"
-#include "Stack.h"
-#include "../../legacy/Range.h"
-#include "../../MemPoolAllocator.h"
-
-namespace Moses2
-{
-
-class Bitmap;
-class Hypothesis;
-class InputPath;
-class TargetPhrases;
-class TargetPhraseImpl;
-
-namespace NSCubePruningMiniStack
-{
-
-class Search: public Moses2::Search
-{
-public:
- Search(Manager &mgr);
- virtual ~Search();
-
- virtual void Decode();
- const Hypothesis *GetBestHypo() const;
-
- void AddInitialTrellisPaths(TrellisPaths<TrellisPath> &paths) const;
-
-protected:
- Stack m_stack;
-
- CubeEdge::Queue m_queue;
- CubeEdge::SeenPositions m_seenPositions;
-
- // CUBE PRUNING VARIABLES
- // setup
- MemPoolAllocator<CubeEdge*> m_cubeEdgeAlloc;
- typedef std::vector<CubeEdge*, MemPoolAllocator<CubeEdge*> > CubeEdges;
- std::vector<CubeEdges*> m_cubeEdges;
-
- QueueItemRecycler m_queueItemRecycler;
-
- // CUBE PRUNING
- // decoding
- void Decode(size_t stackInd);
- void PostDecode(size_t stackInd);
-};
-
-}
-
-}
-
diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp b/contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp
deleted file mode 100644
index e2b81f0ba..000000000
--- a/contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Stack.cpp
- *
- * Created on: 24 Oct 2015
- * Author: hieu
- */
-#include <algorithm>
-#include <boost/foreach.hpp>
-#include "Stack.h"
-#include "../Hypothesis.h"
-#include "../Manager.h"
-#include "../../Scores.h"
-#include "../../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningMiniStack
-{
-Stack::Stack(const Manager &mgr) :
- m_mgr(mgr), m_coll(
- MemPoolAllocator<std::pair<HypoCoverage, Moses2::HypothesisColl*> >(
- mgr.GetPool())), m_miniStackRecycler(
- MemPoolAllocator<Moses2::HypothesisColl*>(mgr.GetPool()))
-{
-}
-
-Stack::~Stack()
-{
- BOOST_FOREACH(const Coll::value_type &val, m_coll){
- const Moses2::HypothesisColl *miniStack = val.second;
- delete miniStack;
- }
-
- while (!m_miniStackRecycler.empty()) {
- Moses2::HypothesisColl *miniStack = m_miniStackRecycler.back();
- m_miniStackRecycler.pop_back();
- delete miniStack;
-
- }
-}
-
-void Stack::Add(Hypothesis *hypo, Recycler<HypothesisBase*> &hypoRecycle,
- ArcLists &arcLists)
-{
- HypoCoverage key(&hypo->GetBitmap(), hypo->GetInputPath().range.GetEndPos());
- Moses2::HypothesisColl &coll = GetMiniStack(key);
- coll.Add(m_mgr, hypo, hypoRecycle, arcLists);
-}
-
-const Hypothesis *Stack::GetBestHypo() const
-{
- SCORE bestScore = -std::numeric_limits<SCORE>::infinity();
- const HypothesisBase *bestHypo = NULL;
- BOOST_FOREACH(const Coll::value_type &val, m_coll){
- const Moses2::HypothesisColl &hypos = *val.second;
- const Moses2::HypothesisBase *hypo = hypos.GetBestHypo();
-
- if (hypo && hypo->GetFutureScore() > bestScore) {
- bestScore = hypo->GetFutureScore();
- bestHypo = hypo;
- }
- }
- return &bestHypo->Cast<Hypothesis>();
-}
-
-size_t Stack::GetHypoSize() const
-{
- size_t ret = 0;
- BOOST_FOREACH(const Coll::value_type &val, m_coll){
- const Moses2::HypothesisColl &hypos = *val.second;
- ret += hypos.GetSize();
- }
- return ret;
-}
-
-Moses2::HypothesisColl &Stack::GetMiniStack(const HypoCoverage &key)
-{
- Moses2::HypothesisColl *ret;
- Coll::iterator iter = m_coll.find(key);
- if (iter == m_coll.end()) {
- if (m_miniStackRecycler.empty()) {
- ret = new Moses2::HypothesisColl(m_mgr);
- }
- else {
- ret = m_miniStackRecycler.back();
- ret->Clear();
- m_miniStackRecycler.pop_back();
- }
-
- m_coll[key] = ret;
- }
- else {
- ret = iter->second;
- }
- return *ret;
-}
-
-void Stack::Clear()
-{
- BOOST_FOREACH(const Coll::value_type &val, m_coll){
- Moses2::HypothesisColl *miniStack = val.second;
- m_miniStackRecycler.push_back(miniStack);
- }
-
- m_coll.clear();
-}
-
-void Stack::DebugCounts()
-{
- cerr << "counts=";
- BOOST_FOREACH(const Coll::value_type &val, GetColl()){
- const Moses2::HypothesisColl &miniStack = *val.second;
- size_t count = miniStack.GetSize();
- cerr << count << " ";
- }
- cerr << endl;
-}
-
-}
-
-}
-
diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.h b/contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.h
deleted file mode 100644
index 7601f90b2..000000000
--- a/contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Stack.h
- *
- * Created on: 24 Oct 2015
- * Author: hieu
- */
-#pragma once
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
-#include <deque>
-#include "../Hypothesis.h"
-#include "../../TypeDef.h"
-#include "../../Vector.h"
-#include "../../MemPool.h"
-#include "../../MemPoolAllocator.h"
-#include "../../Recycler.h"
-#include "../../HypothesisColl.h"
-#include "../../legacy/Util2.h"
-
-namespace Moses2
-{
-
-class Manager;
-class HypothesisBase;
-class ArcLists;
-
-namespace NSCubePruningMiniStack
-{
-
-class Stack
-{
-protected:
-
-public:
- typedef std::pair<const Bitmap*, size_t> HypoCoverage; // the bitmap is identified by its address
- // bitmap and current endPos of hypos
- // Coll maps a coverage key to the mini-stack (hypothesis collection) for that key
- typedef boost::unordered_map<HypoCoverage, Moses2::HypothesisColl*,
- boost::hash<HypoCoverage>, std::equal_to<HypoCoverage>,
- MemPoolAllocator<std::pair<HypoCoverage, Moses2::HypothesisColl*> > > Coll;
-
- Stack(const Manager &mgr);
- virtual ~Stack();
- // total number of hypotheses over all mini-stacks
- size_t GetHypoSize() const;
-
- Coll &GetColl()
- {
- return m_coll;
- }
- const Coll &GetColl() const
- {
- return m_coll;
- }
- // adds hypo to the mini-stack keyed by its bitmap and the end position of its input path
- void Add(Hypothesis *hypo, Recycler<HypothesisBase*> &hypoRecycle,
- ArcLists &arcLists);
- // mini-stack for key; one is created (or taken from the recycler) if none exists yet
- Moses2::HypothesisColl &GetMiniStack(const HypoCoverage &key);
- // hypothesis with the highest future score over all mini-stacks
- const Hypothesis *GetBestHypo() const;
- void Clear(); // moves every mini-stack to the recycler and empties the map
- // prints the size of every mini-stack to stderr
- void DebugCounts();
-
-protected:
- const Manager &m_mgr;
- Coll m_coll;
- // mini-stacks removed from m_coll by Clear(), kept so GetMiniStack() can reuse them
- std::deque<Moses2::HypothesisColl*, MemPoolAllocator<Moses2::HypothesisColl*> > m_miniStackRecycler;
-
-};
-
-}
-
-}
-
diff --git a/contrib/moses2/PhraseBased/Hypothesis.cpp b/contrib/moses2/PhraseBased/Hypothesis.cpp
deleted file mode 100644
index d59efb11a..000000000
--- a/contrib/moses2/PhraseBased/Hypothesis.cpp
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Hypothesis.cpp
- *
- * Created on: 24 Oct 2015
- * Author: hieu hoang
- */
-#include <boost/foreach.hpp>
-#include <stdlib.h>
-#include <deque>
-#include "Hypothesis.h"
-#include "Manager.h"
-#include "Sentence.h"
-#include "TargetPhraseImpl.h"
-#include "../InputPathBase.h"
-#include "../System.h"
-#include "../Scores.h"
-#include "../Phrase.h"
-#include "../FF/StatefulFeatureFunction.h"
-
-using namespace std;
-
-namespace Moses2
-{
-Hypothesis *Hypothesis::Create(MemPool &pool, Manager &mgr)
-{
-  // Prefer an object handed back by the manager's recycler; only when it has
-  // none is a new Hypothesis placement-constructed in pool.
-  Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycle();
-  Hypothesis *hypo = static_cast<Hypothesis*>(recycler.Get());
-  if (hypo) {
-    return hypo;
-  }
-
-  // nothing to reuse: build a new one and register it with the recycler
-  hypo = new (pool.Allocate<Hypothesis>()) Hypothesis(pool, mgr.system);
-  //cerr << "Hypothesis=" << sizeof(Hypothesis) << " " << hypo << endl;
-  recycler.Keep(hypo);
-
-  return hypo;
-}
-
-Hypothesis::Hypothesis(MemPool &pool, const System &system) :
-  HypothesisBase(pool, system), m_targetPhrase(NULL), m_sourceCompleted(NULL), m_path(NULL),
-  m_prevHypo(NULL), m_estimatedScore(0), m_currTargetWordsRange()
-{ /* members stay null/zero until one of the Init() overloads binds them */ }
-
-Hypothesis::~Hypothesis()
-{
- // Intentionally empty: this class adds no cleanup of its own.
-}
-
-void Hypothesis::Init(Manager &mgr, const InputPathBase &path,
-    const TargetPhraseImpl &tp, const Bitmap &bitmap)
-{
-  // Set-up for the initial, empty hypothesis: no predecessor, target word
-  // range marked NOT_FOUND, no estimated score and freshly reset scores.
-  m_mgr = &mgr;
-  m_path = &path;
-  m_targetPhrase = &tp;
-  m_sourceCompleted = &bitmap;
-  m_prevHypo = NULL;
-  m_estimatedScore = 0;
-  m_currTargetWordsRange.SetStartPos(NOT_FOUND);
-  m_currTargetWordsRange.SetEndPos(NOT_FOUND);
-  m_scores->Reset(mgr.system);
-}
-
-void Hypothesis::Init(Manager &mgr, const Hypothesis &prevHypo,
-    const InputPathBase &path, const TargetPhraseImpl &tp, const Bitmap &bitmap,
-    SCORE estimatedScore)
-{
-  // Set-up for a hypothesis that extends prevHypo with target phrase tp.
-  m_mgr = &mgr;
-  m_path = &path;
-  m_targetPhrase = &tp;
-  m_sourceCompleted = &bitmap;
-  m_prevHypo = &prevHypo;
-  m_estimatedScore = estimatedScore;
-  // the new target words directly follow those of the predecessor
-  const size_t prevEnd = prevHypo.m_currTargetWordsRange.GetEndPos();
-  m_currTargetWordsRange.SetStartPos(prevEnd + 1);
-  m_currTargetWordsRange.SetEndPos(prevEnd + tp.GetSize());
-
-  // scores accumulate: predecessor's scores plus this phrase's scores
-  m_scores->Reset(mgr.system);
-  m_scores->PlusEquals(mgr.system, prevHypo.GetScores());
-  m_scores->PlusEquals(mgr.system, GetTargetPhrase().GetScores());
-}
-
-size_t Hypothesis::hash() const
-{
-  // Seed with the address of the coverage bitmap (pointer identity, not the
-  // bitmap's contents), then let the base class mix in its own part.
-  size_t coverageSeed = (size_t) m_sourceCompleted;
-
-  return HypothesisBase::hash(coverageSeed);
-}
-
-bool Hypothesis::operator==(const Hypothesis &other) const
-{
-  // Equal only when both share the same coverage bitmap object (compared by
-  // address) and the base-class comparison also holds.
-  if (m_sourceCompleted == other.m_sourceCompleted) {
-    return HypothesisBase::operator ==(other);
-  }
-
-  return false;
-}
-
-std::string Hypothesis::Debug(const System &system) const
-{
-  // Renders coverage, source range, each stateful-FF state, the future
-  // score and the score breakdown as one string. Note that the system
-  // argument is not read; GetManager().system is used throughout.
-  const System &sys = GetManager().system;
-  stringstream out;
-  // coverage
-  out << GetBitmap() << " " << GetInputPath().range << " ";
-
-  // states
-  const size_t numStatefulFFs =
-      sys.featureFunctions.GetStatefulFeatureFunctions().size();
-  for (size_t ffInd = 0; ffInd < numStatefulFFs; ++ffInd) {
-    out << "(" << *GetState(ffInd) << ") ";
-  }
-
-  // string
-  //Debug(out, m_mgr->system);
-  out << " " << "fc=" << GetFutureScore() << " ";
-  out << GetScores().Debug(sys);
-
-  return out.str();
-}
-
-void Hypothesis::OutputToStream(std::ostream &out) const
-{
-  // Writes the whole derivation: predecessors first (recursively), then this
-  // hypothesis' target phrase, then the optional segmentation report.
-  if (m_prevHypo) {
-    m_prevHypo->OutputToStream(out);
-  }
-  //cerr << "range=" << GetInputPath().range << endl;
-
-  const TargetPhrase<Moses2::Word> &tp = GetTargetPhrase();
-  if (tp.GetSize()) {
-    const InputPath &inputPath = static_cast<const InputPath&>(GetInputPath());
-    //cerr << "tp=" << tp.Debug(m_mgr->system) << endl;
-    //cerr << "subPhrase=" << inputPath.subPhrase.Debug(m_mgr->system) << endl;
-    tp.OutputToStream(m_mgr->system, inputPath.subPhrase, out);
-  }
-
-  const Range &srcRange = m_path->range;
-  if (srcRange.GetStartPos() == NOT_FOUND) {
-    // no source span on this path: nothing to report
-    return;
-  }
-
-  // segmentation report, selected by the ReportSegmentation option
-  if (m_mgr->system.options.output.ReportSegmentation == 1) {
-    // just report phrase segmentation
-    out << "|" << srcRange.GetStartPos() << "-" << srcRange.GetEndPos() << "| ";
-  }
-  else if (m_mgr->system.options.output.ReportSegmentation == 2) {
-    // more detailed info: phrase segmentation followed by the score breakdown
-    out << "|" << srcRange.GetStartPos() << "-" << srcRange.GetEndPos() << ",";
-    m_scores->OutputBreakdown(out, m_mgr->system);
-    out << "| ";
-  }
-}
-
-void Hypothesis::EmptyHypothesisState(const InputType &input)
-{
-  // Let every stateful feature function fill in its own slot of m_ffStates
-  // through its EmptyHypothesisState() hook.
-  BOOST_FOREACH(const StatefulFeatureFunction *ff,
-      GetManager().system.featureFunctions.GetStatefulFeatureFunctions()){
-    FFState *slot = m_ffStates[ff->GetStatefulInd()];
-    ff->EmptyHypothesisState(*slot, GetManager(), input, *this);
-  }
-}
-
-void Hypothesis::EvaluateWhenApplied()
-{
-  // Score this hypothesis with each stateful feature function in turn.
-  BOOST_FOREACH(const StatefulFeatureFunction *ff,
-      GetManager().system.featureFunctions.GetStatefulFeatureFunctions()){
-    EvaluateWhenApplied(*ff);
-  }
-//cerr << *this << endl;
-}
-
-void Hypothesis::EvaluateWhenApplied(const StatefulFeatureFunction &sfff)
-{
-  // Only valid for a hypothesis that has a predecessor: the feature function
-  // is given the predecessor's state and writes this hypothesis' state.
-  assert(m_prevHypo);
-  const size_t statefulInd = sfff.GetStatefulInd();
-  const FFState *prevState = m_prevHypo->GetState(statefulInd);
-  assert(prevState);
-  sfff.EvaluateWhenApplied(GetManager(), *this, *prevState, *m_scores, *m_ffStates[statefulInd]);
-}
-
-/** pos is relative to the start of the sentence; walks back through the predecessors */
-const Word &Hypothesis::GetWord(size_t pos) const
-{
-  const Hypothesis *owner = this;
-  while (owner->GetCurrTargetWordsRange().GetStartPos() > pos) {
-    owner = owner->GetPrevHypo();
-    UTIL_THROW_IF2(owner == NULL, "Previous hypothesis should not be NULL");
-  }
-  return owner->GetCurrWord(pos - owner->GetCurrTargetWordsRange().GetStartPos());
-}
-
-void Hypothesis::Swap(Hypothesis &other)
-{ // currently a no-op: the member-wise swap below is commented out
- /*
- Swap(m_targetPhrase, other.m_targetPhrase);
- Swap(m_sourceCompleted, other.m_sourceCompleted);
- Swap(m_range, other.m_range);
- Swap(m_prevHypo, other.m_prevHypo);
- Swap(m_ffStates, other.m_ffStates);
- Swap(m_estimatedScore, other.m_estimatedScore);
- Swap(m_currTargetWordsRange, other.m_currTargetWordsRange);
- */
-}
-
-}
-
diff --git a/contrib/moses2/PhraseBased/Hypothesis.h b/contrib/moses2/PhraseBased/Hypothesis.h
deleted file mode 100644
index 7859c1d14..000000000
--- a/contrib/moses2/PhraseBased/Hypothesis.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Hypothesis.h
- *
- * Created on: 24 Oct 2015
- * Author: hieu
- */
-#pragma once
-
-#include <iostream>
-#include <cstddef>
-#include "../FF/FFState.h"
-#include "../legacy/Bitmap.h"
-#include "../legacy/Range.h"
-#include "../Scores.h"
-#include "../Phrase.h"
-#include "../TargetPhrase.h"
-#include "../InputPathBase.h"
-#include "../HypothesisBase.h"
-
-namespace Moses2
-{
-class Manager;
-class InputType;
-class StatefulFeatureFunction;
-class TargetPhraseImpl;
-
-class Hypothesis: public HypothesisBase
-{
- Hypothesis(MemPool &pool, const System &system); // private: instances come from Create()
-
-public:
- // returns a hypothesis from the manager's recycler if it has one, else constructs one in pool
- static Hypothesis *Create(MemPool &pool, Manager &mgr);
- virtual ~Hypothesis();
-
- // initial, empty hypo
- void Init(Manager &mgr, const InputPathBase &path, const TargetPhraseImpl &tp,
- const Bitmap &bitmap);
- // hypo that extends prevHypo with target phrase tp over path
- void Init(Manager &mgr, const Hypothesis &prevHypo, const InputPathBase &path,
- const TargetPhraseImpl &tp, const Bitmap &bitmap, SCORE estimatedScore);
- // both use the coverage bitmap by address, then defer to HypothesisBase
- size_t hash() const;
- bool operator==(const Hypothesis &other) const;
-
- inline const Bitmap &GetBitmap() const
- {
- return *m_sourceCompleted;
- }
-
- inline const InputPathBase &GetInputPath() const
- {
- return *m_path;
- }
-
- inline const Range &GetCurrTargetWordsRange() const
- {
- return m_currTargetWordsRange;
- }
- // total of the accumulated scores plus the estimated score given to Init()
- SCORE GetFutureScore() const
- {
- return GetScores().GetTotalScore() + m_estimatedScore;
- }
-
- const TargetPhrase<Moses2::Word> &GetTargetPhrase() const
- {
- return *m_targetPhrase;
- }
-
- std::string Debug(const System &system) const;
- // writes the predecessors' output first, then this hypothesis' own
- virtual void OutputToStream(std::ostream &out) const;
-
- void EmptyHypothesisState(const InputType &input);
- // the no-argument overload calls the one-argument overload for every stateful feature function
- void EvaluateWhenApplied();
- void EvaluateWhenApplied(const StatefulFeatureFunction &sfff);
-
- const Hypothesis* GetPrevHypo() const
- {
- return m_prevHypo;
- }
-
- /** curr - pos is relative from CURRENT hypothesis's starting index
- * (ie, start of sentence would be some negative number, which is
- * not allowed- USE WITH CAUTION) */
- inline const Word &GetCurrWord(size_t pos) const
- {
- return GetTargetPhrase()[pos];
- }
-
- /** recursive - pos is relative from start of sentence */
- const Word &GetWord(size_t pos) const;
-
- void Swap(Hypothesis &other);
-protected:
- const TargetPhrase<Moses2::Word> *m_targetPhrase;
- const Bitmap *m_sourceCompleted;
- const InputPathBase *m_path;
- const Hypothesis *m_prevHypo; // NULL for the initial hypothesis
-
- SCORE m_estimatedScore;
- Range m_currTargetWordsRange;
-};
-
-////////////////////////////////////////////////////////////////////////////////////
-class HypothesisTargetPhraseOrderer
-{
-public:
-  // Orders hypotheses by their target phrases only, via PhraseOrdererLexical.
-  bool operator()(const Hypothesis* a, const Hypothesis* b) const
-  {
-    const bool ret = PhraseOrdererLexical<Moses2::Word>()(a->GetTargetPhrase(), b->GetTargetPhrase());
-    /*
-     std::cerr << (const Phrase&) a->GetTargetPhrase() << " ||| "
-     << (const Phrase&) b->GetTargetPhrase() << " ||| "
-     << ret << std::endl;
-     */
-    return ret;
-  }
-};
-
-}
-
diff --git a/contrib/moses2/PhraseBased/InputPath.cpp b/contrib/moses2/PhraseBased/InputPath.cpp
deleted file mode 100644
index 1a9716380..000000000
--- a/contrib/moses2/PhraseBased/InputPath.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * InputPath.cpp
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-#include <sstream>
-#include <boost/foreach.hpp>
-#include "InputPath.h"
-#include "TargetPhrases.h"
-#include "../TranslationModel/PhraseTable.h"
-
-using namespace std;
-
-namespace Moses2
-{
-InputPath::InputPath(MemPool &pool, const SubPhrase<Moses2::Word> &subPhrase,
-    const Range &range, size_t numPt, const InputPath *prefixPath)
-:InputPathBase(pool, range, numPt, prefixPath)
-,subPhrase(subPhrase)
-,m_numRules(0)
-{ // one target-phrase slot per phrase table, every slot starting out NULL
-  targetPhrases = pool.Allocate<const TargetPhrases*>(numPt);
-  Init<const TargetPhrases*>(targetPhrases, numPt, NULL);
-}
-
-InputPath::~InputPath()
-{
- // Intentionally empty: the targetPhrases array is not released here.
-}
-
-void InputPath::AddTargetPhrases(const PhraseTable &pt,
-    const TargetPhrases *tps)
-{
-  // Record tps (may be NULL) in the slot of phrase table pt and add its
-  // size, if any, to the running rule count.
-  targetPhrases[pt.GetPtInd()] = tps;
-  if (tps != NULL) {
-    m_numRules += tps->GetSize();
-  }
-}
-
-const TargetPhrases *InputPath::GetTargetPhrases(const PhraseTable &pt) const
-{
-  // slot for pt; NULL unless AddTargetPhrases() stored a non-NULL set there
-  return targetPhrases[pt.GetPtInd()];
-}
-
-std::string InputPath::Debug(const System &system) const
-{
-  // "<range> " followed by the debug rendering of the source sub-phrase
-  stringstream buf;
-  buf << range << " " << flush;
-  buf << subPhrase.Debug(system);
-  return buf.str();
-}
-
-}
-
diff --git a/contrib/moses2/PhraseBased/InputPath.h b/contrib/moses2/PhraseBased/InputPath.h
deleted file mode 100644
index 100649155..000000000
--- a/contrib/moses2/PhraseBased/InputPath.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * InputPath.h
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include <iostream>
-#include <vector>
-#include "../InputPathBase.h"
-
-namespace Moses2
-{
-class TargetPhrases;
-
-class InputPath: public InputPathBase
-{
-public:
- const TargetPhrases** targetPhrases; // numPt slots indexed by PhraseTable::GetPtInd(); NULL = nothing stored
- SubPhrase<Moses2::Word> subPhrase; // copy of the sub-phrase passed to the constructor
-
- InputPath(MemPool &pool, const SubPhrase<Moses2::Word> &subPhrase, const Range &range,
- size_t numPt, const InputPath *prefixPath);
- virtual ~InputPath();
- // store / fetch the target phrases that phrase table pt produced for this path
- void AddTargetPhrases(const PhraseTable &pt, const TargetPhrases *tps);
- const TargetPhrases *GetTargetPhrases(const PhraseTable &pt) const;
-
- size_t GetNumRules() const
- { return m_numRules; }
-
- std::string Debug(const System &system) const;
-
-protected:
- size_t m_numRules; // sum of GetSize() over every non-NULL set passed to AddTargetPhrases()
-};
-
-}
-
diff --git a/contrib/moses2/PhraseBased/InputPaths.cpp b/contrib/moses2/PhraseBased/InputPaths.cpp
deleted file mode 100644
index 50c00acbb..000000000
--- a/contrib/moses2/PhraseBased/InputPaths.cpp
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * InputPaths.cpp
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-#include <iostream>
-#include "../InputPathsBase.h"
-#include "../System.h"
-#include "../legacy/Range.h"
-#include "Manager.h"
-#include "Sentence.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-void InputPaths::Init(const InputType &input, const ManagerBase &mgr)
-{
-  // Builds the blank path used by the initial hypothesis plus one path per
-  // source span of at most max_phrase_length words. Every span path is
-  // appended to m_inputPaths and stored in m_matrix at
-  // (start position, span length - 1). All objects come from mgr's pool.
-  const Sentence &sentence = static_cast<const Sentence&>(input);
-
-  MemPool &pool = mgr.GetPool();
-  const size_t numPt = mgr.system.mappings.size();
-  const size_t numWords = sentence.GetSize();
-  const size_t maxLength = min(numWords, mgr.system.options.search.max_phrase_length);
-
-  m_matrix = new (pool.Allocate<Matrix<InputPath*> >()) Matrix<InputPath*>(pool,
-      numWords, maxLength);
-  m_matrix->Init(NULL);
-
-  // create blank path for initial hypo
-  Range blankRange(NOT_FOUND, NOT_FOUND);
-  SubPhrase<Moses2::Word> blankPhrase = sentence.GetSubPhrase(NOT_FOUND, NOT_FOUND);
-  m_blank = new (pool.Allocate<InputPath>()) InputPath(pool, blankPhrase, blankRange,
-      numPt, NULL);
-
-  // create normal paths of subphrases through the sentence
-  for (size_t startPos = 0; startPos < numWords; ++startPos) {
-    // each path is chained to the path one word shorter with the same start
-    const InputPath *prefixPath = NULL;
-
-    // stop at maxLength or as soon as the span would run past the sentence
-    for (size_t len = 1; len <= maxLength && startPos + len - 1 < numWords; ++len) {
-      const size_t endPos = startPos + len - 1;
-      SubPhrase<Moses2::Word> subPhrase = sentence.GetSubPhrase(startPos, len);
-      Range range(startPos, endPos);
-
-      InputPath *path = new (pool.Allocate<InputPath>()) InputPath(pool,
-          subPhrase, range, numPt, prefixPath);
-      m_inputPaths.push_back(path);
-      m_matrix->SetValue(startPos, len - 1, path);
-
-      prefixPath = path;
-    }
-  }
-
-}
-
-}
-
diff --git a/contrib/moses2/PhraseBased/InputPaths.h b/contrib/moses2/PhraseBased/InputPaths.h
deleted file mode 100644
index dda374515..000000000
--- a/contrib/moses2/PhraseBased/InputPaths.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * InputPaths.h
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include <vector>
-#include "InputPath.h"
-#include "../MemPool.h"
-#include "../InputPathsBase.h"
-#include "../legacy/Matrix.h"
-
-namespace Moses2
-{
-
-class System;
-
-class InputPaths: public InputPathsBase
-{
-public:
- void Init(const InputType &input, const ManagerBase &mgr);
- // path with range (NOT_FOUND, NOT_FOUND) built by Init(); only valid after Init()
- const InputPath &GetBlank() const
- {
- return *m_blank;
- }
- // Init() stores each span path at (start position, span length - 1); only valid after Init()
- Matrix<InputPath*> &GetMatrix()
- { return *m_matrix; }
-
- const Matrix<InputPath*> &GetMatrix() const
- { return *m_matrix; }
-
-protected:
- InputPath *m_blank; // placement-constructed in the manager's pool by Init()
- Matrix<InputPath*> *m_matrix; // placement-constructed in the manager's pool by Init()
-
-};
-
-}
-
diff --git a/contrib/moses2/PhraseBased/Manager.cpp b/contrib/moses2/PhraseBased/Manager.cpp
deleted file mode 100644
index 158b72592..000000000
--- a/contrib/moses2/PhraseBased/Manager.cpp
+++ /dev/null
@@ -1,280 +0,0 @@
-/*
- * Manager.cpp
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include <boost/functional/hash.hpp>
-#include <boost/unordered_set.hpp>
-#include <vector>
-#include <sstream>
-#include "Manager.h"
-#include "TargetPhraseImpl.h"
-#include "InputPath.h"
-#include "Sentence.h"
-
-#include "Normal/Search.h"
-#include "CubePruningMiniStack/Search.h"
-
-/*
- #include "CubePruningPerMiniStack/Search.h"
- #include "CubePruningPerBitmap/Search.h"
- #include "CubePruningCardinalStack/Search.h"
- #include "CubePruningBitmapStack/Search.h"
- */
-#include "../TrellisPaths.h"
-#include "../System.h"
-#include "../Phrase.h"
-#include "../InputPathsBase.h"
-#include "../TranslationModel/PhraseTable.h"
-#include "../TranslationModel/UnknownWordPenalty.h"
-#include "../legacy/Range.h"
-#include "../PhraseBased/TargetPhrases.h"
-
-using namespace std;
-
-namespace Moses2
-{
-Manager::Manager(System &sys, const TranslationTask &task,
-    const std::string &inputStr, long translationId) :
-    ManagerBase(sys, task, inputStr, translationId)
-,m_bitmaps(NULL), m_estimatedScores(NULL)
-,m_initPhrase(NULL), m_search(NULL) // declaration order; all stay NULL until Init() sets them
-{
-  //cerr << translationId << " inputStr=" << inputStr << endl;
-}
-
-Manager::~Manager()
-{ // m_estimatedScores and m_initPhrase are placement-constructed in GetPool() and are not deleted here
- //cerr << "Start ~Manager " << this << endl;
- delete m_search; // created with new in Init()
- delete m_bitmaps; // created with new in Init()
- //cerr << "Finish ~Manager " << this << endl;
-}
-
-void Manager::Init()
-{ // Builds all per-sentence state (input, bitmaps, paths, estimated scores) and creates the search object.
- // init pools etc
- InitPools();
-
- FactorCollection &vocab = system.GetVocab();
- m_input = Moses2::Sentence::CreateFromString(GetPool(), vocab, system, m_inputStr);
-
- m_bitmaps = new Bitmaps(GetPool());
- // phrase used by the initial hypothesis; NOTE(review): phraseTables[0] assumes at least one phrase table
- const PhraseTable &firstPt = *system.featureFunctions.phraseTables[0];
- m_initPhrase = new (GetPool().Allocate<TargetPhraseImpl>()) TargetPhraseImpl(
- GetPool(), firstPt, system, 0);
-
- const Sentence &sentence = static_cast<const Sentence&>(GetInput());
- //cerr << "sentence=" << sentence.GetSize() << " " << sentence.Debug(system) << endl;
-
- m_inputPaths.Init(sentence, *this);
-
- // xml
- const UnknownWordPenalty *unkWP = system.featureFunctions.GetUnknownWordPenalty();
- UTIL_THROW_IF2(unkWP == NULL, "There must be a UnknownWordPenalty FF");
- unkWP->ProcessXML(*this, GetPool(), sentence, m_inputPaths);
-
- // lookup with every pt
- const std::vector<const PhraseTable*> &pts = system.mappings;
- for (size_t i = 0; i < pts.size(); ++i) {
- const PhraseTable &pt = *pts[i];
- //cerr << "Looking up from " << pt.GetName() << endl;
- pt.Lookup(*this, m_inputPaths);
- }
- //m_inputPaths.DeleteUnusedPaths();
- CalcFutureScore(); // runs after the lookups because it reads the target phrases stored on each path
-
- m_bitmaps->Init(sentence.GetSize(), vector<bool>(0));
- // choose the search implementation; CubePruning and CubePruningMiniStack share one class
- switch (system.options.search.algo) {
- case Normal:
- m_search = new NSNormal::Search(*this);
- break;
- case NormalBatch:
- //m_search = new NSBatch::Search(*this);
- UTIL_THROW2("Not implemented");
- break;
- case CubePruning:
- case CubePruningMiniStack:
- m_search = new NSCubePruningMiniStack::Search(*this);
- break;
- /*
- case CubePruningPerMiniStack:
- m_search = new NSCubePruningPerMiniStack::Search(*this);
- break;
- case CubePruningPerBitmap:
- m_search = new NSCubePruningPerBitmap::Search(*this);
- break;
- case CubePruningCardinalStack:
- m_search = new NSCubePruningCardinalStack::Search(*this);
- break;
- case CubePruningBitmapStack:
- m_search = new NSCubePruningBitmapStack::Search(*this);
- break;
- */
- default:
- UTIL_THROW2("Unknown search algorithm");
- }
-}
-
-void Manager::Decode()
-{
- //cerr << "Start Decode " << this << endl;
- // set up the per-sentence state and the search object, then run the search
- Init();
- m_search->Decode();
-
- //cerr << "Finished Decode " << this << endl;
-}
-
-void Manager::CalcFutureScore()
-{ // Fills m_estimatedScores with the best score obtainable for every source span.
- const Sentence &sentence = static_cast<const Sentence&>(GetInput());
- size_t size = sentence.GetSize();
- m_estimatedScores =
- new (GetPool().Allocate<EstimatedScores>()) EstimatedScores(GetPool(),
- size);
- m_estimatedScores->InitTriangle(-numeric_limits<SCORE>::infinity());
-
- // walk all the translation options and record the best (highest) future score for each span
- BOOST_FOREACH(const InputPathBase *path, m_inputPaths){
- const Range &range = path->range;
- SCORE bestScore = -numeric_limits<SCORE>::infinity();
-
- size_t numPt = system.mappings.size();
- for (size_t i = 0; i < numPt; ++i) {
- const TargetPhrases *tps = static_cast<const InputPath*>(path)->targetPhrases[i];
- if (tps) {
- BOOST_FOREACH(const TargetPhraseImpl *tp, *tps) {
- SCORE score = tp->GetFutureScore();
- if (score > bestScore) {
- bestScore = score;
- }
- }
- }
- }
- m_estimatedScores->SetValue(range.GetStartPos(), range.GetEndPos(), bestScore);
- }
-
- // now fill all the cells in the strictly upper triangle
- // there is no way to modify the diagonal now, in the case
- // where no translation option covers a single-word span,
- // we leave the -inf in the matrix
- // like in chart parsing we want each cell to contain the highest score
- // of the full-span trOpt or the sum of scores of joining two smaller spans
-
- for (size_t colstart = 1; colstart < size; colstart++) {
- for (size_t diagshift = 0; diagshift < size - colstart; diagshift++) {
- size_t sPos = diagshift;
- size_t ePos = colstart + diagshift;
- for (size_t joinAt = sPos; joinAt < ePos; joinAt++) {
- float joinedScore = m_estimatedScores->GetValue(sPos, joinAt) // NOTE(review): float here, SCORE elsewhere - confirm they are the same type
- + m_estimatedScores->GetValue(joinAt + 1, ePos);
- // uncomment to see the cell filling scheme
- // TRACE_ERR("[" << sPos << "," << ePos << "] <-? ["
- // << sPos << "," << joinAt << "]+["
- // << joinAt+1 << "," << ePos << "] (colstart: "
- // << colstart << ", diagshift: " << diagshift << ")"
- // << endl);
-
- if (joinedScore > m_estimatedScores->GetValue(sPos, ePos)) m_estimatedScores->SetValue(
- sPos, ePos, joinedScore);
- }
- }
- }
-
- //cerr << "Square matrix:" << endl;
- //cerr << *m_estimatedScores << endl;
-}
-
-std::string Manager::OutputBest() const
-{
-  // Best translation as text, optionally prefixed by its total score; when
-  // the search found no hypothesis only the score placeholder "0 " is written.
-  stringstream out;
-  Moses2::FixPrecision(out);
-
-  const Hypothesis *bestHypo = m_search->GetBestHypo();
-  const bool withScore = system.options.output.ReportHypoScore;
-  if (bestHypo == NULL) {
-    //cerr << "NO TRANSLATION " << m_input->GetTranslationId() << endl;
-    if (withScore) {
-      out << "0 ";
-    }
-    return out.str();
-  }
-
-  if (withScore) {
-    out << bestHypo->GetScores().GetTotalScore() << " ";
-  }
-  bestHypo->OutputToStream(out);
-  //cerr << "BEST TRANSLATION: " << *bestHypo;
-  return out.str();
-}
-
-std::string Manager::OutputNBest()
-{
-  // Builds the n-best list, one "<translation id> ||| <path>" line per entry,
-  // writing at most nbest_size entries.
-  arcLists.Sort();
-  boost::unordered_set<size_t> distinctHypos;
-
-  TrellisPaths<TrellisPath> contenders;
-  m_search->AddInitialTrellisPaths(contenders);
-
-  long transId = GetTranslationId();
-
-  // MAIN LOOP
-  stringstream out;
-  //Moses2::FixPrecision(out);
-
-  size_t maxIter = system.options.nbest.nbest_size * system.options.nbest.factor;
-  size_t bestInd = 0;
-  for (size_t i = 0; i < maxIter; ++i) {
-    // bestInd counts the entries written so far, so stop as soon as it
-    // reaches nbest_size; testing with '>' would emit one entry too many
-    if (bestInd >= system.options.nbest.nbest_size || contenders.empty()) {
-      break;
-    }
-    //cerr << "bestInd=" << bestInd << endl;
-    TrellisPath *path = contenders.Get();
-
-    bool ok = false;
-    if (system.options.nbest.only_distinct) {
-      string tgtPhrase = path->OutputTargetPhrase(system);
-      //cerr << "tgtPhrase=" << tgtPhrase << endl;
-      boost::hash<std::string> string_hash;
-      size_t hash = string_hash(tgtPhrase);
-      if (distinctHypos.insert(hash).second) {
-        ok = true;
-      }
-    }
-    else {
-      ok = true;
-    }
-
-    if (ok) {
-      ++bestInd;
-      out << transId << " ||| ";
-      path->OutputToStream(out, system);
-      out << "\n";
-    }
-
-    // create next paths
-    path->CreateDeviantPaths(contenders, arcLists, GetPool(), system);
-    delete path;
-  }
-
-  return out.str();
-}
-
-std::string Manager::OutputTransOpt()
-{
-  return std::string(); // no translation-option output is produced: always empty
-}
-
-}
-
diff --git a/contrib/moses2/PhraseBased/Manager.h b/contrib/moses2/PhraseBased/Manager.h
deleted file mode 100644
index 3f42d6b27..000000000
--- a/contrib/moses2/PhraseBased/Manager.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Manager.h
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include <queue>
-#include <cstddef>
-#include <string>
-#include <deque>
-#include "../ManagerBase.h"
-#include "../Phrase.h"
-#include "../TargetPhrase.h"
-#include "../MemPool.h"
-#include "../Recycler.h"
-#include "../EstimatedScores.h"
-#include "../legacy/Bitmaps.h"
-#include "InputPaths.h"
-
-namespace Moses2
-{
-
-class System;
-class TranslationTask;
-class PhraseImpl;
-class TargetPhraseImpl;
-class SearchNormal;
-class Search;
-class Hypothesis;
-class Sentence;
-class OutputCollector;
-
-class Manager: public ManagerBase
-{
-public:
- Manager(System &sys, const TranslationTask &task, const std::string &inputStr,
- long translationId);
-
- virtual ~Manager();
- // the pointer-backed accessors below are only valid once Init() has run (Decode() calls it)
- Bitmaps &GetBitmaps()
- { return *m_bitmaps; }
-
- const EstimatedScores &GetEstimatedScores() const
- { return *m_estimatedScores; }
-
- const InputPaths &GetInputPaths() const
- { return m_inputPaths; }
-
- const TargetPhraseImpl &GetInitPhrase() const
- { return *m_initPhrase; }
- // Decode() calls Init() and then runs the search; the Output* methods render the result as text
- void Decode();
- std::string OutputBest() const;
- std::string OutputNBest();
- std::string OutputTransOpt();
-
-protected:
-
- InputPaths m_inputPaths;
- Bitmaps *m_bitmaps; // created with new in Init(), deleted in the destructor
- EstimatedScores *m_estimatedScores; // placement-constructed in the pool by CalcFutureScore()
- TargetPhraseImpl *m_initPhrase; // placement-constructed in the pool by Init()
-
- Search *m_search; // created with new in Init(), deleted in the destructor
-
- // must be run in same thread as Decode()
- void Init();
- void CalcFutureScore();
-
-};
-
-}
-
diff --git a/contrib/moses2/PhraseBased/Normal/Search.cpp b/contrib/moses2/PhraseBased/Normal/Search.cpp
deleted file mode 100644
index 7c5026a7c..000000000
--- a/contrib/moses2/PhraseBased/Normal/Search.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * SearchNormal.cpp
- *
- * Created on: 25 Oct 2015
- * Author: hieu
- */
-
-#include "Search.h"
-#include <algorithm>
-#include <boost/foreach.hpp>
-#include "Stack.h"
-#include "../Manager.h"
-#include "../TrellisPath.h"
-#include "../Sentence.h"
-#include "../../TrellisPaths.h"
-#include "../../InputPathsBase.h"
-#include "../../Phrase.h"
-#include "../../System.h"
-#include "../../PhraseBased/TargetPhrases.h"
-
-using namespace std;
-
-namespace Moses2
-{
-namespace NSNormal
-{
-
-Search::Search(Manager &mgr)
-:Moses2::Search(mgr)
-, m_stacks(mgr)
-{
- // nothing else to do here: the stacks are sized later, in Decode()
-
-}
-
-Search::~Search()
-{
- // Intentionally empty: m_stacks is a by-value member and is destroyed automatically.
-}
-
-void Search::Decode()
-{
-  // Creates sentence length + 1 stacks, seeds them with the empty hypothesis
-  // and then expands the stacks in index order.
-  const Sentence &sentence = static_cast<const Sentence&>(mgr.GetInput());
-  m_stacks.Init(mgr, sentence.GetSize() + 1);
-
-  const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
-  Hypothesis *seed = Hypothesis::Create(mgr.GetSystemPool(), mgr);
-  seed->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap);
-  seed->EmptyHypothesisState(mgr.GetInput());
-  m_stacks.Add(seed, mgr.GetHypoRecycle(), mgr.arcLists);
-
-  for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) {
-    Decode(stackInd);
-    //cerr << m_stacks << endl;
-
-    // delete stack to save mem
-    const bool isLastStack = (stackInd + 1 == m_stacks.GetSize());
-    if (!isLastStack) {
-      m_stacks.Delete(stackInd);
-    }
-    //cerr << m_stacks.Debug(mgr.system) << endl;
-  }
-}
-
-void Search::Decode(size_t stackInd)
-{
-  // Expands stack stackInd: every surviving hypothesis is tried against
-  // every input path. The final stack has nothing left to expand.
-  //cerr << "stackInd=" << stackInd << endl;
-  Stack &current = m_stacks[stackInd];
-  if (&current == &m_stacks.Back()) {
-    // last stack. don't do anything
-    return;
-  }
-
-  const Hypotheses &survivors = current.GetSortedAndPrunedHypos(mgr, mgr.arcLists);
-  //cerr << "hypos=" << survivors.size() << endl;
-
-  BOOST_FOREACH(const InputPathBase *path, mgr.GetInputPaths()){
-    BOOST_FOREACH(const HypothesisBase *hypo, survivors) {
-      Extend(*static_cast<const Hypothesis*>(hypo), *static_cast<const InputPath*>(path));
-    }
-  }
-}
-
-void Search::Extend(const Hypothesis &hypo, const InputPath &path)
-{
-  // Tries to extend hypo over the source span of path with the target phrases
-  // of every phrase table; returns early when either check rejects the span.
-  const Bitmap &hypoBitmap = hypo.GetBitmap();
-  const Range &pathRange = path.range;
-
-  if (!CanExtend(hypoBitmap, hypo.GetInputPath().range.GetEndPos(), pathRange)) {
-    return;
-  }
-
-  if (!mgr.GetInput().GetReorderingConstraint().Check(hypoBitmap,
-      pathRange.GetStartPos(), pathRange.GetEndPos())) {
-    return;
-  }
-
-  // extend this hypo
-  const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
-  //SCORE estimatedScore = mgr.GetEstimatedScores().CalcFutureScore2(bitmap, pathRange.GetStartPos(), pathRange.GetEndPos());
-  SCORE estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
-
-  const size_t numPt = mgr.system.mappings.size();
-  for (size_t ptInd = 0; ptInd < numPt; ++ptInd) {
-    const TargetPhrases *tps = path.targetPhrases[ptInd];
-    if (tps) {
-      Extend(hypo, *tps, path, newBitmap, estimatedScore);
-    }
-  }
-}
-
-void Search::Extend(const Hypothesis &hypo, const TargetPhrases &tps,
-    const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore)
-{ // one extension attempt per target phrase in tps
-  BOOST_FOREACH(const TargetPhraseImpl *candidate, tps){
-    Extend(hypo, *candidate, path, newBitmap, estimatedScore);
-  }
-}
-
-void Search::Extend(const Hypothesis &hypo, const TargetPhraseImpl &tp,
-    const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore)
-{
-  // Create the successor of hypo that applies tp over path, score it with the
-  // stateful feature functions and hand it to the stacks.
-  Hypothesis *successor = Hypothesis::Create(mgr.GetSystemPool(), mgr);
-  successor->Init(mgr, hypo, path, tp, newBitmap, estimatedScore);
-  successor->EvaluateWhenApplied();
-  m_stacks.Add(successor, mgr.GetHypoRecycle(), mgr.arcLists);
-
-  //m_arcLists.AddArc(stackAdded.added, newHypo, stackAdded.other);
-  //stack.Prune(mgr.GetHypoRecycle(), mgr.system.stackSize, mgr.system.stackSize * 2);
-}
-
-const Hypothesis *Search::GetBestHypo() const
-{
-  // the answer comes from the final stack only
-  const Stack &finalStack = m_stacks.Back();
-  return finalStack.GetBestHypo<Hypothesis>();
-}
-
-void Search::AddInitialTrellisPaths(TrellisPaths<TrellisPath> &paths) const
-{
-  // Seed paths with one heap-allocated TrellisPath per hypothesis that
-  // survives pruning in the final stack.
-  const Hypotheses &finalists = m_stacks.Back().GetSortedAndPrunedHypos(mgr, mgr.arcLists);
-
-  BOOST_FOREACH(const HypothesisBase *hypoBase, finalists){
-    const Hypothesis *finalist = static_cast<const Hypothesis*>(hypoBase);
-    paths.Add(new TrellisPath(finalist, mgr.arcLists));
-  }
-}
-
-} // namespace
-}
-
diff --git a/contrib/moses2/PhraseBased/Normal/Search.h b/contrib/moses2/PhraseBased/Normal/Search.h
deleted file mode 100644
index cefefa924..000000000
--- a/contrib/moses2/PhraseBased/Normal/Search.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * SearchNormal.h
- *
- * Created on: 25 Oct 2015
- * Author: hieu
- */
-#pragma once
-
-#include <vector>
-#include "../../legacy/Range.h"
-#include "../../legacy/Bitmap.h"
-#include "../../TypeDef.h"
-#include "../Search.h"
-#include "Stacks.h"
-
-namespace Moses2
-{
-class Hypothesis;
-class InputPath;
-class TargetPhrases;
-class TargetPhraseImpl;
-
-namespace NSNormal
-{
-class Stacks;
-
-class Search: public Moses2::Search
-{
-public:
- Search(Manager &mgr);
- virtual ~Search();
- // Decode() fills the stacks; GetBestHypo() then reads the last stack
- virtual void Decode();
- const Hypothesis *GetBestHypo() const;
- // adds one new TrellisPath per sorted-and-pruned hypothesis of the last stack
- void AddInitialTrellisPaths(TrellisPaths<TrellisPath> &paths) const;
-
-protected:
- Stacks m_stacks;
- // Decode(stackInd) expands one stack; the Extend overloads narrow from path to phrase set to single phrase
- void Decode(size_t stackInd);
- void Extend(const Hypothesis &hypo, const InputPath &path);
- void Extend(const Hypothesis &hypo, const TargetPhrases &tps,
- const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore);
- void Extend(const Hypothesis &hypo, const TargetPhraseImpl &tp,
- const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore);
-
-};
-
-}
-}
diff --git a/contrib/moses2/PhraseBased/Normal/Stack.cpp b/contrib/moses2/PhraseBased/Normal/Stack.cpp
deleted file mode 100644
index 782ce5b84..000000000
--- a/contrib/moses2/PhraseBased/Normal/Stack.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Stack.cpp
- *
- * Created on: 24 Oct 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include "Stack.h"
-#include "../Hypothesis.h"
-#include "../Manager.h"
-#include "../../Scores.h"
-#include "../../HypothesisColl.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSNormal
-{
-
-Stack::Stack(const Manager &mgr) :
- HypothesisColl(mgr)
-{
- // TODO Auto-generated constructor stub
-
-}
-
-Stack::~Stack()
-{
- // TODO Auto-generated destructor stub
-}
-
-}
-}
diff --git a/contrib/moses2/PhraseBased/Normal/Stack.h b/contrib/moses2/PhraseBased/Normal/Stack.h
deleted file mode 100644
index 4ad707ce4..000000000
--- a/contrib/moses2/PhraseBased/Normal/Stack.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Stack.h
- *
- * Created on: 24 Oct 2015
- * Author: hieu
- */
-#pragma once
-
-#include <boost/unordered_set.hpp>
-#include <deque>
-#include "../Hypothesis.h"
-#include "../../TypeDef.h"
-#include "../../HypothesisColl.h"
-#include "../../legacy/Util2.h"
-
-namespace Moses2
-{
-
-namespace NSNormal
-{
-class Stack: public HypothesisColl
-{
-public:
- Stack(const Manager &mgr);
- virtual ~Stack();
-
-protected:
-
-};
-
-}
-}
diff --git a/contrib/moses2/PhraseBased/Normal/Stacks.cpp b/contrib/moses2/PhraseBased/Normal/Stacks.cpp
deleted file mode 100644
index bb7239cf8..000000000
--- a/contrib/moses2/PhraseBased/Normal/Stacks.cpp
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Stacks.cpp
- *
- * Created on: 6 Nov 2015
- * Author: hieu
- */
-
-#include "Stacks.h"
-#include "../Manager.h"
-#include "../../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSNormal
-{
-
-Stacks::Stacks(const Manager &mgr) :
- m_mgr(mgr)
-{
- // TODO Auto-generated constructor stub
-
-}
-
-Stacks::~Stacks()
-{
- for (size_t i = 0; i < m_stacks.size(); ++i) {
- delete m_stacks[i];
- }
-}
-
-void Stacks::Init(const Manager &mgr, size_t numStacks)
-{
- m_stacks.resize(numStacks);
- for (size_t i = 0; i < m_stacks.size(); ++i) {
- m_stacks[i] = new Stack(mgr);
- }
-}
-
-std::string Stacks::Debug(const System &system) const
-{
- stringstream out;
- for (size_t i = 0; i < GetSize(); ++i) {
- const Stack *stack = m_stacks[i];
- if (stack) {
- out << stack->GetSize() << " ";
- }
- else {
- out << "N ";
- }
- }
- return out.str();
-}
-
-void Stacks::Add(Hypothesis *hypo, Recycler<HypothesisBase*> &hypoRecycle,
- ArcLists &arcLists)
-{
- size_t numWordsCovered = hypo->GetBitmap().GetNumWordsCovered();
- //cerr << "numWordsCovered=" << numWordsCovered << endl;
- Stack &stack = *m_stacks[numWordsCovered];
- stack.Add(m_mgr, hypo, hypoRecycle, arcLists);
-}
-
-}
-}
diff --git a/contrib/moses2/PhraseBased/Normal/Stacks.h b/contrib/moses2/PhraseBased/Normal/Stacks.h
deleted file mode 100644
index 58626f234..000000000
--- a/contrib/moses2/PhraseBased/Normal/Stacks.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Stacks.h
- *
- * Created on: 6 Nov 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include <vector>
-#include "Stack.h"
-#include "../../Recycler.h"
-
-namespace Moses2
-{
-class Manager;
-class ArcLists;
-
-namespace NSNormal
-{
-
-class Stacks
-{
-public:
- Stacks(const Manager &mgr);
- virtual ~Stacks();
-
- void Init(const Manager &mgr, size_t numStacks);
-
- size_t GetSize() const
- {
- return m_stacks.size();
- }
-
- const Stack &Back() const
- {
- return *m_stacks.back();
- }
-
- Stack &operator[](size_t ind)
- {
- return *m_stacks[ind];
- }
-
- void Delete(size_t ind)
- {
- delete m_stacks[ind];
- m_stacks[ind] = NULL;
- }
-
- void Add(Hypothesis *hypo, Recycler<HypothesisBase*> &hypoRecycle,
- ArcLists &arcLists);
-
- std::string Debug(const System &system) const;
-
-protected:
- const Manager &m_mgr;
- std::vector<Stack*> m_stacks;
-};
-
-}
-}
diff --git a/contrib/moses2/PhraseBased/PhraseImpl.cpp b/contrib/moses2/PhraseBased/PhraseImpl.cpp
deleted file mode 100644
index 00f55a35b..000000000
--- a/contrib/moses2/PhraseBased/PhraseImpl.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * PhraseImpl.cpp
- *
- * Created on: 19 Feb 2016
- * Author: hieu
- */
-#include "PhraseImpl.h"
-
-using namespace std;
-
-namespace Moses2
-{
-PhraseImpl *PhraseImpl::CreateFromString(MemPool &pool, FactorCollection &vocab,
- const System &system, const std::string &str)
-{
- std::vector<std::string> toks = Moses2::Tokenize(str);
- size_t size = toks.size();
- PhraseImpl *ret;
-
- ret = new (pool.Allocate<PhraseImpl>()) PhraseImpl(pool, size);
-
- ret->PhraseImplTemplate<Word>::CreateFromString(vocab, system, toks);
- return ret;
-}
-
-}
-
diff --git a/contrib/moses2/PhraseBased/PhraseImpl.h b/contrib/moses2/PhraseBased/PhraseImpl.h
deleted file mode 100644
index 787cdf58d..000000000
--- a/contrib/moses2/PhraseBased/PhraseImpl.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#pragma once
-#include "../PhraseImplTemplate.h"
-#include "../SubPhrase.h"
-
-namespace Moses2
-{
-
-class PhraseImpl: public PhraseImplTemplate<Word>
-{
-public:
- static PhraseImpl *CreateFromString(MemPool &pool, FactorCollection &vocab,
- const System &system, const std::string &str);
-
- PhraseImpl(MemPool &pool, size_t size) :
- PhraseImplTemplate<Word>(pool, size)
- {
- }
-
-};
-
-}
diff --git a/contrib/moses2/PhraseBased/ReorderingConstraint.cpp b/contrib/moses2/PhraseBased/ReorderingConstraint.cpp
deleted file mode 100644
index cff09cc24..000000000
--- a/contrib/moses2/PhraseBased/ReorderingConstraint.cpp
+++ /dev/null
@@ -1,252 +0,0 @@
-#include <stdlib.h>
-#include <iostream>
-#include "ReorderingConstraint.h"
-#include "Sentence.h"
-#include "../TypeDef.h"
-#include "../legacy/Bitmap.h"
-
-using namespace std;
-
-namespace Moses2
-{
-//! destructer
-ReorderingConstraint::~ReorderingConstraint()
-{
- //if (m_wall != NULL) free(m_wall);
- //if (m_localWall != NULL) free(m_localWall);
-}
-
-//! allocate memory for reordering walls
-void ReorderingConstraint::InitializeWalls(size_t size, int max_distortion)
-{
- m_size = size;
-
- m_wall = m_pool.Allocate<bool>(size);
- m_localWall = m_pool.Allocate<size_t>(size);
-
- m_max_distortion = max_distortion;
-
- for (size_t pos = 0 ; pos < m_size ; pos++) {
- m_wall[pos] = false;
- m_localWall[pos] = NOT_A_ZONE;
- }
-}
-
-//! has to be called to localized walls
-void ReorderingConstraint::FinalizeWalls()
-{
- for(size_t z = 0; z < m_zone.size(); z++ ) {
- const size_t startZone = m_zone[z].first;
- const size_t endZone = m_zone[z].second;// note: wall after endZone is not local
- for( size_t pos = startZone; pos < endZone; pos++ ) {
- if (m_wall[ pos ]) {
- m_localWall[ pos ] = z;
- m_wall[ pos ] = false;
- //cerr << "SETTING local wall " << pos << std::endl;
- }
- // enforce that local walls only apply to innermost zone
- else if (m_localWall[ pos ] != NOT_A_ZONE) {
- size_t assigned_z = m_localWall[ pos ];
- if ((m_zone[assigned_z].first < startZone) ||
- (m_zone[assigned_z].second > endZone)) {
- m_localWall[ pos ] = z;
- }
- }
- }
- }
-}
-
-//! set value at a particular position
-void ReorderingConstraint::SetWall( size_t pos, bool value )
-{
- //cerr << "SETTING reordering wall at position " << pos << std::endl;
- UTIL_THROW_IF2(pos >= m_size, "Wall over length of sentence: " << pos << " >= " << m_size);
- m_wall[pos] = value;
- m_active = true;
-}
-
-//! set a reordering zone (once entered, need to finish)
-void ReorderingConstraint::SetZone( size_t startPos, size_t endPos )
-{
- //cerr << "SETTING zone " << startPos << "-" << endPos << std::endl;
- std::pair<size_t,size_t> newZone;
- newZone.first = startPos;
- newZone.second = endPos;
- m_zone.push_back( newZone );
- m_active = true;
-}
-
-//! set walls based on "-monotone-at-punctuation" flag
-void ReorderingConstraint::SetMonotoneAtPunctuation( const Sentence &sentence )
-{
- for( size_t i=0; i<sentence.GetSize(); i++ ) {
- const Word& word = sentence[i];
- if (word[0]->GetString() == "," ||
- word[0]->GetString() == "." ||
- word[0]->GetString() == "!" ||
- word[0]->GetString() == "?" ||
- word[0]->GetString() == ":" ||
- word[0]->GetString() == ";" ||
- word[0]->GetString() == "\"") {
- // set wall before and after punc, but not at sentence start, end
- if (i>0 && i<m_size-1) SetWall( i, true );
- if (i>1) SetWall( i-1, true );
- }
- }
-}
-
-//! check if the current hypothesis extension violates reordering constraints
-bool ReorderingConstraint::Check( const Bitmap &bitmap, size_t startPos, size_t endPos ) const
-{
- // nothing to be checked, we are done
- if (! IsActive() ) return true;
-
- //cerr << "Check " << bitmap << " " << startPos << "-" << endPos;
-
- // check walls
- size_t firstGapPos = bitmap.GetFirstGapPos();
- // filling first gap -> no wall violation possible
- if (firstGapPos != startPos) {
- // if there is a wall before the last word,
- // we created a gap while moving through wall
- // -> violation
- for( size_t pos = firstGapPos; pos < endPos; pos++ ) {
- if( GetWall( pos ) ) {
- //cerr << " hitting wall " << pos << std::endl;
- return false;
- }
- }
- }
-
- // monotone -> no violation possible
- size_t lastPos = bitmap.GetLastPos();
- if ((lastPos == NOT_FOUND && startPos == 0) || // nothing translated
- (firstGapPos > lastPos && // no gaps
- firstGapPos == startPos)) { // translating first empty word
- //cerr << " montone, fine." << std::endl;
- return true;
- }
-
- // check zones
- for(size_t z = 0; z < m_zone.size(); z++ ) {
- const size_t startZone = m_zone[z].first;
- const size_t endZone = m_zone[z].second;
-
- // fine, if translation has not reached zone yet and phrase outside zone
- if (lastPos < startZone && ( endPos < startZone || startPos > endZone ) ) {
- continue;
- }
-
- // already completely translated zone, no violations possible
- if (firstGapPos > endZone) {
- continue;
- }
-
- // some words are translated beyond the start
- // let's look closer if some are in the zone
- size_t numWordsInZoneTranslated = 0;
- if (lastPos >= startZone) {
- for(size_t pos = startZone; pos <= endZone; pos++ ) {
- if( bitmap.GetValue( pos ) ) {
- numWordsInZoneTranslated++;
- }
- }
- }
-
- // all words in zone translated, no violation possible
- if (numWordsInZoneTranslated == endZone-startZone+1) {
- continue;
- }
-
- // flag if this is an active zone
- bool activeZone = (numWordsInZoneTranslated > 0);
-
- // fine, if zone completely untranslated and phrase outside zone
- if (!activeZone && ( endPos < startZone || startPos > endZone ) ) {
- continue;
- }
-
- // violation, if phrase completely outside active zone
- if (activeZone && ( endPos < startZone || startPos > endZone ) ) {
- //cerr << " outside active zone" << std::endl;
- return false;
- }
-
- // ok, this is what we know now:
- // * the phrase is in the zone (at least partially)
- // * either zone is already active, or it becomes active now
-
-
- // check, if we are setting us up for a dead end due to distortion limits
-
- // size_t distortionLimit = (size_t)StaticData::Instance().GetMaxDistortion();
- size_t distortionLimit = m_max_distortion;
- if (startPos != firstGapPos && endZone-firstGapPos >= distortionLimit) {
- //cerr << " dead end due to distortion limit" << std::endl;
- return false;
- }
-
- // let us check on phrases that are partially outside
-
- // phrase overlaps at the beginning, always ok
- if (startPos <= startZone) {
- continue;
- }
-
- // phrase goes beyond end, has to fill zone completely
- if (endPos > endZone) {
- if (endZone-startPos+1 < // num. words filled in by phrase
- endZone-startZone+1-numWordsInZoneTranslated) { // num. untranslated
- //cerr << " overlap end, but not completing" << std::endl;
- return false;
- } else {
- continue;
- }
- }
-
- // now we are down to phrases that are completely inside the zone
- // we have to check local walls
- bool seenUntranslatedBeforeStartPos = false;
- for(size_t pos = startZone; pos < endZone && pos < endPos; pos++ ) {
- // be careful when there is a gap before phrase
- if( !bitmap.GetValue( pos ) // untranslated word
- && pos < startPos ) { // before startPos
- seenUntranslatedBeforeStartPos = true;
- }
- if( seenUntranslatedBeforeStartPos && GetLocalWall( pos, z ) ) {
- //cerr << " local wall violation" << std::endl;
- return false;
- }
- }
-
- // passed all checks for this zone, on to the next one
- }
-
- // passed all checks, no violations
- //cerr << " fine." << std::endl;
- return true;
-}
-
-std::ostream &ReorderingConstraint::Debug(std::ostream &out, const System &system) const
-{
- out << "Zones:";
- for (size_t i = 0; i < m_zone.size(); ++i) {
- const std::pair<size_t,size_t> &zone1 = m_zone[i];
- out << zone1.first << "-" << zone1.second << " ";
- }
-
- out << "Walls:";
- for (size_t i = 0; i < m_size; ++i) {
- out << m_wall[i];
- }
-
- out << " Local walls:";
- for (size_t i = 0; i < m_size; ++i) {
- out << m_localWall[i] << " ";
- }
-
- return out;
-}
-
-} // namespace
-
diff --git a/contrib/moses2/PhraseBased/ReorderingConstraint.h b/contrib/moses2/PhraseBased/ReorderingConstraint.h
deleted file mode 100644
index b8d2461e5..000000000
--- a/contrib/moses2/PhraseBased/ReorderingConstraint.h
+++ /dev/null
@@ -1,88 +0,0 @@
-#pragma once
-#include <iostream>
-#include <vector>
-#include "../Vector.h"
-
-namespace Moses2
-{
-class System;
-class Sentence;
-class Bitmap;
-class MemPool;
-
-#define NOT_A_ZONE 999999999
-
-class ReorderingConstraint
-{
-protected:
- // const size_t m_size; /**< number of words in sentence */
- size_t m_size; /**< number of words in sentence */
- bool *m_wall; /**< flag for each word if it is a wall */
- //size_t *m_wall; /**< flag for each word if it is a wall */
- size_t *m_localWall; /**< flag for each word if it is a local wall */
- Vector< std::pair<size_t,size_t> > m_zone; /** zones that limit reordering */
- bool m_active; /**< flag indicating, if there are any active constraints */
- int m_max_distortion;
- MemPool &m_pool;
-
- ReorderingConstraint(const ReorderingConstraint &); // do not implement
-
-public:
-
- //! create ReorderingConstraint of length size and initialise to zero
- ReorderingConstraint(MemPool &pool)
- : m_wall(NULL)
- , m_localWall(NULL)
- , m_active(false)
- , m_pool(pool)
- , m_zone(pool)
- {}
-
- //! destructer
- ~ReorderingConstraint();
-
- //! allocate memory for memory for a sentence of a given size
- void InitializeWalls(size_t size, int max_distortion);
-
- //! changes walls in zones into local walls
- void FinalizeWalls();
-
- //! set value at a particular position
- void SetWall( size_t pos, bool value );
-
- //! whether a word has been translated at a particular position
- bool GetWall(size_t pos) const {
- return m_wall[pos];
- }
-
- //! whether a word has been translated at a particular position
- bool GetLocalWall(size_t pos, size_t zone ) const {
- return (m_localWall[pos] == zone);
- }
-
- //! set a zone
- void SetZone( size_t startPos, size_t endPos );
-
- //! returns the vector of zones
- Vector< std::pair< size_t,size_t> > & GetZones() {
- return m_zone;
- }
-
- //! set the reordering walls based on punctuation in the sentence
- void SetMonotoneAtPunctuation( const Sentence & sentence );
-
- //! check if all constraints are fulfilled -> all find
- bool Check( const Bitmap &bitmap, size_t start, size_t end ) const;
-
- //! checks if reordering constraints will be enforced
- bool IsActive() const {
- return m_active;
- }
-
- std::ostream &Debug(std::ostream &out, const System &system) const;
-
-};
-
-
-}
-
diff --git a/contrib/moses2/PhraseBased/Search.cpp b/contrib/moses2/PhraseBased/Search.cpp
deleted file mode 100644
index 1a85e15f5..000000000
--- a/contrib/moses2/PhraseBased/Search.cpp
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Search.cpp
- *
- * Created on: 16 Nov 2015
- * Author: hieu
- */
-
-#include "Search.h"
-#include "Manager.h"
-#include "../System.h"
-#include "../legacy/Bitmap.h"
-#include "../legacy/Range.h"
-
-namespace Moses2
-{
-
-Search::Search(Manager &mgr) :
- mgr(mgr)
-{
- // TODO Auto-generated constructor stub
-
-}
-
-Search::~Search()
-{
- // TODO Auto-generated destructor stub
-}
-
-bool Search::CanExtend(const Bitmap &hypoBitmap, size_t hypoRangeEndPos,
- const Range &pathRange)
-{
- const size_t hypoFirstGapPos = hypoBitmap.GetFirstGapPos();
-
- //cerr << "DOING " << hypoBitmap << " [" << hypoRange.GetStartPos() << " " << hypoRange.GetEndPos() << "]"
- // " [" << pathRange.GetStartPos() << " " << pathRange.GetEndPos() << "]";
-
- if (hypoBitmap.Overlap(pathRange)) {
- //cerr << " NO" << endl;
- return false;
- }
-
- if (mgr.system.options.reordering.max_distortion == -1) {
- return true;
- }
-
- if (mgr.system.options.reordering.max_distortion >= 0) {
- // distortion limit
- int distortion = ComputeDistortionDistance(hypoRangeEndPos,
- pathRange.GetStartPos());
- if (distortion > mgr.system.options.reordering.max_distortion) {
- //cerr << " NO" << endl;
- return false;
- }
- }
-
- // first question: is there a path from the closest translated word to the left
- // of the hypothesized extension to the start of the hypothesized extension?
- // long version:
- // - is there anything to our left?
- // - is it farther left than where we're starting anyway?
- // - can we get to it?
-
- // closestLeft is exclusive: a value of 3 means 2 is covered, our
- // arc is currently ENDING at 3 and can start at 3 implicitly
-
- // TODO is this relevant? only for lattice input?
-
- // ask second question here: we already know we can get to our
- // starting point from the closest thing to the left. We now ask the
- // follow up: can we get from our end to the closest thing on the
- // right?
- //
- // long version: is anything to our right? is it farther
- // right than our (inclusive) end? can our end reach it?
- bool isLeftMostEdge = (hypoFirstGapPos == pathRange.GetStartPos());
-
- size_t closestRight = hypoBitmap.GetEdgeToTheRightOf(pathRange.GetEndPos());
- /*
- if (isWordLattice) {
- if (closestRight != endPos
- && ((closestRight + 1) < sourceSize)
- && !m_source.CanIGetFromAToB(endPos + 1, closestRight + 1)) {
- continue;
- }
- }
- */
-
- if (isLeftMostEdge) {
- // any length extension is okay if starting at left-most edge
-
- }
- else { // starting somewhere other than left-most edge, use caution
- // the basic idea is this: we would like to translate a phrase
- // starting from a position further right than the left-most
- // open gap. The distortion penalty for the following phrase
- // will be computed relative to the ending position of the
- // current extension, so we ask now what its maximum value will
- // be (which will always be the value of the hypothesis starting
- // at the left-most edge). If this value is less than the
- // distortion limit, we don't allow this extension to be made.
- Range bestNextExtension(hypoFirstGapPos, hypoFirstGapPos);
-
- if (ComputeDistortionDistance(pathRange.GetEndPos(),
- bestNextExtension.GetStartPos()) > mgr.system.options.reordering.max_distortion) {
- //cerr << " NO" << endl;
- return false;
- }
-
- // everything is fine, we're good to go
- }
-
- return true;
-}
-
-}
-
diff --git a/contrib/moses2/PhraseBased/Search.h b/contrib/moses2/PhraseBased/Search.h
deleted file mode 100644
index 8e9e9f787..000000000
--- a/contrib/moses2/PhraseBased/Search.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Search.h
- *
- * Created on: 16 Nov 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include <stddef.h>
-#include "../legacy/Util2.h"
-
-namespace Moses2
-{
-
-class Manager;
-class Stack;
-class Hypothesis;
-class Bitmap;
-class Range;
-class TrellisPath;
-
-template<typename T>
-class TrellisPaths;
-
-class Search
-{
-public:
- Search(Manager &mgr);
- virtual ~Search();
-
- virtual void Decode() = 0;
- virtual const Hypothesis *GetBestHypo() const = 0;
-
- virtual void AddInitialTrellisPaths(TrellisPaths<TrellisPath> &paths) const = 0;
-
-protected:
- Manager &mgr;
- //ArcLists m_arcLists;
-
- bool CanExtend(const Bitmap &hypoBitmap, size_t hypoRangeEndPos,
- const Range &pathRange);
-
- inline int ComputeDistortionDistance(size_t prevEndPos,
- size_t currStartPos) const
- {
- int dist = 0;
- if (prevEndPos == NOT_FOUND) {
- dist = currStartPos;
- }
- else {
- dist = (int)prevEndPos - (int)currStartPos + 1;
- }
- return abs(dist);
- }
-
-};
-
-}
-
diff --git a/contrib/moses2/PhraseBased/Sentence.cpp b/contrib/moses2/PhraseBased/Sentence.cpp
deleted file mode 100644
index dbedf878e..000000000
--- a/contrib/moses2/PhraseBased/Sentence.cpp
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Sentence.cpp
- *
- * Created on: 14 Dec 2015
- * Author: hieu
- */
-#include <boost/property_tree/ptree.hpp>
-#include <boost/property_tree/xml_parser.hpp>
-#include "Sentence.h"
-#include "../System.h"
-#include "../parameters/AllOptions.h"
-#include "../legacy/Util2.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-Sentence *Sentence::CreateFromString(MemPool &pool, FactorCollection &vocab,
- const System &system, const std::string &str)
-{
- Sentence *ret;
-
- if (system.options.input.xml_policy) {
- // xml
- ret = CreateFromStringXML(pool, vocab, system, str);
- }
- else {
- // no xml
- //cerr << "PB Sentence" << endl;
- std::vector<std::string> toks = Tokenize(str);
-
- size_t size = toks.size();
- ret = new (pool.Allocate<Sentence>()) Sentence(pool, size);
- ret->PhraseImplTemplate<Word>::CreateFromString(vocab, system, toks, false);
- }
-
- //cerr << "REORDERING CONSTRAINTS:" << ret->GetReorderingConstraint() << endl;
- //cerr << "ret=" << ret->Debug(system) << endl;
-
- return ret;
-}
-
-Sentence *Sentence::CreateFromStringXML(MemPool &pool, FactorCollection &vocab,
- const System &system, const std::string &str)
-{
- Sentence *ret;
-
- vector<XMLOption*> xmlOptions;
- pugi::xml_document doc;
-
- string str2 = "<xml>" + str + "</xml>";
- pugi::xml_parse_result result = doc.load(str2.c_str(),
- pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_eol | pugi::parse_comments);
- pugi::xml_node topNode = doc.child("xml");
-
- std::vector<std::string> toks;
- XMLParse(pool, system, 0, topNode, toks, xmlOptions);
-
- // debug
- /*
- cerr << "xmloptions:" << endl;
- for (size_t i = 0; i < xmlOptions.size(); ++i) {
- cerr << xmlOptions[i]->Debug(system) << endl;
- }
- */
-
- // create words
- size_t size = toks.size();
- ret = new (pool.Allocate<Sentence>()) Sentence(pool, size);
- ret->PhraseImplTemplate<Word>::CreateFromString(vocab, system, toks, false);
-
- // xml
- ret->Init(system, size, system.options.reordering.max_distortion);
-
- ReorderingConstraint &reorderingConstraint = ret->GetReorderingConstraint();
-
- // set reordering walls, if "-monotone-at-punction" is set
- if (system.options.reordering.monotone_at_punct && ret->GetSize()) {
- reorderingConstraint.SetMonotoneAtPunctuation(*ret);
- }
-
- // set walls obtained from xml
- for(size_t i=0; i<xmlOptions.size(); i++) {
- const XMLOption *xmlOption = xmlOptions[i];
- if(strcmp(xmlOption->GetNodeName(), "wall") == 0) {
- UTIL_THROW_IF2(xmlOption->startPos > ret->GetSize(), "wall is beyond the sentence"); // no buggy walls, please
- reorderingConstraint.SetWall(xmlOption->startPos - 1, true);
- }
- else if (strcmp(xmlOption->GetNodeName(), "zone") == 0) {
- reorderingConstraint.SetZone( xmlOption->startPos, xmlOption->startPos + xmlOption->phraseSize -1 );
- }
- else if (strcmp(xmlOption->GetNodeName(), "ne") == 0) {
- FactorType placeholderFactor = system.options.input.placeholder_factor;
- UTIL_THROW_IF2(placeholderFactor == NOT_FOUND,
- "Placeholder XML in input. Must have argument -placeholder-factor [NUM]");
- UTIL_THROW_IF2(xmlOption->phraseSize != 1,
- "Placeholder must only cover 1 word");
-
- const Factor *factor = vocab.AddFactor(xmlOption->GetEntity(), system, false);
- (*ret)[xmlOption->startPos][placeholderFactor] = factor;
- }
- else {
- // default - forced translation. Add to class variable
- ret->AddXMLOption(system, xmlOption);
- }
- }
- reorderingConstraint.FinalizeWalls();
-
- return ret;
-}
-
-void Sentence::XMLParse(
- MemPool &pool,
- const System &system,
- size_t depth,
- const pugi::xml_node &parentNode,
- std::vector<std::string> &toks,
- vector<XMLOption*> &xmlOptions)
-{ // pugixml
- for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling()) {
- string nodeName = childNode.name();
- //cerr << depth << " nodeName=" << nodeName << endl;
-
- int startPos = toks.size();
-
- string value = childNode.value();
- if (!value.empty()) {
- //cerr << depth << "childNode text=" << value << endl;
- std::vector<std::string> subPhraseToks = Tokenize(value);
- for (size_t i = 0; i < subPhraseToks.size(); ++i) {
- toks.push_back(subPhraseToks[i]);
- }
- }
-
- if (!nodeName.empty()) {
- XMLOption *xmlOption = new (pool.Allocate<XMLOption>()) XMLOption(pool, nodeName, startPos);
-
- pugi::xml_attribute attr;
- attr = childNode.attribute("translation");
- if (!attr.empty()) {
- xmlOption->SetTranslation(pool, attr.as_string());
- }
-
- attr = childNode.attribute("entity");
- if (!attr.empty()) {
- xmlOption->SetEntity(pool, attr.as_string());
- }
-
- attr = childNode.attribute("prob");
- if (!attr.empty()) {
- xmlOption->prob = attr.as_float();
- }
-
- xmlOptions.push_back(xmlOption);
-
- // recursively call this function. For proper recursive trees
- XMLParse(pool, system, depth + 1, childNode, toks, xmlOptions);
-
- size_t endPos = toks.size();
- xmlOption->phraseSize = endPos - startPos;
-
- /*
- cerr << "xmlOptions=";
- xmlOption->Debug(cerr, system);
- cerr << endl;
- */
- }
-
- }
-}
-
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/PhraseBased/Sentence.h b/contrib/moses2/PhraseBased/Sentence.h
deleted file mode 100644
index 2e9e834a7..000000000
--- a/contrib/moses2/PhraseBased/Sentence.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Sentence.h
- *
- * Created on: 14 Dec 2015
- * Author: hieu
- */
-#pragma once
-
-#include <boost/property_tree/ptree.hpp>
-#include <string>
-#include "PhraseImpl.h"
-#include "../InputType.h"
-#include "../MemPool.h"
-#include "../pugixml.hpp"
-#include "../legacy/Util2.h"
-
-namespace Moses2
-{
-class FactorCollection;
-class System;
-
-class Sentence: public InputType, public PhraseImpl
-{
-public:
-
- static Sentence *CreateFromString(MemPool &pool, FactorCollection &vocab,
- const System &system, const std::string &str);
-
- Sentence(MemPool &pool, size_t size)
- :InputType(pool)
- ,PhraseImpl(pool, size)
- {}
-
- virtual ~Sentence()
- {}
-
-protected:
- static Sentence *CreateFromStringXML(MemPool &pool, FactorCollection &vocab,
- const System &system, const std::string &str);
-
- static void XMLParse(
- MemPool &pool,
- const System &system,
- size_t depth,
- const pugi::xml_node &parentNode,
- std::vector<std::string> &toks,
- std::vector<XMLOption*> &xmlOptions);
-
-};
-
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/PhraseBased/TargetPhraseImpl.cpp b/contrib/moses2/PhraseBased/TargetPhraseImpl.cpp
deleted file mode 100644
index 3768ca278..000000000
--- a/contrib/moses2/PhraseBased/TargetPhraseImpl.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * TargetPhraseImpl.cpp
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-
-#include <sstream>
-#include <stdlib.h>
-#include "TargetPhraseImpl.h"
-#include "../Scores.h"
-#include "../System.h"
-#include "../MemPool.h"
-#include "Manager.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-TargetPhraseImpl *TargetPhraseImpl::CreateFromString(MemPool &pool,
- const PhraseTable &pt, const System &system, const std::string &str)
-{
- FactorCollection &vocab = system.GetVocab();
-
- vector<string> toks = Tokenize(str);
- size_t size = toks.size();
- TargetPhraseImpl *ret =
- new (pool.Allocate<TargetPhraseImpl>()) TargetPhraseImpl(pool, pt, system,
- size);
- ret->PhraseImplTemplate<Word>::CreateFromString(vocab, system, toks);
-
- return ret;
-}
-
-TargetPhraseImpl::TargetPhraseImpl(MemPool &pool, const PhraseTable &pt,
- const System &system, size_t size)
-:Moses2::TargetPhrase<Moses2::Word>(pool, pt, system, size)
-{
- m_scores = new (pool.Allocate<Scores>()) Scores(system, pool,
- system.featureFunctions.GetNumScores());
-
- size_t numWithPtData = system.featureFunctions.GetWithPhraseTableInd().size();
- ffData = new (pool.Allocate<void *>(numWithPtData)) void *[numWithPtData];
-}
-
-TargetPhraseImpl::~TargetPhraseImpl()
-{
- // TODO Auto-generated destructor stub
-}
-
-}
diff --git a/contrib/moses2/PhraseBased/TargetPhraseImpl.h b/contrib/moses2/PhraseBased/TargetPhraseImpl.h
deleted file mode 100644
index a3355ffe6..000000000
--- a/contrib/moses2/PhraseBased/TargetPhraseImpl.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * TargetPhraseImpl.h
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include <iostream>
-#include "../Phrase.h"
-#include "../PhraseImplTemplate.h"
-#include "../TargetPhrase.h"
-#include "../MemPool.h"
-#include "../Word.h"
-#include "../SubPhrase.h"
-
-namespace Moses2
-{
-
-class Scores;
-class Manager;
-class System;
-class PhraseTable;
-
-class TargetPhraseImpl: public TargetPhrase<Moses2::Word>
-{
-public:
- typedef TargetPhrase<Moses2::Word> Parent;
-
- static TargetPhraseImpl *CreateFromString(MemPool &pool,
- const PhraseTable &pt, const System &system, const std::string &str);
- TargetPhraseImpl(MemPool &pool, const PhraseTable &pt, const System &system,
- size_t size);
- //TargetPhraseImpl(MemPool &pool, const System &system, const TargetPhraseImpl &copy);
-
- virtual ~TargetPhraseImpl();
-
- SCORE GetFutureScore() const
- { return m_scores->GetTotalScore() + m_estimatedScore; }
-
- void SetEstimatedScore(const SCORE &value)
- { m_estimatedScore = value; }
-
- virtual SCORE GetScoreForPruning() const
- { return GetFutureScore(); }
-
-protected:
- SCORE m_estimatedScore;
-
-};
-
-}
-
diff --git a/contrib/moses2/PhraseBased/TargetPhrases.cpp b/contrib/moses2/PhraseBased/TargetPhrases.cpp
deleted file mode 100644
index a48afefa9..000000000
--- a/contrib/moses2/PhraseBased/TargetPhrases.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * TargetPhrases.cpp
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-#include <cassert>
-#include <boost/foreach.hpp>
-#include "TargetPhrases.h"
-#include "TargetPhraseImpl.h"
-#include "../Phrase.h"
-#include "../TargetPhrase.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-TargetPhrases::TargetPhrases(MemPool &pool, size_t size) :
- m_coll(pool, size), m_currInd(0)
-{
-}
-
-/*
- TargetPhrases::TargetPhrases(MemPool &pool, const System &system, const TargetPhrases &copy)
- :m_coll(pool, copy.m_coll.size())
- {
- for (size_t i = 0; i < copy.m_coll.size(); ++i) {
- const TargetPhrase *tpOrig = copy.m_coll[i];
- assert(tpOrig);
- const TargetPhrase *tpClone = new (pool.Allocate<TargetPhrase>()) TargetPhrase(pool, system, *tpOrig);
- m_coll[i] = tpClone;
- }
- }
- */
-
-TargetPhrases::~TargetPhrases()
-{
- // TODO Auto-generated destructor stub
-}
-
-std::string TargetPhrases::Debug(const System &system) const
-{
- stringstream out;
- BOOST_FOREACH(const TargetPhraseImpl *tp, *this){
- out << tp->Debug(system);
- out << endl;
- }
- return out.str();
-}
-
-void TargetPhrases::SortAndPrune(size_t tableLimit)
-{
- iterator iterMiddle;
- iterMiddle =
- (tableLimit == 0 || m_coll.size() < tableLimit) ?
- m_coll.end() : m_coll.begin() + tableLimit;
-
- std::partial_sort(m_coll.begin(), iterMiddle, m_coll.end(),
- CompareScoreForPruning<TP>());
-
- if (tableLimit && m_coll.size() > tableLimit) {
- m_coll.resize(tableLimit);
- }
-
- //cerr << "TargetPhrases=" << GetSize() << endl;
-}
-
-/*
- const TargetPhrases *TargetPhrases::Clone(MemPool &pool, const System &system) const
- {
- const TargetPhrases *ret = new (pool.Allocate<TargetPhrases>()) TargetPhrases(pool, system, *this);
- return ret;
- }
- */
-
-}
-
diff --git a/contrib/moses2/PhraseBased/TargetPhrases.h b/contrib/moses2/PhraseBased/TargetPhrases.h
deleted file mode 100644
index 2582a7386..000000000
--- a/contrib/moses2/PhraseBased/TargetPhrases.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * TargetPhrases.h
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-#include <vector>
-#include "../Array.h"
-
-namespace Moses2
-{
-
-class TargetPhraseImpl;
-
-class Word;
-class System;
-
-class TargetPhrases
-{
- typedef TargetPhraseImpl TP;
- typedef Array<const TP*> Coll;
-public:
- typedef Coll::iterator iterator;
- typedef Coll::const_iterator const_iterator;
- //! iterators
- const_iterator begin() const
- {
- return m_coll.begin();
- }
- const_iterator end() const
- {
- return m_coll.end();
- }
-
- TargetPhrases(MemPool &pool, size_t size);
- //TargetPhrases(MemPool &pool, const System &system, const TargetPhrases &copy);
- virtual ~TargetPhrases();
-
- void AddTargetPhrase(const TP &targetPhrase)
- {
- m_coll[m_currInd++] = &targetPhrase;
- }
-
- size_t GetSize() const
- {
- return m_coll.size();
- }
-
- const TP& operator[](size_t ind) const
- {
- return *m_coll[ind];
- }
-
- void SortAndPrune(size_t tableLimit);
-
- std::string Debug(const System &system) const;
-
-protected:
- Coll m_coll;
- size_t m_currInd;
-};
-
-}
-
diff --git a/contrib/moses2/PhraseBased/TrellisPath.cpp b/contrib/moses2/PhraseBased/TrellisPath.cpp
deleted file mode 100644
index a7213fe18..000000000
--- a/contrib/moses2/PhraseBased/TrellisPath.cpp
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * TrellisPath.cpp
- *
- * Created on: 16 Mar 2016
- * Author: hieu
- */
-#include <cassert>
-#include <sstream>
-#include "TrellisPath.h"
-#include "Hypothesis.h"
-#include "InputPath.h"
-#include "../TrellisPaths.h"
-#include "../System.h"
-#include "../SubPhrase.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-std::string TrellisNode::Debug(const System &system) const
-{
- stringstream out;
- out << "arcList=" << arcList->size() << " " << ind;
- return out.str();
-}
-
-/////////////////////////////////////////////////////////////////////////////////
-TrellisPath::TrellisPath(const Hypothesis *hypo, const ArcLists &arcLists) :
- prevEdgeChanged(-1)
-{
- AddNodes(hypo, arcLists);
- m_scores = &hypo->GetScores();
-}
-
-TrellisPath::TrellisPath(const TrellisPath &origPath, size_t edgeIndex,
- const TrellisNode &newNode, const ArcLists &arcLists, MemPool &pool,
- const System &system) :
- prevEdgeChanged(edgeIndex)
-{
- nodes.reserve(origPath.nodes.size());
- for (size_t currEdge = 0; currEdge < edgeIndex; currEdge++) {
- // copy path from parent
- nodes.push_back(origPath.nodes[currEdge]);
- }
-
- // 1 deviation
- nodes.push_back(newNode);
-
- // rest of path comes from following best path backwards
- const Hypothesis *arc = static_cast<const Hypothesis*>(newNode.GetHypo());
-
- const Hypothesis *prevHypo = arc->GetPrevHypo();
- while (prevHypo != NULL) {
- const ArcList &arcList = arcLists.GetArcList(prevHypo);
- TrellisNode node(arcList, 0);
- nodes.push_back(node);
-
- prevHypo = prevHypo->GetPrevHypo();
- }
-
- const TrellisNode &origNode = origPath.nodes[edgeIndex];
- const HypothesisBase *origHypo = origNode.GetHypo();
- const HypothesisBase *newHypo = newNode.GetHypo();
-
- CalcScores(origPath.GetScores(), origHypo->GetScores(), newHypo->GetScores(),
- pool, system);
-}
-
-TrellisPath::~TrellisPath()
-{
- // TODO Auto-generated destructor stub
-}
-
-SCORE TrellisPath::GetFutureScore() const
-{
- return m_scores->GetTotalScore();
-}
-
-std::string TrellisPath::Debug(const System &system) const
-{
- stringstream out;
-
- out << OutputTargetPhrase(system);
- out << "||| ";
-
- out << GetScores().Debug(system);
- out << "||| ";
-
- out << GetScores().GetTotalScore();
-
- return out.str();
-}
-
-void TrellisPath::OutputToStream(std::ostream &out, const System &system) const
-{
- out << OutputTargetPhrase(system);
- out << "||| ";
-
- GetScores().OutputBreakdown(out, system);
- out << "||| ";
-
- out << GetScores().GetTotalScore();
-}
-
-std::string TrellisPath::OutputTargetPhrase(const System &system) const
-{
- std::stringstream out;
- for (int i = nodes.size() - 2; i >= 0; --i) {
- const TrellisNode &node = nodes[i];
-
- const Hypothesis *hypo = static_cast<const Hypothesis*>(node.GetHypo());
- const TargetPhrase<Moses2::Word> &tp = hypo->GetTargetPhrase();
-
- const InputPath &path = static_cast<const InputPath&>(hypo->GetInputPath());
- const SubPhrase<Moses2::Word> &subPhrase = path.subPhrase;
-
- tp.OutputToStream(system, subPhrase, out);
- }
- return out.str();
-}
-
-void TrellisPath::CreateDeviantPaths(TrellisPaths<TrellisPath> &paths,
- const ArcLists &arcLists, MemPool &pool, const System &system) const
-{
- const size_t sizePath = nodes.size();
-
- //cerr << "prevEdgeChanged=" << prevEdgeChanged << endl;
- for (size_t currEdge = prevEdgeChanged + 1; currEdge < sizePath; currEdge++) {
- TrellisNode newNode = nodes[currEdge];
- assert(newNode.ind == 0);
- const ArcList &arcList = *newNode.arcList;
-
- //cerr << "arcList=" << arcList.size() << endl;
- for (size_t i = 1; i < arcList.size(); ++i) {
- //cerr << "i=" << i << endl;
- newNode.ind = i;
-
- TrellisPath *deviantPath = new TrellisPath(*this, currEdge, newNode,
- arcLists, pool, system);
- //cerr << "deviantPath=" << deviantPath << endl;
- paths.Add(deviantPath);
- }
- }
-}
-
-void TrellisPath::CalcScores(const Scores &origScores,
- const Scores &origHypoScores, const Scores &newHypoScores, MemPool &pool,
- const System &system)
-{
- Scores *scores = new (pool.Allocate<Scores>()) Scores(system, pool,
- system.featureFunctions.GetNumScores(), origScores);
- scores->PlusEquals(system, newHypoScores);
- scores->MinusEquals(system, origHypoScores);
-
- m_scores = scores;
-}
-
-void TrellisPath::AddNodes(const Hypothesis *hypo, const ArcLists &arcLists)
-{
- if (hypo) {
- // add this hypo
- //cerr << "hypo=" << hypo << " " << flush;
- //cerr << *hypo << endl;
- const ArcList &list = arcLists.GetArcList(hypo);
- TrellisNode node(list, 0);
- nodes.push_back(node);
-
- // add prev hypos
- const Hypothesis *prev = hypo->GetPrevHypo();
- AddNodes(prev, arcLists);
- }
-}
-
-} /* namespace Moses2 */
diff --git a/contrib/moses2/PhraseBased/TrellisPath.h b/contrib/moses2/PhraseBased/TrellisPath.h
deleted file mode 100644
index c0b989ad9..000000000
--- a/contrib/moses2/PhraseBased/TrellisPath.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * TrellisPath.h
- *
- * Created on: 16 Mar 2016
- * Author: hieu
- */
-#pragma once
-#include <vector>
-#include "../TypeDef.h"
-#include "../ArcLists.h"
-
-namespace Moses2
-{
-
-class Scores;
-class MemPool;
-class Hypothesis;
-class System;
-
-template<typename T>
-class TrellisPaths;
-
-class TrellisNode
-{
-public:
- const ArcList *arcList;
- size_t ind;
-
- TrellisNode(const ArcList &varcList, size_t vind) :
- arcList(&varcList), ind(vind)
- {
- }
-
- const HypothesisBase *GetHypo() const
- {
- return (*arcList)[ind];
- }
-
- std::string Debug(const System &system) const;
-
-};
-
-class TrellisPath
-{
-public:
- std::vector<TrellisNode> nodes;
- int prevEdgeChanged;
-
- /**< the last node that was wiggled to create this path
- , or NOT_FOUND if this path is the best trans so consist of only hypos
- */
- TrellisPath(const Hypothesis *hypo, const ArcLists &arcLists);
-
- /** create path from another path, deviate at edgeIndex by using arc instead,
- * which may change other hypo back from there
- */
- TrellisPath(const TrellisPath &origPath, size_t edgeIndex,
- const TrellisNode &newNode, const ArcLists &arcLists, MemPool &pool,
- const System &system);
-
- virtual ~TrellisPath();
-
- const Scores &GetScores() const
- {
- return *m_scores;
- }
- SCORE GetFutureScore() const;
-
- std::string Debug(const System &system) const;
-
- void OutputToStream(std::ostream &out, const System &system) const;
- std::string OutputTargetPhrase(const System &system) const;
-
- //! create a set of next best paths by wiggling 1 of the node at a time.
- void CreateDeviantPaths(TrellisPaths<TrellisPath> &paths, const ArcLists &arcLists,
- MemPool &pool, const System &system) const;
-
-protected:
- const Scores *m_scores;
-
- void AddNodes(const Hypothesis *hypo, const ArcLists &arcLists);
- void CalcScores(const Scores &origScores, const Scores &origHypoScores,
- const Scores &newHypoScores, MemPool &pool, const System &system);
-};
-
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/PhraseImplTemplate.h b/contrib/moses2/PhraseImplTemplate.h
deleted file mode 100644
index a9d377bb0..000000000
--- a/contrib/moses2/PhraseImplTemplate.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * PhraseImplTemplate.h
- *
- * Created on: 22 Feb 2016
- * Author: hieu
- */
-
-#pragma once
-
-#include <vector>
-#include <string>
-#include "Phrase.h"
-#include "SubPhrase.h"
-#include "legacy/Util2.h"
-
-namespace Moses2
-{
-
-template<typename WORD>
-class PhraseImplTemplate : public Phrase<WORD>
-{
-public:
- PhraseImplTemplate(MemPool &pool, size_t size) :
- m_size(size)
- {
- m_words = new (pool.Allocate<WORD>(size)) WORD[size];
-
- }
-
- PhraseImplTemplate(MemPool &pool, const PhraseImplTemplate &copy) :
- m_size(copy.GetSize())
- {
- m_words = new (pool.Allocate<WORD>(m_size)) WORD[m_size];
- for (size_t i = 0; i < m_size; ++i) {
- const WORD &word = copy[i];
- (*this)[i] = word;
- }
- }
-
- virtual ~PhraseImplTemplate()
- {
- }
-
- size_t GetSize() const
- { return m_size; }
-
- WORD& operator[](size_t pos)
- { return m_words[pos]; }
-
- const WORD& operator[](size_t pos) const
- { return m_words[pos]; }
-
- SubPhrase<WORD> GetSubPhrase(size_t start, size_t size) const
- {
- SubPhrase<WORD> ret(*this, start, size);
- return ret;
- }
-
-protected:
- size_t m_size;
- WORD *m_words;
-
- void CreateFromString(FactorCollection &vocab, const System &system,
- const std::vector<std::string> &toks, bool addBOSEOS = false)
- {
- size_t startPos = 0;
- if (addBOSEOS) {
- startPos = 1;
-
- m_words[0].CreateFromString(vocab, system, "<s>");
- m_words[m_size-1].CreateFromString(vocab, system, "</s>");
- }
-
- for (size_t i = 0; i < toks.size(); ++i) {
- WORD &word = (*this)[startPos];
- word.CreateFromString(vocab, system, toks[i]);
- ++startPos;
- }
- }
-};
-
-}
-
diff --git a/contrib/moses2/Recycler.cpp b/contrib/moses2/Recycler.cpp
deleted file mode 100644
index b7a8fb77d..000000000
--- a/contrib/moses2/Recycler.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- * Recycler.cpp
- *
- * Created on: 2 Jan 2016
- * Author: hieu
- */
-
-#include "Recycler.h"
-
-namespace Moses2
-{
-
-} /* namespace Moses2 */
diff --git a/contrib/moses2/Recycler.h b/contrib/moses2/Recycler.h
deleted file mode 100644
index 3751a2a93..000000000
--- a/contrib/moses2/Recycler.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Recycler.h
- *
- * Created on: 2 Jan 2016
- * Author: hieu
- */
-#pragma once
-
-#include <cstddef>
-#include <deque>
-#include <vector>
-
-namespace Moses2
-{
-
-template<typename T>
-class Recycler
-{
-public:
- Recycler() :
- m_currInd(0)
- {
- }
- virtual ~Recycler()
- {
- }
-
- T Get()
- {
- if (!m_coll.empty()) {
- T &obj = m_coll.back();
- m_coll.pop_back();
- return obj;
- }
- else if (m_currInd) {
- --m_currInd;
- T &obj = m_all[m_currInd];
- return obj;
- }
- else {
- return NULL;
- }
- }
-
- void Clear()
- {
- m_coll.clear();
- m_currInd = m_all.size();
- }
-
- // call this for new objects when u 1st create it. It is assumed the object will be used right away
- void Keep(const T& val)
- {
- m_all.push_back(val);
- }
-
- // call this for existing object to put back into queue for reuse
- void Recycle(const T& val)
- {
- m_coll.push_back(val);
- }
-
-protected:
- // all objects we're looking after
- std::vector<T> m_all;
-
- // pointer to the object that's just been given out.
- // to give out another obj, must decrement THEN give out
- size_t m_currInd;
-
- // objects that have been give back to us
- std::deque<T> m_coll;
-};
-
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/SCFG/ActiveChart.cpp b/contrib/moses2/SCFG/ActiveChart.cpp
deleted file mode 100644
index 711767b2f..000000000
--- a/contrib/moses2/SCFG/ActiveChart.cpp
+++ /dev/null
@@ -1,109 +0,0 @@
-#include <boost/foreach.hpp>
-#include <boost/functional/hash_fwd.hpp>
-#include "ActiveChart.h"
-#include "InputPath.h"
-#include "Word.h"
-#include "Hypothesis.h"
-#include "../Vector.h"
-
-using namespace std;
-
-namespace Moses2
-{
-namespace SCFG
-{
-SymbolBindElement::SymbolBindElement()
-{
-}
-
-SymbolBindElement::SymbolBindElement(
- const Moses2::Range &range,
- const SCFG::Word &word,
- const Moses2::Hypotheses *hypos)
-:m_range(&range)
-,word(&word)
-,hypos(hypos)
-{
- assert( (word.isNonTerminal && hypos) || (!word.isNonTerminal && hypos == NULL));
-}
-
-size_t hash_value(const SymbolBindElement &obj)
-{
- size_t ret = (size_t) obj.hypos;
- boost::hash_combine(ret, obj.word);
-
- return ret;
-}
-
-std::string SymbolBindElement::Debug(const System &system) const
-{
- stringstream out;
- out << "(";
- out << *m_range;
- out << word->Debug(system);
- out << ")";
-
- return out.str();
-}
-
-////////////////////////////////////////////////////////////////////////////
-SymbolBind::SymbolBind(MemPool &pool)
-:coll(pool)
-,numNT(0)
-{
-}
-
-void SymbolBind::Add(const Range &range, const SCFG::Word &word, const Moses2::Hypotheses *hypos)
-{
- SymbolBindElement ele(range, word, hypos);
- coll.push_back(ele);
-
- if (word.isNonTerminal) {
- ++numNT;
- }
-}
-
-std::vector<const SymbolBindElement*> SymbolBind::GetNTElements() const
-{
- std::vector<const SymbolBindElement*> ret;
-
- for (size_t i = 0; i < coll.size(); ++i) {
- const SymbolBindElement &ele = coll[i];
- //cerr << "ele=" << ele.word->isNonTerminal << " " << ele.hypos << endl;
-
- if (ele.word->isNonTerminal) {
- ret.push_back(&ele);
- }
- }
-
- return ret;
-}
-
-std::string SymbolBind::Debug(const System &system) const
-{
- stringstream out;
- BOOST_FOREACH(const SymbolBindElement &ele, coll) {
- out << ele.Debug(system) << " ";
- }
- return out.str();
-}
-////////////////////////////////////////////////////////////////////////////
-ActiveChartEntry::ActiveChartEntry(MemPool &pool)
-:m_symbolBind(pool)
-{
-}
-
-////////////////////////////////////////////////////////////////////////////
-ActiveChart::ActiveChart(MemPool &pool)
-:entries(pool)
-{
-}
-
-ActiveChart::~ActiveChart()
-{
-
-}
-
-}
-}
-
diff --git a/contrib/moses2/SCFG/ActiveChart.h b/contrib/moses2/SCFG/ActiveChart.h
deleted file mode 100644
index ed9f35d92..000000000
--- a/contrib/moses2/SCFG/ActiveChart.h
+++ /dev/null
@@ -1,125 +0,0 @@
-#pragma once
-#include <vector>
-#include <iostream>
-#include <boost/functional/hash/hash.hpp>
-#include "../legacy/Range.h"
-#include "../HypothesisColl.h"
-
-namespace Moses2
-{
-class System;
-class PhraseTable;
-
-namespace SCFG
-{
-class InputPath;
-class Word;
-
-////////////////////////////////////////////////////////////////////////////
-//! The range covered by each symbol in the source
-//! Terminals will cover only 1 word, NT can cover multiple words
-class SymbolBindElement
-{
-public:
- const SCFG::Word *word; // can be term or non-term
-
- const Moses2::Hypotheses *hypos; // NULL if terminal
-
- SymbolBindElement();
- SymbolBindElement(const Moses2::Range &range, const SCFG::Word &word, const Moses2::Hypotheses *hypos);
-
- const Range &GetRange() const
- { return *m_range; }
-
- bool operator==(const SymbolBindElement &compare) const
- {
- bool ret = hypos == compare.hypos
- && word == compare.word;
- return ret;
- }
-
- std::string Debug(const System &system) const;
-
-protected:
- const Moses2::Range *m_range;
-
-};
-
-size_t hash_value(const SymbolBindElement &obj);
-
-////////////////////////////////////////////////////////////////////////////
-class SymbolBind
-{
-public:
- typedef Vector<SymbolBindElement> Coll;
- Coll coll;
- size_t numNT;
-
- SymbolBind(MemPool &pool);
-
- SymbolBind(MemPool &pool, const SymbolBind &copy)
- :coll(copy.coll)
- ,numNT(copy.numNT)
- {}
-
- size_t GetSize() const
- { return coll.size(); }
-
- std::vector<const SymbolBindElement*> GetNTElements() const;
-
- void Add(const Range &range, const SCFG::Word &word, const Moses2::Hypotheses *hypos);
-
- bool operator==(const SymbolBind &compare) const
- { return coll == compare.coll; }
-
- std::string Debug(const System &system) const;
-
-};
-
-inline size_t hash_value(const SymbolBind &obj)
-{
- return boost::hash_value(obj.coll);
-}
-
-////////////////////////////////////////////////////////////////////////////
-class ActiveChartEntry
-{
-public:
- ActiveChartEntry(MemPool &pool);
-
- ActiveChartEntry(MemPool &pool, const ActiveChartEntry &prevEntry)
- :m_symbolBind(pool, prevEntry.GetSymbolBind())
- {
- //symbolBinds = new (pool.Allocate<SymbolBind>()) SymbolBind(pool, *prevEntry.symbolBinds);
- }
-
- const SymbolBind &GetSymbolBind() const
- { return m_symbolBind; }
-
- virtual void AddSymbolBindElement(
- const Range &range,
- const SCFG::Word &word,
- const Moses2::Hypotheses *hypos,
- const PhraseTable &pt)
- {
- m_symbolBind.Add(range, word, hypos);
- }
-
-protected:
- SymbolBind m_symbolBind;
-
-};
-
-////////////////////////////////////////////////////////////////////////////
-class ActiveChart
-{
-public:
- ActiveChart(MemPool &pool);
- ~ActiveChart();
-
- Vector<ActiveChartEntry*> entries;
-};
-
-}
-}
-
diff --git a/contrib/moses2/SCFG/Hypothesis.cpp b/contrib/moses2/SCFG/Hypothesis.cpp
deleted file mode 100644
index 28411a43e..000000000
--- a/contrib/moses2/SCFG/Hypothesis.cpp
+++ /dev/null
@@ -1,180 +0,0 @@
-#include <boost/foreach.hpp>
-#include <sstream>
-#include "Hypothesis.h"
-#include "Manager.h"
-#include "ActiveChart.h"
-#include "TargetPhraseImpl.h"
-#include "Sentence.h"
-#include "../System.h"
-#include "../Scores.h"
-#include "../InputPathBase.h"
-#include "../FF/StatefulFeatureFunction.h"
-
-using namespace std;
-
-namespace Moses2
-{
-namespace SCFG
-{
-Hypothesis *Hypothesis::Create(MemPool &pool, Manager &mgr)
-{
- // ++g_numHypos;
- Hypothesis *ret;
- //ret = new (pool.Allocate<Hypothesis>()) Hypothesis(pool, mgr.system);
-
- Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycle();
- ret = static_cast<Hypothesis*>(recycler.Get());
- if (ret) {
- // got new hypo from recycler. Do nothing
- }
- else {
- ret = new (pool.Allocate<Hypothesis>()) Hypothesis(pool, mgr.system);
- //cerr << "Hypothesis=" << sizeof(Hypothesis) << " " << ret << endl;
- recycler.Keep(ret);
- }
- return ret;
-}
-
-Hypothesis::Hypothesis(MemPool &pool,
- const System &system)
-:HypothesisBase(pool, system)
-,m_prevHypos(pool)
-{
-
-}
-
-void Hypothesis::Init(SCFG::Manager &mgr,
- const SCFG::InputPath &path,
- const SCFG::SymbolBind &symbolBind,
- const SCFG::TargetPhraseImpl &tp,
- const Vector<size_t> &prevHyposIndices)
-{
- m_mgr = &mgr;
- m_targetPhrase = &tp;
- m_path = &path;
- m_symbolBind = &symbolBind;
-
- m_scores->Reset(mgr.system);
- m_scores->PlusEquals(mgr.system, GetTargetPhrase().GetScores());
-
- //cerr << "tp=" << tp << endl;
- //cerr << "symbolBind=" << symbolBind << endl;
- //cerr << endl;
- m_prevHypos.resize(symbolBind.numNT);
-
- size_t currInd = 0;
- for (size_t i = 0; i < symbolBind.coll.size(); ++i) {
- const SymbolBindElement &ele = symbolBind.coll[i];
- //cerr << "ele=" << ele.word->isNonTerminal << " " << ele.hypos << endl;
-
- if (ele.hypos) {
- const Hypotheses &sortedHypos = *ele.hypos;
-
- size_t prevHyposInd = prevHyposIndices[currInd];
- assert(prevHyposInd < sortedHypos.size());
-
- const Hypothesis *prevHypo = static_cast<const SCFG::Hypothesis*>(sortedHypos[prevHyposInd]);
- m_prevHypos[currInd] = prevHypo;
-
- m_scores->PlusEquals(mgr.system, prevHypo->GetScores());
-
- ++currInd;
- }
- }
-}
-
-SCORE Hypothesis::GetFutureScore() const
-{
- return GetScores().GetTotalScore();
-}
-
-void Hypothesis::EvaluateWhenApplied()
-{
- const std::vector<const StatefulFeatureFunction*> &sfffs =
- GetManager().system.featureFunctions.GetStatefulFeatureFunctions();
- BOOST_FOREACH(const StatefulFeatureFunction *sfff, sfffs){
- EvaluateWhenApplied(*sfff);
- }
-//cerr << *this << endl;
-
-}
-
-void Hypothesis::EvaluateWhenApplied(const StatefulFeatureFunction &sfff)
-{
- const SCFG::Manager &mgr = static_cast<const SCFG::Manager&>(GetManager());
- size_t statefulInd = sfff.GetStatefulInd();
- FFState *thisState = m_ffStates[statefulInd];
- sfff.EvaluateWhenApplied(mgr, *this, statefulInd, GetScores(),
- *thisState);
-
-}
-
-void Hypothesis::OutputToStream(std::ostream &strm) const
-{
- const SCFG::TargetPhraseImpl &tp = GetTargetPhrase();
- //cerr << "tp=" << tp.Debug(m_mgr->system) << endl;
-
- for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
- const SCFG::Word &word = tp[targetPos];
- //cerr << "word " << targetPos << "=" << word << endl;
- if (word.isNonTerminal) {
- //cerr << "is nt" << endl;
- // non-term. fill out with prev hypo
- size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[targetPos];
- const Hypothesis *prevHypo = m_prevHypos[nonTermInd];
- prevHypo->OutputToStream(strm);
- }
- else {
- word.OutputToStream(*m_mgr, targetPos, *this, strm);
- strm << " ";
- }
-
- }
-}
-
-std::string Hypothesis::Debug(const System &system) const
-{
- stringstream out;
- out << this << flush;
-
- out << " RANGE:";
- out << m_path->range << " ";
- out << m_symbolBind->Debug(system) << " ";
-
- // score
- out << " SCORE:" << GetScores().Debug(GetManager().system) << flush;
-
- out << m_targetPhrase->Debug(GetManager().system);
-
- out << "PREV:";
- for (size_t i = 0; i < m_prevHypos.size(); ++i) {
- const Hypothesis *prevHypo = m_prevHypos[i];
- out << prevHypo << prevHypo->GetInputPath().range << "(" << prevHypo->GetFutureScore() << ") ";
- }
- out << endl;
-
- /*
- // recursive
- for (size_t i = 0; i < m_prevHypos.size(); ++i) {
- const Hypothesis *prevHypo = m_prevHypos[i];
- out << prevHypo->Debug(GetManager().system) << " ";
- }
- */
-
- return out.str();
-}
-
-void Hypothesis::OutputTransOpt(std::ostream &out) const
-{
- out << GetInputPath().range << " "
- << "score=" << GetScores().GetTotalScore() << " "
- << GetTargetPhrase().Debug(m_mgr->system) << endl;
-
- BOOST_FOREACH(const Hypothesis *prevHypo, m_prevHypos) {
- prevHypo->OutputTransOpt(out);
- }
-}
-
-} // namespaces
-}
-
diff --git a/contrib/moses2/SCFG/Hypothesis.h b/contrib/moses2/SCFG/Hypothesis.h
deleted file mode 100644
index 8ece45bb6..000000000
--- a/contrib/moses2/SCFG/Hypothesis.h
+++ /dev/null
@@ -1,71 +0,0 @@
-#pragma once
-#include <vector>
-#include "InputPath.h"
-#include "../HypothesisBase.h"
-#include "../MemPool.h"
-#include "../Vector.h"
-
-namespace Moses2
-{
-class InputPathBase;
-class StatefulFeatureFunction;
-
-namespace SCFG
-{
-class TargetPhraseImpl;
-class Manager;
-class SymbolBind;
-class InputPath;
-
-class Hypothesis: public HypothesisBase
-{
-public:
- static Hypothesis *Create(MemPool &pool, Manager &mgr);
-
- void Init(SCFG::Manager &mgr,
- const SCFG::InputPath &path,
- const SCFG::SymbolBind &symbolBind,
- const SCFG::TargetPhraseImpl &tp,
- const Vector<size_t> &prevHyposIndices);
-
- virtual SCORE GetFutureScore() const;
- virtual void EvaluateWhenApplied();
-
- const SCFG::TargetPhraseImpl &GetTargetPhrase() const
- { return *m_targetPhrase; }
-
- const SCFG::InputPath &GetInputPath() const
- { return *m_path; }
-
- const SCFG::SymbolBind &GetSymbolBind() const
- { return *m_symbolBind; }
-
- const Vector<const Hypothesis*> &GetPrevHypos() const
- { return m_prevHypos; }
-
- //! get a particular previous hypos
- const Hypothesis* GetPrevHypo(size_t ind) const
- { return m_prevHypos[ind]; }
-
- void OutputToStream(std::ostream &strm) const;
- void OutputTransOpt(std::ostream &strm) const;
-
- std::string Debug(const System &system) const;
-
-protected:
- const SCFG::TargetPhraseImpl *m_targetPhrase;
- const SCFG::InputPath *m_path;
- const SCFG::SymbolBind *m_symbolBind;
-
- Vector<const Hypothesis*> m_prevHypos; // always sorted by source position?
-
- Hypothesis(MemPool &pool,
- const System &system);
-
- void EvaluateWhenApplied(const StatefulFeatureFunction &sfff);
-
-};
-
-}
-}
-
diff --git a/contrib/moses2/SCFG/InputPath.cpp b/contrib/moses2/SCFG/InputPath.cpp
deleted file mode 100644
index 1ebbbf327..000000000
--- a/contrib/moses2/SCFG/InputPath.cpp
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * InputPath.cpp
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include "InputPath.h"
-#include "TargetPhrases.h"
-#include "ActiveChart.h"
-#include "../TranslationModel/PhraseTable.h"
-#include "../MemPoolAllocator.h"
-
-using namespace std;
-
-namespace Moses2
-{
-namespace SCFG
-{
-
-InputPath::InputPath(MemPool &pool, const SubPhrase<SCFG::Word> &subPhrase,
- const Range &range, size_t numPt, const InputPath *prefixPath)
-:InputPathBase(pool, range, numPt, prefixPath)
-,subPhrase(subPhrase)
-,targetPhrases(MemPoolAllocator<Element>(pool))
-{
- m_activeChart = pool.Allocate<ActiveChart>(numPt);
- for (size_t i = 0; i < numPt; ++i) {
- ActiveChart &memAddr = m_activeChart[i];
- new (&memAddr) ActiveChart(pool);
- }
-}
-
-InputPath::~InputPath()
-{
- // TODO Auto-generated destructor stub
-}
-
-std::string InputPath::Debug(const System &system) const
-{
- stringstream out;
- out << range << " ";
- out << subPhrase.Debug(system);
- out << " " << prefixPath << " ";
-
- const Vector<ActiveChartEntry*> &activeEntries = GetActiveChart(1).entries;
- out << "m_activeChart=" << activeEntries.size() << " ";
-
- for (size_t i = 0; i < activeEntries.size(); ++i) {
- const ActiveChartEntry &entry = *activeEntries[i];
- out << entry.GetSymbolBind().Debug(system);
- out << "| ";
- }
-
- // tps
- out << "tps=" << targetPhrases.size();
-
- out << " ";
- BOOST_FOREACH(const SCFG::InputPath::Coll::value_type &valPair, targetPhrases) {
- const SymbolBind &symbolBind = valPair.first;
- const SCFG::TargetPhrases &tps = *valPair.second;
- out << symbolBind.Debug(system);
- //out << "=" << tps.GetSize() << " ";
- out << tps.Debug(system);
- }
-
- return out.str();
-}
-
-void InputPath::AddTargetPhrasesToPath(
- MemPool &pool,
- const System &system,
- const PhraseTable &pt,
- const SCFG::TargetPhrases &tps,
- const SCFG::SymbolBind &symbolBind)
-{
- targetPhrases.push_back(Element(symbolBind, &tps));
- /*
- Coll::iterator iterColl;
- iterColl = targetPhrases.find(symbolBind);
- assert(iterColl == targetPhrases.end());
-
- targetPhrases[symbolBind] = &tps;
- //cerr << "range=" << range << " symbolBind=" << symbolBind.Debug(system) << " tps=" << tps.Debug(system);
- */
- /*
- SCFG::TargetPhrases *tpsNew;
- tpsNew = new (pool.Allocate<SCFG::TargetPhrases>()) SCFG::TargetPhrases(pool);
- targetPhrases[symbolBind] = tpsNew;
-
- SCFG::TargetPhrases::const_iterator iter;
- for (iter = tps.begin(); iter != tps.end(); ++iter) {
- const SCFG::TargetPhraseImpl *tp = *iter;
- //cerr << "tpCast=" << *tp << endl;
- tpsNew->AddTargetPhrase(*tp);
- }
- cerr << "range=" << range << " symbolBind=" << symbolBind.Debug(system) << " tpsNew=" << tpsNew->Debug(system);
- */
-}
-
-void InputPath::AddActiveChartEntry(size_t ptInd, ActiveChartEntry *chartEntry)
-{
- //cerr << " added " << chartEntry << " " << range << " " << ptInd << endl;
- ActiveChart &activeChart = m_activeChart[ptInd];
- activeChart.entries.push_back(chartEntry);
-}
-
-size_t InputPath::GetNumRules() const
-{
- size_t ret = 0;
- BOOST_FOREACH(const Coll::value_type &valPair, targetPhrases) {
- const SCFG::TargetPhrases &tps = *valPair.second;
- ret += tps.GetSize();
- }
- return ret;
-}
-
-}
-}
-
diff --git a/contrib/moses2/SCFG/InputPath.h b/contrib/moses2/SCFG/InputPath.h
deleted file mode 100644
index c8a7253c2..000000000
--- a/contrib/moses2/SCFG/InputPath.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * InputPath.h
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include <iostream>
-#include <list>
-#include <boost/unordered_map.hpp>
-#include "../InputPathBase.h"
-#include "../MemPoolAllocator.h"
-#include "TargetPhrases.h"
-#include "ActiveChart.h"
-#include "Word.h"
-
-namespace Moses2
-{
-namespace SCFG
-{
-class TargetPhrases;
-class TargetPhraseImpl;
-
-
-////////////////////////////////////////////////////////////////////////////
-class InputPath: public InputPathBase
-{
-public:
- typedef std::pair<SymbolBind, const SCFG::TargetPhrases*> Element;
- typedef std::list<Element, MemPoolAllocator<Element> > Coll;
- Coll targetPhrases;
-
- SubPhrase<SCFG::Word> subPhrase;
-
- InputPath(MemPool &pool, const SubPhrase<SCFG::Word> &subPhrase, const Range &range,
- size_t numPt, const InputPath *prefixPath);
- virtual ~InputPath();
-
- const ActiveChart &GetActiveChart(size_t ptInd) const
- { return m_activeChart[ptInd]; }
-
- void AddActiveChartEntry(size_t ptInd, ActiveChartEntry *chartEntry);
-
- void AddTargetPhrasesToPath(
- MemPool &pool,
- const System &system,
- const PhraseTable &pt,
- const SCFG::TargetPhrases &tps,
- const SCFG::SymbolBind &symbolBind);
-
- size_t GetNumRules() const;
-
- std::string Debug(const System &system) const;
-
-protected:
- ActiveChart *m_activeChart;
-};
-
-}
-}
-
diff --git a/contrib/moses2/SCFG/InputPaths.cpp b/contrib/moses2/SCFG/InputPaths.cpp
deleted file mode 100644
index e1c3f9d21..000000000
--- a/contrib/moses2/SCFG/InputPaths.cpp
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * InputPaths.cpp
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include <iostream>
-#include "InputPaths.h"
-#include "Sentence.h"
-#include "../System.h"
-#include "../legacy/Range.h"
-#include "Manager.h"
-#include "InputPath.h"
-#include "Word.h"
-
-using namespace std;
-
-namespace Moses2
-{
-namespace SCFG
-{
-void InputPaths::Init(const InputType &input, const ManagerBase &mgr)
-{
- const Sentence &sentence = static_cast<const Sentence&>(input);
- MemPool &pool = mgr.GetPool();
- size_t numPt = mgr.system.mappings.size();
- size_t size = sentence.GetSize();
- //cerr << "size=" << size << endl;
-
- m_matrix = new (pool.Allocate< Matrix<SCFG::InputPath*> >()) Matrix<SCFG::InputPath*>(pool,
- size, size + 1);
- m_matrix->Init(NULL);
-
- for (size_t startPos = 0; startPos < size; ++startPos) {
- // create path for 0 length string
- Range range(startPos, startPos - 1);
- SubPhrase<SCFG::Word> subPhrase = sentence.GetSubPhrase(startPos, 0);
-
- SCFG::InputPath *path = new (pool.Allocate<SCFG::InputPath>()) SCFG::InputPath(pool,
- subPhrase, range, numPt, NULL);
- //cerr << "path=" << *path << endl;
- m_inputPaths.push_back(path);
- m_matrix->SetValue(startPos, 0, path);
-
- // create normal paths of subphrases through the sentence
- const SCFG::InputPath *prefixPath = path;
- for (size_t phaseSize = 1; phaseSize <= size; ++phaseSize) {
- size_t endPos = startPos + phaseSize - 1; // pb-like indexing. eg. [1-1] covers 1 word, NOT 0
-
- if (endPos >= size) {
- break;
- }
-
- SubPhrase<SCFG::Word> subPhrase = sentence.GetSubPhrase(startPos, phaseSize);
- Range range(startPos, endPos);
-
- SCFG::InputPath *path = new (pool.Allocate<SCFG::InputPath>())
- SCFG::InputPath(pool, subPhrase, range, numPt, prefixPath);
- //cerr << "path=" << *path << endl;
- m_inputPaths.push_back(path);
-
- prefixPath = path;
- m_matrix->SetValue(startPos, phaseSize, path);
- }
- }
-
-}
-
-std::string InputPaths::Debug(const System &system) const
-{
- stringstream out;
- const Matrix<InputPath*> &matrix = GetMatrix();
- for (size_t i = 0; i < matrix.GetRows(); ++i) {
- for (size_t j = 0; j < matrix.GetCols(); ++j) {
- SCFG::InputPath *path = matrix.GetValue(i, j);
- if (path) {
- out << path->Debug(system);
- out << endl;
- }
- }
- }
- return out.str();
-}
-
-}
-}
-
diff --git a/contrib/moses2/SCFG/InputPaths.h b/contrib/moses2/SCFG/InputPaths.h
deleted file mode 100644
index 37e2404cf..000000000
--- a/contrib/moses2/SCFG/InputPaths.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * InputPaths.h
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include <vector>
-#include "../InputPathsBase.h"
-#include "../legacy/Matrix.h"
-
-namespace Moses2
-{
-
-class Sentence;
-class System;
-
-namespace SCFG
-{
-class InputPath;
-
-class InputPaths: public InputPathsBase
-{
-public:
- void Init(const InputType &input, const ManagerBase &mgr);
-
- const Matrix<InputPath*> &GetMatrix() const
- {
- return *m_matrix;
- }
-
- std::string Debug(const System &system) const;
-
-protected:
- Matrix<InputPath*> *m_matrix;
-
-};
-
-}
-}
-
diff --git a/contrib/moses2/SCFG/Manager.cpp b/contrib/moses2/SCFG/Manager.cpp
deleted file mode 100644
index 5db4e2a89..000000000
--- a/contrib/moses2/SCFG/Manager.cpp
+++ /dev/null
@@ -1,391 +0,0 @@
-/*
- * Manager.cpp
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include <cstdlib>
-#include <vector>
-#include <sstream>
-#include "../System.h"
-#include "../TranslationModel/PhraseTable.h"
-#include "Manager.h"
-#include "InputPath.h"
-#include "Hypothesis.h"
-#include "TargetPhraseImpl.h"
-#include "ActiveChart.h"
-#include "Sentence.h"
-
-#include "nbest/KBestExtractor.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace SCFG
-{
-
-Manager::Manager(System &sys, const TranslationTask &task,
- const std::string &inputStr, long translationId)
-:ManagerBase(sys, task, inputStr, translationId)
-{
-
-}
-
-Manager::~Manager()
-{
-
-}
-
-void Manager::Decode()
-{
- // init pools etc
- //cerr << "START InitPools()" << endl;
- InitPools();
- //cerr << "START ParseInput()" << endl;
-
- FactorCollection &vocab = system.GetVocab();
- m_input = Sentence::CreateFromString(GetPool(), vocab, system, m_inputStr,
- m_translationId);
-
- const SCFG::Sentence &sentence = static_cast<const SCFG::Sentence&>(GetInput());
-
- size_t inputSize = sentence.GetSize();
- //cerr << "inputSize=" << inputSize << endl;
-
- m_inputPaths.Init(sentence, *this);
- //cerr << "CREATED m_inputPaths" << endl;
-
- m_stacks.Init(*this, inputSize);
- //cerr << "CREATED m_stacks" << endl;
-
- for (int startPos = inputSize - 1; startPos >= 0; --startPos) {
- //cerr << endl << "startPos=" << startPos << endl;
- SCFG::InputPath &initPath = *m_inputPaths.GetMatrix().GetValue(startPos, 0);
-
- //cerr << "BEFORE InitActiveChart=" << initPath.Debug(system) << endl;
- InitActiveChart(initPath);
- //cerr << "AFTER InitActiveChart=" << initPath.Debug(system) << endl;
-
- int maxPhraseSize = inputSize - startPos + 1;
- for (int phraseSize = 1; phraseSize < maxPhraseSize; ++phraseSize) {
- //cerr << endl << "phraseSize=" << phraseSize << endl;
-
- SCFG::InputPath &path = *m_inputPaths.GetMatrix().GetValue(startPos, phraseSize);
-
- Stack &stack = m_stacks.GetStack(startPos, phraseSize);
-
- //cerr << "BEFORE LOOKUP path=" << path.Debug(system) << endl;
- Lookup(path);
- //cerr << "AFTER LOOKUP path=" << path.Debug(system) << endl;
- Decode(path, stack);
- //cerr << "AFTER DECODE path=" << path.Debug(system) << endl;
-
- LookupUnary(path);
- //cerr << "AFTER LookupUnary path=" << path.Debug(system) << endl;
-
- //cerr << "#rules=" << path.GetNumRules() << endl;
- }
- }
-
- /*
- const Stack *stack;
- stack = &m_stacks.GetStack(0, 5);
- cerr << "stack 0,12:" << stack->Debug(system) << endl;
- */
- //m_stacks.OutputStacks();
-}
-
-void Manager::InitActiveChart(SCFG::InputPath &path)
-{
- size_t numPt = system.mappings.size();
- //cerr << "numPt=" << numPt << endl;
-
- for (size_t i = 0; i < numPt; ++i) {
- const PhraseTable &pt = *system.mappings[i];
- //cerr << "START InitActiveChart" << endl;
- pt.InitActiveChart(GetPool(), *this, path);
- //cerr << "FINISHED InitActiveChart" << endl;
- }
-}
-
-void Manager::Lookup(SCFG::InputPath &path)
-{
- size_t numPt = system.mappings.size();
- //cerr << "numPt=" << numPt << endl;
-
- for (size_t i = 0; i < numPt; ++i) {
- const PhraseTable &pt = *system.mappings[i];
- size_t maxChartSpan = system.maxChartSpans[i];
- pt.Lookup(GetPool(), *this, maxChartSpan, m_stacks, path);
- }
-
- /*
- size_t tpsNum = path.targetPhrases.GetSize();
- if (tpsNum) {
- cerr << tpsNum << " " << path << endl;
- }
- */
-}
-
-void Manager::LookupUnary(SCFG::InputPath &path)
-{
- size_t numPt = system.mappings.size();
- //cerr << "numPt=" << numPt << endl;
-
- for (size_t i = 0; i < numPt; ++i) {
- const PhraseTable &pt = *system.mappings[i];
- pt.LookupUnary(GetPool(), *this, m_stacks, path);
- }
-
- /*
- size_t tpsNum = path.targetPhrases.GetSize();
- if (tpsNum) {
- cerr << tpsNum << " " << path << endl;
- }
- */
-}
-
-///////////////////////////////////////////////////////////////
-// CUBE-PRUNING
-///////////////////////////////////////////////////////////////
-void Manager::Decode(SCFG::InputPath &path, Stack &stack)
-{
- // clear cube pruning data
- //std::vector<QueueItem*> &container = Container(m_queue);
- //container.clear();
- Recycler<HypothesisBase*> &hypoRecycler = GetHypoRecycle();
- while (!m_queue.empty()) {
- QueueItem *item = m_queue.top();
- m_queue.pop();
- // recycle unused hypos from queue
- Hypothesis *hypo = item->hypo;
- hypoRecycler.Recycle(hypo);
-
- // recycle queue item
- m_queueItemRecycler.push_back(item);
- }
-
- m_seenPositions.clear();
-
- // init queue
- BOOST_FOREACH(const InputPath::Coll::value_type &valPair, path.targetPhrases) {
- const SymbolBind &symbolBind = valPair.first;
- const SCFG::TargetPhrases &tps = *valPair.second;
-
- CreateQueue(path, symbolBind, tps);
- }
-
- // MAIN LOOP
- size_t pops = 0;
- while (!m_queue.empty() && pops < system.options.cube.pop_limit) {
- //cerr << "pops=" << pops << endl;
- QueueItem *item = m_queue.top();
- m_queue.pop();
-
- // add hypo to stack
- Hypothesis *hypo = item->hypo;
-
- //cerr << "hypo=" << *hypo << " " << endl;
- stack.Add(hypo, GetHypoRecycle(), arcLists);
- //cerr << "Added " << *hypo << " " << endl;
-
- item->CreateNext(GetSystemPool(), GetPool(), *this, m_queue, m_seenPositions, path);
- //cerr << "Created next " << endl;
- m_queueItemRecycler.push_back(item);
-
- ++pops;
- }
-
-}
-
-void Manager::CreateQueue(
- const SCFG::InputPath &path,
- const SymbolBind &symbolBind,
- const SCFG::TargetPhrases &tps)
-{
- MemPool &pool = GetPool();
-
- SeenPosition *seenItem = new (pool.Allocate<SeenPosition>()) SeenPosition(pool, symbolBind, tps, symbolBind.numNT);
- bool unseen = m_seenPositions.Add(seenItem);
- assert(unseen);
-
- QueueItem *item = QueueItem::Create(GetPool(), *this);
- item->Init(GetPool(), symbolBind, tps, seenItem->hypoIndColl);
- for (size_t i = 0; i < symbolBind.coll.size(); ++i) {
- const SymbolBindElement &ele = symbolBind.coll[i];
- if (ele.hypos) {
- const Moses2::Hypotheses *hypos = ele.hypos;
- item->AddHypos(*hypos);
- }
- }
-
- item->CreateHypo(GetSystemPool(), *this, path, symbolBind);
-
- //cerr << "hypo=" << item->hypo->Debug(system) << endl;
-
- m_queue.push(item);
-}
-
-///////////////////////////////////////////////////////////////
-// NON CUBE-PRUNING
-///////////////////////////////////////////////////////////////
-/*
-void Manager::Decode(SCFG::InputPath &path, Stack &stack)
-{
- //cerr << "path=" << path << endl;
-
- boost::unordered_map<SCFG::SymbolBind, SCFG::TargetPhrases*>::const_iterator iterOuter;
- for (iterOuter = path.targetPhrases->begin(); iterOuter != path.targetPhrases->end(); ++iterOuter) {
- const SCFG::SymbolBind &symbolBind = iterOuter->first;
-
- const SCFG::TargetPhrases &tps = *iterOuter->second;
- //cerr << "symbolBind=" << symbolBind << " tps=" << tps.GetSize() << endl;
-
- SCFG::TargetPhrases::const_iterator iter;
- for (iter = tps.begin(); iter != tps.end(); ++iter) {
- const SCFG::TargetPhraseImpl &tp = **iter;
- //cerr << "tp=" << tp << endl;
- ExpandHypo(path, symbolBind, tp, stack);
- }
- }
-}
-*/
-
-void Manager::ExpandHypo(
- const SCFG::InputPath &path,
- const SCFG::SymbolBind &symbolBind,
- const SCFG::TargetPhraseImpl &tp,
- Stack &stack)
-{
- Recycler<HypothesisBase*> &hypoRecycler = GetHypoRecycle();
-
- std::vector<const SymbolBindElement*> ntEles = symbolBind.GetNTElements();
- Vector<size_t> prevHyposIndices(GetPool(), symbolBind.numNT);
- assert(ntEles.size() == symbolBind.numNT);
- //cerr << "ntEles:" << ntEles.size() << endl;
-
- size_t ind = 0;
- while (IncrPrevHypoIndices(prevHyposIndices, ind, ntEles)) {
- SCFG::Hypothesis *hypo = SCFG::Hypothesis::Create(GetSystemPool(), *this);
- hypo->Init(*this, path, symbolBind, tp, prevHyposIndices);
- hypo->EvaluateWhenApplied();
-
- stack.Add(hypo, hypoRecycler, arcLists);
-
- ++ind;
- }
-}
-
-bool Manager::IncrPrevHypoIndices(
- Vector<size_t> &prevHyposIndices,
- size_t ind,
- const std::vector<const SymbolBindElement*> ntEles)
-{
- if (ntEles.size() == 0) {
- // no nt. Do the 1st
- return ind ? false : true;
- }
-
- size_t numHypos = 0;
-
- //cerr << "IncrPrevHypoIndices:" << ind << " " << ntEles.size() << " ";
- for (size_t i = 0; i < ntEles.size() - 1; ++i) {
- const SymbolBindElement &ele = *ntEles[i];
- const Hypotheses &hypos = *ele.hypos;
- numHypos = hypos.size();
-
- std::div_t divRet = std::div((int)ind, (int)numHypos);
- ind = divRet.quot;
-
- size_t hypoInd = divRet.rem;
- prevHyposIndices[i] = hypoInd;
- //cerr << "(" << i << "," << ind << "," << numHypos << "," << hypoInd << ")";
- }
-
- // last
- prevHyposIndices.back() = ind;
-
-
- // check if last is over limit
- const SymbolBindElement &ele = *ntEles.back();
- const Hypotheses &hypos = *ele.hypos;
- numHypos = hypos.size();
-
- //cerr << "(" << (ntEles.size() - 1) << "," << ind << "," << numHypos << "," << ind << ")";
- //cerr << endl;
-
- if (ind >= numHypos) {
- return false;
- }
- else {
- return true;
- }
-}
-
-std::string Manager::OutputBest() const
-{
- string out;
- const Stack &lastStack = m_stacks.GetLastStack();
- const SCFG::Hypothesis *bestHypo = lastStack.GetBestHypo();
-
- if (bestHypo) {
- //cerr << "BEST TRANSLATION: " << bestHypo << bestHypo->Debug(system) << endl;
- //cerr << " " << out.str() << endl;
- stringstream outStrm;
- Moses2::FixPrecision(outStrm);
-
- bestHypo->OutputToStream(outStrm);
-
- out = outStrm.str();
- out = out.substr(4, out.size() - 10);
-
- if (system.options.output.ReportHypoScore) {
- out = SPrint(bestHypo->GetScores().GetTotalScore()) + " " + out;
- }
- }
- else {
- if (system.options.output.ReportHypoScore) {
- out = "0 ";
- }
-
- //cerr << "NO TRANSLATION " << GetTranslationId() << endl;
- }
-
- return out;
-}
-
-std::string Manager::OutputNBest()
-{
- stringstream out;
- //Moses2::FixPrecision(out);
-
- arcLists.Sort();
- //cerr << "arcs=" << arcLists.Debug(system) << endl;
-
- KBestExtractor extractor(*this);
- extractor.OutputToStream(out);
-
- return out.str();
-}
-
-std::string Manager::OutputTransOpt()
-{
- const Stack &lastStack = m_stacks.GetLastStack();
- const SCFG::Hypothesis *bestHypo = lastStack.GetBestHypo();
-
- if (bestHypo) {
- stringstream outStrm;
- bestHypo->OutputTransOpt(outStrm);
- return outStrm.str();
- }
- else {
- return "";
- }
-}
-
-} // namespace
-}
-
diff --git a/contrib/moses2/SCFG/Manager.h b/contrib/moses2/SCFG/Manager.h
deleted file mode 100644
index 6bd53cc89..000000000
--- a/contrib/moses2/SCFG/Manager.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Manager.h
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include <queue>
-#include <cstddef>
-#include <string>
-#include <deque>
-#include "../ManagerBase.h"
-#include "Stacks.h"
-#include "InputPaths.h"
-#include "Misc.h"
-
-namespace Moses2
-{
-
-namespace SCFG
-{
-class SymbolBind;
-class TargetPhraseImpl;
-class SymbolBindElement;
-
-class Manager: public Moses2::ManagerBase
-{
-public:
- Manager(System &sys, const TranslationTask &task, const std::string &inputStr,
- long translationId);
-
- virtual ~Manager();
- void Decode();
- std::string OutputBest() const;
- std::string OutputNBest();
- std::string OutputTransOpt();
-
- const InputPaths &GetInputPaths() const
- { return m_inputPaths; }
-
- QueueItemRecycler &GetQueueItemRecycler()
- { return m_queueItemRecycler; }
-
- const Stacks &GetStacks() const
- { return m_stacks; }
-
-protected:
- Stacks m_stacks;
- SCFG::InputPaths m_inputPaths;
-
- void InitActiveChart(SCFG::InputPath &path);
- void Lookup(SCFG::InputPath &path);
- void LookupUnary(SCFG::InputPath &path);
- void Decode(SCFG::InputPath &path, Stack &stack);
-
- void ExpandHypo(
- const SCFG::InputPath &path,
- const SCFG::SymbolBind &symbolBind,
- const SCFG::TargetPhraseImpl &tp,
- Stack &stack);
-
- bool IncrPrevHypoIndices(
- Vector<size_t> &prevHyposIndices,
- size_t ind,
- const std::vector<const SymbolBindElement*> ntEles);
-
- // cube pruning
- Queue m_queue;
- SeenPositions m_seenPositions;
-
- QueueItemRecycler m_queueItemRecycler;
-
- void CreateQueue(
- const SCFG::InputPath &path,
- const SymbolBind &symbolBind,
- const SCFG::TargetPhrases &tps);
-};
-
-}
-}
-
diff --git a/contrib/moses2/SCFG/Misc.cpp b/contrib/moses2/SCFG/Misc.cpp
deleted file mode 100644
index 1ab053b60..000000000
--- a/contrib/moses2/SCFG/Misc.cpp
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
- * Misc.cpp
- *
- * Created on: 2 Jun 2016
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include <boost/functional/hash.hpp>
-#include "Misc.h"
-#include "Manager.h"
-#include "TargetPhrases.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace SCFG
-{
-
-////////////////////////////////////////////////////////
-SeenPosition::SeenPosition(MemPool &pool,
- const SymbolBind &vSymbolBind,
- const SCFG::TargetPhrases &vtps,
- size_t numNT)
-:symbolBind(vSymbolBind)
-,tps(vtps)
-,tpInd(0)
-,hypoIndColl(pool, numNT, 0)
-{
-}
-
-SeenPosition::SeenPosition(MemPool &pool,
- const SymbolBind &vSymbolBind,
- const SCFG::TargetPhrases &vtps,
- size_t vtpInd,
- const Vector<size_t> &vhypoIndColl)
-:symbolBind(vSymbolBind)
-,tps(vtps)
-,tpInd(vtpInd)
-,hypoIndColl(pool, vhypoIndColl.size())
-{
- for (size_t i = 0; i < hypoIndColl.size(); ++i) {
- hypoIndColl[i] = vhypoIndColl[i];
- }
-}
-
-std::string SeenPosition::Debug(const System &system) const
-{
- stringstream out;
- out << &tps << " " << tpInd << " ";
-
- for (size_t i = 0; i < hypoIndColl.size(); ++i) {
- out << hypoIndColl[i] << " ";
- }
-
- return out.str();
-}
-
-bool SeenPosition::operator==(const SeenPosition &compare) const
-{
- if (&symbolBind != &compare.symbolBind) {
- return false;
- }
-
- if (&tps != &compare.tps) {
- return false;
- }
-
- if (tpInd != compare.tpInd) {
- return false;
- }
-
- if (hypoIndColl != compare.hypoIndColl) {
- return false;
- }
-
- return true;
-}
-
-size_t SeenPosition::hash() const
-{
- size_t ret = (size_t) &symbolBind;
- boost::hash_combine(ret, &tps);
- boost::hash_combine(ret, tpInd);
- boost::hash_combine(ret, hypoIndColl);
- return ret;
-}
-
-////////////////////////////////////////////////////////
-bool SeenPositions::Add(const SeenPosition *item)
-{
- std::pair<Coll::iterator, bool> ret = m_coll.insert(item);
- return ret.second;
-}
-
-////////////////////////////////////////////////////////
-QueueItem *QueueItem::Create(MemPool &pool, SCFG::Manager &mgr)
-{
- //QueueItem *item = new (pool.Allocate<QueueItem>()) QueueItem(pool);
- //return item;
-
- QueueItemRecycler &queueItemRecycler = mgr.GetQueueItemRecycler();
- QueueItem *ret;
- if (!queueItemRecycler.empty()) {
- // use item from recycle bin
- ret = queueItemRecycler.back();
- queueItemRecycler.pop_back();
- }
- else {
- // create new item
- ret = new (pool.Allocate<QueueItem>()) QueueItem(pool);
- }
-
- return ret;
-
-}
-
-QueueItem::QueueItem(MemPool &pool)
-:m_hypoIndColl(NULL)
-{
-
-}
-
-void QueueItem::Init(
- MemPool &pool,
- const SymbolBind &vSymbolBind,
- const SCFG::TargetPhrases &vTPS,
- const Vector<size_t> &hypoIndColl)
-{
- symbolBind = &vSymbolBind;
- tps = &vTPS;
- tpInd = 0;
- m_hyposColl = new (pool.Allocate<HyposColl>()) HyposColl(pool);
- m_hypoIndColl = &hypoIndColl;
-}
-
-void QueueItem::Init(
- MemPool &pool,
- const SymbolBind &vSymbolBind,
- const SCFG::TargetPhrases &vTPS,
- size_t vTPInd,
- const Vector<size_t> &hypoIndColl)
-{
- symbolBind = &vSymbolBind;
- tps = &vTPS;
- tpInd = vTPInd;
- m_hyposColl = NULL;
- m_hypoIndColl = &hypoIndColl;
-}
-
-void QueueItem::AddHypos(const Moses2::Hypotheses &hypos)
-{
- m_hyposColl->push_back(&hypos);
-}
-
-void QueueItem::CreateHypo(
- MemPool &systemPool,
- SCFG::Manager &mgr,
- const SCFG::InputPath &path,
- const SCFG::SymbolBind &symbolBind)
-{
- const SCFG::TargetPhraseImpl &tp = (*tps)[tpInd];
-
- hypo = SCFG::Hypothesis::Create(systemPool, mgr);
- hypo->Init(mgr, path, symbolBind, tp, *m_hypoIndColl);
- hypo->EvaluateWhenApplied();
-}
-
-void QueueItem::CreateNext(
- MemPool &systemPool,
- MemPool &mgrPool,
- SCFG::Manager &mgr,
- SCFG::Queue &queue,
- SeenPositions &seenPositions,
- const SCFG::InputPath &path)
-{
- //cerr << "tpInd=" << tpInd << " " << tps->GetSize() << endl;
- if (tpInd + 1 < tps->GetSize()) {
-
- const SCFG::TargetPhraseImpl &tp = (*tps)[tpInd + 1];
- SeenPosition *seenItem = new (mgrPool.Allocate<SeenPosition>()) SeenPosition(mgrPool, *symbolBind, *tps, tpInd + 1, *m_hypoIndColl);
- bool unseen = seenPositions.Add(seenItem);
-
- if (unseen) {
- QueueItem *item = QueueItem::Create(mgrPool, mgr);
- item->Init(mgrPool, *symbolBind, *tps, tpInd + 1, *m_hypoIndColl);
- item->m_hyposColl = m_hyposColl;
- item->CreateHypo(systemPool, mgr, path, *symbolBind);
-
- queue.push(item);
- }
- }
-
- assert(m_hyposColl->size() == m_hypoIndColl->size());
- const SCFG::TargetPhraseImpl &tp = (*tps)[tpInd];
- for (size_t i = 0; i < m_hyposColl->size(); ++i) {
- const Moses2::Hypotheses &hypos = *(*m_hyposColl)[i];
- size_t hypoInd = (*m_hypoIndColl)[i] + 1; // increment hypo
-
- if (hypoInd < hypos.size()) {
- SeenPosition *seenItem = new (mgrPool.Allocate<SeenPosition>()) SeenPosition(mgrPool, *symbolBind, *tps, tpInd, *m_hypoIndColl);
- seenItem->hypoIndColl[i] = hypoInd;
- bool unseen = seenPositions.Add(seenItem);
-
- if (unseen) {
- QueueItem *item = QueueItem::Create(mgrPool, mgr);
- item->Init(mgrPool, *symbolBind, *tps, tpInd, seenItem->hypoIndColl);
-
- item->m_hyposColl = m_hyposColl;
- item->CreateHypo(systemPool, mgr, path, *symbolBind);
-
- queue.push(item);
- }
- }
- }
-}
-
-std::string QueueItem::Debug(const System &system) const
-{
- stringstream out;
- out << hypo << " " << &(*tps)[tpInd] << "(" << tps << " " << tpInd << ") ";
- for (size_t i = 0; i < m_hypoIndColl->size(); ++i) {
- out << (*m_hypoIndColl)[i] << " ";
- }
-
- return out.str();
-}
-
-}
-}
diff --git a/contrib/moses2/SCFG/Misc.h b/contrib/moses2/SCFG/Misc.h
deleted file mode 100644
index 0e1c2a015..000000000
--- a/contrib/moses2/SCFG/Misc.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Misc.h
- *
- * Created on: 2 Jun 2016
- * Author: hieu
- */
-#pragma once
-#include <vector>
-#include <queue>
-#include <boost/unordered_set.hpp>
-#include "../HypothesisColl.h"
-#include "../Vector.h"
-#include "Hypothesis.h"
-
-namespace Moses2
-{
-
-namespace SCFG
-{
-class SymbolBind;
-class TargetPhrases;
-class Queue;
-
-///////////////////////////////////////////
-class SeenPosition
-{
-public:
- const SymbolBind &symbolBind;
- const SCFG::TargetPhrases &tps;
- size_t tpInd;
- Vector<size_t> hypoIndColl;
-
- SeenPosition(MemPool &pool,
- const SymbolBind &vSymbolBind,
- const SCFG::TargetPhrases &vtps,
- size_t numNT);
- SeenPosition(MemPool &pool,
- const SymbolBind &vSymbolBind,
- const SCFG::TargetPhrases &vtps,
- size_t vtpInd,
- const Vector<size_t> &vhypoIndColl);
-
- bool operator==(const SeenPosition &compare) const;
- size_t hash() const;
-
- std::string Debug(const System &system) const;
-
-};
-
-///////////////////////////////////////////
-
-class SeenPositions
-{
-public:
- bool Add(const SeenPosition *item);
-
- void clear()
- { m_coll.clear(); }
-
-
-protected:
- typedef boost::unordered_set<const SeenPosition*,
- UnorderedComparer<SeenPosition>, UnorderedComparer<SeenPosition> > Coll;
- Coll m_coll;
-};
-
-///////////////////////////////////////////
-class QueueItem
-{
-public:
- SCFG::Hypothesis *hypo;
-
- static QueueItem *Create(MemPool &pool, SCFG::Manager &mgr);
-
- void Init(
- MemPool &pool,
- const SymbolBind &symbolBind,
- const SCFG::TargetPhrases &tps,
- const Vector<size_t> &hypoIndColl);
- void Init(
- MemPool &pool,
- const SymbolBind &symbolBind,
- const SCFG::TargetPhrases &tps,
- size_t vTPInd,
- const Vector<size_t> &hypoIndColl);
- void AddHypos(const Moses2::Hypotheses &hypos);
- void CreateHypo(
- MemPool &systemPool,
- SCFG::Manager &mgr,
- const SCFG::InputPath &path,
- const SCFG::SymbolBind &symbolBind);
-
- void CreateNext(
- MemPool &systemPool,
- MemPool &mgrPool,
- SCFG::Manager &mgr,
- SCFG::Queue &queue,
- SeenPositions &seenPositions,
- const SCFG::InputPath &path);
-
- std::string Debug(const System &system) const;
-
-protected:
- typedef Vector<const Moses2::Hypotheses *> HyposColl;
- HyposColl *m_hyposColl;
-
- const SymbolBind *symbolBind;
- const SCFG::TargetPhrases *tps;
- size_t tpInd;
-
- const Vector<size_t> *m_hypoIndColl; // pointer to variable in seen position
- // hypos and ind to the 1 we're using
-
- QueueItem(MemPool &pool);
-
-};
-
-///////////////////////////////////////////
-
-typedef std::deque<QueueItem*> QueueItemRecycler;
-
-///////////////////////////////////////////
-class QueueItemOrderer
-{
-public:
- bool operator()(QueueItem* itemA, QueueItem* itemB) const
- {
- HypothesisFutureScoreOrderer orderer;
- return !orderer(itemA->hypo, itemB->hypo);
- }
-};
-
-///////////////////////////////////////////
-class Queue : public std::priority_queue<QueueItem*,
- std::vector<QueueItem*>,
- QueueItemOrderer>
-{
-
-};
-
-
-}
-}
-
-
-
diff --git a/contrib/moses2/SCFG/PhraseImpl.cpp b/contrib/moses2/SCFG/PhraseImpl.cpp
deleted file mode 100644
index 028ede8b0..000000000
--- a/contrib/moses2/SCFG/PhraseImpl.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * PhraseImpl.cpp
- *
- * Created on: 19 Feb 2016
- * Author: hieu
- */
-#include "PhraseImpl.h"
-
-using namespace std;
-
-namespace Moses2
-{
-namespace SCFG
-{
-PhraseImpl *PhraseImpl::CreateFromString(MemPool &pool, FactorCollection &vocab,
- const System &system, const std::string &str, bool skipLastWord)
-{
- std::vector<std::string> toks = Moses2::Tokenize(str);
- size_t size = toks.size();
- if (skipLastWord) {
- --size;
- }
- PhraseImpl *ret;
-
- ret = new (pool.Allocate<PhraseImpl>()) PhraseImpl(pool, size);
-
- for (size_t i = 0; i < size; ++i) {
- SCFG::Word &word = (*ret)[i];
- word.CreateFromString(vocab, system, toks[i]);
- }
-
- return ret;
-}
-
-}
-}
-
diff --git a/contrib/moses2/SCFG/PhraseImpl.h b/contrib/moses2/SCFG/PhraseImpl.h
deleted file mode 100644
index f26de313d..000000000
--- a/contrib/moses2/SCFG/PhraseImpl.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#pragma once
-#include "../PhraseImplTemplate.h"
-#include "../SubPhrase.h"
-#include "Word.h"
-
-namespace Moses2
-{
-namespace SCFG
-{
-
-class PhraseImpl: public PhraseImplTemplate<SCFG::Word>
-{
-public:
- static PhraseImpl *CreateFromString(MemPool &pool, FactorCollection &vocab,
- const System &system, const std::string &str, bool skipLastWord = true);
-
- PhraseImpl(MemPool &pool, size_t size) :
- PhraseImplTemplate<Word>(pool, size)
- {
- }
-
-};
-
-}
-}
-
diff --git a/contrib/moses2/SCFG/Sentence.cpp b/contrib/moses2/SCFG/Sentence.cpp
deleted file mode 100644
index 5e69a7e23..000000000
--- a/contrib/moses2/SCFG/Sentence.cpp
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Sentence.cpp
- *
- * Created on: 14 Dec 2015
- * Author: hieu
- */
-
-#include "Sentence.h"
-#include "../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-namespace SCFG
-{
-Sentence *Sentence::CreateFromString(MemPool &pool, FactorCollection &vocab,
- const System &system, const std::string &str, long translationId)
-{
- //cerr << "SCFG Sentence" << endl;
-
- Sentence *ret;
-
- if (system.options.input.xml_policy) {
- // xml
- ret = CreateFromStringXML(pool, vocab, system, str);
- //cerr << "ret=" << ret->Debug(system) << endl;
- }
- else {
- std::vector<std::string> toks = Tokenize(str);
- size_t size = toks.size() + 2;
-
- ret = new (pool.Allocate<SCFG::Sentence>()) Sentence(pool, size);
- ret->PhraseImplTemplate<SCFG::Word>::CreateFromString(vocab, system, toks, true);
-
- }
-
- return ret;
-}
-
-Sentence *Sentence::CreateFromStringXML(MemPool &pool, FactorCollection &vocab,
- const System &system, const std::string &str)
-{
- Sentence *ret;
-
- vector<XMLOption*> xmlOptions;
- pugi::xml_document doc;
-
- string str2 = "<xml>" + str + "</xml>";
- pugi::xml_parse_result result = doc.load(str2.c_str(),
- pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_eol | pugi::parse_comments);
- pugi::xml_node topNode = doc.child("xml");
-
- std::vector<std::string> toks;
- XMLParse(pool, system, 0, topNode, toks, xmlOptions);
-
- // debug
- /*
- cerr << "xmloptions:" << endl;
- for (size_t i = 0; i < xmlOptions.size(); ++i) {
- cerr << xmlOptions[i]->Debug(system) << endl;
- }
- */
-
- // create words
- size_t size = toks.size() + 2;
- ret = new (pool.Allocate<Sentence>()) Sentence(pool, size);
- ret->PhraseImplTemplate<SCFG::Word>::CreateFromString(vocab, system, toks, true);
-
- // xml
- for(size_t i=0; i<xmlOptions.size(); i++) {
- const XMLOption *xmlOption = xmlOptions[i];
- if (strcmp(xmlOption->GetNodeName(), "ne") == 0) {
- FactorType placeholderFactor = system.options.input.placeholder_factor;
- UTIL_THROW_IF2(placeholderFactor == NOT_FOUND,
- "Placeholder XML in input. Must have argument -placeholder-factor [NUM]");
- UTIL_THROW_IF2(xmlOption->phraseSize != 1,
- "Placeholder must only cover 1 word");
-
- const Factor *factor = vocab.AddFactor(xmlOption->GetEntity(), system, false);
- (*ret)[xmlOption->startPos + 1][placeholderFactor] = factor;
- }
- else {
- // default - forced translation. Add to class variable
- ret->AddXMLOption(system, xmlOption);
- }
- }
-
- //cerr << "ret=" << ret->Debug(system) << endl;
- return ret;
-}
-
-void Sentence::XMLParse(
- MemPool &pool,
- const System &system,
- size_t depth,
- const pugi::xml_node &parentNode,
- std::vector<std::string> &toks,
- vector<XMLOption*> &xmlOptions)
-{ // pugixml
- for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling()) {
- string nodeName = childNode.name();
- //cerr << depth << " nodeName=" << nodeName << endl;
-
- int startPos = toks.size();
-
- string value = childNode.value();
- if (!value.empty()) {
- //cerr << depth << "childNode text=" << value << endl;
- std::vector<std::string> subPhraseToks = Tokenize(value);
- for (size_t i = 0; i < subPhraseToks.size(); ++i) {
- toks.push_back(subPhraseToks[i]);
- }
- }
-
- if (!nodeName.empty()) {
- XMLOption *xmlOption = new (pool.Allocate<XMLOption>()) XMLOption(pool, nodeName, startPos);
-
- pugi::xml_attribute attr;
- attr = childNode.attribute("translation");
- if (!attr.empty()) {
- xmlOption->SetTranslation(pool, attr.as_string());
- }
-
- attr = childNode.attribute("entity");
- if (!attr.empty()) {
- xmlOption->SetEntity(pool, attr.as_string());
- }
-
- attr = childNode.attribute("prob");
- if (!attr.empty()) {
- xmlOption->prob = attr.as_float();
- }
-
- xmlOptions.push_back(xmlOption);
-
- // recursively call this function. For proper recursive trees
- XMLParse(pool, system, depth + 1, childNode, toks, xmlOptions);
-
- size_t endPos = toks.size();
- xmlOption->phraseSize = endPos - startPos;
-
- /*
- cerr << "xmlOptions=";
- xmlOption->Debug(cerr, system);
- cerr << endl;
- */
- }
-
- }
-}
-
-}
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/SCFG/Sentence.h b/contrib/moses2/SCFG/Sentence.h
deleted file mode 100644
index 7652a677e..000000000
--- a/contrib/moses2/SCFG/Sentence.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Sentence.h
- *
- * Created on: 14 Dec 2015
- * Author: hieu
- */
-#pragma once
-
-#include <string>
-#include "PhraseImpl.h"
-#include "../InputType.h"
-#include "../MemPool.h"
-#include "../legacy/Util2.h"
-#include "../pugixml.hpp"
-
-namespace Moses2
-{
-class FactorCollection;
-class System;
-
-namespace SCFG
-{
-
-class Sentence: public InputType, public PhraseImpl
-{
-public:
- static Sentence *CreateFromString(MemPool &pool, FactorCollection &vocab,
- const System &system, const std::string &str, long translationId);
-
- Sentence(MemPool &pool, size_t size)
- :InputType(pool)
- ,PhraseImpl(pool, size)
- {}
-
- virtual ~Sentence()
- {}
-
-protected:
- static Sentence *CreateFromStringXML(MemPool &pool, FactorCollection &vocab,
- const System &system, const std::string &str);
-
- static void XMLParse(
- MemPool &pool,
- const System &system,
- size_t depth,
- const pugi::xml_node &parentNode,
- std::vector<std::string> &toks,
- std::vector<XMLOption*> &xmlOptions);
-
-};
-
-}
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/SCFG/Stack.cpp b/contrib/moses2/SCFG/Stack.cpp
deleted file mode 100644
index 163761a49..000000000
--- a/contrib/moses2/SCFG/Stack.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-#include <boost/foreach.hpp>
-#include "Stacks.h"
-#include "Hypothesis.h"
-#include "TargetPhraseImpl.h"
-#include "Manager.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace SCFG
-{
-
-Stack::Stack(const Manager &mgr)
-:m_mgr(mgr)
-{
-}
-
-Stack::~Stack()
-{
- BOOST_FOREACH (const Coll::value_type &valPair, m_coll) {
- Moses2::HypothesisColl *hypos = valPair.second;
- delete hypos;
- }
-}
-
-void Stack::Add(SCFG::Hypothesis *hypo, Recycler<HypothesisBase*> &hypoRecycle,
- ArcLists &arcLists)
-{
- const SCFG::TargetPhraseImpl &tp = hypo->GetTargetPhrase();
- const SCFG::Word &lhs = tp.lhs;
- //cerr << "lhs=" << lhs << endl;
-
- HypothesisColl &coll = GetColl(lhs);
- coll.Add(m_mgr, hypo, hypoRecycle, arcLists);
-}
-
-size_t Stack::GetSize() const
-{
- size_t ret = 0;
- BOOST_FOREACH (const Coll::value_type &valPair, m_coll) {
- Moses2::HypothesisColl &hypos = *valPair.second;
- ret += hypos.GetSize();
- }
- return ret;
-}
-
-const Moses2::HypothesisColl *Stack::GetColl(const SCFG::Word &nt) const
-{
- assert(nt.isNonTerminal);
- Coll::const_iterator iter = m_coll.find(nt);
- if (iter != m_coll.end()) {
- return NULL;
- }
- else {
- return iter->second;
- }
-}
-
-Moses2::HypothesisColl &Stack::GetColl(const SCFG::Word &nt)
-{
- Moses2::HypothesisColl *ret;
- Coll::iterator iter;
- iter = m_coll.find(nt);
- if (iter == m_coll.end()) {
- ret = new Moses2::HypothesisColl(m_mgr);
- m_coll[nt] = ret;
- }
- else {
- ret = iter->second;
- }
- return *ret;
-}
-
-const Hypothesis *Stack::GetBestHypo() const
-{
- SCORE bestScore = -std::numeric_limits<SCORE>::infinity();
- const HypothesisBase *bestHypo = NULL;
- BOOST_FOREACH(const Coll::value_type &val, m_coll){
- const Moses2::HypothesisColl &hypos = *val.second;
- const Moses2::HypothesisBase *hypo = hypos.GetBestHypo();
-
- if (hypo->GetFutureScore() > bestScore) {
- bestScore = hypo->GetFutureScore();
- bestHypo = hypo;
- }
- }
- return &bestHypo->Cast<SCFG::Hypothesis>();
-}
-
-std::string Stack::Debug(const System &system) const
-{
- stringstream out;
- BOOST_FOREACH (const SCFG::Stack::Coll::value_type &valPair, m_coll) {
- const SCFG::Word &lhs = valPair.first;
- const Moses2::HypothesisColl &hypos = *valPair.second;
- out << "lhs=" << lhs.Debug(system);
- out << "=" << hypos.GetSize() << endl;
- out << hypos.Debug(system);
- out << endl;
- }
-
- return out.str();
-}
-
-}
-}
diff --git a/contrib/moses2/SCFG/Stack.h b/contrib/moses2/SCFG/Stack.h
deleted file mode 100644
index eb7ce2706..000000000
--- a/contrib/moses2/SCFG/Stack.h
+++ /dev/null
@@ -1,50 +0,0 @@
-#pragma once
-#include <boost/unordered_map.hpp>
-#include "../HypothesisColl.h"
-#include "../Recycler.h"
-#include "Word.h"
-
-namespace Moses2
-{
-class HypothesisBase;
-class ArcLists;
-
-namespace SCFG
-{
-class Hypothesis;
-class Manager;
-
-class Stack
-{
-public:
- typedef boost::unordered_map<SCFG::Word, Moses2::HypothesisColl*> Coll;
-
- Stack(const Manager &mgr);
- virtual ~Stack();
-
- const Coll &GetColl() const
- { return m_coll; }
-
- const Moses2::HypothesisColl *GetColl(const SCFG::Word &nt) const;
-
- size_t GetSize() const;
-
- void Add(SCFG::Hypothesis *hypo, Recycler<HypothesisBase*> &hypoRecycle,
- ArcLists &arcLists);
-
- const Hypothesis *GetBestHypo() const;
-
- std::string Debug(const System &system) const;
-
-protected:
- const Manager &m_mgr;
- Coll m_coll;
-
- Moses2::HypothesisColl &GetColl(const SCFG::Word &nt);
-
-};
-
-}
-
-}
-
diff --git a/contrib/moses2/SCFG/Stacks.cpp b/contrib/moses2/SCFG/Stacks.cpp
deleted file mode 100644
index 63214c7c3..000000000
--- a/contrib/moses2/SCFG/Stacks.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-#include "Stacks.h"
-#include "Stack.h"
-
-using namespace std;
-
-namespace Moses2
-{
-namespace SCFG
-{
-Stacks::~Stacks()
-{
- for (size_t i = 0; i < m_cells.size(); ++i) {
- std::vector<Stack*> &inner = m_cells[i];
- for (size_t j = 0; j < inner.size(); ++j) {
- Stack *stack = inner[j];
- delete stack;
- }
- }
-}
-
-void Stacks::Init(SCFG::Manager &mgr, size_t size)
-{
- m_cells.resize(size);
- for (size_t startPos = 0; startPos < size; ++startPos) {
- std::vector<Stack*> &inner = m_cells[startPos];
- inner.reserve(size - startPos);
- for (size_t endPos = startPos; endPos < size; ++endPos) {
- inner.push_back(new Stack(mgr));
- }
- }
-}
-
-void Stacks::OutputStacks() const
-{
- size_t size = m_cells.size();
-
- for (size_t startPos = 0; startPos < size; ++startPos) {
- cerr.width(3);
- cerr << startPos << " ";
- }
- cerr << endl;
- for (size_t width = 1; width <= size; width++) {
- for( size_t space = 0; space < width-1; space++ ) {
- cerr << " ";
- }
- for (size_t startPos = 0; startPos <= size-width; ++startPos) {
- cerr.width(3);
- cerr << GetStack(startPos, width).GetSize() << " ";
- }
- cerr << endl;
- }
-
-}
-
-}
-}
diff --git a/contrib/moses2/SCFG/Stacks.h b/contrib/moses2/SCFG/Stacks.h
deleted file mode 100644
index 6594d5763..000000000
--- a/contrib/moses2/SCFG/Stacks.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#pragma once
-
-#include <stddef.h>
-#include <vector>
-#include "Stack.h"
-
-namespace Moses2
-{
-class ManagerBase;
-
-namespace SCFG
-{
-class Stacks
-{
-public:
- virtual ~Stacks();
-
- void Init(SCFG::Manager &mgr, size_t size);
-
- const Stack &GetStack(size_t startPos, size_t size) const
- { return *m_cells[startPos][size - 1]; }
-
- Stack &GetStack(size_t startPos, size_t size)
- { return *m_cells[startPos][size - 1]; }
-
- void OutputStacks() const;
-
- const Stack &GetLastStack() const
- { return GetStack(0, m_cells.size()); }
-
-protected:
- std::vector<std::vector<Stack*> > m_cells;
-
-};
-
-}
-
-}
-
diff --git a/contrib/moses2/SCFG/TargetPhraseImpl.cpp b/contrib/moses2/SCFG/TargetPhraseImpl.cpp
deleted file mode 100644
index ebea6cef7..000000000
--- a/contrib/moses2/SCFG/TargetPhraseImpl.cpp
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * TargetPhraseImpl.cpp
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-
-#include <stdlib.h>
-#include "TargetPhraseImpl.h"
-#include "../Scores.h"
-#include "../System.h"
-#include "../MemPool.h"
-#include "../PhraseBased/Manager.h"
-#include "../AlignmentInfoCollection.h"
-#include "../TranslationModel/PhraseTable.h"
-
-using namespace std;
-
-namespace Moses2
-{
-namespace SCFG
-{
-
-TargetPhraseImpl *TargetPhraseImpl::CreateFromString(MemPool &pool,
- const PhraseTable &pt, const System &system, const std::string &str)
-{
- //cerr << "str=" << str << endl;
- FactorCollection &vocab = system.GetVocab();
-
- vector<string> toks = Tokenize(str);
- size_t size = toks.size() - 1;
- TargetPhraseImpl *ret =
- new (pool.Allocate<TargetPhraseImpl>()) TargetPhraseImpl(pool, pt, system,
- size);
-
- for (size_t i = 0; i < size; ++i) {
- SCFG::Word &word = (*ret)[i];
- word.CreateFromString(vocab, system, toks[i]);
- }
-
- // lhs
- ret->lhs.CreateFromString(vocab, system, toks.back());
- //cerr << "ret=" << *ret << endl;
- return ret;
-}
-
-TargetPhraseImpl::TargetPhraseImpl(MemPool &pool,
- const PhraseTable &pt,
- const System &system,
- size_t size)
-:Moses2::TargetPhrase<SCFG::Word>(pool, pt, system, size)
-,m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
-
-{
- m_scores = new (pool.Allocate<Scores>()) Scores(system, pool,
- system.featureFunctions.GetNumScores());
-
-}
-
-TargetPhraseImpl::~TargetPhraseImpl()
-{
- // TODO Auto-generated destructor stub
-}
-
-std::string TargetPhraseImpl::Debug(const System &system) const
-{
- stringstream out;
- out << lhs.Debug(system);
- out << " -> ";
- for (size_t i = 0; i < GetSize(); ++i) {
- const SCFG::Word &word = (*this)[i];
- out << word.Debug(system) << " ";
- }
- out << "pt=" << pt.GetName();
- out << " SCORES:" << GetScores().Debug(system);
- out << " ALIGN-T:";
- out << GetAlignTerm().Debug(system);
- out << " ALIGN-NT:";
- out << GetAlignNonTerm().Debug(system);
-
- return out.str();
-}
-
-void TargetPhraseImpl::SetAlignmentInfo(const std::string &alignString)
-{
- AlignmentInfo::CollType alignTerm, alignNonTerm;
-
- vector<string> toks = Tokenize(alignString);
- for (size_t i = 0; i < toks.size(); ++i) {
- vector<size_t> alignPair = Tokenize<size_t>(toks[i], "-");
- UTIL_THROW_IF2(alignPair.size() != 2, "Wrong alignment format");
-
- size_t sourcePos = alignPair[0];
- size_t targetPos = alignPair[1];
-
- if ((*this)[targetPos].isNonTerminal) {
- alignNonTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
- } else {
- alignTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
- }
- }
-
- SetAlignTerm(alignTerm);
- SetAlignNonTerm(alignNonTerm);
- // cerr << "TargetPhrase::SetAlignmentInfo(const StringPiece &alignString) this:|" << *this << "|\n";
-
- //cerr << "alignTerm=" << alignTerm.size() << endl;
- //cerr << "alignNonTerm=" << alignNonTerm.size() << endl;
-
-}
-
-size_t TargetPhraseImpl::GetNumNonTerms() const
-{
- size_t ret = 0;
- for (size_t i = 0; i < GetSize(); ++i) {
- if ((*this)[i].isNonTerminal) {
- ++ret;
- }
- }
- return ret;
-}
-
-
-}
-}
diff --git a/contrib/moses2/SCFG/TargetPhraseImpl.h b/contrib/moses2/SCFG/TargetPhraseImpl.h
deleted file mode 100644
index f526d02e7..000000000
--- a/contrib/moses2/SCFG/TargetPhraseImpl.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * TargetPhraseImpl.h
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include <iostream>
-#include "../Phrase.h"
-#include "../PhraseImplTemplate.h"
-#include "../TargetPhrase.h"
-#include "../MemPool.h"
-#include "../SubPhrase.h"
-#include "../AlignmentInfoCollection.h"
-#include "Word.h"
-
-namespace Moses2
-{
-class Scores;
-class Manager;
-class System;
-class PhraseTable;
-class AlignmentInfo;
-
-namespace SCFG
-{
-
-class TargetPhraseImpl: public Moses2::TargetPhrase<SCFG::Word>
-{
-public:
- typedef Moses2::TargetPhrase<SCFG::Word> Parent;
-
- SCFG::Word lhs;
-
- static TargetPhraseImpl *CreateFromString(MemPool &pool,
- const PhraseTable &pt, const System &system, const std::string &str);
-
- TargetPhraseImpl(MemPool &pool, const PhraseTable &pt, const System &system,
- size_t size);
- //TargetPhraseImpl(MemPool &pool, const System &system, const TargetPhraseImpl &copy);
-
- virtual ~TargetPhraseImpl();
-
- const AlignmentInfo &GetAlignNonTerm() const {
- return *m_alignNonTerm;
- }
-
- void SetAlignNonTerm(const AlignmentInfo &alignInfo) {
- m_alignNonTerm = &alignInfo;
- }
-
- void SetAlignmentInfo(const std::string &alignString);
-
- SCORE GetFutureScore() const
- { return m_scores->GetTotalScore() + m_estimatedScore; }
-
- virtual SCORE GetScoreForPruning() const
- { return GetFutureScore(); }
-
- void SetEstimatedScore(const SCORE &value)
- { m_estimatedScore = value; }
-
- std::string Debug(const System &system) const;
-
- size_t GetNumNonTerms() const;
-
- //mutable void *chartState;
-protected:
- SCORE m_estimatedScore;
-
- const AlignmentInfo *m_alignNonTerm;
-
- // ALNREP = alignment representation,
- // see AlignmentInfo constructors for supported representations
- template<typename ALNREP>
- void
- SetAlignNonTerm(const ALNREP &coll) {
- m_alignNonTerm = AlignmentInfoCollection::Instance().Add(coll);
- }
-
-};
-
-
-}
-}
-
diff --git a/contrib/moses2/SCFG/TargetPhrases.cpp b/contrib/moses2/SCFG/TargetPhrases.cpp
deleted file mode 100644
index f3d4b9790..000000000
--- a/contrib/moses2/SCFG/TargetPhrases.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * TargetPhrases.cpp
- *
- * Created on: 15 Apr 2016
- * Author: hieu
- */
-
-#include <boost/foreach.hpp>
-#include <sstream>
-#include <algorithm>
-#include "TargetPhrases.h"
-#include "TargetPhraseImpl.h"
-#include "../TargetPhrase.h"
-#include "../TranslationModel/PhraseTable.h"
-
-namespace Moses2
-{
-namespace SCFG
-{
-TargetPhrases::TargetPhrases(MemPool &pool)
-:m_coll(pool)
-{
-}
-
-TargetPhrases::TargetPhrases(MemPool &pool, size_t size)
-:m_coll(pool)
-{
- m_coll.reserve(size);
-}
-
-TargetPhrases::~TargetPhrases()
-{
- // TODO Auto-generated destructor stub
-}
-
-void TargetPhrases::SortAndPrune(size_t tableLimit)
-{
- iterator iterMiddle;
- iterMiddle =
- (tableLimit == 0 || m_coll.size() < tableLimit) ?
- m_coll.end() : m_coll.begin() + tableLimit;
-
- std::partial_sort(m_coll.begin(), iterMiddle, m_coll.end(),
- CompareScoreForPruning<SCFG::TargetPhraseImpl>());
-
- if (tableLimit && m_coll.size() > tableLimit) {
- m_coll.resize(tableLimit);
- }
-
- //cerr << "TargetPhrases=" << GetSize() << endl;
-}
-
-std::string TargetPhrases::Debug(const System &system) const
-{
- std::stringstream out;
-
- out << m_coll.size() << std::endl;
- BOOST_FOREACH(const SCFG::TargetPhraseImpl *tp, m_coll) {
- out << tp->Debug(system);
- out << std::endl;
- }
- return out.str();
-}
-
-}
-} /* namespace Moses2 */
diff --git a/contrib/moses2/SCFG/TargetPhrases.h b/contrib/moses2/SCFG/TargetPhrases.h
deleted file mode 100644
index 22502b3ef..000000000
--- a/contrib/moses2/SCFG/TargetPhrases.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * TargetPhrases.h
- *
- * Created on: 15 Apr 2016
- * Author: hieu
- */
-
-#pragma once
-#include <vector>
-#include <stddef.h>
-#include "../Vector.h"
-
-namespace Moses2
-{
-class MemPool;
-class System;
-
-namespace SCFG
-{
-class TargetPhraseImpl;
-
-class TargetPhrases
-{
- typedef Moses2::Vector<const SCFG::TargetPhraseImpl*> Coll;
-
-public:
- typedef Coll::iterator iterator;
- typedef Coll::const_iterator const_iterator;
- //! iterators
- const_iterator begin() const
- {
- return m_coll.begin();
- }
- const_iterator end() const
- {
- return m_coll.end();
- }
-
- const SCFG::TargetPhraseImpl& operator[](size_t ind) const
- {
- return *m_coll[ind];
- }
-
- TargetPhrases(MemPool &pool);
- TargetPhrases(MemPool &pool, size_t size);
- virtual ~TargetPhrases();
-
- size_t GetSize() const
- { return m_coll.size(); }
-
- void AddTargetPhrase(const SCFG::TargetPhraseImpl &targetPhrase)
- {
- m_coll.push_back(&targetPhrase);
- }
-
- void SortAndPrune(size_t tableLimit);
-
- std::string Debug(const System &system) const;
-
-protected:
- Coll m_coll;
-
-};
-
-}
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/SCFG/Word.cpp b/contrib/moses2/SCFG/Word.cpp
deleted file mode 100644
index 1794706da..000000000
--- a/contrib/moses2/SCFG/Word.cpp
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Word.cpp
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-#include <boost/functional/hash.hpp>
-#include "Word.h"
-#include "Hypothesis.h"
-#include "ActiveChart.h"
-#include "TargetPhraseImpl.h"
-#include "Sentence.h"
-#include "../legacy/Util2.h"
-#include "../System.h"
-#include "../AlignmentInfo.h"
-#include "../ManagerBase.h"
-
-using namespace std;
-
-namespace Moses2
-{
-namespace SCFG
-{
-Word::Word(const SCFG::Word &copy)
-:Moses2::Word(copy)
-,isNonTerminal(copy.isNonTerminal)
-{
-}
-
-void Word::CreateFromString(FactorCollection &vocab,
- const System &system,
- const std::string &str)
-{
- vector<string> toks;
-
- if (str[0] == '[' && str[str.size() - 1] == ']') {
- isNonTerminal = true;
-
- size_t startPos = str.find("[", 1);
- bool doubleNT = startPos != string::npos;
-
- if (doubleNT) {
- assert(startPos != string::npos);
- string str2 = str.substr(startPos + 1, str.size() - startPos - 2);
- toks = Tokenize(str2, "|");
- }
- else {
- string str2 = str.substr(1, str.size() - 2);
- toks = Tokenize(str2, "|");
- }
- }
- else {
- isNonTerminal = false;
- toks = Tokenize(str, "|");
- }
-
- // parse string
- for (size_t i = 0; i < toks.size(); ++i) {
- const string &tok = toks[i];
- //cerr << "tok=" << tok << endl;
-
- const Factor *factor = vocab.AddFactor(tok, system, isNonTerminal);
- m_factors[i] = factor;
- }
-}
-
-size_t Word::hash() const
-{
- size_t ret = Moses2::Word::hash();
- boost::hash_combine(ret, isNonTerminal);
- return ret;
-}
-
-size_t Word::hash(const std::vector<FactorType> &factors) const
-{
- size_t seed = isNonTerminal;
- for (size_t i = 0; i < factors.size(); ++i) {
- FactorType factorType = factors[i];
- const Factor *factor = m_factors[factorType];
- boost::hash_combine(seed, factor);
- }
- return seed;
-}
-
-void Word::OutputToStream(const System &system, std::ostream &out) const
-{
- if (isNonTerminal) {
- out << "[";
- }
- Moses2::Word::OutputToStream(system, out);
- if (isNonTerminal) {
- out << "]";
- }
-}
-
-void Word::OutputToStream(
- const ManagerBase &mgr,
- size_t targetPos,
- const SCFG::Hypothesis &hypo,
- std::ostream &out) const
-{
- const SCFG::TargetPhraseImpl &tp = hypo.GetTargetPhrase();
- const SCFG::SymbolBind &symbolBind = hypo.GetSymbolBind();
-
- bool outputWord = true;
- if (mgr.system.options.input.placeholder_factor != NOT_FOUND) {
- const AlignmentInfo &alignInfo = tp.GetAlignTerm();
- std::set<size_t> sourceAligns = alignInfo.GetAlignmentsForTarget(targetPos);
- if (sourceAligns.size() == 1) {
- size_t sourcePos = *sourceAligns.begin();
- /*
- cerr << "sourcePos=" << sourcePos << endl;
- cerr << "tp=" << tp.Debug(mgr.system) << endl;
- cerr << "m_symbolBind=" << symbolBind.Debug(mgr.system) << endl;
- */
- assert(sourcePos < symbolBind.GetSize());
- const Range &inputRange = symbolBind.coll[sourcePos].GetRange();
- assert(inputRange.GetNumWordsCovered() == 1);
- const SCFG::Sentence &sentence = static_cast<const SCFG::Sentence &>(mgr.GetInput());
- const SCFG::Word &sourceWord = sentence[inputRange.GetStartPos()];
- const Factor *factor = sourceWord[mgr.system.options.input.placeholder_factor];
- if (factor) {
- out << factor->GetString();
- outputWord = false;
- }
- }
- }
-
- if (outputWord){
- OutputToStream(mgr.system, out);
- }
-}
-
-std::string Word::Debug(const System &system) const
-{
- stringstream out;
- if (isNonTerminal) {
- out << "[";
- }
- out << Moses2::Word::Debug(system);
- if (isNonTerminal) {
- out << "]";
- }
- return out.str();
-}
-
-}
-}
-
diff --git a/contrib/moses2/SCFG/Word.h b/contrib/moses2/SCFG/Word.h
deleted file mode 100644
index 0c3aa158a..000000000
--- a/contrib/moses2/SCFG/Word.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Word.h
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include "../Word.h"
-
-namespace Moses2
-{
-class ManagerBase;
-
-namespace SCFG
-{
-class Hypothesis;
-
-class Word: public Moses2::Word
-{
-public:
- bool isNonTerminal;
-
- explicit Word() {}
- explicit Word(const SCFG::Word &copy);
-
- void CreateFromString(FactorCollection &vocab,
- const System &system,
- const std::string &str);
-
- bool operator==(const SCFG::Word &compare) const
- {
- int cmp = Moses2::Word::Compare(compare);
- if (cmp == 0 && isNonTerminal == compare.isNonTerminal) {
- return true;
- }
- else {
- return false;
- }
- }
-
- size_t hash() const;
- virtual size_t hash(const std::vector<FactorType> &factors) const;
-
- virtual void OutputToStream(const System &system, std::ostream &out) const;
- virtual void OutputToStream(
- const ManagerBase &mgr,
- size_t targetPos,
- const SCFG::Hypothesis &hypo,
- std::ostream &out) const;
-
- virtual std::string Debug(const System &system) const;
-
-protected:
-};
-
-inline size_t hash_value(const SCFG::Word &word)
-{ return word.hash(); }
-
-}
-}
-
diff --git a/contrib/moses2/SCFG/nbest/KBestExtractor.cpp b/contrib/moses2/SCFG/nbest/KBestExtractor.cpp
deleted file mode 100644
index ae7ec8634..000000000
--- a/contrib/moses2/SCFG/nbest/KBestExtractor.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * KBestExtractor.cpp
- *
- * Created on: 2 Aug 2016
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include <sstream>
-#include "KBestExtractor.h"
-#include "../Manager.h"
-#include "../Hypothesis.h"
-#include "../Stacks.h"
-#include "../Stack.h"
-#include "../Sentence.h"
-#include "../../System.h"
-#include "../../Scores.h"
-#include "../../legacy/Util2.h"
-
-using namespace std;
-
-namespace Moses2
-{
-//bool g_debug = false;
-
-namespace SCFG
-{
-/////////////////////////////////////////////////////////////
-KBestExtractor::KBestExtractor(const SCFG::Manager &mgr)
-:m_mgr(mgr)
-{
-
-}
-
-KBestExtractor::~KBestExtractor()
-{
-}
-
-void KBestExtractor::OutputToStream(std::stringstream &strm)
-{
- //cerr << "1" << flush;
- const Stack &lastStack = m_mgr.GetStacks().GetLastStack();
- UTIL_THROW_IF2(lastStack.GetColl().size() != 1, "Only suppose to be 1 hypo coll in last stack");
- UTIL_THROW_IF2(lastStack.GetColl().begin()->second == NULL, "NULL hypo collection");
-
- const Hypotheses &hypos = lastStack.GetColl().begin()->second->GetSortedAndPrunedHypos(m_mgr, m_mgr.arcLists);
- UTIL_THROW_IF2(hypos.size() != 1, "Only suppose to be 1 hypo in collection");
- const HypothesisBase *hypo = hypos[0];
-
- const ArcLists &arcLists = m_mgr.arcLists;
- const ArcList &arcList = arcLists.GetArcList(hypo);
- NBests &nbests = m_nbestColl.GetOrCreateNBests(m_mgr, arcList);
-
- size_t ind = 0;
- while (nbests.Extend(m_mgr, m_nbestColl, ind)) {
- const NBest &deriv = nbests.Get(ind);
- strm << m_mgr.GetTranslationId() << " ||| ";
- //cerr << "1" << flush;
- strm << deriv.GetStringExclSentenceMarkers();
- //cerr << "2" << flush;
- strm << " ||| ";
- deriv.GetScores().OutputBreakdown(strm, m_mgr.system);
- //cerr << "3" << flush;
- strm << "||| ";
- strm << deriv.GetScores().GetTotalScore();
- //cerr << "4" << flush;
-
- strm << endl;
-
- ++ind;
- }
-}
-
-}
-} /* namespace Moses2 */
diff --git a/contrib/moses2/SCFG/nbest/KBestExtractor.h b/contrib/moses2/SCFG/nbest/KBestExtractor.h
deleted file mode 100644
index 91b62d60b..000000000
--- a/contrib/moses2/SCFG/nbest/KBestExtractor.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * KBestExtractor.h
- *
- * Created on: 2 Aug 2016
- * Author: hieu
- */
-#pragma once
-#include <vector>
-#include <sstream>
-#include <boost/unordered_map.hpp>
-#include "NBest.h"
-#include "NBests.h"
-#include "NBestColl.h"
-
-namespace Moses2
-{
-class Scores;
-
-namespace SCFG
-{
-class Manager;
-class Hypothesis;
-class NBests;
-class NBestScoreOrderer;
-
-/////////////////////////////////////////////////////////////
-class KBestExtractor
-{
-public:
- KBestExtractor(const SCFG::Manager &mgr);
- virtual ~KBestExtractor();
-
- void OutputToStream(std::stringstream &strm);
-protected:
- const SCFG::Manager &m_mgr;
- NBestColl m_nbestColl;
-};
-
-}
-} /* namespace Moses2 */
diff --git a/contrib/moses2/SCFG/nbest/NBest.cpp b/contrib/moses2/SCFG/nbest/NBest.cpp
deleted file mode 100644
index 99c005ee3..000000000
--- a/contrib/moses2/SCFG/nbest/NBest.cpp
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * NBest.cpp
- *
- * Created on: 24 Aug 2016
- * Author: hieu
- */
-#include <sstream>
-#include <boost/foreach.hpp>
-#include "util/exception.hh"
-#include "NBest.h"
-#include "NBests.h"
-#include "NBestColl.h"
-#include "../Manager.h"
-#include "../TargetPhraseImpl.h"
-#include "../../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-namespace SCFG
-{
-
-NBest::NBest(
- const SCFG::Manager &mgr,
- const ArcList &varcList,
- size_t vind,
- NBestColl &nbestColl)
-:arcList(&varcList)
-,arcInd(vind)
-{
- const SCFG::Hypothesis &hypo = GetHypo();
-
- // copy scores from best hypo
- MemPool &pool = mgr.GetPool();
- m_scores = new (pool.Allocate<Scores>())
- Scores(mgr.system, pool, mgr.system.featureFunctions.GetNumScores(), hypo.GetScores());
-
- // children
- const ArcLists &arcLists = mgr.arcLists;
- //const SCFG::TargetPhraseImpl &tp = hypo.GetTargetPhrase();
-
- const Vector<const Hypothesis*> &prevHypos = hypo.GetPrevHypos();
- for (size_t i = 0; i < prevHypos.size(); ++i) {
- const SCFG::Hypothesis *prevHypo = prevHypos[i];
- const ArcList &childArc = arcLists.GetArcList(prevHypo);
- NBests &childNBests = nbestColl.GetOrCreateNBests(mgr, childArc);
- Child child(&childNBests, 0);
- children.push_back(child);
- }
-
- stringstream strm;
- OutputToStream(mgr, strm);
- m_str = strm.str();
-}
-
-NBest::NBest(const SCFG::Manager &mgr,
- const NBest &orig,
- size_t childInd,
- NBestColl &nbestColl)
-:arcList(orig.arcList)
-,arcInd(orig.arcInd)
-,children(orig.children)
-{
- Child &child = children[childInd];
- size_t &ind = child.second;
- ++ind;
- UTIL_THROW_IF2(ind >= child.first->GetSize(),
- "out of bound:" << ind << ">=" << child.first->GetSize());
-
- // scores
- MemPool &pool = mgr.GetPool();
- m_scores = new (pool.Allocate<Scores>())
- Scores(mgr.system,
- pool,
- mgr.system.featureFunctions.GetNumScores(),
- orig.GetScores());
-
- const Scores &origScores = orig.GetChild(childInd).GetScores();
- const Scores &newScores = GetChild(childInd).GetScores();
-
- m_scores->MinusEquals(mgr.system, origScores);
- m_scores->PlusEquals(mgr.system, newScores);
-
- stringstream strm;
- OutputToStream(mgr, strm);
- m_str = strm.str();
-}
-
-const SCFG::Hypothesis &NBest::GetHypo() const
-{
- const HypothesisBase *hypoBase = (*arcList)[arcInd];
- const SCFG::Hypothesis &hypo = *static_cast<const SCFG::Hypothesis*>(hypoBase);
- return hypo;
-}
-
-const NBest &NBest::GetChild(size_t ind) const
-{
- const Child &child = children[ind];
- const NBests &nbests = *child.first;
- const NBest &nbest = nbests.Get(child.second);
- return nbest;
-}
-
-
-void NBest::CreateDeviants(
- const SCFG::Manager &mgr,
- NBestColl &nbestColl,
- Contenders &contenders) const
-{
- if (arcInd + 1 < arcList->size()) {
- // to use next arclist, all children must be 1st. Not sure if this is correct
- bool ok = true;
- BOOST_FOREACH(const Child &child, children) {
- if (child.second) {
- ok = false;
- break;
- }
- }
-
- if (ok) {
- NBest *next = new NBest(mgr, *arcList, arcInd + 1, nbestColl);
- contenders.push(next);
- }
- }
-
- for (size_t childInd = 0; childInd < children.size(); ++childInd) {
- const Child &child = children[childInd];
- NBests &childNBests = *child.first;
- bool extended = childNBests.Extend(mgr, nbestColl, child.second + 1);
- if (extended) {
- //cerr << "HH1 " << childInd << endl;
- NBest *next = new NBest(mgr, *this, childInd, nbestColl);
-
- //cerr << "HH2 " << childInd << endl;
- contenders.push(next);
- //cerr << "HH3 " << childInd << endl;
- }
- }
-}
-
-void NBest::OutputToStream(
- const SCFG::Manager &mgr,
- std::stringstream &strm) const
-{
- const SCFG::Hypothesis &hypo = GetHypo();
- //strm << &hypo << " ";
-
- const SCFG::TargetPhraseImpl &tp = hypo.GetTargetPhrase();
-
- for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
- const SCFG::Word &word = tp[targetPos];
- //cerr << "word " << pos << "=" << word << endl;
- if (word.isNonTerminal) {
- //cerr << "is nt" << endl;
- // non-term. fill out with prev hypo
- size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[targetPos];
-
- UTIL_THROW_IF2(nonTermInd >= children.size(), "Out of bounds:" << nonTermInd << ">=" << children.size());
-
- const NBest &nbest = GetChild(nonTermInd);
- strm << nbest.GetString();
- }
- else {
- //cerr << "not nt" << endl;
- word.OutputToStream(hypo.GetManager(), targetPos, hypo, strm);
-
- strm << " ";
- }
- }
-}
-
-std::string NBest::Debug(const System &system) const
-{
- stringstream strm;
- strm << GetScores().GetTotalScore() << " "
- << arcList << "("
- << arcList->size() << ")["
- << arcInd << "] ";
- for (size_t i = 0; i < children.size(); ++i) {
- const Child &child = children[i];
- const NBest &childNBest = child.first->Get(child.second);
-
- strm << child.first << "("
- << child.first->GetSize() << ")["
- << child.second << "]";
- strm << childNBest.GetScores().GetTotalScore() << " ";
- }
- return strm.str();
-}
-
-}
-}
diff --git a/contrib/moses2/SCFG/nbest/NBest.h b/contrib/moses2/SCFG/nbest/NBest.h
deleted file mode 100644
index fa21866bb..000000000
--- a/contrib/moses2/SCFG/nbest/NBest.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * NBest.h
- *
- * Created on: 24 Aug 2016
- * Author: hieu
- */
-
-#pragma once
-#include <queue>
-#include <vector>
-#include <string>
-#include <stdlib.h>
-#include "../../Scores.h"
-#include "../../ArcLists.h"
-
-namespace Moses2
-{
-class Scores;
-class System;
-
-namespace SCFG
-{
-class NBest;
-class NBests;
-class NBestScoreOrderer;
-class Manager;
-class NBestColl;
-class Hypothesis;
-
-/////////////////////////////////////////////////////////////
-typedef std::priority_queue<NBest*, std::vector<NBest*>, NBestScoreOrderer> Contenders;
-
-/////////////////////////////////////////////////////////////
-class NBest
-{
-public:
- const ArcList *arcList;
- size_t arcInd;
-
- typedef std::pair<NBests*, size_t> Child; // key to another NBest
- typedef std::vector<Child> Children;
- Children children;
-
- NBest(const SCFG::Manager &mgr,
- const ArcList &varcList,
- size_t vind,
- NBestColl &nbestColl);
-
- NBest(const SCFG::Manager &mgr,
- const NBest &orig,
- size_t childInd,
- NBestColl &nbestColl);
-
-
- void CreateDeviants(
- const SCFG::Manager &mgr,
- NBestColl &nbestColl,
- Contenders &contenders) const;
-
- const Scores &GetScores() const
- { return *m_scores; }
-
- const NBest &GetChild(size_t ind) const;
-
- const std::string &GetString() const
- { return m_str; }
-
- std::string GetStringExclSentenceMarkers() const
- {
- std::string ret = m_str.substr(4, m_str.size() - 10);
- return ret;
- }
-
- std::string Debug(const System &system) const;
-
-protected:
- Scores *m_scores;
- std::string m_str;
-
- const SCFG::Hypothesis &GetHypo() const;
-
- void OutputToStream(
- const SCFG::Manager &mgr,
- std::stringstream &strm) const;
-};
-
-/////////////////////////////////////////////////////////////
-class NBestScoreOrderer
-{
-public:
- bool operator()(const NBest* a, const NBest* b) const
- {
- return a->GetScores().GetTotalScore() < b->GetScores().GetTotalScore();
- }
-};
-
-}
-}
-
diff --git a/contrib/moses2/SCFG/nbest/NBestColl.cpp b/contrib/moses2/SCFG/nbest/NBestColl.cpp
deleted file mode 100644
index 8cd386a08..000000000
--- a/contrib/moses2/SCFG/nbest/NBestColl.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * NBestColl.cpp
- *
- * Created on: 24 Aug 2016
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include "util/exception.hh"
-#include "NBestColl.h"
-#include "NBests.h"
-#include "../Manager.h"
-#include "../../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-namespace SCFG
-{
-
-/////////////////////////////////////////////////////////////
-NBestColl::~NBestColl()
-{
- BOOST_FOREACH(const Coll::value_type &valPair, m_candidates) {
- NBests *nbests = valPair.second;
- delete nbests;
- }
-}
-
-void NBestColl::Add(const SCFG::Manager &mgr, const ArcList &arcList)
-{
- NBests &nbests = GetOrCreateNBests(mgr, arcList);
- //cerr << "nbests for " << &nbests << ":";
-}
-
-NBests &NBestColl::GetOrCreateNBests(const SCFG::Manager &mgr, const ArcList &arcList)
-{
- NBests *ret;
- Coll::iterator iter = m_candidates.find(&arcList);
- if(iter == m_candidates.end()) {
- ret = new NBests(mgr, arcList, *this);
- m_candidates[&arcList] = ret;
- }
- else {
- ret = iter->second;
- }
- return *ret;
-}
-
-
-}
-}
-
diff --git a/contrib/moses2/SCFG/nbest/NBestColl.h b/contrib/moses2/SCFG/nbest/NBestColl.h
deleted file mode 100644
index 1ef8a5698..000000000
--- a/contrib/moses2/SCFG/nbest/NBestColl.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * NBestColl.h
- *
- * Created on: 24 Aug 2016
- * Author: hieu
- */
-#pragma once
-#include <boost/unordered_map.hpp>
-#include "../../ArcLists.h"
-
-
-namespace Moses2
-{
-namespace SCFG
-{
-class NBests;
-class Manager;
-
-class NBestColl
-{
-public:
- virtual ~NBestColl();
-
- void Add(const SCFG::Manager &mgr, const ArcList &arcList);
- NBests &GetOrCreateNBests(const SCFG::Manager &mgr, const ArcList &arcList);
-
-protected:
- typedef boost::unordered_map<const ArcList*, NBests*> Coll;
- Coll m_candidates;
-
-};
-
-}
-}
-
-
diff --git a/contrib/moses2/SCFG/nbest/NBests.cpp b/contrib/moses2/SCFG/nbest/NBests.cpp
deleted file mode 100644
index ea7e835dc..000000000
--- a/contrib/moses2/SCFG/nbest/NBests.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * NBests.cpp
- *
- * Created on: 24 Aug 2016
- * Author: hieu
- */
-
-#include <boost/foreach.hpp>
-#include "NBests.h"
-#include "../Manager.h"
-#include "../../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-namespace SCFG
-{
-NBests::NBests(const SCFG::Manager &mgr,
- const ArcList &arcList,
- NBestColl &nbestColl)
-:indIter(0)
-{
- // best
- NBest *contender = new NBest(mgr, arcList, 0, nbestColl);
- contenders.push(contender);
- bool extended = Extend(mgr, nbestColl, 0);
- assert(extended);
-}
-
-NBests::~NBests()
-{
- BOOST_FOREACH(const NBest *nbest, m_coll) {
- delete nbest;
- }
-
- // delete bad contenders left in queue
- while (!contenders.empty()) {
- NBest *contender = contenders.top();
- contenders.pop();
- delete contender;
- }
-}
-
-bool NBests::Extend(const SCFG::Manager &mgr,
- NBestColl &nbestColl,
- size_t ind)
-{
- if (ind < m_coll.size()) {
- // asking for 1 we've dont already
- return true;
- }
-
- assert(ind == m_coll.size());
-
- // checks
- if (ind >= mgr.system.options.nbest.nbest_size) {
- return false;
- }
-
- size_t maxIter = mgr.system.options.nbest.nbest_size * mgr.system.options.nbest.factor;
-
- // MAIN LOOP, create 1 new deriv.
- // The loop is for distinct nbest
- bool ok = false;
- while (!ok) {
- ++indIter;
- if (indIter > maxIter) {
- return false;
- }
-
- if (contenders.empty()) {
- return false;
- }
-
- NBest *contender = contenders.top();
- contenders.pop();
-
- contender->CreateDeviants(mgr, nbestColl, contenders);
-
- if (mgr.system.options.nbest.only_distinct) {
- const string &tgtPhrase = contender->GetString();
- //cerr << "tgtPhrase=" << tgtPhrase << endl;
- boost::hash<std::string> string_hash;
- size_t hash = string_hash(tgtPhrase);
-
- if (distinctHypos.insert(hash).second) {
- ok = true;
- }
- }
- else {
- ok = true;
- }
-
- if (ok) {
- Add(contender);
- //cerr << best->GetScores().GetTotalScore() << " ";
- //cerr << best->Debug(mgr.system) << endl;
- return true;
- }
- else {
- delete contender;
- }
- }
-
- return false;
-}
-
-}
-}
-
diff --git a/contrib/moses2/SCFG/nbest/NBests.h b/contrib/moses2/SCFG/nbest/NBests.h
deleted file mode 100644
index a9cb93a5d..000000000
--- a/contrib/moses2/SCFG/nbest/NBests.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * NBests.h
- *
- * Created on: 24 Aug 2016
- * Author: hieu
- */
-
-#pragma once
-#include <boost/unordered_set.hpp>
-#include "NBest.h"
-
-namespace Moses2
-{
-namespace SCFG
-{
-
-class NBests
-{
-public:
- Contenders contenders;
- boost::unordered_set<size_t> distinctHypos;
-
- NBests(const SCFG::Manager &mgr,
- const ArcList &arcList,
- NBestColl &nbestColl);
-
- virtual ~NBests();
-
- size_t GetSize() const
- { return m_coll.size(); }
-
- const NBest &Get(size_t ind) const
- { return *m_coll[ind]; }
-
- bool Extend(const SCFG::Manager &mgr,
- NBestColl &nbestColl,
- size_t ind);
-
-protected:
- std::vector<const NBest*> m_coll;
- size_t indIter;
-
- void Add(const NBest *nbest)
- {
- m_coll.push_back(nbest);
- }
-
-};
-
-
-}
-}
-
diff --git a/contrib/moses2/Scores.cpp b/contrib/moses2/Scores.cpp
deleted file mode 100644
index b6e731807..000000000
--- a/contrib/moses2/Scores.cpp
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * Scores.cpp
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-
-#include <boost/foreach.hpp>
-#include <vector>
-#include <cstddef>
-#include <stdio.h>
-#include "Scores.h"
-#include "Weights.h"
-#include "System.h"
-#include "FF/FeatureFunction.h"
-#include "FF/FeatureFunctions.h"
-#include "legacy/Util2.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-Scores::Scores(const System &system, MemPool &pool, size_t numScores) :
- m_total(0)
-{
- if (system.options.nbest.nbest_size) {
- m_scores = new (pool.Allocate<SCORE>(numScores)) SCORE[numScores];
- Init<SCORE>(m_scores, numScores, 0);
- }
- else {
- m_scores = NULL;
- }
-}
-
-Scores::Scores(const System &system, MemPool &pool, size_t numScores,
- const Scores &origScores) :
- m_total(origScores.m_total)
-{
- if (system.options.nbest.nbest_size) {
- m_scores = new (pool.Allocate<SCORE>(numScores)) SCORE[numScores];
- memcpy(m_scores, origScores.m_scores, sizeof(SCORE) * numScores);
- }
- else {
- m_scores = NULL;
- }
-}
-
-Scores::~Scores()
-{
-
-}
-
-const SCORE *Scores::GetScores(const FeatureFunction &featureFunction) const
-{
- assert(m_scores);
- size_t ffStartInd = featureFunction.GetStartInd();
- const SCORE &scores = m_scores[ffStartInd];
- return &scores;
-}
-
-void Scores::Reset(const System &system)
-{
- if (system.options.nbest.nbest_size) {
- size_t numScores = system.featureFunctions.GetNumScores();
- Init<SCORE>(m_scores, numScores, 0);
- }
- m_total = 0;
-}
-
-void Scores::PlusEquals(const System &system,
- const FeatureFunction &featureFunction, const SCORE &score)
-{
- assert(featureFunction.GetNumScores() == 1);
-
- const Weights &weights = system.weights;
-
- size_t ffStartInd = featureFunction.GetStartInd();
- if (system.options.nbest.nbest_size) {
- m_scores[ffStartInd] += score;
- }
- SCORE weight = weights[ffStartInd];
- m_total += score * weight;
-}
-
-void Scores::PlusEquals(const System &system,
- const FeatureFunction &featureFunction, const SCORE &score, size_t offset)
-{
- assert(offset < featureFunction.GetNumScores());
-
- const Weights &weights = system.weights;
-
- size_t ffStartInd = featureFunction.GetStartInd();
- if (system.options.nbest.nbest_size) {
- m_scores[ffStartInd + offset] += score;
- }
- SCORE weight = weights[ffStartInd + offset];
- m_total += score * weight;
-}
-
-void Scores::PlusEquals(const System &system,
- const FeatureFunction &featureFunction, const std::vector<SCORE> &scores)
-{
- assert(scores.size() == featureFunction.GetNumScores());
-
- const Weights &weights = system.weights;
-
- size_t ffStartInd = featureFunction.GetStartInd();
- for (size_t i = 0; i < scores.size(); ++i) {
- SCORE incrScore = scores[i];
- if (system.options.nbest.nbest_size) {
- m_scores[ffStartInd + i] += incrScore;
- }
- //cerr << "ffStartInd=" << ffStartInd << " " << i << endl;
- SCORE weight = weights[ffStartInd + i];
- m_total += incrScore * weight;
- }
-}
-
-void Scores::PlusEquals(const System &system,
- const FeatureFunction &featureFunction, SCORE scores[])
-{
- //assert(scores.size() == featureFunction.GetNumScores());
-
- const Weights &weights = system.weights;
-
- size_t ffStartInd = featureFunction.GetStartInd();
- for (size_t i = 0; i < featureFunction.GetNumScores(); ++i) {
- SCORE incrScore = scores[i];
- if (system.options.nbest.nbest_size) {
- m_scores[ffStartInd + i] += incrScore;
- }
- //cerr << "ffStartInd=" << ffStartInd << " " << i << endl;
- SCORE weight = weights[ffStartInd + i];
- m_total += incrScore * weight;
- }
-}
-
-void Scores::PlusEquals(const System &system, const Scores &other)
-{
- size_t numScores = system.featureFunctions.GetNumScores();
- if (system.options.nbest.nbest_size) {
- for (size_t i = 0; i < numScores; ++i) {
- m_scores[i] += other.m_scores[i];
- }
- }
- m_total += other.m_total;
-}
-
-void Scores::MinusEquals(const System &system, const Scores &other)
-{
- size_t numScores = system.featureFunctions.GetNumScores();
- if (system.options.nbest.nbest_size) {
- for (size_t i = 0; i < numScores; ++i) {
- m_scores[i] -= other.m_scores[i];
- }
- }
- m_total -= other.m_total;
-}
-
-void Scores::Assign(const System &system,
- const FeatureFunction &featureFunction, const SCORE &score)
-{
- assert(featureFunction.GetNumScores() == 1);
-
- const Weights &weights = system.weights;
-
- size_t ffStartInd = featureFunction.GetStartInd();
-
- if (system.options.nbest.nbest_size) {
- assert(m_scores[ffStartInd] == 0);
- m_scores[ffStartInd] = score;
- }
- SCORE weight = weights[ffStartInd];
- m_total += score * weight;
-
-}
-
-void Scores::Assign(const System &system,
- const FeatureFunction &featureFunction, const std::vector<SCORE> &scores)
-{
- assert(scores.size() == featureFunction.GetNumScores());
-
- const Weights &weights = system.weights;
-
- size_t ffStartInd = featureFunction.GetStartInd();
- for (size_t i = 0; i < scores.size(); ++i) {
- SCORE incrScore = scores[i];
-
- if (system.options.nbest.nbest_size) {
- assert(m_scores[ffStartInd + i] == 0);
- m_scores[ffStartInd + i] = incrScore;
- }
- //cerr << "ffStartInd=" << ffStartInd << " " << i << endl;
- SCORE weight = weights[ffStartInd + i];
- m_total += incrScore * weight;
- }
-}
-
-void Scores::CreateFromString(const std::string &str,
- const FeatureFunction &featureFunction, const System &system,
- bool transformScores)
-{
- vector<SCORE> scores = Tokenize<SCORE>(str);
- if (transformScores) {
- std::transform(scores.begin(), scores.end(), scores.begin(),
- TransformScore);
- std::transform(scores.begin(), scores.end(), scores.begin(), FloorScore);
- }
-
- /*
- std::copy(scores.begin(),scores.end(),
- std::ostream_iterator<SCORE>(cerr," "));
- */
-
- PlusEquals(system, featureFunction, scores);
-}
-
-std::string Scores::Debug(const System &system) const
-{
- stringstream out;
- out << "total=" << m_total;
-
- if (system.options.nbest.nbest_size) {
- out << ", ";
- BOOST_FOREACH(const FeatureFunction *ff, system.featureFunctions.GetFeatureFunctions()){
- out << ff->GetName() << "= ";
- for (size_t i = ff->GetStartInd(); i < (ff->GetStartInd() + ff->GetNumScores()); ++i) {
- out << m_scores[i] << " ";
- }
- }
- }
-
- return out.str();
-}
-
-void Scores::OutputBreakdown(std::ostream &out, const System &system) const
-{
- if (system.options.nbest.nbest_size) {
- BOOST_FOREACH(const FeatureFunction *ff, system.featureFunctions.GetFeatureFunctions()){
- if (ff->IsTuneable()) {
- out << ff->GetName() << "= ";
- for (size_t i = ff->GetStartInd(); i < (ff->GetStartInd() + ff->GetNumScores()); ++i) {
- out << m_scores[i] << " ";
- }
- }
- }
- }
-}
-
-// static functions to work out estimated scores
-SCORE Scores::CalcWeightedScore(const System &system,
- const FeatureFunction &featureFunction, SCORE scores[])
-{
- SCORE ret = 0;
-
- const Weights &weights = system.weights;
-
- size_t ffStartInd = featureFunction.GetStartInd();
- for (size_t i = 0; i < featureFunction.GetNumScores(); ++i) {
- SCORE incrScore = scores[i];
-
- //cerr << "ffStartInd=" << ffStartInd << " " << i << endl;
- SCORE weight = weights[ffStartInd + i];
- ret += incrScore * weight;
- }
-
- return ret;
-}
-
-SCORE Scores::CalcWeightedScore(const System &system,
- const FeatureFunction &featureFunction, SCORE score)
-{
- const Weights &weights = system.weights;
- assert(featureFunction.GetNumScores() == 1);
-
- size_t ffStartInd = featureFunction.GetStartInd();
- SCORE weight = weights[ffStartInd];
- SCORE ret = score * weight;
-
- return ret;
-}
-
-}
-
diff --git a/contrib/moses2/Scores.h b/contrib/moses2/Scores.h
deleted file mode 100644
index ef4896ad1..000000000
--- a/contrib/moses2/Scores.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Scores.h
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-#include <iostream>
-#include <string>
-#include "TypeDef.h"
-#include "MemPool.h"
-
-namespace Moses2
-{
-
-class FeatureFunction;
-class FeatureFunctions;
-class System;
-
-class Scores
-{
-public:
- Scores(const System &system, MemPool &pool, size_t numScores);
- Scores(const System &system, MemPool &pool, size_t numScores,
- const Scores &origScores);
-
- virtual ~Scores();
-
- SCORE GetTotalScore() const
- { return m_total; }
-
- const SCORE *GetScores(const FeatureFunction &featureFunction) const;
-
- void Reset(const System &system);
-
- void CreateFromString(const std::string &str,
- const FeatureFunction &featureFunction, const System &system,
- bool transformScores);
-
- void PlusEquals(const System &system, const FeatureFunction &featureFunction,
- const SCORE &score);
-
- void PlusEquals(const System &system, const FeatureFunction &featureFunction,
- const SCORE &score, size_t offset);
-
- void PlusEquals(const System &system, const FeatureFunction &featureFunction,
- const std::vector<SCORE> &scores);
-
- void PlusEquals(const System &system, const FeatureFunction &featureFunction,
- SCORE scores[]);
-
- void PlusEquals(const System &system, const Scores &scores);
-
- void MinusEquals(const System &system, const Scores &scores);
-
- void Assign(const System &system, const FeatureFunction &featureFunction,
- const SCORE &score);
-
- void Assign(const System &system, const FeatureFunction &featureFunction,
- const std::vector<SCORE> &scores);
-
- std::string Debug(const System &system) const;
-
- void OutputBreakdown(std::ostream &out, const System &system) const;
-
- // static functions to work out estimated scores
- static SCORE CalcWeightedScore(const System &system,
- const FeatureFunction &featureFunction, SCORE scores[]);
-
- static SCORE CalcWeightedScore(const System &system,
- const FeatureFunction &featureFunction, SCORE score);
-
-protected:
- SCORE *m_scores;
- SCORE m_total;
-};
-
-}
-
diff --git a/contrib/moses2/SubPhrase.cpp b/contrib/moses2/SubPhrase.cpp
deleted file mode 100644
index 4d3c20f14..000000000
--- a/contrib/moses2/SubPhrase.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * SubPhrase.cpp
- *
- * Created on: 19 Feb 2016
- * Author: hieu
- */
-#include "SubPhrase.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-
-
-}
-
diff --git a/contrib/moses2/SubPhrase.h b/contrib/moses2/SubPhrase.h
deleted file mode 100644
index 893a7ba8f..000000000
--- a/contrib/moses2/SubPhrase.h
+++ /dev/null
@@ -1,54 +0,0 @@
-#pragma once
-#include <sstream>
-#include "Phrase.h"
-#include "Word.h"
-#include "SCFG/Word.h"
-
-namespace Moses2
-{
-class System;
-
-template<typename WORD>
-class SubPhrase: public Phrase<WORD>
-{
-public:
- SubPhrase(const Phrase<WORD> &origPhrase, size_t start, size_t size)
- :m_origPhrase(&origPhrase)
- ,m_start(start)
- ,m_size(size)
- {}
-
- virtual const WORD& operator[](size_t pos) const
- { return (*m_origPhrase)[pos + m_start]; }
-
- virtual size_t GetSize() const
- { return m_size; }
-
- SubPhrase GetSubPhrase(size_t start, size_t size) const
- {
- SubPhrase ret(*m_origPhrase, m_start + start, size);
- return ret;
- }
-
- virtual std::string Debug(const System &system) const
- {
- std::stringstream out;
- if (GetSize()) {
- out << (*this)[0].Debug(system);
- for (size_t i = 1; i < GetSize(); ++i) {
- const WORD &word = (*this)[i];
- out << " " << word.Debug(system);
- }
- }
-
- return out.str();
- }
-
-protected:
- const Phrase<WORD> *m_origPhrase;
- size_t m_start, m_size;
-};
-
-
-}
-
diff --git a/contrib/moses2/System.cpp b/contrib/moses2/System.cpp
deleted file mode 100644
index c02c47a6c..000000000
--- a/contrib/moses2/System.cpp
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * System.cpp
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-#include <string>
-#include <iostream>
-#include <boost/foreach.hpp>
-#include <boost/thread.hpp>
-#include <boost/thread/mutex.hpp>
-#include "System.h"
-#include "FF/FeatureFunction.h"
-#include "TranslationModel/UnknownWordPenalty.h"
-#include "legacy/Util2.h"
-#include "util/exception.hh"
-
-using namespace std;
-
-namespace Moses2
-{
-
-System::System(const Parameter &paramsArg) :
- params(paramsArg), featureFunctions(*this)
-{
- options.init(paramsArg);
- IsPb();
-
- bestCollector.reset(new OutputCollector());
-
- params.SetParameter(cpuAffinityOffset, "cpu-affinity-offset", -1);
- params.SetParameter(cpuAffinityOffsetIncr, "cpu-affinity-increment", 1);
-
- const PARAM_VEC *section;
-
- // output collectors
- if (options.nbest.nbest_size) {
- nbestCollector.reset(new OutputCollector(options.nbest.output_file_path));
- }
-
- if (!options.output.detailed_transrep_filepath.empty()) {
- detailedTranslationCollector.reset(new OutputCollector(options.output.detailed_transrep_filepath));
- }
-
- featureFunctions.Create();
- LoadWeights();
-
- if (params.GetParam("show-weights")) {
- cerr << "Showing weights then exit" << endl;
- featureFunctions.ShowWeights(weights);
- //return;
- }
-
- cerr << "START featureFunctions.Load()" << endl;
- featureFunctions.Load();
- cerr << "START LoadMappings()" << endl;
- LoadMappings();
- cerr << "END LoadMappings()" << endl;
- LoadDecodeGraphBackoff();
- cerr << "END LoadDecodeGraphBackoff()" << endl;
-
- UTIL_THROW_IF2(options.input.xml_policy == XmlConstraint, "XmlConstraint not supported");
-
- // max spans for scfg decoding
- if (!isPb) {
- section = params.GetParam("max-chart-span");
- if (section && section->size()) {
- maxChartSpans = Scan<size_t>(*section);
- maxChartSpans.resize(mappings.size(), DEFAULT_MAX_CHART_SPAN);
-
- /*
- cerr << "maxChartSpans=" << maxChartSpans.size();
- for (size_t i = 0; i < maxChartSpans.size(); ++i) {
- cerr << " " << mappings[i]->GetName() << "=" << maxChartSpans[i];
- }
- cerr << endl;
- */
- }
- }
-
-}
-
-System::~System()
-{
-}
-
-void System::LoadWeights()
-{
- weights.Init(featureFunctions);
-
- //cerr << "Weights:" << endl;
- typedef std::map<std::string, std::vector<float> > WeightMap;
- const WeightMap &allWeights = params.GetAllWeights();
-
- // check all weights are there for all FF
- const std::vector<const FeatureFunction*> &ffs = featureFunctions.GetFeatureFunctions();
- BOOST_FOREACH(const FeatureFunction *ff, ffs) {
- if (ff->IsTuneable()) {
- const std::string &ffName = ff->GetName();
- WeightMap::const_iterator iterWeight = allWeights.find(ffName);
- UTIL_THROW_IF2(iterWeight == allWeights.end(), "Must specify weight for " << ffName);
- }
- }
-
-
- // set weight
- BOOST_FOREACH(const WeightMap::value_type &valPair, allWeights) {
- const string &ffName = valPair.first;
- const std::vector<float> &ffWeights = valPair.second;
- /*
- cerr << ffName << "=";
- for (size_t i = 0; i < ffWeights.size(); ++i) {
- cerr << ffWeights[i] << " ";
- }
- cerr << endl;
- */
- weights.SetWeights(featureFunctions, ffName, ffWeights);
- }
-}
-
-void System::LoadMappings()
-{
- const PARAM_VEC *vec = params.GetParam("mapping");
- UTIL_THROW_IF2(vec == NULL, "Must have [mapping] section");
-
- BOOST_FOREACH(const std::string &line, *vec){
- vector<string> toks = Tokenize(line);
- assert( (toks.size() == 2 && toks[0] == "T") || (toks.size() == 3 && toks[1] == "T") );
-
- size_t ptInd;
- if (toks.size() == 2) {
- ptInd = Scan<size_t>(toks[1]);
- }
- else {
- ptInd = Scan<size_t>(toks[2]);
- }
- const PhraseTable *pt = featureFunctions.GetPhraseTableExcludeUnknownWordPenalty(ptInd);
- mappings.push_back(pt);
-}
-
-// unk pt
- const UnknownWordPenalty *unkWP = featureFunctions.GetUnknownWordPenalty();
- if (unkWP) {
- mappings.push_back(unkWP);
- }
-}
-
-void System::LoadDecodeGraphBackoff()
-{
- const PARAM_VEC *vec = params.GetParam("decoding-graph-backoff");
-
- for (size_t i = 0; i < mappings.size(); ++i) {
- PhraseTable *pt = const_cast<PhraseTable*>(mappings[i]);
-
- if (vec && vec->size() < i) {
- pt->decodeGraphBackoff = Scan<int>((*vec)[i]);
- }
- else if (pt == featureFunctions.GetUnknownWordPenalty()) {
- pt->decodeGraphBackoff = 1;
- }
- else {
- pt->decodeGraphBackoff = 0;
- }
- }
-}
-
-MemPool &System::GetSystemPool() const
-{
- return GetThreadSpecificObj(m_systemPool);
-}
-
-MemPool &System::GetManagerPool() const
-{
- return GetThreadSpecificObj(m_managerPool);
-}
-
-FactorCollection &System::GetVocab() const
-{
- return m_vocab;
-}
-
-Recycler<HypothesisBase*> &System::GetHypoRecycler() const
-{
- return GetThreadSpecificObj(m_hypoRecycler);
-}
-
-Batch &System::GetBatch(MemPool &pool) const
-{
- Batch *obj;
- obj = m_batch.get();
- if (obj == NULL) {
- obj = new Batch(pool);
- m_batch.reset(obj);
- }
- assert(obj);
- return *obj;
-}
-
-void System::IsPb()
-{
- switch (options.search.algo) {
- case Normal:
- case NormalBatch:
- case CubePruning:
- case CubePruningPerMiniStack:
- case CubePruningPerBitmap:
- case CubePruningCardinalStack:
- case CubePruningBitmapStack:
- case CubePruningMiniStack:
- isPb = true;
- break;
- case CYKPlus:
- isPb = false;
- break;
- default:
- abort();
- break;
- }
-}
-
-
-}
-
diff --git a/contrib/moses2/System.h b/contrib/moses2/System.h
deleted file mode 100644
index 1d60e96a0..000000000
--- a/contrib/moses2/System.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * System.h
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-#include <vector>
-#include <deque>
-#include <boost/thread/tss.hpp>
-#include <boost/pool/object_pool.hpp>
-#include <boost/shared_ptr.hpp>
-#include "FF/FeatureFunctions.h"
-#include "Weights.h"
-#include "MemPool.h"
-#include "Recycler.h"
-#include "legacy/FactorCollection.h"
-#include "legacy/Parameter.h"
-#include "TypeDef.h"
-#include "legacy/Bitmaps.h"
-#include "legacy/OutputCollector.h"
-#include "parameters/AllOptions.h"
-
-namespace Moses2
-{
-namespace NSCubePruning
-{
-class Stack;
-}
-
-class FeatureFunction;
-class StatefulFeatureFunction;
-class PhraseTable;
-class HypothesisBase;
-
-class System
-{
-public:
- const Parameter &params;
- AllOptions options;
- FeatureFunctions featureFunctions;
- Weights weights;
- std::vector<const PhraseTable*> mappings;
-
- std::vector<size_t> maxChartSpans;
- bool isPb;
-
- mutable boost::shared_ptr<OutputCollector> bestCollector, nbestCollector, detailedTranslationCollector;
-
- // moses.ini params
- int cpuAffinityOffset;
- int cpuAffinityOffsetIncr;
-
- System(const Parameter &paramsArg);
- virtual ~System();
-
- MemPool &GetSystemPool() const;
- MemPool &GetManagerPool() const;
- FactorCollection &GetVocab() const;
-
- Recycler<HypothesisBase*> &GetHypoRecycler() const;
-
- Batch &GetBatch(MemPool &pool) const;
-
-protected:
- mutable FactorCollection m_vocab;
- mutable boost::thread_specific_ptr<MemPool> m_managerPool;
- mutable boost::thread_specific_ptr<MemPool> m_systemPool;
-
- mutable boost::thread_specific_ptr<Recycler<HypothesisBase*> > m_hypoRecycler;
-
- mutable boost::thread_specific_ptr<Batch> m_batch;
-
- void LoadWeights();
- void LoadMappings();
- void LoadDecodeGraphBackoff();
-
- void IsPb();
-
-};
-
-}
-
diff --git a/contrib/moses2/TargetPhrase.cpp b/contrib/moses2/TargetPhrase.cpp
deleted file mode 100644
index 600d41ae7..000000000
--- a/contrib/moses2/TargetPhrase.cpp
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * TargetPhrase.cpp
- *
- * Created on: 26 Apr 2016
- * Author: hieu
- */
-
-#include "TargetPhrase.h"
-#include "System.h"
-#include "Scores.h"
-
-namespace Moses2
-{
-
-} /* namespace Moses2 */
diff --git a/contrib/moses2/TargetPhrase.h b/contrib/moses2/TargetPhrase.h
deleted file mode 100644
index 50f66326a..000000000
--- a/contrib/moses2/TargetPhrase.h
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * TargetPhrase.h
- *
- * Created on: 26 Apr 2016
- * Author: hieu
- */
-
-#pragma once
-#include <sstream>
-#include "PhraseImplTemplate.h"
-#include "System.h"
-#include "Scores.h"
-#include "AlignmentInfoCollection.h"
-#include "TranslationModel/PhraseTable.h"
-
-namespace Moses2
-{
-class AlignmentInfo;
-
-template<typename WORD>
-class TargetPhrase: public PhraseImplTemplate<WORD>
-{
-public:
- typedef PhraseImplTemplate<WORD> Parent;
- const PhraseTable &pt;
- mutable void **ffData;
- SCORE *scoreProperties;
-
- TargetPhrase(MemPool &pool, const PhraseTable &pt, const System &system, size_t size)
- : PhraseImplTemplate<WORD>(pool, size)
- , pt(pt)
- , scoreProperties(NULL)
- , m_alignTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
- {
- m_scores = new (pool.Allocate<Scores>()) Scores(system, pool,
- system.featureFunctions.GetNumScores());
- }
-
- Scores &GetScores()
- { return *m_scores; }
-
- const Scores &GetScores() const
- { return *m_scores; }
-
- virtual SCORE GetScoreForPruning() const = 0;
-
- SCORE *GetScoresProperty(int propertyInd) const
- { return scoreProperties ? scoreProperties + propertyInd : NULL; }
-
- const AlignmentInfo &GetAlignTerm() const {
- return *m_alignTerm;
- }
-
- void SetAlignTerm(const AlignmentInfo &alignInfo) {
- m_alignTerm = &alignInfo;
- }
-
- // ALNREP = alignment representation,
- // see AlignmentInfo constructors for supported representations
- template<typename ALNREP>
- void
- SetAlignTerm(const ALNREP &coll) {
- m_alignTerm = AlignmentInfoCollection::Instance().Add(coll);
- }
-
- virtual void SetAlignmentInfo(const std::string &alignString)
- {
- AlignmentInfo::CollType alignTerm;
-
- std::vector<std::string> toks = Tokenize(alignString);
- for (size_t i = 0; i < toks.size(); ++i) {
- std::vector<size_t> alignPair = Tokenize<size_t>(toks[i], "-");
- UTIL_THROW_IF2(alignPair.size() != 2, "Wrong alignment format");
-
- size_t sourcePos = alignPair[0];
- size_t targetPos = alignPair[1];
-
- alignTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
- }
-
- SetAlignTerm(alignTerm);
- // cerr << "TargetPhrase::SetAlignmentInfo(const StringPiece &alignString) this:|" << *this << "|\n";
-
- //cerr << "alignTerm=" << alignTerm.size() << endl;
- //cerr << "alignNonTerm=" << alignNonTerm.size() << endl;
-
- }
-
- void OutputToStream(const System &system, const Phrase<WORD> &inputPhrase, std::ostream &out) const
- {
- // get placeholders
- FactorType placeholderFactor = system.options.input.placeholder_factor;
- std::map<size_t, const Factor*> placeholders;
- if (placeholderFactor != NOT_FOUND) {
- // creates map of target position -> factor for placeholders
- placeholders = GetPlaceholders(system, inputPhrase);
- }
-
- size_t size = PhraseImplTemplate<WORD>::GetSize();
- for (size_t i = 0; i < size; ++i) {
- // output placeholder, if any
- std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(i);
- if (iter == placeholders.end()) {
- const WORD &word = (*this)[i];
- word.OutputToStream(system, out);
- }
- else {
- const Factor *factor = iter->second;
- out << *factor;
- }
-
- out << " ";
- }
- }
-
- std::map<size_t, const Factor*> GetPlaceholders(const System &system, const Phrase<WORD> &inputPhrase) const
- {
- FactorType placeholderFactor = system.options.input.placeholder_factor;
- std::map<size_t, const Factor*> ret;
- //std::cerr << "inputPhrase=" << inputPhrase.Debug(system) << std::endl;
-
- for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) {
- const Factor *factor = inputPhrase[sourcePos][placeholderFactor];
- if (factor) {
- //std::cerr << "factor=" << *factor << std::endl;
- //std::cerr << "tp=" << Debug(system) << std::endl;
- std::set<size_t> targetPos = GetAlignTerm().GetAlignmentsForSource(sourcePos);
- UTIL_THROW_IF2(targetPos.size() != 1,
- "Placeholder should be aligned to 1, and only 1, word:" << targetPos.size() << "!=1");
- ret[*targetPos.begin()] = factor;
- }
- }
-
- return ret;
- }
-
- virtual std::string Debug(const System &system) const
- {
- std::stringstream out;
- out << Phrase<WORD>::Debug(system);
- out << " pt=" << pt.GetName() << " ";
- out << " SCORES:" << GetScores().Debug(system);
- out << " ALIGN-T:";
- out << GetAlignTerm().Debug(system);
-
- return out.str();
- }
-
-protected:
- Scores *m_scores;
- const AlignmentInfo *m_alignTerm;
-};
-
-///////////////////////////////////////////////////////////////////////
-template<typename TP>
-struct CompareScoreForPruning
-{
- bool operator()(const TP *a, const TP *b) const
- {
- return a->GetScoreForPruning() > b->GetScoreForPruning();
- }
-
- bool operator()(const TP &a, const TP &b) const
- {
- return a.GetScoreForPruning() > b.GetScoreForPruning();
- }
-};
-
-} /* namespace Moses2a */
-
diff --git a/contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.cpp b/contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.cpp
deleted file mode 100644
index 338a8e221..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.cpp
+++ /dev/null
@@ -1,418 +0,0 @@
-// $Id$
-// vim:tabstop=2
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#include "ThrowingFwrite.h"
-#include "BlockHashIndex.h"
-#include "CmphStringVectorAdapter.h"
-#include "util/exception.hh"
-#include "util/string_stream.hh"
-
-#ifdef HAVE_CMPH
-#include "cmph.h"
-#endif
-
-namespace Moses2
-{
-#ifdef WITH_THREADS
-BlockHashIndex::BlockHashIndex(size_t orderBits, size_t fingerPrintBits,
- size_t threadsNum) :
- m_orderBits(orderBits), m_fingerPrintBits(fingerPrintBits), m_fileHandle(0), m_fileHandleStart(
- 0), m_landmarks(true), m_size(0), m_lastSaved(-1), m_lastDropped(-1), m_numLoadedRanges(
- 0), m_threadPool(threadsNum)
-{
-#ifndef HAVE_CMPH
- std::cerr << "minphr: CMPH support not compiled in." << std::endl;
- exit(1);
-#endif
-}
-#else
-BlockHashIndex::BlockHashIndex(size_t orderBits, size_t fingerPrintBits)
-: m_orderBits(orderBits), m_fingerPrintBits(fingerPrintBits),
-m_fileHandle(0), m_fileHandleStart(0), m_size(0),
-m_lastSaved(-1), m_lastDropped(-1), m_numLoadedRanges(0)
-{
-#ifndef HAVE_CMPH
- std::cerr << "minphr: CMPH support not compiled in." << std::endl;
- exit(1);
-#endif
-}
-#endif
-
-BlockHashIndex::~BlockHashIndex()
-{
-#ifdef HAVE_CMPH
- for (std::vector<void*>::iterator it = m_hashes.begin(); it != m_hashes.end();
- it++)
- if (*it != 0) cmph_destroy((cmph_t*) *it);
-
- for (std::vector<PairedPackedArray<>*>::iterator it = m_arrays.begin();
- it != m_arrays.end(); it++)
- if (*it != 0) delete *it;
-#endif
-}
-
-size_t BlockHashIndex::GetHash(const char* key)
-{
- std::string keyStr(key);
- size_t i = std::distance(m_landmarks.begin(),
- std::upper_bound(m_landmarks.begin(), m_landmarks.end(), keyStr)) - 1;
-
- if (i == 0ul - 1) return GetSize();
-
- size_t pos = GetHash(i, key);
- if (pos != GetSize()) return (1ul << m_orderBits) * i + pos;
- else return GetSize();
-}
-
-size_t BlockHashIndex::GetFprint(const char* key) const
-{
- size_t hash;
- MurmurHash3_x86_32(key, std::strlen(key), 100000, &hash);
- hash &= (1ul << m_fingerPrintBits) - 1;
- return hash;
-}
-
-size_t BlockHashIndex::GetHash(size_t i, const char* key)
-{
-//#ifdef WITH_THREADS
-// boost::mutex::scoped_lock lock(m_mutex);
-//#endif
- //if(m_hashes[i] == 0)
- //LoadRange(i);
-#ifdef HAVE_CMPH
- size_t idx = cmph_search((cmph_t*) m_hashes[i], key,
- (cmph_uint32) strlen(key));
-#else
- assert(0);
- size_t idx = 0;
-#endif
-
- std::pair<size_t, size_t> orderPrint = m_arrays[i]->Get(idx, m_orderBits,
- m_fingerPrintBits);
- m_clocks[i] = clock();
-
- if (GetFprint(key) == orderPrint.second) return orderPrint.first;
- else return GetSize();
-}
-
-size_t BlockHashIndex::GetHash(std::string key)
-{
- return GetHash(key.c_str());
-}
-
-size_t BlockHashIndex::operator[](std::string key)
-{
- return GetHash(key);
-}
-
-size_t BlockHashIndex::operator[](char* key)
-{
- return GetHash(key);
-}
-
-size_t BlockHashIndex::Save(std::string filename)
-{
- std::FILE* mphf = std::fopen(filename.c_str(), "w");
- size_t size = Save(mphf);
- std::fclose(mphf);
- return size;
-}
-
-void BlockHashIndex::BeginSave(std::FILE * mphf)
-{
- m_fileHandle = mphf;
- ThrowingFwrite(&m_orderBits, sizeof(size_t), 1, m_fileHandle);
- ThrowingFwrite(&m_fingerPrintBits, sizeof(size_t), 1, m_fileHandle);
-
- m_fileHandleStart = std::ftell(m_fileHandle);
-
- size_t relIndexPos = 0;
- ThrowingFwrite(&relIndexPos, sizeof(size_t), 1, m_fileHandle);
-}
-
-void BlockHashIndex::SaveRange(size_t i)
-{
-#ifdef HAVE_CMPH
- if (m_seekIndex.size() <= i) m_seekIndex.resize(i + 1);
- m_seekIndex[i] = std::ftell(m_fileHandle) - m_fileHandleStart;
- cmph_dump((cmph_t*) m_hashes[i], m_fileHandle);
- m_arrays[i]->Save(m_fileHandle);
-#endif
-}
-
-void BlockHashIndex::SaveLastRange()
-{
-#ifdef WITH_THREADS
- boost::mutex::scoped_lock lock(m_mutex);
-#endif
-
- while (!m_queue.empty() && m_lastSaved + 1 == -m_queue.top()) {
- size_t current = -m_queue.top();
- m_queue.pop();
- SaveRange(current);
- m_lastSaved = current;
- }
-}
-
-void BlockHashIndex::DropRange(size_t i)
-{
-#ifdef HAVE_CMPH
- if (m_hashes[i] != 0) {
- cmph_destroy((cmph_t*) m_hashes[i]);
- m_hashes[i] = 0;
- }
- if (m_arrays[i] != 0) {
- delete m_arrays[i];
- m_arrays[i] = 0;
- m_clocks[i] = 0;
- }
- m_numLoadedRanges--;
-#endif
-}
-
-void BlockHashIndex::DropLastRange()
-{
-#ifdef WITH_THREADS
- boost::mutex::scoped_lock lock(m_mutex);
-#endif
-
- while (m_lastDropped != m_lastSaved)
- DropRange(++m_lastDropped);
-}
-
-#ifdef WITH_THREADS
-void BlockHashIndex::WaitAll()
-{
- m_threadPool.Stop(true);
-}
-#endif
-
-size_t BlockHashIndex::FinalizeSave()
-{
-#ifdef WITH_THREADS
- m_threadPool.Stop(true);
-#endif
-
- SaveLastRange();
-
- size_t relIndexPos = std::ftell(m_fileHandle) - m_fileHandleStart;
-
- std::fseek(m_fileHandle, m_fileHandleStart, SEEK_SET);
- ThrowingFwrite(&relIndexPos, sizeof(size_t), 1, m_fileHandle);
-
- std::fseek(m_fileHandle, m_fileHandleStart + relIndexPos, SEEK_SET);
- m_landmarks.save(m_fileHandle);
-
- size_t seekIndexSize = m_seekIndex.size();
- ThrowingFwrite(&seekIndexSize, sizeof(size_t), 1, m_fileHandle);
- ThrowingFwrite(&m_seekIndex[0], sizeof(size_t), seekIndexSize, m_fileHandle);
-
- ThrowingFwrite(&m_size, sizeof(size_t), 1, m_fileHandle);
-
- size_t fileHandleStop = std::ftell(m_fileHandle);
- return fileHandleStop - m_fileHandleStart + sizeof(m_orderBits)
- + sizeof(m_fingerPrintBits);
-}
-
-size_t BlockHashIndex::Save(std::FILE * mphf)
-{
- m_queue = std::priority_queue<int>();
- BeginSave(mphf);
- for (size_t i = 0; i < m_hashes.size(); i++)
- SaveRange(i);
- return FinalizeSave();
-}
-
-size_t BlockHashIndex::LoadIndex(std::FILE* mphf)
-{
- m_fileHandle = mphf;
-
- size_t beginning = std::ftell(mphf);
-
- size_t read = 0;
- read += std::fread(&m_orderBits, sizeof(size_t), 1, mphf);
- read += std::fread(&m_fingerPrintBits, sizeof(size_t), 1, mphf);
- m_fileHandleStart = std::ftell(m_fileHandle);
-
- size_t relIndexPos;
- read += std::fread(&relIndexPos, sizeof(size_t), 1, mphf);
- std::fseek(m_fileHandle, m_fileHandleStart + relIndexPos, SEEK_SET);
-
- m_landmarks.load(mphf);
-
- size_t seekIndexSize;
- read += std::fread(&seekIndexSize, sizeof(size_t), 1, m_fileHandle);
- m_seekIndex.resize(seekIndexSize);
- read += std::fread(&m_seekIndex[0], sizeof(size_t), seekIndexSize,
- m_fileHandle);
- m_hashes.resize(seekIndexSize, 0);
- m_clocks.resize(seekIndexSize, 0);
- m_arrays.resize(seekIndexSize, 0);
-
- read += std::fread(&m_size, sizeof(size_t), 1, m_fileHandle);
-
- size_t end = std::ftell(mphf);
-
- return end - beginning;
-}
-
-void BlockHashIndex::LoadRange(size_t i)
-{
-#ifdef HAVE_CMPH
- std::fseek(m_fileHandle, m_fileHandleStart + m_seekIndex[i], SEEK_SET);
- cmph_t* hash = cmph_load(m_fileHandle);
- m_arrays[i] = new PairedPackedArray<>(0, m_orderBits, m_fingerPrintBits);
- m_arrays[i]->Load(m_fileHandle);
-
- m_hashes[i] = (void*) hash;
- m_clocks[i] = clock();
-
- m_numLoadedRanges++;
-#endif
-}
-
-size_t BlockHashIndex::Load(std::string filename)
-{
- std::FILE* mphf = std::fopen(filename.c_str(), "r");
- size_t size = Load(mphf);
- std::fclose(mphf);
- return size;
-}
-
-size_t BlockHashIndex::Load(std::FILE * mphf)
-{
- size_t byteSize = LoadIndex(mphf);
- size_t end = std::ftell(mphf);
-
- for (size_t i = 0; i < m_seekIndex.size(); i++)
- LoadRange(i);
- std::fseek(m_fileHandle, end, SEEK_SET);
- return byteSize;
-}
-
-size_t BlockHashIndex::GetSize() const
-{
- return m_size;
-}
-
-void BlockHashIndex::KeepNLastRanges(float ratio, float tolerance)
-{
- /*
- #ifdef WITH_THREADS
- boost::mutex::scoped_lock lock(m_mutex);
- #endif
- size_t n = m_hashes.size() * ratio;
- size_t max = n * (1 + tolerance);
- if(m_numLoadedRanges > max) {
- typedef std::vector<std::pair<clock_t, size_t> > LastLoaded;
- LastLoaded lastLoaded;
- for(size_t i = 0; i < m_hashes.size(); i++)
- if(m_hashes[i] != 0)
- lastLoaded.push_back(std::make_pair(m_clocks[i], i));
-
- std::sort(lastLoaded.begin(), lastLoaded.end());
- for(LastLoaded::reverse_iterator it = lastLoaded.rbegin() + size_t(n * (1 - tolerance));
- it != lastLoaded.rend(); it++)
- DropRange(it->second);
- }*/
-}
-
-void BlockHashIndex::CalcHash(size_t current, void* source_void)
-{
-#ifdef HAVE_CMPH
- cmph_io_adapter_t* source = (cmph_io_adapter_t*) source_void;
- cmph_config_t *config = cmph_config_new(source);
- cmph_config_set_algo(config, CMPH_CHD);
-
- cmph_t* hash = cmph_new(config);
- PairedPackedArray<> *pv = new PairedPackedArray<>(source->nkeys, m_orderBits,
- m_fingerPrintBits);
-
- size_t i = 0;
-
- source->rewind(source->data);
-
- std::string lastKey = "";
- while (i < source->nkeys) {
- unsigned keylen;
- char* key;
- source->read(source->data, &key, &keylen);
- std::string temp(key, keylen);
- source->dispose(source->data, key, keylen);
-
- if (lastKey > temp) {
- if (source->nkeys != 2 || temp != "###DUMMY_KEY###") {
- util::StringStream strme;
- strme
- << "ERROR: Input file does not appear to be sorted with LC_ALL=C sort\n";
- strme << "1: " << lastKey << "\n";
- strme << "2: " << temp << "\n";
- UTIL_THROW2(strme.str());
- }
- }
- lastKey = temp;
-
- size_t fprint = GetFprint(temp.c_str());
- size_t idx = cmph_search(hash, temp.c_str(), (cmph_uint32) temp.size());
-
- pv->Set(idx, i, fprint, m_orderBits, m_fingerPrintBits);
- i++;
- }
-
- cmph_config_destroy(config);
-
-#ifdef WITH_THREADS
- boost::mutex::scoped_lock lock(m_mutex);
-#endif
-
- if (m_hashes.size() <= current) {
- m_hashes.resize(current + 1, 0);
- m_arrays.resize(current + 1, 0);
- m_clocks.resize(current + 1, 0);
- }
-
- m_hashes[current] = (void*) hash;
- m_arrays[current] = pv;
- m_clocks[current] = clock();
- m_queue.push(-current);
-#endif
-}
-
-#ifdef HAVE_CMPH
-void* BlockHashIndex::vectorAdapter(std::vector<std::string>& v)
-{
- return (void*) CmphVectorAdapter(v);
-}
-
-void* BlockHashIndex::vectorAdapter(
- StringVector<unsigned, size_t, std::allocator>& sv)
-{
- return (void*) CmphStringVectorAdapter(sv);
-}
-
-void* BlockHashIndex::vectorAdapter(
- StringVector<unsigned, size_t, MmapAllocator>& sv)
-{
- return (void*) CmphStringVectorAdapter(sv);
-}
-#endif
-
-}
diff --git a/contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h b/contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h
deleted file mode 100644
index b91ef8f6c..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h
+++ /dev/null
@@ -1,200 +0,0 @@
-// $Id$
-// vim:tabstop=2
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#ifndef moses_BlockHashIndex_h
-#define moses_BlockHashIndex_h
-
-#include <iostream>
-#include <sstream>
-#include <string>
-#include <vector>
-#include <queue>
-#include <cstring>
-#include <cstdio>
-#include <boost/thread.hpp>
-
-#include "MurmurHash3.h"
-#include "StringVector.h"
-#include "PackedArray.h"
-#include "util/exception.hh"
-#include "util/string_stream.hh"
-
-#ifdef WITH_THREADS
-#include "../../legacy/ThreadPool.h"
-#else
-#include <ctime>
-#endif
-
-#include <boost/shared_ptr.hpp>
-
-namespace Moses2
-{
-
-class BlockHashIndex
-{
-private:
- std::priority_queue<int> m_queue;
-
- size_t m_orderBits;
- size_t m_fingerPrintBits;
-
- std::FILE* m_fileHandle;
- size_t m_fileHandleStart;
-
- StringVector<unsigned char, unsigned long> m_landmarks;
-
- std::vector<void*> m_hashes;
- std::vector<clock_t> m_clocks;
- std::vector<PairedPackedArray<>*> m_arrays;
-
- std::vector<size_t> m_seekIndex;
-
- size_t m_size;
- int m_lastSaved;
- int m_lastDropped;
- size_t m_numLoadedRanges;
-
-#ifdef WITH_THREADS
- ThreadPool m_threadPool;
- boost::mutex m_mutex;
-
- template<typename Keys>
- class HashTask: public Task
- {
- public:
- HashTask(int id, BlockHashIndex& hash, Keys& keys) :
- m_id(id), m_hash(hash), m_keys(new Keys(keys))
- {
- }
-
- virtual void Run()
- {
- m_hash.CalcHash(m_id, *m_keys);
- }
-
- virtual ~HashTask()
- {
- delete m_keys;
- }
-
- private:
- int m_id;
- BlockHashIndex& m_hash;
- Keys* m_keys;
- };
-#endif
-
- size_t GetFprint(const char* key) const;
- size_t GetHash(size_t i, const char* key);
-
-public:
-#ifdef WITH_THREADS
- BlockHashIndex(size_t orderBits, size_t fingerPrintBits,
- size_t threadsNum = 2);
-#else
- BlockHashIndex(size_t orderBits, size_t fingerPrintBits);
-#endif
-
- ~BlockHashIndex();
-
- size_t GetHash(const char* key);
- size_t GetHash(std::string key);
-
- size_t operator[](std::string key);
- size_t operator[](char* key);
-
- void BeginSave(std::FILE* mphf);
- void SaveRange(size_t i);
- void SaveLastRange();
- size_t FinalizeSave();
-
-#ifdef WITH_THREADS
- void WaitAll();
-#endif
-
- void DropRange(size_t i);
- void DropLastRange();
-
- size_t LoadIndex(std::FILE* mphf);
- void LoadRange(size_t i);
-
- size_t Save(std::string filename);
- size_t Save(std::FILE * mphf);
-
- size_t Load(std::string filename);
- size_t Load(std::FILE * mphf);
-
- size_t GetSize() const;
-
- void KeepNLastRanges(float ratio = 0.1, float tolerance = 0.1);
-
- template<typename Keys>
- void AddRange(Keys &keys)
- {
- size_t current = m_landmarks.size();
-
- if (m_landmarks.size() && m_landmarks.back().str() >= keys[0]) {
- util::StringStream strme;
- strme
- << "ERROR: Input file does not appear to be sorted with LC_ALL=C sort\n";
- strme << "1: " << m_landmarks.back().str() << "\n";
- strme << "2: " << keys[0] << "\n";
- UTIL_THROW2(strme.str());
- }
-
- m_landmarks.push_back(keys[0]);
- m_size += keys.size();
-
- if (keys.size() == 1) {
- // add dummy key to avoid null hash
- keys.push_back("###DUMMY_KEY###");
- }
-
-#ifdef WITH_THREADS
-
- boost::shared_ptr<HashTask<Keys> > ht(
- new HashTask<Keys>(current, *this, keys));
- m_threadPool.Submit(ht);
-#else
- CalcHash(current, keys);
-#endif
- }
-
- template<typename Keys>
- void CalcHash(size_t current, Keys &keys)
- {
-#ifdef HAVE_CMPH
- void* source = vectorAdapter(keys);
- CalcHash(current, source);
-#endif
- }
-
- void CalcHash(size_t current, void* source);
-
-#ifdef HAVE_CMPH
- void* vectorAdapter(std::vector<std::string>& v);
- void* vectorAdapter(StringVector<unsigned, size_t, std::allocator>& sv);
- void* vectorAdapter(StringVector<unsigned, size_t, MmapAllocator>& sv);
-#endif
-};
-
-}
-#endif
diff --git a/contrib/moses2/TranslationModel/CompactPT/CanonicalHuffman.h b/contrib/moses2/TranslationModel/CompactPT/CanonicalHuffman.h
deleted file mode 100644
index ffb6488c0..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/CanonicalHuffman.h
+++ /dev/null
@@ -1,345 +0,0 @@
-// $Id$
-// vim:tabstop=2
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#ifndef moses_CanonicalHuffman_h
-#define moses_CanonicalHuffman_h
-
-#include <string>
-#include <algorithm>
-#include <boost/dynamic_bitset.hpp>
-#include <boost/unordered_map.hpp>
-
-#include "ThrowingFwrite.h"
-
-namespace Moses2
-{
-
-template<typename Data>
-class CanonicalHuffman
-{
-private:
- std::vector<Data> m_symbols;
- std::vector<size_t> m_firstCodes;
- std::vector<size_t> m_lengthIndex;
-
- typedef boost::unordered_map<Data, boost::dynamic_bitset<> > EncodeMap;
- EncodeMap m_encodeMap;
-
- struct MinHeapSorter
- {
- std::vector<size_t>& m_vec;
-
- MinHeapSorter(std::vector<size_t>& vec) :
- m_vec(vec)
- {
- }
-
- bool operator()(size_t a, size_t b)
- {
- return m_vec[a] > m_vec[b];
- }
- };
-
- template<class Iterator>
- void CalcLengths(Iterator begin, Iterator end, std::vector<size_t>& lengths)
- {
- size_t n = std::distance(begin, end);
- std::vector<size_t> A(2 * n, 0);
-
- m_symbols.resize(n);
- size_t i = 0;
- for (Iterator it = begin; it != end; it++) {
- m_symbols[i] = it->first;
-
- A[i] = n + i;
- A[n + i] = it->second;
- i++;
- }
-
- if (n == 1) {
- lengths.push_back(1);
- return;
- }
-
- MinHeapSorter hs(A);
- std::make_heap(A.begin(), A.begin() + n, hs);
-
- size_t h = n;
- size_t m1, m2;
- while (h > 1) {
- m1 = A[0];
- std::pop_heap(A.begin(), A.begin() + h, hs);
-
- h--;
-
- m2 = A[0];
- std::pop_heap(A.begin(), A.begin() + h, hs);
-
- A[h] = A[m1] + A[m2];
- A[h - 1] = h;
- A[m1] = A[m2] = h;
-
- std::push_heap(A.begin(), A.begin() + h, hs);
- }
-
- A[1] = 0;
- for (size_t i = 2; i < 2 * n; i++)
- A[i] = A[A[i]] + 1;
-
- lengths.resize(n);
- for (size_t i = 0; i < n; i++)
- lengths[i] = A[i + n];
- }
-
- void CalcCodes(std::vector<size_t>& lengths)
- {
- std::vector<size_t> numLength;
- for (std::vector<size_t>::iterator it = lengths.begin();
- it != lengths.end(); it++) {
- size_t length = *it;
- if (numLength.size() <= length) numLength.resize(length + 1, 0);
- numLength[length]++;
- }
-
- m_lengthIndex.resize(numLength.size());
- m_lengthIndex[0] = 0;
- for (size_t l = 1; l < numLength.size(); l++)
- m_lengthIndex[l] = m_lengthIndex[l - 1] + numLength[l - 1];
-
- size_t maxLength = numLength.size() - 1;
-
- m_firstCodes.resize(maxLength + 1, 0);
- for (size_t l = maxLength - 1; l > 0; l--)
- m_firstCodes[l] = (m_firstCodes[l + 1] + numLength[l + 1]) / 2;
-
- std::vector<Data> t_symbols;
- t_symbols.resize(lengths.size());
-
- std::vector<size_t> nextCode = m_firstCodes;
- for (size_t i = 0; i < lengths.size(); i++) {
- Data data = m_symbols[i];
- size_t length = lengths[i];
-
- size_t pos = m_lengthIndex[length]
- + (nextCode[length] - m_firstCodes[length]);
- t_symbols[pos] = data;
-
- nextCode[length] = nextCode[length] + 1;
- }
-
- m_symbols.swap(t_symbols);
- }
-
- void CreateCodeMap()
- {
- for (size_t l = 1; l < m_lengthIndex.size(); l++) {
- size_t intCode = m_firstCodes[l];
- size_t num = (
- (l + 1 < m_lengthIndex.size()) ?
- m_lengthIndex[l + 1] : m_symbols.size()) - m_lengthIndex[l];
-
- for (size_t i = 0; i < num; i++) {
- Data data = m_symbols[m_lengthIndex[l] + i];
- boost::dynamic_bitset<> bitCode(l, intCode);
- m_encodeMap[data] = bitCode;
- intCode++;
- }
- }
- }
-
- const boost::dynamic_bitset<>& Encode(Data data) const
- {
- typename EncodeMap::const_iterator it = m_encodeMap.find(data);
- UTIL_THROW_IF2(it == m_encodeMap.end(),
- "Cannot find symbol in encoding map");
- return it->second;
- }
-
- template<class BitWrapper>
- void PutCode(BitWrapper& bitWrapper, const boost::dynamic_bitset<>& code)
- {
- for (int j = code.size() - 1; j >= 0; j--)
- bitWrapper.Put(code[j]);
- }
-
-public:
-
- template<class Iterator>
- CanonicalHuffman(Iterator begin, Iterator end, bool forEncoding = true)
- {
- std::vector<size_t> lengths;
- CalcLengths(begin, end, lengths);
- CalcCodes(lengths);
-
- if (forEncoding) CreateCodeMap();
- }
-
- CanonicalHuffman(std::FILE* pFile, bool forEncoding = false)
- {
- Load(pFile);
-
- if (forEncoding) CreateCodeMap();
- }
-
- template<class BitWrapper>
- void Put(BitWrapper& bitWrapper, Data data)
- {
- PutCode(bitWrapper, Encode(data));
- }
-
- template<class BitWrapper>
- Data Read(BitWrapper& bitWrapper)
- {
- if (bitWrapper.TellFromEnd()) {
- size_t intCode = bitWrapper.Read();
- size_t len = 1;
- while (intCode < m_firstCodes[len]) {
- intCode = 2 * intCode + bitWrapper.Read();
- len++;
- }
- return m_symbols[m_lengthIndex[len] + (intCode - m_firstCodes[len])];
- }
- return Data();
- }
-
- size_t Load(std::FILE* pFile)
- {
- size_t start = std::ftell(pFile);
- size_t read = 0;
-
- size_t size;
- read += std::fread(&size, sizeof(size_t), 1, pFile);
- m_symbols.resize(size);
- read += std::fread(&m_symbols[0], sizeof(Data), size, pFile);
-
- read += std::fread(&size, sizeof(size_t), 1, pFile);
- m_firstCodes.resize(size);
- read += std::fread(&m_firstCodes[0], sizeof(size_t), size, pFile);
-
- read += std::fread(&size, sizeof(size_t), 1, pFile);
- m_lengthIndex.resize(size);
- read += std::fread(&m_lengthIndex[0], sizeof(size_t), size, pFile);
-
- return std::ftell(pFile) - start;
- }
-
- size_t Save(std::FILE* pFile)
- {
- size_t start = std::ftell(pFile);
-
- size_t size = m_symbols.size();
- ThrowingFwrite(&size, sizeof(size_t), 1, pFile);
- ThrowingFwrite(&m_symbols[0], sizeof(Data), size, pFile);
-
- size = m_firstCodes.size();
- ThrowingFwrite(&size, sizeof(size_t), 1, pFile);
- ThrowingFwrite(&m_firstCodes[0], sizeof(size_t), size, pFile);
-
- size = m_lengthIndex.size();
- ThrowingFwrite(&size, sizeof(size_t), 1, pFile);
- ThrowingFwrite(&m_lengthIndex[0], sizeof(size_t), size, pFile);
-
- return std::ftell(pFile) - start;
- }
-};
-
-template<class Container = std::string>
-class BitWrapper
-{
-private:
- Container& m_data;
-
- typename Container::iterator m_iterator;
- typename Container::value_type m_currentValue;
-
- size_t m_valueBits;
- typename Container::value_type m_mask;
- size_t m_bitPos;
-
-public:
-
- BitWrapper(Container &data) :
- m_data(data), m_iterator(m_data.begin()), m_currentValue(0), m_valueBits(
- sizeof(typename Container::value_type) * 8), m_mask(1), m_bitPos(0)
- {
- }
-
- bool Read()
- {
- if (m_bitPos % m_valueBits == 0) {
- if (m_iterator != m_data.end()) m_currentValue = *m_iterator++;
- }
- else m_currentValue = m_currentValue >> 1;
-
- m_bitPos++;
- return (m_currentValue & m_mask);
- }
-
- void Put(bool bit)
- {
- if (m_bitPos % m_valueBits == 0) m_data.push_back(0);
-
- if (bit) m_data[m_data.size() - 1] |= m_mask << (m_bitPos % m_valueBits);
-
- m_bitPos++;
- }
-
- size_t Tell()
- {
- return m_bitPos;
- }
-
- size_t TellFromEnd()
- {
- if (m_data.size() * m_valueBits < m_bitPos) return 0;
- return m_data.size() * m_valueBits - m_bitPos;
- }
-
- void Seek(size_t bitPos)
- {
- m_bitPos = bitPos;
- m_iterator = m_data.begin() + int((m_bitPos - 1) / m_valueBits);
- m_currentValue = (*m_iterator) >> ((m_bitPos - 1) % m_valueBits);
- m_iterator++;
- }
-
- void SeekFromEnd(size_t bitPosFromEnd)
- {
- size_t bitPos = m_data.size() * m_valueBits - bitPosFromEnd;
- Seek(bitPos);
- }
-
- void Reset()
- {
- m_iterator = m_data.begin();
- m_currentValue = 0;
- m_bitPos = 0;
- }
-
- Container& GetContainer()
- {
- return m_data;
- }
-};
-
-}
-
-#endif
diff --git a/contrib/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp b/contrib/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp
deleted file mode 100644
index a51dc5a45..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp
+++ /dev/null
@@ -1,95 +0,0 @@
-// $Id$
-// vim:tabstop=2
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#ifdef HAVE_CMPH
-
-#include "CmphStringVectorAdapter.h"
-
-namespace Moses2
-{
-
-void CmphStringVectorAdapterDispose(void *data, char *key, cmph_uint32 keylen)
-{
- delete[] key;
-}
-
-void CmphStringVectorAdapterRewind(void *data)
-{
- cmph_vector_t *cmph_vector = (cmph_vector_t *) data;
- cmph_vector->position = 0;
-}
-
-//************************************************************************//
-
-cmph_io_adapter_t *CmphVectorAdapterNew(std::vector<std::string>& v)
-{
- cmph_io_adapter_t * key_source = (cmph_io_adapter_t *) malloc(
- sizeof(cmph_io_adapter_t));
- cmph_vector_t * cmph_vector = (cmph_vector_t *) malloc(sizeof(cmph_vector_t));
- assert(key_source);
- assert(cmph_vector);
-
- cmph_vector->vector = (void *) &v;
- cmph_vector->position = 0;
- key_source->data = (void *) cmph_vector;
- key_source->nkeys = v.size();
-
- return key_source;
-}
-
-int CmphVectorAdapterRead(void *data, char **key, cmph_uint32 *keylen)
-{
- cmph_vector_t *cmph_vector = (cmph_vector_t *) data;
- std::vector<std::string>* v = (std::vector<std::string>*) cmph_vector->vector;
- size_t size;
- *keylen = (*v)[cmph_vector->position].size();
- size = *keylen;
- *key = new char[size + 1];
- std::string temp = (*v)[cmph_vector->position];
- strcpy(*key, temp.c_str());
- cmph_vector->position = cmph_vector->position + 1;
- return (int) (*keylen);
-}
-
-void CmphVectorAdapterDispose(void *data, char *key, cmph_uint32 keylen)
-{
- delete[] key;
-}
-
-void CmphVectorAdapterRewind(void *data)
-{
- cmph_vector_t *cmph_vector = (cmph_vector_t *) data;
- cmph_vector->position = 0;
-}
-
-cmph_io_adapter_t* CmphVectorAdapter(std::vector<std::string>& v)
-{
- cmph_io_adapter_t * key_source = CmphVectorAdapterNew(v);
-
- key_source->read = CmphVectorAdapterRead;
- key_source->dispose = CmphVectorAdapterDispose;
- key_source->rewind = CmphVectorAdapterRewind;
- return key_source;
-}
-
-}
-
-#endif
diff --git a/contrib/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.h b/contrib/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.h
deleted file mode 100644
index 20d43a80c..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.h
+++ /dev/null
@@ -1,108 +0,0 @@
-// $Id$
-// vim:tabstop=2
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#ifndef moses_CmphStringVectorAdapterNew_h
-#define moses_CmphStringVectorAdapterNew_h
-
-#include <cassert>
-#include <cstring>
-
-#ifdef HAVE_CMPH
-#include "cmph.h"
-
-#include "StringVector.h"
-
-namespace Moses2
-{
-
-typedef struct
-{
- void *vector;
- cmph_uint32 position;
-} cmph_vector_t;
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-cmph_io_adapter_t *CmphStringVectorAdapterNew(
- StringVector<ValueT, PosT, Allocator>& sv)
-{
- cmph_io_adapter_t * key_source = (cmph_io_adapter_t *) malloc(
- sizeof(cmph_io_adapter_t));
- cmph_vector_t * cmph_vector = (cmph_vector_t *) malloc(sizeof(cmph_vector_t));
- assert(key_source);
- assert(cmph_vector);
-
- cmph_vector->vector = (void *) &sv;
- cmph_vector->position = 0;
- key_source->data = (void *) cmph_vector;
- key_source->nkeys = sv.size();
-
- return key_source;
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-int CmphStringVectorAdapterRead(void *data, char **key, cmph_uint32 *keylen)
-{
- cmph_vector_t *cmph_vector = (cmph_vector_t *) data;
- StringVector<ValueT, PosT, Allocator>* sv = (StringVector<ValueT, PosT,
- Allocator>*) cmph_vector->vector;
- size_t size;
- *keylen = (*sv)[cmph_vector->position].size();
- size = *keylen;
- *key = new char[size + 1];
- std::string temp = (*sv)[cmph_vector->position];
- std::strcpy(*key, temp.c_str());
- cmph_vector->position = cmph_vector->position + 1;
- return (int) (*keylen);
-}
-
-void CmphStringVectorAdapterDispose(void *data, char *key, cmph_uint32 keylen);
-
-void CmphStringVectorAdapterRewind(void *data);
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-cmph_io_adapter_t* CmphStringVectorAdapter(
- StringVector<ValueT, PosT, Allocator>& sv)
-{
- cmph_io_adapter_t * key_source = CmphStringVectorAdapterNew(sv);
-
- key_source->read = CmphStringVectorAdapterRead<ValueT, PosT, Allocator>;
- key_source->dispose = CmphStringVectorAdapterDispose;
- key_source->rewind = CmphStringVectorAdapterRewind;
- return key_source;
-}
-
-//************************************************************************//
-
-cmph_io_adapter_t *CmphVectorAdapterNew(std::vector<std::string>& v);
-
-int CmphVectorAdapterRead(void *data, char **key, cmph_uint32 *keylen);
-
-void CmphVectorAdapterDispose(void *data, char *key, cmph_uint32 keylen);
-
-void CmphVectorAdapterRewind(void *data);
-
-cmph_io_adapter_t* CmphVectorAdapter(std::vector<std::string>& v);
-
-}
-
-#endif
-
-#endif
diff --git a/contrib/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp b/contrib/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp
deleted file mode 100644
index 1d32b9a6f..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp
+++ /dev/null
@@ -1,173 +0,0 @@
-// -*- c++ -*-
-// vim:tabstop=2
-// $Id$
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#include "LexicalReorderingTableCompact.h"
-#include "../../SubPhrase.h"
-#include "../../legacy/Util2.h"
-
-namespace Moses2
-{
-
-//////////////////////////////////////////////////////////////////////////////////////////////
-
-bool LexicalReorderingTableCompact::s_inMemoryByDefault = false;
-
-LexicalReorderingTableCompact::LexicalReorderingTableCompact(
- const std::string& filePath, const std::vector<FactorType>& f_factors,
- const std::vector<FactorType>& e_factors,
- const std::vector<FactorType>& c_factors) :
- LexicalReorderingTable(f_factors, e_factors, c_factors), m_inMemory(
- s_inMemoryByDefault), m_numScoreComponent(6), m_multipleScoreTrees(
- true), m_hash(10, 16), m_scoreTrees(1)
-{
- Load(filePath);
-}
-
-LexicalReorderingTableCompact::LexicalReorderingTableCompact(
- const std::vector<FactorType>& f_factors,
- const std::vector<FactorType>& e_factors,
- const std::vector<FactorType>& c_factors) :
- LexicalReorderingTable(f_factors, e_factors, c_factors), m_inMemory(
- s_inMemoryByDefault), m_numScoreComponent(6), m_multipleScoreTrees(
- true), m_hash(10, 16), m_scoreTrees(1)
-{
-}
-
-LexicalReorderingTableCompact::~LexicalReorderingTableCompact()
-{
- for (size_t i = 0; i < m_scoreTrees.size(); i++)
- delete m_scoreTrees[i];
-}
-
-std::vector<float> LexicalReorderingTableCompact::GetScore(const Phrase<Moses2::Word>& f,
- const Phrase<Moses2::Word>& e, const Phrase<Moses2::Word>& c)
-{
- std::string key;
- std::vector<float> scores;
-
- if (0 == c.GetSize()) key = MakeKey(f, e, c);
- else {
- for (size_t i = 0; i <= c.GetSize(); ++i) {
- SubPhrase<Moses2::Word> sub_c = c.GetSubPhrase(i, c.GetSize() - i);
- key = MakeKey(f, e, sub_c);
- }
- }
-
- size_t index = m_hash[key];
- if (m_hash.GetSize() != index) {
- std::string scoresString;
- if (m_inMemory) scoresString = m_scoresMemory[index].str();
- else scoresString = m_scoresMapped[index].str();
-
- BitWrapper<> bitStream(scoresString);
- for (size_t i = 0; i < m_numScoreComponent; i++)
- scores.push_back(
- m_scoreTrees[m_multipleScoreTrees ? i : 0]->Read(bitStream));
-
- return scores;
- }
-
- return std::vector<float>();
-}
-
-std::string LexicalReorderingTableCompact::MakeKey(const Phrase<Moses2::Word>& f,
- const Phrase<Moses2::Word>& e, const Phrase<Moses2::Word>& c) const
-{
- return MakeKey(Trim(f.GetString(m_FactorsF)), Trim(e.GetString(m_FactorsE)),
- Trim(c.GetString(m_FactorsC)));
-}
-
-std::string LexicalReorderingTableCompact::MakeKey(const std::string& f,
- const std::string& e, const std::string& c) const
-{
- std::string key;
- if (!f.empty()) key += f;
- if (!m_FactorsE.empty()) {
- if (!key.empty()) key += " ||| ";
- key += e;
- }
- if (!m_FactorsC.empty()) {
- if (!key.empty()) key += " ||| ";
- key += c;
- }
- key += " ||| ";
- return key;
-}
-
-LexicalReorderingTable*
-LexicalReorderingTableCompact::CheckAndLoad(const std::string& filePath,
- const std::vector<FactorType>& f_factors,
- const std::vector<FactorType>& e_factors,
- const std::vector<FactorType>& c_factors)
-{
-#ifdef HAVE_CMPH
- std::string minlexr = ".minlexr";
- // file name is specified without suffix
- if (FileExists(filePath + minlexr)) {
- //there exists a compact binary version use that
- std::cerr << "Using compact lexical reordering table" << std::endl;
- return new LexicalReorderingTableCompact(filePath + minlexr, f_factors,
- e_factors, c_factors);
- }
- // file name is specified with suffix
- if (filePath.substr(filePath.length() - minlexr.length(), minlexr.length())
- == minlexr && FileExists(filePath)) {
- //there exists a compact binary version use that
- std::cerr << "Using compact lexical reordering table" << std::endl;
- return new LexicalReorderingTableCompact(filePath, f_factors, e_factors,
- c_factors);
- }
-#endif
- return 0;
-}
-
-void LexicalReorderingTableCompact::Load(std::string filePath)
-{
- std::FILE* pFile = std::fopen(filePath.c_str(), "r");
- UTIL_THROW_IF2(pFile == NULL, "File " << filePath << " could not be opened");
-
- //if(m_inMemory)
- m_hash.Load(pFile);
- //else
- //m_hash.LoadIndex(pFile);
-
- size_t read = 0;
- read += std::fread(&m_numScoreComponent, sizeof(m_numScoreComponent), 1,
- pFile);
- read += std::fread(&m_multipleScoreTrees, sizeof(m_multipleScoreTrees), 1,
- pFile);
-
- if (m_multipleScoreTrees) {
- m_scoreTrees.resize(m_numScoreComponent);
- for (size_t i = 0; i < m_numScoreComponent; i++)
- m_scoreTrees[i] = new CanonicalHuffman<float>(pFile);
- }
- else {
- m_scoreTrees.resize(1);
- m_scoreTrees[0] = new CanonicalHuffman<float>(pFile);
- }
-
- if (m_inMemory) m_scoresMemory.load(pFile, false);
- else m_scoresMapped.load(pFile, true);
-}
-
-}
diff --git a/contrib/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.h b/contrib/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.h
deleted file mode 100644
index 90abf4197..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.h
+++ /dev/null
@@ -1,143 +0,0 @@
-// $Id$
-// vim:tabstop=2
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#ifndef moses_LexicalReorderingTableCompact_h
-#define moses_LexicalReorderingTableCompact_h
-
-#include "BlockHashIndex.h"
-#include "CanonicalHuffman.h"
-#include "StringVector.h"
-#include "../../TypeDef.h"
-#include "../../Phrase.h"
-
-namespace Moses2
-{
-
-//! additional types
-class LexicalReorderingTable
-{
-public:
- LexicalReorderingTable(const FactorList& f_factors,
- const FactorList& e_factors, const FactorList& c_factors) :
- m_FactorsF(f_factors), m_FactorsE(e_factors), m_FactorsC(c_factors)
- {
- }
-
- virtual ~LexicalReorderingTable()
- {
- }
-
-public:
-
- virtual std::vector<float>
- GetScore(const Phrase<Moses2::Word>& f, const Phrase<Moses2::Word>& e, const Phrase<Moses2::Word>& c) = 0;
-
- virtual
- void InitializeForInput()
- {
- /* override for on-demand loading */
- }
- ;
-
- virtual
- void InitializeForInputPhrase(const Phrase<Moses2::Word>&)
- {
- }
-
- const FactorList& GetFFactorMask() const
- {
- return m_FactorsF;
- }
- const FactorList& GetEFactorMask() const
- {
- return m_FactorsE;
- }
- const FactorList& GetCFactorMask() const
- {
- return m_FactorsC;
- }
-
- virtual
- void DbgDump(std::ostream* out) const
- {
- *out << "Overwrite in subclass...\n";
- }
- ;
- // why is this not a pure virtual function? - UG
-
-protected:
- FactorList m_FactorsF;
- FactorList m_FactorsE;
- FactorList m_FactorsC;
-};
-
-//////////////////////////////////////////////////////////////////////////////////////////////
-class LexicalReorderingTableCompact: public LexicalReorderingTable
-{
-private:
- static bool s_inMemoryByDefault;
- bool m_inMemory;
-
- size_t m_numScoreComponent;
- bool m_multipleScoreTrees;
-
- BlockHashIndex m_hash;
-
- typedef CanonicalHuffman<float> ScoreTree;
- std::vector<ScoreTree*> m_scoreTrees;
-
- StringVector<unsigned char, unsigned long, MmapAllocator> m_scoresMapped;
- StringVector<unsigned char, unsigned long, std::allocator> m_scoresMemory;
-
- std::string MakeKey(const Phrase<Moses2::Word>& f, const Phrase<Moses2::Word>& e, const Phrase<Moses2::Word>& c) const;
- std::string MakeKey(const std::string& f, const std::string& e,
- const std::string& c) const;
-
-public:
- LexicalReorderingTableCompact(const std::string& filePath,
- const std::vector<FactorType>& f_factors,
- const std::vector<FactorType>& e_factors,
- const std::vector<FactorType>& c_factors);
-
- LexicalReorderingTableCompact(const std::vector<FactorType>& f_factors,
- const std::vector<FactorType>& e_factors,
- const std::vector<FactorType>& c_factors);
-
- virtual
- ~LexicalReorderingTableCompact();
-
- virtual std::vector<float>
- GetScore(const Phrase<Moses2::Word>& f, const Phrase<Moses2::Word>& e, const Phrase<Moses2::Word>& c);
-
- static LexicalReorderingTable*
- CheckAndLoad(const std::string& filePath,
- const std::vector<FactorType>& f_factors,
- const std::vector<FactorType>& e_factors,
- const std::vector<FactorType>& c_factors);
-
- void
- Load(std::string filePath);
-
-};
-
-}
-
-#endif
diff --git a/contrib/moses2/TranslationModel/CompactPT/ListCoders.h b/contrib/moses2/TranslationModel/CompactPT/ListCoders.h
deleted file mode 100644
index 5a01274d9..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/ListCoders.h
+++ /dev/null
@@ -1,394 +0,0 @@
-// $Id$
-// vim:tabstop=2
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#ifndef moses_ListCoders_h
-#define moses_ListCoders_h
-
-#include <cmath>
-#include <cassert>
-
-namespace Moses2
-{
-
-template<typename T = unsigned int>
-class VarIntType
-{
-private:
- template<typename IntType, typename OutIt>
- static void EncodeSymbol(IntType input, OutIt output)
- {
- if (input == 0) {
- *output = 0;
- output++;
- return;
- }
-
- T msb = 1 << (sizeof(T) * 8 - 1);
- IntType mask = ~msb;
- IntType shift = (sizeof(T) * 8 - 1);
-
- while (input) {
- T res = input & mask;
- input >>= shift;
- if (input) res |= msb;
- *output = res;
- output++;
- }
- }
- ;
-
- template<typename InIt, typename IntType>
- static void DecodeSymbol(InIt &it, InIt end, IntType &output)
- {
- T msb = 1 << (sizeof(T) * 8 - 1);
- IntType shift = (sizeof(T) * 8 - 1);
-
- output = 0;
- size_t i = 0;
- while (it != end && *it & msb) {
- IntType temp = *it & ~msb;
- temp <<= shift * i;
- output |= temp;
- it++;
- i++;
- }
- assert(it != end);
-
- IntType temp = *it;
- temp <<= shift * i;
- output |= temp;
- it++;
- }
-
-public:
-
- template<typename InIt, typename OutIt>
- static void Encode(InIt it, InIt end, OutIt outIt)
- {
- while (it != end) {
- EncodeSymbol(*it, outIt);
- it++;
- }
- }
-
- template<typename InIt, typename OutIt>
- static void Decode(InIt &it, InIt end, OutIt outIt)
- {
- while (it != end) {
- size_t output;
- DecodeSymbol(it, end, output);
- *outIt = output;
- outIt++;
- }
- }
-
- template<typename InIt>
- static size_t DecodeAndSum(InIt &it, InIt end, size_t num)
- {
- size_t sum = 0;
- size_t curr = 0;
-
- while (it != end && curr < num) {
- size_t output;
- DecodeSymbol(it, end, output);
- sum += output;
- curr++;
- }
-
- return sum;
- }
-
-};
-
-typedef VarIntType<unsigned char> VarByte;
-
-typedef VarByte VarInt8;
-typedef VarIntType<unsigned short> VarInt16;
-typedef VarIntType<unsigned int> VarInt32;
-
-class Simple9
-{
-private:
- typedef unsigned int uint;
-
- template<typename InIt>
- inline static void EncodeSymbol(uint &output, InIt it, InIt end)
- {
- uint length = end - it;
-
- uint type = 0;
- uint bitlength = 0;
-
- switch (length) {
- case 1:
- type = 1;
- bitlength = 28;
- break;
- case 2:
- type = 2;
- bitlength = 14;
- break;
- case 3:
- type = 3;
- bitlength = 9;
- break;
- case 4:
- type = 4;
- bitlength = 7;
- break;
- case 5:
- type = 5;
- bitlength = 5;
- break;
- case 7:
- type = 6;
- bitlength = 4;
- break;
- case 9:
- type = 7;
- bitlength = 3;
- break;
- case 14:
- type = 8;
- bitlength = 2;
- break;
- case 28:
- type = 9;
- bitlength = 1;
- break;
- }
-
- output = 0;
- output |= (type << 28);
-
- uint i = 0;
- while (it != end) {
- UTIL_THROW_IF2(*it > 268435455,
- "You are trying to encode " << *it
- << " with Simple9. Cannot encode numbers larger than 268435455 (2^28-1)");
-
- uint l = bitlength * (length - i - 1);
- output |= *it << l;
- it++;
- i++;
- }
- }
-
- template<typename OutIt>
- static inline void DecodeSymbol(uint input, OutIt outIt)
- {
- uint type = (input >> 28);
-
- uint bitlen = 0;
- uint shift = 0;
- uint mask = 0;
-
- switch (type) {
- case 1:
- bitlen = 28;
- shift = 0;
- mask = 268435455;
- break;
- case 2:
- bitlen = 14;
- shift = 14;
- mask = 16383;
- break;
- case 3:
- bitlen = 9;
- shift = 18;
- mask = 511;
- break;
- case 4:
- bitlen = 7;
- shift = 21;
- mask = 127;
- break;
- case 5:
- bitlen = 5;
- shift = 20;
- mask = 31;
- break;
- case 6:
- bitlen = 4;
- shift = 24;
- mask = 15;
- break;
- case 7:
- bitlen = 3;
- shift = 24;
- mask = 7;
- break;
- case 8:
- bitlen = 2;
- shift = 26;
- mask = 3;
- break;
- case 9:
- bitlen = 1;
- shift = 27;
- mask = 1;
- break;
- }
-
- while (shift > 0) {
- *outIt = (input >> shift) & mask;
- shift -= bitlen;
- outIt++;
- }
- *outIt = input & mask;
- outIt++;
- }
-
- static inline size_t DecodeAndSumSymbol(uint input, size_t num, size_t &curr)
- {
- uint type = (input >> 28);
-
- uint bitlen = 0;
- uint shift = 0;
- uint mask = 0;
-
- switch (type) {
- case 1:
- bitlen = 28;
- shift = 0;
- mask = 268435455;
- break;
- case 2:
- bitlen = 14;
- shift = 14;
- mask = 16383;
- break;
- case 3:
- bitlen = 9;
- shift = 18;
- mask = 511;
- break;
- case 4:
- bitlen = 7;
- shift = 21;
- mask = 127;
- break;
- case 5:
- bitlen = 5;
- shift = 20;
- mask = 31;
- break;
- case 6:
- bitlen = 4;
- shift = 24;
- mask = 15;
- break;
- case 7:
- bitlen = 3;
- shift = 24;
- mask = 7;
- break;
- case 8:
- bitlen = 2;
- shift = 26;
- mask = 3;
- break;
- case 9:
- bitlen = 1;
- shift = 27;
- mask = 1;
- break;
- }
-
- size_t sum = 0;
- while (shift > 0) {
- sum += (input >> shift) & mask;
- shift -= bitlen;
- if (++curr == num) return sum;
- }
- sum += input & mask;
- curr++;
- return sum;
- }
-
-public:
- template<typename InIt, typename OutIt>
- static void Encode(InIt it, InIt end, OutIt outIt)
- {
- uint parts[] = { 1, 2, 3, 4, 5, 7, 9, 14, 28 };
-
- uint buffer[28];
- for (InIt i = it; i < end; i++) {
- uint lastbit = 1;
- uint lastpos = 0;
- uint lastyes = 0;
- uint j = 0;
-
- double log2 = log(2);
- while (j < 9 && lastpos < 28 && (i + lastpos) < end) {
- if (lastpos >= parts[j]) j++;
-
- buffer[lastpos] = *(i + lastpos);
-
- uint reqbit = ceil(log(buffer[lastpos] + 1) / log2);
- assert(reqbit <= 28);
-
- uint bit = 28 / floor(28 / reqbit);
- if (lastbit < bit) lastbit = bit;
-
- if (parts[j] > 28 / lastbit) break;
- else if (lastpos == parts[j] - 1) lastyes = lastpos;
-
- lastpos++;
- }
- i += lastyes;
-
- uint length = lastyes + 1;
- uint output;
- EncodeSymbol(output, buffer, buffer + length);
-
- *outIt = output;
- outIt++;
- }
- }
-
- template<typename InIt, typename OutIt>
- static void Decode(InIt &it, InIt end, OutIt outIt)
- {
- while (it != end) {
- DecodeSymbol(*it, outIt);
- it++;
- }
- }
-
- template<typename InIt>
- static size_t DecodeAndSum(InIt &it, InIt end, size_t num)
- {
- size_t sum = 0;
- size_t curr = 0;
- while (it != end && curr < num) {
- sum += DecodeAndSumSymbol(*it, num, curr);
- it++;
- }
- assert(curr == num);
- return sum;
- }
-};
-
-}
-
-#endif
diff --git a/contrib/moses2/TranslationModel/CompactPT/MmapAllocator.h b/contrib/moses2/TranslationModel/CompactPT/MmapAllocator.h
deleted file mode 100644
index 1e40d8d41..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/MmapAllocator.h
+++ /dev/null
@@ -1,217 +0,0 @@
-// $Id$
-// vim:tabstop=2
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#ifndef moses_MmapAllocator_h
-#define moses_MmapAllocator_h
-
-#include <limits>
-#include <iostream>
-#include <cstdio>
-#include <unistd.h>
-
-#if defined(_WIN32) || defined(_WIN64)
-#include <windows.h>
-#include <io.h>
-#else
-#include <sys/mman.h>
-#endif
-
-#include "util/mmap.hh"
-
-namespace Moses2
-{
-template<class T>
-class MmapAllocator
-{
-protected:
- std::FILE* m_file_ptr;
- size_t m_file_desc;
-
- size_t m_page_size;
- size_t m_map_size;
-
- char* m_data_ptr;
- size_t m_data_offset;
- bool m_fixed;
- size_t* m_count;
-
-public:
- typedef T value_type;
- typedef T* pointer;
- typedef const T* const_pointer;
- typedef T& reference;
- typedef const T& const_reference;
- typedef std::size_t size_type;
- typedef std::ptrdiff_t difference_type;
-
- MmapAllocator() throw () :
- m_file_ptr(std::tmpfile()), m_file_desc(fileno(m_file_ptr)), m_page_size(
- util::SizePage()), m_map_size(0), m_data_ptr(0), m_data_offset(0), m_fixed(
- false), m_count(new size_t(0))
- {
- }
-
- MmapAllocator(std::FILE* f_ptr) throw () :
- m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)), m_page_size(
- util::SizePage()), m_map_size(0), m_data_ptr(0), m_data_offset(0), m_fixed(
- false), m_count(new size_t(0))
- {
- }
-
- MmapAllocator(std::FILE* f_ptr, size_t data_offset) throw () :
- m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)), m_page_size(
- util::SizePage()), m_map_size(0), m_data_ptr(0), m_data_offset(
- data_offset), m_fixed(true), m_count(new size_t(0))
- {
- }
-
- MmapAllocator(std::string fileName) throw () :
- m_file_ptr(std::fopen(fileName.c_str(), "wb+")), m_file_desc(
- fileno(m_file_ptr)), m_page_size(util::SizePage()), m_map_size(0), m_data_ptr(
- 0), m_data_offset(0), m_fixed(false), m_count(new size_t(0))
- {
- }
-
- MmapAllocator(const MmapAllocator& c) throw () :
- m_file_ptr(c.m_file_ptr), m_file_desc(c.m_file_desc), m_page_size(
- c.m_page_size), m_map_size(c.m_map_size), m_data_ptr(c.m_data_ptr), m_data_offset(
- c.m_data_offset), m_fixed(c.m_fixed), m_count(c.m_count)
- {
- (*m_count)++;
- }
-
- ~MmapAllocator() throw ()
- {
- if (m_data_ptr && *m_count == 0) {
- util::UnmapOrThrow(m_data_ptr, m_map_size);
- if (!m_fixed && std::ftell(m_file_ptr) != -1) std::fclose(m_file_ptr);
- }
- (*m_count)--;
- }
-
- template<class U>
- struct rebind
- {
- typedef MmapAllocator<U> other;
- };
-
- pointer address(reference value) const
- {
- return &value;
- }
-
- const_pointer address(const_reference value) const
- {
- return &value;
- }
-
- size_type max_size() const throw ()
- {
- return std::numeric_limits<size_t>::max() / sizeof(value_type);
- }
-
- pointer allocate(size_type num, const void* = 0)
- {
- m_map_size = num * sizeof(T);
-
-#if defined(_WIN32) || defined(_WIN64)
- // On Windows, MAP_SHARED is not defined and MapOrThrow ignores the flags.
- const int map_shared = 0;
-#else
- const int map_shared = MAP_SHARED;
-#endif
- if (!m_fixed) {
- size_t read = 0;
- read += ftruncate(m_file_desc, m_map_size);
- m_data_ptr = (char *) util::MapOrThrow(m_map_size, true, map_shared,
- false, m_file_desc, 0);
- return (pointer) m_data_ptr;
- }
- else {
- const size_t map_offset = (m_data_offset / m_page_size) * m_page_size;
- const size_t relative_offset = m_data_offset - map_offset;
- const size_t adjusted_map_size = m_map_size + relative_offset;
-
- m_data_ptr = (char *) util::MapOrThrow(adjusted_map_size, false,
- map_shared, false, m_file_desc, map_offset);
-
- return (pointer) (m_data_ptr + relative_offset);
- }
- }
-
- void deallocate(pointer p, size_type num)
- {
- if (!m_fixed) {
- util::UnmapOrThrow(p, num * sizeof(T));
- }
- else {
- const size_t map_offset = (m_data_offset / m_page_size) * m_page_size;
- const size_t relative_offset = m_data_offset - map_offset;
- const size_t adjusted_map_size = m_map_size + relative_offset;
-
- util::UnmapOrThrow((pointer) ((char*) p - relative_offset),
- adjusted_map_size);
- }
- }
-
- void construct(pointer p, const T& value)
- {
- if (!m_fixed) new (p) value_type(value);
- }
- void destroy(pointer p)
- {
- if (!m_fixed) p->~T();
- }
-
- template<class T1, class T2>
- friend bool operator==(const MmapAllocator<T1>&,
- const MmapAllocator<T2>&) throw ();
-
- template<class T1, class T2>
- friend bool operator!=(const MmapAllocator<T1>&,
- const MmapAllocator<T2>&) throw ();
-};
-
-template<class T1, class T2>
-bool operator==(const MmapAllocator<T1>& a1,
- const MmapAllocator<T2>& a2) throw ()
-{
- bool equal = true;
- equal &= a1.m_file_ptr == a2.m_file_ptr;
- equal &= a1.m_file_desc == a2.m_file_desc;
- equal &= a1.m_page_size == a2.m_page_size;
- equal &= a1.m_map_size == a2.m_map_size;
- equal &= a1.m_data_ptr == a2.m_data_ptr;
- equal &= a1.m_data_offset == a2.m_data_offset;
- equal &= a1.m_fixed == a2.m_fixed;
- return equal;
-}
-
-template<class T1, class T2>
-bool operator!=(const MmapAllocator<T1>& a1,
- const MmapAllocator<T2>& a2) throw ()
-{
- return !(a1 == a2);
-}
-
-}
-
-#endif
diff --git a/contrib/moses2/TranslationModel/CompactPT/MonotonicVector.h b/contrib/moses2/TranslationModel/CompactPT/MonotonicVector.h
deleted file mode 100644
index 586397db8..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/MonotonicVector.h
+++ /dev/null
@@ -1,247 +0,0 @@
-// $Id$
-// vim:tabstop=2
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#ifndef moses_MonotonicVector_h
-#define moses_MonotonicVector_h
-
-// MonotonicVector - Represents a monotonic increasing function that maps
-// positive integers of any size onto a given number type. Each value has to be
-// equal or larger than the previous one. Depending on the stepSize it can save
-// up to 90% of memory compared to a std::vector<long>. Time complexity is roughly
-// constant, in the worst case, however, stepSize times slower than a normal
-// std::vector.
-
-#include <vector>
-#include <limits>
-#include <algorithm>
-#include <cstdio>
-#include <cassert>
-
-#include "ThrowingFwrite.h"
-#include "ListCoders.h"
-#include "MmapAllocator.h"
-
-namespace Moses2
-{
-
-template<typename PosT = size_t, typename NumT = size_t, PosT stepSize = 32,
- template<typename > class Allocator = std::allocator>
-class MonotonicVector
-{
-private:
- typedef std::vector<NumT, Allocator<NumT> > Anchors;
- typedef std::vector<unsigned int, Allocator<unsigned int> > Diffs;
-
- Anchors m_anchors;
- Diffs m_diffs;
- std::vector<unsigned int> m_tempDiffs;
-
- size_t m_size;
- PosT m_last;
- bool m_final;
-
-public:
- typedef PosT value_type;
-
- MonotonicVector() :
- m_size(0), m_last(0), m_final(false)
- {
- }
-
- size_t size() const
- {
- return m_size + m_tempDiffs.size();
- }
-
- PosT at(size_t i) const
- {
- PosT s = stepSize;
- PosT j = m_anchors[i / s];
- PosT r = i % s;
-
- typename Diffs::const_iterator it = m_diffs.begin() + j;
-
- PosT k = 0;
- k += VarInt32::DecodeAndSum(it, m_diffs.end(), 1);
- if (i < m_size) k += Simple9::DecodeAndSum(it, m_diffs.end(), r);
- else if (i < m_size + m_tempDiffs.size()) for (size_t l = 0; l < r; l++)
- k += m_tempDiffs[l];
-
- return k;
- }
-
- PosT operator[](PosT i) const
- {
- return at(i);
- }
-
- PosT back() const
- {
- return at(size() - 1);
- }
-
- void push_back(PosT i)
- {
- assert(m_final != true);
-
- if (m_anchors.size() == 0 && m_tempDiffs.size() == 0) {
- m_anchors.push_back(0);
- VarInt32::Encode(&i, &i + 1, std::back_inserter(m_diffs));
- m_last = i;
- m_size++;
-
- return;
- }
-
- if (m_tempDiffs.size() == stepSize - 1) {
- Simple9::Encode(m_tempDiffs.begin(), m_tempDiffs.end(),
- std::back_inserter(m_diffs));
- m_anchors.push_back(m_diffs.size());
- VarInt32::Encode(&i, &i + 1, std::back_inserter(m_diffs));
-
- m_size += m_tempDiffs.size() + 1;
- m_tempDiffs.clear();
- }
- else {
- PosT last = m_last;
- PosT diff = i - last;
- m_tempDiffs.push_back(diff);
- }
- m_last = i;
- }
-
- void commit()
- {
- assert(m_final != true);
- Simple9::Encode(m_tempDiffs.begin(), m_tempDiffs.end(),
- std::back_inserter(m_diffs));
- m_size += m_tempDiffs.size();
- m_tempDiffs.clear();
- m_final = true;
- }
-
- size_t usage()
- {
- return m_diffs.size() * sizeof(unsigned int)
- + m_anchors.size() * sizeof(NumT);
- }
-
- size_t load(std::FILE* in, bool map = false)
- {
- size_t byteSize = 0;
-
- byteSize += fread(&m_final, sizeof(bool), 1, in) * sizeof(bool);
- byteSize += fread(&m_size, sizeof(size_t), 1, in) * sizeof(size_t);
- byteSize += fread(&m_last, sizeof(PosT), 1, in) * sizeof(PosT);
-
- byteSize += loadVector(m_diffs, in, map);
- byteSize += loadVector(m_anchors, in, map);
-
- return byteSize;
- }
-
- template<typename ValueT>
- size_t loadVector(std::vector<ValueT, std::allocator<ValueT> >& v,
- std::FILE* in, bool map = false)
- {
- // Can only be read into memory. Mapping not possible with std:allocator.
- assert(map == false);
-
- size_t byteSize = 0;
-
- size_t valSize;
- byteSize += std::fread(&valSize, sizeof(size_t), 1, in) * sizeof(size_t);
-
- v.resize(valSize, 0);
- byteSize += std::fread(&v[0], sizeof(ValueT), valSize, in) * sizeof(ValueT);
-
- return byteSize;
- }
-
- template<typename ValueT>
- size_t loadVector(std::vector<ValueT, MmapAllocator<ValueT> >& v,
- std::FILE* in, bool map = false)
- {
- size_t byteSize = 0;
-
- size_t valSize;
- byteSize += std::fread(&valSize, sizeof(size_t), 1, in) * sizeof(size_t);
-
- if (map == false) {
- // Read data into temporary file (default constructor of MmapAllocator)
- // and map memory onto temporary file. Can be resized.
-
- v.resize(valSize, 0);
- byteSize += std::fread(&v[0], sizeof(ValueT), valSize, in)
- * sizeof(ValueT);
- }
- else {
- // Map it directly on specified region of file "in" starting at valPos
- // with length valSize * sizeof(ValueT). Mapped region cannot be resized.
-
- size_t valPos = std::ftell(in);
-
- Allocator<ValueT> alloc(in, valPos);
- std::vector<ValueT, Allocator<ValueT> > vTemp(alloc);
- vTemp.resize(valSize);
- v.swap(vTemp);
-
- std::fseek(in, valSize * sizeof(ValueT), SEEK_CUR);
- byteSize += valSize * sizeof(ValueT);
- }
-
- return byteSize;
- }
-
- size_t save(std::FILE* out)
- {
- if (!m_final) commit();
-
- bool byteSize = 0;
- byteSize += ThrowingFwrite(&m_final, sizeof(bool), 1, out) * sizeof(bool);
- byteSize += ThrowingFwrite(&m_size, sizeof(size_t), 1, out)
- * sizeof(size_t);
- byteSize += ThrowingFwrite(&m_last, sizeof(PosT), 1, out) * sizeof(PosT);
-
- size_t size = m_diffs.size();
- byteSize += ThrowingFwrite(&size, sizeof(size_t), 1, out) * sizeof(size_t);
- byteSize += ThrowingFwrite(&m_diffs[0], sizeof(unsigned int), size, out)
- * sizeof(unsigned int);
-
- size = m_anchors.size();
- byteSize += ThrowingFwrite(&size, sizeof(size_t), 1, out) * sizeof(size_t);
- byteSize += ThrowingFwrite(&m_anchors[0], sizeof(NumT), size, out)
- * sizeof(NumT);
-
- return byteSize;
- }
-
- void swap(MonotonicVector<PosT, NumT, stepSize, Allocator> &mv)
- {
- if (!m_final) commit();
-
- m_diffs.swap(mv.m_diffs);
- m_anchors.swap(mv.m_anchors);
- }
-};
-
-}
-#endif
diff --git a/contrib/moses2/TranslationModel/CompactPT/MurmurHash3.cpp b/contrib/moses2/TranslationModel/CompactPT/MurmurHash3.cpp
deleted file mode 100644
index c3e567af6..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/MurmurHash3.cpp
+++ /dev/null
@@ -1,424 +0,0 @@
-//-----------------------------------------------------------------------------
-// MurmurHash3 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-// Note - The x86 and x64 versions do _not_ produce the same results, as the
-// algorithms are optimized for their respective platforms. You can still
-// compile and run any of them on any platform, but your performance with the
-// non-native version will be less than optimal.
-
-#include "MurmurHash3.h"
-
-//-----------------------------------------------------------------------------
-// Platform-specific functions and macros
-
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-#define FORCE_INLINE __forceinline
-
-#include <cstdlib>
-
-#define ROTL32(x,y) _rotl(x,y)
-#define ROTL64(x,y) _rotl64(x,y)
-
-#define BIG_CONSTANT(x) (x)
-
-// Other compilers
-
-#else // defined(_MSC_VER)
-
-#define FORCE_INLINE inline __attribute__((always_inline))
-
-inline uint32_t rotl32(uint32_t x, int8_t r)
-{
- return (x << r) | (x >> (32 - r));
-}
-
-inline uint64_t rotl64(uint64_t x, int8_t r)
-{
- return (x << r) | (x >> (64 - r));
-}
-
-#define ROTL32(x,y) rotl32(x,y)
-#define ROTL64(x,y) rotl64(x,y)
-
-#define BIG_CONSTANT(x) (x##LLU)
-
-#endif // !defined(_MSC_VER)
-
-//-----------------------------------------------------------------------------
-// Block read - if your platform needs to do endian-swapping or can only
-// handle aligned reads, do the conversion here
-
-FORCE_INLINE uint32_t getblock(const uint32_t * p, int i)
-{
- return p[i];
-}
-
-FORCE_INLINE uint64_t getblock(const uint64_t * p, int i)
-{
- return p[i];
-}
-
-//-----------------------------------------------------------------------------
-// Finalization mix - force all bits of a hash block to avalanche
-
-FORCE_INLINE uint32_t fmix(uint32_t h)
-{
- h ^= h >> 16;
- h *= 0x85ebca6b;
- h ^= h >> 13;
- h *= 0xc2b2ae35;
- h ^= h >> 16;
-
- return h;
-}
-
-//----------
-
-FORCE_INLINE uint64_t fmix(uint64_t k)
-{
- k ^= k >> 33;
- k *= BIG_CONSTANT(0xff51afd7ed558ccd);
- k ^= k >> 33;
- k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
- k ^= k >> 33;
-
- return k;
-}
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x86_32(const void * key, int len, uint32_t seed, void * out)
-{
- const uint8_t * data = (const uint8_t*) key;
- const int nblocks = len / 4;
-
- uint32_t h1 = seed;
-
- uint32_t c1 = 0xcc9e2d51;
- uint32_t c2 = 0x1b873593;
-
- //----------
- // body
-
- const uint32_t * blocks = (const uint32_t *) (data + nblocks * 4);
-
- for (int i = -nblocks; i; i++) {
- uint32_t k1 = getblock(blocks, i);
-
- k1 *= c1;
- k1 = ROTL32(k1, 15);
- k1 *= c2;
-
- h1 ^= k1;
- h1 = ROTL32(h1, 13);
- h1 = h1 * 5 + 0xe6546b64;
- }
-
- //----------
- // tail
-
- const uint8_t * tail = (const uint8_t*) (data + nblocks * 4);
-
- uint32_t k1 = 0;
-
- switch (len & 3) {
- case 3:
- k1 ^= tail[2] << 16;
- case 2:
- k1 ^= tail[1] << 8;
- case 1:
- k1 ^= tail[0];
- k1 *= c1;
- k1 = ROTL32(k1, 15);
- k1 *= c2;
- h1 ^= k1;
- };
-
- //----------
- // finalization
-
- h1 ^= len;
-
- h1 = fmix(h1);
-
- *(uint32_t*) out = h1;
-}
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x86_128(const void * key, const int len, uint32_t seed,
- void * out)
-{
- const uint8_t * data = (const uint8_t*) key;
- const int nblocks = len / 16;
-
- uint32_t h1 = seed;
- uint32_t h2 = seed;
- uint32_t h3 = seed;
- uint32_t h4 = seed;
-
- uint32_t c1 = 0x239b961b;
- uint32_t c2 = 0xab0e9789;
- uint32_t c3 = 0x38b34ae5;
- uint32_t c4 = 0xa1e38b93;
-
- //----------
- // body
-
- const uint32_t * blocks = (const uint32_t *) (data + nblocks * 16);
-
- for (int i = -nblocks; i; i++) {
- uint32_t k1 = getblock(blocks, i * 4 + 0);
- uint32_t k2 = getblock(blocks, i * 4 + 1);
- uint32_t k3 = getblock(blocks, i * 4 + 2);
- uint32_t k4 = getblock(blocks, i * 4 + 3);
-
- k1 *= c1;
- k1 = ROTL32(k1, 15);
- k1 *= c2;
- h1 ^= k1;
-
- h1 = ROTL32(h1, 19);
- h1 += h2;
- h1 = h1 * 5 + 0x561ccd1b;
-
- k2 *= c2;
- k2 = ROTL32(k2, 16);
- k2 *= c3;
- h2 ^= k2;
-
- h2 = ROTL32(h2, 17);
- h2 += h3;
- h2 = h2 * 5 + 0x0bcaa747;
-
- k3 *= c3;
- k3 = ROTL32(k3, 17);
- k3 *= c4;
- h3 ^= k3;
-
- h3 = ROTL32(h3, 15);
- h3 += h4;
- h3 = h3 * 5 + 0x96cd1c35;
-
- k4 *= c4;
- k4 = ROTL32(k4, 18);
- k4 *= c1;
- h4 ^= k4;
-
- h4 = ROTL32(h4, 13);
- h4 += h1;
- h4 = h4 * 5 + 0x32ac3b17;
- }
-
- //----------
- // tail
-
- const uint8_t * tail = (const uint8_t*) (data + nblocks * 16);
-
- uint32_t k1 = 0;
- uint32_t k2 = 0;
- uint32_t k3 = 0;
- uint32_t k4 = 0;
-
- switch (len & 15) {
- case 15:
- k4 ^= tail[14] << 16;
- case 14:
- k4 ^= tail[13] << 8;
- case 13:
- k4 ^= tail[12] << 0;
- k4 *= c4;
- k4 = ROTL32(k4, 18);
- k4 *= c1;
- h4 ^= k4;
-
- case 12:
- k3 ^= tail[11] << 24;
- case 11:
- k3 ^= tail[10] << 16;
- case 10:
- k3 ^= tail[9] << 8;
- case 9:
- k3 ^= tail[8] << 0;
- k3 *= c3;
- k3 = ROTL32(k3, 17);
- k3 *= c4;
- h3 ^= k3;
-
- case 8:
- k2 ^= tail[7] << 24;
- case 7:
- k2 ^= tail[6] << 16;
- case 6:
- k2 ^= tail[5] << 8;
- case 5:
- k2 ^= tail[4] << 0;
- k2 *= c2;
- k2 = ROTL32(k2, 16);
- k2 *= c3;
- h2 ^= k2;
-
- case 4:
- k1 ^= tail[3] << 24;
- case 3:
- k1 ^= tail[2] << 16;
- case 2:
- k1 ^= tail[1] << 8;
- case 1:
- k1 ^= tail[0] << 0;
- k1 *= c1;
- k1 = ROTL32(k1, 15);
- k1 *= c2;
- h1 ^= k1;
- };
-
- //----------
- // finalization
-
- h1 ^= len;
- h2 ^= len;
- h3 ^= len;
- h4 ^= len;
-
- h1 += h2;
- h1 += h3;
- h1 += h4;
- h2 += h1;
- h3 += h1;
- h4 += h1;
-
- h1 = fmix(h1);
- h2 = fmix(h2);
- h3 = fmix(h3);
- h4 = fmix(h4);
-
- h1 += h2;
- h1 += h3;
- h1 += h4;
- h2 += h1;
- h3 += h1;
- h4 += h1;
-
- ((uint32_t*) out)[0] = h1;
- ((uint32_t*) out)[1] = h2;
- ((uint32_t*) out)[2] = h3;
- ((uint32_t*) out)[3] = h4;
-}
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x64_128(const void * key, const int len, const uint32_t seed,
- void * out)
-{
- const uint8_t * data = (const uint8_t*) key;
- const int nblocks = len / 16;
-
- uint64_t h1 = seed;
- uint64_t h2 = seed;
-
- uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
- uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
-
- //----------
- // body
-
- const uint64_t * blocks = (const uint64_t *) (data);
-
- for (int i = 0; i < nblocks; i++) {
- uint64_t k1 = getblock(blocks, i * 2 + 0);
- uint64_t k2 = getblock(blocks, i * 2 + 1);
-
- k1 *= c1;
- k1 = ROTL64(k1, 31);
- k1 *= c2;
- h1 ^= k1;
-
- h1 = ROTL64(h1, 27);
- h1 += h2;
- h1 = h1 * 5 + 0x52dce729;
-
- k2 *= c2;
- k2 = ROTL64(k2, 33);
- k2 *= c1;
- h2 ^= k2;
-
- h2 = ROTL64(h2, 31);
- h2 += h1;
- h2 = h2 * 5 + 0x38495ab5;
- }
-
- //----------
- // tail
-
- const uint8_t * tail = (const uint8_t*) (data + nblocks * 16);
-
- uint64_t k1 = 0;
- uint64_t k2 = 0;
-
- switch (len & 15) {
- case 15:
- k2 ^= uint64_t(tail[14]) << 48;
- case 14:
- k2 ^= uint64_t(tail[13]) << 40;
- case 13:
- k2 ^= uint64_t(tail[12]) << 32;
- case 12:
- k2 ^= uint64_t(tail[11]) << 24;
- case 11:
- k2 ^= uint64_t(tail[10]) << 16;
- case 10:
- k2 ^= uint64_t(tail[9]) << 8;
- case 9:
- k2 ^= uint64_t(tail[8]) << 0;
- k2 *= c2;
- k2 = ROTL64(k2, 33);
- k2 *= c1;
- h2 ^= k2;
-
- case 8:
- k1 ^= uint64_t(tail[7]) << 56;
- case 7:
- k1 ^= uint64_t(tail[6]) << 48;
- case 6:
- k1 ^= uint64_t(tail[5]) << 40;
- case 5:
- k1 ^= uint64_t(tail[4]) << 32;
- case 4:
- k1 ^= uint64_t(tail[3]) << 24;
- case 3:
- k1 ^= uint64_t(tail[2]) << 16;
- case 2:
- k1 ^= uint64_t(tail[1]) << 8;
- case 1:
- k1 ^= uint64_t(tail[0]) << 0;
- k1 *= c1;
- k1 = ROTL64(k1, 31);
- k1 *= c2;
- h1 ^= k1;
- };
-
- //----------
- // finalization
-
- h1 ^= len;
- h2 ^= len;
-
- h1 += h2;
- h2 += h1;
-
- h1 = fmix(h1);
- h2 = fmix(h2);
-
- h1 += h2;
- h2 += h1;
-
- ((uint64_t*) out)[0] = h1;
- ((uint64_t*) out)[1] = h2;
-}
-
-//-----------------------------------------------------------------------------
-
diff --git a/contrib/moses2/TranslationModel/CompactPT/MurmurHash3.h b/contrib/moses2/TranslationModel/CompactPT/MurmurHash3.h
deleted file mode 100644
index f513008cf..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/MurmurHash3.h
+++ /dev/null
@@ -1,37 +0,0 @@
-//-----------------------------------------------------------------------------
-// MurmurHash3 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-#ifndef _MURMURHASH3_H_
-#define _MURMURHASH3_H_
-
-//-----------------------------------------------------------------------------
-// Platform-specific functions and macros
-
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-typedef unsigned char uint8_t;
-typedef unsigned long uint32_t;
-typedef unsigned __int64 uint64_t;
-
-// Other compilers
-
-#else // defined(_MSC_VER)
-
-#include <stdint.h>
-
-#endif // !defined(_MSC_VER)
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x86_32(const void * key, int len, uint32_t seed, void * out);
-
-void MurmurHash3_x86_128(const void * key, int len, uint32_t seed, void * out);
-
-void MurmurHash3_x64_128(const void * key, int len, uint32_t seed, void * out);
-
-//-----------------------------------------------------------------------------
-
-#endif // _MURMURHASH3_H_
diff --git a/contrib/moses2/TranslationModel/CompactPT/PackedArray.h b/contrib/moses2/TranslationModel/CompactPT/PackedArray.h
deleted file mode 100644
index 409c3cca8..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/PackedArray.h
+++ /dev/null
@@ -1,207 +0,0 @@
-// $Id$
-// vim:tabstop=2
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#ifndef moses_PackedArray_h
-#define moses_PackedArray_h
-
-#include <vector>
-#include <cmath>
-#include <cstring>
-#include <cstdio>
-
-#include "ThrowingFwrite.h"
-
-namespace Moses2
-{
-
-template<typename T = size_t, typename D = unsigned char>
-class PackedArray
-{
-protected:
- static size_t m_dataBits;
-
- size_t m_size;
- size_t m_storageSize;
- D* m_storage;
-
-public:
- PackedArray()
- {
- m_size = 0;
- m_storageSize = 0;
- m_storage = new D[0];
- }
-
- PackedArray(size_t size, size_t bits) :
- m_size(size)
- {
- m_storageSize = ceil(float(bits * size) / float(m_dataBits));
- m_storage = new D[m_storageSize];
- }
-
- PackedArray(const PackedArray<T, D> &c)
- {
- m_size = c.m_size;
-
- m_storageSize = c.m_storageSize;
- m_storage = new D[m_storageSize];
-
- std::memcpy(m_storage, c.m_storage, m_storageSize * sizeof(D));
- }
-
- virtual ~PackedArray()
- {
- delete[] m_storage;
- m_size = 0;
- m_storageSize = 0;
- m_storage = 0;
- }
-
- T Get(size_t i, size_t bits) const
- {
- T out = 0;
-
- size_t bitstart = (i * bits);
- size_t bitpos = bitstart;
-
- size_t zero = ((1ul << (bits)) - 1);
-
- while (bitpos - bitstart < bits) {
- size_t pos = bitpos / m_dataBits;
- size_t off = bitpos % m_dataBits;
-
- out |= (T(m_storage[pos]) << (bitpos - bitstart)) >> off;
-
- bitpos += (m_dataBits - off);
- }
-
- out &= zero;
- return out;
- }
-
- void Set(size_t i, T v, size_t bits)
- {
- size_t bitstart = (i * bits);
- size_t bitpos = bitstart;
-
- while (bitpos - bitstart < bits) {
- size_t pos = bitpos / m_dataBits;
- size_t off = bitpos % m_dataBits;
-
- size_t rest = bits - (bitpos - bitstart);
- D zero = ~((1ul << (rest + off)) - 1) | ((1ul << off) - 1);
-
- m_storage[pos] &= zero;
- m_storage[pos] |= v << off;
- v = v >> (m_dataBits - off);
- bitpos += (m_dataBits - off);
- }
- }
-
- virtual D*& GetStorage()
- {
- return m_storage;
- }
-
- virtual size_t GetStorageSize() const
- {
- return m_storageSize;
- }
-
- virtual size_t Size() const
- {
- return m_size;
- }
-
- virtual size_t Load(std::FILE* in)
- {
- size_t a1 = std::ftell(in);
-
- size_t read = 0;
- read += std::fread(&m_size, sizeof(m_size), 1, in);
- read += std::fread(&m_storageSize, sizeof(m_storageSize), 1, in);
- delete[] m_storage;
- m_storage = new D[m_storageSize];
- read += std::fread(m_storage, sizeof(D), m_storageSize, in);
-
- size_t a2 = std::ftell(in);
- return a2 - a1;
- }
-
- virtual size_t Save(std::FILE* out)
- {
- size_t a1 = std::ftell(out);
-
- ThrowingFwrite(&m_size, sizeof(m_size), 1, out);
- ThrowingFwrite(&m_storageSize, sizeof(m_storageSize), 1, out);
- ThrowingFwrite(m_storage, sizeof(D), m_storageSize, out);
-
- size_t a2 = std::ftell(out);
- return a2 - a1;
- }
-
-};
-
-template<typename T, typename D>
-size_t PackedArray<T, D>::m_dataBits = sizeof(D) * 8;
-
-/**************************************************************************/
-
-template<typename T = size_t, typename D = unsigned char>
-class PairedPackedArray: public PackedArray<T, D>
-{
-public:
- PairedPackedArray() :
- PackedArray<T, D>()
- {
- }
-
- PairedPackedArray(size_t size, size_t bits1, size_t bits2) :
- PackedArray<T, D>(size, bits1 + bits2)
- {
- }
-
- void Set(size_t i, T a, T b, size_t bits1, size_t bits2)
- {
- T c = 0;
- c = a | (b << bits1);
- PackedArray<T, D>::Set(i, c, bits1 + bits2);
- }
-
- void Set(size_t i, std::pair<T, T> p, size_t bits1, size_t bits2)
- {
- T c = 0;
- c = p.second | (p.first << bits1);
- PackedArray<T, D>::Set(i, c);
- }
-
- std::pair<T, T> Get(size_t i, size_t bits1, size_t bits2)
- {
- T v = PackedArray<T, D>::Get(i, bits1 + bits2);
- T a = v & ((1 << bits1) - 1);
- T b = v >> bits1;
- return std::pair<T, T>(a, b);
- }
-};
-
-}
-
-#endif
diff --git a/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp b/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp
deleted file mode 100644
index 7860fed94..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp
+++ /dev/null
@@ -1,466 +0,0 @@
-// $Id$
-// vim:tabstop=2
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (C) 2006 University of Edinburgh
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-
-#include <deque>
-
-#include "PhraseDecoder.h"
-#include "../../System.h"
-#include "../../SubPhrase.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-PhraseDecoder::PhraseDecoder(
- PhraseTableCompact &phraseDictionary,
- const std::vector<FactorType>* input,
- const std::vector<FactorType>* output,
- size_t numScoreComponent
- // , const std::vector<float>* weight
-)
- : m_coding(None), m_numScoreComponent(numScoreComponent),
- m_containsAlignmentInfo(true), m_maxRank(0),
- m_symbolTree(0), m_multipleScoreTrees(false),
- m_scoreTrees(1), m_alignTree(0),
- m_phraseDictionary(phraseDictionary), m_input(input), m_output(output),
- // m_weight(weight),
- m_separator(" ||| ")
-{ }
-
-PhraseDecoder::~PhraseDecoder()
-{
- if(m_symbolTree)
- delete m_symbolTree;
-
- for(size_t i = 0; i < m_scoreTrees.size(); i++)
- if(m_scoreTrees[i])
- delete m_scoreTrees[i];
-
- if(m_alignTree)
- delete m_alignTree;
-}
-
-inline unsigned PhraseDecoder::GetSourceSymbolId(std::string& symbol)
-{
- boost::unordered_map<std::string, unsigned>::iterator it
- = m_sourceSymbolsMap.find(symbol);
- if(it != m_sourceSymbolsMap.end())
- return it->second;
-
- size_t idx = m_sourceSymbols.find(symbol);
- m_sourceSymbolsMap[symbol] = idx;
- return idx;
-}
-
-inline std::string PhraseDecoder::GetTargetSymbol(unsigned idx) const
-{
- if(idx < m_targetSymbols.size())
- return m_targetSymbols[idx];
- return std::string("##ERROR##");
-}
-
-inline size_t PhraseDecoder::GetREncType(unsigned encodedSymbol)
-{
- return (encodedSymbol >> 30) + 1;
-}
-
-inline size_t PhraseDecoder::GetPREncType(unsigned encodedSymbol)
-{
- return (encodedSymbol >> 31) + 1;
-}
-
-inline unsigned PhraseDecoder::GetTranslation(unsigned srcIdx, size_t rank)
-{
- size_t srcTrgIdx = m_lexicalTableIndex[srcIdx];
- return m_lexicalTable[srcTrgIdx + rank].second;
-}
-
-size_t PhraseDecoder::GetMaxSourcePhraseLength()
-{
- return m_maxPhraseLength;
-}
-
-inline unsigned PhraseDecoder::DecodeREncSymbol1(unsigned encodedSymbol)
-{
- return encodedSymbol &= ~(3 << 30);
-}
-
-inline unsigned PhraseDecoder::DecodeREncSymbol2Rank(unsigned encodedSymbol)
-{
- return encodedSymbol &= ~(255 << 24);
-}
-
-inline unsigned PhraseDecoder::DecodeREncSymbol2Position(unsigned encodedSymbol)
-{
- encodedSymbol &= ~(3 << 30);
- encodedSymbol >>= 24;
- return encodedSymbol;
-}
-
-inline unsigned PhraseDecoder::DecodeREncSymbol3(unsigned encodedSymbol)
-{
- return encodedSymbol &= ~(3 << 30);
-}
-
-inline unsigned PhraseDecoder::DecodePREncSymbol1(unsigned encodedSymbol)
-{
- return encodedSymbol &= ~(1 << 31);
-}
-
-inline int PhraseDecoder::DecodePREncSymbol2Left(unsigned encodedSymbol)
-{
- return ((encodedSymbol >> 25) & 63) - 32;
-}
-
-inline int PhraseDecoder::DecodePREncSymbol2Right(unsigned encodedSymbol)
-{
- return ((encodedSymbol >> 19) & 63) - 32;
-}
-
-inline unsigned PhraseDecoder::DecodePREncSymbol2Rank(unsigned encodedSymbol)
-{
- return (encodedSymbol & 524287);
-}
-
-size_t PhraseDecoder::Load(std::FILE* in)
-{
- size_t start = std::ftell(in);
- size_t read = 0;
-
- read += std::fread(&m_coding, sizeof(m_coding), 1, in);
- read += std::fread(&m_numScoreComponent, sizeof(m_numScoreComponent), 1, in);
- read += std::fread(&m_containsAlignmentInfo, sizeof(m_containsAlignmentInfo), 1, in);
- read += std::fread(&m_maxRank, sizeof(m_maxRank), 1, in);
- read += std::fread(&m_maxPhraseLength, sizeof(m_maxPhraseLength), 1, in);
-
- if(m_coding == REnc) {
- m_sourceSymbols.load(in);
-
- size_t size;
- read += std::fread(&size, sizeof(size_t), 1, in);
- m_lexicalTableIndex.resize(size);
- read += std::fread(&m_lexicalTableIndex[0], sizeof(size_t), size, in);
-
- read += std::fread(&size, sizeof(size_t), 1, in);
- m_lexicalTable.resize(size);
- read += std::fread(&m_lexicalTable[0], sizeof(SrcTrg), size, in);
- }
-
- m_targetSymbols.load(in);
-
- m_symbolTree = new CanonicalHuffman<unsigned>(in);
-
- read += std::fread(&m_multipleScoreTrees, sizeof(m_multipleScoreTrees), 1, in);
- if(m_multipleScoreTrees) {
- m_scoreTrees.resize(m_numScoreComponent);
- for(size_t i = 0; i < m_numScoreComponent; i++)
- m_scoreTrees[i] = new CanonicalHuffman<float>(in);
- } else {
- m_scoreTrees.resize(1);
- m_scoreTrees[0] = new CanonicalHuffman<float>(in);
- }
-
- if(m_containsAlignmentInfo)
- m_alignTree = new CanonicalHuffman<AlignPoint>(in);
-
- size_t end = std::ftell(in);
- return end - start;
-}
-
-std::string PhraseDecoder::MakeSourceKey(std::string &source)
-{
- return source + m_separator;
-}
-
-TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(
- const ManagerBase &mgr,
- const Phrase<Word> &sourcePhrase,
- bool topLevel,
- bool eval)
-{
-
- // Not using TargetPhraseCollection avoiding "new" operator
- // which can introduce heavy locking with multiple threads
- TargetPhraseVectorPtr tpv(new TargetPhraseVector());
- size_t bitsLeft = 0;
-
- if(m_coding == PREnc) {
- std::pair<TargetPhraseVectorPtr, size_t> cachedPhraseColl
- = m_decodingCache.Retrieve(sourcePhrase);
-
- // Has been cached and is complete or does not need to be completed
- if(cachedPhraseColl.first != NULL && (!topLevel || cachedPhraseColl.second == 0))
- return cachedPhraseColl.first;
-
- // Has been cached, but is incomplete
- else if(cachedPhraseColl.first != NULL) {
- bitsLeft = cachedPhraseColl.second;
- tpv->resize(cachedPhraseColl.first->size());
- std::copy(cachedPhraseColl.first->begin(),
- cachedPhraseColl.first->end(),
- tpv->begin());
- }
- }
-
- // Retrieve source phrase identifier
- std::string sourcePhraseString = sourcePhrase.GetString(*m_input);
- size_t sourcePhraseId = m_phraseDictionary.m_hash[MakeSourceKey(sourcePhraseString)];
- /*
- cerr << "sourcePhraseString=" << sourcePhraseString << " "
- << sourcePhraseId
- << endl;
- */
- if(sourcePhraseId != m_phraseDictionary.m_hash.GetSize()) {
- // Retrieve compressed and encoded target phrase collection
- std::string encodedPhraseCollection;
- if(m_phraseDictionary.m_inMemory)
- encodedPhraseCollection = m_phraseDictionary.m_targetPhrasesMemory[sourcePhraseId].str();
- else
- encodedPhraseCollection = m_phraseDictionary.m_targetPhrasesMapped[sourcePhraseId].str();
-
- BitWrapper<> encodedBitStream(encodedPhraseCollection);
- if(m_coding == PREnc && bitsLeft)
- encodedBitStream.SeekFromEnd(bitsLeft);
-
- // Decompress and decode target phrase collection
- TargetPhraseVectorPtr decodedPhraseColl =
- DecodeCollection(mgr, tpv, encodedBitStream, sourcePhrase, topLevel, eval);
-
- return decodedPhraseColl;
- } else
- return TargetPhraseVectorPtr();
-}
-
-TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
- const ManagerBase &mgr,
- TargetPhraseVectorPtr tpv,
- BitWrapper<> &encodedBitStream,
- const Phrase<Word> &sourcePhrase,
- bool topLevel,
- bool eval)
-{
- const System &system = mgr.system;
- FactorCollection &vocab = system.GetVocab();
-
- bool extending = tpv->size();
- size_t bitsLeft = encodedBitStream.TellFromEnd();
-
- std::vector<int> sourceWords;
- if(m_coding == REnc) {
- for(size_t i = 0; i < sourcePhrase.GetSize(); i++) {
- std::string sourceWord
- = sourcePhrase[i].GetString(*m_input);
- unsigned idx = GetSourceSymbolId(sourceWord);
- sourceWords.push_back(idx);
- }
- }
-
- unsigned phraseStopSymbol = 0;
- AlignPoint alignStopSymbol(-1, -1);
-
- std::vector<float> scores;
- std::set<AlignPointSizeT> alignment;
-
- enum DecodeState { New, Symbol, Score, Alignment, Add } state = New;
-
- size_t srcSize = sourcePhrase.GetSize();
-
- TPCompact* targetPhrase = NULL;
- while(encodedBitStream.TellFromEnd()) {
-
- if(state == New) {
- // Creating new TargetPhrase on the heap
- tpv->push_back(TPCompact());
- targetPhrase = &tpv->back();
-
- alignment.clear();
- scores.clear();
-
- state = Symbol;
- }
-
- if(state == Symbol) {
- unsigned symbol = m_symbolTree->Read(encodedBitStream);
- if(symbol == phraseStopSymbol) {
- state = Score;
- } else {
- if(m_coding == REnc) {
- std::string wordString;
- size_t type = GetREncType(symbol);
-
- if(type == 1) {
- unsigned decodedSymbol = DecodeREncSymbol1(symbol);
- wordString = GetTargetSymbol(decodedSymbol);
- } else if (type == 2) {
- size_t rank = DecodeREncSymbol2Rank(symbol);
- size_t srcPos = DecodeREncSymbol2Position(symbol);
-
- if(srcPos >= sourceWords.size())
- return TargetPhraseVectorPtr();
-
- wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank));
- if(m_phraseDictionary.m_useAlignmentInfo) {
- size_t trgPos = targetPhrase->words.size();
- alignment.insert(AlignPoint(srcPos, trgPos));
- }
- } else if(type == 3) {
- size_t rank = DecodeREncSymbol3(symbol);
- size_t srcPos = targetPhrase->words.size();
-
- if(srcPos >= sourceWords.size())
- return TargetPhraseVectorPtr();
-
- wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank));
- if(m_phraseDictionary.m_useAlignmentInfo) {
- size_t trgPos = srcPos;
- alignment.insert(AlignPoint(srcPos, trgPos));
- }
- }
-
- Word word;
- word.CreateFromString(vocab, system, wordString);
- targetPhrase->words.push_back(word);
- } else if(m_coding == PREnc) {
- // if the symbol is just a word
- if(GetPREncType(symbol) == 1) {
- unsigned decodedSymbol = DecodePREncSymbol1(symbol);
-
- Word word;
- word.CreateFromString(vocab, system, GetTargetSymbol(decodedSymbol));
- targetPhrase->words.push_back(word);
- }
- // if the symbol is a subphrase pointer
- else {
- int left = DecodePREncSymbol2Left(symbol);
- int right = DecodePREncSymbol2Right(symbol);
- unsigned rank = DecodePREncSymbol2Rank(symbol);
-
- int srcStart = left + targetPhrase->words.size();
- int srcEnd = srcSize - right - 1;
-
- // false positive consistency check
- if(0 > srcStart || srcStart > srcEnd || unsigned(srcEnd) >= srcSize)
- return TargetPhraseVectorPtr();
-
- // false positive consistency check
- if(m_maxRank && rank > m_maxRank)
- return TargetPhraseVectorPtr();
-
- // set subphrase by default to itself
- TargetPhraseVectorPtr subTpv = tpv;
-
- // if range smaller than source phrase retrieve subphrase
- if(unsigned(srcEnd - srcStart + 1) != srcSize) {
- SubPhrase<Word> subPhrase = sourcePhrase.GetSubPhrase(srcStart, srcEnd - srcStart + 1);
- subTpv = CreateTargetPhraseCollection(mgr, subPhrase, false);
- } else {
- // false positive consistency check
- if(rank >= tpv->size()-1)
- return TargetPhraseVectorPtr();
- }
-
- // false positive consistency check
- if(subTpv != NULL && rank < subTpv->size()) {
- // insert the subphrase into the main target phrase
- TPCompact& subTp = subTpv->at(rank);
- if(m_phraseDictionary.m_useAlignmentInfo) {
- // reconstruct the alignment data based on the alignment of the subphrase
- for(std::set<AlignPointSizeT>::const_iterator it = subTp.alignment.begin();
- it != subTp.alignment.end(); it++) {
- alignment.insert(AlignPointSizeT(srcStart + it->first,
- targetPhrase->words.size() + it->second));
- }
- }
-
- std::copy(subTp.words.begin(), subTp.words.end(), std::back_inserter(targetPhrase->words));
- } else
- return TargetPhraseVectorPtr();
- }
- } else {
- Word word;
- word.CreateFromString(vocab, system, GetTargetSymbol(symbol));
- targetPhrase->words.push_back(word);
- }
- }
- } else if(state == Score) {
- size_t idx = m_multipleScoreTrees ? scores.size() : 0;
- float score = m_scoreTrees[idx]->Read(encodedBitStream);
- scores.push_back(score);
-
- if(scores.size() == m_numScoreComponent) {
- targetPhrase->scores = scores;
-
- if(m_containsAlignmentInfo)
- state = Alignment;
- else
- state = Add;
- }
- } else if(state == Alignment) {
- AlignPoint alignPoint = m_alignTree->Read(encodedBitStream);
- if(alignPoint == alignStopSymbol) {
- state = Add;
- } else {
- if(m_phraseDictionary.m_useAlignmentInfo)
- alignment.insert(AlignPointSizeT(alignPoint));
- }
- }
-
- if(state == Add) {
- if(m_phraseDictionary.m_useAlignmentInfo) {
- size_t sourceSize = sourcePhrase.GetSize();
- size_t targetSize = targetPhrase->words.size();
- for(std::set<AlignPointSizeT>::iterator it = alignment.begin(); it != alignment.end(); it++) {
- if(it->first >= sourceSize || it->second >= targetSize)
- return TargetPhraseVectorPtr();
- }
- targetPhrase->alignment = alignment;
- }
-
- if(m_coding == PREnc) {
- if(!m_maxRank || tpv->size() <= m_maxRank)
- bitsLeft = encodedBitStream.TellFromEnd();
-
- if(!topLevel && m_maxRank && tpv->size() >= m_maxRank)
- break;
- }
-
- if(encodedBitStream.TellFromEnd() <= 8)
- break;
-
- state = New;
- }
- }
-
- if(m_coding == PREnc && !extending) {
- bitsLeft = bitsLeft > 8 ? bitsLeft : 0;
- m_decodingCache.Cache(sourcePhrase, tpv, bitsLeft, m_maxRank);
- }
-
- return tpv;
-}
-
-void PhraseDecoder::PruneCache()
-{
- m_decodingCache.Prune();
-}
-
-}
diff --git a/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.h b/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.h
deleted file mode 100644
index 79faa38a6..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.h
+++ /dev/null
@@ -1,142 +0,0 @@
-// $Id$
-// vim:tabstop=2
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (C) 2006 University of Edinburgh
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-
-#pragma once
-
-#include <sstream>
-#include <vector>
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
-#include <string>
-#include <iterator>
-#include <algorithm>
-#include <sys/stat.h>
-
-#include "PhraseTableCompact.h"
-#include "StringVector.h"
-#include "CanonicalHuffman.h"
-#include "TargetPhraseCollectionCache.h"
-
-#include "../../Phrase.h"
-#include "../../ManagerBase.h"
-
-namespace Moses2
-{
-
-class PhraseTableCompact;
-
-class PhraseDecoder
-{
-protected:
-
- friend class PhraseTableCompact;
-
- typedef std::pair<unsigned char, unsigned char> AlignPoint;
- typedef std::pair<unsigned, unsigned> SrcTrg;
-
- enum Coding { None, REnc, PREnc } m_coding;
-
- size_t m_numScoreComponent;
- bool m_containsAlignmentInfo;
- size_t m_maxRank;
- size_t m_maxPhraseLength;
-
- boost::unordered_map<std::string, unsigned> m_sourceSymbolsMap;
- StringVector<unsigned char, unsigned, std::allocator> m_sourceSymbols;
- StringVector<unsigned char, unsigned, std::allocator> m_targetSymbols;
-
- std::vector<size_t> m_lexicalTableIndex;
- std::vector<SrcTrg> m_lexicalTable;
-
- CanonicalHuffman<unsigned>* m_symbolTree;
-
- bool m_multipleScoreTrees;
- std::vector<CanonicalHuffman<float>*> m_scoreTrees;
-
- CanonicalHuffman<AlignPoint>* m_alignTree;
-
- TargetPhraseCollectionCache m_decodingCache;
-
- PhraseTableCompact& m_phraseDictionary;
-
- // ***********************************************
-
- const std::vector<FactorType>* m_input;
- const std::vector<FactorType>* m_output;
-
- std::string m_separator;
-
- // ***********************************************
-
- unsigned GetSourceSymbolId(std::string& s);
- std::string GetTargetSymbol(unsigned id) const;
-
- size_t GetREncType(unsigned encodedSymbol);
- size_t GetPREncType(unsigned encodedSymbol);
-
- unsigned GetTranslation(unsigned srcIdx, size_t rank);
-
- size_t GetMaxSourcePhraseLength();
-
- unsigned DecodeREncSymbol1(unsigned encodedSymbol);
- unsigned DecodeREncSymbol2Rank(unsigned encodedSymbol);
- unsigned DecodeREncSymbol2Position(unsigned encodedSymbol);
- unsigned DecodeREncSymbol3(unsigned encodedSymbol);
-
- unsigned DecodePREncSymbol1(unsigned encodedSymbol);
- int DecodePREncSymbol2Left(unsigned encodedSymbol);
- int DecodePREncSymbol2Right(unsigned encodedSymbol);
- unsigned DecodePREncSymbol2Rank(unsigned encodedSymbol);
-
- std::string MakeSourceKey(std::string &);
-
-public:
-
- PhraseDecoder(
- PhraseTableCompact &phraseDictionary,
- const std::vector<FactorType>* input,
- const std::vector<FactorType>* output,
- size_t numScoreComponent
- );
-
- ~PhraseDecoder();
-
- size_t Load(std::FILE* in);
-
- TargetPhraseVectorPtr CreateTargetPhraseCollection(
- const ManagerBase &mgr,
- const Phrase<Word> &sourcePhrase,
- bool topLevel = false,
- bool eval = true);
-
- TargetPhraseVectorPtr DecodeCollection(
- const ManagerBase &mgr,
- TargetPhraseVectorPtr tpv,
- BitWrapper<> &encodedBitStream,
- const Phrase<Word> &sourcePhrase,
- bool topLevel,
- bool eval);
-
- void PruneCache();
-};
-
-}
-
diff --git a/contrib/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp b/contrib/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp
deleted file mode 100644
index 49244df1b..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp
+++ /dev/null
@@ -1,222 +0,0 @@
-#include <boost/algorithm/string/predicate.hpp>
-#include <boost/thread/tss.hpp>
-#include "PhraseTableCompact.h"
-#include "PhraseDecoder.h"
-#include "../../PhraseBased/InputPath.h"
-#include "../../PhraseBased/Manager.h"
-#include "../../PhraseBased/TargetPhrases.h"
-#include "../../PhraseBased/TargetPhraseImpl.h"
-#include "../../PhraseBased/Sentence.h"
-
-using namespace std;
-using namespace boost::algorithm;
-
-namespace Moses2
-{
-bool PhraseTableCompact::s_inMemoryByDefault = false;
-
-PhraseTableCompact::PhraseTableCompact(size_t startInd, const std::string &line)
-:PhraseTable(startInd, line)
-,m_inMemory(s_inMemoryByDefault)
-,m_useAlignmentInfo(true)
-,m_hash(10, 16)
-,m_phraseDecoder(0)
-{
- ReadParameters();
-}
-
-PhraseTableCompact::~PhraseTableCompact()
-{
-
-}
-
-void PhraseTableCompact::Load(System &system)
-{
- std::string tFilePath = m_path;
-
- std::string suffix = ".minphr";
- if (!ends_with(tFilePath, suffix)) tFilePath += suffix;
- if (!FileExists(tFilePath))
- throw runtime_error("Error: File " + tFilePath + " does not exist.");
-
- m_phraseDecoder
- = new PhraseDecoder(*this, &m_input, &m_output, GetNumScores());
-
- std::FILE* pFile = std::fopen(tFilePath.c_str() , "r");
-
- size_t indexSize;
- //if(m_inMemory)
- // Load source phrase index into memory
- indexSize = m_hash.Load(pFile);
- // else
- // Keep source phrase index on disk
- //indexSize = m_hash.LoadIndex(pFile);
-
- size_t coderSize = m_phraseDecoder->Load(pFile);
-
- size_t phraseSize;
- if(m_inMemory) {
- // Load target phrase collections into memory
- phraseSize = m_targetPhrasesMemory.load(pFile, false);
- }
- else {
- // Keep target phrase collections on disk
- phraseSize = m_targetPhrasesMapped.load(pFile, true);
- }
-
- UTIL_THROW_IF2(indexSize == 0 || coderSize == 0 || phraseSize == 0,
- "Not successfully loaded");
-}
-
-void PhraseTableCompact::SetParameter(const std::string& key, const std::string& value)
-{
- if (key == "blah") {
-
- }
- else {
- PhraseTable::SetParameter(key, value);
- }
-}
-
-void PhraseTableCompact::CleanUpAfterSentenceProcessing() const
-{
- //if(!m_sentenceCache.get())
- // m_sentenceCache.reset(new PhraseCache());
-
- m_phraseDecoder->PruneCache();
- //m_sentenceCache->clear();
-}
-
-
-// pb
-void PhraseTableCompact::Lookup(const Manager &mgr, InputPathsBase &inputPaths) const
-{
- size_t inputSize = static_cast<const Sentence&>(mgr.GetInput()).GetSize();
- InputPaths &inputPathsCast = static_cast<InputPaths&>(inputPaths);
-
- for (size_t i = 0; i < inputSize; ++i) {
- for (size_t startPos = 0; startPos < inputSize; ++startPos) {
- size_t endPos = startPos + i;
- if (endPos >= inputSize) {
- break;
- }
- InputPath *path = inputPathsCast.GetMatrix().GetValue(startPos, i);
- //cerr << "path=" << path->Debug(mgr.system) << endl;
- TargetPhrases *tps = Lookup(mgr, mgr.GetPool(), *path);
- path->AddTargetPhrases(*this, tps);
- }
- }
-}
-
-TargetPhrases *PhraseTableCompact::Lookup(const Manager &mgr, MemPool &pool,
- InputPath &inputPath) const
-{
- TargetPhrases *ret = NULL;
-
- const Phrase<Word> &sourcePhrase = inputPath.subPhrase;
- //cerr << "sourcePhrase=" << sourcePhrase.Debug(mgr.system) << endl;
-
- // There is no souch source phrase if source phrase is longer than longest
- // observed source phrase during compilation
- if(sourcePhrase.GetSize() > m_phraseDecoder->GetMaxSourcePhraseLength())
- return ret;
-
- // Retrieve target phrase collection from phrase table
- TargetPhraseVectorPtr decodedPhraseColl
- = m_phraseDecoder->CreateTargetPhraseCollection(mgr, sourcePhrase, true, true);
-
- if(decodedPhraseColl != NULL && decodedPhraseColl->size()) {
- TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl));
- //TargetPhraseCollection::shared_ptr phraseColl(new TargetPhraseCollection);
- ret = new (pool.Allocate<TargetPhrases>()) TargetPhrases(pool, decodedPhraseColl->size());
-
- for (size_t i = 0; i < decodedPhraseColl->size(); ++i) {
- const TPCompact &tpCompact = decodedPhraseColl->at(i);
- const TargetPhraseImpl *tp = CreateTargetPhrase(mgr, tpCompact, sourcePhrase);
-
- ret->AddTargetPhrase(*tp);
- }
-
- ret->SortAndPrune(m_tableLimit);
- mgr.system.featureFunctions.EvaluateAfterTablePruning(pool, *ret, sourcePhrase);
-
- //cerr << "RET2=" << ret->Debug(mgr.system) << endl;
- /*
- // Cache phrase pair for clean-up or retrieval with PREnc
- const_cast<PhraseDictionaryCompact*>(this)->CacheForCleanup(phraseColl);
-
- return phraseColl;
- */
- }
-
- return ret;
-
-}
-
-const TargetPhraseImpl *PhraseTableCompact::CreateTargetPhrase(
- const Manager &mgr,
- const TPCompact &tpCompact,
- const Phrase<Word> &sourcePhrase) const
-{
- MemPool &pool = mgr.GetPool();
-
- size_t size = tpCompact.words.size();
- TargetPhraseImpl *ret = new TargetPhraseImpl(pool, *this, mgr.system, size);
-
- // words
- for (size_t i = 0; i < size; ++i) {
- const Word &compactWord = tpCompact.words[i];
- Word &tpWord = (*ret)[i];
- tpWord = compactWord;
- }
-
- // scores
- Scores &scores = ret->GetScores();
- scores.Assign(mgr.system, *this, tpCompact.scores);
-
- // align
- ret->SetAlignTerm(tpCompact.alignment);
-
- // score
- mgr.system.featureFunctions.EvaluateInIsolation(pool, mgr.system, sourcePhrase, *ret);
-
- // Cache phrase pair for clean-up or retrieval with PREnc
- //const_cast<PhraseDictionaryCompact*>(this)->CacheForCleanup(phraseColl);
-
- //cerr << "ret=" << ret->Debug(mgr.system) << endl;
- return ret;
-}
-
-
-// scfg
-void PhraseTableCompact::InitActiveChart(
- MemPool &pool,
- const SCFG::Manager &mgr,
- SCFG::InputPath &path) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-void PhraseTableCompact::Lookup(
- MemPool &pool,
- const SCFG::Manager &mgr,
- size_t maxChartSpan,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-void PhraseTableCompact::LookupGivenNode(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::ActiveChartEntry &prevEntry,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-}
diff --git a/contrib/moses2/TranslationModel/CompactPT/PhraseTableCompact.h b/contrib/moses2/TranslationModel/CompactPT/PhraseTableCompact.h
deleted file mode 100644
index 84ea7e4b2..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/PhraseTableCompact.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#pragma once
-#include "../PhraseTable.h"
-#include "BlockHashIndex.h"
-
-namespace Moses2
-{
-class PhraseDecoder;
-class TPCompact;
-
-class PhraseTableCompact: public PhraseTable
-{
-public:
- PhraseTableCompact(size_t startInd, const std::string &line);
- virtual ~PhraseTableCompact();
- void Load(System &system);
- virtual void SetParameter(const std::string& key, const std::string& value);
-
- virtual void CleanUpAfterSentenceProcessing() const;
-
- virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool,
- InputPath &inputPath) const;
-
- // scfg
- virtual void InitActiveChart(
- MemPool &pool,
- const SCFG::Manager &mgr,
- SCFG::InputPath &path) const;
-
- virtual void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const;
-
- virtual void Lookup(
- MemPool &pool,
- const SCFG::Manager &mgr,
- size_t maxChartSpan,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const;
-
-protected:
- static bool s_inMemoryByDefault;
- bool m_inMemory;
- bool m_useAlignmentInfo;
-
- BlockHashIndex m_hash;
-
- StringVector<unsigned char, size_t, MmapAllocator> m_targetPhrasesMapped;
- StringVector<unsigned char, size_t, std::allocator> m_targetPhrasesMemory;
-
- friend class PhraseDecoder;
- PhraseDecoder* m_phraseDecoder;
-
- const TargetPhraseImpl *CreateTargetPhrase(
- const Manager &mgr,
- const TPCompact &tpCompact,
- const Phrase<Word> &sourcePhrase) const;
-
- // SCFG
- virtual void LookupGivenNode(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::ActiveChartEntry &prevEntry,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const;
-
-};
-
-}
diff --git a/contrib/moses2/TranslationModel/CompactPT/StringVector.h b/contrib/moses2/TranslationModel/CompactPT/StringVector.h
deleted file mode 100644
index 87d6388bf..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/StringVector.h
+++ /dev/null
@@ -1,662 +0,0 @@
-// $Id$
-// vim:tabstop=2
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#ifndef moses_StringVector_h
-#define moses_StringVector_h
-
-#include <vector>
-#include <algorithm>
-#include <string>
-#include <iterator>
-#include <cstdio>
-#include <cassert>
-
-#include <boost/iterator/iterator_facade.hpp>
-
-#include "ThrowingFwrite.h"
-#include "MonotonicVector.h"
-#include "MmapAllocator.h"
-
-namespace Moses2
-{
-
-// ********** ValueIteratorRange **********
-
-template<typename ValueIteratorT>
-class ValueIteratorRange
-{
-private:
- ValueIteratorT m_begin;
- ValueIteratorT m_end;
-
-public:
- ValueIteratorRange(ValueIteratorT begin, ValueIteratorT end);
-
- const ValueIteratorT& begin() const;
- const ValueIteratorT& end() const;
- const std::string str() const;
- operator const std::string()
- {
- return str();
- }
-
- size_t size()
- {
- return std::distance(m_begin, m_end);
- }
-
- template<typename StringT>
- bool operator==(const StringT& o) const;
- bool operator==(const char* c) const;
-
- template<typename StringT>
- bool operator<(const StringT& o) const;
- bool operator<(const char* c) const;
-};
-
-// ********** StringVector **********
-
-template<typename ValueT = unsigned char, typename PosT = unsigned int,
- template<typename > class Allocator = std::allocator>
-class StringVector
-{
-protected:
- bool m_sorted;
- bool m_memoryMapped;
-
- std::vector<ValueT, Allocator<ValueT> >* m_charArray;
- MonotonicVector<PosT, unsigned int, 32> m_positions;
-
- virtual const ValueT* value_ptr(PosT i) const;
-
-public:
- //typedef ValueIteratorRange<typename std::vector<ValueT, Allocator<ValueT> >::const_iterator> range;
- typedef ValueIteratorRange<const ValueT *> range;
-
- // ********** RangeIterator **********
-
- class RangeIterator: public boost::iterator_facade<RangeIterator, range,
- std::random_access_iterator_tag, range, PosT>
- {
-
- private:
- PosT m_index;
- StringVector<ValueT, PosT, Allocator>* m_container;
-
- public:
- RangeIterator();
- RangeIterator(StringVector<ValueT, PosT, Allocator> &sv, PosT index = 0);
-
- PosT get_index();
-
- private:
- friend class boost::iterator_core_access;
-
- range dereference() const;
- bool equal(RangeIterator const& other) const;
- void increment();
- void decrement();
- void advance(PosT n);
-
- PosT distance_to(RangeIterator const& other) const;
- };
-
- // ********** StringIterator **********
-
- class StringIterator: public boost::iterator_facade<StringIterator,
- std::string, std::random_access_iterator_tag, const std::string, PosT>
- {
-
- private:
- PosT m_index;
- StringVector<ValueT, PosT, Allocator>* m_container;
-
- public:
- StringIterator();
- StringIterator(StringVector<ValueT, PosT, Allocator> &sv, PosT index = 0);
-
- PosT get_index();
-
- private:
- friend class boost::iterator_core_access;
-
- const std::string dereference() const;
- bool equal(StringIterator const& other) const;
- void increment();
- void decrement();
- void advance(PosT n);
- PosT distance_to(StringIterator const& other) const;
- };
-
- typedef RangeIterator iterator;
- typedef StringIterator string_iterator;
-
- StringVector(bool allocate = false);
- StringVector(Allocator<ValueT>& alloc);
-
- virtual ~StringVector()
- {
- delete m_charArray;
- }
-
- void swap(StringVector<ValueT, PosT, Allocator> &c)
- {
- m_positions.commit();
- m_positions.swap(c.m_positions);
- m_charArray->swap(*c.m_charArray);
-
- bool temp = m_sorted;
- m_sorted = c.m_sorted;
- c.m_sorted = temp;
- }
-
- bool is_sorted() const;
- PosT size() const;
- virtual PosT size2() const;
-
- template<class Iterator> Iterator begin() const;
- template<class Iterator> Iterator end() const;
-
- iterator begin() const;
- iterator end() const;
-
- PosT length(PosT i) const;
- //typename std::vector<ValueT, Allocator<ValueT> >::const_iterator begin(PosT i) const;
- //typename std::vector<ValueT, Allocator<ValueT> >::const_iterator end(PosT i) const;
- const ValueT* begin(PosT i) const;
- const ValueT* end(PosT i) const;
-
- void clear()
- {
- m_charArray->clear();
- m_sorted = true;
- m_positions = MonotonicVector<PosT, unsigned int, 32>();
- }
-
- range at(PosT i) const;
- range operator[](PosT i) const;
- range back() const;
-
- template<typename StringT>
- void push_back(StringT s);
- void push_back(const char* c);
-
- template<typename StringT>
- PosT find(StringT &s) const;
- PosT find(const char* c) const;
-
- virtual size_t load(std::FILE* in, bool memoryMapped = false)
- {
- size_t size = 0;
- m_memoryMapped = memoryMapped;
-
- size += std::fread(&m_sorted, sizeof(bool), 1, in) * sizeof(bool);
- size += m_positions.load(in, false);
-
- size += loadCharArray(m_charArray, in, m_memoryMapped);
- return size;
- }
-
- size_t loadCharArray(std::vector<ValueT, std::allocator<ValueT> >*& c,
- std::FILE* in, bool map = false)
- {
- // Can only be read into memory. Mapping not possible with std:allocator.
- assert(map == false);
-
- size_t byteSize = 0;
-
- size_t valSize;
- byteSize += std::fread(&valSize, sizeof(size_t), 1, in) * sizeof(size_t);
-
- c = new std::vector<ValueT, std::allocator<ValueT> >(valSize, 0);
- byteSize += std::fread(&(*c)[0], sizeof(ValueT), valSize, in)
- * sizeof(ValueT);
-
- return byteSize;
- }
-
- size_t loadCharArray(std::vector<ValueT, MmapAllocator<ValueT> >*& c,
- std::FILE* in, bool map = false)
- {
- size_t byteSize = 0;
-
- size_t valSize;
- byteSize += std::fread(&valSize, sizeof(size_t), 1, in) * sizeof(size_t);
-
- if (map == false) {
- // Read data into temporary file (default constructor of MmapAllocator)
- // and map memory onto temporary file. Can be resized.
- c = new std::vector<ValueT, MmapAllocator<ValueT> >(valSize, 0);
- byteSize += std::fread(&(*c)[0], sizeof(ValueT), valSize, in)
- * sizeof(ValueT);
- }
- else {
- // Map it directly on specified region of file "in" starting at valPos
- // with length valSize * sizeof(ValueT). Mapped region cannot be resized.
-
- size_t valPos = std::ftell(in);
- Allocator<ValueT> alloc(in, valPos);
- c = new std::vector<ValueT, Allocator<ValueT> >(alloc);
- c->resize(valSize, 0);
-
- byteSize += valSize * sizeof(ValueT);
- }
-
- return byteSize;
- }
-
- size_t load(std::string filename, bool memoryMapped = false)
- {
- std::FILE* pFile = fopen(filename.c_str(), "r");
- size_t byteSize = load(pFile, memoryMapped);
- fclose(pFile);
- return byteSize;
- }
-
- size_t save(std::FILE* out)
- {
- size_t byteSize = 0;
- byteSize += ThrowingFwrite(&m_sorted, sizeof(bool), 1, out) * sizeof(bool);
-
- byteSize += m_positions.save(out);
-
- size_t valSize = size2();
- byteSize += ThrowingFwrite(&valSize, sizeof(size_t), 1, out)
- * sizeof(size_t);
- byteSize += ThrowingFwrite(&(*m_charArray)[0], sizeof(ValueT), valSize, out)
- * sizeof(ValueT);
-
- return byteSize;
- }
-
- size_t save(std::string filename)
- {
- std::FILE* pFile = fopen(filename.c_str(), "w");
- size_t byteSize = save(pFile);
- fclose(pFile);
- return byteSize;
- }
-
-};
-
-// ********** Implementation **********
-
-// ValueIteratorRange
-
-template<typename ValueIteratorT>
-ValueIteratorRange<ValueIteratorT>::ValueIteratorRange(ValueIteratorT begin,
- ValueIteratorT end) :
- m_begin(begin), m_end(end)
-{
-}
-
-template<typename ValueIteratorT>
-const ValueIteratorT& ValueIteratorRange<ValueIteratorT>::begin() const
-{
- return m_begin;
-}
-
-template<typename ValueIteratorT>
-const ValueIteratorT& ValueIteratorRange<ValueIteratorT>::end() const
-{
- return m_end;
-}
-
-template<typename ValueIteratorT>
-const std::string ValueIteratorRange<ValueIteratorT>::str() const
-{
- std::string dummy;
- for (ValueIteratorT it = m_begin; it != m_end; it++)
- dummy.push_back(*it);
- return dummy;
-}
-
-template<typename ValueIteratorT>
-template<typename StringT>
-bool ValueIteratorRange<ValueIteratorT>::operator==(const StringT& o) const
-{
- if (std::distance(m_begin, m_end) == std::distance(o.begin(), o.end())) return std::equal(
- m_begin, m_end, o.begin());
- else return false;
-}
-
-template<typename ValueIteratorT>
-bool ValueIteratorRange<ValueIteratorT>::operator==(const char* c) const
-{
- return *this == std::string(c);
-}
-
-template<typename ValueIteratorT>
-template<typename StringT>
-bool ValueIteratorRange<ValueIteratorT>::operator<(const StringT &s2) const
-{
- return std::lexicographical_compare(m_begin, m_end, s2.begin(), s2.end(),
- std::less<typename std::iterator_traits<ValueIteratorT>::value_type>());
-}
-
-template<typename ValueIteratorT>
-bool ValueIteratorRange<ValueIteratorT>::operator<(const char* c) const
-{
- return *this < std::string(c);
-}
-
-template<typename StringT, typename ValueIteratorT>
-bool operator<(const StringT &s1, const ValueIteratorRange<ValueIteratorT> &s2)
-{
- return std::lexicographical_compare(s1.begin(), s1.end(), s2.begin(),
- s2.end(),
- std::less<typename std::iterator_traits<ValueIteratorT>::value_type>());
-}
-
-template<typename ValueIteratorT>
-bool operator<(const char* c, const ValueIteratorRange<ValueIteratorT> &s2)
-{
- size_t len = std::char_traits<char>::length(c);
- return std::lexicographical_compare(c, c + len, s2.begin(), s2.end(),
- std::less<typename std::iterator_traits<ValueIteratorT>::value_type>());
-}
-
-template<typename OStream, typename ValueIteratorT>
-OStream& operator<<(OStream &os, ValueIteratorRange<ValueIteratorT> cr)
-{
- ValueIteratorT it = cr.begin();
- while (it != cr.end())
- os << *(it++);
- return os;
-}
-
-// StringVector
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-StringVector<ValueT, PosT, Allocator>::StringVector(bool allocate) :
- m_sorted(true), m_memoryMapped(false), m_charArray(
- allocate ? new std::vector<ValueT, Allocator<ValueT> >() : 0)
-{
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-StringVector<ValueT, PosT, Allocator>::StringVector(Allocator<ValueT> &alloc) :
- m_sorted(true), m_memoryMapped(false), m_charArray(
- new std::vector<ValueT, Allocator<ValueT> >(alloc))
-{
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-template<typename StringT>
-void StringVector<ValueT, PosT, Allocator>::push_back(StringT s)
-{
- if (is_sorted() && size() && !(back() < s)) m_sorted = false;
-
- m_positions.push_back(size2());
- std::copy(s.begin(), s.end(), std::back_inserter(*m_charArray));
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-void StringVector<ValueT, PosT, Allocator>::push_back(const char* c)
-{
- std::string dummy(c);
- push_back(dummy);
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-template<typename Iterator>
-Iterator StringVector<ValueT, PosT, Allocator>::begin() const
-{
- return Iterator(const_cast<StringVector<ValueT, PosT, Allocator>&>(*this), 0);
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-template<typename Iterator>
-Iterator StringVector<ValueT, PosT, Allocator>::end() const
-{
- return Iterator(const_cast<StringVector<ValueT, PosT, Allocator>&>(*this),
- size());
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-typename StringVector<ValueT, PosT, Allocator>::iterator StringVector<ValueT,
- PosT, Allocator>::begin() const
-{
- return begin<iterator>();
-}
-;
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-typename StringVector<ValueT, PosT, Allocator>::iterator StringVector<ValueT,
- PosT, Allocator>::end() const
-{
- return end<iterator>();
-}
-;
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-bool StringVector<ValueT, PosT, Allocator>::is_sorted() const
-{
- return m_sorted;
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-PosT StringVector<ValueT, PosT, Allocator>::size() const
-{
- return m_positions.size();
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-PosT StringVector<ValueT, PosT, Allocator>::size2() const
-{
- return m_charArray->size();
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-typename StringVector<ValueT, PosT, Allocator>::range StringVector<ValueT, PosT,
- Allocator>::at(PosT i) const
-{
- return range(begin(i), end(i));
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-typename StringVector<ValueT, PosT, Allocator>::range StringVector<ValueT, PosT,
- Allocator>::operator[](PosT i) const
-{
- return at(i);
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-typename StringVector<ValueT, PosT, Allocator>::range StringVector<ValueT, PosT,
- Allocator>::back() const
-{
- return at(size() - 1);
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-PosT StringVector<ValueT, PosT, Allocator>::length(PosT i) const
-{
- if (i + 1 < size()) return m_positions[i + 1] - m_positions[i];
- else return size2() - m_positions[i];
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-const ValueT* StringVector<ValueT, PosT, Allocator>::value_ptr(PosT i) const
-{
- return &(*m_charArray)[m_positions[i]];
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-//typename std::vector<ValueT, Allocator<ValueT> >::const_iterator StringVector<ValueT, PosT, Allocator>::begin(PosT i) const
-const ValueT* StringVector<ValueT, PosT, Allocator>::begin(PosT i) const
-{
- //return typename std::vector<ValueT, Allocator<ValueT> >::const_iterator(value_ptr(i));
- return value_ptr(i);
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-//typename std::vector<ValueT, Allocator<ValueT> >::const_iterator StringVector<ValueT, PosT, Allocator>::end(PosT i) const
-const ValueT* StringVector<ValueT, PosT, Allocator>::end(PosT i) const
-{
- //return typename std::vector<ValueT, Allocator<ValueT> >::const_iterator(value_ptr(i) + length(i));
- return value_ptr(i) + length(i);
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-template<typename StringT>
-PosT StringVector<ValueT, PosT, Allocator>::find(StringT &s) const
-{
- if (m_sorted) return std::distance(begin(),
- std::lower_bound(begin(), end(), s));
- return std::distance(begin(), std::find(begin(), end(), s));
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-PosT StringVector<ValueT, PosT, Allocator>::find(const char* c) const
-{
- std::string s(c);
- return find(s);
-}
-
-// RangeIterator
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-StringVector<ValueT, PosT, Allocator>::RangeIterator::RangeIterator() :
- m_index(0), m_container(0)
-{
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-StringVector<ValueT, PosT, Allocator>::RangeIterator::RangeIterator(
- StringVector<ValueT, PosT, Allocator> &sv, PosT index) :
- m_index(index), m_container(&sv)
-{
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-PosT StringVector<ValueT, PosT, Allocator>::RangeIterator::get_index()
-{
- return m_index;
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-typename StringVector<ValueT, PosT, Allocator>::range StringVector<ValueT, PosT,
- Allocator>::RangeIterator::dereference() const
-{
- return typename StringVector<ValueT, PosT, Allocator>::range(
- m_container->begin(m_index), m_container->end(m_index));
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-bool StringVector<ValueT, PosT, Allocator>::RangeIterator::equal(
- StringVector<ValueT, PosT, Allocator>::RangeIterator const& other) const
-{
- return m_index == other.m_index && m_container == other.m_container;
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-void StringVector<ValueT, PosT, Allocator>::RangeIterator::increment()
-{
- m_index++;
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-void StringVector<ValueT, PosT, Allocator>::RangeIterator::decrement()
-{
- m_index--;
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-void StringVector<ValueT, PosT, Allocator>::RangeIterator::advance(PosT n)
-{
- m_index += n;
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-PosT StringVector<ValueT, PosT, Allocator>::RangeIterator::distance_to(
- StringVector<ValueT, PosT, Allocator>::RangeIterator const& other) const
-{
- return other.m_index - m_index;
-}
-
-// StringIterator
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-StringVector<ValueT, PosT, Allocator>::StringIterator::StringIterator() :
- m_index(0), m_container(0)
-{
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-StringVector<ValueT, PosT, Allocator>::StringIterator::StringIterator(
- StringVector<ValueT, PosT, Allocator> &sv, PosT index) :
- m_index(index), m_container(&sv)
-{
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-PosT StringVector<ValueT, PosT, Allocator>::StringIterator::get_index()
-{
- return m_index;
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-const std::string StringVector<ValueT, PosT, Allocator>::StringIterator::dereference() const
-{
- return StringVector<ValueT, PosT, Allocator>::range(
- m_container->begin(m_index), m_container->end(m_index)).str();
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-bool StringVector<ValueT, PosT, Allocator>::StringIterator::equal(
- StringVector<ValueT, PosT, Allocator>::StringIterator const& other) const
-{
- return m_index == other.m_index && m_container == other.m_container;
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-void StringVector<ValueT, PosT, Allocator>::StringIterator::increment()
-{
- m_index++;
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-void StringVector<ValueT, PosT, Allocator>::StringIterator::decrement()
-{
- m_index--;
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-void StringVector<ValueT, PosT, Allocator>::StringIterator::advance(PosT n)
-{
- m_index += n;
-}
-
-template<typename ValueT, typename PosT, template<typename > class Allocator>
-PosT StringVector<ValueT, PosT, Allocator>::StringIterator::distance_to(
- StringVector<ValueT, PosT, Allocator>::StringIterator const& other) const
-{
- return other.m_index - m_index;
-}
-
-// ********** Some typedefs **********
-
-typedef StringVector<unsigned char, unsigned int> MediumStringVector;
-typedef StringVector<unsigned char, unsigned long> LongStringVector;
-
-}
-
-#endif
diff --git a/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp b/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp
deleted file mode 100644
index 07d0469e0..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-// $Id$
-// vim:tabstop=2
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (C) 2006 University of Edinburgh
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-
-#include "TargetPhraseCollectionCache.h"
-
-namespace Moses2
-{
-
-boost::thread_specific_ptr<TargetPhraseCollectionCache::CacheMap>
-TargetPhraseCollectionCache::m_phraseCache;
-
-PhraseCompact::PhraseCompact(const Phrase<Word> &copy)
-{
- for (size_t i = 0; i < copy.GetSize(); ++i) {
- const Word &word = copy[i];
- push_back(word);
- }
-}
-
-}
-
diff --git a/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h b/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h
deleted file mode 100644
index 3a9e6f170..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h
+++ /dev/null
@@ -1,176 +0,0 @@
-// $Id$
-// vim:tabstop=2
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (C) 2006 University of Edinburgh
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-
-#pragma once
-
-#include <map>
-#include <set>
-#include <vector>
-
-#include <boost/thread/tss.hpp>
-#include <boost/shared_ptr.hpp>
-
-#include "../../Word.h"
-#include "../../Phrase.h"
-
-namespace Moses2
-{
-typedef std::pair<size_t, size_t> AlignPointSizeT;
-
-struct PhraseCompact : public std::vector<Word>
-{
-public:
- PhraseCompact(const Phrase<Word> &copy);
-};
-
-struct TPCompact
-{
- std::vector<Word> words;
- std::set<AlignPointSizeT> alignment;
- std::vector<float> scores;
-
-};
-
-// Avoid using new due to locking
-typedef std::vector<TPCompact> TargetPhraseVector;
-typedef boost::shared_ptr<TargetPhraseVector> TargetPhraseVectorPtr;
-
-/** Implementation of Persistent Cache **/
-class TargetPhraseCollectionCache
-{
-private:
- size_t m_max;
- float m_tolerance;
-
- struct LastUsed {
- clock_t m_clock;
- TargetPhraseVectorPtr m_tpv;
- size_t m_bitsLeft;
-
- LastUsed() : m_clock(0), m_bitsLeft(0) {}
-
- LastUsed(clock_t clock, TargetPhraseVectorPtr tpv, size_t bitsLeft = 0)
- : m_clock(clock), m_tpv(tpv), m_bitsLeft(bitsLeft) {}
- };
-
- typedef std::map<PhraseCompact, LastUsed> CacheMap;
- static boost::thread_specific_ptr<CacheMap> m_phraseCache;
-
-public:
-
- typedef CacheMap::iterator iterator;
- typedef CacheMap::const_iterator const_iterator;
-
- TargetPhraseCollectionCache(size_t max = 5000, float tolerance = 0.2)
- : m_max(max), m_tolerance(tolerance) {
- }
-
- iterator Begin() {
- if(!m_phraseCache.get())
- m_phraseCache.reset(new CacheMap());
- return m_phraseCache->begin();
- }
-
- const_iterator Begin() const {
- if(!m_phraseCache.get())
- m_phraseCache.reset(new CacheMap());
- return m_phraseCache->begin();
- }
-
- iterator End() {
- if(!m_phraseCache.get())
- m_phraseCache.reset(new CacheMap());
- return m_phraseCache->end();
- }
-
- const_iterator End() const {
- if(!m_phraseCache.get())
- m_phraseCache.reset(new CacheMap());
- return m_phraseCache->end();
- }
-
- /** retrieve translations for source phrase from persistent cache **/
- void Cache(const Phrase<Word> &sourcePhrase, TargetPhraseVectorPtr tpv,
- size_t bitsLeft = 0, size_t maxRank = 0) {
- if(!m_phraseCache.get())
- m_phraseCache.reset(new CacheMap());
- // check if source phrase is already in cache
- iterator it = m_phraseCache->find(sourcePhrase);
- if(it != m_phraseCache->end())
- // if found, just update clock
- it->second.m_clock = clock();
- else {
- // else, add to cache
- if(maxRank && tpv->size() > maxRank) {
- TargetPhraseVectorPtr tpv_temp(new TargetPhraseVector());
- tpv_temp->resize(maxRank);
- std::copy(tpv->begin(), tpv->begin() + maxRank, tpv_temp->begin());
- (*m_phraseCache)[sourcePhrase] = LastUsed(clock(), tpv_temp, bitsLeft);
- } else
- (*m_phraseCache)[sourcePhrase] = LastUsed(clock(), tpv, bitsLeft);
- }
- }
-
- std::pair<TargetPhraseVectorPtr, size_t> Retrieve(const Phrase<Word> &sourcePhrase) {
- if(!m_phraseCache.get())
- m_phraseCache.reset(new CacheMap());
- iterator it = m_phraseCache->find(sourcePhrase);
- if(it != m_phraseCache->end()) {
- LastUsed &lu = it->second;
- lu.m_clock = clock();
- return std::make_pair(lu.m_tpv, lu.m_bitsLeft);
- } else
- return std::make_pair(TargetPhraseVectorPtr(), 0);
- }
-
- // if cache full, reduce
- void Prune() {
- if(!m_phraseCache.get())
- m_phraseCache.reset(new CacheMap());
- if(m_phraseCache->size() > m_max * (1 + m_tolerance)) {
- typedef std::set<std::pair<clock_t, PhraseCompact > > Cands;
- Cands cands;
- for(CacheMap::iterator it = m_phraseCache->begin();
- it != m_phraseCache->end(); it++) {
- LastUsed &lu = it->second;
- cands.insert(std::make_pair(lu.m_clock, it->first));
- }
-
- for(Cands::iterator it = cands.begin(); it != cands.end(); it++) {
- const PhraseCompact& p = it->second;
- m_phraseCache->erase(p);
-
- if(m_phraseCache->size() < (m_max * (1 - m_tolerance)))
- break;
- }
- }
- }
-
- void CleanUp() {
- if(!m_phraseCache.get())
- m_phraseCache.reset(new CacheMap());
- m_phraseCache->clear();
- }
-
-};
-
-}
-
diff --git a/contrib/moses2/TranslationModel/CompactPT/ThrowingFwrite.cpp b/contrib/moses2/TranslationModel/CompactPT/ThrowingFwrite.cpp
deleted file mode 100644
index d9fec5013..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/ThrowingFwrite.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-// $Id$
-// vim:tabstop=2
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#include "ThrowingFwrite.h"
-
-size_t ThrowingFwrite(const void *ptr, size_t size, size_t count, FILE* stream)
-{
- assert(size);
- size_t returnValue = std::fwrite(ptr, size, count, stream);
- UTIL_THROW_IF2(count != returnValue, "Short fwrite; requested size " << size);
- return returnValue;
-}
diff --git a/contrib/moses2/TranslationModel/CompactPT/ThrowingFwrite.h b/contrib/moses2/TranslationModel/CompactPT/ThrowingFwrite.h
deleted file mode 100644
index 2a0c71a27..000000000
--- a/contrib/moses2/TranslationModel/CompactPT/ThrowingFwrite.h
+++ /dev/null
@@ -1,31 +0,0 @@
-// $Id$
-// vim:tabstop=2
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#ifndef moses_ThrowingFwrite_h
-#define moses_ThrowingFwrite_h
-
-#include <cassert>
-#include <cstdio>
-#include "util/exception.hh"
-
-size_t ThrowingFwrite(const void *ptr, size_t size, size_t count, FILE* stream);
-
-#endif
diff --git a/contrib/moses2/TranslationModel/Memory/Node.h b/contrib/moses2/TranslationModel/Memory/Node.h
deleted file mode 100644
index 97fa9618e..000000000
--- a/contrib/moses2/TranslationModel/Memory/Node.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Node.h
- *
- * Created on: 22 Apr 2016
- * Author: hieu
- */
-#pragma once
-#include <boost/unordered_map.hpp>
-#include <boost/foreach.hpp>
-#include "../../PhraseBased/TargetPhrases.h"
-#include "../../System.h"
-#include "../../Phrase.h"
-
-namespace Moses2
-{
-class System;
-
-namespace PtMem
-{
-
-template<class WORD, class SP, class TP, class TPS>
-class Node
-{
-public:
- typedef boost::unordered_map<size_t, Node> Children;
-
- Node()
- :m_targetPhrases(NULL)
- ,m_unsortedTPS(NULL)
- {}
-
- ~Node()
- {}
-
- void AddRule(const std::vector<FactorType> &factors, SP &source, TP *target)
- {
- AddRule(factors, source, target, 0);
- }
-
- TPS *Find(const std::vector<FactorType> &factors, const SP &source, size_t pos = 0) const
- {
- assert(source.GetSize());
- if (pos == source.GetSize()) {
- return m_targetPhrases;
- }
- else {
- const WORD &word = source[pos];
- //cerr << "word=" << word << endl;
- typename Children::const_iterator iter = m_children.find(word.hash(factors));
- if (iter == m_children.end()) {
- return NULL;
- }
- else {
- const Node &child = iter->second;
- return child.Find(factors, source, pos + 1);
- }
- }
- }
-
- const Node *Find(const std::vector<FactorType> &factors, const WORD &word) const
- {
- typename Children::const_iterator iter = m_children.find(word.hash(factors));
- if (iter == m_children.end()) {
- return NULL;
- }
- else {
- const Node &child = iter->second;
- return &child;
- }
- }
-
- const TPS *GetTargetPhrases() const
- { return m_targetPhrases; }
-
- void SortAndPrune(size_t tableLimit, MemPool &pool, System &system)
- {
- BOOST_FOREACH(typename Children::value_type &val, m_children){
- Node &child = val.second;
- child.SortAndPrune(tableLimit, pool, system);
- }
-
- // prune target phrases in this node
- if (m_unsortedTPS) {
- m_targetPhrases = new (pool.Allocate<TPS>()) TPS(pool, m_unsortedTPS->size());
-
- for (size_t i = 0; i < m_unsortedTPS->size(); ++i) {
- TP *tp = (*m_unsortedTPS)[i];
- m_targetPhrases->AddTargetPhrase(*tp);
- }
-
- m_targetPhrases->SortAndPrune(tableLimit);
- system.featureFunctions.EvaluateAfterTablePruning(system.GetSystemPool(), *m_targetPhrases, *m_source);
-
- delete m_unsortedTPS;
- }
- }
-
- const Children &GetChildren() const
- { return m_children; }
-
- void Debug(std::ostream &out, const System &system) const {
- BOOST_FOREACH(const typename Children::value_type &valPair, m_children) {
- const WORD &word = valPair.first;
- //std::cerr << word << "(" << word.hash() << ") ";
- }
- }
-protected:
- Children m_children;
- TPS *m_targetPhrases;
- Phrase<WORD> *m_source;
- std::vector<TP*> *m_unsortedTPS;
-
- Node &AddRule(const std::vector<FactorType> &factors, SP &source, TP *target, size_t pos)
- {
- if (pos == source.GetSize()) {
- if (m_unsortedTPS == NULL) {
- m_unsortedTPS = new std::vector<TP*>();
- m_source = &source;
- }
-
- m_unsortedTPS->push_back(target);
- return *this;
- }
- else {
- const WORD &word = source[pos];
- Node &child = m_children[word.hash(factors)];
- //std::cerr << "added " << word << " " << &child << " from " << this << std::endl;
-
- return child.AddRule(factors, source, target, pos + 1);
- }
- }
-
-};
-
-
-}
-} // namespace
-
diff --git a/contrib/moses2/TranslationModel/Memory/PhraseTableMemory.cpp b/contrib/moses2/TranslationModel/Memory/PhraseTableMemory.cpp
deleted file mode 100644
index 09eead137..000000000
--- a/contrib/moses2/TranslationModel/Memory/PhraseTableMemory.cpp
+++ /dev/null
@@ -1,268 +0,0 @@
-/*
- * PhraseTableMemory.cpp
- *
- * Created on: 28 Oct 2015
- * Author: hieu
- */
-
-#include <cassert>
-#include <boost/foreach.hpp>
-#include "PhraseTableMemory.h"
-#include "../../PhraseBased/PhraseImpl.h"
-#include "../../Phrase.h"
-#include "../../System.h"
-#include "../../Scores.h"
-#include "../../InputPathsBase.h"
-#include "../../legacy/InputFileStream.h"
-#include "util/exception.hh"
-
-#include "../../PhraseBased/InputPath.h"
-#include "../../PhraseBased/TargetPhraseImpl.h"
-#include "../../PhraseBased/TargetPhrases.h"
-
-#include "../../SCFG/PhraseImpl.h"
-#include "../../SCFG/TargetPhraseImpl.h"
-#include "../../SCFG/InputPath.h"
-#include "../../SCFG/Stack.h"
-#include "../../SCFG/Stacks.h"
-#include "../../SCFG/Manager.h"
-
-
-using namespace std;
-
-namespace Moses2
-{
-
-
-////////////////////////////////////////////////////////////////////////
-
-PhraseTableMemory::PhraseTableMemory(size_t startInd, const std::string &line)
-:PhraseTable(startInd, line)
-,m_rootPb(NULL)
-,m_rootSCFG(NULL)
-{
- ReadParameters();
-}
-
-PhraseTableMemory::~PhraseTableMemory()
-{
- delete m_rootPb;
- delete m_rootSCFG;
-}
-
-void PhraseTableMemory::Load(System &system)
-{
- FactorCollection &vocab = system.GetVocab();
- MemPool &systemPool = system.GetSystemPool();
- MemPool tmpSourcePool;
-
- if (system.isPb) {
- m_rootPb = new PBNODE();
- }
- else {
- m_rootSCFG = new SCFGNODE();
- //cerr << "m_rootSCFG=" << m_rootSCFG << endl;
- }
-
- vector<string> toks;
- size_t lineNum = 0;
- InputFileStream strme(m_path);
- string line;
- while (getline(strme, line)) {
- if (++lineNum % 1000000 == 0) {
- cerr << lineNum << " ";
- }
- toks.clear();
- TokenizeMultiCharSeparator(toks, line, "|||");
- UTIL_THROW_IF2(toks.size() < 3, "Wrong format");
- //cerr << "line=" << line << endl;
- //cerr << "system.isPb=" << system.isPb << endl;
-
- if (system.isPb) {
- PhraseImpl *source = PhraseImpl::CreateFromString(tmpSourcePool, vocab, system,
- toks[0]);
- //cerr << "created soure" << endl;
- TargetPhraseImpl *target = TargetPhraseImpl::CreateFromString(systemPool, *this, system,
- toks[1]);
- //cerr << "created target" << endl;
- target->GetScores().CreateFromString(toks[2], *this, system, true);
- //cerr << "created scores:" << *target << endl;
-
- if (toks.size() >= 4) {
- //cerr << "alignstr=" << toks[3] << endl;
- target->SetAlignmentInfo(toks[3]);
- }
-
- // properties
- if (toks.size() == 7) {
- //target->properties = (char*) system.systemPool.Allocate(toks[6].size() + 1);
- //strcpy(target->properties, toks[6].c_str());
- }
-
- system.featureFunctions.EvaluateInIsolation(systemPool, system, *source,
- *target);
- //cerr << "EvaluateInIsolation:" << *target << endl;
- m_rootPb->AddRule(m_input, *source, target);
-
- //cerr << "target=" << target->Debug(system) << endl;
- }
- else {
- SCFG::PhraseImpl *source = SCFG::PhraseImpl::CreateFromString(tmpSourcePool, vocab, system,
- toks[0]);
- //cerr << "created source:" << *source << endl;
- SCFG::TargetPhraseImpl *target = SCFG::TargetPhraseImpl::CreateFromString(systemPool, *this,
- system, toks[1]);
-
- //cerr << "created target " << *target << " source=" << *source << endl;
-
- target->GetScores().CreateFromString(toks[2], *this, system, true);
- //cerr << "created scores:" << *target << endl;
-
- //vector<SCORE> scores = Tokenize<SCORE>(toks[2]);
- //target->sortScore = (scores.size() >= 3) ? TransformScore(scores[2]) : 0;
-
- target->SetAlignmentInfo(toks[3]);
-
- // properties
- if (toks.size() == 7) {
- //target->properties = (char*) system.systemPool.Allocate(toks[6].size() + 1);
- //strcpy(target->properties, toks[6].c_str());
- }
-
- system.featureFunctions.EvaluateInIsolation(systemPool, system, *source,
- *target);
- //cerr << "EvaluateInIsolation:" << *target << endl;
- m_rootSCFG->AddRule(m_input, *source, target);
- }
- }
-
- if (system.isPb) {
- m_rootPb->SortAndPrune(m_tableLimit, systemPool, system);
- //cerr << "root=" << &m_rootPb << endl;
- }
- else {
- m_rootSCFG->SortAndPrune(m_tableLimit, systemPool, system);
- //cerr << "root=" << &m_rootPb << endl;
- }
- /*
- BOOST_FOREACH(const PtMem::Node<Word>::Children::value_type &valPair, m_rootPb.GetChildren()) {
- const Word &word = valPair.first;
- cerr << word << " ";
- }
- cerr << endl;
- */
-}
-
-TargetPhrases* PhraseTableMemory::Lookup(const Manager &mgr, MemPool &pool,
- InputPath &inputPath) const
-{
- const SubPhrase<Moses2::Word> &phrase = inputPath.subPhrase;
- TargetPhrases *tps = m_rootPb->Find(m_input, phrase);
- return tps;
-}
-
-void PhraseTableMemory::InitActiveChart(
- MemPool &pool,
- const SCFG::Manager &mgr,
- SCFG::InputPath &path) const
-{
- size_t ptInd = GetPtInd();
- ActiveChartEntryMem *chartEntry = new (pool.Allocate<ActiveChartEntryMem>()) ActiveChartEntryMem(pool, *m_rootSCFG);
- path.AddActiveChartEntry(ptInd, chartEntry);
- //cerr << "InitActiveChart=" << path << endl;
-}
-
-void PhraseTableMemory::Lookup(MemPool &pool,
- const SCFG::Manager &mgr,
- size_t maxChartSpan,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const
-{
- if (path.range.GetNumWordsCovered() > maxChartSpan) {
- return;
- }
-
- size_t endPos = path.range.GetEndPos();
-
- const SCFG::InputPath *prevPath = static_cast<const SCFG::InputPath*>(path.prefixPath);
- UTIL_THROW_IF2(prevPath == NULL, "prefixPath == NULL");
-
- // TERMINAL
- const SCFG::Word &lastWord = path.subPhrase.Back();
-
- const SCFG::InputPath &subPhrasePath = *mgr.GetInputPaths().GetMatrix().GetValue(endPos, 1);
-
- //cerr << "BEFORE LookupGivenWord=" << *prevPath << endl;
- LookupGivenWord(pool, mgr, *prevPath, lastWord, NULL, subPhrasePath.range, path);
- //cerr << "AFTER LookupGivenWord=" << *prevPath << endl;
-
- // NON-TERMINAL
- //const SCFG::InputPath *prefixPath = static_cast<const SCFG::InputPath*>(path.prefixPath);
- while (prevPath) {
- const Range &prevRange = prevPath->range;
- //cerr << "prevRange=" << prevRange << endl;
-
- size_t startPos = prevRange.GetEndPos() + 1;
- size_t ntSize = endPos - startPos + 1;
- const SCFG::InputPath &subPhrasePath = *mgr.GetInputPaths().GetMatrix().GetValue(startPos, ntSize);
-
- LookupNT(pool, mgr, subPhrasePath.range, *prevPath, stacks, path);
-
- prevPath = static_cast<const SCFG::InputPath*>(prevPath->prefixPath);
- }
-}
-
-void PhraseTableMemory::LookupGivenNode(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::ActiveChartEntry &prevEntry,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const
-{
- const ActiveChartEntryMem &prevEntryCast = static_cast<const ActiveChartEntryMem&>(prevEntry);
-
- const SCFGNODE &prevNode = prevEntryCast.node;
- UTIL_THROW_IF2(&prevNode == NULL, "node == NULL");
-
- size_t ptInd = GetPtInd();
- const SCFGNODE *nextNode = prevNode.Find(m_input, wordSought);
-
- /*
- if (outPath.range.GetStartPos() == 1 || outPath.range.GetStartPos() == 2) {
- cerr << "range=" << outPath.range
- << " prevEntry=" << prevEntry.GetSymbolBind().Debug(mgr.system)
- << " wordSought=" << wordSought.Debug(mgr.system)
- << " nextNode=" << nextNode
- << endl;
- }
- */
- if (nextNode) {
- // new entries
- ActiveChartEntryMem *chartEntry = new (pool.Allocate<ActiveChartEntryMem>()) ActiveChartEntryMem(pool, *nextNode, prevEntry);
-
- chartEntry->AddSymbolBindElement(subPhraseRange, wordSought, hypos, *this);
- //cerr << "AFTER Add=" << symbolBind << endl;
-
- outPath.AddActiveChartEntry(ptInd, chartEntry);
-
- const SCFG::TargetPhrases *tps = nextNode->GetTargetPhrases();
- if (tps) {
- // there are some rules
- /*
- cerr << "outPath=" << outPath.range
- << " bind=" << chartEntry->GetSymbolBind().Debug(mgr.system)
- << " pt=" << GetPtInd()
- << " tps=" << tps->Debug(mgr.system) << endl;
- */
- outPath.AddTargetPhrasesToPath(pool, mgr.system, *this, *tps, chartEntry->GetSymbolBind());
-
- }
-
- //cerr << "AFTER outPath=" << outPath << endl;
- }
-}
-
-}
-
diff --git a/contrib/moses2/TranslationModel/Memory/PhraseTableMemory.h b/contrib/moses2/TranslationModel/Memory/PhraseTableMemory.h
deleted file mode 100644
index 035c7c9c5..000000000
--- a/contrib/moses2/TranslationModel/Memory/PhraseTableMemory.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * PhraseTableMemory.h
- *
- * Created on: 28 Oct 2015
- * Author: hieu
- */
-#pragma once
-
-#include "../PhraseTable.h"
-#include "../../legacy/Util2.h"
-#include "../../SCFG/InputPath.h"
-#include "Node.h"
-#include "../../PhraseBased/PhraseImpl.h"
-#include "../../PhraseBased/TargetPhraseImpl.h"
-#include "../../PhraseBased/TargetPhrases.h"
-#include "../../SCFG/PhraseImpl.h"
-#include "../../SCFG/TargetPhraseImpl.h"
-#include "../../SCFG/TargetPhrases.h"
-
-namespace Moses2
-{
-
-class PhraseTableMemory: public PhraseTable
-{
- typedef PtMem::Node<Word, Phrase<Word>, TargetPhraseImpl, TargetPhrases> PBNODE;
- typedef PtMem::Node<SCFG::Word, Phrase<SCFG::Word>, SCFG::TargetPhraseImpl, SCFG::TargetPhrases> SCFGNODE;
-
-//////////////////////////////////////
- class ActiveChartEntryMem : public SCFG::ActiveChartEntry
- {
- typedef SCFG::ActiveChartEntry Parent;
- public:
- const PhraseTableMemory::SCFGNODE &node;
-
- ActiveChartEntryMem(MemPool &pool, const PhraseTableMemory::SCFGNODE &vnode)
- :Parent(pool)
- ,node(vnode)
- {}
-
- ActiveChartEntryMem(
- MemPool &pool,
- const PhraseTableMemory::SCFGNODE &vnode,
- const ActiveChartEntry &prevEntry)
- :Parent(prevEntry)
- ,node(vnode)
- {}
- };
-
- //////////////////////////////////////
-public:
- PhraseTableMemory(size_t startInd, const std::string &line);
- virtual ~PhraseTableMemory();
-
- virtual void Load(System &system);
- virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool,
- InputPath &inputPath) const;
-
- virtual void InitActiveChart(
- MemPool &pool,
- const SCFG::Manager &mgr,
- SCFG::InputPath &path) const;
-
- void Lookup(MemPool &pool,
- const SCFG::Manager &mgr,
- size_t maxChartSpan,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const;
-
-protected:
- PBNODE *m_rootPb;
- SCFGNODE *m_rootSCFG;
-
- void LookupGivenNode(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::ActiveChartEntry &prevEntry,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const;
-
-};
-
-}
-
diff --git a/contrib/moses2/TranslationModel/PhraseTable.cpp b/contrib/moses2/TranslationModel/PhraseTable.cpp
deleted file mode 100644
index c790147bb..000000000
--- a/contrib/moses2/TranslationModel/PhraseTable.cpp
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * PhraseTable.cpp
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include <queue>
-#include "PhraseTable.h"
-#include "../legacy/Util2.h"
-#include "../TypeDef.h"
-#include "../InputType.h"
-#include "../PhraseBased/Manager.h"
-#include "../PhraseBased/InputPath.h"
-#include "../SCFG/InputPath.h"
-#include "../SCFG/Manager.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-////////////////////////////////////////////////////////////////////////////
-PhraseTable::PhraseTable(size_t startInd, const std::string &line) :
- StatelessFeatureFunction(startInd, line), m_tableLimit(20) // default
- , m_maxCacheSize(DEFAULT_MAX_TRANS_OPT_CACHE_SIZE)
-{
- m_input.push_back(0);
-}
-
-PhraseTable::~PhraseTable()
-{
- // TODO Auto-generated destructor stub
-}
-
-void PhraseTable::SetParameter(const std::string& key, const std::string& value)
-{
- if (key == "cache-size") {
- m_maxCacheSize = Scan<size_t>(value);
- }
- else if (key == "path") {
- m_path = value;
- }
- else if (key == "input-factor") {
- m_input = Tokenize<FactorType>(value, ",");
- }
- else if (key == "output-factor") {
- m_output = Tokenize<FactorType>(value, ",");
- }
- else if (key == "table-limit") {
- m_tableLimit = Scan<size_t>(value);
- }
- else {
- StatelessFeatureFunction::SetParameter(key, value);
- }
-}
-
-bool PhraseTable::SatisfyBackoff(const Manager &mgr, const InputPath &path) const
-{
- const InputType &input = mgr.GetInput();
- if ((mgr.system.options.input.xml_policy == XmlExclusive)
- && input.XmlOverlap(path.range.GetStartPos(), path.range.GetEndPos())) {
- return false;
- }
-
- //cerr << GetName() << "=" << GetPtInd() << "=" << decodeGraphBackoff << endl;
- if (decodeGraphBackoff == 0) {
- // always lookup
- return true;
- }
- else if (decodeGraphBackoff == -1) {
- // lookup only if there's no existing rules
- return path.GetNumRules() ? false : true;
- }
- else if (path.range.GetNumWordsCovered() <= decodeGraphBackoff) {
- return path.GetNumRules() ? false : true;
- }
-
- return false;
-}
-
-void PhraseTable::Lookup(const Manager &mgr, InputPathsBase &inputPaths) const
-{
- BOOST_FOREACH(InputPathBase *pathBase, inputPaths){
- InputPath *path = static_cast<InputPath*>(pathBase);
- //cerr << "path=" << path->range << " ";
-
- if (SatisfyBackoff(mgr, *path)) {
- TargetPhrases *tpsPtr = Lookup(mgr, mgr.GetPool(), *path);
- /*
- cerr << "tpsPtr=" << tpsPtr << " ";
- if (tps.get()) {
- cerr << tps.get()->GetSize();
- }
- cerr << endl;
- */
-
- path->AddTargetPhrases(*this, tpsPtr);
- }
- }
-
-}
-
-TargetPhrases *PhraseTable::Lookup(const Manager &mgr, MemPool &pool,
- InputPath &inputPath) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-void PhraseTable::EvaluateInIsolation(MemPool &pool, const System &system,
- const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
-{
-}
-
-void PhraseTable::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
-{
-
-}
-
-// scfg
-void PhraseTable::LookupUnary(MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const
-{
- //cerr << "BEFORE LookupUnary" << path.Debug(mgr.system) << endl;
- size_t startPos = path.range.GetStartPos();
- const SCFG::InputPath *prevPath = mgr.GetInputPaths().GetMatrix().GetValue(startPos, 0);
- LookupNT(pool, mgr, path.range, *prevPath, stacks, path);
- //cerr << "AFTER LookupUnary" << path.Debug(mgr.system) << endl;
-}
-
-void PhraseTable::LookupNT(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const Moses2::Range &subPhraseRange,
- const SCFG::InputPath &prevPath,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &outPath) const
-{
- size_t endPos = outPath.range.GetEndPos();
-
- const Range &prevRange = prevPath.range;
-
- size_t startPos = prevRange.GetEndPos() + 1;
- size_t ntSize = endPos - startPos + 1;
-
- const SCFG::Stack &ntStack = stacks.GetStack(startPos, ntSize);
- const SCFG::Stack::Coll &stackColl = ntStack.GetColl();
-
- BOOST_FOREACH (const SCFG::Stack::Coll::value_type &valPair, stackColl) {
- const SCFG::Word &ntSought = valPair.first;
- const Moses2::HypothesisColl *hypos = valPair.second;
- const Moses2::Hypotheses &sortedHypos = hypos->GetSortedAndPrunedHypos(mgr, mgr.arcLists);
- //cerr << "ntSought=" << ntSought << ntSought.isNonTerminal << endl;
- LookupGivenWord(pool, mgr, prevPath, ntSought, &sortedHypos, subPhraseRange, outPath);
- }
-}
-
-void PhraseTable::LookupGivenWord(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::InputPath &prevPath,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const
-{
- size_t ptInd = GetPtInd();
-
-
- BOOST_FOREACH(const SCFG::ActiveChartEntry *prevEntry, prevPath.GetActiveChart(ptInd).entries) {
- //cerr << "BEFORE LookupGivenNode=" << prevPath << endl;
- LookupGivenNode(pool, mgr, *prevEntry, wordSought, hypos, subPhraseRange, outPath);
- //cerr << "AFTER LookupGivenNode=" << prevPath << endl;
- }
-}
-
-}
-
diff --git a/contrib/moses2/TranslationModel/PhraseTable.h b/contrib/moses2/TranslationModel/PhraseTable.h
deleted file mode 100644
index 9237f5ba6..000000000
--- a/contrib/moses2/TranslationModel/PhraseTable.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * PhraseTable.h
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-#pragma once
-#include <string>
-#include <boost/unordered_map.hpp>
-#include "../Word.h"
-#include "../HypothesisColl.h"
-#include "../FF/StatelessFeatureFunction.h"
-#include "../legacy/Util2.h"
-
-namespace Moses2
-{
-
-class System;
-class InputPathsBase;
-class InputPath;
-class Manager;
-class TargetPhrases;
-class Range;
-
-namespace SCFG
-{
-class InputPath;
-class Stacks;
-class Manager;
-class ActiveChartEntry;
-}
-
-////////////////////////////////////////////////////////////////////////
-class PhraseTable: public StatelessFeatureFunction
-{
-public:
- int decodeGraphBackoff;
-
- PhraseTable(size_t startInd, const std::string &line);
- virtual ~PhraseTable();
-
- virtual void SetParameter(const std::string& key, const std::string& value);
- virtual void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const;
- virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool,
- InputPath &inputPath) const;
-
- void SetPtInd(size_t ind)
- { m_ptInd = ind; }
-
- size_t GetPtInd() const
- { return m_ptInd; }
-
- bool SatisfyBackoff(const Manager &mgr, const InputPath &path) const;
-
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- virtual void
- EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
- const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- // scfg
- virtual void InitActiveChart(
- MemPool &pool,
- const SCFG::Manager &mgr,
- SCFG::InputPath &path) const = 0;
-
- virtual void Lookup(
- MemPool &pool,
- const SCFG::Manager &mgr,
- size_t maxChartSpan,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const = 0;
-
- virtual void LookupUnary(MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const;
-
-protected:
- std::string m_path;
- size_t m_ptInd; // in the order that it is list in [feature], NOT order of [mapping]
- size_t m_tableLimit;
- std::vector<FactorType> m_input, m_output;
-
- // cache
- size_t m_maxCacheSize; // 0 = no caching
-
- struct CacheCollEntry2
- {
- TargetPhrases *tpsPtr;
- clock_t clock;
- };
-
- // scfg
- virtual void LookupNT(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const Moses2::Range &subPhraseRange,
- const SCFG::InputPath &prevPath,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &outPath) const;
-
- virtual void LookupGivenWord(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::InputPath &prevPath,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const;
-
- virtual void LookupGivenNode(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::ActiveChartEntry &prevEntry,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const = 0;
-
-};
-
-}
-
diff --git a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp
deleted file mode 100644
index 1f22f45be..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp
+++ /dev/null
@@ -1,756 +0,0 @@
-/*
- * ProbingPT.cpp
- *
- * Created on: 3 Nov 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include "ProbingPT.h"
-#include "querying.hh"
-#include "probing_hash_utils.hh"
-#include "util/exception.hh"
-#include "../../System.h"
-#include "../../Scores.h"
-#include "../../Phrase.h"
-#include "../../legacy/InputFileStream.h"
-#include "../../legacy/FactorCollection.h"
-#include "../../legacy/Util2.h"
-#include "../../FF/FeatureFunctions.h"
-#include "../../PhraseBased/PhraseImpl.h"
-#include "../../PhraseBased/TargetPhraseImpl.h"
-#include "../../PhraseBased/Manager.h"
-#include "../../PhraseBased/TargetPhrases.h"
-#include "../../SCFG/InputPath.h"
-#include "../../SCFG/Manager.h"
-#include "../../SCFG/TargetPhraseImpl.h"
-#include "../../SCFG/PhraseImpl.h"
-
-using namespace std;
-
-namespace Moses2
-{
-ProbingPT::ActiveChartEntryProbing::ActiveChartEntryProbing(
- MemPool &pool,
- const ActiveChartEntryProbing &prevEntry)
-:Parent(prevEntry)
-,m_key(prevEntry.m_key)
-{}
-
-void ProbingPT::ActiveChartEntryProbing::AddSymbolBindElement(
- const Range &range,
- const SCFG::Word &word,
- const Moses2::Hypotheses *hypos,
- const Moses2::PhraseTable &pt)
-{
- const ProbingPT &probingPt = static_cast<const ProbingPT&>(pt);
- std::pair<bool, uint64_t> key = GetKey(word, probingPt);
- UTIL_THROW_IF2(!key.first, "Word should have been in source vocab");
- m_key = key.second;
-
- ActiveChartEntry::AddSymbolBindElement(range, word, hypos, pt);
-}
-
-std::pair<bool, uint64_t> ProbingPT::ActiveChartEntryProbing::GetKey(const SCFG::Word &nextWord, const ProbingPT &pt) const
-{
- std::pair<bool, uint64_t> ret;
- ret.second = m_key;
- uint64_t probingId = pt.GetSourceProbingId(nextWord);
- if (probingId == pt.GetUnk()) {
- ret.first = false;
- return ret;
- }
-
- ret.first = true;
- size_t phraseSize = m_symbolBind.coll.size();
- ret.second += probingId << phraseSize;
- return ret;
-}
-
-////////////////////////////////////////////////////////////////////////////
-ProbingPT::ProbingPT(size_t startInd, const std::string &line)
-:PhraseTable(startInd, line)
-,load_method(util::POPULATE_OR_READ)
-{
- ReadParameters();
-}
-
-ProbingPT::~ProbingPT()
-{
- delete m_engine;
-}
-
-void ProbingPT::Load(System &system)
-{
- m_engine = new QueryEngine(m_path.c_str(), load_method);
-
- m_unkId = 456456546456;
-
- FactorCollection &vocab = system.GetVocab();
-
- // source vocab
- const std::map<uint64_t, std::string> &sourceVocab =
- m_engine->getSourceVocab();
- std::map<uint64_t, std::string>::const_iterator iterSource;
- for (iterSource = sourceVocab.begin(); iterSource != sourceVocab.end();
- ++iterSource) {
- string wordStr = iterSource->second;
- bool isNT;
- //cerr << "wordStr=" << wordStr << endl;
- ReformatWord(system, wordStr, isNT);
- //cerr << "wordStr=" << wordStr << endl;
-
- const Factor *factor = vocab.AddFactor(wordStr, system, isNT);
-
- uint64_t probingId = iterSource->first;
- size_t factorId = factor->GetId();
-
- if (factorId >= m_sourceVocab.size()) {
- m_sourceVocab.resize(factorId + 1, m_unkId);
- }
- m_sourceVocab[factorId] = probingId;
- }
-
- // target vocab
- InputFileStream targetVocabStrme(m_path + "/TargetVocab.dat");
- string line;
- while (getline(targetVocabStrme, line)) {
- vector<string> toks = Tokenize(line, "\t");
- UTIL_THROW_IF2(toks.size() != 2, string("Incorrect format:") + line + "\n");
-
- bool isNT;
- //cerr << "wordStr=" << toks[0] << endl;
- ReformatWord(system, toks[0], isNT);
- //cerr << "wordStr=" << toks[0] << endl;
-
- const Factor *factor = vocab.AddFactor(toks[0], system, isNT);
- uint32_t probingId = Scan<uint32_t>(toks[1]);
-
- if (probingId >= m_targetVocab.size()) {
- m_targetVocab.resize(probingId + 1);
- }
-
- std::pair<bool, const Factor*> ele(isNT, factor);
- m_targetVocab[probingId] = ele;
- }
-
- // alignments
- CreateAlignmentMap(system, m_path + "/Alignments.dat");
-
- // cache
- CreateCache(system);
-}
-
-void ProbingPT::SetParameter(const std::string& key, const std::string& value)
-{
- if (key == "load") {
- if (value == "lazy") {
- load_method = util::LAZY;
- }
- else if (value == "populate_or_lazy") {
- load_method = util::POPULATE_OR_LAZY;
- }
- else if (value == "populate_or_read" || value == "populate") {
- load_method = util::POPULATE_OR_READ;
- }
- else if (value == "read") {
- load_method = util::READ;
- }
- else if (value == "parallel_read") {
- load_method = util::PARALLEL_READ;
- }
- else {
- UTIL_THROW2("load method not supported" << value);
- }
- }
- else {
- PhraseTable::SetParameter(key, value);
- }
-}
-
-void ProbingPT::CreateAlignmentMap(System &system, const std::string path)
-{
- const std::vector< std::vector<unsigned char> > &probingAlignColl = m_engine->getAlignments();
- m_aligns.resize(probingAlignColl.size(), NULL);
-
- for (size_t i = 0; i < probingAlignColl.size(); ++i) {
- AlignmentInfo::CollType aligns;
-
- const std::vector<unsigned char> &probingAligns = probingAlignColl[i];
- for (size_t j = 0; j < probingAligns.size(); j += 2) {
- size_t startPos = probingAligns[j];
- size_t endPos = probingAligns[j+1];
- //cerr << "startPos=" << startPos << " " << endPos << endl;
- aligns.insert(std::pair<size_t,size_t>(startPos, endPos));
- }
-
- const AlignmentInfo *align = AlignmentInfoCollection::Instance().Add(aligns);
- m_aligns[i] = align;
- //cerr << "align=" << align->Debug(system) << endl;
- }
-}
-
-void ProbingPT::Lookup(const Manager &mgr, InputPathsBase &inputPaths) const
-{
- BOOST_FOREACH(InputPathBase *pathBase, inputPaths){
- InputPath *path = static_cast<InputPath*>(pathBase);
-
- if (SatisfyBackoff(mgr, *path)) {
- TargetPhrases *tpsPtr;
- tpsPtr = Lookup(mgr, mgr.GetPool(), *path);
- path->AddTargetPhrases(*this, tpsPtr);
- }
- }
-}
-
-TargetPhrases* ProbingPT::Lookup(const Manager &mgr, MemPool &pool,
- InputPath &inputPath) const
-{
- /*
- if (inputPath.prefixPath && inputPath.prefixPath->GetTargetPhrases(*this) == NULL) {
- // assume all paths have prefixes, except rules with 1 word source
- return NULL;
- }
- else {
- const Phrase &sourcePhrase = inputPath.subPhrase;
- std::pair<TargetPhrases*, uint64_t> tpsAndKey = CreateTargetPhrase(pool, mgr.system, sourcePhrase);
- return tpsAndKey.first;
- }
- */
- const Phrase<Moses2::Word> &sourcePhrase = inputPath.subPhrase;
-
- // get hash for source phrase
- std::pair<bool, uint64_t> keyStruct = GetKey(sourcePhrase);
- if (!keyStruct.first) {
- return NULL;
- }
-
- // check in cache
- CachePb::const_iterator iter = m_cachePb.find(keyStruct.second);
- if (iter != m_cachePb.end()) {
- //cerr << "FOUND IN CACHE " << keyStruct.second << " " << sourcePhrase.Debug(mgr.system) << endl;
- TargetPhrases *tps = iter->second;
- return tps;
- }
-
- // query pt
- TargetPhrases *tps = CreateTargetPhrases(pool, mgr.system, sourcePhrase,
- keyStruct.second);
- return tps;
-}
-
-std::pair<bool, uint64_t> ProbingPT::GetKey(const Phrase<Moses2::Word> &sourcePhrase) const
-{
- std::pair<bool, uint64_t> ret;
-
- // create a target phrase from the 1st word of the source, prefix with 'ProbingPT:'
- size_t sourceSize = sourcePhrase.GetSize();
- assert(sourceSize);
-
- uint64_t probingSource[sourceSize];
- GetSourceProbingIds(sourcePhrase, ret.first, probingSource);
- if (!ret.first) {
- // source phrase contains a word unknown in the pt.
- // We know immediately there's no translation for it
- }
- else {
- ret.second = m_engine->getKey(probingSource, sourceSize);
- }
-
- return ret;
-
-}
-
-TargetPhrases *ProbingPT::CreateTargetPhrases(MemPool &pool,
- const System &system, const Phrase<Moses2::Word> &sourcePhrase, uint64_t key) const
-{
- TargetPhrases *tps = NULL;
-
- //Actual lookup
- std::pair<bool, uint64_t> query_result; // 1st=found, 2nd=target file offset
- query_result = m_engine->query(key);
- //cerr << "key2=" << query_result.second << endl;
-
- if (query_result.first) {
- const char *offset = m_engine->memTPS + query_result.second;
- uint64_t *numTP = (uint64_t*) offset;
-
- tps = new (pool.Allocate<TargetPhrases>()) TargetPhrases(pool, *numTP);
-
- offset += sizeof(uint64_t);
- for (size_t i = 0; i < *numTP; ++i) {
- TargetPhraseImpl *tp = CreateTargetPhrase(pool, system, offset);
- assert(tp);
- const FeatureFunctions &ffs = system.featureFunctions;
- ffs.EvaluateInIsolation(pool, system, sourcePhrase, *tp);
-
- tps->AddTargetPhrase(*tp);
-
- }
-
- tps->SortAndPrune(m_tableLimit);
- system.featureFunctions.EvaluateAfterTablePruning(pool, *tps, sourcePhrase);
- //cerr << *tps << endl;
- }
-
- return tps;
-}
-
-TargetPhraseImpl *ProbingPT::CreateTargetPhrase(
- MemPool &pool,
- const System &system,
- const char *&offset) const
-{
- TargetPhraseInfo *tpInfo = (TargetPhraseInfo*) offset;
- size_t numRealWords = tpInfo->numWords / m_output.size();
-
- TargetPhraseImpl *tp =
- new (pool.Allocate<TargetPhraseImpl>()) TargetPhraseImpl(pool, *this,
- system, numRealWords);
-
- offset += sizeof(TargetPhraseInfo);
-
- // scores
- SCORE *scores = (SCORE*) offset;
-
- size_t totalNumScores = m_engine->num_scores + m_engine->num_lex_scores;
-
- if (m_engine->logProb) {
- // set pt score for rule
- tp->GetScores().PlusEquals(system, *this, scores);
-
- // save scores for other FF, eg. lex RO. Just give the offset
- if (m_engine->num_lex_scores) {
- tp->scoreProperties = scores + m_engine->num_scores;
- }
- }
- else {
- // log score 1st
- SCORE logScores[totalNumScores];
- for (size_t i = 0; i < totalNumScores; ++i) {
- logScores[i] = FloorScore(TransformScore(scores[i]));
- }
-
- // set pt score for rule
- tp->GetScores().PlusEquals(system, *this, logScores);
-
- // save scores for other FF, eg. lex RO.
- tp->scoreProperties = pool.Allocate<SCORE>(m_engine->num_lex_scores);
- for (size_t i = 0; i < m_engine->num_lex_scores; ++i) {
- tp->scoreProperties[i] = logScores[i + m_engine->num_scores];
- }
- }
-
- offset += sizeof(SCORE) * totalNumScores;
-
- // words
- for (size_t targetPos = 0; targetPos < numRealWords; ++targetPos) {
- for (size_t i = 0; i < m_output.size(); ++i) {
- FactorType factorType = m_output[i];
-
- uint32_t *probingId = (uint32_t*) offset;
-
- const std::pair<bool, const Factor *> *factorPair = GetTargetFactor(*probingId);
- assert(factorPair);
- assert(!factorPair->first);
-
- Word &word = (*tp)[targetPos];
- word[factorType] = factorPair->second;
-
- offset += sizeof(uint32_t);
- }
- }
-
- // align
- uint32_t alignTerm = tpInfo->alignTerm;
- //cerr << "alignTerm=" << alignTerm << endl;
- UTIL_THROW_IF2(alignTerm >= m_aligns.size(), "Unknown alignInd");
- tp->Parent::SetAlignTerm(*m_aligns[alignTerm]);
-
- // properties TODO
-
- return tp;
-}
-
-void ProbingPT::GetSourceProbingIds(const Phrase<Moses2::Word> &sourcePhrase,
- bool &ok, uint64_t probingSource[]) const
-{
-
- size_t size = sourcePhrase.GetSize();
- for (size_t i = 0; i < size; ++i) {
- const Word &word = sourcePhrase[i];
- uint64_t probingId = GetSourceProbingId(word);
- if (probingId == m_unkId) {
- ok = false;
- return;
- }
- else {
- probingSource[i] = probingId;
- }
- }
-
- ok = true;
-}
-
-uint64_t ProbingPT::GetSourceProbingId(const Word &word) const
-{
- uint64_t ret = 0;
-
- for (size_t i = 0; i < m_input.size(); ++i) {
- FactorType factorType = m_input[i];
- const Factor *factor = word[factorType];
-
- size_t factorId = factor->GetId();
- if (factorId >= m_sourceVocab.size()) {
- return m_unkId;
- }
- ret += m_sourceVocab[factorId];
- }
-
- return ret;
-}
-
-// Pre-load translation options for the entries listed in "<m_path>/cache".
-// Does nothing when m_maxCacheSize is 0. At most m_maxCacheSize entries are
-// loaded. Each line after the header must hold 3 tab-separated fields; the
-// 2nd is the probing key and the 3rd the source phrase (the 1st is unused
-// here).
-// Throws (UTIL_THROW_IF2) on a malformed line, or when an SCFG entry yields
-// no target phrases, because LookupGivenNode dereferences cached SCFG
-// entries without a NULL check.
-void ProbingPT::CreateCache(System &system)
-{
-  if (m_maxCacheSize == 0) {
-    return;
-  }
-
-  string filePath = m_path + "/cache";
-  InputFileStream strme(filePath);
-
-  // 1st line is a header (a total count that is not used at the moment)
-  string line;
-  getline(strme, line);
-  //float totalCount = Scan<float>(line);
-
-  MemPool &pool = system.GetSystemPool();
-  FactorCollection &vocab = system.GetVocab();
-
-  // source phrases are only needed while the target phrases are created
-  MemPool tmpSourcePool;
-
-  size_t lineCount = 0;
-  while (getline(strme, line) && lineCount < m_maxCacheSize) {
-    vector<string> toks = Tokenize(line, "\t");
-    // checked in release builds too: toks[1] and toks[2] are read below
-    UTIL_THROW_IF2(toks.size() != 3, "Wrong cache format:" << line);
-    uint64_t key = Scan<uint64_t>(toks[1]);
-
-    if (system.isPb) {
-      PhraseImpl *sourcePhrase = PhraseImpl::CreateFromString(tmpSourcePool, vocab, system, toks[2]);
-
-      TargetPhrases *tps = CreateTargetPhrases(pool, system, *sourcePhrase, key);
-      UTIL_THROW_IF2(tps == NULL, "Unknown cache entry:" << line);
-
-      m_cachePb[key] = tps;
-    }
-    else {
-      // SCFG
-      SCFG::PhraseImpl *sourcePhrase = SCFG::PhraseImpl::CreateFromString(tmpSourcePool, vocab, system, toks[2], false);
-
-      std::pair<bool, SCFG::TargetPhrases*> tpsPair = CreateTargetPhrasesSCFG(pool, system, *sourcePhrase, key);
-      UTIL_THROW_IF2(!tpsPair.first || tpsPair.second == NULL, "Unknown cache entry:" << line);
-
-      m_cacheSCFG[key] = tpsPair.second;
-    }
-    ++lineCount;
-  }
-
-}
-
-///////////////////////////////////////////////////////////////////////////////
-// SCFG
-///////////////////////////////////////////////////////////////////////////////
-
-// Detect and unwrap the bracket notation of SCFG non-terminals.
-// isNT is always false for a phrase-based system (wordStr is left alone).
-// Otherwise isNT says whether wordStr starts with '[' and ends with ']';
-// if so wordStr is reduced to the inner label: "[X][Y]" -> "Y", "[X]" -> "X".
-void ProbingPT::ReformatWord(System &system, std::string &wordStr, bool &isNT)
-{
-  isNT = false;
-  if (system.isPb) {
-    return;
-  }
-
-  const bool bracketed = wordStr[0] == '[' && wordStr[wordStr.size() - 1] == ']';
-  isNT = bracketed;
-  if (!bracketed) {
-    return;
-  }
-
-  // with a "][" separator keep what follows it, otherwise only drop the '['
-  const size_t sepPos = wordStr.find("][");
-  const size_t startPos = (sepPos == string::npos) ? 1 : sepPos + 2;
-
-  // the trailing ']' is excluded by the -1
-  wordStr = wordStr.substr(startPos, wordStr.size() - startPos - 1);
-}
-
-// Give `path` an initial active chart entry for this phrase table. The
-// entry is constructed in memory obtained from `pool`.
-void ProbingPT::InitActiveChart(
-  MemPool &pool,
-  const SCFG::Manager &mgr,
-  SCFG::InputPath &path) const
-{
-  const size_t ptInd = GetPtInd();
-
-  ActiveChartEntryProbing *mem = pool.Allocate<ActiveChartEntryProbing>();
-  ActiveChartEntryProbing *rootEntry = new (mem) ActiveChartEntryProbing(pool);
-
-  path.AddActiveChartEntry(ptInd, rootEntry);
-}
-
-// SCFG lookup for one input path. Paths covering more than maxChartSpan
-// words are ignored. The last word of the path is first looked up as a
-// terminal; then, for every path in the prefix chain, the span between that
-// prefix and the end of `path` is looked up as a non-terminal.
-// Throws if `path` has no prefix path.
-void ProbingPT::Lookup(MemPool &pool,
-  const SCFG::Manager &mgr,
-  size_t maxChartSpan,
-  const SCFG::Stacks &stacks,
-  SCFG::InputPath &path) const
-{
-  if (path.range.GetNumWordsCovered() > maxChartSpan) {
-    return;
-  }
-
-  const size_t endPos = path.range.GetEndPos();
-
-  const SCFG::InputPath *prevPath = static_cast<const SCFG::InputPath*>(path.prefixPath);
-  UTIL_THROW_IF2(prevPath == NULL, "prefixPath == NULL");
-
-  // TERMINAL: extend the direct prefix with the final word of this path
-  const SCFG::Word &lastWord = path.subPhrase.Back();
-  const SCFG::InputPath &lastWordPath = *mgr.GetInputPaths().GetMatrix().GetValue(endPos, 1);
-  LookupGivenWord(pool, mgr, *prevPath, lastWord, NULL, lastWordPath.range, path);
-
-  // NON-TERMINAL: walk up the chain of prefixes
-  for (const SCFG::InputPath *prefix = prevPath;
-       prefix;
-       prefix = static_cast<const SCFG::InputPath*>(prefix->prefixPath)) {
-    // the non-terminal covers everything after the prefix, up to endPos
-    const size_t ntStart = prefix->range.GetEndPos() + 1;
-    const size_t ntWidth = endPos - ntStart + 1;
-    const SCFG::InputPath &ntPath = *mgr.GetInputPaths().GetMatrix().GetValue(ntStart, ntWidth);
-
-    LookupNT(pool, mgr, ntPath.range, *prefix, stacks, path);
-  }
-}
-
-// Try to extend the active chart entry `prevEntry` with `wordSought`.
-// The key of the extended entry is looked up in m_cacheSCFG first, then in
-// the table itself. If the key exists, a new active chart entry (a copy of
-// prevEntry plus the new symbol binding) is added to outPath, and any rules
-// stored under the key are added to outPath as target phrases.
-// Nothing is added when no key can be formed or the key is not in the table.
-void ProbingPT::LookupGivenNode(
-  MemPool &pool,
-  const SCFG::Manager &mgr,
-  const SCFG::ActiveChartEntry &prevEntry,
-  const SCFG::Word &wordSought,
-  const Moses2::Hypotheses *hypos,
-  const Moses2::Range &subPhraseRange,
-  SCFG::InputPath &outPath) const
-{
-  const ActiveChartEntryProbing &prevEntryCast = static_cast<const ActiveChartEntryProbing&>(prevEntry);
-
-  std::pair<bool, uint64_t> key = prevEntryCast.GetKey(wordSought, *this);
-
-  if (!key.first) {
-    // should only occasionally happen when looking up unary rules
-    return;
-  }
-
-  // rules for this key. Stays NULL if the key is only a prefix of longer rules
-  SCFG::TargetPhrases *tps = NULL;
-
-  // check in cache
-  CacheSCFG::const_iterator iter = m_cacheSCFG.find(key.second);
-  if (iter != m_cacheSCFG.end()) {
-    tps = iter->second;
-  }
-  else {
-    // not in cache. Lookup
-    const Phrase<SCFG::Word> &sourcePhrase = outPath.subPhrase;
-    std::pair<bool, SCFG::TargetPhrases*> tpsPair = CreateTargetPhrasesSCFG(pool, mgr.system, sourcePhrase, key.second);
-
-    // A miss (first == false) and a prefix-only cell (second == NULL) are both
-    // regular outcomes, so they are tested for rather than asserted against.
-    if (!tpsPair.first) {
-      return;
-    }
-    tps = tpsPair.second;
-  }
-
-  // new entry
-  ActiveChartEntryProbing *chartEntry = new (pool.Allocate<ActiveChartEntryProbing>()) ActiveChartEntryProbing(pool, prevEntryCast);
-  chartEntry->AddSymbolBindElement(subPhraseRange, wordSought, hypos, *this);
-
-  size_t ptInd = GetPtInd();
-  outPath.AddActiveChartEntry(ptInd, chartEntry);
-
-  if (tps) {
-    // there are some rules
-    outPath.AddTargetPhrasesToPath(pool, mgr.system, *this, *tps, chartEntry->GetSymbolBind());
-  }
-}
-
-// Decode one SCFG target phrase from the binary data at `offset`, and
-// advance `offset` past it. Layout read here, in order:
-//   TargetPhraseInfo | (num_scores + num_lex_scores) SCORE values |
-//   (numWords - 1) uint32 word ids | 1 uint32 id for the lhs
-// The returned phrase is allocated in `pool`.
-// Throws if a word id or an alignment id stored in the data is unknown.
-SCFG::TargetPhraseImpl *ProbingPT::CreateTargetPhraseSCFG(
-  MemPool &pool,
-  const System &system,
-  const char *&offset) const
-{
-  TargetPhraseInfo *tpInfo = (TargetPhraseInfo*) offset;
-  // numWords also counts the lhs, which is stored after the words
-  SCFG::TargetPhraseImpl *tp =
-    new (pool.Allocate<SCFG::TargetPhraseImpl>()) SCFG::TargetPhraseImpl(pool, *this,
-        system, tpInfo->numWords - 1);
-
-  offset += sizeof(TargetPhraseInfo);
-
-  // scores
-  SCORE *scores = (SCORE*) offset;
-
-  size_t totalNumScores = m_engine->num_scores + m_engine->num_lex_scores;
-
-  if (m_engine->logProb) {
-    // set pt score for rule
-    tp->GetScores().PlusEquals(system, *this, scores);
-
-    // save scores for other FF, eg. lex RO. Just give the offset
-    if (m_engine->num_lex_scores) {
-      tp->scoreProperties = scores + m_engine->num_scores;
-    }
-  }
-  else {
-    // log score 1st
-    // NOTE(review): runtime-sized array, a compiler extension in C++
-    SCORE logScores[totalNumScores];
-    for (size_t i = 0; i < totalNumScores; ++i) {
-      logScores[i] = FloorScore(TransformScore(scores[i]));
-    }
-
-    // set pt score for rule
-    tp->GetScores().PlusEquals(system, *this, logScores);
-
-    // save scores for other FF, eg. lex RO.
-    // copied into the pool because logScores does not outlive this call
-    tp->scoreProperties = pool.Allocate<SCORE>(m_engine->num_lex_scores);
-    for (size_t i = 0; i < m_engine->num_lex_scores; ++i) {
-      tp->scoreProperties[i] = logScores[i + m_engine->num_scores];
-    }
-  }
-
-  offset += sizeof(SCORE) * totalNumScores;
-
-  // words
-  for (size_t i = 0; i < tpInfo->numWords - 1; ++i) {
-    uint32_t *probingId = (uint32_t*) offset;
-
-    // GetTargetFactor returns NULL for an id outside the target vocab; check
-    // it in release builds too, as the alignment ids below are checked
-    const std::pair<bool, const Factor *> *factorPair = GetTargetFactor(*probingId);
-    UTIL_THROW_IF2(factorPair == NULL, "Unknown target word id " << *probingId);
-
-    SCFG::Word &word = (*tp)[i];
-    word[0] = factorPair->second;
-    word.isNonTerminal = factorPair->first;
-
-    offset += sizeof(uint32_t);
-  }
-
-  // lhs
-  uint32_t *probingId = (uint32_t*) offset;
-
-  const std::pair<bool, const Factor *> *factorPair = GetTargetFactor(*probingId);
-  UTIL_THROW_IF2(factorPair == NULL, "Unknown target lhs id " << *probingId);
-  assert(factorPair->first);
-
-  tp->lhs[0] = factorPair->second;
-  tp->lhs.isNonTerminal = factorPair->first;
-
-  offset += sizeof(uint32_t);
-
-  // align
-  uint32_t alignTerm = tpInfo->alignTerm;
-  UTIL_THROW_IF2(alignTerm >= m_aligns.size(), "Unknown alignInd");
-  tp->Parent::SetAlignTerm(*m_aligns[alignTerm]);
-
-  uint32_t alignNonTerm = tpInfo->alignNonTerm;
-  UTIL_THROW_IF2(alignNonTerm >= m_aligns.size(), "Unknown alignInd");
-  tp->SetAlignNonTerm(*m_aligns[alignNonTerm]);
-
-  // properties TODO
-
-  return tp;
-}
-
-// Query the table for `key` and build the rules stored under it.
-// Return value:
-//   first  = the key exists in the table
-//   second = the rules for the key, allocated in `pool`; NULL when the key
-//            is missing or when the table stores no rules for it
-// Every rule is scored with EvaluateInIsolation; the collection is then
-// sorted and pruned to m_tableLimit before EvaluateAfterTablePruning is run.
-std::pair<bool, SCFG::TargetPhrases*> ProbingPT::CreateTargetPhrasesSCFG(MemPool &pool, const System &system,
-    const Phrase<SCFG::Word> &sourcePhrase, uint64_t key) const
-{
-  std::pair<bool, SCFG::TargetPhrases*> ret(false, NULL);
-
-  // 1st=found, 2nd=target file offset
-  std::pair<bool, uint64_t> query_result = m_engine->query(key);
-
-  if (!query_result.first) {
-    return ret;
-  }
-  ret.first = true;
-
-  if (query_result.second == NONE) {
-    // key is known, but no rules are stored for it
-    return ret;
-  }
-
-  // there are some rules
-  const FeatureFunctions &ffs = system.featureFunctions;
-
-  // the collection starts with the number of target phrases it holds
-  const char *offset = m_engine->memTPS + query_result.second;
-  uint64_t *numTP = (uint64_t*) offset;
-
-  SCFG::TargetPhrases *tps = new (pool.Allocate<SCFG::TargetPhrases>()) SCFG::TargetPhrases(pool, *numTP);
-  ret.second = tps;
-
-  offset += sizeof(uint64_t);
-  for (size_t i = 0; i < *numTP; ++i) {
-    // offset is moved to the next target phrase by the call
-    SCFG::TargetPhraseImpl *tp = CreateTargetPhraseSCFG(pool, system, offset);
-    assert(tp);
-
-    ffs.EvaluateInIsolation(pool, system, sourcePhrase, *tp);
-
-    tps->AddTargetPhrase(*tp);
-  }
-
-  tps->SortAndPrune(m_tableLimit);
-  ffs.EvaluateAfterTablePruning(pool, *tps, sourcePhrase);
-
-  return ret;
-}
-
-} // namespace
-
diff --git a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.h b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.h
deleted file mode 100644
index c5fbefd6f..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.h
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * ProbingPT.h
- *
- * Created on: 3 Nov 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include <boost/iostreams/device/mapped_file.hpp>
-#include <boost/thread/tss.hpp>
-#include <boost/bimap.hpp>
-#include <deque>
-#include "../PhraseTable.h"
-#include "../../Vector.h"
-#include "../../Phrase.h"
-#include "../../SCFG/ActiveChart.h"
-#include "util/mmap.hh"
-
-namespace Moses2
-{
-class AlignmentInfo;
-class QueryEngine;
-class target_text;
-class MemPool;
-class System;
-class RecycleData;
-
-namespace SCFG
-{
-class TargetPhraseImpl;
-class TargetPhrases;
-}
-
-// Phrase table whose entries are fetched through a QueryEngine (m_engine).
-// Declares a phrase-based Lookup and the SCFG chart interface
-// (InitActiveChart / Lookup / LookupGivenNode).
-class ProbingPT: public Moses2::PhraseTable
-{
- //////////////////////////////////////
- // Active chart entry that also carries a 64-bit key; the key is 0 for an
- // entry created with only a pool.
- class ActiveChartEntryProbing : public SCFG::ActiveChartEntry
- {
- typedef SCFG::ActiveChartEntry Parent;
- public:
-
- ActiveChartEntryProbing(MemPool &pool)
- :Parent(pool)
- ,m_key(0)
- {}
-
- ActiveChartEntryProbing(
- MemPool &pool,
- const ActiveChartEntryProbing &prevEntry);
-
- uint64_t GetKey() const
- { return m_key; }
-
- // 1st = whether a key could be formed for this entry followed by nextWord
- // (LookupGivenNode gives up when it is false), 2nd = the key
- std::pair<bool, uint64_t> GetKey(const SCFG::Word &nextWord, const ProbingPT &pt) const;
-
- virtual void AddSymbolBindElement(
- const Range &range,
- const SCFG::Word &word,
- const Moses2::Hypotheses *hypos,
- const Moses2::PhraseTable &pt);
-
- protected:
- uint64_t m_key;
- };
- //////////////////////////////////////
-
-public:
- ProbingPT(size_t startInd, const std::string &line);
- virtual ~ProbingPT();
- void Load(System &system);
-
- virtual void SetParameter(const std::string& key, const std::string& value);
- void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const;
-
- // id returned by GetSourceProbingId for a word that is not in the source vocab
- uint64_t GetUnk() const
- { return m_unkId; }
-
- // SCFG
- void InitActiveChart(
- MemPool &pool,
- const SCFG::Manager &mgr,
- SCFG::InputPath &path) const;
-
- virtual void Lookup(MemPool &pool,
- const SCFG::Manager &mgr,
- size_t maxChartSpan,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const;
-
-
-protected:
- std::vector<uint64_t> m_sourceVocab; // factor id -> pt id
- std::vector< std::pair<bool, const Factor*> > m_targetVocab; // pt id -> factor*
- // indexed by the alignTerm / alignNonTerm ids stored with each target phrase
- std::vector<const AlignmentInfo*> m_aligns;
- util::LoadMethod load_method;
-
- uint64_t m_unkId;
- QueryEngine *m_engine;
-
- void CreateAlignmentMap(System &system, const std::string path);
-
- TargetPhrases *Lookup(const Manager &mgr, MemPool &pool,
- InputPath &inputPath) const;
- TargetPhrases *CreateTargetPhrases(MemPool &pool, const System &system,
- const Phrase<Moses2::Word> &sourcePhrase, uint64_t key) const;
- TargetPhraseImpl *CreateTargetPhrase(MemPool &pool, const System &system,
- const char *&offset) const;
-
- // Returns NULL when probingId is not a valid index into m_targetVocab.
- // 1st of the pair = the word is a non-terminal, 2nd = its factor
- inline const std::pair<bool, const Factor*> *GetTargetFactor(uint32_t probingId) const
- {
- if (probingId >= m_targetVocab.size()) {
- return NULL;
- }
- return &m_targetVocab[probingId];
- }
-
- std::pair<bool, uint64_t> GetKey(const Phrase<Moses2::Word> &sourcePhrase) const;
-
- // ok is set to false as soon as one word of the phrase maps to m_unkId
- void GetSourceProbingIds(const Phrase<Moses2::Word> &sourcePhrase, bool &ok,
- uint64_t probingSource[]) const;
-
- uint64_t GetSourceProbingId(const Word &word) const;
-
- // caching
- // both caches map a probing key to target phrases created by CreateCache
- typedef boost::unordered_map<uint64_t, TargetPhrases*> CachePb;
- CachePb m_cachePb;
-
- typedef boost::unordered_map<uint64_t, SCFG::TargetPhrases*> CacheSCFG;
- CacheSCFG m_cacheSCFG;
-
- void CreateCache(System &system);
-
- void ReformatWord(System &system, std::string &wordStr, bool &isNT);
-
- // SCFG
- void LookupGivenNode(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::ActiveChartEntry &prevEntry,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const;
-
- std::pair<bool, SCFG::TargetPhrases*> CreateTargetPhrasesSCFG(MemPool &pool, const System &system,
- const Phrase<SCFG::Word> &sourcePhrase, uint64_t key) const;
- // return value: 1st = the key was found in the table,
- // 2nd = the rules, or NULL when the cell is only an empty cell for a prefix
-
- // reads one target phrase at offset and moves offset past it
- SCFG::TargetPhraseImpl *CreateTargetPhraseSCFG(
- MemPool &pool,
- const System &system,
- const char *&offset) const;
-
-
-};
-
-}
-
diff --git a/contrib/moses2/TranslationModel/ProbingPT/StoreTarget.cpp b/contrib/moses2/TranslationModel/ProbingPT/StoreTarget.cpp
deleted file mode 100644
index 326aaea5f..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/StoreTarget.cpp
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * StoreTarget.cpp
- *
- * Created on: 19 Jan 2016
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include "StoreTarget.h"
-#include "line_splitter.hh"
-#include "probing_hash_utils.hh"
-#include "../../legacy/OutputFileStream.h"
-#include "../../legacy/Util2.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-// Prepare to write a binarised table under `basepath`: the target vocab goes
-// to "TargetVocab.dat" and the target phrase collections to
-// "TargetColl.dat", which is (re)created here. Throws a C string if that
-// file can't be opened.
-StoreTarget::StoreTarget(const std::string &basepath)
-:m_basePath(basepath)
-,m_vocab(basepath + "/TargetVocab.dat")
-{
-  const std::ios::openmode mode =
-    std::ios::out | std::ios::binary | std::ios::ate | std::ios::trunc;
-
-  const std::string collPath = basepath + "/TargetColl.dat";
-  m_fileTargetColl.open(collPath.c_str(), mode);
-
-  const bool opened = m_fileTargetColl.is_open();
-  if (!opened) {
-    throw "can't create file ";
-  }
-}
-
-// Closes the target phrase file, then writes out the target vocab.
-// All appended rules are expected to have been flushed with Save() by now.
-StoreTarget::~StoreTarget()
-{
-  assert(m_coll.size() == 0);
-
-  m_fileTargetColl.close();
-
-  // the vocab is only complete once every rule has been appended
-  m_vocab.Save();
-}
-
-// Write the pending rules to the target phrase file as one collection
-// (a 64-bit count followed by each rule), then free and forget them.
-// Returns the file position at which the collection starts.
-uint64_t StoreTarget::Save()
-{
-  const uint64_t startPos = m_fileTargetColl.tellp();
-
-  // header: number of target phrases in this collection
-  const uint64_t numTP = m_coll.size();
-  m_fileTargetColl.write((const char*) &numTP, sizeof(numTP));
-
-  // body: the target phrases themselves
-  typedef std::vector<target_text*>::const_iterator RuleIter;
-  for (RuleIter ruleIt = m_coll.begin(); ruleIt != m_coll.end(); ++ruleIt) {
-    Save(**ruleIt);
-  }
-
-  // the rules are on disk, release them
-  RemoveAllInColl(m_coll);
-  m_coll.clear();
-
-  return startPos;
-}
-
-// Write one rule to the target phrase file: a TargetPhraseInfo record,
-// then every score as a float, then every target vocab id as a uint32.
-// The property column is not written yet.
-void StoreTarget::Save(const target_text &rule)
-{
-  // metadata for each tp
-  // Value-initialised so that `filler` (never assigned) and any padding are
-  // zero instead of indeterminate; the whole struct is dumped to disk below.
-  TargetPhraseInfo tpInfo = TargetPhraseInfo();
-  tpInfo.alignTerm = GetAlignId(rule.word_align_term);
-  tpInfo.alignNonTerm = GetAlignId(rule.word_align_non_term);
-  tpInfo.numWords = rule.target_phrase.size();
-  tpInfo.propLength = rule.property.size();
-
-  m_fileTargetColl.write((char*) &tpInfo, sizeof(TargetPhraseInfo));
-
-  // scores
-  for (size_t i = 0; i < rule.prob.size(); ++i) {
-    float prob = rule.prob[i];
-    m_fileTargetColl.write((char*) &prob, sizeof(prob));
-  }
-
-  // tp
-  for (size_t i = 0; i < rule.target_phrase.size(); ++i) {
-    uint32_t vocabId = rule.target_phrase[i];
-    m_fileTargetColl.write((char*) &vocabId, sizeof(vocabId));
-  }
-
-  // prop TODO
-
-}
-
-// Dump every known alignment to "<m_basePath>/Alignments.dat", one per
-// line: the alignment id, a tab, then the positions, each followed by a
-// space.
-void StoreTarget::SaveAlignment()
-{
-  std::string path = m_basePath + "/Alignments.dat";
-  Moses2::OutputFileStream file(path);
-
-  for (Alignments::iterator alignIt = m_aligns.begin(); alignIt != m_aligns.end(); ++alignIt) {
-    // the id is the mapped value, the positions are the key
-    file << alignIt->second << "\t";
-
-    const std::vector<size_t> &positions = alignIt->first;
-    for (size_t i = 0; i < positions.size(); ++i) {
-      file << positions[i] << " ";
-    }
-    file << endl;
-  }
-
-}
-
-// Convert one split phrase-table line into a target_text and queue it in
-// m_coll until the next Save().
-//   line     - the split line; target_phrase, prob, word_align and property are read
-//   log_prob - if true, scores are stored as FloorScore(log(p)); a result of
-//              exactly 0 is replaced by 0.0000000001
-//   scfg     - if true, bracketed target words are treated as non-terminals and
-//              their alignments are kept apart from the terminal alignments
-void StoreTarget::Append(const line_text &line, bool log_prob, bool scfg)
-{
- // NOTE(review): rule is only handed to m_coll at the very end; it leaks if
- // anything in between throws
- target_text *rule = new target_text;
- //cerr << "line.target_phrase=" << line.target_phrase << endl;
-
- // target_phrase
- // nonTerms gets one flag per target word, and only when scfg is set
- vector<bool> nonTerms;
- util::TokenIter<util::SingleCharacter> it;
- it = util::TokenIter<util::SingleCharacter>(line.target_phrase,
- util::SingleCharacter(' '));
- while (it) {
- StringPiece word = *it;
- //cerr << "word=" << word << endl;
-
- bool nonTerm = false;
- if (scfg) {
- // not really sure how to handle factored SCFG and NT
- // NOTE(review): word[0] assumes a non-empty token - confirm that
- // consecutive spaces can't occur in the target phrase
- if (scfg && word[0] == '[' && word[word.size() - 1] == ']') {
- //cerr << "NON-TERM=" << tok << " " << nonTerms.size() << endl;
- nonTerm = true;
- }
- nonTerms.push_back(nonTerm);
- }
-
- // each '|'-separated factor of the word becomes its own vocab id
- util::TokenIter<util::SingleCharacter> itFactor;
- itFactor = util::TokenIter<util::SingleCharacter>(word,
- util::SingleCharacter('|'));
- while (itFactor) {
- StringPiece factor = *itFactor;
-
- string factorStr = factor.as_string();
- uint32_t vocabId = m_vocab.GetVocabId(factorStr);
-
- rule->target_phrase.push_back(vocabId);
-
- itFactor++;
- }
-
- it++;
- }
-
- // probs
- it = util::TokenIter<util::SingleCharacter>(line.prob,
- util::SingleCharacter(' '));
- while (it) {
- string tok = it->as_string();
- float prob = Scan<float>(tok);
-
- if (log_prob) {
- prob = FloorScore(log(prob));
- if (prob == 0.0f) prob = 0.0000000001;
- }
-
- rule->prob.push_back(prob);
- it++;
- }
-
- /*
- cerr << "nonTerms=";
- for (size_t i = 0; i < nonTerms.size(); ++i) {
- cerr << nonTerms[i] << " ";
- }
- cerr << endl;
- */
-
- // alignment
- // each "source-target" pair is stored flattened, as 2 consecutive entries
- it = util::TokenIter<util::SingleCharacter>(line.word_align,
- util::SingleCharacter(' '));
- while (it) {
- string tokPair = Trim(it->as_string());
- if (tokPair.empty()) {
- break;
- }
-
- vector<size_t> alignPair = Tokenize<size_t>(tokPair, "-");
- assert(alignPair.size() == 2);
-
- bool nonTerm = false;
- size_t sourcePos = alignPair[0];
- size_t targetPos = alignPair[1];
- if (scfg) {
- // NOTE(review): targetPos comes from the input line and is not checked
- // against nonTerms.size()
- nonTerm = nonTerms[targetPos];
- }
-
- //cerr << targetPos << "=" << nonTerm << endl;
-
- if (nonTerm) {
- rule->word_align_non_term.push_back(sourcePos);
- rule->word_align_non_term.push_back(targetPos);
- //cerr << (int) rule->word_all1.back() << " ";
- }
- else {
- rule->word_align_term.push_back(sourcePos);
- rule->word_align_term.push_back(targetPos);
- }
-
- it++;
- }
-
- // extra scores
- // a {{LexRO ...}} property, if present, is appended to the rule's scores
- string prop = line.property.as_string();
- AppendLexRO(prop, rule->prob, log_prob);
-
- //cerr << "line.property=" << line.property << endl;
- //cerr << "prop=" << prop << endl;
-
- // properties
- /*
- for (size_t i = 0; i < prop.size(); ++i) {
- rule->property.push_back(prop[i]);
- }
- */
- m_coll.push_back(rule);
-}
-
-// Return the id of an alignment. An alignment that has not been seen before
-// is registered with the next free id, i.e. the number of alignments known
-// so far.
-uint32_t StoreTarget::GetAlignId(const std::vector<size_t> &align)
-{
-  const uint32_t nextId = m_aligns.size();
-
-  // insert() leaves an existing entry untouched and hands back its position
-  std::pair<Alignments::iterator, bool> inserted =
-    m_aligns.insert(Alignments::value_type(align, nextId));
-
-  return inserted.first->second;
-}
-
-// Move the scores of a "{{LexRO s1 s2 ...}}" property into retvector.
-//   prop      - property column; the LexRO property is cut out of it
-//   retvector - the LexRO scores are appended to it
-//   log_prob  - if true, scores are stored as FloorScore(log(s)); a result of
-//               exactly 0 is replaced by 0.0000000001
-// prop and retvector are left untouched if there is no LexRO property.
-// If the closing "}}" is missing, everything after "{{LexRO " is taken as
-// the scores and removed from prop.
-void StoreTarget::AppendLexRO(std::string &prop, std::vector<float> &retvector,
-    bool log_prob) const
-{
-  const string openTag = "{{LexRO ";
-
-  size_t startPos = prop.find(openTag);
-  if (startPos == string::npos) {
-    return;
-  }
-
-  const size_t scoresPos = startPos + openTag.size();
-  const size_t endPos = prop.find("}}", scoresPos);
-  // without this check endPos + 2 wraps around and prop is rebuilt wrongly
-  const bool terminated = (endPos != string::npos);
-
-  string lexProb = terminated ? prop.substr(scoresPos, endPos - scoresPos)
-                   : prop.substr(scoresPos);
-
-  // append lex probs to pt probs
-  vector<float> scores = Tokenize<float>(lexProb);
-
-  if (log_prob) {
-    for (size_t i = 0; i < scores.size(); ++i) {
-      scores[i] = FloorScore(log(scores[i]));
-      if (scores[i] == 0.0f) scores[i] = 0.0000000001;
-    }
-  }
-
-  retvector.insert(retvector.end(), scores.begin(), scores.end());
-
-  // exclude LexRO property from property column
-  string remainder = terminated ? prop.substr(endPos + 2) : string();
-  prop = prop.substr(0, startPos) + remainder;
-}
-
-} /* namespace Moses2 */
diff --git a/contrib/moses2/TranslationModel/ProbingPT/StoreTarget.h b/contrib/moses2/TranslationModel/ProbingPT/StoreTarget.h
deleted file mode 100644
index 6fc3b1f66..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/StoreTarget.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * StoreTarget.h
- *
- * Created on: 19 Jan 2016
- * Author: hieu
- */
-#pragma once
-#include <string>
-#include <fstream>
-#include <vector>
-#include <inttypes.h>
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
-#include "StoreVocab.h"
-
-namespace Moses2
-{
-
-class line_text;
-class target_text;
-
-// Writes the target side of a binarised phrase table: rules are queued
-// with Append() and written out as one collection by Save().
-class StoreTarget
-{
-public:
- // opens "<basepath>/TargetColl.dat" for writing
- StoreTarget(const std::string &basepath);
- virtual ~StoreTarget();
-
- // writes the queued rules; returns the file position where they start
- uint64_t Save();
- // writes "<basepath>/Alignments.dat"
- void SaveAlignment();
-
- // queues one rule until the next Save()
- void Append(const line_text &line, bool log_prob, bool scfg);
-protected:
- std::string m_basePath;
- std::fstream m_fileTargetColl;
- StoreVocab<uint32_t> m_vocab;
-
- // alignment (flattened source/target positions) -> alignment id
- typedef boost::unordered_map<std::vector<size_t>, uint32_t> Alignments;
- Alignments m_aligns;
-
- // rules appended since the last Save(); allocated in Append(), freed in Save()
- std::vector<target_text*> m_coll;
-
- uint32_t GetAlignId(const std::vector<size_t> &align);
- void Save(const target_text &rule);
-
- // moves the scores of a {{LexRO ...}} property from prop into retvector
- void AppendLexRO(std::string &prop, std::vector<float> &retvector,
- bool log_prob) const;
-
-};
-
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/TranslationModel/ProbingPT/StoreVocab.cpp b/contrib/moses2/TranslationModel/ProbingPT/StoreVocab.cpp
deleted file mode 100644
index e0b5b0b08..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/StoreVocab.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- * StoreVocab.cpp
- *
- * Created on: 15 Jun 2016
- * Author: hieu
- */
-#include <fstream>
-#include "StoreVocab.h"
-
-namespace Moses2
-{
-
-} /* namespace Moses2 */
diff --git a/contrib/moses2/TranslationModel/ProbingPT/StoreVocab.h b/contrib/moses2/TranslationModel/ProbingPT/StoreVocab.h
deleted file mode 100644
index e9808707a..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/StoreVocab.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * StoreVocab.h
- *
- * Created on: 15 Jun 2016
- * Author: hieu
- */
-#pragma once
-#include <string>
-#include <boost/unordered_map.hpp>
-#include "../../legacy/OutputFileStream.h"
-#include "../../legacy/Util2.h"
-
-namespace Moses2
-{
-
-// Word -> id map that can be written to a text file.
-// Ids handed out by GetVocabId() start at 1.
-template<typename VOCABID>
-class StoreVocab
-{
-protected:
-  std::string m_path;
-
-  typedef boost::unordered_map<std::string, VOCABID> Coll;
-  Coll m_vocab;
-
-public:
-  // path: the file written by Save()
-  StoreVocab(const std::string &path)
-    :m_path(path)
-  {}
-
-  virtual ~StoreVocab() {}
-
-  // Id of `word`. An unknown word is added with id = (number of words) + 1.
-  VOCABID GetVocabId(const std::string &word)
-  {
-    const VOCABID nextId = m_vocab.size() + 1;
-
-    // insert() keeps the existing id if the word is already known
-    return m_vocab.insert(typename Coll::value_type(word, nextId)).first->second;
-  }
-
-  // Set the id of `word`, replacing any id it already had.
-  void Insert(VOCABID id, const std::string &word)
-  {
-    m_vocab[word] = id;
-  }
-
-  // Write one "word<TAB>id" line per entry to m_path.
-  void Save()
-  {
-    OutputFileStream strme(m_path);
-
-    typedef typename Coll::const_iterator EntryIter;
-    const EntryIter last = m_vocab.end();
-    for (EntryIter entry = m_vocab.begin(); entry != last; ++entry) {
-      strme << entry->first << "\t" << entry->second << std::endl;
-    }
-
-    strme.Close();
-  }
-};
-
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/TranslationModel/ProbingPT/hash.cpp b/contrib/moses2/TranslationModel/ProbingPT/hash.cpp
deleted file mode 100644
index aab5ee2b3..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/hash.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-#include <iostream>
-#include "hash.hh"
-
-using namespace std;
-
-namespace Moses2
-{
-
-// MurmurHash (util::MurmurHashNative) of the bytes of `text`.
-uint64_t getHash(StringPiece text)
-{
-  const std::size_t numBytes = text.size();
-  return util::MurmurHashNative(text.data(), numBytes);
-}
-
-// One id per space-separated word of `textin`: the sum of the hashes of the
-// word's '|'-separated factors.
-std::vector<uint64_t> getVocabIDs(const StringPiece &textin)
-{
-  typedef util::TokenIter<util::SingleCharacter> CharSplit;
-
-  std::vector<uint64_t> ids;
-
-  for (CharSplit wordIt(textin, util::SingleCharacter(' ')); wordIt; ++wordIt) {
-    uint64_t wordId = 0;
-
-    for (CharSplit factorIt(*wordIt, util::SingleCharacter('|')); factorIt; ++factorIt) {
-      wordId += getHash(*factorIt);
-    }
-
-    ids.push_back(wordId);
-  }
-
-  return ids;
-}
-
-}
-
diff --git a/contrib/moses2/TranslationModel/ProbingPT/hash.hh b/contrib/moses2/TranslationModel/ProbingPT/hash.hh
deleted file mode 100644
index 78cc27999..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/hash.hh
+++ /dev/null
@@ -1,17 +0,0 @@
-#pragma once
-
-#include "util/string_piece.hh"
-#include "util/murmur_hash.hh"
-#include "util/string_piece.hh" //Tokenization and work with StringPiece
-#include "util/tokenize_piece.hh"
-#include <vector>
-
-namespace Moses2
-{
-
-//Gets the MurmurHash for the given string
-uint64_t getHash(StringPiece text);
-
-std::vector<uint64_t> getVocabIDs(const StringPiece &textin);
-
-}
diff --git a/contrib/moses2/TranslationModel/ProbingPT/line_splitter.cpp b/contrib/moses2/TranslationModel/ProbingPT/line_splitter.cpp
deleted file mode 100644
index e4b5e2694..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/line_splitter.cpp
+++ /dev/null
@@ -1,103 +0,0 @@
-#include "line_splitter.hh"
-
-namespace Moses2
-{
-
-// Split one phrase-table line on "|||" into its (trimmed) columns, in order:
-// source phrase, target phrase, probabilities, word alignment, counts,
-// sparse scores, property.
-// Columns missing at the end of the line are left empty in the result. This
-// includes the first three: they used to be read without checking that the
-// line had that many columns. Callers that require them can test for empty
-// fields.
-// The fields of the result refer to textin rather than copying it.
-line_text splitLine(const StringPiece &textin, bool scfg)
-{
-  typedef util::TokenIter<util::MultiCharacter> FieldIter;
-
-  // no SCFG-specific reformatting is done at the moment
-  (void) scfg;
-
-  const char delim[] = "|||";
-  line_text output;
-
-  //Tokenize
-  FieldIter it(textin, util::MultiCharacter(delim));
-
-  //Get source phrase
-  if (it == FieldIter::end()) return output;
-  output.source_phrase = Trim(*it);
-
-  //Get target_phrase
-  ++it;
-  if (it == FieldIter::end()) return output;
-  output.target_phrase = Trim(*it);
-
-  //Get probabilities
-  ++it;
-  if (it == FieldIter::end()) return output;
-  output.prob = Trim(*it);
-
-  //Get word alignment
-  ++it;
-  if (it == FieldIter::end()) return output;
-  output.word_align = Trim(*it);
-
-  //Get count
-  ++it;
-  if (it == FieldIter::end()) return output;
-  output.counts = Trim(*it);
-
-  //Get sparse_score
-  ++it;
-  if (it == FieldIter::end()) return output;
-  output.sparse_score = Trim(*it);
-
-  //Get property
-  ++it;
-  if (it == FieldIter::end()) return output;
-  output.property = Trim(*it);
-
-  return output;
-}
-
-// Parse a word alignment string such as "0-0 1-2" into a flat vector:
-// entries 0 and 1 are the first pair, 2 and 3 the second, etc.
-// Unsigned char is used instead of int to save space, as word alignments
-// are all very small numbers that fit in a single byte.
-// Tokens that are not of the form "a-b" (e.g. the empty token between two
-// consecutive spaces) are skipped.
-std::vector<unsigned char> splitWordAll1(const StringPiece &textin)
-{
-  const char delim[] = " ";
-  const char delim2[] = "-";
-  std::vector<unsigned char> output;
-
-  //Case with no word alignments.
-  if (textin.size() == 0) {
-    return output;
-  }
-
-  //Split on space
-  util::TokenIter<util::MultiCharacter> it(textin, util::MultiCharacter(delim));
-
-  //For each pair
-  while (it) {
-    //Split on dash (-)
-    util::TokenIter<util::MultiCharacter> itInner(*it,
-        util::MultiCharacter(delim2));
-
-    if (itInner) {
-      // Copied into std::string before atoi(): a StringPiece is not
-      // null-terminated, so atoi() on its data can read past the token
-      const std::string sourceStr = itInner->as_string();
-      ++itInner;
-
-      // no 2nd number: the iterator is at its end and must not be read
-      if (itInner) {
-        const std::string targetStr = itInner->as_string();
-        output.push_back((unsigned char) (atoi(sourceStr.c_str())));
-        output.push_back((unsigned char) (atoi(targetStr.c_str())));
-      }
-    }
-
-    ++it;
-  }
-
-  return output;
-
-}
-
-// Placeholder: currently does nothing to output.
-void reformatSCFG(line_text &output)
-{
-
-}
-
-}
-
diff --git a/contrib/moses2/TranslationModel/ProbingPT/line_splitter.hh b/contrib/moses2/TranslationModel/ProbingPT/line_splitter.hh
deleted file mode 100644
index 3b086b44a..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/line_splitter.hh
+++ /dev/null
@@ -1,59 +0,0 @@
-#pragma once
-
-#include "util/string_piece.hh"
-#include "util/tokenize_piece.hh"
-#include "util/file_piece.hh"
-#include <vector>
-#include <cstdlib> //atof
-#include "util/string_piece.hh" //Tokenization and work with StringPiece
-#include "util/tokenize_piece.hh"
-#include <vector>
-
-namespace Moses2
-{
-
-//Struct for holding processed line
-// The columns of one phrase-table line, as filled in by splitLine().
-// NOTE(review): the StringPiece fields presumably point into the text given
-// to splitLine(), which would then have to outlive this struct - confirm.
-struct line_text
-{
- StringPiece source_phrase;
- StringPiece target_phrase;
- StringPiece prob;
- StringPiece word_align;
- StringPiece counts;
- StringPiece sparse_score;
- StringPiece property;
- // not set by splitLine()
- std::string property_to_be_binarized;
-};
-
-//Struct for holding processed line
-// One rule in the form in which StoreTarget writes it to disk.
-struct target_text
-{
- // target vocab ids, one per factor of each target word
- std::vector<unsigned int> target_phrase;
- // phrase-table scores, followed by the LexRO scores if the line had any
- std::vector<float> prob;
- // alignments stored flattened: source pos, target pos, source pos, ...
- // The non_term vector takes the pairs whose target word is a non-terminal.
- std::vector<size_t> word_align_term;
- std::vector<size_t> word_align_non_term;
- // the next 3 are not filled in by StoreTarget::Append()
- std::vector<char> counts;
- std::vector<char> sparse_score;
- std::vector<char> property;
-
- /*
- void Reset()
- {
- target_phrase.clear();
- prob.clear();
- word_all1.clear();
- counts.clear();
- sparse_score.clear();
- property.clear();
- }
- */
-};
-
-//Ask if it's better to have it receive a pointer to a line_text struct
-line_text splitLine(const StringPiece &textin, bool scfg);
-void reformatSCFG(line_text &output);
-
-std::vector<unsigned char> splitWordAll1(const StringPiece &textin);
-
-}
-
diff --git a/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp b/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp
deleted file mode 100644
index 96c317b65..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-#include <iostream>
-#include "probing_hash_utils.hh"
-#include "util/file.hh"
-
-namespace Moses2
-{
-
-//Read table from disk, return memory map location
-// Open `filename` and make its whole content available in memory using
-// `load_method`. `file` and `memory` take ownership of the descriptor and
-// of the memory; the returned pointer is the start of that memory.
-char * readTable(const char * filename, util::LoadMethod load_method, util::scoped_fd &file, util::scoped_memory &memory)
-{
-  file.reset(util::OpenReadOrThrow(filename));
-
-  // from offset 0 to the end of the file
-  const uint64_t fileSize = util::SizeFile(file.get());
-  MapRead(load_method, file.get(), 0, fileSize, memory);
-
-  char *start = (char*) memory.get();
-  return start;
-}
-
-// Write the `size` bytes starting at `mem` to the file `filename`, in
-// binary mode.
-void serialize_table(char *mem, size_t size, const std::string &filename)
-{
-  std::ofstream out(filename.c_str(), std::ios::binary);
-
-  const char *firstByte = mem;
-  out.write(firstByte, size);
-
-  out.close();
-}
-
-// Combine the ids of the words of a source phrase into one key: the sum
-// (modulo 2^64) of each id shifted left by its position in the phrase.
-//   source_phrase - the word ids
-//   size          - number of ids in source_phrase
-// Returns 0 for an empty phrase.
-uint64_t getKey(const uint64_t source_phrase[], size_t size)
-{
-  //TOO SLOW
-  //uint64_t key = util::MurmurHashNative(&source_phrase[0], source_phrase.size());
-  uint64_t key = 0;
-  for (size_t i = 0; i < size; i++) {
-    // Shifting a 64-bit value by 64 or more is undefined, so the shift
-    // count is reduced modulo 64. Keys of phrases shorter than 64 words
-    // are not affected.
-    key += (source_phrase[i] << (i & 63));
-  }
-  return key;
-}
-
-}
-
diff --git a/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh b/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh
deleted file mode 100644
index 368147807..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh
+++ /dev/null
@@ -1,55 +0,0 @@
-#pragma once
-
-#include "util/probing_hash_table.hh"
-
-#include <sys/mman.h>
-#include <boost/functional/hash.hpp>
-#include <fcntl.h>
-#include <fstream>
-
-namespace Moses2
-{
-
-#define API_VERSION 15
-
-//Hash table entry
-// Key/value record, used as the entry type of the probing hash table
-// (see the Table typedef).
-struct Entry
-{
- typedef uint64_t Key;
- Key key;
-
- Key GetKey() const
- {
- return key;
- }
-
- void SetKey(Key to)
- {
- key = to;
- }
-
- // NOTE(review): presumably the offset of the entry's target phrases in
- // TargetColl.dat, or NONE - confirm against QueryEngine::query
- uint64_t value;
-};
-
-#define NONE std::numeric_limits<uint64_t>::max()
-
-//Define table
-typedef util::ProbingHashTable<Entry, boost::hash<uint64_t> > Table;
-
-void serialize_table(char *mem, size_t size, const std::string &filename);
-
-char * readTable(const char * filename, util::LoadMethod load_method, util::scoped_fd &file, util::scoped_memory &memory);
-
-uint64_t getKey(const uint64_t source_phrase[], size_t size);
-
-// Fixed-size record written in front of every target phrase by StoreTarget.
-// It is written and read back as raw bytes, so its layout is part of the
-// file format.
-struct TargetPhraseInfo
-{
- // ids of the alignments of the terminals / of the non-terminals
- uint32_t alignTerm;
- uint32_t alignNonTerm;
- // number of vocab ids stored for the phrase
- uint16_t numWords;
- // size of the property data (the data itself is not written yet)
- uint16_t propLength;
- // NOTE(review): not read anywhere in the code shown here - presumably padding
- uint16_t filler;
-};
-
-}
-
diff --git a/contrib/moses2/TranslationModel/ProbingPT/querying.cpp b/contrib/moses2/TranslationModel/ProbingPT/querying.cpp
deleted file mode 100644
index 9ea2d8cb6..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/querying.cpp
+++ /dev/null
@@ -1,180 +0,0 @@
-#include "querying.hh"
-#include "util/exception.hh"
-#include "../../legacy/Util2.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-QueryEngine::QueryEngine(const char * filepath, util::LoadMethod load_method)
-{
-
- //Create filepaths
- std::string basepath(filepath);
- std::string path_to_config = basepath + "/config";
- std::string path_to_hashtable = basepath + "/probing_hash.dat";
- std::string path_to_source_vocabid = basepath + "/source_vocabids";
- std::string alignPath = basepath + "/Alignments.dat";
-
- file_exits(basepath);
-
- ///Source phrase vocabids
- read_map(source_vocabids, path_to_source_vocabid.c_str());
-
- // alignments
- read_alignments(alignPath);
-
- // target phrase
- string targetCollPath = basepath + "/TargetColl.dat";
- memTPS = readTable(targetCollPath.c_str(), load_method, fileTPS_, memoryTPS_);
-
- //Read config file
- boost::unordered_map<std::string, std::string> keyValue;
-
- std::ifstream config(path_to_config.c_str());
- std::string line;
- while (getline(config, line)) {
- std::vector<std::string> toks = Moses2::Tokenize(line, "\t");
- UTIL_THROW_IF2(toks.size() != 2, "Wrong config format:" << line);
- keyValue[ toks[0] ] = toks[1];
- }
-
- bool found;
- //Check API version:
- int version;
- found = Get(keyValue, "API_VERSION", version);
- if (!found) {
- std::cerr << "Old or corrupted version of ProbingPT. Please rebinarize your phrase tables." << std::endl;
- }
- else if (version != API_VERSION) {
- std::cerr << "The ProbingPT API has changed. " << version << "!="
- << API_VERSION << " Please rebinarize your phrase tables." << std::endl;
- exit(EXIT_FAILURE);
- }
-
- //Get tablesize.
- int tablesize;
- found = Get(keyValue, "uniq_entries", tablesize);
- if (!found) {
- std::cerr << "uniq_entries not found" << std::endl;
- exit(EXIT_FAILURE);
- }
-
- //Number of scores
- found = Get(keyValue, "num_scores", num_scores);
- if (!found) {
- std::cerr << "num_scores not found" << std::endl;
- exit(EXIT_FAILURE);
- }
-
- //How may scores from lex reordering models
- found = Get(keyValue, "num_lex_scores", num_lex_scores);
- if (!found) {
- std::cerr << "num_lex_scores not found" << std::endl;
- exit(EXIT_FAILURE);
- }
-
- // have the scores been log() and FloorScore()?
- found = Get(keyValue, "log_prob", logProb);
- if (!found) {
- std::cerr << "logProb not found" << std::endl;
- exit(EXIT_FAILURE);
- }
-
- config.close();
-
- //Read hashtable
- table_filesize = Table::Size(tablesize, 1.2);
- mem = readTable(path_to_hashtable.c_str(), load_method, file_, memory_);
- Table table_init(mem, table_filesize);
- table = table_init;
-
- std::cerr << "Initialized successfully! " << std::endl;
-}
-
-QueryEngine::~QueryEngine()
-{
- //Clear mmap content from memory.
- //munmap(mem, table_filesize);
-
-}
-
-uint64_t QueryEngine::getKey(uint64_t source_phrase[], size_t size) const
-{
- //TOO SLOW
- //uint64_t key = util::MurmurHashNative(&source_phrase[0], source_phrase.size());
- return Moses2::getKey(source_phrase, size);
-}
-
-std::pair<bool, uint64_t> QueryEngine::query(uint64_t key)
-{
- std::pair<bool, uint64_t> ret;
-
- const Entry * entry;
- ret.first = table.Find(key, entry);
- if (ret.first) {
- ret.second = entry->value;
- }
- return ret;
-}
-
-void QueryEngine::read_alignments(const std::string &alignPath)
-{
- std::ifstream strm(alignPath.c_str());
-
- string line;
- while (getline(strm, line)) {
- vector<string> toks = Moses2::Tokenize(line, "\t ");
- UTIL_THROW_IF2(toks.size() == 0, "Corrupt alignment file");
-
- uint32_t alignInd = Scan<uint32_t>(toks[0]);
- if (alignInd >= alignColl.size()) {
- alignColl.resize(alignInd + 1);
- }
-
- Alignments &aligns = alignColl[alignInd];
- for (size_t i = 1; i < toks.size(); ++i) {
- size_t pos = Scan<size_t>(toks[i]);
- aligns.push_back(pos);
- }
- }
-}
-
-void QueryEngine::file_exits(const std::string &basePath)
-{
- if (!FileExists(basePath + "/Alignments.dat")) {
- UTIL_THROW2("Require file does not exist in: " << basePath << "/Alignments.dat");
- }
- if (!FileExists(basePath + "/TargetColl.dat")) {
- UTIL_THROW2("Require file does not exist in: " << basePath << "/TargetColl.dat");
- }
- if (!FileExists(basePath + "/TargetVocab.dat")) {
- UTIL_THROW2("Require file does not exist in: " << basePath << "/TargetVocab.dat");
- }
- if (!FileExists(basePath + "/cache")) {
- UTIL_THROW2("Require file does not exist in: " << basePath << "/cache");
- }
- if (!FileExists(basePath + "/config")) {
- UTIL_THROW2("Require file does not exist in: " << basePath << "/config");
- }
- if (!FileExists(basePath + "/probing_hash.dat")) {
- UTIL_THROW2("Require file does not exist in: " << basePath << "/probing_hash.dat");
- }
- if (!FileExists(basePath + "/source_vocabids")) {
- UTIL_THROW2("Require file does not exist in: " << basePath << "/source_vocabids");
- }
-
- /*
-
- if (!FileExists(path_to_config) || !FileExists(path_to_hashtable) ||
- !FileExists(path_to_source_vocabid) || !FileExists(basepath + alignPath) ||
- !FileExists(basepath + "/TargetColl.dat") || !FileExists(basepath + "/TargetVocab.dat") ||
- !FileExists(basepath + "/cache")) {
- UTIL_THROW2("A required table doesn't exist in: " << basepath);
- }
- */
-}
-
-}
-
diff --git a/contrib/moses2/TranslationModel/ProbingPT/querying.hh b/contrib/moses2/TranslationModel/ProbingPT/querying.hh
deleted file mode 100644
index dcdd2a75a..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/querying.hh
+++ /dev/null
@@ -1,77 +0,0 @@
-#pragma once
-
-#include <boost/iostreams/device/mapped_file.hpp>
-#include <boost/unordered_map.hpp>
-#include <sys/stat.h> //For finding size of file
-#include "vocabid.hh"
-#include <algorithm> //toLower
-#include <deque>
-#include "probing_hash_utils.hh"
-#include "hash.hh" //Includes line splitter
-#include "line_splitter.hh"
-#include "../../legacy/Util2.h"
-
-namespace Moses2
-{
-
-class QueryEngine
-{
- std::map<uint64_t, std::string> source_vocabids;
-
- typedef std::vector<unsigned char> Alignments;
- std::vector<Alignments> alignColl;
-
- Table table;
- char *mem; //Memory for the table, necessary so that we can correctly destroy the object
-
- size_t table_filesize;
- bool is_reordering;
-
- util::scoped_fd file_;
- util::scoped_memory memory_;
-
- // target phrases
- boost::iostreams::mapped_file_source file;
-
- util::scoped_fd fileTPS_;
- util::scoped_memory memoryTPS_;
-
- void read_alignments(const std::string &alignPath);
- void file_exits(const std::string &basePath);
-
-public:
- int num_scores;
- int num_lex_scores;
- bool logProb;
- const char *memTPS;
-
- QueryEngine(const char *, util::LoadMethod load_method);
- ~QueryEngine();
-
- std::pair<bool, uint64_t> query(uint64_t key);
-
- const std::map<uint64_t, std::string> &getSourceVocab() const
- { return source_vocabids; }
-
- const std::vector<Alignments> &getAlignments() const
- { return alignColl; }
-
- uint64_t getKey(uint64_t source_phrase[], size_t size) const;
-
- template<typename T>
- inline bool Get(const boost::unordered_map<std::string, std::string> &keyValue, const std::string &sought, T &found) const
- {
- boost::unordered_map<std::string, std::string>::const_iterator iter = keyValue.find(sought);
- if (iter == keyValue.end()) {
- return false;
- }
-
- const std::string &foundStr = iter->second;
- found = Scan<T>(foundStr);
- return true;
- }
-
-};
-
-}
-
diff --git a/contrib/moses2/TranslationModel/ProbingPT/storing.cpp b/contrib/moses2/TranslationModel/ProbingPT/storing.cpp
deleted file mode 100644
index 75cdcc038..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/storing.cpp
+++ /dev/null
@@ -1,303 +0,0 @@
-#include <sys/stat.h>
-#include <boost/foreach.hpp>
-#include "line_splitter.hh"
-#include "storing.hh"
-#include "StoreTarget.h"
-#include "StoreVocab.h"
-#include "../../legacy/Util2.h"
-#include "../../legacy/InputFileStream.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-///////////////////////////////////////////////////////////////////////
-void Node::Add(Table &table, const SourcePhrase &sourcePhrase, size_t pos)
-{
- if (pos < sourcePhrase.size()) {
- uint64_t vocabId = sourcePhrase[pos];
-
- Node *child;
- Children::iterator iter = m_children.find(vocabId);
- if (iter == m_children.end()) {
- // New node. Write other children then discard them
- BOOST_FOREACH(Children::value_type &valPair, m_children) {
- Node &otherChild = valPair.second;
- otherChild.Write(table);
- }
- m_children.clear();
-
- // create new node
- child = &m_children[vocabId];
- assert(!child->done);
- child->key = key + (vocabId << pos);
- }
- else {
- child = &iter->second;
- }
-
- child->Add(table, sourcePhrase, pos + 1);
- }
- else {
- // this node was written previously 'cos it has rules
- done = true;
- }
-}
-
-void Node::Write(Table &table)
-{
- //cerr << "START write " << done << " " << key << endl;
- BOOST_FOREACH(Children::value_type &valPair, m_children) {
- Node &child = valPair.second;
- child.Write(table);
- }
-
- if (!done) {
- // save
- Entry sourceEntry;
- sourceEntry.value = NONE;
- sourceEntry.key = key;
-
- //Put into table
- table.Insert(sourceEntry);
- }
-}
-
-///////////////////////////////////////////////////////////////////////
-void createProbingPT(const std::string &phrasetable_path,
- const std::string &basepath, int num_scores, int num_lex_scores,
- bool log_prob, int max_cache_size, bool scfg)
-{
- std::cerr << "Starting..." << std::endl;
-
- //Get basepath and create directory if missing
- mkdir(basepath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
-
- StoreTarget storeTarget(basepath);
-
- //Get uniq lines:
- unsigned long uniq_entries = countUniqueSource(phrasetable_path);
-
- //Source phrase vocabids
- StoreVocab<uint64_t> sourceVocab(basepath + "/source_vocabids");
-
- //Read the file
- util::FilePiece filein(phrasetable_path.c_str());
-
- //Init the probing hash table
- size_t size = Table::Size(uniq_entries, 1.2);
- char * mem = new char[size];
- memset(mem, 0, size);
- Table sourceEntries(mem, size);
-
- std::priority_queue<CacheItem*, std::vector<CacheItem*>, CacheItemOrderer> cache;
- float totalSourceCount = 0;
-
- //Keep track of the size of each group of target phrases
- size_t line_num = 0;
-
- //Read everything and processs
- std::string prevSource;
-
- Node sourcePhrases;
- sourcePhrases.done = true;
- sourcePhrases.key = 0;
-
- while (true) {
- try {
- //Process line read
- line_text line;
- line = splitLine(filein.ReadLine(), scfg);
- //cerr << "line=" << line.source_phrase << endl;
-
- ++line_num;
- if (line_num % 1000000 == 0) {
- std::cerr << line_num << " " << std::flush;
- }
-
- //Add source phrases to vocabularyIDs
- add_to_map(sourceVocab, line.source_phrase);
-
- if (prevSource.empty()) {
- // 1st line
- prevSource = line.source_phrase.as_string();
- storeTarget.Append(line, log_prob, scfg);
- }
- else if (prevSource == line.source_phrase) {
- //If we still have the same line, just append to it:
- storeTarget.Append(line, log_prob, scfg);
- }
- else {
- assert(prevSource != line.source_phrase);
-
- //Create a new entry even
-
- // save
- uint64_t targetInd = storeTarget.Save();
-
- // next line
- storeTarget.Append(line, log_prob, scfg);
-
- //Create an entry for the previous source phrase:
- Entry sourceEntry;
- sourceEntry.value = targetInd;
- //The key is the sum of hashes of individual words bitshifted by their position in the phrase.
- //Probably not entirerly correct, but fast and seems to work fine in practise.
- std::vector<uint64_t> vocabid_source = getVocabIDs(prevSource);
- if (scfg) {
- // storing prefixes?
- sourcePhrases.Add(sourceEntries, vocabid_source);
- }
- sourceEntry.key = getKey(vocabid_source);
-
- /*
- cerr << "prevSource=" << prevSource << flush
- << " vocabids=" << Debug(vocabid_source) << flush
- << " key=" << sourceEntry.key << endl;
- */
- //Put into table
- sourceEntries.Insert(sourceEntry);
-
- // update cache - CURRENT source phrase, not prev
- if (max_cache_size) {
- std::string countStr = line.counts.as_string();
- countStr = Trim(countStr);
- if (!countStr.empty()) {
- std::vector<float> toks = Tokenize<float>(countStr);
- //cerr << "CACHE:" << line.source_phrase << " " << countStr << " " << toks[1] << endl;
-
- if (toks.size() >= 2) {
- totalSourceCount += toks[1];
-
- // compute key for CURRENT source
- std::vector<uint64_t> currVocabidSource = getVocabIDs(line.source_phrase.as_string());
- uint64_t currKey = getKey(currVocabidSource);
-
- CacheItem *item = new CacheItem(
- Trim(line.source_phrase.as_string()),
- currKey,
- toks[1]);
- cache.push(item);
-
- if (max_cache_size > 0 && cache.size() > max_cache_size) {
- cache.pop();
- }
- }
- }
- }
-
- //Set prevLine
- prevSource = line.source_phrase.as_string();
- }
-
- }
- catch (util::EndOfFileException e) {
- std::cerr
- << "Reading phrase table finished, writing remaining files to disk."
- << std::endl;
-
- //After the final entry is constructed we need to add it to the phrase_table
- //Create an entry for the previous source phrase:
- uint64_t targetInd = storeTarget.Save();
-
- Entry sourceEntry;
- sourceEntry.value = targetInd;
-
- //The key is the sum of hashes of individual words. Probably not entirerly correct, but fast
- std::vector<uint64_t> vocabid_source = getVocabIDs(prevSource);
- sourceEntry.key = getKey(vocabid_source);
-
- //Put into table
- sourceEntries.Insert(sourceEntry);
-
- break;
- }
- }
-
- sourcePhrases.Write(sourceEntries);
-
- storeTarget.SaveAlignment();
-
- serialize_table(mem, size, (basepath + "/probing_hash.dat"));
-
- sourceVocab.Save();
-
- serialize_cache(cache, (basepath + "/cache"), totalSourceCount);
-
- delete[] mem;
-
- //Write configfile
- std::ofstream configfile;
- configfile.open((basepath + "/config").c_str());
- configfile << "API_VERSION\t" << API_VERSION << '\n';
- configfile << "uniq_entries\t" << uniq_entries << '\n';
- configfile << "num_scores\t" << num_scores << '\n';
- configfile << "num_lex_scores\t" << num_lex_scores << '\n';
- configfile << "log_prob\t" << log_prob << '\n';
- configfile.close();
-}
-
-size_t countUniqueSource(const std::string &path)
-{
- size_t ret = 0;
- InputFileStream strme(path);
-
- std::string line, prevSource;
- while (std::getline(strme, line)) {
- std::vector<std::string> toks = TokenizeMultiCharSeparator(line, "|||");
- assert(toks.size() != 0);
-
- if (prevSource != toks[0]) {
- prevSource = toks[0];
- ++ret;
- }
- }
-
- return ret;
-}
-
-void serialize_cache(
- std::priority_queue<CacheItem*, std::vector<CacheItem*>, CacheItemOrderer> &cache,
- const std::string &path, float totalSourceCount)
-{
- std::vector<const CacheItem*> vec(cache.size());
-
- size_t ind = cache.size() - 1;
- while (!cache.empty()) {
- const CacheItem *item = cache.top();
- vec[ind] = item;
- cache.pop();
- --ind;
- }
-
- std::ofstream os(path.c_str());
-
- os << totalSourceCount << std::endl;
- for (size_t i = 0; i < vec.size(); ++i) {
- const CacheItem *item = vec[i];
- os << item->count << "\t" << item->sourceKey << "\t" << item->source << std::endl;
- delete item;
- }
-
- os.close();
-}
-
-uint64_t getKey(const std::vector<uint64_t> &vocabid_source)
-{
- return Moses2::getKey(vocabid_source.data(), vocabid_source.size());
-}
-
-std::vector<uint64_t> CreatePrefix(const std::vector<uint64_t> &vocabid_source, size_t endPos)
-{
- assert(endPos < vocabid_source.size());
-
- std::vector<uint64_t> ret(endPos + 1);
- for (size_t i = 0; i <= endPos; ++i) {
- ret[i] = vocabid_source[i];
- }
- return ret;
-}
-
-}
-
diff --git a/contrib/moses2/TranslationModel/ProbingPT/storing.hh b/contrib/moses2/TranslationModel/ProbingPT/storing.hh
deleted file mode 100644
index 10d7050d3..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/storing.hh
+++ /dev/null
@@ -1,95 +0,0 @@
-#pragma once
-
-#include <boost/unordered_set.hpp>
-#include <boost/unordered_map.hpp>
-#include <cstdio>
-#include <sstream>
-#include <fstream>
-#include <iostream>
-#include <string>
-#include <queue>
-#include <sys/stat.h> //mkdir
-
-#include "hash.hh" //Includes line_splitter
-#include "probing_hash_utils.hh"
-
-#include "util/file_piece.hh"
-#include "util/file.hh"
-#include "vocabid.hh"
-
-namespace Moses2
-{
-typedef std::vector<uint64_t> SourcePhrase;
-
-
-class Node
-{
- typedef boost::unordered_map<uint64_t, Node> Children;
- Children m_children;
-
-public:
- uint64_t key;
- bool done;
-
- Node()
- :done(false)
- {}
-
- void Add(Table &table, const SourcePhrase &sourcePhrase, size_t pos = 0);
- void Write(Table &table);
-};
-
-
-void createProbingPT(const std::string &phrasetable_path,
- const std::string &basepath, int num_scores, int num_lex_scores,
- bool log_prob, int max_cache_size, bool scfg);
-uint64_t getKey(const std::vector<uint64_t> &source_phrase);
-
-std::vector<uint64_t> CreatePrefix(const std::vector<uint64_t> &vocabid_source, size_t endPos);
-
-template<typename T>
-std::string Debug(const std::vector<T> &vec)
-{
- std::stringstream strm;
- for (size_t i = 0; i < vec.size(); ++i) {
- strm << vec[i] << " ";
- }
- return strm.str();
-}
-
-size_t countUniqueSource(const std::string &path);
-
-class CacheItem
-{
-public:
- std::string source;
- uint64_t sourceKey;
- float count;
- CacheItem(const std::string &vSource, uint64_t vSourceKey, float vCount)
- :source(vSource)
- ,sourceKey(vSourceKey)
- ,count(vCount)
- {
- }
-
- bool operator<(const CacheItem &other) const
- {
- return count > other.count;
- }
-};
-
-class CacheItemOrderer
-{
-public:
- bool operator()(const CacheItem* a, const CacheItem* b) const
- {
- return (*a) < (*b);
- }
-};
-
-void serialize_cache(
- std::priority_queue<CacheItem*, std::vector<CacheItem*>, CacheItemOrderer> &cache,
- const std::string &path, float totalSourceCount);
-
-}
-
diff --git a/contrib/moses2/TranslationModel/ProbingPT/vocabid.cpp b/contrib/moses2/TranslationModel/ProbingPT/vocabid.cpp
deleted file mode 100644
index 696373ee5..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/vocabid.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-#include <boost/foreach.hpp>
-#include "vocabid.hh"
-#include "StoreVocab.h"
-#include "../../legacy/Util2.h"
-
-namespace Moses2
-{
-
-void add_to_map(StoreVocab<uint64_t> &sourceVocab,
- const StringPiece &textin)
-{
- //Tokenize
- util::TokenIter<util::SingleCharacter> itWord(textin, util::SingleCharacter(' '));
-
- while (itWord) {
- StringPiece word = *itWord;
-
- util::TokenIter<util::SingleCharacter> itFactor(word, util::SingleCharacter('|'));
- while (itFactor) {
- StringPiece factor = *itFactor;
-
- sourceVocab.Insert(getHash(factor), factor.as_string());
- itFactor++;
- }
- itWord++;
- }
-}
-
-void serialize_map(const std::map<uint64_t, std::string> &karta,
- const std::string &filename)
-{
- std::ofstream os(filename.c_str());
-
- std::map<uint64_t, std::string>::const_iterator iter;
- for (iter = karta.begin(); iter != karta.end(); ++iter) {
- os << iter->first << '\t' << iter->second << std::endl;
- }
-
- os.close();
-}
-
-void read_map(std::map<uint64_t, std::string> &karta, const char* filename)
-{
- std::ifstream is(filename);
-
- std::string line;
- while (getline(is, line)) {
- std::vector<std::string> toks = Tokenize(line, "\t");
- assert(toks.size() == 2);
- uint64_t ind = Scan<uint64_t>(toks[1]);
- karta[ind] = toks[0];
- }
-
- //Close the stream after we are done.
- is.close();
-}
-
-}
-
diff --git a/contrib/moses2/TranslationModel/ProbingPT/vocabid.hh b/contrib/moses2/TranslationModel/ProbingPT/vocabid.hh
deleted file mode 100644
index 55d99d453..000000000
--- a/contrib/moses2/TranslationModel/ProbingPT/vocabid.hh
+++ /dev/null
@@ -1,29 +0,0 @@
-//Serialization
-#include <boost/serialization/serialization.hpp>
-#include <boost/serialization/map.hpp>
-#include <boost/archive/text_iarchive.hpp>
-#include <boost/archive/text_oarchive.hpp>
-#include <fstream>
-#include <iostream>
-#include <vector>
-
-#include <map> //Container
-#include "hash.hh" //Hash of elements
-
-#include "util/string_piece.hh" //Tokenization and work with StringPiece
-#include "util/tokenize_piece.hh"
-
-namespace Moses2
-{
-template<typename VOCABID>
-class StoreVocab;
-
-void add_to_map(StoreVocab<uint64_t> &sourceVocab,
- const StringPiece &textin);
-
-void serialize_map(const std::map<uint64_t, std::string> &karta,
- const std::string &filename);
-
-void read_map(std::map<uint64_t, std::string> &karta, const char* filename);
-
-}
diff --git a/contrib/moses2/TranslationModel/Transliteration.cpp b/contrib/moses2/TranslationModel/Transliteration.cpp
deleted file mode 100644
index f92348ee9..000000000
--- a/contrib/moses2/TranslationModel/Transliteration.cpp
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- * Transliteration.cpp
- *
- * Created on: 28 Oct 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include "Transliteration.h"
-#include "../System.h"
-#include "../Scores.h"
-#include "../InputType.h"
-#include "../PhraseBased/Manager.h"
-#include "../PhraseBased/TargetPhraseImpl.h"
-#include "../PhraseBased/InputPath.h"
-#include "../PhraseBased/TargetPhrases.h"
-#include "../PhraseBased/Sentence.h"
-#include "../SCFG/InputPath.h"
-#include "../SCFG/TargetPhraseImpl.h"
-#include "../SCFG/Manager.h"
-#include "../SCFG/Sentence.h"
-#include "../SCFG/ActiveChart.h"
-#include "util/tempfile.hh"
-#include "../legacy/Util2.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-Transliteration::Transliteration(size_t startInd, const std::string &line) :
- PhraseTable(startInd, line)
-{
- ReadParameters();
- UTIL_THROW_IF2(m_mosesDir.empty() ||
- m_scriptDir.empty() ||
- m_externalDir.empty() ||
- m_inputLang.empty() ||
- m_outputLang.empty(), "Must specify all arguments");
-}
-
-Transliteration::~Transliteration()
-{
- // TODO Auto-generated destructor stub
-}
-
-void
-Transliteration::
-SetParameter(const std::string& key, const std::string& value)
-{
- if (key == "moses-dir") {
- m_mosesDir = value;
- } else if (key == "script-dir") {
- m_scriptDir = value;
- } else if (key == "external-dir") {
- m_externalDir = value;
- } else if (key == "input-lang") {
- m_inputLang = value;
- } else if (key == "output-lang") {
- m_outputLang = value;
- } else {
- PhraseTable::SetParameter(key, value);
- }
-}
-
-void Transliteration::Lookup(const Manager &mgr,
- InputPathsBase &inputPaths) const
-{
- BOOST_FOREACH(InputPathBase *pathBase, inputPaths){
- InputPath *path = static_cast<InputPath*>(pathBase);
-
- if (SatisfyBackoff(mgr, *path)) {
- const SubPhrase<Moses2::Word> &phrase = path->subPhrase;
-
- TargetPhrases *tps = Lookup(mgr, mgr.GetPool(), *path);
- path->AddTargetPhrases(*this, tps);
- }
- }
-
-}
-
-TargetPhrases *Transliteration::Lookup(const Manager &mgr, MemPool &pool,
- InputPath &inputPath) const
-{
- const SubPhrase<Moses2::Word> &sourcePhrase = inputPath.subPhrase;
- size_t hash = sourcePhrase.hash();
-
- // TRANSLITERATE
- const util::temp_file inFile;
- const util::temp_dir outDir;
-
- ofstream inStream(inFile.path().c_str());
- inStream << sourcePhrase.Debug(mgr.system) << endl;
- inStream.close();
-
- string cmd = m_scriptDir + "/Transliteration/prepare-transliteration-phrase-table.pl" +
- " --transliteration-model-dir " + m_filePath +
- " --moses-src-dir " + m_mosesDir +
- " --external-bin-dir " + m_externalDir +
- " --input-extension " + m_inputLang +
- " --output-extension " + m_outputLang +
- " --oov-file " + inFile.path() +
- " --out-dir " + outDir.path();
-
- int ret = system(cmd.c_str());
- UTIL_THROW_IF2(ret != 0, "Transliteration script error");
-
- TargetPhrases *tps = NULL;
- tps = new (pool.Allocate<TargetPhrases>()) TargetPhrases(pool, 1);
-
- vector<TargetPhraseImpl*> targetPhrases
- = CreateTargetPhrases(mgr, pool, sourcePhrase, outDir.path());
-
- vector<TargetPhraseImpl*>::const_iterator iter;
- for (iter = targetPhrases.begin(); iter != targetPhrases.end(); ++iter) {
- TargetPhraseImpl *tp = *iter;
- tps->AddTargetPhrase(*tp);
- }
- mgr.system.featureFunctions.EvaluateAfterTablePruning(pool, *tps, sourcePhrase);
-
- inputPath.AddTargetPhrases(*this, tps);
-}
-
-std::vector<TargetPhraseImpl*> Transliteration::CreateTargetPhrases(
- const Manager &mgr,
- MemPool &pool,
- const SubPhrase<Moses2::Word> &sourcePhrase,
- const std::string &outDir) const
-{
- std::vector<TargetPhraseImpl*> ret;
-
- string outPath = outDir + "/out.txt";
- ifstream outStream(outPath.c_str());
-
- string line;
- while (getline(outStream, line)) {
- vector<string> toks = Moses2::Tokenize(line, "\t");
- UTIL_THROW_IF2(toks.size() != 2, "Error in transliteration output file. Expecting word\tscore");
-
- TargetPhraseImpl *tp =
- new (pool.Allocate<TargetPhraseImpl>()) TargetPhraseImpl(pool, *this, mgr.system, 1);
- Moses2::Word &word = (*tp)[0];
- word.CreateFromString(mgr.system.GetVocab(), mgr.system, toks[0]);
-
- float score = Scan<float>(toks[1]);
- tp->GetScores().PlusEquals(mgr.system, *this, score);
-
- // score of all other ff when this rule is being loaded
- mgr.system.featureFunctions.EvaluateInIsolation(pool, mgr.system, sourcePhrase, *tp);
-
- ret.push_back(tp);
- }
-
- outStream.close();
-
- return ret;
-
-}
-
-
-void Transliteration::EvaluateInIsolation(const System &system,
- const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-// SCFG ///////////////////////////////////////////////////////////////////////////////////////////
-void Transliteration::InitActiveChart(
- MemPool &pool,
- const SCFG::Manager &mgr,
- SCFG::InputPath &path) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-void Transliteration::Lookup(MemPool &pool,
- const SCFG::Manager &mgr,
- size_t maxChartSpan,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-void Transliteration::LookupUnary(MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-void Transliteration::LookupNT(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const Moses2::Range &subPhraseRange,
- const SCFG::InputPath &prevPath,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &outPath) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-void Transliteration::LookupGivenWord(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::InputPath &prevPath,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-void Transliteration::LookupGivenNode(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::ActiveChartEntry &prevEntry,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-}
-
diff --git a/contrib/moses2/TranslationModel/Transliteration.h b/contrib/moses2/TranslationModel/Transliteration.h
deleted file mode 100644
index 15f262ac8..000000000
--- a/contrib/moses2/TranslationModel/Transliteration.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Transliteration.h
- *
- * Created on: 28 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include "PhraseTable.h"
-
-namespace Moses2
-{
-class Sentence;
-class InputPaths;
-class Range;
-
-class Transliteration: public PhraseTable
-{
-public:
- Transliteration(size_t startInd, const std::string &line);
- virtual ~Transliteration();
-
- void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const;
- virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool,
- InputPath &inputPath) const;
-
- virtual void
- EvaluateInIsolation(const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- virtual void InitActiveChart(
- MemPool &pool,
- const SCFG::Manager &mgr,
- SCFG::InputPath &path) const;
-
- void Lookup(MemPool &pool,
- const SCFG::Manager &mgr,
- size_t maxChartSpan,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const;
-
- void LookupUnary(MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const;
-
-protected:
- virtual void LookupNT(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const Moses2::Range &subPhraseRange,
- const SCFG::InputPath &prevPath,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &outPath) const;
-
- virtual void LookupGivenWord(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::InputPath &prevPath,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const;
-
- virtual void LookupGivenNode(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::ActiveChartEntry &prevEntry,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const;
-
- void SetParameter(const std::string& key, const std::string& value);
-
-protected:
- std::string m_filePath;
- std::string m_mosesDir, m_scriptDir, m_externalDir, m_inputLang, m_outputLang;
-
- std::vector<TargetPhraseImpl*> CreateTargetPhrases(
- const Manager &mgr,
- MemPool &pool,
- const SubPhrase<Moses2::Word> &sourcePhrase,
- const std::string &outDir) const;
-
-};
-
-}
-
diff --git a/contrib/moses2/TranslationModel/UnknownWordPenalty.cpp b/contrib/moses2/TranslationModel/UnknownWordPenalty.cpp
deleted file mode 100644
index d786b2cff..000000000
--- a/contrib/moses2/TranslationModel/UnknownWordPenalty.cpp
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * UnknownWordPenalty.cpp
- *
- * Created on: 28 Oct 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include "UnknownWordPenalty.h"
-#include "../System.h"
-#include "../Scores.h"
-#include "../InputType.h"
-#include "../PhraseBased/Manager.h"
-#include "../PhraseBased/TargetPhraseImpl.h"
-#include "../PhraseBased/InputPath.h"
-#include "../PhraseBased/TargetPhrases.h"
-#include "../PhraseBased/Sentence.h"
-#include "../SCFG/InputPath.h"
-#include "../SCFG/TargetPhraseImpl.h"
-#include "../SCFG/Manager.h"
-#include "../SCFG/Sentence.h"
-#include "../SCFG/ActiveChart.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-UnknownWordPenalty::UnknownWordPenalty(size_t startInd, const std::string &line)
-:PhraseTable(startInd, line)
-,m_drop(false)
-{
- m_tuneable = false;
- ReadParameters();
-}
-
-UnknownWordPenalty::~UnknownWordPenalty()
-{
- // TODO Auto-generated destructor stub
-}
-
-void UnknownWordPenalty::SetParameter(const std::string& key, const std::string& value)
-{
- if (key == "drop") {
- m_drop = Scan<bool>(value);
- }
- else if (key == "prefix") {
- m_prefix = value;
- }
- else if (key == "suffix") {
- m_suffix = value;
- }
- else {
- PhraseTable::SetParameter(key, value);
- }
-}
-
-void UnknownWordPenalty::ProcessXML(
- const Manager &mgr,
- MemPool &pool,
- const Sentence &sentence,
- InputPaths &inputPaths) const
-{
- const Vector<const InputType::XMLOption*> &xmlOptions = sentence.GetXMLOptions();
- BOOST_FOREACH(const InputType::XMLOption *xmlOption, xmlOptions) {
- TargetPhraseImpl *target = TargetPhraseImpl::CreateFromString(pool, *this, mgr.system, xmlOption->GetTranslation());
-
- if (xmlOption->prob) {
- Scores &scores = target->GetScores();
- scores.PlusEquals(mgr.system, *this, Moses2::TransformScore(xmlOption->prob));
- }
-
- InputPath *path = inputPaths.GetMatrix().GetValue(xmlOption->startPos, xmlOption->phraseSize - 1);
- const SubPhrase<Moses2::Word> &source = path->subPhrase;
-
- mgr.system.featureFunctions.EvaluateInIsolation(pool, mgr.system, source, *target);
-
- TargetPhrases *tps = new (pool.Allocate<TargetPhrases>()) TargetPhrases(pool, 1);
-
- tps->AddTargetPhrase(*target);
- mgr.system.featureFunctions.EvaluateAfterTablePruning(pool, *tps, source);
-
- path->AddTargetPhrases(*this, tps);
- }
-}
-
-void UnknownWordPenalty::Lookup(const Manager &mgr,
- InputPathsBase &inputPaths) const
-{
- BOOST_FOREACH(InputPathBase *pathBase, inputPaths){
- InputPath *path = static_cast<InputPath*>(pathBase);
-
- if (SatisfyBackoff(mgr, *path)) {
- const SubPhrase<Moses2::Word> &phrase = path->subPhrase;
-
- TargetPhrases *tps = Lookup(mgr, mgr.GetPool(), *path);
- path->AddTargetPhrases(*this, tps);
- }
- }
-
-}
-
-TargetPhrases *UnknownWordPenalty::Lookup(const Manager &mgr, MemPool &pool,
- InputPath &inputPath) const
-{
- const System &system = mgr.system;
- TargetPhrases *tps = NULL;
-
- // any other pt translate this?
- size_t numPt = mgr.system.mappings.size();
- const TargetPhrases **allTPS =
- static_cast<InputPath&>(inputPath).targetPhrases;
- for (size_t i = 0; i < numPt; ++i) {
- const TargetPhrases *otherTps = allTPS[i];
-
- if (otherTps && otherTps->GetSize()) {
- return tps;
- }
- }
-
- const SubPhrase<Moses2::Word> &source = inputPath.subPhrase;
- const Moses2::Word &sourceWord = source[0];
- const Factor *factor = sourceWord[0];
-
- tps = new (pool.Allocate<TargetPhrases>()) TargetPhrases(pool, 1);
-
- size_t numWords = m_drop ? 0 : 1;
-
- TargetPhraseImpl *target =
- new (pool.Allocate<TargetPhraseImpl>()) TargetPhraseImpl(pool, *this,
- system, numWords);
-
- if (!m_drop) {
- Moses2::Word &word = (*target)[0];
-
- if (m_prefix.empty() && m_suffix.empty()) {
- word[0] = factor;
- }
- else {
- stringstream strm;
- if (!m_prefix.empty()) {
- strm << m_prefix;
- }
- strm << factor->GetString();
- if (!m_suffix.empty()) {
- strm << m_suffix;
- }
-
- FactorCollection &fc = system.GetVocab();
- const Factor *targetFactor = fc.AddFactor(strm.str(), system, false);
- word[0] = targetFactor;
- }
- }
-
- Scores &scores = target->GetScores();
- scores.PlusEquals(mgr.system, *this, -100);
-
- MemPool &memPool = mgr.GetPool();
- system.featureFunctions.EvaluateInIsolation(memPool, system, source, *target);
-
- tps->AddTargetPhrase(*target);
- system.featureFunctions.EvaluateAfterTablePruning(memPool, *tps, source);
-
- return tps;
-}
-
-void UnknownWordPenalty::EvaluateInIsolation(const System &system,
- const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const
-{
-
-}
-
-// SCFG ///////////////////////////////////////////////////////////////////////////////////////////
-void UnknownWordPenalty::InitActiveChart(
- MemPool &pool,
- const SCFG::Manager &mgr,
- SCFG::InputPath &path) const
-{
-}
-
-void UnknownWordPenalty::Lookup(MemPool &pool,
- const SCFG::Manager &mgr,
- size_t maxChartSpan,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const
-{
- const System &system = mgr.system;
-
- size_t numWords = path.range.GetNumWordsCovered();
- if (numWords > 1) {
- // only create 1 word phrases
- return;
- }
-
- if (path.GetNumRules()) {
- // only create rules if no other rules
- return;
- }
-
- // don't do 1st if 1st word
- if (path.range.GetStartPos() == 0) {
- return;
- }
-
- // don't do 1st if last word
- const SCFG::Sentence &sentence = static_cast<const SCFG::Sentence&>(mgr.GetInput());
- if (path.range.GetStartPos() + 1 == sentence.GetSize()) {
- return;
- }
-
- // terminal
- const SCFG::Word &lastWord = path.subPhrase.Back();
- //cerr << "UnknownWordPenalty lastWord=" << lastWord << endl;
-
- const Factor *factor = lastWord[0];
- SCFG::TargetPhraseImpl *tp = new (pool.Allocate<SCFG::TargetPhraseImpl>()) SCFG::TargetPhraseImpl(pool, *this, system, 1);
- SCFG::Word &word = (*tp)[0];
- word.CreateFromString(system.GetVocab(), system, factor->GetString().as_string());
-
- tp->lhs.CreateFromString(system.GetVocab(), system, "[X]");
-
- size_t endPos = path.range.GetEndPos();
- const SCFG::InputPath &subPhrasePath = *mgr.GetInputPaths().GetMatrix().GetValue(endPos, 1);
-
- SCFG::ActiveChartEntry *chartEntry = new (pool.Allocate<SCFG::ActiveChartEntry>()) SCFG::ActiveChartEntry(pool);
- chartEntry->AddSymbolBindElement(subPhrasePath.range, lastWord, NULL, *this);
- path.AddActiveChartEntry(GetPtInd(), chartEntry);
-
- Scores &scores = tp->GetScores();
- scores.PlusEquals(mgr.system, *this, -100);
-
- MemPool &memPool = mgr.GetPool();
- const SubPhrase<SCFG::Word> &source = path.subPhrase;
- system.featureFunctions.EvaluateInIsolation(memPool, system, source, *tp);
-
- SCFG::TargetPhrases *tps = new (pool.Allocate<SCFG::TargetPhrases>()) SCFG::TargetPhrases(pool);
- tps->AddTargetPhrase(*tp);
-
- path.AddTargetPhrasesToPath(pool, mgr.system, *this, *tps, chartEntry->GetSymbolBind());
-}
-
-void UnknownWordPenalty::LookupUnary(MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const
-{
-}
-
-void UnknownWordPenalty::LookupNT(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const Moses2::Range &subPhraseRange,
- const SCFG::InputPath &prevPath,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &outPath) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-void UnknownWordPenalty::LookupGivenWord(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::InputPath &prevPath,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-void UnknownWordPenalty::LookupGivenNode(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::ActiveChartEntry &prevEntry,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const
-{
- UTIL_THROW2("Not implemented");
-}
-
-}
-
diff --git a/contrib/moses2/TranslationModel/UnknownWordPenalty.h b/contrib/moses2/TranslationModel/UnknownWordPenalty.h
deleted file mode 100644
index 52c235a36..000000000
--- a/contrib/moses2/TranslationModel/UnknownWordPenalty.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * UnknownWordPenalty.h
- *
- * Created on: 28 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include "PhraseTable.h"
-
-namespace Moses2
-{
-class Sentence;
-class InputPaths;
-class Range;
-
-class UnknownWordPenalty: public PhraseTable
-{
-public:
- UnknownWordPenalty(size_t startInd, const std::string &line);
- virtual ~UnknownWordPenalty();
-
- virtual void SetParameter(const std::string& key, const std::string& value);
-
- void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const;
- virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool,
- InputPath &inputPath) const;
-
- void ProcessXML(
- const Manager &mgr,
- MemPool &pool,
- const Sentence &sentence,
- InputPaths &inputPaths) const;
-
- virtual void
- EvaluateInIsolation(const System &system, const Phrase<Moses2::Word> &source,
- const TargetPhraseImpl &targetPhrase, Scores &scores,
- SCORE &estimatedScore) const;
-
- virtual void InitActiveChart(
- MemPool &pool,
- const SCFG::Manager &mgr,
- SCFG::InputPath &path) const;
-
- void Lookup(MemPool &pool,
- const SCFG::Manager &mgr,
- size_t maxChartSpan,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const;
-
- void LookupUnary(MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &path) const;
-
-protected:
- virtual void LookupNT(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const Moses2::Range &subPhraseRange,
- const SCFG::InputPath &prevPath,
- const SCFG::Stacks &stacks,
- SCFG::InputPath &outPath) const;
-
- virtual void LookupGivenWord(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::InputPath &prevPath,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const;
-
- virtual void LookupGivenNode(
- MemPool &pool,
- const SCFG::Manager &mgr,
- const SCFG::ActiveChartEntry &prevEntry,
- const SCFG::Word &wordSought,
- const Moses2::Hypotheses *hypos,
- const Moses2::Range &subPhraseRange,
- SCFG::InputPath &outPath) const;
-protected:
- bool m_drop;
- std::string m_prefix, m_suffix;
-};
-
-}
-
diff --git a/contrib/moses2/TranslationTask.cpp b/contrib/moses2/TranslationTask.cpp
deleted file mode 100644
index 375e4709b..000000000
--- a/contrib/moses2/TranslationTask.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-#include "TranslationTask.h"
-#include "System.h"
-#include "InputType.h"
-#include "PhraseBased/Manager.h"
-#include "SCFG/Manager.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-TranslationTask::TranslationTask(System &system,
- const std::string &line,
- long translationId)
-{
- if (system.isPb) {
- m_mgr = new Manager(system, *this, line, translationId);
- }
- else {
- m_mgr = new SCFG::Manager(system, *this, line, translationId);
- }
-}
-
-TranslationTask::~TranslationTask()
-{
-}
-
-void TranslationTask::Run()
-{
-
- m_mgr->Decode();
-
- string out;
-
- out = m_mgr->OutputBest() + "\n";
- m_mgr->system.bestCollector->Write(m_mgr->GetTranslationId(), out);
-
- if (m_mgr->system.options.nbest.nbest_size) {
- out = m_mgr->OutputNBest();
- m_mgr->system.nbestCollector->Write(m_mgr->GetTranslationId(), out);
- }
-
- if (!m_mgr->system.options.output.detailed_transrep_filepath.empty()) {
- out = m_mgr->OutputTransOpt();
- m_mgr->system.detailedTranslationCollector->Write(m_mgr->GetTranslationId(), out);
- }
-
- delete m_mgr;
-}
-
-}
-
diff --git a/contrib/moses2/TranslationTask.h b/contrib/moses2/TranslationTask.h
deleted file mode 100644
index bf2330357..000000000
--- a/contrib/moses2/TranslationTask.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#pragma once
-#include <string>
-#include "legacy/ThreadPool.h"
-
-namespace Moses2
-{
-
-class System;
-class ManagerBase;
-class Manager;
-
-class TranslationTask: public Task
-{
-public:
-
- TranslationTask(System &system, const std::string &line, long translationId);
- virtual ~TranslationTask();
- virtual void Run();
-
-protected:
- ManagerBase *m_mgr;
-};
-
-}
-
diff --git a/contrib/moses2/TrellisPaths.cpp b/contrib/moses2/TrellisPaths.cpp
deleted file mode 100644
index 814da4521..000000000
--- a/contrib/moses2/TrellisPaths.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * TrellisPaths.cpp
- *
- * Created on: 16 Mar 2016
- * Author: hieu
- */
-#include "TrellisPaths.h"
-#include "legacy/Util2.h"
-
-namespace Moses2
-{
-
-
-} /* namespace Moses2 */
diff --git a/contrib/moses2/TrellisPaths.h b/contrib/moses2/TrellisPaths.h
deleted file mode 100644
index 3e2d9ab9a..000000000
--- a/contrib/moses2/TrellisPaths.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * TrellisPaths.h
- *
- * Created on: 16 Mar 2016
- * Author: hieu
- */
-#pragma once
-
-#include <vector>
-#include <queue>
-#include "PhraseBased/TrellisPath.h"
-
-namespace Moses2
-{
-
-template<typename T>
-struct CompareTrellisPath
-{
- bool operator()(const T* pathA, const T* pathB) const
- {
- return (pathA->GetFutureScore() < pathB->GetFutureScore());
- }
-};
-
-template<typename T>
-class TrellisPaths
-{
-public:
- TrellisPaths() {}
-
- virtual ~TrellisPaths()
- {
- while (!empty()) {
- T *path = Get();
- delete path;
- }
- }
-
- bool empty() const
- {
- return m_coll.empty();
- }
-
- //! add a new entry into collection
- void Add(T *trellisPath)
- {
- m_coll.push(trellisPath);
- }
-
- T *Get()
- {
- T *top = m_coll.top();
-
- // Detach
- m_coll.pop();
- return top;
- }
-
- size_t GetSize() const
- { return m_coll.size(); }
-
-protected:
- typedef std::priority_queue<T*, std::vector<T*>,
- CompareTrellisPath<T> > CollectionType;
- CollectionType m_coll;
-};
-
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/TypeDef.cpp b/contrib/moses2/TypeDef.cpp
deleted file mode 100644
index b8b79c59c..000000000
--- a/contrib/moses2/TypeDef.cpp
+++ /dev/null
@@ -1,11 +0,0 @@
-#include "TypeDef.h"
-#include "util/exception.hh"
-#include <typeinfo>
-
-
-namespace Moses2
-{
-
-
-
-}
diff --git a/contrib/moses2/TypeDef.h b/contrib/moses2/TypeDef.h
deleted file mode 100644
index e0a1a93a3..000000000
--- a/contrib/moses2/TypeDef.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * TypeDef.h
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-#pragma once
-
-#include <cstddef>
-#include <vector>
-#include <istream>
-#include "Vector.h"
-
-namespace Moses2
-{
-
-class HypothesisBase;
-
-#define NOT_FOUND std::numeric_limits<size_t>::max()
-const size_t DEFAULT_MAX_PHRASE_LENGTH = 20;
-const size_t DEFAULT_MAX_CHART_SPAN = 20;
-const size_t DEFAULT_MAX_HYPOSTACK_SIZE = 200;
-const size_t DEFAULT_CUBE_PRUNING_POP_LIMIT = 1000;
-const size_t DEFAULT_CUBE_PRUNING_DIVERSITY = 0;
-const size_t DEFAULT_MAX_TRANS_OPT_SIZE = 5000;
-
-const size_t DEFAULT_MAX_PART_TRANS_OPT_SIZE = 10000;
-const size_t DEFAULT_MAX_TRANS_OPT_CACHE_SIZE = 10000;
-const float LOWEST_SCORE = -100.0f;
-
-const float DEFAULT_BEAM_WIDTH = 0.00001f;
-const float DEFAULT_EARLY_DISCARDING_THRESHOLD = 0.0f;
-const float DEFAULT_TRANSLATION_OPTION_THRESHOLD = 0.0f;
-
-#ifndef BOS_
-#define BOS_ "<s>" //Beginning of sentence symbol
-#endif
-#ifndef EOS_
-#define EOS_ "</s>" //End of sentence symbol
-#endif
-
-typedef size_t FactorType;
-typedef float SCORE;
-typedef std::vector<FactorType> FactorList;
-
-// Note: StaticData uses SearchAlgorithm to determine whether the translation
-// model is phrase-based or syntax-based. If you add a syntax-based search
-// algorithm here then you should also update StaticData::IsSyntax().
-enum SearchAlgorithm
-{
- Normal = 0, CubePruning = 1,
- //,CubeGrowing = 2
- CYKPlus = 3,
- NormalBatch = 4,
- ChartIncremental = 5,
- SyntaxS2T = 6,
- SyntaxT2S = 7,
- SyntaxT2S_SCFG = 8,
- SyntaxF2S = 9,
- CubePruningPerMiniStack = 10,
- CubePruningPerBitmap = 11,
- CubePruningCardinalStack = 12,
- CubePruningBitmapStack = 13,
- CubePruningMiniStack = 14,
- DefaultSearchAlgorithm = 777 // means: use StaticData.m_searchAlgorithm
-};
-
-enum InputTypeEnum {
- SentenceInput = 0,
- ConfusionNetworkInput = 1,
- WordLatticeInput = 2,
- TreeInputType = 3,
- //,WordLatticeInput2 = 4,
- TabbedSentenceInput = 5,
- ForestInputType = 6
-};
-
-enum XmlInputType {
- XmlPassThrough = 0,
- XmlIgnore = 1,
- XmlExclusive = 2,
- XmlInclusive = 3,
- XmlConstraint = 4
-};
-
-enum WordAlignmentSort {
- NoSort = 0,
- TargetOrder = 1
-};
-
-enum S2TParsingAlgorithm {
- RecursiveCYKPlus,
- Scope3
-};
-
-enum SourceLabelOverlap {
- SourceLabelOverlapAdd = 0,
- SourceLabelOverlapReplace = 1,
- SourceLabelOverlapDiscard = 2
-};
-
-/////////////////////////
-// MOSES2 only
-
-class StackAdd
-{
-public:
- bool added;
- HypothesisBase *other;
-
- StackAdd()
- {
- }
- StackAdd(bool vadded, HypothesisBase *vOther) :
- added(vadded), other(vOther)
- {
- }
-};
-
-class Hypothesis;
-typedef Vector<Hypothesis*> Batch;
-
-class Factor;
-typedef std::vector<const Factor*> Context;
-
-}
-
diff --git a/contrib/moses2/Vector.cpp b/contrib/moses2/Vector.cpp
deleted file mode 100644
index 46af0f793..000000000
--- a/contrib/moses2/Vector.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Vector.cpp
- *
- * Created on: 7 Dec 2015
- * Author: hieu
- */
-
-#include "Vector.h"
-
-namespace Moses2
-{
-
-}
-
diff --git a/contrib/moses2/Vector.h b/contrib/moses2/Vector.h
deleted file mode 100644
index f35e71825..000000000
--- a/contrib/moses2/Vector.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Vector.h
- *
- * Created on: 7 Dec 2015
- * Author: hieu
- */
-
-#pragma once
-#include <cassert>
-#include "MemPoolAllocator.h"
-
-namespace Moses2
-{
-
-template<typename T>
-class Vector: public std::vector<T, MemPoolAllocator<T> >
-{
- typedef std::vector<T, MemPoolAllocator<T> > Parent;
-
-public:
- Vector(MemPool &pool, size_t size = 0, const T &val = T()) :
- Parent(size, val, MemPoolAllocator<T>(pool))
- {
- }
-
- Vector(const Vector &copy) :
- Parent(copy)
- {
- }
-
-protected:
-};
-
-
-}
-
diff --git a/contrib/moses2/Weights.cpp b/contrib/moses2/Weights.cpp
deleted file mode 100644
index 643847eee..000000000
--- a/contrib/moses2/Weights.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Weights.cpp
- *
- * Created on: 24 Oct 2015
- * Author: hieu
- */
-#include <cassert>
-#include <string>
-#include <vector>
-#include "FF/FeatureFunction.h"
-#include "FF/FeatureFunctions.h"
-#include "Weights.h"
-#include "System.h"
-#include "legacy/Util2.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-Weights::Weights()
-{
- // TODO Auto-generated constructor stub
-
-}
-
-Weights::~Weights()
-{
- // TODO Auto-generated destructor stub
-}
-
-void Weights::Init(const FeatureFunctions &ffs)
-{
- size_t totalNumScores = ffs.GetNumScores();
- //cerr << "totalNumScores=" << totalNumScores << endl;
- m_weights.resize(totalNumScores, 1);
-}
-
-std::vector<SCORE> Weights::GetWeights(const FeatureFunction &ff) const
-{
- std::vector<SCORE> ret(m_weights.begin() + ff.GetStartInd(), m_weights.begin() + ff.GetStartInd() + ff.GetNumScores());
- return ret;
-}
-
-void Weights::SetWeights(const FeatureFunctions &ffs, const std::string &ffName, const std::vector<float> &weights)
-{
- const FeatureFunction *ff = ffs.FindFeatureFunction(ffName);
- UTIL_THROW_IF2(ff == NULL, "Feature function not found:" << ffName);
-
- size_t startInd = ff->GetStartInd();
- size_t numScores = ff->GetNumScores();
- UTIL_THROW_IF2(weights.size() != numScores, "Wrong number of weights. " << weights.size() << "!=" << numScores);
-
- for (size_t i = 0; i < numScores; ++i) {
- SCORE weight = weights[i];
- m_weights[startInd + i] = weight;
- }
-}
-
-}
-
diff --git a/contrib/moses2/Weights.h b/contrib/moses2/Weights.h
deleted file mode 100644
index c3c2cee62..000000000
--- a/contrib/moses2/Weights.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Weights.h
- *
- * Created on: 24 Oct 2015
- * Author: hieu
- */
-#pragma once
-
-#include <iostream>
-#include <vector>
-#include "TypeDef.h"
-
-namespace Moses2
-{
-
-class FeatureFunctions;
-
-class Weights
-{
-public:
- Weights();
- virtual ~Weights();
- void Init(const FeatureFunctions &ffs);
-
- SCORE operator[](size_t ind) const
- {
- return m_weights[ind];
- }
-
- std::vector<SCORE> GetWeights(const FeatureFunction &ff) const;
-
- void SetWeights(const FeatureFunctions &ffs, const std::string &ffName, const std::vector<float> &weights);
-
-protected:
- std::vector<SCORE> m_weights;
-};
-
-}
-
diff --git a/contrib/moses2/Word.cpp b/contrib/moses2/Word.cpp
deleted file mode 100644
index fe10330e7..000000000
--- a/contrib/moses2/Word.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Word.cpp
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-#include <boost/functional/hash_fwd.hpp>
-#include <sstream>
-#include <vector>
-#include "Word.h"
-#include "System.h"
-#include "legacy/Util2.h"
-#include "util/murmur_hash.hh"
-
-using namespace std;
-
-namespace Moses2
-{
-
-Word::Word()
-{
- Init<const Factor*>(m_factors, MAX_NUM_FACTORS, NULL);
-}
-
-Word::Word(const Word &copy)
-{
- memcpy(m_factors, copy.m_factors, sizeof(const Factor *) * MAX_NUM_FACTORS);
-}
-
-Word::~Word()
-{
- // TODO Auto-generated destructor stub
-}
-
-void Word::CreateFromString(FactorCollection &vocab, const System &system,
- const std::string &str)
-{
- vector<string> toks = Tokenize(str, "|");
- for (size_t i = 0; i < toks.size(); ++i) {
- const string &tok = toks[i];
- //cerr << "tok=" << tok << endl;
- const Factor *factor = vocab.AddFactor(tok, system, false);
- m_factors[i] = factor;
- }
-
- // null the rest
- for (size_t i = toks.size(); i < MAX_NUM_FACTORS; ++i) {
- m_factors[i] = NULL;
- }
-}
-
-size_t Word::hash() const
-{
- uint64_t seed = 0;
- size_t ret = util::MurmurHashNative(m_factors,
- sizeof(Factor*) * MAX_NUM_FACTORS, seed);
- return ret;
-}
-
-size_t Word::hash(const std::vector<FactorType> &factors) const
-{
- size_t seed = 0;
- for (size_t i = 0; i < factors.size(); ++i) {
- FactorType factorType = factors[i];
- const Factor *factor = m_factors[factorType];
- boost::hash_combine(seed, factor);
- }
- return seed;
-}
-
-
-int Word::Compare(const Word &compare) const
-{
-
- int cmp = memcmp(m_factors, compare.m_factors,
- sizeof(Factor*) * MAX_NUM_FACTORS);
- return cmp;
-
- /*
- int ret = m_factors[0]->GetString().compare(compare.m_factors[0]->GetString());
- return ret;
- */
-}
-
-bool Word::operator<(const Word &compare) const
-{
- int cmp = Compare(compare);
- return (cmp < 0);
-}
-
-std::string Word::Debug(const System &system) const
-{
- stringstream out;
- bool outputAlready = false;
- for (size_t i = 0; i < MAX_NUM_FACTORS; ++i) {
- const Factor *factor = m_factors[i];
- if (factor) {
- if (outputAlready) {
- out << "|";
- }
- out << *factor;
- outputAlready = true;
- }
- }
-
- return out.str();
-}
-
-void Word::OutputToStream(const System &system, std::ostream &out) const
-{
- const std::vector<FactorType> &factorTypes = system.options.output.factor_order;
- out << *m_factors[ factorTypes[0] ];
-
- for (size_t i = 1; i < factorTypes.size(); ++i) {
- FactorType factorType = factorTypes[i];
- const Factor *factor = m_factors[factorType];
-
- out << "|" << *factor;
- }
-}
-
-std::string Word::GetString(const FactorList &factorTypes) const
-{
- assert(factorTypes.size());
- std::stringstream ret;
-
- ret << m_factors[factorTypes[0]]->GetString();
- for (size_t i = 1; i < factorTypes.size(); ++i) {
- FactorType factorType = factorTypes[i];
- ret << "|" << m_factors[factorType];
- }
- return ret.str();
-}
-
-}
-
diff --git a/contrib/moses2/Word.h b/contrib/moses2/Word.h
deleted file mode 100644
index 7210c5140..000000000
--- a/contrib/moses2/Word.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Word.h
- *
- * Created on: 23 Oct 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include <iostream>
-#include "TypeDef.h"
-#include "legacy/Factor.h"
-#include "legacy/FactorCollection.h"
-
-namespace Moses2
-{
-
-class Word
-{
-public:
- explicit Word();
- Word(const Word &copy);
-
- virtual ~Word();
-
- void CreateFromString(FactorCollection &vocab, const System &system,
- const std::string &str);
-
- virtual size_t hash() const;
- virtual size_t hash(const std::vector<FactorType> &factors) const;
-
- int Compare(const Word &compare) const;
-
- virtual bool operator==(const Word &compare) const
- {
- int cmp = Compare(compare);
- return cmp == 0;
- }
-
- virtual bool operator!=(const Word &compare) const
- {
- return !((*this) == compare);
- }
-
- virtual bool operator<(const Word &compare) const;
-
- const Factor* operator[](size_t ind) const
- {
- return m_factors[ind];
- }
-
- const Factor*& operator[](size_t ind)
- {
- return m_factors[ind];
- }
-
- virtual void OutputToStream(const System &system, std::ostream &out) const;
- virtual std::string Debug(const System &system) const;
-
- std::string GetString(const FactorList &factorTypes) const;
-protected:
- const Factor *m_factors[MAX_NUM_FACTORS];
-
-};
-
-}
-
diff --git a/contrib/moses2/defer/CubePruningBitmapStack/Misc.cpp b/contrib/moses2/defer/CubePruningBitmapStack/Misc.cpp
deleted file mode 100644
index 5eb7893f2..000000000
--- a/contrib/moses2/defer/CubePruningBitmapStack/Misc.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * CubePruning.cpp
- *
- * Created on: 27 Nov 2015
- * Author: hieu
- */
-
-#include "Misc.h"
-#include "Stack.h"
-#include "../Manager.h"
-#include "../../MemPool.h"
-#include "../../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningBitmapStack
-{
-
-////////////////////////////////////////////////////////////////////////
-QueueItem *QueueItem::Create(QueueItem *currItem,
- Manager &mgr,
- CubeEdge &edge,
- size_t hypoIndex,
- size_t tpIndex,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- QueueItem *ret;
- if (currItem) {
- // reuse incoming queue item to create new item
- ret = currItem;
- ret->Init(mgr, edge, hypoIndex, tpIndex);
- }
- else if (!queueItemRecycler.empty()) {
- // use item from recycle bin
- ret = queueItemRecycler.back();
- ret->Init(mgr, edge, hypoIndex, tpIndex);
- queueItemRecycler.pop_back();
- }
- else {
- // create new item
- ret = new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge, hypoIndex, tpIndex);
- }
-
- return ret;
-}
-
-QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
-:edge(&edge)
-,hypoIndex(hypoIndex)
-,tpIndex(tpIndex)
-{
- CreateHypothesis(mgr);
-}
-
-void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
-{
- this->edge = &edge;
- this->hypoIndex = hypoIndex;
- this->tpIndex = tpIndex;
-
- CreateHypothesis(mgr);
-}
-
-void QueueItem::CreateHypothesis(Manager &mgr)
-{
- const Hypothesis *prevHypo = edge->hypos[hypoIndex];
- const TargetPhrase &tp = edge->tps[tpIndex];
-
- //cerr << "hypoIndex=" << hypoIndex << endl;
- //cerr << "edge.hypos=" << edge.hypos.size() << endl;
- //cerr << prevHypo << endl;
- //cerr << *prevHypo << endl;
-
- hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
- hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore);
- hypo->EvaluateWhenApplied();
-}
-
-////////////////////////////////////////////////////////////////////////
-CubeEdge::CubeEdge(
- Manager &mgr,
- const Hypotheses &hypos,
- const InputPath &path,
- const TargetPhrases &tps,
- const Bitmap &newBitmap)
-:hypos(hypos)
-,path(path)
-,tps(tps)
-,newBitmap(newBitmap)
-{
- estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
-}
-
-std::ostream& operator<<(std::ostream &out, const CubeEdge &obj)
-{
- out << obj.newBitmap;
- return out;
-}
-
-bool
-CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const
-{
- //UTIL_THROW_IF2(x >= (1<<17), "Error");
- //UTIL_THROW_IF2(y >= (1<<17), "Error");
-
- SeenPositionItem val(this, (x<<16) + y);
- std::pair<SeenPositions::iterator, bool> pairRet = seenPositions.insert(val);
- return pairRet.second;
-}
-
-void CubeEdge::CreateFirst(Manager &mgr,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- assert(hypos.size());
- assert(tps.GetSize());
-
- QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler);
- queue.push(item);
- bool setSeen = SetSeenPosition(0, 0, seenPositions);
- assert(setSeen);
-}
-
-void CubeEdge::CreateNext(Manager &mgr,
- QueueItem *item,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- size_t hypoIndex = item->hypoIndex;
- size_t tpIndex = item->tpIndex;
-
- if (hypoIndex + 1 < hypos.size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) {
- // reuse incoming queue item to create new item
- QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler);
- assert(newItem == item);
- queue.push(newItem);
- item = NULL;
- }
-
- if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) {
- QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler);
- queue.push(newItem);
- item = NULL;
- }
-
- if (item) {
- // recycle unused queue item
- queueItemRecycler.push_back(item);
- }
-}
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningBitmapStack/Misc.h b/contrib/moses2/defer/CubePruningBitmapStack/Misc.h
deleted file mode 100644
index 00f3fa865..000000000
--- a/contrib/moses2/defer/CubePruningBitmapStack/Misc.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * CubePruning.h
- *
- * Created on: 27 Nov 2015
- * Author: hieu
- */
-#pragma once
-#include <boost/pool/pool_alloc.hpp>
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
-#include <vector>
-#include <queue>
-#include "../../legacy/Range.h"
-#include "../Hypothesis.h"
-#include "../../TypeDef.h"
-#include "../../Vector.h"
-#include "Stack.h"
-
-namespace Moses2
-{
-
-class Manager;
-class InputPath;
-class TargetPhrases;
-class Bitmap;
-
-namespace NSCubePruningBitmapStack
-{
-class CubeEdge;
-
-///////////////////////////////////////////
-class QueueItem
-{
- ~QueueItem(); // NOT IMPLEMENTED. Use MemPool
-public:
- static QueueItem *Create(QueueItem *currItem,
- Manager &mgr,
- CubeEdge &edge,
- size_t hypoIndex,
- size_t tpIndex,
- std::deque<QueueItem*> &queueItemRecycler);
- QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
-
- void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
-
- CubeEdge *edge;
- size_t hypoIndex, tpIndex;
- Hypothesis *hypo;
-
-protected:
- void CreateHypothesis(Manager &mgr);
-};
-
-///////////////////////////////////////////
-class QueueItemOrderer
-{
-public:
- bool operator()(QueueItem* itemA, QueueItem* itemB) const {
- HypothesisFutureScoreOrderer orderer;
- return !orderer(itemA->hypo, itemB->hypo);
- }
-};
-
-///////////////////////////////////////////
-class CubeEdge
-{
- friend std::ostream& operator<<(std::ostream &, const CubeEdge &);
-
-public:
- typedef std::priority_queue<QueueItem*,
- std::vector<QueueItem*>,
- QueueItemOrderer> Queue;
-
- typedef std::pair<const CubeEdge*, int> SeenPositionItem;
- typedef boost::unordered_set<SeenPositionItem,
- boost::hash<SeenPositionItem>,
- std::equal_to<SeenPositionItem> > SeenPositions;
-
- const Hypotheses &hypos;
- const InputPath &path;
- const TargetPhrases &tps;
- const Bitmap &newBitmap;
- SCORE estimatedScore;
-
- CubeEdge(Manager &mgr,
- const Hypotheses &hypos,
- const InputPath &path,
- const TargetPhrases &tps,
- const Bitmap &newBitmap);
-
- bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const;
-
- void CreateFirst(Manager &mgr,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler);
- void CreateNext(Manager &mgr,
- QueueItem *item,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler);
-
-protected:
-
-};
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningBitmapStack/Search.cpp b/contrib/moses2/defer/CubePruningBitmapStack/Search.cpp
deleted file mode 100644
index 6188edfa4..000000000
--- a/contrib/moses2/defer/CubePruningBitmapStack/Search.cpp
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * Search.cpp
- *
- * Created on: 16 Nov 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include "Search.h"
-#include "Stack.h"
-#include "../Manager.h"
-#include "../Hypothesis.h"
-#include "../../InputPaths.h"
-#include "../../InputPath.h"
-#include "../../System.h"
-#include "../../Sentence.h"
-#include "../../TranslationTask.h"
-#include "../../legacy/Util2.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningBitmapStack
-{
-
-////////////////////////////////////////////////////////////////////////
-Search::Search(Manager &mgr)
-:Moses2::Search(mgr)
-,m_stack(mgr)
-
-,m_queue(QueueItemOrderer(), std::vector<QueueItem*>() )
-
-,m_seenPositions()
-{
-}
-
-Search::~Search()
-{
-}
-
-void Search::Decode()
-{
- // init cue edges
- m_cubeEdges.resize(mgr.GetInput().GetSize() + 1);
- for (size_t i = 0; i < m_cubeEdges.size(); ++i) {
- m_cubeEdges[i] = new (mgr.GetPool().Allocate<CubeEdges>()) CubeEdges();
- }
-
- const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
- Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
- initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap);
- initHypo->EmptyHypothesisState(mgr.GetInput());
-
- m_stack.Add(initHypo, mgr.GetHypoRecycle());
- PostDecode(0);
-
- for (size_t stackInd = 1; stackInd < mgr.GetInput().GetSize() + 1; ++stackInd) {
- //cerr << "stackInd=" << stackInd << endl;
- m_stack.Clear();
- Decode(stackInd);
- PostDecode(stackInd);
-
- //m_stack.DebugCounts();
- //cerr << m_stacks << endl;
- }
-
-}
-
-void Search::Decode(size_t stackInd)
-{
- Recycler<Hypothesis*> &hypoRecycler = mgr.GetHypoRecycle();
-
- // reuse queue from previous stack. Clear it first
- std::vector<QueueItem*> &container = Container(m_queue);
- //cerr << "container=" << container.size() << endl;
- BOOST_FOREACH(QueueItem *item, container) {
- // recycle unused hypos from queue
- Hypothesis *hypo = item->hypo;
- hypoRecycler.Recycle(hypo);
-
- // recycle queue item
- m_queueItemRecycler.push_back(item);
- }
- container.clear();
-
- m_seenPositions.clear();
-
- // add top hypo from every edge into queue
- CubeEdges &edges = *m_cubeEdges[stackInd];
-
- BOOST_FOREACH(CubeEdge *edge, edges) {
- //cerr << *edge << " ";
- edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
- }
-
- /*
- cerr << "edges: ";
- boost::unordered_set<const Bitmap*> uniqueBM;
- BOOST_FOREACH(CubeEdge *edge, edges) {
- uniqueBM.insert(&edge->newBitmap);
- //cerr << *edge << " ";
- }
- cerr << edges.size() << " " << uniqueBM.size();
- cerr << endl;
- */
-
- size_t pops = 0;
- while (!m_queue.empty() && pops < mgr.system.popLimit) {
- // get best hypo from queue, add to stack
- //cerr << "queue=" << queue.size() << endl;
- QueueItem *item = m_queue.top();
- m_queue.pop();
-
- CubeEdge *edge = item->edge;
-
- // add hypo to stack
- Hypothesis *hypo = item->hypo;
- //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
- m_stack.Add(hypo, hypoRecycler);
-
- edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
-
- ++pops;
- }
-
- /*
- // create hypo from every edge. Increase diversity
- while (!m_queue.empty()) {
- QueueItem *item = m_queue.top();
- m_queue.pop();
-
- if (item->hypoIndex == 0 && item->tpIndex == 0) {
- CubeEdge &edge = item->edge;
-
- // add hypo to stack
- Hypothesis *hypo = item->hypo;
- //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
- m_stacks.Add(hypo, mgr.GetHypoRecycle());
- }
- }
- */
-}
-
-void Search::PostDecode(size_t stackInd)
-{
- MemPool &pool = mgr.GetPool();
-
- Stack::SortedHypos sortedHypos = m_stack.GetSortedAndPruneHypos(mgr);
-
- BOOST_FOREACH(const Stack::SortedHypos::value_type &val, sortedHypos) {
- const Bitmap &hypoBitmap = *val.first.first;
- size_t hypoEndPos = val.first.second;
- //cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl;
-
- // create edges to next hypos from existing hypos
- const InputPaths &paths = mgr.GetInputPaths();
-
- BOOST_FOREACH(const InputPath *path, paths) {
- const Range &pathRange = path->range;
- //cerr << "pathRange=" << pathRange << endl;
-
- if (!path->IsUsed()) {
- continue;
- }
- if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
- continue;
- }
-
- const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
- size_t numWords = newBitmap.GetNumWordsCovered();
-
- CubeEdges &edges = *m_cubeEdges[numWords];
-
- // sort hypo for a particular bitmap and hypoEndPos
- Hypotheses &sortedHypos = *val.second;
-
- size_t numPt = mgr.system.mappings.size();
- for (size_t i = 0; i < numPt; ++i) {
- const TargetPhrases *tps = path->targetPhrases[i];
- if (tps && tps->GetSize()) {
- CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, sortedHypos, *path, *tps, newBitmap);
- edges.push_back(edge);
- }
- }
- }
- }
-
-}
-
-const Hypothesis *Search::GetBestHypo() const
-{
- std::vector<const Hypothesis*> sortedHypos = m_stack.GetBestHypos(1);
-
- const Hypothesis *best = NULL;
- if (sortedHypos.size()) {
- best = sortedHypos[0];
- }
- return best;
-}
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningBitmapStack/Search.h b/contrib/moses2/defer/CubePruningBitmapStack/Search.h
deleted file mode 100644
index 7e58ba91f..000000000
--- a/contrib/moses2/defer/CubePruningBitmapStack/Search.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Search.h
- *
- * Created on: 16 Nov 2015
- * Author: hieu
- */
-
-#pragma once
-#include <boost/pool/pool_alloc.hpp>
-#include "../Search.h"
-#include "Misc.h"
-#include "Stack.h"
-#include "../../legacy/Range.h"
-
-namespace Moses2
-{
-
-class Bitmap;
-class Hypothesis;
-class InputPath;
-class TargetPhrases;
-
-namespace NSCubePruningBitmapStack
-{
-
-class Search : public Moses2::Search
-{
-public:
- Search(Manager &mgr);
- virtual ~Search();
-
- virtual void Decode();
- const Hypothesis *GetBestHypo() const;
-
-protected:
- Stack m_stack;
-
- CubeEdge::Queue m_queue;
- CubeEdge::SeenPositions m_seenPositions;
-
- // CUBE PRUNING VARIABLES
- // setup
- typedef std::vector<CubeEdge*> CubeEdges;
- std::vector<CubeEdges*> m_cubeEdges;
-
- std::deque<QueueItem*> m_queueItemRecycler;
-
- // CUBE PRUNING
- // decoding
- void Decode(size_t stackInd);
- void PostDecode(size_t stackInd);
-};
-
-}
-
-}
-
diff --git a/contrib/moses2/defer/CubePruningBitmapStack/Stack.cpp b/contrib/moses2/defer/CubePruningBitmapStack/Stack.cpp
deleted file mode 100644
index 4dfa3b6f4..000000000
--- a/contrib/moses2/defer/CubePruningBitmapStack/Stack.cpp
+++ /dev/null
@@ -1,303 +0,0 @@
-/*
- * Stack.cpp
- *
- * Created on: 24 Oct 2015
- * Author: hieu
- */
-#include <algorithm>
-#include <boost/foreach.hpp>
-#include "Stack.h"
-#include "../Hypothesis.h"
-#include "../Manager.h"
-#include "../../Scores.h"
-#include "../../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningBitmapStack
-{
-MiniStack::MiniStack(const Manager &mgr)
-:m_coll()
-,m_sortedHypos(NULL)
-{}
-
-StackAdd MiniStack::Add(const Hypothesis *hypo)
-{
- std::pair<_HCType::iterator, bool> addRet = m_coll.insert(hypo);
-
- // CHECK RECOMBINATION
- if (addRet.second) {
- // equiv hypo doesn't exists
- return StackAdd(true, NULL);
- }
- else {
- const Hypothesis *hypoExisting = *addRet.first;
- if (hypo->GetScores().GetTotalScore() > hypoExisting->GetScores().GetTotalScore()) {
- // incoming hypo is better than the one we have
- const Hypothesis *const &hypoExisting1 = *addRet.first;
- const Hypothesis *&hypoExisting2 = const_cast<const Hypothesis *&>(hypoExisting1);
- hypoExisting2 = hypo;
-
- return StackAdd(true, const_cast<Hypothesis*>(hypoExisting));
- }
- else {
- // already storing the best hypo. discard incoming hypo
- return StackAdd(false, const_cast<Hypothesis*>(hypo));
- }
- }
-
- assert(false);
-}
-
-Hypotheses &MiniStack::GetSortedAndPruneHypos(const Manager &mgr) const
-{
- if (m_sortedHypos == NULL) {
- // create sortedHypos first
- MemPool &pool = mgr.GetPool();
- m_sortedHypos = new (pool.Allocate< Vector<const Hypothesis*> >()) Vector<const Hypothesis*>(pool, m_coll.size());
-
- size_t ind = 0;
- BOOST_FOREACH(const Hypothesis *hypo, m_coll) {
- (*m_sortedHypos)[ind] = hypo;
- ++ind;
- }
-
- SortAndPruneHypos(mgr);
- }
-
- return *m_sortedHypos;
-}
-
-void MiniStack::SortAndPruneHypos(const Manager &mgr) const
-{
- size_t stackSize = mgr.system.stackSize;
- Recycler<Hypothesis*> &recycler = mgr.GetHypoRecycle();
-
- /*
- cerr << "UNSORTED hypos:" << endl;
- for (size_t i = 0; i < hypos.size(); ++i) {
- const Hypothesis *hypo = hypos[i];
- cerr << *hypo << endl;
- }
- cerr << endl;
- */
- Hypotheses::iterator iterMiddle;
- iterMiddle = (stackSize == 0 || m_sortedHypos->size() < stackSize)
- ? m_sortedHypos->end()
- : m_sortedHypos->begin() + stackSize;
-
- std::partial_sort(m_sortedHypos->begin(), iterMiddle, m_sortedHypos->end(),
- HypothesisFutureScoreOrderer());
-
- // prune
- if (stackSize && m_sortedHypos->size() > stackSize) {
- for (size_t i = stackSize; i < m_sortedHypos->size(); ++i) {
- Hypothesis *hypo = const_cast<Hypothesis*>((*m_sortedHypos)[i]);
- recycler.Recycle(hypo);
- }
- m_sortedHypos->resize(stackSize);
- }
-
- /*
- cerr << "sorted hypos:" << endl;
- for (size_t i = 0; i < hypos.size(); ++i) {
- const Hypothesis *hypo = hypos[i];
- cerr << hypo << " " << *hypo << endl;
- }
- cerr << endl;
- */
-
-}
-
-void MiniStack::Clear()
-{
- m_sortedHypos = NULL;
- m_coll.clear();
-}
-
-///////////////////////////////////////////////////////////////
-Stack::Stack(const Manager &mgr)
-:m_mgr(mgr)
-,m_coll()
-,m_miniStackRecycler()
-{
-}
-
-Stack::~Stack() {
- // TODO Auto-generated destructor stub
-}
-
-void Stack::Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle)
-{
- HypoCoverageInternal key = &hypo->GetBitmap();
- StackAdd added = GetMiniStack(key).Add(hypo);
-
- if (added.toBeDeleted) {
- hypoRecycle.Recycle(added.toBeDeleted);
- }
-}
-
-std::vector<const Hypothesis*> Stack::GetBestHypos(size_t num) const
-{
- std::vector<const Hypothesis*> ret;
- BOOST_FOREACH(const Coll::value_type &val, m_coll) {
- const MiniStack::_HCType &hypos = val.second->GetColl();
- ret.insert(ret.end(), hypos.begin(), hypos.end());
- }
-
- std::vector<const Hypothesis*>::iterator iterMiddle;
- iterMiddle = (num == 0 || ret.size() < num)
- ? ret.end()
- : ret.begin()+num;
-
- std::partial_sort(ret.begin(), iterMiddle, ret.end(),
- HypothesisFutureScoreOrderer());
-
- return ret;
-}
-
-size_t Stack::GetHypoSize() const
-{
- size_t ret = 0;
- BOOST_FOREACH(const Coll::value_type &val, m_coll) {
- const MiniStack::_HCType &hypos = val.second->GetColl();
- ret += hypos.size();
- }
- return ret;
-}
-
-MiniStack &Stack::GetMiniStack(const HypoCoverageInternal &key)
-{
- MiniStack *ret;
- Coll::iterator iter = m_coll.find(key);
- if (iter == m_coll.end()) {
- if (m_miniStackRecycler.empty()) {
- ret = new (m_mgr.GetPool().Allocate<MiniStack>()) MiniStack(m_mgr);
- }
- else {
- ret = m_miniStackRecycler.back();
- ret->Clear();
- m_miniStackRecycler.pop_back();
- }
-
- m_coll[key] = ret;
- }
- else {
- ret = iter->second;
- }
- return *ret;
-}
-
-void Stack::Clear()
-{
- BOOST_FOREACH(const Coll::value_type &val, m_coll) {
- MiniStack *miniStack = val.second;
- m_miniStackRecycler.push_back(miniStack);
- }
-
- m_coll.clear();
-}
-
-Stack::SortedHypos Stack::GetSortedAndPruneHypos(const Manager &mgr) const
-{
- SortedHypos ret;
-
- MemPool &pool = mgr.GetPool();
-
- // prune and sort
- Hypotheses *allHypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool, GetHypoSize());
- size_t i = 0;
-
- BOOST_FOREACH(const Coll::value_type &val, m_coll) {
- const MiniStack *miniStack = val.second;
- const MiniStack::MiniStack::_HCType &hypos = miniStack->GetColl();
-
- BOOST_FOREACH(const Hypothesis *hypo, hypos) {
- (*allHypos)[i++] = hypo;
- }
- }
-
- SortAndPruneHypos(mgr, *allHypos);
-
- // divide hypos by [bitmap, last end pos]
- BOOST_FOREACH(const Hypothesis *hypo, *allHypos) {
- HypoCoverage key(&hypo->GetBitmap(), hypo->GetInputPath().range.GetEndPos());
-
- Hypotheses *hypos;
- SortedHypos::iterator iter;
- iter = ret.find(key);
- if (iter == ret.end()) {
- hypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool);
- ret[key] = hypos;
- }
- else {
- hypos = iter->second;
- }
- hypos->push_back(hypo);
- }
-
- return ret;
-}
-
-void Stack::SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const
-{
- size_t stackSize = mgr.system.stackSize;
- Recycler<Hypothesis*> &recycler = mgr.GetHypoRecycle();
-
- /*
- cerr << "UNSORTED hypos:" << endl;
- for (size_t i = 0; i < hypos.size(); ++i) {
- const Hypothesis *hypo = hypos[i];
- cerr << *hypo << endl;
- }
- cerr << endl;
- */
- Hypotheses::iterator iterMiddle;
- iterMiddle = (stackSize == 0 || hypos.size() < stackSize)
- ? hypos.end()
- : hypos.begin() + stackSize;
-
- std::partial_sort(hypos.begin(), iterMiddle, hypos.end(),
- HypothesisFutureScoreOrderer());
-
- // prune
- if (stackSize && hypos.size() > stackSize) {
- for (size_t i = stackSize; i < hypos.size(); ++i) {
- Hypothesis *hypo = const_cast<Hypothesis*>(hypos[i]);
- recycler.Recycle(hypo);
- }
- hypos.resize(stackSize);
- }
-
- /*
- cerr << "sorted hypos:" << endl;
- for (size_t i = 0; i < hypos.size(); ++i) {
- const Hypothesis *hypo = hypos[i];
- cerr << hypo << " " << *hypo << endl;
- }
- cerr << endl;
- */
-
-}
-
-
-void Stack::DebugCounts()
-{
- /*
- cerr << "counts=";
- BOOST_FOREACH(const Coll::value_type &val, GetColl()) {
- const NSCubePruning::MiniStack &miniStack = *val.second;
- size_t count = miniStack.GetColl().size();
- cerr << count << " ";
- }
- cerr << endl;
- */
-}
-
-}
-
-}
-
diff --git a/contrib/moses2/defer/CubePruningBitmapStack/Stack.h b/contrib/moses2/defer/CubePruningBitmapStack/Stack.h
deleted file mode 100644
index d0687ec59..000000000
--- a/contrib/moses2/defer/CubePruningBitmapStack/Stack.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Stack.h
- *
- * Created on: 24 Oct 2015
- * Author: hieu
- */
-#pragma once
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
-#include <deque>
-#include "../Hypothesis.h"
-#include "../../TypeDef.h"
-#include "../../Vector.h"
-#include "../../MemPool.h"
-#include "../../Recycler.h"
-#include "../../legacy/Util2.h"
-
-namespace Moses2
-{
-
-class Manager;
-
-namespace NSCubePruningBitmapStack
-{
-typedef Vector<const Hypothesis*> Hypotheses;
-
-class MiniStack
-{
-public:
- typedef boost::unordered_set<const Hypothesis*,
- UnorderedComparer<Hypothesis>,
- UnorderedComparer<Hypothesis>
- > _HCType;
-
- MiniStack(const Manager &mgr);
-
- StackAdd Add(const Hypothesis *hypo);
-
- _HCType &GetColl()
- { return m_coll; }
-
- const _HCType &GetColl() const
- { return m_coll; }
-
- void Clear();
-
- Hypotheses &GetSortedAndPruneHypos(const Manager &mgr) const;
-
-protected:
- _HCType m_coll;
- mutable Hypotheses *m_sortedHypos;
-
- void SortAndPruneHypos(const Manager &mgr) const;
-
-};
-
-/////////////////////////////////////////////
-class Stack {
-protected:
-
-
-public:
- typedef std::pair<const Bitmap*, size_t> HypoCoverage;
- // bitmap and current endPos of hypos
- typedef boost::unordered_map<HypoCoverage, Hypotheses*> SortedHypos;
-
- typedef const Bitmap* HypoCoverageInternal;
- typedef boost::unordered_map<HypoCoverageInternal, MiniStack*
- ,boost::hash<HypoCoverageInternal>
- ,std::equal_to<HypoCoverageInternal>
- > Coll;
-
-
- Stack(const Manager &mgr);
- virtual ~Stack();
-
- size_t GetHypoSize() const;
-
- Coll &GetColl()
- { return m_coll; }
- const Coll &GetColl() const
- { return m_coll; }
-
- void Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle);
-
- MiniStack &GetMiniStack(const HypoCoverageInternal &key);
-
- std::vector<const Hypothesis*> GetBestHypos(size_t num) const;
- void Clear();
-
- SortedHypos GetSortedAndPruneHypos(const Manager &mgr) const;
- void SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const;
-
- void DebugCounts();
-
-protected:
- const Manager &m_mgr;
- Coll m_coll;
-
- std::deque<MiniStack*> m_miniStackRecycler;
-
-
-};
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningCardinalStack/Misc.cpp b/contrib/moses2/defer/CubePruningCardinalStack/Misc.cpp
deleted file mode 100644
index 8918fdf52..000000000
--- a/contrib/moses2/defer/CubePruningCardinalStack/Misc.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * CubePruning.cpp
- *
- * Created on: 27 Nov 2015
- * Author: hieu
- */
-
-#include "Misc.h"
-#include "Stack.h"
-#include "../Manager.h"
-#include "../../MemPool.h"
-#include "../../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningCardinalStack
-{
-
-////////////////////////////////////////////////////////////////////////
-QueueItem *QueueItem::Create(QueueItem *currItem,
- Manager &mgr,
- CubeEdge &edge,
- size_t hypoIndex,
- size_t tpIndex,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- QueueItem *ret;
- if (currItem) {
- // reuse incoming queue item to create new item
- ret = currItem;
- ret->Init(mgr, edge, hypoIndex, tpIndex);
- }
- else if (!queueItemRecycler.empty()) {
- // use item from recycle bin
- ret = queueItemRecycler.back();
- ret->Init(mgr, edge, hypoIndex, tpIndex);
- queueItemRecycler.pop_back();
- }
- else {
- // create new item
- ret = new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge, hypoIndex, tpIndex);
- }
-
- return ret;
-}
-
-QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
-:edge(&edge)
-,hypoIndex(hypoIndex)
-,tpIndex(tpIndex)
-{
- CreateHypothesis(mgr);
-}
-
-void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
-{
- this->edge = &edge;
- this->hypoIndex = hypoIndex;
- this->tpIndex = tpIndex;
-
- CreateHypothesis(mgr);
-}
-
-void QueueItem::CreateHypothesis(Manager &mgr)
-{
- const Hypothesis *prevHypo = edge->hypos[hypoIndex];
- const TargetPhrase &tp = edge->tps[tpIndex];
-
- //cerr << "hypoIndex=" << hypoIndex << endl;
- //cerr << "edge.hypos=" << edge.hypos.size() << endl;
- //cerr << prevHypo << endl;
- //cerr << *prevHypo << endl;
-
- hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
- hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore);
- hypo->EvaluateWhenApplied();
-}
-
-////////////////////////////////////////////////////////////////////////
-CubeEdge::CubeEdge(
- Manager &mgr,
- const Hypotheses &hypos,
- const InputPath &path,
- const TargetPhrases &tps,
- const Bitmap &newBitmap)
-:hypos(hypos)
-,path(path)
-,tps(tps)
-,newBitmap(newBitmap)
-{
- estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
-}
-
-std::ostream& operator<<(std::ostream &out, const CubeEdge &obj)
-{
- out << obj.newBitmap;
- return out;
-}
-
-bool
-CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const
-{
- //UTIL_THROW_IF2(x >= (1<<17), "Error");
- //UTIL_THROW_IF2(y >= (1<<17), "Error");
-
- SeenPositionItem val(this, (x<<16) + y);
- std::pair<SeenPositions::iterator, bool> pairRet = seenPositions.insert(val);
- return pairRet.second;
-}
-
-void CubeEdge::CreateFirst(Manager &mgr,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- assert(hypos.size());
- assert(tps.GetSize());
-
- QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler);
- queue.push(item);
- bool setSeen = SetSeenPosition(0, 0, seenPositions);
- assert(setSeen);
-}
-
-void CubeEdge::CreateNext(Manager &mgr,
- QueueItem *item,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- size_t hypoIndex = item->hypoIndex;
- size_t tpIndex = item->tpIndex;
-
- if (hypoIndex + 1 < hypos.size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) {
- // reuse incoming queue item to create new item
- QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler);
- assert(newItem == item);
- queue.push(newItem);
- item = NULL;
- }
-
- if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) {
- QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler);
- queue.push(newItem);
- item = NULL;
- }
-
- if (item) {
- // recycle unused queue item
- queueItemRecycler.push_back(item);
- }
-}
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningCardinalStack/Misc.h b/contrib/moses2/defer/CubePruningCardinalStack/Misc.h
deleted file mode 100644
index b86c88519..000000000
--- a/contrib/moses2/defer/CubePruningCardinalStack/Misc.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * CubePruning.h
- *
- * Created on: 27 Nov 2015
- * Author: hieu
- */
-#pragma once
-#include <boost/pool/pool_alloc.hpp>
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
-#include <vector>
-#include <queue>
-#include "../../legacy/Range.h"
-#include "../Hypothesis.h"
-#include "../../TypeDef.h"
-#include "../../Vector.h"
-#include "Stack.h"
-
-namespace Moses2
-{
-
-class Manager;
-class InputPath;
-class TargetPhrases;
-class Bitmap;
-
-namespace NSCubePruningCardinalStack
-{
-class CubeEdge;
-
-///////////////////////////////////////////
-class QueueItem
-{
- ~QueueItem(); // NOT IMPLEMENTED. Use MemPool
-public:
- static QueueItem *Create(QueueItem *currItem,
- Manager &mgr,
- CubeEdge &edge,
- size_t hypoIndex,
- size_t tpIndex,
- std::deque<QueueItem*> &queueItemRecycler);
- QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
-
- void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
-
- CubeEdge *edge;
- size_t hypoIndex, tpIndex;
- Hypothesis *hypo;
-
-protected:
- void CreateHypothesis(Manager &mgr);
-};
-
-///////////////////////////////////////////
-class QueueItemOrderer
-{
-public:
- bool operator()(QueueItem* itemA, QueueItem* itemB) const {
- HypothesisFutureScoreOrderer orderer;
- return !orderer(itemA->hypo, itemB->hypo);
- }
-};
-
-///////////////////////////////////////////
-class CubeEdge
-{
- friend std::ostream& operator<<(std::ostream &, const CubeEdge &);
-
-public:
- typedef std::priority_queue<QueueItem*,
- std::vector<QueueItem*>,
- QueueItemOrderer> Queue;
-
- typedef std::pair<const CubeEdge*, int> SeenPositionItem;
- typedef boost::unordered_set<SeenPositionItem,
- boost::hash<SeenPositionItem>,
- std::equal_to<SeenPositionItem>
- > SeenPositions;
-
- const Hypotheses &hypos;
- const InputPath &path;
- const TargetPhrases &tps;
- const Bitmap &newBitmap;
- SCORE estimatedScore;
-
- CubeEdge(Manager &mgr,
- const Hypotheses &hypos,
- const InputPath &path,
- const TargetPhrases &tps,
- const Bitmap &newBitmap);
-
- bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const;
-
- void CreateFirst(Manager &mgr,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler);
- void CreateNext(Manager &mgr,
- QueueItem *item,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler);
-
-protected:
-
-};
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningCardinalStack/Search.cpp b/contrib/moses2/defer/CubePruningCardinalStack/Search.cpp
deleted file mode 100644
index d4899ae46..000000000
--- a/contrib/moses2/defer/CubePruningCardinalStack/Search.cpp
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * Search.cpp
- *
- * Created on: 16 Nov 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include "Search.h"
-#include "Stack.h"
-#include "../Manager.h"
-#include "../Hypothesis.h"
-#include "../../InputPaths.h"
-#include "../../InputPath.h"
-#include "../../System.h"
-#include "../../Sentence.h"
-#include "../../TranslationTask.h"
-#include "../../legacy/Util2.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningCardinalStack
-{
-
-////////////////////////////////////////////////////////////////////////
-Search::Search(Manager &mgr)
-:Moses2::Search(mgr)
-,m_stack(mgr)
-
-,m_queue(QueueItemOrderer(), std::vector<QueueItem* >() )
-
-,m_seenPositions()
-{
-}
-
-Search::~Search()
-{
-}
-
-void Search::Decode()
-{
- // init cue edges
- m_cubeEdges.resize(mgr.GetInput().GetSize() + 1);
- for (size_t i = 0; i < m_cubeEdges.size(); ++i) {
- m_cubeEdges[i] = new (mgr.GetPool().Allocate<CubeEdges>()) CubeEdges();
- }
-
- const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
- Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
- initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap);
- initHypo->EmptyHypothesisState(mgr.GetInput());
-
- m_stack.Add(initHypo, mgr.GetHypoRecycle());
- PostDecode(0);
-
- for (size_t stackInd = 1; stackInd < mgr.GetInput().GetSize() + 1; ++stackInd) {
- //cerr << "stackInd=" << stackInd << endl;
- m_stack.Clear();
- Decode(stackInd);
- PostDecode(stackInd);
-
- //m_stack.DebugCounts();
- //cerr << m_stacks << endl;
- }
-
-}
-
-void Search::Decode(size_t stackInd)
-{
- Recycler<Hypothesis*> &hypoRecycler = mgr.GetHypoRecycle();
-
- // reuse queue from previous stack. Clear it first
- std::vector<QueueItem*> &container = Container(m_queue);
- //cerr << "container=" << container.size() << endl;
- BOOST_FOREACH(QueueItem *item, container) {
- // recycle unused hypos from queue
- Hypothesis *hypo = item->hypo;
- hypoRecycler.Recycle(hypo);
-
- // recycle queue item
- m_queueItemRecycler.push_back(item);
- }
- container.clear();
-
- m_seenPositions.clear();
-
- // add top hypo from every edge into queue
- CubeEdges &edges = *m_cubeEdges[stackInd];
-
- BOOST_FOREACH(CubeEdge *edge, edges) {
- //cerr << *edge << " ";
- edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
- }
-
- /*
- cerr << "edges: ";
- boost::unordered_set<const Bitmap*> uniqueBM;
- BOOST_FOREACH(CubeEdge *edge, edges) {
- uniqueBM.insert(&edge->newBitmap);
- //cerr << *edge << " ";
- }
- cerr << edges.size() << " " << uniqueBM.size();
- cerr << endl;
- */
-
- size_t pops = 0;
- while (!m_queue.empty() && pops < mgr.system.popLimit) {
- // get best hypo from queue, add to stack
- //cerr << "queue=" << queue.size() << endl;
- QueueItem *item = m_queue.top();
- m_queue.pop();
-
- CubeEdge *edge = item->edge;
-
- // add hypo to stack
- Hypothesis *hypo = item->hypo;
- //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
- m_stack.Add(hypo, hypoRecycler);
-
- edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
-
- ++pops;
- }
-
- /*
- // create hypo from every edge. Increase diversity
- while (!m_queue.empty()) {
- QueueItem *item = m_queue.top();
- m_queue.pop();
-
- if (item->hypoIndex == 0 && item->tpIndex == 0) {
- CubeEdge &edge = item->edge;
-
- // add hypo to stack
- Hypothesis *hypo = item->hypo;
- //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
- m_stacks.Add(hypo, mgr.GetHypoRecycle());
- }
- }
- */
-}
-
-void Search::PostDecode(size_t stackInd)
-{
- MemPool &pool = mgr.GetPool();
-
- Stack::SortedHypos sortedHypos = m_stack.GetSortedAndPruneHypos(mgr);
-
- BOOST_FOREACH(const Stack::SortedHypos::value_type &val, sortedHypos) {
- const Bitmap &hypoBitmap = *val.first.first;
- size_t hypoEndPos = val.first.second;
- //cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl;
-
- // create edges to next hypos from existing hypos
- const InputPaths &paths = mgr.GetInputPaths();
-
- BOOST_FOREACH(const InputPath *path, paths) {
- const Range &pathRange = path->range;
- //cerr << "pathRange=" << pathRange << endl;
-
- if (!path->IsUsed()) {
- continue;
- }
- if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
- continue;
- }
-
- const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
- size_t numWords = newBitmap.GetNumWordsCovered();
-
- CubeEdges &edges = *m_cubeEdges[numWords];
-
- // sort hypo for a particular bitmap and hypoEndPos
- Hypotheses &sortedHypos = *val.second;
-
- size_t numPt = mgr.system.mappings.size();
- for (size_t i = 0; i < numPt; ++i) {
- const TargetPhrases *tps = path->targetPhrases[i];
- if (tps && tps->GetSize()) {
- CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, sortedHypos, *path, *tps, newBitmap);
- edges.push_back(edge);
- }
- }
- }
- }
-
-}
-
-const Hypothesis *Search::GetBestHypo() const
-{
- std::vector<const Hypothesis*> sortedHypos = m_stack.GetBestHypos(1);
-
- const Hypothesis *best = NULL;
- if (sortedHypos.size()) {
- best = sortedHypos[0];
- }
- return best;
-}
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningCardinalStack/Search.h b/contrib/moses2/defer/CubePruningCardinalStack/Search.h
deleted file mode 100644
index e772926a2..000000000
--- a/contrib/moses2/defer/CubePruningCardinalStack/Search.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Search.h
- *
- * Created on: 16 Nov 2015
- * Author: hieu
- */
-
-#pragma once
-#include <boost/pool/pool_alloc.hpp>
-#include "../Search.h"
-#include "Misc.h"
-#include "Stack.h"
-#include "../../legacy/Range.h"
-
-namespace Moses2
-{
-
-class Bitmap;
-class Hypothesis;
-class InputPath;
-class TargetPhrases;
-
-namespace NSCubePruningCardinalStack
-{
-
-class Search : public Moses2::Search
-{
-public:
- Search(Manager &mgr);
- virtual ~Search();
-
- virtual void Decode();
- const Hypothesis *GetBestHypo() const;
-
-protected:
- Stack m_stack;
-
- CubeEdge::Queue m_queue;
- CubeEdge::SeenPositions m_seenPositions;
-
- // CUBE PRUNING VARIABLES
- // setup
- typedef std::vector<CubeEdge*> CubeEdges;
- std::vector<CubeEdges*> m_cubeEdges;
-
- std::deque<QueueItem*> m_queueItemRecycler;
-
- // CUBE PRUNING
- // decoding
- void Decode(size_t stackInd);
- void PostDecode(size_t stackInd);
-};
-
-}
-
-}
-
diff --git a/contrib/moses2/defer/CubePruningCardinalStack/Stack.cpp b/contrib/moses2/defer/CubePruningCardinalStack/Stack.cpp
deleted file mode 100644
index 0c296d8ca..000000000
--- a/contrib/moses2/defer/CubePruningCardinalStack/Stack.cpp
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
- * Stack.cpp
- *
- * Created on: 24 Oct 2015
- * Author: hieu
- */
-#include <algorithm>
-#include <boost/foreach.hpp>
-#include "Stack.h"
-#include "../Hypothesis.h"
-#include "../Manager.h"
-#include "../../Scores.h"
-#include "../../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningCardinalStack
-{
-
-///////////////////////////////////////////////////////////////
-Stack::Stack(const Manager &mgr)
-:m_mgr(mgr)
-,m_coll()
-{
-}
-
-Stack::~Stack() {
- // TODO Auto-generated destructor stub
-}
-
-void Stack::Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle)
-{
- std::pair<_HCType::iterator, bool> addRet = m_coll.insert(hypo);
-
- // CHECK RECOMBINATION
- if (addRet.second) {
- // equiv hypo doesn't exists
- }
- else {
- const Hypothesis *hypoExisting = *addRet.first;
- if (hypo->GetScores().GetTotalScore() > hypoExisting->GetScores().GetTotalScore()) {
- // incoming hypo is better than the one we have
- const Hypothesis *const &hypoExisting1 = *addRet.first;
- const Hypothesis *&hypoExisting2 = const_cast<const Hypothesis *&>(hypoExisting1);
- hypoExisting2 = hypo;
-
- Hypothesis *hypoToBeDeleted = const_cast<Hypothesis*>(hypoExisting);
- hypoRecycle.Recycle(hypoToBeDeleted);
- }
- else {
- // already storing the best hypo. discard incoming hypo
- Hypothesis *hypoToBeDeleted = const_cast<Hypothesis*>(hypo);
- hypoRecycle.Recycle(hypoToBeDeleted);
- }
- }
-}
-
-std::vector<const Hypothesis*> Stack::GetBestHypos(size_t num) const
-{
- std::vector<const Hypothesis*> ret;
- ret.insert(ret.end(), m_coll.begin(), m_coll.end());
-
- std::vector<const Hypothesis*>::iterator iterMiddle;
- iterMiddle = (num == 0 || ret.size() < num)
- ? ret.end()
- : ret.begin()+num;
-
- std::partial_sort(ret.begin(), iterMiddle, ret.end(),
- HypothesisFutureScoreOrderer());
-
- return ret;
-}
-
-size_t Stack::GetHypoSize() const
-{
- return m_coll.size();
-}
-
-void Stack::Clear()
-{
-
- m_coll.clear();
-}
-
-Stack::SortedHypos Stack::GetSortedAndPruneHypos(const Manager &mgr) const
-{
- SortedHypos ret;
-
- MemPool &pool = mgr.GetPool();
-
- // prune and sort
- Hypotheses *allHypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool, GetHypoSize());
- size_t i = 0;
- BOOST_FOREACH(const Hypothesis *hypo, m_coll) {
- (*allHypos)[i++] = hypo;
- }
- SortAndPruneHypos(mgr, *allHypos);
-
- // divide hypos by [bitmap, last end pos]
- BOOST_FOREACH(const Hypothesis *hypo, *allHypos) {
- HypoCoverage key(&hypo->GetBitmap(), hypo->GetInputPath().range.GetEndPos());
-
- Hypotheses *hypos;
- SortedHypos::iterator iter;
- iter = ret.find(key);
- if (iter == ret.end()) {
- hypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool);
- ret[key] = hypos;
- }
- else {
- hypos = iter->second;
- }
- hypos->push_back(hypo);
- }
-
- return ret;
-}
-
-
-//Stack::SortedHypos Stack::GetSortedAndPruneHypos(const Manager &mgr) const
-//{
-// SortedHypos ret;
-//
-// MemPool &pool = mgr.GetPool();
-//
-// // divide hypos by [bitmap, last end pos]
-// BOOST_FOREACH(const Hypothesis *hypo, m_coll) {
-// HypoCoverage key(&hypo->GetBitmap(), hypo->GetInputPath().range.GetEndPos());
-//
-// Hypotheses *hypos;
-// SortedHypos::iterator iter;
-// iter = ret.find(key);
-// if (iter == ret.end()) {
-// hypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool);
-// ret[key] = hypos;
-// }
-// else {
-// hypos = iter->second;
-// }
-// hypos->push_back(hypo);
-// }
-//
-// // put into real return variable and sort
-// BOOST_FOREACH(SortedHypos::value_type &val, ret) {
-// Hypotheses &hypos = *val.second;
-// SortAndPruneHypos(mgr, hypos);
-// }
-//
-// return ret;
-//}
-
-void Stack::SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const
-{
- size_t stackSize = mgr.system.stackSize;
- Recycler<Hypothesis*> &recycler = mgr.GetHypoRecycle();
-
- /*
- cerr << "UNSORTED hypos:" << endl;
- for (size_t i = 0; i < hypos.size(); ++i) {
- const Hypothesis *hypo = hypos[i];
- cerr << *hypo << endl;
- }
- cerr << endl;
- */
- Hypotheses::iterator iterMiddle;
- iterMiddle = (stackSize == 0 || hypos.size() < stackSize)
- ? hypos.end()
- : hypos.begin() + stackSize;
-
- std::partial_sort(hypos.begin(), iterMiddle, hypos.end(),
- HypothesisFutureScoreOrderer());
-
- // prune
- if (stackSize && hypos.size() > stackSize) {
- for (size_t i = stackSize; i < hypos.size(); ++i) {
- Hypothesis *hypo = const_cast<Hypothesis*>(hypos[i]);
- recycler.Recycle(hypo);
- }
- hypos.resize(stackSize);
- }
-
- /*
- cerr << "sorted hypos:" << endl;
- for (size_t i = 0; i < hypos.size(); ++i) {
- const Hypothesis *hypo = hypos[i];
- cerr << hypo << " " << *hypo << endl;
- }
- cerr << endl;
- */
-
-}
-
-
-}
-
-}
-
diff --git a/contrib/moses2/defer/CubePruningCardinalStack/Stack.h b/contrib/moses2/defer/CubePruningCardinalStack/Stack.h
deleted file mode 100644
index d6ae80577..000000000
--- a/contrib/moses2/defer/CubePruningCardinalStack/Stack.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Stack.h
- *
- * Created on: 24 Oct 2015
- * Author: hieu
- */
-#pragma once
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
-#include <deque>
-#include "../Hypothesis.h"
-#include "../../TypeDef.h"
-#include "../../Vector.h"
-#include "../../MemPool.h"
-#include "../../Recycler.h"
-#include "../../legacy/Util2.h"
-
-namespace Moses2
-{
-
-class Manager;
-
-namespace NSCubePruningCardinalStack
-{
-typedef Vector<const Hypothesis*> Hypotheses;
-
-
-/////////////////////////////////////////////
-class Stack {
-protected:
- typedef boost::unordered_set<const Hypothesis*,
- UnorderedComparer<Hypothesis>,
- UnorderedComparer<Hypothesis>
- > _HCType;
-
-public:
- typedef std::pair<const Bitmap*, size_t> HypoCoverage;
- typedef boost::unordered_map<HypoCoverage, Hypotheses*> SortedHypos;
-
- Stack(const Manager &mgr);
- virtual ~Stack();
-
- size_t GetHypoSize() const;
-
- _HCType &GetColl()
- { return m_coll; }
- const _HCType &GetColl() const
- { return m_coll; }
-
- void Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle);
-
- std::vector<const Hypothesis*> GetBestHypos(size_t num) const;
- void Clear();
-
- SortedHypos GetSortedAndPruneHypos(const Manager &mgr) const;
- void SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const;
-
-protected:
- const Manager &m_mgr;
- _HCType m_coll;
-
-};
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Misc.cpp b/contrib/moses2/defer/CubePruningPerBitmap/Misc.cpp
deleted file mode 100644
index 7b324e244..000000000
--- a/contrib/moses2/defer/CubePruningPerBitmap/Misc.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * CubePruning.cpp
- *
- * Created on: 27 Nov 2015
- * Author: hieu
- */
-
-#include "Misc.h"
-#include "../Manager.h"
-#include "../../MemPool.h"
-#include "../../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningPerBitmap
-{
-
-////////////////////////////////////////////////////////////////////////
-QueueItem *QueueItem::Create(QueueItem *currItem,
- Manager &mgr,
- CubeEdge &edge,
- size_t hypoIndex,
- size_t tpIndex,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- QueueItem *ret;
- if (currItem) {
- // reuse incoming queue item to create new item
- ret = currItem;
- ret->Init(mgr, edge, hypoIndex, tpIndex);
- }
- else if (!queueItemRecycler.empty()) {
- // use item from recycle bin
- ret = queueItemRecycler.back();
- ret->Init(mgr, edge, hypoIndex, tpIndex);
- queueItemRecycler.pop_back();
- }
- else {
- // create new item
- ret = new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge, hypoIndex, tpIndex);
- }
-
- return ret;
-}
-
-QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
-:edge(&edge)
-,hypoIndex(hypoIndex)
-,tpIndex(tpIndex)
-{
- CreateHypothesis(mgr);
-}
-
-void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
-{
- this->edge = &edge;
- this->hypoIndex = hypoIndex;
- this->tpIndex = tpIndex;
-
- CreateHypothesis(mgr);
-}
-
-void QueueItem::CreateHypothesis(Manager &mgr)
-{
- const Hypothesis *prevHypo = edge->miniStack.GetSortedAndPruneHypos(mgr)[hypoIndex];
- const TargetPhrase &tp = edge->tps[tpIndex];
-
- //cerr << "hypoIndex=" << hypoIndex << endl;
- //cerr << "edge.hypos=" << edge.hypos.size() << endl;
- //cerr << prevHypo << endl;
- //cerr << *prevHypo << endl;
-
- hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
- hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore);
- hypo->EvaluateWhenApplied();
-}
-
-////////////////////////////////////////////////////////////////////////
-CubeEdge::CubeEdge(
- Manager &mgr,
- const NSCubePruningMiniStack::MiniStack &miniStack,
- const InputPath &path,
- const TargetPhrases &tps,
- const Bitmap &newBitmap)
-:miniStack(miniStack)
-,path(path)
-,tps(tps)
-,newBitmap(newBitmap)
-{
- estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
-}
-
-std::ostream& operator<<(std::ostream &out, const CubeEdge &obj)
-{
- out << obj.newBitmap;
- return out;
-}
-
-bool
-CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const
-{
- //UTIL_THROW_IF2(x >= (1<<17), "Error");
- //UTIL_THROW_IF2(y >= (1<<17), "Error");
-
- SeenPositionItem val(this, (x<<16) + y);
- std::pair<SeenPositions::iterator, bool> pairRet = seenPositions.insert(val);
- return pairRet.second;
-}
-
-void CubeEdge::CreateFirst(Manager &mgr,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- if (miniStack.GetSortedAndPruneHypos(mgr).size()) {
- assert(tps.GetSize());
-
- QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler);
- queue.push(item);
- bool setSeen = SetSeenPosition(0, 0, seenPositions);
- assert(setSeen);
- }
-}
-
-void CubeEdge::CreateNext(Manager &mgr,
- QueueItem *item,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- size_t hypoIndex = item->hypoIndex;
- size_t tpIndex = item->tpIndex;
-
- if (hypoIndex + 1 < miniStack.GetSortedAndPruneHypos(mgr).size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) {
- // reuse incoming queue item to create new item
- QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler);
- assert(newItem == item);
- queue.push(newItem);
- item = NULL;
- }
-
- if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) {
- QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler);
- queue.push(newItem);
- item = NULL;
- }
-
- if (item) {
- // recycle unused queue item
- queueItemRecycler.push_back(item);
- }
-}
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Misc.h b/contrib/moses2/defer/CubePruningPerBitmap/Misc.h
deleted file mode 100644
index 77b5ba9c3..000000000
--- a/contrib/moses2/defer/CubePruningPerBitmap/Misc.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * CubePruning.h
- *
- * Created on: 27 Nov 2015
- * Author: hieu
- */
-#pragma once
-#include <boost/pool/pool_alloc.hpp>
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
-#include <vector>
-#include <queue>
-#include "../../legacy/Range.h"
-#include "../Hypothesis.h"
-#include "../../TypeDef.h"
-#include "../../Vector.h"
-#include "../CubePruningMiniStack/Stack.h"
-
-namespace Moses2
-{
-
-class Manager;
-class InputPath;
-class TargetPhrases;
-class Bitmap;
-
-namespace NSCubePruningPerBitmap
-{
-class CubeEdge;
-
-///////////////////////////////////////////
-class QueueItem
-{
- ~QueueItem(); // NOT IMPLEMENTED. Use MemPool
-public:
- static QueueItem *Create(QueueItem *currItem,
- Manager &mgr,
- CubeEdge &edge,
- size_t hypoIndex,
- size_t tpIndex,
- std::deque<QueueItem*> &queueItemRecycler);
- QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
-
- void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
-
- CubeEdge *edge;
- size_t hypoIndex, tpIndex;
- Hypothesis *hypo;
-
-protected:
- void CreateHypothesis(Manager &mgr);
-};
-
-///////////////////////////////////////////
-class QueueItemOrderer
-{
-public:
- bool operator()(QueueItem* itemA, QueueItem* itemB) const {
- HypothesisFutureScoreOrderer orderer;
- return !orderer(itemA->hypo, itemB->hypo);
- }
-};
-
-///////////////////////////////////////////
-class CubeEdge
-{
- friend std::ostream& operator<<(std::ostream &, const CubeEdge &);
-
-public:
- typedef std::priority_queue<QueueItem*,
- std::vector<QueueItem*>,
- QueueItemOrderer> Queue;
-
- typedef std::pair<const CubeEdge*, int> SeenPositionItem;
- typedef boost::unordered_set<SeenPositionItem,
- boost::hash<SeenPositionItem>,
- std::equal_to<SeenPositionItem>
- > SeenPositions;
-
- const NSCubePruningMiniStack::MiniStack &miniStack;
- const InputPath &path;
- const TargetPhrases &tps;
- const Bitmap &newBitmap;
- SCORE estimatedScore;
-
- CubeEdge(Manager &mgr,
- const NSCubePruningMiniStack::MiniStack &miniStack,
- const InputPath &path,
- const TargetPhrases &tps,
- const Bitmap &newBitmap);
-
- bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const;
-
- void CreateFirst(Manager &mgr,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler);
- void CreateNext(Manager &mgr,
- QueueItem *item,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler);
-
-
-protected:
-
-};
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Search.cpp b/contrib/moses2/defer/CubePruningPerBitmap/Search.cpp
deleted file mode 100644
index b0eddcc21..000000000
--- a/contrib/moses2/defer/CubePruningPerBitmap/Search.cpp
+++ /dev/null
@@ -1,273 +0,0 @@
-/*
- * Search.cpp
- *
- * Created on: 16 Nov 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include "Search.h"
-#include "../Manager.h"
-#include "../Hypothesis.h"
-#include "../../InputPaths.h"
-#include "../../InputPath.h"
-#include "../../System.h"
-#include "../../Sentence.h"
-#include "../../TranslationTask.h"
-#include "../../legacy/Util2.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningPerBitmap
-{
-
-////////////////////////////////////////////////////////////////////////
-Search::Search(Manager &mgr)
-:Moses2::Search(mgr)
-,m_stacks(mgr)
-
-,m_queue(QueueItemOrderer(),
- std::vector<QueueItem*>() )
-
-,m_seenPositions()
-{
-}
-
-Search::~Search()
-{
-}
-
-void Search::Decode()
-{
- // init stacks
- m_stacks.Init(mgr.GetInput().GetSize() + 1);
-
- const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
- Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
- initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap);
- initHypo->EmptyHypothesisState(mgr.GetInput());
-
- m_stacks.Add(initHypo, mgr.GetHypoRecycle());
-
- for (size_t stackInd = 0; stackInd < m_stacks.GetSize() - 1; ++stackInd) {
- CreateSearchGraph(stackInd);
- }
-
- for (size_t stackInd = 1; stackInd < m_stacks.GetSize(); ++stackInd) {
- //cerr << "stackInd=" << stackInd << endl;
- Decode(stackInd);
-
- //cerr << m_stacks << endl;
- }
-
- //DebugCounts();
-}
-
-void Search::Decode(size_t stackInd)
-{
- NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
-
- // FOR EACH BITMAP IN EACH STACK
- boost::unordered_map<const Bitmap*, vector<NSCubePruningMiniStack::MiniStack*> > uniqueBM;
-
- BOOST_FOREACH(NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
- NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
-
- const Bitmap *bitmap = val.first.first;
- uniqueBM[bitmap].push_back(&miniStack);
- }
-
- // decode each bitmap
- boost::unordered_map<const Bitmap*, vector<NSCubePruningMiniStack::MiniStack*> >::iterator iter;
- for (iter = uniqueBM.begin(); iter != uniqueBM.end(); ++iter) {
- const vector<NSCubePruningMiniStack::MiniStack*> &miniStacks = iter->second;
- Decode(miniStacks);
- }
-
- /*
- // FOR EACH STACK
- vector<NSCubePruningMiniStack::MiniStack*> miniStacks;
- BOOST_FOREACH(NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
- NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
-
- miniStacks.push_back(&miniStack);
- }
- Decode(miniStacks);
- */
-}
-
-void Search::Decode(const vector<NSCubePruningMiniStack::MiniStack*> &miniStacks)
-{
- Recycler<Hypothesis*> &hypoRecycler = mgr.GetHypoRecycle();
-
- // reuse queue from previous stack. Clear it first
- std::vector<QueueItem*> &container = Container(m_queue);
- //cerr << "container=" << container.size() << endl;
- BOOST_FOREACH(QueueItem *item, container) {
- // recycle unused hypos from queue
- Hypothesis *hypo = item->hypo;
- hypoRecycler.Recycle(hypo);
-
- // recycle queue item
- m_queueItemRecycler.push_back(item);
- }
- container.clear();
-
- m_seenPositions.clear();
-
- BOOST_FOREACH(NSCubePruningMiniStack::MiniStack *miniStack, miniStacks) {
- // add top hypo from every edge into queue
- CubeEdges &edges = *m_cubeEdges[miniStack];
-
- BOOST_FOREACH(CubeEdge *edge, edges) {
- //cerr << "edge=" << *edge << endl;
- edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
- }
- }
-
- size_t pops = 0;
- while (!m_queue.empty() && pops < mgr.system.popLimit) {
- // get best hypo from queue, add to stack
- //cerr << "queue=" << queue.size() << endl;
- QueueItem *item = m_queue.top();
- m_queue.pop();
-
- CubeEdge *edge = item->edge;
-
- // add hypo to stack
- Hypothesis *hypo = item->hypo;
- //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
- m_stacks.Add(hypo, hypoRecycler);
-
- edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
-
- ++pops;
- }
-
- /*
- // create hypo from every edge. Increase diversity
- while (!m_queue.empty()) {
- QueueItem *item = m_queue.top();
- m_queue.pop();
-
- if (item->hypoIndex == 0 && item->tpIndex == 0) {
- CubeEdge &edge = item->edge;
-
- // add hypo to stack
- Hypothesis *hypo = item->hypo;
- //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
- m_stacks.Add(hypo, mgr.GetHypoRecycle());
- }
- }
- */
-}
-
-
-void Search::CreateSearchGraph(size_t stackInd)
-{
- NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
- MemPool &pool = mgr.GetPool();
-
- BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
- const Bitmap &hypoBitmap = *val.first.first;
- size_t hypoEndPos = val.first.second;
- //cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl;
-
- // create edges to next hypos from existing hypos
- const InputPaths &paths = mgr.GetInputPaths();
-
- BOOST_FOREACH(const InputPath *path, paths) {
- const Range &pathRange = path->range;
- //cerr << "pathRange=" << pathRange << endl;
-
- if (!path->IsUsed()) {
- continue;
- }
- if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
- continue;
- }
-
- const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
-
- // sort hypo for a particular bitmap and hypoEndPos
- const NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
-
-
- // add cube edge
- size_t numPt = mgr.system.mappings.size();
- for (size_t i = 0; i < numPt; ++i) {
- const TargetPhrases *tps = path->targetPhrases[i];
- if (tps && tps->GetSize()) {
- // create next mini stack
- NSCubePruningMiniStack::MiniStack &nextMiniStack = m_stacks.GetMiniStack(newBitmap, pathRange);
-
- CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, miniStack, *path, *tps, newBitmap);
-
- CubeEdges *edges;
- boost::unordered_map<NSCubePruningMiniStack::MiniStack*, CubeEdges*>::iterator iter = m_cubeEdges.find(&nextMiniStack);
- if (iter == m_cubeEdges.end()) {
- edges = new (pool.Allocate<CubeEdges>()) CubeEdges();
- m_cubeEdges[&nextMiniStack] = edges;
- }
- else {
- edges = iter->second;
- }
-
- edges->push_back(edge);
- }
- }
- }
- }
-
-}
-
-
-const Hypothesis *Search::GetBestHypo() const
-{
- const NSCubePruningMiniStack::Stack &lastStack = m_stacks.Back();
- std::vector<const Hypothesis*> sortedHypos = lastStack.GetBestHypos(1);
-
- const Hypothesis *best = NULL;
- if (sortedHypos.size()) {
- best = sortedHypos[0];
- }
- return best;
-}
-
-void Search::DebugCounts()
-{
- std::map<size_t, size_t> counts;
-
- for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) {
- //cerr << "stackInd=" << stackInd << endl;
- const NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
- BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
- const NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
- size_t count = miniStack.GetColl().size();
-
- if (counts.find(count) == counts.end()) {
- counts[count] = 0;
- }
- else {
- ++counts[count];
- }
- }
- //cerr << m_stacks << endl;
- }
-
- std::map<size_t, size_t>::const_iterator iter;
- for (iter = counts.begin(); iter != counts.end(); ++iter) {
- cerr << iter->first << "=" << iter->second << " ";
- }
- cerr << endl;
-}
-
-
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Search.h b/contrib/moses2/defer/CubePruningPerBitmap/Search.h
deleted file mode 100644
index 913095e25..000000000
--- a/contrib/moses2/defer/CubePruningPerBitmap/Search.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Search.h
- *
- * Created on: 16 Nov 2015
- * Author: hieu
- */
-
-#pragma once
-#include <boost/pool/pool_alloc.hpp>
-#include <boost/unordered_map.hpp>
-#include "../Search.h"
-#include "Misc.h"
-#include "Stacks.h"
-#include "../../legacy/Range.h"
-
-namespace Moses2
-{
-
-class Bitmap;
-class Hypothesis;
-class InputPath;
-class TargetPhrases;
-
-namespace NSCubePruningMiniStack
-{
-class MiniStack;
-}
-
-namespace NSCubePruningPerBitmap
-{
-
-class Search : public Moses2::Search
-{
-public:
- Search(Manager &mgr);
- virtual ~Search();
-
- virtual void Decode();
- const Hypothesis *GetBestHypo() const;
-
-protected:
- Stacks m_stacks;
-
- CubeEdge::Queue m_queue;
- CubeEdge::SeenPositions m_seenPositions;
-
- // CUBE PRUNING VARIABLES
- // setup
- typedef std::vector<CubeEdge*> CubeEdges;
- boost::unordered_map<NSCubePruningMiniStack::MiniStack*, CubeEdges*> m_cubeEdges;
-
- std::deque<QueueItem*> m_queueItemRecycler;
-
- // CUBE PRUNING
- // decoding
- void CreateSearchGraph(size_t stackInd);
- void Decode(size_t stackInd);
- void Decode(const std::vector<NSCubePruningMiniStack::MiniStack*> &miniStacks);
-
- void DebugCounts();
-};
-
-}
-
-}
-
diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Stacks.cpp b/contrib/moses2/defer/CubePruningPerBitmap/Stacks.cpp
deleted file mode 100644
index ca29f52c0..000000000
--- a/contrib/moses2/defer/CubePruningPerBitmap/Stacks.cpp
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Stacks.cpp
- *
- * Created on: 6 Nov 2015
- * Author: hieu
- */
-
-#include "Stacks.h"
-#include "../../System.h"
-#include "../Manager.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningPerBitmap
-{
-
-Stacks::Stacks(const Manager &mgr)
-:m_mgr(mgr)
-{
-}
-
-Stacks::~Stacks()
-{
-}
-
-void Stacks::Init(size_t numStacks)
-{
- m_stacks.resize(numStacks);
- for (size_t i = 0; i < m_stacks.size(); ++i) {
- m_stacks[i] = new (m_mgr.GetPool().Allocate<NSCubePruningMiniStack::Stack>()) NSCubePruningMiniStack::Stack(m_mgr);
- }
-}
-
-
-std::ostream& operator<<(std::ostream &out, const Stacks &obj)
-{
- for (size_t i = 0; i < obj.GetSize(); ++i) {
- const NSCubePruningMiniStack::Stack &stack = *obj.m_stacks[i];
- out << stack.GetHypoSize() << " ";
- }
-
- return out;
-}
-
-void Stacks::Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle)
-{
- size_t numWordsCovered = hypo->GetBitmap().GetNumWordsCovered();
- //cerr << "numWordsCovered=" << numWordsCovered << endl;
- NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered];
- stack.Add(hypo, hypoRecycle);
-
-}
-
-NSCubePruningMiniStack::MiniStack &Stacks::GetMiniStack(const Bitmap &newBitmap, const Range &pathRange)
-{
- size_t numWordsCovered = newBitmap.GetNumWordsCovered();
- //cerr << "numWordsCovered=" << numWordsCovered << endl;
- NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered];
-
- NSCubePruningMiniStack::Stack::HypoCoverage key(&newBitmap, pathRange.GetEndPos());
- stack.GetMiniStack(key);
-
-}
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Stacks.h b/contrib/moses2/defer/CubePruningPerBitmap/Stacks.h
deleted file mode 100644
index 5729fa613..000000000
--- a/contrib/moses2/defer/CubePruningPerBitmap/Stacks.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Stacks.h
- *
- * Created on: 6 Nov 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include <vector>
-#include "../CubePruningMiniStack/Stack.h"
-#include "../../Recycler.h"
-
-namespace Moses2
-{
-class Manager;
-
-namespace NSCubePruningPerBitmap
-{
-
-class Stacks {
- friend std::ostream& operator<<(std::ostream &, const Stacks &);
-public:
- Stacks(const Manager &mgr);
- virtual ~Stacks();
-
- void Init(size_t numStacks);
-
- size_t GetSize() const
- { return m_stacks.size(); }
-
- const NSCubePruningMiniStack::Stack &Back() const
- { return *m_stacks.back(); }
-
- NSCubePruningMiniStack::Stack &operator[](size_t ind)
- { return *m_stacks[ind]; }
-
- void Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle);
- NSCubePruningMiniStack::MiniStack &GetMiniStack(const Bitmap &newBitmap, const Range &pathRange);
-
-protected:
- const Manager &m_mgr;
- std::vector<NSCubePruningMiniStack::Stack*> m_stacks;
-};
-
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningPerMiniStack/Misc.cpp b/contrib/moses2/defer/CubePruningPerMiniStack/Misc.cpp
deleted file mode 100644
index 935882aa0..000000000
--- a/contrib/moses2/defer/CubePruningPerMiniStack/Misc.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * CubePruning.cpp
- *
- * Created on: 27 Nov 2015
- * Author: hieu
- */
-
-#include "Misc.h"
-#include "../Manager.h"
-#include "../../MemPool.h"
-#include "../../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningPerMiniStack
-{
-
-////////////////////////////////////////////////////////////////////////
-QueueItem *QueueItem::Create(QueueItem *currItem,
- Manager &mgr,
- CubeEdge &edge,
- size_t hypoIndex,
- size_t tpIndex,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- QueueItem *ret;
- if (currItem) {
- // reuse incoming queue item to create new item
- ret = currItem;
- ret->Init(mgr, edge, hypoIndex, tpIndex);
- }
- else if (!queueItemRecycler.empty()) {
- // use item from recycle bin
- ret = queueItemRecycler.back();
- ret->Init(mgr, edge, hypoIndex, tpIndex);
- queueItemRecycler.pop_back();
- }
- else {
- // create new item
- ret = new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge, hypoIndex, tpIndex);
- }
-
- return ret;
-}
-
-QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
-:edge(&edge)
-,hypoIndex(hypoIndex)
-,tpIndex(tpIndex)
-{
- CreateHypothesis(mgr);
-}
-
-void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
-{
- this->edge = &edge;
- this->hypoIndex = hypoIndex;
- this->tpIndex = tpIndex;
-
- CreateHypothesis(mgr);
-}
-
-void QueueItem::CreateHypothesis(Manager &mgr)
-{
- const Hypothesis *prevHypo = edge->miniStack.GetSortedAndPruneHypos(mgr)[hypoIndex];
- const TargetPhrase &tp = edge->tps[tpIndex];
-
- //cerr << "hypoIndex=" << hypoIndex << endl;
- //cerr << "edge.hypos=" << edge.hypos.size() << endl;
- //cerr << prevHypo << endl;
- //cerr << *prevHypo << endl;
-
- hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
- hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore);
- hypo->EvaluateWhenApplied();
-}
-
-////////////////////////////////////////////////////////////////////////
-CubeEdge::CubeEdge(
- Manager &mgr,
- const NSCubePruningMiniStack::MiniStack &miniStack,
- const InputPath &path,
- const TargetPhrases &tps,
- const Bitmap &newBitmap)
-:miniStack(miniStack)
-,path(path)
-,tps(tps)
-,newBitmap(newBitmap)
-{
- estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
-}
-
-std::ostream& operator<<(std::ostream &out, const CubeEdge &obj)
-{
- out << obj.newBitmap;
- return out;
-}
-
-bool
-CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const
-{
- //UTIL_THROW_IF2(x >= (1<<17), "Error");
- //UTIL_THROW_IF2(y >= (1<<17), "Error");
-
- SeenPositionItem val(this, (x<<16) + y);
- std::pair<SeenPositions::iterator, bool> pairRet = seenPositions.insert(val);
- return pairRet.second;
-}
-
-void CubeEdge::CreateFirst(Manager &mgr,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- if (miniStack.GetSortedAndPruneHypos(mgr).size()) {
- assert(tps.GetSize());
-
- QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler);
- queue.push(item);
- bool setSeen = SetSeenPosition(0, 0, seenPositions);
- assert(setSeen);
- }
-}
-
-void CubeEdge::CreateNext(Manager &mgr,
- QueueItem *item,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler)
-{
- size_t hypoIndex = item->hypoIndex;
- size_t tpIndex = item->tpIndex;
-
- if (hypoIndex + 1 < miniStack.GetSortedAndPruneHypos(mgr).size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) {
- // reuse incoming queue item to create new item
- QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler);
- assert(newItem == item);
- queue.push(newItem);
- item = NULL;
- }
-
- if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) {
- QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler);
- queue.push(newItem);
- item = NULL;
- }
-
- if (item) {
- // recycle unused queue item
- queueItemRecycler.push_back(item);
- }
-}
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningPerMiniStack/Misc.h b/contrib/moses2/defer/CubePruningPerMiniStack/Misc.h
deleted file mode 100644
index 4a3935422..000000000
--- a/contrib/moses2/defer/CubePruningPerMiniStack/Misc.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * CubePruning.h
- *
- * Created on: 27 Nov 2015
- * Author: hieu
- */
-#pragma once
-#include <boost/pool/pool_alloc.hpp>
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
-#include <vector>
-#include <queue>
-#include "../../legacy/Range.h"
-#include "../Hypothesis.h"
-#include "../../TypeDef.h"
-#include "../../Vector.h"
-#include "../CubePruningMiniStack/Stack.h"
-
-namespace Moses2
-{
-
-class Manager;
-class InputPath;
-class TargetPhrases;
-class Bitmap;
-
-namespace NSCubePruningPerMiniStack
-{
-class CubeEdge;
-
-///////////////////////////////////////////
-class QueueItem
-{
- ~QueueItem(); // NOT IMPLEMENTED. Use MemPool
-public:
- static QueueItem *Create(QueueItem *currItem,
- Manager &mgr,
- CubeEdge &edge,
- size_t hypoIndex,
- size_t tpIndex,
- std::deque<QueueItem*> &queueItemRecycler);
- QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
-
- void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
-
- CubeEdge *edge;
- size_t hypoIndex, tpIndex;
- Hypothesis *hypo;
-
-protected:
- void CreateHypothesis(Manager &mgr);
-};
-
-///////////////////////////////////////////
-class QueueItemOrderer
-{
-public:
- bool operator()(QueueItem* itemA, QueueItem* itemB) const {
- HypothesisFutureScoreOrderer orderer;
- return !orderer(itemA->hypo, itemB->hypo);
- }
-};
-
-///////////////////////////////////////////
-class CubeEdge
-{
- friend std::ostream& operator<<(std::ostream &, const CubeEdge &);
-
-public:
- typedef std::priority_queue<QueueItem*,
- std::vector<QueueItem*>,
- QueueItemOrderer> Queue;
-
- typedef std::pair<const CubeEdge*, int> SeenPositionItem;
- typedef boost::unordered_set<SeenPositionItem,
- boost::hash<SeenPositionItem>,
- std::equal_to<SeenPositionItem>
- > SeenPositions;
-
- const NSCubePruningMiniStack::MiniStack &miniStack;
- const InputPath &path;
- const TargetPhrases &tps;
- const Bitmap &newBitmap;
- SCORE estimatedScore;
-
- CubeEdge(Manager &mgr,
- const NSCubePruningMiniStack::MiniStack &miniStack,
- const InputPath &path,
- const TargetPhrases &tps,
- const Bitmap &newBitmap);
-
- bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const;
-
- void CreateFirst(Manager &mgr,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler);
- void CreateNext(Manager &mgr,
- QueueItem *item,
- Queue &queue,
- SeenPositions &seenPositions,
- std::deque<QueueItem*> &queueItemRecycler);
-
-
-protected:
-
-};
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningPerMiniStack/Search.cpp b/contrib/moses2/defer/CubePruningPerMiniStack/Search.cpp
deleted file mode 100644
index fe993daf0..000000000
--- a/contrib/moses2/defer/CubePruningPerMiniStack/Search.cpp
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * Search.cpp
- *
- * Created on: 16 Nov 2015
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include "Search.h"
-#include "../Manager.h"
-#include "../Hypothesis.h"
-#include "../../InputPaths.h"
-#include "../../InputPath.h"
-#include "../../System.h"
-#include "../../Sentence.h"
-#include "../../TranslationTask.h"
-#include "../../legacy/Util2.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningPerMiniStack
-{
-
-////////////////////////////////////////////////////////////////////////
-Search::Search(Manager &mgr)
-:Moses2::Search(mgr)
-,m_stacks(mgr)
-
-,m_queue(QueueItemOrderer(),
- std::vector<QueueItem*>() )
-
-,m_seenPositions()
-{
-}
-
-Search::~Search()
-{
-}
-
-void Search::Decode()
-{
- // init stacks
- m_stacks.Init(mgr.GetInput().GetSize() + 1);
-
- const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
- Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
- initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap);
- initHypo->EmptyHypothesisState(mgr.GetInput());
-
- m_stacks.Add(initHypo, mgr.GetHypoRecycle());
-
- for (size_t stackInd = 0; stackInd < m_stacks.GetSize() - 1; ++stackInd) {
- CreateSearchGraph(stackInd);
- }
-
- for (size_t stackInd = 1; stackInd < m_stacks.GetSize(); ++stackInd) {
- //cerr << "stackInd=" << stackInd << endl;
- Decode(stackInd);
-
- //cerr << m_stacks << endl;
- }
-
- //DebugCounts();
-}
-
-void Search::Decode(size_t stackInd)
-{
- NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
- BOOST_FOREACH(NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
- NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
- Decode(miniStack);
- }
-
-}
-
-void Search::Decode(NSCubePruningMiniStack::MiniStack &miniStack)
-{
- Recycler<Hypothesis*> &hypoRecycler = mgr.GetHypoRecycle();
-
- // reuse queue from previous stack. Clear it first
- std::vector<QueueItem*> &container = Container(m_queue);
- //cerr << "container=" << container.size() << endl;
- BOOST_FOREACH(QueueItem *item, container) {
- // recycle unused hypos from queue
- Hypothesis *hypo = item->hypo;
- hypoRecycler.Recycle(hypo);
-
- // recycle queue item
- m_queueItemRecycler.push_back(item);
- }
- container.clear();
-
- m_seenPositions.clear();
-
- // add top hypo from every edge into queue
- CubeEdges &edges = *m_cubeEdges[&miniStack];
-
- BOOST_FOREACH(CubeEdge *edge, edges) {
- //cerr << "edge=" << *edge << endl;
- edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
- }
-
- size_t pops = 0;
- while (!m_queue.empty() && pops < mgr.system.popLimit) {
- // get best hypo from queue, add to stack
- //cerr << "queue=" << queue.size() << endl;
- QueueItem *item = m_queue.top();
- m_queue.pop();
-
- CubeEdge *edge = item->edge;
-
- // add hypo to stack
- Hypothesis *hypo = item->hypo;
- //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
- m_stacks.Add(hypo, hypoRecycler);
-
- edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
-
- ++pops;
- }
-
- /*
- // create hypo from every edge. Increase diversity
- while (!m_queue.empty()) {
- QueueItem *item = m_queue.top();
- m_queue.pop();
-
- if (item->hypoIndex == 0 && item->tpIndex == 0) {
- CubeEdge &edge = item->edge;
-
- // add hypo to stack
- Hypothesis *hypo = item->hypo;
- //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
- m_stacks.Add(hypo, mgr.GetHypoRecycle());
- }
- }
- */
-}
-
-
-void Search::CreateSearchGraph(size_t stackInd)
-{
- NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
- MemPool &pool = mgr.GetPool();
-
- BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
- const Bitmap &hypoBitmap = *val.first.first;
- size_t hypoEndPos = val.first.second;
- //cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl;
-
- // create edges to next hypos from existing hypos
- const InputPaths &paths = mgr.GetInputPaths();
-
- BOOST_FOREACH(const InputPath *path, paths) {
- const Range &pathRange = path->range;
- //cerr << "pathRange=" << pathRange << endl;
-
- if (!path->IsUsed()) {
- continue;
- }
- if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
- continue;
- }
-
- const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
-
- // sort hypo for a particular bitmap and hypoEndPos
- const NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
-
-
- // add cube edge
- size_t numPt = mgr.system.mappings.size();
- for (size_t i = 0; i < numPt; ++i) {
- const TargetPhrases *tps = path->targetPhrases[i];
- if (tps && tps->GetSize()) {
- // create next mini stack
- NSCubePruningMiniStack::MiniStack &nextMiniStack = m_stacks.GetMiniStack(newBitmap, pathRange);
-
- CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, miniStack, *path, *tps, newBitmap);
-
- CubeEdges *edges;
- boost::unordered_map<NSCubePruningMiniStack::MiniStack*, CubeEdges*>::iterator iter = m_cubeEdges.find(&nextMiniStack);
- if (iter == m_cubeEdges.end()) {
- edges = new (pool.Allocate<CubeEdges>()) CubeEdges();
- m_cubeEdges[&nextMiniStack] = edges;
- }
- else {
- edges = iter->second;
- }
-
- edges->push_back(edge);
- }
- }
- }
- }
-
-}
-
-
-const Hypothesis *Search::GetBestHypo() const
-{
- const NSCubePruningMiniStack::Stack &lastStack = m_stacks.Back();
- std::vector<const Hypothesis*> sortedHypos = lastStack.GetBestHypos(1);
-
- const Hypothesis *best = NULL;
- if (sortedHypos.size()) {
- best = sortedHypos[0];
- }
- return best;
-}
-
-void Search::DebugCounts()
-{
- std::map<size_t, size_t> counts;
-
- for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) {
- //cerr << "stackInd=" << stackInd << endl;
- const NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
- BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
- const NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
- size_t count = miniStack.GetColl().size();
-
- if (counts.find(count) == counts.end()) {
- counts[count] = 0;
- }
- else {
- ++counts[count];
- }
- }
- //cerr << m_stacks << endl;
- }
-
- std::map<size_t, size_t>::const_iterator iter;
- for (iter = counts.begin(); iter != counts.end(); ++iter) {
- cerr << iter->first << "=" << iter->second << " ";
- }
- cerr << endl;
-}
-
-
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningPerMiniStack/Search.h b/contrib/moses2/defer/CubePruningPerMiniStack/Search.h
deleted file mode 100644
index be256360e..000000000
--- a/contrib/moses2/defer/CubePruningPerMiniStack/Search.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Search.h
- *
- * Created on: 16 Nov 2015
- * Author: hieu
- */
-
-#pragma once
-#include <boost/pool/pool_alloc.hpp>
-#include <boost/unordered_map.hpp>
-#include "../Search.h"
-#include "Misc.h"
-#include "Stacks.h"
-#include "../../legacy/Range.h"
-
-namespace Moses2
-{
-
-class Bitmap;
-class Hypothesis;
-class InputPath;
-class TargetPhrases;
-
-namespace NSCubePruningMiniStack
-{
-class MiniStack;
-}
-
-namespace NSCubePruningPerMiniStack
-{
-
-class Search : public Moses2::Search
-{
-public:
- Search(Manager &mgr);
- virtual ~Search();
-
- virtual void Decode();
- const Hypothesis *GetBestHypo() const;
-
-protected:
- Stacks m_stacks;
-
- CubeEdge::Queue m_queue;
- CubeEdge::SeenPositions m_seenPositions;
-
- // CUBE PRUNING VARIABLES
- // setup
- typedef std::vector<CubeEdge*> CubeEdges;
- boost::unordered_map<NSCubePruningMiniStack::MiniStack*, CubeEdges*> m_cubeEdges;
-
- std::deque<QueueItem*> m_queueItemRecycler;
-
- // CUBE PRUNING
- // decoding
- void CreateSearchGraph(size_t stackInd);
- void Decode(size_t stackInd);
- void Decode(NSCubePruningMiniStack::MiniStack &miniStack);
-
- void DebugCounts();
-};
-
-}
-
-}
-
diff --git a/contrib/moses2/defer/CubePruningPerMiniStack/Stacks.cpp b/contrib/moses2/defer/CubePruningPerMiniStack/Stacks.cpp
deleted file mode 100644
index 86bf5d1b8..000000000
--- a/contrib/moses2/defer/CubePruningPerMiniStack/Stacks.cpp
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Stacks.cpp
- *
- * Created on: 6 Nov 2015
- * Author: hieu
- */
-
-#include "Stacks.h"
-#include "../../System.h"
-#include "../Manager.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-namespace NSCubePruningPerMiniStack
-{
-
-Stacks::Stacks(const Manager &mgr)
-:m_mgr(mgr)
-{
-}
-
-Stacks::~Stacks()
-{
-}
-
-void Stacks::Init(size_t numStacks)
-{
- m_stacks.resize(numStacks);
- for (size_t i = 0; i < m_stacks.size(); ++i) {
- m_stacks[i] = new (m_mgr.GetPool().Allocate<NSCubePruningMiniStack::Stack>()) NSCubePruningMiniStack::Stack(m_mgr);
- }
-}
-
-
-std::ostream& operator<<(std::ostream &out, const Stacks &obj)
-{
- for (size_t i = 0; i < obj.GetSize(); ++i) {
- const NSCubePruningMiniStack::Stack &stack = *obj.m_stacks[i];
- out << stack.GetHypoSize() << " ";
- }
-
- return out;
-}
-
-void Stacks::Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle)
-{
- size_t numWordsCovered = hypo->GetBitmap().GetNumWordsCovered();
- //cerr << "numWordsCovered=" << numWordsCovered << endl;
- NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered];
- stack.Add(hypo, hypoRecycle);
-
-}
-
-NSCubePruningMiniStack::MiniStack &Stacks::GetMiniStack(const Bitmap &newBitmap, const Range &pathRange)
-{
- size_t numWordsCovered = newBitmap.GetNumWordsCovered();
- //cerr << "numWordsCovered=" << numWordsCovered << endl;
- NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered];
-
- NSCubePruningMiniStack::Stack::HypoCoverage key(&newBitmap, pathRange.GetEndPos());
- stack.GetMiniStack(key);
-
-}
-
-}
-
-}
-
-
diff --git a/contrib/moses2/defer/CubePruningPerMiniStack/Stacks.h b/contrib/moses2/defer/CubePruningPerMiniStack/Stacks.h
deleted file mode 100644
index 94ebe4618..000000000
--- a/contrib/moses2/defer/CubePruningPerMiniStack/Stacks.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Stacks.h
- *
- * Created on: 6 Nov 2015
- * Author: hieu
- */
-
-#pragma once
-
-#include <vector>
-#include "../CubePruningMiniStack/Stack.h"
-#include "../../Recycler.h"
-
-namespace Moses2
-{
-class Manager;
-
-namespace NSCubePruningPerMiniStack
-{
-
-class Stacks {
- friend std::ostream& operator<<(std::ostream &, const Stacks &);
-public:
- Stacks(const Manager &mgr);
- virtual ~Stacks();
-
- void Init(size_t numStacks);
-
- size_t GetSize() const
- { return m_stacks.size(); }
-
- const NSCubePruningMiniStack::Stack &Back() const
- { return *m_stacks.back(); }
-
- NSCubePruningMiniStack::Stack &operator[](size_t ind)
- { return *m_stacks[ind]; }
-
- void Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle);
- NSCubePruningMiniStack::MiniStack &GetMiniStack(const Bitmap &newBitmap, const Range &pathRange);
-
-protected:
- const Manager &m_mgr;
- std::vector<NSCubePruningMiniStack::Stack*> m_stacks;
-};
-
-
-}
-
-}
-
-
diff --git a/contrib/moses2/legacy/Bitmap.cpp b/contrib/moses2/legacy/Bitmap.cpp
deleted file mode 100644
index a8dc7db4d..000000000
--- a/contrib/moses2/legacy/Bitmap.cpp
+++ /dev/null
@@ -1,87 +0,0 @@
-// $Id$
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#include <boost/functional/hash.hpp>
-#include "Bitmap.h"
-
-namespace Moses2
-{
-
-Bitmap::Bitmap(MemPool &pool, size_t size) :
- m_bitmap(pool, size)
-{
-}
-
-void Bitmap::Init(const std::vector<bool>& initializer)
-{
-
- for (size_t i = 0; i < initializer.size(); ++i) {
- m_bitmap[i] = initializer[i];
- }
-
- // The initializer may not be of the same length. Change to the desired
- // length. If we need to add any elements, initialize them to false.
- for (size_t i = initializer.size(); i < m_bitmap.size(); ++i) {
- m_bitmap[i] = false;
- }
-
- m_numWordsCovered = std::count(m_bitmap.begin(), m_bitmap.end(), true);
-
- // Find the first gap, and cache it.
- Array<char>::const_iterator first_gap = std::find(m_bitmap.begin(),
- m_bitmap.end(), false);
- m_firstGap = ((first_gap == m_bitmap.end()) ?
- NOT_FOUND: first_gap - m_bitmap.begin());
-}
-
-void Bitmap::Init(const Bitmap &copy, const Range &range)
-{
- m_firstGap = copy.m_firstGap;
- m_numWordsCovered = copy.m_numWordsCovered;
- for (size_t i = 0; i < m_bitmap.size(); ++i) {
- m_bitmap[i] = copy.m_bitmap[i];
- }
- SetValueNonOverlap(range);
-}
-
-// for unordered_set in stack
-size_t Bitmap::hash() const
-{
- size_t ret = m_bitmap.hash();
- return ret;
-}
-
-bool Bitmap::operator==(const Bitmap& other) const
-{
- return m_bitmap == other.m_bitmap;
-}
-
-// friend
-std::ostream& operator<<(std::ostream& out, const Bitmap& bitmap)
-{
- for (size_t i = 0; i < bitmap.m_bitmap.size(); i++) {
- out << int(bitmap.GetValue(i));
- }
- return out;
-}
-
-}
-
diff --git a/contrib/moses2/legacy/Bitmap.h b/contrib/moses2/legacy/Bitmap.h
deleted file mode 100644
index e6a0f7948..000000000
--- a/contrib/moses2/legacy/Bitmap.h
+++ /dev/null
@@ -1,244 +0,0 @@
-// $Id$
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#pragma once
-
-#include <algorithm>
-#include <limits>
-#include <vector>
-#include <iostream>
-#include <cstring>
-#include <cmath>
-#include <cstdlib>
-#include "Range.h"
-#include "../Array.h"
-
-namespace Moses2
-{
-class MemPool;
-
-typedef unsigned long WordsBitmapID;
-
-/** Vector of boolean to represent whether a word has been translated or not.
- *
- * Implemented using a vector of char, which is usually the same representation
- * for the elements that a C array of bool would use. A vector of bool, or a
- * Boost dynamic_bitset, could be much more efficient in theory. Unfortunately
- * algorithms like std::find() are not optimized for vector<bool> on gcc or
- * clang, and dynamic_bitset lacks all the optimized search operations we want.
- * Only benchmarking will tell what works best. Perhaps dynamic_bitset could
- * still be a dramatic improvement, if we flip the meaning of the bits around
- * so we can use its find_first() and find_next() for the most common searches.
- */
-class Bitmap
-{
- friend std::ostream& operator<<(std::ostream& out, const Bitmap& bitmap);
-private:
- Array<char> m_bitmap; //! Ticks of words in sentence that have been done.
- size_t m_firstGap; //! Cached position of first gap, or NOT_FOUND.
- size_t m_numWordsCovered;
-
- Bitmap(); // not implemented
- Bitmap& operator=(const Bitmap& other);
-
- /** Update the first gap, when bits are flipped */
- void UpdateFirstGap(size_t startPos, size_t endPos, bool value)
- {
- if (value) {
- //may remove gap
- if (startPos <= m_firstGap && m_firstGap <= endPos) {
- m_firstGap = NOT_FOUND;
- for (size_t i = endPos + 1; i < m_bitmap.size(); ++i) {
- if (!m_bitmap[i]) {
- m_firstGap = i;
- break;
- }
- }
- }
-
- }
- else {
- //setting positions to false, may add new gap
- if (startPos < m_firstGap) {
- m_firstGap = startPos;
- }
- }
- }
-
- //! set value between 2 positions, inclusive
- void
- SetValueNonOverlap(Range const& range) {
- size_t startPos = range.GetStartPos();
- size_t endPos = range.GetEndPos();
-
- for(size_t pos = startPos; pos <= endPos; pos++) {
- m_bitmap[pos] = true;
- }
-
- m_numWordsCovered += range.GetNumWordsCovered();
- UpdateFirstGap(startPos, endPos, true);
- }
-
- public:
- //! Create Bitmap of length size, and initialise with vector.
- explicit Bitmap(MemPool &pool, size_t size);
-
- void Init(const std::vector<bool>& initializer);
- void Init(const Bitmap &copy, const Range &range);
-
- //! Count of words translated.
- size_t GetNumWordsCovered() const {
- return m_numWordsCovered;
- }
-
- //! position of 1st word not yet translated, or NOT_FOUND if everything already translated
- size_t GetFirstGapPos() const {
- return m_firstGap;
- }
-
- //! position of last word not yet translated, or NOT_FOUND if everything already translated
- size_t GetLastGapPos() const {
- for (int pos = int(m_bitmap.size()) - 1; pos >= 0; pos--) {
- if (!m_bitmap[pos]) {
- return pos;
- }
- }
- // no starting pos
- return NOT_FOUND;
- }
-
- //! position of last translated word
- size_t GetLastPos() const {
- for (int pos = int(m_bitmap.size()) - 1; pos >= 0; pos--) {
- if (m_bitmap[pos]) {
- return pos;
- }
- }
- // no starting pos
- return NOT_FOUND;
- }
-
- //! whether a word has been translated at a particular position
- bool GetValue(size_t pos) const {
- return bool(m_bitmap[pos]);
- }
- //! set value at a particular position
- void SetValue( size_t pos, bool value ) {
- bool origValue = m_bitmap[pos];
- if (origValue == value) {
- // do nothing
- }
- else {
- m_bitmap[pos] = value;
- UpdateFirstGap(pos, pos, value);
- if (value) {
- ++m_numWordsCovered;
- }
- else {
- --m_numWordsCovered;
- }
- }
- }
-
- //! whether every word has been translated
- bool IsComplete() const {
- return GetSize() == GetNumWordsCovered();
- }
- //! whether the wordrange overlaps with any translated word in this bitmap
- bool Overlap(const Range &compare) const {
- for (size_t pos = compare.GetStartPos(); pos <= compare.GetEndPos(); pos++) {
- if (m_bitmap[pos])
- return true;
- }
- return false;
- }
- //! number of elements
- size_t GetSize() const {
- return m_bitmap.size();
- }
-
- inline size_t GetEdgeToTheLeftOf(size_t l) const {
- if (l == 0) return l;
- while (l && !m_bitmap[l-1]) {
- --l;
- }
- return l;
- }
-
- inline size_t GetEdgeToTheRightOf(size_t r) const {
- if (r+1 == m_bitmap.size()) return r;
- return (
- std::find(m_bitmap.begin() + r + 1, m_bitmap.end(), true) -
- m_bitmap.begin()
- ) - 1;
- }
-
- //! converts bitmap into an integer ID: it consists of two parts: the first 16 bit are the pattern between the first gap and the last word-1, the second 16 bit are the number of filled positions. enforces a sentence length limit of 65535 and a max distortion of 16
- WordsBitmapID GetID() const {
- assert(m_bitmap.size() < (1<<16));
-
- size_t start = GetFirstGapPos();
- if (start == NOT_FOUND) start = m_bitmap.size(); // nothing left
-
- size_t end = GetLastPos();
- if (end == NOT_FOUND) end = 0;// nothing translated yet
-
- assert(end < start || end-start <= 16);
- WordsBitmapID id = 0;
- for(size_t pos = end; pos > start; pos--) {
- id = id*2 + (int) GetValue(pos);
- }
- return id + (1<<16) * start;
- }
-
- //! converts bitmap into an integer ID, with an additional span covered
- WordsBitmapID GetIDPlus( size_t startPos, size_t endPos ) const {
- assert(m_bitmap.size() < (1<<16));
-
- size_t start = GetFirstGapPos();
- if (start == NOT_FOUND) start = m_bitmap.size(); // nothing left
-
- size_t end = GetLastPos();
- if (end == NOT_FOUND) end = 0;// nothing translated yet
-
- if (start == startPos) start = endPos+1;
- if (end < endPos) end = endPos;
-
- assert(end < start || end-start <= 16);
- WordsBitmapID id = 0;
- for(size_t pos = end; pos > start; pos--) {
- id = id*2;
- if (GetValue(pos) || (startPos<=pos && pos<=endPos))
- id++;
- }
- return id + (1<<16) * start;
- }
-
- // for unordered_set in stack
- size_t hash() const;
- bool operator==(const Bitmap& other) const;
- bool operator!=(const Bitmap& other) const {
- return !(*this == other);
- }
-
- };
-
- }
diff --git a/contrib/moses2/legacy/Bitmaps.cpp b/contrib/moses2/legacy/Bitmaps.cpp
deleted file mode 100644
index 879ad9d71..000000000
--- a/contrib/moses2/legacy/Bitmaps.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-#include <boost/foreach.hpp>
-#include "Bitmaps.h"
-#include "Util2.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-Bitmaps::Bitmaps(MemPool &pool) :
- m_pool(pool)
-{
-}
-
-Bitmaps::~Bitmaps()
-{
-}
-
-void Bitmaps::Init(size_t inputSize,
- const std::vector<bool> &initSourceCompleted)
-{
- m_initBitmap = new (m_pool.Allocate<Bitmap>()) Bitmap(m_pool, inputSize);
- m_initBitmap->Init(initSourceCompleted);
- m_coll[m_initBitmap];
-}
-
-const Bitmap &Bitmaps::GetNextBitmap(const Bitmap &bm, const Range &range)
-{
- Bitmap *newBM;
- if (m_recycler.empty()) {
- newBM = new (m_pool.Allocate<Bitmap>()) Bitmap(m_pool, bm.GetSize());
- }
- else {
- newBM = m_recycler.top();
- m_recycler.pop();
- }
-
- newBM->Init(bm, range);
-
- Coll::const_iterator iter = m_coll.find(newBM);
- if (iter == m_coll.end()) {
- m_coll[newBM] = NextBitmaps();
- return *newBM;
- }
- else {
- m_recycler.push(newBM);
-
- return *iter->first;
- }
-}
-
-const Bitmap &Bitmaps::GetBitmap(const Bitmap &bm, const Range &range)
-{
- Coll::iterator iter = m_coll.find(&bm);
- assert(iter != m_coll.end());
-
- const Bitmap *newBM;
- NextBitmaps &next = iter->second;
- NextBitmaps::const_iterator iterNext = next.find(&range);
- if (iterNext == next.end()) {
- // not seen the link yet.
- newBM = &GetNextBitmap(bm, range);
- next[&range] = newBM;
- }
- else {
- // link exist
- //std::cerr << "link exists" << endl;
- newBM = iterNext->second;
- }
- return *newBM;
-}
-
-}
-
diff --git a/contrib/moses2/legacy/Bitmaps.h b/contrib/moses2/legacy/Bitmaps.h
deleted file mode 100644
index d8207b59e..000000000
--- a/contrib/moses2/legacy/Bitmaps.h
+++ /dev/null
@@ -1,40 +0,0 @@
-#pragma once
-
-#include <boost/unordered_set.hpp>
-#include <boost/unordered_map.hpp>
-#include <set>
-#include <stack>
-#include "Bitmap.h"
-#include "Util2.h"
-
-namespace Moses2
-{
-class MemPool;
-
-class Bitmaps
-{
- typedef boost::unordered_map<const Range*, const Bitmap*> NextBitmaps;
- typedef boost::unordered_map<const Bitmap*, NextBitmaps,
- UnorderedComparer<Bitmap>, UnorderedComparer<Bitmap> > Coll;
- //typedef std::set<const Bitmap*, OrderedComparer<Bitmap> > Coll;
- Coll m_coll;
- Bitmap *m_initBitmap;
-
- MemPool &m_pool;
- std::stack<Bitmap*> m_recycler;
-
- const Bitmap &GetNextBitmap(const Bitmap &bm, const Range &range);
-public:
- Bitmaps(MemPool &pool);
- virtual ~Bitmaps();
- void Init(size_t inputSize, const std::vector<bool> &initSourceCompleted);
-
- const Bitmap &GetInitialBitmap() const
- {
- return *m_initBitmap;
- }
- const Bitmap &GetBitmap(const Bitmap &bm, const Range &range);
-};
-
-}
-
diff --git a/contrib/moses2/legacy/Factor.cpp b/contrib/moses2/legacy/Factor.cpp
deleted file mode 100644
index be9bad2c1..000000000
--- a/contrib/moses2/legacy/Factor.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-// $Id$
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#include "Factor.h"
-
-#include <boost/functional/hash.hpp>
-
-using namespace std;
-
-namespace Moses2
-{
-
-// friend
-ostream& operator<<(ostream& out, const Factor& factor)
-{
- out << factor.GetString();
- return out;
-}
-
-size_t hash_value(const Factor& f)
-{
- boost::hash<size_t> hasher;
- return hasher(f.GetId());
-}
-
-}
-
diff --git a/contrib/moses2/legacy/Factor.h b/contrib/moses2/legacy/Factor.h
deleted file mode 100644
index 99d53f4f0..000000000
--- a/contrib/moses2/legacy/Factor.h
+++ /dev/null
@@ -1,104 +0,0 @@
-// $Id$
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#pragma once
-
-#include <ostream>
-#include <string>
-#include <vector>
-#include "util/string_piece.hh"
-
-namespace Moses2
-{
-
-struct FactorFriend;
-class FactorCollection;
-
-/** Represents a factor (word, POS, etc).
- * A Factor has a contiguous identifier and string value.
- */
-class Factor
-{
- friend std::ostream& operator<<(std::ostream&, const Factor&);
-
- // only these classes are allowed to instantiate this class
- friend class FactorCollection;
- friend struct FactorFriend;
-
- // FactorCollection writes here.
- // This is mutable so the pointer can be changed to pool-backed memory.
- mutable StringPiece m_string;
- size_t m_id;
-
- //! protected constructor. only friend class, FactorCollection, is allowed to create Factor objects
- Factor()
- {
- }
-
- // Needed for STL containers. They'll delegate through FactorFriend, which is never exposed publicly.
- Factor(const Factor &factor) :
- m_string(factor.m_string), m_id(factor.m_id)
- {
- }
-
- // Not implemented. Shouldn't be called.
- Factor &operator=(const Factor &factor);
-
-public:
- //! original string representation of the factor
- StringPiece GetString() const
- {
- return m_string;
- }
- //! contiguous ID
- inline size_t GetId() const
- {
- return m_id;
- }
-
- /** transitive comparison between 2 factors.
- * -1 = less than
- * +1 = more than
- * 0 = same
- */
- inline int Compare(const Factor &compare) const
- {
- if (this < &compare) return -1;
- if (this > &compare) return 1;
- return 0;
- }
- //! transitive comparison used for adding objects into FactorCollection
- inline bool operator<(const Factor &compare) const
- {
- return this < &compare;
- }
-
- // quick equality comparison. Not used
- inline bool operator==(const Factor &compare) const
- {
- return this == &compare;
- }
-};
-
-size_t hash_value(const Factor &f);
-
-}
-
diff --git a/contrib/moses2/legacy/FactorCollection.cpp b/contrib/moses2/legacy/FactorCollection.cpp
deleted file mode 100644
index f8beb9b40..000000000
--- a/contrib/moses2/legacy/FactorCollection.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-// $Id$
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#include <boost/version.hpp>
-#ifdef WITH_THREADS
-#include <boost/thread/locks.hpp>
-#endif
-#include <ostream>
-#include <string>
-#include "FactorCollection.h"
-#include "util/pool.hh"
-#include "util/exception.hh"
-#include "../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-const Factor *FactorCollection::AddFactor(const StringPiece &factorString,
- const System &system, bool isNonTerminal)
-{
- FactorFriend to_ins;
- to_ins.in.m_string = factorString;
- to_ins.in.m_id = (isNonTerminal) ? m_factorIdNonTerminal : m_factorId;
- Set & set = (isNonTerminal) ? m_set : m_setNonTerminal;
- // If we're threaded, hope a read-only lock is sufficient.
-#ifdef WITH_THREADS
- {
- // read=lock scope
- boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
- Set::const_iterator i = set.find(to_ins);
- if (i != set.end()) return &i->in;
- }
- boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
-#endif // WITH_THREADS
- std::pair<Set::iterator, bool> ret(set.insert(to_ins));
- if (ret.second) {
- ret.first->in.m_string.set(
- memcpy(m_string_backing.Allocate(factorString.size()),
- factorString.data(), factorString.size()), factorString.size());
- if (isNonTerminal) {
- m_factorIdNonTerminal++;
- UTIL_THROW_IF2(m_factorIdNonTerminal >= moses_MaxNumNonterminals,
- "Number of non-terminals exceeds maximum size reserved. Adjust parameter moses_MaxNumNonterminals, then recompile");
- }
- else {
- m_factorId++;
- }
- }
-
- const Factor *factor = &ret.first->in;
-
- return factor;
-}
-
-const Factor *FactorCollection::GetFactor(const StringPiece &factorString,
- bool isNonTerminal)
-{
- FactorFriend to_find;
- to_find.in.m_string = factorString;
- to_find.in.m_id = (isNonTerminal) ? m_factorIdNonTerminal : m_factorId;
- Set & set = (isNonTerminal) ? m_set : m_setNonTerminal;
- {
- // read=lock scope
-#ifdef WITH_THREADS
- boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
-#endif // WITH_THREADS
- Set::const_iterator i = set.find(to_find);
- if (i != set.end()) return &i->in;
- }
- return NULL;
-}
-
-FactorCollection::~FactorCollection()
-{
-}
-
-// friend
-ostream& operator<<(ostream& out, const FactorCollection& factorCollection)
-{
-#ifdef WITH_THREADS
- boost::shared_lock<boost::shared_mutex> lock(factorCollection.m_accessLock);
-#endif
- for (FactorCollection::Set::const_iterator i = factorCollection.m_set.begin();
- i != factorCollection.m_set.end(); ++i) {
- out << i->in;
- }
- return out;
-}
-
-}
-
diff --git a/contrib/moses2/legacy/FactorCollection.h b/contrib/moses2/legacy/FactorCollection.h
deleted file mode 100644
index 0430e5cde..000000000
--- a/contrib/moses2/legacy/FactorCollection.h
+++ /dev/null
@@ -1,130 +0,0 @@
-// $Id$
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#pragma once
-
-// reserve space for non-terminal symbols (ensuring consecutive numbering, and allowing quick lookup by ID)
-#ifndef moses_MaxNumNonterminals
-#define moses_MaxNumNonterminals 10000
-#endif
-
-#ifdef WITH_THREADS
-#include <boost/thread/shared_mutex.hpp>
-#endif
-
-#include "util/murmur_hash.hh"
-#include <boost/unordered_set.hpp>
-
-#include <functional>
-#include <string>
-
-#include "util/string_piece.hh"
-#include "util/pool.hh"
-#include "Factor.h"
-
-namespace Moses2
-{
-
-class System;
-
-/** We don't want Factor to be copyable by anybody. But we also want to store
- * it in an STL container. The solution is that Factor's copy constructor is
- * private and friended to FactorFriend. The STL containers can delegate
- * copying, so friending the container isn't sufficient. STL containers see
- * FactorFriend's public copy constructor and everybody else sees Factor's
- * private copy constructor.
- */
-struct FactorFriend
-{
- Factor in;
-};
-
-/** collection of factors
- *
- * All Factors in moses are accessed and created by a FactorCollection.
- * By enforcing this strict creation processes (ie, forbidding factors
- * from being created on the stack, etc), their memory addresses can
- * be used as keys to uniquely identify them.
- * Only 1 FactorCollection object should be created.
- */
-class FactorCollection
-{
- friend std::ostream& operator<<(std::ostream&, const FactorCollection&);
- friend class System;
-
- struct HashFactor: public std::unary_function<const FactorFriend &,
- std::size_t>
- {
- std::size_t operator()(const FactorFriend &factor) const
- {
- return util::MurmurHashNative(factor.in.m_string.data(),
- factor.in.m_string.size());
- }
- };
- struct EqualsFactor: public std::binary_function<const FactorFriend &,
- const FactorFriend &, bool>
- {
- bool operator()(const FactorFriend &left, const FactorFriend &right) const
- {
- return left.in.GetString() == right.in.GetString();
- }
- };
- typedef boost::unordered_set<FactorFriend, HashFactor, EqualsFactor> Set;
- Set m_set;
- Set m_setNonTerminal;
-
- util::Pool m_string_backing;
-
-#ifdef WITH_THREADS
- //reader-writer lock
- mutable boost::shared_mutex m_accessLock;
-#endif
-
- size_t m_factorIdNonTerminal; /**< unique, contiguous ids, starting from 0, for each non-terminal factor */
- size_t m_factorId; /**< unique, contiguous ids, starting from moses_MaxNumNonterminals, for each terminal factor */
-
- //! constructor. only the 1 static variable can be created
- FactorCollection() :
- m_factorIdNonTerminal(0), m_factorId(moses_MaxNumNonterminals)
- {
- }
-
-public:
- ~FactorCollection();
-
- /** returns a factor with the same direction, factorType and factorString.
- * If a factor already exist in the collection, return the existing factor, if not create a new 1
- */
- const Factor *AddFactor(const StringPiece &factorString, const System &system,
- bool isNonTerminal);
-
- size_t GetNumNonTerminals()
- {
- return m_factorIdNonTerminal;
- }
-
- const Factor *GetFactor(const StringPiece &factorString, bool isNonTerminal =
- false);
-
-};
-
-}
-
diff --git a/contrib/moses2/legacy/InputFileStream.cpp b/contrib/moses2/legacy/InputFileStream.cpp
deleted file mode 100644
index a68ea53ef..000000000
--- a/contrib/moses2/legacy/InputFileStream.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-// $Id$
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#include "InputFileStream.h"
-#include "gzfilebuf.h"
-#include <iostream>
-
-using namespace std;
-
-namespace Moses2
-{
-
-InputFileStream::InputFileStream(const std::string &filePath) :
- std::istream(NULL), m_streambuf(NULL)
-{
- if (filePath.size() > 3 && filePath.substr(filePath.size() - 3, 3) == ".gz") {
- m_streambuf = new gzfilebuf(filePath.c_str());
- }
- else {
- std::filebuf* fb = new std::filebuf();
- fb = fb->open(filePath.c_str(), std::ios::in);
- if (!fb) {
- cerr << "Can't read " << filePath.c_str() << endl;
- exit(1);
- }
- m_streambuf = fb;
- }
- this->init(m_streambuf);
-}
-
-InputFileStream::~InputFileStream()
-{
- delete m_streambuf;
- m_streambuf = NULL;
-}
-
-void InputFileStream::Close()
-{
-}
-
-}
-
diff --git a/contrib/moses2/legacy/InputFileStream.h b/contrib/moses2/legacy/InputFileStream.h
deleted file mode 100644
index d8f78848c..000000000
--- a/contrib/moses2/legacy/InputFileStream.h
+++ /dev/null
@@ -1,46 +0,0 @@
-// $Id$
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#pragma once
-
-#include <cstdlib>
-#include <fstream>
-#include <string>
-
-namespace Moses2
-{
-
-/** Used in place of std::istream, can read zipped files if it ends in .gz
- */
-class InputFileStream: public std::istream
-{
-protected:
- std::streambuf *m_streambuf;
-public:
-
- explicit InputFileStream(const std::string &filePath);
- ~InputFileStream();
-
- void Close();
-};
-
-}
-
diff --git a/contrib/moses2/legacy/Matrix.cpp b/contrib/moses2/legacy/Matrix.cpp
deleted file mode 100644
index 9d2abc8ab..000000000
--- a/contrib/moses2/legacy/Matrix.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-// $Id$
-// vim:tabstop=2
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#include <string>
-#include <iostream>
-#include "Matrix.h"
-#include "Util2.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-}
-
diff --git a/contrib/moses2/legacy/Matrix.h b/contrib/moses2/legacy/Matrix.h
deleted file mode 100644
index 6c498b53d..000000000
--- a/contrib/moses2/legacy/Matrix.h
+++ /dev/null
@@ -1,106 +0,0 @@
-// $Id$
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#pragma once
-
-#include <iostream>
-#include "Util2.h"
-#include "../MemPool.h"
-
-namespace Moses2
-{
-template<typename T>
-class Matrix
-{
-protected:
- size_t m_rows, m_cols; /**< length of the square (sentence length) */
- T *m_array; /**< two-dimensional array to store floats */
-
- Matrix(); // not implemented
- Matrix(const Matrix &copy); // not implemented
-
-public:
- Matrix(MemPool &pool, size_t rows, size_t cols) :
- m_rows(rows), m_cols(cols)
- {
- m_array = pool.Allocate<T>(rows * cols);
- }
-
- ~Matrix(); // not implemented
-
- // set upper triangle
- void InitTriangle(const T &val)
- {
- assert(m_rows == m_cols);
- for (size_t row = 0; row < m_rows; row++) {
- for (size_t col = row; col < m_cols; col++) {
- SetValue(row, col, val);
- }
- }
- }
-
- // everything
- void Init(const T &val)
- {
- for (size_t row = 0; row < m_rows; row++) {
- for (size_t col = 0; col < m_cols; col++) {
- SetValue(row, col, val);
- }
- }
- }
-
- /** Returns length of the square: typically the sentence length */
- inline size_t GetSize() const
- {
- assert(m_rows == m_cols);
- return m_rows;
- }
-
- inline size_t GetRows() const
- {
- return m_rows;
- }
-
- inline size_t GetCols() const
- {
- return m_cols;
- }
-
- /** Get a future cost score for a span */
- inline const T &GetValue(size_t row, size_t col) const
- {
- return m_array[row * m_cols + col];
- }
-
- inline T &GetValue(size_t row, size_t col)
- {
- return m_array[row * m_cols + col];
- }
-
- /** Set a future cost score for a span */
- inline void SetValue(size_t row, size_t col, const T &value)
- {
- m_array[row * m_cols + col] = value;
- }
-};
-
-}
-
diff --git a/contrib/moses2/legacy/OutputCollector.h b/contrib/moses2/legacy/OutputCollector.h
deleted file mode 100644
index 5504d9add..000000000
--- a/contrib/moses2/legacy/OutputCollector.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2011 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#pragma once
-
-#ifdef WITH_THREADS
-#include <boost/thread/mutex.hpp>
-#endif
-
-#ifdef BOOST_HAS_PTHREADS
-#include <pthread.h>
-#endif
-
-#include <iostream>
-#include <map>
-#include <ostream>
-#include <fstream>
-#include <string>
-#include "util/exception.hh"
-
-namespace Moses2
-{
-/**
- * Makes sure output goes in the correct order when multi-threading
- **/
-class OutputCollector
-{
-public:
- OutputCollector(std::ostream* outStream = &std::cout,
- std::ostream* debugStream = &std::cerr) :
- m_nextOutput(0), m_outStream(outStream), m_debugStream(debugStream), m_isHoldingOutputStream(
- false), m_isHoldingDebugStream(false)
- {
- }
-
- OutputCollector(std::string xout, std::string xerr = "") :
- m_nextOutput(0)
- {
- // TO DO open magic streams instead of regular ofstreams! [UG]
-
- if (xout == "/dev/stderr") {
- m_outStream = &std::cerr;
- m_isHoldingOutputStream = false;
- }
- else if (xout.size() && xout != "/dev/stdout" && xout != "-") {
- m_outStream = new std::ofstream(xout.c_str());
- UTIL_THROW_IF2(!m_outStream->good(),
- "Failed to open output file" << xout);
- m_isHoldingOutputStream = true;
- }
- else {
- m_outStream = &std::cout;
- m_isHoldingOutputStream = false;
- }
-
- if (xerr == "/dev/stdout") {
- m_debugStream = &std::cout;
- m_isHoldingDebugStream = false;
- }
- else if (xerr.size() && xerr != "/dev/stderr") {
- m_debugStream = new std::ofstream(xerr.c_str());
- UTIL_THROW_IF2(!m_debugStream->good(),
- "Failed to open debug stream" << xerr);
- m_isHoldingDebugStream = true;
- }
- else {
- m_debugStream = &std::cerr;
- m_isHoldingDebugStream = false;
- }
- }
-
- ~OutputCollector()
- {
- if (m_isHoldingOutputStream) delete m_outStream;
- if (m_isHoldingDebugStream) delete m_debugStream;
- }
-
- void HoldOutputStream()
- {
- m_isHoldingOutputStream = true;
- }
-
- void HoldDebugStream()
- {
- m_isHoldingDebugStream = true;
- }
-
- bool OutputIsCout() const
- {
- return (m_outStream == &std::cout);
- }
-
- /**
- * Write or cache the output, as appropriate.
- **/
- void Write(int sourceId, const std::string& output, const std::string& debug =
- "")
- {
-#ifdef WITH_THREADS
- boost::mutex::scoped_lock lock(m_mutex);
-#endif
- if (sourceId == m_nextOutput) {
- //This is the one we were expecting
- *m_outStream << output << std::flush;
- *m_debugStream << debug << std::flush;
- ++m_nextOutput;
- //see if there's any more
- std::map<int, std::string>::iterator iter;
- while ((iter = m_outputs.find(m_nextOutput)) != m_outputs.end()) {
- *m_outStream << iter->second << std::flush;
- ++m_nextOutput;
- std::map<int, std::string>::iterator debugIter = m_debugs.find(
- iter->first);
- m_outputs.erase(iter);
- if (debugIter != m_debugs.end()) {
- *m_debugStream << debugIter->second << std::flush;
- m_debugs.erase(debugIter);
- }
- }
- }
- else {
- //save for later
- m_outputs[sourceId] = output;
- m_debugs[sourceId] = debug;
- }
- }
-
-private:
- std::map<int, std::string> m_outputs;
- std::map<int, std::string> m_debugs;
- int m_nextOutput;
- std::ostream* m_outStream;
- std::ostream* m_debugStream;
- bool m_isHoldingOutputStream;
- bool m_isHoldingDebugStream;
-#ifdef WITH_THREADS
- boost::mutex m_mutex;
-#endif
-
-public:
- void SetOutputStream(std::ostream* outStream)
- {
- m_outStream = outStream;
- }
-
-};
-
-} // namespace Moses
-
diff --git a/contrib/moses2/legacy/OutputFileStream.cpp b/contrib/moses2/legacy/OutputFileStream.cpp
deleted file mode 100644
index ad46f3a0c..000000000
--- a/contrib/moses2/legacy/OutputFileStream.cpp
+++ /dev/null
@@ -1,88 +0,0 @@
-// $Id: OutputFileStream.cpp 2780 2010-01-29 17:11:17Z bojar $
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#include <iostream>
-#include <boost/algorithm/string/predicate.hpp>
-#include <boost/iostreams/filter/gzip.hpp>
-#include "OutputFileStream.h"
-#include "gzfilebuf.h"
-
-using namespace std;
-using namespace boost::algorithm;
-
-namespace Moses2
-{
-OutputFileStream::OutputFileStream() :
- boost::iostreams::filtering_ostream(), m_outFile(NULL), m_open(false)
-{
-}
-
-OutputFileStream::OutputFileStream(const std::string &filePath) :
- m_outFile(NULL), m_open(false)
-{
- Open(filePath);
-}
-
-OutputFileStream::~OutputFileStream()
-{
- Close();
-}
-
-bool OutputFileStream::Open(const std::string &filePath)
-{
- assert(!m_open);
- if (filePath == std::string("-")) {
- // Write to standard output. Leave m_outFile null.
- this->push(std::cout);
- }
- else {
- m_outFile = new ofstream(filePath.c_str(),
- ios_base::out | ios_base::binary);
- if (m_outFile->fail()) {
- return false;
- }
-
- if (ends_with(filePath, ".gz")) {
- this->push(boost::iostreams::gzip_compressor());
- }
- this->push(*m_outFile);
- }
-
- m_open = true;
- return true;
-}
-
-void OutputFileStream::Close()
-{
- if (!m_open) return;
- this->flush();
- if (m_outFile) {
- this->pop(); // file
-
- m_outFile->close();
- delete m_outFile;
- m_outFile = NULL;
- }
- m_open = false;
-}
-
-}
-
diff --git a/contrib/moses2/legacy/OutputFileStream.h b/contrib/moses2/legacy/OutputFileStream.h
deleted file mode 100644
index 27c0b4539..000000000
--- a/contrib/moses2/legacy/OutputFileStream.h
+++ /dev/null
@@ -1,81 +0,0 @@
-// $Id: InputFileStream.h 2939 2010-02-24 11:15:44Z jfouet $
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#pragma once
-
-#include <cstdlib>
-#include <fstream>
-#include <string>
-#include <iostream>
-#include <boost/iostreams/filtering_stream.hpp>
-
-namespace Moses2
-{
-
-/** Version of std::ostream with transparent compression.
- *
- * Transparently compresses output when writing to a file whose name ends in
- * ".gz". Or, writes to stdout instead of a file when given a filename
- * consisting of just a dash ("-").
- */
-class OutputFileStream: public boost::iostreams::filtering_ostream
-{
-private:
- /** File that needs flushing & closing when we close this stream.
- *
- * Is NULL when no file is opened, e.g. when writing to standard output.
- */
- std::ofstream *m_outFile;
-
- /// Is this stream open?
- bool m_open;
-
-public:
- /** Create an unopened OutputFileStream.
- *
- * Until it's been opened, nothing can be done with this stream.
- */
- OutputFileStream();
-
- /// Create an OutputFileStream, and open it by calling Open().
- OutputFileStream(const std::string &filePath);
- virtual ~OutputFileStream();
-
- // TODO: Can we please just always throw an exception when this fails?
- /** Open stream.
- *
- * If filePath is "-" (just a dash), this opens the stream for writing to
- * standard output. Otherwise, it opens the given file. If the filename
- * has the ".gz" suffix, output will be transparently compressed.
- *
- * Call Close() to close the file.
- *
- * Returns whether opening the file was successful. It may also throw an
- * exception on failure.
- */
- bool Open(const std::string &filePath);
-
- /// Flush and close stream. After this, the stream can be opened again.
- void Close();
-};
-
-}
-
diff --git a/contrib/moses2/legacy/Parameter.cpp b/contrib/moses2/legacy/Parameter.cpp
deleted file mode 100644
index 5cb88645e..000000000
--- a/contrib/moses2/legacy/Parameter.cpp
+++ /dev/null
@@ -1,1707 +0,0 @@
-// $Id$
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#include <ctime>
-#include <iostream>
-#include <iterator>
-#include <fstream>
-#include <sstream>
-#include <algorithm>
-#include <boost/algorithm/string/predicate.hpp>
-#include <boost/program_options.hpp>
-
-#include "Parameter.h"
-#include "InputFileStream.h"
-#include "../FF/FeatureRegistry.h"
-#include "util/string_stream.hh"
-#include "util/exception.hh"
-#include "util/random.hh"
-
-using namespace std;
-using namespace boost::algorithm;
-namespace po = boost::program_options;
-
-namespace Moses2
-{
-
-/** Define the allowed parameters.
- *
- * Every AddParam() call below records the parameter name in the lookup
- * tables (m_valid and m_description, plus m_abbreviation and m_fullname when
- * an abbreviation is given) and adds it to a boost::program_options group.
- * At the end of the constructor the groups are attached to m_options, which
- * is what Explain() prints.
- *
- * AddParam() calls that are commented out are parameters that are NOT
- * registered by this constructor.
- */
-Parameter::Parameter()
-{
- ///////////////////////////////////////////////////////////////////////////////////////
- // general options
- po::options_description main_opts("Main Options");
- AddParam(main_opts, "config", "f", "location of the configuration file");
- AddParam(main_opts, "input-file", "i",
- "location of the input file to be translated");
-
- AddParam(main_opts, "verbose", "v", "verbosity level of the logging");
- AddParam(main_opts, "show-weights", "print feature weights and exit");
- //AddParam(main_opts, "time-out",
- // "seconds after which is interrupted (-1=no time-out, default is -1)");
-
- ///////////////////////////////////////////////////////////////////////////////////////
- // factorization options
- po::options_description factor_opts("General Factorization Options");
- //AddParam(factor_opts, "factor-delimiter", "fd",
- // "specify a different factor delimiter than the default");
- // one should be able to specify different factor delimiters for input and output
- AddParam(factor_opts, "mapping", "description of decoding steps"); // whatever that means ...
- AddParam(factor_opts, "placeholder-factor",
- "Which source factor to use to store the original text for placeholders. The factor must not be used by a translation or gen model");
-
- ///////////////////////////////////////////////////////////////////////////////////////
- // general search options
- po::options_description search_opts("Search Options");
- // the help text for "search-algorithm" is assembled line by line
- string desc = "Which search algorithm to use.\n";
- desc += "0=normal stack (default)\n";
- desc += "1=cube pruning\n";
- desc += "3=chart (with cube pruning)\n";
- desc += "4=stack with batched lm requests\n";
- desc += "5=chart (with incremental search)\n";
- desc += "6=string-to-tree\n";
- desc += "7=tree-to-string\n";
- desc += "8=tree-to-string (SCFG-based)\n";
- desc += "9=forest-to-string";
- AddParam(search_opts, "search-algorithm", desc);
- AddParam(search_opts, "beam-threshold", "b",
- "threshold for threshold pruning");
- //AddParam(search_opts, "early-discarding-threshold", "edt",
- // "threshold for constructing hypotheses based on estimate cost");
- AddParam(search_opts, "stack", "s",
- "maximum stack size for histogram pruning. 0 = unlimited stack size");
- //AddParam(search_opts, "stack-diversity", "sd",
- // "minimum number of hypothesis of each coverage in stack (default 0)");
-
- // feature weight-related options
- //AddParam(search_opts, "weight-file", "wf",
- // "feature weights file. Do *not* put weights for 'core' features in here - they go in moses.ini");
- AddParam(search_opts, "weight",
- "weights for ALL models, 1 per line 'WeightName value'. Weight names can be repeated");
-
- AddParam(search_opts, "feature-overwrite",
- "Override arguments in a particular feature function with a particular key. Format: -feature-overwrite \"FeatureName key=value\"");
-
- po::options_description tune_opts("Options used in tuning.");
- AddParam(tune_opts, "weight-overwrite",
- "special parameter for mert. All on 1 line. Overrides weights specified in 'weights' argument");
- AddParam(tune_opts, "feature-add",
- "Add a feature function on the command line. Used by mira to add BLEU feature");
- AddParam(tune_opts, "weight-add",
- "Add weight for FF if it doesn't exist, i.e weights here are added 1st, and can be override by the ini file or on the command line. Used to specify initial weights for FF that was also specified on the copmmand line");
-
- // phrase table limitations:
- //AddParam(search_opts, "max-partial-trans-opt",
- // "maximum number of partial translation options per input span (during mapping steps)");
- //AddParam(search_opts, "max-trans-opt-per-coverage",
- // "maximum number of translation options per input span (after applying mapping steps)");
- AddParam(search_opts, "max-phrase-length",
- "maximum phrase length (default 20)");
- //AddParam(search_opts, "translation-option-threshold", "tot",
- // "threshold for translation options relative to best for input phrase");
-
- // miscellaneous search options
- //AddParam(search_opts, "disable-discarding", "dd",
- // "disable hypothesis discarding"); // ??? memory management? UG
- //AddParam(search_opts, "phrase-drop-allowed", "da",
- // "if present, allow dropping of source words"); //da = drop any (word); see -du for comparison
- AddParam(search_opts, "threads", "th",
- "number of threads to use in decoding (defaults to single-threaded)");
-
- // distortion options
- po::options_description disto_opts("Distortion options");
- AddParam(disto_opts, "distortion-limit", "dl",
- "distortion (reordering) limit in maximum number of words (0 = monotone, -1 = unlimited)");
- AddParam(disto_opts, "monotone-at-punctuation", "mp",
- "do not reorder over punctuation");
- //AddParam(disto_opts, "early-distortion-cost", "edc",
- // "include estimate of distortion cost yet to be incurred in the score [Moore & Quirk 2007]. Default is no");
- //AddParam(disto_opts, "distortion",
- // "configurations for each factorized/lexicalized reordering model."); // zombie parameter?
-
- // cube pruning
- po::options_description cube_opts("Cube pruning options.");
- AddParam(cube_opts, "cube-pruning-pop-limit", "cbp",
- "How many hypotheses should be popped for each stack. (default = 1000)");
- AddParam(cube_opts, "cube-pruning-diversity", "cbd",
- "How many hypotheses should be created for each coverage. (default = 0)");
- AddParam(cube_opts, "cube-pruning-lazy-scoring", "cbls",
- "Don't fully score a hypothesis until it is popped");
- //AddParam(cube_opts, "cube-pruning-deterministic-search", "cbds",
- // "Break ties deterministically during search");
-
- ///////////////////////////////////////////////////////////////////////////////////////
- // minimum bayes risk decoding
- // NOTE: every AddParam for mbr_opts and lmbr_opts is commented out, and
- // neither group is attached to m_options at the end of this constructor.
- po::options_description mbr_opts(
- "Minimum Bayes Risk (MBR), Lattice MBR, and Consensus decoding");
-
- //AddParam(mbr_opts, "minimum-bayes-risk", "mbr",
- // "use miminum Bayes risk to determine best translation");
- //AddParam(mbr_opts, "mbr-size",
- // "number of translation candidates considered in MBR decoding (default 200)");
- //AddParam(mbr_opts, "mbr-scale",
- // "scaling factor to convert log linear score probability in MBR decoding (default 1.0)");
-
- //AddParam(mbr_opts, "lminimum-bayes-risk", "lmbr",
- // "use lattice miminum Bayes risk to determine best translation");
- //AddParam(mbr_opts, "consensus-decoding", "con",
- // "use consensus decoding (De Nero et. al. 2009)");
-
- po::options_description lmbr_opts("Options specific to Lattic MBR");
- //AddParam(lmbr_opts, "lmbr-p", "unigram precision value for lattice mbr");
- //AddParam(lmbr_opts, "lmbr-r", "ngram precision decay value for lattice mbr");
- //AddParam(lmbr_opts, "lmbr-thetas", "theta(s) for lattice mbr calculation");
- //AddParam(mbr_opts, "lmbr-map-weight",
- // "weight given to map solution when doing lattice MBR (default 0)");
- //AddParam(mbr_opts, "lmbr-pruning-factor",
- // "average number of nodes/word wanted in pruned lattice");
- //AddParam(mbr_opts, "lattice-hypo-set",
- // "to use lattice as hypo set during lattice MBR");
-
- ///////////////////////////////////////////////////////////////////////////////////////
- // OOV handling options
- po::options_description oov_opts("OOV Handling Options");
- AddParam(oov_opts, "drop-unknown", "du",
- "drop unknown words instead of copying them");
- AddParam(oov_opts, "mark-unknown", "mu", "mark unknown words in output");
- AddParam(oov_opts, "unknown-word-prefix",
- "prefix to unknwon word when marked (default: 'UNK')");
- AddParam(oov_opts, "unknown-word-suffix",
- "suffix to unknwon word when marked (default: '')");
- //AddParam(oov_opts, "lmodel-oov-feature",
- // "add language model oov feature, one per model");
- //AddParam(oov_opts, "output-unknowns",
- // "Output the unknown (OOV) words to the given file, one line per sentence");
- //AddParam(oov_opts, "always-create-direct-transopt",
- // "Always create a translation that translates the source word ad-verbatim");
-
- ///////////////////////////////////////////////////////////////////////////////////////
- // input options
- po::options_description input_opts("Input Format Options");
- AddParam(input_opts, "input-factors", "list of factors in the input");
- AddParam(input_opts, "inputtype",
- "text (0), confusion network (1), word lattice (2), tree (3) (default = 0)");
- AddParam(input_opts, "xml-input", "xi",
- "allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'constraint', 'ignore'");
- //AddParam(input_opts, "xml-brackets", "xb",
- // "specify strings to be used as xml tags opening and closing, e.g. \"{{ }}\" (default \"< >\"). Avoid square brackets because of configuration file format. Valid only with text input mode");
- //AddParam(input_opts, "start-translation-id", "Id of 1st input. Default = 0");
- //AddParam(input_opts, "alternate-weight-setting", "aws",
- // "alternate set of weights to used per xml specification");
-
- ///////////////////////////////////////////////////////////////////////////////////////
- // output options
- po::options_description output_opts("Output Options");
- //AddParam(output_opts, "report-all-factors",
- // "report all factors in output, not just first");
- AddParam(output_opts, "output-factors", "list if factors in the output");
- //AddParam(output_opts, "print-id",
- // "prefix translations with id. Default if false");
- //AddParam(output_opts, "print-passthrough",
- // "output the sgml tag <passthrough> without any computation on that. Default is false");
- //AddParam(output_opts, "print-passthrough-in-n-best",
- // "output the sgml tag <passthrough> without any computation on that in each entry of the n-best-list. Default is false");
- //AddParam(output_opts, "print-all-derivations",
- // "to print all derivations in search graph");
- AddParam(output_opts, "translation-details", "T",
- "for each best hypothesis, report translation details to the given file");
-
- AddParam(output_opts, "output-hypo-score",
- "Output the hypo score to stdout with the output string. For search error analysis. Default is false");
- //AddParam(output_opts, "output-word-graph", "owg",
- // "Output stack info as word graph. Takes filename, 0=only hypos in stack, 1=stack + nbest hypos");
- //AddParam(output_opts, "tree-translation-details", "Ttree",
- // "for each hypothesis, report translation details with tree fragment info to given file");
- //AddParam(output_opts, "print-alignment-info",
- // "Output word-to-word alignment to standard out, separated from translation by |||. Word-to-word alignments are takne from the phrase table if any. Default is false");
- //AddParam(output_opts, "alignment-output-file",
- // "print output word alignments into given file");
- //AddParam(output_opts, "sort-word-alignment",
- // "Sort word alignments for more consistent display. 0=no sort (default), 1=target order");
- AddParam(output_opts, "report-segmentation", "t",
- "report phrase segmentation in the output");
- AddParam(output_opts, "report-segmentation-enriched", "tt",
- "report phrase segmentation in the output with additional information");
-
- // translation-all-details was introduced in the context of DIMwid: Decoder Inspection for Moses (using Widgets)
- // see here: https://ufal.mff.cuni.cz/pbml/100/art-kurtz-seemann-braune-maletti.pdf
- //AddParam(output_opts, "translation-all-details", "Tall",
- // "for all hypotheses, report translation details to the given file");
-
- // NOTE: osg_opts has no active AddParam and is not attached to m_options.
- po::options_description osg_opts("Options for outputting search graphs");
- //AddParam(osg_opts, "output-search-graph", "osg",
- // "Output connected hypotheses of search into specified filename");
- //AddParam(osg_opts, "output-search-graph-extended", "osgx",
- // "Output connected hypotheses of search into specified filename, in extended format");
- //AddParam(osg_opts, "unpruned-search-graph", "usg",
- // "When outputting chart search graph, do not exclude dead ends. Note: stack pruning may have eliminated some hypotheses");
- //AddParam(osg_opts, "output-search-graph-slf", "slf",
- // "Output connected hypotheses of search into specified directory, one file per sentence, in HTK standard lattice format (SLF) - the flag should be followed by a directory name, which must exist");
- //AddParam(output_opts, "include-lhs-in-search-graph", "lhssg",
- // "When outputting chart search graph, include the label of the LHS of the rule (useful when using syntax)");
-#ifdef HAVE_PROTOBUF
- //AddParam(osg_opts,"output-search-graph-pb", "pb", "Write phrase lattice to protocol buffer objects in the specified path.");
-#endif
- //AddParam(osg_opts, "output-search-graph-hypergraph",
- // "DEPRECATED! Output connected hypotheses of search into specified directory, one file per sentence, in a hypergraph format (see Kenneth Heafield's lazy hypergraph decoder). This flag is followed by 3 values: 'true (gz|txt|bz) directory-name'");
-
- ///////////////////////////////////////////////////////////////////////////////////////
- // nbest-options
- po::options_description nbest_opts("N-best Options");
- AddParam(nbest_opts, "n-best-list",
- "file and size of n-best-list to be generated; specify - as the file in order to write to STDOUT");
- // AddParam(nbest_opts,"n-best-list-file", "file of n-best-list to be generated; specify - as the file in order to write to STDOUT");
- // AddParam(nbest_opts,"n-best-list-size", "size of n-best-list to be generated; specify - as the file in order to write to STDOUT");
- //AddParam(nbest_opts, "labeled-n-best-list",
- // "print out labels for each weight type in n-best list. default is true");
- //AddParam(nbest_opts, "n-best-trees",
- // "Write n-best target-side trees to n-best-list");
- AddParam(nbest_opts, "n-best-factor",
- "factor to compute the maximum number of contenders (=factor*nbest-size). value 0 means infinity, i.e. no threshold. default is 0");
- //AddParam(nbest_opts, "report-all-factors-in-n-best",
- // "Report all factors in n-best-lists. Default is false");
- //AddParam(nbest_opts, "lattice-samples",
- // "generate samples from lattice, in same format as nbest list. Uses the file and size arguments, as in n-best-list");
- //AddParam(nbest_opts, "include-segmentation-in-n-best",
- // "include phrasal segmentation in the n-best list. default is false");
- //AddParam(nbest_opts, "print-alignment-info-in-n-best",
- // "Include word-to-word alignment in the n-best list. Word-to-word alignments are taken from the phrase table if any. Default is false");
-
- ///////////////////////////////////////////////////////////////////////////////////////
- // server options
- po::options_description server_opts("Moses Server Options");
- AddParam(server_opts, "server", "Run moses as a translation server.");
- AddParam(server_opts, "server-port", "Port for moses server");
- AddParam(server_opts, "server-log", "Log destination for moses server");
- //AddParam(server_opts, "session-timeout",
- // "Timeout for sessions, e.g. '2h30m' or 1d (=24h)");
- //AddParam(server_opts, "session-cache-size",
- // string("Max. number of sessions cached.")
- // + "Least recently used session is dumped first.");
- AddParam(server_opts, "serial",
- "Run server in serial mode, processing only one request at a time.");
-
- AddParam(server_opts,"server-maxconn",
- "Max. No of simultaneous HTTP transactions allowed by the server.");
- AddParam(server_opts,"server-maxconn-backlog",
- "Max. No. of requests the OS will queue if the server is busy.");
- AddParam(server_opts,"server-keepalive-maxconn",
- "Max. No. of requests the server will accept on a single TCP connection.");
- AddParam(server_opts,"server-keepalive-timeout",
- "Max. number of seconds the server will keep a persistent connection alive.");
- AddParam(server_opts,"server-timeout",
- "Max. number of seconds the server will wait for a client to submit a request once a connection has been established.");
-
- // NOTE: irstlm_opts has no active AddParam and is not attached to m_options.
- po::options_description irstlm_opts("IRSTLM Options");
- //AddParam(irstlm_opts, "clean-lm-cache",
- // "clean language model caches after N translations (default N=1)");
-
- po::options_description chart_opts("Chart Decoding Options");
- AddParam(chart_opts, "max-chart-span",
- "maximum num. of source word chart rules can consume (default 10)");
- AddParam(chart_opts, "non-terminals",
- "list of non-term symbols, space separated");
- //AddParam(chart_opts, "rule-limit",
- // "a little like table limit. But for chart decoding rules. Default is DEFAULT_MAX_TRANS_OPT_SIZE");
- //AddParam(chart_opts, "source-label-overlap",
- // "What happens if a span already has a label. 0=add more. 1=replace. 2=discard. Default is 0");
- //AddParam(chart_opts, "unknown-lhs",
- // "file containing target lhs of unknown words. 1 per line: LHS prob");
-
- po::options_description misc_opts("Miscellaneous Options");
- //AddParam(misc_opts, "mira", "do mira training");
- //AddParam(misc_opts, "description",
- // "Source language, target language, description");
- //AddParam(misc_opts, "no-cache",
- // "Disable all phrase-table caching. Default = false (ie. enable caching)");
- //AddParam(misc_opts, "default-non-term-for-empty-range-only",
- // "Don't add [X] to all ranges, just ranges where there isn't a source non-term. Default = false (ie. add [X] everywhere)");
- //AddParam(misc_opts, "s2t-parsing-algorithm",
- // "Which S2T parsing algorithm to use. 0=recursive CYK+, 1=scope-3 (default = 0)");
-
- //AddParam(o,"continue-partial-translation", "cpt", "start from nonempty hypothesis");
- AddParam(misc_opts, "decoding-graph-backoff", "dpb",
- "only use subsequent decoding paths for unknown spans of given length");
- //AddParam(misc_opts, "references",
- // "Reference file(s) - used for bleu score feature");
- //AddParam(misc_opts, "recover-input-path", "r",
- // "(conf net/word lattice only) - recover input path corresponding to the best translation");
- //AddParam(misc_opts, "link-param-count",
- // "Number of parameters on word links when using confusion networks or lattices (default = 1)");
- //AddParam(misc_opts, "feature-name-overwrite",
- // "Override feature name (NOT arguments). Eg. SRILM-->KENLM, PhraseDictionaryMemory-->PhraseDictionaryScope3");
-
- AddParam(misc_opts, "feature", "All the feature functions should be here");
- //AddParam(misc_opts, "context-string",
- // "A (tokenized) string containing context words for context-sensitive translation.");
- //AddParam(misc_opts, "context-weights",
- // "A key-value map for context-sensitive translation.");
- //AddParam(misc_opts, "context-window",
- // "Context window (in words) for context-sensitive translation: {+|-|+-}<number>.");
- AddParam(misc_opts, "cpu-affinity-offset", "CPU Affinity. Default = -1 (no affinity)");
- AddParam(misc_opts, "cpu-affinity-increment",
- "Set to 1 (default) to put each thread on different cores. 0 to run all threads on one core");
-
- // Compact phrase table and reordering table.
- // NOTE: cpt_opts and spe_opts have no active AddParam and are not attached
- // to m_options.
- po::options_description cpt_opts(
- "Options when using compact phrase and reordering tables.");
- //AddParam(cpt_opts, "minphr-memory",
- // "Load phrase table in minphr format into memory");
- //AddParam(cpt_opts, "minlexr-memory",
- // "Load lexical reordering table in minlexr format into memory");
-
- po::options_description spe_opts("Simulated Post-editing Options");
- //AddParam(spe_opts, "spe-src", "Simulated post-editing. Source filename");
- //AddParam(spe_opts, "spe-trg", "Simulated post-editing. Target filename");
- //AddParam(spe_opts, "spe-aln", "Simulated post-editing. Alignment filename");
-
- ///////////////////////////////////////////////////////////////////////////////////////
- // DEPRECATED options
- // NOTE: deprec_opts is not attached to m_options (its add() at the end of
- // this constructor is commented out). "text-type" is therefore recorded in
- // the AddParam lookup tables but is absent from the usage text that
- // Explain() prints.
- po::options_description deprec_opts("Deprecated Options");
- AddParam(deprec_opts, "text-type",
- "DEPRECATED. DO NOT USE. should be one of dev/devtest/test, used for domain adaptation features");
-
- /*
- AddParam(deprec_opts, "link-param-count",
- "DEPRECATED. DO NOT USE. Number of parameters on word links when using confusion networks or lattices (default = 1)");
- AddParam(deprec_opts, "weight-slm", "slm",
- "DEPRECATED. DO NOT USE. weight(s) for syntactic language model");
- AddParam(deprec_opts, "weight-bl", "bl",
- "DEPRECATED. DO NOT USE. weight for bleu score feature");
- AddParam(deprec_opts, "weight-d", "d",
- "DEPRECATED. DO NOT USE. weight(s) for distortion (reordering components)");
- AddParam(deprec_opts, "weight-dlm", "dlm",
- "DEPRECATED. DO NOT USE. weight for discriminative LM feature function (on top of sparse weights)");
- AddParam(deprec_opts, "weight-lr", "lr",
- "DEPRECATED. DO NOT USE. weight(s) for lexicalized reordering, if not included in weight-d");
- AddParam(deprec_opts, "weight-generation", "g",
- "DEPRECATED. DO NOT USE. weight(s) for generation components");
- AddParam(deprec_opts, "weight-i", "I",
- "DEPRECATED. DO NOT USE. weight(s) for word insertion - used for parameters from confusion network and lattice input links");
- AddParam(deprec_opts, "weight-l", "lm",
- "DEPRECATED. DO NOT USE. weight(s) for language models");
- AddParam(deprec_opts, "weight-lex", "lex",
- "DEPRECATED. DO NOT USE. weight for global lexical model");
- AddParam(deprec_opts, "weight-glm", "glm",
- "DEPRECATED. DO NOT USE. weight for global lexical feature, sparse producer");
- AddParam(deprec_opts, "weight-wt", "wt",
- "DEPRECATED. DO NOT USE. weight for word translation feature");
- AddParam(deprec_opts, "weight-pp", "pp",
- "DEPRECATED. DO NOT USE. weight for phrase pair feature");
- AddParam(deprec_opts, "weight-pb", "pb",
- "DEPRECATED. DO NOT USE. weight for phrase boundary feature");
- AddParam(deprec_opts, "weight-t", "tm",
- "DEPRECATED. DO NOT USE. weights for translation model components");
- AddParam(deprec_opts, "weight-p", "w",
- "DEPRECATED. DO NOT USE. weight for phrase penalty");
- AddParam(deprec_opts, "weight-w", "w",
- "DEPRECATED. DO NOT USE. weight for word penalty");
- AddParam(deprec_opts, "weight-u", "u",
- "DEPRECATED. DO NOT USE. weight for unknown word penalty");
- AddParam(deprec_opts, "weight-e", "e",
- "DEPRECATED. DO NOT USE. weight for word deletion");
- AddParam(deprec_opts, "input-scores",
- "DEPRECATED. DO NOT USE. 2 numbers on 2 lines - [1] of scores on each edge of a confusion network or lattice input (default=1). [2] Number of 'real' word scores (0 or 1. default=0)");
- AddParam(deprec_opts, "dlm-model",
- "DEPRECATED. DO NOT USE. Order, factor and vocabulary file for discriminative LM. Use * for filename to indicate unlimited vocabulary.");
- AddParam(deprec_opts, "generation-file",
- "DEPRECATED. DO NOT USE. location and properties of the generation table");
- AddParam(deprec_opts, "global-lexical-file", "gl",
- "DEPRECATED. DO NOT USE. discriminatively trained global lexical translation model file");
- AddParam(deprec_opts, "glm-feature",
- "DEPRECATED. DO NOT USE. discriminatively trained global lexical translation feature, sparse producer");
- AddParam(deprec_opts, "lmodel-file",
- "DEPRECATED. DO NOT USE. location and properties of the language models");
- AddParam(deprec_opts, "lmodel-dub",
- "DEPRECATED. DO NOT USE. dictionary upper bounds of language models");
-#ifdef HAVE_SYNLM
- AddParam(deprec_opts,"slmodel-file", "DEPRECATED. DO NOT USE. location of the syntactic language model file(s)");
- AddParam(deprec_opts,"slmodel-factor", "DEPRECATED. DO NOT USE. factor to use with syntactic language model");
- AddParam(deprec_opts,"slmodel-beam", "DEPRECATED. DO NOT USE. beam width to use with syntactic language model's parser");
-#endif
- AddParam(deprec_opts, "ttable-file",
- "DEPRECATED. DO NOT USE. location and properties of the translation tables");
- AddParam(deprec_opts, "phrase-pair-feature",
- "DEPRECATED. DO NOT USE. Source and target factors for phrase pair feature");
- AddParam(deprec_opts, "phrase-boundary-source-feature",
- "DEPRECATED. DO NOT USE. Source factors for phrase boundary feature");
- AddParam(deprec_opts, "phrase-boundary-target-feature",
- "DEPRECATED. DO NOT USE. Target factors for phrase boundary feature");
- AddParam(deprec_opts, "phrase-length-feature",
- "DEPRECATED. DO NOT USE. Count features for source length, target length, both of each phrase");
- AddParam(deprec_opts, "target-word-insertion-feature",
- "DEPRECATED. DO NOT USE. Count feature for each unaligned target word");
- AddParam(deprec_opts, "source-word-deletion-feature",
- "DEPRECATED. DO NOT USE. Count feature for each unaligned source word");
- AddParam(deprec_opts, "word-translation-feature",
- "DEPRECATED. DO NOT USE. Count feature for word translation according to word alignment");
- */
-
- // NOTE: zombie_opts has no active AddParam and is not attached to m_options.
- po::options_description zombie_opts("Zombie Options");
- //AddParam(zombie_opts, "distortion-file",
- // "source factors (0 if table independent of source), target factors, location of the factorized/lexicalized reordering tables");
-
- // Assemble the groups. Sub-groups are nested into their parent first ...
- //mbr_opts.add(lmbr_opts);
- search_opts.add(cube_opts);
- //search_opts.add(mbr_opts);
- search_opts.add(disto_opts);
- search_opts.add(chart_opts);
-
- //input_opts.add(spe_opts);
-
- output_opts.add(nbest_opts);
- //output_opts.add(osg_opts);
-
- // ... then the top-level groups are attached to m_options in this order.
- m_options.add(main_opts);
- m_options.add(server_opts);
- m_options.add(input_opts);
- m_options.add(search_opts);
- m_options.add(output_opts);
- m_options.add(oov_opts);
- m_options.add(factor_opts);
- //m_options.add(cpt_opts);
- //m_options.add(irstlm_opts);
- m_options.add(tune_opts);
- m_options.add(misc_opts);
- //m_options.add(deprec_opts);
- //m_options.add(zombie_opts);
-
-}
-
-/** Destructor. The body is empty: no explicit cleanup is performed here. */
-Parameter::~Parameter() {}
-
-/** Look up the values stored for a parameter.
- *
- * \param paramName key to search for in m_setting
- * \return pointer to the stored value list, or NULL when m_setting holds no
- *         entry for paramName
- */
-const PARAM_VEC *Parameter::GetParam(const std::string &paramName) const
-{
-  PARAM_MAP::const_iterator found = m_setting.find(paramName);
-  if (found != m_setting.end()) {
-    return &found->second;
-  }
-  return NULL;
-}
-
-/** Register a parameter that has no abbreviation (helper for the constructor).
- *
- * \param optgroup    program_options group the parameter is added to
- * \param paramName   full parameter name
- * \param description help text for the parameter
- */
-void Parameter::AddParam(po::options_description& optgroup,
-                         string const& paramName, string const& description)
-{
-  // lookup tables consulted by LoadParam()
-  m_valid[paramName] = true;
-  m_description[paramName] = description;
-
-  // make the option part of the given program_options group
-  po::options_description_easy_init registrar = optgroup.add_options();
-  registrar(paramName.c_str(), description.c_str());
-}
-
-/** Register a parameter together with its abbreviation (helper for the
- * constructor).
- *
- * Both the full name and the abbreviation are marked valid, and the two are
- * mapped onto each other. Only a one-character abbreviation is handed to
- * program_options, as "name,c"; longer abbreviations are recorded in the
- * lookup tables only.
- *
- * \param optgroup    program_options group the parameter is added to
- * \param paramName   full parameter name
- * \param abbrevName  abbreviation for the parameter
- * \param description help text for the parameter
- */
-void Parameter::AddParam(po::options_description& optgroup,
-                         string const& paramName, string const& abbrevName,
-                         string const& description)
-{
-  m_valid[paramName] = true;
-  m_valid[abbrevName] = true;
-  m_abbreviation[paramName] = abbrevName;
-  m_fullname[abbrevName] = paramName;
-  m_description[paramName] = description;
-
-  // (tracking of confusable one-letter abbreviations in m_confusable is disabled)
-  const bool singleLetter = (abbrevName.size() == 1);
-  const string optname = singleLetter ? paramName + "," + abbrevName : paramName;
-
-  po::options_description_easy_init registrar = optgroup.add_options();
-  registrar(optname.c_str(), description.c_str());
-}
-
-/** Print the usage text: the heading "Usage:" followed by m_options, both on
- * stderr.
- *
- * An earlier hand-written listing that walked m_description and
- * m_abbreviation is no longer part of this function.
- */
-void Parameter::Explain()
-{
-  cerr << "Usage:" << endl << m_options << endl;
-}
-
-/** Decide whether a command line token is a switch or a value.
- *
- * A token is a switch when it is at least two characters long, starts with a
- * dash, and its second character is not a decimal digit (so "-5" counts as a
- * value).
- *
- * \param token token on the command line to be checked; may be NULL
- * \return true when the token is a switch
- */
-bool Parameter::isOption(const char* token)
-{
-  // need a leading dash plus at least one more character
-  if (token == NULL || token[0] != '-' || token[1] == '\0') {
-    return false;
-  }
-
-  const char second = token[1];
-  const bool startsLikeNumber = (second >= '0' && second <= '9');
-  return !startsLikeNumber;
-}
-
-/** Load parameters from a configuration file only.
- *
- * Builds the argument list "executable -f <filePath>" and forwards it to
- * LoadParam(int, char*[]).
- *
- * \param filePath path passed as the value of -f
- * \return the result of LoadParam(int, char*[])
- */
-bool Parameter::LoadParam(const string &filePath)
-{
-  const char *fakeArgv[3];
-  fakeArgv[0] = "executable";
-  fakeArgv[1] = "-f";
-  fakeArgv[2] = filePath.c_str();
-  return LoadParam(3, const_cast<char**>(fakeArgv));
-}
-
-/** Load all parameters from the configuration file and the command line
- * switches.
- *
- * Steps, in order:
- *  1. normalise "--name" to the legacy single-dash form "-name";
- *  2. locate the mandatory configuration file (-f / -config) and read it;
- *  3. overwrite values with command line switches (full names, then
- *     abbreviations) and apply "feature-add";
- *  4. log the resulting settings when verbose >= 1 (verbose defaults to 1);
- *  5. refuse a mix of old and new ini formats, convert old weight arguments
- *     when no "feature" section exists, build the weights map and apply
- *     weight overwrites;
- *  6. report switches that were never registered and run Validate().
- *
- * \param argc  number of entries in xargv
- * \param xargv command line arguments; the strings are not modified
- * \return false when no configuration file was given, when it could not be
- *         read, when an unknown switch was found, or when Validate() fails
- * \throws util::Exception when old and new ini file formats are mixed
- */
-bool Parameter::LoadParam(int argc, char* xargv[])
-{
-  // legacy parameter handling: all parameters are expected
-  // to start with a single dash, so "--name" is seen as "-name" by skipping
-  // its first character. A std::vector replaces the former variable-length
-  // array, which is a compiler extension rather than standard C++; the spare
-  // trailing slot is kept and is now a well-defined NULL.
-  std::vector<char*> args(argc > 0 ? argc + 1 : 1, static_cast<char*>(NULL));
-  for (int i = 0; i < argc; ++i) {
-    args[i] = xargv[i];
-    if (strlen(args[i]) > 2 && args[i][0] == '-' && args[i][1] == '-') ++args[i];
-  }
-  char **argv = &args[0];
-
-  // config file (-f) arg mandatory
-  string configPath = FindParam("-f", argc, argv);
-  if (configPath == "") {
-    configPath = FindParam("-config", argc, argv);
-  }
-  if (configPath == "") {
-    PrintCredit();
-    Explain();
-    FeatureRegistry::Instance().PrintFF();
-
-    cerr << endl;
-    cerr << "No configuration file was specified. Use -config or -f";
-    cerr << endl;
-    return false;
-  }
-  if (!ReadConfigFile(configPath)) {
-    // terminate the line so the message is not glued to later output
-    std::cerr << "Could not read " << configPath << std::endl;
-    return false;
-  }
-
-  // overwrite parameters with values from switches
-  for (PARAM_STRING::const_iterator iterParam = m_description.begin();
-       iterParam != m_description.end(); ++iterParam) {
-    const string &paramName = iterParam->first;
-    OverwriteParam("-" + paramName, paramName, argc, argv);
-  }
-
-  // ... also shortcuts
-  for (PARAM_STRING::const_iterator iterParam = m_abbreviation.begin();
-       iterParam != m_abbreviation.end(); ++iterParam) {
-    const string &paramName = iterParam->first;
-    const string &paramShortName = iterParam->second;
-    OverwriteParam("-" + paramShortName, paramName, argc, argv);
-  }
-
-  AddFeaturesCmd();
-
-  // logging of parameters that were set in either config or switch
-  int verbose = 1;
-  const PARAM_VEC *verboseParam = GetParam("verbose");
-  if (verboseParam != NULL && !verboseParam->empty()) {
-    verbose = Scan<int>((*verboseParam)[0]);
-  }
-  if (verbose >= 1) { // only if verbose
-    cerr << "Defined parameters (per moses.ini or switch):" << endl;
-    for (PARAM_MAP::const_iterator iterParam = m_setting.begin();
-         iterParam != m_setting.end(); ++iterParam) {
-      cerr << "\t" << iterParam->first << ": ";
-      for (size_t i = 0; i < iterParam->second.size(); i++)
-        cerr << iterParam->second[i] << " ";
-      cerr << endl;
-    }
-  }
-
-  // don't mix old and new format.
-  // The names follow the deprecated parameter list in the constructor; the
-  // check previously looked for "weight-lrl" and "dlm-mode", which are not
-  // the names used there ("weight-lr", "dlm-model").
-  static const char* const oldFormatParams[] = {
-    "weight-slm", "weight-bl", "weight-d", "weight-dlm", "weight-lr",
-    "weight-generation", "weight-i", "weight-l", "weight-lex", "weight-glm",
-    "weight-wt", "weight-pp", "weight-pb", "weight-t", "weight-w",
-    "weight-p", "weight-u", "weight-e", "dlm-model", "generation-file",
-    "global-lexical-file", "glm-feature", "lmodel-file", "lmodel-dub",
-    "slmodel-file", "slmodel-factor", "slmodel-beam", "ttable-file",
-    "phrase-pair-feature", "phrase-boundary-source-feature",
-    "phrase-boundary-target-feature", "phrase-length-feature",
-    "target-word-insertion-feature", "source-word-deletion-feature",
-    "word-translation-feature"
-  };
-  if (GetParam("feature") || GetParam("weight")) {
-    const size_t numOldFormat = sizeof(oldFormatParams) / sizeof(oldFormatParams[0]);
-    for (size_t i = 0; i < numOldFormat; ++i) {
-      if (GetParam(oldFormatParams[i])) {
-        UTIL_THROW(util::Exception, "Don't mix old and new ini file format");
-      }
-    }
-  }
-
-  // convert old weights args to new format
-  if (GetParam("feature") == NULL) {
-    ConvertWeightArgs();
-  }
-  CreateWeightsMap();
-  WeightOverwrite();
-
-  // check for illegal parameters
-  bool noErrorFlag = true;
-  for (int i = 0; i < argc; i++) {
-    if (!isOption(argv[i])) continue;
-
-    const string paramSwitch(argv[i]);
-    const string paramName = paramSwitch.substr(1);
-    if (m_valid.find(paramName) == m_valid.end()) {
-      // one line per offending switch
-      std::cerr << "illegal switch: " << paramSwitch << std::endl;
-      noErrorFlag = false;
-    }
-  }
-
-  //Save("/tmp/moses.ini.new");
-
-  // check if parameters make sense; Validate() runs even when an illegal
-  // switch was already reported
-  return Validate() && noErrorFlag;
-}
-
-void Parameter::AddFeaturesCmd()
-{
- const PARAM_VEC *params = GetParam("feature-add");
- if (params) {
- PARAM_VEC::const_iterator iter;
- for (iter = params->begin(); iter != params->end(); ++iter) {
- const string &line = *iter;
- AddFeature(line);
- }
-
- m_setting.erase("feature-add");
- }
-}
-
-std::vector<float> Parameter::GetWeights(const std::string &name)
-{
- std::vector<float> ret = m_weights[name];
-
- // cerr << "WEIGHT " << name << "=";
- // for (size_t i = 0; i < ret.size(); ++i) {
- // cerr << ret[i] << ",";
- // }
- // cerr << endl;
- return ret;
-}
-
-void Parameter::SetWeight(const std::string &name, size_t ind, float weight)
-{
- PARAM_VEC &newWeights = m_setting["weight"];
- string line = name + SPrint(ind) + "= " + SPrint(weight);
- newWeights.push_back(line);
-}
-
-void Parameter::SetWeight(const std::string &name, size_t ind,
- const vector<float> &weights)
-{
- PARAM_VEC &newWeights = m_setting["weight"];
- string line = name + SPrint(ind) + "=";
-
- for (size_t i = 0; i < weights.size(); ++i) {
- line += " " + SPrint(weights[i]);
- }
- newWeights.push_back(line);
-}
-
-void Parameter::AddWeight(const std::string &name, size_t ind,
- const std::vector<float> &weights)
-{
- PARAM_VEC &newWeights = m_setting["weight"];
-
- string sought = name + SPrint(ind) + "=";
- for (size_t i = 0; i < newWeights.size(); ++i) {
- string &line = newWeights[i];
- if (line.find(sought) == 0) {
- // found existing weight, most likely to be input weights. Append to this line
- for (size_t i = 0; i < weights.size(); ++i) {
- line += " " + SPrint(weights[i]);
- }
- return;
- }
- }
-
- // nothing found. Just set
- SetWeight(name, ind, weights);
-}
-
-void Parameter::ConvertWeightArgsSingleWeight(const string &oldWeightName,
- const string &newWeightName)
-{
- size_t ind = 0;
- PARAM_MAP::iterator iterMap;
-
- iterMap = m_setting.find(oldWeightName);
- if (iterMap != m_setting.end()) {
- const PARAM_VEC &weights = iterMap->second;
- for (size_t i = 0; i < weights.size(); ++i) {
- SetWeight(newWeightName, ind, Scan<float>(weights[i]));
- }
-
- m_setting.erase(iterMap);
- }
-}
-
-void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
-{
- const PARAM_VEC *params;
-
- // process input weights 1st
- params = GetParam("weight-i");
- if (params) {
- vector<float> inputWeights = Scan<float>(*params);
- PARAM_VEC &numInputScores = m_setting["input-scores"];
- if (inputWeights.size() == 1) {
- UTIL_THROW_IF2(numInputScores.size() != 0,
- "No [input-scores] section allowed");
- numInputScores.push_back("1");
- numInputScores.push_back("0");
- }
- else if (inputWeights.size() == 2) {
- UTIL_THROW_IF2(numInputScores.size() != 0,
- "No [input-scores] section allowed");
- numInputScores.push_back("1");
- numInputScores.push_back("1");
- }
-
- SetWeight("PhraseDictionaryBinary", 0, inputWeights);
- }
-
- // convert actually pt feature
- cerr << "Creating phrase table features" << endl;
-
- size_t numInputScores = 0;
- size_t numRealWordsInInput = 0;
- map<string, size_t> ptIndices;
-
- params = GetParam("input-scores");
- if (params) {
- numInputScores = Scan<size_t>(params->at(0));
-
- if (params->size() > 1) {
- numRealWordsInInput = Scan<size_t>(params->at(1));
- }
- }
-
- // load phrase translation tables
- params = GetParam("ttable-file");
- if (params) {
- // weights
- const vector<string> translationVector = *params;
-
- vector<size_t> maxTargetPhrase;
- params = GetParam("ttable-limit");
- if (params) {
- maxTargetPhrase = Scan<size_t>(*params);
- }
-
- if (maxTargetPhrase.size() == 1 && translationVector.size() > 1) {
- cerr << "Using uniform ttable-limit of " << maxTargetPhrase[0]
- << " for all translation tables." << endl;
- for (size_t i = 1; i < translationVector.size(); i++)
- maxTargetPhrase.push_back(maxTargetPhrase[0]);
- }
- else if (maxTargetPhrase.size() != 1
- && maxTargetPhrase.size() < translationVector.size()) {
- std::cerr << "You specified " << translationVector.size()
- << " translation tables, but only " << maxTargetPhrase.size()
- << " ttable-limits.";
- return;
- }
-
- // MAIN LOOP
- const PARAM_VEC &oldWeights = m_setting[oldWeightName];
-
- size_t currOldInd = 0;
- for (size_t currDict = 0; currDict < translationVector.size(); currDict++) {
- util::StringStream ptLine;
-
- vector<string> token = Tokenize(translationVector[currDict]);
-
- if (currDict == 0 && token.size() == 4) {
- std::cerr
- << "Phrase table specification in old 4-field format. No longer supported";
- return;
- }
- UTIL_THROW_IF2(token.size() < 5,
- "Phrase table must have at least 5 scores");
-
- int implementation = Scan<int>(token[0]);
-
- string ptType;
- switch (implementation) {
- case 0: // Memory
- ptType = "PhraseDictionaryMemory";
- break;
- case 1: // Binary
- ptType = "PhraseDictionaryBinary";
- break;
- case 2: // OnDisk
- ptType = "PhraseDictionaryOnDisk";
- break;
- case 6: // SCFG
- ptType = "PhraseDictionaryMemory";
- break;
- case 12: // Compact
- ptType = "PhraseDictionaryCompact";
- break;
- case 8: // SuffixArray
- ptType = "PhraseDictionarySuffixArray";
- break;
- case 14: // DSuffixArray
- ptType = "PhraseDictionaryDynSuffixArray";
- break;
- case 15: // DCacheBased:
- ptType = "PhraseDictionaryDynamicCacheBased";
- break;
- default:
- break;
- }
-
- size_t ptInd;
- if (ptIndices.find(ptType) == ptIndices.end()) {
- ptIndices[ptType] = 0;
- ptInd = 0;
- }
- else {
- ptInd = ++ptIndices[ptType];
- }
-
- // weights
- size_t numFFInd = (token.size() == 4) ? 2 : 3;
- size_t numFF = Scan<size_t>(token[numFFInd]);
-
- vector<float> weights(numFF);
- for (size_t currFF = 0; currFF < numFF; ++currFF) {
- UTIL_THROW_IF2(currOldInd >= oldWeights.size(),
- "Errors converting old phrase-table weights to new weights");
- float weight = Scan<float>(oldWeights[currOldInd]);
- weights[currFF] = weight;
-
- ++currOldInd;
- }
-
- // cerr << weights.size() << " PHRASE TABLE WEIGHTS "
- // << __FILE__ << ":" << __LINE__ << endl;
- AddWeight(ptType, ptInd, weights);
-
- // actual pt
- ptLine << ptType << " ";
- ptLine << "input-factor=" << token[1] << " ";
- ptLine << "output-factor=" << token[2] << " ";
- ptLine << "path=" << token[4] << " ";
-
- //characteristics of the phrase table
-
- vector<FactorType> input = Tokenize<FactorType>(token[1], ","), output =
- Tokenize<FactorType>(token[2], ",");
- size_t numScoreComponent = Scan<size_t>(token[3]);
- string filePath = token[4];
-
- if (currDict == 0) {
- // only the 1st pt. THis is shit
- // TODO. find what the assumptions made by confusion network about phrase table output which makes
- // it only work with binary file. This is a hack
- numScoreComponent += numInputScores + numRealWordsInInput;
- }
-
- ptLine << "num-features=" << numScoreComponent << " ";
- ptLine << "table-limit=" << maxTargetPhrase[currDict] << " ";
-
- if (implementation == 8 || implementation == 14) {
- ptLine << "target-path=" << token[5] << " ";
- ptLine << "alignment-path=" << token[6] << " ";
- }
-
- AddFeature(ptLine.str());
- } // for(size_t currDict = 0 ; currDict < translationVector.size(); currDict++) {
- } // if (GetParam("ttable-file").size() > 0) {
-
- m_setting.erase("weight-i");
- m_setting.erase(oldWeightName);
- m_setting.erase("ttable-file");
- m_setting.erase("ttable-limit");
-
-}
-
-void Parameter::AddFeature(const std::string &line)
-{
- PARAM_VEC &features = m_setting["feature"];
- features.push_back(line);
-}
-
-void Parameter::ConvertWeightArgsDistortion()
-{
- const string oldWeightName = "weight-d";
- const string oldLexReordingName = "distortion-file";
-
- // distortion / lex distortion
- const PARAM_VEC *oldWeights = GetParam(oldWeightName);
-
- if (oldWeights) {
- const PARAM_VEC *searchAlgo = GetParam("search-algorithm");
- if (searchAlgo == NULL
- || (searchAlgo->size() > 0
- && (Trim(searchAlgo->at(0)) == "0" || Trim(searchAlgo->at(0)) == "1"))) {
- // phrase-based. Add distance distortion to list of features
- AddFeature("Distortion");
- SetWeight("Distortion", 0, Scan<float>(oldWeights->at(0)));
- }
-
- // everything but the last is lex reordering model
-
- size_t currOldInd = 1;
- const PARAM_VEC *lextable = GetParam(oldLexReordingName);
-
- for (size_t indTable = 0; lextable && indTable < lextable->size();
- ++indTable) {
- const string &line = lextable->at(indTable);
- vector<string> toks = Tokenize(line);
-
- size_t numFF = Scan<size_t>(toks[2]);
-
- vector<float> weights(numFF);
- for (size_t currFF = 0; currFF < numFF; ++currFF) {
- UTIL_THROW_IF2(oldWeights && currOldInd >= oldWeights->size(),
- "Errors converting old distortion weights to new weights");
- float weight = Scan<float>(oldWeights->at(currOldInd));
- weights[currFF] = weight;
-
- ++currOldInd;
- }
- SetWeight("LexicalReordering", indTable, weights);
-
- util::StringStream strme;
- strme << "LexicalReordering " << "type=" << toks[1] << " ";
-
- vector<FactorType> factors = Tokenize<FactorType>(toks[0], "-");
- UTIL_THROW_IF2(factors.size() != 2,
- "Error in old factor specification for lexicalized reordering model: " << toks[0]);
- strme << "input-factor=" << factors[0] << " output-factor=" << factors[1]
- << " ";
-
- strme << "num-features=" << toks[2] << " ";
- strme << "path=" << toks[3];
-
- AddFeature(strme.str());
- }
- }
-
- m_setting.erase(oldWeightName);
- m_setting.erase(oldLexReordingName);
-
-}
-
-void Parameter::ConvertWeightArgsLM()
-{
- const string oldWeightName = "weight-l";
- const string oldFeatureName = "lmodel-file";
- const PARAM_VEC *params;
-
- bool isChartDecoding = true;
-
- params = GetParam("search-algorithm");
- if (params == NULL
- || (params->size() > 0
- && (Trim(params->at(0)) == "0" || Trim(params->at(0)) == "1"))) {
- isChartDecoding = false;
- }
-
- vector<int> oovWeights;
- params = GetParam("lmodel-oov-feature");
- if (params) {
- oovWeights = Scan<int>(*params);
- }
-
- PARAM_MAP::iterator iterMap;
-
- iterMap = m_setting.find(oldWeightName);
- if (iterMap != m_setting.end()) {
-
- size_t currOldInd = 0;
- const PARAM_VEC &weights = iterMap->second;
- const PARAM_VEC &models = m_setting[oldFeatureName];
- for (size_t lmIndex = 0; lmIndex < models.size(); ++lmIndex) {
- const string &line = models[lmIndex];
- vector<string> modelToks = Tokenize(line);
-
- int lmType = Scan<int>(modelToks[0]);
-
- string newFeatureName;
- switch (lmType) {
- case 0:
- newFeatureName = "SRILM";
- break;
- case 1:
- newFeatureName = "IRSTLM";
- break;
- case 8:
- case 9:
- newFeatureName = "KENLM";
- break;
- default:
- UTIL_THROW2("Unkown language model type id:" << lmType)
- ;
- }
-
- size_t numFF = 1;
- if (oovWeights.size() > lmIndex) numFF += oovWeights[lmIndex];
-
- vector<float> weightsLM(numFF);
- for (size_t currFF = 0; currFF < numFF; ++currFF) {
- UTIL_THROW_IF2(currOldInd >= weights.size(),
- "Errors converting old LM weights to new weights");
- weightsLM[currFF] = Scan<float>(weights[currOldInd]);
- if (isChartDecoding) {
- weightsLM[currFF] = UntransformLMScore(weightsLM[currFF]);
- }
-
- ++currOldInd;
- }
-
- SetWeight(newFeatureName, lmIndex, weightsLM);
-
- string featureLine = newFeatureName + " " + "factor=" + modelToks[1] + " " // factor
- + "order=" + modelToks[2] + " " // order
- + "num-features=" + SPrint(numFF) + " ";
- if (lmType == 9) {
- featureLine += "lazyken=1 ";
- }
- else if (lmType == 8) {
- featureLine += "lazyken=0 ";
- }
-
- featureLine += "path=" + modelToks[3]; // file
-
- AddFeature(featureLine);
- } // for (size_t lmIndex = 0; lmIndex < models.size(); ++lmIndex) {
-
- m_setting.erase(iterMap);
- }
-
- m_setting.erase(oldFeatureName);
-}
-
-void Parameter::ConvertWeightArgsGeneration(const std::string &oldWeightName,
- const std::string &newWeightName)
-{
- string oldFeatureName = "generation-file";
-
- // distortion / lex distortion
- PARAM_VEC &oldWeights = m_setting[oldWeightName];
-
- if (oldWeights.size() > 0) {
- size_t currOldInd = 0;
- PARAM_VEC &models = m_setting[oldFeatureName];
-
- for (size_t indTable = 0; indTable < models.size(); ++indTable) {
- string &line = models[indTable];
- vector<string> modelToks = Tokenize(line);
-
- size_t numFF = Scan<size_t>(modelToks[2]);
-
- vector<float> weights(numFF);
- for (size_t currFF = 0; currFF < numFF; ++currFF) {
- UTIL_THROW_IF2(currOldInd >= oldWeights.size(),
- "Errors converting old generation weights to new weights");
- float weight = Scan<float>(oldWeights[currOldInd]);
- weights[currFF] = weight;
-
- ++currOldInd;
- }
- SetWeight(newWeightName, indTable, weights);
-
- util::StringStream strme;
- strme << "Generation " << "input-factor=" << modelToks[0] << " "
- << "output-factor=" << modelToks[1] << " " << "num-features="
- << modelToks[2] << " " << "path=" << modelToks[3];
- AddFeature(strme.str());
- }
- }
-
- m_setting.erase(oldWeightName);
- m_setting.erase(oldFeatureName);
-}
-
-void Parameter::ConvertWeightArgsWordPenalty()
-{
- const std::string oldWeightName = "weight-w";
- const std::string newWeightName = "WordPenalty";
-
- bool isChartDecoding = true;
- const PARAM_VEC *searchAlgo = GetParam("search-algorithm");
- if (searchAlgo == NULL
- || (searchAlgo->size() > 0
- && (Trim(searchAlgo->at(0)) == "0" || Trim(searchAlgo->at(0)) == "1"))) {
- isChartDecoding = false;
- }
-
- PARAM_MAP::iterator iterMap;
-
- iterMap = m_setting.find(oldWeightName);
- if (iterMap != m_setting.end()) {
- const PARAM_VEC &weights = iterMap->second;
- for (size_t i = 0; i < weights.size(); ++i) {
- float weight = Scan<float>(weights[i]);
- if (isChartDecoding) {
- weight *= 0.434294482;
- }
- SetWeight(newWeightName, i, weight);
- }
-
- m_setting.erase(iterMap);
- }
-
-}
-
-void Parameter::ConvertPhrasePenalty()
-{
- string oldWeightName = "weight-p";
- const PARAM_VEC *params = GetParam(oldWeightName);
- if (params) {
- UTIL_THROW_IF2(params->size() != 1,
- "There should be only 1 phrase-penalty weight");
- float weight = Scan<float>(params->at(0));
- AddFeature("PhrasePenalty");
- SetWeight("PhrasePenalty", 0, weight);
-
- m_setting.erase(oldWeightName);
- }
-}
-
-void Parameter::ConvertWeightArgs()
-{
- // can't handle discr LM. must do it manually 'cos of bigram/n-gram split
- UTIL_THROW_IF2(m_setting.count("weight-dlm") != 0,
- "Can't handle discr LM. must do it manually 'cos of bigram/n-gram split");
-
- // check that old & new format aren't mixed
- if (m_setting.count("weight")
- && (m_setting.count("weight-i") || m_setting.count("weight-t")
- || m_setting.count("weight-w") || m_setting.count("weight-l")
- || m_setting.count("weight-u") || m_setting.count("weight-lex")
- || m_setting.count("weight-generation")
- || m_setting.count("weight-lr") || m_setting.count("weight-d"))) {
- cerr << "Do not mix old and new format for specify weights";
- }
-
- ConvertWeightArgsWordPenalty();
- ConvertWeightArgsLM();
- ConvertWeightArgsSingleWeight("weight-slm", "SyntacticLM");
- ConvertWeightArgsSingleWeight("weight-u", "UnknownWordPenalty");
- ConvertWeightArgsGeneration("weight-generation", "Generation");
- ConvertWeightArgsDistortion();
-
- // don't know or can't be bothered converting these weights
- ConvertWeightArgsSingleWeight("weight-lr", "LexicalReordering");
- ConvertWeightArgsSingleWeight("weight-bl", "BleuScoreFeature");
- ConvertWeightArgsSingleWeight("weight-glm", "GlobalLexicalModel");
- ConvertWeightArgsSingleWeight("weight-wt", "WordTranslationFeature");
- ConvertWeightArgsSingleWeight("weight-pp", "PhrasePairFeature");
- ConvertWeightArgsSingleWeight("weight-pb", "PhraseBoundaryFeature");
-
- ConvertWeightArgsSingleWeight("weight-e", "WordDeletion"); // TODO Can't find real name
- ConvertWeightArgsSingleWeight("weight-lex", "GlobalLexicalReordering"); // TODO Can't find real name
-
- ConvertPhrasePenalty();
-
- AddFeature("WordPenalty");
- AddFeature("UnknownWordPenalty");
-
- ConvertWeightArgsPhraseModel("weight-t");
-
-}
-
-void Parameter::CreateWeightsMap()
-{
- CreateWeightsMap(m_setting["weight-add"]);
- CreateWeightsMap(m_setting["weight"]);
-}
-
-void Parameter::CreateWeightsMap(const PARAM_VEC &vec)
-{
- for (size_t i = 0; i < vec.size(); ++i) {
- const string &line = vec[i];
- vector<string> toks = Tokenize(line);
- UTIL_THROW_IF2(toks.size() < 2, "Error in format of weights: " << line);
-
- string name = toks[0];
- name = name.substr(0, name.size() - 1);
-
- vector<float> weights(toks.size() - 1);
- for (size_t i = 1; i < toks.size(); ++i) {
- float weight = Scan<float>(toks[i]);
- weights[i - 1] = weight;
- }
- m_weights[name] = weights;
- }
-}
-
-void Parameter::WeightOverwrite()
-{
- PARAM_VEC &vec = m_setting["weight-overwrite"];
-
- if (vec.size() == 0) return;
-
- // should only be on 1 line
- UTIL_THROW_IF2(vec.size() != 1, "weight-overwrite should only be on 1 line");
-
- string name("");
- vector<float> weights;
- vector<string> toks = Tokenize(vec[0]);
- size_t cnt = 0;
- const std::vector<float>* oldWeights = NULL;
- for (size_t i = 0; i < toks.size(); ++i) {
- const string &tok = toks[i];
-
- if (ends_with(tok, "=")) {
- // start of new feature
-
- if (name != "") {
- // save previous ff
- m_weights[name] = weights;
- weights.clear();
- }
-
- name = tok.substr(0, tok.size() - 1);
- std::map<std::string, std::vector<float> >::const_iterator found =
- m_weights.find(name);
- if (found != m_weights.end()) {
- oldWeights = &(found->second);
- }
- else {
- oldWeights = NULL;
- }
- cnt = 0;
- }
- else {
- // a weight for curr ff
- if (toks[i] == "x") {
- UTIL_THROW_IF2(!oldWeights || cnt >= oldWeights->size(),
- "Keeping previous weight failed in weight-overwrite");
- weights.push_back(oldWeights->at(cnt));
- }
- else {
- float weight = Scan<float>(toks[i]);
- weights.push_back(weight);
- }
- ++cnt;
- }
- }
-
- if (name != "") {
- m_weights[name] = weights;
- }
-
-}
-
-/** check that parameter settings make sense */
-bool Parameter::Validate()
-{
- bool noErrorFlag = true;
-
- PARAM_MAP::const_iterator iterParams;
- for (iterParams = m_setting.begin(); iterParams != m_setting.end();
- ++iterParams) {
- const std::string &key = iterParams->first;
-
- if (m_valid.find(key) == m_valid.end()) {
- std::cerr << "Unknown parameter " << key;
- noErrorFlag = false;
- }
- }
-
- if (m_setting["lmodel-dub"].size() > 0) {
- if (m_setting["lmodel-file"].size() != m_setting["lmodel-dub"].size()) {
- std::cerr << "Config and parameters specify "
- << static_cast<int>(m_setting["lmodel-file"].size())
- << " language model files (lmodel-file), but "
- << static_cast<int>(m_setting["lmodel-dub"].size())
- << " LM upperbounds (lmodel-dub)" << endl;
- noErrorFlag = false;
- }
- }
-
- // do files exist?
-
- // input file
- if (noErrorFlag && m_setting["input-file"].size() == 1) {
- noErrorFlag = FileExists(m_setting["input-file"][0]);
- if (!noErrorFlag) {
- std::cerr << endl << "Input file " << m_setting["input-file"][0]
- << " does not exist";
- }
- }
- // generation tables
- if (noErrorFlag) {
- std::vector<std::string> ext;
- //raw tables in either un compressed or compressed form
- ext.push_back("");
- ext.push_back(".gz");
- noErrorFlag = FilesExist("generation-file", 3, ext);
- }
- // distortion
- if (noErrorFlag) {
- std::vector<std::string> ext;
- //raw tables in either un compressed or compressed form
- ext.push_back("");
- ext.push_back(".gz");
- //prefix tree format
- ext.push_back(".binlexr.idx");
- //prefix tree format
- ext.push_back(".minlexr");
- noErrorFlag = FilesExist("distortion-file", 3, ext);
- }
- return noErrorFlag;
-}
-
-/** check whether a file exists */
-bool Parameter::FilesExist(const string &paramName, int fieldNo,
- std::vector<std::string> const& extensions)
-{
- typedef std::vector<std::string> StringVec;
- StringVec::const_iterator iter;
-
- PARAM_MAP::const_iterator iterParam = m_setting.find(paramName);
- if (iterParam == m_setting.end()) {
- // no param. therefore nothing to check
- return true;
- }
- const StringVec &pathVec = (*iterParam).second;
- for (iter = pathVec.begin(); iter != pathVec.end(); ++iter) {
- StringVec vec = Tokenize(*iter);
-
- size_t tokenizeIndex;
- if (fieldNo == -1) tokenizeIndex = vec.size() - 1;
- else tokenizeIndex = static_cast<size_t>(fieldNo);
-
- if (tokenizeIndex >= vec.size()) {
- std::cerr << "Expected at least " << (tokenizeIndex + 1)
- << " tokens per entry in '" << paramName << "', but only found "
- << vec.size();
- return false;
- }
- const string &pathStr = vec[tokenizeIndex];
-
- bool fileFound = 0;
- for (size_t i = 0; i < extensions.size() && !fileFound; ++i) {
- fileFound |= FileExists(pathStr + extensions[i]);
- }
- if (!fileFound) {
- std::cerr << "File " << pathStr << " does not exist";
- return false;
- }
- }
- return true;
-}
-
-/** look for a switch in arg, update parameter */
-// TODO arg parsing like this does not belong in the library, it belongs
-// in moses-cmd
-string Parameter::FindParam(const string &paramSwitch, int argc, char* argv[])
-{
- for (int i = 0; i < argc; i++) {
- if (string(argv[i]) == paramSwitch) {
- if (i + 1 < argc) {
- return argv[i + 1];
- }
- else {
- std::cerr << "Option " << paramSwitch << " requires a parameter!";
- // TODO return some sort of error, not the empty string
- }
- }
- }
- return "";
-}
-
-/** update parameter settings with command line switches
- * \param paramSwitch (potentially short) name of switch
- * \param paramName full name of parameter
- * \param argc number of arguments on command line
- * \param argv values of paramters on command line */
-void Parameter::OverwriteParam(const string &paramSwitch,
- const string &paramName, int argc, char* argv[])
-{
- int startPos = -1;
- for (int i = 0; i < argc; i++) {
- if (string(argv[i]) == paramSwitch) {
- startPos = i + 1;
- break;
- }
- }
- if (startPos < 0) return;
-
- int index = 0;
- m_setting[paramName]; // defines the parameter, important for boolean switches
- while (startPos < argc && (!isOption(argv[startPos]))) {
- if (m_setting[paramName].size() > (size_t) index) m_setting[paramName][index] =
- argv[startPos];
- else m_setting[paramName].push_back(argv[startPos]);
- index++;
- startPos++;
- }
-}
-
-/** read parameters from a configuration file */
-bool Parameter::ReadConfigFile(const string &filePath)
-{
- InputFileStream inFile(filePath);
- string line, paramName;
- while (getline(inFile, line)) {
- // comments
- size_t comPos = line.find_first_of("#");
- if (comPos != string::npos) line = line.substr(0, comPos);
- // trim leading and trailing spaces/tabs
- line = Trim(line);
-
- if (line.size() == 0) {
- // blank line. do nothing.
- }
- else if (line[0] == '[') {
- // new parameter
- for (size_t currPos = 0; currPos < line.size(); currPos++) {
- if (line[currPos] == ']') {
- paramName = line.substr(1, currPos - 1);
- break;
- }
- }
- }
- else {
- // add value to parameter
- m_setting[paramName].push_back(line);
- }
- }
- return true;
-}
-
-struct Credit
-{
- string name, contact, currentPursuits, areaResponsibility;
- int sortId;
-
- Credit(string name, string contact, string currentPursuits,
- string areaResponsibility)
- {
- this->name = name;
- this->contact = contact;
- this->currentPursuits = currentPursuits;
- this->areaResponsibility = areaResponsibility;
- this->sortId = util::rand_excl(1000);
- }
-
- bool operator<(const Credit &other) const
- {
- /*
- if (areaResponsibility.size() != 0 && other.areaResponsibility.size() ==0)
- return true;
- if (areaResponsibility.size() == 0 && other.areaResponsibility.size() !=0)
- return false;
-
- return name < other.name;
- */
- return sortId < other.sortId;
- }
-
-};
-
-std::ostream& operator<<(std::ostream &os, const Credit &credit)
-{
- os << credit.name;
- if (credit.contact != "") os << "\t contact: " << credit.contact;
- if (credit.currentPursuits != "") os << " " << credit.currentPursuits;
- if (credit.areaResponsibility != "") os << " I'll answer question on: "
- << credit.areaResponsibility;
- return os;
-}
-
-void Parameter::PrintCredit()
-{
- vector<Credit> everyone;
- srand(time(NULL));
-
- everyone.push_back(
- Credit("Nicola Bertoldi", "911", "", "scripts & other stuff"));
- everyone.push_back(Credit("Ondrej Bojar", "", "czech this out!", ""));
- everyone.push_back(
- Credit("Chris Callison-Burch", "anytime, anywhere",
- "international playboy", ""));
- everyone.push_back(Credit("Alexandra Constantin", "", "eu sunt varza", ""));
- everyone.push_back(
- Credit("Brooke Cowan", "brooke@csail.mit.edu",
- "if you're going to san francisco, be sure to wear a flower in your hair",
- ""));
- everyone.push_back(
- Credit("Chris Dyer", "can't. i'll be out driving my mustang",
- "driving my mustang", ""));
- everyone.push_back(
- Credit("Marcello Federico", "federico at itc at it",
- "Researcher at ITC-irst, Trento, Italy", "IRST language model"));
- everyone.push_back(
- Credit("Evan Herbst", "Small college in upstate New York", "", ""));
- everyone.push_back(
- Credit("Philipp Koehn", "only between 2 and 4am", "",
- "Nothing fazes this dude"));
- everyone.push_back(
- Credit("Christine Moran", "weird building at MIT", "", ""));
- everyone.push_back(
- Credit("Wade Shen", "via morse code", "buying another laptop", ""));
- everyone.push_back(
- Credit("Richard Zens", "richard at aachen dot de", "",
- "ambiguous source input, confusion networks, confusing source code"));
- everyone.push_back(
- Credit("Hieu Hoang", "http://www.hoang.co.uk/hieu/",
- "phd student at Edinburgh Uni. Original Moses developer",
- "general queries/ flames on Moses."));
-
- sort(everyone.begin(), everyone.end());
-
- cerr
- << "Moses - A beam search decoder for phrase-based statistical machine translation models"
- << endl << "Copyright (C) 2006 University of Edinburgh" << endl << endl
-
- << "This library is free software; you can redistribute it and/or" << endl
- << "modify it under the terms of the GNU Lesser General Public" << endl
- << "License as published by the Free Software Foundation; either" << endl
- << "version 2.1 of the License, or (at your option) any later version."
- << endl << endl
-
- << "This library is distributed in the hope that it will be useful,"
- << endl
- << "but WITHOUT ANY WARRANTY; without even the implied warranty of"
- << endl
- << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU"
- << endl << "Lesser General Public License for more details." << endl
- << endl
-
- << "You should have received a copy of the GNU Lesser General Public"
- << endl
- << "License along with this library; if not, write to the Free Software"
- << endl
- << "Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA"
- << endl << endl
- << "***********************************************************************"
- << endl << endl << "Built on " << __DATE__ << " at " __TIME__ << endl
- << endl << "WHO'S FAULT IS THIS GODDAM SOFTWARE:" << endl;
-
- ostream_iterator<Credit> out(cerr, "\n");
- copy(everyone.begin(), everyone.end(), out);
- cerr << endl << endl;
-}
-
-/** update parameter settings with command line switches
- * \param paramName full name of parameter
- * \param values inew values for paramName */
-void Parameter::OverwriteParam(const string &paramName, PARAM_VEC values)
-{
- cerr << "Overwriting parameter " << paramName;
-
- m_setting[paramName]; // defines the parameter, important for boolean switches
- if (m_setting[paramName].size() > 1) {
- cerr << " (the parameter had " << m_setting[paramName].size()
- << " previous values)";
- UTIL_THROW_IF2(m_setting[paramName].size() != values.size(),
- "Number of weight override for " << paramName << " is not the same as the original number of weights");
- }
- else {
- cerr << " (the parameter does not have previous values)";
- m_setting[paramName].resize(values.size());
- }
- cerr << " with the following values:";
- int i = 0;
- for (PARAM_VEC::iterator iter = values.begin(); iter != values.end();
- iter++, i++) {
- m_setting[paramName][i] = *iter;
- cerr << " " << *iter;
- }
- cerr << std::endl;
-}
-
-std::set<std::string> Parameter::GetWeightNames() const
-{
- std::set<std::string> ret;
- std::map<std::string, std::vector<float> >::const_iterator iter;
- for (iter = m_weights.begin(); iter != m_weights.end(); ++iter) {
- const string &key = iter->first;
- ret.insert(key);
- }
- return ret;
-}
-
-void Parameter::Save(const std::string path)
-{
- ofstream file;
- file.open(path.c_str());
-
- PARAM_MAP::const_iterator iterOuter;
- for (iterOuter = m_setting.begin(); iterOuter != m_setting.end();
- ++iterOuter) {
- const std::string &sectionName = iterOuter->first;
- file << "[" << sectionName << "]" << endl;
-
- const PARAM_VEC &values = iterOuter->second;
-
- PARAM_VEC::const_iterator iterInner;
- for (iterInner = values.begin(); iterInner != values.end(); ++iterInner) {
- const std::string &value = *iterInner;
- file << value << endl;
- }
-
- file << endl;
- }
-
- file.close();
-}
-
-template<>
-void Parameter::SetParameter<bool>(bool &parameter,
- std::string const& parameterName, bool const& defaultValue) const
-{
- const PARAM_VEC *params = GetParam(parameterName);
-
- // default value if nothing is specified
- parameter = defaultValue;
- if (params == NULL) {
- return;
- }
-
- // if parameter is just specified as, e.g. "-parameter" set it true
- if (params->size() == 0) {
- parameter = true;
- }
- // if paramter is specified "-parameter true" or "-parameter false"
- else if (params->size() == 1) {
- parameter = Scan<bool>(params->at(0));
- }
-}
-
-void Parameter::SetParameter(bool& var, std::string const& name)
-{
- SetParameter(var, name, false);
-}
-
-}
-
diff --git a/contrib/moses2/legacy/Parameter.h b/contrib/moses2/legacy/Parameter.h
deleted file mode 100644
index f43ce98a4..000000000
--- a/contrib/moses2/legacy/Parameter.h
+++ /dev/null
@@ -1,176 +0,0 @@
-/// $Id$
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#pragma once
-
-#include <string>
-#include <set>
-#include <map>
-#include <vector>
-#include <boost/program_options.hpp>
-#include "Util2.h"
-
-namespace Moses2
-{
-
-//! list of strings; used below as the values stored for one parameter name
-typedef std::vector<std::string> PARAM_VEC;
-//! parameter name -> its values (type of Parameter::m_setting)
-typedef std::map<std::string, PARAM_VEC> PARAM_MAP;
-//! string -> flag (type of Parameter::m_valid)
-typedef std::map<std::string, bool> PARAM_BOOL;
-//! string -> string (type of Parameter::m_abbreviation, m_description, m_fullname)
-typedef std::map<std::string, std::string> PARAM_STRING;
-
-/** Handles parameter values set in config file or on command line.
- * Process raw parameter data (names and values as strings) for StaticData
- * to parse; to get useful values, see StaticData.
- *
- * Only the inline members are defined in this header; everything else is
- * declared here and defined elsewhere.
- */
-class Parameter
-{
- typedef boost::program_options::options_description options_description;
- typedef boost::program_options::value_semantic value_semantic;
-protected:
- // parameter name -> raw string values (exposed read-only through GetParams())
- PARAM_MAP m_setting;
- PARAM_BOOL m_valid;
- PARAM_STRING m_abbreviation;
- PARAM_STRING m_description;
- PARAM_STRING m_fullname;
- // std::map<char,std::set<std::string> > m_confusable;
- // stores long parameter names that start with a letter that is also a short option.
- options_description m_options;
-
- // name -> list of weights (exposed read-only through GetAllWeights())
- std::map<std::string, std::vector<float> > m_weights;
-
- std::string FindParam(const std::string &paramSwitch, int argc, char* argv[]);
- void OverwriteParam(const std::string &paramSwitch,
- const std::string &paramName, int argc, char* argv[]);
- bool ReadConfigFile(const std::string &filePath);
- bool FilesExist(const std::string &paramName, int fieldNo,
- std::vector<std::string> const& fileExtension = std::vector<std::string>(
- 1, ""));
- bool isOption(const char* token);
- bool Validate();
-
- void
- AddParam(options_description& optgroup, value_semantic const* optvalue,
- std::string const& paramName, std::string const& description);
-
- void
- AddParam(options_description& optgroup, std::string const &paramName,
- std::string const &description);
-
- void
- AddParam(options_description& optgroup, value_semantic const* optvalue,
- std::string const& paramName, std::string const& abbrevName,
- std::string const& description);
-
- void
- AddParam(options_description& optgroup, std::string const& paramName,
- std::string const& abbrevName, std::string const& description);
-
- void PrintCredit();
-
- void SetWeight(const std::string &name, size_t ind, float weight);
- void SetWeight(const std::string &name, size_t ind,
- const std::vector<float> &weights);
- void AddWeight(const std::string &name, size_t ind,
- const std::vector<float> &weights);
- void ConvertWeightArgs();
- void ConvertWeightArgsSingleWeight(const std::string &oldWeightName,
- const std::string &newWeightName);
- void ConvertWeightArgsPhraseModel(const std::string &oldWeightName);
- void ConvertWeightArgsLM();
- void ConvertWeightArgsDistortion();
- void ConvertWeightArgsGeneration(const std::string &oldWeightName,
- const std::string &newWeightName);
- void ConvertWeightArgsPhrasePenalty();
- void ConvertWeightArgsWordPenalty();
- void ConvertPhrasePenalty();
- void CreateWeightsMap();
- void CreateWeightsMap(const PARAM_VEC &vec);
- void WeightOverwrite();
- void AddFeature(const std::string &line);
- void AddFeaturesCmd();
-
-public:
- Parameter();
- ~Parameter();
- bool LoadParam(int argc, char* argv[]);
- bool LoadParam(const std::string &filePath);
- void Explain();
-
- /** return a vector of strings holding the whitespace-delimited values on the ini-file line corresponding to the given parameter name */
- const PARAM_VEC *GetParam(const std::string &paramName) const;
-
- /** check if parameter is defined (either in moses.ini or as switch) */
- bool isParamSpecified(const std::string &paramName) const
- {
- return m_setting.find(paramName) != m_setting.end();
- }
-
- void OverwriteParam(const std::string &paramName, PARAM_VEC values);
-
- std::vector<float> GetWeights(const std::string &name);
- /** read-only access to the whole name -> weights map */
- const std::map<std::string, std::vector<float> > &GetAllWeights() const
- {
- return m_weights;
- }
- std::set<std::string> GetWeightNames() const;
-
- /** read-only access to the whole name -> values map */
- const PARAM_MAP &GetParams() const
- {
- return m_setting;
- }
-
- void Save(const std::string path);
-
- /** Set var from the first value of parameter `name`, converted with Scan<T>.
- * If GetParam() returns NULL or an empty vector, var is set to defaultValue.
- * Any values after the first are ignored.
- */
- template<typename T>
- void SetParameter(T &var, const std::string &name,
- const T &defaultValue) const
- {
- const PARAM_VEC *params = GetParam(name);
- if (params && params->size()) {
- var = Scan<T>(params->at(0));
- }
- else {
- var = defaultValue;
- }
- }
-
- void SetParameter(bool& var, std::string const& name);
-
- /** Set val to true iff parameter `name` has at least one value.
- * @return false (after printing an error to std::cerr, without a trailing
- * newline) if the parameter has more than one value; true otherwise.
- * Note that val is still true in the error case.
- */
- bool SetBooleanSwitch(bool& val, std::string const name)
- {
- // issues a warning if format is wrong
- const PARAM_VEC *params = GetParam(name);
- val = (params && params->size());
- if (val && params->size() != 1) {
- std::cerr << "ERROR: wrong format for switch -" << name;
- return false;
- }
- return true;
- }
-
-};
-
-//! Declares the bool specialisation of Parameter::SetParameter; this header does not define it.
-template<>
-void Parameter::SetParameter<bool>(bool &var, const std::string &name,
- const bool &defaultValue) const;
-
-}
-
diff --git a/contrib/moses2/legacy/Range.cpp b/contrib/moses2/legacy/Range.cpp
deleted file mode 100644
index 7186e4265..000000000
--- a/contrib/moses2/legacy/Range.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#include "Range.h"
-
-namespace Moses2
-{
-
-//! Print a range as "[start..end]"
-std::ostream& operator <<(std::ostream& out, const Range& range)
-{
-  return out << "[" << range.m_startPos << ".." << range.m_endPos << "]";
-}
-
-}
-
diff --git a/contrib/moses2/legacy/Range.h b/contrib/moses2/legacy/Range.h
deleted file mode 100644
index 76d720bed..000000000
--- a/contrib/moses2/legacy/Range.h
+++ /dev/null
@@ -1,123 +0,0 @@
-// $Id$
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#pragma once
-
-#include <cassert>
-#include <iostream>
-#include <boost/functional/hash.hpp>
-#include "Util2.h"
-#include "util/exception.hh"
-
-#ifdef WIN32
-#undef max
-#endif
-
-namespace Moses2
-{
-
-/***
- * Efficient version of Bitmap for contiguous ranges.
- * A range is a pair of positions [m_startPos, m_endPos]; the end position is
- * inclusive. A range whose two ends are both NOT_FOUND covers no words.
- */
-class Range
-{
-  friend std::ostream& operator <<(std::ostream& out, const Range& range);
-
-  // m_endPos is inclusive
-  size_t m_startPos, m_endPos;
-public:
-  //! Empty range. Both ends are set to NOT_FOUND so that a default-constructed
-  //! object is never read uninitialised and GetNumWordsCovered() returns 0.
-  inline explicit Range() :
-    m_startPos(NOT_FOUND), m_endPos(NOT_FOUND)
-  {
-  }
-  inline Range(size_t startPos, size_t endPos) :
-    m_startPos(startPos), m_endPos(endPos)
-  {
-  }
-  inline Range(const Range &copy) :
-    m_startPos(copy.GetStartPos()), m_endPos(copy.GetEndPos())
-  {
-  }
-
-  inline size_t GetStartPos() const
-  {
-    return m_startPos;
-  }
-  inline size_t GetEndPos() const
-  {
-    return m_endPos;
-  }
-
-  inline void SetStartPos(size_t val)
-  {
-    m_startPos = val;
-  }
-  inline void SetEndPos(size_t val)
-  {
-    m_endPos = val;
-  }
-
-  //! count of words translated; 0 when both ends are NOT_FOUND
-  inline size_t GetNumWordsCovered() const
-  {
-    // either both ends are set or neither is
-    assert(
-      (m_startPos == NOT_FOUND && m_endPos == NOT_FOUND) || (m_startPos != NOT_FOUND && m_endPos != NOT_FOUND));
-    return (m_startPos == NOT_FOUND) ? 0 : m_endPos - m_startPos + 1;
-  }
-
-  //! transitive comparison: order by start position, then by end position
-  inline bool operator<(const Range& x) const {
-    return (m_startPos<x.m_startPos
-            || (m_startPos==x.m_startPos && m_endPos<x.m_endPos));
-  }
-
-  //! equality operator: both ends must match
-  inline bool operator==(const Range& x) const {
-    return (m_startPos==x.m_startPos && m_endPos==x.m_endPos);
-  }
-
-  //! Whether two word ranges overlap or not (they share at least one position)
-  inline bool Overlap(const Range& x) const {
-    // disjoint iff x lies entirely before or entirely after this range
-    if ( x.m_endPos < m_startPos || x.m_startPos > m_endPos) return false;
-
-    return true;
-  }
-
-  //! Number of positions strictly between this range and x.
-  //! Throws (UTIL_THROW_IF2) if the two ranges overlap.
-  inline size_t GetNumWordsBetween(const Range& x) const {
-    UTIL_THROW_IF2(Overlap(x), "Overlapping ranges");
-
-    if (x.m_endPos < m_startPos) {
-      // x is to the left of this range
-      return m_startPos - x.m_endPos - 1;
-    }
-
-    // x is to the right of this range
-    return x.m_startPos - m_endPos - 1;
-  }
-
-};
-
-//! Hash of a range: the start position combined with the end position via boost::hash_combine
-inline size_t hash_value(const Range& range)
-{
-  size_t hashed = range.GetStartPos();
-  const size_t endPos = range.GetEndPos();
-  boost::hash_combine(hashed, endPos);
-  return hashed;
-}
-
-}
-
diff --git a/contrib/moses2/legacy/ThreadPool.cpp b/contrib/moses2/legacy/ThreadPool.cpp
deleted file mode 100644
index 3e159020b..000000000
--- a/contrib/moses2/legacy/ThreadPool.cpp
+++ /dev/null
@@ -1,150 +0,0 @@
-// $Id: ThreadPool.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2009 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-#include <stdio.h>
-#include <pthread.h>
-#include <pthread.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <unistd.h>
-
-#include "ThreadPool.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-// Report the error code `en` through perror(msg) and terminate the process.
-#define handle_error_en(en, msg) \
-  do { errno = en; perror(msg); exit(EXIT_FAILURE); } while (0)
-
-/** Create numThreads worker threads, each running ThreadPool::Execute().
- *  On Linux, if cpuAffinityOffset >= 0, worker i is pinned to a single CPU:
- *  the first to (cpuAffinityOffset mod #CPUs), each following one
- *  cpuAffinityIncr further on, wrapping around the number of online CPUs.
- *  The resulting affinity set of each thread is printed to cerr.
- */
-ThreadPool::ThreadPool(size_t numThreads, int cpuAffinityOffset,
-    int cpuAffinityIncr) :
-  m_stopped(false), m_stopping(false), m_queueLimit(0)
-{
-  // sysconf() returns -1 on failure; fall back to one CPU so that the modulo
-  // arithmetic below is always well defined.
-  const long onlineCPUs = sysconf(_SC_NPROCESSORS_ONLN);
-  const int numCPU = (onlineCPUs > 0) ? static_cast<int>(onlineCPUs) : 1;
-
-  // All CPU index arithmetic is done on signed ints. Mixing int with size_t
-  // would convert a negative offset/increment to a huge unsigned value before
-  // the modulo and yield a wrong CPU index.
-  int cpuInd = (cpuAffinityOffset >= 0) ? cpuAffinityOffset % numCPU : 0;
-
-  for (size_t i = 0; i < numThreads; ++i) {
-    boost::thread *thread = m_threads.create_thread(
-        boost::bind(&ThreadPool::Execute, this));
-
-#ifdef __linux
-    if (cpuAffinityOffset >= 0) {
-      int s;
-
-      boost::thread::native_handle_type handle = thread->native_handle();
-
-      //cerr << "numCPU=" << numCPU << endl;
-      cpu_set_t cpuset;
-      CPU_ZERO(&cpuset);
-
-      CPU_SET(cpuInd, &cpuset);
-
-      // advance to the CPU for the next thread, wrapping into [0, numCPU)
-      cpuInd = (cpuInd + cpuAffinityIncr % numCPU) % numCPU;
-      if (cpuInd < 0) {
-        cpuInd += numCPU;
-      }
-
-      s = pthread_setaffinity_np(handle, sizeof(cpu_set_t), &cpuset);
-      if (s != 0) {
-        handle_error_en(s, "pthread_setaffinity_np");
-        //cerr << "affinity error with thread " << i << endl;
-      }
-
-      // get affinity (diagnostic output only, so a failure here is not fatal)
-      CPU_ZERO(&cpuset);
-      s = pthread_getaffinity_np(handle, sizeof(cpu_set_t), &cpuset);
-      if (s != 0) {
-        cerr << "pthread_getaffinity_np failed for thread " << i << "\n";
-      }
-      else {
-        cerr << "Set returned by pthread_getaffinity_np() contained:\n";
-        for (int j = 0; j < CPU_SETSIZE; j++) {
-          if (CPU_ISSET(j, &cpuset)) {
-            cerr << " CPU " << j << "\n";
-          }
-        }
-      }
-    }
-#endif
-  }
-}
-
-/** Main loop of every worker thread.
- *  Repeatedly takes the task at the front of m_tasks and runs it, until the
- *  pool has been stopped. m_stopped is only read while m_mutex is held; the
- *  value seen there decides whether the loop ends, so the flag is never read
- *  concurrently with the write in Stop().
- */
-void ThreadPool::Execute()
-{
-  for (;;) {
-    boost::shared_ptr<Task> task;
-    bool stopped;
-    {
-      // Find a job to perform
-      boost::mutex::scoped_lock lock(m_mutex);
-      if (m_tasks.empty() && !m_stopped) {
-        // A wake-up without work is harmless: task stays empty and the
-        // loop comes back here.
-        m_threadNeeded.wait(lock);
-      }
-      stopped = m_stopped;
-      if (!stopped && !m_tasks.empty()) {
-        task = m_tasks.front();
-        m_tasks.pop();
-      }
-    }
-    //Execute job
-    if (task) {
-      // Original note: must read from task before run, otherwise task may be
-      // deleted by main thread (race condition). The returned flag is not
-      // used here.
-      task->DeleteAfterExecution();
-      task->Run();
-    }
-    // wake anyone waiting in Submit() or Stop() for the queue to shrink
-    m_threadAvailable.notify_all();
-
-    if (stopped) {
-      break;
-    }
-  }
-}
-
-/** Append a task to the queue and wake the worker threads.
- *  Throws runtime_error once Stop() has begun. While a queue limit is set and
- *  the queue is at that limit, blocks until a worker signals progress.
- */
-void ThreadPool::Submit(boost::shared_ptr<Task> task)
-{
-  boost::mutex::scoped_lock guard(m_mutex);
-
-  if (m_stopping) {
-    throw runtime_error("ThreadPool stopping - unable to accept new jobs");
-  }
-
-  // a limit of 0 means "no limit"
-  while (!(m_queueLimit == 0 || m_tasks.size() < m_queueLimit)) {
-    m_threadAvailable.wait(guard);
-  }
-
-  m_tasks.push(task);
-  m_threadNeeded.notify_all();
-}
-
-/** Shut the pool down and join all worker threads.
- *  Does nothing if the pool is already stopped. New submissions are refused
- *  from the start. If processRemainingJobs is true, waits for the queue to
- *  become empty before telling the workers to stop; otherwise tasks still in
- *  the queue are not taken by the workers.
- */
-void ThreadPool::Stop(bool processRemainingJobs)
-{
-  // phase 1: prevent more jobs from being added to the queue
-  {
-    boost::mutex::scoped_lock guard(m_mutex);
-    if (m_stopped) {
-      return;
-    }
-    m_stopping = true;
-  }
-
-  // phase 2 (optional): wait for queue to drain
-  if (processRemainingJobs) {
-    boost::mutex::scoped_lock guard(m_mutex);
-    for (;;) {
-      if (m_tasks.empty() || m_stopped) {
-        break;
-      }
-      m_threadAvailable.wait(guard);
-    }
-  }
-
-  // phase 3: tell all threads to stop
-  {
-    boost::mutex::scoped_lock guard(m_mutex);
-    m_stopped = true;
-  }
-  m_threadNeeded.notify_all();
-
-  m_threads.join_all();
-}
-
-}
-
diff --git a/contrib/moses2/legacy/ThreadPool.h b/contrib/moses2/legacy/ThreadPool.h
deleted file mode 100644
index 62a8f43ad..000000000
--- a/contrib/moses2/legacy/ThreadPool.h
+++ /dev/null
@@ -1,140 +0,0 @@
-// $Id: ThreadPool.h 3045 2010-04-05 13:07:29Z hieuhoang1972 $
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2009 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#pragma once
-
-#include <iostream>
-#include <queue>
-#include <vector>
-
-#include <boost/shared_ptr.hpp>
-
-#ifdef WITH_THREADS
-#include <boost/bind.hpp>
-#include <boost/thread.hpp>
-#endif
-
-#ifdef BOOST_HAS_PTHREADS
-#include <pthread.h>
-#endif
-
-//#include "Util.h"
-
-namespace Moses2
-{
-
-/**
- * Classes to implement a ThreadPool.
- **/
-
-/** A task to be executed by the ThreadPool.
- *  Subclasses implement Run(); DeleteAfterExecution() returns true unless overridden.
- */
-class Task
-{
-public:
-  //! the work to perform
-  virtual void Run() = 0;
-
-  virtual bool DeleteAfterExecution() { return true; }
-
-  virtual ~Task() {}
-};
-
-class ThreadPool
-{
-public:
- /**
- * Construct a thread pool of a fixed size.
- * cpuAffinityOffset defaults to -1 and cpuAffinityIncr to 1.
- **/
- explicit ThreadPool(size_t numThreads, int cpuAffinityOffset = -1,
- int cpuAffinityIncr = 1);
-
- /** Calls Stop() with its default argument (processRemainingJobs = false). */
- ~ThreadPool()
- {
- Stop();
- }
-
- /**
- * Add a job to the threadpool.
- **/
- void Submit(boost::shared_ptr<Task> task);
-
- /**
- * Shut down the ThreadPool.
- * processRemainingJobs defaults to false.
- **/
- void Stop(bool processRemainingJobs = false);
-
- /**
- * Set the value stored in m_queueLimit.
- * The assignment is made without taking m_mutex.
- **/
- void SetQueueLimit(size_t limit)
- {
- m_queueLimit = limit;
- }
-
-private:
- /**
- * The main loop executed by each thread.
- **/
- void Execute();
-
- std::queue<boost::shared_ptr<Task> > m_tasks;
- boost::thread_group m_threads;
- boost::mutex m_mutex;
- boost::condition_variable m_threadNeeded;
- boost::condition_variable m_threadAvailable;
- bool m_stopped;
- bool m_stopping;
- size_t m_queueLimit;
-};
-
-/** Task that only prints its id and the executing thread's id to std::cerr. */
-class TestTask: public Task
-{
-public:
- TestTask(int id) :
- m_id(id)
- {
- }
-
- virtual void Run()
- {
-#ifdef BOOST_HAS_PTHREADS
- pthread_t tid = pthread_self();
-#else
- // no pthreads: use a null placeholder so the print below still compiles
- typedef void * pthread_t;
- pthread_t tid = 0;
-#endif
- std::cerr << "Executing " << m_id << " in thread id " << tid << std::endl;
- }
-
- virtual ~TestTask()
- {
- }
-
-private:
- // identifier printed by Run()
- int m_id;
-};
-
-}
-
diff --git a/contrib/moses2/legacy/Timer.cpp b/contrib/moses2/legacy/Timer.cpp
deleted file mode 100644
index b1857ee0d..000000000
--- a/contrib/moses2/legacy/Timer.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-#include <iostream>
-#include <iomanip>
-#include "Timer.h"
-
-#include "util/usage.hh"
-
-namespace Moses2
-{
-
-//! A new timer is neither running nor stopped; both time stamps start at 0
-//! so that no member is ever read uninitialised.
-Timer::Timer() :
-  running(false), stopped(false), start_time(0), stop_time(0)
-{
-}
-
-/***
- * Return the wall time accumulated by the timer: up to the stop time if it is
- * stopped, up to now if it is running, and 0 if it was never started.
- */
-double Timer::get_elapsed_time() const
-{
-  if (!stopped && !running) {
-    return 0;
-  }
-  const double until = stopped ? stop_time : util::WallTime();
-  return until - start_time;
-}
-
-/***
- * Start a timer. If it is already running, let it continue running.
- * If it was stopped, resume it so that the time already accumulated is kept.
- * Print an optional message.
- */
-void Timer::start(const char* msg)
-{
-  // Print an optional message, something like "Starting timer t";
-  if (msg) {
-    std::cerr << msg << std::endl;
-  }
-
-  if (stopped) {
-    // Resume: shift the start time so that (now - start_time) equals the
-    // time that had been accumulated when the timer was stopped.
-    const double accumulated = stop_time - start_time;
-    start_time = util::WallTime() - accumulated;
-    stopped = false;
-  }
-  else if (!running) {
-    // First start
-    start_time = util::WallTime();
-    running = true;
-  }
-  // otherwise: already running, nothing to do
-}
-
-/***
- * Stop a timer. Has no effect on a timer that is not running or is already
- * stopped.
- * Print an optional message.
- */
-void Timer::stop(const char* msg)
-{
-  // Print an optional message, something like "Stopping timer t";
-  if (msg) {
-    std::cerr << msg << std::endl;
-  }
-
-  if (running && !stopped) {
-    // Record the stop time and mark the timer as stopped
-    stop_time = util::WallTime();
-    stopped = true;
-  }
-}
-
-/***
- * Print out an optional message followed by the current timer timing
- * (0 if the timer was never started) to std::cerr.
- */
-void Timer::check(const char* msg)
-{
-  // Print an optional message, something like "Checking timer t";
-  if (msg) {
-    std::cerr << msg << " : ";
-  }
-
-// VERBOSE(1, "[" << std::setiosflags(std::ios::fixed) << std::setprecision(2) << (running ? elapsed_time() : 0) << "] seconds\n");
-  const double seconds = running ? get_elapsed_time() : 0;
-  std::cerr << "[" << seconds << "] seconds\n";
-}
-
-/***
- * Allow timers to be printed to ostreams using the syntax 'os << t'
- * for an ostream 'os' and a timer 't'. Prints the elapsed time of 't',
- * or 0 if 't' was never started.
- */
-std::ostream& operator<<(std::ostream& os, Timer& t)
-{
-  //os << std::setprecision(2) << std::setiosflags(std::ios::fixed) << (t.running ? t.elapsed_time() : 0);
-  const double seconds = t.running ? t.get_elapsed_time() : 0;
-  os << seconds;
-  return os;
-}
-
-}
-
diff --git a/contrib/moses2/legacy/Timer.h b/contrib/moses2/legacy/Timer.h
deleted file mode 100644
index 3f44ef4b9..000000000
--- a/contrib/moses2/legacy/Timer.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#pragma once
-
-#include <ctime>
-#include <iostream>
-#include <iomanip>
-
-namespace Moses2
-{
-
-/** Stopwatch that times how long things have been running
- * according to walltime. We avoid CPU time since it is less reliable
- * in a multi-threaded environment and can spuriously include clock cycles
- * used by other threads in the same process.
- * Time stamps are stored as doubles.
- */
-class Timer
-{
- friend std::ostream& operator<<(std::ostream& os, Timer& t);
-
-private:
- bool running;
- bool stopped;
- double start_time;
- double stop_time;
-
-public:
- /***
- * 'running' is initially false. A timer needs to be explicitly started
- * using 'start'
- */
- Timer();
-
- // msg is optional in all three calls (defaults to a null pointer)
- void start(const char* msg = 0);
- void stop(const char* msg = 0);
- void check(const char* msg = 0);
- double get_elapsed_time() const;
-};
-
-}
-
diff --git a/contrib/moses2/legacy/Util2.cpp b/contrib/moses2/legacy/Util2.cpp
deleted file mode 100644
index ffc348090..000000000
--- a/contrib/moses2/legacy/Util2.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-#include "Util2.h"
-#include <algorithm>
-#include <cctype>
-#include <string>
-#include "util/exception.hh"
-
-namespace Moses2
-{
-
-//! Exception type thrown by Scan<bool> when the input is not a recognised boolean
-class BoolValueException: public util::Exception
-{
-};
-
-/** Parse a boolean, case-insensitively.
- *  yes / y / true / 1 give true; no / n / false / 0 give false;
- *  anything else throws BoolValueException.
- */
-template<>
-bool Scan<bool>(const std::string &input)
-{
-  const std::string lowered = ToLower(input);
-
-  const bool meansTrue = lowered == "yes" || lowered == "y"
-                         || lowered == "true" || lowered == "1";
-  if (meansTrue) {
-    return true;
-  }
-
-  const bool meansFalse = lowered == "no" || lowered == "n"
-                          || lowered == "false" || lowered == "0";
-  if (meansFalse) {
-    return false;
-  }
-
-  UTIL_THROW(BoolValueException,
-             "Could not interpret " << input << " as a boolean. After lowercasing, valid values are yes, y, true, 1, no, n, false, and 0.");
-}
-
-//! Lower-case a single char. The value is passed to std::tolower as an
-//! unsigned char because calling it with a negative char value is undefined.
-static char ToLowerChar(char c)
-{
-  return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
-}
-
-//! Return a copy of str with every character converted by std::tolower
-const std::string ToLower(const std::string& str)
-{
-  std::string lc(str);
-  std::transform(lc.begin(), lc.end(), lc.begin(), ToLowerChar);
-  return lc;
-}
-
-}
-
diff --git a/contrib/moses2/legacy/Util2.h b/contrib/moses2/legacy/Util2.h
deleted file mode 100644
index eef638f93..000000000
--- a/contrib/moses2/legacy/Util2.h
+++ /dev/null
@@ -1,351 +0,0 @@
-#pragma once
-
-#include <boost/thread.hpp>
-#include <boost/thread/mutex.hpp>
-#include <fstream>
-#include <iostream>
-#include <string>
-#include <limits>
-#include <sstream>
-#include <vector>
-#include <queue>
-#include <cmath>
-#include <stdlib.h>
-#include "../TypeDef.h"
-#include "util/exception.hh"
-
-namespace Moses2
-{
-
-/** Combined hash and equality functor for T and for pointers to T.
- *  The one-argument overloads return obj.hash(); the two-argument overloads
- *  compare with operator==. The pointer overloads dereference first.
- */
-template<typename T>
-class UnorderedComparer
-{
-public:
-  // --- by reference ---
-  size_t operator()(const T& obj) const
-  {
-    return obj.hash();
-  }
-
-  bool operator()(const T& a, const T& b) const
-  {
-    return a == b;
-  }
-
-  // --- by pointer ---
-  size_t operator()(const T* obj) const
-  {
-    const T &target = *obj;
-    return target.hash();
-  }
-
-  bool operator()(const T* a, const T* b) const
-  {
-    const T &lhs = *a;
-    const T &rhs = *b;
-    return lhs == rhs;
-  }
-
-};
-
-//! Assign val to each of the first `size` elements of arr
-template<typename T>
-void Init(T arr[], size_t size, const T &val)
-{
-  T *const end = arr + size;
-  for (T *cur = arr; cur != end; ++cur) {
-    *cur = val;
-  }
-}
-
-//! Return str without the leading and trailing characters that appear in
-//! dropChars (white space by default). Returns an empty string if str
-//! consists only of such characters.
-inline std::string Trim(const std::string& str, const std::string dropChars =
-                          " \t\n\r")
-{
-  const std::string::size_type first = str.find_first_not_of(dropChars);
-  if (first == std::string::npos) {
-    return std::string();
-  }
-  const std::string::size_type last = str.find_last_not_of(dropChars);
-  return str.substr(first, last - first + 1);
-}
-
-//! Convert string to variable of type T. Used for reading floats, ints etc from files.
-//! The result is value-initialised first: if the stream extraction does not
-//! run (e.g. empty input), the function returns T() rather than an
-//! uninitialised value.
-template<typename T>
-inline T Scan(const std::string &input)
-{
-  std::stringstream stream(input);
-  T ret = T();
-  stream >> ret;
-  return ret;
-}
-
-//! string -> string: returns the input unchanged
-template<>
-inline std::string Scan<std::string>(const std::string &input)
-{
-  return input;
-}
-
-//! SCORE is parsed with atof
-template<>
-inline SCORE Scan<SCORE>(const std::string &input)
-{
-  return atof(input.c_str());
-}
-
-//! Specialisation to understand yes/no y/n true/false 0/1
-template<>
-bool Scan<bool>(const std::string &input);
-
-//! The enum specialisations below read the input as a size_t and convert
-//! that number to the enum type.
-template<>
-inline S2TParsingAlgorithm Scan<S2TParsingAlgorithm>(const std::string &input)
-{
-  return static_cast<S2TParsingAlgorithm>(Scan<size_t>(input));
-}
-
-template<>
-inline SourceLabelOverlap Scan<SourceLabelOverlap>(const std::string &input)
-{
-  return static_cast<SourceLabelOverlap>(Scan<size_t>(input));
-}
-
-template<>
-inline SearchAlgorithm Scan<SearchAlgorithm>(const std::string &input)
-{
-  return static_cast<SearchAlgorithm>(Scan<size_t>(input));
-}
-
-//! XmlInputType is given by name; an unknown name throws (UTIL_THROW2)
-template<>
-inline XmlInputType Scan<XmlInputType>(const std::string &input)
-{
-  if (input == "exclusive") return XmlExclusive;
-  if (input == "inclusive") return XmlInclusive;
-  if (input == "constraint") return XmlConstraint;
-  if (input == "ignore") return XmlIgnore;
-  if (input == "pass-through") return XmlPassThrough;
-
-  UTIL_THROW2("Unknown XML input type");
-}
-
-template<>
-inline InputTypeEnum Scan<InputTypeEnum>(const std::string &input)
-{
-  return static_cast<InputTypeEnum>(Scan<size_t>(input));
-}
-
-template<>
-inline WordAlignmentSort Scan<WordAlignmentSort>(const std::string &input)
-{
-  return static_cast<WordAlignmentSort>(Scan<size_t>(input));
-}
-
-//! Convert a vector of strings to a vector of T, element by element, with Scan<T>
-template<typename T>
-inline std::vector<T> Scan(const std::vector<std::string> &input)
-{
-  std::vector<T> converted;
-  converted.reserve(input.size());
-  for (std::vector<std::string>::const_iterator item = input.begin();
-       item != input.end(); ++item) {
-    converted.push_back(Scan<T>(*item));
-  }
-  return converted;
-}
-
-//! Same conversion as above, but writes into a caller-supplied vector, which
-//! is resized to input.size(); previous contents are overwritten.
-template<typename T>
-inline void Scan(std::vector<T> &output, const std::vector<std::string> &input)
-{
-  const size_t count = input.size();
-  output.resize(count);
-  for (size_t idx = 0; idx != count; ++idx) {
-    output[idx] = Scan<T>(input[idx]);
-  }
-}
-
-/** Tokenise the input string into a vector of strings. Tokens are separated by
- *  any run of characters found in `delimiters` (each separator is one
- *  character; the default delimiters are space and tab). Empty tokens are
- *  never produced.
- */
-inline std::vector<std::string> Tokenize(const std::string& str,
-    const std::string& delimiters = " \t")
-{
-  std::vector<std::string> tokens;
-
-  // start of the first token (skips leading delimiters)
-  std::string::size_type begin = str.find_first_not_of(delimiters);
-
-  while (begin != std::string::npos) {
-    // one past the end of the current token
-    const std::string::size_type end = str.find_first_of(delimiters, begin);
-    if (end == std::string::npos) {
-      // last token runs to the end of the string
-      tokens.push_back(str.substr(begin));
-      break;
-    }
-    tokens.push_back(str.substr(begin, end - begin));
-    // start of the next token (skips the delimiter run)
-    begin = str.find_first_not_of(delimiters, end);
-  }
-
-  return tokens;
-}
-
-//! Tokenise the input string, then convert every token to T with Scan<T>
-template<typename T>
-inline std::vector<T> Tokenize(const std::string &input,
-                               const std::string& delimiters = " \t")
-{
-  const std::vector<std::string> pieces = Tokenize(input, delimiters);
-  return Scan<T>(pieces);
-}
-
-/** Split only at the first delimiter character. Used by class FeatureFunction
- *  to parse a key=value pair, where the value may itself contain '='.
- *  Returns two elements (text before and after the delimiter) if a delimiter
- *  is found, otherwise a single element holding the whole string.
- */
-inline std::vector<std::string> TokenizeFirstOnly(const std::string& str,
-    const std::string& delimiters = " \t")
-{
-  const std::string::size_type split = str.find_first_of(delimiters);
-
-  if (split == std::string::npos) {
-    // no delimiter: the whole string is the only token
-    return std::vector<std::string>(1, str);
-  }
-
-  std::vector<std::string> tokens;
-  tokens.push_back(str.substr(0, split));
-  tokens.push_back(str.substr(split + 1));
-  return tokens;
-}
-
-/** Split str at every occurrence of the (possibly multi-character) separator.
- *  Unlike Tokenize(), adjacent separators produce empty tokens, and the result
- *  always has at least one element. An empty separator never matches: the
- *  whole string is returned as a single token (without this guard the search
- *  below would never advance).
- */
-inline std::vector<std::string> TokenizeMultiCharSeparator(
-  const std::string& str, const std::string& separator)
-{
-  std::vector<std::string> tokens;
-
-  if (separator.empty()) {
-    tokens.push_back(str);
-    return tokens;
-  }
-
-  size_t pos = 0;
-  // Find the first separator.
-  std::string::size_type nextPos = str.find(separator, pos);
-
-  while (nextPos != std::string::npos) {
-    // Found a token, add it to the vector.
-    tokens.push_back(str.substr(pos, nextPos - pos));
-    // Continue right after the separator.
-    pos = nextPos + separator.size();
-    // Find the next separator.
-    nextPos = str.find(separator, pos);
-  }
-  // Whatever follows the last separator is the final token.
-  tokens.push_back(str.substr(pos));
-
-  return tokens;
-}
-
-/** Variant of the above that appends to a caller-supplied vector.
- *  Differences from the returning version: every token is passed through
- *  Trim() before it is stored, and `output` is not cleared first.
- *  An empty separator never matches: the whole (trimmed) string is appended
- *  as a single token (without this guard the search below would never advance).
- */
-inline void TokenizeMultiCharSeparator(std::vector<std::string> &output,
-                                       const std::string& str, const std::string& separator)
-{
-  if (separator.empty()) {
-    output.push_back(Trim(str));
-    return;
-  }
-
-  size_t pos = 0;
-  // Find the first separator.
-  std::string::size_type nextPos = str.find(separator, pos);
-
-  while (nextPos != std::string::npos) {
-    // Found a token, add it to the vector.
-    output.push_back(Trim(str.substr(pos, nextPos - pos)));
-    // Continue right after the separator.
-    pos = nextPos + separator.size();
-    // Find the next separator.
-    nextPos = str.find(separator, pos);
-  }
-  // Whatever follows the last separator is the final token.
-  output.push_back(Trim(str.substr(pos)));
-}
-
-//! Return the text produced by streaming `input` with operator<<
-template<typename T>
-inline std::string SPrint(const T &input)
-{
-  std::ostringstream buffer;
-  buffer << input;
-  return buffer.str();
-}
-
-//! IRST scores are in log base 10; convert to natural log by multiplying by ln(10)
-inline float TransformLMScore(float irstScore)
-{
-  const float ln10 = 2.30258509299405f;
-  return irstScore * ln10;
-}
-
-//! Natural logarithm of a probability
-inline float TransformScore(float prob)
-{
-  const float logProb = log(prob);
-  return logProb;
-}
-
-//! Clamp from below: returns LOWEST_SCORE if logScore is smaller, else logScore
-inline float FloorScore(float logScore)
-{
-  return (logScore < LOWEST_SCORE) ? LOWEST_SCORE : logScore;
-}
-
-//! Inverse of TransformLMScore: natural log back to log base 10 (divide by ln(10))
-inline float UntransformLMScore(float logNScore)
-{
-  const float ln10 = 2.30258509299405f;
-  return logNScore / ln10;
-}
-
-//! True if the file at filePath can be opened for reading
-inline bool FileExists(const std::string& filePath)
-{
-  std::ifstream probe(filePath.c_str());
-  if (probe.fail()) {
-    return false;
-  }
-  return true;
-}
-
-const std::string ToLower(const std::string& str);
-
-//! `delete` every element of a collection of pointers (set, list, vector etc),
-//! then empty the collection
-template<class COLL>
-void RemoveAllInColl(COLL &coll)
-{
-  typename COLL::const_iterator cur = coll.begin();
-  while (cur != coll.end()) {
-    delete (*cur);
-    ++cur;
-  }
-  coll.clear();
-}
-
-//! Exchange the values of a and b.
-template<typename T>
-void Swap(T &a, T &b)
-{
-  // The temporary must be a copy, not a reference: a reference to `a` would
-  // alias it, so after `a = b` the old value of `a` would be gone and both
-  // arguments would end up holding b's value.
-  T c = a;
-  a = b;
-  b = c;
-}
-
-//! Return the calling thread's object held by `coll`, default-constructing
-//! and storing one first if the thread has none yet
-template<typename T>
-T &GetThreadSpecificObj(boost::thread_specific_ptr<T> &coll)
-{
-  if (coll.get() == NULL) {
-    coll.reset(new T);
-  }
-  T *obj = coll.get();
-  assert(obj);
-  return *obj;
-}
-
-// Grab the underlying container of a priority queue.
-// std::priority_queue keeps its container in the protected member `c`.
-// HackedQueue derives from the queue type so that it may name that member,
-// and applies the resulting pointer-to-member to the caller's queue.
-template<class T, class S, class C>
-S& Container(std::priority_queue<T, S, C>& q)
-{
- struct HackedQueue: private std::priority_queue<T, S, C>
- {
- static S& Container(std::priority_queue<T, S, C>& q)
- {
- // pointer-to-member access to the protected container `c`
- return q.*&HackedQueue::c;
- }
- };
- return HackedQueue::Container(q);
-}
-
-#define HERE __FILE__ << ":" << __LINE__
-
-/** Enforce rounding: set the stream's fixed flag and use `size` digits of
- *  precision (3 by default) */
-inline void FixPrecision(std::ostream& stream, size_t size = 3)
-{
-  stream.precision(static_cast<std::streamsize>(size));
-  stream.setf(std::ios_base::fixed);
-}
-
-}
-
diff --git a/contrib/moses2/legacy/gzfilebuf.h b/contrib/moses2/legacy/gzfilebuf.h
deleted file mode 100644
index ea7021757..000000000
--- a/contrib/moses2/legacy/gzfilebuf.h
+++ /dev/null
@@ -1,101 +0,0 @@
-#ifndef moses_gzfile_buf_h
-#define moses_gzfile_buf_h
-
-#include <stdexcept>
-#include <streambuf>
-#include <zlib.h>
-#include <cstring>
-
-namespace Moses2
-{
-
-/** wrapper around gzip input stream. Unknown parentage
- * @todo replace with boost version - output stream already uses it
- */
-class gzfilebuf: public std::streambuf
-{
-public:
- gzfilebuf(const char *filename)
- {
- _gzf = gzopen(filename, "rb");
- if (!_gzf) throw std::runtime_error(
- "Could not open " + std::string(filename) + ".");
- setg(_buff + sizeof(int), // beginning of putback area
- _buff + sizeof(int), // read position
- _buff + sizeof(int)); // end position
- }
- ~gzfilebuf()
- {
- gzclose(_gzf);
- }
-protected:
- virtual int_type overflow(int_type /* c */)
- {
- throw;
- }
-
- // write multiple characters
- virtual std::streamsize xsputn(const char* /* s */, std::streamsize /* num */)
- {
- throw;
- }
-
- virtual std::streampos seekpos(std::streampos /* sp */,
- std::ios_base::openmode /* which = std::ios_base::in | std::ios_base::out */)
- {
- throw;
- }
-
- //read one character
- virtual int_type underflow()
- {
- // is read position before end of _buff?
- if (gptr() < egptr()) {
- return traits_type::to_int_type(*gptr());
- }
-
- /* process size of putback area
- * - use number of characters read
- * - but at most four
- */
- unsigned int numPutback = gptr() - eback();
- if (numPutback > sizeof(int)) {
- numPutback = sizeof(int);
- }
-
- /* copy up to four characters previously read into
- * the putback _buff (area of first four characters)
- */
- std::memmove(_buff + (sizeof(int) - numPutback), gptr() - numPutback,
- numPutback);
-
- // read new characters
- int num = gzread(_gzf, _buff + sizeof(int), _buffsize - sizeof(int));
- if (num <= 0) {
- // ERROR or EOF
- return EOF;
- }
-
- // reset _buff pointers
- setg(_buff + (sizeof(int) - numPutback), // beginning of putback area
- _buff + sizeof(int), // read position
- _buff + sizeof(int) + num); // end of buffer
-
- // return next character
- return traits_type::to_int_type(*gptr());
- }
-
- std::streamsize xsgetn(char* s, std::streamsize num)
- {
- return gzread(_gzf, s, num);
- }
-
-private:
- gzFile _gzf;
- static const unsigned int _buffsize = 1024;
- char _buff[_buffsize];
-};
-
-}
-
-#endif
diff --git a/contrib/moses2/parameters/AllOptions.cpp b/contrib/moses2/parameters/AllOptions.cpp
deleted file mode 100644
index c4171d807..000000000
--- a/contrib/moses2/parameters/AllOptions.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#include "../legacy/Parameter.h"
-#include "AllOptions.h"
-
-namespace Moses2
-{
- AllOptions::
- AllOptions()
- : mira(false)
- , use_legacy_pt(false)
- { }
-
- AllOptions::
- AllOptions(Parameter const& param)
- {
- init(param);
- }
-
- bool
- AllOptions::
- init(Parameter const& param)
- {
- if (!search.init(param)) return false;
- if (!cube.init(param)) return false;
- if (!nbest.init(param)) return false;
- if (!reordering.init(param)) return false;
- if (!context.init(param)) return false;
- if (!input.init(param)) return false;
- if (!mbr.init(param)) return false;
- if (!lmbr.init(param)) return false;
- if (!output.init(param)) return false;
- if (!unk.init(param)) return false;
- if (!server.init(param)) return false;
- if (!syntax.init(param)) return false;
-
- param.SetParameter(mira, "mira", false);
-
- return sanity_check();
- }
-
- bool
- AllOptions::
- sanity_check()
- {
- using namespace std;
- if (lmbr.enabled)
- {
- if (mbr.enabled)
- {
- cerr << "Error: Cannot use both n-best mbr and lattice mbr together" << endl;
- return false;
- }
- mbr.enabled = true;
- }
- if (search.consensus)
- {
- if (mbr.enabled)
- {
- cerr << "Error: Cannot use consensus decoding together with mbr"
- << endl;
- return false;
- }
- mbr.enabled = true;
- }
-
- // RecoverPath should only be used with confusion net or word lattice input
- if (output.RecoverPath && input.input_type == SentenceInput)
- {
- TRACE_ERR("--recover-input-path should only be used with "
- <<"confusion net or word lattice input!\n");
- output.RecoverPath = false;
- }
-
- // set m_nbest_options.enabled = true if necessary:
- nbest.enabled = (nbest.enabled || mira || search.consensus
- || nbest.nbest_size > 0
- || mbr.enabled || lmbr.enabled
- || !output.SearchGraph.empty()
- || !output.SearchGraphExtended.empty()
- || !output.SearchGraphSLF.empty()
- || !output.SearchGraphHG.empty()
- || !output.SearchGraphPB.empty()
- || output.lattice_sample_size != 0);
-
- return true;
- }
-
-#ifdef HAVE_XMLRPC_C
- bool
- AllOptions::
- update(std::map<std::string,xmlrpc_c::value>const& param)
- {
- if (!search.update(param)) return false;
- if (!cube.update(param)) return false;
- if (!nbest.update(param)) return false;
- if (!reordering.update(param)) return false;
- if (!context.update(param)) return false;
- if (!input.update(param)) return false;
- if (!mbr.update(param)) return false;
- if (!lmbr.update(param)) return false;
- if (!output.update(param)) return false;
- if (!unk.update(param)) return false;
- if (!server.update(param)) return false;
- //if (!syntax.update(param)) return false;
- return sanity_check();
- }
-#endif
-
- bool
- AllOptions::
- NBestDistinct() const
- {
- return (nbest.only_distinct
- || mbr.enabled || lmbr.enabled
- || output.lattice_sample_size
- || !output.SearchGraph.empty()
- || !output.SearchGraphExtended.empty()
- || !output.SearchGraphSLF.empty()
- || !output.SearchGraphHG.empty());
- }
-
-
-}
diff --git a/contrib/moses2/parameters/AllOptions.h b/contrib/moses2/parameters/AllOptions.h
deleted file mode 100644
index 694a8a347..000000000
--- a/contrib/moses2/parameters/AllOptions.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include <boost/shared_ptr.hpp>
-#include "OptionsBaseClass.h"
-#include "SearchOptions.h"
-#include "CubePruningOptions.h"
-#include "NBestOptions.h"
-#include "ReorderingOptions.h"
-#include "ContextParameters.h"
-#include "InputOptions.h"
-#include "MBR_Options.h"
-#include "LMBR_Options.h"
-#include "ReportingOptions.h"
-#include "OOVHandlingOptions.h"
-#include "ServerOptions.h"
-#include "SyntaxOptions.h"
-
-namespace Moses2
-{
- struct
- AllOptions : public OptionsBaseClass
- {
- typedef boost::shared_ptr<AllOptions const> ptr;
- SearchOptions search;
- CubePruningOptions cube;
- NBestOptions nbest;
- ReorderingOptions reordering;
- ContextParameters context;
- InputOptions input;
- MBR_Options mbr;
- LMBR_Options lmbr;
- ReportingOptions output;
- OOVHandlingOptions unk;
- ServerOptions server;
- SyntaxOptions syntax;
- bool mira;
- bool use_legacy_pt;
- // StackOptions stack;
- // BeamSearchOptions beam;
- bool init(Parameter const& param);
- bool sanity_check();
- AllOptions();
- AllOptions(Parameter const& param);
-
- bool update(std::map<std::string,xmlrpc_c::value>const& param);
- bool NBestDistinct() const;
-
- };
-
-}
diff --git a/contrib/moses2/parameters/BeamSearchOptions.h b/contrib/moses2/parameters/BeamSearchOptions.h
deleted file mode 100644
index d67c43438..000000000
--- a/contrib/moses2/parameters/BeamSearchOptions.h
+++ /dev/null
@@ -1,15 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include "OptionsBaseClass.h"
-namespace Moses2
-{
-
- struct
- BeamSearchOptions : public OptionsBaseClass
- {
- bool init(Parameter const& param);
- BeamSearchOptions(Parameter const& param);
- };
-
-}
diff --git a/contrib/moses2/parameters/BookkeepingOptions.cpp b/contrib/moses2/parameters/BookkeepingOptions.cpp
deleted file mode 100644
index d54f84644..000000000
--- a/contrib/moses2/parameters/BookkeepingOptions.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
-#include "BookkeepingOptions.h"
-#include "../legacy/Parameter.h"
-
-namespace Moses2
-{
-
-bool
-BookkeepingOptions::
-init(Parameter const& P)
-{
- bool& x = need_alignment_info;
- P.SetParameter(x, "print-alignment-info", false);
- if (!x) P.SetParameter(x, "print-alignment-info-in-n-best", false);
- if (!x) {
- PARAM_VEC const* params = P.GetParam("alignment-output-file");
- x = params && params->size();
- }
- return true;
-}
-
-BookkeepingOptions::
-BookkeepingOptions()
- : need_alignment_info(false)
-{ }
-
-}
diff --git a/contrib/moses2/parameters/BookkeepingOptions.h b/contrib/moses2/parameters/BookkeepingOptions.h
deleted file mode 100644
index ad7c78301..000000000
--- a/contrib/moses2/parameters/BookkeepingOptions.h
+++ /dev/null
@@ -1,18 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include "OptionsBaseClass.h"
-
-namespace Moses2
-{
-class Parameter;
-
- struct BookkeepingOptions : public OptionsBaseClass
- {
- bool need_alignment_info;
- bool init(Parameter const& param);
- BookkeepingOptions();
- };
-
-
-
-}
diff --git a/contrib/moses2/parameters/ContextParameters.cpp b/contrib/moses2/parameters/ContextParameters.cpp
deleted file mode 100644
index 144692399..000000000
--- a/contrib/moses2/parameters/ContextParameters.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-#include "ContextParameters.h"
-#include "moses/Util.h"
-#include "../legacy/Parameter.h"
-
-namespace Moses2
-{
-
-ContextParameters::
-ContextParameters()
- : look_ahead(0), look_back(0)
-{ }
-
-bool
-ContextParameters::
-init(Parameter const& params)
-{
- look_back = look_ahead = 0;
- params.SetParameter(context_string, "context-string", std::string(""));
- std::string context_window;
- params.SetParameter(context_window, "context-window", std::string(""));
-
- if (context_window == "")
- return true;
-
- if (context_window.substr(0,3) == "all")
- {
- look_back = look_ahead = std::numeric_limits<size_t>::max();
- return true;
- }
-
- size_t p = context_window.find_first_of("0123456789");
- if (p == 0)
- look_back = look_ahead = atoi(context_window.c_str());
-
- if (p == 1) {
- if (context_window[0] == '-')
- look_back = atoi(context_window.substr(1).c_str());
- else if (context_window[0] == '+')
- look_ahead = atoi(context_window.substr(1).c_str());
- else
- UTIL_THROW2("Invalid specification of context window.");
- }
-
- if (p == 2) {
- if (context_window.substr(0,2) == "+-" ||
- context_window.substr(0,2) == "-+")
- look_back = look_ahead = atoi(context_window.substr(p).c_str());
- else
- UTIL_THROW2("Invalid specification of context window.");
- }
- return true;
-}
-}
diff --git a/contrib/moses2/parameters/ContextParameters.h b/contrib/moses2/parameters/ContextParameters.h
deleted file mode 100644
index 54923c548..000000000
--- a/contrib/moses2/parameters/ContextParameters.h
+++ /dev/null
@@ -1,21 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include "moses/TypeDef.h"
-#include "moses/Util.h"
-#include "OptionsBaseClass.h"
-
-namespace Moses2
-{
-
-class ContextParameters : public OptionsBaseClass
-{
-public:
- ContextParameters();
- bool init(Parameter const& params);
- size_t look_ahead; // # of words to look ahead for context-sensitive decoding
- size_t look_back; // # of works to look back for context-sensitive decoding
- std::string context_string; // fixed context string specified on command line
-};
-
-}
diff --git a/contrib/moses2/parameters/CubePruningOptions.cpp b/contrib/moses2/parameters/CubePruningOptions.cpp
deleted file mode 100644
index 35663e61d..000000000
--- a/contrib/moses2/parameters/CubePruningOptions.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#include "CubePruningOptions.h"
-#include "../TypeDef.h"
-#include "../legacy/Parameter.h"
-
-namespace Moses2
-{
-
- CubePruningOptions::
- CubePruningOptions()
- : pop_limit(DEFAULT_CUBE_PRUNING_POP_LIMIT)
- , diversity(DEFAULT_CUBE_PRUNING_DIVERSITY)
- , lazy_scoring(false)
- , deterministic_search(false)
- {}
-
- bool
- CubePruningOptions::
- init(Parameter const& param)
- {
- param.SetParameter(pop_limit, "cube-pruning-pop-limit",
- DEFAULT_CUBE_PRUNING_POP_LIMIT);
- param.SetParameter(diversity, "cube-pruning-diversity",
- DEFAULT_CUBE_PRUNING_DIVERSITY);
- param.SetParameter(lazy_scoring, "cube-pruning-lazy-scoring", false);
- //param.SetParameter(deterministic_search, "cube-pruning-deterministic-search", false);
- return true;
- }
-
-#ifdef HAVE_XMLRPC_C
- bool
- CubePruningOptions::
- update(std::map<std::string,xmlrpc_c::value>const& params)
- {
- typedef std::map<std::string, xmlrpc_c::value> params_t;
-
- params_t::const_iterator si = params.find("cube-pruning-pop-limit");
- if (si != params.end()) pop_limit = xmlrpc_c::value_int(si->second);
-
- si = params.find("cube-pruning-diversity");
- if (si != params.end()) diversity = xmlrpc_c::value_int(si->second);
-
- si = params.find("cube-pruning-lazy-scoring");
- if (si != params.end())
- {
- std::string spec = xmlrpc_c::value_string(si->second);
- if (spec == "true" or spec == "on" or spec == "1")
- lazy_scoring = true;
- else if (spec == "false" or spec == "off" or spec == "0")
- lazy_scoring = false;
- else
- {
- char const* msg
- = "Error parsing specification for cube-pruning-lazy-scoring";
- xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE);
- }
- }
-
- si = params.find("cube-pruning-deterministic-search");
- if (si != params.end())
- {
- std::string spec = xmlrpc_c::value_string(si->second);
- if (spec == "true" or spec == "on" or spec == "1")
- deterministic_search = true;
- else if (spec == "false" or spec == "off" or spec == "0")
- deterministic_search = false;
- else
- {
- char const* msg
- = "Error parsing specification for cube-pruning-deterministic-search";
- xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE);
- }
- }
-
- return true;
- }
-#endif
-
-
-}
diff --git a/contrib/moses2/parameters/CubePruningOptions.h b/contrib/moses2/parameters/CubePruningOptions.h
deleted file mode 100644
index 2e9c898dc..000000000
--- a/contrib/moses2/parameters/CubePruningOptions.h
+++ /dev/null
@@ -1,25 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include "OptionsBaseClass.h"
-
-namespace Moses2
-{
-
- struct
- CubePruningOptions : public OptionsBaseClass
- {
- size_t pop_limit;
- size_t diversity;
- bool lazy_scoring;
- bool deterministic_search;
-
- bool init(Parameter const& param);
- CubePruningOptions(Parameter const& param);
- CubePruningOptions();
-
- bool
- update(std::map<std::string,xmlrpc_c::value>const& params);
- };
-
-}
diff --git a/contrib/moses2/parameters/InputOptions.cpp b/contrib/moses2/parameters/InputOptions.cpp
deleted file mode 100644
index c008e98c4..000000000
--- a/contrib/moses2/parameters/InputOptions.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#include "InputOptions.h"
-#include <vector>
-#include <iostream>
-// #include "moses/StaticData.h"
-#include "moses/TypeDef.h"
-#include "../legacy/Parameter.h"
-
-namespace Moses2
-{
-
- InputOptions::
- InputOptions()
- : continue_partial_translation(false)
- , input_type(SentenceInput)
- , xml_policy(XmlPassThrough)
- , placeholder_factor(NOT_FOUND)
- {
- xml_brackets.first = "<";
- xml_brackets.second = ">";
- factor_order.assign(1,0);
- factor_delimiter = "|";
- }
-
- bool
- InputOptions::
- init(Parameter const& param)
- {
- param.SetParameter(input_type, "inputtype", SentenceInput);
-#if 0
- if (input_type == SentenceInput)
- { VERBOSE(2, "input type is: text input"); }
- else if (input_type == ConfusionNetworkInput)
- { VERBOSE(2, "input type is: confusion net"); }
- else if (input_type == WordLatticeInput)
- { VERBOSE(2, "input type is: word lattice"); }
- else if (input_type == TreeInputType)
- { VERBOSE(2, "input type is: tree"); }
- else if (input_type == TabbedSentenceInput)
- { VERBOSE(2, "input type is: tabbed sentence"); }
- else if (input_type == ForestInputType)
- { VERBOSE(2, "input type is: forest"); }
-#endif
-
-
- param.SetParameter(continue_partial_translation,
- "continue-partial-translation", false);
-
- param.SetParameter<XmlInputType>(xml_policy, "xml-input", XmlPassThrough);
-
- // specify XML tags opening and closing brackets for XML option
- // Do we really want this to be configurable???? UG
- const PARAM_VEC *pspec;
- pspec = param.GetParam("xml-brackets");
- if (pspec && pspec->size())
- {
- std::vector<std::string> brackets = Tokenize(pspec->at(0));
- if(brackets.size()!=2)
- {
- std::cerr << "invalid xml-brackets value, "
- << "must specify exactly 2 blank-delimited strings "
- << "for XML tags opening and closing brackets"
- << std::endl;
- exit(1);
- }
-
- xml_brackets.first= brackets[0];
- xml_brackets.second=brackets[1];
-
-#if 0
- VERBOSE(1,"XML tags opening and closing brackets for XML input are: "
- << xml_brackets.first << " and "
- << xml_brackets.second << std::endl);
-#endif
- }
-
- pspec = param.GetParam("input-factors");
- if (pspec) factor_order = Scan<FactorType>(*pspec);
- if (factor_order.empty()) factor_order.assign(1,0);
- param.SetParameter(placeholder_factor, "placeholder-factor", NOT_FOUND);
-
- param.SetParameter<std::string>(factor_delimiter, "factor-delimiter", "|");
- param.SetParameter<std::string>(input_file_path,"input-file","");
-
- return true;
- }
-
-
-#ifdef HAVE_XMLRPC_C
- bool
- InputOptions::
- update(std::map<std::string,xmlrpc_c::value>const& param)
- {
- typedef std::map<std::string, xmlrpc_c::value> params_t;
- params_t::const_iterator si = param.find("xml-input");
- if (si != param.end())
- xml_policy = Scan<XmlInputType>(xmlrpc_c::value_string(si->second));
- return true;
- }
-#endif
-
-}
diff --git a/contrib/moses2/parameters/InputOptions.h b/contrib/moses2/parameters/InputOptions.h
deleted file mode 100644
index dd3be80e1..000000000
--- a/contrib/moses2/parameters/InputOptions.h
+++ /dev/null
@@ -1,32 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include <string>
-#include "OptionsBaseClass.h"
-#include "../TypeDef.h"
-
-namespace Moses2
-{
- struct
- InputOptions : public OptionsBaseClass
- {
- bool continue_partial_translation;
- InputTypeEnum input_type;
- XmlInputType xml_policy; // pass through, ignore, exclusive, inclusive
- std::vector<FactorType> factor_order; // input factor order
- std::string factor_delimiter;
- FactorType placeholder_factor; // where to store original text for placeholders
- std::string input_file_path;
- std::pair<std::string,std::string> xml_brackets;
- // strings to use as XML tags' opening and closing brackets.
- // Default are "<" and ">"
-
- InputOptions();
-
- bool init(Parameter const& param);
- bool update(std::map<std::string,xmlrpc_c::value>const& param);
-
- };
-
-}
-
diff --git a/contrib/moses2/parameters/LMBR_Options.cpp b/contrib/moses2/parameters/LMBR_Options.cpp
deleted file mode 100644
index 25febd616..000000000
--- a/contrib/moses2/parameters/LMBR_Options.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#include "LMBR_Options.h"
-#include "../legacy/Parameter.h"
-
-namespace Moses2
-{
-
- LMBR_Options::
- LMBR_Options()
- : enabled(false)
- , use_lattice_hyp_set(false)
- , precision(0.8f)
- , ratio(0.6f)
- , map_weight(0.8f)
- , pruning_factor(30)
- { }
-
- bool
- LMBR_Options::
- init(Parameter const& param)
- {
- param.SetParameter(enabled, "lminimum-bayes-risk", false);
-
- param.SetParameter(ratio, "lmbr-r", 0.6f);
- param.SetParameter(precision, "lmbr-p", 0.8f);
- param.SetParameter(map_weight, "lmbr-map-weight", 0.0f);
- param.SetParameter(pruning_factor, "lmbr-pruning-factor", size_t(30));
- param.SetParameter(use_lattice_hyp_set, "lattice-hypo-set", false);
-
- PARAM_VEC const* params = param.GetParam("lmbr-thetas");
- if (params) theta = Scan<float>(*params);
-
- return true;
- }
-
-
-
-
-}
diff --git a/contrib/moses2/parameters/LMBR_Options.h b/contrib/moses2/parameters/LMBR_Options.h
deleted file mode 100644
index c084f04b9..000000000
--- a/contrib/moses2/parameters/LMBR_Options.h
+++ /dev/null
@@ -1,26 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include <vector>
-#include "OptionsBaseClass.h"
-
-namespace Moses2
-{
-
- // Options for mimum bayes risk decoding
- struct
- LMBR_Options : public OptionsBaseClass
- {
- bool enabled;
- bool use_lattice_hyp_set; //! to use nbest as hypothesis set during lattice MBR
- float precision; //! unigram precision theta - see Tromble et al 08 for more details
- float ratio; //! decaying factor for ngram thetas - see Tromble et al 08
- float map_weight; //! Weight given to the map solution. See Kumar et al 09
- size_t pruning_factor; //! average number of nodes per word wanted in pruned lattice
- std::vector<float> theta; //! theta(s) for lattice mbr calculation
- bool init(Parameter const& param);
- LMBR_Options();
- };
-
-}
-
diff --git a/contrib/moses2/parameters/LookupOptions.h b/contrib/moses2/parameters/LookupOptions.h
deleted file mode 100644
index 3728d97d1..000000000
--- a/contrib/moses2/parameters/LookupOptions.h
+++ /dev/null
@@ -1,17 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include "OptionsBaseClass.h"
-
-namespace Moses2
-{
-
- struct
- LookupOptions : public OptionsBaseClass
- {
- bool init(Parameter const& param);
- LookupOptions() {}
- };
-
-}
-
diff --git a/contrib/moses2/parameters/MBR_Options.cpp b/contrib/moses2/parameters/MBR_Options.cpp
deleted file mode 100644
index 669ee94cc..000000000
--- a/contrib/moses2/parameters/MBR_Options.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#include "MBR_Options.h"
-#include "../legacy/Parameter.h"
-
-namespace Moses2
-{
-
- MBR_Options::
- MBR_Options()
- : enabled(false)
- , size(200)
- , scale(1.0f)
- {}
-
-
- bool
- MBR_Options::
- init(Parameter const& param)
- {
- param.SetParameter(enabled, "minimum-bayes-risk", false);
- param.SetParameter<size_t>(size, "mbr-size", 200);
- param.SetParameter(scale, "mbr-scale", 1.0f);
- return true;
- }
-
-}
diff --git a/contrib/moses2/parameters/MBR_Options.h b/contrib/moses2/parameters/MBR_Options.h
deleted file mode 100644
index 47ff45551..000000000
--- a/contrib/moses2/parameters/MBR_Options.h
+++ /dev/null
@@ -1,21 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include "OptionsBaseClass.h"
-namespace Moses2
-{
-
- // Options for mimum bayes risk decoding
- struct
- MBR_Options : public OptionsBaseClass
- {
- bool enabled;
- size_t size; //! number of translation candidates considered
- float scale; /*! scaling factor for computing marginal probability
- * of candidate translation */
- bool init(Parameter const& param);
- MBR_Options();
- };
-
-}
-
diff --git a/contrib/moses2/parameters/NBestOptions.cpp b/contrib/moses2/parameters/NBestOptions.cpp
deleted file mode 100644
index 0536793b8..000000000
--- a/contrib/moses2/parameters/NBestOptions.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#include "NBestOptions.h"
-#include "../legacy/Parameter.h"
-
-namespace Moses2
-{
-
- NBestOptions::
- NBestOptions()
- : nbest_size(0)
- , factor(20)
- , enabled(false)
- , print_trees(false)
- , only_distinct(false)
- , include_alignment_info(false)
- , include_feature_labels(true)
- , include_segmentation(false)
- , include_passthrough(false)
- , include_all_factors(false)
- {}
-
-
-bool
-NBestOptions::
-init(Parameter const& P)
-{
- const PARAM_VEC *params;
- params = P.GetParam("n-best-list");
- if (params) {
- if (params->size() >= 2) {
- output_file_path = params->at(0);
- nbest_size = Scan<size_t>( params->at(1) );
- only_distinct = (params->size()>2 && params->at(2)=="distinct");
- } else {
- std::cerr << "wrong format for switch -n-best-list file size [distinct]";
- return false;
- }
- } else nbest_size = 0;
-
- P.SetParameter<size_t>(factor, "n-best-factor", 20);
- P.SetParameter(include_alignment_info, "print-alignment-info-in-n-best", false );
- P.SetParameter(include_feature_labels, "labeled-n-best-list", true );
- P.SetParameter(include_segmentation, "include-segmentation-in-n-best", false );
- P.SetParameter(include_passthrough, "print-passthrough-in-n-best", false );
- P.SetParameter(include_all_factors, "report-all-factors-in-n-best", false );
- P.SetParameter(print_trees, "n-best-trees", false );
-
- enabled = output_file_path.size();
- return true;
-}
-
-#ifdef HAVE_XMLRPC_C
-bool
-NBestOptions::
-update(std::map<std::string,xmlrpc_c::value>const& param)
-{
- typedef std::map<std::string, xmlrpc_c::value> params_t;
- params_t::const_iterator si = param.find("nbest");
- if (si != param.end())
- nbest_size = xmlrpc_c::value_int(si->second);
- only_distinct = check(param, "nbest-distinct", only_distinct);
- enabled = (nbest_size > 0);
- return true;
-}
-#endif
-
-
-} // namespace Moses
diff --git a/contrib/moses2/parameters/NBestOptions.h b/contrib/moses2/parameters/NBestOptions.h
deleted file mode 100644
index f2e478b84..000000000
--- a/contrib/moses2/parameters/NBestOptions.h
+++ /dev/null
@@ -1,32 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include "OptionsBaseClass.h"
-namespace Moses2
-{
-
-struct NBestOptions : public OptionsBaseClass
-{
- size_t nbest_size;
- size_t factor;
- bool enabled;
- bool print_trees;
- bool only_distinct;
-
- bool include_alignment_info;
- bool include_segmentation;
- bool include_feature_labels;
- bool include_passthrough;
-
- bool include_all_factors;
-
- std::string output_file_path;
-
- bool init(Parameter const& param);
-
- bool update(std::map<std::string,xmlrpc_c::value>const& param);
-
- NBestOptions();
-};
-
-}
diff --git a/contrib/moses2/parameters/OOVHandlingOptions.cpp b/contrib/moses2/parameters/OOVHandlingOptions.cpp
deleted file mode 100644
index 65f79584e..000000000
--- a/contrib/moses2/parameters/OOVHandlingOptions.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#include "OOVHandlingOptions.h"
-#include <vector>
-#include <iostream>
-#include "moses/StaticData.h"
-#include "moses/TypeDef.h"
-#include "../legacy/Parameter.h"
-
-namespace Moses2
-{
-
- OOVHandlingOptions::
- OOVHandlingOptions()
- {
- drop = false;
- mark = false;
- prefix = "UNK";
- suffix = "";
- word_deletion_enabled = false;
- always_create_direct_transopt = false;
- }
-
- bool
- OOVHandlingOptions::
- init(Parameter const& param)
- {
- param.SetParameter(drop,"drop-unknown",false);
- param.SetParameter(mark,"mark-unknown",false);
- param.SetParameter(word_deletion_enabled, "phrase-drop-allowed", false);
- param.SetParameter(always_create_direct_transopt, "always-create-direct-transopt", false);
- param.SetParameter<std::string>(prefix,"unknown-word-prefix","UNK");
- param.SetParameter<std::string>(suffix,"unknown-word-suffix","");
- return true;
- }
-
-
-#ifdef HAVE_XMLRPC_C
- bool
- OOVHandlingOptions::
- update(std::map<std::string,xmlrpc_c::value>const& param)
- {
- typedef std::map<std::string, xmlrpc_c::value> params_t;
- // params_t::const_iterator si = param.find("xml-input");
- // if (si != param.end())
- // xml_policy = Scan<XmlInputType>(xmlrpc_c::value_string(si->second));
- return true;
- }
-#endif
-
-}
diff --git a/contrib/moses2/parameters/OOVHandlingOptions.h b/contrib/moses2/parameters/OOVHandlingOptions.h
deleted file mode 100644
index d11284220..000000000
--- a/contrib/moses2/parameters/OOVHandlingOptions.h
+++ /dev/null
@@ -1,27 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include <string>
-#include "OptionsBaseClass.h"
-
-namespace Moses2
-{
- struct
- OOVHandlingOptions : public OptionsBaseClass
- {
- bool drop;
- bool mark;
- std::string prefix;
- std::string suffix;
-
- bool word_deletion_enabled;
- bool always_create_direct_transopt;
- OOVHandlingOptions();
-
- bool init(Parameter const& param);
- bool update(std::map<std::string,xmlrpc_c::value>const& param);
-
- };
-
-}
-
diff --git a/contrib/moses2/parameters/OptionsBaseClass.cpp b/contrib/moses2/parameters/OptionsBaseClass.cpp
deleted file mode 100644
index c523a1333..000000000
--- a/contrib/moses2/parameters/OptionsBaseClass.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
-#include "OptionsBaseClass.h"
-#include "moses/Util.h"
-#include "../legacy/Parameter.h"
-
-namespace Moses2
-{
-
-#ifdef HAVE_XMLRPC_C
- bool
- OptionsBaseClass::
- update(std::map<std::string,xmlrpc_c::value>const& params)
- {
- return true;
- }
-#endif
-
-#ifdef HAVE_XMLRPC_C
- bool
- OptionsBaseClass::
- check(std::map<std::string, xmlrpc_c::value> const& param,
- std::string const key, bool dfltval)
- {
- std::map<std::string, xmlrpc_c::value>::const_iterator m;
- m = param.find(key);
- if (m == param.end()) return dfltval;
- return Scan<bool>(xmlrpc_c::value_string(m->second));
- }
-#endif
-}
diff --git a/contrib/moses2/parameters/OptionsBaseClass.h b/contrib/moses2/parameters/OptionsBaseClass.h
deleted file mode 100644
index 05914ed82..000000000
--- a/contrib/moses2/parameters/OptionsBaseClass.h
+++ /dev/null
@@ -1,20 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include "moses/xmlrpc-c.h"
-#include <string>
-#include <map>
-namespace Moses2
-{
-class Parameter;
-
- struct OptionsBaseClass
- {
-#ifdef HAVE_XMLRPC_C
- virtual bool
- update(std::map<std::string,xmlrpc_c::value>const& params);
-#endif
- bool
- check(std::map<std::string, xmlrpc_c::value> const& param,
- std::string const key, bool dfltval);
- };
-}
diff --git a/contrib/moses2/parameters/ReorderingOptions.cpp b/contrib/moses2/parameters/ReorderingOptions.cpp
deleted file mode 100644
index 5fef5c54a..000000000
--- a/contrib/moses2/parameters/ReorderingOptions.cpp
+++ /dev/null
@@ -1,31 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#include "ReorderingOptions.h"
-#include "../legacy/Parameter.h"
-
-namespace Moses2
-{
-
- ReorderingOptions::
- ReorderingOptions()
- : max_distortion(-1)
- , monotone_at_punct(false)
- , use_early_distortion_cost(false)
- {}
-
-
- ReorderingOptions::
- ReorderingOptions(Parameter const& param)
- {
- init(param);
- }
-
- bool
- ReorderingOptions::
- init(Parameter const& param)
- {
- param.SetParameter(max_distortion, "distortion-limit", -1);
- param.SetParameter(monotone_at_punct, "monotone-at-punctuation", false);
- param.SetParameter(use_early_distortion_cost, "early-distortion-cost", false);
- return true;
- }
-}
diff --git a/contrib/moses2/parameters/ReorderingOptions.h b/contrib/moses2/parameters/ReorderingOptions.h
deleted file mode 100644
index 6bdc1f043..000000000
--- a/contrib/moses2/parameters/ReorderingOptions.h
+++ /dev/null
@@ -1,20 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include "OptionsBaseClass.h"
-namespace Moses2
-{
-
- struct
- ReorderingOptions : public OptionsBaseClass
- {
- int max_distortion;
- bool monotone_at_punct;
- bool use_early_distortion_cost;
- bool init(Parameter const& param);
- ReorderingOptions(Parameter const& param);
- ReorderingOptions();
- };
-
-}
-
diff --git a/contrib/moses2/parameters/ReportingOptions.cpp b/contrib/moses2/parameters/ReportingOptions.cpp
deleted file mode 100644
index adc27baf2..000000000
--- a/contrib/moses2/parameters/ReportingOptions.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#include "ReportingOptions.h"
-#include "../legacy/Parameter.h"
-
-namespace Moses2
-{
- using namespace std;
-
- ReportingOptions::
- ReportingOptions()
- : start_translation_id(0)
- , ReportAllFactors(false)
- , ReportSegmentation(0)
- , PrintAlignmentInfo(false)
- , PrintAllDerivations(false)
- , PrintTranslationOptions(false)
- , WA_SortOrder(NoSort)
- , WordGraph(false)
- , DontPruneSearchGraph(false)
- , RecoverPath(false)
- , ReportHypoScore(false)
- , PrintID(false)
- , PrintPassThrough(false)
- , include_lhs_in_search_graph(false)
- , lattice_sample_size(0)
- {
- factor_order.assign(1,0);
- factor_delimiter = "|";
- }
-
- bool
- ReportingOptions::
- init(Parameter const& param)
- {
- param.SetParameter<long>(start_translation_id, "start-translation-id", 0);
-
- // including factors in the output
- param.SetParameter(ReportAllFactors, "report-all-factors", false);
-
- // segmentation reporting
- ReportSegmentation = (param.GetParam("report-segmentation-enriched")
- ? 2 : param.GetParam("report-segmentation")
- ? 1 : 0);
-
- // word alignment reporting
- param.SetParameter(PrintAlignmentInfo, "print-alignment-info", false);
- param.SetParameter(WA_SortOrder, "sort-word-alignment", NoSort);
- std::string e; // hack to save us param.SetParameter<string>(...)
- param.SetParameter(AlignmentOutputFile,"alignment-output-file", e);
-
-
- param.SetParameter(PrintAllDerivations, "print-all-derivations", false);
- param.SetParameter(PrintTranslationOptions, "print-translation-option", false);
-
- // output a word graph
- PARAM_VEC const* params;
- params = param.GetParam("output-word-graph");
- WordGraph = (params && params->size() == 2); // what are the two options?
-
- // dump the search graph
- param.SetParameter(SearchGraph, "output-search-graph", e);
- param.SetParameter(SearchGraphExtended, "output-search-graph-extended", e);
- param.SetParameter(SearchGraphSLF,"output-search-graph-slf", e);
- param.SetParameter(SearchGraphHG, "output-search-graph-hypergraph", e);
-#ifdef HAVE_PROTOBUF
- param.SetParameter(SearchGraphPB, "output-search-graph-pb", e);
-#endif
-
- param.SetParameter(DontPruneSearchGraph, "unpruned-search-graph", false);
- param.SetParameter(include_lhs_in_search_graph,
- "include-lhs-in-search-graph", false );
-
-
- // miscellaneous
- param.SetParameter(RecoverPath, "recover-input-path",false);
- param.SetParameter(ReportHypoScore, "output-hypo-score",false);
- param.SetParameter(PrintID, "print-id",false);
- param.SetParameter(PrintPassThrough, "print-passthrough",false);
- param.SetParameter(detailed_all_transrep_filepath,
- "translation-all-details", e);
- param.SetParameter(detailed_transrep_filepath, "translation-details", e);
- param.SetParameter(detailed_tree_transrep_filepath,
- "tree-translation-details", e);
-
- params = param.GetParam("lattice-samples");
- if (params) {
- if (params->size() ==2 ) {
- lattice_sample_filepath = params->at(0);
- lattice_sample_size = Scan<size_t>(params->at(1));
- } else {
- std::cerr <<"wrong format for switch -lattice-samples file size";
- return false;
- }
- }
-
-
- if (ReportAllFactors) {
- factor_order.clear();
- for (size_t i = 0; i < MAX_NUM_FACTORS; ++i)
- factor_order.push_back(i);
- } else {
- params= param.GetParam("output-factors");
- if (params) factor_order = Scan<FactorType>(*params);
- if (factor_order.empty()) factor_order.assign(1,0);
- }
-
- param.SetParameter(factor_delimiter, "factor-delimiter", std::string("|"));
- param.SetParameter(factor_delimiter, "output-factor-delimiter", factor_delimiter);
-
- return true;
- }
-
-#ifdef HAVE_XMLRPC_C
- bool
- ReportingOptions::
- update(std::map<std::string, xmlrpc_c::value>const& param)
- {
- ReportAllFactors = check(param, "report-all-factors", ReportAllFactors);
-
-
- std::map<std::string, xmlrpc_c::value>::const_iterator m;
- m = param.find("output-factors");
- if (m != param.end()) {
- factor_order=Tokenize<FactorType>(xmlrpc_c::value_string(m->second),",");
- }
-
- if (ReportAllFactors) {
- factor_order.clear();
- for (size_t i = 0; i < MAX_NUM_FACTORS; ++i)
- factor_order.push_back(i);
- }
-
- m = param.find("align");
- if (m != param.end() && Scan<bool>(xmlrpc_c::value_string(m->second)))
- ReportSegmentation = 1;
-
- PrintAlignmentInfo = check(param,"word-align",PrintAlignmentInfo);
-
- m = param.find("factor-delimiter");
- if (m != param.end()) {
- factor_delimiter = Trim(xmlrpc_c::value_string(m->second));
- }
-
- m = param.find("output-factor-delimiter");
- if (m != param.end()) {
- factor_delimiter = Trim(xmlrpc_c::value_string(m->second));
- }
-
- return true;
- }
-#endif
-}
diff --git a/contrib/moses2/parameters/ReportingOptions.h b/contrib/moses2/parameters/ReportingOptions.h
deleted file mode 100644
index c96809aec..000000000
--- a/contrib/moses2/parameters/ReportingOptions.h
+++ /dev/null
@@ -1,70 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include <vector>
-#include "OptionsBaseClass.h"
-#include "../TypeDef.h"
-
-namespace Moses2
-{
-
- struct
- ReportingOptions : public OptionsBaseClass
- {
- long start_translation_id;
-
- std::vector<FactorType> factor_order;
- std::string factor_delimiter;
-
- bool ReportAllFactors; // m_reportAllFactors;
- int ReportSegmentation; // 0: no 1: m_reportSegmentation 2: ..._enriched
-
- bool PrintAlignmentInfo; // m_PrintAlignmentInfo
- bool PrintAllDerivations;
- bool PrintTranslationOptions;
-
- WordAlignmentSort WA_SortOrder; // 0: no, 1: target order
- std::string AlignmentOutputFile;
-
- bool WordGraph;
-
- std::string SearchGraph;
- std::string SearchGraphExtended;
- std::string SearchGraphSLF;
- std::string SearchGraphHG;
- std::string SearchGraphPB;
- bool DontPruneSearchGraph;
-
- bool RecoverPath; // recover input path?
- bool ReportHypoScore;
-
- bool PrintID;
- bool PrintPassThrough;
-
- // transrep = translation reporting
- std::string detailed_transrep_filepath;
- std::string detailed_tree_transrep_filepath;
- std::string detailed_all_transrep_filepath;
- bool include_lhs_in_search_graph;
-
-
- std::string lattice_sample_filepath;
- size_t lattice_sample_size;
-
- bool init(Parameter const& param);
-
- /// do we need to keep the search graph from decoding?
- bool NeedSearchGraph() const {
- return !(SearchGraph.empty() && SearchGraphExtended.empty());
- }
-
-#ifdef HAVE_XMLRPC_C
- bool update(std::map<std::string, xmlrpc_c::value>const& param);
-#endif
-
-
- ReportingOptions();
- };
-
-}
-
diff --git a/contrib/moses2/parameters/SearchOptions.cpp b/contrib/moses2/parameters/SearchOptions.cpp
deleted file mode 100644
index 98c1789ea..000000000
--- a/contrib/moses2/parameters/SearchOptions.cpp
+++ /dev/null
@@ -1,107 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#include "SearchOptions.h"
-#include "../legacy/Parameter.h"
-
-namespace Moses2
-{
-
- SearchOptions::
- SearchOptions()
- : algo(Normal)
- , stack_size(DEFAULT_MAX_HYPOSTACK_SIZE)
- , stack_diversity(0)
- , disable_discarding(false)
- , max_phrase_length(DEFAULT_MAX_PHRASE_LENGTH)
- , max_trans_opt_per_cov(DEFAULT_MAX_TRANS_OPT_SIZE)
- , max_partial_trans_opt(DEFAULT_MAX_PART_TRANS_OPT_SIZE)
- , beam_width(DEFAULT_BEAM_WIDTH)
- , timeout(0)
- , consensus(false)
- , early_discarding_threshold(DEFAULT_EARLY_DISCARDING_THRESHOLD)
- , trans_opt_threshold(DEFAULT_TRANSLATION_OPTION_THRESHOLD)
- { }
-
- SearchOptions::
- SearchOptions(Parameter const& param)
- : stack_diversity(0)
- {
- init(param);
- }
-
- bool
- SearchOptions::
- init(Parameter const& param)
- {
- param.SetParameter(algo, "search-algorithm", Normal);
- param.SetParameter(stack_size, "stack", DEFAULT_MAX_HYPOSTACK_SIZE);
- param.SetParameter(stack_diversity, "stack-diversity", size_t(0));
- param.SetParameter(beam_width, "beam-threshold", DEFAULT_BEAM_WIDTH);
- param.SetParameter(early_discarding_threshold, "early-discarding-threshold",
- DEFAULT_EARLY_DISCARDING_THRESHOLD);
- param.SetParameter(timeout, "time-out", 0);
- param.SetParameter(max_phrase_length, "max-phrase-length",
- DEFAULT_MAX_PHRASE_LENGTH);
- param.SetParameter(trans_opt_threshold, "translation-option-threshold",
- DEFAULT_TRANSLATION_OPTION_THRESHOLD);
- param.SetParameter(max_trans_opt_per_cov, "max-trans-opt-per-coverage",
- DEFAULT_MAX_TRANS_OPT_SIZE);
- param.SetParameter(max_partial_trans_opt, "max-partial-trans-opt",
- DEFAULT_MAX_PART_TRANS_OPT_SIZE);
-
- param.SetParameter(consensus, "consensus-decoding", false);
- param.SetParameter(disable_discarding, "disable-discarding", false);
-
- // transformation to log of a few scores
- beam_width = TransformScore(beam_width);
- trans_opt_threshold = TransformScore(trans_opt_threshold);
- early_discarding_threshold = TransformScore(early_discarding_threshold);
-
- return true;
- }
-
- bool
- is_syntax(SearchAlgorithm algo)
- {
- return (algo == CYKPlus || algo == ChartIncremental ||
- algo == SyntaxS2T || algo == SyntaxT2S ||
- algo == SyntaxF2S || algo == SyntaxT2S_SCFG);
- }
-
-#ifdef HAVE_XMLRPC_C
- bool
- SearchOptions::
- update(std::map<std::string,xmlrpc_c::value>const& params)
- {
- typedef std::map<std::string, xmlrpc_c::value> params_t;
-
- params_t::const_iterator si = params.find("search-algorithm");
- if (si != params.end())
- {
- // use named parameters
- std::string spec = xmlrpc_c::value_string(si->second);
- if (spec == "normal" || spec == "0") algo = Normal;
- else if (spec == "cube" || spec == "1") algo = CubePruning;
- else throw xmlrpc_c::fault("Unsupported search algorithm",
- xmlrpc_c::fault::CODE_PARSE);
- }
-
- si = params.find("stack");
- if (si != params.end()) stack_size = xmlrpc_c::value_int(si->second);
-
- si = params.find("stack-diversity");
- if (si != params.end()) stack_diversity = xmlrpc_c::value_int(si->second);
-
- si = params.find("beam-threshold");
- if (si != params.end()) beam_width = xmlrpc_c::value_double(si->second);
-
- si = params.find("time-out");
- if (si != params.end()) timeout = xmlrpc_c::value_int(si->second);
-
- si = params.find("max-phrase-length");
- if (si != params.end()) max_phrase_length = xmlrpc_c::value_int(si->second);
-
- return true;
- }
-#endif
-
-}
diff --git a/contrib/moses2/parameters/SearchOptions.h b/contrib/moses2/parameters/SearchOptions.h
deleted file mode 100644
index 3de0a979a..000000000
--- a/contrib/moses2/parameters/SearchOptions.h
+++ /dev/null
@@ -1,54 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include <limits>
-#include "OptionsBaseClass.h"
-#include "../TypeDef.h"
-
-namespace Moses2
-{
-
- bool is_syntax(SearchAlgorithm algo);
-
- struct
- SearchOptions : public OptionsBaseClass
- {
- SearchAlgorithm algo;
-
- // stack decoding
- size_t stack_size; // maxHypoStackSize;
- size_t stack_diversity; // minHypoStackDiversity;
- bool disable_discarding;
- // Disable discarding of bad hypotheses from HypothesisStackNormal
- size_t max_phrase_length;
- size_t max_trans_opt_per_cov;
- size_t max_partial_trans_opt;
- // beam search
- float beam_width;
-
- int timeout;
-
- bool consensus; //! Use Consensus decoding (DeNero et al 2009)
-
- // reordering options
- // bool reorderingConstraint; //! use additional reordering constraints
- // bool useEarlyDistortionCost;
-
- float early_discarding_threshold;
- float trans_opt_threshold;
-
- bool init(Parameter const& param);
- SearchOptions(Parameter const& param);
- SearchOptions();
-
- bool
- UseEarlyDiscarding() const {
- return early_discarding_threshold != -std::numeric_limits<float>::infinity();
- }
-
- bool
- update(std::map<std::string,xmlrpc_c::value>const& params);
-
- };
-
-}
diff --git a/contrib/moses2/parameters/ServerOptions.cpp b/contrib/moses2/parameters/ServerOptions.cpp
deleted file mode 100644
index d8942c5c7..000000000
--- a/contrib/moses2/parameters/ServerOptions.cpp
+++ /dev/null
@@ -1,87 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#include <boost/foreach.hpp>
-#include <string>
-#include "../legacy/Parameter.h"
-#include "ServerOptions.h"
-#include "../legacy/Util2.h"
-#include "util/exception.hh"
-
-namespace Moses2
-{
-
-// parse the session timeout specifciation for moses server
-// Format is "<number>d[<number>[h[<number>m[<number>s]]]]".
-// If none of 'dhms' is given, it is assumed that it's seconds.
-// Specs can be combined, e.g. 2h30m, although it's probably nonsense
-// to be so specific.
-size_t
-parse_timespec(std::string const& spec)
-{
- size_t t = 0, timeout = 0;
- BOOST_FOREACH(char const& c, spec)
- {
- if (c >= '0' && c <= '9')
- {
- t = t * 10 + c - '0';
- }
- else
- {
- if (c == 'd') timeout = t * 24 * 3600;
- else if (c == 'h') timeout += t * 3600;
- else if (c == 'm') timeout += t * 60;
- else if (c == 's') timeout += t;
- else UTIL_THROW2("Can't parse specification '" << spec
- << " at " << HERE);
- t = 0;
- }
- }
- return timeout;
-}
-
-ServerOptions::
-ServerOptions()
- : is_serial(false)
- , numThreads(15) // why 15?
- , sessionTimeout(1800) // = 30 min
- , sessionCacheSize(25)
- , port(8080)
- , maxConn(15)
- , maxConnBacklog(15)
- , keepaliveTimeout(15)
- , keepaliveMaxConn(30)
- , timeout(15)
-{ }
-
-ServerOptions::
-ServerOptions(Parameter const& P)
-{
- init(P);
-}
-
-bool
-ServerOptions::
-init(Parameter const& P)
-{
- // Settings for the abyss server
- P.SetParameter(this->port, "server-port", 8080);
- P.SetParameter(this->is_serial, "serial", false);
- P.SetParameter(this->logfile, "server-log", std::string("/dev/null"));
- P.SetParameter(this->numThreads, "threads", uint32_t(15));
-
- // defaults reflect recommended defaults (according to Hieu)
- // -> http://xmlrpc-c.sourceforge.net/doc/libxmlrpc_server_abyss.html#max_conn
- P.SetParameter(this->maxConn,"server-maxconn", 15);
- P.SetParameter(this->maxConnBacklog,"server-maxconn-backlog", 15);
- P.SetParameter(this->keepaliveTimeout,"server-keepalive-timeout", 15);
- P.SetParameter(this->keepaliveMaxConn,"server-keepalive-maxconn", 30);
- P.SetParameter(this->timeout,"server-timeout",15);
-
- // the stuff below is related to Moses translation sessions
- std::string timeout_spec;
- P.SetParameter(timeout_spec, "session-timeout",std::string("30m"));
- this->sessionTimeout = parse_timespec(timeout_spec);
- P.SetParameter(this->sessionCacheSize, "session-cache_size", size_t(25));
-
- return true;
-}
-} // namespace Moses
diff --git a/contrib/moses2/parameters/ServerOptions.h b/contrib/moses2/parameters/ServerOptions.h
deleted file mode 100644
index d662d1499..000000000
--- a/contrib/moses2/parameters/ServerOptions.h
+++ /dev/null
@@ -1,43 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
-#pragma once
-#include <string>
-#include <map>
-#include <stdint.h>
-#include <xmlrpc-c/base.hpp>
-#include <xmlrpc-c/registry.hpp>
-#include <xmlrpc-c/server_abyss.hpp>
-
-namespace Moses2
-{
-class Parameter;
-
- struct
- ServerOptions
- {
- bool is_serial;
- uint32_t numThreads; // might not be used any more, actually
-
- size_t sessionTimeout; // this is related to Moses translation sessions
- size_t sessionCacheSize; // this is related to Moses translation sessions
-
- int port; // this is for the abyss server
- std::string logfile; // this is for the abyss server
- int maxConn; // this is for the abyss server
- int maxConnBacklog; // this is for the abyss server
- int keepaliveTimeout; // this is for the abyss server
- int keepaliveMaxConn; // this is for the abyss server
- int timeout; // this is for the abyss server
-
- bool init(Parameter const& param);
- ServerOptions(Parameter const& param);
- ServerOptions();
-
- bool
- update(std::map<std::string,xmlrpc_c::value>const& params)
- {
- return true;
- }
-
- };
-
-}
diff --git a/contrib/moses2/parameters/SyntaxOptions.cpp b/contrib/moses2/parameters/SyntaxOptions.cpp
deleted file mode 100644
index 0d4b0c7eb..000000000
--- a/contrib/moses2/parameters/SyntaxOptions.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * SyntaxOptions.cpp
- *
- * Created on: 13 Apr 2016
- * Author: hieu
- */
-
-#include "SyntaxOptions.h"
-#include "../legacy/Parameter.h"
-
-namespace Moses2
-{
-SyntaxOptions::SyntaxOptions()
-: s2t_parsing_algo(RecursiveCYKPlus)
-, default_non_term_only_for_empty_range(false)
-, source_label_overlap(SourceLabelOverlapAdd)
-, rule_limit(DEFAULT_MAX_TRANS_OPT_SIZE)
-{}
-
-bool SyntaxOptions::init(Parameter const& param)
-{
- param.SetParameter(rule_limit, "rule-limit", DEFAULT_MAX_TRANS_OPT_SIZE);
- param.SetParameter(s2t_parsing_algo, "s2t-parsing-algorithm",
- RecursiveCYKPlus);
- param.SetParameter(default_non_term_only_for_empty_range,
- "default-non-term-for-empty-range-only", false);
- param.SetParameter(source_label_overlap, "source-label-overlap",
- SourceLabelOverlapAdd);
- return true;
-}
-
-bool SyntaxOptions::update(std::map<std::string,xmlrpc_c::value>const& param)
-{
- typedef std::map<std::string, xmlrpc_c::value> params_t;
- // params_t::const_iterator si = param.find("xml-input");
- // if (si != param.end())
- // xml_policy = Scan<XmlInputType>(xmlrpc_c::value_string(si->second));
- return true;
-}
-
-void SyntaxOptions::LoadNonTerminals(Parameter const& param, FactorCollection& factorCollection)
-{
-
-}
-
-
-} /* namespace Moses2 */
diff --git a/contrib/moses2/parameters/SyntaxOptions.h b/contrib/moses2/parameters/SyntaxOptions.h
deleted file mode 100644
index 133a1d9ed..000000000
--- a/contrib/moses2/parameters/SyntaxOptions.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * SyntaxOptions.h
- *
- * Created on: 13 Apr 2016
- * Author: hieu
- */
-#pragma once
-#include <string>
-#include <vector>
-#include "OptionsBaseClass.h"
-#include "../SCFG/Word.h"
-
-namespace Moses2
-{
-class FactorCollection;
-class Parameter;
-
-typedef std::pair<std::string, float> UnknownLHSEntry;
-typedef std::vector<UnknownLHSEntry> UnknownLHSList;
-
-struct
-SyntaxOptions : public OptionsBaseClass
-{
- S2TParsingAlgorithm s2t_parsing_algo;
- SCFG::Word input_default_non_terminal;
- SCFG::Word output_default_non_terminal;
- bool default_non_term_only_for_empty_range; // whatever that means
- UnknownLHSList unknown_lhs;
- SourceLabelOverlap source_label_overlap; // m_sourceLabelOverlap;
- size_t rule_limit;
-
- SyntaxOptions();
-
- bool init(Parameter const& param);
- bool update(std::map<std::string,xmlrpc_c::value>const& param);
- void LoadNonTerminals(Parameter const& param, FactorCollection& factorCollection);
-};
-
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/pugiconfig.hpp b/contrib/moses2/pugiconfig.hpp
deleted file mode 100644
index e50b580bf..000000000
--- a/contrib/moses2/pugiconfig.hpp
+++ /dev/null
@@ -1,74 +0,0 @@
-/**
- * pugixml parser - version 1.7
- * --------------------------------------------------------
- * Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
- * Report bugs and download new versions at http://pugixml.org/
- *
- * This library is distributed under the MIT License. See notice at the end
- * of this file.
- *
- * This work is based on the pugxml parser, which is:
- * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
- */
-
-#ifndef HEADER_PUGICONFIG_HPP
-#define HEADER_PUGICONFIG_HPP
-
-// Uncomment this to enable wchar_t mode
-// #define PUGIXML_WCHAR_MODE
-
-// Uncomment this to enable compact mode
-// #define PUGIXML_COMPACT
-
-// Uncomment this to disable XPath
-// #define PUGIXML_NO_XPATH
-
-// Uncomment this to disable STL
-// #define PUGIXML_NO_STL
-
-// Uncomment this to disable exceptions
-// #define PUGIXML_NO_EXCEPTIONS
-
-// Set this to control attributes for public classes/functions, i.e.:
-// #define PUGIXML_API __declspec(dllexport) // to export all public symbols from DLL
-// #define PUGIXML_CLASS __declspec(dllimport) // to import all classes from DLL
-// #define PUGIXML_FUNCTION __fastcall // to set calling conventions to all public functions to fastcall
-// In absence of PUGIXML_CLASS/PUGIXML_FUNCTION definitions PUGIXML_API is used instead
-
-// Tune these constants to adjust memory-related behavior
-// #define PUGIXML_MEMORY_PAGE_SIZE 32768
-// #define PUGIXML_MEMORY_OUTPUT_STACK 10240
-// #define PUGIXML_MEMORY_XPATH_PAGE_SIZE 4096
-
-// Uncomment this to switch to header-only version
-// #define PUGIXML_HEADER_ONLY
-
-// Uncomment this to enable long long support
-// #define PUGIXML_HAS_LONG_LONG
-
-#endif
-
-/**
- * Copyright (c) 2006-2015 Arseny Kapoulkine
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
diff --git a/contrib/moses2/pugixml.cpp b/contrib/moses2/pugixml.cpp
deleted file mode 100644
index 737733e64..000000000
--- a/contrib/moses2/pugixml.cpp
+++ /dev/null
@@ -1,12444 +0,0 @@
-/**
- * pugixml parser - version 1.7
- * --------------------------------------------------------
- * Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
- * Report bugs and download new versions at http://pugixml.org/
- *
- * This library is distributed under the MIT License. See notice at the end
- * of this file.
- *
- * This work is based on the pugxml parser, which is:
- * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
- */
-
-#ifndef SOURCE_PUGIXML_CPP
-#define SOURCE_PUGIXML_CPP
-
-#include "pugixml.hpp"
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <limits.h>
-
-#ifdef PUGIXML_WCHAR_MODE
-# include <wchar.h>
-#endif
-
-#ifndef PUGIXML_NO_XPATH
-# include <math.h>
-# include <float.h>
-# ifdef PUGIXML_NO_EXCEPTIONS
-# include <setjmp.h>
-# endif
-#endif
-
-#ifndef PUGIXML_NO_STL
-# include <istream>
-# include <ostream>
-# include <string>
-#endif
-
-// For placement new
-#include <new>
-
-#ifdef _MSC_VER
-# pragma warning(push)
-# pragma warning(disable: 4127) // conditional expression is constant
-# pragma warning(disable: 4324) // structure was padded due to __declspec(align())
-# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
-# pragma warning(disable: 4702) // unreachable code
-# pragma warning(disable: 4996) // this function or variable may be unsafe
-# pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged
-#endif
-
-#ifdef __INTEL_COMPILER
-# pragma warning(disable: 177) // function was declared but never referenced
-# pragma warning(disable: 279) // controlling expression is constant
-# pragma warning(disable: 1478 1786) // function was declared "deprecated"
-# pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
-#endif
-
-#if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
-# pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
-#endif
-
-#ifdef __BORLANDC__
-# pragma option push
-# pragma warn -8008 // condition is always false
-# pragma warn -8066 // unreachable code
-#endif
-
-#ifdef __SNC__
-// Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
-# pragma diag_suppress=178 // function was declared but never referenced
-# pragma diag_suppress=237 // controlling expression is constant
-#endif
-
-// Inlining controls
-#if defined(_MSC_VER) && _MSC_VER >= 1300
-# define PUGI__NO_INLINE __declspec(noinline)
-#elif defined(__GNUC__)
-# define PUGI__NO_INLINE __attribute__((noinline))
-#else
-# define PUGI__NO_INLINE
-#endif
-
-// Branch weight controls
-#if defined(__GNUC__)
-# define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
-#else
-# define PUGI__UNLIKELY(cond) (cond)
-#endif
-
-// Simple static assertion
-#define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
-
-// Digital Mars C++ bug workaround for passing char loaded from memory via stack
-#ifdef __DMC__
-# define PUGI__DMC_VOLATILE volatile
-#else
-# define PUGI__DMC_VOLATILE
-#endif
-
-// Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
-#if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
-using std::memcpy;
-using std::memmove;
-using std::memset;
-#endif
-
-// In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
-#if defined(_MSC_VER) && !defined(__S3E__)
-# define PUGI__MSVC_CRT_VERSION _MSC_VER
-#endif
-
-#ifdef PUGIXML_HEADER_ONLY
-# define PUGI__NS_BEGIN namespace pugi { namespace impl {
-# define PUGI__NS_END } }
-# define PUGI__FN inline
-# define PUGI__FN_NO_INLINE inline
-#else
-# if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
-# define PUGI__NS_BEGIN namespace pugi { namespace impl {
-# define PUGI__NS_END } }
-# else
-# define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
-# define PUGI__NS_END } } }
-# endif
-# define PUGI__FN
-# define PUGI__FN_NO_INLINE PUGI__NO_INLINE
-#endif
-
-// uintptr_t
-#if !defined(_MSC_VER) || _MSC_VER >= 1600
-# include <stdint.h>
-#else
-namespace pugi
-{
-# ifndef _UINTPTR_T_DEFINED
- typedef size_t uintptr_t;
-# endif
-
- typedef unsigned __int8 uint8_t;
- typedef unsigned __int16 uint16_t;
- typedef unsigned __int32 uint32_t;
-}
-#endif
-
-// Memory allocation
-PUGI__NS_BEGIN
- PUGI__FN void* default_allocate(size_t size)
- {
- return malloc(size);
- }
-
- PUGI__FN void default_deallocate(void* ptr)
- {
- free(ptr);
- }
-
- template <typename T>
- struct xml_memory_management_function_storage
- {
- static allocation_function allocate;
- static deallocation_function deallocate;
- };
-
- // Global allocation functions are stored in class statics so that in header mode linker deduplicates them
- // Without a template<> we'll get multiple definitions of the same static
- template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
- template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
-
- typedef xml_memory_management_function_storage<int> xml_memory;
-PUGI__NS_END
-
-// String utilities
-PUGI__NS_BEGIN
- // Get string length
- PUGI__FN size_t strlength(const char_t* s)
- {
- assert(s);
-
- #ifdef PUGIXML_WCHAR_MODE
- return wcslen(s);
- #else
- return strlen(s);
- #endif
- }
-
- // Compare two strings
- PUGI__FN bool strequal(const char_t* src, const char_t* dst)
- {
- assert(src && dst);
-
- #ifdef PUGIXML_WCHAR_MODE
- return wcscmp(src, dst) == 0;
- #else
- return strcmp(src, dst) == 0;
- #endif
- }
-
- // Compare lhs with [rhs_begin, rhs_end)
- PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
- {
- for (size_t i = 0; i < count; ++i)
- if (lhs[i] != rhs[i])
- return false;
-
- return lhs[count] == 0;
- }
-
- // Get length of wide string, even if CRT lacks wide character support
- PUGI__FN size_t strlength_wide(const wchar_t* s)
- {
- assert(s);
-
- #ifdef PUGIXML_WCHAR_MODE
- return wcslen(s);
- #else
- const wchar_t* end = s;
- while (*end) end++;
- return static_cast<size_t>(end - s);
- #endif
- }
-PUGI__NS_END
-
-// auto_ptr-like object for exception recovery
-PUGI__NS_BEGIN
- template <typename T, typename D = void(*)(T*)> struct auto_deleter
- {
- T* data;
- D deleter;
-
- auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
- {
- }
-
- ~auto_deleter()
- {
- if (data) deleter(data);
- }
-
- T* release()
- {
- T* result = data;
- data = 0;
- return result;
- }
- };
-PUGI__NS_END
-
-#ifdef PUGIXML_COMPACT
-PUGI__NS_BEGIN
- class compact_hash_table
- {
- public:
- compact_hash_table(): _items(0), _capacity(0), _count(0)
- {
- }
-
- void clear()
- {
- if (_items)
- {
- xml_memory::deallocate(_items);
- _items = 0;
- _capacity = 0;
- _count = 0;
- }
- }
-
- void** find(const void* key)
- {
- assert(key);
-
- if (_capacity == 0) return 0;
-
- size_t hashmod = _capacity - 1;
- size_t bucket = hash(key) & hashmod;
-
- for (size_t probe = 0; probe <= hashmod; ++probe)
- {
- item_t& probe_item = _items[bucket];
-
- if (probe_item.key == key)
- return &probe_item.value;
-
- if (probe_item.key == 0)
- return 0;
-
- // hash collision, quadratic probing
- bucket = (bucket + probe + 1) & hashmod;
- }
-
- assert(!"Hash table is full");
- return 0;
- }
-
- void** insert(const void* key)
- {
- assert(key);
- assert(_capacity != 0 && _count < _capacity - _capacity / 4);
-
- size_t hashmod = _capacity - 1;
- size_t bucket = hash(key) & hashmod;
-
- for (size_t probe = 0; probe <= hashmod; ++probe)
- {
- item_t& probe_item = _items[bucket];
-
- if (probe_item.key == 0)
- {
- probe_item.key = key;
- _count++;
- return &probe_item.value;
- }
-
- if (probe_item.key == key)
- return &probe_item.value;
-
- // hash collision, quadratic probing
- bucket = (bucket + probe + 1) & hashmod;
- }
-
- assert(!"Hash table is full");
- return 0;
- }
-
- bool reserve()
- {
- if (_count + 16 >= _capacity - _capacity / 4)
- return rehash();
-
- return true;
- }
-
- private:
- struct item_t
- {
- const void* key;
- void* value;
- };
-
- item_t* _items;
- size_t _capacity;
-
- size_t _count;
-
- bool rehash();
-
- static unsigned int hash(const void* key)
- {
- unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));
-
- // MurmurHash3 32-bit finalizer
- h ^= h >> 16;
- h *= 0x85ebca6bu;
- h ^= h >> 13;
- h *= 0xc2b2ae35u;
- h ^= h >> 16;
-
- return h;
- }
- };
-
- PUGI__FN_NO_INLINE bool compact_hash_table::rehash()
- {
- compact_hash_table rt;
- rt._capacity = (_capacity == 0) ? 32 : _capacity * 2;
- rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * rt._capacity));
-
- if (!rt._items)
- return false;
-
- memset(rt._items, 0, sizeof(item_t) * rt._capacity);
-
- for (size_t i = 0; i < _capacity; ++i)
- if (_items[i].key)
- *rt.insert(_items[i].key) = _items[i].value;
-
- if (_items)
- xml_memory::deallocate(_items);
-
- _capacity = rt._capacity;
- _items = rt._items;
-
- assert(_count == rt._count);
-
- return true;
- }
-
-PUGI__NS_END
-#endif
-
-PUGI__NS_BEGIN
- static const size_t xml_memory_page_size = // payload bytes of a regular allocator page; PUGIXML_MEMORY_PAGE_SIZE overrides the 32768 default
- #ifdef PUGIXML_MEMORY_PAGE_SIZE
- PUGIXML_MEMORY_PAGE_SIZE
- #else
- 32768
- #endif
- ;
-
-#ifdef PUGIXML_COMPACT
- static const uintptr_t xml_memory_block_alignment = 4; // rounding unit for string allocations; compact_header asserts it equals compact_alignment
-
- static const uintptr_t xml_memory_page_alignment = sizeof(void*);
-#else
- static const uintptr_t xml_memory_block_alignment = sizeof(void*); // rounding unit for string allocations
-
- static const uintptr_t xml_memory_page_alignment = 64; // pages are 64-byte aligned, which leaves the low 6 bits of a header free for the flag/type bits below
- static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1); // clears the flag/type bits of a header to recover the page pointer
-#endif
-
- // extra metadata bits (kept in the node/attribute header next to the page reference)
- static const uintptr_t xml_memory_page_contents_shared_mask = 32;
- static const uintptr_t xml_memory_page_name_allocated_mask = 16; // when set, destroy_node/destroy_attribute release the name via deallocate_string
- static const uintptr_t xml_memory_page_value_allocated_mask = 8; // same as above, for the value string
- static const uintptr_t xml_memory_page_type_mask = 7; // low three bits hold (node type - 1); see PUGI__NODETYPE
-
- // combined masks for string uniqueness
- static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
- static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;
-
-#ifdef PUGIXML_COMPACT
- #define PUGI__GETPAGE_IMPL(header) (header).get_page()
-#else
- #define PUGI__GETPAGE_IMPL(header) reinterpret_cast<impl::xml_memory_page*>((header) & impl::xml_memory_page_pointer_mask)
-#endif
-
- #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
- #define PUGI__NODETYPE(n) static_cast<xml_node_type>(((n)->header & impl::xml_memory_page_type_mask) + 1)
-
- struct xml_allocator;
-
- struct xml_memory_page // Bookkeeping header at the start of each allocator page; the page payload follows it directly (see xml_allocator::allocate_memory).
- {
- static xml_memory_page* construct(void* memory) // Initializes a page header in caller-provided memory by assigning every field to zero; returns the same address typed as a page.
- {
- xml_memory_page* result = static_cast<xml_memory_page*>(memory);
-
- result->allocator = 0;
- result->prev = 0;
- result->next = 0;
- result->busy_size = 0;
- result->freed_size = 0;
-
- #ifdef PUGIXML_COMPACT
- result->compact_string_base = 0;
- result->compact_shared_parent = 0;
- result->compact_page_marker = 0;
- #endif
-
- return result;
- }
-
- xml_allocator* allocator; // allocator this page belongs to; filled in by whoever creates the page
-
- xml_memory_page* prev; // pages form a doubly-linked list
- xml_memory_page* next;
-
- size_t busy_size; // payload bytes handed out from this page
- size_t freed_size; // payload bytes given back through deallocate_memory
-
- #ifdef PUGIXML_COMPACT
- char_t* compact_string_base; // base address that compact_string offsets on this page are relative to
- void* compact_shared_parent; // parent pointer shared by compact_pointer_parent fields on this page (encoded as 65534)
- uint32_t* compact_page_marker; // latest marker written by allocate_object; compact_header stores its distance to it
- #endif
- };
-
- struct xml_memory_string_header // Prefix written by xml_allocator::allocate_string in front of every allocated string.
- {
- uint16_t page_offset; // offset of this header from the start of the page payload, in units of xml_memory_block_alignment
- uint16_t full_size; // allocation size in units of xml_memory_block_alignment; 0 if string occupies whole page
- };
-
- struct xml_allocator // Page-based bump allocator for nodes, attributes and strings; frees are tracked per page and whole pages are released at once.
- {
- xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size) // Starts allocating from 'root', continuing after whatever is already marked busy on it.
- {
- #ifdef PUGIXML_COMPACT
- _hash = 0;
- #endif
- }
-
- xml_memory_page* allocate_page(size_t data_size) // Allocates a page with data_size payload bytes whose header is aligned to xml_memory_page_alignment; returns 0 on allocation failure. The page is not linked into the list here.
- {
- size_t size = sizeof(xml_memory_page) + data_size;
-
- // allocate block with some alignment, leaving memory for worst-case padding
- void* memory = xml_memory::allocate(size + xml_memory_page_alignment);
- if (!memory) return 0;
-
- // align to next page boundary (note: this guarantees at least 1 usable byte before the page)
- char* page_memory = reinterpret_cast<char*>((reinterpret_cast<uintptr_t>(memory) + xml_memory_page_alignment) & ~(xml_memory_page_alignment - 1));
-
- // prepare page structure
- xml_memory_page* page = xml_memory_page::construct(page_memory);
- assert(page);
-
- page->allocator = _root->allocator; // the new page inherits the allocator pointer stored in the current root page
-
- // record the offset for freeing the memory block (stored in the byte just before the page; read back by deallocate_page)
- assert(page_memory > memory && page_memory - static_cast<char*>(memory) <= 127);
- page_memory[-1] = static_cast<char>(page_memory - static_cast<char*>(memory));
-
- return page;
- }
-
- static void deallocate_page(xml_memory_page* page) // Frees a page created by allocate_page, using the offset byte stored just before it to find the start of the original block.
- {
- char* page_memory = reinterpret_cast<char*>(page);
-
- xml_memory::deallocate(page_memory - page_memory[-1]);
- }
-
- void* allocate_memory_oob(size_t size, xml_memory_page*& out_page); // slow path used when the root page is full; defined after the struct
-
- void* allocate_memory(size_t size, xml_memory_page*& out_page) // Bump-allocates 'size' bytes from the root page, or defers to allocate_memory_oob when they do not fit; out_page receives the page owning the result.
- {
- if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
- return allocate_memory_oob(size, out_page);
-
- void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size; // payload starts right after the page header
-
- _busy_size += size;
-
- out_page = _root;
-
- return buf;
- }
-
- #ifdef PUGIXML_COMPACT
- void* allocate_object(size_t size, xml_memory_page*& out_page) // Compact mode: like allocate_memory, but makes sure a page marker lies less than 256 * xml_memory_block_alignment bytes before the object, as compact_header requires. Returns 0 on allocation failure.
- {
- void* result = allocate_memory(size + sizeof(uint32_t), out_page); // reserve room for a marker in case one is needed
- if (!result) return 0;
-
- // adjust for marker
- ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);
-
- if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment)) // presumably also taken when the page has no marker yet (null marker gives a huge offset) - TODO confirm
- {
- // insert new marker
- uint32_t* marker = static_cast<uint32_t*>(result);
-
- *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page)); // marker value = its distance from the page header; compact_header::get_page subtracts it
- out_page->compact_page_marker = marker;
-
- // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
- // this will make sure deallocate_memory correctly tracks the size
- out_page->freed_size += sizeof(uint32_t);
-
- return marker + 1; // the object lives right after the marker
- }
- else
- {
- // roll back uint32_t part
- _busy_size -= sizeof(uint32_t);
-
- return result;
- }
- }
- #else
- void* allocate_object(size_t size, xml_memory_page*& out_page) // Non-compact mode: objects need no marker, so this is plain allocate_memory.
- {
- return allocate_memory(size, out_page);
- }
- #endif
-
- void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) // Records 'size' bytes of 'page' as freed; when the whole page is free it is either reset (last page in the list) or unlinked and released.
- {
- if (page == _root) page->busy_size = _busy_size; // the root's busy size is cached in _busy_size; sync it before it is read below
-
- assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
- (void)!ptr; // ptr is otherwise only read by the assert above; presumably this avoids an unused-parameter warning when asserts are disabled
-
- page->freed_size += size;
- assert(page->freed_size <= page->busy_size);
-
- if (page->freed_size == page->busy_size)
- {
- if (page->next == 0)
- {
- assert(_root == page);
-
- // top page freed, just reset sizes
- page->busy_size = 0;
- page->freed_size = 0;
-
- #ifdef PUGIXML_COMPACT
- // reset compact state to maximize efficiency
- page->compact_string_base = 0;
- page->compact_shared_parent = 0;
- page->compact_page_marker = 0;
- #endif
-
- _busy_size = 0;
- }
- else
- {
- assert(_root != page);
- assert(page->prev);
-
- // remove from the list
- page->prev->next = page->next;
- page->next->prev = page->prev;
-
- // deallocate
- deallocate_page(page);
- }
- }
- }
-
- char_t* allocate_string(size_t length) // Allocates room for 'length' char_t units preceded by an xml_memory_string_header and returns the address just past the header; returns 0 on allocation failure.
- {
- static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment; // largest byte offset/size a 16-bit header field can express in alignment units
-
- PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
-
- // allocate memory for string and header block
- size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
-
- // round size up to block alignment boundary
- size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);
-
- xml_memory_page* page;
- xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
-
- if (!header) return 0;
-
- // setup header
- ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);
-
- assert(page_offset % xml_memory_block_alignment == 0);
- assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
- header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);
-
- // full_size == 0 for large strings that occupy the whole page
- assert(full_size % xml_memory_block_alignment == 0);
- assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
- header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);
-
- // round-trip through void* to avoid 'cast increases required alignment of target type' warning
- // header is guaranteed a pointer-sized alignment, which should be enough for char_t
- return static_cast<char_t*>(static_cast<void*>(header + 1));
- }
-
- void deallocate_string(char_t* string) // Releases a string obtained from allocate_string: decodes the header in front of it to find the owning page and the allocation size.
- {
- // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
- // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
-
- // get header
- xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
- assert(header);
-
- // deallocate
- size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment; // byte distance from the page header to this string header
- xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
-
- // if full_size == 0 then this string occupies the whole page
- size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;
-
- deallocate_memory(header, full_size, page);
- }
-
- bool reserve() // Compact mode: forwards to the hash table's reserve() (declared outside this view). Otherwise there is nothing to reserve and it returns true.
- {
- #ifdef PUGIXML_COMPACT
- return _hash->reserve();
- #else
- return true;
- #endif
- }
-
- xml_memory_page* _root; // page currently allocated from; it is the last page of the list (its next is 0)
- size_t _busy_size; // bytes used on _root; written back to _root->busy_size only when needed
-
- #ifdef PUGIXML_COMPACT
- compact_hash_table* _hash; // table used by compact_get_value/compact_set_value; set by xml_document_struct
- #endif
- };
-
- PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) // Slow path of allocate_memory: obtains a new page and returns the start of its payload, or 0 (with out_page set to 0) if the page cannot be allocated.
- {
- const size_t large_allocation_threshold = xml_memory_page_size / 4; // requests above this get a dedicated page of exactly 'size' bytes
-
- xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
- out_page = page;
-
- if (!page) return 0;
-
- if (size <= large_allocation_threshold)
- {
- _root->busy_size = _busy_size; // flush the cached busy size before the old root stops being the root
-
- // insert page at the end of linked list
- page->prev = _root;
- _root->next = page;
- _root = page;
-
- _busy_size = size; // the request occupies the first 'size' bytes of the new root
- }
- else
- {
- // insert page before the end of linked list, so that it is deleted as soon as possible
- // the last page is not deleted even if it's empty (see deallocate_memory)
- assert(_root->prev);
-
- page->prev = _root->prev;
- page->next = _root;
-
- _root->prev->next = page;
- _root->prev = page;
-
- page->busy_size = size; // the dedicated page is fully used by this one allocation
- }
-
- return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
- }
-PUGI__NS_END
-
-#ifdef PUGIXML_COMPACT
-PUGI__NS_BEGIN
- static const uintptr_t compact_alignment_log2 = 2; // shift used to convert between byte offsets and the units stored by the compact encodings
- static const uintptr_t compact_alignment = 1 << compact_alignment_log2; // 4 bytes; compact_header statically asserts this equals xml_memory_block_alignment
-
- class compact_header // Two-byte header of compact-mode nodes/attributes: one byte locating the page marker, one byte of flags.
- {
- public:
- compact_header(xml_memory_page* page, unsigned int flags) // Stores the distance from this header back to the page's current marker (in compact_alignment units) together with the initial flags.
- {
- PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
-
- ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker));
- assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment); // must fit in one byte after the shift below
-
- _page = static_cast<unsigned char>(offset >> compact_alignment_log2);
- _flags = static_cast<unsigned char>(flags);
- }
-
- void operator&=(uintptr_t mod) // masks the flags; only the low 8 bits of 'mod' take part
- {
- _flags &= static_cast<unsigned char>(mod);
- }
-
- void operator|=(uintptr_t mod) // sets flags; only the low 8 bits of 'mod' take part
- {
- _flags |= static_cast<unsigned char>(mod);
- }
-
- uintptr_t operator&(uintptr_t mod) const // tests the flags against 'mod'
- {
- return _flags & mod;
- }
-
- xml_memory_page* get_page() const // Recovers the owning page: step back to the marker, then subtract the page-relative offset stored in the marker.
- {
- // round-trip through void* to silence 'cast increases required alignment of target type' warnings
- const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
- const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker));
-
- return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
- }
-
- private:
- unsigned char _page; // distance to the page marker, in compact_alignment units
- unsigned char _flags; // type and allocation flag bits
- };
-
- PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset) // Returns the page of the object whose compact_header lies header_offset bytes before 'object'.
- {
- const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset);
-
- return header->get_page();
- }
-
- template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object) // Fetches the full pointer stored for 'object' in the hash table of its page's allocator.
- {
- return static_cast<T*>(*compact_get_page(object, header_offset)->allocator->_hash->find(object)); // find() is declared outside this view; its result is dereferenced without a null check
- }
-
- template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value) // Stores 'value' for 'object' in the hash table of its page's allocator.
- {
- *compact_get_page(object, header_offset)->allocator->_hash->insert(object) = value; // insert() is declared outside this view; its result is written through without a null check
- }
-
- template <typename T, int header_offset, int start = -126> class compact_pointer // One-byte pointer: 0 = null, 1..254 = offset from 'this' in compact_alignment units (lowest encodable offset is 'start'), 255 = full pointer kept in the hash table.
- {
- public:
- compact_pointer(): _data(0)
- {
- }
-
- void operator=(const compact_pointer& rhs) // decodes rhs to a T* (via 'rhs + 0') and re-encodes it relative to this object's own address
- {
- *this = rhs + 0;
- }
-
- void operator=(T* value) // encodes 'value' inline when the offset fits, otherwise stores it in the hash table
- {
- if (value)
- {
- // value is guaranteed to be compact-aligned; 'this' is not
- // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
- // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
- // compensate for arithmetic shift rounding for negative values
- ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
- ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
-
- if (static_cast<uintptr_t>(offset) <= 253) // the unsigned compare also rejects negative offsets
- _data = static_cast<unsigned char>(offset + 1); // +1 keeps 0 free for null
- else
- {
- compact_set_value<header_offset>(this, value);
-
- _data = 255;
- }
- }
- else
- _data = 0;
- }
-
- operator T*() const // decodes the stored byte back into a pointer (inverse of operator=)
- {
- if (_data)
- {
- if (_data < 255)
- {
- uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
-
- return reinterpret_cast<T*>(base + ((_data - 1 + start) << compact_alignment_log2));
- }
- else
- return compact_get_value<header_offset, T>(this);
- }
- else
- return 0;
- }
-
- T* operator->() const
- {
- return *this;
- }
-
- private:
- unsigned char _data; // encoded pointer, see the class comment
- };
-
- template <typename T, int header_offset> class compact_pointer_parent // Two-byte pointer: 0 = null, values below 65534 = inline offset (never positive, i.e. the target is not above 'this'), 65534 = the page's shared parent, 65535 = full pointer kept in the hash table.
- {
- public:
- compact_pointer_parent(): _data(0)
- {
- }
-
- void operator=(const compact_pointer_parent& rhs) // decodes rhs to a T* (via 'rhs + 0') and re-encodes it relative to this object's own address
- {
- *this = rhs + 0;
- }
-
- void operator=(T* value) // picks the cheapest encoding: inline offset, then the page-wide shared parent, then the hash table
- {
- if (value)
- {
- // value is guaranteed to be compact-aligned; 'this' is not
- // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
- // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
- // compensate for arithmetic shift behavior for negative values
- ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
- ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
-
- if (static_cast<uintptr_t>(offset) <= 65533)
- {
- _data = static_cast<unsigned short>(offset + 1); // +1 keeps 0 free for null
- }
- else
- {
- xml_memory_page* page = compact_get_page(this, header_offset);
-
- if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
- page->compact_shared_parent = value; // the first out-of-range parent seen on a page becomes its shared parent
-
- if (page->compact_shared_parent == value)
- {
- _data = 65534;
- }
- else
- {
- compact_set_value<header_offset>(this, value);
-
- _data = 65535;
- }
- }
- }
- else
- {
- _data = 0;
- }
- }
-
- operator T*() const // decodes the stored value back into a pointer (inverse of operator=)
- {
- if (_data)
- {
- if (_data < 65534)
- {
- uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
-
- return reinterpret_cast<T*>(base + ((_data - 1 - 65533) << compact_alignment_log2));
- }
- else if (_data == 65534)
- return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
- else
- return compact_get_value<header_offset, T>(this);
- }
- else
- return 0;
- }
-
- T* operator->() const
- {
- return *this;
- }
-
- private:
- uint16_t _data; // encoded pointer, see the class comment
- };
-
- template <int header_offset, int base_offset> class compact_string // One-byte string pointer: 0 = null, 1..254 = low part of an offset from the page's compact_string_base (high part lives in a uint16_t located base_offset bytes before this field), 255 = full pointer kept in the hash table.
- {
- public:
- compact_string(): _data(0)
- {
- }
-
- void operator=(const compact_string& rhs) // decodes rhs to a char_t* (via 'rhs + 0') and re-encodes it for this field
- {
- *this = rhs + 0;
- }
-
- void operator=(char_t* value) // encodes 'value' relative to the page's string base when possible, otherwise stores it in the hash table
- {
- if (value)
- {
- xml_memory_page* page = compact_get_page(this, header_offset);
-
- if (PUGI__UNLIKELY(page->compact_string_base == 0))
- page->compact_string_base = value; // the first string assigned on a page becomes the base for all later offsets
-
- ptrdiff_t offset = value - page->compact_string_base; // measured in char_t units
-
- if (static_cast<uintptr_t>(offset) < (65535 << 7))
- {
- // round-trip through void* to silence 'cast increases required alignment of target type' warnings
- uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));
-
- if (*base == 0)
- {
- *base = static_cast<uint16_t>((offset >> 7) + 1); // shared high part, +1 so that 0 means "not set yet"
- _data = static_cast<unsigned char>((offset & 127) + 1); // low 7 bits, +1 keeps 0 free for null
- }
- else
- {
- ptrdiff_t remainder = offset - ((*base - 1) << 7); // the base is already fixed; check whether this string is within reach of it
-
- if (static_cast<uintptr_t>(remainder) <= 253)
- {
- _data = static_cast<unsigned char>(remainder + 1);
- }
- else
- {
- compact_set_value<header_offset>(this, value);
-
- _data = 255;
- }
- }
- }
- else
- {
- compact_set_value<header_offset>(this, value);
-
- _data = 255;
- }
- }
- else
- {
- _data = 0;
- }
- }
-
- operator char_t*() const // decodes the stored byte back into a string pointer (inverse of operator=)
- {
- if (_data)
- {
- if (_data < 255)
- {
- xml_memory_page* page = compact_get_page(this, header_offset);
-
- // round-trip through void* to silence 'cast increases required alignment of target type' warnings
- const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
- assert(*base);
-
- ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
-
- return page->compact_string_base + offset;
- }
- else
- {
- return compact_get_value<header_offset, char_t>(this);
- }
- }
- else
- return 0;
- }
-
- private:
- unsigned char _data; // encoded pointer, see the class comment
- };
-PUGI__NS_END
-#endif
-
-#ifdef PUGIXML_COMPACT
-namespace pugi
-{
- struct xml_attribute_struct // Compact-mode attribute record (8 bytes); the integer template arguments below are each field's byte distance from 'header' (and, for strings, from 'namevalue_base').
- {
- xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
- {
- PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
- }
-
- impl::compact_header header; // page locator plus flag bits
-
- uint16_t namevalue_base; // high part of the string offset shared by 'name' and 'value'
-
- impl::compact_string<4, 2> name;
- impl::compact_string<5, 3> value;
-
- impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c; // previous attribute; cyclic: the first attribute's prev is the last one (see append_attribute)
- impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute; // next attribute, 0 at the end; start = 0 limits the inline encoding to non-negative offsets
- };
-
- struct xml_node_struct // Compact-mode node record (12 bytes); the integer template arguments below are each field's byte distance from 'header' (and, for strings, from 'namevalue_base').
- {
- xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type - 1), namevalue_base(0) // the flags start out as (type - 1), matching PUGI__NODETYPE
- {
- PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
- }
-
- impl::compact_header header; // page locator plus type/flag bits
-
- uint16_t namevalue_base; // high part of the string offset shared by 'name' and 'value'
-
- impl::compact_string<4, 2> name;
- impl::compact_string<5, 3> value;
-
- impl::compact_pointer_parent<xml_node_struct, 6> parent;
-
- impl::compact_pointer<xml_node_struct, 8, 0> first_child;
-
- impl::compact_pointer<xml_node_struct, 9> prev_sibling_c; // previous sibling; cyclic: the first child's prev is the last child (see append_node)
- impl::compact_pointer<xml_node_struct, 10, 0> next_sibling; // next sibling, 0 at the end
-
- impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute;
- };
-}
-#else
-namespace pugi
-{
- struct xml_attribute_struct // Non-compact attribute record using plain pointers.
- {
- xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0)
- {
- }
-
- uintptr_t header; // page pointer with flag bits in the low bits (see xml_memory_page_pointer_mask)
-
- char_t* name;
- char_t* value;
-
- xml_attribute_struct* prev_attribute_c; // previous attribute; cyclic: the first attribute's prev is the last one (see append_attribute)
- xml_attribute_struct* next_attribute; // next attribute, 0 at the end
- };
-
- struct xml_node_struct // Non-compact node record using plain pointers.
- {
- xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) // (type - 1) goes into the low bits of the header, matching PUGI__NODETYPE
- {
- }
-
- uintptr_t header; // page pointer with type/flag bits in the low bits (see xml_memory_page_pointer_mask)
-
- char_t* name;
- char_t* value;
-
- xml_node_struct* parent;
-
- xml_node_struct* first_child;
-
- xml_node_struct* prev_sibling_c; // previous sibling; cyclic: the first child's prev is the last child (see append_node)
- xml_node_struct* next_sibling; // next sibling, 0 at the end
-
- xml_attribute_struct* first_attribute;
- };
-}
-#endif
-
-PUGI__NS_BEGIN
- struct xml_extra_buffer // Singly-linked list entry holding one additional buffer pointer (list head: xml_document_struct::extra_buffers).
- {
- char_t* buffer;
- xml_extra_buffer* next;
- };
-
- struct xml_document_struct: public xml_node_struct, public xml_allocator // Document root: a node of type node_document that is also the allocator for the whole tree.
- {
- xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
- {
- #ifdef PUGIXML_COMPACT
- _hash = &hash; // point the allocator at the hash table embedded in this document
- #endif
- }
-
- const char_t* buffer;
-
- xml_extra_buffer* extra_buffers; // head of the xml_extra_buffer list
-
- #ifdef PUGIXML_COMPACT
- compact_hash_table hash; // storage for compact pointers/strings that do not fit their inline encoding
- #endif
- };
-
- template <typename Object> inline xml_allocator& get_allocator(const Object* object) // Returns the allocator recorded in the page that 'object' (a node or attribute; must not be null) lives on.
- {
- assert(object);
-
- return *PUGI__GETPAGE(object)->allocator;
- }
-
- template <typename Object> inline xml_document_struct& get_document(const Object* object) // Returns the document that 'object' (must not be null) belongs to, by downcasting its page's allocator.
- {
- assert(object);
-
- return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator); // assumes every allocator is the xml_allocator base of an xml_document_struct - TODO confirm
- }
-PUGI__NS_END
-
-// Low-level DOM operations
-PUGI__NS_BEGIN
- inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc) // Allocates and constructs an empty attribute from 'alloc'; returns 0 if memory cannot be obtained.
- {
- xml_memory_page* page;
- void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
- if (!memory) return 0;
-
- return new (memory) xml_attribute_struct(page); // placement new: construct in the allocator's memory
- }
-
- inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type) // Allocates and constructs an unlinked node of the given type from 'alloc'; returns 0 if memory cannot be obtained.
- {
- xml_memory_page* page;
- void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
- if (!memory) return 0;
-
- return new (memory) xml_node_struct(page, type); // placement new: construct in the allocator's memory
- }
-
- inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc) // Releases an attribute's memory and any name/value string flagged as allocated; does not unlink it from its node.
- {
- if (a->header & impl::xml_memory_page_name_allocated_mask)
- alloc.deallocate_string(a->name);
-
- if (a->header & impl::xml_memory_page_value_allocated_mask)
- alloc.deallocate_string(a->value);
-
- alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
- }
-
- inline void destroy_node(xml_node_struct* n, xml_allocator& alloc) // Recursively releases a node: its allocated name/value strings, all attributes, all children, then the node itself. Does not unlink it from its parent.
- {
- if (n->header & impl::xml_memory_page_name_allocated_mask)
- alloc.deallocate_string(n->name);
-
- if (n->header & impl::xml_memory_page_value_allocated_mask)
- alloc.deallocate_string(n->value);
-
- for (xml_attribute_struct* attr = n->first_attribute; attr; )
- {
- xml_attribute_struct* next = attr->next_attribute; // read the link before the attribute is released
-
- destroy_attribute(attr, alloc);
-
- attr = next;
- }
-
- for (xml_node_struct* child = n->first_child; child; )
- {
- xml_node_struct* next = child->next_sibling; // read the link before the child is released
-
- destroy_node(child, alloc);
-
- child = next;
- }
-
- alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
- }
-
- inline void append_node(xml_node_struct* child, xml_node_struct* node) // Links 'child' as the last child of 'node'. child->next_sibling is not written here, so it is expected to be null already.
- {
- child->parent = node;
-
- xml_node_struct* head = node->first_child;
-
- if (head)
- {
- xml_node_struct* tail = head->prev_sibling_c; // the first child's prev link points at the last child
-
- tail->next_sibling = child;
- child->prev_sibling_c = tail;
- head->prev_sibling_c = child; // child is the new tail
- }
- else
- {
- node->first_child = child;
- child->prev_sibling_c = child; // an only child is its own tail
- }
- }
-
- inline void prepend_node(xml_node_struct* child, xml_node_struct* node) // Links 'child' as the first child of 'node'.
- {
- child->parent = node;
-
- xml_node_struct* head = node->first_child;
-
- if (head)
- {
- child->prev_sibling_c = head->prev_sibling_c; // the new first child takes over the link to the tail
- head->prev_sibling_c = child;
- }
- else
- child->prev_sibling_c = child; // an only child is its own tail
-
- child->next_sibling = head;
- node->first_child = child;
- }
-
- inline void insert_node_after(xml_node_struct* child, xml_node_struct* node) // Links 'child' into node's parent immediately after 'node'.
- {
- xml_node_struct* parent = node->parent;
-
- child->parent = parent;
-
- if (node->next_sibling)
- node->next_sibling->prev_sibling_c = child;
- else
- parent->first_child->prev_sibling_c = child; // 'node' was the tail, so 'child' becomes the tail recorded on the first child
-
- child->next_sibling = node->next_sibling;
- child->prev_sibling_c = node;
-
- node->next_sibling = child;
- }
-
- inline void insert_node_before(xml_node_struct* child, xml_node_struct* node) // Links 'child' into node's parent immediately before 'node'.
- {
- xml_node_struct* parent = node->parent;
-
- child->parent = parent;
-
- if (node->prev_sibling_c->next_sibling) // non-null only if 'node' has a real predecessor; for the first child, prev is the tail, whose next is null
- node->prev_sibling_c->next_sibling = child;
- else
- parent->first_child = child;
-
- child->prev_sibling_c = node->prev_sibling_c;
- child->next_sibling = node;
-
- node->prev_sibling_c = child;
- }
-
- inline void remove_node(xml_node_struct* node) // Unlinks 'node' from its parent's child list and clears its parent/sibling links; the node is not freed.
- {
- xml_node_struct* parent = node->parent;
-
- if (node->next_sibling)
- node->next_sibling->prev_sibling_c = node->prev_sibling_c;
- else
- parent->first_child->prev_sibling_c = node->prev_sibling_c; // 'node' was the tail: its predecessor becomes the tail
-
- if (node->prev_sibling_c->next_sibling) // non-null only if 'node' has a real predecessor (the tail's next is null)
- node->prev_sibling_c->next_sibling = node->next_sibling;
- else
- parent->first_child = node->next_sibling; // 'node' was the first child
-
- node->parent = 0;
- node->prev_sibling_c = 0;
- node->next_sibling = 0;
- }
-
- inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node) // Links 'attr' as the last attribute of 'node'. attr->next_attribute is not written here, so it is expected to be null already.
- {
- xml_attribute_struct* head = node->first_attribute;
-
- if (head)
- {
- xml_attribute_struct* tail = head->prev_attribute_c; // the first attribute's prev link points at the last attribute
-
- tail->next_attribute = attr;
- attr->prev_attribute_c = tail;
- head->prev_attribute_c = attr; // attr is the new tail
- }
- else
- {
- node->first_attribute = attr;
- attr->prev_attribute_c = attr; // an only attribute is its own tail
- }
- }
-
- inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node) // Links 'attr' as the first attribute of 'node'.
- {
- xml_attribute_struct* head = node->first_attribute;
-
- if (head)
- {
- attr->prev_attribute_c = head->prev_attribute_c; // the new first attribute takes over the link to the tail
- head->prev_attribute_c = attr;
- }
- else
- attr->prev_attribute_c = attr; // an only attribute is its own tail
-
- attr->next_attribute = head;
- node->first_attribute = attr;
- }
-
- inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) // Links 'attr' into node's attribute list immediately after 'place'.
- {
- if (place->next_attribute)
- place->next_attribute->prev_attribute_c = attr;
- else
- node->first_attribute->prev_attribute_c = attr; // 'place' was the tail, so 'attr' becomes the tail recorded on the first attribute
-
- attr->next_attribute = place->next_attribute;
- attr->prev_attribute_c = place;
- place->next_attribute = attr;
- }
-
- inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) // Links 'attr' into node's attribute list immediately before 'place'.
- {
- if (place->prev_attribute_c->next_attribute) // non-null only if 'place' has a real predecessor; for the first attribute, prev is the tail, whose next is null
- place->prev_attribute_c->next_attribute = attr;
- else
- node->first_attribute = attr;
-
- attr->prev_attribute_c = place->prev_attribute_c;
- attr->next_attribute = place;
- place->prev_attribute_c = attr;
- }
-
- inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node) // Unlinks 'attr' from node's attribute list and clears its links; the attribute is not freed.
- {
- if (attr->next_attribute)
- attr->next_attribute->prev_attribute_c = attr->prev_attribute_c;
- else
- node->first_attribute->prev_attribute_c = attr->prev_attribute_c; // 'attr' was the tail: its predecessor becomes the tail
-
- if (attr->prev_attribute_c->next_attribute) // non-null only if 'attr' has a real predecessor (the tail's next is null)
- attr->prev_attribute_c->next_attribute = attr->next_attribute;
- else
- node->first_attribute = attr->next_attribute; // 'attr' was the first attribute
-
- attr->prev_attribute_c = 0;
- attr->next_attribute = 0;
- }
-
- PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) // Allocates a node of 'type' and appends it as the last child of 'node'; returns 0 if reserve() or the allocation fails.
- {
- if (!alloc.reserve()) return 0; // reserve() runs before anything is allocated or linked
-
- xml_node_struct* child = allocate_node(alloc, type);
- if (!child) return 0;
-
- append_node(child, node);
-
- return child;
- }
-
- PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc) // Allocates an attribute and appends it as the last attribute of 'node'; returns 0 if reserve() or the allocation fails.
- {
- if (!alloc.reserve()) return 0; // reserve() runs before anything is allocated or linked
-
- xml_attribute_struct* attr = allocate_attribute(alloc);
- if (!attr) return 0;
-
- append_attribute(attr, node);
-
- return attr;
- }
-PUGI__NS_END
-
-// Helper classes for code generation
-PUGI__NS_BEGIN
- struct opt_false // Compile-time "false" tag; used as the opt_swap argument of the decoders below to disable byte swapping.
- {
- enum { value = 0 };
- };
-
- struct opt_true // Compile-time "true" tag; as an opt_swap argument it makes the decoders below byte-swap each code unit.
- {
- enum { value = 1 };
- };
-PUGI__NS_END
-
-// Unicode utilities
-PUGI__NS_BEGIN
- inline uint16_t endian_swap(uint16_t value) // Returns 'value' with its two bytes exchanged.
- {
- return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8));
- }
-
- inline uint32_t endian_swap(uint32_t value) // Returns 'value' with its four bytes in reverse order.
- {
- return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24);
- }
-
- struct utf8_counter // Decoder sink that only counts how many UTF-8 bytes the decoded code points would need.
- {
- typedef size_t value_type; // running byte count
-
- static value_type low(value_type result, uint32_t ch) // code points below U+10000: 1 to 3 bytes
- {
- // U+0000..U+007F
- if (ch < 0x80) return result + 1;
- // U+0080..U+07FF
- else if (ch < 0x800) return result + 2;
- // U+0800..U+FFFF
- else return result + 3;
- }
-
- static value_type high(value_type result, uint32_t) // code points from U+10000 up: always 4 bytes
- {
- // U+10000..U+10FFFF
- return result + 4;
- }
- };
-
- struct utf8_writer // Decoder sink that encodes code points as UTF-8; each function writes at 'result' and returns the position after the bytes written. No bounds checking is done here.
- {
- typedef uint8_t* value_type; // output cursor
-
- static value_type low(value_type result, uint32_t ch) // code points below U+10000: 1 to 3 bytes
- {
- // U+0000..U+007F
- if (ch < 0x80)
- {
- *result = static_cast<uint8_t>(ch);
- return result + 1;
- }
- // U+0080..U+07FF
- else if (ch < 0x800)
- {
- result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
- result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
- return result + 2;
- }
- // U+0800..U+FFFF
- else
- {
- result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
- result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
- result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
- return result + 3;
- }
- }
-
- static value_type high(value_type result, uint32_t ch) // code points from U+10000 up: 4 bytes
- {
- // U+10000..U+10FFFF
- result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
- result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
- result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
- result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
- return result + 4;
- }
-
- static value_type any(value_type result, uint32_t ch) // picks low() or high() based on the code point
- {
- return (ch < 0x10000) ? low(result, ch) : high(result, ch);
- }
- };
-
- struct utf16_counter // Decoder sink that only counts how many UTF-16 code units the decoded code points would need.
- {
- typedef size_t value_type; // running code unit count
-
- static value_type low(value_type result, uint32_t) // below U+10000: one code unit
- {
- return result + 1;
- }
-
- static value_type high(value_type result, uint32_t) // U+10000 and up: a surrogate pair, two code units
- {
- return result + 2;
- }
- };
-
- struct utf16_writer // Decoder sink that encodes code points as UTF-16 in native byte order; each function returns the position after the units written. No bounds checking is done here.
- {
- typedef uint16_t* value_type; // output cursor
-
- static value_type low(value_type result, uint32_t ch) // below U+10000: stored as a single code unit
- {
- *result = static_cast<uint16_t>(ch);
-
- return result + 1;
- }
-
- static value_type high(value_type result, uint32_t ch) // U+10000 and up: written as a surrogate pair
- {
- uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10; // upper 10 bits go into the lead surrogate
- uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff; // lower 10 bits go into the trail surrogate
-
- result[0] = static_cast<uint16_t>(0xD800 + msh);
- result[1] = static_cast<uint16_t>(0xDC00 + lsh);
-
- return result + 2;
- }
-
- static value_type any(value_type result, uint32_t ch) // picks low() or high() based on the code point
- {
- return (ch < 0x10000) ? low(result, ch) : high(result, ch);
- }
- };
-
- struct utf32_counter // Decoder sink that counts decoded code points: every code point is one UTF-32 unit.
- {
- typedef size_t value_type; // running code unit count
-
- static value_type low(value_type result, uint32_t)
- {
- return result + 1;
- }
-
- static value_type high(value_type result, uint32_t)
- {
- return result + 1;
- }
- };
-
- struct utf32_writer // Decoder sink that stores each code point unchanged as one 32-bit unit; low(), high() and any() behave identically. No bounds checking is done here.
- {
- typedef uint32_t* value_type; // output cursor
-
- static value_type low(value_type result, uint32_t ch)
- {
- *result = ch;
-
- return result + 1;
- }
-
- static value_type high(value_type result, uint32_t ch)
- {
- *result = ch;
-
- return result + 1;
- }
-
- static value_type any(value_type result, uint32_t ch)
- {
- *result = ch;
-
- return result + 1;
- }
- };
-
- struct latin1_writer // Decoder sink that writes one byte per code point; code points above 255 are replaced with '?'. No bounds checking is done here.
- {
- typedef uint8_t* value_type; // output cursor
-
- static value_type low(value_type result, uint32_t ch)
- {
- *result = static_cast<uint8_t>(ch > 255 ? '?' : ch);
-
- return result + 1;
- }
-
- static value_type high(value_type result, uint32_t ch) // U+10000 and up can never fit in one byte
- {
- (void)ch;
-
- *result = '?';
-
- return result + 1;
- }
- };
-
- struct utf8_decoder // Decodes UTF-8 and feeds each code point to Traits::low (below U+10000) or Traits::high; bytes that do not start a well-formed sequence are skipped one at a time.
- {
- typedef uint8_t type; // input code unit
-
- template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) // 'size' is in bytes; returns the Traits accumulator after all input has been consumed
- {
- const uint8_t utf8_byte_mask = 0x3f; // payload bits of a continuation byte
-
- while (size)
- {
- uint8_t lead = *data;
-
- // 0xxxxxxx -> U+0000..U+007F
- if (lead < 0x80)
- {
- result = Traits::low(result, lead);
- data += 1;
- size -= 1;
-
- // process aligned single-byte (ascii) blocks
- if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
- {
- // round-trip through void* to silence 'cast increases required alignment of target type' warnings
- while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0) // four bytes at once: no high bit set means all four are ASCII
- {
- result = Traits::low(result, data[0]);
- result = Traits::low(result, data[1]);
- result = Traits::low(result, data[2]);
- result = Traits::low(result, data[3]);
- data += 4;
- size -= 4;
- }
- }
- }
- // 110xxxxx -> U+0080..U+07FF
- else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80) // the unsigned subtraction is a range check for 0xC0..0xDF
- {
- result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
- data += 2;
- size -= 2;
- }
- // 1110xxxx -> U+0800..U+FFFF
- else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
- {
- result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
- data += 3;
- size -= 3;
- }
- // 11110xxx -> U+10000..U+10FFFF
- else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
- {
- result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
- data += 4;
- size -= 4;
- }
- // 10xxxxxx or 11111xxx -> invalid (truncated or malformed sequences also end up here); skip one byte without output
- else
- {
- data += 1;
- size -= 1;
- }
- }
-
- return result;
- }
- };
-
- template <typename opt_swap> struct utf16_decoder // Decodes UTF-16 (byte-swapping each unit when opt_swap::value is set) and feeds code points to Traits::low / Traits::high; unpaired surrogates are skipped.
- {
- typedef uint16_t type; // input code unit
-
- template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits) // 'size' is in 16-bit units; returns the Traits accumulator after all input has been consumed
- {
- while (size)
- {
- uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
-
- // U+0000..U+D7FF
- if (lead < 0xD800)
- {
- result = Traits::low(result, lead);
- data += 1;
- size -= 1;
- }
- // U+E000..U+FFFF
- else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
- {
- result = Traits::low(result, lead);
- data += 1;
- size -= 1;
- }
- // surrogate pair lead
- else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2)
- {
- uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
-
- if (static_cast<unsigned int>(next - 0xDC00) < 0x400) // a valid trail surrogate follows
- {
- result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
- data += 2;
- size -= 2;
- }
- else
- {
- data += 1; // lead without a trail: drop only the lead and re-examine the next unit
- size -= 1;
- }
- }
- else
- {
- data += 1; // lone trail surrogate, or a lead surrogate at the very end of the input: skip it
- size -= 1;
- }
- }
-
- return result;
- }
- };
-
- template <typename opt_swap> struct utf32_decoder // UTF-32 decoder; opt_swap::value selects whether each 32-bit unit is endian-swapped before it is examined
- {
- typedef uint32_t type;
- // Hands every unit of data[0..size) to Traits::low or Traits::high together with the running result, and returns the final result
- template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
- {
- while (size)
- {
- uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
-
- // U+0000..U+FFFF
- if (lead < 0x10000)
- {
- result = Traits::low(result, lead);
- data += 1;
- size -= 1;
- }
- // U+10000..U+10FFFF (every value >= 0x10000 takes this branch; no upper-bound check is made here)
- else
- {
- result = Traits::high(result, lead);
- data += 1;
- size -= 1;
- }
- }
-
- return result;
- }
- };
-
- struct latin1_decoder // Decoder for single-byte input where every byte is treated as one code point
- {
- typedef uint8_t type;
- // Passes each byte of data[0..size) to Traits::low together with the running result, and returns the final result
- template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
- {
- while (size)
- {
- result = Traits::low(result, *data);
- data += 1;
- size -= 1;
- }
-
- return result;
- }
- };
-
- template <size_t size> struct wchar_selector; // Only the 2-byte and 4-byte specializations below are defined
- // 2-byte wchar_t: UTF-16 unit type, counter, writer and non-swapping decoder
- template <> struct wchar_selector<2>
- {
- typedef uint16_t type;
- typedef utf16_counter counter;
- typedef utf16_writer writer;
- typedef utf16_decoder<opt_false> decoder;
- };
- // 4-byte wchar_t: UTF-32 unit type, counter, writer and non-swapping decoder
- template <> struct wchar_selector<4>
- {
- typedef uint32_t type;
- typedef utf32_counter counter;
- typedef utf32_writer writer;
- typedef utf32_decoder<opt_false> decoder;
- };
- // Counter and writer picked for the wchar_t width of the current build
- typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
- typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
-
- struct wchar_decoder // Decoder for wchar_t input; forwards to the decoder chosen by wchar_selector for sizeof(wchar_t)
- {
- typedef wchar_t type;
- // Reinterprets data as the selected decoder's unit type and delegates to its process()
- template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
- {
- typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
-
- return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits);
- }
- };
-
-#ifdef PUGIXML_WCHAR_MODE
- PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length) // Writes the endian-swapped form of data[0..length) into result[0..length), one element at a time
- {
- for (size_t i = 0; i < length; ++i)
- result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i]))); // swap is done on the fixed-width unit type matching sizeof(wchar_t)
- }
-#endif
-PUGI__NS_END
-
-PUGI__NS_BEGIN
- enum chartype_t // Bit flags combined per character in chartype_table and tested through PUGI__IS_CHARTYPE
- {
- ct_parse_pcdata = 1, // \0, &, \r, <
- ct_parse_attr = 2, // \0, &, \r, ', "
- ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab
- ct_space = 8, // \r, \n, space, tab
- ct_parse_cdata = 16, // \0, ], >, \r
- ct_parse_comment = 32, // \0, -, >, \r
- ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
- ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, :
- };
-
- static const unsigned char chartype_table[256] = // Indexed by character value; each entry is a bitwise OR of chartype_t flags
- {
- 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
- 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47
- 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63
- 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95
- 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127
- // Every entry from 128 up is 192, i.e. ct_symbol | ct_start_symbol
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
- 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
- };
-
- enum chartypex_t // Bit flags combined per character in chartypex_table and tested through PUGI__IS_CHARTYPEX
- {
- ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
- ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, "
- ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
- ctx_digit = 8, // 0-9
- ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
- };
-
- static const unsigned char chartypex_table[256] = // Indexed by character value; each entry is a bitwise OR of chartypex_t flags
- {
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
- 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
- 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
-
- 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
- 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
- // Every entry from 128 up is 20, i.e. ctx_start_symbol | ctx_symbol
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
- };
-
-#ifdef PUGIXML_WCHAR_MODE
- #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
-#else
- #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
-#endif
- // Table lookup masked by the flag(s) in ct; in wchar mode every character >= 128 shares table[128], otherwise the character is used as an unsigned char index
- #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
- #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
-
- PUGI__FN bool is_little_endian() // Returns true when the first byte in memory of an unsigned int holding 1 is 1
- {
- unsigned int ui = 1;
- // inspect the lowest-addressed byte of ui
- return *reinterpret_cast<unsigned char*>(&ui) == 1;
- }
-
- PUGI__FN xml_encoding get_wchar_encoding() // Returns the UTF-16 or UTF-32 encoding constant matching sizeof(wchar_t) and the result of is_little_endian()
- {
- PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
- // 2-byte wchar_t maps to UTF-16, anything else (4 bytes, per the static assert) to UTF-32
- if (sizeof(wchar_t) == 2)
- return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
- else
- return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
- }
-
- PUGI__FN xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3) // Guesses an encoding from four leading bytes; checks run in order, so the first matching pattern wins
- {
- // look for BOM in first few bytes (the utf32_le test must precede the utf16_le test because both start with ff fe)
- if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
- if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
- if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
- if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
- if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
-
- // look for <, <? or <?xm in various encodings (0x3c is '<', 0x3f is '?', 0x78 is 'x', 0x6d is 'm')
- if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
- if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
- if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
- if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
- if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return encoding_utf8;
-
- // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
- if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
- if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
-
- // neither a BOM nor one of the '<' patterns above matched, assume utf8
- return encoding_utf8;
- }
-
- PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size) // Resolves generic encoding constants to concrete ones and autodetects from the first four bytes when encoding_auto is given
- {
- // replace wchar encoding with utf implementation
- if (encoding == encoding_wchar) return get_wchar_encoding();
-
- // replace utf16 encoding with utf16 with specific endianness
- if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
-
- // replace utf32 encoding with utf32 with specific endianness
- if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
-
- // only do autodetection if no explicit encoding is requested
- if (encoding != encoding_auto) return encoding;
-
- // skip encoding autodetection if input buffer is too small (the guess below reads four bytes)
- if (size < 4) return encoding_utf8;
-
- // try to guess encoding (based on XML specification, Appendix F.1)
- const uint8_t* data = static_cast<const uint8_t*>(contents);
- // copy the four leading bytes into locals before passing them on
- PUGI__DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
-
- return guess_buffer_encoding(d0, d1, d2, d3);
- }
-
- PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) // Produces a writable char_t buffer for contents: the input itself when is_mutable, otherwise a zero-terminated copy; returns false only if allocation fails
- {
- size_t length = size / sizeof(char_t); // size is in bytes, length in char_t units (any remainder is dropped)
-
- if (is_mutable)
- {
- out_buffer = static_cast<char_t*>(const_cast<void*>(contents)); // reuse the caller's memory; no terminator is written on this path
- out_length = length;
- }
- else
- {
- char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); // one extra element for the terminator
- if (!buffer) return false;
-
- if (contents)
- memcpy(buffer, contents, length * sizeof(char_t));
- else
- assert(length == 0); // a null contents pointer is only accepted together with an empty buffer
-
- buffer[length] = 0;
-
- out_buffer = buffer;
- out_length = length + 1; // reported length includes the terminator on this path
- }
-
- return true;
- }
-
-#ifdef PUGIXML_WCHAR_MODE
- PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re) // True when le and re are the two opposite-endian variants of the same UTF-16 or UTF-32 encoding
- {
- return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) || // UTF-16 pair, either order
- (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be); // UTF-32 pair, either order
- }
-
- PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) // Endian-swaps contents into a writable buffer: in place when is_mutable, otherwise into a zero-terminated copy; returns false only if allocation fails
- {
- const char_t* data = static_cast<const char_t*>(contents);
- size_t length = size / sizeof(char_t); // size is in bytes, length in char_t units
-
- if (is_mutable)
- {
- char_t* buffer = const_cast<char_t*>(data);
- // source and destination are the same memory here
- convert_wchar_endian_swap(buffer, data, length);
-
- out_buffer = buffer;
- out_length = length; // no terminator is written on this path
- }
- else
- {
- char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); // one extra element for the terminator
- if (!buffer) return false;
-
- convert_wchar_endian_swap(buffer, data, length);
- buffer[length] = 0;
-
- out_buffer = buffer;
- out_length = length + 1; // reported length includes the terminator on this path
- }
-
- return true;
- }
-
- template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) // Two-pass conversion of contents (decoded by D) into a newly allocated, zero-terminated wchar_t buffer; returns false only if allocation fails
- {
- const typename D::type* data = static_cast<const typename D::type*>(contents);
- size_t data_length = size / sizeof(typename D::type); // size is in bytes, data_length in units of D::type
-
- // first pass: get length in wchar_t units
- size_t length = D::process(data, data_length, 0, wchar_counter());
-
- // allocate buffer of suitable length
- char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
- if (!buffer) return false;
-
- // second pass: convert the input decoded by D to wchar_t
- wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
- wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
-
- assert(oend == obegin + length); // the writer must have produced exactly what the counter predicted
- *oend = 0;
-
- out_buffer = buffer;
- out_length = length + 1; // reported length includes the terminator
-
- return true;
- }
-
- PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) // wchar-mode dispatcher: picks the conversion routine for the given source encoding; is_mutable is only used by the two paths that can reuse the input memory
- {
- // get native encoding
- xml_encoding wchar_encoding = get_wchar_encoding();
-
- // fast path: no conversion required
- if (encoding == wchar_encoding)
- return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
-
- // only endian-swapping is required
- if (need_endian_swap_utf(encoding, wchar_encoding))
- return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
-
- // source encoding is utf8
- if (encoding == encoding_utf8)
- return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
-
- // source encoding is utf16
- if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
- {
- xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
- // use the byte-swapping decoder when the source endianness differs from the host
- return (native_encoding == encoding) ?
- convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
- convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
- }
-
- // source encoding is utf32
- if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
- {
- xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
- // use the byte-swapping decoder when the source endianness differs from the host
- return (native_encoding == encoding) ?
- convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
- convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
- }
-
- // source encoding is latin1
- if (encoding == encoding_latin1)
- return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
- // any other encoding value is treated as a programming error
- assert(!"Invalid encoding");
- return false;
- }
-#else
- template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) // Two-pass conversion of contents (decoded by D) into a newly allocated, zero-terminated utf8 buffer; returns false only if allocation fails
- {
- const typename D::type* data = static_cast<const typename D::type*>(contents);
- size_t data_length = size / sizeof(typename D::type); // size is in bytes, data_length in units of D::type
-
- // first pass: get length in utf8 units
- size_t length = D::process(data, data_length, 0, utf8_counter());
-
- // allocate buffer of suitable length
- char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
- if (!buffer) return false;
-
- // second pass: convert the input decoded by D to utf8
- uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
- uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
-
- assert(oend == obegin + length); // the writer must have produced exactly what the counter predicted
- *oend = 0;
-
- out_buffer = buffer;
- out_length = length + 1; // reported length includes the terminator
-
- return true;
- }
-
- PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size) // Returns the index of the first byte above 127 in data[0..size), or size when there is none
- {
- for (size_t i = 0; i < size; ++i)
- if (data[i] > 127)
- return i;
- // every byte is in the 0..127 range
- return size;
- }
-
- PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) // Converts latin1 input to utf8; falls back to get_mutable_buffer when every byte is <= 127, otherwise builds a new zero-terminated buffer
- {
- const uint8_t* data = static_cast<const uint8_t*>(contents);
- size_t data_length = size;
-
- // get size of prefix that does not need utf8 conversion
- size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
- assert(prefix_length <= data_length);
- // postfix starts at the first byte above 127
- const uint8_t* postfix = data + prefix_length;
- size_t postfix_length = data_length - prefix_length;
-
- // if no conversion is needed, just return the original buffer
- if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
-
- // first pass: get length in utf8 units
- size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
-
- // allocate buffer of suitable length
- char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
- if (!buffer) return false;
-
- // second pass: convert latin1 input to utf8 (the prefix is copied verbatim, only the postfix goes through the decoder)
- memcpy(buffer, data, prefix_length);
-
- uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
- uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
-
- assert(oend == obegin + length); // the writer must have produced exactly what the counter predicted
- *oend = 0;
-
- out_buffer = buffer;
- out_length = length + 1; // reported length includes the terminator
-
- return true;
- }
-
- PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) // utf8-mode dispatcher: picks the conversion routine for the given source encoding; is_mutable is forwarded only on the utf8 and latin1 paths
- {
- // fast path: no conversion required
- if (encoding == encoding_utf8)
- return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
-
- // source encoding is utf16
- if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
- {
- xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
- // use the byte-swapping decoder when the source endianness differs from the host
- return (native_encoding == encoding) ?
- convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
- convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
- }
-
- // source encoding is utf32
- if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
- {
- xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
- // use the byte-swapping decoder when the source endianness differs from the host
- return (native_encoding == encoding) ?
- convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
- convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
- }
-
- // source encoding is latin1
- if (encoding == encoding_latin1)
- return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
- // any other encoding value is treated as a programming error
- assert(!"Invalid encoding");
- return false;
- }
-#endif
-
- PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length) // First pass of wide-to-utf8 conversion: returns what utf8_counter accumulates over str[0..length)
- {
- // get length in utf8 characters
- return wchar_decoder::process(str, length, 0, utf8_counter());
- }
-
- PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length) // Second pass of wide-to-utf8 conversion: writes str[0..length) as utf8 into buffer, which must hold exactly size bytes of output (asserted); no terminator is written
- {
- // convert to utf8
- uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
- uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
- // NOTE(review): the two void-casts below presumably keep end and size referenced when assert compiles to nothing - confirm
- assert(begin + size == end);
- (void)!end;
- (void)!size;
- }
-
-#ifndef PUGIXML_NO_STL
- PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length) // Converts str[0..length) to a utf8 std::string using the as_utf8_begin / as_utf8_end pair
- {
- // first pass: get length in utf8 characters
- size_t size = as_utf8_begin(str, length);
-
- // allocate resulting string
- std::string result;
- result.resize(size);
-
- // second pass: convert to utf8 (skipped for empty output so that result[0] is never taken on an empty string)
- if (size > 0) as_utf8_end(&result[0], size, str, length);
-
- return result;
- }
-
- PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size) // Converts the utf8 bytes str[0..size) to a wide string in two passes (count, then write)
- {
- const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
-
- // first pass: get length in wchar_t units
- size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
-
- // allocate resulting string
- std::basic_string<wchar_t> result;
- result.resize(length);
-
- // second pass: convert to wchar_t (skipped for empty output so that result[0] is never taken on an empty string)
- if (length > 0)
- {
- wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
- wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
-
- assert(begin + length == end); // the writer must have produced exactly what the counter predicted
- (void)!end;
- }
-
- return result;
- }
-#endif
-
- template <typename Header> // Decides whether the existing string at target may be overwritten in place with a new string of the given length
- inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
- {
- // never reuse shared memory
- if (header & xml_memory_page_contents_shared_mask) return false;
- // NOTE(review): strlength presumably returns the length without the terminator - confirm against its definition
- size_t target_length = strlength(target);
-
- // always reuse document buffer memory if possible (header_mask bit clear means the string was not separately allocated)
- if ((header & header_mask) == 0) return target_length >= length;
-
- // reuse heap memory if waste is not too great
- const size_t reuse_threshold = 32;
- // fits, and either the old string is shorter than the threshold or less than half of it would be left unused
- return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
- }
-
- template <typename String, typename Header> // Assigns source[0..source_length) to dest, reusing dest's storage when allowed; header_mask is the bit in header that marks dest as allocated; returns false on allocation failure
- PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
- {
- if (source_length == 0)
- {
- // empty string and null pointer are equivalent, so just deallocate old memory
- xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
- // only strings flagged as allocated are handed back to the allocator
- if (header & header_mask) alloc->deallocate_string(dest);
-
- // mark the string as not allocated
- dest = 0;
- header &= ~header_mask;
-
- return true;
- }
- else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
- {
- // we can reuse old buffer, so just copy the new data (including zero terminator)
- memcpy(dest, source, source_length * sizeof(char_t));
- dest[source_length] = 0;
-
- return true;
- }
- else
- {
- xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
- // bail out before touching dest if the allocator cannot reserve
- if (!alloc->reserve()) return false;
-
- // allocate new buffer
- char_t* buf = alloc->allocate_string(source_length + 1);
- if (!buf) return false;
-
- // copy the string (including zero terminator)
- memcpy(buf, source, source_length * sizeof(char_t));
- buf[source_length] = 0;
-
- // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
- if (header & header_mask) alloc->deallocate_string(dest);
-
- // the string is now allocated, so set the flag
- dest = buf;
- header |= header_mask;
-
- return true;
- }
- }
-
- struct gap // Tracks one pending hole in a buffer being compacted in place: end is the hole's past-the-end pointer, size its accumulated length in char_t units
- {
- char_t* end;
- size_t size;
-
- gap(): end(0), size(0)
- {
- }
-
- // Push new gap, move s count characters further (skipping the gap).
- // Collapse previous gap.
- void push(char_t*& s, size_t count)
- {
- if (end) // there was a gap already; collapse it
- {
- // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
- assert(s >= end);
- memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end)); // length is computed in bytes through the char* casts
- }
-
- s += count; // end of current gap
-
- // "merge" two gaps
- end = s;
- size += count;
- }
-
- // Collapse all gaps, return past-the-end pointer
- char_t* flush(char_t* s)
- {
- if (end)
- {
- // Move [old_gap_end, current_pos) to [old_gap_start, ...)
- assert(s >= end);
- memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end)); // length is computed in bytes through the char* casts
-
- return s - size;
- }
- else return s; // nothing was ever pushed, so the data is already contiguous
- }
- };
-
- PUGI__FN char_t* strconv_escape(char_t* s, gap& g) // Expands the character or entity reference starting at s (the '&') in place, recording the freed space in g; returns where scanning should resume, leaving the text untouched when the reference is not recognized
- {
- char_t* stre = s + 1; // stre scans ahead of s, starting right after the '&'
-
- switch (*stre)
- {
- case '#': // &#...
- {
- unsigned int ucsc = 0; // accumulated code point; no overflow check is made while digits are added
-
- if (stre[1] == 'x') // &#x... (hex code)
- {
- stre += 2;
-
- char_t ch = *stre;
-
- if (ch == ';') return stre; // "&#x;" has no digits: leave it as is
-
- for (;;)
- {
- if (static_cast<unsigned int>(ch - '0') <= 9)
- ucsc = 16 * ucsc + (ch - '0');
- else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5) // ch | ' ' folds A-F onto a-f
- ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
- else if (ch == ';')
- break;
- else // cancel
- return stre;
-
- ch = *++stre;
- }
-
- ++stre;
- }
- else // &#... (dec code)
- {
- char_t ch = *++stre;
-
- if (ch == ';') return stre; // "&#;" has no digits: leave it as is
-
- for (;;)
- {
- if (static_cast<unsigned int>(static_cast<unsigned int>(ch) - '0') <= 9)
- ucsc = 10 * ucsc + (ch - '0');
- else if (ch == ';')
- break;
- else // cancel
- return stre;
-
- ch = *++stre;
- }
-
- ++stre;
- }
- // write the code point over the start of the reference; s is advanced past what was written
- #ifdef PUGIXML_WCHAR_MODE
- s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
- #else
- s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
- #endif
- // the rest of the reference, up to stre, becomes a gap
- g.push(s, stre - s);
- return stre;
- }
-
- case 'a': // &a
- {
- ++stre;
-
- if (*stre == 'm') // &am
- {
- if (*++stre == 'p' && *++stre == ';') // &amp;
- {
- *s++ = '&';
- ++stre;
-
- g.push(s, stre - s);
- return stre;
- }
- }
- else if (*stre == 'p') // &ap
- {
- if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
- {
- *s++ = '\'';
- ++stre;
-
- g.push(s, stre - s);
- return stre;
- }
- }
- break;
- }
-
- case 'g': // &g
- {
- if (*++stre == 't' && *++stre == ';') // &gt;
- {
- *s++ = '>';
- ++stre;
-
- g.push(s, stre - s);
- return stre;
- }
- break;
- }
-
- case 'l': // &l
- {
- if (*++stre == 't' && *++stre == ';') // &lt;
- {
- *s++ = '<';
- ++stre;
-
- g.push(s, stre - s);
- return stre;
- }
- break;
- }
-
- case 'q': // &q
- {
- if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
- {
- *s++ = '"';
- ++stre;
-
- g.push(s, stre - s);
- return stre;
- }
- break;
- }
-
- default:
- break;
- }
- // not a recognized reference: nothing was rewritten, resume at the point where matching stopped
- return stre;
- }
-
- // Parser utilities; these macros refer to names at the point of use (s, endch, optmsk, cursor, alloc, ch, error_offset, error_status) rather than taking them as arguments
- #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
- #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
- #define PUGI__OPTSET(OPT) ( optmsk & (OPT) )
- #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
- #define PUGI__POPNODE() { cursor = cursor->parent; }
- #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
- #define PUGI__SCANWHILE(X) { while (X) ++s; }
- #define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
- #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; }
- #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0)
- #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
-
- PUGI__FN char_t* strconv_comment(char_t* s, char_t endch) // Normalizes line endings inside a comment in place and zero-terminates it at the closing "--"; returns a pointer past the terminator sequence, or 0 if the input ends first
- {
- gap g;
-
- while (true)
- {
- PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment)); // skip ahead to the next character flagged ct_parse_comment
-
- if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
- {
- *s++ = '\n'; // replace first one with 0x0a
-
- if (*s == '\n') g.push(s, 1); // drop the 0x0a of a pair by turning it into a gap
- }
- else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
- {
- *g.flush(s) = 0;
- // skip "-->" when the '>' is present in the buffer, otherwise only "--" (the '>' was matched through endch)
- return s + (s[2] == '>' ? 3 : 2);
- }
- else if (*s == 0)
- {
- return 0;
- }
- else ++s;
- }
- }
-
- PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch) // Normalizes line endings inside a CDATA section in place and zero-terminates it at the closing "]]"; returns a pointer to the second ']' of the terminator, or 0 if the input ends first
- {
- gap g;
-
- while (true)
- {
- PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata)); // skip ahead to the next character flagged ct_parse_cdata
-
- if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
- {
- *s++ = '\n'; // replace first one with 0x0a
-
- if (*s == '\n') g.push(s, 1); // drop the 0x0a of a pair by turning it into a gap
- }
- else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
- {
- *g.flush(s) = 0;
- // only the first ']' is stepped over here
- return s + 1;
- }
- else if (*s == 0)
- {
- return 0;
- }
- else ++s;
- }
- }
-
- typedef char_t* (*strconv_pcdata_t)(char_t*); // Signature shared by all PCDATA converters returned from get_strconv_pcdata
- // PCDATA converter family; each opt_*::value flag enables one processing step at compile time
- template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
- {
- static char_t* parse(char_t* s) // Processes text in place up to '<' or the end of input, zero-terminates it, and returns where parsing should continue
- {
- gap g;
-
- char_t* begin = s;
-
- while (true)
- {
- PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata)); // skip ahead to the next character flagged ct_parse_pcdata
-
- if (*s == '<') // PCDATA ends here
- {
- char_t* end = g.flush(s);
- // optionally strip trailing whitespace, never going back past the start of the text
- if (opt_trim::value)
- while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
- --end;
-
- *end = 0;
-
- return s + 1;
- }
- else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
- {
- *s++ = '\n'; // replace first one with 0x0a
-
- if (*s == '\n') g.push(s, 1); // drop the 0x0a of a pair by turning it into a gap
- }
- else if (opt_escape::value && *s == '&')
- {
- s = strconv_escape(s, g);
- }
- else if (*s == 0) // end of input: same finalization as for '<', but the returned pointer stays on the terminator
- {
- char_t* end = g.flush(s);
-
- if (opt_trim::value)
- while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
- --end;
-
- *end = 0;
-
- return s;
- }
- else ++s;
- }
- }
- };
-
- PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask) // Selects the strconv_pcdata_impl instantiation matching the parse_escapes, parse_eol and parse_trim_pcdata bits of optmask
- {
- PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
- // the shifts below pack the three flags into a 3-bit index: bit 0 = escapes, bit 1 = eol, bit 2 = trim
- switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (trim eol escapes, most significant bit first)
- {
- case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse;
- case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse;
- case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse;
- case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse;
- case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse;
- case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse;
- case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse;
- case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse;
- default: assert(false); return 0; // should not get here
- }
- }
-
- typedef char_t* (*strconv_attribute_t)(char_t*, char_t); // Signature shared by all attribute value converters returned from get_strconv_attribute
- // Attribute value converters; each processes the value in place up to end_quote, zero-terminates it and returns the position after the quote, or 0 if the input ends first
- template <typename opt_escape> struct strconv_attribute_impl
- {
- static char_t* parse_wnorm(char_t* s, char_t end_quote) // Whitespace normalization: leading and trailing whitespace removed, inner runs collapsed to a single space
- {
- gap g;
-
- // trim leading whitespaces
- if (PUGI__IS_CHARTYPE(*s, ct_space))
- {
- char_t* str = s;
-
- do ++str;
- while (PUGI__IS_CHARTYPE(*str, ct_space));
-
- g.push(s, str - s);
- }
-
- while (true)
- {
- PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));
-
- if (*s == end_quote)
- {
- char_t* str = g.flush(s);
- // write the terminator, then keep zeroing backwards over trailing whitespace; NOTE(review): the loop also inspects the character just before the value - confirm that one is never whitespace
- do *str-- = 0;
- while (PUGI__IS_CHARTYPE(*str, ct_space));
-
- return s + 1;
- }
- else if (PUGI__IS_CHARTYPE(*s, ct_space))
- {
- *s++ = ' '; // any whitespace character becomes a plain space
-
- if (PUGI__IS_CHARTYPE(*s, ct_space))
- {
- char_t* str = s + 1;
- while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
- // the rest of the whitespace run is removed through the gap
- g.push(s, str - s);
- }
- }
- else if (opt_escape::value && *s == '&')
- {
- s = strconv_escape(s, g);
- }
- else if (!*s)
- {
- return 0;
- }
- else ++s;
- }
- }
-
- static char_t* parse_wconv(char_t* s, char_t end_quote) // Whitespace conversion: every whitespace character becomes a space, with a 0x0d 0x0a pair producing a single space
- {
- gap g;
-
- while (true)
- {
- PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));
-
- if (*s == end_quote)
- {
- *g.flush(s) = 0;
-
- return s + 1;
- }
- else if (PUGI__IS_CHARTYPE(*s, ct_space))
- {
- if (*s == '\r')
- {
- *s++ = ' ';
-
- if (*s == '\n') g.push(s, 1); // drop the 0x0a of a pair by turning it into a gap
- }
- else *s++ = ' ';
- }
- else if (opt_escape::value && *s == '&')
- {
- s = strconv_escape(s, g);
- }
- else if (!*s)
- {
- return 0;
- }
- else ++s;
- }
- }
-
- static char_t* parse_eol(char_t* s, char_t end_quote) // Line-ending normalization only: 0x0d and 0x0d 0x0a both become a single 0x0a
- {
- gap g;
-
- while (true)
- {
- PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
-
- if (*s == end_quote)
- {
- *g.flush(s) = 0;
-
- return s + 1;
- }
- else if (*s == '\r')
- {
- *s++ = '\n';
-
- if (*s == '\n') g.push(s, 1); // drop the 0x0a of a pair by turning it into a gap
- }
- else if (opt_escape::value && *s == '&')
- {
- s = strconv_escape(s, g);
- }
- else if (!*s)
- {
- return 0;
- }
- else ++s;
- }
- }
-
- static char_t* parse_simple(char_t* s, char_t end_quote) // No whitespace or line-ending processing; only escapes are expanded, when opt_escape is set
- {
- gap g;
-
- while (true)
- {
- PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
-
- if (*s == end_quote)
- {
- *g.flush(s) = 0;
-
- return s + 1;
- }
- else if (opt_escape::value && *s == '&')
- {
- s = strconv_escape(s, g);
- }
- else if (!*s)
- {
- return 0;
- }
- else ++s;
- }
- }
- };
-
- PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask) // Selects the attribute converter for the option bits of optmask; wnorm takes precedence over wconv, and wconv over eol
- {
- PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
- // after the shift: bit 0 = escapes, bit 1 = eol, bit 2 = wconv, bit 3 = wnorm
- switch ((optmask >> 4) & 15) // get bitmask for flags (wnorm wconv eol escapes, most significant bit first)
- {
- case 0: return strconv_attribute_impl<opt_false>::parse_simple;
- case 1: return strconv_attribute_impl<opt_true>::parse_simple;
- case 2: return strconv_attribute_impl<opt_false>::parse_eol;
- case 3: return strconv_attribute_impl<opt_true>::parse_eol;
- case 4: return strconv_attribute_impl<opt_false>::parse_wconv;
- case 5: return strconv_attribute_impl<opt_true>::parse_wconv;
- case 6: return strconv_attribute_impl<opt_false>::parse_wconv;
- case 7: return strconv_attribute_impl<opt_true>::parse_wconv;
- case 8: return strconv_attribute_impl<opt_false>::parse_wnorm;
- case 9: return strconv_attribute_impl<opt_true>::parse_wnorm;
- case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
- case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
- case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
- case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
- case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
- case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
- default: assert(false); return 0; // should not get here
- }
- }
-
- inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0) // Builds an xml_parse_result and assigns only its status and offset members
- {
- xml_parse_result result;
- result.status = status;
- result.offset = offset;
- // any other member keeps whatever value default construction gave it
- return result;
- }
-
- struct xml_parser
- {
- xml_allocator alloc;
- xml_allocator* alloc_state;
- char_t* error_offset;
- xml_parse_status error_status;
-
- xml_parser(xml_allocator* alloc_): alloc(*alloc_), alloc_state(alloc_), error_offset(0), error_status(status_ok) // Works on a copy of *alloc_ and remembers the original's address in alloc_state; starts with no error recorded
- {
- }
-
- ~xml_parser() // Writes the working allocator copy back to the allocator passed to the constructor
- {
- *alloc_state = alloc;
- }
-
- // DOCTYPE consists of nested sections of the following possible types:
- // <!-- ... -->, <? ... ?>, "...", '...'
- // <![...]]>
- // <!...>
- // First group can not contain nested groups
- // Second group can contain nested groups of the same type
- // Third group can contain all other groups
- char_t* parse_doctype_primitive(char_t* s) /* Skips one primitive DOCTYPE item starting at s: a quoted string, a <? ... ?> section
-    or a <!-- ... --> comment, and returns the position just past it. Any other input, or an item whose closing
-    sequence is not found before the zero terminator, goes through PUGI__THROW_ERROR(status_bad_doctype, ...).
-    NOTE(review): that macro is defined outside this chunk; callers test the result for null, so it presumably
-    records the error and returns 0 - confirm. */
- {
- if (*s == '"' || *s == '\'')
- {
- // quoted string
- char_t ch = *s++; // remember the opening quote so that only the same kind of quote closes the string
- PUGI__SCANFOR(*s == ch); // presumably advances s until the condition holds or the zero terminator is reached - confirm against the macro
- if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); // stopped on the terminator: the string was never closed
-
- s++; // step over the closing quote
- }
- else if (s[0] == '<' && s[1] == '?')
- {
- // <? ... ?>
- s += 2; // step over "<?"
- PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
- if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); // terminator reached before "?>"
-
- s += 2; // step over "?>"
- }
- else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
- {
- s += 4; // <!-- ... -->: step over "<!--"
- PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
- if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); // terminator reached before "-->"
-
- s += 3; // step over "-->"
- }
- else PUGI__THROW_ERROR(status_bad_doctype, s); // neither a quote nor one of the two recognized '<' constructs
-
- return s;
- }
-
- char_t* parse_doctype_ignore(char_t* s) /* Skips a '<![ ... ]]>' section, including nested '<![' sections, and returns the position
-    just past the ']]>' that closes the outermost one. s must point at the opening '<![' (asserted).
-    If the zero terminator is reached first, the PUGI__THROW_ERROR(status_bad_doctype, s) path is taken. */
- {
- size_t depth = 0; // number of nested '<![' sections currently open, not counting the outermost one
-
- assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
- s += 3; // step over the opening '<!['
-
- while (*s)
- {
- if (s[0] == '<' && s[1] == '!' && s[2] == '[')
- {
- // nested ignore section
- s += 3;
- depth++;
- }
- else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
- {
- // ignore section end
- s += 3;
-
- if (depth == 0)
- return s; // this ']]>' closed the outermost section
-
- depth--;
- }
- else s++;
- }
-
- PUGI__THROW_ERROR(status_bad_doctype, s); // no return statement follows, so the macro itself has to leave the function (it is defined outside this chunk)
- }
-
- char_t* parse_doctype_group(char_t* s, char_t endch) /* Skips a '<!...>' DOCTYPE group, including nested '<!...>' groups, '<![...]]>' sections and
-    the primitive items handled by parse_doctype_primitive. s must point at the opening '<!' (asserted).
-    Returns a pointer AT the closing '>' of the outermost group (not past it), or at the zero terminator when
-    the buffer ran out with no nested group open and endch is '>'. Running out in any other state takes the
-    status_bad_doctype error path; a null result from a nested helper is passed straight through. */
- {
- size_t depth = 0; // nested '<!' groups currently open inside the outermost one
-
- assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
- s += 2; // step over the opening '<!'
-
- while (*s)
- {
- if (s[0] == '<' && s[1] == '!' && s[2] != '-')
- {
- if (s[2] == '[')
- {
- // ignore
- s = parse_doctype_ignore(s);
- if (!s) return s;
- }
- else
- {
- // some control group
- s += 2;
- depth++;
- }
- }
- else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
- {
- // unknown tag (forbidden), or some primitive group
- s = parse_doctype_primitive(s);
- if (!s) return s;
- }
- else if (*s == '>')
- {
- if (depth == 0)
- return s; // end of the outermost group; s is left on the '>' for the caller
-
- depth--;
- s++;
- }
- else s++;
- }
-
- if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s); // terminator reached: acceptable only when nothing is open and endch is '>'
-
- return s;
- }
-
- char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch) /* Handles markup that begins with "<!". On entry s points at the '!'.
-    Three constructs are recognized: comments (<!-- ... -->), CDATA sections (<![CDATA[ ... ]]>) and DOCTYPE.
-    Depending on the parse_comments / parse_cdata / parse_doctype options a node is pushed and its value is
-    pointed at the text inside the buffer; otherwise the construct is only skipped.
-    cursor is taken by value, so whatever the PUGI__PUSHNODE macro does to it is not seen by the caller.
-    endch is passed on to helpers and used in "*s == 0 && endch == X" tests that pick the error status
-    when the buffer ends right after "<!".
-    Returns the position after the construct on the normal path. NOTE(review): PUGI__THROW_ERROR,
-    PUGI__CHECK_ERROR, PUGI__SCANFOR, PUGI__ENDSWITH, PUGI__OPTSET and PUGI__PUSHNODE are defined outside this
-    chunk; remarks about them below are inferred from how they are used here - confirm. */
- {
- // parse node contents, starting with exclamation mark
- ++s;
-
- if (*s == '-') // '<!-...'
- {
- ++s;
-
- if (*s == '-') // '<!--...'
- {
- ++s;
-
- if (PUGI__OPTSET(parse_comments))
- {
- PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
- cursor->value = s; // Save the offset.
- }
-
- if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
- {
- s = strconv_comment(s, endch); // helper defined outside this chunk; a null result is treated as an unterminated comment
-
- if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
- }
- else
- {
- // Scan for terminating '-->'.
- PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
- PUGI__CHECK_ERROR(status_bad_comment, s); // presumably raises the error when the scan stopped on the terminator - confirm
-
- if (PUGI__OPTSET(parse_comments))
- *s = 0; // Zero-terminate this segment at the first terminating '-'.
-
- s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
- }
- }
- else PUGI__THROW_ERROR(status_bad_comment, s); // "<!-" not followed by a second '-'
- }
- else if (*s == '[')
- {
- // '<![CDATA[...'
- if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[') // each test pre-increments s, so on a mismatch s is left on the offending character
- {
- ++s; // step over the second '[': s is now at the first character of the CDATA text
-
- if (PUGI__OPTSET(parse_cdata))
- {
- PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
- cursor->value = s; // Save the offset.
-
- if (PUGI__OPTSET(parse_eol))
- {
- s = strconv_cdata(s, endch); // helper defined outside this chunk; a null result is treated as an unterminated section
-
- if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
- }
- else
- {
- // Scan for terminating ']]>'.
- PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
- PUGI__CHECK_ERROR(status_bad_cdata, s);
-
- *s++ = 0; // Zero-terminate this segment.
- }
- }
- else // Flagged for discard, but we still have to scan for the terminator.
- {
- // Scan for terminating ']]>'.
- PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
- PUGI__CHECK_ERROR(status_bad_cdata, s);
-
- ++s; // mirrors the "*s++ = 0" of the branch above, minus the write
- }
-
- s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
- }
- else PUGI__THROW_ERROR(status_bad_cdata, s); // "<![" not followed by "CDATA["
- }
- else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
- {
- s -= 2; // back to the '<' (s was moved one past the '!' at the top of the function)
-
- if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s); // DOCTYPE is rejected whenever the current node has a parent
-
- char_t* mark = s + 9; // "<!DOCTYPE" is nine characters, so mark is where the doctype text begins
-
- s = parse_doctype_group(s, endch);
- if (!s) return s;
-
- assert((*s == 0 && endch == '>') || *s == '>');
- if (*s) *s++ = 0; // terminate the doctype text at the closing '>' and step over it; nothing to do if s is already on the terminator
-
- if (PUGI__OPTSET(parse_doctype))
- {
- while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark; // drop whitespace between "DOCTYPE" and the text
-
- PUGI__PUSHNODE(node_doctype);
-
- cursor->value = mark;
- }
- }
- else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s); // buffer ended right after "<!" and endch is '-'
- else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s); // buffer ended right after "<!" and endch is '['
- else PUGI__THROW_ERROR(status_unrecognized_tag, s);
-
- return s;
- }
-
- char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch) /* Handles markup that begins with "<?": a processing instruction or the
-    XML declaration. On entry s points at the '?'.
-    The target name is read first; a three-character target spelling "xml" in any letter case counts as a
-    declaration. If the matching option (parse_declaration or parse_pi) is set a node is pushed, otherwise the
-    construct is skipped up to "?>".
-    For a declaration that has content after the target, the function returns with the cursor still on the
-    declaration node and s at the start of that content, so that parse_tree can parse it as attributes.
-    ref_cursor is copied into a local on entry and written back just before the normal return.
-    NOTE(review): the PUGI__* macros are defined outside this chunk; remarks about them are inferred from
-    their use here and from the existing comments - confirm. */
- {
- // load into registers
- xml_node_struct* cursor = ref_cursor;
- char_t ch = 0;
-
- // parse node contents, starting with question mark
- ++s;
-
- // read PI target
- char_t* target = s;
-
- if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
-
- PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // presumably advances s across the target name - confirm against the macro
- PUGI__CHECK_ERROR(status_bad_pi, s);
-
- // determine node type; stricmp / strcasecmp is not portable
- bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s; // OR-ing with ' ' (0x20) folds ASCII letters to lower case; target + 3 == s requires exactly three characters
-
- if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
- {
- if (declaration)
- {
- // disallow non top-level declarations
- if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
-
- PUGI__PUSHNODE(node_declaration);
- }
- else
- {
- PUGI__PUSHNODE(node_pi);
- }
-
- cursor->name = target;
-
- PUGI__ENDSEG(); // as commented in parse_tree: saves the current character in 'ch', zero-terminates the name and steps over
-
- // parse value/attributes
- if (ch == '?')
- {
- // empty node
- if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
- s += (*s == '>'); // step over '>' only if it is really there (it may have been the buffer's replaced last character)
-
- PUGI__POPNODE();
- }
- else if (PUGI__IS_CHARTYPE(ch, ct_space))
- {
- PUGI__SKIPWS();
-
- // scan for tag end
- char_t* value = s;
-
- PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
- PUGI__CHECK_ERROR(status_bad_pi, s);
-
- if (declaration)
- {
- // replace ending ? with / so that 'element' terminates properly
- *s = '/';
-
- // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
- s = value;
- }
- else
- {
- // store value and step over >
- cursor->value = value;
-
- PUGI__POPNODE();
-
- PUGI__ENDSEG();
-
- s += (*s == '>');
- }
- }
- else PUGI__THROW_ERROR(status_bad_pi, s); // target followed by something other than '?' or whitespace
- }
- else
- {
- // scan for tag end
- PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
- PUGI__CHECK_ERROR(status_bad_pi, s);
-
- s += (s[1] == '>' ? 2 : 1); // step over "?>", or over just '?' when the '>' is not present in the buffer
- }
-
- // store from registers
- ref_cursor = cursor;
-
- return s;
- }
-
- char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch) /* Main parsing loop: walks the zero-terminated buffer at s and attaches the
-    nodes and attributes it finds under root. Names and values are not copied: node and attribute fields are
-    pointed into the buffer, and the buffer is cut into zero-terminated segments as parsing proceeds.
-    cursor is the node currently being filled; ch receives the character that PUGI__ENDSEG overwrote.
-    endch is passed in by parse(), which replaces the buffer's last character with a zero before the call, so
-    "*s == 0 && endch == X" reads as "the buffer really ended with X".
-    Returns s on the normal path; errors go through PUGI__THROW_ERROR, and a null result from a helper is
-    returned unchanged. NOTE(review): the PUGI__* macros are defined outside this chunk; remarks about them
-    follow the existing comments and the way their effects are checked here - confirm. */
- {
- strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk); // conversion routines are selected once from the option mask
- strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
-
- char_t ch = 0;
- xml_node_struct* cursor = root;
- char_t* mark = s;
-
- while (*s != 0)
- {
- if (*s == '<')
- {
- ++s;
-
- LOC_TAG:
- if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
- {
- PUGI__PUSHNODE(node_element); // Append a new node to the tree.
-
- cursor->name = s;
-
- PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
- PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
-
- if (ch == '>')
- {
- // end of tag
- }
- else if (PUGI__IS_CHARTYPE(ch, ct_space))
- {
- LOC_ATTRIBUTES:
- while (true)
- {
- PUGI__SKIPWS(); // Eat any whitespace.
-
- if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
- {
- xml_attribute_struct* a = append_new_attribute(cursor, alloc); // Make space for this attribute.
- if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
-
- a->name = s; // Save the offset.
-
- PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
- PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
-
- if (PUGI__IS_CHARTYPE(ch, ct_space))
- {
- PUGI__SKIPWS(); // Eat any whitespace.
-
- ch = *s; // whitespace separated the name from what follows: reload ch with the next real character
- ++s;
- }
-
- if (ch == '=') // '<... #=...'
- {
- PUGI__SKIPWS(); // Eat any whitespace.
-
- if (*s == '"' || *s == '\'') // '<... #="...'
- {
- ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
- ++s; // Step over the quote.
- a->value = s; // Save the offset.
-
- s = strconv_attribute(s, ch); // null result means the value could not be converted/terminated
-
- if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
-
- // After this line the loop continues from the start;
- // Whitespaces, / and > are ok, symbols and EOF are wrong,
- // everything else will be detected
- if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
- }
- else PUGI__THROW_ERROR(status_bad_attribute, s); // '=' not followed by a quote
- }
- else PUGI__THROW_ERROR(status_bad_attribute, s); // attribute name not followed by '='
- }
- else if (*s == '/')
- {
- ++s;
-
- if (*s == '>')
- {
- PUGI__POPNODE();
- s++;
- break;
- }
- else if (*s == 0 && endch == '>')
- {
- PUGI__POPNODE(); // same as above, except that the '>' was the buffer's replaced last character
- break;
- }
- else PUGI__THROW_ERROR(status_bad_start_element, s);
- }
- else if (*s == '>')
- {
- ++s;
-
- break;
- }
- else if (*s == 0 && endch == '>')
- {
- break;
- }
- else PUGI__THROW_ERROR(status_bad_start_element, s);
- }
-
- // !!!
- }
- else if (ch == '/') // '<#.../'
- {
- if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
-
- PUGI__POPNODE(); // Pop.
-
- s += (*s == '>'); // step over '>' only if it is really present in the buffer
- }
- else if (ch == 0)
- {
- // we stepped over null terminator, backtrack & handle closing tag
- --s;
-
- if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
- }
- else PUGI__THROW_ERROR(status_bad_start_element, s);
- }
- else if (*s == '/')
- {
- ++s;
-
- char_t* name = cursor->name;
- if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s); // current node has no name, so no closing tag can match it
-
- while (PUGI__IS_CHARTYPE(*s, ct_symbol))
- {
- if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s); // closing name must equal the open element's name character by character
- }
-
- if (*name)
- {
- if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s); // only the replaced last character is missing from the name: the tag has no '>' at all
- else PUGI__THROW_ERROR(status_end_element_mismatch, s);
- }
-
- PUGI__POPNODE(); // Pop.
-
- PUGI__SKIPWS();
-
- if (*s == 0)
- {
- if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
- }
- else
- {
- if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
- ++s;
- }
- }
- else if (*s == '?') // '<?...'
- {
- s = parse_question(s, cursor, optmsk, endch);
- if (!s) return s;
-
- assert(cursor);
- if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES; // parse_question left the cursor on a declaration: parse its content as attributes
- }
- else if (*s == '!') // '<!...'
- {
- s = parse_exclamation(s, cursor, optmsk, endch);
- if (!s) return s;
- }
- else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s); // buffer ended with "<?"
- else PUGI__THROW_ERROR(status_unrecognized_tag, s);
- }
- else
- {
- mark = s; // Save this offset while searching for a terminator.
-
- PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
-
- if (*s == '<' || !*s)
- {
- // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
- assert(mark != s);
-
- if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
- {
- continue; // whitespace-only text is dropped
- }
- else if (PUGI__OPTSET(parse_ws_pcdata_single))
- {
- if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue; // kept only when followed by a closing tag and the current node has no children yet
- }
- }
-
- if (!PUGI__OPTSET(parse_trim_pcdata))
- s = mark; // not trimming: rewind so the leading whitespace becomes part of the text
-
- if (cursor->parent || PUGI__OPTSET(parse_fragment))
- {
- PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
- cursor->value = s; // Save the offset.
-
- s = strconv_pcdata(s);
-
- PUGI__POPNODE(); // Pop since this is a standalone.
-
- if (!*s) break;
- }
- else
- {
- PUGI__SCANFOR(*s == '<'); // '...<'
- if (!*s) break;
-
- ++s;
- }
-
- // We're after '<'
- goto LOC_TAG;
- }
- }
-
- // check that last tag is closed
- if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
-
- return s;
- }
-
- #ifdef PUGIXML_WCHAR_MODE
- // Wide-character build: returns s advanced past a leading 0xFEFF unit when
- // the buffer starts with one, otherwise returns s unchanged.
- static char_t* parse_skip_bom(char_t* s)
- {
-     unsigned int bom_unit = 0xfeff;
-
-     if (s[0] == static_cast<wchar_t>(bom_unit))
-         return s + 1;
-
-     return s;
- }
- #else
- // Narrow-character build: returns s advanced past a leading EF BB BF byte
- // triple when the buffer starts with one, otherwise returns s unchanged.
- static char_t* parse_skip_bom(char_t* s)
- {
-     const bool starts_with_bom = s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf';
-
-     if (starts_with_bom)
-         return s + 3;
-
-     return s;
- }
- #endif
-
- // Reports whether node itself or any node reachable from it through
- // next_sibling links is an element node. A null node yields false.
- static bool has_element_node_siblings(xml_node_struct* node)
- {
-     for (; node; node = node->next_sibling)
-     {
-         if (PUGI__NODETYPE(node) == node_element)
-             return true;
-     }
-
-     return false;
- }
-
- static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk) /* Entry point of the parser: parses `length` characters at
-    `buffer` in place and attaches the resulting nodes under `root`.
-    The buffer is modified: its last character is saved in endch and overwritten with a zero so the parsing
-    code can rely on a terminator; parse_tree receives endch to compensate.
-    Returns an xml_parse_result built from the parser's error_status / error_offset (offset relative to
-    `buffer`), with post-checks applied when parsing itself reported no error: a buffer whose last
-    character was '<', and, unless parse_fragment is set, a parse that produced no element node at root level.
-    An empty buffer is status_ok for fragments and status_no_document_element otherwise. */
- {
- // early-out for empty documents
- if (length == 0)
- return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
-
- // get last child of the root before parsing
- xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0; // presumably the first child's prev_sibling_c link leads to the last child - confirm against the node structure
-
- // create parser on stack
- xml_parser parser(static_cast<xml_allocator*>(xmldoc)); // the parser copies this allocator now and writes the copy back when it is destroyed
-
- // save last character and make buffer zero-terminated (speeds up parsing)
- char_t endch = buffer[length - 1];
- buffer[length - 1] = 0;
-
- // skip BOM to make sure it does not end up as part of parse output
- char_t* buffer_data = parse_skip_bom(buffer);
-
- // perform actual parsing
- parser.parse_tree(buffer_data, root, optmsk, endch); // the returned pointer is not used; the outcome is read from the parser's error fields below
-
- xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
- assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
-
- if (result)
- {
- // since we removed last character, we have to handle the only possible false positive (stray <)
- if (endch == '<')
- return make_parse_result(status_unrecognized_tag, length - 1);
-
- // check if there are any element nodes parsed
- xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0; // first node added by this call: the one after the previous last child, or the first child if root was empty
-
- if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
- return make_parse_result(status_no_document_element, length - 1);
- }
- else
- {
- // roll back offset if it occurs on a null terminator in the source buffer
- if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
- result.offset--;
- }
-
- return result;
- }
- };
-
- // Output facilities
- // Encoding in which char_t data can be written without conversion:
- // the wchar encoding in PUGIXML_WCHAR_MODE builds, UTF-8 otherwise.
- PUGI__FN xml_encoding get_write_native_encoding()
- {
-     return
-     #ifdef PUGIXML_WCHAR_MODE
-         get_wchar_encoding()
-     #else
-         encoding_utf8
-     #endif
-     ;
- }
-
- // Maps a user-requested output encoding to the concrete encoding to write:
- //   encoding_wchar -> result of get_wchar_encoding()
- //   encoding_utf16 -> utf16_le / utf16_be, chosen by is_little_endian()
- //   encoding_utf32 -> utf32_le / utf32_be, chosen by is_little_endian()
- //   encoding_auto  -> encoding_utf8
- //   anything else  -> returned unchanged
- PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
- {
-     switch (encoding)
-     {
-     case encoding_wchar:
-         return get_wchar_encoding();
-
-     case encoding_utf16:
-         return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
-
-     case encoding_utf32:
-         return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
-
-     case encoding_auto:
-         // no explicit request: assume utf8
-         return encoding_utf8;
-
-     default:
-         // an explicit, already concrete encoding
-         return encoding;
-     }
- }
-
- // Runs decoder D over `length` units of `data`, emitting through writer T into `dest`.
- // Returns the number of BYTES written, i.e. written units times sizeof(*dest).
- // The two unnamed tag parameters only select D and T.
- template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
- {
-     PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
-
-     const typename D::type* input = reinterpret_cast<const typename D::type*>(data);
-     typename T::value_type out_end = D::process(input, length, dest, T());
-
-     size_t units_written = static_cast<size_t>(out_end - dest);
-
-     return units_written * sizeof(*dest);
- }
-
- // Same as the overload without opt_swap, but when opt_swap is true every unit that was
- // written to `dest` is passed through endian_swap() afterwards.
- // Returns the number of BYTES written.
- template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
- {
-     PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
-
-     const typename D::type* input = reinterpret_cast<const typename D::type*>(data);
-     typename T::value_type out_end = D::process(input, length, dest, T());
-
-     if (opt_swap)
-     {
-         typename T::value_type unit = dest;
-
-         while (unit != out_end)
-         {
-             *unit = endian_swap(*unit);
-             ++unit;
-         }
-     }
-
-     size_t units_written = static_cast<size_t>(out_end - dest);
-
-     return units_written * sizeof(*dest);
- }
-
-#ifdef PUGIXML_WCHAR_MODE
- // Wide-character build: number of leading units of data[0..length) that can be converted
- // without cutting a surrogate pair. When wchar_t is two bytes wide and the last unit lies
- // in 0xD800..0xDBFF, that unit is excluded. Zero length yields 0.
- PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
- {
-     if (length < 1) return 0;
-
-     // discard last character if it's the lead of a surrogate pair
-     const bool ends_with_lead = sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400;
-
-     if (ends_with_lead)
-         return length - 1;
-
-     return length;
- }
-
- // Wide-character build: converts `length` units of `data` to `encoding`, writing into
- // whichever of the r_* destinations matches the target unit size. Returns the number of
- // bytes produced. An encoding that matches none of the cases asserts and returns 0.
- PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
- {
-     // only endian-swapping is required
-     if (need_endian_swap_utf(encoding, get_wchar_encoding()))
-     {
-         convert_wchar_endian_swap(r_char, data, length);
-
-         return length * sizeof(char_t);
-     }
-
-     switch (encoding)
-     {
-     case encoding_utf8:
-         return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
-
-     case encoding_utf16_be:
-     case encoding_utf16_le:
-     {
-         // swap afterwards if the requested byte order is not the machine's own
-         xml_encoding machine_utf16 = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
-
-         return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), machine_utf16 != encoding);
-     }
-
-     case encoding_utf32_be:
-     case encoding_utf32_le:
-     {
-         xml_encoding machine_utf32 = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
-
-         return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), machine_utf32 != encoding);
-     }
-
-     case encoding_latin1:
-         return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
-
-     default:
-         assert(!"Invalid encoding");
-         return 0;
-     }
- }
-#else
- // Narrow-character build: picks a prefix length of data[0..length) that does not end in
- // the middle of a multi-byte sequence. Looks at the last four bytes, from the end backwards,
- // and cuts at the first one that is not of the form 10xxxxxx (that byte is excluded).
- // Inputs shorter than five bytes yield 0; if all four trailing bytes are 10xxxxxx the
- // full length is returned.
- PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
- {
-     if (length < 5) return 0;
-
-     size_t cut = length;
-
-     for (unsigned int step = 0; step < 4; ++step)
-     {
-         --cut;
-
-         uint8_t byte = static_cast<uint8_t>(data[cut]);
-         bool continuation = (byte & 0xc0) == 0x80;
-
-         // either a standalone character or a leading one
-         if (!continuation) return cut;
-     }
-
-     // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
-     return length;
- }
-
- // Narrow-character build: converts `length` units of `data` to `encoding`, writing into
- // whichever of the r_* destinations matches the target unit size. Returns the number of
- // bytes produced. The first (char_t*) destination is unused here. An encoding that matches
- // none of the cases asserts and returns 0.
- PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
- {
-     switch (encoding)
-     {
-     case encoding_utf16_be:
-     case encoding_utf16_le:
-     {
-         // swap afterwards if the requested byte order is not the machine's own
-         xml_encoding machine_utf16 = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
-
-         return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), machine_utf16 != encoding);
-     }
-
-     case encoding_utf32_be:
-     case encoding_utf32_le:
-     {
-         xml_encoding machine_utf32 = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
-
-         return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), machine_utf32 != encoding);
-     }
-
-     case encoding_latin1:
-         return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
-
-     default:
-         assert(!"Invalid encoding");
-         return 0;
-     }
- }
-#endif
-
- // Collects char_t output in a fixed-size buffer and hands it to an xml_writer, converting
- // to the target encoding through the scratch area when that encoding differs from the
- // native one. Nothing in this class flushes on destruction.
- class xml_buffered_writer
- {
-     // copy operations are declared private and have no body in this class
-     xml_buffered_writer(const xml_buffered_writer&);
-     xml_buffered_writer& operator=(const xml_buffered_writer&);
-
- public:
-     // Binds to writer_ and resolves the requested encoding once, via get_write_encoding().
-     xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding)
-         : writer(writer_)
-         , bufsize(0)
-         , encoding(get_write_encoding(user_encoding))
-     {
-         PUGI__STATIC_ASSERT(bufcapacity >= 8);
-     }
-
-     // Emits the buffered characters and empties the buffer.
-     // Always returns 0, which the write() overloads use as the new buffer offset.
-     size_t flush()
-     {
-         flush(buffer, bufsize);
-         bufsize = 0;
-
-         return 0;
-     }
-
-     // Emits `size` characters starting at `data`, bypassing the buffer.
-     // Converts through `scratch` unless the target encoding is the native one.
-     void flush(const char_t* data, size_t size)
-     {
-         if (size == 0) return;
-
-         // fast path, just write data
-         if (encoding == get_write_native_encoding())
-         {
-             writer.write(data, size * sizeof(char_t));
-             return;
-         }
-
-         // convert chunk
-         size_t converted_bytes = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
-         assert(converted_bytes <= sizeof(scratch));
-
-         // write data
-         writer.write(scratch.data_u8, converted_bytes);
-     }
-
-     // Flushes the buffer, then outputs `length` characters from `data`: data longer than
-     // the buffer is written out (in chunks when conversion is needed) and whatever is
-     // left over is stored in the buffer.
-     void write_direct(const char_t* data, size_t length)
-     {
-         // flush the remaining buffer contents
-         flush();
-
-         const bool oversized = length > bufcapacity;
-
-         if (oversized && encoding == get_write_native_encoding())
-         {
-             // fast path, can just write data chunk
-             writer.write(data, length * sizeof(char_t));
-             return;
-         }
-
-         if (oversized)
-         {
-             // need to convert in suitable chunks
-             do
-             {
-                 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
-                 // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
-                 size_t chunk = get_valid_length(data, bufcapacity);
-                 assert(chunk);
-
-                 // convert chunk and write
-                 flush(data, chunk);
-
-                 // iterate
-                 data += chunk;
-                 length -= chunk;
-             }
-             while (length > bufcapacity);
-
-             // small tail is copied below
-             bufsize = 0;
-         }
-
-         memcpy(buffer + bufsize, data, length * sizeof(char_t));
-         bufsize += length;
-     }
-
-     // Appends `length` characters to the buffer when they fit, otherwise defers to write_direct().
-     void write_buffer(const char_t* data, size_t length)
-     {
-         size_t start = bufsize;
-
-         if (start + length > bufcapacity)
-         {
-             write_direct(data, length);
-             return;
-         }
-
-         memcpy(buffer + start, data, length * sizeof(char_t));
-         bufsize = start + length;
-     }
-
-     // Appends a zero-terminated string. Characters are copied until the string ends or the
-     // buffer is full; in the latter case the remainder goes through write_direct().
-     void write_string(const char_t* data)
-     {
-         // write the part of the string that fits in the buffer
-         size_t pos = bufsize;
-
-         while (*data && pos < bufcapacity)
-         {
-             buffer[pos] = *data;
-             ++pos;
-             ++data;
-         }
-
-         if (pos < bufcapacity)
-         {
-             // the whole string fit
-             bufsize = pos;
-             return;
-         }
-
-         // backtrack a bit if we have split the codepoint
-         size_t copied = pos - bufsize;
-         size_t extra = copied - get_valid_length(data - copied, copied);
-
-         bufsize = pos - extra;
-
-         // write the rest
-         write_direct(data - extra, strlength(data) + extra);
-     }
-
-     // write(d0 ... dN): append one to six characters, flushing first when they would not fit.
-     void write(char_t d0)
-     {
-         if (bufsize > bufcapacity - 1) flush();
-
-         char_t* out = buffer + bufsize;
-
-         out[0] = d0;
-         bufsize += 1;
-     }
-
-     void write(char_t d0, char_t d1)
-     {
-         if (bufsize > bufcapacity - 2) flush();
-
-         char_t* out = buffer + bufsize;
-
-         out[0] = d0;
-         out[1] = d1;
-         bufsize += 2;
-     }
-
-     void write(char_t d0, char_t d1, char_t d2)
-     {
-         if (bufsize > bufcapacity - 3) flush();
-
-         char_t* out = buffer + bufsize;
-
-         out[0] = d0;
-         out[1] = d1;
-         out[2] = d2;
-         bufsize += 3;
-     }
-
-     void write(char_t d0, char_t d1, char_t d2, char_t d3)
-     {
-         if (bufsize > bufcapacity - 4) flush();
-
-         char_t* out = buffer + bufsize;
-
-         out[0] = d0;
-         out[1] = d1;
-         out[2] = d2;
-         out[3] = d3;
-         bufsize += 4;
-     }
-
-     void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
-     {
-         if (bufsize > bufcapacity - 5) flush();
-
-         char_t* out = buffer + bufsize;
-
-         out[0] = d0;
-         out[1] = d1;
-         out[2] = d2;
-         out[3] = d3;
-         out[4] = d4;
-         bufsize += 5;
-     }
-
-     void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
-     {
-         if (bufsize > bufcapacity - 6) flush();
-
-         char_t* out = buffer + bufsize;
-
-         out[0] = d0;
-         out[1] = d1;
-         out[2] = d2;
-         out[3] = d3;
-         out[4] = d4;
-         out[5] = d5;
-         bufsize += 6;
-     }
-
-     // Sizing: the byte budget (PUGIXML_MEMORY_OUTPUT_STACK if defined, else 10240) is divided
-     // by sizeof(char_t) + 4, i.e. one buffered character plus room for its worst-case conversion:
-     // utf8 maximum expansion: x4 (-> utf32)
-     // utf16 maximum expansion: x2 (-> utf32)
-     // utf32 maximum expansion: x1
-     enum
-     {
-         bufcapacitybytes =
-         #ifdef PUGIXML_MEMORY_OUTPUT_STACK
-             PUGIXML_MEMORY_OUTPUT_STACK
-         #else
-             10240
-         #endif
-         ,
-         bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
-     };
-
-     // pending, not yet written characters; bufsize of them are valid
-     char_t buffer[bufcapacity];
-
-     // conversion target, viewed at whichever unit width the output encoding needs
-     union
-     {
-         uint8_t data_u8[4 * bufcapacity];
-         uint16_t data_u16[2 * bufcapacity];
-         uint32_t data_u32[bufcapacity];
-         char_t data_char[bufcapacity];
-     } scratch;
-
-     xml_writer& writer;
-     size_t bufsize;
-     xml_encoding encoding;
- };
-
- // Writes the zero-terminated string s, replacing every character for which
- // PUGI__IS_CHARTYPEX(c, type) holds: '&' '<' '>' '"' become &amp; &lt; &gt; &quot;,
- // any other such character (asserted to be below 32) becomes a two-digit decimal
- // character reference. The terminating zero ends the output and is not written.
- PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
- {
-     while (*s)
-     {
-         const char_t* run_begin = s;
-
-         // While *s is a usual symbol
-         PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));
-
-         // everything scanned over goes out verbatim
-         writer.write_buffer(run_begin, static_cast<size_t>(s - run_begin));
-
-         if (*s == 0) break;
-
-         // s is not a usual symbol: take it and emit its replacement
-         const char_t special = *s++;
-
-         if (special == '&')
-         {
-             writer.write('&', 'a', 'm', 'p', ';');
-         }
-         else if (special == '<')
-         {
-             writer.write('&', 'l', 't', ';');
-         }
-         else if (special == '>')
-         {
-             writer.write('&', 'g', 't', ';');
-         }
-         else if (special == '"')
-         {
-             writer.write('&', 'q', 'u', 'o', 't', ';');
-         }
-         else
-         {
-             unsigned int code = static_cast<unsigned int>(special);
-             assert(code < 32);
-
-             writer.write('&', '#', static_cast<char_t>((code / 10) + '0'), static_cast<char_t>((code % 10) + '0'), ';');
-         }
-     }
- }
-
- // Writes s either verbatim (format_no_escapes set in flags) or through text_output_escaped().
- PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
- {
-     const bool verbatim = (flags & format_no_escapes) != 0;
-
-     if (verbatim)
-     {
-         writer.write_string(s);
-         return;
-     }
-
-     text_output_escaped(writer, s, type);
- }
-
- // Writes s as one or more CDATA sections. Because "]]>" would end a section, the text is
- // split there: "]]" closes the current section's content and the '>' opens the next one.
- // At least one section is written, even for an empty string.
- PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
- {
-     for (;;)
-     {
-         writer.write('<', '!', '[', 'C', 'D');
-         writer.write('A', 'T', 'A', '[');
-
-         const char_t* piece = s;
-
-         // look for ]]> sequence - we can't output it as is since it terminates CDATA
-         for (; *s; ++s)
-         {
-             if (s[0] == ']' && s[1] == ']' && s[2] == '>') break;
-         }
-
-         // skip ]] if we stopped at ]]>, > will go to the next CDATA section
-         if (*s) s += 2;
-
-         writer.write_buffer(piece, static_cast<size_t>(s - piece));
-
-         writer.write(']', ']', '>');
-
-         if (!*s) break;
-     }
- }
-
- // Writes the indent string `depth` times. Indent strings of one to four characters go
- // through the fixed-arity write() overloads; any other length goes through write_buffer().
- PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
- {
-     for (unsigned int level = 0; level < depth; ++level)
-     {
-         switch (indent_length)
-         {
-         case 1:
-             writer.write(indent[0]);
-             break;
-
-         case 2:
-             writer.write(indent[0], indent[1]);
-             break;
-
-         case 3:
-             writer.write(indent[0], indent[1], indent[2]);
-             break;
-
-         case 4:
-             writer.write(indent[0], indent[1], indent[2], indent[3]);
-             break;
-
-         default:
-             writer.write_buffer(indent, indent_length);
-         }
-     }
- }
-
- // Writes s wrapped in <!-- and -->. A '-' that is followed by another '-' or by the end
- // of the string is written as "- " (dash, space), so the body never contains "--" and
- // never ends in '-'.
- PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
- {
-     writer.write('<', '!', '-', '-');
-
-     while (*s)
-     {
-         const char_t* piece = s;
-
-         // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
-         while (*s && (s[0] != '-' || (s[1] != '-' && s[1] != 0))) ++s;
-
-         writer.write_buffer(piece, static_cast<size_t>(s - piece));
-
-         if (!*s) continue;
-
-         assert(*s == '-');
-
-         writer.write('-', ' ');
-         ++s;
-     }
-
-     writer.write('-', '-', '>');
- }
-
- // Writes the value of a processing instruction. Every "?>" inside s is written as "? >"
- // so that the value cannot end the instruction early.
- PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
- {
-     while (*s)
-     {
-         const char_t* piece = s;
-
-         // look for ?> sequence - we can't output it since ?> terminates PI
-         while (*s && (s[0] != '?' || s[1] != '>')) ++s;
-
-         writer.write_buffer(piece, static_cast<size_t>(s - piece));
-
-         if (!*s) continue;
-
-         assert(s[0] == '?' && s[1] == '>');
-
-         writer.write('?', ' ', '>');
-         s += 2;
-     }
- }
-
- // Writes all attributes of node as name="value". Each attribute is preceded by a single
- // space, or - when format_indent_attributes is set and format_raw is not - by a newline
- // and an indent one level deeper than `depth`. An attribute without a name is written as
- // ":anonymous"; an attribute without a value is written with empty quotes.
- PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
- {
-     const char_t* default_name = PUGIXML_TEXT(":anonymous");
-     const bool one_per_line = (flags & (format_indent_attributes | format_raw)) == format_indent_attributes;
-
-     xml_attribute_struct* attr = node->first_attribute;
-
-     while (attr)
-     {
-         if (one_per_line)
-         {
-             writer.write('\n');
-
-             text_output_indent(writer, indent, indent_length, depth + 1);
-         }
-         else
-         {
-             writer.write(' ');
-         }
-
-         writer.write_string(attr->name ? attr->name + 0 : default_name);
-         writer.write('=', '"');
-
-         if (attr->value)
-             text_output(writer, attr->value, ctx_special_attr, flags);
-
-         writer.write('"');
-
-         attr = attr->next_attribute;
-     }
- }
-
- // Writes the opening tag of an element, attributes included. A node without children is
- // closed right away with " />" and false is returned; otherwise the tag ends with '>' and
- // true is returned, telling the caller that children and a closing tag still have to follow.
- // A node without a name is written as ":anonymous".
- PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
- {
-     writer.write('<');
-     writer.write_string(node->name ? node->name + 0 : PUGIXML_TEXT(":anonymous"));
-
-     if (node->first_attribute)
-         node_output_attributes(writer, node, indent, indent_length, flags, depth);
-
-     const bool childless = !node->first_child;
-
-     if (childless)
-         writer.write(' ', '/', '>');
-     else
-         writer.write('>');
-
-     return !childless;
- }
-
- // Writes the closing tag </name>; a node without a name is written as ":anonymous".
- PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
- {
-     writer.write('<', '/');
-     writer.write_string(node->name ? node->name + 0 : PUGIXML_TEXT(":anonymous"));
-     writer.write('>');
- }
-
- // Writes a node that has no element-style children: pcdata, cdata, comment, processing
- // instruction, declaration or doctype. Any other node type trips the assertion.
- // Missing text values are written as empty text, missing names as ":anonymous".
- PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
- {
-     const char_t* anonymous = PUGIXML_TEXT(":anonymous");
-
-     switch (PUGI__NODETYPE(node))
-     {
-     case node_pcdata:
-     {
-         const char_t* text = node->value ? node->value + 0 : PUGIXML_TEXT("");
-         text_output(writer, text, ctx_special_pcdata, flags);
-         break;
-     }
-
-     case node_cdata:
-     {
-         const char_t* text = node->value ? node->value + 0 : PUGIXML_TEXT("");
-         text_output_cdata(writer, text);
-         break;
-     }
-
-     case node_comment:
-     {
-         const char_t* text = node->value ? node->value + 0 : PUGIXML_TEXT("");
-         node_output_comment(writer, text);
-         break;
-     }
-
-     case node_pi:
-     {
-         // <?target value?>, the value part only when there is one
-         writer.write('<', '?');
-         writer.write_string(node->name ? node->name + 0 : anonymous);
-
-         if (node->value)
-         {
-             writer.write(' ');
-             node_output_pi_value(writer, node->value);
-         }
-
-         writer.write('?', '>');
-         break;
-     }
-
-     case node_declaration:
-     {
-         // attributes of a declaration are always written in raw form, on one line
-         writer.write('<', '?');
-         writer.write_string(node->name ? node->name + 0 : anonymous);
-         node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
-         writer.write('?', '>');
-         break;
-     }
-
-     case node_doctype:
-     {
-         writer.write('<', '!', 'D', 'O', 'C');
-         writer.write('T', 'Y', 'P', 'E');
-
-         if (node->value)
-         {
-             writer.write(' ');
-             writer.write_string(node->value);
-         }
-
-         writer.write('>');
-         break;
-     }
-
-     default:
-         assert(!"Invalid node type");
-     }
- }
-
- enum indent_flags_t // Bit flags carried between nodes by node_output() to decide what precedes the next item.
- {
- indent_newline = 1, // a '\n' is written before the next item (unless format_raw is set)
- indent_indent = 2 // the indent string is written before the next item (when indentation is active)
- };
-
- PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth) /* Writes root and everything below it, without recursion:
-    the loop descends through first_child, moves sideways through next_sibling and climbs back through
-    parent, writing closing tags on the way up, until it is back at root.
-    indent_flags carries, from one written item to the next, whether a newline and/or an indent must precede
-    it; pcdata and cdata clear both so that text stays glued to its neighbours.
-    depth is the indent level of root; it is incremented on entering an element and decremented on leaving.
-    Indentation is active only when format_indent or format_indent_attributes is set and format_raw is not. */
- {
- size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0; // 0 switches indentation off
- unsigned int indent_flags = indent_indent; // the first item gets an indent but no leading newline
-
- xml_node_struct* node = root;
-
- do
- {
- assert(node);
-
- // begin writing current node
- if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
- {
- node_output_simple(writer, node, flags);
-
- indent_flags = 0; // nothing may be inserted after text
- }
- else
- {
- if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
- writer.write('\n');
-
- if ((indent_flags & indent_indent) && indent_length)
- text_output_indent(writer, indent, indent_length, depth);
-
- if (PUGI__NODETYPE(node) == node_element)
- {
- indent_flags = indent_newline | indent_indent;
-
- if (node_output_start(writer, node, indent, indent_length, flags, depth))
- {
- node = node->first_child; // the element has children: descend
- depth++;
- continue; // in a do-while this jumps to the "node != root" test, which holds for a child
- }
- }
- else if (PUGI__NODETYPE(node) == node_document)
- {
- indent_flags = indent_indent;
-
- if (node->first_child)
- {
- node = node->first_child; // a document writes nothing itself and does not add a depth level
- continue;
- }
- }
- else
- {
- node_output_simple(writer, node, flags);
-
- indent_flags = indent_newline | indent_indent;
- }
- }
-
- // continue to the next node
- while (node != root)
- {
- if (node->next_sibling)
- {
- node = node->next_sibling;
- break;
- }
-
- node = node->parent; // no more siblings: climb one level
-
- // write closing node
- if (PUGI__NODETYPE(node) == node_element)
- {
- depth--;
-
- if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
- writer.write('\n');
-
- if ((indent_flags & indent_indent) && indent_length)
- text_output_indent(writer, indent, indent_length, depth);
-
- node_output_end(writer, node);
-
- indent_flags = indent_newline | indent_indent;
- }
- }
- }
- while (node != root);
-
- if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
- writer.write('\n'); // trailing newline, unless the last thing written was text or format_raw is set
- }
-
- PUGI__FN bool has_declaration(xml_node_struct* node)
- {
- for (xml_node_struct* child = node->first_child; child; child = child->next_sibling)
- {
- xml_node_type type = PUGI__NODETYPE(child);
-
- if (type == node_declaration) return true;
- if (type == node_element) return false;
- }
-
- return false;
- }
-
- PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
- {
- for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
- if (a == attr)
- return true;
-
- return false;
- }
-
- PUGI__FN bool allow_insert_attribute(xml_node_type parent)
- {
- return parent == node_element || parent == node_declaration;
- }
-
- PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
- {
- if (parent != node_document && parent != node_element) return false;
- if (child == node_document || child == node_null) return false;
- if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
-
- return true;
- }
-
- PUGI__FN bool allow_move(xml_node parent, xml_node child)
- {
- // check that child can be a child of parent
- if (!allow_insert_child(parent.type(), child.type()))
- return false;
-
- // check that node is not moved between documents
- if (parent.root() != child.root())
- return false;
-
- // check that new parent is not in the child subtree
- xml_node cur = parent;
-
- while (cur)
- {
- if (cur == child)
- return false;
-
- cur = cur.parent();
- }
-
- return true;
- }
-
- template <typename String, typename Header>
- PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
- {
- assert(!dest && (header & header_mask) == 0);
-
- if (source)
- {
- if (alloc && (source_header & header_mask) == 0)
- {
- dest = source;
-
- // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
- header |= xml_memory_page_contents_shared_mask;
- source_header |= xml_memory_page_contents_shared_mask;
- }
- else
- strcpy_insitu(dest, header, header_mask, source, strlength(source));
- }
- }
-
- PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
- {
- node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
- node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
-
- for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute)
- {
- xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));
-
- if (da)
- {
- node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
- node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
- }
- }
- }
-
- PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
- {
- xml_allocator& alloc = get_allocator(dn);
- xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;
-
- node_copy_contents(dn, sn, shared_alloc);
-
- xml_node_struct* dit = dn;
- xml_node_struct* sit = sn->first_child;
-
- while (sit && sit != sn)
- {
- if (sit != dn)
- {
- xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));
-
- if (copy)
- {
- node_copy_contents(copy, sit, shared_alloc);
-
- if (sit->first_child)
- {
- dit = copy;
- sit = sit->first_child;
- continue;
- }
- }
- }
-
- // continue to the next node
- do
- {
- if (sit->next_sibling)
- {
- sit = sit->next_sibling;
- break;
- }
-
- sit = sit->parent;
- dit = dit->parent;
- }
- while (sit != sn);
- }
- }
-
- PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
- {
- xml_allocator& alloc = get_allocator(da);
- xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0;
-
- node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
- node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
- }
-
- inline bool is_text_node(xml_node_struct* node)
- {
- xml_node_type type = PUGI__NODETYPE(node);
-
- return type == node_pcdata || type == node_cdata;
- }
-
- // get value with conversion functions
- template <typename U> U string_to_integer(const char_t* value, U minneg, U maxpos)
- {
- U result = 0;
- const char_t* s = value;
-
- while (PUGI__IS_CHARTYPE(*s, ct_space))
- s++;
-
- bool negative = (*s == '-');
-
- s += (*s == '+' || *s == '-');
-
- bool overflow = false;
-
- if (s[0] == '0' && (s[1] | ' ') == 'x')
- {
- s += 2;
-
- // since overflow detection relies on length of the sequence skip leading zeros
- while (*s == '0')
- s++;
-
- const char_t* start = s;
-
- for (;;)
- {
- if (static_cast<unsigned>(*s - '0') < 10)
- result = result * 16 + (*s - '0');
- else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
- result = result * 16 + ((*s | ' ') - 'a' + 10);
- else
- break;
-
- s++;
- }
-
- size_t digits = static_cast<size_t>(s - start);
-
- overflow = digits > sizeof(U) * 2;
- }
- else
- {
- // since overflow detection relies on length of the sequence skip leading zeros
- while (*s == '0')
- s++;
-
- const char_t* start = s;
-
- for (;;)
- {
- if (static_cast<unsigned>(*s - '0') < 10)
- result = result * 10 + (*s - '0');
- else
- break;
-
- s++;
- }
-
- size_t digits = static_cast<size_t>(s - start);
-
- PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);
-
- const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
- const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
- const size_t high_bit = sizeof(U) * 8 - 1;
-
- overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
- }
-
- if (negative)
- return (overflow || result > minneg) ? 0 - minneg : 0 - result;
- else
- return (overflow || result > maxpos) ? maxpos : result;
- }
-
- PUGI__FN int get_value_int(const char_t* value)
- {
- return string_to_integer<unsigned int>(value, 0 - static_cast<unsigned int>(INT_MIN), INT_MAX);
- }
-
- PUGI__FN unsigned int get_value_uint(const char_t* value)
- {
- return string_to_integer<unsigned int>(value, 0, UINT_MAX);
- }
-
- PUGI__FN double get_value_double(const char_t* value)
- {
- #ifdef PUGIXML_WCHAR_MODE
- return wcstod(value, 0);
- #else
- return strtod(value, 0);
- #endif
- }
-
- PUGI__FN float get_value_float(const char_t* value)
- {
- #ifdef PUGIXML_WCHAR_MODE
- return static_cast<float>(wcstod(value, 0));
- #else
- return static_cast<float>(strtod(value, 0));
- #endif
- }
-
- PUGI__FN bool get_value_bool(const char_t* value)
- {
- // only look at first char
- char_t first = *value;
-
- // 1*, t* (true), T* (True), y* (yes), Y* (YES)
- return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
- }
-
-#ifdef PUGIXML_HAS_LONG_LONG
- PUGI__FN long long get_value_llong(const char_t* value)
- {
- return string_to_integer<unsigned long long>(value, 0 - static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX);
- }
-
- PUGI__FN unsigned long long get_value_ullong(const char_t* value)
- {
- return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX);
- }
-#endif
-
- template <typename U>
- PUGI__FN char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative)
- {
- char_t* result = end - 1;
- U rest = negative ? 0 - value : value;
-
- do
- {
- *result-- = static_cast<char_t>('0' + (rest % 10));
- rest /= 10;
- }
- while (rest);
-
- assert(result >= begin);
- (void)begin;
-
- *result = '-';
-
- return result + !negative;
- }
-
- // set value with conversion functions
- template <typename String, typename Header>
- PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
- {
- #ifdef PUGIXML_WCHAR_MODE
- char_t wbuf[128];
- assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));
-
- size_t offset = 0;
- for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
-
- return strcpy_insitu(dest, header, header_mask, wbuf, offset);
- #else
- return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
- #endif
- }
-
- template <typename String, typename Header>
- PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, int value)
- {
- char_t buf[64];
- char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
- char_t* begin = integer_to_string<unsigned int>(buf, end, value, value < 0);
-
- return strcpy_insitu(dest, header, header_mask, begin, end - begin);
- }
-
- template <typename String, typename Header>
- PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned int value)
- {
- char_t buf[64];
- char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
- char_t* begin = integer_to_string<unsigned int>(buf, end, value, false);
-
- return strcpy_insitu(dest, header, header_mask, begin, end - begin);
- }
-
- template <typename String, typename Header>
- PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value)
- {
- char buf[128];
- sprintf(buf, "%.9g", value);
-
- return set_value_ascii(dest, header, header_mask, buf);
- }
-
- template <typename String, typename Header>
- PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value)
- {
- char buf[128];
- sprintf(buf, "%.17g", value);
-
- return set_value_ascii(dest, header, header_mask, buf);
- }
-
- template <typename String, typename Header>
- PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, bool value)
- {
- return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5);
- }
-
-#ifdef PUGIXML_HAS_LONG_LONG
- template <typename String, typename Header>
- PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, long long value)
- {
- char_t buf[64];
- char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
- char_t* begin = integer_to_string<unsigned long long>(buf, end, value, value < 0);
-
- return strcpy_insitu(dest, header, header_mask, begin, end - begin);
- }
-
- template <typename String, typename Header>
- PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned long long value)
- {
- char_t buf[64];
- char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
- char_t* begin = integer_to_string<unsigned long long>(buf, end, value, false);
-
- return strcpy_insitu(dest, header, header_mask, begin, end - begin);
- }
-#endif
-
- PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
- {
- // check input buffer
- if (!contents && size) return make_parse_result(status_io_error);
-
- // get actual encoding
- xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
-
- // get private buffer
- char_t* buffer = 0;
- size_t length = 0;
-
- if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
-
- // delete original buffer if we performed a conversion
- if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
-
- // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
- if (own || buffer != contents) *out_buffer = buffer;
-
- // store buffer for offset_debug
- doc->buffer = buffer;
-
- // parse
- xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
-
- // remember encoding
- res.encoding = buffer_encoding;
-
- return res;
- }
-
- // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
- PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
- {
- #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
- // there are 64-bit versions of fseek/ftell, let's use them
- typedef __int64 length_type;
-
- _fseeki64(file, 0, SEEK_END);
- length_type length = _ftelli64(file);
- _fseeki64(file, 0, SEEK_SET);
- #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
- // there are 64-bit versions of fseek/ftell, let's use them
- typedef off64_t length_type;
-
- fseeko64(file, 0, SEEK_END);
- length_type length = ftello64(file);
- fseeko64(file, 0, SEEK_SET);
- #else
- // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
- typedef long length_type;
-
- fseek(file, 0, SEEK_END);
- length_type length = ftell(file);
- fseek(file, 0, SEEK_SET);
- #endif
-
- // check for I/O errors
- if (length < 0) return status_io_error;
-
- // check for overflow
- size_t result = static_cast<size_t>(length);
-
- if (static_cast<length_type>(result) != length) return status_out_of_memory;
-
- // finalize
- out_result = result;
-
- return status_ok;
- }
-
- // This function assumes that buffer has extra sizeof(char_t) writable bytes after size
- PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
- {
- // We only need to zero-terminate if encoding conversion does not do it for us
- #ifdef PUGIXML_WCHAR_MODE
- xml_encoding wchar_encoding = get_wchar_encoding();
-
- if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
- {
- size_t length = size / sizeof(char_t);
-
- static_cast<char_t*>(buffer)[length] = 0;
- return (length + 1) * sizeof(char_t);
- }
- #else
- if (encoding == encoding_utf8)
- {
- static_cast<char*>(buffer)[size] = 0;
- return size + 1;
- }
- #endif
-
- return size;
- }
-
- PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
- {
- if (!file) return make_parse_result(status_file_not_found);
-
- // get file size (can result in I/O errors)
- size_t size = 0;
- xml_parse_status size_status = get_file_size(file, size);
- if (size_status != status_ok) return make_parse_result(size_status);
-
- size_t max_suffix_size = sizeof(char_t);
-
- // allocate buffer for the whole file
- char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
- if (!contents) return make_parse_result(status_out_of_memory);
-
- // read file in memory
- size_t read_size = fread(contents, 1, size, file);
-
- if (read_size != size)
- {
- xml_memory::deallocate(contents);
- return make_parse_result(status_io_error);
- }
-
- xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
-
- return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer);
- }
-
-#ifndef PUGIXML_NO_STL
- template <typename T> struct xml_stream_chunk
- {
- static xml_stream_chunk* create()
- {
- void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
- if (!memory) return 0;
-
- return new (memory) xml_stream_chunk();
- }
-
- static void destroy(xml_stream_chunk* chunk)
- {
- // free chunk chain
- while (chunk)
- {
- xml_stream_chunk* next_ = chunk->next;
-
- xml_memory::deallocate(chunk);
-
- chunk = next_;
- }
- }
-
- xml_stream_chunk(): next(0), size(0)
- {
- }
-
- xml_stream_chunk* next;
- size_t size;
-
- T data[xml_memory_page_size / sizeof(T)];
- };
-
- template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
- {
- auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy);
-
- // read file to a chunk list
- size_t total = 0;
- xml_stream_chunk<T>* last = 0;
-
- while (!stream.eof())
- {
- // allocate new chunk
- xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
- if (!chunk) return status_out_of_memory;
-
- // append chunk to list
- if (last) last = last->next = chunk;
- else chunks.data = last = chunk;
-
- // read data to chunk
- stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
- chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
-
- // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
- if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
-
- // guard against huge files (chunk size is small enough to make this overflow check work)
- if (total + chunk->size < total) return status_out_of_memory;
- total += chunk->size;
- }
-
- size_t max_suffix_size = sizeof(char_t);
-
- // copy chunk list to a contiguous buffer
- char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
- if (!buffer) return status_out_of_memory;
-
- char* write = buffer;
-
- for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
- {
- assert(write + chunk->size <= buffer + total);
- memcpy(write, chunk->data, chunk->size);
- write += chunk->size;
- }
-
- assert(write == buffer + total);
-
- // return buffer
- *out_buffer = buffer;
- *out_size = total;
-
- return status_ok;
- }
-
- template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
- {
- // get length of remaining data in stream
- typename std::basic_istream<T>::pos_type pos = stream.tellg();
- stream.seekg(0, std::ios::end);
- std::streamoff length = stream.tellg() - pos;
- stream.seekg(pos);
-
- if (stream.fail() || pos < 0) return status_io_error;
-
- // guard against huge files
- size_t read_length = static_cast<size_t>(length);
-
- if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
-
- size_t max_suffix_size = sizeof(char_t);
-
- // read stream data into memory (guard against stream exceptions with buffer holder)
- auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
- if (!buffer.data) return status_out_of_memory;
-
- stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
-
- // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
- if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
-
- // return buffer
- size_t actual_length = static_cast<size_t>(stream.gcount());
- assert(actual_length <= read_length);
-
- *out_buffer = buffer.release();
- *out_size = actual_length * sizeof(T);
-
- return status_ok;
- }
-
- template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
- {
- void* buffer = 0;
- size_t size = 0;
- xml_parse_status status = status_ok;
-
- // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
- if (stream.fail()) return make_parse_result(status_io_error);
-
- // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
- if (stream.tellg() < 0)
- {
- stream.clear(); // clear error flags that could be set by a failing tellg
- status = load_stream_data_noseek(stream, &buffer, &size);
- }
- else
- status = load_stream_data_seek(stream, &buffer, &size);
-
- if (status != status_ok) return make_parse_result(status);
-
- xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
-
- return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer);
- }
-#endif
-
-#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
- PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
- {
- return _wfopen(path, mode);
- }
-#else
- PUGI__FN char* convert_path_heap(const wchar_t* str)
- {
- assert(str);
-
- // first pass: get length in utf8 characters
- size_t length = strlength_wide(str);
- size_t size = as_utf8_begin(str, length);
-
- // allocate resulting string
- char* result = static_cast<char*>(xml_memory::allocate(size + 1));
- if (!result) return 0;
-
- // second pass: convert to utf8
- as_utf8_end(result, size, str, length);
-
- // zero-terminate
- result[size] = 0;
-
- return result;
- }
-
- PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
- {
- // there is no standard function to open wide paths, so our best bet is to try utf8 path
- char* path_utf8 = convert_path_heap(path);
- if (!path_utf8) return 0;
-
- // convert mode to ASCII (we mirror _wfopen interface)
- char mode_ascii[4] = {0};
- for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
-
- // try to open the utf8 path
- FILE* result = fopen(path_utf8, mode_ascii);
-
- // free dummy buffer
- xml_memory::deallocate(path_utf8);
-
- return result;
- }
-#endif
-
- PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
- {
- if (!file) return false;
-
- xml_writer_file writer(file);
- doc.save(writer, indent, flags, encoding);
-
- return ferror(file) == 0;
- }
-
- struct name_null_sentry
- {
- xml_node_struct* node;
- char_t* name;
-
- name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name)
- {
- node->name = 0;
- }
-
- ~name_null_sentry()
- {
- node->name = name;
- }
- };
-PUGI__NS_END
-
-namespace pugi
-{
- PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
- {
- }
-
- PUGI__FN void xml_writer_file::write(const void* data, size_t size)
- {
- size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
- (void)!result; // unfortunately we can't do proper error handling here
- }
-
-#ifndef PUGIXML_NO_STL
- PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
- {
- }
-
- PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
- {
- }
-
- PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
- {
- if (narrow_stream)
- {
- assert(!wide_stream);
- narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
- }
- else
- {
- assert(wide_stream);
- assert(size % sizeof(wchar_t) == 0);
-
- wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
- }
- }
-#endif
-
- PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
- {
- }
-
- PUGI__FN xml_tree_walker::~xml_tree_walker()
- {
- }
-
- PUGI__FN int xml_tree_walker::depth() const
- {
- return _depth;
- }
-
- PUGI__FN bool xml_tree_walker::begin(xml_node&)
- {
- return true;
- }
-
- PUGI__FN bool xml_tree_walker::end(xml_node&)
- {
- return true;
- }
-
- PUGI__FN xml_attribute::xml_attribute(): _attr(0)
- {
- }
-
- PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
- {
- }
-
- PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
- {
- }
-
- PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
- {
- return _attr ? unspecified_bool_xml_attribute : 0;
- }
-
- PUGI__FN bool xml_attribute::operator!() const
- {
- return !_attr;
- }
-
- PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
- {
- return (_attr == r._attr);
- }
-
- PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
- {
- return (_attr != r._attr);
- }
-
- PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
- {
- return (_attr < r._attr);
- }
-
- PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
- {
- return (_attr > r._attr);
- }
-
- PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
- {
- return (_attr <= r._attr);
- }
-
- PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
- {
- return (_attr >= r._attr);
- }
-
- PUGI__FN xml_attribute xml_attribute::next_attribute() const
- {
- return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
- }
-
- PUGI__FN xml_attribute xml_attribute::previous_attribute() const
- {
- return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
- }
-
- PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
- {
- return (_attr && _attr->value) ? _attr->value + 0 : def;
- }
-
- PUGI__FN int xml_attribute::as_int(int def) const
- {
- return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def;
- }
-
- PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
- {
- return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def;
- }
-
- PUGI__FN double xml_attribute::as_double(double def) const
- {
- return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def;
- }
-
- PUGI__FN float xml_attribute::as_float(float def) const
- {
- return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def;
- }
-
- PUGI__FN bool xml_attribute::as_bool(bool def) const
- {
- return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def;
- }
-
-#ifdef PUGIXML_HAS_LONG_LONG
- PUGI__FN long long xml_attribute::as_llong(long long def) const
- {
- return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def;
- }
-
- PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
- {
- return (_attr && _attr->value) ? impl::get_value_ullong(_attr->value) : def;
- }
-#endif
-
- PUGI__FN bool xml_attribute::empty() const
- {
- return !_attr;
- }
-
- PUGI__FN const char_t* xml_attribute::name() const
- {
- return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT("");
- }
-
- PUGI__FN const char_t* xml_attribute::value() const
- {
- return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT("");
- }
-
- PUGI__FN size_t xml_attribute::hash_value() const
- {
- return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
- }
-
- PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
- {
- return _attr;
- }
-
- PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
- {
- set_value(rhs);
- return *this;
- }
-
- PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
- {
- set_value(rhs);
- return *this;
- }
-
- PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
- {
- set_value(rhs);
- return *this;
- }
-
- PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
- {
- set_value(rhs);
- return *this;
- }
-
- PUGI__FN xml_attribute& xml_attribute::operator=(float rhs)
- {
- set_value(rhs);
- return *this;
- }
-
- PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
- {
- set_value(rhs);
- return *this;
- }
-
-#ifdef PUGIXML_HAS_LONG_LONG
- PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
- {
- set_value(rhs);
- return *this;
- }
-
- PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
- {
- set_value(rhs);
- return *this;
- }
-#endif
-
- PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
- {
- if (!_attr) return false;
-
- return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
- }
-
- PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
- {
- if (!_attr) return false;
-
- return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
- }
-
- PUGI__FN bool xml_attribute::set_value(int rhs)
- {
- if (!_attr) return false;
-
- return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
- }
-
- PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
- {
- if (!_attr) return false;
-
- return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
- }
-
- PUGI__FN bool xml_attribute::set_value(double rhs)
- {
- if (!_attr) return false;
-
- return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
- }
-
- PUGI__FN bool xml_attribute::set_value(float rhs)
- {
- if (!_attr) return false;
-
- return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
- }
-
- PUGI__FN bool xml_attribute::set_value(bool rhs)
- {
- if (!_attr) return false;
-
- return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
- }
-
-#ifdef PUGIXML_HAS_LONG_LONG
- PUGI__FN bool xml_attribute::set_value(long long rhs)
- {
- if (!_attr) return false;
-
- return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
- }
-
- PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
- {
- if (!_attr) return false;
-
- return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
- }
-#endif
-
-#ifdef __BORLANDC__
- PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
- {
- return (bool)lhs && rhs;
- }
-
- PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
- {
- return (bool)lhs || rhs;
- }
-#endif
-
- PUGI__FN xml_node::xml_node(): _root(0)
- {
- }
-
- PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
- {
- }
-
- PUGI__FN static void unspecified_bool_xml_node(xml_node***)
- {
- }
-
- PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
- {
- return _root ? unspecified_bool_xml_node : 0;
- }
-
- PUGI__FN bool xml_node::operator!() const
- {
- return !_root;
- }
-
- PUGI__FN xml_node::iterator xml_node::begin() const
- {
- return iterator(_root ? _root->first_child + 0 : 0, _root);
- }
-
- PUGI__FN xml_node::iterator xml_node::end() const
- {
- return iterator(0, _root);
- }
-
- PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
- {
- return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root);
- }
-
- PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
- {
- return attribute_iterator(0, _root);
- }
-
- PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
- {
- return xml_object_range<xml_node_iterator>(begin(), end());
- }
-
- PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
- {
- return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));
- }
-
- PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
- {
- return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
- }
-
- PUGI__FN bool xml_node::operator==(const xml_node& r) const
- {
- return (_root == r._root);
- }
-
- PUGI__FN bool xml_node::operator!=(const xml_node& r) const
- {
- return (_root != r._root);
- }
-
- PUGI__FN bool xml_node::operator<(const xml_node& r) const
- {
- return (_root < r._root);
- }
-
- PUGI__FN bool xml_node::operator>(const xml_node& r) const
- {
- return (_root > r._root);
- }
-
- PUGI__FN bool xml_node::operator<=(const xml_node& r) const
- {
- return (_root <= r._root);
- }
-
- PUGI__FN bool xml_node::operator>=(const xml_node& r) const
- {
- return (_root >= r._root);
- }
-
- PUGI__FN bool xml_node::empty() const
- {
- return !_root;
- }
-
- PUGI__FN const char_t* xml_node::name() const
- {
- return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT("");
- }
-
- PUGI__FN xml_node_type xml_node::type() const
- {
- return _root ? PUGI__NODETYPE(_root) : node_null;
- }
-
- PUGI__FN const char_t* xml_node::value() const
- {
- return (_root && _root->value) ? _root->value + 0 : PUGIXML_TEXT("");
- }
-
- PUGI__FN xml_node xml_node::child(const char_t* name_) const
- {
- if (!_root) return xml_node();
-
- for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
- if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
-
- return xml_node();
- }
-
- PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
- {
- if (!_root) return xml_attribute();
-
- for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
- if (i->name && impl::strequal(name_, i->name))
- return xml_attribute(i);
-
- return xml_attribute();
- }
-
- PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
- {
- if (!_root) return xml_node();
-
- for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
- if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
-
- return xml_node();
- }
-
- PUGI__FN xml_node xml_node::next_sibling() const
- {
- return _root ? xml_node(_root->next_sibling) : xml_node();
- }
-
- PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
- {
- if (!_root) return xml_node();
-
- for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
- if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
-
- return xml_node();
- }
-
- PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const
- {
- xml_attribute_struct* hint = hint_._attr;
-
- // if hint is not an attribute of node, behavior is not defined
- assert(!hint || (_root && impl::is_attribute_of(hint, _root)));
-
- if (!_root) return xml_attribute();
-
- // optimistically search from hint up until the end
- for (xml_attribute_struct* i = hint; i; i = i->next_attribute)
- if (i->name && impl::strequal(name_, i->name))
- {
- // update hint to maximize efficiency of searching for consecutive attributes
- hint_._attr = i->next_attribute;
-
- return xml_attribute(i);
- }
-
- // wrap around and search from the first attribute until the hint
- // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails
- for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute)
- if (j->name && impl::strequal(name_, j->name))
- {
- // update hint to maximize efficiency of searching for consecutive attributes
- hint_._attr = j->next_attribute;
-
- return xml_attribute(j);
- }
-
- return xml_attribute();
- }
-
- PUGI__FN xml_node xml_node::previous_sibling() const
- {
- if (!_root) return xml_node();
-
- if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
- else return xml_node();
- }
-
- PUGI__FN xml_node xml_node::parent() const
- {
- return _root ? xml_node(_root->parent) : xml_node();
- }
-
- PUGI__FN xml_node xml_node::root() const
- {
- return _root ? xml_node(&impl::get_document(_root)) : xml_node();
- }
-
- PUGI__FN xml_text xml_node::text() const
- {
- return xml_text(_root);
- }
-
- PUGI__FN const char_t* xml_node::child_value() const
- {
- if (!_root) return PUGIXML_TEXT("");
-
- for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
- if (impl::is_text_node(i) && i->value)
- return i->value;
-
- return PUGIXML_TEXT("");
- }
-
- PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
- {
- return child(name_).child_value();
- }
-
- PUGI__FN xml_attribute xml_node::first_attribute() const
- {
- return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
- }
-
- PUGI__FN xml_attribute xml_node::last_attribute() const
- {
- return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
- }
-
- PUGI__FN xml_node xml_node::first_child() const
- {
- return _root ? xml_node(_root->first_child) : xml_node();
- }
-
- PUGI__FN xml_node xml_node::last_child() const
- {
- return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
- }
-
- PUGI__FN bool xml_node::set_name(const char_t* rhs)
- {
- xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
-
- if (type_ != node_element && type_ != node_pi && type_ != node_declaration)
- return false;
-
- return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
- }
-
- PUGI__FN bool xml_node::set_value(const char_t* rhs)
- {
- xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
-
- if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype)
- return false;
-
- return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
- }
-
- PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
- {
- if (!impl::allow_insert_attribute(type())) return xml_attribute();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_attribute();
-
- xml_attribute a(impl::allocate_attribute(alloc));
- if (!a) return xml_attribute();
-
- impl::append_attribute(a._attr, _root);
-
- a.set_name(name_);
-
- return a;
- }
-
- PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
- {
- if (!impl::allow_insert_attribute(type())) return xml_attribute();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_attribute();
-
- xml_attribute a(impl::allocate_attribute(alloc));
- if (!a) return xml_attribute();
-
- impl::prepend_attribute(a._attr, _root);
-
- a.set_name(name_);
-
- return a;
- }
-
- PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
- {
- if (!impl::allow_insert_attribute(type())) return xml_attribute();
- if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_attribute();
-
- xml_attribute a(impl::allocate_attribute(alloc));
- if (!a) return xml_attribute();
-
- impl::insert_attribute_after(a._attr, attr._attr, _root);
-
- a.set_name(name_);
-
- return a;
- }
-
- PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
- {
- if (!impl::allow_insert_attribute(type())) return xml_attribute();
- if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_attribute();
-
- xml_attribute a(impl::allocate_attribute(alloc));
- if (!a) return xml_attribute();
-
- impl::insert_attribute_before(a._attr, attr._attr, _root);
-
- a.set_name(name_);
-
- return a;
- }
-
- PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
- {
- if (!proto) return xml_attribute();
- if (!impl::allow_insert_attribute(type())) return xml_attribute();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_attribute();
-
- xml_attribute a(impl::allocate_attribute(alloc));
- if (!a) return xml_attribute();
-
- impl::append_attribute(a._attr, _root);
- impl::node_copy_attribute(a._attr, proto._attr);
-
- return a;
- }
-
- PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
- {
- if (!proto) return xml_attribute();
- if (!impl::allow_insert_attribute(type())) return xml_attribute();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_attribute();
-
- xml_attribute a(impl::allocate_attribute(alloc));
- if (!a) return xml_attribute();
-
- impl::prepend_attribute(a._attr, _root);
- impl::node_copy_attribute(a._attr, proto._attr);
-
- return a;
- }
-
- PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
- {
- if (!proto) return xml_attribute();
- if (!impl::allow_insert_attribute(type())) return xml_attribute();
- if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_attribute();
-
- xml_attribute a(impl::allocate_attribute(alloc));
- if (!a) return xml_attribute();
-
- impl::insert_attribute_after(a._attr, attr._attr, _root);
- impl::node_copy_attribute(a._attr, proto._attr);
-
- return a;
- }
-
- PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
- {
- if (!proto) return xml_attribute();
- if (!impl::allow_insert_attribute(type())) return xml_attribute();
- if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_attribute();
-
- xml_attribute a(impl::allocate_attribute(alloc));
- if (!a) return xml_attribute();
-
- impl::insert_attribute_before(a._attr, attr._attr, _root);
- impl::node_copy_attribute(a._attr, proto._attr);
-
- return a;
- }
-
- PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
- {
- if (!impl::allow_insert_child(type(), type_)) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- xml_node n(impl::allocate_node(alloc, type_));
- if (!n) return xml_node();
-
- impl::append_node(n._root, _root);
-
- if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
-
- return n;
- }
-
- PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
- {
- if (!impl::allow_insert_child(type(), type_)) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- xml_node n(impl::allocate_node(alloc, type_));
- if (!n) return xml_node();
-
- impl::prepend_node(n._root, _root);
-
- if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
-
- return n;
- }
-
- PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
- {
- if (!impl::allow_insert_child(type(), type_)) return xml_node();
- if (!node._root || node._root->parent != _root) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- xml_node n(impl::allocate_node(alloc, type_));
- if (!n) return xml_node();
-
- impl::insert_node_before(n._root, node._root);
-
- if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
-
- return n;
- }
-
- PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
- {
- if (!impl::allow_insert_child(type(), type_)) return xml_node();
- if (!node._root || node._root->parent != _root) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- xml_node n(impl::allocate_node(alloc, type_));
- if (!n) return xml_node();
-
- impl::insert_node_after(n._root, node._root);
-
- if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
-
- return n;
- }
-
- PUGI__FN xml_node xml_node::append_child(const char_t* name_)
- {
- xml_node result = append_child(node_element);
-
- result.set_name(name_);
-
- return result;
- }
-
- PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
- {
- xml_node result = prepend_child(node_element);
-
- result.set_name(name_);
-
- return result;
- }
-
- PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
- {
- xml_node result = insert_child_after(node_element, node);
-
- result.set_name(name_);
-
- return result;
- }
-
- PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
- {
- xml_node result = insert_child_before(node_element, node);
-
- result.set_name(name_);
-
- return result;
- }
-
- PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
- {
- xml_node_type type_ = proto.type();
- if (!impl::allow_insert_child(type(), type_)) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- xml_node n(impl::allocate_node(alloc, type_));
- if (!n) return xml_node();
-
- impl::append_node(n._root, _root);
- impl::node_copy_tree(n._root, proto._root);
-
- return n;
- }
-
- PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
- {
- xml_node_type type_ = proto.type();
- if (!impl::allow_insert_child(type(), type_)) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- xml_node n(impl::allocate_node(alloc, type_));
- if (!n) return xml_node();
-
- impl::prepend_node(n._root, _root);
- impl::node_copy_tree(n._root, proto._root);
-
- return n;
- }
-
- PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
- {
- xml_node_type type_ = proto.type();
- if (!impl::allow_insert_child(type(), type_)) return xml_node();
- if (!node._root || node._root->parent != _root) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- xml_node n(impl::allocate_node(alloc, type_));
- if (!n) return xml_node();
-
- impl::insert_node_after(n._root, node._root);
- impl::node_copy_tree(n._root, proto._root);
-
- return n;
- }
-
- PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
- {
- xml_node_type type_ = proto.type();
- if (!impl::allow_insert_child(type(), type_)) return xml_node();
- if (!node._root || node._root->parent != _root) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- xml_node n(impl::allocate_node(alloc, type_));
- if (!n) return xml_node();
-
- impl::insert_node_before(n._root, node._root);
- impl::node_copy_tree(n._root, proto._root);
-
- return n;
- }
-
- PUGI__FN xml_node xml_node::append_move(const xml_node& moved)
- {
- if (!impl::allow_move(*this, moved)) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
- impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
-
- impl::remove_node(moved._root);
- impl::append_node(moved._root, _root);
-
- return moved;
- }
-
- PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved)
- {
- if (!impl::allow_move(*this, moved)) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
- impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
-
- impl::remove_node(moved._root);
- impl::prepend_node(moved._root, _root);
-
- return moved;
- }
-
- PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
- {
- if (!impl::allow_move(*this, moved)) return xml_node();
- if (!node._root || node._root->parent != _root) return xml_node();
- if (moved._root == node._root) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
- impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
-
- impl::remove_node(moved._root);
- impl::insert_node_after(moved._root, node._root);
-
- return moved;
- }
-
- PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
- {
- if (!impl::allow_move(*this, moved)) return xml_node();
- if (!node._root || node._root->parent != _root) return xml_node();
- if (moved._root == node._root) return xml_node();
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return xml_node();
-
- // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
- impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
-
- impl::remove_node(moved._root);
- impl::insert_node_before(moved._root, node._root);
-
- return moved;
- }
-
- PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
- {
- return remove_attribute(attribute(name_));
- }
-
- PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
- {
- if (!_root || !a._attr) return false;
- if (!impl::is_attribute_of(a._attr, _root)) return false;
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return false;
-
- impl::remove_attribute(a._attr, _root);
- impl::destroy_attribute(a._attr, alloc);
-
- return true;
- }
-
- PUGI__FN bool xml_node::remove_child(const char_t* name_)
- {
- return remove_child(child(name_));
- }
-
- PUGI__FN bool xml_node::remove_child(const xml_node& n)
- {
- if (!_root || !n._root || n._root->parent != _root) return false;
-
- impl::xml_allocator& alloc = impl::get_allocator(_root);
- if (!alloc.reserve()) return false;
-
- impl::remove_node(n._root);
- impl::destroy_node(n._root, alloc);
-
- return true;
- }
-
- PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
- {
- // append_buffer is only valid for elements/documents
- if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);
-
- // get document node
- impl::xml_document_struct* doc = &impl::get_document(_root);
-
- // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense
- doc->header |= impl::xml_memory_page_contents_shared_mask;
-
- // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
- impl::xml_memory_page* page = 0;
- impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer), page));
- (void)page;
-
- if (!extra) return impl::make_parse_result(status_out_of_memory);
-
- // add extra buffer to the list
- extra->buffer = 0;
- extra->next = doc->extra_buffers;
- doc->extra_buffers = extra;
-
- // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
- impl::name_null_sentry sentry(_root);
-
- return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer);
- }
-
- PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
- {
- if (!_root) return xml_node();
-
- for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
- if (i->name && impl::strequal(name_, i->name))
- {
- for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
- if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
- return xml_node(i);
- }
-
- return xml_node();
- }
-
- PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
- {
- if (!_root) return xml_node();
-
- for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
- for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
- if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
- return xml_node(i);
-
- return xml_node();
- }
-
-#ifndef PUGIXML_NO_STL
- PUGI__FN string_t xml_node::path(char_t delimiter) const
- {
- if (!_root) return string_t();
-
- size_t offset = 0;
-
- for (xml_node_struct* i = _root; i; i = i->parent)
- {
- offset += (i != _root);
- offset += i->name ? impl::strlength(i->name) : 0;
- }
-
- string_t result;
- result.resize(offset);
-
- for (xml_node_struct* j = _root; j; j = j->parent)
- {
- if (j != _root)
- result[--offset] = delimiter;
-
- if (j->name && *j->name)
- {
- size_t length = impl::strlength(j->name);
-
- offset -= length;
- memcpy(&result[offset], j->name, length * sizeof(char_t));
- }
- }
-
- assert(offset == 0);
-
- return result;
- }
-#endif
-
- PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
- {
- xml_node found = *this; // Current search context.
-
- if (!_root || !path_ || !path_[0]) return found;
-
- if (path_[0] == delimiter)
- {
- // Absolute path; e.g. '/foo/bar'
- found = found.root();
- ++path_;
- }
-
- const char_t* path_segment = path_;
-
- while (*path_segment == delimiter) ++path_segment;
-
- const char_t* path_segment_end = path_segment;
-
- while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
-
- if (path_segment == path_segment_end) return found;
-
- const char_t* next_segment = path_segment_end;
-
- while (*next_segment == delimiter) ++next_segment;
-
- if (*path_segment == '.' && path_segment + 1 == path_segment_end)
- return found.first_element_by_path(next_segment, delimiter);
- else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
- return found.parent().first_element_by_path(next_segment, delimiter);
- else
- {
- for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling)
- {
- if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
- {
- xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
-
- if (subsearch) return subsearch;
- }
- }
-
- return xml_node();
- }
- }
-
- PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
- {
- walker._depth = -1;
-
- xml_node arg_begin = *this;
- if (!walker.begin(arg_begin)) return false;
-
- xml_node cur = first_child();
-
- if (cur)
- {
- ++walker._depth;
-
- do
- {
- xml_node arg_for_each = cur;
- if (!walker.for_each(arg_for_each))
- return false;
-
- if (cur.first_child())
- {
- ++walker._depth;
- cur = cur.first_child();
- }
- else if (cur.next_sibling())
- cur = cur.next_sibling();
- else
- {
- // Borland C++ workaround
- while (!cur.next_sibling() && cur != *this && !cur.parent().empty())
- {
- --walker._depth;
- cur = cur.parent();
- }
-
- if (cur != *this)
- cur = cur.next_sibling();
- }
- }
- while (cur && cur != *this);
- }
-
- assert(walker._depth == -1);
-
- xml_node arg_end = *this;
- return walker.end(arg_end);
- }
-
- PUGI__FN size_t xml_node::hash_value() const
- {
- return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
- }
-
- PUGI__FN xml_node_struct* xml_node::internal_object() const
- {
- return _root;
- }
-
- PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
- {
- if (!_root) return;
-
- impl::xml_buffered_writer buffered_writer(writer, encoding);
-
- impl::node_output(buffered_writer, _root, indent, flags, depth);
-
- buffered_writer.flush();
- }
-
-#ifndef PUGIXML_NO_STL
- PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
- {
- xml_writer_stream writer(stream);
-
- print(writer, indent, flags, encoding, depth);
- }
-
- PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
- {
- xml_writer_stream writer(stream);
-
- print(writer, indent, flags, encoding_wchar, depth);
- }
-#endif
-
- PUGI__FN ptrdiff_t xml_node::offset_debug() const
- {
- if (!_root) return -1;
-
- impl::xml_document_struct& doc = impl::get_document(_root);
-
- // we can determine the offset reliably only if there is exactly once parse buffer
- if (!doc.buffer || doc.extra_buffers) return -1;
-
- switch (type())
- {
- case node_document:
- return 0;
-
- case node_element:
- case node_declaration:
- case node_pi:
- return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1;
-
- case node_pcdata:
- case node_cdata:
- case node_comment:
- case node_doctype:
- return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1;
-
- default:
- return -1;
- }
- }
-
-#ifdef __BORLANDC__
- PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
- {
- return (bool)lhs && rhs;
- }
-
- PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
- {
- return (bool)lhs || rhs;
- }
-#endif
-
- PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
- {
- }
-
- PUGI__FN xml_node_struct* xml_text::_data() const
- {
- if (!_root || impl::is_text_node(_root)) return _root;
-
- for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
- if (impl::is_text_node(node))
- return node;
-
- return 0;
- }
-
- PUGI__FN xml_node_struct* xml_text::_data_new()
- {
- xml_node_struct* d = _data();
- if (d) return d;
-
- return xml_node(_root).append_child(node_pcdata).internal_object();
- }
-
- PUGI__FN xml_text::xml_text(): _root(0)
- {
- }
-
- PUGI__FN static void unspecified_bool_xml_text(xml_text***)
- {
- }
-
- PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
- {
- return _data() ? unspecified_bool_xml_text : 0;
- }
-
- PUGI__FN bool xml_text::operator!() const
- {
- return !_data();
- }
-
- PUGI__FN bool xml_text::empty() const
- {
- return _data() == 0;
- }
-
- PUGI__FN const char_t* xml_text::get() const
- {
- xml_node_struct* d = _data();
-
- return (d && d->value) ? d->value + 0 : PUGIXML_TEXT("");
- }
-
- PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
- {
- xml_node_struct* d = _data();
-
- return (d && d->value) ? d->value + 0 : def;
- }
-
- PUGI__FN int xml_text::as_int(int def) const
- {
- xml_node_struct* d = _data();
-
- return (d && d->value) ? impl::get_value_int(d->value) : def;
- }
-
- PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
- {
- xml_node_struct* d = _data();
-
- return (d && d->value) ? impl::get_value_uint(d->value) : def;
- }
-
- PUGI__FN double xml_text::as_double(double def) const
- {
- xml_node_struct* d = _data();
-
- return (d && d->value) ? impl::get_value_double(d->value) : def;
- }
-
- PUGI__FN float xml_text::as_float(float def) const
- {
- xml_node_struct* d = _data();
-
- return (d && d->value) ? impl::get_value_float(d->value) : def;
- }
-
- PUGI__FN bool xml_text::as_bool(bool def) const
- {
- xml_node_struct* d = _data();
-
- return (d && d->value) ? impl::get_value_bool(d->value) : def;
- }
-
-#ifdef PUGIXML_HAS_LONG_LONG
- PUGI__FN long long xml_text::as_llong(long long def) const
- {
- xml_node_struct* d = _data();
-
- return (d && d->value) ? impl::get_value_llong(d->value) : def;
- }
-
- PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
- {
- xml_node_struct* d = _data();
-
- return (d && d->value) ? impl::get_value_ullong(d->value) : def;
- }
-#endif
-
- PUGI__FN bool xml_text::set(const char_t* rhs)
- {
- xml_node_struct* dn = _data_new();
-
- return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false;
- }
-
- PUGI__FN bool xml_text::set(int rhs)
- {
- xml_node_struct* dn = _data_new();
-
- return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
- }
-
- PUGI__FN bool xml_text::set(unsigned int rhs)
- {
- xml_node_struct* dn = _data_new();
-
- return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
- }
-
- PUGI__FN bool xml_text::set(float rhs)
- {
- xml_node_struct* dn = _data_new();
-
- return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
- }
-
- PUGI__FN bool xml_text::set(double rhs)
- {
- xml_node_struct* dn = _data_new();
-
- return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
- }
-
- PUGI__FN bool xml_text::set(bool rhs)
- {
- xml_node_struct* dn = _data_new();
-
- return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
- }
-
-#ifdef PUGIXML_HAS_LONG_LONG
- PUGI__FN bool xml_text::set(long long rhs)
- {
- xml_node_struct* dn = _data_new();
-
- return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
- }
-
- PUGI__FN bool xml_text::set(unsigned long long rhs)
- {
- xml_node_struct* dn = _data_new();
-
- return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
- }
-#endif
-
- PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
- {
- set(rhs);
- return *this;
- }
-
- PUGI__FN xml_text& xml_text::operator=(int rhs)
- {
- set(rhs);
- return *this;
- }
-
- PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
- {
- set(rhs);
- return *this;
- }
-
- PUGI__FN xml_text& xml_text::operator=(double rhs)
- {
- set(rhs);
- return *this;
- }
-
- PUGI__FN xml_text& xml_text::operator=(float rhs)
- {
- set(rhs);
- return *this;
- }
-
- PUGI__FN xml_text& xml_text::operator=(bool rhs)
- {
- set(rhs);
- return *this;
- }
-
-#ifdef PUGIXML_HAS_LONG_LONG
- PUGI__FN xml_text& xml_text::operator=(long long rhs)
- {
- set(rhs);
- return *this;
- }
-
- PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
- {
- set(rhs);
- return *this;
- }
-#endif
-
- PUGI__FN xml_node xml_text::data() const
- {
- return xml_node(_data());
- }
-
-#ifdef __BORLANDC__
- PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
- {
- return (bool)lhs && rhs;
- }
-
- PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
- {
- return (bool)lhs || rhs;
- }
-#endif
-
- PUGI__FN xml_node_iterator::xml_node_iterator()
- {
- }
-
- PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
- {
- }
-
- PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
- {
- }
-
- PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
- {
- return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
- }
-
- PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
- {
- return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
- }
-
- PUGI__FN xml_node& xml_node_iterator::operator*() const
- {
- assert(_wrap._root);
- return _wrap;
- }
-
- PUGI__FN xml_node* xml_node_iterator::operator->() const
- {
- assert(_wrap._root);
- return const_cast<xml_node*>(&_wrap); // BCC32 workaround
- }
-
- PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()
- {
- assert(_wrap._root);
- _wrap._root = _wrap._root->next_sibling;
- return *this;
- }
-
- PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
- {
- xml_node_iterator temp = *this;
- ++*this;
- return temp;
- }
-
- PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()
- {
- _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
- return *this;
- }
-
- PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
- {
- xml_node_iterator temp = *this;
- --*this;
- return temp;
- }
-
- PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
- {
- }
-
- PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
- {
- }
-
- PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
- {
- }
-
- PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
- {
- return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
- }
-
- PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
- {
- return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
- }
-
- PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
- {
- assert(_wrap._attr);
- return _wrap;
- }
-
- PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
- {
- assert(_wrap._attr);
- return const_cast<xml_attribute*>(&_wrap); // BCC32 workaround
- }
-
- PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
- {
- assert(_wrap._attr);
- _wrap._attr = _wrap._attr->next_attribute;
- return *this;
- }
-
- PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
- {
- xml_attribute_iterator temp = *this;
- ++*this;
- return temp;
- }
-
- PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
- {
- _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
- return *this;
- }
-
- PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
- {
- xml_attribute_iterator temp = *this;
- --*this;
- return temp;
- }
-
- PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
- {
- }
-
- PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
- {
- }
-
- PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
- {
- }
-
- PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
- {
- return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
- }
-
- PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
- {
- return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
- }
-
- PUGI__FN xml_node& xml_named_node_iterator::operator*() const
- {
- assert(_wrap._root);
- return _wrap;
- }
-
- PUGI__FN xml_node* xml_named_node_iterator::operator->() const
- {
- assert(_wrap._root);
- return const_cast<xml_node*>(&_wrap); // BCC32 workaround
- }
-
- PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
- {
- assert(_wrap._root);
- _wrap = _wrap.next_sibling(_name);
- return *this;
- }
-
- PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
- {
- xml_named_node_iterator temp = *this;
- ++*this;
- return temp;
- }
-
- PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--()
- {
- if (_wrap._root)
- _wrap = _wrap.previous_sibling(_name);
- else
- {
- _wrap = _parent.last_child();
-
- if (!impl::strequal(_wrap.name(), _name))
- _wrap = _wrap.previous_sibling(_name);
- }
-
- return *this;
- }
-
- PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
- {
- xml_named_node_iterator temp = *this;
- --*this;
- return temp;
- }
-
- PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
- {
- }
-
- PUGI__FN xml_parse_result::operator bool() const
- {
- return status == status_ok;
- }
-
- PUGI__FN const char* xml_parse_result::description() const
- {
- switch (status)
- {
- case status_ok: return "No error";
-
- case status_file_not_found: return "File was not found";
- case status_io_error: return "Error reading from file/stream";
- case status_out_of_memory: return "Could not allocate memory";
- case status_internal_error: return "Internal error occurred";
-
- case status_unrecognized_tag: return "Could not determine tag type";
-
- case status_bad_pi: return "Error parsing document declaration/processing instruction";
- case status_bad_comment: return "Error parsing comment";
- case status_bad_cdata: return "Error parsing CDATA section";
- case status_bad_doctype: return "Error parsing document type declaration";
- case status_bad_pcdata: return "Error parsing PCDATA section";
- case status_bad_start_element: return "Error parsing start element tag";
- case status_bad_attribute: return "Error parsing element attribute";
- case status_bad_end_element: return "Error parsing end element tag";
- case status_end_element_mismatch: return "Start-end tags mismatch";
-
- case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
-
- case status_no_document_element: return "No document element found";
-
- default: return "Unknown error";
- }
- }
-
- PUGI__FN xml_document::xml_document(): _buffer(0)
- {
- create();
- }
-
- PUGI__FN xml_document::~xml_document()
- {
- destroy();
- }
-
- PUGI__FN void xml_document::reset()
- {
- destroy();
- create();
- }
-
- PUGI__FN void xml_document::reset(const xml_document& proto)
- {
- reset();
-
- for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
- append_copy(cur);
- }
-
- PUGI__FN void xml_document::create()
- {
- assert(!_root);
-
- #ifdef PUGIXML_COMPACT
- const size_t page_offset = sizeof(uint32_t);
- #else
- const size_t page_offset = 0;
- #endif
-
- // initialize sentinel page
- PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + impl::xml_memory_page_alignment - sizeof(void*) + page_offset <= sizeof(_memory));
-
- // align upwards to page boundary
- void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(_memory) + (impl::xml_memory_page_alignment - 1)) & ~(impl::xml_memory_page_alignment - 1));
-
- // prepare page structure
- impl::xml_memory_page* page = impl::xml_memory_page::construct(page_memory);
- assert(page);
-
- page->busy_size = impl::xml_memory_page_size;
-
- // setup first page marker
- #ifdef PUGIXML_COMPACT
- // round-trip through void* to avoid 'cast increases required alignment of target type' warning
- page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
- *page->compact_page_marker = sizeof(impl::xml_memory_page);
- #endif
-
- // allocate new root
- _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page);
- _root->prev_sibling_c = _root;
-
- // setup sentinel page
- page->allocator = static_cast<impl::xml_document_struct*>(_root);
-
- // verify the document allocation
- assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
- }
-
- PUGI__FN void xml_document::destroy()
- {
- assert(_root);
-
- // destroy static storage
- if (_buffer)
- {
- impl::xml_memory::deallocate(_buffer);
- _buffer = 0;
- }
-
- // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
- for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
- {
- if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
- }
-
- // destroy dynamic storage, leave sentinel page (it's in static memory)
- impl::xml_memory_page* root_page = PUGI__GETPAGE(_root);
- assert(root_page && !root_page->prev);
- assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
-
- for (impl::xml_memory_page* page = root_page->next; page; )
- {
- impl::xml_memory_page* next = page->next;
-
- impl::xml_allocator::deallocate_page(page);
-
- page = next;
- }
-
- #ifdef PUGIXML_COMPACT
- // destroy hash table
- static_cast<impl::xml_document_struct*>(_root)->hash.clear();
- #endif
-
- _root = 0;
- }
-
-#ifndef PUGIXML_NO_STL
- PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
- {
- reset();
-
- return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer);
- }
-
- PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
- {
- reset();
-
- return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
- }
-#endif
-
- PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
- {
- // Force native encoding (skip autodetection)
- #ifdef PUGIXML_WCHAR_MODE
- xml_encoding encoding = encoding_wchar;
- #else
- xml_encoding encoding = encoding_utf8;
- #endif
-
- return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
- }
-
- PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
- {
- return load_string(contents, options);
- }
-
- PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
- {
- reset();
-
- using impl::auto_deleter; // MSVC7 workaround
- auto_deleter<FILE, int(*)(FILE*)> file(fopen(path_, "rb"), fclose);
-
- return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
- }
-
- PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
- {
- reset();
-
- using impl::auto_deleter; // MSVC7 workaround
- auto_deleter<FILE, int(*)(FILE*)> file(impl::open_file_wide(path_, L"rb"), fclose);
-
- return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
- }
-
- PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
- {
- reset();
-
- return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
- }
-
- PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
- {
- reset();
-
- return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);
- }
-
- PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
- {
- reset();
-
- return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);
- }
-
- PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
- {
- impl::xml_buffered_writer buffered_writer(writer, encoding);
-
- if ((flags & format_write_bom) && encoding != encoding_latin1)
- {
- // BOM always represents the codepoint U+FEFF, so just write it in native encoding
- #ifdef PUGIXML_WCHAR_MODE
- unsigned int bom = 0xfeff;
- buffered_writer.write(static_cast<wchar_t>(bom));
- #else
- buffered_writer.write('\xef', '\xbb', '\xbf');
- #endif
- }
-
- if (!(flags & format_no_declaration) && !impl::has_declaration(_root))
- {
- buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
- if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
- buffered_writer.write('?', '>');
- if (!(flags & format_raw)) buffered_writer.write('\n');
- }
-
- impl::node_output(buffered_writer, _root, indent, flags, 0);
-
- buffered_writer.flush();
- }
-
-#ifndef PUGIXML_NO_STL
- PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
- {
- xml_writer_stream writer(stream);
-
- save(writer, indent, flags, encoding);
- }
-
- PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
- {
- xml_writer_stream writer(stream);
-
- save(writer, indent, flags, encoding_wchar);
- }
-#endif
-
- PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
- {
- using impl::auto_deleter; // MSVC7 workaround
- auto_deleter<FILE, int(*)(FILE*)> file(fopen(path_, (flags & format_save_file_text) ? "w" : "wb"), fclose);
-
- return impl::save_file_impl(*this, file.data, indent, flags, encoding);
- }
-
- PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
- {
- using impl::auto_deleter; // MSVC7 workaround
- auto_deleter<FILE, int(*)(FILE*)> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), fclose);
-
- return impl::save_file_impl(*this, file.data, indent, flags, encoding);
- }
-
- PUGI__FN xml_node xml_document::document_element() const
- {
- assert(_root);
-
- for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
- if (PUGI__NODETYPE(i) == node_element)
- return xml_node(i);
-
- return xml_node();
- }
-
-#ifndef PUGIXML_NO_STL
- PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
- {
- assert(str);
-
- return impl::as_utf8_impl(str, impl::strlength_wide(str));
- }
-
- PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
- {
- return impl::as_utf8_impl(str.c_str(), str.size());
- }
-
- PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
- {
- assert(str);
-
- return impl::as_wide_impl(str, strlen(str));
- }
-
- PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
- {
- return impl::as_wide_impl(str.c_str(), str.size());
- }
-#endif
-
- PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
- {
- impl::xml_memory::allocate = allocate;
- impl::xml_memory::deallocate = deallocate;
- }
-
- PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
- {
- return impl::xml_memory::allocate;
- }
-
- PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
- {
- return impl::xml_memory::deallocate;
- }
-}
-
-#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
-namespace std
-{
- // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
- PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
- {
- return std::bidirectional_iterator_tag();
- }
-
- PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
- {
- return std::bidirectional_iterator_tag();
- }
-
- PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
- {
- return std::bidirectional_iterator_tag();
- }
-}
-#endif
-
-#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
-namespace std
-{
- // Workarounds for (non-standard) iterator category detection
- PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
- {
- return std::bidirectional_iterator_tag();
- }
-
- PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
- {
- return std::bidirectional_iterator_tag();
- }
-
- PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
- {
- return std::bidirectional_iterator_tag();
- }
-}
-#endif
-
-#ifndef PUGIXML_NO_XPATH
-// STL replacements
-PUGI__NS_BEGIN
- struct equal_to
- {
- template <typename T> bool operator()(const T& lhs, const T& rhs) const
- {
- return lhs == rhs;
- }
- };
-
- struct not_equal_to
- {
- template <typename T> bool operator()(const T& lhs, const T& rhs) const
- {
- return lhs != rhs;
- }
- };
-
- struct less
- {
- template <typename T> bool operator()(const T& lhs, const T& rhs) const
- {
- return lhs < rhs;
- }
- };
-
- struct less_equal
- {
- template <typename T> bool operator()(const T& lhs, const T& rhs) const
- {
- return lhs <= rhs;
- }
- };
-
- template <typename T> void swap(T& lhs, T& rhs)
- {
- T temp = lhs;
- lhs = rhs;
- rhs = temp;
- }
-
- template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
- {
- I result = begin;
-
- for (I it = begin + 1; it != end; ++it)
- if (pred(*it, *result))
- result = it;
-
- return result;
- }
-
- template <typename I> void reverse(I begin, I end)
- {
- while (end - begin > 1) swap(*begin++, *--end);
- }
-
- template <typename I> I unique(I begin, I end)
- {
- // fast skip head
- while (end - begin > 1 && *begin != *(begin + 1)) begin++;
-
- if (begin == end) return begin;
-
- // last written element
- I write = begin++;
-
- // merge unique elements
- while (begin != end)
- {
- if (*begin != *write)
- *++write = *begin++;
- else
- begin++;
- }
-
- // past-the-end (write points to live element)
- return write + 1;
- }
-
- template <typename I> void copy_backwards(I begin, I end, I target)
- {
- while (begin != end) *--target = *--end;
- }
-
- template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
- {
- assert(begin != end);
-
- for (I it = begin + 1; it != end; ++it)
- {
- T val = *it;
-
- if (pred(val, *begin))
- {
- // move to front
- copy_backwards(begin, it, it + 1);
- *begin = val;
- }
- else
- {
- I hole = it;
-
- // move hole backwards
- while (pred(val, *(hole - 1)))
- {
- *hole = *(hole - 1);
- hole--;
- }
-
- // fill hole with element
- *hole = val;
- }
- }
- }
-
- // std variant for elements with ==
- template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
- {
- I eqbeg = middle, eqend = middle + 1;
-
- // expand equal range
- while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg;
- while (eqend != end && *eqend == *eqbeg) ++eqend;
-
- // process outer elements
- I ltend = eqbeg, gtbeg = eqend;
-
- for (;;)
- {
- // find the element from the right side that belongs to the left one
- for (; gtbeg != end; ++gtbeg)
- if (!pred(*eqbeg, *gtbeg))
- {
- if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++);
- else break;
- }
-
- // find the element from the left side that belongs to the right one
- for (; ltend != begin; --ltend)
- if (!pred(*(ltend - 1), *eqbeg))
- {
- if (*eqbeg == *(ltend - 1)) swap(*(ltend - 1), *--eqbeg);
- else break;
- }
-
- // scanned all elements
- if (gtbeg == end && ltend == begin)
- {
- *out_eqbeg = eqbeg;
- *out_eqend = eqend;
- return;
- }
-
- // make room for elements by moving equal area
- if (gtbeg == end)
- {
- if (--ltend != --eqbeg) swap(*ltend, *eqbeg);
- swap(*eqbeg, *--eqend);
- }
- else if (ltend == begin)
- {
- if (eqend != gtbeg) swap(*eqbeg, *eqend);
- ++eqend;
- swap(*gtbeg++, *eqbeg++);
- }
- else swap(*gtbeg++, *--ltend);
- }
- }
-
- template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
- {
- if (pred(*middle, *first)) swap(*middle, *first);
- if (pred(*last, *middle)) swap(*last, *middle);
- if (pred(*middle, *first)) swap(*middle, *first);
- }
-
- template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
- {
- if (last - first <= 40)
- {
- // median of three for small chunks
- median3(first, middle, last, pred);
- }
- else
- {
- // median of nine
- size_t step = (last - first + 1) / 8;
-
- median3(first, first + step, first + 2 * step, pred);
- median3(middle - step, middle, middle + step, pred);
- median3(last - 2 * step, last - step, last, pred);
- median3(first + step, middle, last - step, pred);
- }
- }
-
- template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
- {
- // sort large chunks
- while (end - begin > 32)
- {
- // find median element
- I middle = begin + (end - begin) / 2;
- median(begin, middle, end - 1, pred);
-
- // partition in three chunks (< = >)
- I eqbeg, eqend;
- partition(begin, middle, end, pred, &eqbeg, &eqend);
-
- // loop on larger half
- if (eqbeg - begin > end - eqend)
- {
- sort(eqend, end, pred);
- end = eqbeg;
- }
- else
- {
- sort(begin, eqbeg, pred);
- begin = eqend;
- }
- }
-
- // insertion sort small chunk
- if (begin != end) insertion_sort(begin, end, pred, &*begin);
- }
-PUGI__NS_END
-
-// Allocator used for AST and evaluation stacks
-PUGI__NS_BEGIN
- static const size_t xpath_memory_page_size =
- #ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
- PUGIXML_MEMORY_XPATH_PAGE_SIZE
- #else
- 4096
- #endif
- ;
-
- static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*);
-
- struct xpath_memory_block
- {
- xpath_memory_block* next;
- size_t capacity;
-
- union
- {
- char data[xpath_memory_page_size];
- double alignment;
- };
- };
-
- class xpath_allocator
- {
- xpath_memory_block* _root;
- size_t _root_size;
-
- public:
- #ifdef PUGIXML_NO_EXCEPTIONS
- jmp_buf* error_handler;
- #endif
-
- xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size)
- {
- #ifdef PUGIXML_NO_EXCEPTIONS
- error_handler = 0;
- #endif
- }
-
- void* allocate_nothrow(size_t size)
- {
- // round size up to block alignment boundary
- size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
-
- if (_root_size + size <= _root->capacity)
- {
- void* buf = &_root->data[0] + _root_size;
- _root_size += size;
- return buf;
- }
- else
- {
- // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
- size_t block_capacity_base = sizeof(_root->data);
- size_t block_capacity_req = size + block_capacity_base / 4;
- size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;
-
- size_t block_size = block_capacity + offsetof(xpath_memory_block, data);
-
- xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
- if (!block) return 0;
-
- block->next = _root;
- block->capacity = block_capacity;
-
- _root = block;
- _root_size = size;
-
- return block->data;
- }
- }
-
- void* allocate(size_t size)
- {
- void* result = allocate_nothrow(size);
-
- if (!result)
- {
- #ifdef PUGIXML_NO_EXCEPTIONS
- assert(error_handler);
- longjmp(*error_handler, 1);
- #else
- throw std::bad_alloc();
- #endif
- }
-
- return result;
- }
-
- void* reallocate(void* ptr, size_t old_size, size_t new_size)
- {
- // round size up to block alignment boundary
- old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
- new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
-
- // we can only reallocate the last object
- assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);
-
- // adjust root size so that we have not allocated the object at all
- bool only_object = (_root_size == old_size);
-
- if (ptr) _root_size -= old_size;
-
- // allocate a new version (this will obviously reuse the memory if possible)
- void* result = allocate(new_size);
- assert(result);
-
- // we have a new block
- if (result != ptr && ptr)
- {
- // copy old data
- assert(new_size >= old_size);
- memcpy(result, ptr, old_size);
-
- // free the previous page if it had no other objects
- if (only_object)
- {
- assert(_root->data == result);
- assert(_root->next);
-
- xpath_memory_block* next = _root->next->next;
-
- if (next)
- {
- // deallocate the whole page, unless it was the first one
- xml_memory::deallocate(_root->next);
- _root->next = next;
- }
- }
- }
-
- return result;
- }
-
- void revert(const xpath_allocator& state)
- {
- // free all new pages
- xpath_memory_block* cur = _root;
-
- while (cur != state._root)
- {
- xpath_memory_block* next = cur->next;
-
- xml_memory::deallocate(cur);
-
- cur = next;
- }
-
- // restore state
- _root = state._root;
- _root_size = state._root_size;
- }
-
- void release()
- {
- xpath_memory_block* cur = _root;
- assert(cur);
-
- while (cur->next)
- {
- xpath_memory_block* next = cur->next;
-
- xml_memory::deallocate(cur);
-
- cur = next;
- }
- }
- };
-
- struct xpath_allocator_capture
- {
- xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
- {
- }
-
- ~xpath_allocator_capture()
- {
- _target->revert(_state);
- }
-
- xpath_allocator* _target;
- xpath_allocator _state;
- };
-
- struct xpath_stack
- {
- xpath_allocator* result;
- xpath_allocator* temp;
- };
-
- struct xpath_stack_data
- {
- xpath_memory_block blocks[2];
- xpath_allocator result;
- xpath_allocator temp;
- xpath_stack stack;
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- jmp_buf error_handler;
- #endif
-
- xpath_stack_data(): result(blocks + 0), temp(blocks + 1)
- {
- blocks[0].next = blocks[1].next = 0;
- blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
-
- stack.result = &result;
- stack.temp = &temp;
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- result.error_handler = temp.error_handler = &error_handler;
- #endif
- }
-
- ~xpath_stack_data()
- {
- result.release();
- temp.release();
- }
- };
-PUGI__NS_END
-
-// String class
-PUGI__NS_BEGIN
- class xpath_string
- {
- const char_t* _buffer;
- bool _uses_heap;
- size_t _length_heap;
-
- static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
- {
- char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
- assert(result);
-
- memcpy(result, string, length * sizeof(char_t));
- result[length] = 0;
-
- return result;
- }
-
- xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap)
- {
- }
-
- public:
- static xpath_string from_const(const char_t* str)
- {
- return xpath_string(str, false, 0);
- }
-
- static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end)
- {
- assert(begin <= end && *end == 0);
-
- return xpath_string(begin, true, static_cast<size_t>(end - begin));
- }
-
- static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc)
- {
- assert(begin <= end);
-
- size_t length = static_cast<size_t>(end - begin);
-
- return length == 0 ? xpath_string() : xpath_string(duplicate_string(begin, length, alloc), true, length);
- }
-
- xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0)
- {
- }
-
- void append(const xpath_string& o, xpath_allocator* alloc)
- {
- // skip empty sources
- if (!*o._buffer) return;
-
- // fast append for constant empty target and constant source
- if (!*_buffer && !_uses_heap && !o._uses_heap)
- {
- _buffer = o._buffer;
- }
- else
- {
- // need to make heap copy
- size_t target_length = length();
- size_t source_length = o.length();
- size_t result_length = target_length + source_length;
-
- // allocate new buffer
- char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
- assert(result);
-
- // append first string to the new buffer in case there was no reallocation
- if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
-
- // append second string to the new buffer
- memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
- result[result_length] = 0;
-
- // finalize
- _buffer = result;
- _uses_heap = true;
- _length_heap = result_length;
- }
- }
-
- const char_t* c_str() const
- {
- return _buffer;
- }
-
- size_t length() const
- {
- return _uses_heap ? _length_heap : strlength(_buffer);
- }
-
- char_t* data(xpath_allocator* alloc)
- {
- // make private heap copy
- if (!_uses_heap)
- {
- size_t length_ = strlength(_buffer);
-
- _buffer = duplicate_string(_buffer, length_, alloc);
- _uses_heap = true;
- _length_heap = length_;
- }
-
- return const_cast<char_t*>(_buffer);
- }
-
- bool empty() const
- {
- return *_buffer == 0;
- }
-
- bool operator==(const xpath_string& o) const
- {
- return strequal(_buffer, o._buffer);
- }
-
- bool operator!=(const xpath_string& o) const
- {
- return !strequal(_buffer, o._buffer);
- }
-
- bool uses_heap() const
- {
- return _uses_heap;
- }
- };
-PUGI__NS_END
-
-PUGI__NS_BEGIN
- PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
- {
- while (*pattern && *string == *pattern)
- {
- string++;
- pattern++;
- }
-
- return *pattern == 0;
- }
-
- PUGI__FN const char_t* find_char(const char_t* s, char_t c)
- {
- #ifdef PUGIXML_WCHAR_MODE
- return wcschr(s, c);
- #else
- return strchr(s, c);
- #endif
- }
-
- PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
- {
- #ifdef PUGIXML_WCHAR_MODE
- // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
- return (*p == 0) ? s : wcsstr(s, p);
- #else
- return strstr(s, p);
- #endif
- }
-
- // Converts symbol to lower case, if it is an ASCII one
- PUGI__FN char_t tolower_ascii(char_t ch)
- {
- return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
- }
-
- PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
- {
- if (na.attribute())
- return xpath_string::from_const(na.attribute().value());
- else
- {
- xml_node n = na.node();
-
- switch (n.type())
- {
- case node_pcdata:
- case node_cdata:
- case node_comment:
- case node_pi:
- return xpath_string::from_const(n.value());
-
- case node_document:
- case node_element:
- {
- xpath_string result;
-
- xml_node cur = n.first_child();
-
- while (cur && cur != n)
- {
- if (cur.type() == node_pcdata || cur.type() == node_cdata)
- result.append(xpath_string::from_const(cur.value()), alloc);
-
- if (cur.first_child())
- cur = cur.first_child();
- else if (cur.next_sibling())
- cur = cur.next_sibling();
- else
- {
- while (!cur.next_sibling() && cur != n)
- cur = cur.parent();
-
- if (cur != n) cur = cur.next_sibling();
- }
- }
-
- return result;
- }
-
- default:
- return xpath_string();
- }
- }
- }
-
- PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn)
- {
- assert(ln->parent == rn->parent);
-
- // there is no common ancestor (the shared parent is null), nodes are from different documents
- if (!ln->parent) return ln < rn;
-
- // determine sibling order
- xml_node_struct* ls = ln;
- xml_node_struct* rs = rn;
-
- while (ls && rs)
- {
- if (ls == rn) return true;
- if (rs == ln) return false;
-
- ls = ls->next_sibling;
- rs = rs->next_sibling;
- }
-
- // if rn sibling chain ended ln must be before rn
- return !rs;
- }
-
- PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn)
- {
- // find common ancestor at the same depth, if any
- xml_node_struct* lp = ln;
- xml_node_struct* rp = rn;
-
- while (lp && rp && lp->parent != rp->parent)
- {
- lp = lp->parent;
- rp = rp->parent;
- }
-
- // parents are the same!
- if (lp && rp) return node_is_before_sibling(lp, rp);
-
- // nodes are at different depths, need to normalize heights
- bool left_higher = !lp;
-
- while (lp)
- {
- lp = lp->parent;
- ln = ln->parent;
- }
-
- while (rp)
- {
- rp = rp->parent;
- rn = rn->parent;
- }
-
- // one node is the ancestor of the other
- if (ln == rn) return left_higher;
-
- // find common ancestor... again
- while (ln->parent != rn->parent)
- {
- ln = ln->parent;
- rn = rn->parent;
- }
-
- return node_is_before_sibling(ln, rn);
- }
-
- PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node)
- {
- while (node && node != parent) node = node->parent;
-
- return parent && node == parent;
- }
-
- PUGI__FN const void* document_buffer_order(const xpath_node& xnode)
- {
-     // Returns a pointer usable as a document-order key (the name or value string of the
-     // node/attribute), or 0 when no such pointer qualifies under the header flags checked below.
-     if (xml_node_struct* node = xnode.node().internal_object())
-     {
-         // No key when the document is flagged as having shared contents
-         if ((get_document(node).header & xml_memory_page_contents_shared_mask) != 0) return 0;
-
-         // Only strings not flagged as allocated-or-shared qualify
-         if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name;
-         if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value;
-
-         return 0;
-     }
-
-     if (xml_attribute_struct* attr = xnode.attribute().internal_object())
-     {
-         if ((get_document(attr).header & xml_memory_page_contents_shared_mask) != 0) return 0;
-
-         if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name;
-         if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value;
-
-         return 0;
-     }
-
-     // Neither a node nor an attribute
-     return 0;
- }
-
- struct document_order_comparator
- {
-     // Returns true when lhs comes strictly before rhs in document order.
-     // The comparison is irreflexive: comparing an item with itself yields false on every path.
-     bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
-     {
-         // optimized document order based check: compare the pointers returned by document_buffer_order when both are available
-         const void* lo = document_buffer_order(lhs);
-         const void* ro = document_buffer_order(rhs);
-
-         if (lo && ro) return lo < ro;
-
-         // slow comparison
-         xml_node ln = lhs.node(), rn = rhs.node();
-
-         // compare attributes
-         if (lhs.attribute() && rhs.attribute())
-         {
-             // shared parent
-             if (lhs.parent() == rhs.parent())
-             {
-                 // determine sibling order; the scan starts *after* lhs so that an attribute
-                 // never compares as "before" itself (the buffer-order and node paths already
-                 // return false for equal operands)
-                 for (xml_attribute a = lhs.attribute().next_attribute(); a; a = a.next_attribute())
-                     if (a == rhs.attribute())
-                         return true;
-
-                 return false;
-             }
-
-             // compare attribute parents
-             ln = lhs.parent();
-             rn = rhs.parent();
-         }
-         else if (lhs.attribute())
-         {
-             // attributes go after the parent element
-             if (lhs.parent() == rhs.node()) return false;
-
-             ln = lhs.parent();
-         }
-         else if (rhs.attribute())
-         {
-             // attributes go after the parent element
-             if (rhs.parent() == lhs.node()) return true;
-
-             rn = rhs.parent();
-         }
-
-         if (ln == rn) return false;
-
-         // at least one side has no node to compare; fall back to handle ordering
-         if (!ln || !rn) return ln < rn;
-
-         return node_is_before(ln.internal_object(), rn.internal_object());
-     }
- };
-
- struct duplicate_comparator
- {
-     // Ordering used to group identical entries together: every attribute entry sorts before
-     // every non-attribute entry, and entries of the same kind are ordered by their handle's operator<.
-     bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
-     {
-         if (lhs.attribute() && rhs.attribute()) return lhs.attribute() < rhs.attribute();
-         if (lhs.attribute()) return true;
-         if (rhs.attribute()) return false;
-
-         return lhs.node() < rhs.node();
-     }
- };
-
- PUGI__FN double gen_nan() // Produces a NaN value of type double.
- {
- #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24)) // taken when float has the IEEE-754 single-precision parameters
- union { float f; uint32_t i; } u[sizeof(float) == sizeof(uint32_t) ? 1 : -1]; // array size becomes -1 (a compile error) if float is not exactly as wide as uint32_t
- u[0].i = 0x7fc00000; // bit pattern of a quiet NaN in IEEE-754 single precision
- return u[0].f; // read back as float, then widened to double by the return
- #else
- // fallback: divide zero by zero at run time; the volatile qualifier presumably keeps the compiler from folding or rejecting the division
- const volatile double zero = 0.0;
- return zero / zero;
- #endif
- }
-
- PUGI__FN bool is_nan(double value) // Reports whether value is NaN, using whichever classification facility the platform offers.
- {
- #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
- return !!_isnan(value); // _isnan returns an int; !! converts it to bool
- #elif defined(fpclassify) && defined(FP_NAN) // only usable when fpclassify is provided as a macro
- return fpclassify(value) == FP_NAN;
- #else
- // fallback: NaN is the only value that compares unequal to itself; the volatile copy presumably stops the compiler from optimizing the comparison away
- const volatile double v = value;
- return v != v;
- #endif
- }
-
- PUGI__FN const char_t* convert_number_to_string_special(double value) // Returns a constant string for zero ("0"), NaN and the infinities, or 0 (null) for every other number.
- {
- #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
- if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0; // finite: only zero is special
- if (_isnan(value)) return PUGIXML_TEXT("NaN");
- return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); // neither finite nor NaN, so an infinity
- #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
- switch (fpclassify(value))
- {
- case FP_NAN:
- return PUGIXML_TEXT("NaN");
-
- case FP_INFINITE:
- return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
-
- case FP_ZERO:
- return PUGIXML_TEXT("0");
-
- default:
- return 0; // ordinary number: caller must format it
- }
- #else
- // fallback: classify using plain comparisons on a volatile copy
- const volatile double v = value;
-
- if (v == 0) return PUGIXML_TEXT("0");
- if (v != v) return PUGIXML_TEXT("NaN"); // only NaN differs from itself
- if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); // zero was handled above, so doubling leaves only an infinity unchanged
- return 0;
- #endif
- }
-
- PUGI__FN bool convert_number_to_boolean(double value)
- {
-     // A number converts to false when it is zero or NaN, and to true otherwise.
-     if (value == 0) return false;
-
-     return !is_nan(value);
- }
-
- PUGI__FN void truncate_zeros(char* begin, char* end)
- {
-     // Back up over the run of '0' characters that ends at `end` (never past `begin`),
-     // then terminate the string there. Note that *end is written even when nothing is trimmed.
-     char* tail = end;
-
-     for (; tail != begin && *(tail - 1) == '0'; --tail)
-     {
-     }
-
-     *tail = 0;
- }
-
- // Splits value into decimal mantissa digits, to be read as 0.xxxxx with the leading "0." implied, plus a base-10 exponent; *out_mantissa points into buffer.
-#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
- PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
- {
- // get base values (the sign reported by _ecvt_s is not used afterwards)
- int sign, exponent;
- _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);
-
- // truncate redundant zeros
- truncate_zeros(buffer, buffer + strlen(buffer));
-
- // fill results
- *out_mantissa = buffer;
- *out_exponent = exponent;
- }
-#else
- PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
- {
- // get a scientific notation value with IEEE DBL_DIG decimals
- sprintf(buffer, "%.*e", DBL_DIG, value); // NOTE(review): sprintf is unbounded; the assert below only detects an overflow after the fact, and only when asserts are enabled
- assert(strlen(buffer) < buffer_size);
- (void)!buffer_size; // buffer_size is otherwise unused when asserts are compiled out
-
- // get the exponent (possibly negative)
- char* exponent_string = strchr(buffer, 'e');
- assert(exponent_string);
-
- int exponent = atoi(exponent_string + 1);
-
- // extract mantissa string: skip sign
- char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
- assert(mantissa[0] != '0' && mantissa[1] == '.'); // expects the form d.ddd with a nonzero leading digit
-
- // divide mantissa by 10 to eliminate integer part: overwrite the '.' with the leading digit and start one character later
- mantissa[1] = mantissa[0];
- mantissa++;
- exponent++;
-
- // remove extra mantissa digits and zero-terminate mantissa (the terminator lands at or before the 'e')
- truncate_zeros(mantissa, exponent_string);
-
- // fill results
- *out_mantissa = mantissa;
- *out_exponent = exponent;
- }
-#endif
-
- PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
- {
-     // Zero, NaN and the infinities have fixed spellings
-     if (const char_t* fixed = convert_number_to_string_special(value))
-         return xpath_string::from_const(fixed);
-
-     // Split the number into decimal digits (0.ddd form) and a base-10 exponent
-     char digits_storage[32];
-
-     char* digits;
-     int exponent;
-     convert_number_to_mantissa_exponent(value, digits_storage, sizeof(digits_storage), &digits, &exponent);
-
-     // Size the output: buffer contents, zero padding implied by the exponent, and 4 extra characters
-     const size_t zero_padding = static_cast<size_t>(exponent > 0 ? exponent : -exponent);
-     size_t result_size = strlen(digits_storage) + zero_padding + 4;
-     char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
-     assert(result);
-
-     char_t* out = result;
-
-     // Sign
-     if (value < 0) *out++ = '-';
-
-     // Integer part: a lone '0' when the exponent is not positive, otherwise `exponent`
-     // digits taken from the mantissa and padded with '0' once the mantissa runs out
-     if (exponent > 0)
-     {
-         for (; exponent > 0; --exponent)
-         {
-             assert(*digits == 0 || static_cast<unsigned int>(static_cast<unsigned int>(*digits) - '0') <= 9);
-             *out++ = *digits ? *digits++ : '0';
-         }
-     }
-     else
-     {
-         *out++ = '0';
-     }
-
-     // Fractional part, emitted only when mantissa digits remain
-     if (*digits)
-     {
-         *out++ = '.';
-
-         // Leading zeroes required by a negative exponent
-         for (; exponent < 0; ++exponent) *out++ = '0';
-
-         // Remaining mantissa digits
-         for (; *digits; ++digits)
-         {
-             assert(static_cast<unsigned int>(*digits - '0') <= 9);
-             *out++ = *digits;
-         }
-     }
-
-     // Terminate
-     assert(out < result + result_size);
-     *out = 0;
-
-     return xpath_string::from_heap_preallocated(result, out);
- }
-
- PUGI__FN bool check_string_to_number_format(const char_t* string)
- {
-     // Accepts: optional whitespace, optional '-', then digits with an optional ".digits" tail
-     // or ".digits" alone, then optional whitespace and nothing else.
-     const char_t* p = string;
-
-     // Leading whitespace
-     while (PUGI__IS_CHARTYPE(*p, ct_space)) ++p;
-
-     // Optional minus sign
-     if (*p == '-') ++p;
-
-     if (!*p) return false;
-
-     // Either an integer part, or a '.' immediately followed by a digit, must start here
-     const bool has_integer = PUGI__IS_CHARTYPEX(p[0], ctx_digit);
-     const bool has_fraction_only = p[0] == '.' && PUGI__IS_CHARTYPEX(p[1], ctx_digit);
-
-     if (!has_integer && !has_fraction_only) return false;
-
-     // Integer digits
-     while (PUGI__IS_CHARTYPEX(*p, ctx_digit)) ++p;
-
-     // Decimal point and fraction digits
-     if (*p == '.')
-     {
-         ++p;
-
-         while (PUGI__IS_CHARTYPEX(*p, ctx_digit)) ++p;
-     }
-
-     // Trailing whitespace
-     while (PUGI__IS_CHARTYPE(*p, ct_space)) ++p;
-
-     // Valid only if the whole string was consumed
-     return *p == 0;
- }
-
- PUGI__FN double convert_string_to_number(const char_t* string)
- {
-     // Strings that pass the format check are handed to the C library parser;
-     // everything else converts to NaN.
-     if (check_string_to_number_format(string))
-     {
-     #ifdef PUGIXML_WCHAR_MODE
-         return wcstod(string, 0);
-     #else
-         return strtod(string, 0);
-     #endif
-     }
-
-     return gen_nan();
- }
-
- PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
- {
-     // Converts the character range [begin, end) to a number. The range is first copied into a
-     // zero-terminated scratch area: the caller's buffer when it fits, a temporary heap block otherwise.
-     // Returns false only when that heap allocation fails.
-     const size_t length = static_cast<size_t>(end - begin);
-     const size_t stack_capacity = sizeof(buffer) / sizeof(buffer[0]);
-     const bool on_heap = length >= stack_capacity;
-
-     char_t* scratch = on_heap ? static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))) : buffer;
-     if (!scratch) return false;
-
-     // Zero-terminated copy, then conversion
-     memcpy(scratch, begin, length * sizeof(char_t));
-     scratch[length] = 0;
-
-     *out_result = convert_string_to_number(scratch);
-
-     // Release the temporary heap block, if one was used
-     if (on_heap) xml_memory::deallocate(scratch);
-
-     return true;
- }
-
- PUGI__FN double round_nearest(double value)
- {
-     // Round to the nearest integer by adding one half and taking the floor
-     return floor(0.5 + value);
- }
-
- PUGI__FN double round_nearest_nzero(double value)
- {
-     // Behaves like round_nearest, except that inputs in [-0.5, 0] go through ceil so that
-     // the sign of zero is kept: -0 for [-0.5, -0] and +0 for +0.
-     if (value >= -0.5 && value <= 0) return ceil(value);
-
-     return floor(value + 0.5);
- }
-
- PUGI__FN const char_t* qualified_name(const xpath_node& node)
- {
-     // Name of the attribute when the xpath node holds one, otherwise name of the node
-     if (node.attribute()) return node.attribute().name();
-
-     return node.node().name();
- }
-
- PUGI__FN const char_t* local_name(const xpath_node& node)
- {
-     // The local name is whatever follows the ':' located by find_char in the qualified name,
-     // or the whole qualified name when there is no ':'
-     const char_t* full = qualified_name(node);
-     const char_t* colon = find_char(full, ':');
-
-     if (!colon) return full;
-
-     return colon + 1;
- }
-
- struct namespace_uri_predicate
- {
-     // Prefix of the name being resolved (not zero-terminated; see prefix_length), or 0 when the name has no ':'
-     const char_t* prefix;
-     size_t prefix_length;
-
-     // Intentionally implicit: callers initialize the predicate directly from a name
-     namespace_uri_predicate(const char_t* name)
-     {
-         const char_t* colon = find_char(name, ':');
-
-         if (colon)
-         {
-             prefix = name;
-             prefix_length = static_cast<size_t>(colon - name);
-         }
-         else
-         {
-             prefix = 0;
-             prefix_length = 0;
-         }
-     }
-
-     // True for the attribute that declares the wanted namespace: "xmlns:<prefix>" when a
-     // prefix is set, plain "xmlns" otherwise
-     bool operator()(xml_attribute a) const
-     {
-         const char_t* name = a.name();
-
-         if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
-
-         // First character after the "xmlns" part
-         const char_t* rest = name + 5;
-
-         if (!prefix) return *rest == 0;
-
-         return *rest == ':' && strequalrange(rest + 1, prefix, prefix_length);
-     }
- };
-
- PUGI__FN const char_t* namespace_uri(xml_node node)
- {
-     namespace_uri_predicate pred = node.name();
-
-     // Search the node itself and then each ancestor for the matching xmlns declaration
-     for (xml_node scope = node; scope; scope = scope.parent())
-     {
-         xml_attribute declaration = scope.find_attribute(pred);
-
-         if (declaration) return declaration.value();
-     }
-
-     // No declaration found
-     return PUGIXML_TEXT("");
- }
-
- PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent)
- {
-     namespace_uri_predicate pred = attr.name();
-
-     // Default namespace does not apply to attributes, so an unprefixed attribute has no namespace
-     if (!pred.prefix) return PUGIXML_TEXT("");
-
-     // Search the owning element and then each ancestor for the matching xmlns:prefix declaration
-     for (xml_node scope = parent; scope; scope = scope.parent())
-     {
-         xml_attribute declaration = scope.find_attribute(pred);
-
-         if (declaration) return declaration.value();
-     }
-
-     // No declaration found
-     return PUGIXML_TEXT("");
- }
-
- PUGI__FN const char_t* namespace_uri(const xpath_node& node)
- {
-     // Dispatch to the attribute or the node overload depending on what the xpath node holds
-     if (node.attribute()) return namespace_uri(node.attribute(), node.parent());
-
-     return namespace_uri(node.node());
- }
-
- PUGI__FN char_t* normalize_space(char_t* buffer)
- {
-     // Rewrites buffer in place: leading and trailing whitespace is dropped and every interior
-     // whitespace run becomes a single ' '. Returns a pointer to the new terminator.
-     char_t* out = buffer;
-     const char_t* in = buffer;
-
-     while (*in)
-     {
-         if (!PUGI__IS_CHARTYPE(*in, ct_space))
-         {
-             *out++ = *in++;
-             continue;
-         }
-
-         // Consume the whole whitespace run
-         do ++in;
-         while (PUGI__IS_CHARTYPE(*in, ct_space));
-
-         // Emit one space for it, unless nothing has been written yet (leading whitespace)
-         if (out != buffer) *out++ = ' ';
-     }
-
-     // Drop the single space a trailing whitespace run may have produced
-     if (out != buffer && PUGI__IS_CHARTYPE(out[-1], ct_space)) --out;
-
-     *out = 0;
-
-     return out;
- }
-
- PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
- {
-     // In-place character mapping: a character found in `from` at index i is replaced by to[i]
-     // when i < to_length and removed otherwise; characters not in `from` are kept.
-     // Returns a pointer to the new terminator.
-     char_t* out = buffer;
-
-     for (const char_t* in = buffer; *in; ++in)
-     {
-         PUGI__DMC_VOLATILE char_t ch = *in;
-
-         const char_t* hit = find_char(from, ch);
-
-         if (!hit)
-         {
-             *out++ = ch; // not listed in `from`: copy through
-             continue;
-         }
-
-         const size_t slot = static_cast<size_t>(hit - from);
-
-         if (slot < to_length) *out++ = to[slot]; // has a replacement
-     }
-
-     *out = 0;
-
-     return out;
- }
-
- PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
- {
-     // Builds a 128-entry lookup table for translate_table. Returns 0 when any character of
-     // from/to has a code of 128 or above, or when the allocation fails.
-     unsigned char table[128] = {0};
-
-     for (; *from; ++from)
-     {
-         const unsigned int fc = static_cast<unsigned int>(*from);
-         const unsigned int tc = static_cast<unsigned int>(*to);
-
-         if (fc >= 128 || tc >= 128)
-             return 0;
-
-         // The first mapping for a character wins; code=128 means "skip character"
-         // (used once `to` has run out)
-         if (table[fc] == 0)
-             table[fc] = static_cast<unsigned char>(tc ? tc : 128);
-
-         // `to` stops advancing at its terminator
-         if (tc) ++to;
-     }
-
-     // Every character without a mapping translates to itself
-     for (unsigned int code = 0; code < 128; ++code)
-         if (table[code] == 0)
-             table[code] = static_cast<unsigned char>(code);
-
-     void* storage = alloc->allocate_nothrow(sizeof(table));
-     if (!storage) return 0;
-
-     memcpy(storage, table, sizeof(table));
-
-     return static_cast<unsigned char*>(storage);
- }
-
- PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table)
- {
-     // In-place translation driven by a 128-entry table; characters with codes of 128 or
-     // above are copied unchanged. Returns a pointer to the new terminator.
-     char_t* out = buffer;
-
-     for (const char_t* in = buffer; *in; ++in)
-     {
-         const char_t ch = *in;
-         const unsigned int index = static_cast<unsigned int>(ch);
-
-         if (index >= 128)
-         {
-             *out++ = ch;
-             continue;
-         }
-
-         const unsigned char code = table[index];
-
-         // code=128 means "skip character" (table size is 128 so 128 can be a special value).
-         // The store always happens; the cursor only advances when bit 7 of code is clear,
-         // which skips these characters without extra branches.
-         *out = static_cast<char_t>(code);
-         out += 1 - (code >> 7);
-     }
-
-     *out = 0;
-
-     return out;
- }
-
- inline bool is_xpath_attribute(const char_t* name)
- {
-     // Everything counts as an attribute except "xmlns" and names beginning with "xmlns:"
-     if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return true;
-
-     const char_t after = name[5];
-
-     return after != 0 && after != ':';
- }
-
- struct xpath_variable_boolean: xpath_variable // xpath_variable tagged xpath_type_boolean, carrying a bool payload
- {
- xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false) // payload starts out false
- {
- }
-
- bool value;
- char_t name[1]; // start of the zero-terminated variable name; new_xpath_variable over-allocates the object so the rest of the name follows this element
- };
-
- struct xpath_variable_number: xpath_variable // xpath_variable tagged xpath_type_number, carrying a double payload
- {
- xpath_variable_number(): xpath_variable(xpath_type_number), value(0) // payload starts out 0
- {
- }
-
- double value;
- char_t name[1]; // start of the zero-terminated variable name; new_xpath_variable over-allocates the object so the rest of the name follows this element
- };
-
- struct xpath_variable_string: xpath_variable // xpath_variable tagged xpath_type_string, owning a string buffer
- {
- xpath_variable_string(): xpath_variable(xpath_type_string), value(0) // no string buffer yet
- {
- }
-
- ~xpath_variable_string()
- {
- if (value) xml_memory::deallocate(value); // value is owned: released through xml_memory when set
- }
-
- char_t* value;
- char_t name[1]; // start of the zero-terminated variable name; new_xpath_variable over-allocates the object so the rest of the name follows this element
- };
-
- struct xpath_variable_node_set: xpath_variable // xpath_variable tagged xpath_type_node_set, carrying an xpath_node_set payload
- {
- xpath_variable_node_set(): xpath_variable(xpath_type_node_set) // value is default-constructed
- {
- }
-
- xpath_node_set value;
- char_t name[1]; // start of the zero-terminated variable name; new_xpath_variable over-allocates the object so the rest of the name follows this element
- };
-
- static const xpath_node_set dummy_node_set; // default-constructed node set with internal linkage; NOTE(review): presumably a shared placeholder returned where no real node set exists - confirm at the use sites
-
- PUGI__FN unsigned int hash_string(const char_t* str)
- {
-     // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
-     unsigned int h = 0;
-
-     // Mix in one character per round
-     for (const char_t* p = str; *p; ++p)
-     {
-         h += static_cast<unsigned int>(*p);
-         h += h << 10;
-         h ^= h >> 6;
-     }
-
-     // Final mixing steps
-     h += h << 3;
-     h ^= h >> 11;
-     h += h << 15;
-
-     return h;
- }
-
- template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
- {
-     // Creates a T whose trailing name storage holds a copy of `name`.
-     // Returns 0 for an empty name or when the allocation fails.
-     const size_t length = strlength(name);
-     if (!length) return 0; // empty variable names are invalid
-
-     // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
-     // (T::name already provides one element, which leaves room for the terminator)
-     void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
-     if (!memory) return 0;
-
-     T* variable = new (memory) T();
-
-     // Copy the name including its terminator
-     memcpy(variable->name, name, (length + 1) * sizeof(char_t));
-
-     return variable;
- }
-
- PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
- {
-     // Create the concrete variable struct that matches the requested value type;
-     // any other type yields 0
-     if (type == xpath_type_node_set) return new_xpath_variable<xpath_variable_node_set>(name);
-     if (type == xpath_type_number) return new_xpath_variable<xpath_variable_number>(name);
-     if (type == xpath_type_string) return new_xpath_variable<xpath_variable_string>(name);
-     if (type == xpath_type_boolean) return new_xpath_variable<xpath_variable_boolean>(name);
-
-     return 0;
- }
-
- template <typename T> PUGI__FN void delete_xpath_variable(T* var)
- {
-     // Run the destructor explicitly, then hand the storage back to xml_memory
-     T* doomed = var;
-
-     doomed->~T();
-     xml_memory::deallocate(doomed);
- }
-
- PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
- {
-     // Downcast to the concrete struct selected by `type` so the matching destructor runs
-     if (type == xpath_type_node_set)
-         delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
-     else if (type == xpath_type_number)
-         delete_xpath_variable(static_cast<xpath_variable_number*>(var));
-     else if (type == xpath_type_string)
-         delete_xpath_variable(static_cast<xpath_variable_string*>(var));
-     else if (type == xpath_type_boolean)
-         delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
-     else
-         assert(!"Invalid variable type");
- }
-
- PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs)
- {
-     // Assign rhs's payload to lhs through the set() overload for rhs's type;
-     // the result of set() is returned to the caller
-     const xpath_value_type type = rhs->type();
-
-     if (type == xpath_type_node_set)
-         return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
-
-     if (type == xpath_type_number)
-         return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
-
-     if (type == xpath_type_string)
-         return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
-
-     if (type == xpath_type_boolean)
-         return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
-
-     assert(!"Invalid variable type");
-     return false;
- }
-
- PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result)
- {
-     // Looks up the variable named by the character range [begin, end) in `set`. The name is
-     // first copied into a zero-terminated scratch area: the caller's buffer when it fits,
-     // a temporary heap block otherwise. Returns false only when that heap allocation fails.
-     const size_t length = static_cast<size_t>(end - begin);
-     const size_t stack_capacity = sizeof(buffer) / sizeof(buffer[0]);
-     const bool on_heap = length >= stack_capacity;
-
-     char_t* scratch = on_heap ? static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))) : buffer;
-     if (!scratch) return false;
-
-     // Zero-terminated copy, then lookup
-     memcpy(scratch, begin, length * sizeof(char_t));
-     scratch[length] = 0;
-
-     *out_result = set->get(scratch);
-
-     // Release the temporary heap block, if one was used
-     if (on_heap) xml_memory::deallocate(scratch);
-
-     return true;
- }
-PUGI__NS_END
-
-// Internal node set class
-PUGI__NS_BEGIN
- PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
- {
-     // Fewer than two elements are trivially in document order
-     if (end - begin < 2)
-         return xpath_node_set::type_sorted;
-
-     document_order_comparator cmp;
-
-     // The first pair fixes the direction; every later adjacent pair must agree with it
-     const bool ascending = cmp(begin[0], begin[1]);
-
-     for (const xpath_node* next = begin + 2; next < end; ++next)
-         if (cmp(next[-1], next[0]) != ascending)
-             return xpath_node_set::type_unsorted;
-
-     if (ascending) return xpath_node_set::type_sorted;
-
-     return xpath_node_set::type_sorted_reverse;
- }
-
- PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
- {
-     // Brings [begin, end) into document order (reverse document order when rev is set)
-     // and returns the resulting order tag.
-     const xpath_node_set::type_t wanted = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
-
-     if (type == xpath_node_set::type_unsorted)
-     {
-         // The range may already be ordered one way or the other; sort only when it is not
-         type = xpath_get_order(begin, end);
-
-         if (type == xpath_node_set::type_unsorted)
-         {
-             sort(begin, end, document_order_comparator());
-
-             type = xpath_node_set::type_sorted;
-         }
-     }
-
-     // At this point the range is ordered; flip it when the direction is the wrong one
-     if (type != wanted) reverse(begin, end);
-
-     return wanted;
- }
-
- PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
- {
-     // Returns the element that comes first in document order, or an empty xpath_node for an empty range
-     if (begin == end) return xpath_node();
-
-     // Ordered range: the answer sits at one of the two ends
-     if (type == xpath_node_set::type_sorted) return *begin;
-     if (type == xpath_node_set::type_sorted_reverse) return *(end - 1);
-
-     // Unordered range: search for the minimum
-     if (type == xpath_node_set::type_unsorted) return *min_element(begin, end, document_order_comparator());
-
-     assert(!"Invalid node set type");
-     return xpath_node();
- }
-
- class xpath_node_set_raw
- {
-     // Order tag of the stored elements
-     xpath_node_set::type_t _type;
-
-     // Storage: [_begin, _end) holds the elements, [_begin, _eos) is the allocated capacity
-     xpath_node* _begin;
-     xpath_node* _end;
-     xpath_node* _eos;
-
- public:
-     // Starts empty, without storage, tagged as unsorted
-     xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
-     {
-     }
-
-     xpath_node* begin() const
-     {
-         return _begin;
-     }
-
-     xpath_node* end() const
-     {
-         return _end;
-     }
-
-     bool empty() const
-     {
-         return _end == _begin;
-     }
-
-     size_t size() const
-     {
-         return static_cast<size_t>(_end - _begin);
-     }
-
-     // First element in document order (an empty xpath_node when the set is empty)
-     xpath_node first() const
-     {
-         return xpath_first(_begin, _end, _type);
-     }
-
-     // Out-of-line slow path of push_back, used when the storage is full
-     void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
-
-     // Appends one element, growing the storage through alloc when it is full
-     void push_back(const xpath_node& node, xpath_allocator* alloc)
-     {
-         if (_end == _eos)
-         {
-             push_back_grow(node, alloc);
-             return;
-         }
-
-         *_end++ = node;
-     }
-
-     // Appends the range [begin_, end_), growing the storage to exactly the required size when needed
-     void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
-     {
-         if (begin_ == end_) return;
-
-         const size_t used = static_cast<size_t>(_end - _begin);
-         const size_t room = static_cast<size_t>(_eos - _begin);
-         const size_t extra = static_cast<size_t>(end_ - begin_);
-         const size_t needed = used + extra;
-
-         if (needed > room)
-         {
-             // reallocate the old array or allocate a new one
-             xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, room * sizeof(xpath_node), needed * sizeof(xpath_node)));
-             assert(data);
-
-             // re-anchor all three pointers on the new storage
-             _begin = data;
-             _end = data + used;
-             _eos = data + needed;
-         }
-
-         memcpy(_end, begin_, extra * sizeof(xpath_node));
-         _end += extra;
-     }
-
-     // Puts the elements into document order and updates the order tag
-     void sort_do()
-     {
-         _type = xpath_sort(_begin, _end, _type, false);
-     }
-
-     // Drops every element from pos onwards; pos must lie within [_begin, _end]
-     void truncate(xpath_node* pos)
-     {
-         assert(_begin <= pos && pos <= _end);
-
-         _end = pos;
-     }
-
-     // Removes duplicate elements; a set tagged unsorted is first sorted with
-     // duplicate_comparator so that equal elements become adjacent
-     void remove_duplicates()
-     {
-         const bool needs_grouping = (_type == xpath_node_set::type_unsorted);
-
-         if (needs_grouping)
-             sort(_begin, _end, duplicate_comparator());
-
-         _end = unique(_begin, _end);
-     }
-
-     xpath_node_set::type_t type() const
-     {
-         return _type;
-     }
-
-     void set_type(xpath_node_set::type_t value)
-     {
-         _type = value;
-     }
- };
-
- PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc)
- {
-     // Current capacity, measured from the storage pointers
-     const size_t old_capacity = static_cast<size_t>(_eos - _begin);
-
-     // get new capacity (1.5x rule); the +1 makes sure an empty set grows as well
-     const size_t grown_capacity = old_capacity + old_capacity / 2 + 1;
-
-     // reallocate the old array or allocate a new one
-     xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, old_capacity * sizeof(xpath_node), grown_capacity * sizeof(xpath_node)));
-     assert(data);
-
-     // re-anchor the pointers; the element count is taken to be the old capacity
-     _begin = data;
-     _end = data + old_capacity;
-     _eos = data + grown_capacity;
-
-     // store the new element
-     *_end = node;
-     ++_end;
- }
-PUGI__NS_END
-
-PUGI__NS_BEGIN
- struct xpath_context // Plain value bundle: an xpath_node plus a position/size pair
- {
- xpath_node n; // NOTE(review): presumably the context node of the expression being evaluated - confirm
- size_t position, size; // NOTE(review): presumably the XPath context position and context size - confirm
-
- xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_) // stores the three arguments as given
- {
- }
- };
-
- enum lexeme_t // Token kinds produced by xpath_lexer::next()
- {
- lex_none = 0, // input the lexer could not tokenize (lone '!', '$' without a name, unterminated quote, lone ':', any other character)
- lex_equal, // =
- lex_not_equal, // !=
- lex_less, // <
- lex_greater, // >
- lex_less_or_equal, // <=
- lex_greater_or_equal, // >=
- lex_plus, // +
- lex_minus, // -
- lex_multiply, // *
- lex_union, // |
- lex_var_ref, // $name or $prefix:name (contents hold the name without the '$')
- lex_open_brace, // (
- lex_close_brace, // )
- lex_quoted_string, // '...' or "..." (contents exclude the quotes)
- lex_number, // digits, digits.digits or .digits
- lex_slash, // /
- lex_double_slash, // //
- lex_open_square_brace, // [
- lex_close_square_brace, // ]
- lex_string, // name token: name, prefix:name or prefix:*
- lex_comma, // ,
- lex_axis_attribute, // @
- lex_dot, // .
- lex_double_dot, // ..
- lex_double_colon, // ::
- lex_eof // end of the query string
- };
-
- struct xpath_lexer_string
- {
-     // Character range [begin, end); the range itself is not zero-terminated
-     const char_t* begin;
-     const char_t* end;
-
-     // Starts out as an empty range with both pointers null
-     xpath_lexer_string(): begin(0), end(0)
-     {
-     }
-
-     // Compares the range with a zero-terminated string via strequalrange
-     bool operator==(const char_t* other) const
-     {
-         return strequalrange(other, begin, static_cast<size_t>(end - begin));
-     }
- };
-
- class xpath_lexer // Tokenizer over a zero-terminated XPath query string; holds one current lexeme at a time
- {
- const char_t* _cur; // first character not yet consumed
- const char_t* _cur_lexeme_pos; // where the current lexeme starts (after skipped whitespace)
- xpath_lexer_string _cur_lexeme_contents; // text range of the current lexeme; only set for var refs, numbers, names and quoted strings
-
- lexeme_t _cur_lexeme; // kind of the current lexeme
-
- public:
- explicit xpath_lexer(const char_t* query): _cur(query) // reads the first lexeme right away
- {
- next();
- }
-
- const char_t* state() const // position just past the current lexeme
- {
- return _cur;
- }
-
- void next() // scans the next lexeme starting at _cur and makes it the current one
- {
- const char_t* cur = _cur;
-
- while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur; // skip whitespace before the lexeme
-
- // save lexeme position for error reporting
- _cur_lexeme_pos = cur;
-
- switch (*cur)
- {
- case 0:
- _cur_lexeme = lex_eof; // cur is not advanced, so further calls keep returning lex_eof
- break;
-
- case '>':
- if (*(cur+1) == '=')
- {
- cur += 2;
- _cur_lexeme = lex_greater_or_equal;
- }
- else
- {
- cur += 1;
- _cur_lexeme = lex_greater;
- }
- break;
-
- case '<':
- if (*(cur+1) == '=')
- {
- cur += 2;
- _cur_lexeme = lex_less_or_equal;
- }
- else
- {
- cur += 1;
- _cur_lexeme = lex_less;
- }
- break;
-
- case '!':
- if (*(cur+1) == '=')
- {
- cur += 2;
- _cur_lexeme = lex_not_equal;
- }
- else
- {
- _cur_lexeme = lex_none; // a lone '!' is not a token; cur stays on it
- }
- break;
-
- case '=':
- cur += 1;
- _cur_lexeme = lex_equal;
-
- break;
-
- case '+':
- cur += 1;
- _cur_lexeme = lex_plus;
-
- break;
-
- case '-':
- cur += 1;
- _cur_lexeme = lex_minus;
-
- break;
-
- case '*':
- cur += 1;
- _cur_lexeme = lex_multiply;
-
- break;
-
- case '|':
- cur += 1;
- _cur_lexeme = lex_union;
-
- break;
-
- case '$':
- cur += 1;
-
- if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
- {
- _cur_lexeme_contents.begin = cur; // contents start after the '$'
-
- while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
-
- if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
- {
- cur++; // :
-
- while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
- }
-
- _cur_lexeme_contents.end = cur;
-
- _cur_lexeme = lex_var_ref;
- }
- else
- {
- _cur_lexeme = lex_none; // '$' must be followed by a name
- }
-
- break;
-
- case '(':
- cur += 1;
- _cur_lexeme = lex_open_brace;
-
- break;
-
- case ')':
- cur += 1;
- _cur_lexeme = lex_close_brace;
-
- break;
-
- case '[':
- cur += 1;
- _cur_lexeme = lex_open_square_brace;
-
- break;
-
- case ']':
- cur += 1;
- _cur_lexeme = lex_close_square_brace;
-
- break;
-
- case ',':
- cur += 1;
- _cur_lexeme = lex_comma;
-
- break;
-
- case '/':
- if (*(cur+1) == '/')
- {
- cur += 2;
- _cur_lexeme = lex_double_slash;
- }
- else
- {
- cur += 1;
- _cur_lexeme = lex_slash;
- }
- break;
-
- case '.':
- if (*(cur+1) == '.')
- {
- cur += 2;
- _cur_lexeme = lex_double_dot;
- }
- else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit)) // number written without an integer part
- {
- _cur_lexeme_contents.begin = cur; // .
-
- ++cur;
-
- while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
-
- _cur_lexeme_contents.end = cur;
-
- _cur_lexeme = lex_number;
- }
- else
- {
- cur += 1;
- _cur_lexeme = lex_dot;
- }
- break;
-
- case '@':
- cur += 1;
- _cur_lexeme = lex_axis_attribute;
-
- break;
-
- case '"':
- case '\'':
- {
- char_t terminator = *cur; // the string ends at the same quote character that opened it
-
- ++cur;
-
- _cur_lexeme_contents.begin = cur;
- while (*cur && *cur != terminator) cur++;
- _cur_lexeme_contents.end = cur;
-
- if (!*cur)
- _cur_lexeme = lex_none; // end of input reached before the closing quote
- else
- {
- cur += 1;
- _cur_lexeme = lex_quoted_string;
- }
-
- break;
- }
-
- case ':':
- if (*(cur+1) == ':')
- {
- cur += 2;
- _cur_lexeme = lex_double_colon;
- }
- else
- {
- _cur_lexeme = lex_none; // a lone ':' is not a token; cur stays on it
- }
- break;
-
- default:
- if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
- {
- _cur_lexeme_contents.begin = cur;
-
- while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
-
- if (*cur == '.') // optional fraction; digits after the '.' are not required
- {
- cur++;
-
- while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
- }
-
- _cur_lexeme_contents.end = cur;
-
- _cur_lexeme = lex_number;
- }
- else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
- {
- _cur_lexeme_contents.begin = cur;
-
- while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
-
- if (cur[0] == ':')
- {
- if (cur[1] == '*') // namespace test ncname:*
- {
- cur += 2; // :*
- }
- else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
- {
- cur++; // :
-
- while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
- }
- }
-
- _cur_lexeme_contents.end = cur;
-
- _cur_lexeme = lex_string;
- }
- else
- {
- _cur_lexeme = lex_none; // unrecognized character; cur stays on it
- }
- }
-
- _cur = cur; // commit the scan position
- }
-
- lexeme_t current() const // kind of the current lexeme
- {
- return _cur_lexeme;
- }
-
- const char_t* current_pos() const // start of the current lexeme, for error reporting
- {
- return _cur_lexeme_pos;
- }
-
- const xpath_lexer_string& contents() const // text of the current lexeme; only valid for the lexeme kinds asserted below
- {
- assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
-
- return _cur_lexeme_contents;
- }
- };
-
- enum ast_type_t // Kinds of XPath AST node; the trailing comments show how left/right/next operands are used
- {
- ast_unknown,
- ast_op_or, // left or right
- ast_op_and, // left and right
- ast_op_equal, // left = right
- ast_op_not_equal, // left != right
- ast_op_less, // left < right
- ast_op_greater, // left > right
- ast_op_less_or_equal, // left <= right
- ast_op_greater_or_equal, // left >= right
- ast_op_add, // left + right
- ast_op_subtract, // left - right
- ast_op_multiply, // left * right
- ast_op_divide, // left div right
- ast_op_mod, // left mod right
- ast_op_negate, // - left (unary minus)
- ast_op_union, // left | right
- ast_predicate, // apply predicate to set; next points to next predicate
- ast_filter, // select * from left where right
- ast_string_constant, // string constant
- ast_number_constant, // number constant
- ast_variable, // variable
- ast_func_last, // last()
- ast_func_position, // position()
- ast_func_count, // count(left)
- ast_func_id, // id(left)
- ast_func_local_name_0, // local-name()
- ast_func_local_name_1, // local-name(left)
- ast_func_namespace_uri_0, // namespace-uri()
- ast_func_namespace_uri_1, // namespace-uri(left)
- ast_func_name_0, // name()
- ast_func_name_1, // name(left)
- ast_func_string_0, // string()
- ast_func_string_1, // string(left)
- ast_func_concat, // concat(left, right, siblings)
- ast_func_starts_with, // starts-with(left, right)
- ast_func_contains, // contains(left, right)
- ast_func_substring_before, // substring-before(left, right)
- ast_func_substring_after, // substring-after(left, right)
- ast_func_substring_2, // substring(left, right)
- ast_func_substring_3, // substring(left, right, third)
- ast_func_string_length_0, // string-length()
- ast_func_string_length_1, // string-length(left)
- ast_func_normalize_space_0, // normalize-space()
- ast_func_normalize_space_1, // normalize-space(left)
- ast_func_translate, // translate(left, right, third)
- ast_func_boolean, // boolean(left)
- ast_func_not, // not(left)
- ast_func_true, // true()
- ast_func_false, // false()
- ast_func_lang, // lang(left)
- ast_func_number_0, // number()
- ast_func_number_1, // number(left)
- ast_func_sum, // sum(left)
- ast_func_floor, // floor(left)
- ast_func_ceiling, // ceiling(left)
- ast_func_round, // round(left)
- ast_step, // process set left with step
- ast_step_root, // select root node
-
- ast_opt_translate_table, // translate(left, right, third) where right/third are constants
- ast_opt_compare_attribute // @name = 'string'
- };
-
- enum axis_t // Axis selector for a location step; the enumerators follow the XPath 1.0 axis names
- {
- axis_ancestor,
- axis_ancestor_or_self,
- axis_attribute,
- axis_child,
- axis_descendant,
- axis_descendant_or_self,
- axis_following,
- axis_following_sibling,
- axis_namespace,
- axis_parent,
- axis_preceding,
- axis_preceding_sibling,
- axis_self
- };
-
- enum nodetest_t // Kind of node test attached to a step; NOTE(review): the per-value notes below are read off the names - confirm against the step evaluation code
- {
- nodetest_none,
- nodetest_name, // presumably a test against a specific name
- nodetest_type_node, // presumably node()
- nodetest_type_comment, // presumably comment()
- nodetest_type_pi, // presumably processing-instruction()
- nodetest_type_text, // presumably text()
- nodetest_pi, // presumably processing-instruction('target')
- nodetest_all, // presumably *
- nodetest_all_in_namespace // presumably prefix:*
- };
-
- enum predicate_t // Classification of a predicate expression; NOTE(review): exact meaning of each value is decided where predicates are analysed, outside this excerpt - confirm there
- {
- predicate_default,
- predicate_posinv,
- predicate_constant,
- predicate_constant_one
- };
-
- enum nodeset_eval_t // Mode passed to node-set evaluation; NOTE(review): names suggest "all nodes", "any one node" and "first node in document order" - confirm against eval_node_set
- {
- nodeset_eval_all,
- nodeset_eval_any,
- nodeset_eval_first
- };
-
- template <axis_t N> struct axis_to_type // Wraps an axis_t value in a distinct type, one instantiation per axis
- {
- static const axis_t axis; // equals N (see the definition below)
- };
-
- template <axis_t N> const axis_t axis_to_type<N>::axis = N; // out-of-class definition of the static member
-
- class xpath_ast_node
- {
- private:
- // node type
- char _type;
- char _rettype;
-
- // for ast_step
- char _axis;
-
- // for ast_step/ast_predicate/ast_filter
- char _test;
-
- // tree node structure
- xpath_ast_node* _left;
- xpath_ast_node* _right;
- xpath_ast_node* _next;
-
- union
- {
- // value for ast_string_constant
- const char_t* string;
- // value for ast_number_constant
- double number;
- // variable for ast_variable
- xpath_variable* variable;
- // node test for ast_step (node name/namespace/node type/pi target)
- const char_t* nodetest;
- // table for ast_opt_translate_table
- const unsigned char* table;
- } _data;
-
- xpath_ast_node(const xpath_ast_node&);
- xpath_ast_node& operator=(const xpath_ast_node&);
-
- template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
- {
- xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
-
- if (lt != xpath_type_node_set && rt != xpath_type_node_set)
- {
- if (lt == xpath_type_boolean || rt == xpath_type_boolean)
- return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
- else if (lt == xpath_type_number || rt == xpath_type_number)
- return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
- else if (lt == xpath_type_string || rt == xpath_type_string)
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_string ls = lhs->eval_string(c, stack);
- xpath_string rs = rhs->eval_string(c, stack);
-
- return comp(ls, rs);
- }
- }
- else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
-
- for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
- for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
- {
- xpath_allocator_capture cri(stack.result);
-
- if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
- return true;
- }
-
- return false;
- }
- else
- {
- if (lt == xpath_type_node_set)
- {
- swap(lhs, rhs);
- swap(lt, rt);
- }
-
- if (lt == xpath_type_boolean)
- return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
- else if (lt == xpath_type_number)
- {
- xpath_allocator_capture cr(stack.result);
-
- double l = lhs->eval_number(c, stack);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
-
- for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
- {
- xpath_allocator_capture cri(stack.result);
-
- if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
- return true;
- }
-
- return false;
- }
- else if (lt == xpath_type_string)
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_string l = lhs->eval_string(c, stack);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
-
- for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
- {
- xpath_allocator_capture cri(stack.result);
-
- if (comp(l, string_value(*ri, stack.result)))
- return true;
- }
-
- return false;
- }
- }
-
- assert(!"Wrong types");
- return false;
- }
-
- static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval)
- {
- return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any;
- }
-
- template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
- {
- xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
-
- if (lt != xpath_type_node_set && rt != xpath_type_node_set)
- return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
- else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
-
- for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
- {
- xpath_allocator_capture cri(stack.result);
-
- double l = convert_string_to_number(string_value(*li, stack.result).c_str());
-
- for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
- {
- xpath_allocator_capture crii(stack.result);
-
- if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
- return true;
- }
- }
-
- return false;
- }
- else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
- {
- xpath_allocator_capture cr(stack.result);
-
- double l = lhs->eval_number(c, stack);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
-
- for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
- {
- xpath_allocator_capture cri(stack.result);
-
- if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
- return true;
- }
-
- return false;
- }
- else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
- double r = rhs->eval_number(c, stack);
-
- for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
- {
- xpath_allocator_capture cri(stack.result);
-
- if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
- return true;
- }
-
- return false;
- }
- else
- {
- assert(!"Wrong types");
- return false;
- }
- }
-
- static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
- {
- assert(ns.size() >= first);
- assert(expr->rettype() != xpath_type_number);
-
- size_t i = 1;
- size_t size = ns.size() - first;
-
- xpath_node* last = ns.begin() + first;
-
- // remove_if... or well, sort of
- for (xpath_node* it = last; it != ns.end(); ++it, ++i)
- {
- xpath_context c(*it, i, size);
-
- if (expr->eval_boolean(c, stack))
- {
- *last++ = *it;
-
- if (once) break;
- }
- }
-
- ns.truncate(last);
- }
-
- static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
- {
- assert(ns.size() >= first);
- assert(expr->rettype() == xpath_type_number);
-
- size_t i = 1;
- size_t size = ns.size() - first;
-
- xpath_node* last = ns.begin() + first;
-
- // remove_if... or well, sort of
- for (xpath_node* it = last; it != ns.end(); ++it, ++i)
- {
- xpath_context c(*it, i, size);
-
- if (expr->eval_number(c, stack) == i)
- {
- *last++ = *it;
-
- if (once) break;
- }
- }
-
- ns.truncate(last);
- }
-
- static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
- {
- assert(ns.size() >= first);
- assert(expr->rettype() == xpath_type_number);
-
- size_t size = ns.size() - first;
-
- xpath_node* last = ns.begin() + first;
-
- xpath_context c(xpath_node(), 1, size);
-
- double er = expr->eval_number(c, stack);
-
- if (er >= 1.0 && er <= size)
- {
- size_t eri = static_cast<size_t>(er);
-
- if (er == eri)
- {
- xpath_node r = last[eri - 1];
-
- *last++ = r;
- }
- }
-
- ns.truncate(last);
- }
-
- void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once)
- {
- if (ns.size() == first) return;
-
- assert(_type == ast_filter || _type == ast_predicate);
-
- if (_test == predicate_constant || _test == predicate_constant_one)
- apply_predicate_number_const(ns, first, _right, stack);
- else if (_right->rettype() == xpath_type_number)
- apply_predicate_number(ns, first, _right, stack, once);
- else
- apply_predicate_boolean(ns, first, _right, stack, once);
- }
-
- void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval)
- {
- if (ns.size() == first) return;
-
- bool last_once = eval_once(ns.type(), eval);
-
- for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
- pred->apply_predicate(ns, first, stack, !pred->_next && last_once);
- }
-
- bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc)
- {
- assert(a);
-
- const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");
-
- switch (_test)
- {
- case nodetest_name:
- if (strequal(name, _data.nodetest) && is_xpath_attribute(name))
- {
- ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
- return true;
- }
- break;
-
- case nodetest_type_node:
- case nodetest_all:
- if (is_xpath_attribute(name))
- {
- ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
- return true;
- }
- break;
-
- case nodetest_all_in_namespace:
- if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
- {
- ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
- return true;
- }
- break;
-
- default:
- ;
- }
-
- return false;
- }
-
- bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc)
- {
- assert(n);
-
- xml_node_type type = PUGI__NODETYPE(n);
-
- switch (_test)
- {
- case nodetest_name:
- if (type == node_element && n->name && strequal(n->name, _data.nodetest))
- {
- ns.push_back(xml_node(n), alloc);
- return true;
- }
- break;
-
- case nodetest_type_node:
- ns.push_back(xml_node(n), alloc);
- return true;
-
- case nodetest_type_comment:
- if (type == node_comment)
- {
- ns.push_back(xml_node(n), alloc);
- return true;
- }
- break;
-
- case nodetest_type_text:
- if (type == node_pcdata || type == node_cdata)
- {
- ns.push_back(xml_node(n), alloc);
- return true;
- }
- break;
-
- case nodetest_type_pi:
- if (type == node_pi)
- {
- ns.push_back(xml_node(n), alloc);
- return true;
- }
- break;
-
- case nodetest_pi:
- if (type == node_pi && n->name && strequal(n->name, _data.nodetest))
- {
- ns.push_back(xml_node(n), alloc);
- return true;
- }
- break;
-
- case nodetest_all:
- if (type == node_element)
- {
- ns.push_back(xml_node(n), alloc);
- return true;
- }
- break;
-
- case nodetest_all_in_namespace:
- if (type == node_element && n->name && starts_with(n->name, _data.nodetest))
- {
- ns.push_back(xml_node(n), alloc);
- return true;
- }
- break;
-
- default:
- assert(!"Unknown axis");
- }
-
- return false;
- }
-
- template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T)
- {
- const axis_t axis = T::axis;
-
- switch (axis)
- {
- case axis_attribute:
- {
- for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
- if (step_push(ns, a, n, alloc) & once)
- return;
-
- break;
- }
-
- case axis_child:
- {
- for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
- if (step_push(ns, c, alloc) & once)
- return;
-
- break;
- }
-
- case axis_descendant:
- case axis_descendant_or_self:
- {
- if (axis == axis_descendant_or_self)
- if (step_push(ns, n, alloc) & once)
- return;
-
- xml_node_struct* cur = n->first_child;
-
- while (cur)
- {
- if (step_push(ns, cur, alloc) & once)
- return;
-
- if (cur->first_child)
- cur = cur->first_child;
- else
- {
- while (!cur->next_sibling)
- {
- cur = cur->parent;
-
- if (cur == n) return;
- }
-
- cur = cur->next_sibling;
- }
- }
-
- break;
- }
-
- case axis_following_sibling:
- {
- for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
- if (step_push(ns, c, alloc) & once)
- return;
-
- break;
- }
-
- case axis_preceding_sibling:
- {
- for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
- if (step_push(ns, c, alloc) & once)
- return;
-
- break;
- }
-
- case axis_following:
- {
- xml_node_struct* cur = n;
-
- // exit from this node so that we don't include descendants
- while (!cur->next_sibling)
- {
- cur = cur->parent;
-
- if (!cur) return;
- }
-
- cur = cur->next_sibling;
-
- while (cur)
- {
- if (step_push(ns, cur, alloc) & once)
- return;
-
- if (cur->first_child)
- cur = cur->first_child;
- else
- {
- while (!cur->next_sibling)
- {
- cur = cur->parent;
-
- if (!cur) return;
- }
-
- cur = cur->next_sibling;
- }
- }
-
- break;
- }
-
- case axis_preceding:
- {
- xml_node_struct* cur = n;
-
- // exit from this node so that we don't include descendants
- while (!cur->prev_sibling_c->next_sibling)
- {
- cur = cur->parent;
-
- if (!cur) return;
- }
-
- cur = cur->prev_sibling_c;
-
- while (cur)
- {
- if (cur->first_child)
- cur = cur->first_child->prev_sibling_c;
- else
- {
- // leaf node, can't be ancestor
- if (step_push(ns, cur, alloc) & once)
- return;
-
- while (!cur->prev_sibling_c->next_sibling)
- {
- cur = cur->parent;
-
- if (!cur) return;
-
- if (!node_is_ancestor(cur, n))
- if (step_push(ns, cur, alloc) & once)
- return;
- }
-
- cur = cur->prev_sibling_c;
- }
- }
-
- break;
- }
-
- case axis_ancestor:
- case axis_ancestor_or_self:
- {
- if (axis == axis_ancestor_or_self)
- if (step_push(ns, n, alloc) & once)
- return;
-
- xml_node_struct* cur = n->parent;
-
- while (cur)
- {
- if (step_push(ns, cur, alloc) & once)
- return;
-
- cur = cur->parent;
- }
-
- break;
- }
-
- case axis_self:
- {
- step_push(ns, n, alloc);
-
- break;
- }
-
- case axis_parent:
- {
- if (n->parent)
- step_push(ns, n->parent, alloc);
-
- break;
- }
-
- default:
- assert(!"Unimplemented axis");
- }
- }
-
- template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v)
- {
- const axis_t axis = T::axis;
-
- switch (axis)
- {
- case axis_ancestor:
- case axis_ancestor_or_self:
- {
- if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
- if (step_push(ns, a, p, alloc) & once)
- return;
-
- xml_node_struct* cur = p;
-
- while (cur)
- {
- if (step_push(ns, cur, alloc) & once)
- return;
-
- cur = cur->parent;
- }
-
- break;
- }
-
- case axis_descendant_or_self:
- case axis_self:
- {
- if (_test == nodetest_type_node) // reject attributes based on principal node type test
- step_push(ns, a, p, alloc);
-
- break;
- }
-
- case axis_following:
- {
- xml_node_struct* cur = p;
-
- while (cur)
- {
- if (cur->first_child)
- cur = cur->first_child;
- else
- {
- while (!cur->next_sibling)
- {
- cur = cur->parent;
-
- if (!cur) return;
- }
-
- cur = cur->next_sibling;
- }
-
- if (step_push(ns, cur, alloc) & once)
- return;
- }
-
- break;
- }
-
- case axis_parent:
- {
- step_push(ns, p, alloc);
-
- break;
- }
-
- case axis_preceding:
- {
- // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
- step_fill(ns, p, alloc, once, v);
- break;
- }
-
- default:
- assert(!"Unimplemented axis");
- }
- }
-
- template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v)
- {
- const axis_t axis = T::axis;
- const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
-
- if (xn.node())
- step_fill(ns, xn.node().internal_object(), alloc, once, v);
- else if (axis_has_attributes && xn.attribute() && xn.parent())
- step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v);
- }
-
- template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v)
- {
- const axis_t axis = T::axis;
- const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
- const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
-
- bool once =
- (axis == axis_attribute && _test == nodetest_name) ||
- (!_right && eval_once(axis_type, eval)) ||
- (_right && !_right->_next && _right->_test == predicate_constant_one);
-
- xpath_node_set_raw ns;
- ns.set_type(axis_type);
-
- if (_left)
- {
- xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);
-
- // self axis preserves the original order
- if (axis == axis_self) ns.set_type(s.type());
-
- for (const xpath_node* it = s.begin(); it != s.end(); ++it)
- {
- size_t size = ns.size();
-
- // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
- if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
-
- step_fill(ns, *it, stack.result, once, v);
- if (_right) apply_predicates(ns, size, stack, eval);
- }
- }
- else
- {
- step_fill(ns, c.n, stack.result, once, v);
- if (_right) apply_predicates(ns, 0, stack, eval);
- }
-
- // child, attribute and self axes always generate unique set of nodes
- // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
- if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
- ns.remove_duplicates();
-
- return ns;
- }
-
- public:
- xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
- _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
- {
- assert(type == ast_string_constant);
- _data.string = value;
- }
-
- xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
- _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
- {
- assert(type == ast_number_constant);
- _data.number = value;
- }
-
- xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
- _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
- {
- assert(type == ast_variable);
- _data.variable = value;
- }
-
- xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
- _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
- {
- }
-
- xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
- _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
- {
- assert(type == ast_step);
- _data.nodetest = contents;
- }
-
- xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test):
- _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0)
- {
- assert(type == ast_filter || type == ast_predicate);
- }
-
- void set_next(xpath_ast_node* value)
- {
- _next = value;
- }
-
- void set_right(xpath_ast_node* value)
- {
- _right = value;
- }
-
- bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
- {
- switch (_type)
- {
- case ast_op_or:
- return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
-
- case ast_op_and:
- return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
-
- case ast_op_equal:
- return compare_eq(_left, _right, c, stack, equal_to());
-
- case ast_op_not_equal:
- return compare_eq(_left, _right, c, stack, not_equal_to());
-
- case ast_op_less:
- return compare_rel(_left, _right, c, stack, less());
-
- case ast_op_greater:
- return compare_rel(_right, _left, c, stack, less());
-
- case ast_op_less_or_equal:
- return compare_rel(_left, _right, c, stack, less_equal());
-
- case ast_op_greater_or_equal:
- return compare_rel(_right, _left, c, stack, less_equal());
-
- case ast_func_starts_with:
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_string lr = _left->eval_string(c, stack);
- xpath_string rr = _right->eval_string(c, stack);
-
- return starts_with(lr.c_str(), rr.c_str());
- }
-
- case ast_func_contains:
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_string lr = _left->eval_string(c, stack);
- xpath_string rr = _right->eval_string(c, stack);
-
- return find_substring(lr.c_str(), rr.c_str()) != 0;
- }
-
- case ast_func_boolean:
- return _left->eval_boolean(c, stack);
-
- case ast_func_not:
- return !_left->eval_boolean(c, stack);
-
- case ast_func_true:
- return true;
-
- case ast_func_false:
- return false;
-
- case ast_func_lang:
- {
- if (c.n.attribute()) return false;
-
- xpath_allocator_capture cr(stack.result);
-
- xpath_string lang = _left->eval_string(c, stack);
-
- for (xml_node n = c.n.node(); n; n = n.parent())
- {
- xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
-
- if (a)
- {
- const char_t* value = a.value();
-
- // strnicmp / strncasecmp is not portable
- for (const char_t* lit = lang.c_str(); *lit; ++lit)
- {
- if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
- ++value;
- }
-
- return *value == 0 || *value == '-';
- }
- }
-
- return false;
- }
-
- case ast_opt_compare_attribute:
- {
- const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();
-
- xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);
-
- return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
- }
-
- case ast_variable:
- {
- assert(_rettype == _data.variable->type());
-
- if (_rettype == xpath_type_boolean)
- return _data.variable->get_boolean();
-
- // fallthrough to type conversion
- }
-
- default:
- {
- switch (_rettype)
- {
- case xpath_type_number:
- return convert_number_to_boolean(eval_number(c, stack));
-
- case xpath_type_string:
- {
- xpath_allocator_capture cr(stack.result);
-
- return !eval_string(c, stack).empty();
- }
-
- case xpath_type_node_set:
- {
- xpath_allocator_capture cr(stack.result);
-
- return !eval_node_set(c, stack, nodeset_eval_any).empty();
- }
-
- default:
- assert(!"Wrong expression for return type boolean");
- return false;
- }
- }
- }
- }
-
- double eval_number(const xpath_context& c, const xpath_stack& stack)
- {
- switch (_type)
- {
- case ast_op_add:
- return _left->eval_number(c, stack) + _right->eval_number(c, stack);
-
- case ast_op_subtract:
- return _left->eval_number(c, stack) - _right->eval_number(c, stack);
-
- case ast_op_multiply:
- return _left->eval_number(c, stack) * _right->eval_number(c, stack);
-
- case ast_op_divide:
- return _left->eval_number(c, stack) / _right->eval_number(c, stack);
-
- case ast_op_mod:
- return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
-
- case ast_op_negate:
- return -_left->eval_number(c, stack);
-
- case ast_number_constant:
- return _data.number;
-
- case ast_func_last:
- return static_cast<double>(c.size);
-
- case ast_func_position:
- return static_cast<double>(c.position);
-
- case ast_func_count:
- {
- xpath_allocator_capture cr(stack.result);
-
- return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
- }
-
- case ast_func_string_length_0:
- {
- xpath_allocator_capture cr(stack.result);
-
- return static_cast<double>(string_value(c.n, stack.result).length());
- }
-
- case ast_func_string_length_1:
- {
- xpath_allocator_capture cr(stack.result);
-
- return static_cast<double>(_left->eval_string(c, stack).length());
- }
-
- case ast_func_number_0:
- {
- xpath_allocator_capture cr(stack.result);
-
- return convert_string_to_number(string_value(c.n, stack.result).c_str());
- }
-
- case ast_func_number_1:
- return _left->eval_number(c, stack);
-
- case ast_func_sum:
- {
- xpath_allocator_capture cr(stack.result);
-
- double r = 0;
-
- xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);
-
- for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
- {
- xpath_allocator_capture cri(stack.result);
-
- r += convert_string_to_number(string_value(*it, stack.result).c_str());
- }
-
- return r;
- }
-
- case ast_func_floor:
- {
- double r = _left->eval_number(c, stack);
-
- return r == r ? floor(r) : r;
- }
-
- case ast_func_ceiling:
- {
- double r = _left->eval_number(c, stack);
-
- return r == r ? ceil(r) : r;
- }
-
- case ast_func_round:
- return round_nearest_nzero(_left->eval_number(c, stack));
-
- case ast_variable:
- {
- assert(_rettype == _data.variable->type());
-
- if (_rettype == xpath_type_number)
- return _data.variable->get_number();
-
- // fallthrough to type conversion
- }
-
- default:
- {
- switch (_rettype)
- {
- case xpath_type_boolean:
- return eval_boolean(c, stack) ? 1 : 0;
-
- case xpath_type_string:
- {
- xpath_allocator_capture cr(stack.result);
-
- return convert_string_to_number(eval_string(c, stack).c_str());
- }
-
- case xpath_type_node_set:
- {
- xpath_allocator_capture cr(stack.result);
-
- return convert_string_to_number(eval_string(c, stack).c_str());
- }
-
- default:
- assert(!"Wrong expression for return type number");
- return 0;
- }
-
- }
- }
- }
-
- xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
- {
- assert(_type == ast_func_concat);
-
- xpath_allocator_capture ct(stack.temp);
-
- // count the string number
- size_t count = 1;
- for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
-
- // gather all strings
- xpath_string static_buffer[4];
- xpath_string* buffer = static_buffer;
-
- // allocate on-heap for large concats
- if (count > sizeof(static_buffer) / sizeof(static_buffer[0]))
- {
- buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
- assert(buffer);
- }
-
- // evaluate all strings to temporary stack
- xpath_stack swapped_stack = {stack.temp, stack.result};
-
- buffer[0] = _left->eval_string(c, swapped_stack);
-
- size_t pos = 1;
- for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
- assert(pos == count);
-
- // get total length
- size_t length = 0;
- for (size_t i = 0; i < count; ++i) length += buffer[i].length();
-
- // create final string
- char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
- assert(result);
-
- char_t* ri = result;
-
- for (size_t j = 0; j < count; ++j)
- for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
- *ri++ = *bi;
-
- *ri = 0;
-
- return xpath_string::from_heap_preallocated(result, ri);
- }
-
- xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
- {
- switch (_type)
- {
- case ast_string_constant:
- return xpath_string::from_const(_data.string);
-
- case ast_func_local_name_0:
- {
- xpath_node na = c.n;
-
- return xpath_string::from_const(local_name(na));
- }
-
- case ast_func_local_name_1:
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
- xpath_node na = ns.first();
-
- return xpath_string::from_const(local_name(na));
- }
-
- case ast_func_name_0:
- {
- xpath_node na = c.n;
-
- return xpath_string::from_const(qualified_name(na));
- }
-
- case ast_func_name_1:
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
- xpath_node na = ns.first();
-
- return xpath_string::from_const(qualified_name(na));
- }
-
- case ast_func_namespace_uri_0:
- {
- xpath_node na = c.n;
-
- return xpath_string::from_const(namespace_uri(na));
- }
-
- case ast_func_namespace_uri_1:
- {
- xpath_allocator_capture cr(stack.result);
-
- xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
- xpath_node na = ns.first();
-
- return xpath_string::from_const(namespace_uri(na));
- }
-
- case ast_func_string_0:
- return string_value(c.n, stack.result);
-
- case ast_func_string_1:
- return _left->eval_string(c, stack);
-
- case ast_func_concat:
- return eval_string_concat(c, stack);
-
- case ast_func_substring_before:
- {
- xpath_allocator_capture cr(stack.temp);
-
- xpath_stack swapped_stack = {stack.temp, stack.result};
-
- xpath_string s = _left->eval_string(c, swapped_stack);
- xpath_string p = _right->eval_string(c, swapped_stack);
-
- const char_t* pos = find_substring(s.c_str(), p.c_str());
-
- return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string();
- }
-
- case ast_func_substring_after:
- {
- xpath_allocator_capture cr(stack.temp);
-
- xpath_stack swapped_stack = {stack.temp, stack.result};
-
- xpath_string s = _left->eval_string(c, swapped_stack);
- xpath_string p = _right->eval_string(c, swapped_stack);
-
- const char_t* pos = find_substring(s.c_str(), p.c_str());
- if (!pos) return xpath_string();
-
- const char_t* rbegin = pos + p.length();
- const char_t* rend = s.c_str() + s.length();
-
- return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
- }
-
- case ast_func_substring_2:
- {
- xpath_allocator_capture cr(stack.temp);
-
- xpath_stack swapped_stack = {stack.temp, stack.result};
-
- xpath_string s = _left->eval_string(c, swapped_stack);
- size_t s_length = s.length();
-
- double first = round_nearest(_right->eval_number(c, stack));
-
- if (is_nan(first)) return xpath_string(); // NaN
- else if (first >= s_length + 1) return xpath_string();
-
- size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
- assert(1 <= pos && pos <= s_length + 1);
-
- const char_t* rbegin = s.c_str() + (pos - 1);
- const char_t* rend = s.c_str() + s.length();
-
- return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
- }
-
- case ast_func_substring_3:
- {
- xpath_allocator_capture cr(stack.temp);
-
- xpath_stack swapped_stack = {stack.temp, stack.result};
-
- xpath_string s = _left->eval_string(c, swapped_stack);
- size_t s_length = s.length();
-
- double first = round_nearest(_right->eval_number(c, stack));
- double last = first + round_nearest(_right->_next->eval_number(c, stack));
-
- if (is_nan(first) || is_nan(last)) return xpath_string();
- else if (first >= s_length + 1) return xpath_string();
- else if (first >= last) return xpath_string();
- else if (last < 1) return xpath_string();
-
- size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
- size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last);
-
- assert(1 <= pos && pos <= end && end <= s_length + 1);
- const char_t* rbegin = s.c_str() + (pos - 1);
- const char_t* rend = s.c_str() + (end - 1);
-
- return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result);
- }
-
- case ast_func_normalize_space_0:
- {
- xpath_string s = string_value(c.n, stack.result);
-
- char_t* begin = s.data(stack.result);
- char_t* end = normalize_space(begin);
-
- return xpath_string::from_heap_preallocated(begin, end);
- }
-
- case ast_func_normalize_space_1:
- {
- xpath_string s = _left->eval_string(c, stack);
-
- char_t* begin = s.data(stack.result);
- char_t* end = normalize_space(begin);
-
- return xpath_string::from_heap_preallocated(begin, end);
- }
-
- case ast_func_translate:
- {
- xpath_allocator_capture cr(stack.temp);
-
- xpath_stack swapped_stack = {stack.temp, stack.result};
-
- xpath_string s = _left->eval_string(c, stack);
- xpath_string from = _right->eval_string(c, swapped_stack);
- xpath_string to = _right->_next->eval_string(c, swapped_stack);
-
- char_t* begin = s.data(stack.result);
- char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());
-
- return xpath_string::from_heap_preallocated(begin, end);
- }
-
- case ast_opt_translate_table:
- {
- xpath_string s = _left->eval_string(c, stack);
-
- char_t* begin = s.data(stack.result);
- char_t* end = translate_table(begin, _data.table);
-
- return xpath_string::from_heap_preallocated(begin, end);
- }
-
- case ast_variable:
- {
- assert(_rettype == _data.variable->type());
-
- if (_rettype == xpath_type_string)
- return xpath_string::from_const(_data.variable->get_string());
-
- // fallthrough to type conversion
- }
-
- default:
- {
- switch (_rettype)
- {
- case xpath_type_boolean:
- return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
-
- case xpath_type_number:
- return convert_number_to_string(eval_number(c, stack), stack.result);
-
- case xpath_type_node_set:
- {
- xpath_allocator_capture cr(stack.temp);
-
- xpath_stack swapped_stack = {stack.temp, stack.result};
-
- xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
- return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
- }
-
- default:
- assert(!"Wrong expression for return type string");
- return xpath_string();
- }
- }
- }
- }
-
- xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval) // evaluates this AST node as a node set, dispatching on _type
- {
- switch (_type)
- {
- case ast_op_union:
- {
- xpath_allocator_capture cr(stack.temp);
-
- xpath_stack swapped_stack = {stack.temp, stack.result}; // same two allocators, handed to the left operand in the opposite order
-
- xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval);
- xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval);
-
- // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
- rs.set_type(xpath_node_set::type_unsorted);
-
- rs.append(ls.begin(), ls.end(), stack.result); // the merged set grows using stack.result
- rs.remove_duplicates();
-
- return rs;
- }
-
- case ast_filter:
- {
- xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all); // a constant-one predicate only requests nodeset_eval_first from the operand
-
- // either expression is a number or it contains position() call; sort by document order
- if (_test != predicate_posinv) set.sort_do();
-
- bool once = eval_once(set.type(), eval);
-
- apply_predicate(set, 0, stack, once);
-
- return set;
- }
-
- case ast_func_id:
- return xpath_node_set_raw(); // id() always produces an empty set in this implementation
-
- case ast_step:
- {
- switch (_axis) // turn the runtime axis value into a compile-time tag for step_do
- {
- case axis_ancestor:
- return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
-
- case axis_ancestor_or_self:
- return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());
-
- case axis_attribute:
- return step_do(c, stack, eval, axis_to_type<axis_attribute>());
-
- case axis_child:
- return step_do(c, stack, eval, axis_to_type<axis_child>());
-
- case axis_descendant:
- return step_do(c, stack, eval, axis_to_type<axis_descendant>());
-
- case axis_descendant_or_self:
- return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());
-
- case axis_following:
- return step_do(c, stack, eval, axis_to_type<axis_following>());
-
- case axis_following_sibling:
- return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());
-
- case axis_namespace:
- // namespaced axis is not supported
- return xpath_node_set_raw();
-
- case axis_parent:
- return step_do(c, stack, eval, axis_to_type<axis_parent>());
-
- case axis_preceding:
- return step_do(c, stack, eval, axis_to_type<axis_preceding>());
-
- case axis_preceding_sibling:
- return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());
-
- case axis_self:
- return step_do(c, stack, eval, axis_to_type<axis_self>());
-
- default:
- assert(!"Unknown axis");
- return xpath_node_set_raw();
- }
- }
-
- case ast_step_root:
- {
- assert(!_right); // root step can't have any predicates
-
- xpath_node_set_raw ns;
-
- ns.set_type(xpath_node_set::type_sorted);
-
- if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
- else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result); // attribute context: the root is reached through the attribute's parent node
-
- return ns;
- }
-
- case ast_variable:
- {
- assert(_rettype == _data.variable->type());
-
- if (_rettype == xpath_type_node_set)
- {
- const xpath_node_set& s = _data.variable->get_node_set();
-
- xpath_node_set_raw ns;
-
- ns.set_type(s.type());
- ns.append(s.begin(), s.end(), stack.result); // the variable's nodes are copied into a fresh raw set
-
- return ns;
- }
-
- // fallthrough to type conversion
- }
-
- default:
- assert(!"Wrong expression for return type node set"); // no conversion to node set exists, so any other node type is a usage error
- return xpath_node_set_raw();
- }
- }
-
- void optimize(xpath_allocator* alloc)
- {
-     // Recursively optimize every linked node (left operand, right operand,
-     // then the next sibling) before rewriting this node itself.
-     xpath_ast_node* linked = _left;
-     if (linked) linked->optimize(alloc);
-
-     linked = _right;
-     if (linked) linked->optimize(alloc);
-
-     linked = _next;
-     if (linked) linked->optimize(alloc);
-
-     optimize_self(alloc);
- }
-
- void optimize_self(xpath_allocator* alloc) // applies local rewrites to this node only; optimize() has already visited the linked nodes
- {
- // Rewrite [position()=expr] with [expr]
- // Note that this step has to go before classification to recognize [position()=1]
- if ((_type == ast_filter || _type == ast_predicate) &&
- _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number)
- {
- _right = _right->_right; // keep only the numeric side of the comparison as the predicate expression
- }
-
- // Classify filter/predicate ops to perform various optimizations during evaluation
- if (_type == ast_filter || _type == ast_predicate)
- {
- assert(_test == predicate_default);
-
- if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
- _test = predicate_constant_one;
- else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
- _test = predicate_constant;
- else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
- _test = predicate_posinv;
- }
-
- // Rewrite descendant-or-self::node()/child::foo with descendant::foo
- // The former is a full form of //foo, the latter is much faster since it executes the node test immediately
- // Do a similar kind of rewrite for self/descendant/descendant-or-self axes
- // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
- if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && _left &&
- _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
- is_posinv_step())
- {
- if (_axis == axis_child || _axis == axis_descendant)
- _axis = axis_descendant;
- else
- _axis = axis_descendant_or_self;
-
- _left = _left->_left; // drop the intermediate descendant-or-self::node() step from the chain
- }
-
- // Use optimized lookup table implementation for translate() with constant arguments
- if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
- {
- unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
-
- if (table) // a null table leaves the node as a regular translate() call
- {
- _type = ast_opt_translate_table;
- _data.table = table;
- }
- }
-
- // Use optimized path for @attr = 'value' or @attr = $value
- if (_type == ast_op_equal &&
- _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
- (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string)))
- {
- _type = ast_opt_compare_attribute;
- }
- }
-
- bool is_posinv_expr() const
- {
-     // Reports whether this expression is free of position()/last() calls.
-     switch (_type)
-     {
-     // these two functions are the only directly position-dependent nodes
-     case ast_func_position:
-     case ast_func_last:
-         return false;
-
-     // constants, variables, steps and filters/predicates are treated as
-     // position-invariant without inspecting anything below them
-     case ast_string_constant:
-     case ast_number_constant:
-     case ast_variable:
-     case ast_step:
-     case ast_step_root:
-     case ast_predicate:
-     case ast_filter:
-         return true;
-
-     default:
-         break;
-     }
-
-     // every other node is invariant only if its left operand and the whole
-     // chain hanging off _right are invariant
-     if (_left && !_left->is_posinv_expr()) return false;
-
-     xpath_ast_node* operand = _right;
-
-     while (operand)
-     {
-         if (!operand->is_posinv_expr()) return false;
-
-         operand = operand->_next;
-     }
-
-     return true;
- }
-
- bool is_posinv_step() const
- {
-     // A step is position-invariant when every predicate attached to it has
-     // been classified as predicate_posinv.
-     assert(_type == ast_step);
-
-     xpath_ast_node* pred = _right;
-
-     while (pred)
-     {
-         assert(pred->_type == ast_predicate);
-
-         if (pred->_test != predicate_posinv) return false;
-
-         pred = pred->_next;
-     }
-
-     return true;
- }
-
- xpath_value_type rettype() const // value type this node evaluates to
- {
- return static_cast<xpath_value_type>(_rettype); // _rettype is converted back to the public enum type
- }
- };
-
- struct xpath_parser
- {
- xpath_allocator* _alloc;
- xpath_lexer _lexer;
-
- const char_t* _query;
- xpath_variable_set* _variables;
-
- xpath_parse_result* _result;
-
- char_t _scratch[32];
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- jmp_buf _error_handler;
- #endif
-
- void throw_error(const char* message)
- {
-     // Record the message and how far into the query the lexer had advanced,
-     // then leave the parser: longjmp without exceptions, throw otherwise.
-     const char_t* position = _lexer.current_pos();
-
-     _result->offset = position - _query;
-     _result->error = message;
-
- #ifdef PUGIXML_NO_EXCEPTIONS
-     longjmp(_error_handler, 1);
- #else
-     throw xpath_exception(*_result);
- #endif
- }
-
- void throw_error_oom()
- {
-     // Out-of-memory is reported as std::bad_alloc when exceptions are
-     // available and as an ordinary parse error otherwise.
- #ifndef PUGIXML_NO_EXCEPTIONS
-     throw std::bad_alloc();
- #else
-     throw_error("Out of memory");
- #endif
- }
-
- void* alloc_node()
- {
-     // Raw storage for one AST node; callers construct into it with placement new.
-     void* memory = _alloc->allocate_nothrow(sizeof(xpath_ast_node));
-
-     if (memory == 0) throw_error_oom();
-
-     return memory;
- }
-
- const char_t* alloc_string(const xpath_lexer_string& value)
- {
-     // Copies a lexer substring into allocator-owned, null-terminated storage.
-     // A lexer string without a begin pointer maps to a null result.
-     if (!value.begin) return 0;
-
-     const size_t count = static_cast<size_t>(value.end - value.begin);
-
-     void* raw = _alloc->allocate_nothrow((count + 1) * sizeof(char_t));
-     char_t* copy = static_cast<char_t*>(raw);
-
-     if (!copy) throw_error_oom();
-     assert(copy); // workaround for clang static analysis
-
-     memcpy(copy, value.begin, count * sizeof(char_t));
-     copy[count] = 0;
-
-     return copy;
- }
-
- xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2])
- {
-     // Builds a string-valued function node that exists in a zero-argument
-     // form (type0) and a one-argument form (type1); the argument, when
-     // present, must be a node set.
-     assert(argc <= 1);
-
-     const bool has_one_arg = (argc == 1);
-
-     if (has_one_arg && args[0]->rettype() != xpath_type_node_set)
-         throw_error("Function has to be applied to node set");
-
-     const ast_type_t chosen = (argc == 0) ? type0 : type1;
-
-     return new (alloc_node()) xpath_ast_node(chosen, xpath_type_string, args[0]);
- }
-
- xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2]) // maps a function name plus argument count to an AST node; anything unmatched ends in throw_error below
- {
- switch (name.begin[0]) // the first character narrows the set of names that need a full comparison
- {
- case 'b':
- if (name == PUGIXML_TEXT("boolean") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);
-
- break;
-
- case 'c':
- if (name == PUGIXML_TEXT("count") && argc == 1)
- {
- if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
- return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]);
- }
- else if (name == PUGIXML_TEXT("contains") && argc == 2)
- return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
- else if (name == PUGIXML_TEXT("concat") && argc >= 2) // only the first two arguments are passed here although more are accepted
- return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
-
- break;
-
- case 'f':
- if (name == PUGIXML_TEXT("false") && argc == 0)
- return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean);
- else if (name == PUGIXML_TEXT("floor") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);
-
- break;
-
- case 'i':
- if (name == PUGIXML_TEXT("id") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);
-
- break;
-
- case 'l':
- if (name == PUGIXML_TEXT("last") && argc == 0)
- return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number);
- else if (name == PUGIXML_TEXT("lang") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);
- else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
- return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args);
-
- break;
-
- case 'n':
- if (name == PUGIXML_TEXT("name") && argc <= 1)
- return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args);
- else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
- return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);
- else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
- return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("not") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);
- else if (name == PUGIXML_TEXT("number") && argc <= 1)
- return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
-
- break;
-
- case 'p':
- if (name == PUGIXML_TEXT("position") && argc == 0)
- return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number);
-
- break;
-
- case 'r':
- if (name == PUGIXML_TEXT("round") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]);
-
- break;
-
- case 's':
- if (name == PUGIXML_TEXT("string") && argc <= 1)
- return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
- else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
- return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
- else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
- return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
- else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
- return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
- return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
- return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("sum") && argc == 1)
- {
- if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
- return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);
- }
-
- break;
-
- case 't':
- if (name == PUGIXML_TEXT("translate") && argc == 3) // three arguments required, but only the first two are passed to the node constructor
- return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("true") && argc == 0)
- return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean);
-
- break;
-
- default:
- break;
- }
-
- throw_error("Unrecognized function or wrong parameter count"); // reached for unknown names and for known names with an unsupported argument count
-
- return 0;
- }
-
- axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
- {
-     // Looks the name up among the known axis names. On a match 'specified'
-     // is set to true and the axis is returned; otherwise 'specified' is set
-     // to false and the implied child axis is returned.
-     struct axis_entry
-     {
-         const char_t* text;
-         axis_t axis;
-     };
-
-     static const axis_entry known_axes[] =
-     {
-         {PUGIXML_TEXT("ancestor"), axis_ancestor},
-         {PUGIXML_TEXT("ancestor-or-self"), axis_ancestor_or_self},
-         {PUGIXML_TEXT("attribute"), axis_attribute},
-         {PUGIXML_TEXT("child"), axis_child},
-         {PUGIXML_TEXT("descendant"), axis_descendant},
-         {PUGIXML_TEXT("descendant-or-self"), axis_descendant_or_self},
-         {PUGIXML_TEXT("following"), axis_following},
-         {PUGIXML_TEXT("following-sibling"), axis_following_sibling},
-         {PUGIXML_TEXT("namespace"), axis_namespace},
-         {PUGIXML_TEXT("parent"), axis_parent},
-         {PUGIXML_TEXT("preceding"), axis_preceding},
-         {PUGIXML_TEXT("preceding-sibling"), axis_preceding_sibling},
-         {PUGIXML_TEXT("self"), axis_self}
-     };
-
-     for (size_t i = 0; i < sizeof(known_axes) / sizeof(known_axes[0]); ++i)
-     {
-         if (name == known_axes[i].text)
-         {
-             specified = true;
-             return known_axes[i].axis;
-         }
-     }
-
-     specified = false;
-     return axis_child;
- }
-
- nodetest_t parse_node_test_type(const xpath_lexer_string& name)
- {
-     // Translates a node type keyword into its node test kind; any other
-     // name yields nodetest_none.
-     if (name == PUGIXML_TEXT("comment")) return nodetest_type_comment;
-     if (name == PUGIXML_TEXT("node")) return nodetest_type_node;
-     if (name == PUGIXML_TEXT("processing-instruction")) return nodetest_type_pi;
-     if (name == PUGIXML_TEXT("text")) return nodetest_type_text;
-
-     return nodetest_none;
- }
-
- // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
- xpath_ast_node* parse_primary_expression() // parses one primary expression, chosen by the current lexeme
- {
- switch (_lexer.current())
- {
- case lex_var_ref:
- {
- xpath_lexer_string name = _lexer.contents();
-
- if (!_variables)
- throw_error("Unknown variable: variable set is not provided");
-
- xpath_variable* var = 0;
- if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var)) // a false return is reported as out-of-memory
- throw_error_oom();
-
- if (!var)
- throw_error("Unknown variable: variable set does not contain the given name");
-
- _lexer.next();
-
- return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var); // the node's return type is taken from the variable at parse time
- }
-
- case lex_open_brace:
- {
- _lexer.next();
-
- xpath_ast_node* n = parse_expression();
-
- if (_lexer.current() != lex_close_brace)
- throw_error("Unmatched braces");
-
- _lexer.next();
-
- return n; // the parenthesized expression is returned as is, without a wrapper node
- }
-
- case lex_quoted_string:
- {
- const char_t* value = alloc_string(_lexer.contents());
-
- xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value);
- _lexer.next();
-
- return n;
- }
-
- case lex_number:
- {
- double value = 0;
-
- if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value)) // a false return is reported as out-of-memory
- throw_error_oom();
-
- xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value);
- _lexer.next();
-
- return n;
- }
-
- case lex_string:
- {
- xpath_ast_node* args[2] = {0}; // only the first two arguments are stored directly
- size_t argc = 0;
-
- xpath_lexer_string function = _lexer.contents();
- _lexer.next();
-
- xpath_ast_node* last_arg = 0;
-
- if (_lexer.current() != lex_open_brace)
- throw_error("Unrecognized function call");
- _lexer.next();
-
- if (_lexer.current() != lex_close_brace)
- args[argc++] = parse_expression();
-
- while (_lexer.current() != lex_close_brace)
- {
- if (_lexer.current() != lex_comma)
- throw_error("No comma between function arguments");
- _lexer.next();
-
- xpath_ast_node* n = parse_expression();
-
- if (argc < 2) args[argc] = n;
- else last_arg->set_next(n); // third and later arguments are linked after the previously parsed argument
-
- argc++;
- last_arg = n;
- }
-
- _lexer.next();
-
- return parse_function(function, argc, args);
- }
-
- default:
- throw_error("Unrecognizable primary expression");
-
- return 0;
- }
- }
-
- // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
- // Predicate ::= '[' PredicateExpr ']'
- // PredicateExpr ::= Expr
- xpath_ast_node* parse_filter_expression()
- {
-     // A primary expression followed by any number of '[' Expr ']' predicates;
-     // each predicate wraps what was parsed so far in an ast_filter node.
-     xpath_ast_node* filtered = parse_primary_expression();
-
-     for (;;)
-     {
-         if (_lexer.current() != lex_open_square_brace) break;
-
-         _lexer.next();
-
-         xpath_ast_node* predicate = parse_expression();
-
-         // checked only after the predicate expression itself has been parsed
-         const bool is_node_set = (filtered->rettype() == xpath_type_node_set);
-
-         if (!is_node_set) throw_error("Predicate has to be applied to node set");
-
-         filtered = new (alloc_node()) xpath_ast_node(ast_filter, filtered, predicate, predicate_default);
-
-         if (_lexer.current() != lex_close_square_brace) throw_error("Unmatched square brace");
-
-         _lexer.next();
-     }
-
-     return filtered;
- }
-
- // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
- // AxisSpecifier ::= AxisName '::' | '@'?
- // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
- // NameTest ::= '*' | NCName ':' '*' | QName
- // AbbreviatedStep ::= '.' | '..'
- xpath_ast_node* parse_step(xpath_ast_node* set) // parses one location step applied to 'set' (which may be null) and returns the new ast_step node
- {
- if (set && set->rettype() != xpath_type_node_set)
- throw_error("Step has to be applied to node set");
-
- bool axis_specified = false;
- axis_t axis = axis_child; // implied child axis
-
- if (_lexer.current() == lex_axis_attribute)
- {
- axis = axis_attribute;
- axis_specified = true;
-
- _lexer.next();
- }
- else if (_lexer.current() == lex_dot)
- {
- _lexer.next();
-
- return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0); // returns immediately: no predicates are parsed for this abbreviated step
- }
- else if (_lexer.current() == lex_double_dot)
- {
- _lexer.next();
-
- return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0); // returns immediately: no predicates are parsed for this abbreviated step
- }
-
- nodetest_t nt_type = nodetest_none;
- xpath_lexer_string nt_name;
-
- if (_lexer.current() == lex_string)
- {
- // node name test
- nt_name = _lexer.contents();
- _lexer.next();
-
- // was it an axis name?
- if (_lexer.current() == lex_double_colon)
- {
- // parse axis name
- if (axis_specified) throw_error("Two axis specifiers in one step");
-
- axis = parse_axis_name(nt_name, axis_specified);
-
- if (!axis_specified) throw_error("Unknown axis");
-
- // read actual node test
- _lexer.next();
-
- if (_lexer.current() == lex_multiply)
- {
- nt_type = nodetest_all;
- nt_name = xpath_lexer_string();
- _lexer.next();
- }
- else if (_lexer.current() == lex_string)
- {
- nt_name = _lexer.contents();
- _lexer.next();
- }
- else throw_error("Unrecognized node test");
- }
-
- if (nt_type == nodetest_none) // still undecided: the name is a node type test, a processing-instruction test or a name test
- {
- // node type test or processing-instruction
- if (_lexer.current() == lex_open_brace)
- {
- _lexer.next();
-
- if (_lexer.current() == lex_close_brace)
- {
- _lexer.next();
-
- nt_type = parse_node_test_type(nt_name);
-
- if (nt_type == nodetest_none) throw_error("Unrecognized node type");
-
- nt_name = xpath_lexer_string(); // a node type test carries no name
- }
- else if (nt_name == PUGIXML_TEXT("processing-instruction"))
- {
- if (_lexer.current() != lex_quoted_string)
- throw_error("Only literals are allowed as arguments to processing-instruction()");
-
- nt_type = nodetest_pi;
- nt_name = _lexer.contents(); // the literal becomes the name stored with the node test
- _lexer.next();
-
- if (_lexer.current() != lex_close_brace)
- throw_error("Unmatched brace near processing-instruction()");
- _lexer.next();
- }
- else
- throw_error("Unmatched brace near node type test");
-
- }
- // QName or NCName:*
- else
- {
- if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
- {
- nt_name.end--; // erase *
-
- nt_type = nodetest_all_in_namespace;
- }
- else nt_type = nodetest_name;
- }
- }
- }
- else if (_lexer.current() == lex_multiply)
- {
- nt_type = nodetest_all;
- _lexer.next();
- }
- else throw_error("Unrecognized node test");
-
- xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name));
-
- xpath_ast_node* last = 0;
-
- while (_lexer.current() == lex_open_square_brace) // zero or more predicates follow the node test
- {
- _lexer.next();
-
- xpath_ast_node* expr = parse_expression();
-
- xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, 0, expr, predicate_default);
-
- if (_lexer.current() != lex_close_square_brace)
- throw_error("Unmatched square brace");
- _lexer.next();
-
- if (last) last->set_next(pred); // later predicates are chained after the previous one
- else n->set_right(pred); // the first predicate hangs off the step itself
-
- last = pred;
- }
-
- return n;
- }
-
- // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
- xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
- {
-     // One step, then further steps for as long as a '/' or '//' separator
-     // follows. A '//' separator inserts a descendant-or-self::node() step
-     // before the step that comes after it.
-     xpath_ast_node* path = parse_step(set);
-
-     for (lexeme_t separator = _lexer.current();
-          separator == lex_slash || separator == lex_double_slash;
-          separator = _lexer.current())
-     {
-         _lexer.next();
-
-         if (separator == lex_double_slash)
-             path = new (alloc_node()) xpath_ast_node(ast_step, path, axis_descendant_or_self, nodetest_type_node, 0);
-
-         path = parse_step(path);
-     }
-
-     return path;
- }
-
- // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
- // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
- xpath_ast_node* parse_location_path()
- {
-     const lexeme_t leading = _lexer.current();
-
-     // no leading '/' or '//': plain relative path with no starting set
-     if (leading != lex_slash && leading != lex_double_slash)
-         return parse_relative_location_path(0);
-
-     _lexer.next();
-
-     xpath_ast_node* root = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
-
-     if (leading == lex_double_slash)
-     {
-         // '//' always continues with a relative path, reached through descendant-or-self::node()
-         xpath_ast_node* descend = new (alloc_node()) xpath_ast_node(ast_step, root, axis_descendant_or_self, nodetest_type_node, 0);
-
-         return parse_relative_location_path(descend);
-     }
-
-     // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
-     switch (_lexer.current())
-     {
-     case lex_string:
-     case lex_axis_attribute:
-     case lex_dot:
-     case lex_double_dot:
-     case lex_multiply:
-         return parse_relative_location_path(root);
-
-     default:
-         break;
-     }
-
-     // final return kept outside of the branches so that every path visibly ends in a return
-     return root;
- }
-
- // PathExpr ::= LocationPath
- // | FilterExpr
- // | FilterExpr '/' RelativeLocationPath
- // | FilterExpr '//' RelativeLocationPath
- // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
- // UnaryExpr ::= UnionExpr | '-' UnaryExpr
- xpath_ast_node* parse_path_or_unary_expression() // parses a path expression or a unary minus applied to one
- {
- // Clarification.
- // PathExpr begins with either LocationPath or FilterExpr.
- // FilterExpr begins with PrimaryExpr
- // PrimaryExpr begins with '$' in case of it being a variable reference,
- // '(' in case of it being an expression, string literal, number constant or
- // function call.
-
- if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
- _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
- _lexer.current() == lex_string)
- {
- if (_lexer.current() == lex_string)
- {
- // This is either a function call, or not - if not, we shall proceed with location path
- const char_t* state = _lexer.state();
-
- while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state; // look past whitespace without advancing the lexer
-
- if (*state != '(') return parse_location_path();
-
- // This looks like a function call; however this still can be a node-test. Check it.
- if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path();
- }
-
- xpath_ast_node* n = parse_filter_expression();
-
- if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
- {
- lexeme_t l = _lexer.current();
- _lexer.next();
-
- if (l == lex_double_slash)
- {
- if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set");
-
- n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
- }
-
- // select from location path
- return parse_relative_location_path(n);
- }
-
- return n;
- }
- else if (_lexer.current() == lex_minus)
- {
- _lexer.next();
-
- // precedence 7+ - only parses union expressions
- xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7);
-
- return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
- }
- else
- return parse_location_path(); // every other lexeme is handed to the location path parser
- }
-
- struct binary_op_t // describes one binary operator: AST node type, result type and precedence
- {
- ast_type_t asttype;
- xpath_value_type rettype;
- int precedence; // values used below run from 1 ('or') to 7 (union)
-
- binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0) // "not an operator" value
- {
- }
-
- binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
- {
- }
-
- static binary_op_t parse(xpath_lexer& lexer) // classifies the current lexeme; the lexer is not advanced here
- {
- switch (lexer.current())
- {
- case lex_string: // the word operators arrive as plain string lexemes
- if (lexer.contents() == PUGIXML_TEXT("or"))
- return binary_op_t(ast_op_or, xpath_type_boolean, 1);
- else if (lexer.contents() == PUGIXML_TEXT("and"))
- return binary_op_t(ast_op_and, xpath_type_boolean, 2);
- else if (lexer.contents() == PUGIXML_TEXT("div"))
- return binary_op_t(ast_op_divide, xpath_type_number, 6);
- else if (lexer.contents() == PUGIXML_TEXT("mod"))
- return binary_op_t(ast_op_mod, xpath_type_number, 6);
- else
- return binary_op_t();
-
- case lex_equal:
- return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
-
- case lex_not_equal:
- return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
-
- case lex_less:
- return binary_op_t(ast_op_less, xpath_type_boolean, 4);
-
- case lex_greater:
- return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
-
- case lex_less_or_equal:
- return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
-
- case lex_greater_or_equal:
- return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
-
- case lex_plus:
- return binary_op_t(ast_op_add, xpath_type_number, 5);
-
- case lex_minus:
- return binary_op_t(ast_op_subtract, xpath_type_number, 5);
-
- case lex_multiply:
- return binary_op_t(ast_op_multiply, xpath_type_number, 6);
-
- case lex_union:
- return binary_op_t(ast_op_union, xpath_type_node_set, 7);
-
- default:
- return binary_op_t();
- }
- }
- };
-
- xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit) // folds following binary operators with precedence >= limit into lhs and returns the combined tree
- {
- binary_op_t op = binary_op_t::parse(_lexer);
-
- while (op.asttype != ast_unknown && op.precedence >= limit)
- {
- _lexer.next();
-
- xpath_ast_node* rhs = parse_path_or_unary_expression();
-
- binary_op_t nextop = binary_op_t::parse(_lexer);
-
- while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence) // a higher-precedence operator takes rhs as its own left operand first
- {
- rhs = parse_expression_rec(rhs, nextop.precedence);
-
- nextop = binary_op_t::parse(_lexer);
- }
-
- if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
- throw_error("Union operator has to be applied to node sets");
-
- lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs); // operators of equal precedence therefore group to the left
-
- op = binary_op_t::parse(_lexer);
- }
-
- return lhs;
- }
-
- // Expr ::= OrExpr
- // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
- // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
- // EqualityExpr ::= RelationalExpr
- // | EqualityExpr '=' RelationalExpr
- // | EqualityExpr '!=' RelationalExpr
- // RelationalExpr ::= AdditiveExpr
- // | RelationalExpr '<' AdditiveExpr
- // | RelationalExpr '>' AdditiveExpr
- // | RelationalExpr '<=' AdditiveExpr
- // | RelationalExpr '>=' AdditiveExpr
- // AdditiveExpr ::= MultiplicativeExpr
- // | AdditiveExpr '+' MultiplicativeExpr
- // | AdditiveExpr '-' MultiplicativeExpr
- // MultiplicativeExpr ::= UnaryExpr
- // | MultiplicativeExpr '*' UnaryExpr
- // | MultiplicativeExpr 'div' UnaryExpr
- // | MultiplicativeExpr 'mod' UnaryExpr
- xpath_ast_node* parse_expression()
- {
-     // Parse the first operand, then fold in every binary operator that
-     // follows (limit 0 accepts all precedence levels).
-     xpath_ast_node* first_operand = parse_path_or_unary_expression();
-
-     return parse_expression_rec(first_operand, 0);
- }
-
- xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result) // stores the arguments and starts the lexer on the query; _scratch is not initialized here
- {
- }
-
- xpath_ast_node* parse()
- {
-     // The whole query must be consumed by a single expression.
-     xpath_ast_node* root = parse_expression();
-
-     if (_lexer.current() == lex_eof) return root;
-
-     // there are still unparsed tokens left, error
-     throw_error("Incorrect query");
-
-     return root;
- }
-
- static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result) // entry point: builds a parser on this frame and runs it
- {
- xpath_parser parser(query, variables, alloc, result);
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- int error = setjmp(parser._error_handler); // throw_error comes back here via longjmp with a non-zero value
-
- return (error == 0) ? parser.parse() : 0; // a failed parse is reported as a null root
- #else
- return parser.parse(); // failures propagate as exceptions thrown by the parser
- #endif
- }
- };
-
- struct xpath_query_impl
- {
-     // Parsed query root plus the allocator that owns the query's memory;
-     // the allocator starts out on the memory block embedded in this object.
-     xpath_ast_node* root;
-     xpath_allocator alloc;
-     xpath_memory_block block;
-
-     xpath_query_impl(): root(0), alloc(&block)
-     {
-         block.capacity = sizeof(block.data);
-         block.next = 0;
-     }
-
-     // Allocates and constructs an instance through xml_memory; returns 0 when
-     // the allocation fails.
-     static xpath_query_impl* create()
-     {
-         void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
-
-         return memory ? new (memory) xpath_query_impl() : 0;
-     }
-
-     // Releases everything owned by an instance obtained from create().
-     static void destroy(xpath_query_impl* impl)
-     {
-         // free all allocated pages
-         impl->alloc.release();
-
-         // free allocator memory (with the first page)
-         xml_memory::deallocate(impl);
-     }
- };
-
- PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd)
- {
-     // A missing query evaluates to an empty string.
-     if (impl == 0) return xpath_string();
-
- #ifdef PUGIXML_NO_EXCEPTIONS
-     // evaluation errors jump back here; the result is then an empty string
-     if (setjmp(sd.error_handler) != 0) return xpath_string();
- #endif
-
-     // the node is evaluated as a context of position 1, size 1
-     xpath_context context(n, 1, 1);
-
-     return impl->root->eval_string(context, sd.stack);
- }
-
- PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl)
- {
-     // Returns the query root when it can be evaluated as a node set.
-     // A missing query yields 0; a query of another type yields 0 without
-     // exceptions and throws xpath_exception otherwise.
-     if (!impl) return 0;
-
-     if (impl->root->rettype() == xpath_type_node_set) return impl->root;
-
- #ifdef PUGIXML_NO_EXCEPTIONS
-     return 0;
- #else
-     xpath_parse_result res;
-     res.error = "Expression does not evaluate to node set";
-
-     throw xpath_exception(res);
- #endif
- }
-PUGI__NS_END
-
-namespace pugi
-{
-#ifndef PUGIXML_NO_EXCEPTIONS
- PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_) // keeps a copy of the parse result
- {
- assert(_result.error); // an exception is expected to carry an error message
- }
-
- PUGI__FN const char* xpath_exception::what() const throw() // error message of the stored parse result
- {
- return _result.error;
- }
-
- PUGI__FN const xpath_parse_result& xpath_exception::result() const // the stored parse result, by reference
- {
- return _result;
- }
-#endif
-
- PUGI__FN xpath_node::xpath_node() // both members are default-constructed
- {
- }
-
- PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_) // wraps a node; the attribute member is default-constructed
- {
- }
-
- PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_) // the parent is stored only when the attribute tests true
- {
- }
-
- PUGI__FN xml_node xpath_node::node() const
- {
-     // When an attribute is held, _node is that attribute's parent and is
-     // not exposed through this accessor.
-     if (_attribute) return xml_node();
-
-     return _node;
- }
-
- PUGI__FN xml_attribute xpath_node::attribute() const // the held attribute, returned by value
- {
- return _attribute;
- }
-
- PUGI__FN xml_node xpath_node::parent() const
- {
-     // For an attribute, _node already is the parent; for a plain node the
-     // parent is looked up on the node itself.
-     if (_attribute) return _node;
-
-     return _node.parent();
- }
-
- PUGI__FN static void unspecified_bool_xpath_node(xpath_node***) // empty body; the conversion operator below returns this function's address as its non-null value
- {
- }
-
- PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
- {
-     // Non-null when either a node or an attribute is held.
-     const bool holds_something = _node || _attribute;
-
-     if (!holds_something) return 0;
-
-     return unspecified_bool_xpath_node;
- }
-
- PUGI__FN bool xpath_node::operator!() const
- {
-     // True when neither a node nor an attribute is held.
-     const bool holds_something = _node || _attribute;
-
-     return !holds_something;
- }
-
- PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
- {
-     // Equal only when both the node part and the attribute part match.
-     if (!(_node == n._node)) return false;
-
-     return _attribute == n._attribute;
- }
-
- PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
- {
-     // Different as soon as either the node part or the attribute part differs.
-     if (_node != n._node) return true;
-
-     return _attribute != n._attribute;
- }
-
-#ifdef __BORLANDC__
- PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs) // compiled only when __BORLANDC__ is defined
- {
- return (bool)lhs && rhs; // lhs is explicitly converted to bool before the built-in operator is applied
- }
-
- PUGI__FN bool operator||(const xpath_node& lhs, bool rhs) // compiled only when __BORLANDC__ is defined
- {
- return (bool)lhs || rhs; // lhs is explicitly converted to bool before the built-in operator is applied
- }
-#endif
-
- PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_) // replaces the contents with a copy of [begin_, end_) and records type_
- {
- assert(begin_ <= end_);
-
- size_t size_ = static_cast<size_t>(end_ - begin_);
-
- if (size_ <= 1) // zero or one element fits into the inline _storage slot, no heap buffer needed
- {
- // deallocate old buffer
- if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
-
- // use internal buffer
- if (begin_ != end_) _storage = *begin_;
-
- _begin = &_storage;
- _end = &_storage + size_;
- _type = type_;
- }
- else
- {
- // make heap copy
- xpath_node* storage = static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
-
- if (!storage)
- {
- #ifdef PUGIXML_NO_EXCEPTIONS
- return; // allocation failed: the set keeps its previous contents and nothing is reported
- #else
- throw std::bad_alloc();
- #endif
- }
-
- memcpy(storage, begin_, size_ * sizeof(xpath_node)); // elements are copied bytewise
-
- // deallocate old buffer
- if (_begin != &_storage) impl::xml_memory::deallocate(_begin); // released only after the new copy has been made
-
- // finalize
- _begin = storage;
- _end = storage + size_;
- _type = type_;
- }
- }
-
-#if __cplusplus >= 201103
- PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs)
- {
- _type = rhs._type;
- _storage = rhs._storage;
- _begin = (rhs._begin == &rhs._storage) ? &_storage : rhs._begin;
- _end = _begin + (rhs._end - rhs._begin);
-
- rhs._type = type_unsorted;
- rhs._begin = &rhs._storage;
- rhs._end = rhs._begin;
- }
-#endif
-
- PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage)
- {
- }
-
- PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(&_storage), _end(&_storage)
- {
- _assign(begin_, end_, type_);
- }
-
- PUGI__FN xpath_node_set::~xpath_node_set()
- {
- if (_begin != &_storage)
- impl::xml_memory::deallocate(_begin);
- }
-
- PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(&_storage), _end(&_storage)
- {
- _assign(ns._begin, ns._end, ns._type);
- }
-
- PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
- {
- if (this == &ns) return *this;
-
- _assign(ns._begin, ns._end, ns._type);
-
- return *this;
- }
-
-#if __cplusplus >= 201103
- PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs): _type(type_unsorted), _begin(&_storage), _end(&_storage)
- {
- _move(rhs);
- }
-
- PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs)
- {
- if (this == &rhs) return *this;
-
- if (_begin != &_storage)
- impl::xml_memory::deallocate(_begin);
-
- _move(rhs);
-
- return *this;
- }
-#endif
-
- PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
- {
- return _type;
- }
-
- PUGI__FN size_t xpath_node_set::size() const
- {
- return _end - _begin;
- }
-
- PUGI__FN bool xpath_node_set::empty() const
- {
- return _begin == _end;
- }
-
- PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
- {
- assert(index < size());
- return _begin[index];
- }
-
- PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
- {
- return _begin;
- }
-
- PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
- {
- return _end;
- }
-
- PUGI__FN void xpath_node_set::sort(bool reverse)
- {
- _type = impl::xpath_sort(_begin, _end, _type, reverse);
- }
-
- PUGI__FN xpath_node xpath_node_set::first() const
- {
- return impl::xpath_first(_begin, _end, _type);
- }
-
- PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
- {
- }
-
- PUGI__FN xpath_parse_result::operator bool() const
- {
- return error == 0;
- }
-
- PUGI__FN const char* xpath_parse_result::description() const
- {
- return error ? error : "No error";
- }
-
- PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0)
- {
- }
-
- PUGI__FN const char_t* xpath_variable::name() const
- {
- switch (_type)
- {
- case xpath_type_node_set:
- return static_cast<const impl::xpath_variable_node_set*>(this)->name;
-
- case xpath_type_number:
- return static_cast<const impl::xpath_variable_number*>(this)->name;
-
- case xpath_type_string:
- return static_cast<const impl::xpath_variable_string*>(this)->name;
-
- case xpath_type_boolean:
- return static_cast<const impl::xpath_variable_boolean*>(this)->name;
-
- default:
- assert(!"Invalid variable type");
- return 0;
- }
- }
-
- PUGI__FN xpath_value_type xpath_variable::type() const
- {
- return _type;
- }
-
- PUGI__FN bool xpath_variable::get_boolean() const
- {
- return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
- }
-
- PUGI__FN double xpath_variable::get_number() const
- {
- return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
- }
-
- PUGI__FN const char_t* xpath_variable::get_string() const
- {
- const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
- return value ? value : PUGIXML_TEXT("");
- }
-
- PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
- {
- return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
- }
-
- PUGI__FN bool xpath_variable::set(bool value)
- {
- if (_type != xpath_type_boolean) return false;
-
- static_cast<impl::xpath_variable_boolean*>(this)->value = value;
- return true;
- }
-
- PUGI__FN bool xpath_variable::set(double value)
- {
- if (_type != xpath_type_number) return false;
-
- static_cast<impl::xpath_variable_number*>(this)->value = value;
- return true;
- }
-
- PUGI__FN bool xpath_variable::set(const char_t* value)
- {
- if (_type != xpath_type_string) return false;
-
- impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
-
- // duplicate string
- size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
-
- char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
- if (!copy) return false;
-
- memcpy(copy, value, size);
-
- // replace old string
- if (var->value) impl::xml_memory::deallocate(var->value);
- var->value = copy;
-
- return true;
- }
-
- PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
- {
- if (_type != xpath_type_node_set) return false;
-
- static_cast<impl::xpath_variable_node_set*>(this)->value = value;
- return true;
- }
-
- PUGI__FN xpath_variable_set::xpath_variable_set()
- {
- for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
- _data[i] = 0;
- }
-
- PUGI__FN xpath_variable_set::~xpath_variable_set()
- {
- for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
- _destroy(_data[i]);
- }
-
- PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs)
- {
- for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
- _data[i] = 0;
-
- _assign(rhs);
- }
-
- PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs)
- {
- if (this == &rhs) return *this;
-
- _assign(rhs);
-
- return *this;
- }
-
-#if __cplusplus >= 201103
- PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs)
- {
- for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
- {
- _data[i] = rhs._data[i];
- rhs._data[i] = 0;
- }
- }
-
- PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs)
- {
- for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
- {
- _destroy(_data[i]);
-
- _data[i] = rhs._data[i];
- rhs._data[i] = 0;
- }
-
- return *this;
- }
-#endif
-
- PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs)
- {
- xpath_variable_set temp;
-
- for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
- if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i]))
- return;
-
- _swap(temp);
- }
-
- PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs)
- {
- for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
- {
- xpath_variable* chain = _data[i];
-
- _data[i] = rhs._data[i];
- rhs._data[i] = chain;
- }
- }
-
- PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const
- {
- const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
- size_t hash = impl::hash_string(name) % hash_size;
-
- // look for existing variable
- for (xpath_variable* var = _data[hash]; var; var = var->_next)
- if (impl::strequal(var->name(), name))
- return var;
-
- return 0;
- }
-
- PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result)
- {
- xpath_variable* last = 0;
-
- while (var)
- {
- // allocate storage for new variable
- xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name());
- if (!nvar) return false;
-
- // link the variable to the result immediately to handle failures gracefully
- if (last)
- last->_next = nvar;
- else
- *out_result = nvar;
-
- last = nvar;
-
- // copy the value; this can fail due to out-of-memory conditions
- if (!impl::copy_xpath_variable(nvar, var)) return false;
-
- var = var->_next;
- }
-
- return true;
- }
-
- PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var)
- {
- while (var)
- {
- xpath_variable* next = var->_next;
-
- impl::delete_xpath_variable(var->_type, var);
-
- var = next;
- }
- }
-
- PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
- {
- const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
- size_t hash = impl::hash_string(name) % hash_size;
-
- // look for existing variable
- for (xpath_variable* var = _data[hash]; var; var = var->_next)
- if (impl::strequal(var->name(), name))
- return var->type() == type ? var : 0;
-
- // add new variable
- xpath_variable* result = impl::new_xpath_variable(type, name);
-
- if (result)
- {
- result->_next = _data[hash];
-
- _data[hash] = result;
- }
-
- return result;
- }
-
- PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
- {
- xpath_variable* var = add(name, xpath_type_boolean);
- return var ? var->set(value) : false;
- }
-
- PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
- {
- xpath_variable* var = add(name, xpath_type_number);
- return var ? var->set(value) : false;
- }
-
- PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
- {
- xpath_variable* var = add(name, xpath_type_string);
- return var ? var->set(value) : false;
- }
-
- PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
- {
- xpath_variable* var = add(name, xpath_type_node_set);
- return var ? var->set(value) : false;
- }
-
- PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
- {
- return _find(name);
- }
-
- PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
- {
- return _find(name);
- }
-
- PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
- {
- impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
-
- if (!qimpl)
- {
- #ifdef PUGIXML_NO_EXCEPTIONS
- _result.error = "Out of memory";
- #else
- throw std::bad_alloc();
- #endif
- }
- else
- {
- using impl::auto_deleter; // MSVC7 workaround
- auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy);
-
- qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
-
- if (qimpl->root)
- {
- qimpl->root->optimize(&qimpl->alloc);
-
- _impl = impl.release();
- _result.error = 0;
- }
- }
- }
-
- PUGI__FN xpath_query::xpath_query(): _impl(0)
- {
- }
-
- PUGI__FN xpath_query::~xpath_query()
- {
- if (_impl)
- impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
- }
-
-#if __cplusplus >= 201103
- PUGI__FN xpath_query::xpath_query(xpath_query&& rhs)
- {
- _impl = rhs._impl;
- _result = rhs._result;
- rhs._impl = 0;
- rhs._result = xpath_parse_result();
- }
-
- PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs)
- {
- if (this == &rhs) return *this;
-
- if (_impl)
- impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
-
- _impl = rhs._impl;
- _result = rhs._result;
- rhs._impl = 0;
- rhs._result = xpath_parse_result();
-
- return *this;
- }
-#endif
-
- PUGI__FN xpath_value_type xpath_query::return_type() const
- {
- if (!_impl) return xpath_type_none;
-
- return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
- }
-
- PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
- {
- if (!_impl) return false;
-
- impl::xpath_context c(n, 1, 1);
- impl::xpath_stack_data sd;
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- if (setjmp(sd.error_handler)) return false;
- #endif
-
- return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
- }
-
- PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
- {
- if (!_impl) return impl::gen_nan();
-
- impl::xpath_context c(n, 1, 1);
- impl::xpath_stack_data sd;
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- if (setjmp(sd.error_handler)) return impl::gen_nan();
- #endif
-
- return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
- }
-
-#ifndef PUGIXML_NO_STL
- PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
- {
- impl::xpath_stack_data sd;
-
- impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
-
- return string_t(r.c_str(), r.length());
- }
-#endif
-
- PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
- {
- impl::xpath_stack_data sd;
-
- impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
-
- size_t full_size = r.length() + 1;
-
- if (capacity > 0)
- {
- size_t size = (full_size < capacity) ? full_size : capacity;
- assert(size > 0);
-
- memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
- buffer[size - 1] = 0;
- }
-
- return full_size;
- }
-
- PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
- {
- impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
- if (!root) return xpath_node_set();
-
- impl::xpath_context c(n, 1, 1);
- impl::xpath_stack_data sd;
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- if (setjmp(sd.error_handler)) return xpath_node_set();
- #endif
-
- impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
-
- return xpath_node_set(r.begin(), r.end(), r.type());
- }
-
- PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const
- {
- impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
- if (!root) return xpath_node();
-
- impl::xpath_context c(n, 1, 1);
- impl::xpath_stack_data sd;
-
- #ifdef PUGIXML_NO_EXCEPTIONS
- if (setjmp(sd.error_handler)) return xpath_node();
- #endif
-
- impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
-
- return r.first();
- }
-
- PUGI__FN const xpath_parse_result& xpath_query::result() const
- {
- return _result;
- }
-
- PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
- {
- }
-
- PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
- {
- return _impl ? unspecified_bool_xpath_query : 0;
- }
-
- PUGI__FN bool xpath_query::operator!() const
- {
- return !_impl;
- }
-
- PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const
- {
- xpath_query q(query, variables);
- return select_node(q);
- }
-
- PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const
- {
- return query.evaluate_node(*this);
- }
-
- PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
- {
- xpath_query q(query, variables);
- return select_nodes(q);
- }
-
- PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
- {
- return query.evaluate_node_set(*this);
- }
-
- PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
- {
- xpath_query q(query, variables);
- return select_single_node(q);
- }
-
- PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
- {
- return query.evaluate_node(*this);
- }
-}
-
-#endif
-
-#ifdef __BORLANDC__
-# pragma option pop
-#endif
-
-// Intel C++ does not properly keep warning state for function templates,
-// so popping warning state at the end of translation unit leads to warnings in the middle.
-#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
-# pragma warning(pop)
-#endif
-
-// Undefine all local macros (makes sure we're not leaking macros in header-only mode)
-#undef PUGI__NO_INLINE
-#undef PUGI__UNLIKELY
-#undef PUGI__STATIC_ASSERT
-#undef PUGI__DMC_VOLATILE
-#undef PUGI__MSVC_CRT_VERSION
-#undef PUGI__NS_BEGIN
-#undef PUGI__NS_END
-#undef PUGI__FN
-#undef PUGI__FN_NO_INLINE
-#undef PUGI__GETPAGE_IMPL
-#undef PUGI__GETPAGE
-#undef PUGI__NODETYPE
-#undef PUGI__IS_CHARTYPE_IMPL
-#undef PUGI__IS_CHARTYPE
-#undef PUGI__IS_CHARTYPEX
-#undef PUGI__ENDSWITH
-#undef PUGI__SKIPWS
-#undef PUGI__OPTSET
-#undef PUGI__PUSHNODE
-#undef PUGI__POPNODE
-#undef PUGI__SCANFOR
-#undef PUGI__SCANWHILE
-#undef PUGI__SCANWHILE_UNROLL
-#undef PUGI__ENDSEG
-#undef PUGI__THROW_ERROR
-#undef PUGI__CHECK_ERROR
-
-#endif
-
-/**
- * Copyright (c) 2006-2015 Arseny Kapoulkine
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
diff --git a/contrib/moses2/pugixml.hpp b/contrib/moses2/pugixml.hpp
deleted file mode 100644
index 9f7c3fbcf..000000000
--- a/contrib/moses2/pugixml.hpp
+++ /dev/null
@@ -1,1400 +0,0 @@
-/**
- * pugixml parser - version 1.7
- * --------------------------------------------------------
- * Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
- * Report bugs and download new versions at http://pugixml.org/
- *
- * This library is distributed under the MIT License. See notice at the end
- * of this file.
- *
- * This work is based on the pugxml parser, which is:
- * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
- */
-
-#ifndef PUGIXML_VERSION
-// Define version macro; evaluates to major * 100 + minor so that it's safe to use in less-than comparisons
-# define PUGIXML_VERSION 170
-#endif
-
-// Include user configuration file (this can define various configuration macros)
-#include "pugiconfig.hpp"
-
-#ifndef HEADER_PUGIXML_HPP
-#define HEADER_PUGIXML_HPP
-
-// Include stddef.h for size_t and ptrdiff_t
-#include <stddef.h>
-
-// Include exception header for XPath
-#if !defined(PUGIXML_NO_XPATH) && !defined(PUGIXML_NO_EXCEPTIONS)
-# include <exception>
-#endif
-
-// Include STL headers
-#ifndef PUGIXML_NO_STL
-# include <iterator>
-# include <iosfwd>
-# include <string>
-#endif
-
-// Macro for deprecated features
-#ifndef PUGIXML_DEPRECATED
-# if defined(__GNUC__)
-# define PUGIXML_DEPRECATED __attribute__((deprecated))
-# elif defined(_MSC_VER) && _MSC_VER >= 1300
-# define PUGIXML_DEPRECATED __declspec(deprecated)
-# else
-# define PUGIXML_DEPRECATED
-# endif
-#endif
-
-// If no API is defined, assume default
-#ifndef PUGIXML_API
-# define PUGIXML_API
-#endif
-
-// If no API for classes is defined, assume default
-#ifndef PUGIXML_CLASS
-# define PUGIXML_CLASS PUGIXML_API
-#endif
-
-// If no API for functions is defined, assume default
-#ifndef PUGIXML_FUNCTION
-# define PUGIXML_FUNCTION PUGIXML_API
-#endif
-
-// If the platform is known to have long long support, enable long long functions
-#ifndef PUGIXML_HAS_LONG_LONG
-# if __cplusplus >= 201103
-# define PUGIXML_HAS_LONG_LONG
-# elif defined(_MSC_VER) && _MSC_VER >= 1400
-# define PUGIXML_HAS_LONG_LONG
-# endif
-#endif
-
-// Character interface macros
-#ifdef PUGIXML_WCHAR_MODE
-# define PUGIXML_TEXT(t) L ## t
-# define PUGIXML_CHAR wchar_t
-#else
-# define PUGIXML_TEXT(t) t
-# define PUGIXML_CHAR char
-#endif
-
-namespace pugi
-{
- // Character type used for all internal storage and operations; depends on PUGIXML_WCHAR_MODE
- typedef PUGIXML_CHAR char_t;
-
-#ifndef PUGIXML_NO_STL
- // String type used for operations that work with STL string; depends on PUGIXML_WCHAR_MODE
- typedef std::basic_string<PUGIXML_CHAR, std::char_traits<PUGIXML_CHAR>, std::allocator<PUGIXML_CHAR> > string_t;
-#endif
-}
-
-// The PugiXML namespace
-namespace pugi
-{
- // Tree node types
- enum xml_node_type
- {
- node_null, // Empty (null) node handle
- node_document, // A document tree's absolute root
- node_element, // Element tag, i.e. '<node/>'
- node_pcdata, // Plain character data, i.e. 'text'
- node_cdata, // Character data, i.e. '<![CDATA[text]]>'
- node_comment, // Comment tag, i.e. '<!-- text -->'
- node_pi, // Processing instruction, i.e. '<?name?>'
- node_declaration, // Document declaration, i.e. '<?xml version="1.0"?>'
- node_doctype // Document type declaration, i.e. '<!DOCTYPE doc>'
- };
-
- // Parsing options
-
- // Minimal parsing mode (equivalent to turning all other flags off).
- // Only elements and PCDATA sections are added to the DOM tree, no text conversions are performed.
- const unsigned int parse_minimal = 0x0000;
-
- // This flag determines if processing instructions (node_pi) are added to the DOM tree. This flag is off by default.
- const unsigned int parse_pi = 0x0001;
-
- // This flag determines if comments (node_comment) are added to the DOM tree. This flag is off by default.
- const unsigned int parse_comments = 0x0002;
-
- // This flag determines if CDATA sections (node_cdata) are added to the DOM tree. This flag is on by default.
- const unsigned int parse_cdata = 0x0004;
-
- // This flag determines if plain character data (node_pcdata) that consist only of whitespace are added to the DOM tree.
- // This flag is off by default; turning it on usually results in slower parsing and more memory consumption.
- const unsigned int parse_ws_pcdata = 0x0008;
-
- // This flag determines if character and entity references are expanded during parsing. This flag is on by default.
- const unsigned int parse_escapes = 0x0010;
-
- // This flag determines if EOL characters are normalized (converted to #xA) during parsing. This flag is on by default.
- const unsigned int parse_eol = 0x0020;
-
- // This flag determines if attribute values are normalized using CDATA normalization rules during parsing. This flag is on by default.
- const unsigned int parse_wconv_attribute = 0x0040;
-
- // This flag determines if attribute values are normalized using NMTOKENS normalization rules during parsing. This flag is off by default.
- const unsigned int parse_wnorm_attribute = 0x0080;
-
- // This flag determines if document declaration (node_declaration) is added to the DOM tree. This flag is off by default.
- const unsigned int parse_declaration = 0x0100;
-
- // This flag determines if document type declaration (node_doctype) is added to the DOM tree. This flag is off by default.
- const unsigned int parse_doctype = 0x0200;
-
- // This flag determines if plain character data (node_pcdata) that is the only child of the parent node and that consists only
- // of whitespace is added to the DOM tree.
- // This flag is off by default; turning it on may result in slower parsing and more memory consumption.
- const unsigned int parse_ws_pcdata_single = 0x0400;
-
- // This flag determines if leading and trailing whitespace is to be removed from plain character data. This flag is off by default.
- const unsigned int parse_trim_pcdata = 0x0800;
-
- // This flag determines if plain character data that does not have a parent node is added to the DOM tree, and if an empty document
- // is a valid document. This flag is off by default.
- const unsigned int parse_fragment = 0x1000;
-
- // The default parsing mode.
- // Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,
- // End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
- const unsigned int parse_default = parse_cdata | parse_escapes | parse_wconv_attribute | parse_eol;
-
- // The full parsing mode.
- // Nodes of all types are added to the DOM tree, character/reference entities are expanded,
- // End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
- const unsigned int parse_full = parse_default | parse_pi | parse_comments | parse_declaration | parse_doctype;
-
- // These flags determine the encoding of input data for XML document
- enum xml_encoding
- {
- encoding_auto, // Auto-detect input encoding using BOM or < / <? detection; use UTF8 if BOM is not found
- encoding_utf8, // UTF8 encoding
- encoding_utf16_le, // Little-endian UTF16
- encoding_utf16_be, // Big-endian UTF16
- encoding_utf16, // UTF16 with native endianness
- encoding_utf32_le, // Little-endian UTF32
- encoding_utf32_be, // Big-endian UTF32
- encoding_utf32, // UTF32 with native endianness
- encoding_wchar, // The same encoding wchar_t has (either UTF16 or UTF32)
- encoding_latin1
- };
-
- // Formatting flags
-
- // Indent the nodes that are written to output stream with as many indentation strings as deep the node is in DOM tree. This flag is on by default.
- const unsigned int format_indent = 0x01;
-
- // Write encoding-specific BOM to the output stream. This flag is off by default.
- const unsigned int format_write_bom = 0x02;
-
- // Use raw output mode (no indentation and no line breaks are written). This flag is off by default.
- const unsigned int format_raw = 0x04;
-
- // Omit default XML declaration even if there is no declaration in the document. This flag is off by default.
- const unsigned int format_no_declaration = 0x08;
-
- // Don't escape attribute values and PCDATA contents. This flag is off by default.
- const unsigned int format_no_escapes = 0x10;
-
- // Open file using text mode in xml_document::save_file. This enables special character (i.e. new-line) conversions on some systems. This flag is off by default.
- const unsigned int format_save_file_text = 0x20;
-
- // Write every attribute on a new line with appropriate indentation. This flag is off by default.
- const unsigned int format_indent_attributes = 0x40;
-
- // The default set of formatting flags.
- // Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none.
- const unsigned int format_default = format_indent;
-
- // Forward declarations
- struct xml_attribute_struct;
- struct xml_node_struct;
-
- class xml_node_iterator;
- class xml_attribute_iterator;
- class xml_named_node_iterator;
-
- class xml_tree_walker;
-
- struct xml_parse_result;
-
- class xml_node;
-
- class xml_text;
-
- #ifndef PUGIXML_NO_XPATH
- class xpath_node;
- class xpath_node_set;
- class xpath_query;
- class xpath_variable_set;
- #endif
-
- // Range-based for loop support
- template <typename It> class xml_object_range
- {
- public:
- typedef It const_iterator;
- typedef It iterator;
-
- xml_object_range(It b, It e): _begin(b), _end(e)
- {
- }
-
- It begin() const { return _begin; }
- It end() const { return _end; }
-
- private:
- It _begin, _end;
- };
-
- // Writer interface for node printing (see xml_node::print)
- class PUGIXML_CLASS xml_writer
- {
- public:
- virtual ~xml_writer() {}
-
- // Write memory chunk into stream/file/whatever
- virtual void write(const void* data, size_t size) = 0;
- };
-
- // xml_writer implementation for FILE*
- class PUGIXML_CLASS xml_writer_file: public xml_writer
- {
- public:
- // Construct writer from a FILE* object; void* is used to avoid header dependencies on stdio
- xml_writer_file(void* file);
-
- virtual void write(const void* data, size_t size);
-
- private:
- void* file;
- };
-
- #ifndef PUGIXML_NO_STL
- // xml_writer implementation for streams
- class PUGIXML_CLASS xml_writer_stream: public xml_writer
- {
- public:
- // Construct writer from an output stream object
- xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream);
- xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream);
-
- virtual void write(const void* data, size_t size);
-
- private:
- std::basic_ostream<char, std::char_traits<char> >* narrow_stream;
- std::basic_ostream<wchar_t, std::char_traits<wchar_t> >* wide_stream;
- };
- #endif
-
- // A light-weight handle for manipulating attributes in DOM tree
- class PUGIXML_CLASS xml_attribute
- {
- friend class xml_attribute_iterator;
- friend class xml_node;
-
- private:
- xml_attribute_struct* _attr;
-
- typedef void (*unspecified_bool_type)(xml_attribute***);
-
- public:
- // Default constructor. Constructs an empty attribute.
- xml_attribute();
-
- // Constructs attribute from internal pointer
- explicit xml_attribute(xml_attribute_struct* attr);
-
- // Safe bool conversion operator
- operator unspecified_bool_type() const;
-
- // Borland C++ workaround
- bool operator!() const;
-
- // Comparison operators (compares wrapped attribute pointers)
- bool operator==(const xml_attribute& r) const;
- bool operator!=(const xml_attribute& r) const;
- bool operator<(const xml_attribute& r) const;
- bool operator>(const xml_attribute& r) const;
- bool operator<=(const xml_attribute& r) const;
- bool operator>=(const xml_attribute& r) const;
-
- // Check if attribute is empty
- bool empty() const;
-
- // Get attribute name/value, or "" if attribute is empty
- const char_t* name() const;
- const char_t* value() const;
-
- // Get attribute value, or the default value if attribute is empty
- const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const;
-
- // Get attribute value as a number, or the default value if conversion did not succeed or attribute is empty
- int as_int(int def = 0) const;
- unsigned int as_uint(unsigned int def = 0) const;
- double as_double(double def = 0) const;
- float as_float(float def = 0) const;
-
- #ifdef PUGIXML_HAS_LONG_LONG
- long long as_llong(long long def = 0) const;
- unsigned long long as_ullong(unsigned long long def = 0) const;
- #endif
-
- // Get attribute value as bool (returns true if first character is in '1tTyY' set), or the default value if attribute is empty
- bool as_bool(bool def = false) const;
-
- // Set attribute name/value (returns false if attribute is empty or there is not enough memory)
- bool set_name(const char_t* rhs);
- bool set_value(const char_t* rhs);
-
- // Set attribute value with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
- bool set_value(int rhs);
- bool set_value(unsigned int rhs);
- bool set_value(double rhs);
- bool set_value(float rhs);
- bool set_value(bool rhs);
-
- #ifdef PUGIXML_HAS_LONG_LONG
- bool set_value(long long rhs);
- bool set_value(unsigned long long rhs);
- #endif
-
- // Set attribute value (equivalent to set_value without error checking)
- xml_attribute& operator=(const char_t* rhs);
- xml_attribute& operator=(int rhs);
- xml_attribute& operator=(unsigned int rhs);
- xml_attribute& operator=(double rhs);
- xml_attribute& operator=(float rhs);
- xml_attribute& operator=(bool rhs);
-
- #ifdef PUGIXML_HAS_LONG_LONG
- xml_attribute& operator=(long long rhs);
- xml_attribute& operator=(unsigned long long rhs);
- #endif
-
- // Get next/previous attribute in the attribute list of the parent node
- xml_attribute next_attribute() const;
- xml_attribute previous_attribute() const;
-
- // Get hash value (unique for handles to the same object)
- size_t hash_value() const;
-
- // Get internal pointer
- xml_attribute_struct* internal_object() const;
- };
-
-#ifdef __BORLANDC__
- // Borland C++ workaround
- bool PUGIXML_FUNCTION operator&&(const xml_attribute& lhs, bool rhs);
- bool PUGIXML_FUNCTION operator||(const xml_attribute& lhs, bool rhs);
-#endif
-
- // A light-weight handle for manipulating nodes in DOM tree
- class PUGIXML_CLASS xml_node
- {
- friend class xml_attribute_iterator;
- friend class xml_node_iterator;
- friend class xml_named_node_iterator;
-
- protected:
- xml_node_struct* _root;
-
- typedef void (*unspecified_bool_type)(xml_node***);
-
- public:
- // Default constructor. Constructs an empty node.
- xml_node();
-
- // Constructs node from internal pointer
- explicit xml_node(xml_node_struct* p);
-
- // Safe bool conversion operator
- operator unspecified_bool_type() const;
-
- // Borland C++ workaround
- bool operator!() const;
-
- // Comparison operators (compares wrapped node pointers)
- bool operator==(const xml_node& r) const;
- bool operator!=(const xml_node& r) const;
- bool operator<(const xml_node& r) const;
- bool operator>(const xml_node& r) const;
- bool operator<=(const xml_node& r) const;
- bool operator>=(const xml_node& r) const;
-
- // Check if node is empty.
- bool empty() const;
-
- // Get node type
- xml_node_type type() const;
-
- // Get node name, or "" if node is empty or it has no name
- const char_t* name() const;
-
- // Get node value, or "" if node is empty or it has no value
- // Note: For <node>text</node> node.value() does not return "text"! Use child_value() or text() methods to access text inside nodes.
- const char_t* value() const;
-
- // Get attribute list
- xml_attribute first_attribute() const;
- xml_attribute last_attribute() const;
-
- // Get children list
- xml_node first_child() const;
- xml_node last_child() const;
-
- // Get next/previous sibling in the children list of the parent node
- xml_node next_sibling() const;
- xml_node previous_sibling() const;
-
- // Get parent node
- xml_node parent() const;
-
- // Get root of DOM tree this node belongs to
- xml_node root() const;
-
- // Get text object for the current node
- xml_text text() const;
-
- // Get child, attribute or next/previous sibling with the specified name
- xml_node child(const char_t* name) const;
- xml_attribute attribute(const char_t* name) const;
- xml_node next_sibling(const char_t* name) const;
- xml_node previous_sibling(const char_t* name) const;
-
- // Get attribute, starting the search from a hint (and updating hint so that searching for a sequence of attributes is fast)
- xml_attribute attribute(const char_t* name, xml_attribute& hint) const;
-
- // Get child value of current node; that is, value of the first child node of type PCDATA/CDATA
- const char_t* child_value() const;
-
- // Get child value of child with specified name. Equivalent to child(name).child_value().
- const char_t* child_value(const char_t* name) const;
-
- // Set node name/value (returns false if node is empty, there is not enough memory, or node can not have name/value)
- bool set_name(const char_t* rhs);
- bool set_value(const char_t* rhs);
-
- // Add attribute with specified name. Returns added attribute, or empty attribute on errors.
- xml_attribute append_attribute(const char_t* name);
- xml_attribute prepend_attribute(const char_t* name);
- xml_attribute insert_attribute_after(const char_t* name, const xml_attribute& attr);
- xml_attribute insert_attribute_before(const char_t* name, const xml_attribute& attr);
-
- // Add a copy of the specified attribute. Returns added attribute, or empty attribute on errors.
- xml_attribute append_copy(const xml_attribute& proto);
- xml_attribute prepend_copy(const xml_attribute& proto);
- xml_attribute insert_copy_after(const xml_attribute& proto, const xml_attribute& attr);
- xml_attribute insert_copy_before(const xml_attribute& proto, const xml_attribute& attr);
-
- // Add child node with specified type. Returns added node, or empty node on errors.
- xml_node append_child(xml_node_type type = node_element);
- xml_node prepend_child(xml_node_type type = node_element);
- xml_node insert_child_after(xml_node_type type, const xml_node& node);
- xml_node insert_child_before(xml_node_type type, const xml_node& node);
-
- // Add child element with specified name. Returns added node, or empty node on errors.
- xml_node append_child(const char_t* name);
- xml_node prepend_child(const char_t* name);
- xml_node insert_child_after(const char_t* name, const xml_node& node);
- xml_node insert_child_before(const char_t* name, const xml_node& node);
-
- // Add a copy of the specified node as a child. Returns added node, or empty node on errors.
- xml_node append_copy(const xml_node& proto);
- xml_node prepend_copy(const xml_node& proto);
- xml_node insert_copy_after(const xml_node& proto, const xml_node& node);
- xml_node insert_copy_before(const xml_node& proto, const xml_node& node);
-
- // Move the specified node to become a child of this node. Returns moved node, or empty node on errors.
- xml_node append_move(const xml_node& moved);
- xml_node prepend_move(const xml_node& moved);
- xml_node insert_move_after(const xml_node& moved, const xml_node& node);
- xml_node insert_move_before(const xml_node& moved, const xml_node& node);
-
- // Remove specified attribute
- bool remove_attribute(const xml_attribute& a);
- bool remove_attribute(const char_t* name);
-
- // Remove specified child
- bool remove_child(const xml_node& n);
- bool remove_child(const char_t* name);
-
- // Parses buffer as an XML document fragment and appends all nodes as children of the current node.
- // Copies/converts the buffer, so it may be deleted or changed after the function returns.
- // Note: append_buffer allocates memory that has the lifetime of the owning document; removing the appended nodes does not immediately reclaim that memory.
- xml_parse_result append_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
-
- // Find attribute using predicate. Returns first attribute for which predicate returned true.
- template <typename Predicate> xml_attribute find_attribute(Predicate pred) const
- {
- if (!_root) return xml_attribute();
-
- for (xml_attribute attrib = first_attribute(); attrib; attrib = attrib.next_attribute())
- if (pred(attrib))
- return attrib;
-
- return xml_attribute();
- }
-
- // Find child node using predicate. Returns first child for which predicate returned true.
- template <typename Predicate> xml_node find_child(Predicate pred) const
- {
- if (!_root) return xml_node();
-
- for (xml_node node = first_child(); node; node = node.next_sibling())
- if (pred(node))
- return node;
-
- return xml_node();
- }
-
- // Find node from subtree using predicate. Returns first node from subtree (depth-first), for which predicate returned true.
- template <typename Predicate> xml_node find_node(Predicate pred) const
- {
- if (!_root) return xml_node();
-
- xml_node cur = first_child();
-
- while (cur._root && cur._root != _root)
- {
- if (pred(cur)) return cur;
-
- if (cur.first_child()) cur = cur.first_child();
- else if (cur.next_sibling()) cur = cur.next_sibling();
- else
- {
- while (!cur.next_sibling() && cur._root != _root) cur = cur.parent();
-
- if (cur._root != _root) cur = cur.next_sibling();
- }
- }
-
- return xml_node();
- }
-
- // Find child node by attribute name/value
- xml_node find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const;
- xml_node find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const;
-
- #ifndef PUGIXML_NO_STL
- // Get the absolute node path from root as a text string.
- string_t path(char_t delimiter = '/') const;
- #endif
-
- // Search for a node by path consisting of node names and . or .. elements.
- xml_node first_element_by_path(const char_t* path, char_t delimiter = '/') const;
-
- // Recursively traverse subtree with xml_tree_walker
- bool traverse(xml_tree_walker& walker);
-
- #ifndef PUGIXML_NO_XPATH
- // Select single node by evaluating XPath query. Returns first node from the resulting node set.
- xpath_node select_node(const char_t* query, xpath_variable_set* variables = 0) const;
- xpath_node select_node(const xpath_query& query) const;
-
- // Select node set by evaluating XPath query
- xpath_node_set select_nodes(const char_t* query, xpath_variable_set* variables = 0) const;
- xpath_node_set select_nodes(const xpath_query& query) const;
-
- // (deprecated: use select_node instead) Select single node by evaluating XPath query.
- xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const;
- xpath_node select_single_node(const xpath_query& query) const;
-
- #endif
-
- // Print subtree using a writer object
- void print(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
-
- #ifndef PUGIXML_NO_STL
- // Print subtree to stream
- void print(std::basic_ostream<char, std::char_traits<char> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
- void print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, unsigned int depth = 0) const;
- #endif
-
- // Child nodes iterators
- typedef xml_node_iterator iterator;
-
- iterator begin() const;
- iterator end() const;
-
- // Attribute iterators
- typedef xml_attribute_iterator attribute_iterator;
-
- attribute_iterator attributes_begin() const;
- attribute_iterator attributes_end() const;
-
- // Range-based for support
- xml_object_range<xml_node_iterator> children() const;
- xml_object_range<xml_named_node_iterator> children(const char_t* name) const;
- xml_object_range<xml_attribute_iterator> attributes() const;
-
- // Get node offset in parsed file/string (in char_t units) for debugging purposes
- ptrdiff_t offset_debug() const;
-
- // Get hash value (unique for handles to the same object)
- size_t hash_value() const;
-
- // Get internal pointer
- xml_node_struct* internal_object() const;
- };
-
-#ifdef __BORLANDC__
- // Borland C++ workaround
- bool PUGIXML_FUNCTION operator&&(const xml_node& lhs, bool rhs);
- bool PUGIXML_FUNCTION operator||(const xml_node& lhs, bool rhs);
-#endif
-
- // A helper for working with text inside PCDATA nodes
- class PUGIXML_CLASS xml_text
- {
- friend class xml_node;
-
- xml_node_struct* _root;
-
- typedef void (*unspecified_bool_type)(xml_text***);
-
- explicit xml_text(xml_node_struct* root);
-
- xml_node_struct* _data_new();
- xml_node_struct* _data() const;
-
- public:
- // Default constructor. Constructs an empty object.
- xml_text();
-
- // Safe bool conversion operator
- operator unspecified_bool_type() const;
-
- // Borland C++ workaround
- bool operator!() const;
-
- // Check if text object is empty
- bool empty() const;
-
- // Get text, or "" if object is empty
- const char_t* get() const;
-
- // Get text, or the default value if object is empty
- const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const;
-
- // Get text as a number, or the default value if conversion did not succeed or object is empty
- int as_int(int def = 0) const;
- unsigned int as_uint(unsigned int def = 0) const;
- double as_double(double def = 0) const;
- float as_float(float def = 0) const;
-
- #ifdef PUGIXML_HAS_LONG_LONG
- long long as_llong(long long def = 0) const;
- unsigned long long as_ullong(unsigned long long def = 0) const;
- #endif
-
- // Get text as bool (returns true if first character is in '1tTyY' set), or the default value if object is empty
- bool as_bool(bool def = false) const;
-
- // Set text (returns false if object is empty or there is not enough memory)
- bool set(const char_t* rhs);
-
- // Set text with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
- bool set(int rhs);
- bool set(unsigned int rhs);
- bool set(double rhs);
- bool set(float rhs);
- bool set(bool rhs);
-
- #ifdef PUGIXML_HAS_LONG_LONG
- bool set(long long rhs);
- bool set(unsigned long long rhs);
- #endif
-
- // Set text (equivalent to set without error checking)
- xml_text& operator=(const char_t* rhs);
- xml_text& operator=(int rhs);
- xml_text& operator=(unsigned int rhs);
- xml_text& operator=(double rhs);
- xml_text& operator=(float rhs);
- xml_text& operator=(bool rhs);
-
- #ifdef PUGIXML_HAS_LONG_LONG
- xml_text& operator=(long long rhs);
- xml_text& operator=(unsigned long long rhs);
- #endif
-
- // Get the data node (node_pcdata or node_cdata) for this object
- xml_node data() const;
- };
-
-#ifdef __BORLANDC__
- // Borland C++ workaround
- bool PUGIXML_FUNCTION operator&&(const xml_text& lhs, bool rhs);
- bool PUGIXML_FUNCTION operator||(const xml_text& lhs, bool rhs);
-#endif
-
- // Child node iterator (a bidirectional iterator over a collection of xml_node)
- class PUGIXML_CLASS xml_node_iterator
- {
- friend class xml_node;
-
- private:
- mutable xml_node _wrap;
- xml_node _parent;
-
- xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent);
-
- public:
- // Iterator traits
- typedef ptrdiff_t difference_type;
- typedef xml_node value_type;
- typedef xml_node* pointer;
- typedef xml_node& reference;
-
- #ifndef PUGIXML_NO_STL
- typedef std::bidirectional_iterator_tag iterator_category;
- #endif
-
- // Default constructor
- xml_node_iterator();
-
- // Construct an iterator which points to the specified node
- xml_node_iterator(const xml_node& node);
-
- // Iterator operators
- bool operator==(const xml_node_iterator& rhs) const;
- bool operator!=(const xml_node_iterator& rhs) const;
-
- xml_node& operator*() const;
- xml_node* operator->() const;
-
- const xml_node_iterator& operator++();
- xml_node_iterator operator++(int);
-
- const xml_node_iterator& operator--();
- xml_node_iterator operator--(int);
- };
-
- // Attribute iterator (a bidirectional iterator over a collection of xml_attribute)
- class PUGIXML_CLASS xml_attribute_iterator
- {
- friend class xml_node;
-
- private:
- mutable xml_attribute _wrap;
- xml_node _parent;
-
- xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent);
-
- public:
- // Iterator traits
- typedef ptrdiff_t difference_type;
- typedef xml_attribute value_type;
- typedef xml_attribute* pointer;
- typedef xml_attribute& reference;
-
- #ifndef PUGIXML_NO_STL
- typedef std::bidirectional_iterator_tag iterator_category;
- #endif
-
- // Default constructor
- xml_attribute_iterator();
-
- // Construct an iterator which points to the specified attribute
- xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent);
-
- // Iterator operators
- bool operator==(const xml_attribute_iterator& rhs) const;
- bool operator!=(const xml_attribute_iterator& rhs) const;
-
- xml_attribute& operator*() const;
- xml_attribute* operator->() const;
-
- const xml_attribute_iterator& operator++();
- xml_attribute_iterator operator++(int);
-
- const xml_attribute_iterator& operator--();
- xml_attribute_iterator operator--(int);
- };
-
- // Named node range helper
- class PUGIXML_CLASS xml_named_node_iterator
- {
- friend class xml_node;
-
- public:
- // Iterator traits
- typedef ptrdiff_t difference_type;
- typedef xml_node value_type;
- typedef xml_node* pointer;
- typedef xml_node& reference;
-
- #ifndef PUGIXML_NO_STL
- typedef std::bidirectional_iterator_tag iterator_category;
- #endif
-
- // Default constructor
- xml_named_node_iterator();
-
- // Construct an iterator which points to the specified node
- xml_named_node_iterator(const xml_node& node, const char_t* name);
-
- // Iterator operators
- bool operator==(const xml_named_node_iterator& rhs) const;
- bool operator!=(const xml_named_node_iterator& rhs) const;
-
- xml_node& operator*() const;
- xml_node* operator->() const;
-
- const xml_named_node_iterator& operator++();
- xml_named_node_iterator operator++(int);
-
- const xml_named_node_iterator& operator--();
- xml_named_node_iterator operator--(int);
-
- private:
- mutable xml_node _wrap;
- xml_node _parent;
- const char_t* _name;
-
- xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name);
- };
-
- // Abstract tree walker class (see xml_node::traverse)
- class PUGIXML_CLASS xml_tree_walker
- {
- friend class xml_node;
-
- private:
- int _depth;
-
- protected:
- // Get current traversal depth
- int depth() const;
-
- public:
- xml_tree_walker();
- virtual ~xml_tree_walker();
-
- // Callback that is called when traversal begins
- virtual bool begin(xml_node& node);
-
- // Callback that is called for each node traversed
- virtual bool for_each(xml_node& node) = 0;
-
- // Callback that is called when traversal ends
- virtual bool end(xml_node& node);
- };
-
- // Parsing status, returned as part of xml_parse_result object
- enum xml_parse_status
- {
- status_ok = 0, // No error
-
- status_file_not_found, // File was not found during load_file()
- status_io_error, // Error reading from file/stream
- status_out_of_memory, // Could not allocate memory
- status_internal_error, // Internal error occurred
-
- status_unrecognized_tag, // Parser could not determine tag type
-
- status_bad_pi, // Parsing error occurred while parsing document declaration/processing instruction
- status_bad_comment, // Parsing error occurred while parsing comment
- status_bad_cdata, // Parsing error occurred while parsing CDATA section
- status_bad_doctype, // Parsing error occurred while parsing document type declaration
- status_bad_pcdata, // Parsing error occurred while parsing PCDATA section
- status_bad_start_element, // Parsing error occurred while parsing start element tag
- status_bad_attribute, // Parsing error occurred while parsing element attribute
- status_bad_end_element, // Parsing error occurred while parsing end element tag
- status_end_element_mismatch,// There was a mismatch of start-end tags (closing tag had incorrect name, some tag was not closed or there was an excessive closing tag)
-
- status_append_invalid_root, // Unable to append nodes since root type is not node_element or node_document (exclusive to xml_node::append_buffer)
-
- status_no_document_element // Parsing resulted in a document without element nodes
- };
-
- // Parsing result
- struct PUGIXML_CLASS xml_parse_result
- {
- // Parsing status (see xml_parse_status)
- xml_parse_status status;
-
- // Last parsed offset (in char_t units from start of input data)
- ptrdiff_t offset;
-
- // Source document encoding
- xml_encoding encoding;
-
- // Default constructor, initializes object to failed state
- xml_parse_result();
-
- // Cast to bool operator
- operator bool() const;
-
- // Get error description
- const char* description() const;
- };
-
- // Document class (DOM tree root)
- class PUGIXML_CLASS xml_document: public xml_node
- {
- private:
- char_t* _buffer;
-
- char _memory[192];
-
- // Non-copyable semantics
- xml_document(const xml_document&);
- xml_document& operator=(const xml_document&);
-
- void create();
- void destroy();
-
- public:
- // Default constructor, makes empty document
- xml_document();
-
- // Destructor, invalidates all node/attribute handles to this document
- ~xml_document();
-
- // Removes all nodes, leaving the empty document
- void reset();
-
- // Removes all nodes, then copies the entire contents of the specified document
- void reset(const xml_document& proto);
-
- #ifndef PUGIXML_NO_STL
- // Load document from stream.
- xml_parse_result load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
- xml_parse_result load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options = parse_default);
- #endif
-
- // (deprecated: use load_string instead) Load document from zero-terminated string. No encoding conversions are applied.
- xml_parse_result load(const char_t* contents, unsigned int options = parse_default);
-
- // Load document from zero-terminated string. No encoding conversions are applied.
- xml_parse_result load_string(const char_t* contents, unsigned int options = parse_default);
-
- // Load document from file
- xml_parse_result load_file(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
- xml_parse_result load_file(const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
-
- // Load document from buffer. Copies/converts the buffer, so it may be deleted or changed after the function returns.
- xml_parse_result load_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
-
- // Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data).
- // You should ensure that buffer data will persist throughout the document's lifetime, and free the buffer memory manually once document is destroyed.
- xml_parse_result load_buffer_inplace(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
-
- // Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data).
- // You should allocate the buffer with pugixml allocation function; document will free the buffer when it is no longer needed (you can't use it anymore).
- xml_parse_result load_buffer_inplace_own(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
-
- // Save XML document to writer (semantics is slightly different from xml_node::print, see documentation for details).
- void save(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
-
- #ifndef PUGIXML_NO_STL
- // Save XML document to stream (semantics is slightly different from xml_node::print, see documentation for details).
- void save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
- void save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default) const;
- #endif
-
- // Save XML to file
- bool save_file(const char* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
- bool save_file(const wchar_t* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
-
- // Get document element
- xml_node document_element() const;
- };
-
-#ifndef PUGIXML_NO_XPATH
- // XPath query return type
- enum xpath_value_type
- {
- xpath_type_none, // Unknown type (query failed to compile)
- xpath_type_node_set, // Node set (xpath_node_set)
- xpath_type_number, // Number
- xpath_type_string, // String
- xpath_type_boolean // Boolean
- };
-
- // XPath parsing result
- struct PUGIXML_CLASS xpath_parse_result
- {
- // Error message (0 if no error)
- const char* error;
-
- // Last parsed offset (in char_t units from string start)
- ptrdiff_t offset;
-
- // Default constructor, initializes object to failed state
- xpath_parse_result();
-
- // Cast to bool operator
- operator bool() const;
-
- // Get error description
- const char* description() const;
- };
-
- // A single XPath variable
- class PUGIXML_CLASS xpath_variable
- {
- friend class xpath_variable_set;
-
- protected:
- xpath_value_type _type;
- xpath_variable* _next;
-
- xpath_variable(xpath_value_type type);
-
- // Non-copyable semantics
- xpath_variable(const xpath_variable&);
- xpath_variable& operator=(const xpath_variable&);
-
- public:
- // Get variable name
- const char_t* name() const;
-
- // Get variable type
- xpath_value_type type() const;
-
- // Get variable value; no type conversion is performed, default value (false, NaN, empty string, empty node set) is returned on type mismatch error
- bool get_boolean() const;
- double get_number() const;
- const char_t* get_string() const;
- const xpath_node_set& get_node_set() const;
-
- // Set variable value; no type conversion is performed, false is returned on type mismatch error
- bool set(bool value);
- bool set(double value);
- bool set(const char_t* value);
- bool set(const xpath_node_set& value);
- };
-
- // A set of XPath variables
- class PUGIXML_CLASS xpath_variable_set
- {
- private:
- xpath_variable* _data[64];
-
- void _assign(const xpath_variable_set& rhs);
- void _swap(xpath_variable_set& rhs);
-
- xpath_variable* _find(const char_t* name) const;
-
- static bool _clone(xpath_variable* var, xpath_variable** out_result);
- static void _destroy(xpath_variable* var);
-
- public:
- // Default constructor/destructor
- xpath_variable_set();
- ~xpath_variable_set();
-
- // Copy constructor/assignment operator
- xpath_variable_set(const xpath_variable_set& rhs);
- xpath_variable_set& operator=(const xpath_variable_set& rhs);
-
- #if __cplusplus >= 201103
- // Move semantics support
- xpath_variable_set(xpath_variable_set&& rhs);
- xpath_variable_set& operator=(xpath_variable_set&& rhs);
- #endif
-
- // Add a new variable or get the existing one, if the types match
- xpath_variable* add(const char_t* name, xpath_value_type type);
-
- // Set value of an existing variable; no type conversion is performed, false is returned if there is no such variable or if types mismatch
- bool set(const char_t* name, bool value);
- bool set(const char_t* name, double value);
- bool set(const char_t* name, const char_t* value);
- bool set(const char_t* name, const xpath_node_set& value);
-
- // Get existing variable by name
- xpath_variable* get(const char_t* name);
- const xpath_variable* get(const char_t* name) const;
- };
-
- // A compiled XPath query object
- class PUGIXML_CLASS xpath_query
- {
- private:
- void* _impl;
- xpath_parse_result _result;
-
- typedef void (*unspecified_bool_type)(xpath_query***);
-
- // Non-copyable semantics
- xpath_query(const xpath_query&);
- xpath_query& operator=(const xpath_query&);
-
- public:
- // Construct a compiled object from XPath expression.
- // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on compilation errors.
- explicit xpath_query(const char_t* query, xpath_variable_set* variables = 0);
-
- // Constructor
- xpath_query();
-
- // Destructor
- ~xpath_query();
-
- #if __cplusplus >= 201103
- // Move semantics support
- xpath_query(xpath_query&& rhs);
- xpath_query& operator=(xpath_query&& rhs);
- #endif
-
- // Get query expression return type
- xpath_value_type return_type() const;
-
- // Evaluate expression as boolean value in the specified context; performs type conversion if necessary.
- // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
- bool evaluate_boolean(const xpath_node& n) const;
-
- // Evaluate expression as double value in the specified context; performs type conversion if necessary.
- // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
- double evaluate_number(const xpath_node& n) const;
-
- #ifndef PUGIXML_NO_STL
- // Evaluate expression as string value in the specified context; performs type conversion if necessary.
- // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
- string_t evaluate_string(const xpath_node& n) const;
- #endif
-
- // Evaluate expression as string value in the specified context; performs type conversion if necessary.
- // At most capacity characters are written to the destination buffer, full result size is returned (includes terminating zero).
- // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
- // If PUGIXML_NO_EXCEPTIONS is defined, returns empty set instead.
- size_t evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const;
-
- // Evaluate expression as node set in the specified context.
- // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on type mismatch and std::bad_alloc on out of memory errors.
- // If PUGIXML_NO_EXCEPTIONS is defined, returns empty node set instead.
- xpath_node_set evaluate_node_set(const xpath_node& n) const;
-
- // Evaluate expression as node set in the specified context.
- // Return first node in document order, or empty node if node set is empty.
- // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on type mismatch and std::bad_alloc on out of memory errors.
- // If PUGIXML_NO_EXCEPTIONS is defined, returns empty node instead.
- xpath_node evaluate_node(const xpath_node& n) const;
-
- // Get parsing result (used to get compilation errors in PUGIXML_NO_EXCEPTIONS mode)
- const xpath_parse_result& result() const;
-
- // Safe bool conversion operator
- operator unspecified_bool_type() const;
-
- // Borland C++ workaround
- bool operator!() const;
- };
-
- #ifndef PUGIXML_NO_EXCEPTIONS
- // XPath exception class
- class PUGIXML_CLASS xpath_exception: public std::exception
- {
- private:
- xpath_parse_result _result;
-
- public:
- // Construct exception from parse result
- explicit xpath_exception(const xpath_parse_result& result);
-
- // Get error message
- virtual const char* what() const throw();
-
- // Get parse result
- const xpath_parse_result& result() const;
- };
- #endif
-
- // XPath node class (either xml_node or xml_attribute)
- class PUGIXML_CLASS xpath_node
- {
- private:
- xml_node _node;
- xml_attribute _attribute;
-
- typedef void (*unspecified_bool_type)(xpath_node***);
-
- public:
- // Default constructor; constructs empty XPath node
- xpath_node();
-
- // Construct XPath node from XML node/attribute
- xpath_node(const xml_node& node);
- xpath_node(const xml_attribute& attribute, const xml_node& parent);
-
- // Get node/attribute, if any
- xml_node node() const;
- xml_attribute attribute() const;
-
- // Get parent of contained node/attribute
- xml_node parent() const;
-
- // Safe bool conversion operator
- operator unspecified_bool_type() const;
-
- // Borland C++ workaround
- bool operator!() const;
-
- // Comparison operators
- bool operator==(const xpath_node& n) const;
- bool operator!=(const xpath_node& n) const;
- };
-
-#ifdef __BORLANDC__
- // Borland C++ workaround
- bool PUGIXML_FUNCTION operator&&(const xpath_node& lhs, bool rhs);
- bool PUGIXML_FUNCTION operator||(const xpath_node& lhs, bool rhs);
-#endif
-
- // A fixed-size collection of XPath nodes
- class PUGIXML_CLASS xpath_node_set
- {
- public:
- // Collection type
- enum type_t
- {
- type_unsorted, // Not ordered
- type_sorted, // Sorted by document order (ascending)
- type_sorted_reverse // Sorted by document order (descending)
- };
-
- // Constant iterator type
- typedef const xpath_node* const_iterator;
-
- // We define non-constant iterator to be the same as constant iterator so that various generic algorithms (i.e. boost foreach) work
- typedef const xpath_node* iterator;
-
- // Default constructor. Constructs empty set.
- xpath_node_set();
-
- // Constructs a set from iterator range; data is not checked for duplicates and is not sorted according to provided type, so be careful
- xpath_node_set(const_iterator begin, const_iterator end, type_t type = type_unsorted);
-
- // Destructor
- ~xpath_node_set();
-
- // Copy constructor/assignment operator
- xpath_node_set(const xpath_node_set& ns);
- xpath_node_set& operator=(const xpath_node_set& ns);
-
- #if __cplusplus >= 201103
- // Move semantics support
- xpath_node_set(xpath_node_set&& rhs);
- xpath_node_set& operator=(xpath_node_set&& rhs);
- #endif
-
- // Get collection type
- type_t type() const;
-
- // Get collection size
- size_t size() const;
-
- // Indexing operator
- const xpath_node& operator[](size_t index) const;
-
- // Collection iterators
- const_iterator begin() const;
- const_iterator end() const;
-
- // Sort the collection in ascending/descending order by document order
- void sort(bool reverse = false);
-
- // Get first node in the collection by document order
- xpath_node first() const;
-
- // Check if collection is empty
- bool empty() const;
-
- private:
- type_t _type;
-
- xpath_node _storage;
-
- xpath_node* _begin;
- xpath_node* _end;
-
- void _assign(const_iterator begin, const_iterator end, type_t type);
- void _move(xpath_node_set& rhs);
- };
-#endif
-
-#ifndef PUGIXML_NO_STL
- // Convert wide string to UTF8
- std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const wchar_t* str);
- std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >& str);
-
- // Convert UTF8 to wide string
- std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const char* str);
- std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const std::basic_string<char, std::char_traits<char>, std::allocator<char> >& str);
-#endif
-
- // Memory allocation function interface; returns pointer to allocated memory or NULL on failure
- typedef void* (*allocation_function)(size_t size);
-
- // Memory deallocation function interface
- typedef void (*deallocation_function)(void* ptr);
-
- // Override default memory management functions. All subsequent allocations/deallocations will be performed via supplied functions.
- void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate);
-
- // Get current memory management functions
- allocation_function PUGIXML_FUNCTION get_memory_allocation_function();
- deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function();
-}
-
-#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
-namespace std
-{
- // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
- std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_node_iterator&);
- std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_attribute_iterator&);
- std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_named_node_iterator&);
-}
-#endif
-
-#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
-namespace std
-{
- // Workarounds for (non-standard) iterator category detection
- std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_node_iterator&);
- std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_attribute_iterator&);
- std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_named_node_iterator&);
-}
-#endif
-
-#endif
-
-// Make sure implementation is included in header-only mode
-// Use macro expansion in #include to work around QMake (QTBUG-11923)
-#if defined(PUGIXML_HEADER_ONLY) && !defined(PUGIXML_SOURCE)
-# define PUGIXML_SOURCE "pugixml.cpp"
-# include PUGIXML_SOURCE
-#endif
-
-/**
- * Copyright (c) 2006-2015 Arseny Kapoulkine
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
diff --git a/contrib/moses2/server/Server.cpp b/contrib/moses2/server/Server.cpp
deleted file mode 100644
index 4befff98e..000000000
--- a/contrib/moses2/server/Server.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Server.cpp
- *
- * Created on: 1 Apr 2016
- * Author: hieu
- */
-#include <iostream>
-#include "../System.h"
-#include "Server.h"
-#include "Translator.h"
-#include "../parameters/ServerOptions.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-Server::Server(ServerOptions &server_options, System &system)
-:m_server_options(server_options)
-,m_translator(new Translator(*this, system))
-{
- m_registry.addMethod("translate", m_translator);
-}
-
-Server::~Server()
-{
- unlink(m_pidfile.c_str());
-}
-
-void Server::run(System &system)
-{
- xmlrpc_c::serverAbyss myAbyssServer
- (xmlrpc_c::serverAbyss::constrOpt()
- .registryP(&m_registry)
- .portNumber(m_server_options.port) // TCP port on which to listen
- .logFileName(m_server_options.logfile)
- .allowOrigin("*")
- .maxConn(m_server_options.maxConn)
- .maxConnBacklog(m_server_options.maxConnBacklog)
- .keepaliveTimeout(m_server_options.keepaliveTimeout)
- .keepaliveMaxConn(m_server_options.keepaliveMaxConn)
- .timeout(m_server_options.timeout)
- );
- std::ostringstream pidfilename;
- pidfilename << "/tmp/moses-server." << m_server_options.port << ".pid";
- m_pidfile = pidfilename.str();
- std::ofstream pidfile(m_pidfile.c_str());
- pidfile << getpid() << std::endl;
- pidfile.close();
- cerr << "Listening on port " << m_server_options.port << std::endl;
- if (m_server_options.is_serial)
- {
- cerr << "Running server in serial mode." << std::endl;
- while(true) myAbyssServer.runOnce();
- }
- else myAbyssServer.run();
-
- std::cerr << "xmlrpc_c::serverAbyss.run() returned but it should not."
- << std::endl;
-}
-
-ServerOptions const&Server::options() const
-{
- return m_server_options;
-}
-
-
-} /* namespace Moses2 */
diff --git a/contrib/moses2/server/Server.h b/contrib/moses2/server/Server.h
deleted file mode 100644
index d19ef75d2..000000000
--- a/contrib/moses2/server/Server.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Server.h
- *
- * Created on: 1 Apr 2016
- * Author: hieu
- */
-#pragma once
-
-#include <xmlrpc-c/base.hpp>
-#include <xmlrpc-c/registry.hpp>
-#include <xmlrpc-c/server_abyss.hpp>
-
-namespace Moses2
-{
-class System;
-class ServerOptions;
-class Manager;
-
-class Server
-{
-public:
- Server(ServerOptions &server_options, System &system);
- virtual ~Server();
-
- void run(System &system);
-
- ServerOptions const&
- options() const;
-
-protected:
- ServerOptions &m_server_options;
- std::string m_pidfile;
- xmlrpc_c::registry m_registry;
- xmlrpc_c::methodPtr const m_translator;
-
-};
-
-} /* namespace Moses2 */
-
diff --git a/contrib/moses2/server/TranslationRequest.cpp b/contrib/moses2/server/TranslationRequest.cpp
deleted file mode 100644
index dd37d621c..000000000
--- a/contrib/moses2/server/TranslationRequest.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-#include <boost/foreach.hpp>
-#include "TranslationRequest.h"
-#include "../ManagerBase.h"
-#include "../System.h"
-
-using namespace std;
-
-namespace Moses2
-{
-TranslationRequest::
-TranslationRequest(xmlrpc_c::paramList const& paramList,
- boost::condition_variable& cond,
- boost::mutex& mut,
- System &system,
- const std::string &line,
- long translationId)
-:TranslationTask(system, line, translationId)
-,m_cond(cond)
-,m_mutex(mut)
-,m_done(false)
-{
-
-}
-
-boost::shared_ptr<TranslationRequest>
-TranslationRequest::
-create(Translator* translator,
- xmlrpc_c::paramList const& paramList,
- boost::condition_variable& cond,
- boost::mutex& mut,
- System &system,
- const std::string &line,
- long translationId)
-{
- boost::shared_ptr<TranslationRequest> ret;
- TranslationRequest *request = new TranslationRequest(paramList, cond, mut, system, line, translationId);
- ret.reset(request);
- ret->m_translator = translator;
- return ret;
-}
-
-void
-TranslationRequest::
-Run()
-{
- m_mgr->Decode();
-
- string out;
- out = m_mgr->OutputBest();
- m_retData["text"] = xmlrpc_c::value_string(out);
-
- {
- boost::lock_guard<boost::mutex> lock(m_mutex);
- m_done = true;
- }
- m_cond.notify_one();
-
- delete m_mgr;
-}
-
-void TranslationRequest::pack_hypothesis(const Manager& manager, Hypothesis const* h,
- std::string const& key,
- std::map<std::string, xmlrpc_c::value> & dest) const
-{
-
-}
-
-}
diff --git a/contrib/moses2/server/TranslationRequest.h b/contrib/moses2/server/TranslationRequest.h
deleted file mode 100644
index 0f63bc57a..000000000
--- a/contrib/moses2/server/TranslationRequest.h
+++ /dev/null
@@ -1,81 +0,0 @@
-// -*- c++ -*-
-#pragma once
-
-#include <string>
-#include <map>
-#include <vector>
-
-#ifdef WITH_THREADS
-#include <boost/thread.hpp>
-#endif
-
-#include <boost/shared_ptr.hpp>
-#include <xmlrpc-c/base.hpp>
-#include "../TranslationTask.h"
-
-#include "Translator.h"
-
-namespace Moses2
-{
-class Hypothesis;
-class System;
-class Manager;
-
-class
-TranslationRequest : public virtual TranslationTask
-{
-protected:
- std::map<std::string, xmlrpc_c::value> m_retData;
- Translator* m_translator;
-
- boost::condition_variable& m_cond;
- boost::mutex& m_mutex;
- bool m_done;
-
- TranslationRequest(xmlrpc_c::paramList const& paramList,
- boost::condition_variable& cond,
- boost::mutex& mut,
- System &system,
- const std::string &line,
- long translationId);
-
- void
- pack_hypothesis(const Manager& manager, Hypothesis const* h,
- std::string const& key,
- std::map<std::string, xmlrpc_c::value> & dest) const;
-
-public:
-
- static
- boost::shared_ptr<TranslationRequest>
- create(Translator* translator,
- xmlrpc_c::paramList const& paramList,
- boost::condition_variable& cond,
- boost::mutex& mut,
- System &system,
- const std::string &line,
- long translationId);
-
-
- virtual bool
- DeleteAfterExecution() {
- return false;
- }
-
- bool
- IsDone() const {
- return m_done;
- }
-
- std::map<std::string, xmlrpc_c::value> const&
- GetRetData() {
- return m_retData;
- }
-
- void
- Run();
-
-
-};
-
-}
diff --git a/contrib/moses2/server/Translator.cpp b/contrib/moses2/server/Translator.cpp
deleted file mode 100644
index fd855c136..000000000
--- a/contrib/moses2/server/Translator.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Translator.cpp
- *
- * Created on: 1 Apr 2016
- * Author: hieu
- */
-#include <boost/shared_ptr.hpp>
-#include "Translator.h"
-#include "TranslationRequest.h"
-#include "Server.h"
-#include "../parameters/ServerOptions.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-Translator::Translator(Server& server, System &system)
-: m_server(server),
- m_threadPool(server.options().numThreads),
- m_system(system),
- m_translationId(0)
-{
- // signature and help strings are documentation -- the client
- // can query this information with a system.methodSignature and
- // system.methodHelp RPC.
- this->_signature = "S:S";
- this->_help = "Does translation";
-}
-
-Translator::~Translator()
-{
- // TODO Auto-generated destructor stub
-}
-
-void Translator::execute(xmlrpc_c::paramList const& paramList,
- xmlrpc_c::value *const retvalP)
-{
- typedef std::map<std::string,xmlrpc_c::value> param_t;
- param_t const& params = paramList.getStruct(0);
- param_t::const_iterator si;
- si = params.find("text");
- if (si == params.end()) {
- throw xmlrpc_c::fault("Missing source text", xmlrpc_c::fault::CODE_PARSE);
- }
-
- string line = xmlrpc_c::value_string(si->second);
- long translationId;
-
- // get unique id. Thread safe
- {
- boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
- translationId = m_translationId++;
- }
-
- boost::condition_variable cond;
- boost::mutex mut;
- boost::shared_ptr<TranslationRequest> task;
- task = TranslationRequest::create(this, paramList,cond,mut, m_system, line, translationId);
- m_threadPool.Submit(task);
- boost::unique_lock<boost::mutex> lock(mut);
- while (!task->IsDone()) {
- cond.wait(lock);
- }
- *retvalP = xmlrpc_c::value_struct(task->GetRetData());
-}
-
-} /* namespace Moses2 */
diff --git a/contrib/moses2/server/Translator.h b/contrib/moses2/server/Translator.h
deleted file mode 100644
index ba2c68ceb..000000000
--- a/contrib/moses2/server/Translator.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Translator.h
- *
- * Created on: 1 Apr 2016
- * Author: hieu
- */
-
-#pragma once
-#include <boost/thread/shared_mutex.hpp>
-#include <xmlrpc-c/base.hpp>
-#include <xmlrpc-c/registry.hpp>
-#include <xmlrpc-c/server_abyss.hpp>
-#include "../legacy/ThreadPool.h"
-
-namespace Moses2
-{
-class Server;
-class System;
-class Manager;
-
-class Translator : public xmlrpc_c::method
-{
-public:
- Translator(Server& server, System &system);
- virtual ~Translator();
-
- void execute(xmlrpc_c::paramList const& paramList,
- xmlrpc_c::value * const retvalP);
-
-protected:
- Server& m_server;
- Moses2::ThreadPool m_threadPool;
- System &m_system;
- long m_translationId;
- boost::shared_mutex m_accessLock;
-
-};
-
-} /* namespace Moses2 */
-